/*
 * Source provenance (from gitweb scrape):
 *   apple/xnu.git, release xnu-1699.22.81, file bsd/hfs/hfs_vfsops.c
 *   (mirror: git.saurik.com)
 */
1 /*
2 * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1991, 1993, 1994
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * hfs_vfsops.c
66 * derived from @(#)ufs_vfsops.c 8.8 (Berkeley) 5/20/95
67 *
68 * (c) Copyright 1997-2002 Apple Computer, Inc. All rights reserved.
69 *
70 * hfs_vfsops.c -- VFS layer for loadable HFS file system.
71 *
72 */
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/kauth.h>
76
77 #include <sys/ubc.h>
78 #include <sys/ubc_internal.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/mount_internal.h>
81 #include <sys/sysctl.h>
82 #include <sys/malloc.h>
83 #include <sys/stat.h>
84 #include <sys/quota.h>
85 #include <sys/disk.h>
86 #include <sys/paths.h>
87 #include <sys/utfconv.h>
88 #include <sys/kdebug.h>
89 #include <sys/fslog.h>
90 #include <sys/ubc.h>
91
92 #include <kern/locks.h>
93
94 #include <vfs/vfs_journal.h>
95
96 #include <miscfs/specfs/specdev.h>
97 #include <hfs/hfs_mount.h>
98
99 #include <libkern/crypto/md5.h>
100 #include <uuid/uuid.h>
101
102 #include "hfs.h"
103 #include "hfs_catalog.h"
104 #include "hfs_cnode.h"
105 #include "hfs_dbg.h"
106 #include "hfs_endian.h"
107 #include "hfs_hotfiles.h"
108 #include "hfs_quota.h"
109
110 #include "hfscommon/headers/FileMgrInternal.h"
111 #include "hfscommon/headers/BTreesInternal.h"
112
113 #if CONFIG_PROTECT
114 #include <sys/cprotect.h>
115 #endif
116
117 #if CONFIG_HFS_ALLOC_RBTREE
118 #include "hfscommon/headers/HybridAllocator.h"
119 #endif
120
/* Non-zero: emit verbose printf diagnostics on mount/unmount error paths. */
#define HFS_MOUNT_DEBUG 1

#if HFS_DIAGNOSTIC
int hfs_dbg_all = 0;
int hfs_dbg_err = 0;
#endif

/* Enable/disable debugging code for live volume resizing */
int hfs_resize_debug = 0;

/* Lock group/attribute globals shared by all HFS locks
 * (presumably initialized in hfs_init() — defined below this chunk; verify). */
lck_grp_attr_t * hfs_group_attr;
lck_attr_t * hfs_lock_attr;
lck_grp_t * hfs_mutex_group;
lck_grp_t * hfs_rwlock_group;
lck_grp_t * hfs_spinlock_group;

/* Vnode operation vectors registered with VFS; defined outside this file. */
extern struct vnodeopv_desc hfs_vnodeop_opv_desc;
extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc;

/* not static so we can re-use in hfs_readwrite.c for build_path calls */
int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

/* Forward declarations for file-local helpers and VFS operation implementations. */
static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args);
static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context);
static int hfs_flushfiles(struct mount *, int, struct proc *);
static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush);
static int hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp);
static int hfs_init(struct vfsconf *vfsp);
static int hfs_vfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t context);
static int hfs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t context);
static int hfs_start(struct mount *mp, int flags, vfs_context_t context);
static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context);
static int hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec);
static int hfs_journal_replay(vnode_t devvp, vfs_context_t context);
static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context);

/* Red-black-tree allocator setup/teardown (used when CONFIG_HFS_ALLOC_RBTREE). */
void hfs_initialize_allocator (struct hfsmount *hfsmp);
int hfs_teardown_allocator (struct hfsmount *hfsmp);

/* Non-static entry points shared with other HFS translation units. */
int hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context);
int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int journal_replay_only, vfs_context_t context);
int hfs_reload(struct mount *mp);
int hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, vfs_context_t context);
int hfs_sync(struct mount *mp, int waitfor, vfs_context_t context);
int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
               user_addr_t newp, size_t newlen, vfs_context_t context);
int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context);
168
169 /*
170 * Called by vfs_mountroot when mounting HFS Plus as root.
171 */
172
173 int
174 hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
175 {
176 struct hfsmount *hfsmp;
177 ExtendedVCB *vcb;
178 struct vfsstatfs *vfsp;
179 int error;
180
181 if ((error = hfs_mountfs(rvp, mp, NULL, 0, context))) {
182 if (HFS_MOUNT_DEBUG) {
183 printf("hfs_mountroot: hfs_mountfs returned %d, rvp (%p) name (%s) \n",
184 error, rvp, (rvp->v_name ? rvp->v_name : "unknown device"));
185 }
186 return (error);
187 }
188
189 /* Init hfsmp */
190 hfsmp = VFSTOHFS(mp);
191
192 hfsmp->hfs_uid = UNKNOWNUID;
193 hfsmp->hfs_gid = UNKNOWNGID;
194 hfsmp->hfs_dir_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
195 hfsmp->hfs_file_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
196
197 /* Establish the free block reserve. */
198 vcb = HFSTOVCB(hfsmp);
199 vcb->reserveBlocks = ((u_int64_t)vcb->totalBlocks * HFS_MINFREE) / 100;
200 vcb->reserveBlocks = MIN(vcb->reserveBlocks, HFS_MAXRESERVE / vcb->blockSize);
201
202 vfsp = vfs_statfs(mp);
203 (void)hfs_statfs(mp, vfsp, NULL);
204
205 return (0);
206 }
207
208
209 /*
210 * VFS Operations.
211 *
212 * mount system call
213 */
214
/*
 * hfs_mount - VFS mount entry point.
 *
 * Handles three flavors of request, selected by the mount command flags:
 *   MNT_UPDATE + MNT_RELOAD : re-read incore data after fsck (read-only mounts only)
 *   MNT_UPDATE              : rw->ro downgrade, ro->rw upgrade, and/or parameter changes
 *   (no MNT_UPDATE)         : fresh mount via hfs_mountfs()
 *
 * Returns 0 on success or an errno value; on success the cached vfsstatfs
 * for the mount is refreshed.
 */
int
hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct hfsmount *hfsmp = NULL;
	struct hfs_mount_args args;
	int retval = E_NONE;
	u_int32_t cmdflags;

	/* Copy the user-space mount arguments into the kernel. */
	if ((retval = copyin(data, (caddr_t)&args, sizeof(args)))) {
		if (HFS_MOUNT_DEBUG) {
			printf("hfs_mount: copyin returned %d for fs\n", retval);
		}
		return (retval);
	}
	cmdflags = (u_int32_t)vfs_flags(mp) & MNT_CMDFLAGS;
	if (cmdflags & MNT_UPDATE) {
		hfsmp = VFSTOHFS(mp);

		/* Reload incore data after an fsck. */
		if (cmdflags & MNT_RELOAD) {
			if (vfs_isrdonly(mp)) {
				int error = hfs_reload(mp);
				if (error && HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_reload returned %d on %s \n", error, hfsmp->vcbVN);
				}
				return error;
			}
			else {
				/* Reload is only defined for read-only mounts. */
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: MNT_RELOAD not supported on rdwr filesystem %s\n", hfsmp->vcbVN);
				}
				return (EINVAL);
			}
		}

		/* Change to a read-only file system. */
		if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
		    vfs_isrdonly(mp)) {
			int flags;

			/* Set flag to indicate that a downgrade to read-only
			 * is in progress and therefore block any further
			 * modifications to the file system.
			 */
			hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
			hfsmp->hfs_flags |= HFS_RDONLY_DOWNGRADE;
			hfsmp->hfs_downgrading_proc = current_thread();
			hfs_unlock_global (hfsmp);

			/* use VFS_SYNC to push out System (btree) files */
			retval = VFS_SYNC(mp, MNT_WAIT, context);
			if (retval && ((cmdflags & MNT_FORCE) == 0)) {
				/* Sync failed and the downgrade is not forced: undo the
				 * in-progress markers and abort. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: VFS_SYNC returned %d during b-tree sync of %s \n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			flags = WRITECLOSE;
			if (cmdflags & MNT_FORCE)
				flags |= FORCECLOSE;

			if ((retval = hfs_flushfiles(mp, flags, p))) {
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_flushfiles returned %d on %s \n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* mark the volume cleanly unmounted */
			hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask;
			retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			/* NOTE(review): HFS_READ_ONLY is set before retval is
			 * examined; the failure path below clears it again. */
			hfsmp->hfs_flags |= HFS_READ_ONLY;

			/* also get the volume bitmap blocks */
			if (!retval) {
				if (vnode_mount(hfsmp->hfs_devvp) == mp) {
					retval = hfs_fsync(hfsmp->hfs_devvp, MNT_WAIT, 0, p);
				} else {
					/* devvp belongs to another mount; hold a use
					 * count across the fsync. */
					vnode_get(hfsmp->hfs_devvp);
					retval = VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);
					vnode_put(hfsmp->hfs_devvp);
				}
			}
			if (retval) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: FSYNC on devvp returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				/* Roll back the downgrade markers and the read-only bit. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				hfsmp->hfs_flags &= ~HFS_READ_ONLY;
				goto out;
			}
			if (hfsmp->jnl) {
				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

				journal_close(hfsmp->jnl);
				hfsmp->jnl = NULL;

				// Note: we explicitly don't want to shutdown
				// access to the jvp because we may need
				// it later if we go back to being read-write.

				hfs_unlock_global (hfsmp);
			}

#if CONFIG_HFS_ALLOC_RBTREE
			(void) hfs_teardown_allocator(hfsmp);
#endif
			hfsmp->hfs_downgrading_proc = NULL;
		}

		/* Change to a writable file system. */
		if (vfs_iswriteupgrade(mp)) {
#if CONFIG_HFS_ALLOC_RBTREE
			thread_t allocator_thread;
#endif

			/*
			 * On inconsistent disks, do not allow read-write mount
			 * unless it is the boot volume being mounted.
			 */
			if (!(vfs_flags(mp) & MNT_ROOTFS) &&
			    (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask)) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: attempting to mount inconsistent non-root volume %s\n", (hfsmp->vcbVN));
				}
				retval = EINVAL;
				goto out;
			}

			// If the journal was shut-down previously because we were
			// asked to be read-only, let's start it back up again now

			if ( (HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask)
			     && hfsmp->jnl == NULL
			     && hfsmp->jvp != NULL) {
				int jflags;

				if (hfsmp->hfs_flags & HFS_NEED_JNL_RESET) {
					jflags = JOURNAL_RESET;
				} else {
					jflags = 0;
				}

				/* journal_open is performed under the global lock so no
				 * other thread observes a half-initialized journal. */
				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

				hfsmp->jnl = journal_open(hfsmp->jvp,
						(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
						hfsmp->jnl_size,
						hfsmp->hfs_devvp,
						hfsmp->hfs_logical_block_size,
						jflags,
						0,
						hfs_sync_metadata, hfsmp->hfs_mp);

				/*
				 * Set up the trim callback function so that we can add
				 * recently freed extents to the free extent cache once
				 * the transaction that freed them is written to the
				 * journal on disk.
				 */
				if (hfsmp->jnl)
					journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);

				hfs_unlock_global (hfsmp);

				if (hfsmp->jnl == NULL) {
					if (HFS_MOUNT_DEBUG) {
						printf("hfs_mount: journal_open == NULL; couldn't be opened on %s \n", (hfsmp->vcbVN));
					}
					retval = EINVAL;
					goto out;
				} else {
					hfsmp->hfs_flags &= ~HFS_NEED_JNL_RESET;
				}

			}

			/* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
			retval = hfs_erase_unused_nodes(hfsmp);
			if (retval != E_NONE) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_erase_unused_nodes returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* If this mount point was downgraded from read-write
			 * to read-only, clear that information as we are now
			 * moving back to read-write.
			 */
			hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
			hfsmp->hfs_downgrading_proc = NULL;

			/* mark the volume dirty (clear clean unmount bit) */
			hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask;

			retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			if (retval != E_NONE) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_flushvolumeheader returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* Only clear HFS_READ_ONLY after a successful write */
			hfsmp->hfs_flags &= ~HFS_READ_ONLY;


			/* Post-upgrade housekeeping, HFS Plus (non-standard) volumes only. */
			if (!(hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_STANDARD))) {
				/* Setup private/hidden directories for hardlinks. */
				hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
				hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

				hfs_remove_orphans(hfsmp);

				/*
				 * Allow hot file clustering if conditions allow.
				 */
				if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) &&
				    ((hfsmp->hfs_flags & HFS_SSD) == 0)) {
					(void) hfs_recording_init(hfsmp);
				}
				/* Force ACLs on HFS+ file systems. */
				if (vfs_extendedsecurity(HFSTOVFS(hfsmp)) == 0) {
					vfs_setextendedsecurity(HFSTOVFS(hfsmp));
				}
			}

#if CONFIG_HFS_ALLOC_RBTREE
			/*
			 * Like the normal mount case, we need to handle creation of the allocation red-black tree
			 * if we're upgrading from read-only to read-write.
			 *
			 * We spawn a thread to create the pair of red-black trees for this volume.
			 * However, in so doing, we must be careful to ensure that if this thread is still
			 * running after mount has finished, it doesn't interfere with an unmount. Specifically,
			 * we'll need to set a bit that indicates we're in progress building the trees here.
			 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that
			 * notifies the tree generation code that an unmount is waiting.  Also, mark the extent
			 * tree flags that the allocator is enabled for use before we spawn the thread that will start
			 * scanning the RB tree.
			 *
			 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only),
			 * which has not previously encountered a bad error on the red-black tree code.  Also, don't
			 * try to re-build a tree that already exists.
			 */

			if (hfsmp->extent_tree_flags == 0) {
				hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED);
				/* Initialize EOF counter so that the thread can assume it started at initial values */
				hfsmp->offset_block_end = 0;

				InitTree(hfsmp);

				kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread);
				thread_deallocate(allocator_thread);
			}

#endif
		}

		/* Update file system parameters. */
		retval = hfs_changefs(mp, &args);
		if (retval && HFS_MOUNT_DEBUG) {
			printf("hfs_mount: hfs_changefs returned %d for %s\n", retval, hfsmp->vcbVN);
		}

	} else /* not an update request */ {

		/* Set the mount flag to indicate that we support volfs */
		vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DOVOLFS));

		retval = hfs_mountfs(devvp, mp, &args, 0, context);
		if (retval && HFS_MOUNT_DEBUG) {
			printf("hfs_mount: hfs_mountfs returned %d\n", retval);
		}
#if CONFIG_PROTECT
		/*
		 * If above mount call was successful, and this mount is content protection
		 * enabled, then verify the on-disk EA on the root to ensure that the filesystem
		 * is of a suitable vintage to allow the mount to proceed.
		 */
		if ((retval == 0) && (cp_fs_protected (mp))) {
			int err = 0;
			struct cp_root_xattr xattr;
			bzero (&xattr, sizeof(struct cp_root_xattr));
			hfsmp = vfs_fsprivate(mp);

			/* go get the EA to get the version information */
			err = cp_getrootxattr (hfsmp, &xattr);
			/* If there was no EA there, then write one out. */
			if (err == ENOATTR) {
				bzero(&xattr, sizeof(struct cp_root_xattr));
				xattr.major_version = CP_CURRENT_MAJOR_VERS;
				xattr.minor_version = CP_CURRENT_MINOR_VERS;
				xattr.flags = 0;

				err = cp_setrootxattr (hfsmp, &xattr);
			}
			/*
			 * For any other error, including having an out of date CP version in the
			 * EA, or for an error out of cp_setrootxattr, deny the mount
			 * and do not proceed further.
			 */
			if (err || xattr.major_version != CP_CURRENT_MAJOR_VERS) {
				/* Deny the mount and tear down. */
				retval = EPERM;
				(void) hfs_unmount (mp, MNT_FORCE, context);
			}
		}
#endif
	}
out:
	/* On any successful path, refresh the cached statfs data. */
	if (retval == 0) {
		(void)hfs_statfs(mp, vfs_statfs(mp), context);
	}
	return (retval);
}
540
541
/*
 * Per-iteration context handed to hfs_changefs_callback() via vnode_iterate()
 * when hfs_changefs() re-applies mount parameters to live vnodes.
 */
struct hfs_changefs_cargs {
	struct hfsmount *hfsmp;		/* volume being updated */
	int		namefix;	/* non-zero: text encoding changed; fix cached names */
	int		permfix;	/* non-zero: default uid/gid/mask changed */
	int		permswitch;	/* non-zero: MNT_UNKNOWNPERMISSIONS toggled */
};
548
/*
 * vnode_iterate() callback for hfs_changefs(): refresh one cnode's
 * ownership/permissions and/or cached name from its on-disk catalog record.
 * Always returns VNODE_RETURNED so iteration continues.
 */
static int
hfs_changefs_callback(struct vnode *vp, void *cargs)
{
	ExtendedVCB *vcb;
	struct cnode *cp;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct hfs_changefs_cargs *args;
	int lockflags;
	int error;

	args = (struct hfs_changefs_cargs *)cargs;

	cp = VTOC(vp);
	vcb = HFSTOVCB(args->hfsmp);

	/* Re-read this cnode's catalog record under the shared catalog lock. */
	lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
	error = cat_lookup(args->hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL, NULL);
	hfs_systemfile_unlock(args->hfsmp, lockflags);
	if (error) {
		/*
		 * If we couldn't find this guy skip to the next one
		 */
		if (args->namefix)
			cache_purge(vp);

		return (VNODE_RETURNED);
	}
	/*
	 * Get the real uid/gid and perm mask from disk.
	 */
	if (args->permswitch || args->permfix) {
		cp->c_uid = cnattr.ca_uid;
		cp->c_gid = cnattr.ca_gid;
		cp->c_mode = cnattr.ca_mode;
	}
	/*
	 * If we're switching name converters then...
	 *   Remove the existing entry from the namei cache.
	 *   Update name to one based on new encoder.
	 */
	if (args->namefix) {
		cache_purge(vp);
		/* replace_desc takes ownership of cndesc's name buffer. */
		replace_desc(cp, &cndesc);

		if (cndesc.cd_cnid == kHFSRootFolderID) {
			/* Keep the in-memory volume name in sync with the root folder. */
			strlcpy((char *)vcb->vcbVN, (const char *)cp->c_desc.cd_nameptr, NAME_MAX+1);
			cp->c_desc.cd_encoding = args->hfsmp->hfs_encoding;
		}
	} else {
		/* Not consumed by replace_desc, so release the lookup result. */
		cat_releasedesc(&cndesc);
	}
	return (VNODE_RETURNED);
}
603
/*
 * hfs_changefs - apply updated mount parameters (timezone, default
 * uid/gid/mask, HFS text encoding, unknown-permissions mode) to a mounted
 * volume, then walk all live vnodes to bring their cached state in line.
 * Returns 0 or an errno value.
 */
static int
hfs_changefs(struct mount *mp, struct hfs_mount_args *args)
{
	int retval = 0;
	int namefix, permfix, permswitch;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	hfs_to_unicode_func_t	get_unicode_func;
	unicode_to_hfs_func_t	get_hfsname_func;
	u_int32_t old_encoding = 0;
	struct hfs_changefs_cargs cargs;
	u_int32_t mount_flags;

	hfsmp = VFSTOHFS(mp);
	vcb = HFSTOVCB(hfsmp);
	mount_flags = (unsigned int)vfs_flags(mp);

	/* Advertise that a parameter change is in progress. */
	hfsmp->hfs_flags |= HFS_IN_CHANGEFS;

	/* Did MNT_UNKNOWNPERMISSIONS flip relative to the current state? */
	permswitch = (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) &&
	               ((mount_flags & MNT_UNKNOWNPERMISSIONS) == 0)) ||
	              (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) == 0) &&
	               (mount_flags & MNT_UNKNOWNPERMISSIONS)));

	/* The root filesystem must operate with actual permissions: */
	if (permswitch && (mount_flags & MNT_ROOTFS) && (mount_flags & MNT_UNKNOWNPERMISSIONS)) {
		vfs_clearflags(mp, (u_int64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS));	/* Just say "No". */
		retval = EINVAL;
		goto exit;
	}
	if (mount_flags & MNT_UNKNOWNPERMISSIONS)
		hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
	else
		hfsmp->hfs_flags &= ~HFS_UNKNOWN_PERMS;

	namefix = permfix = 0;

	/*
	 * Tracking of hot files requires up-to-date access times.  So if
	 * access time updates are disabled, we must also disable hot files.
	 */
	if (mount_flags & MNT_NOATIME) {
		(void) hfs_recording_suspend(hfsmp);
	}

	/* Change the timezone (Note: this affects all hfs volumes and hfs+ volume create dates) */
	if (args->hfs_timezone.tz_minuteswest != VNOVAL) {
		gTimeZone = args->hfs_timezone;
	}

	/* Change the default uid, gid and/or mask */
	if ((args->hfs_uid != (uid_t)VNOVAL) && (hfsmp->hfs_uid != args->hfs_uid)) {
		hfsmp->hfs_uid = args->hfs_uid;
		if (vcb->vcbSigWord == kHFSPlusSigWord)
			++permfix;
	}
	if ((args->hfs_gid != (gid_t)VNOVAL) && (hfsmp->hfs_gid != args->hfs_gid)) {
		hfsmp->hfs_gid = args->hfs_gid;
		if (vcb->vcbSigWord == kHFSPlusSigWord)
			++permfix;
	}
	if (args->hfs_mask != (mode_t)VNOVAL) {
		if (hfsmp->hfs_dir_mask != (args->hfs_mask & ALLPERMS)) {
			hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
			hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
			/* Optionally strip execute bits from files. */
			if ((args->flags != VNOVAL) && (args->flags & HFSFSMNT_NOXONFILES))
				hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
			if (vcb->vcbSigWord == kHFSPlusSigWord)
				++permfix;
		}
	}

	/* Change the hfs encoding value (hfs only) */
	if ((vcb->vcbSigWord == kHFSSigWord)	&&
	    (args->hfs_encoding != (u_int32_t)VNOVAL) &&
	    (hfsmp->hfs_encoding != args->hfs_encoding)) {

		retval = hfs_getconverter(args->hfs_encoding, &get_unicode_func, &get_hfsname_func);
		if (retval)
			goto exit;

		/*
		 * Connect the new hfs_get_unicode converter but leave
		 * the old hfs_get_hfsname converter in place so that
		 * we can lookup existing vnodes to get their correctly
		 * encoded names.
		 *
		 * When we're all finished, we can then connect the new
		 * hfs_get_hfsname converter and release our interest
		 * in the old converters.
		 */
		hfsmp->hfs_get_unicode = get_unicode_func;
		old_encoding = hfsmp->hfs_encoding;
		hfsmp->hfs_encoding = args->hfs_encoding;
		++namefix;
	}

	/* Nothing changed that requires touching live vnodes. */
	if (!(namefix || permfix || permswitch))
		goto exit;

	/* XXX 3762912 hack to support HFS filesystem 'owner' */
	if (permfix)
		vfs_setowner(mp,
		    hfsmp->hfs_uid == UNKNOWNUID ? KAUTH_UID_NONE : hfsmp->hfs_uid,
		    hfsmp->hfs_gid == UNKNOWNGID ? KAUTH_GID_NONE : hfsmp->hfs_gid);

	/*
	 * For each active vnode fix things that changed
	 *
	 * Note that we can visit a vnode more than once
	 * and we can race with fsync.
	 *
	 * hfs_changefs_callback will be called for each vnode
	 * hung off of this mount point
	 *
	 * The vnode will be properly referenced and unreferenced
	 * around the callback
	 */
	cargs.hfsmp = hfsmp;
	cargs.namefix = namefix;
	cargs.permfix = permfix;
	cargs.permswitch = permswitch;

	vnode_iterate(mp, 0, hfs_changefs_callback, (void *)&cargs);

	/*
	 * If we're switching name converters we can now
	 * connect the new hfs_get_hfsname converter and
	 * release our interest in the old converters.
	 */
	if (namefix) {
		hfsmp->hfs_get_hfsname = get_hfsname_func;
		vcb->volumeNameEncodingHint = args->hfs_encoding;
		(void) hfs_relconverter(old_encoding);
	}
exit:
	hfsmp->hfs_flags &= ~HFS_IN_CHANGEFS;
	return (retval);
}
744
745
/*
 * Per-iteration context handed to hfs_reload_callback() via vnode_iterate();
 * carries the mount and the first error encountered (stops the walk).
 */
struct hfs_reload_cargs {
	struct hfsmount *hfsmp;		/* volume being reloaded */
	int		error;		/* first cat_idlookup failure, or 0 */
};
750
/*
 * vnode_iterate() callback for hfs_reload(): invalidate one vnode's cached
 * buffers and directory hints, then re-read its catalog data by file ID.
 * Returns VNODE_RETURNED to continue, or VNODE_RETURNED_DONE on lookup
 * failure (error recorded in cargs).
 */
static int
hfs_reload_callback(struct vnode *vp, void *cargs)
{
	struct cnode *cp;
	struct hfs_reload_cargs *args;
	int lockflags;

	args = (struct hfs_reload_cargs *)cargs;
	/*
	 * flush all the buffers associated with this node
	 */
	(void) buf_invalidateblks(vp, 0, 0, 0);

	cp = VTOC(vp);
	/*
	 * Remove any directory hints
	 */
	if (vnode_isdir(vp))
		hfs_reldirhints(cp, 0);

	/*
	 * Re-read cnode data for all active vnodes (non-metadata files).
	 */
	if (!vnode_issystem(vp) && !VNODE_IS_RSRC(vp) && (cp->c_fileid >= kHFSFirstUserCatalogNodeID)) {
		struct cat_fork *datafork;
		struct cat_desc desc;

		datafork = cp->c_datafork ? &cp->c_datafork->ff_data : NULL;

		/* lookup by fileID since name could have changed */
		lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		args->error = cat_idlookup(args->hfsmp, cp->c_fileid, 0, &desc, &cp->c_attr, datafork);
		hfs_systemfile_unlock(args->hfsmp, lockflags);
		if (args->error) {
			/* Abort the iteration; hfs_reload() reports args->error. */
			return (VNODE_RETURNED_DONE);
		}

		/* update cnode's catalog descriptor */
		(void) replace_desc(cp, &desc);
	}
	return (VNODE_RETURNED);
}
793
/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix).  The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 *	invalidate all cached meta-data.
 *	invalidate all inactive vnodes.
 *	invalidate all cached file data.
 *	re-read volume header from disk.
 *	re-load meta-file info (extents, file size).
 *	re-load B-tree header data.
 *	re-read cnode data for all active vnodes.
 */
int
hfs_reload(struct mount *mountp)
{
	register struct vnode *devvp;
	struct buf *bp;
	int error, i;
	struct hfsmount *hfsmp;
	struct HFSPlusVolumeHeader *vhp;
	ExtendedVCB *vcb;
	struct filefork *forkp;
	struct cat_desc cndesc;
	struct hfs_reload_cargs args;
	daddr64_t priIDSector;

	hfsmp = VFSTOHFS(mountp);
	vcb = HFSTOVCB(hfsmp);

	/* Plain (non-Plus) HFS volumes cannot be reloaded this way. */
	if (vcb->vcbSigWord == kHFSSigWord)
		return (EINVAL);	/* rooting from HFS is not supported! */

	/*
	 * Invalidate all cached meta-data.
	 */
	devvp = hfsmp->hfs_devvp;
	if (buf_invalidateblks(devvp, 0, 0, 0))
		panic("hfs_reload: dirty1");

	args.hfsmp = hfsmp;
	args.error = 0;
	/*
	 * hfs_reload_callback will be called for each vnode
	 * hung off of this mount point that can't be recycled...
	 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
	 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
	 * properly referenced and unreferenced around the callback
	 */
	vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, hfs_reload_callback, (void *)&args);

	if (args.error)
		return (args.error);

	/*
	 * Re-read VolumeHeader from disk.
	 */
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
			HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	error = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	if (error) {
		if (bp != NULL)
			buf_brelse(bp);
		return (error);
	}

	vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));

	/* Do a quick sanity check */
	if ((SWAP_BE16(vhp->signature) != kHFSPlusSigWord &&
	     SWAP_BE16(vhp->signature) != kHFSXSigWord) ||
	    (SWAP_BE16(vhp->version) != kHFSPlusVersion &&
	     SWAP_BE16(vhp->version) != kHFSXVersion) ||
	    SWAP_BE32(vhp->blockSize) != vcb->blockSize) {
		buf_brelse(bp);
		return (EIO);
	}

	/* Copy the (big-endian) on-disk header fields into the in-core VCB. */
	vcb->vcbLsMod		= to_bsd_time(SWAP_BE32(vhp->modifyDate));
	vcb->vcbAtrb		= SWAP_BE32 (vhp->attributes);
	vcb->vcbJinfoBlock  = SWAP_BE32(vhp->journalInfoBlock);
	vcb->vcbClpSiz		= SWAP_BE32 (vhp->rsrcClumpSize);
	vcb->vcbNxtCNID		= SWAP_BE32 (vhp->nextCatalogID);
	vcb->vcbVolBkUp		= to_bsd_time(SWAP_BE32(vhp->backupDate));
	vcb->vcbWrCnt		= SWAP_BE32 (vhp->writeCount);
	vcb->vcbFilCnt		= SWAP_BE32 (vhp->fileCount);
	vcb->vcbDirCnt		= SWAP_BE32 (vhp->folderCount);
	HFS_UPDATE_NEXT_ALLOCATION(vcb, SWAP_BE32 (vhp->nextAllocation));
	vcb->totalBlocks	= SWAP_BE32 (vhp->totalBlocks);
	vcb->freeBlocks		= SWAP_BE32 (vhp->freeBlocks);
	vcb->encodingsBitmap	= SWAP_BE64 (vhp->encodingsBitmap);
	bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
	vcb->localCreateDate	= SWAP_BE32 (vhp->createDate);	/* hfs+ create date is in local time */

	/*
	 * Re-load meta-file vnode data (extent info, file size, etc).
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->extentsFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->extentsFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->extentsFile.clumpSize);


	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->catalogFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->catalogFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->catalogFile.clumpSize);

	/* Attributes B-tree is optional; only reload it if it is open. */
	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			forkp->ff_extents[i].startBlock =
				SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
			forkp->ff_extents[i].blockCount =
				SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
		}
		forkp->ff_size      = SWAP_BE64 (vhp->attributesFile.logicalSize);
		forkp->ff_blocks    = SWAP_BE32 (vhp->attributesFile.totalBlocks);
		forkp->ff_clumpsize = SWAP_BE32 (vhp->attributesFile.clumpSize);
	}

	forkp = VTOF((struct vnode *)vcb->allocationsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->allocationFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->allocationFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->allocationFile.clumpSize);

	buf_brelse(bp);
	vhp = NULL;

	/*
	 * Re-load B-tree header data
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
			return (error);
	}

	/* Reload the volume name */
	if ((error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, &cndesc, NULL, NULL)))
		return (error);
	vcb->volumeNameEncodingHint = cndesc.cd_encoding;
	/* NOTE(review): bcopy does not NUL-terminate vcbVN; if the new name is
	 * shorter than the old, stale trailing bytes may remain — verify. */
	bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
	cat_releasedesc(&cndesc);

	/* Re-establish private/hidden directories. */
	hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
	hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

	/* In case any volume information changed to trigger a notification */
	hfs_generate_volume_notifications(hfsmp);

	return (0);
}
978
979
980
981 static void
982 hfs_syncer(void *arg0, void *unused)
983 {
984 #pragma unused(unused)
985
986 struct hfsmount *hfsmp = arg0;
987 clock_sec_t secs;
988 clock_usec_t usecs;
989 uint32_t delay = HFS_META_DELAY;
990 uint64_t now;
991 static int no_max=1;
992
993 clock_get_calendar_microtime(&secs, &usecs);
994 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
995
996 //
997 // If the amount of pending writes is more than our limit, wait
998 // for 2/3 of it to drain and then flush the journal.
999 //
1000 if (hfsmp->hfs_mp->mnt_pending_write_size > hfsmp->hfs_max_pending_io) {
1001 int counter=0;
1002 uint64_t pending_io, start, rate;
1003
1004 no_max = 0;
1005
1006 hfs_start_transaction(hfsmp); // so we hold off any new i/o's
1007
1008 pending_io = hfsmp->hfs_mp->mnt_pending_write_size;
1009
1010 clock_get_calendar_microtime(&secs, &usecs);
1011 start = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
1012
1013 while(hfsmp->hfs_mp->mnt_pending_write_size > (pending_io/3) && counter++ < 500) {
1014 tsleep((caddr_t)hfsmp, PRIBIO, "hfs-wait-for-io-to-drain", 10);
1015 }
1016
1017 if (counter >= 500) {
1018 printf("hfs: timed out waiting for io to drain (%lld)\n", (int64_t)hfsmp->hfs_mp->mnt_pending_write_size);
1019 }
1020
1021 if (hfsmp->jnl) {
1022 journal_flush(hfsmp->jnl, FALSE);
1023 } else {
1024 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
1025 }
1026
1027 clock_get_calendar_microtime(&secs, &usecs);
1028 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
1029 hfsmp->hfs_last_sync_time = now;
1030 rate = ((pending_io * 1000000ULL) / (now - start)); // yields bytes per second
1031
1032 hfs_end_transaction(hfsmp);
1033
1034 //
1035 // If a reasonable amount of time elapsed then check the
1036 // i/o rate. If it's taking less than 1 second or more
1037 // than 2 seconds, adjust hfs_max_pending_io so that we
1038 // will allow about 1.5 seconds of i/o to queue up.
1039 //
1040 if ((now - start) >= 300000) {
1041 uint64_t scale = (pending_io * 100) / rate;
1042
1043 if (scale < 100 || scale > 200) {
1044 // set it so that it should take about 1.5 seconds to drain
1045 hfsmp->hfs_max_pending_io = (rate * 150ULL) / 100ULL;
1046 }
1047 }
1048
1049 } else if ( ((now - hfsmp->hfs_last_sync_time) >= 5000000ULL)
1050 || (((now - hfsmp->hfs_last_sync_time) >= 100000LL)
1051 && ((now - hfsmp->hfs_last_sync_request_time) >= 100000LL)
1052 && (hfsmp->hfs_active_threads == 0)
1053 && (hfsmp->hfs_global_lock_nesting == 0))) {
1054
1055 //
1056 // Flush the journal if more than 5 seconds elapsed since
1057 // the last sync OR we have not sync'ed recently and the
1058 // last sync request time was more than 100 milliseconds
1059 // ago and no one is in the middle of a transaction right
1060 // now. Else we defer the sync and reschedule it.
1061 //
1062 if (hfsmp->jnl) {
1063 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
1064
1065 journal_flush(hfsmp->jnl, FALSE);
1066
1067 hfs_unlock_global (hfsmp);
1068 } else {
1069 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
1070 }
1071
1072 clock_get_calendar_microtime(&secs, &usecs);
1073 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
1074 hfsmp->hfs_last_sync_time = now;
1075
1076 } else if (hfsmp->hfs_active_threads == 0) {
1077 uint64_t deadline;
1078
1079 clock_interval_to_deadline(delay, HFS_MILLISEC_SCALE, &deadline);
1080 thread_call_enter_delayed(hfsmp->hfs_syncer, deadline);
1081
1082 // note: we intentionally return early here and do not
1083 // decrement the sync_scheduled and sync_incomplete
1084 // variables because we rescheduled the timer.
1085
1086 return;
1087 }
1088
1089 //
1090 // NOTE: we decrement these *after* we're done the journal_flush() since
1091 // it can take a significant amount of time and so we don't want more
1092 // callbacks scheduled until we're done this one.
1093 //
1094 OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
1095 OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
1096 wakeup((caddr_t)&hfsmp->hfs_sync_incomplete);
1097 }
1098
1099
1100 extern int IOBSDIsMediaEjectable( const char *cdev_name );
1101
1102 /*
1103 * Initialization code for Red-Black Tree Allocator
1104 *
1105 * This function will build the two red-black trees necessary for allocating space
1106 * from the metadata zone as well as normal allocations. Currently, we use
1107 * an advisory read to get most of the data into the buffer cache.
1108 * This function is intended to be run in a separate thread so as not to slow down mount.
1109 *
1110 */
1111
/*
 * Build the red-black tree(s) used by the block allocator.
 *
 * Intended to run in its own thread so tree generation does not slow
 * down mount. When CONFIG_HFS_ALLOC_RBTREE is not configured this is a
 * no-op.
 */
void
hfs_initialize_allocator (struct hfsmount *hfsmp) {

#if CONFIG_HFS_ALLOC_RBTREE
	u_int32_t err;
	int lockflags;

	/*
	 * Grab the allocation (bitmap) file lock exclusively; journal
	 * transactions will block until we release it below.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * GenerateTree must be entered with the bitmap lock held.  It may
	 * temporarily drop and re-take the lock to let other allocations
	 * proceed, but it always returns with the lock held.  Only one
	 * tree is maintained, so generation always starts at block 0.
	 */
	err = GenerateTree(hfsmp, hfsmp->totalBlocks, &lockflags, 1);
	if (err == 0) {
		/* The offset tree is now live. */
		hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ACTIVE;
	}

	/*
	 * Whether generation succeeded or failed, clear the in-flight
	 * bit.  The bitmap lock is still held here (see above), so no
	 * re-lock is needed for this update.
	 */
	hfsmp->extent_tree_flags &= ~HFS_ALLOC_TREEBUILD_INFLIGHT;
	if (err != 0) {
		/* Let anyone blocked on the tree build know it is over. */
		wakeup((caddr_t)&hfsmp->extent_tree_flags);
	}

	hfs_systemfile_unlock(hfsmp, lockflags);
#else
#pragma unused (hfsmp)
#endif
}
1154
1155
1156 /*
1157 * Teardown code for the Red-Black Tree allocator.
1158 * This function consolidates the code which serializes with respect
1159 * to a thread that may be potentially still building the tree when we need to begin
1160 * tearing it down. Since the red-black tree may not be live when we enter this function
1161 * we return:
1162 * 1 -> Tree was live.
1163 * 0 -> Tree was not active at time of call.
1164 */
1165
/*
 * Tear down the red-black tree allocator, serializing against a thread
 * that may still be building the tree.
 *
 * Returns 1 if the tree was live at the time of the call, 0 otherwise
 * (always 0 when CONFIG_HFS_ALLOC_RBTREE is not configured).
 */
int
hfs_teardown_allocator (struct hfsmount *hfsmp) {
	int rb_used = 0;

#if CONFIG_HFS_ALLOC_RBTREE
	int lockflags;

	/*
	 * Take the bitmap lock, then wait out any in-progress tree
	 * generation before touching the trees.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	while (hfsmp->extent_tree_flags & HFS_ALLOC_TREEBUILD_INFLIGHT) {
		/* Advertise that a teardown is waiting, then sleep on the
		 * flags word until the builder wakes us. */
		hfsmp->extent_tree_flags |= HFS_ALLOC_TEARDOWN_INFLIGHT;

		lck_rw_sleep(&(VTOC(hfsmp->hfs_allocation_vp))->c_rwlock, LCK_SLEEP_EXCLUSIVE,
		    &hfsmp->extent_tree_flags, THREAD_UNINT);
	}

	if (hfs_isrbtree_active (hfsmp)) {
		/* Tree was live: destroy it while the bitmap is locked. */
		DestroyTrees(hfsmp);
		rb_used = 1;
	}

	hfs_systemfile_unlock(hfsmp, lockflags);
#else
#pragma unused (hfsmp)
#endif
	return rb_used;

}
1204
1205
/*
 * Records whether the root volume was unmounted cleanly before this
 * boot; set during hfs_mountfs (for the root volume only) from the
 * volume header's kHFSVolumeUnmountedMask attribute bit, and exported
 * read-only via sysctl as vfs.generic.root_unmounted_cleanly.
 */
static int hfs_root_unmounted_cleanly = 0;

SYSCTL_DECL(_vfs_generic);
SYSCTL_INT(_vfs_generic, OID_AUTO, root_unmounted_cleanly, CTLFLAG_RD, &hfs_root_unmounted_cleanly, 0, "Root filesystem was unmounted cleanly");
1210
1211 /*
1212 * Common code for mount and mountroot
1213 */
1214 int
1215 hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
1216 int journal_replay_only, vfs_context_t context)
1217 {
1218 struct proc *p = vfs_context_proc(context);
1219 int retval = E_NONE;
1220 struct hfsmount *hfsmp = NULL;
1221 struct buf *bp;
1222 dev_t dev;
1223 HFSMasterDirectoryBlock *mdbp = NULL;
1224 int ronly;
1225 #if QUOTA
1226 int i;
1227 #endif
1228 int mntwrapper;
1229 kauth_cred_t cred;
1230 u_int64_t disksize;
1231 daddr64_t log_blkcnt;
1232 u_int32_t log_blksize;
1233 u_int32_t phys_blksize;
1234 u_int32_t minblksize;
1235 u_int32_t iswritable;
1236 daddr64_t mdb_offset;
1237 int isvirtual = 0;
1238 int isroot = 0;
1239 int isssd;
1240 #if CONFIG_HFS_ALLOC_RBTREE
1241 thread_t allocator_thread;
1242 #endif
1243
1244 if (args == NULL) {
1245 /* only hfs_mountroot passes us NULL as the 'args' argument */
1246 isroot = 1;
1247 }
1248
1249 ronly = vfs_isrdonly(mp);
1250 dev = vnode_specrdev(devvp);
1251 cred = p ? vfs_context_ucred(context) : NOCRED;
1252 mntwrapper = 0;
1253
1254 bp = NULL;
1255 hfsmp = NULL;
1256 mdbp = NULL;
1257 minblksize = kHFSBlockSize;
1258
1259 /* Advisory locking should be handled at the VFS layer */
1260 vfs_setlocklocal(mp);
1261
1262 /* Get the logical block size (treated as physical block size everywhere) */
1263 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&log_blksize, 0, context)) {
1264 if (HFS_MOUNT_DEBUG) {
1265 printf("hfs_mountfs: DKIOCGETBLOCKSIZE failed\n");
1266 }
1267 retval = ENXIO;
1268 goto error_exit;
1269 }
1270 if (log_blksize == 0 || log_blksize > 1024*1024*1024) {
1271 printf("hfs: logical block size 0x%x looks bad. Not mounting.\n", log_blksize);
1272 retval = ENXIO;
1273 goto error_exit;
1274 }
1275
1276 /* Get the physical block size. */
1277 retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context);
1278 if (retval) {
1279 if ((retval != ENOTSUP) && (retval != ENOTTY)) {
1280 if (HFS_MOUNT_DEBUG) {
1281 printf("hfs_mountfs: DKIOCGETPHYSICALBLOCKSIZE failed\n");
1282 }
1283 retval = ENXIO;
1284 goto error_exit;
1285 }
1286 /* If device does not support this ioctl, assume that physical
1287 * block size is same as logical block size
1288 */
1289 phys_blksize = log_blksize;
1290 }
1291 if (phys_blksize == 0 || phys_blksize > 1024*1024*1024) {
1292 printf("hfs: physical block size 0x%x looks bad. Not mounting.\n", phys_blksize);
1293 retval = ENXIO;
1294 goto error_exit;
1295 }
1296
1297 /* Switch to 512 byte sectors (temporarily) */
1298 if (log_blksize > 512) {
1299 u_int32_t size512 = 512;
1300
1301 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, context)) {
1302 if (HFS_MOUNT_DEBUG) {
1303 printf("hfs_mountfs: DKIOCSETBLOCKSIZE failed \n");
1304 }
1305 retval = ENXIO;
1306 goto error_exit;
1307 }
1308 }
1309 /* Get the number of 512 byte physical blocks. */
1310 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1311 /* resetting block size may fail if getting block count did */
1312 (void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context);
1313 if (HFS_MOUNT_DEBUG) {
1314 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT failed\n");
1315 }
1316 retval = ENXIO;
1317 goto error_exit;
1318 }
1319 /* Compute an accurate disk size (i.e. within 512 bytes) */
1320 disksize = (u_int64_t)log_blkcnt * (u_int64_t)512;
1321
1322 /*
1323 * On Tiger it is not necessary to switch the device
1324 * block size to be 4k if there are more than 31-bits
1325 * worth of blocks but to insure compatibility with
1326 * pre-Tiger systems we have to do it.
1327 *
1328 * If the device size is not a multiple of 4K (8 * 512), then
1329 * switching the logical block size isn't going to help because
1330 * we will be unable to write the alternate volume header.
1331 * In this case, just leave the logical block size unchanged.
1332 */
1333 if (log_blkcnt > 0x000000007fffffff && (log_blkcnt & 7) == 0) {
1334 minblksize = log_blksize = 4096;
1335 if (phys_blksize < log_blksize)
1336 phys_blksize = log_blksize;
1337 }
1338
1339 /*
1340 * The cluster layer is not currently prepared to deal with a logical
1341 * block size larger than the system's page size. (It can handle
1342 * blocks per page, but not multiple pages per block.) So limit the
1343 * logical block size to the page size.
1344 */
1345 if (log_blksize > PAGE_SIZE)
1346 log_blksize = PAGE_SIZE;
1347
1348 /* Now switch to our preferred physical block size. */
1349 if (log_blksize > 512) {
1350 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1351 if (HFS_MOUNT_DEBUG) {
1352 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (2) failed\n");
1353 }
1354 retval = ENXIO;
1355 goto error_exit;
1356 }
1357 /* Get the count of physical blocks. */
1358 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1359 if (HFS_MOUNT_DEBUG) {
1360 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (2) failed\n");
1361 }
1362 retval = ENXIO;
1363 goto error_exit;
1364 }
1365 }
1366 /*
1367 * At this point:
1368 * minblksize is the minimum physical block size
1369 * log_blksize has our preferred physical block size
1370 * log_blkcnt has the total number of physical blocks
1371 */
1372
1373 mdb_offset = (daddr64_t)HFS_PRI_SECTOR(log_blksize);
1374 if ((retval = (int)buf_meta_bread(devvp,
1375 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)),
1376 phys_blksize, cred, &bp))) {
1377 if (HFS_MOUNT_DEBUG) {
1378 printf("hfs_mountfs: buf_meta_bread failed with %d\n", retval);
1379 }
1380 goto error_exit;
1381 }
1382 MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK);
1383 if (mdbp == NULL) {
1384 retval = ENOMEM;
1385 if (HFS_MOUNT_DEBUG) {
1386 printf("hfs_mountfs: MALLOC failed\n");
1387 }
1388 goto error_exit;
1389 }
1390 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize);
1391 buf_brelse(bp);
1392 bp = NULL;
1393
1394 MALLOC(hfsmp, struct hfsmount *, sizeof(struct hfsmount), M_HFSMNT, M_WAITOK);
1395 if (hfsmp == NULL) {
1396 if (HFS_MOUNT_DEBUG) {
1397 printf("hfs_mountfs: MALLOC (2) failed\n");
1398 }
1399 retval = ENOMEM;
1400 goto error_exit;
1401 }
1402 bzero(hfsmp, sizeof(struct hfsmount));
1403
1404 hfs_chashinit_finish(hfsmp);
1405
1406 /*
1407 * See if the disk is a solid state device. We need this to decide what to do about
1408 * hotfiles.
1409 */
1410 if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, context) == 0) {
1411 if (isssd) {
1412 hfsmp->hfs_flags |= HFS_SSD;
1413 }
1414 }
1415
1416
1417 /*
1418 * Init the volume information structure
1419 */
1420
1421 lck_mtx_init(&hfsmp->hfs_mutex, hfs_mutex_group, hfs_lock_attr);
1422 lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr);
1423 lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr);
1424 lck_rw_init(&hfsmp->hfs_insync, hfs_rwlock_group, hfs_lock_attr);
1425 lck_spin_init(&hfsmp->vcbFreeExtLock, hfs_spinlock_group, hfs_lock_attr);
1426
1427 vfs_setfsprivate(mp, hfsmp);
1428 hfsmp->hfs_mp = mp; /* Make VFSTOHFS work */
1429 hfsmp->hfs_raw_dev = vnode_specrdev(devvp);
1430 hfsmp->hfs_devvp = devvp;
1431 vnode_ref(devvp); /* Hold a ref on the device, dropped when hfsmp is freed. */
1432 hfsmp->hfs_logical_block_size = log_blksize;
1433 hfsmp->hfs_logical_block_count = log_blkcnt;
1434 hfsmp->hfs_physical_block_size = phys_blksize;
1435 hfsmp->hfs_log_per_phys = (phys_blksize / log_blksize);
1436 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1437 if (ronly)
1438 hfsmp->hfs_flags |= HFS_READ_ONLY;
1439 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS)
1440 hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
1441
1442 #if QUOTA
1443 for (i = 0; i < MAXQUOTAS; i++)
1444 dqfileinit(&hfsmp->hfs_qfiles[i]);
1445 #endif
1446
1447 if (args) {
1448 hfsmp->hfs_uid = (args->hfs_uid == (uid_t)VNOVAL) ? UNKNOWNUID : args->hfs_uid;
1449 if (hfsmp->hfs_uid == 0xfffffffd) hfsmp->hfs_uid = UNKNOWNUID;
1450 hfsmp->hfs_gid = (args->hfs_gid == (gid_t)VNOVAL) ? UNKNOWNGID : args->hfs_gid;
1451 if (hfsmp->hfs_gid == 0xfffffffd) hfsmp->hfs_gid = UNKNOWNGID;
1452 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1453 if (args->hfs_mask != (mode_t)VNOVAL) {
1454 hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
1455 if (args->flags & HFSFSMNT_NOXONFILES) {
1456 hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
1457 } else {
1458 hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
1459 }
1460 } else {
1461 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1462 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1463 }
1464 if ((args->flags != (int)VNOVAL) && (args->flags & HFSFSMNT_WRAPPER))
1465 mntwrapper = 1;
1466 } else {
1467 /* Even w/o explicit mount arguments, MNT_UNKNOWNPERMISSIONS requires setting up uid, gid, and mask: */
1468 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS) {
1469 hfsmp->hfs_uid = UNKNOWNUID;
1470 hfsmp->hfs_gid = UNKNOWNGID;
1471 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1472 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1473 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1474 }
1475 }
1476
1477 /* Find out if disk media is writable. */
1478 if (VNOP_IOCTL(devvp, DKIOCISWRITABLE, (caddr_t)&iswritable, 0, context) == 0) {
1479 if (iswritable)
1480 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1481 else
1482 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1483 }
1484
1485 // record the current time at which we're mounting this volume
1486 struct timeval tv;
1487 microtime(&tv);
1488 hfsmp->hfs_mount_time = tv.tv_sec;
1489
1490 /* Mount a standard HFS disk */
1491 if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) &&
1492 (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord))) {
1493
1494 /* On 10.6 and beyond, non read-only mounts for HFS standard vols get rejected */
1495 if (vfs_isrdwr(mp)) {
1496 retval = EROFS;
1497 goto error_exit;
1498 }
1499
1500 printf("hfs_mountfs: Mounting HFS Standard volumes was deprecated in Mac OS 10.7 \n");
1501
1502 /* Treat it as if it's read-only and not writeable */
1503 hfsmp->hfs_flags |= HFS_READ_ONLY;
1504 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1505
1506 /* If only journal replay is requested, exit immediately */
1507 if (journal_replay_only) {
1508 retval = 0;
1509 goto error_exit;
1510 }
1511
1512 if ((vfs_flags(mp) & MNT_ROOTFS)) {
1513 retval = EINVAL; /* Cannot root from HFS standard disks */
1514 goto error_exit;
1515 }
1516 /* HFS disks can only use 512 byte physical blocks */
1517 if (log_blksize > kHFSBlockSize) {
1518 log_blksize = kHFSBlockSize;
1519 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1520 retval = ENXIO;
1521 goto error_exit;
1522 }
1523 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1524 retval = ENXIO;
1525 goto error_exit;
1526 }
1527 hfsmp->hfs_logical_block_size = log_blksize;
1528 hfsmp->hfs_logical_block_count = log_blkcnt;
1529 hfsmp->hfs_physical_block_size = log_blksize;
1530 hfsmp->hfs_log_per_phys = 1;
1531 }
1532 if (args) {
1533 hfsmp->hfs_encoding = args->hfs_encoding;
1534 HFSTOVCB(hfsmp)->volumeNameEncodingHint = args->hfs_encoding;
1535
1536 /* establish the timezone */
1537 gTimeZone = args->hfs_timezone;
1538 }
1539
1540 retval = hfs_getconverter(hfsmp->hfs_encoding, &hfsmp->hfs_get_unicode,
1541 &hfsmp->hfs_get_hfsname);
1542 if (retval)
1543 goto error_exit;
1544
1545 retval = hfs_MountHFSVolume(hfsmp, mdbp, p);
1546 if (retval)
1547 (void) hfs_relconverter(hfsmp->hfs_encoding);
1548
1549 } else /* Mount an HFS Plus disk */ {
1550 HFSPlusVolumeHeader *vhp;
1551 off_t embeddedOffset;
1552 int jnl_disable = 0;
1553
1554 /* Get the embedded Volume Header */
1555 if (SWAP_BE16(mdbp->drEmbedSigWord) == kHFSPlusSigWord) {
1556 embeddedOffset = SWAP_BE16(mdbp->drAlBlSt) * kHFSBlockSize;
1557 embeddedOffset += (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.startBlock) *
1558 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1559
1560 /*
1561 * If the embedded volume doesn't start on a block
1562 * boundary, then switch the device to a 512-byte
1563 * block size so everything will line up on a block
1564 * boundary.
1565 */
1566 if ((embeddedOffset % log_blksize) != 0) {
1567 printf("hfs_mountfs: embedded volume offset not"
1568 " a multiple of physical block size (%d);"
1569 " switching to 512\n", log_blksize);
1570 log_blksize = 512;
1571 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE,
1572 (caddr_t)&log_blksize, FWRITE, context)) {
1573
1574 if (HFS_MOUNT_DEBUG) {
1575 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (3) failed\n");
1576 }
1577 retval = ENXIO;
1578 goto error_exit;
1579 }
1580 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT,
1581 (caddr_t)&log_blkcnt, 0, context)) {
1582 if (HFS_MOUNT_DEBUG) {
1583 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (3) failed\n");
1584 }
1585 retval = ENXIO;
1586 goto error_exit;
1587 }
1588 /* Note: relative block count adjustment */
1589 hfsmp->hfs_logical_block_count *=
1590 hfsmp->hfs_logical_block_size / log_blksize;
1591
1592 /* Update logical /physical block size */
1593 hfsmp->hfs_logical_block_size = log_blksize;
1594 hfsmp->hfs_physical_block_size = log_blksize;
1595 phys_blksize = log_blksize;
1596 hfsmp->hfs_log_per_phys = 1;
1597 }
1598
1599 disksize = (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.blockCount) *
1600 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1601
1602 hfsmp->hfs_logical_block_count = disksize / log_blksize;
1603
1604 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1605 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1606 phys_blksize, cred, &bp);
1607 if (retval) {
1608 if (HFS_MOUNT_DEBUG) {
1609 printf("hfs_mountfs: buf_meta_bread (2) failed with %d\n", retval);
1610 }
1611 goto error_exit;
1612 }
1613 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, 512);
1614 buf_brelse(bp);
1615 bp = NULL;
1616 vhp = (HFSPlusVolumeHeader*) mdbp;
1617
1618 } else /* pure HFS+ */ {
1619 embeddedOffset = 0;
1620 vhp = (HFSPlusVolumeHeader*) mdbp;
1621 }
1622
1623 if (isroot) {
1624 hfs_root_unmounted_cleanly = (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) != 0;
1625 }
1626
1627 /*
1628 * On inconsistent disks, do not allow read-write mount
1629 * unless it is the boot volume being mounted. We also
1630 * always want to replay the journal if the journal_replay_only
1631 * flag is set because that will (most likely) get the
1632 * disk into a consistent state before fsck_hfs starts
1633 * looking at it.
1634 */
1635 if ( !(vfs_flags(mp) & MNT_ROOTFS)
1636 && (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask)
1637 && !journal_replay_only
1638 && !(hfsmp->hfs_flags & HFS_READ_ONLY)) {
1639
1640 if (HFS_MOUNT_DEBUG) {
1641 printf("hfs_mountfs: failed to mount non-root inconsistent disk\n");
1642 }
1643 retval = EINVAL;
1644 goto error_exit;
1645 }
1646
1647
1648 // XXXdbg
1649 //
1650 hfsmp->jnl = NULL;
1651 hfsmp->jvp = NULL;
1652 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS) &&
1653 args->journal_disable) {
1654 jnl_disable = 1;
1655 }
1656
1657 //
1658 // We only initialize the journal here if the last person
1659 // to mount this volume was journaling aware. Otherwise
1660 // we delay journal initialization until later at the end
1661 // of hfs_MountHFSPlusVolume() because the last person who
1662 // mounted it could have messed things up behind our back
1663 // (so we need to go find the .journal file, make sure it's
1664 // the right size, re-sync up if it was moved, etc).
1665 //
1666 if ( (SWAP_BE32(vhp->lastMountedVersion) == kHFSJMountVersion)
1667 && (SWAP_BE32(vhp->attributes) & kHFSVolumeJournaledMask)
1668 && !jnl_disable) {
1669
1670 // if we're able to init the journal, mark the mount
1671 // point as journaled.
1672 //
1673 if ((retval = hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred)) == 0) {
1674 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1675 } else {
1676 if (retval == EROFS) {
1677 // EROFS is a special error code that means the volume has an external
1678 // journal which we couldn't find. in that case we do not want to
1679 // rewrite the volume header - we'll just refuse to mount the volume.
1680 if (HFS_MOUNT_DEBUG) {
1681 printf("hfs_mountfs: hfs_early_journal_init indicated external jnl \n");
1682 }
1683 retval = EINVAL;
1684 goto error_exit;
1685 }
1686
1687 // if the journal failed to open, then set the lastMountedVersion
1688 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1689 // of just bailing out because the volume is journaled.
1690 if (!ronly) {
1691 if (HFS_MOUNT_DEBUG) {
1692 printf("hfs_mountfs: hfs_early_journal_init failed, setting to FSK \n");
1693 }
1694
1695 HFSPlusVolumeHeader *jvhp;
1696
1697 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1698
1699 if (mdb_offset == 0) {
1700 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1701 }
1702
1703 bp = NULL;
1704 retval = (int)buf_meta_bread(devvp,
1705 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1706 phys_blksize, cred, &bp);
1707 if (retval == 0) {
1708 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1709
1710 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1711 printf ("hfs(1): Journal replay fail. Writing lastMountVersion as FSK!\n");
1712 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1713 buf_bwrite(bp);
1714 } else {
1715 buf_brelse(bp);
1716 }
1717 bp = NULL;
1718 } else if (bp) {
1719 buf_brelse(bp);
1720 // clear this so the error exit path won't try to use it
1721 bp = NULL;
1722 }
1723 }
1724
1725 // if this isn't the root device just bail out.
1726 // If it is the root device we just continue on
1727 // in the hopes that fsck_hfs will be able to
1728 // fix any damage that exists on the volume.
1729 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1730 if (HFS_MOUNT_DEBUG) {
1731 printf("hfs_mountfs: hfs_early_journal_init failed, erroring out \n");
1732 }
1733 retval = EINVAL;
1734 goto error_exit;
1735 }
1736 }
1737 }
1738 // XXXdbg
1739
1740 /* Either the journal is replayed successfully, or there
1741 * was nothing to replay, or no journal exists. In any case,
1742 * return success.
1743 */
1744 if (journal_replay_only) {
1745 retval = 0;
1746 goto error_exit;
1747 }
1748
1749 (void) hfs_getconverter(0, &hfsmp->hfs_get_unicode, &hfsmp->hfs_get_hfsname);
1750
1751 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1752 /*
1753 * If the backend didn't like our physical blocksize
1754 * then retry with physical blocksize of 512.
1755 */
1756 if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) {
1757 printf("hfs_mountfs: could not use physical block size "
1758 "(%d) switching to 512\n", log_blksize);
1759 log_blksize = 512;
1760 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1761 if (HFS_MOUNT_DEBUG) {
1762 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (4) failed \n");
1763 }
1764 retval = ENXIO;
1765 goto error_exit;
1766 }
1767 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1768 if (HFS_MOUNT_DEBUG) {
1769 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (4) failed \n");
1770 }
1771 retval = ENXIO;
1772 goto error_exit;
1773 }
1774 devvp->v_specsize = log_blksize;
1775 /* Note: relative block count adjustment (in case this is an embedded volume). */
1776 hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize;
1777 hfsmp->hfs_logical_block_size = log_blksize;
1778 hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize;
1779
1780 if (hfsmp->jnl && hfsmp->jvp == devvp) {
1781 // close and re-open this with the new block size
1782 journal_close(hfsmp->jnl);
1783 hfsmp->jnl = NULL;
1784 if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) {
1785 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1786 } else {
1787 // if the journal failed to open, then set the lastMountedVersion
1788 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1789 // of just bailing out because the volume is journaled.
1790 if (!ronly) {
1791 if (HFS_MOUNT_DEBUG) {
1792 printf("hfs_mountfs: hfs_early_journal_init (2) resetting.. \n");
1793 }
1794 HFSPlusVolumeHeader *jvhp;
1795
1796 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1797
1798 if (mdb_offset == 0) {
1799 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1800 }
1801
1802 bp = NULL;
1803 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1804 phys_blksize, cred, &bp);
1805 if (retval == 0) {
1806 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1807
1808 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1809 printf ("hfs(2): Journal replay fail. Writing lastMountVersion as FSK!\n");
1810 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1811 buf_bwrite(bp);
1812 } else {
1813 buf_brelse(bp);
1814 }
1815 bp = NULL;
1816 } else if (bp) {
1817 buf_brelse(bp);
1818 // clear this so the error exit path won't try to use it
1819 bp = NULL;
1820 }
1821 }
1822
1823 // if this isn't the root device just bail out.
1824 // If it is the root device we just continue on
1825 // in the hopes that fsck_hfs will be able to
1826 // fix any damage that exists on the volume.
1827 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1828 if (HFS_MOUNT_DEBUG) {
1829 printf("hfs_mountfs: hfs_early_journal_init (2) failed \n");
1830 }
1831 retval = EINVAL;
1832 goto error_exit;
1833 }
1834 }
1835 }
1836
1837 /* Try again with a smaller block size... */
1838 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1839 if (retval && HFS_MOUNT_DEBUG) {
1840 printf("hfs_MountHFSPlusVolume (late) returned %d\n",retval);
1841 }
1842 }
1843 if (retval)
1844 (void) hfs_relconverter(0);
1845 }
1846
1847 // save off a snapshot of the mtime from the previous mount
1848 // (for matador).
1849 hfsmp->hfs_last_mounted_mtime = hfsmp->hfs_mtime;
1850
1851 if ( retval ) {
1852 if (HFS_MOUNT_DEBUG) {
1853 printf("hfs_mountfs: encountered failure %d \n", retval);
1854 }
1855 goto error_exit;
1856 }
1857
1858 mp->mnt_vfsstat.f_fsid.val[0] = (long)dev;
1859 mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
1860 vfs_setmaxsymlen(mp, 0);
1861
1862 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSNATIVEXATTR;
1863 #if NAMEDSTREAMS
1864 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1865 #endif
1866 if (!(hfsmp->hfs_flags & HFS_STANDARD)) {
1867 /* Tell VFS that we support directory hard links. */
1868 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSDIRLINKS;
1869 } else {
1870 /* HFS standard doesn't support extended readdir! */
1871 mount_set_noreaddirext (mp);
1872 }
1873
1874 if (args) {
1875 /*
1876 * Set the free space warning levels for a non-root volume:
1877 *
1878 * Set the "danger" limit to 1% of the volume size or 100MB, whichever
1879 * is less. Set the "warning" limit to 2% of the volume size or 150MB,
1880 * whichever is less. And last, set the "desired" freespace level to
1881 * to 3% of the volume size or 200MB, whichever is less.
1882 */
1883 hfsmp->hfs_freespace_notify_dangerlimit =
1884 MIN(HFS_VERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1885 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_VERYLOWDISKTRIGGERFRACTION);
1886 hfsmp->hfs_freespace_notify_warninglimit =
1887 MIN(HFS_LOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1888 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKTRIGGERFRACTION);
1889 hfsmp->hfs_freespace_notify_desiredlevel =
1890 MIN(HFS_LOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1891 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKSHUTOFFFRACTION);
1892 } else {
1893 /*
1894 * Set the free space warning levels for the root volume:
1895 *
1896 * Set the "danger" limit to 5% of the volume size or 512MB, whichever
1897 * is less. Set the "warning" limit to 10% of the volume size or 1GB,
1898 * whichever is less. And last, set the "desired" freespace level to
1899 * to 11% of the volume size or 1.25GB, whichever is less.
1900 */
1901 hfsmp->hfs_freespace_notify_dangerlimit =
1902 MIN(HFS_ROOTVERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1903 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTVERYLOWDISKTRIGGERFRACTION);
1904 hfsmp->hfs_freespace_notify_warninglimit =
1905 MIN(HFS_ROOTLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1906 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKTRIGGERFRACTION);
1907 hfsmp->hfs_freespace_notify_desiredlevel =
1908 MIN(HFS_ROOTLOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1909 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKSHUTOFFFRACTION);
1910 };
1911
1912 /* Check if the file system exists on virtual device, like disk image */
1913 if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, context) == 0) {
1914 if (isvirtual) {
1915 hfsmp->hfs_flags |= HFS_VIRTUAL_DEVICE;
1916 }
1917 }
1918
1919 /* do not allow ejectability checks on the root device */
1920 if (isroot == 0) {
1921 if ((hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) == 0 &&
1922 IOBSDIsMediaEjectable(mp->mnt_vfsstat.f_mntfromname)) {
1923 hfsmp->hfs_max_pending_io = 4096*1024; // a reasonable value to start with.
1924 hfsmp->hfs_syncer = thread_call_allocate(hfs_syncer, hfsmp);
1925 if (hfsmp->hfs_syncer == NULL) {
1926 printf("hfs: failed to allocate syncer thread callback for %s (%s)\n",
1927 mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname);
1928 }
1929 }
1930 }
1931
1932 #if CONFIG_HFS_ALLOC_RBTREE
1933 /*
1934 * We spawn a thread to create the pair of red-black trees for this volume.
1935 * However, in so doing, we must be careful to ensure that if this thread is still
1936 * running after mount has finished, it doesn't interfere with an unmount. Specifically,
1937 * we'll need to set a bit that indicates we're in progress building the trees here.
1938 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that
1939 * notifies the tree generation code that an unmount is waiting. Also mark the bit that
1940 * indicates the tree is live and operating.
1941 *
1942 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only).
1943 */
1944
1945 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
1946 hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED);
1947
1948 /* Initialize EOF counter so that the thread can assume it started at initial values */
1949 hfsmp->offset_block_end = 0;
1950 InitTree(hfsmp);
1951
1952 kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread);
1953 thread_deallocate(allocator_thread);
1954 }
1955
1956 #endif
1957
1958 /*
1959 * Start looking for free space to drop below this level and generate a
1960 * warning immediately if needed:
1961 */
1962 hfsmp->hfs_notification_conditions = 0;
1963 hfs_generate_volume_notifications(hfsmp);
1964
1965 if (ronly == 0) {
1966 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1967 }
1968 FREE(mdbp, M_TEMP);
1969 return (0);
1970
1971 error_exit:
1972 if (bp)
1973 buf_brelse(bp);
1974 if (mdbp)
1975 FREE(mdbp, M_TEMP);
1976
1977 if (hfsmp && hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
1978 vnode_clearmountedon(hfsmp->jvp);
1979 (void)VNOP_CLOSE(hfsmp->jvp, ronly ? FREAD : FREAD|FWRITE, vfs_context_kernel());
1980 hfsmp->jvp = NULL;
1981 }
1982 if (hfsmp) {
1983 if (hfsmp->hfs_devvp) {
1984 vnode_rele(hfsmp->hfs_devvp);
1985 }
1986 hfs_delete_chash(hfsmp);
1987
1988 FREE(hfsmp, M_HFSMNT);
1989 vfs_setfsprivate(mp, NULL);
1990 }
1991 return (retval);
1992 }
1993
1994
1995 /*
1996 * Make a filesystem operational.
1997 * Nothing to do at the moment.
1998 */
1999 /* ARGSUSED */
2000 static int
2001 hfs_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context)
2002 {
2003 return (0);
2004 }
2005
2006
2007 /*
2008 * unmount system call
2009 */
2010 int
2011 hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
2012 {
2013 struct proc *p = vfs_context_proc(context);
2014 struct hfsmount *hfsmp = VFSTOHFS(mp);
2015 int retval = E_NONE;
2016 int flags;
2017 int force;
2018 int started_tr = 0;
2019 int rb_used = 0;
2020
2021 flags = 0;
2022 force = 0;
2023 if (mntflags & MNT_FORCE) {
2024 flags |= FORCECLOSE;
2025 force = 1;
2026 }
2027
2028 if ((retval = hfs_flushfiles(mp, flags, p)) && !force)
2029 return (retval);
2030
2031 if (hfsmp->hfs_flags & HFS_METADATA_ZONE)
2032 (void) hfs_recording_suspend(hfsmp);
2033
2034 /*
2035 * Cancel any pending timers for this volume. Then wait for any timers
2036 * which have fired, but whose callbacks have not yet completed.
2037 */
2038 if (hfsmp->hfs_syncer)
2039 {
2040 struct timespec ts = {0, 100000000}; /* 0.1 seconds */
2041
2042 /*
2043 * Cancel any timers that have been scheduled, but have not
2044 * fired yet. NOTE: The kernel considers a timer complete as
2045 * soon as it starts your callback, so the kernel does not
2046 * keep track of the number of callbacks in progress.
2047 */
2048 if (thread_call_cancel(hfsmp->hfs_syncer))
2049 OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
2050 thread_call_free(hfsmp->hfs_syncer);
2051 hfsmp->hfs_syncer = NULL;
2052
2053 /*
2054 * This waits for all of the callbacks that were entered before
2055 * we did thread_call_cancel above, but have not completed yet.
2056 */
2057 while(hfsmp->hfs_sync_incomplete > 0)
2058 {
2059 msleep((caddr_t)&hfsmp->hfs_sync_incomplete, NULL, PWAIT, "hfs_unmount", &ts);
2060 }
2061
2062 if (hfsmp->hfs_sync_incomplete < 0)
2063 panic("hfs_unmount: pm_sync_incomplete underflow!\n");
2064 }
2065
2066 #if CONFIG_HFS_ALLOC_RBTREE
2067 rb_used = hfs_teardown_allocator(hfsmp);
2068 #endif
2069
2070 /*
2071 * Flush out the b-trees, volume bitmap and Volume Header
2072 */
2073 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
2074 retval = hfs_start_transaction(hfsmp);
2075 if (retval == 0) {
2076 started_tr = 1;
2077 } else if (!force) {
2078 goto err_exit;
2079 }
2080
2081 if (hfsmp->hfs_startup_vp) {
2082 (void) hfs_lock(VTOC(hfsmp->hfs_startup_vp), HFS_EXCLUSIVE_LOCK);
2083 retval = hfs_fsync(hfsmp->hfs_startup_vp, MNT_WAIT, 0, p);
2084 hfs_unlock(VTOC(hfsmp->hfs_startup_vp));
2085 if (retval && !force)
2086 goto err_exit;
2087 }
2088
2089 if (hfsmp->hfs_attribute_vp) {
2090 (void) hfs_lock(VTOC(hfsmp->hfs_attribute_vp), HFS_EXCLUSIVE_LOCK);
2091 retval = hfs_fsync(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, p);
2092 hfs_unlock(VTOC(hfsmp->hfs_attribute_vp));
2093 if (retval && !force)
2094 goto err_exit;
2095 }
2096
2097 (void) hfs_lock(VTOC(hfsmp->hfs_catalog_vp), HFS_EXCLUSIVE_LOCK);
2098 retval = hfs_fsync(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, p);
2099 hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
2100 if (retval && !force)
2101 goto err_exit;
2102
2103 (void) hfs_lock(VTOC(hfsmp->hfs_extents_vp), HFS_EXCLUSIVE_LOCK);
2104 retval = hfs_fsync(hfsmp->hfs_extents_vp, MNT_WAIT, 0, p);
2105 hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
2106 if (retval && !force)
2107 goto err_exit;
2108
2109 if (hfsmp->hfs_allocation_vp) {
2110 (void) hfs_lock(VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK);
2111 retval = hfs_fsync(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, p);
2112 hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
2113 if (retval && !force)
2114 goto err_exit;
2115 }
2116
2117 if (hfsmp->hfc_filevp && vnode_issystem(hfsmp->hfc_filevp)) {
2118 retval = hfs_fsync(hfsmp->hfc_filevp, MNT_WAIT, 0, p);
2119 if (retval && !force)
2120 goto err_exit;
2121 }
2122
2123 /* If runtime corruption was detected, indicate that the volume
2124 * was not unmounted cleanly.
2125 */
2126 if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
2127 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
2128 } else {
2129 HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask;
2130 }
2131
2132
2133 if (rb_used) {
2134 /* If the rb-tree was live, just set min_start to 0 */
2135 hfsmp->nextAllocation = 0;
2136 }
2137 else {
2138 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
2139 int i;
2140 u_int32_t min_start = hfsmp->totalBlocks;
2141
2142 // set the nextAllocation pointer to the smallest free block number
2143 // we've seen so on the next mount we won't rescan unnecessarily
2144 lck_spin_lock(&hfsmp->vcbFreeExtLock);
2145 for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
2146 if (hfsmp->vcbFreeExt[i].startBlock < min_start) {
2147 min_start = hfsmp->vcbFreeExt[i].startBlock;
2148 }
2149 }
2150 lck_spin_unlock(&hfsmp->vcbFreeExtLock);
2151 if (min_start < hfsmp->nextAllocation) {
2152 hfsmp->nextAllocation = min_start;
2153 }
2154 }
2155 }
2156
2157
2158 retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
2159 if (retval) {
2160 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
2161 if (!force)
2162 goto err_exit; /* could not flush everything */
2163 }
2164
2165 if (started_tr) {
2166 hfs_end_transaction(hfsmp);
2167 started_tr = 0;
2168 }
2169 }
2170
2171 if (hfsmp->jnl) {
2172 hfs_journal_flush(hfsmp, FALSE);
2173 }
2174
2175 /*
2176 * Invalidate our caches and release metadata vnodes
2177 */
2178 (void) hfsUnmount(hfsmp, p);
2179
2180 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
2181 (void) hfs_relconverter(hfsmp->hfs_encoding);
2182
2183 // XXXdbg
2184 if (hfsmp->jnl) {
2185 journal_close(hfsmp->jnl);
2186 hfsmp->jnl = NULL;
2187 }
2188
2189 VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);
2190
2191 if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
2192 vnode_clearmountedon(hfsmp->jvp);
2193 retval = VNOP_CLOSE(hfsmp->jvp,
2194 hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE,
2195 vfs_context_kernel());
2196 vnode_put(hfsmp->jvp);
2197 hfsmp->jvp = NULL;
2198 }
2199 // XXXdbg
2200
2201 /*
2202 * Last chance to dump unreferenced system files.
2203 */
2204 (void) vflush(mp, NULLVP, FORCECLOSE);
2205
2206 #if HFS_SPARSE_DEV
2207 /* Drop our reference on the backing fs (if any). */
2208 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
2209 struct vnode * tmpvp;
2210
2211 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
2212 tmpvp = hfsmp->hfs_backingfs_rootvp;
2213 hfsmp->hfs_backingfs_rootvp = NULLVP;
2214 vnode_rele(tmpvp);
2215 }
2216 #endif /* HFS_SPARSE_DEV */
2217 lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group);
2218 lck_spin_destroy(&hfsmp->vcbFreeExtLock, hfs_spinlock_group);
2219 vnode_rele(hfsmp->hfs_devvp);
2220
2221 hfs_delete_chash(hfsmp);
2222 FREE(hfsmp, M_HFSMNT);
2223
2224 return (0);
2225
2226 err_exit:
2227 if (started_tr) {
2228 hfs_end_transaction(hfsmp);
2229 }
2230 return retval;
2231 }
2232
2233
2234 /*
2235 * Return the root of a filesystem.
2236 */
2237 static int
2238 hfs_vfs_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context)
2239 {
2240 return hfs_vget(VFSTOHFS(mp), (cnid_t)kHFSRootFolderID, vpp, 1, 0);
2241 }
2242
2243
2244 /*
2245 * Do operations associated with quotas
2246 */
2247 #if !QUOTA
2248 static int
2249 hfs_quotactl(__unused struct mount *mp, __unused int cmds, __unused uid_t uid, __unused caddr_t datap, __unused vfs_context_t context)
2250 {
2251 return (ENOTSUP);
2252 }
2253 #else
2254 static int
2255 hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t context)
2256 {
2257 struct proc *p = vfs_context_proc(context);
2258 int cmd, type, error;
2259
2260 if (uid == ~0U)
2261 uid = kauth_cred_getuid(vfs_context_ucred(context));
2262 cmd = cmds >> SUBCMDSHIFT;
2263
2264 switch (cmd) {
2265 case Q_SYNC:
2266 case Q_QUOTASTAT:
2267 break;
2268 case Q_GETQUOTA:
2269 if (uid == kauth_cred_getuid(vfs_context_ucred(context)))
2270 break;
2271 /* fall through */
2272 default:
2273 if ( (error = vfs_context_suser(context)) )
2274 return (error);
2275 }
2276
2277 type = cmds & SUBCMDMASK;
2278 if ((u_int)type >= MAXQUOTAS)
2279 return (EINVAL);
2280 if (vfs_busy(mp, LK_NOWAIT))
2281 return (0);
2282
2283 switch (cmd) {
2284
2285 case Q_QUOTAON:
2286 error = hfs_quotaon(p, mp, type, datap);
2287 break;
2288
2289 case Q_QUOTAOFF:
2290 error = hfs_quotaoff(p, mp, type);
2291 break;
2292
2293 case Q_SETQUOTA:
2294 error = hfs_setquota(mp, uid, type, datap);
2295 break;
2296
2297 case Q_SETUSE:
2298 error = hfs_setuse(mp, uid, type, datap);
2299 break;
2300
2301 case Q_GETQUOTA:
2302 error = hfs_getquota(mp, uid, type, datap);
2303 break;
2304
2305 case Q_SYNC:
2306 error = hfs_qsync(mp);
2307 break;
2308
2309 case Q_QUOTASTAT:
2310 error = hfs_quotastat(mp, type, datap);
2311 break;
2312
2313 default:
2314 error = EINVAL;
2315 break;
2316 }
2317 vfs_unbusy(mp);
2318
2319 return (error);
2320 }
2321 #endif /* QUOTA */
2322
2323 /* Subtype is composite of bits */
2324 #define HFS_SUBTYPE_JOURNALED 0x01
2325 #define HFS_SUBTYPE_CASESENSITIVE 0x02
2326 /* bits 2 - 6 reserved */
2327 #define HFS_SUBTYPE_STANDARDHFS 0x80
2328
2329 /*
2330 * Get file system statistics.
2331 */
2332 int
2333 hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_context_t context)
2334 {
2335 ExtendedVCB *vcb = VFSTOVCB(mp);
2336 struct hfsmount *hfsmp = VFSTOHFS(mp);
2337 u_int32_t freeCNIDs;
2338 u_int16_t subtype = 0;
2339
2340 freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)vcb->vcbNxtCNID;
2341
2342 sbp->f_bsize = (u_int32_t)vcb->blockSize;
2343 sbp->f_iosize = (size_t)cluster_max_io_size(mp, 0);
2344 sbp->f_blocks = (u_int64_t)((u_int32_t)vcb->totalBlocks);
2345 sbp->f_bfree = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 0));
2346 sbp->f_bavail = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 1));
2347 sbp->f_files = (u_int64_t)((u_int32_t )(vcb->totalBlocks - 2)); /* max files is constrained by total blocks */
2348 sbp->f_ffree = (u_int64_t)((u_int32_t )(MIN(freeCNIDs, sbp->f_bavail)));
2349
2350 /*
2351 * Subtypes (flavors) for HFS
2352 * 0: Mac OS Extended
2353 * 1: Mac OS Extended (Journaled)
2354 * 2: Mac OS Extended (Case Sensitive)
2355 * 3: Mac OS Extended (Case Sensitive, Journaled)
2356 * 4 - 127: Reserved
2357 * 128: Mac OS Standard
2358 *
2359 */
2360 if (hfsmp->hfs_flags & HFS_STANDARD) {
2361 subtype = HFS_SUBTYPE_STANDARDHFS;
2362 } else /* HFS Plus */ {
2363 if (hfsmp->jnl)
2364 subtype |= HFS_SUBTYPE_JOURNALED;
2365 if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
2366 subtype |= HFS_SUBTYPE_CASESENSITIVE;
2367 }
2368 sbp->f_fssubtype = subtype;
2369
2370 return (0);
2371 }
2372
2373
2374 //
2375 // XXXdbg -- this is a callback to be used by the journal to
2376 // get meta data blocks flushed out to disk.
2377 //
2378 // XXXdbg -- be smarter and don't flush *every* block on each
2379 // call. try to only flush some so we don't wind up
2380 // being too synchronous.
2381 //
2382 __private_extern__
2383 void
2384 hfs_sync_metadata(void *arg)
2385 {
2386 struct mount *mp = (struct mount *)arg;
2387 struct hfsmount *hfsmp;
2388 ExtendedVCB *vcb;
2389 buf_t bp;
2390 int retval;
2391 daddr64_t priIDSector;
2392 hfsmp = VFSTOHFS(mp);
2393 vcb = HFSTOVCB(hfsmp);
2394
2395 // now make sure the super block is flushed
2396 priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
2397 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
2398
2399 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
2400 HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
2401 hfsmp->hfs_physical_block_size, NOCRED, &bp);
2402 if ((retval != 0 ) && (retval != ENXIO)) {
2403 printf("hfs_sync_metadata: can't read volume header at %d! (retval 0x%x)\n",
2404 (int)priIDSector, retval);
2405 }
2406
2407 if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
2408 buf_bwrite(bp);
2409 } else if (bp) {
2410 buf_brelse(bp);
2411 }
2412
2413 // the alternate super block...
2414 // XXXdbg - we probably don't need to do this each and every time.
2415 // hfs_btreeio.c:FlushAlternate() should flag when it was
2416 // written...
2417 if (hfsmp->hfs_alt_id_sector) {
2418 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
2419 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
2420 hfsmp->hfs_physical_block_size, NOCRED, &bp);
2421 if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
2422 buf_bwrite(bp);
2423 } else if (bp) {
2424 buf_brelse(bp);
2425 }
2426 }
2427 }
2428
2429
/*
 * Per-call context handed from hfs_sync() to hfs_sync_callback() for
 * every vnode visited by vnode_iterate().
 */
struct hfs_sync_cargs {
	kauth_cred_t cred;	/* caller's credentials (set from kauth_cred_get()) */
	struct proc *p;		/* process on whose behalf the sync runs */
	int waitfor;		/* wait flag forwarded to hfs_fsync() */
	int error;		/* sticky error: overwritten on each hfs_fsync() failure */
};
2436
2437
2438 static int
2439 hfs_sync_callback(struct vnode *vp, void *cargs)
2440 {
2441 struct cnode *cp;
2442 struct hfs_sync_cargs *args;
2443 int error;
2444
2445 args = (struct hfs_sync_cargs *)cargs;
2446
2447 if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) {
2448 return (VNODE_RETURNED);
2449 }
2450 cp = VTOC(vp);
2451
2452 if ((cp->c_flag & C_MODIFIED) ||
2453 (cp->c_touch_acctime | cp->c_touch_chgtime | cp->c_touch_modtime) ||
2454 vnode_hasdirtyblks(vp)) {
2455 error = hfs_fsync(vp, args->waitfor, 0, args->p);
2456
2457 if (error)
2458 args->error = error;
2459 }
2460 hfs_unlock(cp);
2461 return (VNODE_RETURNED);
2462 }
2463
2464
2465
2466 /*
2467 * Go through the disk queues to initiate sandbagged IO;
2468 * go through the inodes to write those that have been modified;
2469 * initiate the writing of the super block if it has been modified.
2470 *
2471 * Note: we are always called with the filesystem marked `MPBUSY'.
2472 */
2473 int
2474 hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
2475 {
2476 struct proc *p = vfs_context_proc(context);
2477 struct cnode *cp;
2478 struct hfsmount *hfsmp;
2479 ExtendedVCB *vcb;
2480 struct vnode *meta_vp[4];
2481 int i;
2482 int error, allerror = 0;
2483 struct hfs_sync_cargs args;
2484
2485 hfsmp = VFSTOHFS(mp);
2486
2487 /*
2488 * hfs_changefs might be manipulating vnodes so back off
2489 */
2490 if (hfsmp->hfs_flags & HFS_IN_CHANGEFS)
2491 return (0);
2492
2493 if (hfsmp->hfs_flags & HFS_READ_ONLY)
2494 return (EROFS);
2495
2496 /* skip over frozen volumes */
2497 if (!lck_rw_try_lock_shared(&hfsmp->hfs_insync))
2498 return 0;
2499
2500 args.cred = kauth_cred_get();
2501 args.waitfor = waitfor;
2502 args.p = p;
2503 args.error = 0;
2504 /*
2505 * hfs_sync_callback will be called for each vnode
2506 * hung off of this mount point... the vnode will be
2507 * properly referenced and unreferenced around the callback
2508 */
2509 vnode_iterate(mp, 0, hfs_sync_callback, (void *)&args);
2510
2511 if (args.error)
2512 allerror = args.error;
2513
2514 vcb = HFSTOVCB(hfsmp);
2515
2516 meta_vp[0] = vcb->extentsRefNum;
2517 meta_vp[1] = vcb->catalogRefNum;
2518 meta_vp[2] = vcb->allocationsRefNum; /* This is NULL for standard HFS */
2519 meta_vp[3] = hfsmp->hfs_attribute_vp; /* Optional file */
2520
2521 /* Now sync our three metadata files */
2522 for (i = 0; i < 4; ++i) {
2523 struct vnode *btvp;
2524
2525 btvp = meta_vp[i];;
2526 if ((btvp==0) || (vnode_mount(btvp) != mp))
2527 continue;
2528
2529 /* XXX use hfs_systemfile_lock instead ? */
2530 (void) hfs_lock(VTOC(btvp), HFS_EXCLUSIVE_LOCK);
2531 cp = VTOC(btvp);
2532
2533 if (((cp->c_flag & C_MODIFIED) == 0) &&
2534 (cp->c_touch_acctime == 0) &&
2535 (cp->c_touch_chgtime == 0) &&
2536 (cp->c_touch_modtime == 0) &&
2537 vnode_hasdirtyblks(btvp) == 0) {
2538 hfs_unlock(VTOC(btvp));
2539 continue;
2540 }
2541 error = vnode_get(btvp);
2542 if (error) {
2543 hfs_unlock(VTOC(btvp));
2544 continue;
2545 }
2546 if ((error = hfs_fsync(btvp, waitfor, 0, p)))
2547 allerror = error;
2548
2549 hfs_unlock(cp);
2550 vnode_put(btvp);
2551 };
2552
2553 /*
2554 * Force stale file system control information to be flushed.
2555 */
2556 if (vcb->vcbSigWord == kHFSSigWord) {
2557 if ((error = VNOP_FSYNC(hfsmp->hfs_devvp, waitfor, context))) {
2558 allerror = error;
2559 }
2560 }
2561 #if QUOTA
2562 hfs_qsync(mp);
2563 #endif /* QUOTA */
2564
2565 hfs_hotfilesync(hfsmp, vfs_context_kernel());
2566
2567 /*
2568 * Write back modified superblock.
2569 */
2570 if (IsVCBDirty(vcb)) {
2571 error = hfs_flushvolumeheader(hfsmp, waitfor, 0);
2572 if (error)
2573 allerror = error;
2574 }
2575
2576 if (hfsmp->jnl) {
2577 hfs_journal_flush(hfsmp, FALSE);
2578 }
2579
2580 {
2581 clock_sec_t secs;
2582 clock_usec_t usecs;
2583 uint64_t now;
2584
2585 clock_get_calendar_microtime(&secs, &usecs);
2586 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
2587 hfsmp->hfs_last_sync_time = now;
2588 }
2589
2590 lck_rw_unlock_shared(&hfsmp->hfs_insync);
2591 return (allerror);
2592 }
2593
2594
2595 /*
2596 * File handle to vnode
2597 *
2598 * Have to be really careful about stale file handles:
2599 * - check that the cnode id is valid
2600 * - call hfs_vget() to get the locked cnode
2601 * - check for an unallocated cnode (i_mode == 0)
2602 * - check that the given client host has export rights and return
2603 * those rights via. exflagsp and credanonp
2604 */
2605 static int
2606 hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, __unused vfs_context_t context)
2607 {
2608 struct hfsfid *hfsfhp;
2609 struct vnode *nvp;
2610 int result;
2611
2612 *vpp = NULL;
2613 hfsfhp = (struct hfsfid *)fhp;
2614
2615 if (fhlen < (int)sizeof(struct hfsfid))
2616 return (EINVAL);
2617
2618 result = hfs_vget(VFSTOHFS(mp), ntohl(hfsfhp->hfsfid_cnid), &nvp, 0, 0);
2619 if (result) {
2620 if (result == ENOENT)
2621 result = ESTALE;
2622 return result;
2623 }
2624
2625 /*
2626 * We used to use the create time as the gen id of the file handle,
2627 * but it is not static enough because it can change at any point
2628 * via system calls. We still don't have another volume ID or other
2629 * unique identifier to use for a generation ID across reboots that
2630 * persists until the file is removed. Using only the CNID exposes
2631 * us to the potential wrap-around case, but as of 2/2008, it would take
2632 * over 2 months to wrap around if the machine did nothing but allocate
2633 * CNIDs. Using some kind of wrap counter would only be effective if
2634 * each file had the wrap counter associated with it. For now,
2635 * we use only the CNID to identify the file as it's good enough.
2636 */
2637
2638 *vpp = nvp;
2639
2640 hfs_unlock(VTOC(nvp));
2641 return (0);
2642 }
2643
2644
2645 /*
2646 * Vnode pointer to File handle
2647 */
2648 /* ARGSUSED */
2649 static int
2650 hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_context_t context)
2651 {
2652 struct cnode *cp;
2653 struct hfsfid *hfsfhp;
2654
2655 if (ISHFS(VTOVCB(vp)))
2656 return (ENOTSUP); /* hfs standard is not exportable */
2657
2658 if (*fhlenp < (int)sizeof(struct hfsfid))
2659 return (EOVERFLOW);
2660
2661 cp = VTOC(vp);
2662 hfsfhp = (struct hfsfid *)fhp;
2663 /* only the CNID is used to identify the file now */
2664 hfsfhp->hfsfid_cnid = htonl(cp->c_fileid);
2665 hfsfhp->hfsfid_gen = htonl(cp->c_fileid);
2666 *fhlenp = sizeof(struct hfsfid);
2667
2668 return (0);
2669 }
2670
2671
2672 /*
2673 * Initial HFS filesystems, done only once.
2674 */
2675 static int
2676 hfs_init(__unused struct vfsconf *vfsp)
2677 {
2678 static int done = 0;
2679
2680 if (done)
2681 return (0);
2682 done = 1;
2683 hfs_chashinit();
2684 hfs_converterinit();
2685
2686 BTReserveSetup();
2687
2688
2689 hfs_lock_attr = lck_attr_alloc_init();
2690 hfs_group_attr = lck_grp_attr_alloc_init();
2691 hfs_mutex_group = lck_grp_alloc_init("hfs-mutex", hfs_group_attr);
2692 hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr);
2693 hfs_spinlock_group = lck_grp_alloc_init("hfs-spinlock", hfs_group_attr);
2694
2695 #if HFS_COMPRESSION
2696 decmpfs_init();
2697 #endif
2698
2699 return (0);
2700 }
2701
2702 static int
2703 hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp)
2704 {
2705 struct hfsmount * hfsmp;
2706 char fstypename[MFSNAMELEN];
2707
2708 if (vp == NULL)
2709 return (EINVAL);
2710
2711 if (!vnode_isvroot(vp))
2712 return (EINVAL);
2713
2714 vnode_vfsname(vp, fstypename);
2715 if (strncmp(fstypename, "hfs", sizeof(fstypename)) != 0)
2716 return (EINVAL);
2717
2718 hfsmp = VTOHFS(vp);
2719
2720 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
2721 return (EINVAL);
2722
2723 *hfsmpp = hfsmp;
2724
2725 return (0);
2726 }
2727
2728 // XXXdbg
2729 #include <sys/filedesc.h>
2730
2731 /*
2732 * HFS filesystem related variables.
2733 */
2734 int
2735 hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp,
2736 user_addr_t newp, size_t newlen, vfs_context_t context)
2737 {
2738 struct proc *p = vfs_context_proc(context);
2739 int error;
2740 struct hfsmount *hfsmp;
2741
2742 /* all sysctl names at this level are terminal */
2743
2744 if (name[0] == HFS_ENCODINGBIAS) {
2745 int bias;
2746
2747 bias = hfs_getencodingbias();
2748 error = sysctl_int(oldp, oldlenp, newp, newlen, &bias);
2749 if (error == 0 && newp)
2750 hfs_setencodingbias(bias);
2751 return (error);
2752
2753 } else if (name[0] == HFS_EXTEND_FS) {
2754 u_int64_t newsize;
2755 vnode_t vp = vfs_context_cwd(context);
2756
2757 if (newp == USER_ADDR_NULL || vp == NULLVP)
2758 return (EINVAL);
2759 if ((error = hfs_getmountpoint(vp, &hfsmp)))
2760 return (error);
2761 error = sysctl_quad(oldp, oldlenp, newp, newlen, (quad_t *)&newsize);
2762 if (error)
2763 return (error);
2764
2765 error = hfs_extendfs(hfsmp, newsize, context);
2766 return (error);
2767
2768 } else if (name[0] == HFS_ENCODINGHINT) {
2769 size_t bufsize;
2770 size_t bytes;
2771 u_int32_t hint;
2772 u_int16_t *unicode_name = NULL;
2773 char *filename = NULL;
2774
2775 if ((newlen <= 0) || (newlen > MAXPATHLEN))
2776 return (EINVAL);
2777
2778 bufsize = MAX(newlen * 3, MAXPATHLEN);
2779 MALLOC(filename, char *, newlen, M_TEMP, M_WAITOK);
2780 if (filename == NULL) {
2781 error = ENOMEM;
2782 goto encodinghint_exit;
2783 }
2784 MALLOC(unicode_name, u_int16_t *, bufsize, M_TEMP, M_WAITOK);
2785 if (filename == NULL) {
2786 error = ENOMEM;
2787 goto encodinghint_exit;
2788 }
2789
2790 error = copyin(newp, (caddr_t)filename, newlen);
2791 if (error == 0) {
2792 error = utf8_decodestr((u_int8_t *)filename, newlen - 1, unicode_name,
2793 &bytes, bufsize, 0, UTF_DECOMPOSED);
2794 if (error == 0) {
2795 hint = hfs_pickencoding(unicode_name, bytes / 2);
2796 error = sysctl_int(oldp, oldlenp, USER_ADDR_NULL, 0, (int32_t *)&hint);
2797 }
2798 }
2799
2800 encodinghint_exit:
2801 if (unicode_name)
2802 FREE(unicode_name, M_TEMP);
2803 if (filename)
2804 FREE(filename, M_TEMP);
2805 return (error);
2806
2807 } else if (name[0] == HFS_ENABLE_JOURNALING) {
2808 // make the file system journaled...
2809 vnode_t vp = vfs_context_cwd(context);
2810 vnode_t jvp;
2811 ExtendedVCB *vcb;
2812 struct cat_attr jnl_attr, jinfo_attr;
2813 struct cat_fork jnl_fork, jinfo_fork;
2814 void *jnl = NULL;
2815 int lockflags;
2816
2817 /* Only root can enable journaling */
2818 if (!is_suser()) {
2819 return (EPERM);
2820 }
2821 if (vp == NULLVP)
2822 return EINVAL;
2823
2824 hfsmp = VTOHFS(vp);
2825 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2826 return EROFS;
2827 }
2828 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) {
2829 printf("hfs: can't make a plain hfs volume journaled.\n");
2830 return EINVAL;
2831 }
2832
2833 if (hfsmp->jnl) {
2834 printf("hfs: volume @ mp %p is already journaled!\n", vnode_mount(vp));
2835 return EAGAIN;
2836 }
2837
2838 vcb = HFSTOVCB(hfsmp);
2839 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
2840 if (BTHasContiguousNodes(VTOF(vcb->catalogRefNum)) == 0 ||
2841 BTHasContiguousNodes(VTOF(vcb->extentsRefNum)) == 0) {
2842
2843 printf("hfs: volume has a btree w/non-contiguous nodes. can not enable journaling.\n");
2844 hfs_systemfile_unlock(hfsmp, lockflags);
2845 return EINVAL;
2846 }
2847 hfs_systemfile_unlock(hfsmp, lockflags);
2848
2849 // make sure these both exist!
2850 if ( GetFileInfo(vcb, kHFSRootFolderID, ".journal_info_block", &jinfo_attr, &jinfo_fork) == 0
2851 || GetFileInfo(vcb, kHFSRootFolderID, ".journal", &jnl_attr, &jnl_fork) == 0) {
2852
2853 return EINVAL;
2854 }
2855
2856 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, context);
2857
2858 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2859 (off_t)name[2], (off_t)name[3]);
2860
2861 //
2862 // XXXdbg - note that currently (Sept, 08) hfs_util does not support
2863 // enabling the journal on a separate device so it is safe
2864 // to just copy hfs_devvp here. If hfs_util gets the ability
2865 // to dynamically enable the journal on a separate device then
2866 // we will have to do the same thing as hfs_early_journal_init()
2867 // to locate and open the journal device.
2868 //
2869 jvp = hfsmp->hfs_devvp;
2870 jnl = journal_create(jvp,
2871 (off_t)name[2] * (off_t)HFSTOVCB(hfsmp)->blockSize
2872 + HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
2873 (off_t)((unsigned)name[3]),
2874 hfsmp->hfs_devvp,
2875 hfsmp->hfs_logical_block_size,
2876 0,
2877 0,
2878 hfs_sync_metadata, hfsmp->hfs_mp);
2879
2880 /*
2881 * Set up the trim callback function so that we can add
2882 * recently freed extents to the free extent cache once
2883 * the transaction that freed them is written to the
2884 * journal on disk.
2885 */
2886 if (jnl)
2887 journal_trim_set_callback(jnl, hfs_trim_callback, hfsmp);
2888
2889 if (jnl == NULL) {
2890 printf("hfs: FAILED to create the journal!\n");
2891 if (jvp && jvp != hfsmp->hfs_devvp) {
2892 vnode_clearmountedon(jvp);
2893 VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
2894 }
2895 jvp = NULL;
2896
2897 return EINVAL;
2898 }
2899
2900 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
2901
2902 /*
2903 * Flush all dirty metadata buffers.
2904 */
2905 buf_flushdirtyblks(hfsmp->hfs_devvp, TRUE, 0, "hfs_sysctl");
2906 buf_flushdirtyblks(hfsmp->hfs_extents_vp, TRUE, 0, "hfs_sysctl");
2907 buf_flushdirtyblks(hfsmp->hfs_catalog_vp, TRUE, 0, "hfs_sysctl");
2908 buf_flushdirtyblks(hfsmp->hfs_allocation_vp, TRUE, 0, "hfs_sysctl");
2909 if (hfsmp->hfs_attribute_vp)
2910 buf_flushdirtyblks(hfsmp->hfs_attribute_vp, TRUE, 0, "hfs_sysctl");
2911
2912 HFSTOVCB(hfsmp)->vcbJinfoBlock = name[1];
2913 HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeJournaledMask;
2914 hfsmp->jvp = jvp;
2915 hfsmp->jnl = jnl;
2916
2917 // save this off for the hack-y check in hfs_remove()
2918 hfsmp->jnl_start = (u_int32_t)name[2];
2919 hfsmp->jnl_size = (off_t)((unsigned)name[3]);
2920 hfsmp->hfs_jnlinfoblkid = jinfo_attr.ca_fileid;
2921 hfsmp->hfs_jnlfileid = jnl_attr.ca_fileid;
2922
2923 vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
2924
2925 hfs_unlock_global (hfsmp);
2926 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
2927
2928 {
2929 fsid_t fsid;
2930
2931 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
2932 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
2933 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
2934 }
2935 return 0;
2936 } else if (name[0] == HFS_DISABLE_JOURNALING) {
2937 // clear the journaling bit
2938 vnode_t vp = vfs_context_cwd(context);
2939
2940 /* Only root can disable journaling */
2941 if (!is_suser()) {
2942 return (EPERM);
2943 }
2944 if (vp == NULLVP)
2945 return EINVAL;
2946
2947 hfsmp = VTOHFS(vp);
2948
2949 /*
2950 * Disabling journaling is disallowed on volumes with directory hard links
2951 * because we have not tested the relevant code path.
2952 */
2953 if (hfsmp->hfs_private_attr[DIR_HARDLINKS].ca_entries != 0){
2954 printf("hfs: cannot disable journaling on volumes with directory hardlinks\n");
2955 return EPERM;
2956 }
2957
2958 printf("hfs: disabling journaling for mount @ %p\n", vnode_mount(vp));
2959
2960 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
2961
2962 // Lights out for you buddy!
2963 journal_close(hfsmp->jnl);
2964 hfsmp->jnl = NULL;
2965
2966 if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
2967 vnode_clearmountedon(hfsmp->jvp);
2968 VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
2969 vnode_put(hfsmp->jvp);
2970 }
2971 hfsmp->jvp = NULL;
2972 vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
2973 hfsmp->jnl_start = 0;
2974 hfsmp->hfs_jnlinfoblkid = 0;
2975 hfsmp->hfs_jnlfileid = 0;
2976
2977 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeJournaledMask;
2978
2979 hfs_unlock_global (hfsmp);
2980
2981 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
2982
2983 {
2984 fsid_t fsid;
2985
2986 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
2987 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
2988 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
2989 }
2990 return 0;
2991 } else if (name[0] == HFS_GET_JOURNAL_INFO) {
2992 vnode_t vp = vfs_context_cwd(context);
2993 off_t jnl_start, jnl_size;
2994
2995 if (vp == NULLVP)
2996 return EINVAL;
2997
2998 /* 64-bit processes won't work with this sysctl -- can't fit a pointer into an int! */
2999 if (proc_is64bit(current_proc()))
3000 return EINVAL;
3001
3002 hfsmp = VTOHFS(vp);
3003 if (hfsmp->jnl == NULL) {
3004 jnl_start = 0;
3005 jnl_size = 0;
3006 } else {
3007 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
3008 jnl_size = (off_t)hfsmp->jnl_size;
3009 }
3010
3011 if ((error = copyout((caddr_t)&jnl_start, CAST_USER_ADDR_T(name[1]), sizeof(off_t))) != 0) {
3012 return error;
3013 }
3014 if ((error = copyout((caddr_t)&jnl_size, CAST_USER_ADDR_T(name[2]), sizeof(off_t))) != 0) {
3015 return error;
3016 }
3017
3018 return 0;
3019 } else if (name[0] == HFS_SET_PKG_EXTENSIONS) {
3020
3021 return set_package_extensions_table((user_addr_t)((unsigned)name[1]), name[2], name[3]);
3022
3023 } else if (name[0] == VFS_CTL_QUERY) {
3024 struct sysctl_req *req;
3025 union union_vfsidctl vc;
3026 struct mount *mp;
3027 struct vfsquery vq;
3028
3029 req = CAST_DOWN(struct sysctl_req *, oldp); /* we're new style vfs sysctl. */
3030
3031 error = SYSCTL_IN(req, &vc, proc_is64bit(p)? sizeof(vc.vc64):sizeof(vc.vc32));
3032 if (error) return (error);
3033
3034 mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */
3035 if (mp == NULL) return (ENOENT);
3036
3037 hfsmp = VFSTOHFS(mp);
3038 bzero(&vq, sizeof(vq));
3039 vq.vq_flags = hfsmp->hfs_notification_conditions;
3040 return SYSCTL_OUT(req, &vq, sizeof(vq));;
3041 } else if (name[0] == HFS_REPLAY_JOURNAL) {
3042 vnode_t devvp = NULL;
3043 int device_fd;
3044 if (namelen != 2) {
3045 return (EINVAL);
3046 }
3047 device_fd = name[1];
3048 error = file_vnode(device_fd, &devvp);
3049 if (error) {
3050 return error;
3051 }
3052 error = vnode_getwithref(devvp);
3053 if (error) {
3054 file_drop(device_fd);
3055 return error;
3056 }
3057 error = hfs_journal_replay(devvp, context);
3058 file_drop(device_fd);
3059 vnode_put(devvp);
3060 return error;
3061 } else if (name[0] == HFS_ENABLE_RESIZE_DEBUG) {
3062 hfs_resize_debug = 1;
3063 printf ("hfs_sysctl: Enabled volume resize debugging.\n");
3064 return 0;
3065 }
3066
3067 return (ENOTSUP);
3068 }
3069
3070 /*
3071 * hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support
3072 * the build_path ioctl. We use it to leverage the code below that updates
3073 * the origin list cache if necessary
3074 */
3075
3076 int
3077 hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context)
3078 {
3079 int error;
3080 int lockflags;
3081 struct hfsmount *hfsmp;
3082
3083 hfsmp = VFSTOHFS(mp);
3084
3085 error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1, 0);
3086 if (error)
3087 return (error);
3088
3089 /*
3090 * ADLs may need to have their origin state updated
3091 * since build_path needs a valid parent. The same is true
3092 * for hardlinked files as well. There isn't a race window here
3093 * in re-acquiring the cnode lock since we aren't pulling any data
3094 * out of the cnode; instead, we're going to the catalog.
3095 */
3096 if ((VTOC(*vpp)->c_flag & C_HARDLINK) &&
3097 (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK) == 0)) {
3098 cnode_t *cp = VTOC(*vpp);
3099 struct cat_desc cdesc;
3100
3101 if (!hfs_haslinkorigin(cp)) {
3102 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
3103 error = cat_findname(hfsmp, (cnid_t)ino, &cdesc);
3104 hfs_systemfile_unlock(hfsmp, lockflags);
3105 if (error == 0) {
3106 if ((cdesc.cd_parentcnid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
3107 (cdesc.cd_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) {
3108 hfs_savelinkorigin(cp, cdesc.cd_parentcnid);
3109 }
3110 cat_releasedesc(&cdesc);
3111 }
3112 }
3113 hfs_unlock(cp);
3114 }
3115 return (0);
3116 }
3117
3118
/*
 * Look up an HFS object by ID.
 *
 * The object is returned with an iocount reference and the cnode locked
 * (unless 'skiplock' is non-zero, in which case the cnode is unlocked
 * before returning).  'allow_deleted' is passed through to the cnode
 * hash lookup so callers can retrieve open-unlinked objects.
 *
 * If the object is a file then it will represent the data fork.
 */
int
hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock, int allow_deleted)
{
	struct vnode *vp = NULLVP;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct cat_fork cnfork;
	u_int32_t linkref = 0;
	int error;

	/* Check for cnids that shouldn't be exported. */
	if ((cnid < kHFSFirstUserCatalogNodeID) &&
	    (cnid != kHFSRootFolderID && cnid != kHFSRootParentID)) {
		return (ENOENT);
	}
	/* Don't export our private directories. */
	if (cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid ||
	    cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) {
		return (ENOENT);
	}
	/*
	 * Check the cnode hash first: a cached vnode avoids a catalog lookup.
	 */
	vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock, allow_deleted);
	if (vp) {
		*vpp = vp;
		return(0);
	}

	bzero(&cndesc, sizeof(cndesc));
	bzero(&cnattr, sizeof(cnattr));
	bzero(&cnfork, sizeof(cnfork));

	/*
	 * Not in hash, lookup in catalog.
	 *
	 * kHFSRootParentID has no catalog record, so synthesize a
	 * descriptor/attributes for the root folder instead.
	 */
	if (cnid == kHFSRootParentID) {
		static char hfs_rootname[] = "/";

		cndesc.cd_nameptr = (const u_int8_t *)&hfs_rootname[0];
		cndesc.cd_namelen = 1;
		cndesc.cd_parentcnid = kHFSRootParentID;
		cndesc.cd_cnid = kHFSRootFolderID;
		cndesc.cd_flags = CD_ISDIR;

		cnattr.ca_fileid = kHFSRootFolderID;
		cnattr.ca_linkcount = 1;
		cnattr.ca_entries = 1;
		cnattr.ca_dircount = 1;
		cnattr.ca_mode = (S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO);
	} else {
		int lockflags;
		cnid_t pid;
		const char *nameptr;

		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		error = cat_idlookup(hfsmp, cnid, 0, &cndesc, &cnattr, &cnfork);
		hfs_systemfile_unlock(hfsmp, lockflags);

		if (error) {
			*vpp = NULL;
			return (error);
		}

		/*
		 * Check for a raw hardlink inode and save its linkref.
		 * Raw inodes live in the private metadata directories and are
		 * identified by their name prefix ("iNode", "dir_", "temp").
		 */
		pid = cndesc.cd_parentcnid;
		nameptr = (const char *)cndesc.cd_nameptr;

		if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		    (bcmp(nameptr, HFS_INODE_PREFIX, HFS_INODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_INODE_PREFIX_LEN], NULL, 10);

		} else if ((pid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DIRINODE_PREFIX, HFS_DIRINODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_DIRINODE_PREFIX_LEN], NULL, 10);

		} else if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DELETE_PREFIX, HFS_DELETE_PREFIX_LEN) == 0)) {
			*vpp = NULL;
			cat_releasedesc(&cndesc);
			return (ENOENT);	/* open unlinked file */
		}
	}

	/*
	 * Finish initializing cnode descriptor for hardlinks.
	 *
	 * We need a valid name and parent for reverse lookups.
	 */
	if (linkref) {
		cnid_t nextlinkid;
		cnid_t prevlinkid;
		struct cat_desc linkdesc;
		int lockflags;

		cnattr.ca_linkref = linkref;

		/*
		 * Pick up the first link in the chain and get a descriptor for it.
		 * This allows blind volfs paths to work for hardlinks.
		 * On failure the raw-inode descriptor from above is kept.
		 */
		if ((hfs_lookup_siblinglinks(hfsmp, linkref, &prevlinkid, &nextlinkid) == 0) &&
		    (nextlinkid != 0)) {
			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
			error = cat_findname(hfsmp, nextlinkid, &linkdesc);
			hfs_systemfile_unlock(hfsmp, lockflags);
			if (error == 0) {
				/* Swap in the link's descriptor; ownership of its
				 * name buffer transfers to cndesc via the bcopy. */
				cat_releasedesc(&cndesc);
				bcopy(&linkdesc, &cndesc, sizeof(linkdesc));
			}
		}
	}

	if (linkref) {
		int newvnode_flags = 0;

		error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr,
		                        &cnfork, &vp, &newvnode_flags);
		if (error == 0) {
			VTOC(vp)->c_flag |= C_HARDLINK;
			vnode_setmultipath(vp);
		}
	} else {
		struct componentname cn;
		int newvnode_flags = 0;

		/* Supply hfs_getnewvnode with a component name. */
		MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
		cn.cn_nameiop = LOOKUP;
		cn.cn_flags = ISLASTCN | HASBUF;
		cn.cn_context = NULL;
		cn.cn_pnlen = MAXPATHLEN;
		cn.cn_nameptr = cn.cn_pnbuf;
		cn.cn_namelen = cndesc.cd_namelen;
		cn.cn_hash = 0;
		cn.cn_consume = 0;
		/* +1 copies the catalog name's NUL terminator as well. */
		bcopy(cndesc.cd_nameptr, cn.cn_nameptr, cndesc.cd_namelen + 1);

		error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr,
		                        &cnfork, &vp, &newvnode_flags);

		if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK)) {
			hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid);
		}
		FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
	}
	cat_releasedesc(&cndesc);

	*vpp = vp;
	if (vp && skiplock) {
		hfs_unlock(VTOC(vp));
	}
	return (error);
}
3282
3283
/*
 * Flush out all the files in a filesystem.
 *
 * Returns 0 on success; EBUSY if vnodes remain in use (e.g. the root
 * directory still has extra references when quotas are enabled), or an
 * error from vflush().  'flags' is passed through to vflush()
 * (e.g. FORCECLOSE).
 */
static int
#if QUOTA
hfs_flushfiles(struct mount *mp, int flags, struct proc *p)
#else
hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p)
#endif /* QUOTA */
{
	struct hfsmount *hfsmp;
	struct vnode *skipvp = NULLVP;
	int error;
#if QUOTA
	int quotafilecnt;
	int i;
#endif

	hfsmp = VFSTOHFS(mp);

#if QUOTA
	/*
	 * The open quota files have an indirect reference on
	 * the root directory vnode.  We must account for this
	 * extra reference when doing the initial vflush.
	 */
	quotafilecnt = 0;
	if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {

		/* Find out how many quota files we have open. */
		for (i = 0; i < MAXQUOTAS; i++) {
			if (hfsmp->hfs_qfiles[i].qf_vp != NULLVP)
				++quotafilecnt;
		}

		/* Obtain the root vnode so we can skip over it. */
		skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0, 0);
	}
#endif /* QUOTA */

	/* First pass: skip system and swap files as well as 'skipvp'. */
	error = vflush(mp, skipvp, SKIPSYSTEM | SKIPSWAP | flags);
	if (error != 0)
		return(error);

	/* Second pass: swap files are no longer skipped. */
	error = vflush(mp, skipvp, SKIPSYSTEM | flags);

#if QUOTA
	if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {
		if (skipvp) {
			/*
			 * See if there are additional references on the
			 * root vp besides the ones obtained from the open
			 * quota files and the hfs_chash_getvnode call above.
			 */
			if ((error == 0) &&
			    (vnode_isinuse(skipvp, quotafilecnt))) {
				error = EBUSY;  /* root directory is still open */
			}
			hfs_unlock(VTOC(skipvp));
			vnode_put(skipvp);
		}
		if (error && (flags & FORCECLOSE) == 0)
			return (error);

		/* Shut down quotas, then flush the (formerly skipped) root. */
		for (i = 0; i < MAXQUOTAS; i++) {
			if (hfsmp->hfs_qfiles[i].qf_vp == NULLVP)
				continue;
			hfs_quotaoff(p, mp, i);
		}
		error = vflush(mp, NULLVP, SKIPSYSTEM | flags);
	}
#endif /* QUOTA */

	return (error);
}
3359
3360 /*
3361 * Update volume encoding bitmap (HFS Plus only)
3362 */
3363 __private_extern__
3364 void
3365 hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding)
3366 {
3367 #define kIndexMacUkrainian 48 /* MacUkrainian encoding is 152 */
3368 #define kIndexMacFarsi 49 /* MacFarsi encoding is 140 */
3369
3370 u_int32_t index;
3371
3372 switch (encoding) {
3373 case kTextEncodingMacUkrainian:
3374 index = kIndexMacUkrainian;
3375 break;
3376 case kTextEncodingMacFarsi:
3377 index = kIndexMacFarsi;
3378 break;
3379 default:
3380 index = encoding;
3381 break;
3382 }
3383
3384 if (index < 64 && (hfsmp->encodingsBitmap & (u_int64_t)(1ULL << index)) == 0) {
3385 HFS_MOUNT_LOCK(hfsmp, TRUE)
3386 hfsmp->encodingsBitmap |= (u_int64_t)(1ULL << index);
3387 MarkVCBDirty(hfsmp);
3388 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3389 }
3390 }
3391
3392 /*
3393 * Update volume stats
3394 *
3395 * On journal volumes this will cause a volume header flush
3396 */
3397 int
3398 hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot)
3399 {
3400 struct timeval tv;
3401
3402 microtime(&tv);
3403
3404 lck_mtx_lock(&hfsmp->hfs_mutex);
3405
3406 MarkVCBDirty(hfsmp);
3407 hfsmp->hfs_mtime = tv.tv_sec;
3408
3409 switch (op) {
3410 case VOL_UPDATE:
3411 break;
3412 case VOL_MKDIR:
3413 if (hfsmp->hfs_dircount != 0xFFFFFFFF)
3414 ++hfsmp->hfs_dircount;
3415 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3416 ++hfsmp->vcbNmRtDirs;
3417 break;
3418 case VOL_RMDIR:
3419 if (hfsmp->hfs_dircount != 0)
3420 --hfsmp->hfs_dircount;
3421 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3422 --hfsmp->vcbNmRtDirs;
3423 break;
3424 case VOL_MKFILE:
3425 if (hfsmp->hfs_filecount != 0xFFFFFFFF)
3426 ++hfsmp->hfs_filecount;
3427 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3428 ++hfsmp->vcbNmFls;
3429 break;
3430 case VOL_RMFILE:
3431 if (hfsmp->hfs_filecount != 0)
3432 --hfsmp->hfs_filecount;
3433 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3434 --hfsmp->vcbNmFls;
3435 break;
3436 }
3437
3438 lck_mtx_unlock(&hfsmp->hfs_mutex);
3439
3440 if (hfsmp->jnl) {
3441 hfs_flushvolumeheader(hfsmp, 0, 0);
3442 }
3443
3444 return (0);
3445 }
3446
3447
/*
 * Flush the in-memory VCB state back to the on-disk HFS (standard)
 * Master Directory Block, byte-swapped to big-endian.
 *
 * waitfor  - MNT_WAIT for a synchronous write, anything else async.
 * altflush - non-zero to also copy the MDB to the alternate MDB sector.
 */
static int
hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush)
{
	ExtendedVCB *vcb = HFSTOVCB(hfsmp);
	struct filefork *fp;
	HFSMasterDirectoryBlock	*mdb;
	struct buf *bp = NULL;
	int retval;
	int sectorsize;
	ByteCount namelen;

	/* Read the sector holding the primary MDB. */
	sectorsize = hfsmp->hfs_logical_block_size;
	retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sectorsize), sectorsize, NOCRED, &bp);
	if (retval) {
		if (bp)
			buf_brelse(bp);
		return retval;
	}

	lck_mtx_lock(&hfsmp->hfs_mutex);

	mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sectorsize));

	/* HFS standard stores dates in local time; convert from UTC. */
	mdb->drCrDate	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->hfs_itime)));
	mdb->drLsMod	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbLsMod)));
	mdb->drAtrb	= SWAP_BE16 (vcb->vcbAtrb);
	mdb->drNmFls	= SWAP_BE16 (vcb->vcbNmFls);
	mdb->drAllocPtr	= SWAP_BE16 (vcb->nextAllocation);
	mdb->drClpSiz	= SWAP_BE32 (vcb->vcbClpSiz);
	mdb->drNxtCNID	= SWAP_BE32 (vcb->vcbNxtCNID);
	/* NOTE(review): 16-bit field — assumes freeBlocks fits an HFS
	 * standard volume's limits. */
	mdb->drFreeBks	= SWAP_BE16 (vcb->freeBlocks);

	/* Volume name: convert UTF-8 to the volume's HFS encoding. */
	namelen = strlen((char *)vcb->vcbVN);
	retval = utf8_to_hfs(vcb, namelen, vcb->vcbVN, mdb->drVN);
	/* Retry with MacRoman in case that's how it was exported. */
	if (retval)
		retval = utf8_to_mac_roman(namelen, vcb->vcbVN, mdb->drVN);

	mdb->drVolBkUp	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbVolBkUp)));
	mdb->drWrCnt	= SWAP_BE32 (vcb->vcbWrCnt);
	mdb->drNmRtDirs	= SWAP_BE16 (vcb->vcbNmRtDirs);
	mdb->drFilCnt	= SWAP_BE32 (vcb->vcbFilCnt);
	mdb->drDirCnt	= SWAP_BE32 (vcb->vcbDirCnt);

	bcopy(vcb->vcbFndrInfo, mdb->drFndrInfo, sizeof(mdb->drFndrInfo));

	/* Sync the extents overflow file's first 3 extents and sizes. */
	fp = VTOF(vcb->extentsRefNum);
	mdb->drXTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drXTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drXTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drXTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drXTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drXTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drXTFlSize	= SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drXTClpSiz	= SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	/* Sync the catalog file's first 3 extents and sizes. */
	fp = VTOF(vcb->catalogRefNum);
	mdb->drCTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drCTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drCTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drCTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drCTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drCTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drCTFlSize	= SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drCTClpSiz	= SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	MarkVCBClean( vcb );

	lck_mtx_unlock(&hfsmp->hfs_mutex);

	/* If requested, flush out the alternate MDB */
	if (altflush) {
		struct buf *alt_bp = NULL;

		if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sectorsize, NOCRED, &alt_bp) == 0) {
			bcopy(mdb, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sectorsize), kMDBSize);

			(void) VNOP_BWRITE(alt_bp);
		} else if (alt_bp)
			buf_brelse(alt_bp);
	}

	/* MNT_WAIT: write synchronously and return the result;
	 * otherwise queue an async write. */
	if (waitfor != MNT_WAIT)
		buf_bawrite(bp);
	else
		retval = VNOP_BWRITE(bp);

	return (retval);
}
3539
3540 /*
3541 * Flush any dirty in-memory mount data to the on-disk
3542 * volume header.
3543 *
3544 * Note: the on-disk volume signature is intentionally
3545 * not flushed since the on-disk "H+" and "HX" signatures
3546 * are always stored in-memory as "H+".
3547 */
3548 int
3549 hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
3550 {
3551 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3552 struct filefork *fp;
3553 HFSPlusVolumeHeader *volumeHeader, *altVH;
3554 int retval;
3555 struct buf *bp, *alt_bp;
3556 int i;
3557 daddr64_t priIDSector;
3558 int critical;
3559 u_int16_t signature;
3560 u_int16_t hfsversion;
3561
3562 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
3563 return(0);
3564 }
3565 if (hfsmp->hfs_flags & HFS_STANDARD) {
3566 return hfs_flushMDB(hfsmp, waitfor, altflush);
3567 }
3568 critical = altflush;
3569 priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
3570 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
3571
3572 if (hfs_start_transaction(hfsmp) != 0) {
3573 return EINVAL;
3574 }
3575
3576 bp = NULL;
3577 alt_bp = NULL;
3578
3579 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3580 HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
3581 hfsmp->hfs_physical_block_size, NOCRED, &bp);
3582 if (retval) {
3583 printf("hfs: err %d reading VH blk (%s)\n", retval, vcb->vcbVN);
3584 goto err_exit;
3585 }
3586
3587 volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) +
3588 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3589
3590 /*
3591 * Sanity check what we just read. If it's bad, try the alternate
3592 * instead.
3593 */
3594 signature = SWAP_BE16 (volumeHeader->signature);
3595 hfsversion = SWAP_BE16 (volumeHeader->version);
3596 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3597 (hfsversion < kHFSPlusVersion) || (hfsversion > 100) ||
3598 (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize)) {
3599 printf("hfs: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d%s\n",
3600 vcb->vcbVN, signature, hfsversion,
3601 SWAP_BE32 (volumeHeader->blockSize),
3602 hfsmp->hfs_alt_id_sector ? "; trying alternate" : "");
3603 hfs_mark_volume_inconsistent(hfsmp);
3604
3605 if (hfsmp->hfs_alt_id_sector) {
3606 retval = buf_meta_bread(hfsmp->hfs_devvp,
3607 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3608 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp);
3609 if (retval) {
3610 printf("hfs: err %d reading alternate VH (%s)\n", retval, vcb->vcbVN);
3611 goto err_exit;
3612 }
3613
3614 altVH = (HFSPlusVolumeHeader *)((char *)buf_dataptr(alt_bp) +
3615 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size));
3616 signature = SWAP_BE16(altVH->signature);
3617 hfsversion = SWAP_BE16(altVH->version);
3618
3619 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3620 (hfsversion < kHFSPlusVersion) || (kHFSPlusVersion > 100) ||
3621 (SWAP_BE32(altVH->blockSize) != vcb->blockSize)) {
3622 printf("hfs: corrupt alternate VH on %s, sig 0x%04x, ver %d, blksize %d\n",
3623 vcb->vcbVN, signature, hfsversion,
3624 SWAP_BE32(altVH->blockSize));
3625 retval = EIO;
3626 goto err_exit;
3627 }
3628
3629 /* The alternate is plausible, so use it. */
3630 bcopy(altVH, volumeHeader, kMDBSize);
3631 buf_brelse(alt_bp);
3632 alt_bp = NULL;
3633 } else {
3634 /* No alternate VH, nothing more we can do. */
3635 retval = EIO;
3636 goto err_exit;
3637 }
3638 }
3639
3640 if (hfsmp->jnl) {
3641 journal_modify_block_start(hfsmp->jnl, bp);
3642 }
3643
3644 /*
3645 * For embedded HFS+ volumes, update create date if it changed
3646 * (ie from a setattrlist call)
3647 */
3648 if ((vcb->hfsPlusIOPosOffset != 0) &&
3649 (SWAP_BE32 (volumeHeader->createDate) != vcb->localCreateDate)) {
3650 struct buf *bp2;
3651 HFSMasterDirectoryBlock *mdb;
3652
3653 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3654 HFS_PHYSBLK_ROUNDDOWN(HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size), hfsmp->hfs_log_per_phys),
3655 hfsmp->hfs_physical_block_size, NOCRED, &bp2);
3656 if (retval) {
3657 if (bp2)
3658 buf_brelse(bp2);
3659 retval = 0;
3660 } else {
3661 mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp2) +
3662 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3663
3664 if ( SWAP_BE32 (mdb->drCrDate) != vcb->localCreateDate )
3665 {
3666 if (hfsmp->jnl) {
3667 journal_modify_block_start(hfsmp->jnl, bp2);
3668 }
3669
3670 mdb->drCrDate = SWAP_BE32 (vcb->localCreateDate); /* pick up the new create date */
3671
3672 if (hfsmp->jnl) {
3673 journal_modify_block_end(hfsmp->jnl, bp2, NULL, NULL);
3674 } else {
3675 (void) VNOP_BWRITE(bp2); /* write out the changes */
3676 }
3677 }
3678 else
3679 {
3680 buf_brelse(bp2); /* just release it */
3681 }
3682 }
3683 }
3684
3685 lck_mtx_lock(&hfsmp->hfs_mutex);
3686
3687 /* Note: only update the lower 16 bits worth of attributes */
3688 volumeHeader->attributes = SWAP_BE32 (vcb->vcbAtrb);
3689 volumeHeader->journalInfoBlock = SWAP_BE32 (vcb->vcbJinfoBlock);
3690 if (hfsmp->jnl) {
3691 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSJMountVersion);
3692 } else {
3693 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSPlusMountVersion);
3694 }
3695 volumeHeader->createDate = SWAP_BE32 (vcb->localCreateDate); /* volume create date is in local time */
3696 volumeHeader->modifyDate = SWAP_BE32 (to_hfs_time(vcb->vcbLsMod));
3697 volumeHeader->backupDate = SWAP_BE32 (to_hfs_time(vcb->vcbVolBkUp));
3698 volumeHeader->fileCount = SWAP_BE32 (vcb->vcbFilCnt);
3699 volumeHeader->folderCount = SWAP_BE32 (vcb->vcbDirCnt);
3700 volumeHeader->totalBlocks = SWAP_BE32 (vcb->totalBlocks);
3701 volumeHeader->freeBlocks = SWAP_BE32 (vcb->freeBlocks);
3702 volumeHeader->nextAllocation = SWAP_BE32 (vcb->nextAllocation);
3703 volumeHeader->rsrcClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3704 volumeHeader->dataClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3705 volumeHeader->nextCatalogID = SWAP_BE32 (vcb->vcbNxtCNID);
3706 volumeHeader->writeCount = SWAP_BE32 (vcb->vcbWrCnt);
3707 volumeHeader->encodingsBitmap = SWAP_BE64 (vcb->encodingsBitmap);
3708
3709 if (bcmp(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)) != 0) {
3710 bcopy(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo));
3711 critical = 1;
3712 }
3713
3714 /*
3715 * System files are only dirty when altflush is set.
3716 */
3717 if (altflush == 0) {
3718 goto done;
3719 }
3720
3721 /* Sync Extents over-flow file meta data */
3722 fp = VTOF(vcb->extentsRefNum);
3723 if (FTOC(fp)->c_flag & C_MODIFIED) {
3724 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3725 volumeHeader->extentsFile.extents[i].startBlock =
3726 SWAP_BE32 (fp->ff_extents[i].startBlock);
3727 volumeHeader->extentsFile.extents[i].blockCount =
3728 SWAP_BE32 (fp->ff_extents[i].blockCount);
3729 }
3730 volumeHeader->extentsFile.logicalSize = SWAP_BE64 (fp->ff_size);
3731 volumeHeader->extentsFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3732 volumeHeader->extentsFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3733 FTOC(fp)->c_flag &= ~C_MODIFIED;
3734 }
3735
3736 /* Sync Catalog file meta data */
3737 fp = VTOF(vcb->catalogRefNum);
3738 if (FTOC(fp)->c_flag & C_MODIFIED) {
3739 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3740 volumeHeader->catalogFile.extents[i].startBlock =
3741 SWAP_BE32 (fp->ff_extents[i].startBlock);
3742 volumeHeader->catalogFile.extents[i].blockCount =
3743 SWAP_BE32 (fp->ff_extents[i].blockCount);
3744 }
3745 volumeHeader->catalogFile.logicalSize = SWAP_BE64 (fp->ff_size);
3746 volumeHeader->catalogFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3747 volumeHeader->catalogFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3748 FTOC(fp)->c_flag &= ~C_MODIFIED;
3749 }
3750
3751 /* Sync Allocation file meta data */
3752 fp = VTOF(vcb->allocationsRefNum);
3753 if (FTOC(fp)->c_flag & C_MODIFIED) {
3754 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3755 volumeHeader->allocationFile.extents[i].startBlock =
3756 SWAP_BE32 (fp->ff_extents[i].startBlock);
3757 volumeHeader->allocationFile.extents[i].blockCount =
3758 SWAP_BE32 (fp->ff_extents[i].blockCount);
3759 }
3760 volumeHeader->allocationFile.logicalSize = SWAP_BE64 (fp->ff_size);
3761 volumeHeader->allocationFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3762 volumeHeader->allocationFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3763 FTOC(fp)->c_flag &= ~C_MODIFIED;
3764 }
3765
3766 /* Sync Attribute file meta data */
3767 if (hfsmp->hfs_attribute_vp) {
3768 fp = VTOF(hfsmp->hfs_attribute_vp);
3769 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3770 volumeHeader->attributesFile.extents[i].startBlock =
3771 SWAP_BE32 (fp->ff_extents[i].startBlock);
3772 volumeHeader->attributesFile.extents[i].blockCount =
3773 SWAP_BE32 (fp->ff_extents[i].blockCount);
3774 }
3775 FTOC(fp)->c_flag &= ~C_MODIFIED;
3776 volumeHeader->attributesFile.logicalSize = SWAP_BE64 (fp->ff_size);
3777 volumeHeader->attributesFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3778 volumeHeader->attributesFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3779 }
3780
3781 /* Sync Startup file meta data */
3782 if (hfsmp->hfs_startup_vp) {
3783 fp = VTOF(hfsmp->hfs_startup_vp);
3784 if (FTOC(fp)->c_flag & C_MODIFIED) {
3785 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3786 volumeHeader->startupFile.extents[i].startBlock =
3787 SWAP_BE32 (fp->ff_extents[i].startBlock);
3788 volumeHeader->startupFile.extents[i].blockCount =
3789 SWAP_BE32 (fp->ff_extents[i].blockCount);
3790 }
3791 volumeHeader->startupFile.logicalSize = SWAP_BE64 (fp->ff_size);
3792 volumeHeader->startupFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3793 volumeHeader->startupFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3794 FTOC(fp)->c_flag &= ~C_MODIFIED;
3795 }
3796 }
3797
3798 done:
3799 MarkVCBClean(hfsmp);
3800 lck_mtx_unlock(&hfsmp->hfs_mutex);
3801
3802 /* If requested, flush out the alternate volume header */
3803 if (altflush && hfsmp->hfs_alt_id_sector) {
3804 if (buf_meta_bread(hfsmp->hfs_devvp,
3805 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3806 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) {
3807 if (hfsmp->jnl) {
3808 journal_modify_block_start(hfsmp->jnl, alt_bp);
3809 }
3810
3811 bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) +
3812 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size),
3813 kMDBSize);
3814
3815 if (hfsmp->jnl) {
3816 journal_modify_block_end(hfsmp->jnl, alt_bp, NULL, NULL);
3817 } else {
3818 (void) VNOP_BWRITE(alt_bp);
3819 }
3820 } else if (alt_bp)
3821 buf_brelse(alt_bp);
3822 }
3823
3824 if (hfsmp->jnl) {
3825 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
3826 } else {
3827 if (waitfor != MNT_WAIT)
3828 buf_bawrite(bp);
3829 else {
3830 retval = VNOP_BWRITE(bp);
3831 /* When critical data changes, flush the device cache */
3832 if (critical && (retval == 0)) {
3833 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE,
3834 NULL, FWRITE, NULL);
3835 }
3836 }
3837 }
3838 hfs_end_transaction(hfsmp);
3839
3840 return (retval);
3841
3842 err_exit:
3843 if (alt_bp)
3844 buf_brelse(alt_bp);
3845 if (bp)
3846 buf_brelse(bp);
3847 hfs_end_transaction(hfsmp);
3848 return retval;
3849 }
3850
3851
3852 /*
3853 * Extend a file system.
3854 */
3855 int
3856 hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
3857 {
3858 struct proc *p = vfs_context_proc(context);
3859 kauth_cred_t cred = vfs_context_ucred(context);
3860 struct vnode *vp;
3861 struct vnode *devvp;
3862 struct buf *bp;
3863 struct filefork *fp = NULL;
3864 ExtendedVCB *vcb;
3865 struct cat_fork forkdata;
3866 u_int64_t oldsize;
3867 u_int64_t newblkcnt;
3868 u_int64_t prev_phys_block_count;
3869 u_int32_t addblks;
3870 u_int64_t sectorcnt;
3871 u_int32_t sectorsize;
3872 u_int32_t phys_sectorsize;
3873 daddr64_t prev_alt_sector;
3874 daddr_t bitmapblks;
3875 int lockflags = 0;
3876 int error;
3877 int64_t oldBitmapSize;
3878 Boolean usedExtendFileC = false;
3879 int transaction_begun = 0;
3880
3881 devvp = hfsmp->hfs_devvp;
3882 vcb = HFSTOVCB(hfsmp);
3883
3884 /*
3885 * - HFS Plus file systems only.
3886 * - Journaling must be enabled.
3887 * - No embedded volumes.
3888 */
3889 if ((vcb->vcbSigWord == kHFSSigWord) ||
3890 (hfsmp->jnl == NULL) ||
3891 (vcb->hfsPlusIOPosOffset != 0)) {
3892 return (EPERM);
3893 }
3894 /*
3895 * If extending file system by non-root, then verify
3896 * ownership and check permissions.
3897 */
3898 if (suser(cred, NULL)) {
3899 error = hfs_vget(hfsmp, kHFSRootFolderID, &vp, 0, 0);
3900
3901 if (error)
3902 return (error);
3903 error = hfs_owner_rights(hfsmp, VTOC(vp)->c_uid, cred, p, 0);
3904 if (error == 0) {
3905 error = hfs_write_access(vp, cred, p, false);
3906 }
3907 hfs_unlock(VTOC(vp));
3908 vnode_put(vp);
3909 if (error)
3910 return (error);
3911
3912 error = vnode_authorize(devvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, context);
3913 if (error)
3914 return (error);
3915 }
3916 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sectorsize, 0, context)) {
3917 return (ENXIO);
3918 }
3919 if (sectorsize != hfsmp->hfs_logical_block_size) {
3920 return (ENXIO);
3921 }
3922 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sectorcnt, 0, context)) {
3923 return (ENXIO);
3924 }
3925 if ((sectorsize * sectorcnt) < newsize) {
3926 printf("hfs_extendfs: not enough space on device\n");
3927 return (ENOSPC);
3928 }
3929 error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sectorsize, 0, context);
3930 if (error) {
3931 if ((error != ENOTSUP) && (error != ENOTTY)) {
3932 return (ENXIO);
3933 }
3934 /* If ioctl is not supported, force physical and logical sector size to be same */
3935 phys_sectorsize = sectorsize;
3936 }
3937 oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
3938
3939 /*
3940 * Validate new size.
3941 */
3942 if ((newsize <= oldsize) || (newsize % sectorsize) || (newsize % phys_sectorsize)) {
3943 printf("hfs_extendfs: invalid size\n");
3944 return (EINVAL);
3945 }
3946 newblkcnt = newsize / vcb->blockSize;
3947 if (newblkcnt > (u_int64_t)0xFFFFFFFF)
3948 return (EOVERFLOW);
3949
3950 addblks = newblkcnt - vcb->totalBlocks;
3951
3952 if (hfs_resize_debug) {
3953 printf ("hfs_extendfs: old: size=%qu, blkcnt=%u\n", oldsize, hfsmp->totalBlocks);
3954 printf ("hfs_extendfs: new: size=%qu, blkcnt=%u, addblks=%u\n", newsize, (u_int32_t)newblkcnt, addblks);
3955 }
3956 printf("hfs_extendfs: will extend \"%s\" by %d blocks\n", vcb->vcbVN, addblks);
3957
3958 HFS_MOUNT_LOCK(hfsmp, TRUE);
3959 if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
3960 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3961 error = EALREADY;
3962 goto out;
3963 }
3964 hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
3965 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3966
3967 /*
3968 * Enclose changes inside a transaction.
3969 */
3970 if (hfs_start_transaction(hfsmp) != 0) {
3971 error = EINVAL;
3972 goto out;
3973 }
3974 transaction_begun = 1;
3975
3976 /*
3977 * Note: we take the attributes lock in case we have an attribute data vnode
3978 * which needs to change size.
3979 */
3980 lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
3981 vp = vcb->allocationsRefNum;
3982 fp = VTOF(vp);
3983 bcopy(&fp->ff_data, &forkdata, sizeof(forkdata));
3984
3985 /*
3986 * Calculate additional space required (if any) by allocation bitmap.
3987 */
3988 oldBitmapSize = fp->ff_size;
3989 bitmapblks = roundup((newblkcnt+7) / 8, vcb->vcbVBMIOSize) / vcb->blockSize;
3990 if (bitmapblks > (daddr_t)fp->ff_blocks)
3991 bitmapblks -= fp->ff_blocks;
3992 else
3993 bitmapblks = 0;
3994
3995 /*
3996 * The allocation bitmap can contain unused bits that are beyond end of
3997 * current volume's allocation blocks. Usually they are supposed to be
3998 * zero'ed out but there can be cases where they might be marked as used.
3999 * After extending the file system, those bits can represent valid
4000 * allocation blocks, so we mark all the bits from the end of current
4001 * volume to end of allocation bitmap as "free".
4002 */
4003 BlockMarkFreeUnused(vcb, vcb->totalBlocks,
4004 (fp->ff_blocks * vcb->blockSize * 8) - vcb->totalBlocks);
4005
4006 if (bitmapblks > 0) {
4007 daddr64_t blkno;
4008 daddr_t blkcnt;
4009 off_t bytesAdded;
4010
4011 /*
4012 * Get the bitmap's current size (in allocation blocks) so we know
4013 * where to start zero filling once the new space is added. We've
4014 * got to do this before the bitmap is grown.
4015 */
4016 blkno = (daddr64_t)fp->ff_blocks;
4017
4018 /*
4019 * Try to grow the allocation file in the normal way, using allocation
4020 * blocks already existing in the file system. This way, we might be
4021 * able to grow the bitmap contiguously, or at least in the metadata
4022 * zone.
4023 */
4024 error = ExtendFileC(vcb, fp, bitmapblks * vcb->blockSize, 0,
4025 kEFAllMask | kEFNoClumpMask | kEFReserveMask
4026 | kEFMetadataMask | kEFContigMask, &bytesAdded);
4027
4028 if (error == 0) {
4029 usedExtendFileC = true;
4030 } else {
4031 /*
4032 * If the above allocation failed, fall back to allocating the new
4033 * extent of the bitmap from the space we're going to add. Since those
4034 * blocks don't yet belong to the file system, we have to update the
4035 * extent list directly, and manually adjust the file size.
4036 */
4037 bytesAdded = 0;
4038 error = AddFileExtent(vcb, fp, vcb->totalBlocks, bitmapblks);
4039 if (error) {
4040 printf("hfs_extendfs: error %d adding extents\n", error);
4041 goto out;
4042 }
4043 fp->ff_blocks += bitmapblks;
4044 VTOC(vp)->c_blocks = fp->ff_blocks;
4045 VTOC(vp)->c_flag |= C_MODIFIED;
4046 }
4047
4048 /*
4049 * Update the allocation file's size to include the newly allocated
4050 * blocks. Note that ExtendFileC doesn't do this, which is why this
4051 * statement is outside the above "if" statement.
4052 */
4053 fp->ff_size += (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
4054
4055 /*
4056 * Zero out the new bitmap blocks.
4057 */
4058 {
4059
4060 bp = NULL;
4061 blkcnt = bitmapblks;
4062 while (blkcnt > 0) {
4063 error = (int)buf_meta_bread(vp, blkno, vcb->blockSize, NOCRED, &bp);
4064 if (error) {
4065 if (bp) {
4066 buf_brelse(bp);
4067 }
4068 break;
4069 }
4070 bzero((char *)buf_dataptr(bp), vcb->blockSize);
4071 buf_markaged(bp);
4072 error = (int)buf_bwrite(bp);
4073 if (error)
4074 break;
4075 --blkcnt;
4076 ++blkno;
4077 }
4078 }
4079 if (error) {
4080 printf("hfs_extendfs: error %d clearing blocks\n", error);
4081 goto out;
4082 }
4083 /*
4084 * Mark the new bitmap space as allocated.
4085 *
4086 * Note that ExtendFileC will have marked any blocks it allocated, so
4087 * this is only needed if we used AddFileExtent. Also note that this
4088 * has to come *after* the zero filling of new blocks in the case where
4089 * we used AddFileExtent (since the part of the bitmap we're touching
4090 * is in those newly allocated blocks).
4091 */
4092 if (!usedExtendFileC) {
4093 error = BlockMarkAllocated(vcb, vcb->totalBlocks, bitmapblks);
4094 if (error) {
4095 printf("hfs_extendfs: error %d setting bitmap\n", error);
4096 goto out;
4097 }
4098 vcb->freeBlocks -= bitmapblks;
4099 }
4100 }
4101 /*
4102 * Mark the new alternate VH as allocated.
4103 */
4104 if (vcb->blockSize == 512)
4105 error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 2, 2);
4106 else
4107 error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 1, 1);
4108 if (error) {
4109 printf("hfs_extendfs: error %d setting bitmap (VH)\n", error);
4110 goto out;
4111 }
4112 /*
4113 * Mark the old alternate VH as free.
4114 */
4115 if (vcb->blockSize == 512)
4116 (void) BlockMarkFree(vcb, vcb->totalBlocks - 2, 2);
4117 else
4118 (void) BlockMarkFree(vcb, vcb->totalBlocks - 1, 1);
4119 /*
4120 * Adjust file system variables for new space.
4121 */
4122 prev_phys_block_count = hfsmp->hfs_logical_block_count;
4123 prev_alt_sector = hfsmp->hfs_alt_id_sector;
4124
4125 vcb->totalBlocks += addblks;
4126 vcb->freeBlocks += addblks;
4127 hfsmp->hfs_logical_block_count = newsize / sectorsize;
4128 hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / sectorsize) +
4129 HFS_ALT_SECTOR(sectorsize, hfsmp->hfs_logical_block_count);
4130 MarkVCBDirty(vcb);
4131 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
4132 if (error) {
4133 printf("hfs_extendfs: couldn't flush volume headers (%d)", error);
4134 /*
4135 * Restore to old state.
4136 */
4137 if (usedExtendFileC) {
4138 (void) TruncateFileC(vcb, fp, oldBitmapSize, 0, FORK_IS_RSRC(fp),
4139 FTOC(fp)->c_fileid, false);
4140 } else {
4141 fp->ff_blocks -= bitmapblks;
4142 fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
4143 /*
4144 * No need to mark the excess blocks free since those bitmap blocks
4145 * are no longer part of the bitmap. But we do need to undo the
4146 * effect of the "vcb->freeBlocks -= bitmapblks" above.
4147 */
4148 vcb->freeBlocks += bitmapblks;
4149 }
4150 vcb->totalBlocks -= addblks;
4151 vcb->freeBlocks -= addblks;
4152 hfsmp->hfs_logical_block_count = prev_phys_block_count;
4153 hfsmp->hfs_alt_id_sector = prev_alt_sector;
4154 MarkVCBDirty(vcb);
4155 if (vcb->blockSize == 512) {
4156 if (BlockMarkAllocated(vcb, vcb->totalBlocks - 2, 2)) {
4157 hfs_mark_volume_inconsistent(hfsmp);
4158 }
4159 } else {
4160 if (BlockMarkAllocated(vcb, vcb->totalBlocks - 1, 1)) {
4161 hfs_mark_volume_inconsistent(hfsmp);
4162 }
4163 }
4164 goto out;
4165 }
4166 /*
4167 * Invalidate the old alternate volume header.
4168 */
4169 bp = NULL;
4170 if (prev_alt_sector) {
4171 if (buf_meta_bread(hfsmp->hfs_devvp,
4172 HFS_PHYSBLK_ROUNDDOWN(prev_alt_sector, hfsmp->hfs_log_per_phys),
4173 hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) {
4174 journal_modify_block_start(hfsmp->jnl, bp);
4175
4176 bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize);
4177
4178 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
4179 } else if (bp) {
4180 buf_brelse(bp);
4181 }
4182 }
4183
4184 /*
4185 * Update the metadata zone size based on current volume size
4186 */
4187 hfs_metadatazone_init(hfsmp, false);
4188
4189 /*
4190 * Adjust the size of hfsmp->hfs_attrdata_vp
4191 */
4192 if (hfsmp->hfs_attrdata_vp) {
4193 struct cnode *attr_cp;
4194 struct filefork *attr_fp;
4195
4196 if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
4197 attr_cp = VTOC(hfsmp->hfs_attrdata_vp);
4198 attr_fp = VTOF(hfsmp->hfs_attrdata_vp);
4199
4200 attr_cp->c_blocks = newblkcnt;
4201 attr_fp->ff_blocks = newblkcnt;
4202 attr_fp->ff_extents[0].blockCount = newblkcnt;
4203 attr_fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
4204 ubc_setsize(hfsmp->hfs_attrdata_vp, attr_fp->ff_size);
4205 vnode_put(hfsmp->hfs_attrdata_vp);
4206 }
4207 }
4208
4209 /*
4210 * Update the R/B Tree if necessary. Since we don't have to drop the systemfile
4211 * locks in the middle of these operations like we do in the truncate case
4212 * where we have to relocate files, we can only update the red-black tree
4213 * if there were actual changes made to the bitmap. Also, we can't really scan the
4214 * new portion of the bitmap before it has been allocated. The BlockMarkAllocated
4215 * routines are smart enough to avoid the r/b tree if the portion they are manipulating is
4216 * not currently controlled by the tree.
4217 *
4218 * We only update hfsmp->allocLimit if totalBlocks actually increased.
4219 */
4220
4221 if (error == 0) {
4222 UpdateAllocLimit(hfsmp, hfsmp->totalBlocks);
4223 }
4224
4225 /* Log successful extending */
4226 printf("hfs_extendfs: extended \"%s\" to %d blocks (was %d blocks)\n",
4227 hfsmp->vcbVN, hfsmp->totalBlocks, (u_int32_t)(oldsize/hfsmp->blockSize));
4228
4229 out:
4230 if (error && fp) {
4231 /* Restore allocation fork. */
4232 bcopy(&forkdata, &fp->ff_data, sizeof(forkdata));
4233 VTOC(vp)->c_blocks = fp->ff_blocks;
4234
4235 }
4236
4237 HFS_MOUNT_LOCK(hfsmp, TRUE);
4238 hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
4239 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
4240 if (lockflags) {
4241 hfs_systemfile_unlock(hfsmp, lockflags);
4242 }
4243 if (transaction_begun) {
4244 hfs_end_transaction(hfsmp);
4245 }
4246
4247 return MacToVFSError(error);
4248 }
4249
/* Smallest volume size (in bytes) that hfs_truncatefs() will shrink to: 32 MiB. */
#define HFS_MIN_SIZE (32LL * 1024LL * 1024LL)
4251
/*
 * Truncate (shrink) a file system while it is still mounted.
 *
 * Only journaled, non-embedded HFS Plus volumes may be shrunk.  Concurrent
 * resizes are serialized via the HFS_RESIZE_IN_PROGRESS flag.  Allocated
 * blocks beyond the new end of volume are relocated by hfs_reclaimspace(),
 * a new alternate volume header is allocated at the new end, the old one
 * is zeroed on disk, and the in-core volume variables are flushed.
 *
 * hfsmp   - mount point being shrunk
 * newsize - desired volume size in bytes; must be >= HFS_MIN_SIZE, strictly
 *           smaller than the current size, and a multiple of both the
 *           logical and physical device block sizes
 * context - caller's VFS context (used for the final cache-sync ioctl)
 *
 * Returns 0 on success, otherwise an errno (EALREADY if a resize is already
 * running, EPERM, EINVAL, ENOSPC, EAGAIN, ...) mapped via MacToVFSError().
 */
int
hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
{
	struct buf *bp = NULL;
	u_int64_t oldsize;
	u_int32_t newblkcnt;
	u_int32_t reclaimblks = 0;
	int lockflags = 0;
	int transaction_begun = 0;
	Boolean updateFreeBlocks = false;
	Boolean disable_sparse = false;
	int error = 0;

	/* Claim the resize "lock": only one resize may run at a time. */
	lck_mtx_lock(&hfsmp->hfs_mutex);
	if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
		lck_mtx_unlock(&hfsmp->hfs_mutex);
		return (EALREADY);
	}
	hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
	hfsmp->hfs_resize_blocksmoved = 0;
	hfsmp->hfs_resize_totalblocks = 0;
	hfsmp->hfs_resize_progress = 0;
	lck_mtx_unlock(&hfsmp->hfs_mutex);

	/*
	 * - Journaled HFS Plus volumes only.
	 * - No embedded volumes.
	 */
	if ((hfsmp->jnl == NULL) ||
	    (hfsmp->hfsPlusIOPosOffset != 0)) {
		error = EPERM;
		goto out;
	}
	oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
	newblkcnt = newsize / hfsmp->blockSize;
	/* Number of allocation blocks that fall beyond the new end of volume. */
	reclaimblks = hfsmp->totalBlocks - newblkcnt;

	if (hfs_resize_debug) {
		printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1));
		printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks);
	}

	/* Make sure new size is valid. */
	if ((newsize < HFS_MIN_SIZE) ||
	    (newsize >= oldsize) ||
	    (newsize % hfsmp->hfs_logical_block_size) ||
	    (newsize % hfsmp->hfs_physical_block_size)) {
		printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
		error = EINVAL;
		goto out;
	}

	/*
	 * Make sure that the file system has enough free blocks reclaim.
	 *
	 * Before resize, the disk is divided into four zones -
	 *      A. Allocated_Stationary - These are allocated blocks that exist
	 *         before the new end of disk.  These blocks will not be
	 *         relocated or modified during resize.
	 *      B. Free_Stationary - These are free blocks that exist before the
	 *         new end of disk.  These blocks can be used for any new
	 *         allocations during resize, including allocation for relocating
	 *         data from the area of disk being reclaimed.
	 *      C. Allocated_To-Reclaim - These are allocated blocks that exist
	 *         beyond the new end of disk.  These blocks need to be reclaimed
	 *         during resize by allocating equal number of blocks in Free
	 *         Stationary zone and copying the data.
	 *      D. Free_To-Reclaim - These are free blocks that exist beyond the
	 *         new end of disk.  Nothing special needs to be done to reclaim
	 *         them.
	 *
	 * Total number of blocks on the disk before resize:
	 * ------------------------------------------------
	 * Total Blocks = Allocated_Stationary + Free_Stationary +
	 *                Allocated_To-Reclaim + Free_To-Reclaim
	 *
	 * Total number of blocks that need to be reclaimed:
	 * ------------------------------------------------
	 * Blocks to Reclaim = Allocated_To-Reclaim + Free_To-Reclaim
	 *
	 * Note that the check below also makes sure that we have enough space
	 * to relocate data from Allocated_To-Reclaim to Free_Stationary.
	 * Therefore we do not need to check total number of blocks to relocate
	 * later in the code.
	 *
	 * The condition below gets converted to:
	 *
	 * Allocated To-Reclaim + Free To-Reclaim >= Free Stationary + Free To-Reclaim
	 *
	 * which is equivalent to:
	 *
	 *              Allocated To-Reclaim >= Free Stationary
	 */
	if (reclaimblks >= hfs_freeblks(hfsmp, 1)) {
		printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
		error = ENOSPC;
		goto out;
	}

	/* Start with a clean journal. */
	hfs_journal_flush(hfsmp, TRUE);

	if (hfs_start_transaction(hfsmp) != 0) {
		error = EINVAL;
		goto out;
	}
	transaction_begun = 1;

	/* Take the bitmap lock to update the alloc limit field */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * Prevent new allocations from using the part we're trying to truncate.
	 *
	 * NOTE: allocLimit is set to the allocation block number where the new
	 * alternate volume header will be.  That way there will be no files to
	 * interfere with allocating the new alternate volume header, and no files
	 * in the allocation blocks beyond (i.e. the blocks we're trying to
	 * truncate away).
	 *
	 * Also shrink the red-black tree if needed.
	 */
	if (hfsmp->blockSize == 512) {
		/* 512-byte-block volumes keep the 1 KB alternate VH in two blocks. */
		error = UpdateAllocLimit (hfsmp, newblkcnt - 2);
	}
	else {
		error = UpdateAllocLimit (hfsmp, newblkcnt - 1);
	}

	/* Sparse devices use first fit allocation which is not ideal
	 * for volume resize which requires best fit allocation.  If a
	 * sparse device is being truncated, disable the sparse device
	 * property temporarily for the duration of resize.  Also reset
	 * the free extent cache so that it is rebuilt as sorted by
	 * totalBlocks instead of startBlock.
	 *
	 * Note that this will affect all allocations on the volume and
	 * ideal fix would be just to modify resize-related allocations,
	 * but it will result in complexity like handling of two free
	 * extent caches sorted differently, etc.  So we stick to this
	 * solution for now.
	 */
	HFS_MOUNT_LOCK(hfsmp, TRUE);
	if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
		ResetVCBFreeExtCache(hfsmp);
		disable_sparse = true;
	}

	/*
	 * Update the volume free block count to reflect the total number
	 * of free blocks that will exist after a successful resize.
	 * Relocation of extents will result in no net change in the total
	 * free space on the disk.  Therefore the code that allocates
	 * space for new extent and deallocates the old extent explicitly
	 * prevents updating the volume free block count.  It will also
	 * prevent false disk full error when the number of blocks in
	 * an extent being relocated is more than the free blocks that
	 * will exist after the volume is resized.
	 */
	hfsmp->freeBlocks -= reclaimblks;
	updateFreeBlocks = true;	/* remember so "out:" can undo this on error */
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/*
	 * Update the metadata zone size to match the new volume size,
	 * and if it is too small, the metadata zone might be disabled.
	 */
	hfs_metadatazone_init(hfsmp, false);

	/*
	 * If some files have blocks at or beyond the location of the
	 * new alternate volume header, recalculate free blocks and
	 * reclaim blocks.  Otherwise just update free blocks count.
	 *
	 * The current allocLimit is set to the location of new alternate
	 * volume header, and reclaimblks are the total number of blocks
	 * that need to be reclaimed.  So the check below is really
	 * ignoring the blocks allocated for old alternate volume header.
	 */
	if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) {
		/*
		 * hfs_reclaimspace will use separate transactions when
		 * relocating files (so we don't overwhelm the journal).
		 */
		hfs_end_transaction(hfsmp);
		transaction_begun = 0;

		/* Attempt to reclaim some space. */
		error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context);
		if (error != 0) {
			printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error);
			error = ENOSPC;
			goto out;
		}
		if (hfs_start_transaction(hfsmp) != 0) {
			error = EINVAL;
			goto out;
		}
		transaction_begun = 1;

		/* Check if we're clear now. */
		error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks);
		if (error != 0) {
			printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error);
			error = EAGAIN;	/* tell client to try again */
			goto out;
		}
	}

	/*
	 * Note: we take the attributes lock in case we have an attribute data vnode
	 * which needs to change size.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * Allocate last 1KB for alternate volume header.
	 */
	error = BlockMarkAllocated(hfsmp, hfsmp->allocLimit, (hfsmp->blockSize == 512) ? 2 : 1);
	if (error) {
		printf("hfs_truncatefs: Error %d allocating new alternate volume header\n", error);
		goto out;
	}

	/*
	 * Mark the old alternate volume header as free.
	 * We don't bother shrinking allocation bitmap file.
	 */
	if (hfsmp->blockSize == 512)
		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2);
	else
		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1);

	/*
	 * Invalidate the existing alternate volume header.
	 *
	 * Don't include this in a transaction (don't call journal_modify_block)
	 * since this block will be outside of the truncated file system!
	 */
	if (hfsmp->hfs_alt_id_sector) {
		error = buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp);
		if (error == 0) {
			bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)), kMDBSize);
			(void) VNOP_BWRITE(bp);
		} else {
			if (bp) {
				buf_brelse(bp);
			}
		}
		bp = NULL;
	}

	/* Log successful shrinking. */
	printf("hfs_truncatefs: shrank \"%s\" to %d blocks (was %d blocks)\n",
	       hfsmp->vcbVN, newblkcnt, hfsmp->totalBlocks);

	/*
	 * Adjust file system variables and flush them to disk.
	 */
	hfsmp->totalBlocks = newblkcnt;
	hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size;
	hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
	MarkVCBDirty(hfsmp);
	error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	if (error)
		/* At this point the on-disk state has been partially committed,
		 * so a failed flush leaves the volume inconsistent; panic. */
		panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error);

	/*
	 * Adjust the size of hfsmp->hfs_attrdata_vp
	 */
	if (hfsmp->hfs_attrdata_vp) {
		struct cnode *cp;
		struct filefork *fp;

		if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
			cp = VTOC(hfsmp->hfs_attrdata_vp);
			fp = VTOF(hfsmp->hfs_attrdata_vp);

			cp->c_blocks = newblkcnt;
			fp->ff_blocks = newblkcnt;
			fp->ff_extents[0].blockCount = newblkcnt;
			fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
			ubc_setsize(hfsmp->hfs_attrdata_vp, fp->ff_size);
			vnode_put(hfsmp->hfs_attrdata_vp);
		}
	}

out:
	/*
	 * Update the allocLimit to acknowledge the last one or two blocks now.
	 * Add it to the tree as well if necessary.
	 */
	UpdateAllocLimit (hfsmp, hfsmp->totalBlocks);

	HFS_MOUNT_LOCK(hfsmp, TRUE);
	if (disable_sparse == true) {
		/* Now that resize is completed, set the volume to be sparse
		 * device again so that all further allocations will be first
		 * fit instead of best fit.  Reset free extent cache so that
		 * it is rebuilt.
		 */
		hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
		ResetVCBFreeExtCache(hfsmp);
	}

	/* Undo the speculative freeBlocks adjustment made above. */
	if (error && (updateFreeBlocks == true)) {
		hfsmp->freeBlocks += reclaimblks;
	}

	if (hfsmp->nextAllocation >= hfsmp->allocLimit) {
		hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1;
	}
	hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	/* On error, reset the metadata zone for original volume size */
	if (error && (updateFreeBlocks == true)) {
		hfs_metadatazone_init(hfsmp, false);
	}

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (transaction_begun) {
		hfs_end_transaction(hfsmp);
		hfs_journal_flush(hfsmp, FALSE);
		/* Just to be sure, sync all data to the disk */
		(void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
	}

	return MacToVFSError(error);
}
4595
4596
4597 /*
4598 * Invalidate the physical block numbers associated with buffer cache blocks
4599 * in the given extent of the given vnode.
4600 */
/* Argument block passed to hfs_invalidate_block_numbers_callback(). */
struct hfs_inval_blk_no {
	daddr64_t sectorStart;	/* first device sector of the range to invalidate */
	daddr64_t sectorCount;	/* number of device sectors in the range */
};
4605 static int
4606 hfs_invalidate_block_numbers_callback(buf_t bp, void *args_in)
4607 {
4608 daddr64_t blkno;
4609 struct hfs_inval_blk_no *args;
4610
4611 blkno = buf_blkno(bp);
4612 args = args_in;
4613
4614 if (blkno >= args->sectorStart && blkno < args->sectorStart+args->sectorCount)
4615 buf_setblkno(bp, buf_lblkno(bp));
4616
4617 return BUF_RETURNED;
4618 }
4619 static void
4620 hfs_invalidate_sectors(struct vnode *vp, daddr64_t sectorStart, daddr64_t sectorCount)
4621 {
4622 struct hfs_inval_blk_no args;
4623 args.sectorStart = sectorStart;
4624 args.sectorCount = sectorCount;
4625
4626 buf_iterate(vp, hfs_invalidate_block_numbers_callback, BUF_SCAN_DIRTY|BUF_SCAN_CLEAN, &args);
4627 }
4628
4629
/*
 * Copy the contents of an extent to a new location.  Also invalidates the
 * physical block number of any buffer cache block in the copied extent
 * (so that if the block is written, it will go through VNOP_BLOCKMAP to
 * determine the new physical block number).
 *
 * The copy is performed synchronously through the device vnode, one
 * bounce-buffer-sized chunk at a time (read then write).  The caller must
 * hold the cnode lock on vp, except for the allocation file (see the
 * sanity check below).  Returns 0 on success or an errno from buffer
 * allocation or device I/O.
 */
static int
hfs_copy_extent(
	struct hfsmount *hfsmp,
	struct vnode *vp,		/* The file whose extent is being copied. */
	u_int32_t oldStart,		/* The start of the source extent. */
	u_int32_t newStart,		/* The start of the destination extent. */
	u_int32_t blockCount,	/* The number of allocation blocks to copy. */
	vfs_context_t context)
{
	int err = 0;
	size_t bufferSize;
	void *buffer = NULL;
	struct vfsioattr ioattr;
	buf_t bp = NULL;
	off_t resid;
	size_t ioSize;
	u_int32_t ioSizeSectors;	/* Device sectors in this I/O */
	daddr64_t srcSector, destSector;
	u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size;
#if CONFIG_PROTECT
	int cpenabled = 0;
#endif

	/*
	 * Sanity check that we have locked the vnode of the file we're copying.
	 *
	 * But since hfs_systemfile_lock() doesn't actually take the lock on
	 * the allocation file if a journal is active, ignore the check if the
	 * file being copied is the allocation file.
	 */
	struct cnode *cp = VTOC(vp);
	if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread())
		panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp);

#if CONFIG_PROTECT
	/* Prepare the CP blob and get it ready for use */
	if (!vnode_issystem (vp) && vnode_isreg(vp) &&
			cp_fs_protected (hfsmp->hfs_mp)) {
		int cp_err = 0;
		cp_err = cp_handle_relocate (cp);
		if (cp_err) {
			/*
			 * can't copy the file because we couldn't set up keys.
			 * bail out
			 */
			return cp_err;
		}
		else {
			cpenabled = 1;
		}
	}
#endif

	/*
	 * Determine the I/O size to use
	 *
	 * NOTE: Many external drives will result in an ioSize of 128KB.
	 * TODO: Should we use a larger buffer, doing several consecutive
	 * reads, then several consecutive writes?
	 */
	vfs_ioattr(hfsmp->hfs_mp, &ioattr);
	/* Bounce buffer must fit within both the read and write I/O limits. */
	bufferSize = MIN(ioattr.io_maxreadcnt, ioattr.io_maxwritecnt);
	if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize))
		return ENOMEM;

	/* Get a buffer for doing the I/O */
	bp = buf_alloc(hfsmp->hfs_devvp);
	buf_setdataptr(bp, (uintptr_t)buffer);

	/* Convert allocation-block units to byte count and device sectors. */
	resid = (off_t) blockCount * (off_t) hfsmp->blockSize;
	srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
	destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
	while (resid > 0) {
		ioSize = MIN(bufferSize, (size_t) resid);
		ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size;

		/* Prepare the buffer for reading */
		buf_reset(bp, B_READ);
		buf_setsize(bp, ioSize);
		buf_setcount(bp, ioSize);
		buf_setblkno(bp, srcSector);
		buf_setlblkno(bp, srcSector);

		/* Attach the CP to the buffer */
#if CONFIG_PROTECT
		if (cpenabled) {
			buf_setcpaddr (bp, cp->c_cpentry);
		}
#endif

		/* Do the read (synchronous: wait for completion). */
		err = VNOP_STRATEGY(bp);
		if (!err)
			err = buf_biowait(bp);
		if (err) {
			printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (read)\n", err);
			break;
		}

		/* Prepare the buffer for writing */
		buf_reset(bp, B_WRITE);
		buf_setsize(bp, ioSize);
		buf_setcount(bp, ioSize);
		buf_setblkno(bp, destSector);
		buf_setlblkno(bp, destSector);
		/* System-file writes use FUA when the journal does, so the data
		 * is durable without a full cache flush afterwards. */
		if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl))
			buf_markfua(bp);

#if CONFIG_PROTECT
		/* Attach the CP to the buffer */
		if (cpenabled) {
			buf_setcpaddr (bp, cp->c_cpentry);
		}
#endif

		/* Do the write */
		vnode_startwrite(hfsmp->hfs_devvp);
		err = VNOP_STRATEGY(bp);
		if (!err)
			err = buf_biowait(bp);
		if (err) {
			printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (write)\n", err);
			break;
		}

		resid -= ioSize;
		srcSector += ioSizeSectors;
		destSector += ioSizeSectors;
	}
	if (bp)
		buf_free(bp);
	if (buffer)
		kmem_free(kernel_map, (vm_offset_t)buffer, bufferSize);

	/* Make sure all writes have been flushed to disk. */
	if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) {
		err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
		if (err) {
			printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err);
			err = 0;	/* Don't fail the copy. */
		}
	}

	/* Drop stale physical mappings so later writes re-map to newStart. */
	if (!err)
		hfs_invalidate_sectors(vp, (daddr64_t)oldStart*sectorsPerBlock, (daddr64_t)blockCount*sectorsPerBlock);

	return err;
}
4784
4785
/* Structure to store state of reclaiming extents from a
 * given file. hfs_reclaim_file()/hfs_reclaim_xattr()
 * initializes the values in this structure which are then
 * used by code that reclaims and splits the extents.
 */
struct hfs_reclaim_extent_info {
	struct vnode *vp;		/* Vnode of the file whose extents are being reclaimed */
	u_int32_t fileID;		/* Catalog node ID owning the extents */
	u_int8_t forkType;		/* Fork the extents belong to (data vs. resource — confirm against callers) */
	u_int8_t is_dirlink;             /* Extent belongs to directory hard link */
	u_int8_t is_sysfile;             /* Extent belongs to system file */
	u_int8_t is_xattr;               /* Extent belongs to extent-based xattr */
	u_int8_t extent_index;		/* Index of the extent currently being processed within the record */
	int lockflags;                   /* Locks that reclaim and split code should grab before modifying the extent record */
	u_int32_t blocks_relocated;      /* Total blocks relocated for this file till now */
	u_int32_t recStartBlock;         /* File allocation block number (FABN) for current extent record */
	u_int32_t cur_blockCount;        /* Number of allocation blocks that have been checked for reclaim */
	struct filefork *catalog_fp;     /* If non-NULL, extent is from catalog record */
	union record {
		HFSPlusExtentRecord overflow;/* Extent record from overflow extents btree */
		HFSPlusAttrRecord xattr;	/* Attribute record for large EAs */
	} record;
	HFSPlusExtentDescriptor *extents;	/* Pointer to current extent record being processed.
						 * For catalog extent record, points to the correct
						 * extent information in filefork.  For overflow extent
						 * record, or xattr record, points to extent record
						 * in the structure above
						 */
	struct cat_desc *dirlink_desc;	/* Catalog descriptor, used only for directory hard links */
	struct cat_attr *dirlink_attr;	/* Catalog attributes, used only for directory hard links */
	struct filefork *dirlink_fork;	/* For directory hard links, fp points actually to this */
	struct BTreeIterator *iterator;	/* Shared read/write iterator, hfs_reclaim_file/xattr()
					 * use it for reading and hfs_reclaim_extent()/hfs_split_extent()
					 * use it for writing updated extent record
					 */
	struct FSBufferDescriptor btdata;	/* Shared btdata for reading/writing extent record, same as iterator above */
	u_int16_t recordlen;		/* Length of the current btree record (presumably bytes — verify) */
	int overflow_count;		/* For debugging, counter for overflow extent record */
	FCB *fcb;			/* Pointer to the current btree being traversed */
};
4826
4827 /*
4828 * Split the current extent into two extents, with first extent
4829 * to contain given number of allocation blocks. Splitting of
4830 * extent creates one new extent entry which can result in
4831 * shifting of many entries through all the extent records of a
4832 * file, and/or creating a new extent record in the overflow
4833 * extent btree.
4834 *
4835 * Example:
4836 * The diagram below represents two consecutive extent records,
4837 * for simplicity, lets call them record X and X+1 respectively.
4838 * Interesting extent entries have been denoted by letters.
4839 * If the letter is unchanged before and after split, it means
4840 * that the extent entry was not modified during the split.
4841 * A '.' means that the entry remains unchanged after the split
4842 * and is not relevant for our example. A '0' means that the
4843 * extent entry is empty.
4844 *
4845 * If there isn't sufficient contiguous free space to relocate
4846 * an extent (extent "C" below), we will have to break the one
4847 * extent into multiple smaller extents, and relocate each of
4848 * the smaller extents individually. The way we do this is by
4849 * finding the largest contiguous free space that is currently
4850 * available (N allocation blocks), and then convert extent "C"
4851 * into two extents, C1 and C2, that occupy exactly the same
4852 * allocation blocks as extent C. Extent C1 is the first
4853 * N allocation blocks of extent C, and extent C2 is the remainder
4854 * of extent C. Then we can relocate extent C1 since we know
4855 * we have enough contiguous free space to relocate it in its
4856 * entirety. We then repeat the process starting with extent C2.
4857 *
4858 * In record X, only the entries following entry C are shifted, and
4859 * the original entry C is replaced with two entries C1 and C2 which
4860 * are actually two extent entries for contiguous allocation blocks.
4861 *
4862 * Note that the entry E from record X is shifted into record X+1 as
4863 * the new first entry. Since the first entry of record X+1 is updated,
4864 * the FABN will also get updated with the blockCount of entry E.
4865 * This also results in shifting of all extent entries in record X+1.
4866 * Note that the number of empty entries after the split has been
4867 * changed from 3 to 2.
4868 *
4869 * Before:
4870 * record X record X+1
4871 * ---------------------===--------- ---------------------------------
4872 * | A | . | . | . | B | C | D | E | | F | . | . | . | G | 0 | 0 | 0 |
4873 * ---------------------===--------- ---------------------------------
4874 *
4875 * After:
4876 * ---------------------=======----- ---------------------------------
4877 * | A | . | . | . | B | C1| C2| D | | E | F | . | . | . | G | 0 | 0 |
4878 * ---------------------=======----- ---------------------------------
4879 *
4880 * C1.startBlock = C.startBlock
4881 * C1.blockCount = N
4882 *
4883 * C2.startBlock = C.startBlock + N
4884 * C2.blockCount = C.blockCount - N
4885 *
4886 * FABN = old FABN - E.blockCount
4887 *
4888 * Inputs:
4889 * extent_info - This is the structure that contains state about
4890 * the current file, extent, and extent record that
4891 * is being relocated. This structure is shared
4892 * among code that traverses through all the extents
4893 * of the file, code that relocates extents, and
4894 * code that splits the extent.
4895 * Output:
4896 * Zero on success, non-zero on failure.
4897 */
static int
hfs_split_extent(struct hfs_reclaim_extent_info *extent_info, uint32_t newBlockCount)
{
	int error = 0;
	int index = extent_info->extent_index;	/* Index of the extent entry being split */
	int i;
	HFSPlusExtentDescriptor shift_extent;	/* Entry that spills into the following overflow record */
	HFSPlusExtentDescriptor last_extent;
	HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being manipulated */
	HFSPlusExtentRecord *extents_rec = NULL;
	HFSPlusExtentKey *extents_key = NULL;
	HFSPlusAttrRecord *xattr_rec = NULL;
	HFSPlusAttrKey *xattr_key = NULL;
	/* NOTE(review): BTreeIterator is a sizable structure to keep on the
	 * kernel stack; consider heap-allocating it like the record buffers
	 * below — confirm against stack-usage limits for this call path.
	 */
	struct BTreeIterator iterator;
	struct FSBufferDescriptor btdata;
	uint16_t reclen;
	uint32_t read_recStartBlock;	/* Starting allocation block number to read old extent record */
	uint32_t write_recStartBlock;	/* Starting allocation block number to insert newly updated extent record */
	Boolean create_record = false;
	Boolean is_xattr;

	is_xattr = extent_info->is_xattr;
	extents = extent_info->extents;

	if (hfs_resize_debug) {
		printf ("hfs_split_extent: Split record:%u recStartBlock=%u %u:(%u,%u) for %u blocks\n", extent_info->overflow_count, extent_info->recStartBlock, index, extents[index].startBlock, extents[index].blockCount, newBlockCount);
	}

	/* Determine the starting allocation block number for the following
	 * overflow extent record, if any, before the current record
	 * gets modified.
	 */
	read_recStartBlock = extent_info->recStartBlock;
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		if (extents[i].blockCount == 0) {
			break;
		}
		read_recStartBlock += extents[i].blockCount;
	}

	/* Shift and split */
	if (index == kHFSPlusExtentDensity-1) {
		/* The new extent created after split will go into following overflow extent record */
		shift_extent.startBlock = extents[index].startBlock + newBlockCount;
		shift_extent.blockCount = extents[index].blockCount - newBlockCount;

		/* Last extent in the record will be split, so nothing to shift */
	} else {
		/* Splitting of extents can result in at most one extent
		 * entry being shifted into the following overflow extent
		 * record.  So, store the last extent entry for later.
		 */
		shift_extent = extents[kHFSPlusExtentDensity-1];

		/* Start shifting extent information from the end of the extent
		 * record to the index where we want to insert the new extent.
		 * Note that kHFSPlusExtentDensity-1 is already saved above, and
		 * does not need to be shifted.  The extent entry that is being
		 * split does not get shifted.
		 */
		for (i = kHFSPlusExtentDensity-2; i > index; i--) {
			if (hfs_resize_debug) {
				if (extents[i].blockCount) {
					printf ("hfs_split_extent: Shift %u:(%u,%u) to %u:(%u,%u)\n", i, extents[i].startBlock, extents[i].blockCount, i+1, extents[i].startBlock, extents[i].blockCount);
				}
			}
			extents[i+1] = extents[i];
		}
	}

	if (index == kHFSPlusExtentDensity-1) {
		/* The second half of the extent being split will be the overflow
		 * entry that will go into following overflow extent record.  The
		 * value has been stored in 'shift_extent' above, so there is
		 * nothing to be done here.
		 */
	} else {
		/* Update the values in the second half of the extent being split
		 * before updating the first half of the split.  Note that the
		 * extent to split or first half of the split is at index 'index'
		 * and a new extent or second half of the split will be inserted at
		 * 'index+1' or into following overflow extent record.
		 */
		extents[index+1].startBlock = extents[index].startBlock + newBlockCount;
		extents[index+1].blockCount = extents[index].blockCount - newBlockCount;
	}
	/* Update the extent being split, only the block count will change */
	extents[index].blockCount = newBlockCount;

	if (hfs_resize_debug) {
		printf ("hfs_split_extent: Split %u:(%u,%u) and ", index, extents[index].startBlock, extents[index].blockCount);
		if (index != kHFSPlusExtentDensity-1) {
			printf ("%u:(%u,%u)\n", index+1, extents[index+1].startBlock, extents[index+1].blockCount);
		} else {
			printf ("overflow:(%u,%u)\n", shift_extent.startBlock, shift_extent.blockCount);
		}
	}

	/* If the newly split extent is for large EAs or in overflow extent
	 * record, update it directly in the btree using the iterator
	 * information from the shared extent_info structure.
	 */
	if (extent_info->catalog_fp == NULL) {
		error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
				&(extent_info->btdata), extent_info->recordlen);
		if (error) {
			printf ("hfs_split_extent: fileID=%u BTReplaceRecord returned error=%d\n", extent_info->fileID, error);
			goto out;
		}
	}

	/* No extent entry to be shifted into another extent overflow record */
	if (shift_extent.blockCount == 0) {
		if (hfs_resize_debug) {
			printf ("hfs_split_extent: No extent entry to be shifted into overflow records\n");
		}
		error = 0;
		goto out;
	}

	/* The overflow extent entry has to be shifted into an extent
	 * overflow record.  This would mean that we have to shift
	 * extent entries from all overflow records by one.  We will
	 * start iteration from the first record to the last record,
	 * and shift the extent entry from one record to another.
	 * We might have to create a new record for the last extent
	 * entry for the file.
	 */

	/* Initialize iterator to search the next record */
	bzero(&iterator, sizeof(iterator));
	if (is_xattr) {
		/* Copy the key from the iterator that was used to update the
		 * modified attribute record.
		 */
		xattr_key = (HFSPlusAttrKey *)&(iterator.key);
		bcopy((HFSPlusAttrKey *)&(extent_info->iterator->key), xattr_key, sizeof(HFSPlusAttrKey));
		/* Note: xattr_key->startBlock will be initialized later in the iteration loop */

		MALLOC(xattr_rec, HFSPlusAttrRecord *,
				sizeof(HFSPlusAttrRecord), M_TEMP, M_WAITOK);
		if (xattr_rec == NULL) {
			error = ENOMEM;
			goto out;
		}
		btdata.bufferAddress = xattr_rec;
		btdata.itemSize = sizeof(HFSPlusAttrRecord);
		btdata.itemCount = 1;
		extents = xattr_rec->overflowExtents.extents;
	} else {
		/* Build an extents-overflow btree key for this file and fork. */
		extents_key = (HFSPlusExtentKey *) &(iterator.key);
		extents_key->keyLength = kHFSPlusExtentKeyMaximumLength;
		extents_key->forkType = extent_info->forkType;
		extents_key->fileID = extent_info->fileID;
		/* Note: extents_key->startBlock will be initialized later in the iteration loop */

		MALLOC(extents_rec, HFSPlusExtentRecord *,
				sizeof(HFSPlusExtentRecord), M_TEMP, M_WAITOK);
		if (extents_rec == NULL) {
			error = ENOMEM;
			goto out;
		}
		btdata.bufferAddress = extents_rec;
		btdata.itemSize = sizeof(HFSPlusExtentRecord);
		btdata.itemCount = 1;
		extents = extents_rec[0];
	}

	/* An extent entry still needs to be shifted into following overflow
	 * extent record.  This will result in the starting allocation block
	 * number of the extent record being changed which is part of the key
	 * for the extent record.  Since the extent record key is changing,
	 * the record can not be updated, instead has to be deleted and
	 * inserted again.
	 */
	while (shift_extent.blockCount) {
		if (hfs_resize_debug) {
			printf ("hfs_split_extent: Will shift (%u,%u) into record with startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, read_recStartBlock);
		}

		/* Search if there is any existing overflow extent record.
		 * For this, the logical start block number in the key is
		 * the value calculated based on the logical start block
		 * number of the current extent record and the total number
		 * of blocks existing in the current extent record.
		 */
		if (is_xattr) {
			xattr_key->startBlock = read_recStartBlock;
		} else {
			extents_key->startBlock = read_recStartBlock;
		}
		error = BTSearchRecord(extent_info->fcb, &iterator, &btdata, &reclen, &iterator);
		if (error) {
			if (error != btNotFound) {
				printf ("hfs_split_extent: fileID=%u startBlock=%u BTSearchRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
				goto out;
			}
			/* No record at this key: a brand-new overflow record will
			 * be created for the spilled entry below.
			 */
			create_record = true;
		}

		/* The extra extent entry from the previous record is being inserted
		 * as the first entry in the current extent record.  This will change
		 * the file allocation block number (FABN) of the current extent
		 * record, which is the startBlock value from the extent record key.
		 * Since one extra entry is being inserted in the record, the new
		 * FABN for the record will be less than old FABN by the number of
		 * blocks in the new extent entry being inserted at the start.  We
		 * have to do this before we update read_recStartBlock to point at
		 * the startBlock of the following record.
		 */
		write_recStartBlock = read_recStartBlock - shift_extent.blockCount;
		if (hfs_resize_debug) {
			if (create_record) {
				printf ("hfs_split_extent: No records found for startBlock=%u, will create new with startBlock=%u\n", read_recStartBlock, write_recStartBlock);
			}
		}

		/* Now update the read_recStartBlock to account for total number
		 * of blocks in this extent record.  It will now point to the
		 * starting allocation block number for the next extent record.
		 * NOTE(review): when the search above failed (create_record),
		 * this loop reads whatever the search left in the record buffer;
		 * the resulting value appears harmless because the loop below
		 * terminates before read_recStartBlock is used again — confirm.
		 */
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			if (extents[i].blockCount == 0) {
				break;
			}
			read_recStartBlock += extents[i].blockCount;
		}

		if (create_record == true) {
			/* Initialize new record content with only one extent entry */
			bzero(extents, sizeof(HFSPlusExtentRecord));
			/* The new record will contain only one extent entry */
			extents[0] = shift_extent;
			/* There are no more overflow extents to be shifted */
			shift_extent.startBlock = shift_extent.blockCount = 0;

			if (is_xattr) {
				xattr_rec->recordType = kHFSPlusAttrExtents;
				xattr_rec->overflowExtents.reserved = 0;
				reclen = sizeof(HFSPlusAttrExtents);
			} else {
				reclen = sizeof(HFSPlusExtentRecord);
			}
		} else {
			/* The overflow extent entry from previous record will be
			 * the first entry in this extent record.  If the last
			 * extent entry in this record is valid, it will be shifted
			 * into the following extent record as its first entry.  So
			 * save the last entry before shifting entries in current
			 * record.
			 */
			last_extent = extents[kHFSPlusExtentDensity-1];

			/* Shift all entries by one index towards the end */
			for (i = kHFSPlusExtentDensity-2; i >= 0; i--) {
				extents[i+1] = extents[i];
			}

			/* Overflow extent entry saved from previous record
			 * is now the first entry in the current record.
			 */
			extents[0] = shift_extent;

			if (hfs_resize_debug) {
				printf ("hfs_split_extent: Shift overflow=(%u,%u) to record with updated startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, write_recStartBlock);
			}

			/* The last entry from current record will be the
			 * overflow entry which will be the first entry for
			 * the following extent record.
			 */
			shift_extent = last_extent;

			/* Since the key->startBlock is being changed for this record,
			 * it should be deleted and inserted with the new key.
			 */
			error = BTDeleteRecord(extent_info->fcb, &iterator);
			if (error) {
				printf ("hfs_split_extent: fileID=%u startBlock=%u BTDeleteRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
				goto out;
			}
			if (hfs_resize_debug) {
				printf ("hfs_split_extent: Deleted record with startBlock=%u\n", (is_xattr ? xattr_key->startBlock : extents_key->startBlock));
			}
		}

		/* Insert the newly created or modified extent record */
		bzero(&iterator.hint, sizeof(iterator.hint));
		if (is_xattr) {
			xattr_key->startBlock = write_recStartBlock;
		} else {
			extents_key->startBlock = write_recStartBlock;
		}
		error = BTInsertRecord(extent_info->fcb, &iterator, &btdata, reclen);
		if (error) {
			printf ("hfs_split_extent: fileID=%u, startBlock=%u BTInsertRecord error=%d\n", extent_info->fileID, write_recStartBlock, error);
			goto out;
		}
		if (hfs_resize_debug) {
			printf ("hfs_split_extent: Inserted extent record with startBlock=%u\n", write_recStartBlock);
		}
	}
	/* Flush the btree so the delete/insert sequence reaches disk. */
	BTFlushPath(extent_info->fcb);
out:
	if (extents_rec) {
		FREE (extents_rec, M_TEMP);
	}
	if (xattr_rec) {
		FREE (xattr_rec, M_TEMP);
	}
	return error;
}
5208
5209
5210 /*
5211 * Relocate an extent if it lies beyond the expected end of volume.
5212 *
5213 * This function is called for every extent of the file being relocated.
 * It allocates space for relocation, copies the data, deallocates
 * the old extent, and updates the corresponding on-disk extent. If the
 * function does not find contiguous space to relocate an extent, it splits
 * the extent into smaller pieces so that each piece can be relocated out
 * of the area of disk being reclaimed. As an optimization, if an extent lies partially
5219 * in the area of the disk being reclaimed, it is split so that we only
5220 * have to relocate the area that was overlapping with the area of disk
5221 * being reclaimed.
5222 *
5223 * Note that every extent is relocated in its own transaction so that
5224 * they do not overwhelm the journal. This function handles the extent
5225 * record that exists in the catalog record, extent record from overflow
5226 * extents btree, and extents for large EAs.
5227 *
5228 * Inputs:
5229 * extent_info - This is the structure that contains state about
5230 * the current file, extent, and extent record that
5231 * is being relocated. This structure is shared
5232 * among code that traverses through all the extents
5233 * of the file, code that relocates extents, and
5234 * code that splits the extent.
5235 */
5236 static int
5237 hfs_reclaim_extent(struct hfsmount *hfsmp, const u_long allocLimit, struct hfs_reclaim_extent_info *extent_info, vfs_context_t context)
5238 {
5239 int error = 0;
5240 int index;
5241 struct cnode *cp;
5242 u_int32_t oldStartBlock;
5243 u_int32_t oldBlockCount;
5244 u_int32_t newStartBlock;
5245 u_int32_t newBlockCount;
5246 u_int32_t alloc_flags;
5247 int blocks_allocated = false;
5248
5249 index = extent_info->extent_index;
5250 cp = VTOC(extent_info->vp);
5251
5252 oldStartBlock = extent_info->extents[index].startBlock;
5253 oldBlockCount = extent_info->extents[index].blockCount;
5254
5255 if (0 && hfs_resize_debug) {
5256 printf ("hfs_reclaim_extent: Examine record:%u recStartBlock=%u, %u:(%u,%u)\n", extent_info->overflow_count, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount);
5257 }
5258
5259 /* Check if the current extent lies completely within allocLimit */
5260 if ((oldStartBlock + oldBlockCount) <= allocLimit) {
5261 extent_info->cur_blockCount += oldBlockCount;
5262 return error;
5263 }
5264
5265 /* Every extent should be relocated in its own transaction
5266 * to make sure that we don't overflow the journal buffer.
5267 */
5268 error = hfs_start_transaction(hfsmp);
5269 if (error) {
5270 return error;
5271 }
5272 extent_info->lockflags = hfs_systemfile_lock(hfsmp, extent_info->lockflags, HFS_EXCLUSIVE_LOCK);
5273
5274 /* Check if the extent lies partially in the area to reclaim,
5275 * i.e. it starts before allocLimit and ends beyond allocLimit.
5276 * We have already skipped extents that lie completely within
5277 * allocLimit in the check above, so we only check for the
5278 * startBlock. If it lies partially, split it so that we
5279 * only relocate part of the extent.
5280 */
5281 if (oldStartBlock < allocLimit) {
5282 newBlockCount = allocLimit - oldStartBlock;
5283 error = hfs_split_extent(extent_info, newBlockCount);
5284 if (error == 0) {
5285 /* After successful split, the current extent does not
5286 * need relocation, so just return back.
5287 */
5288 goto out;
5289 }
5290 /* Ignore error and try relocating the entire extent instead */
5291 }
5292
5293 alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS;
5294 if (extent_info->is_sysfile) {
5295 alloc_flags |= HFS_ALLOC_METAZONE;
5296 }
5297
5298 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags,
5299 &newStartBlock, &newBlockCount);
5300 if ((extent_info->is_sysfile == false) &&
5301 ((error == dskFulErr) || (error == ENOSPC))) {
5302 /* For non-system files, try reallocating space in metadata zone */
5303 alloc_flags |= HFS_ALLOC_METAZONE;
5304 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5305 alloc_flags, &newStartBlock, &newBlockCount);
5306 }
5307 if ((error == dskFulErr) || (error == ENOSPC)) {
5308 /* We did not find desired contiguous space for this extent.
5309 * So try to allocate the maximum contiguous space available.
5310 */
5311 alloc_flags &= ~HFS_ALLOC_FORCECONTIG;
5312
5313 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5314 alloc_flags, &newStartBlock, &newBlockCount);
5315 if (error) {
5316 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5317 goto out;
5318 }
5319 blocks_allocated = true;
5320
5321 error = hfs_split_extent(extent_info, newBlockCount);
5322 if (error) {
5323 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) split error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5324 goto out;
5325 }
5326 oldBlockCount = newBlockCount;
5327 }
5328 if (error) {
5329 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) contig BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5330 goto out;
5331 }
5332 blocks_allocated = true;
5333
5334 /* Copy data from old location to new location */
5335 error = hfs_copy_extent(hfsmp, extent_info->vp, oldStartBlock,
5336 newStartBlock, newBlockCount, context);
5337 if (error) {
5338 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u)=>(%u,%u) hfs_copy_extent error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount, error);
5339 goto out;
5340 }
5341
5342 /* Update the extent record with the new start block information */
5343 extent_info->extents[index].startBlock = newStartBlock;
5344
5345 /* Sync the content back to the disk */
5346 if (extent_info->catalog_fp) {
5347 /* Update the extents in catalog record */
5348 if (extent_info->is_dirlink) {
5349 error = cat_update_dirlink(hfsmp, extent_info->forkType,
5350 extent_info->dirlink_desc, extent_info->dirlink_attr,
5351 &(extent_info->dirlink_fork->ff_data));
5352 } else {
5353 cp->c_flag |= C_MODIFIED;
5354 /* If this is a system file, sync volume headers on disk */
5355 if (extent_info->is_sysfile) {
5356 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
5357 }
5358 }
5359 } else {
5360 /* Replace record for extents overflow or extents-based xattrs */
5361 error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
5362 &(extent_info->btdata), extent_info->recordlen);
5363 }
5364 if (error) {
5365 printf ("hfs_reclaim_extent: fileID=%u, update record error=%u\n", extent_info->fileID, error);
5366 goto out;
5367 }
5368
5369 /* Deallocate the old extent */
5370 error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5371 if (error) {
5372 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockDeallocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5373 goto out;
5374 }
5375 extent_info->blocks_relocated += newBlockCount;
5376
5377 if (hfs_resize_debug) {
5378 printf ("hfs_reclaim_extent: Relocated record:%u %u:(%u,%u) to (%u,%u)\n", extent_info->overflow_count, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
5379 }
5380
5381 out:
5382 if (error != 0) {
5383 if (blocks_allocated == true) {
5384 BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5385 }
5386 } else {
5387 /* On success, increment the total allocation blocks processed */
5388 extent_info->cur_blockCount += newBlockCount;
5389 }
5390
5391 hfs_systemfile_unlock(hfsmp, extent_info->lockflags);
5392
5393 /* For a non-system file, if an extent entry from catalog record
5394 * was modified, sync the in-memory changes to the catalog record
5395 * on disk before ending the transaction.
5396 */
5397 if ((error == 0) &&
5398 (extent_info->overflow_count < kHFSPlusExtentDensity) &&
5399 (extent_info->is_sysfile == false)) {
5400 (void) hfs_update(extent_info->vp, MNT_WAIT);
5401 }
5402
5403 hfs_end_transaction(hfsmp);
5404
5405 return error;
5406 }
5407
5408 /* Report intermediate progress during volume resize */
5409 static void
5410 hfs_truncatefs_progress(struct hfsmount *hfsmp)
5411 {
5412 u_int32_t cur_progress;
5413
5414 hfs_resize_progress(hfsmp, &cur_progress);
5415 if (cur_progress > (hfsmp->hfs_resize_progress + 9)) {
5416 printf("hfs_truncatefs: %d%% done...\n", cur_progress);
5417 hfsmp->hfs_resize_progress = cur_progress;
5418 }
5419 return;
5420 }
5421
5422 /*
5423 * Reclaim space at the end of a volume for given file and forktype.
5424 *
5425 * This routine attempts to move any extent which contains allocation blocks
5426 * at or after "allocLimit." A separate transaction is used for every extent
5427 * that needs to be moved. If there is not contiguous space available for
5428 * moving an extent, it can be split into smaller extents. The contents of
5429 * any moved extents are read and written via the volume's device vnode --
5430 * NOT via "vp." During the move, moved blocks which are part of a transaction
5431 * have their physical block numbers invalidated so they will eventually be
5432 * written to their new locations.
5433 *
5434 * This function is also called for directory hard links. Directory hard links
5435 * are regular files with no data fork and resource fork that contains alias
5436 * information for backward compatibility with pre-Leopard systems. However
5437 * non-Mac OS X implementation can add/modify data fork or resource fork
5438 * information to directory hard links, so we check, and if required, relocate
5439 * both data fork and resource fork.
5440 *
5441 * Inputs:
5442 * hfsmp The volume being resized.
5443 * vp The vnode for the system file.
5444 * fileID ID of the catalog record that needs to be relocated
5445 * forktype The type of fork that needs relocated,
5446 * kHFSResourceForkType for resource fork,
5447 * kHFSDataForkType for data fork
5448 * allocLimit Allocation limit for the new volume size,
5449 * do not use this block or beyond. All extents
5450 * that use this block or any blocks beyond this limit
5451 * will be relocated.
5452 *
5453 * Side Effects:
5454 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
5455 * blocks that were relocated.
5456 */
5457 static int
5458 hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID,
5459 u_int8_t forktype, u_long allocLimit, vfs_context_t context)
5460 {
5461 int error = 0;
5462 struct hfs_reclaim_extent_info *extent_info;
5463 int i;
5464 int lockflags = 0;
5465 struct cnode *cp;
5466 struct filefork *fp;
5467 int took_truncate_lock = false;
5468 int release_desc = false;
5469 HFSPlusExtentKey *key;
5470
5471 /* If there is no vnode for this file, then there's nothing to do. */
5472 if (vp == NULL) {
5473 return 0;
5474 }
5475
5476 cp = VTOC(vp);
5477
5478 MALLOC(extent_info, struct hfs_reclaim_extent_info *,
5479 sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
5480 if (extent_info == NULL) {
5481 return ENOMEM;
5482 }
5483 bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
5484 extent_info->vp = vp;
5485 extent_info->fileID = fileID;
5486 extent_info->forkType = forktype;
5487 extent_info->is_sysfile = vnode_issystem(vp);
5488 if (vnode_isdir(vp) && (cp->c_flag & C_HARDLINK)) {
5489 extent_info->is_dirlink = true;
5490 }
5491 /* We always need allocation bitmap and extent btree lock */
5492 lockflags = SFL_BITMAP | SFL_EXTENTS;
5493 if ((fileID == kHFSCatalogFileID) || (extent_info->is_dirlink == true)) {
5494 lockflags |= SFL_CATALOG;
5495 } else if (fileID == kHFSAttributesFileID) {
5496 lockflags |= SFL_ATTRIBUTE;
5497 } else if (fileID == kHFSStartupFileID) {
5498 lockflags |= SFL_STARTUP;
5499 }
5500 extent_info->lockflags = lockflags;
5501 extent_info->fcb = VTOF(hfsmp->hfs_extents_vp);
5502
5503 /* Flush data associated with current file on disk.
5504 *
5505 * If the current vnode is directory hard link, no flushing of
5506 * journal or vnode is required. The current kernel does not
5507 * modify data/resource fork of directory hard links, so nothing
5508 * will be in the cache. If a directory hard link is newly created,
5509 * the resource fork data is written directly using devvp and
5510 * the code that actually relocates data (hfs_copy_extent()) also
5511 * uses devvp for its I/O --- so they will see a consistent copy.
5512 */
5513 if (extent_info->is_sysfile) {
5514 /* If the current vnode is system vnode, flush journal
5515 * to make sure that all data is written to the disk.
5516 */
5517 error = hfs_journal_flush(hfsmp, TRUE);
5518 if (error) {
5519 printf ("hfs_reclaim_file: journal_flush returned %d\n", error);
5520 goto out;
5521 }
5522 } else if (extent_info->is_dirlink == false) {
5523 /* Flush all blocks associated with this regular file vnode.
5524 * Normally there should not be buffer cache blocks for regular
5525 * files, but for objects like symlinks, we can have buffer cache
5526 * blocks associated with the vnode. Therefore we call
5527 * buf_flushdirtyblks() also.
5528 */
5529 buf_flushdirtyblks(vp, 0, BUF_SKIP_LOCKED, "hfs_reclaim_file");
5530
5531 hfs_unlock(cp);
5532 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
5533 took_truncate_lock = true;
5534 (void) cluster_push(vp, 0);
5535 error = hfs_lock(cp, HFS_FORCE_LOCK);
5536 if (error) {
5537 goto out;
5538 }
5539
5540 /* If the file no longer exists, nothing left to do */
5541 if (cp->c_flag & C_NOEXISTS) {
5542 error = 0;
5543 goto out;
5544 }
5545
5546 /* Wait for any in-progress writes to this vnode to complete, so that we'll
5547 * be copying consistent bits. (Otherwise, it's possible that an async
5548 * write will complete to the old extent after we read from it. That
5549 * could lead to corruption.)
5550 */
5551 error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file");
5552 if (error) {
5553 goto out;
5554 }
5555 }
5556
5557 if (hfs_resize_debug) {
5558 printf("hfs_reclaim_file: === Start reclaiming %sfork for %sid=%u ===\n", (forktype ? "rsrc" : "data"), (extent_info->is_dirlink ? "dirlink" : "file"), fileID);
5559 }
5560
5561 if (extent_info->is_dirlink) {
5562 MALLOC(extent_info->dirlink_desc, struct cat_desc *,
5563 sizeof(struct cat_desc), M_TEMP, M_WAITOK);
5564 MALLOC(extent_info->dirlink_attr, struct cat_attr *,
5565 sizeof(struct cat_attr), M_TEMP, M_WAITOK);
5566 MALLOC(extent_info->dirlink_fork, struct filefork *,
5567 sizeof(struct filefork), M_TEMP, M_WAITOK);
5568 if ((extent_info->dirlink_desc == NULL) ||
5569 (extent_info->dirlink_attr == NULL) ||
5570 (extent_info->dirlink_fork == NULL)) {
5571 error = ENOMEM;
5572 goto out;
5573 }
5574
5575 /* Lookup catalog record for directory hard link and
5576 * create a fake filefork for the value looked up from
5577 * the disk.
5578 */
5579 fp = extent_info->dirlink_fork;
5580 bzero(extent_info->dirlink_fork, sizeof(struct filefork));
5581 extent_info->dirlink_fork->ff_cp = cp;
5582 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5583 error = cat_lookup_dirlink(hfsmp, fileID, forktype,
5584 extent_info->dirlink_desc, extent_info->dirlink_attr,
5585 &(extent_info->dirlink_fork->ff_data));
5586 hfs_systemfile_unlock(hfsmp, lockflags);
5587 if (error) {
5588 printf ("hfs_reclaim_file: cat_lookup_dirlink for fileID=%u returned error=%u\n", fileID, error);
5589 goto out;
5590 }
5591 release_desc = true;
5592 } else {
5593 fp = VTOF(vp);
5594 }
5595
5596 extent_info->catalog_fp = fp;
5597 extent_info->recStartBlock = 0;
5598 extent_info->extents = extent_info->catalog_fp->ff_extents;
5599 /* Relocate extents from the catalog record */
5600 for (i = 0; i < kHFSPlusExtentDensity; ++i) {
5601 if (fp->ff_extents[i].blockCount == 0) {
5602 break;
5603 }
5604 extent_info->extent_index = i;
5605 error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
5606 if (error) {
5607 printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, error);
5608 goto out;
5609 }
5610 }
5611
5612 /* If the number of allocation blocks processed for reclaiming
5613 * are less than total number of blocks for the file, continuing
5614 * working on overflow extents record.
5615 */
5616 if (fp->ff_blocks <= extent_info->cur_blockCount) {
5617 if (0 && hfs_resize_debug) {
5618 printf ("hfs_reclaim_file: Nothing more to relocate, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
5619 }
5620 goto out;
5621 }
5622
5623 if (hfs_resize_debug) {
5624 printf ("hfs_reclaim_file: Will check overflow records, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
5625 }
5626
5627 MALLOC(extent_info->iterator, struct BTreeIterator *, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
5628 if (extent_info->iterator == NULL) {
5629 error = ENOMEM;
5630 goto out;
5631 }
5632 bzero(extent_info->iterator, sizeof(struct BTreeIterator));
5633 key = (HFSPlusExtentKey *) &(extent_info->iterator->key);
5634 key->keyLength = kHFSPlusExtentKeyMaximumLength;
5635 key->forkType = forktype;
5636 key->fileID = fileID;
5637 key->startBlock = extent_info->cur_blockCount;
5638
5639 extent_info->btdata.bufferAddress = extent_info->record.overflow;
5640 extent_info->btdata.itemSize = sizeof(HFSPlusExtentRecord);
5641 extent_info->btdata.itemCount = 1;
5642
5643 extent_info->catalog_fp = NULL;
5644
5645 /* Search the first overflow extent with expected startBlock as 'cur_blockCount' */
5646 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5647 error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
5648 &(extent_info->btdata), &(extent_info->recordlen),
5649 extent_info->iterator);
5650 hfs_systemfile_unlock(hfsmp, lockflags);
5651 while (error == 0) {
5652 extent_info->overflow_count++;
5653 extent_info->recStartBlock = key->startBlock;
5654 extent_info->extents = extent_info->record.overflow;
5655 for (i = 0; i < kHFSPlusExtentDensity; i++) {
5656 if (extent_info->record.overflow[i].blockCount == 0) {
5657 goto out;
5658 }
5659 extent_info->extent_index = i;
5660 error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
5661 if (error) {
5662 printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, extent_info->record.overflow[i].startBlock, extent_info->record.overflow[i].blockCount, error);
5663 goto out;
5664 }
5665 }
5666
5667 /* Look for more overflow records */
5668 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5669 error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
5670 extent_info->iterator, &(extent_info->btdata),
5671 &(extent_info->recordlen));
5672 hfs_systemfile_unlock(hfsmp, lockflags);
5673 if (error) {
5674 break;
5675 }
5676 /* Stop when we encounter a different file or fork. */
5677 if ((key->fileID != fileID) || (key->forkType != forktype)) {
5678 break;
5679 }
5680 }
5681 if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
5682 error = 0;
5683 }
5684
5685 out:
5686 /* If any blocks were relocated, account them and report progress */
5687 if (extent_info->blocks_relocated) {
5688 hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
5689 hfs_truncatefs_progress(hfsmp);
5690 if (fileID < kHFSFirstUserCatalogNodeID) {
5691 printf ("hfs_reclaim_file: Relocated %u blocks from fileID=%u on \"%s\"\n",
5692 extent_info->blocks_relocated, fileID, hfsmp->vcbVN);
5693 }
5694 }
5695 if (extent_info->iterator) {
5696 FREE(extent_info->iterator, M_TEMP);
5697 }
5698 if (release_desc == true) {
5699 cat_releasedesc(extent_info->dirlink_desc);
5700 }
5701 if (extent_info->dirlink_desc) {
5702 FREE(extent_info->dirlink_desc, M_TEMP);
5703 }
5704 if (extent_info->dirlink_attr) {
5705 FREE(extent_info->dirlink_attr, M_TEMP);
5706 }
5707 if (extent_info->dirlink_fork) {
5708 FREE(extent_info->dirlink_fork, M_TEMP);
5709 }
5710 if ((extent_info->blocks_relocated != 0) && (extent_info->is_sysfile == false)) {
5711 (void) hfs_update(vp, MNT_WAIT);
5712 }
5713 if (took_truncate_lock) {
5714 hfs_unlock_truncate(cp, 0);
5715 }
5716 if (extent_info) {
5717 FREE(extent_info, M_TEMP);
5718 }
5719 if (hfs_resize_debug) {
5720 printf("hfs_reclaim_file: === Finished relocating %sfork for fileid=%u (error=%d) ===\n", (forktype ? "rsrc" : "data"), fileID, error);
5721 }
5722
5723 return error;
5724 }
5725
5726
5727 /*
5728 * This journal_relocate callback updates the journal info block to point
5729 * at the new journal location. This write must NOT be done using the
5730 * transaction. We must write the block immediately. We must also force
5731 * it to get to the media so that the new journal location will be seen by
5732 * the replay code before we can safely let journaled blocks be written
5733 * to their normal locations.
5734 *
5735 * The tests for journal_uses_fua below are mildly hacky. Since the journal
5736 * and the file system are both on the same device, I'm leveraging what
5737 * the journal has decided about FUA.
5738 */
struct hfs_journal_relocate_args {
	struct hfsmount *hfsmp;		/* volume whose journal is being moved */
	vfs_context_t context;		/* caller's context (supplies credentials for the JIB read) */
	u_int32_t newStartBlock;	/* new journal location, in allocation blocks */
};
5744
static errno_t
hfs_journal_relocate_callback(void *_args)
{
	int error;
	struct hfs_journal_relocate_args *args = _args;
	struct hfsmount *hfsmp = args->hfsmp;
	buf_t bp;
	JournalInfoBlock *jibp;

	/*
	 * Read the journal info block directly from the device.
	 * vcbJinfoBlock is in allocation blocks; convert to logical
	 * (device) blocks for the buffer cache.
	 */
	error = buf_meta_bread(hfsmp->hfs_devvp,
		hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
		hfsmp->blockSize, vfs_context_ucred(args->context), &bp);
	if (error) {
		printf("hfs_reclaim_journal_file: failed to read JIB (%d)\n", error);
		if (bp) {
			buf_brelse(bp);
		}
		return error;
	}
	/* Point the JIB at the journal's new location; on-disk fields are big-endian byte offsets/sizes. */
	jibp = (JournalInfoBlock*) buf_dataptr(bp);
	jibp->offset = SWAP_BE64((u_int64_t)args->newStartBlock * hfsmp->blockSize);
	jibp->size = SWAP_BE64(hfsmp->jnl_size);
	if (journal_uses_fua(hfsmp->jnl))
		buf_markfua(bp);
	/* Synchronous write; per the contract above, this must NOT go through the transaction. */
	error = buf_bwrite(bp);
	if (error) {
		printf("hfs_reclaim_journal_file: failed to write JIB (%d)\n", error);
		return error;
	}
	/*
	 * If the device didn't take a FUA write, force the cache to the media
	 * so journal replay will see the new journal location before any
	 * journaled blocks are written to their normal locations.
	 */
	if (!journal_uses_fua(hfsmp->jnl)) {
		error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, args->context);
		if (error) {
			printf("hfs_reclaim_journal_file: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
			error = 0; /* Don't fail the operation. */
		}
	}

	return error;
}
5784
5785
5786 static int
5787 hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
5788 {
5789 int error;
5790 int journal_err;
5791 int lockflags;
5792 u_int32_t oldStartBlock;
5793 u_int32_t newStartBlock;
5794 u_int32_t oldBlockCount;
5795 u_int32_t newBlockCount;
5796 struct cat_desc journal_desc;
5797 struct cat_attr journal_attr;
5798 struct cat_fork journal_fork;
5799 struct hfs_journal_relocate_args callback_args;
5800
5801 if (hfsmp->jnl_start + (hfsmp->jnl_size / hfsmp->blockSize) <= allocLimit) {
5802 /* The journal does not require relocation */
5803 return 0;
5804 }
5805
5806 error = hfs_start_transaction(hfsmp);
5807 if (error) {
5808 printf("hfs_reclaim_journal_file: hfs_start_transaction returned %d\n", error);
5809 return error;
5810 }
5811 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
5812
5813 oldBlockCount = hfsmp->jnl_size / hfsmp->blockSize;
5814
5815 /* TODO: Allow the journal to change size based on the new volume size. */
5816 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5817 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS,
5818 &newStartBlock, &newBlockCount);
5819 if (error) {
5820 printf("hfs_reclaim_journal_file: BlockAllocate returned %d\n", error);
5821 goto fail;
5822 }
5823 if (newBlockCount != oldBlockCount) {
5824 printf("hfs_reclaim_journal_file: newBlockCount != oldBlockCount (%u, %u)\n", newBlockCount, oldBlockCount);
5825 goto free_fail;
5826 }
5827
5828 error = BlockDeallocate(hfsmp, hfsmp->jnl_start, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5829 if (error) {
5830 printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error);
5831 goto free_fail;
5832 }
5833
5834 /* Update the catalog record for .journal */
5835 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlfileid, 1, &journal_desc, &journal_attr, &journal_fork);
5836 if (error) {
5837 printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
5838 goto free_fail;
5839 }
5840 oldStartBlock = journal_fork.cf_extents[0].startBlock;
5841 journal_fork.cf_size = newBlockCount * hfsmp->blockSize;
5842 journal_fork.cf_extents[0].startBlock = newStartBlock;
5843 journal_fork.cf_extents[0].blockCount = newBlockCount;
5844 journal_fork.cf_blocks = newBlockCount;
5845 error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL);
5846 cat_releasedesc(&journal_desc); /* all done with cat descriptor */
5847 if (error) {
5848 printf("hfs_reclaim_journal_file: cat_update returned %d\n", error);
5849 goto free_fail;
5850 }
5851 callback_args.hfsmp = hfsmp;
5852 callback_args.context = context;
5853 callback_args.newStartBlock = newStartBlock;
5854
5855 error = journal_relocate(hfsmp->jnl, (off_t)newStartBlock*hfsmp->blockSize,
5856 (off_t)newBlockCount*hfsmp->blockSize, 0,
5857 hfs_journal_relocate_callback, &callback_args);
5858 if (error) {
5859 /* NOTE: journal_relocate will mark the journal invalid. */
5860 printf("hfs_reclaim_journal_file: journal_relocate returned %d\n", error);
5861 goto fail;
5862 }
5863 hfsmp->jnl_start = newStartBlock;
5864 hfsmp->jnl_size = (off_t)newBlockCount * hfsmp->blockSize;
5865
5866 hfs_systemfile_unlock(hfsmp, lockflags);
5867 error = hfs_end_transaction(hfsmp);
5868 if (error) {
5869 printf("hfs_reclaim_journal_file: hfs_end_transaction returned %d\n", error);
5870 }
5871
5872 /* Account for the blocks relocated and print progress */
5873 hfsmp->hfs_resize_blocksmoved += oldBlockCount;
5874 hfs_truncatefs_progress(hfsmp);
5875 if (!error) {
5876 printf ("hfs_reclaim_journal_file: Relocated %u blocks from journal on \"%s\"\n",
5877 oldBlockCount, hfsmp->vcbVN);
5878 if (hfs_resize_debug) {
5879 printf ("hfs_reclaim_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
5880 }
5881 }
5882 return error;
5883
5884 free_fail:
5885 journal_err = BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5886 if (journal_err) {
5887 printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error);
5888 hfs_mark_volume_inconsistent(hfsmp);
5889 }
5890 fail:
5891 hfs_systemfile_unlock(hfsmp, lockflags);
5892 (void) hfs_end_transaction(hfsmp);
5893 if (hfs_resize_debug) {
5894 printf ("hfs_reclaim_journal_file: Error relocating journal file (error=%d)\n", error);
5895 }
5896 return error;
5897 }
5898
5899
5900 /*
5901 * Move the journal info block to a new location. We have to make sure the
5902 * new copy of the journal info block gets to the media first, then change
5903 * the field in the volume header and the catalog record.
5904 */
5905 static int
5906 hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
5907 {
5908 int error;
5909 int journal_err;
5910 int lockflags;
5911 u_int32_t oldBlock;
5912 u_int32_t newBlock;
5913 u_int32_t blockCount;
5914 struct cat_desc jib_desc;
5915 struct cat_attr jib_attr;
5916 struct cat_fork jib_fork;
5917 buf_t old_bp, new_bp;
5918
5919 if (hfsmp->vcbJinfoBlock <= allocLimit) {
5920 /* The journal info block does not require relocation */
5921 return 0;
5922 }
5923
5924 error = hfs_start_transaction(hfsmp);
5925 if (error) {
5926 printf("hfs_reclaim_journal_info_block: hfs_start_transaction returned %d\n", error);
5927 return error;
5928 }
5929 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
5930
5931 error = BlockAllocate(hfsmp, 1, 1, 1,
5932 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS,
5933 &newBlock, &blockCount);
5934 if (error) {
5935 printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error);
5936 goto fail;
5937 }
5938 if (blockCount != 1) {
5939 printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount);
5940 goto free_fail;
5941 }
5942 error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS);
5943 if (error) {
5944 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
5945 goto free_fail;
5946 }
5947
5948 /* Copy the old journal info block content to the new location */
5949 error = buf_meta_bread(hfsmp->hfs_devvp,
5950 hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
5951 hfsmp->blockSize, vfs_context_ucred(context), &old_bp);
5952 if (error) {
5953 printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error);
5954 if (old_bp) {
5955 buf_brelse(old_bp);
5956 }
5957 goto free_fail;
5958 }
5959 new_bp = buf_getblk(hfsmp->hfs_devvp,
5960 newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
5961 hfsmp->blockSize, 0, 0, BLK_META);
5962 bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize);
5963 buf_brelse(old_bp);
5964 if (journal_uses_fua(hfsmp->jnl))
5965 buf_markfua(new_bp);
5966 error = buf_bwrite(new_bp);
5967 if (error) {
5968 printf("hfs_reclaim_journal_info_block: failed to write new JIB (%d)\n", error);
5969 goto free_fail;
5970 }
5971 if (!journal_uses_fua(hfsmp->jnl)) {
5972 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
5973 if (error) {
5974 printf("hfs_reclaim_journal_info_block: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
5975 /* Don't fail the operation. */
5976 }
5977 }
5978
5979 /* Update the catalog record for .journal_info_block */
5980 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlinfoblkid, 1, &jib_desc, &jib_attr, &jib_fork);
5981 if (error) {
5982 printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
5983 goto fail;
5984 }
5985 oldBlock = jib_fork.cf_extents[0].startBlock;
5986 jib_fork.cf_size = hfsmp->blockSize;
5987 jib_fork.cf_extents[0].startBlock = newBlock;
5988 jib_fork.cf_extents[0].blockCount = 1;
5989 jib_fork.cf_blocks = 1;
5990 error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL);
5991 cat_releasedesc(&jib_desc); /* all done with cat descriptor */
5992 if (error) {
5993 printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error);
5994 goto fail;
5995 }
5996
5997 /* Update the pointer to the journal info block in the volume header. */
5998 hfsmp->vcbJinfoBlock = newBlock;
5999 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
6000 if (error) {
6001 printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error);
6002 goto fail;
6003 }
6004 hfs_systemfile_unlock(hfsmp, lockflags);
6005 error = hfs_end_transaction(hfsmp);
6006 if (error) {
6007 printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error);
6008 }
6009 error = hfs_journal_flush(hfsmp, FALSE);
6010 if (error) {
6011 printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error);
6012 }
6013
6014 /* Account for the block relocated and print progress */
6015 hfsmp->hfs_resize_blocksmoved += 1;
6016 hfs_truncatefs_progress(hfsmp);
6017 if (!error) {
6018 printf ("hfs_reclaim_journal_info: Relocated 1 block from journal info on \"%s\"\n",
6019 hfsmp->vcbVN);
6020 if (hfs_resize_debug) {
6021 printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount);
6022 }
6023 }
6024 return error;
6025
6026 free_fail:
6027 journal_err = BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS);
6028 if (journal_err) {
6029 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
6030 hfs_mark_volume_inconsistent(hfsmp);
6031 }
6032
6033 fail:
6034 hfs_systemfile_unlock(hfsmp, lockflags);
6035 (void) hfs_end_transaction(hfsmp);
6036 if (hfs_resize_debug) {
6037 printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error);
6038 }
6039 return error;
6040 }
6041
6042
6043 /*
6044 * This function traverses through all extended attribute records for a given
6045 * fileID, and calls function that reclaims data blocks that exist in the
6046 * area of the disk being reclaimed which in turn is responsible for allocating
6047 * new space, copying extent data, deallocating new space, and if required,
6048 * splitting the extent.
6049 *
6050 * Note: The caller has already acquired the cnode lock on the file. Therefore
6051 * we are assured that no other thread would be creating/deleting/modifying
6052 * extended attributes for this file.
6053 *
6054 * Side Effects:
6055 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
6056 * blocks that were relocated.
6057 *
6058 * Returns:
6059 * 0 on success, non-zero on failure.
6060 */
static int
hfs_reclaim_xattr(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, u_int32_t allocLimit, vfs_context_t context)
{
	int error = 0;
	struct hfs_reclaim_extent_info *extent_info;
	int i;
	HFSPlusAttrKey *key;
	int *lockflags;		/* alias for extent_info->lockflags, updated on each lock/unlock */

	if (hfs_resize_debug) {
		printf("hfs_reclaim_xattr: === Start reclaiming xattr for id=%u ===\n", fileID);
	}

	/* State shared with hfs_reclaim_extent(); zeroed so the out: path is safe early. */
	MALLOC(extent_info, struct hfs_reclaim_extent_info *,
	       sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
	if (extent_info == NULL) {
		return ENOMEM;
	}
	bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
	extent_info->vp = vp;
	extent_info->fileID = fileID;
	extent_info->is_xattr = true;
	extent_info->is_sysfile = vnode_issystem(vp);
	extent_info->fcb = VTOF(hfsmp->hfs_attribute_vp);
	lockflags = &(extent_info->lockflags);
	*lockflags = SFL_ATTRIBUTE | SFL_BITMAP;

	/* Initialize iterator from the extent_info structure */
	MALLOC(extent_info->iterator, struct BTreeIterator *,
	       sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
	if (extent_info->iterator == NULL) {
		error = ENOMEM;
		goto out;
	}
	bzero(extent_info->iterator, sizeof(struct BTreeIterator));

	/* Build attribute key */
	key = (HFSPlusAttrKey *)&(extent_info->iterator->key);
	error = hfs_buildattrkey(fileID, NULL, key);
	if (error) {
		goto out;
	}

	/* Initialize btdata from extent_info structure.  Note that the
	 * buffer pointer actually points to the xattr record from the
	 * extent_info structure itself.
	 */
	extent_info->btdata.bufferAddress = &(extent_info->record.xattr);
	extent_info->btdata.itemSize = sizeof(HFSPlusAttrRecord);
	extent_info->btdata.itemCount = 1;

	/*
	 * Sync all extent-based attribute data to the disk.
	 *
	 * All extent-based attribute data I/O is performed via cluster
	 * I/O using a virtual file that spans across entire file system
	 * space.
	 */
	hfs_lock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_EXCLUSIVE_LOCK);
	(void)cluster_push(hfsmp->hfs_attrdata_vp, 0);
	error = vnode_waitforwrites(hfsmp->hfs_attrdata_vp, 0, 0, 0, "hfs_reclaim_xattr");
	hfs_unlock_truncate(VTOC(hfsmp->hfs_attrdata_vp), 0);
	if (error) {
		goto out;
	}

	/* Search for extended attribute for current file.  This
	 * will place the iterator before the first matching record.
	 */
	*lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
	error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
			&(extent_info->btdata), &(extent_info->recordlen),
			extent_info->iterator);
	hfs_systemfile_unlock(hfsmp, *lockflags);
	if (error) {
		if (error != btNotFound) {
			goto out;
		}
		/* btNotFound is expected here, so just mask it */
		error = 0;
	}

	while (1) {
		/* Iterate to the next record */
		*lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
		error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
				extent_info->iterator, &(extent_info->btdata),
				&(extent_info->recordlen));
		hfs_systemfile_unlock(hfsmp, *lockflags);

		/* Stop the iteration if we encounter end of btree or xattr with different fileID */
		if (error || key->fileID != fileID) {
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}

		/* We only care about extent-based EAs */
		if ((extent_info->record.xattr.recordType != kHFSPlusAttrForkData) &&
		    (extent_info->record.xattr.recordType != kHFSPlusAttrExtents)) {
			continue;
		}

		/* Point extent_info at the extents embedded in the current record;
		 * overflow_count distinguishes the base fork record from its
		 * overflow extent records in debug output.
		 */
		if (extent_info->record.xattr.recordType == kHFSPlusAttrForkData) {
			extent_info->overflow_count = 0;
			extent_info->extents = extent_info->record.xattr.forkData.theFork.extents;
		} else if (extent_info->record.xattr.recordType == kHFSPlusAttrExtents) {
			extent_info->overflow_count++;
			extent_info->extents = extent_info->record.xattr.overflowExtents.extents;
		}

		extent_info->recStartBlock = key->startBlock;
		/* Relocate each in-use extent of this record; a zero blockCount ends the list. */
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			if (extent_info->extents[i].blockCount == 0) {
				break;
			}
			extent_info->extent_index = i;
			error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
			if (error) {
				printf ("hfs_reclaim_xattr: fileID=%u hfs_reclaim_extent error=%d\n", fileID, error);
				goto out;
			}
		}
	}

out:
	/* If any blocks were relocated, account them and report progress */
	if (extent_info->blocks_relocated) {
		hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
		hfs_truncatefs_progress(hfsmp);
	}
	if (extent_info->iterator) {
		FREE(extent_info->iterator, M_TEMP);
	}
	if (extent_info) {
		FREE(extent_info, M_TEMP);
	}
	if (hfs_resize_debug) {
		printf("hfs_reclaim_xattr: === Finished relocating xattr for fileid=%u (error=%d) ===\n", fileID, error);
	}
	return error;
}
6204
6205 /*
6206 * Reclaim any extent-based extended attributes allocation blocks from
6207 * the area of the disk that is being truncated.
6208 *
6209 * The function traverses the attribute btree to find out the fileIDs
6210 * of the extended attributes that need to be relocated. For every
6211 * file whose large EA requires relocation, it looks up the cnode and
6212 * calls hfs_reclaim_xattr() to do all the work for allocating
6213 * new space, copying data, deallocating old space, and if required,
6214 * splitting the extents.
6215 *
6216 * Inputs:
6217 * allocLimit - starting block of the area being reclaimed
6218 *
6219 * Returns:
6220 * returns 0 on success, non-zero on failure.
6221 */
static int
hfs_reclaim_xattrspace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
{
	int error = 0;
	FCB *fcb;
	struct BTreeIterator *iterator = NULL;
	struct FSBufferDescriptor btdata;
	HFSPlusAttrKey *key;
	HFSPlusAttrRecord rec;
	int lockflags = 0;
	cnid_t prev_fileid = 0;		/* last fileID already processed, to skip duplicates */
	struct vnode *vp;
	int need_relocate;
	int btree_operation;
	u_int32_t files_moved = 0;
	u_int32_t prev_blocksmoved;
	int i;

	fcb = VTOF(hfsmp->hfs_attribute_vp);
	/* Store the value to print total blocks moved by this function in end */
	prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return ENOMEM;
	}
	bzero(iterator, sizeof(*iterator));
	key = (HFSPlusAttrKey *)&iterator->key;
	btdata.bufferAddress = &rec;
	btdata.itemSize = sizeof(rec);
	btdata.itemCount = 1;

	need_relocate = false;
	btree_operation = kBTreeFirstRecord;
	/* Traverse the attribute btree to find extent-based EAs to reclaim.
	 * The attribute btree lock is dropped between iterations so that
	 * hfs_vget/hfs_reclaim_xattr below can take locks in the proper order.
	 */
	while (1) {
		lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
		error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			/* Normal end of the btree walk is reported as "no more records". */
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}
		btree_operation = kBTreeNextRecord;

		/* If the extents of current fileID were already relocated, skip it */
		if (prev_fileid == key->fileID) {
			continue;
		}

		/* Check if any of the extents in the current record need to be relocated.
		 * An extent needs relocation if it extends past allocLimit.
		 */
		need_relocate = false;
		switch(rec.recordType) {
		case kHFSPlusAttrForkData:
			for (i = 0; i < kHFSPlusExtentDensity; i++) {
				if (rec.forkData.theFork.extents[i].blockCount == 0) {
					break;
				}
				if ((rec.forkData.theFork.extents[i].startBlock +
				     rec.forkData.theFork.extents[i].blockCount) > allocLimit) {
					need_relocate = true;
					break;
				}
			}
			break;

		case kHFSPlusAttrExtents:
			for (i = 0; i < kHFSPlusExtentDensity; i++) {
				if (rec.overflowExtents.extents[i].blockCount == 0) {
					break;
				}
				if ((rec.overflowExtents.extents[i].startBlock +
				     rec.overflowExtents.extents[i].blockCount) > allocLimit) {
					need_relocate = true;
					break;
				}
			}
			break;
		};

		/* Continue iterating to next attribute record */
		if (need_relocate == false) {
			continue;
		}

		/* Look up the vnode for corresponding file.  The cnode
		 * will be locked which will ensure that no one modifies
		 * the xattrs when we are relocating them.
		 *
		 * We want to allow open-unlinked files to be moved,
		 * so provide allow_deleted == 1 for hfs_vget().
		 */
		if (hfs_vget(hfsmp, key->fileID, &vp, 0, 1) != 0) {
			continue;
		}

		error = hfs_reclaim_xattr(hfsmp, vp, key->fileID, allocLimit, context);
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		if (error) {
			printf ("hfs_reclaim_xattrspace: Error relocating xattrs for fileid=%u (error=%d)\n", key->fileID, error);
			break;
		}
		/* hfs_reclaim_xattr handled every extent record of this file at once. */
		prev_fileid = key->fileID;
		files_moved++;
	}

	if (files_moved) {
		printf("hfs_reclaim_xattrspace: Relocated %u xattr blocks from %u files on \"%s\"\n",
				(hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
				files_moved, hfsmp->vcbVN);
	}

	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	return error;
}
6339
6340 /*
6341 * Reclaim blocks from regular files.
6342 *
6343 * This function iterates over all the record in catalog btree looking
6344 * for files with extents that overlap into the space we're trying to
6345 * free up. If a file extent requires relocation, it looks up the vnode
6346 * and calls function to relocate the data.
6347 *
6348 * Returns:
6349 * Zero on success, non-zero on failure.
6350 */
static int
hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
{
	int error;
	FCB *fcb;
	struct BTreeIterator *iterator = NULL;
	struct FSBufferDescriptor btdata;
	int btree_operation;
	int lockflags;
	struct HFSPlusCatalogFile filerec;
	struct vnode *vp;
	struct vnode *rvp;
	struct filefork *datafork;
	u_int32_t files_moved = 0;
	u_int32_t prev_blocksmoved;

	fcb = VTOF(hfsmp->hfs_catalog_vp);
	/* Store the value to print total blocks moved by this function at the end */
	prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return ENOMEM;
	}
	bzero(iterator, sizeof(*iterator));

	btdata.bufferAddress = &filerec;
	btdata.itemSize = sizeof(filerec);
	btdata.itemCount = 1;

	btree_operation = kBTreeFirstRecord;
	/* Walk every catalog record; the catalog lock is released between
	 * iterations so hfs_vget/hfs_reclaim_file can lock in proper order.
	 */
	while (1) {
		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			/* Normal end of the btree walk is reported as "no more records". */
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}
		btree_operation = kBTreeNextRecord;

		if (filerec.recordType != kHFSPlusFileRecord) {
			continue;
		}

		/* Check if any of the extents require relocation */
		if (hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec) == false) {
			continue;
		}

		/* We want to allow open-unlinked files to be moved, so allow_deleted == 1 */
		if (hfs_vget(hfsmp, filerec.fileID, &vp, 0, 1) != 0) {
			continue;
		}

		/* If data fork exists or item is a directory hard link, relocate blocks */
		datafork = VTOF(vp);
		if ((datafork && datafork->ff_blocks > 0) || vnode_isdir(vp)) {
			error = hfs_reclaim_file(hfsmp, vp, filerec.fileID,
					kHFSDataForkType, allocLimit, context);
			if (error) {
				printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
				hfs_unlock(VTOC(vp));
				vnode_put(vp);
				break;
			}
		}

		/* If resource fork exists or item is a directory hard link, relocate blocks.
		 * c_blocks minus the data fork's blocks is the resource fork's block count.
		 */
		if (((VTOC(vp)->c_blocks - (datafork ? datafork->ff_blocks : 0)) > 0) || vnode_isdir(vp)) {
			if (vnode_isdir(vp)) {
				/* Resource fork vnode lookup is invalid for directory hard link.
				 * So we fake data fork vnode as resource fork vnode.
				 */
				rvp = vp;
			} else {
				error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE);
				if (error) {
					printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", filerec.fileID, error);
					hfs_unlock(VTOC(vp));
					vnode_put(vp);
					break;
				}
				/* NOTE(review): C_NEED_RVNODE_PUT appears to defer the rvp
				 * reference drop to cnode reclaim — confirm against hfs_cnode.c.
				 */
				VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT;
			}

			error = hfs_reclaim_file(hfsmp, rvp, filerec.fileID,
					kHFSResourceForkType, allocLimit, context);
			if (error) {
				printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
				hfs_unlock(VTOC(vp));
				vnode_put(vp);
				break;
			}
		}

		/* The file forks were relocated successfully, now drop the
		 * cnode lock and vnode reference, and continue iterating to
		 * next catalog record.
		 */
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		files_moved++;
	}

	if (files_moved) {
		printf("hfs_reclaim_filespace: Relocated %u blocks from %u files on \"%s\"\n",
				(hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
				files_moved, hfsmp->vcbVN);
	}

	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	return error;
}
6466
/*
 * Reclaim space at the end of a file system.
 *
 * Part of shrinking an HFS+ volume: every allocation block at or beyond
 * allocLimit must be vacated before the volume size can be reduced.
 * The steps below run in a deliberate order: system files first, then a
 * journal flush, then the journal itself, then user file and extended
 * attribute extents.  Do not reorder them.
 *
 * Inputs -
 * 	allocLimit 	- start block of the space being reclaimed
 * 	reclaimblks	- number of allocation blocks to reclaim
 *
 * Returns 0 on success, or the first non-zero error from any step
 * (each failure is also logged via printf before returning).
 */
static int
hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context)
{
	int error = 0;

	/*
	 * Preflight the bitmap to find out total number of blocks that need
	 * relocation.  The count is stored in hfsmp->hfs_resize_totalblocks,
	 * which hfs_resize_progress() later uses as the denominator for the
	 * percent-complete calculation.
	 *
	 * Note: Since allocLimit is set to the location of new alternate volume
	 * header, the check below does not account for blocks allocated for old
	 * alternate volume header.
	 */
	error = hfs_count_allocated(hfsmp, allocLimit, reclaimblks, &(hfsmp->hfs_resize_totalblocks));
	if (error) {
		printf ("hfs_reclaimspace: Unable to determine total blocks to reclaim error=%d\n", error);
		return error;
	}
	if (hfs_resize_debug) {
		printf ("hfs_reclaimspace: Total number of blocks to reclaim = %u\n", hfsmp->hfs_resize_totalblocks);
	}

	/* Relocate extents of the Allocation file if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, kHFSAllocationFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Extents B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, kHFSExtentsFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Catalog B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, kHFSCatalogFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Attributes B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, kHFSAttributesFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Startup File if there is one and they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, kHFSStartupFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim startup file returned %d\n", error);
		return error;
	}

	/*
	 * We need to make sure the alternate volume header gets flushed if we moved
	 * any extents in the volume header. But we need to do that before
	 * shrinking the size of the volume, or else the journal code will panic
	 * with an invalid (too large) block number.
	 *
	 * Note that blks_moved will be set if ANY extent was moved, even
	 * if it was just an overflow extent. In this case, the journal_flush isn't
	 * strictly required, but shouldn't hurt.
	 */
	if (hfsmp->hfs_resize_blocksmoved) {
		hfs_journal_flush(hfsmp, FALSE);
	}

	/* Relocate journal file blocks if they're in the way. */
	error = hfs_reclaim_journal_file(hfsmp, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: hfs_reclaim_journal_file failed (%d)\n", error);
		return error;
	}

	/* Relocate journal info block blocks if they're in the way. */
	error = hfs_reclaim_journal_info_block(hfsmp, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: hfs_reclaim_journal_info_block failed (%d)\n", error);
		return error;
	}

	/* Reclaim extents from catalog file records */
	error = hfs_reclaim_filespace(hfsmp, allocLimit, context);
	if (error) {
		printf ("hfs_reclaimspace: hfs_reclaim_filespace returned error=%d\n", error);
		return error;
	}

	/* Reclaim extents from extent-based extended attributes, if any */
	error = hfs_reclaim_xattrspace(hfsmp, allocLimit, context);
	if (error) {
		printf ("hfs_reclaimspace: hfs_reclaim_xattrspace returned error=%d\n", error);
		return error;
	}

	return error;
}
6580
6581
/*
 * Check if there are any extents (including overflow extents) that overlap
 * into the disk space that is being reclaimed.
 *
 * Inputs -
 *	allocLimit - first allocation block of the region being reclaimed;
 *	             any extent ending beyond this block must be relocated.
 *	filerec    - catalog file record whose data fork, resource fork,
 *	             and (if present) overflow extents are examined.
 *
 * Output -
 *	true - One of the extents need to be relocated
 *	false - No overflow extents need to be relocated, or there was an error
 *	        (note: allocation or B-tree errors are reported as "no overlap").
 */
static int
hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec)
{
	struct BTreeIterator * iterator = NULL;
	struct FSBufferDescriptor btdata;
	HFSPlusExtentRecord extrec;
	HFSPlusExtentKey *extkeyptr;
	FCB *fcb;
	int overlapped = false;
	int i, j;
	int error;
	int lockflags = 0;
	u_int32_t endblock;

	/* Check if data fork overlaps the target space */
	for (i = 0; i < kHFSPlusExtentDensity; ++i) {
		/* A zero blockCount terminates the in-record extent list. */
		if (filerec->dataFork.extents[i].blockCount == 0) {
			break;
		}
		endblock = filerec->dataFork.extents[i].startBlock +
			filerec->dataFork.extents[i].blockCount;
		if (endblock > allocLimit) {
			overlapped = true;
			goto out;
		}
	}

	/* Check if resource fork overlaps the target space */
	for (j = 0; j < kHFSPlusExtentDensity; ++j) {
		if (filerec->resourceFork.extents[j].blockCount == 0) {
			break;
		}
		endblock = filerec->resourceFork.extents[j].startBlock +
			filerec->resourceFork.extents[j].blockCount;
		if (endblock > allocLimit) {
			overlapped = true;
			goto out;
		}
	}

	/* Return back if there are no overflow extents for this file.
	 * If either loop above stopped early (hit a zero blockCount), that
	 * fork did not fill all kHFSPlusExtentDensity in-record slots and
	 * therefore cannot have spilled into the extents overflow B-tree.
	 */
	if ((i < kHFSPlusExtentDensity) && (j < kHFSPlusExtentDensity)) {
		goto out;
	}

	/* On allocation failure, report "no overlap" per the header comment. */
	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return 0;
	}
	bzero(iterator, sizeof(*iterator));
	/* Build a search key for the first overflow extent of this file:
	 * forkType 0 / startBlock 0 sorts before any real record for fileID.
	 */
	extkeyptr = (HFSPlusExtentKey *)&iterator->key;
	extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength;
	extkeyptr->forkType = 0;
	extkeyptr->fileID = filerec->fileID;
	extkeyptr->startBlock = 0;

	btdata.bufferAddress = &extrec;
	btdata.itemSize = sizeof(extrec);
	btdata.itemCount = 1;

	fcb = VTOF(hfsmp->hfs_extents_vp);

	lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);

	/* This will position the iterator just before the first overflow
	 * extent record for given fileID.  It will always return btNotFound,
	 * so we special case the error code.
	 */
	error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
	if (error && (error != btNotFound)) {
		/* overlapped is still false here, matching the error contract. */
		goto out;
	}

	/* BTIterateRecord() might return error if the btree is empty, and
	 * therefore we return that the extent does not overflow to the caller
	 */
	error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	while (error == 0) {
		/* Stop when we encounter a different file.  The iterator key is
		 * refreshed by BTIterateRecord, so extkeyptr now reflects the
		 * record just read.
		 */
		if (extkeyptr->fileID != filerec->fileID) {
			break;
		}
		/* Check if any of the forks exist in the target space. */
		for (i = 0; i < kHFSPlusExtentDensity; ++i) {
			if (extrec[i].blockCount == 0) {
				break;
			}
			endblock = extrec[i].startBlock + extrec[i].blockCount;
			if (endblock > allocLimit) {
				overlapped = true;
				goto out;
			}
		}
		/* Look for more records. */
		error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	}

out:
	/* lockflags is only non-zero once the extents B-tree lock was taken. */
	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (iterator) {
		kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	}
	return overlapped;
}
6695
6696
6697 /*
6698 * Calculate the progress of a file system resize operation.
6699 */
6700 __private_extern__
6701 int
6702 hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress)
6703 {
6704 if ((hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) == 0) {
6705 return (ENXIO);
6706 }
6707
6708 if (hfsmp->hfs_resize_totalblocks > 0) {
6709 *progress = (u_int32_t)((hfsmp->hfs_resize_blocksmoved * 100ULL) / hfsmp->hfs_resize_totalblocks);
6710 } else {
6711 *progress = 0;
6712 }
6713
6714 return (0);
6715 }
6716
6717
6718 /*
6719 * Creates a UUID from a unique "name" in the HFS UUID Name space.
6720 * See version 3 UUID.
6721 */
6722 static void
6723 hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result)
6724 {
6725 MD5_CTX md5c;
6726 uint8_t rawUUID[8];
6727
6728 ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6];
6729 ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7];
6730
6731 MD5Init( &md5c );
6732 MD5Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) );
6733 MD5Update( &md5c, rawUUID, sizeof (rawUUID) );
6734 MD5Final( result, &md5c );
6735
6736 result[6] = 0x30 | ( result[6] & 0x0F );
6737 result[8] = 0x80 | ( result[8] & 0x3F );
6738 }
6739
/*
 * Get file system attributes.
 *
 * VFS getattr entry point: fills in whichever fields of *fsap the caller
 * asked for.  Cheap scalar fields are returned unconditionally via
 * VFSATTR_RETURN; the more expensive aggregates (capabilities, attribute
 * masks, subtype, volume name, UUID) are computed only when active.
 */
static int
hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
{
/* Attribute masks HFS supports: everything in the generic masks except
 * named-attribute counts/lists, fork enumeration, and (for volumes) access
 * time, which HFS does not maintain at the volume level.
 */
#define HFS_ATTR_CMN_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST))
#define HFS_ATTR_FILE_VALIDMASK (ATTR_FILE_VALIDMASK & ~(ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST))
#define HFS_ATTR_CMN_VOL_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST | ATTR_CMN_ACCTIME))

	ExtendedVCB *vcb = VFSTOVCB(mp);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	u_int32_t freeCNIDs;

	/* CNIDs are 32-bit; free count is everything above the next CNID to assign. */
	freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)hfsmp->vcbNxtCNID;

	VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_filecount, (u_int64_t)hfsmp->vcbFilCnt);
	VFSATTR_RETURN(fsap, f_dircount, (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_maxobjcount, (u_int64_t)0xFFFFFFFF);
	VFSATTR_RETURN(fsap, f_iosize, (size_t)cluster_max_io_size(mp, 0));
	VFSATTR_RETURN(fsap, f_blocks, (u_int64_t)hfsmp->totalBlocks);
	VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)hfs_freeblks(hfsmp, 0));
	VFSATTR_RETURN(fsap, f_bavail, (u_int64_t)hfs_freeblks(hfsmp, 1));
	VFSATTR_RETURN(fsap, f_bsize, (u_int32_t)vcb->blockSize);
	/* XXX needs clarification */
	VFSATTR_RETURN(fsap, f_bused, hfsmp->totalBlocks - hfs_freeblks(hfsmp, 1));
	/* Maximum files is constrained by total blocks. */
	VFSATTR_RETURN(fsap, f_files, (u_int64_t)(hfsmp->totalBlocks - 2));
	VFSATTR_RETURN(fsap, f_ffree, MIN((u_int64_t)freeCNIDs, (u_int64_t)hfs_freeblks(hfsmp, 1)));

	fsap->f_fsid.val[0] = hfsmp->hfs_raw_dev;
	fsap->f_fsid.val[1] = vfs_typenum(mp);
	VFSATTR_SET_SUPPORTED(fsap, f_fsid);

	VFSATTR_RETURN(fsap, f_signature, vcb->vcbSigWord);
	VFSATTR_RETURN(fsap, f_carbon_fsid, 0);

	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
		vol_capabilities_attr_t *cap;

		cap = &fsap->f_capabilities;

		/* Plain HFS advertises far fewer format capabilities than HFS+. */
		if (hfsmp->hfs_flags & HFS_STANDARD) {
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS |
				VOL_CAP_FMT_HIDDEN_FILES |
				VOL_CAP_FMT_PATH_FROM_ID;
		} else {
			/* Journal and case-sensitivity bits reflect this mount's state. */
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_SYMBOLICLINKS |
				VOL_CAP_FMT_HARDLINKS |
				VOL_CAP_FMT_JOURNAL |
				VOL_CAP_FMT_ZERO_RUNS |
				(hfsmp->jnl ? VOL_CAP_FMT_JOURNAL_ACTIVE : 0) |
				(hfsmp->hfs_flags & HFS_CASE_SENSITIVE ? VOL_CAP_FMT_CASE_SENSITIVE : 0) |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS |
				VOL_CAP_FMT_2TB_FILESIZE |
				VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
				VOL_CAP_FMT_PATH_FROM_ID |
				VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
				VOL_CAP_FMT_PATH_FROM_ID;
#endif
		}
		cap->capabilities[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_SEARCHFS |
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_EXCHANGEDATA |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif
		cap->capabilities[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->capabilities[VOL_CAPABILITIES_RESERVED2] = 0;

		/* The "valid" masks record which bits the FS knows how to report,
		 * regardless of whether they are currently set above.
		 */
		cap->valid[VOL_CAPABILITIES_FORMAT] =
			VOL_CAP_FMT_PERSISTENTOBJECTIDS |
			VOL_CAP_FMT_SYMBOLICLINKS |
			VOL_CAP_FMT_HARDLINKS |
			VOL_CAP_FMT_JOURNAL |
			VOL_CAP_FMT_JOURNAL_ACTIVE |
			VOL_CAP_FMT_NO_ROOT_TIMES |
			VOL_CAP_FMT_SPARSE_FILES |
			VOL_CAP_FMT_ZERO_RUNS |
			VOL_CAP_FMT_CASE_SENSITIVE |
			VOL_CAP_FMT_CASE_PRESERVING |
			VOL_CAP_FMT_FAST_STATFS |
			VOL_CAP_FMT_2TB_FILESIZE |
			VOL_CAP_FMT_OPENDENYMODES |
			VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
			VOL_CAP_FMT_PATH_FROM_ID |
			VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
			VOL_CAP_FMT_PATH_FROM_ID;
#endif
		cap->valid[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_SEARCHFS |
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_EXCHANGEDATA |
			VOL_CAP_INT_COPYFILE |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
			VOL_CAP_INT_MANLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif
		cap->valid[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->valid[VOL_CAPABILITIES_RESERVED2] = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
		vol_attributes_attr_t *attrp = &fsap->f_attributes;

		/* "valid" = attributes getattrlist can return; "native" = those
		 * stored directly by HFS.  For HFS they are the same masks.
		 */
		attrp->validattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
		attrp->validattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->validattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->validattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->validattr.forkattr = 0;

		attrp->nativeattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
		attrp->nativeattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->nativeattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->nativeattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->nativeattr.forkattr = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
	}
	/* Volume timestamps; HFS keeps whole-second resolution only. */
	fsap->f_create_time.tv_sec = hfsmp->hfs_itime;
	fsap->f_create_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_create_time);
	fsap->f_modify_time.tv_sec = hfsmp->vcbLsMod;
	fsap->f_modify_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_modify_time);

	fsap->f_backup_time.tv_sec = hfsmp->vcbVolBkUp;
	fsap->f_backup_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_backup_time);
	if (VFSATTR_IS_ACTIVE(fsap, f_fssubtype)) {
		u_int16_t subtype = 0;

		/*
		 * Subtypes (flavors) for HFS
		 *	0:	Mac OS Extended
		 *	1:	Mac OS Extended (Journaled)
		 *	2:	Mac OS Extended (Case Sensitive)
		 *	3:	Mac OS Extended (Case Sensitive, Journaled)
		 *	4 - 127:	Reserved
		 *	128:	Mac OS Standard
		 *
		 */
		if (hfsmp->hfs_flags & HFS_STANDARD) {
			subtype = HFS_SUBTYPE_STANDARDHFS;
		} else /* HFS Plus */ {
			if (hfsmp->jnl)
				subtype |= HFS_SUBTYPE_JOURNALED;
			if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
				subtype |= HFS_SUBTYPE_CASESENSITIVE;
		}
		fsap->f_fssubtype = subtype;
		VFSATTR_SET_SUPPORTED(fsap, f_fssubtype);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		strlcpy(fsap->f_vol_name, (char *) hfsmp->vcbVN, MAXPATHLEN);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_uuid)) {
		hfs_getvoluuid(hfsmp, fsap->f_uuid);
		VFSATTR_SET_SUPPORTED(fsap, f_uuid);
	}
	return (0);
}
6933
/*
 * Perform a volume rename.  Requires the FS' root vp.
 *
 * Renames the root folder's catalog record to the new name, updates the
 * in-core VCB name, notifies CoreStorage of the new label, and replaces
 * the root cnode's cached descriptor.  A zero-length name is silently
 * accepted as a no-op.  Returns 0 on success or an errno-style error.
 */
static int
hfs_rename_volume(struct vnode *vp, const char *name, proc_t p)
{
	ExtendedVCB *vcb = VTOVCB(vp);
	struct cnode *cp = VTOC(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	struct cat_desc to_desc;
	struct cat_desc todir_desc;
	struct cat_desc new_desc;
	cat_cookie_t cookie;
	int lockflags;
	int error = 0;
	char converted_volname[256];
	size_t volname_length = 0;
	size_t conv_volname_length = 0;


	/*
	 * Ignore attempts to rename a volume to a zero-length name.
	 */
	if (name[0] == 0)
		return(0);

	bzero(&to_desc, sizeof(to_desc));
	bzero(&todir_desc, sizeof(todir_desc));
	bzero(&new_desc, sizeof(new_desc));
	bzero(&cookie, sizeof(cookie));

	/* Destination "directory" is the root parent; the rename stays in place. */
	todir_desc.cd_parentcnid = kHFSRootParentID;
	todir_desc.cd_cnid = kHFSRootFolderID;
	todir_desc.cd_flags = CD_ISDIR;

	to_desc.cd_nameptr = (const u_int8_t *)name;
	to_desc.cd_namelen = strlen(name);
	to_desc.cd_parentcnid = kHFSRootParentID;
	to_desc.cd_cnid = cp->c_cnid;
	to_desc.cd_flags = CD_ISDIR;

	/* Nesting order: cnode lock -> transaction -> catalog preflight ->
	 * catalog lock.  Each level is torn down before its parent.
	 */
	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK)) == 0) {
		if ((error = hfs_start_transaction(hfsmp)) == 0) {
			if ((error = cat_preflight(hfsmp, CAT_RENAME, &cookie, p)) == 0) {
				lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

				error = cat_rename(hfsmp, &cp->c_desc, &todir_desc, &to_desc, &new_desc);

				/*
				 * If successful, update the name in the VCB, ensure it's terminated.
				 */
				if (!error) {
					strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN));
					volname_length = strlen ((const char*)vcb->vcbVN);
#define DKIOCCSSETLVNAME _IOW('d', 198, char[1024])
					/* Send the volume name down to CoreStorage if necessary */
					error = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
					if (error == 0) {
						(void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
					}
					/* Normalization/ioctl failure is non-fatal: the
					 * catalog rename already succeeded, so clear error.
					 */
					error = 0;
				}

				hfs_systemfile_unlock(hfsmp, lockflags);
				cat_postflight(hfsmp, &cookie, p);

				/* NOTE(review): the VCB is marked dirty only when
				 * cat_rename failed — verify this isn't an inverted
				 * test; the unconditional flush below writes the
				 * volume header either way.
				 */
				if (error)
					MarkVCBDirty(vcb);
				(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			}
			hfs_end_transaction(hfsmp);
		}
		if (!error) {
			/* Release old allocated name buffer */
			if (cp->c_desc.cd_flags & CD_HASBUF) {
				const char *tmp_name = (const char *)cp->c_desc.cd_nameptr;

				cp->c_desc.cd_nameptr = 0;
				cp->c_desc.cd_namelen = 0;
				cp->c_desc.cd_flags &= ~CD_HASBUF;
				vfs_removename(tmp_name);
			}
			/* Update cnode's catalog descriptor */
			replace_desc(cp, &new_desc);
			vcb->volumeNameEncodingHint = new_desc.cd_encoding;
			cp->c_touch_chgtime = TRUE;
		}

		hfs_unlock(cp);
	}

	return(error);
}
7027
7028 /*
7029 * Get file system attributes.
7030 */
7031 static int
7032 hfs_vfs_setattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
7033 {
7034 kauth_cred_t cred = vfs_context_ucred(context);
7035 int error = 0;
7036
7037 /*
7038 * Must be superuser or owner of filesystem to change volume attributes
7039 */
7040 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(mp)->f_owner))
7041 return(EACCES);
7042
7043 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
7044 vnode_t root_vp;
7045
7046 error = hfs_vfs_root(mp, &root_vp, context);
7047 if (error)
7048 goto out;
7049
7050 error = hfs_rename_volume(root_vp, fsap->f_vol_name, vfs_context_proc(context));
7051 (void) vnode_put(root_vp);
7052 if (error)
7053 goto out;
7054
7055 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
7056 }
7057
7058 out:
7059 return error;
7060 }
7061
7062 /* If a runtime corruption is detected, set the volume inconsistent
7063 * bit in the volume attributes. The volume inconsistent bit is a persistent
7064 * bit which represents that the volume is corrupt and needs repair.
7065 * The volume inconsistent bit can be set from the kernel when it detects
7066 * runtime corruption or from file system repair utilities like fsck_hfs when
7067 * a repair operation fails. The bit should be cleared only from file system
7068 * verify/repair utility like fsck_hfs when a verify/repair succeeds.
7069 */
7070 void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp)
7071 {
7072 HFS_MOUNT_LOCK(hfsmp, TRUE);
7073 if ((hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) == 0) {
7074 hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask;
7075 MarkVCBDirty(hfsmp);
7076 }
7077 if ((hfsmp->hfs_flags & HFS_READ_ONLY)==0) {
7078 /* Log information to ASL log */
7079 fslog_fs_corrupt(hfsmp->hfs_mp);
7080 printf("hfs: Runtime corruption detected on %s, fsck will be forced on next mount.\n", hfsmp->vcbVN);
7081 }
7082 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
7083 }
7084
7085 /* Replay the journal on the device node provided. Returns zero if
7086 * journal replay succeeded or no journal was supposed to be replayed.
7087 */
7088 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context)
7089 {
7090 int retval = 0;
7091 struct mount *mp = NULL;
7092 struct hfs_mount_args *args = NULL;
7093
7094 /* Replay allowed only on raw devices */
7095 if (!vnode_ischr(devvp) && !vnode_isblk(devvp)) {
7096 retval = EINVAL;
7097 goto out;
7098 }
7099
7100 /* Create dummy mount structures */
7101 MALLOC(mp, struct mount *, sizeof(struct mount), M_TEMP, M_WAITOK);
7102 if (mp == NULL) {
7103 retval = ENOMEM;
7104 goto out;
7105 }
7106 bzero(mp, sizeof(struct mount));
7107 mount_lock_init(mp);
7108
7109 MALLOC(args, struct hfs_mount_args *, sizeof(struct hfs_mount_args), M_TEMP, M_WAITOK);
7110 if (args == NULL) {
7111 retval = ENOMEM;
7112 goto out;
7113 }
7114 bzero(args, sizeof(struct hfs_mount_args));
7115
7116 retval = hfs_mountfs(devvp, mp, args, 1, context);
7117 buf_flushdirtyblks(devvp, TRUE, 0, "hfs_journal_replay");
7118
7119 /* FSYNC the devnode to be sure all data has been flushed */
7120 retval = VNOP_FSYNC(devvp, MNT_WAIT, context);
7121
7122 out:
7123 if (mp) {
7124 mount_lock_destroy(mp);
7125 FREE(mp, M_TEMP);
7126 }
7127 if (args) {
7128 FREE(args, M_TEMP);
7129 }
7130 return retval;
7131 }
7132
/*
 * hfs vfs operations.
 *
 * Positional initializer for the VFS operations vector; entries are in
 * struct vfsops declaration order, with the remaining slots zeroed.
 */
struct vfsops hfs_vfsops = {
	hfs_mount,		/* mount */
	hfs_start,		/* start */
	hfs_unmount,		/* unmount */
	hfs_vfs_root,		/* root */
	hfs_quotactl,		/* quotactl */
	hfs_vfs_getattr, 	/* getattr; was hfs_statfs */
	hfs_sync,		/* sync */
	hfs_vfs_vget,		/* vget */
	hfs_fhtovp,		/* fhtovp (NFS file handle -> vnode) */
	hfs_vptofh,		/* vptofh (vnode -> NFS file handle) */
	hfs_init,		/* init */
	hfs_sysctl,		/* sysctl */
	hfs_vfs_setattr,	/* setattr */
	{NULL}			/* remaining slots unused */
};