]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfs_vfsops.c
adf02520b5e3648d66bbe69508189c6d3a1865cc
[apple/xnu.git] / bsd / hfs / hfs_vfsops.c
1 /*
2 * Copyright (c) 1999-2010 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1991, 1993, 1994
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * hfs_vfsops.c
66 * derived from @(#)ufs_vfsops.c 8.8 (Berkeley) 5/20/95
67 *
68 * (c) Copyright 1997-2002 Apple Computer, Inc. All rights reserved.
69 *
70 * hfs_vfsops.c -- VFS layer for loadable HFS file system.
71 *
72 */
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/kauth.h>
76
77 #include <sys/ubc.h>
78 #include <sys/ubc_internal.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/mount_internal.h>
81 #include <sys/sysctl.h>
82 #include <sys/malloc.h>
83 #include <sys/stat.h>
84 #include <sys/quota.h>
85 #include <sys/disk.h>
86 #include <sys/paths.h>
87 #include <sys/utfconv.h>
88 #include <sys/kdebug.h>
89 #include <sys/fslog.h>
90 #include <sys/ubc.h>
91
92 #include <kern/locks.h>
93
94 #include <vfs/vfs_journal.h>
95
96 #include <miscfs/specfs/specdev.h>
97 #include <hfs/hfs_mount.h>
98
99 #include <libkern/crypto/md5.h>
100 #include <uuid/uuid.h>
101
102 #include "hfs.h"
103 #include "hfs_catalog.h"
104 #include "hfs_cnode.h"
105 #include "hfs_dbg.h"
106 #include "hfs_endian.h"
107 #include "hfs_hotfiles.h"
108 #include "hfs_quota.h"
109
110 #include "hfscommon/headers/FileMgrInternal.h"
111 #include "hfscommon/headers/BTreesInternal.h"
112
113 #if CONFIG_PROTECT
114 #include <sys/cprotect.h>
115 #endif
116
117 #if CONFIG_HFS_ALLOC_RBTREE
118 #include "hfscommon/headers/HybridAllocator.h"
119 #endif
120
/* When non-zero, mount-path failures are logged via printf. */
#define HFS_MOUNT_DEBUG 1

#if HFS_DIAGNOSTIC
/* Diagnostic-build debug switches (off by default). */
int hfs_dbg_all = 0;
int hfs_dbg_err = 0;
#endif

/* Enable/disable debugging code for live volume resizing */
int hfs_resize_debug = 0;

/* Lock groups/attributes shared by all HFS mounts; set up at hfs_init() time. */
lck_grp_attr_t *  hfs_group_attr;
lck_attr_t *  hfs_lock_attr;
lck_grp_t *  hfs_mutex_group;
lck_grp_t *  hfs_rwlock_group;
lck_grp_t *  hfs_spinlock_group;

extern struct vnodeopv_desc hfs_vnodeop_opv_desc;
extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc;

/* not static so we can re-use in hfs_readwrite.c for build_path calls */
int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

/* File-local VFS operation implementations and helpers. */
static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args);
static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context);
static int hfs_flushfiles(struct mount *, int, struct proc *);
static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush);
static int hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp);
static int hfs_init(struct vfsconf *vfsp);
static int hfs_vfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t context);
static int hfs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t context);
static int hfs_start(struct mount *mp, int flags, vfs_context_t context);
static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context);
static int hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec);
static int hfs_journal_replay(vnode_t devvp, vfs_context_t context);
static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context);

/* Red-black tree allocator bring-up/tear-down (CONFIG_HFS_ALLOC_RBTREE). */
void hfs_initialize_allocator (struct hfsmount *hfsmp);
int hfs_teardown_allocator (struct hfsmount *hfsmp);

/* Non-static entry points shared with other HFS source files. */
int hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context);
int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int journal_replay_only, vfs_context_t context);
int hfs_reload(struct mount *mp);
int hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, vfs_context_t context);
int hfs_sync(struct mount *mp, int waitfor, vfs_context_t context);
int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
                      user_addr_t newp, size_t newlen, vfs_context_t context);
int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context);
168
169 /*
170 * Called by vfs_mountroot when mounting HFS Plus as root.
171 */
172
173 int
174 hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
175 {
176 struct hfsmount *hfsmp;
177 ExtendedVCB *vcb;
178 struct vfsstatfs *vfsp;
179 int error;
180
181 if ((error = hfs_mountfs(rvp, mp, NULL, 0, context))) {
182 if (HFS_MOUNT_DEBUG) {
183 printf("hfs_mountroot: hfs_mountfs returned %d, rvp (%p) name (%s) \n",
184 error, rvp, (rvp->v_name ? rvp->v_name : "unknown device"));
185 }
186 return (error);
187 }
188
189 /* Init hfsmp */
190 hfsmp = VFSTOHFS(mp);
191
192 hfsmp->hfs_uid = UNKNOWNUID;
193 hfsmp->hfs_gid = UNKNOWNGID;
194 hfsmp->hfs_dir_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
195 hfsmp->hfs_file_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
196
197 /* Establish the free block reserve. */
198 vcb = HFSTOVCB(hfsmp);
199 vcb->reserveBlocks = ((u_int64_t)vcb->totalBlocks * HFS_MINFREE) / 100;
200 vcb->reserveBlocks = MIN(vcb->reserveBlocks, HFS_MAXRESERVE / vcb->blockSize);
201
202 vfsp = vfs_statfs(mp);
203 (void)hfs_statfs(mp, vfsp, NULL);
204
205 return (0);
206 }
207
208
209 /*
210 * VFS Operations.
211 *
212 * mount system call
213 */
214
/*
 * hfs_mount: VFS mount entry point for HFS/HFS+.
 *
 * Handles both fresh mounts and MNT_UPDATE requests.  An update request
 * can be one of:
 *   - MNT_RELOAD: re-read incore data after an fsck (read-only mounts only)
 *   - read-write -> read-only downgrade (flush files, close the journal)
 *   - read-only -> read-write upgrade (reopen journal, clear clean bit)
 *   - plain parameter change, delegated to hfs_changefs()
 *
 * mp      - mount structure for this volume
 * devvp   - block device vnode (used for fresh mounts only)
 * data    - user-space pointer to a struct hfs_mount_args
 * context - caller's VFS context
 *
 * Returns 0 on success or an errno value.
 */
int
hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct hfsmount *hfsmp = NULL;
	struct hfs_mount_args args;
	int retval = E_NONE;
	u_int32_t cmdflags;

	/* Mount arguments live in user space; copy them in before anything else. */
	if ((retval = copyin(data, (caddr_t)&args, sizeof(args)))) {
		if (HFS_MOUNT_DEBUG) {
			printf("hfs_mount: copyin returned %d for fs\n", retval);
		}
		return (retval);
	}
	cmdflags = (u_int32_t)vfs_flags(mp) & MNT_CMDFLAGS;
	if (cmdflags & MNT_UPDATE) {
		hfsmp = VFSTOHFS(mp);

		/* Reload incore data after an fsck. */
		if (cmdflags & MNT_RELOAD) {
			if (vfs_isrdonly(mp)) {
				int error = hfs_reload(mp);
				if (error && HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_reload returned %d on %s \n", error, hfsmp->vcbVN);
				}
				return error;
			}
			else {
				/* Reload is only meaningful (and safe) on a read-only mount. */
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: MNT_RELOAD not supported on rdwr filesystem %s\n", hfsmp->vcbVN);
				}
				return (EINVAL);
			}
		}

		/* Change to a read-only file system. */
		if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
		    vfs_isrdonly(mp)) {
			int flags;

			/* Set flag to indicate that a downgrade to read-only
			 * is in progress and therefore block any further
			 * modifications to the file system.
			 */
			hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
			hfsmp->hfs_flags |= HFS_RDONLY_DOWNGRADE;
			hfsmp->hfs_downgrading_proc = current_thread();
			hfs_unlock_global (hfsmp);

			/* use VFS_SYNC to push out System (btree) files */
			retval = VFS_SYNC(mp, MNT_WAIT, context);
			if (retval && ((cmdflags & MNT_FORCE) == 0)) {
				/* Sync failed and this is not a forced downgrade: back out. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: VFS_SYNC returned %d during b-tree sync of %s \n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			flags = WRITECLOSE;
			if (cmdflags & MNT_FORCE)
				flags |= FORCECLOSE;

			if ((retval = hfs_flushfiles(mp, flags, p))) {
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_flushfiles returned %d on %s \n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* mark the volume cleanly unmounted */
			hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask;
			retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			/* NOTE(review): HFS_READ_ONLY is set here even if the header flush
			 * failed; the error path below clears it again. */
			hfsmp->hfs_flags |= HFS_READ_ONLY;

			/* also get the volume bitmap blocks */
			if (!retval) {
				if (vnode_mount(hfsmp->hfs_devvp) == mp) {
					retval = hfs_fsync(hfsmp->hfs_devvp, MNT_WAIT, 0, p);
				} else {
					/* devvp belongs to another mount; take an iocount around the fsync. */
					vnode_get(hfsmp->hfs_devvp);
					retval = VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);
					vnode_put(hfsmp->hfs_devvp);
				}
			}
			if (retval) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: FSYNC on devvp returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				/* Downgrade failed: restore the read-write state. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				hfsmp->hfs_flags &= ~HFS_READ_ONLY;
				goto out;
			}
			if (hfsmp->jnl) {
				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

				journal_close(hfsmp->jnl);
				hfsmp->jnl = NULL;

				// Note: we explicitly don't want to shutdown
				// access to the jvp because we may need
				// it later if we go back to being read-write.

				hfs_unlock_global (hfsmp);
			}

#if CONFIG_HFS_ALLOC_RBTREE
			(void) hfs_teardown_allocator(hfsmp);
#endif
			hfsmp->hfs_downgrading_proc = NULL;
		}

		/* Change to a writable file system. */
		if (vfs_iswriteupgrade(mp)) {
#if CONFIG_HFS_ALLOC_RBTREE
			thread_t allocator_thread;
#endif

			/*
			 * On inconsistent disks, do not allow read-write mount
			 * unless it is the boot volume being mounted.
			 */
			if (!(vfs_flags(mp) & MNT_ROOTFS) &&
					(hfsmp->vcbAtrb & kHFSVolumeInconsistentMask)) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: attempting to mount inconsistent non-root volume %s\n",  (hfsmp->vcbVN));
				}
				retval = EINVAL;
				goto out;
			}

			// If the journal was shut-down previously because we were
			// asked to be read-only, let's start it back up again now

			if (   (HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask)
			    && hfsmp->jnl == NULL
			    && hfsmp->jvp != NULL) {
				int jflags;

				if (hfsmp->hfs_flags & HFS_NEED_JNL_RESET) {
					jflags = JOURNAL_RESET;
				} else {
					jflags = 0;
				}

				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

				hfsmp->jnl = journal_open(hfsmp->jvp,
						(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
						hfsmp->jnl_size,
						hfsmp->hfs_devvp,
						hfsmp->hfs_logical_block_size,
						jflags,
						0,
						hfs_sync_metadata, hfsmp->hfs_mp);

				/*
				 * Set up the trim callback function so that we can add
				 * recently freed extents to the free extent cache once
				 * the transaction that freed them is written to the
				 * journal on disk.
				 */
				if (hfsmp->jnl)
					journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);

				hfs_unlock_global (hfsmp);

				if (hfsmp->jnl == NULL) {
					if (HFS_MOUNT_DEBUG) {
						printf("hfs_mount: journal_open == NULL; couldn't be opened on %s \n", (hfsmp->vcbVN));
					}
					retval = EINVAL;
					goto out;
				} else {
					hfsmp->hfs_flags &= ~HFS_NEED_JNL_RESET;
				}

			}

			/* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
			retval = hfs_erase_unused_nodes(hfsmp);
			if (retval != E_NONE) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_erase_unused_nodes returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* If this mount point was downgraded from read-write
			 * to read-only, clear that information as we are now
			 * moving back to read-write.
			 */
			hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
			hfsmp->hfs_downgrading_proc = NULL;

			/* mark the volume dirty (clear clean unmount bit) */
			hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask;

			retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			if (retval != E_NONE) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_flushvolumeheader returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* Only clear HFS_READ_ONLY after a successful write */
			hfsmp->hfs_flags &= ~HFS_READ_ONLY;


			if (!(hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_STANDARD))) {
				/* Setup private/hidden directories for hardlinks. */
				hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
				hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

				/* Clean up files orphaned by a previous crash/unclean unmount. */
				hfs_remove_orphans(hfsmp);

				/*
				 * Allow hot file clustering if conditions allow.
				 */
				if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) &&
				    ((hfsmp->hfs_flags & HFS_SSD) == 0)) {
					(void) hfs_recording_init(hfsmp);
				}
				/* Force ACLs on HFS+ file systems. */
				if (vfs_extendedsecurity(HFSTOVFS(hfsmp)) == 0) {
					vfs_setextendedsecurity(HFSTOVFS(hfsmp));
				}
			}

#if CONFIG_HFS_ALLOC_RBTREE
			/*
			 * Like the normal mount case, we need to handle creation of the allocation red-black tree
			 * if we're upgrading from read-only to read-write.
			 *
			 * We spawn a thread to create the pair of red-black trees for this volume.
			 * However, in so doing, we must be careful to ensure that if this thread is still
			 * running after mount has finished, it doesn't interfere with an unmount. Specifically,
			 * we'll need to set a bit that indicates we're in progress building the trees here.
			 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that
			 * notifies the tree generation code that an unmount is waiting.  Also, mark the extent
			 * tree flags that the allocator is enabled for use before we spawn the thread that will start
			 * scanning the RB tree.
			 *
			 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only),
			 * which has not previously encountered a bad error on the red-black tree code.  Also, don't
			 * try to re-build a tree that already exists.
			 */

			if (hfsmp->extent_tree_flags == 0) {
				hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED);
				/* Initialize EOF counter so that the thread can assume it started at initial values */
				hfsmp->offset_block_end = 0;

				InitTree(hfsmp);

				kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread);
				thread_deallocate(allocator_thread);
			}

#endif
		}

		/* Update file system parameters. */
		retval = hfs_changefs(mp, &args);
		if (retval &&  HFS_MOUNT_DEBUG) {
			printf("hfs_mount: hfs_changefs returned %d for %s\n", retval, hfsmp->vcbVN);
		}

	} else /* not an update request */ {

		/* Set the mount flag to indicate that we support volfs  */
		vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DOVOLFS));

		retval = hfs_mountfs(devvp, mp, &args, 0, context);
		if (retval && HFS_MOUNT_DEBUG) {
			printf("hfs_mount: hfs_mountfs returned %d\n", retval);
		}
#if CONFIG_PROTECT
		/*
		 * If above mount call was successful, and this mount is content protection
		 * enabled, then verify the on-disk EA on the root to ensure that the filesystem
		 * is of a suitable vintage to allow the mount to proceed.
		 */
		if ((retval == 0) && (cp_fs_protected (mp))) {
			int err = 0;
			struct cp_root_xattr xattr;
			bzero (&xattr, sizeof(struct cp_root_xattr));
			hfsmp = vfs_fsprivate(mp);

			/* go get the EA to get the version information */
			err = cp_getrootxattr (hfsmp, &xattr);
			/* If there was no EA there, then write one out. */
			if (err == ENOATTR) {
				bzero(&xattr, sizeof(struct cp_root_xattr));
				xattr.major_version = CP_CURRENT_MAJOR_VERS;
				xattr.minor_version = CP_CURRENT_MINOR_VERS;
				xattr.flags = 0;

				err = cp_setrootxattr (hfsmp, &xattr);
			}
			/*
			 * For any other error, including having an out of date CP version in the
			 * EA, or for an error out of cp_setrootxattr, deny the mount
			 * and do not proceed further.
			 */
			if (err || xattr.major_version != CP_CURRENT_MAJOR_VERS) {
				/* Deny the mount and tear down. */
				retval = EPERM;
				(void) hfs_unmount (mp, MNT_FORCE, context);
			}
		}
#endif
	}
out:
	/* On success, refresh the cached statfs information. */
	if (retval == 0) {
		(void)hfs_statfs(mp, vfs_statfs(mp), context);
	}
	return (retval);
}
540
541
/*
 * Argument package handed to hfs_changefs_callback() for each vnode
 * visited by vnode_iterate() during hfs_changefs().
 */
struct hfs_changefs_cargs {
	struct hfsmount *hfsmp;		/* mount whose parameters changed */
	int		namefix;	/* non-zero: name encoding converter changed */
	int		permfix;	/* non-zero: default uid/gid/mask changed */
	int		permswitch;	/* non-zero: MNT_UNKNOWNPERMISSIONS was toggled */
};
548
/*
 * Per-vnode callback for hfs_changefs(), run under vnode_iterate().
 *
 * Re-reads the cnode's catalog record and, depending on the flags in
 * the hfs_changefs_cargs package, refreshes on-disk ownership/mode
 * (permswitch/permfix) and/or replaces the cached name with one produced
 * by the newly-installed encoding converter (namefix).
 *
 * Always returns VNODE_RETURNED so iteration continues.
 */
static int
hfs_changefs_callback(struct vnode *vp, void *cargs)
{
	ExtendedVCB *vcb;
	struct cnode *cp;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct hfs_changefs_cargs *args;
	int lockflags;
	int error;

	args = (struct hfs_changefs_cargs *)cargs;

	cp = VTOC(vp);
	vcb = HFSTOVCB(args->hfsmp);

	/* Look up the catalog record under a shared catalog lock. */
	lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
	error = cat_lookup(args->hfsmp, &cp->c_desc, 0, &cndesc, &cnattr, NULL, NULL);
	hfs_systemfile_unlock(args->hfsmp, lockflags);
	if (error) {
	        /*
	         * If we couldn't find this guy skip to the next one
	         */
	        if (args->namefix)
	        	cache_purge(vp);

	        return (VNODE_RETURNED);
	}
	/*
	 * Get the real uid/gid and perm mask from disk.
	 */
	if (args->permswitch || args->permfix) {
		cp->c_uid = cnattr.ca_uid;
		cp->c_gid = cnattr.ca_gid;
		cp->c_mode = cnattr.ca_mode;
	}
	/*
	 * If we're switching name converters then...
	 *   Remove the existing entry from the namei cache.
	 *   Update name to one based on new encoder.
	 */
	if (args->namefix) {
		cache_purge(vp);
		/* replace_desc consumes cndesc; no release needed on this path. */
		replace_desc(cp, &cndesc);

		if (cndesc.cd_cnid == kHFSRootFolderID) {
			/* Root folder: also refresh the volume name on the VCB. */
			strlcpy((char *)vcb->vcbVN, (const char *)cp->c_desc.cd_nameptr, NAME_MAX+1);
			cp->c_desc.cd_encoding = args->hfsmp->hfs_encoding;
		}
	} else {
		/* Descriptor was not consumed above; release its name buffer. */
		cat_releasedesc(&cndesc);
	}
	return (VNODE_RETURNED);
}
603
604 /* Change fs mount parameters */
/*
 * Change fs mount parameters.
 *
 * Applies new mount arguments to a live volume: timezone, default
 * uid/gid/mask, unknown-permissions mode, and (HFS standard only) the
 * name encoding.  If any of those changed in a way that affects cached
 * cnodes, every active vnode is revisited via hfs_changefs_callback().
 *
 * HFS_IN_CHANGEFS is held in hfs_flags for the duration of the update.
 * Returns 0 on success or an errno value.
 */
static int
hfs_changefs(struct mount *mp, struct hfs_mount_args *args)
{
	int retval = 0;
	int namefix, permfix, permswitch;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	hfs_to_unicode_func_t	get_unicode_func;
	unicode_to_hfs_func_t	get_hfsname_func;
	u_int32_t old_encoding = 0;
	struct hfs_changefs_cargs cargs;
	u_int32_t mount_flags;

	hfsmp = VFSTOHFS(mp);
	vcb = HFSTOVCB(hfsmp);
	mount_flags = (unsigned int)vfs_flags(mp);

	hfsmp->hfs_flags |= HFS_IN_CHANGEFS;

	/* permswitch is set when MNT_UNKNOWNPERMISSIONS differs from the
	 * volume's current HFS_UNKNOWN_PERMS state (i.e. the mode is toggling). */
	permswitch = (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) &&
	               ((mount_flags & MNT_UNKNOWNPERMISSIONS) == 0)) ||
	              (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) == 0) &&
	               (mount_flags & MNT_UNKNOWNPERMISSIONS)));

	/* The root filesystem must operate with actual permissions: */
	if (permswitch && (mount_flags & MNT_ROOTFS) && (mount_flags & MNT_UNKNOWNPERMISSIONS)) {
		vfs_clearflags(mp, (u_int64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS));	/* Just say "No". */
		retval = EINVAL;
		goto exit;
	}
	if (mount_flags & MNT_UNKNOWNPERMISSIONS)
		hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
	else
		hfsmp->hfs_flags &= ~HFS_UNKNOWN_PERMS;

	namefix = permfix = 0;

	/*
	 * Tracking of hot files requires up-to-date access times.  So if
	 * access time updates are disabled, we must also disable hot files.
	 */
	if (mount_flags & MNT_NOATIME) {
		(void) hfs_recording_suspend(hfsmp);
	}

	/* Change the timezone (Note: this affects all hfs volumes and hfs+ volume create dates) */
	if (args->hfs_timezone.tz_minuteswest != VNOVAL) {
		gTimeZone = args->hfs_timezone;
	}

	/* Change the default uid, gid and/or mask */
	if ((args->hfs_uid != (uid_t)VNOVAL) && (hfsmp->hfs_uid != args->hfs_uid)) {
		hfsmp->hfs_uid = args->hfs_uid;
		if (vcb->vcbSigWord == kHFSPlusSigWord)
			++permfix;
	}
	if ((args->hfs_gid != (gid_t)VNOVAL) && (hfsmp->hfs_gid != args->hfs_gid)) {
		hfsmp->hfs_gid = args->hfs_gid;
		if (vcb->vcbSigWord == kHFSPlusSigWord)
			++permfix;
	}
	if (args->hfs_mask != (mode_t)VNOVAL) {
		if (hfsmp->hfs_dir_mask != (args->hfs_mask & ALLPERMS)) {
			hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
			hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
			/* HFSFSMNT_NOXONFILES strips execute bits from the file mask. */
			if ((args->flags != VNOVAL) && (args->flags & HFSFSMNT_NOXONFILES))
				hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
			if (vcb->vcbSigWord == kHFSPlusSigWord)
				++permfix;
		}
	}

	/* Change the hfs encoding value (hfs only) */
	if ((vcb->vcbSigWord == kHFSSigWord)	&&
	    (args->hfs_encoding != (u_int32_t)VNOVAL)              &&
	    (hfsmp->hfs_encoding != args->hfs_encoding)) {

		retval = hfs_getconverter(args->hfs_encoding, &get_unicode_func, &get_hfsname_func);
		if (retval)
			goto exit;

		/*
		 * Connect the new hfs_get_unicode converter but leave
		 * the old hfs_get_hfsname converter in place so that
		 * we can lookup existing vnodes to get their correctly
		 * encoded names.
		 *
		 * When we're all finished, we can then connect the new
		 * hfs_get_hfsname converter and release our interest
		 * in the old converters.
		 */
		hfsmp->hfs_get_unicode = get_unicode_func;
		old_encoding = hfsmp->hfs_encoding;
		hfsmp->hfs_encoding = args->hfs_encoding;
		++namefix;
	}

	/* Nothing that affects cached cnodes changed; skip the vnode walk. */
	if (!(namefix || permfix || permswitch))
		goto exit;

	/* XXX 3762912 hack to support HFS filesystem 'owner' */
	if (permfix)
		vfs_setowner(mp,
		    hfsmp->hfs_uid == UNKNOWNUID ? KAUTH_UID_NONE : hfsmp->hfs_uid,
		    hfsmp->hfs_gid == UNKNOWNGID ? KAUTH_GID_NONE : hfsmp->hfs_gid);

	/*
	 * For each active vnode fix things that changed
	 *
	 * Note that we can visit a vnode more than once
	 * and we can race with fsync.
	 *
	 * hfs_changefs_callback will be called for each vnode
	 * hung off of this mount point
	 *
	 * The vnode will be properly referenced and unreferenced
	 * around the callback
	 */
	cargs.hfsmp = hfsmp;
	cargs.namefix = namefix;
	cargs.permfix = permfix;
	cargs.permswitch = permswitch;

	vnode_iterate(mp, 0, hfs_changefs_callback, (void *)&cargs);

	/*
	 * If we're switching name converters we can now
	 * connect the new hfs_get_hfsname converter and
	 * release our interest in the old converters.
	 */
	if (namefix) {
		/* get_hfsname_func was filled in by the successful
		 * hfs_getconverter() call that set namefix above. */
		hfsmp->hfs_get_hfsname = get_hfsname_func;
		vcb->volumeNameEncodingHint = args->hfs_encoding;
		(void) hfs_relconverter(old_encoding);
	}
exit:
	hfsmp->hfs_flags &= ~HFS_IN_CHANGEFS;
	return (retval);
}
744
745
/*
 * Argument package handed to hfs_reload_callback() for each vnode
 * visited by vnode_iterate() during hfs_reload().
 */
struct hfs_reload_cargs {
	struct hfsmount *hfsmp;		/* mount being reloaded */
	int		error;		/* first cat_idlookup error; stops the iteration */
};
750
751 static int
752 hfs_reload_callback(struct vnode *vp, void *cargs)
753 {
754 struct cnode *cp;
755 struct hfs_reload_cargs *args;
756 int lockflags;
757
758 args = (struct hfs_reload_cargs *)cargs;
759 /*
760 * flush all the buffers associated with this node
761 */
762 (void) buf_invalidateblks(vp, 0, 0, 0);
763
764 cp = VTOC(vp);
765 /*
766 * Remove any directory hints
767 */
768 if (vnode_isdir(vp))
769 hfs_reldirhints(cp, 0);
770
771 /*
772 * Re-read cnode data for all active vnodes (non-metadata files).
773 */
774 if (!vnode_issystem(vp) && !VNODE_IS_RSRC(vp) && (cp->c_fileid >= kHFSFirstUserCatalogNodeID)) {
775 struct cat_fork *datafork;
776 struct cat_desc desc;
777
778 datafork = cp->c_datafork ? &cp->c_datafork->ff_data : NULL;
779
780 /* lookup by fileID since name could have changed */
781 lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
782 args->error = cat_idlookup(args->hfsmp, cp->c_fileid, 0, &desc, &cp->c_attr, datafork);
783 hfs_systemfile_unlock(args->hfsmp, lockflags);
784 if (args->error) {
785 return (VNODE_RETURNED_DONE);
786 }
787
788 /* update cnode's catalog descriptor */
789 (void) replace_desc(cp, &desc);
790 }
791 return (VNODE_RETURNED);
792 }
793
794 /*
795 * Reload all incore data for a filesystem (used after running fsck on
796 * the root filesystem and finding things to fix). The filesystem must
797 * be mounted read-only.
798 *
799 * Things to do to update the mount:
800 * invalidate all cached meta-data.
801 * invalidate all inactive vnodes.
802 * invalidate all cached file data.
803 * re-read volume header from disk.
804 * re-load meta-file info (extents, file size).
805 * re-load B-tree header data.
806 * re-read cnode data for all active vnodes.
807 */
/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix).  The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 *	invalidate all cached meta-data.
 *	invalidate all inactive vnodes.
 *	invalidate all cached file data.
 *	re-read volume header from disk.
 *	re-load meta-file info (extents, file size).
 *	re-load B-tree header data.
 *	re-read cnode data for all active vnodes.
 */
int
hfs_reload(struct mount *mountp)
{
	register struct vnode *devvp;
	struct buf *bp;
	int error, i;
	struct hfsmount *hfsmp;
	struct HFSPlusVolumeHeader *vhp;
	ExtendedVCB *vcb;
	struct filefork *forkp;
	struct cat_desc cndesc;
	struct hfs_reload_cargs args;
	daddr64_t priIDSector;

	hfsmp = VFSTOHFS(mountp);
	vcb = HFSTOVCB(hfsmp);

	if (vcb->vcbSigWord == kHFSSigWord)
		return (EINVAL);	/* rooting from HFS is not supported! */

	/*
	 * Invalidate all cached meta-data.
	 */
	devvp = hfsmp->hfs_devvp;
	if (buf_invalidateblks(devvp, 0, 0, 0))
		panic("hfs_reload: dirty1");

	args.hfsmp = hfsmp;
	args.error = 0;
	/*
	 * hfs_reload_callback will be called for each vnode
	 * hung off of this mount point that can't be recycled...
	 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
	 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
	 * properly referenced and unreferenced around the callback
	 */
	vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, hfs_reload_callback, (void *)&args);

	if (args.error)
		return (args.error);

	/*
	 * Re-read VolumeHeader from disk.
	 */
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
			HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	error = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	if (error) {
        	if (bp != NULL)
        		buf_brelse(bp);
		return (error);
	}

	vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));

	/* Do a quick sanity check */
	if ((SWAP_BE16(vhp->signature) != kHFSPlusSigWord &&
	     SWAP_BE16(vhp->signature) != kHFSXSigWord) ||
	    (SWAP_BE16(vhp->version) != kHFSPlusVersion &&
	     SWAP_BE16(vhp->version) != kHFSXVersion) ||
	    SWAP_BE32(vhp->blockSize) != vcb->blockSize) {
		buf_brelse(bp);
		return (EIO);
	}

	/* Copy the (big-endian) on-disk header fields into the in-core VCB. */
	vcb->vcbLsMod		= to_bsd_time(SWAP_BE32(vhp->modifyDate));
	vcb->vcbAtrb		= SWAP_BE32 (vhp->attributes);
	vcb->vcbJinfoBlock  = SWAP_BE32(vhp->journalInfoBlock);
	vcb->vcbClpSiz		= SWAP_BE32 (vhp->rsrcClumpSize);
	vcb->vcbNxtCNID		= SWAP_BE32 (vhp->nextCatalogID);
	vcb->vcbVolBkUp		= to_bsd_time(SWAP_BE32(vhp->backupDate));
	vcb->vcbWrCnt		= SWAP_BE32 (vhp->writeCount);
	vcb->vcbFilCnt		= SWAP_BE32 (vhp->fileCount);
	vcb->vcbDirCnt		= SWAP_BE32 (vhp->folderCount);
	HFS_UPDATE_NEXT_ALLOCATION(vcb, SWAP_BE32 (vhp->nextAllocation));
	vcb->totalBlocks	= SWAP_BE32 (vhp->totalBlocks);
	vcb->freeBlocks		= SWAP_BE32 (vhp->freeBlocks);
	vcb->encodingsBitmap	= SWAP_BE64 (vhp->encodingsBitmap);
	bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
	vcb->localCreateDate	= SWAP_BE32 (vhp->createDate); /* hfs+ create date is in local time */

	/*
	 * Re-load meta-file vnode data (extent info, file size, etc).
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->extentsFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->extentsFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->extentsFile.clumpSize);


	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock	=
			SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount	=
			SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->catalogFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->catalogFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->catalogFile.clumpSize);

	/* The attributes B-tree is optional; only reload it if present. */
	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			forkp->ff_extents[i].startBlock	=
				SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
			forkp->ff_extents[i].blockCount	=
				SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
		}
		forkp->ff_size      = SWAP_BE64 (vhp->attributesFile.logicalSize);
		forkp->ff_blocks    = SWAP_BE32 (vhp->attributesFile.totalBlocks);
		forkp->ff_clumpsize = SWAP_BE32 (vhp->attributesFile.clumpSize);
	}

	forkp = VTOF((struct vnode *)vcb->allocationsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock	=
			SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount	=
			SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->allocationFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->allocationFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->allocationFile.clumpSize);

	buf_brelse(bp);
	vhp = NULL;

	/*
	 * Re-load B-tree header data
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
			return (error);
	}

	/* Reload the volume name */
	if ((error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, &cndesc, NULL, NULL)))
		return (error);
	vcb->volumeNameEncodingHint = cndesc.cd_encoding;
	/* NOTE(review): bcopy does not NUL-terminate vcbVN; presumably the
	 * prior contents guarantee termination — confirm before relying on it. */
	bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
	cat_releasedesc(&cndesc);

	/* Re-establish private/hidden directories. */
	hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
	hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

	/* In case any volume information changed to trigger a notification */
	hfs_generate_volume_notifications(hfsmp);

	return (0);
}
978
979
980
981 static void
982 hfs_syncer(void *arg0, void *unused)
983 {
984 #pragma unused(unused)
985
986 struct hfsmount *hfsmp = arg0;
987 clock_sec_t secs;
988 clock_usec_t usecs;
989 uint32_t delay = HFS_META_DELAY;
990 uint64_t now;
991 static int no_max=1;
992
993 clock_get_calendar_microtime(&secs, &usecs);
994 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
995
996 //
997 // If the amount of pending writes is more than our limit, wait
998 // for 2/3 of it to drain and then flush the journal.
999 //
1000 if (hfsmp->hfs_mp->mnt_pending_write_size > hfsmp->hfs_max_pending_io) {
1001 int counter=0;
1002 uint64_t pending_io, start, rate = 0;
1003
1004 no_max = 0;
1005
1006 hfs_start_transaction(hfsmp); // so we hold off any new i/o's
1007
1008 pending_io = hfsmp->hfs_mp->mnt_pending_write_size;
1009
1010 clock_get_calendar_microtime(&secs, &usecs);
1011 start = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
1012
1013 while(hfsmp->hfs_mp->mnt_pending_write_size > (pending_io/3) && counter++ < 500) {
1014 tsleep((caddr_t)hfsmp, PRIBIO, "hfs-wait-for-io-to-drain", 10);
1015 }
1016
1017 if (counter >= 500) {
1018 printf("hfs: timed out waiting for io to drain (%lld)\n", (int64_t)hfsmp->hfs_mp->mnt_pending_write_size);
1019 }
1020
1021 if (hfsmp->jnl) {
1022 journal_flush(hfsmp->jnl, FALSE);
1023 } else {
1024 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
1025 }
1026
1027 clock_get_calendar_microtime(&secs, &usecs);
1028 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
1029 hfsmp->hfs_last_sync_time = now;
1030 if (now != start) {
1031 rate = ((pending_io * 1000000ULL) / (now - start)); // yields bytes per second
1032 }
1033
1034 hfs_end_transaction(hfsmp);
1035
1036 //
1037 // If a reasonable amount of time elapsed then check the
1038 // i/o rate. If it's taking less than 1 second or more
1039 // than 2 seconds, adjust hfs_max_pending_io so that we
1040 // will allow about 1.5 seconds of i/o to queue up.
1041 //
1042 if (((now - start) >= 300000) && (rate != 0)) {
1043 uint64_t scale = (pending_io * 100) / rate;
1044
1045 if (scale < 100 || scale > 200) {
1046 // set it so that it should take about 1.5 seconds to drain
1047 hfsmp->hfs_max_pending_io = (rate * 150ULL) / 100ULL;
1048 }
1049 }
1050
1051 } else if ( ((now - hfsmp->hfs_last_sync_time) >= 5000000ULL)
1052 || (((now - hfsmp->hfs_last_sync_time) >= 100000LL)
1053 && ((now - hfsmp->hfs_last_sync_request_time) >= 100000LL)
1054 && (hfsmp->hfs_active_threads == 0)
1055 && (hfsmp->hfs_global_lock_nesting == 0))) {
1056
1057 //
1058 // Flush the journal if more than 5 seconds elapsed since
1059 // the last sync OR we have not sync'ed recently and the
1060 // last sync request time was more than 100 milliseconds
1061 // ago and no one is in the middle of a transaction right
1062 // now. Else we defer the sync and reschedule it.
1063 //
1064 if (hfsmp->jnl) {
1065 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
1066
1067 journal_flush(hfsmp->jnl, FALSE);
1068
1069 hfs_unlock_global (hfsmp);
1070 } else {
1071 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
1072 }
1073
1074 clock_get_calendar_microtime(&secs, &usecs);
1075 now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
1076 hfsmp->hfs_last_sync_time = now;
1077
1078 } else if (hfsmp->hfs_active_threads == 0) {
1079 uint64_t deadline;
1080
1081 clock_interval_to_deadline(delay, HFS_MILLISEC_SCALE, &deadline);
1082 thread_call_enter_delayed(hfsmp->hfs_syncer, deadline);
1083
1084 // note: we intentionally return early here and do not
1085 // decrement the sync_scheduled and sync_incomplete
1086 // variables because we rescheduled the timer.
1087
1088 return;
1089 }
1090
1091 //
1092 // NOTE: we decrement these *after* we're done the journal_flush() since
1093 // it can take a significant amount of time and so we don't want more
1094 // callbacks scheduled until we're done this one.
1095 //
1096 OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_scheduled);
1097 OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
1098 wakeup((caddr_t)&hfsmp->hfs_sync_incomplete);
1099 }
1100
1101
1102 extern int IOBSDIsMediaEjectable( const char *cdev_name );
1103
1104 /*
1105 * Initialization code for Red-Black Tree Allocator
1106 *
1107 * This function will build the two red-black trees necessary for allocating space
1108 * from the metadata zone as well as normal allocations. Currently, we use
1109 * an advisory read to get most of the data into the buffer cache.
1110 * This function is intended to be run in a separate thread so as not to slow down mount.
1111 *
1112 */
1113
void
hfs_initialize_allocator (struct hfsmount *hfsmp) {

#if CONFIG_HFS_ALLOC_RBTREE
	/* Result of GenerateTree(); nonzero means the tree build failed. */
	u_int32_t err;

	/*
	 * Take the allocation file lock.  Journal transactions will block until
	 * we're done here.
	 */
	int flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * GenerateTree assumes that the bitmap lock is held when you call the function.
	 * It will drop and re-acquire the lock periodically as needed to let other allocations
	 * through.  It returns with the bitmap lock held. Since we only maintain one tree,
	 * we don't need to specify a start block (always starts at 0).
	 */
	err = GenerateTree(hfsmp, hfsmp->totalBlocks, &flags, 1);
	if (err) {
		goto bailout;
	}
	/* Mark offset tree as built */
	hfsmp->extent_tree_flags |= HFS_ALLOC_RB_ACTIVE;

bailout:
	/*
	 * GenerateTree may drop the bitmap lock during operation in order to give other
	 * threads a chance to allocate blocks, but it will always return with the lock held, so
	 * we don't need to re-grab the lock in order to update the TREEBUILD_INFLIGHT bit.
	 */
	hfsmp->extent_tree_flags &= ~HFS_ALLOC_TREEBUILD_INFLIGHT;
	if (err != 0) {
		/*
		 * Wakeup any waiters on the allocation bitmap lock.
		 * hfs_teardown_allocator sleeps on &hfsmp->extent_tree_flags while
		 * TREEBUILD_INFLIGHT is set, so wake that channel on failure.
		 */
		wakeup((caddr_t)&hfsmp->extent_tree_flags);
	}

	hfs_systemfile_unlock(hfsmp, flags);
#else
#pragma unused (hfsmp)
#endif
}
1156
1157
1158 /*
1159 * Teardown code for the Red-Black Tree allocator.
1160 * This function consolidates the code which serializes with respect
1161 * to a thread that may be potentially still building the tree when we need to begin
1162 * tearing it down. Since the red-black tree may not be live when we enter this function
1163 * we return:
1164 * 1 -> Tree was live.
1165 * 0 -> Tree was not active at time of call.
1166 */
1167
int
hfs_teardown_allocator (struct hfsmount *hfsmp) {
	/* 1 if the red-black tree was live and we destroyed it; 0 otherwise. */
	int rb_used = 0;

#if CONFIG_HFS_ALLOC_RBTREE

	int flags = 0;

	/*
	 * Check to see if the tree-generation is still on-going.
	 * If it is, then block until it's done.
	 */

	flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);


	while (hfsmp->extent_tree_flags & HFS_ALLOC_TREEBUILD_INFLIGHT) {
		/* Tell the builder thread that a teardown is waiting on it. */
		hfsmp->extent_tree_flags |= HFS_ALLOC_TEARDOWN_INFLIGHT;

		/*
		 * Sleep (uninterruptibly) on the extent_tree_flags channel,
		 * dropping the allocation file's cnode rwlock while we wait;
		 * the builder wakes this channel when it finishes or fails.
		 */
		lck_rw_sleep(&(VTOC(hfsmp->hfs_allocation_vp))->c_rwlock, LCK_SLEEP_EXCLUSIVE,
				&hfsmp->extent_tree_flags, THREAD_UNINT);
	}

	if (hfs_isrbtree_active (hfsmp)) {
		rb_used = 1;

		/* Tear down the RB Trees while we have the bitmap locked */
		DestroyTrees(hfsmp);

	}

	hfs_systemfile_unlock(hfsmp, flags);
#else
	#pragma unused (hfsmp)
#endif
	return rb_used;

}
1206
1207
1208 static int hfs_root_unmounted_cleanly = 0;
1209
1210 SYSCTL_DECL(_vfs_generic);
1211 SYSCTL_INT(_vfs_generic, OID_AUTO, root_unmounted_cleanly, CTLFLAG_RD, &hfs_root_unmounted_cleanly, 0, "Root filesystem was unmounted cleanly");
1212
1213 /*
1214 * Common code for mount and mountroot
1215 */
1216 int
1217 hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
1218 int journal_replay_only, vfs_context_t context)
1219 {
1220 struct proc *p = vfs_context_proc(context);
1221 int retval = E_NONE;
1222 struct hfsmount *hfsmp = NULL;
1223 struct buf *bp;
1224 dev_t dev;
1225 HFSMasterDirectoryBlock *mdbp = NULL;
1226 int ronly;
1227 #if QUOTA
1228 int i;
1229 #endif
1230 int mntwrapper;
1231 kauth_cred_t cred;
1232 u_int64_t disksize;
1233 daddr64_t log_blkcnt;
1234 u_int32_t log_blksize;
1235 u_int32_t phys_blksize;
1236 u_int32_t minblksize;
1237 u_int32_t iswritable;
1238 daddr64_t mdb_offset;
1239 int isvirtual = 0;
1240 int isroot = 0;
1241 int isssd;
1242 #if CONFIG_HFS_ALLOC_RBTREE
1243 thread_t allocator_thread;
1244 #endif
1245
1246 if (args == NULL) {
1247 /* only hfs_mountroot passes us NULL as the 'args' argument */
1248 isroot = 1;
1249 }
1250
1251 ronly = vfs_isrdonly(mp);
1252 dev = vnode_specrdev(devvp);
1253 cred = p ? vfs_context_ucred(context) : NOCRED;
1254 mntwrapper = 0;
1255
1256 bp = NULL;
1257 hfsmp = NULL;
1258 mdbp = NULL;
1259 minblksize = kHFSBlockSize;
1260
1261 /* Advisory locking should be handled at the VFS layer */
1262 vfs_setlocklocal(mp);
1263
1264 /* Get the logical block size (treated as physical block size everywhere) */
1265 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&log_blksize, 0, context)) {
1266 if (HFS_MOUNT_DEBUG) {
1267 printf("hfs_mountfs: DKIOCGETBLOCKSIZE failed\n");
1268 }
1269 retval = ENXIO;
1270 goto error_exit;
1271 }
1272 if (log_blksize == 0 || log_blksize > 1024*1024*1024) {
1273 printf("hfs: logical block size 0x%x looks bad. Not mounting.\n", log_blksize);
1274 retval = ENXIO;
1275 goto error_exit;
1276 }
1277
1278 /* Get the physical block size. */
1279 retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context);
1280 if (retval) {
1281 if ((retval != ENOTSUP) && (retval != ENOTTY)) {
1282 if (HFS_MOUNT_DEBUG) {
1283 printf("hfs_mountfs: DKIOCGETPHYSICALBLOCKSIZE failed\n");
1284 }
1285 retval = ENXIO;
1286 goto error_exit;
1287 }
1288 /* If device does not support this ioctl, assume that physical
1289 * block size is same as logical block size
1290 */
1291 phys_blksize = log_blksize;
1292 }
1293 if (phys_blksize == 0 || phys_blksize > 1024*1024*1024) {
1294 printf("hfs: physical block size 0x%x looks bad. Not mounting.\n", phys_blksize);
1295 retval = ENXIO;
1296 goto error_exit;
1297 }
1298
1299 /* Switch to 512 byte sectors (temporarily) */
1300 if (log_blksize > 512) {
1301 u_int32_t size512 = 512;
1302
1303 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, context)) {
1304 if (HFS_MOUNT_DEBUG) {
1305 printf("hfs_mountfs: DKIOCSETBLOCKSIZE failed \n");
1306 }
1307 retval = ENXIO;
1308 goto error_exit;
1309 }
1310 }
1311 /* Get the number of 512 byte physical blocks. */
1312 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1313 /* resetting block size may fail if getting block count did */
1314 (void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context);
1315 if (HFS_MOUNT_DEBUG) {
1316 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT failed\n");
1317 }
1318 retval = ENXIO;
1319 goto error_exit;
1320 }
1321 /* Compute an accurate disk size (i.e. within 512 bytes) */
1322 disksize = (u_int64_t)log_blkcnt * (u_int64_t)512;
1323
1324 /*
1325 * On Tiger it is not necessary to switch the device
1326 * block size to be 4k if there are more than 31-bits
1327 * worth of blocks but to insure compatibility with
1328 * pre-Tiger systems we have to do it.
1329 *
1330 * If the device size is not a multiple of 4K (8 * 512), then
1331 * switching the logical block size isn't going to help because
1332 * we will be unable to write the alternate volume header.
1333 * In this case, just leave the logical block size unchanged.
1334 */
1335 if (log_blkcnt > 0x000000007fffffff && (log_blkcnt & 7) == 0) {
1336 minblksize = log_blksize = 4096;
1337 if (phys_blksize < log_blksize)
1338 phys_blksize = log_blksize;
1339 }
1340
1341 /*
1342 * The cluster layer is not currently prepared to deal with a logical
1343 * block size larger than the system's page size. (It can handle
1344 * blocks per page, but not multiple pages per block.) So limit the
1345 * logical block size to the page size.
1346 */
1347 if (log_blksize > PAGE_SIZE)
1348 log_blksize = PAGE_SIZE;
1349
1350 /* Now switch to our preferred physical block size. */
1351 if (log_blksize > 512) {
1352 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1353 if (HFS_MOUNT_DEBUG) {
1354 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (2) failed\n");
1355 }
1356 retval = ENXIO;
1357 goto error_exit;
1358 }
1359 /* Get the count of physical blocks. */
1360 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1361 if (HFS_MOUNT_DEBUG) {
1362 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (2) failed\n");
1363 }
1364 retval = ENXIO;
1365 goto error_exit;
1366 }
1367 }
1368 /*
1369 * At this point:
1370 * minblksize is the minimum physical block size
1371 * log_blksize has our preferred physical block size
1372 * log_blkcnt has the total number of physical blocks
1373 */
1374
1375 mdb_offset = (daddr64_t)HFS_PRI_SECTOR(log_blksize);
1376 if ((retval = (int)buf_meta_bread(devvp,
1377 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)),
1378 phys_blksize, cred, &bp))) {
1379 if (HFS_MOUNT_DEBUG) {
1380 printf("hfs_mountfs: buf_meta_bread failed with %d\n", retval);
1381 }
1382 goto error_exit;
1383 }
1384 MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK);
1385 if (mdbp == NULL) {
1386 retval = ENOMEM;
1387 if (HFS_MOUNT_DEBUG) {
1388 printf("hfs_mountfs: MALLOC failed\n");
1389 }
1390 goto error_exit;
1391 }
1392 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize);
1393 buf_brelse(bp);
1394 bp = NULL;
1395
1396 MALLOC(hfsmp, struct hfsmount *, sizeof(struct hfsmount), M_HFSMNT, M_WAITOK);
1397 if (hfsmp == NULL) {
1398 if (HFS_MOUNT_DEBUG) {
1399 printf("hfs_mountfs: MALLOC (2) failed\n");
1400 }
1401 retval = ENOMEM;
1402 goto error_exit;
1403 }
1404 bzero(hfsmp, sizeof(struct hfsmount));
1405
1406 hfs_chashinit_finish(hfsmp);
1407
1408 /*
1409 * See if the disk is a solid state device. We need this to decide what to do about
1410 * hotfiles.
1411 */
1412 if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, context) == 0) {
1413 if (isssd) {
1414 hfsmp->hfs_flags |= HFS_SSD;
1415 }
1416 }
1417
1418
1419 /*
1420 * Init the volume information structure
1421 */
1422
1423 lck_mtx_init(&hfsmp->hfs_mutex, hfs_mutex_group, hfs_lock_attr);
1424 lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr);
1425 lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr);
1426 lck_rw_init(&hfsmp->hfs_insync, hfs_rwlock_group, hfs_lock_attr);
1427 lck_spin_init(&hfsmp->vcbFreeExtLock, hfs_spinlock_group, hfs_lock_attr);
1428
1429 vfs_setfsprivate(mp, hfsmp);
1430 hfsmp->hfs_mp = mp; /* Make VFSTOHFS work */
1431 hfsmp->hfs_raw_dev = vnode_specrdev(devvp);
1432 hfsmp->hfs_devvp = devvp;
1433 vnode_ref(devvp); /* Hold a ref on the device, dropped when hfsmp is freed. */
1434 hfsmp->hfs_logical_block_size = log_blksize;
1435 hfsmp->hfs_logical_block_count = log_blkcnt;
1436 hfsmp->hfs_physical_block_size = phys_blksize;
1437 hfsmp->hfs_log_per_phys = (phys_blksize / log_blksize);
1438 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1439 if (ronly)
1440 hfsmp->hfs_flags |= HFS_READ_ONLY;
1441 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS)
1442 hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
1443
1444 #if QUOTA
1445 for (i = 0; i < MAXQUOTAS; i++)
1446 dqfileinit(&hfsmp->hfs_qfiles[i]);
1447 #endif
1448
1449 if (args) {
1450 hfsmp->hfs_uid = (args->hfs_uid == (uid_t)VNOVAL) ? UNKNOWNUID : args->hfs_uid;
1451 if (hfsmp->hfs_uid == 0xfffffffd) hfsmp->hfs_uid = UNKNOWNUID;
1452 hfsmp->hfs_gid = (args->hfs_gid == (gid_t)VNOVAL) ? UNKNOWNGID : args->hfs_gid;
1453 if (hfsmp->hfs_gid == 0xfffffffd) hfsmp->hfs_gid = UNKNOWNGID;
1454 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1455 if (args->hfs_mask != (mode_t)VNOVAL) {
1456 hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
1457 if (args->flags & HFSFSMNT_NOXONFILES) {
1458 hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
1459 } else {
1460 hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
1461 }
1462 } else {
1463 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1464 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1465 }
1466 if ((args->flags != (int)VNOVAL) && (args->flags & HFSFSMNT_WRAPPER))
1467 mntwrapper = 1;
1468 } else {
1469 /* Even w/o explicit mount arguments, MNT_UNKNOWNPERMISSIONS requires setting up uid, gid, and mask: */
1470 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS) {
1471 hfsmp->hfs_uid = UNKNOWNUID;
1472 hfsmp->hfs_gid = UNKNOWNGID;
1473 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1474 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1475 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1476 }
1477 }
1478
1479 /* Find out if disk media is writable. */
1480 if (VNOP_IOCTL(devvp, DKIOCISWRITABLE, (caddr_t)&iswritable, 0, context) == 0) {
1481 if (iswritable)
1482 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1483 else
1484 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1485 }
1486
1487 // record the current time at which we're mounting this volume
1488 struct timeval tv;
1489 microtime(&tv);
1490 hfsmp->hfs_mount_time = tv.tv_sec;
1491
1492 /* Mount a standard HFS disk */
1493 if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) &&
1494 (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord))) {
1495
1496 /* On 10.6 and beyond, non read-only mounts for HFS standard vols get rejected */
1497 if (vfs_isrdwr(mp)) {
1498 retval = EROFS;
1499 goto error_exit;
1500 }
1501
1502 printf("hfs_mountfs: Mounting HFS Standard volumes was deprecated in Mac OS 10.7 \n");
1503
1504 /* Treat it as if it's read-only and not writeable */
1505 hfsmp->hfs_flags |= HFS_READ_ONLY;
1506 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1507
1508 /* If only journal replay is requested, exit immediately */
1509 if (journal_replay_only) {
1510 retval = 0;
1511 goto error_exit;
1512 }
1513
1514 if ((vfs_flags(mp) & MNT_ROOTFS)) {
1515 retval = EINVAL; /* Cannot root from HFS standard disks */
1516 goto error_exit;
1517 }
1518 /* HFS disks can only use 512 byte physical blocks */
1519 if (log_blksize > kHFSBlockSize) {
1520 log_blksize = kHFSBlockSize;
1521 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1522 retval = ENXIO;
1523 goto error_exit;
1524 }
1525 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1526 retval = ENXIO;
1527 goto error_exit;
1528 }
1529 hfsmp->hfs_logical_block_size = log_blksize;
1530 hfsmp->hfs_logical_block_count = log_blkcnt;
1531 hfsmp->hfs_physical_block_size = log_blksize;
1532 hfsmp->hfs_log_per_phys = 1;
1533 }
1534 if (args) {
1535 hfsmp->hfs_encoding = args->hfs_encoding;
1536 HFSTOVCB(hfsmp)->volumeNameEncodingHint = args->hfs_encoding;
1537
1538 /* establish the timezone */
1539 gTimeZone = args->hfs_timezone;
1540 }
1541
1542 retval = hfs_getconverter(hfsmp->hfs_encoding, &hfsmp->hfs_get_unicode,
1543 &hfsmp->hfs_get_hfsname);
1544 if (retval)
1545 goto error_exit;
1546
1547 retval = hfs_MountHFSVolume(hfsmp, mdbp, p);
1548 if (retval)
1549 (void) hfs_relconverter(hfsmp->hfs_encoding);
1550
1551 } else /* Mount an HFS Plus disk */ {
1552 HFSPlusVolumeHeader *vhp;
1553 off_t embeddedOffset;
1554 int jnl_disable = 0;
1555
1556 /* Get the embedded Volume Header */
1557 if (SWAP_BE16(mdbp->drEmbedSigWord) == kHFSPlusSigWord) {
1558 embeddedOffset = SWAP_BE16(mdbp->drAlBlSt) * kHFSBlockSize;
1559 embeddedOffset += (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.startBlock) *
1560 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1561
1562 /*
1563 * If the embedded volume doesn't start on a block
1564 * boundary, then switch the device to a 512-byte
1565 * block size so everything will line up on a block
1566 * boundary.
1567 */
1568 if ((embeddedOffset % log_blksize) != 0) {
1569 printf("hfs_mountfs: embedded volume offset not"
1570 " a multiple of physical block size (%d);"
1571 " switching to 512\n", log_blksize);
1572 log_blksize = 512;
1573 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE,
1574 (caddr_t)&log_blksize, FWRITE, context)) {
1575
1576 if (HFS_MOUNT_DEBUG) {
1577 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (3) failed\n");
1578 }
1579 retval = ENXIO;
1580 goto error_exit;
1581 }
1582 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT,
1583 (caddr_t)&log_blkcnt, 0, context)) {
1584 if (HFS_MOUNT_DEBUG) {
1585 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (3) failed\n");
1586 }
1587 retval = ENXIO;
1588 goto error_exit;
1589 }
1590 /* Note: relative block count adjustment */
1591 hfsmp->hfs_logical_block_count *=
1592 hfsmp->hfs_logical_block_size / log_blksize;
1593
1594 /* Update logical /physical block size */
1595 hfsmp->hfs_logical_block_size = log_blksize;
1596 hfsmp->hfs_physical_block_size = log_blksize;
1597 phys_blksize = log_blksize;
1598 hfsmp->hfs_log_per_phys = 1;
1599 }
1600
1601 disksize = (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.blockCount) *
1602 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1603
1604 hfsmp->hfs_logical_block_count = disksize / log_blksize;
1605
1606 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1607 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1608 phys_blksize, cred, &bp);
1609 if (retval) {
1610 if (HFS_MOUNT_DEBUG) {
1611 printf("hfs_mountfs: buf_meta_bread (2) failed with %d\n", retval);
1612 }
1613 goto error_exit;
1614 }
1615 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, 512);
1616 buf_brelse(bp);
1617 bp = NULL;
1618 vhp = (HFSPlusVolumeHeader*) mdbp;
1619
1620 } else /* pure HFS+ */ {
1621 embeddedOffset = 0;
1622 vhp = (HFSPlusVolumeHeader*) mdbp;
1623 }
1624
1625 if (isroot) {
1626 hfs_root_unmounted_cleanly = (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) != 0;
1627 }
1628
1629 /*
1630 * On inconsistent disks, do not allow read-write mount
1631 * unless it is the boot volume being mounted. We also
1632 * always want to replay the journal if the journal_replay_only
1633 * flag is set because that will (most likely) get the
1634 * disk into a consistent state before fsck_hfs starts
1635 * looking at it.
1636 */
1637 if ( !(vfs_flags(mp) & MNT_ROOTFS)
1638 && (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask)
1639 && !journal_replay_only
1640 && !(hfsmp->hfs_flags & HFS_READ_ONLY)) {
1641
1642 if (HFS_MOUNT_DEBUG) {
1643 printf("hfs_mountfs: failed to mount non-root inconsistent disk\n");
1644 }
1645 retval = EINVAL;
1646 goto error_exit;
1647 }
1648
1649
1650 // XXXdbg
1651 //
1652 hfsmp->jnl = NULL;
1653 hfsmp->jvp = NULL;
1654 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS) &&
1655 args->journal_disable) {
1656 jnl_disable = 1;
1657 }
1658
1659 //
1660 // We only initialize the journal here if the last person
1661 // to mount this volume was journaling aware. Otherwise
1662 // we delay journal initialization until later at the end
1663 // of hfs_MountHFSPlusVolume() because the last person who
1664 // mounted it could have messed things up behind our back
1665 // (so we need to go find the .journal file, make sure it's
1666 // the right size, re-sync up if it was moved, etc).
1667 //
1668 if ( (SWAP_BE32(vhp->lastMountedVersion) == kHFSJMountVersion)
1669 && (SWAP_BE32(vhp->attributes) & kHFSVolumeJournaledMask)
1670 && !jnl_disable) {
1671
1672 // if we're able to init the journal, mark the mount
1673 // point as journaled.
1674 //
1675 if ((retval = hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred)) == 0) {
1676 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1677 } else {
1678 if (retval == EROFS) {
1679 // EROFS is a special error code that means the volume has an external
1680 // journal which we couldn't find. in that case we do not want to
1681 // rewrite the volume header - we'll just refuse to mount the volume.
1682 if (HFS_MOUNT_DEBUG) {
1683 printf("hfs_mountfs: hfs_early_journal_init indicated external jnl \n");
1684 }
1685 retval = EINVAL;
1686 goto error_exit;
1687 }
1688
1689 // if the journal failed to open, then set the lastMountedVersion
1690 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1691 // of just bailing out because the volume is journaled.
1692 if (!ronly) {
1693 if (HFS_MOUNT_DEBUG) {
1694 printf("hfs_mountfs: hfs_early_journal_init failed, setting to FSK \n");
1695 }
1696
1697 HFSPlusVolumeHeader *jvhp;
1698
1699 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1700
1701 if (mdb_offset == 0) {
1702 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1703 }
1704
1705 bp = NULL;
1706 retval = (int)buf_meta_bread(devvp,
1707 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1708 phys_blksize, cred, &bp);
1709 if (retval == 0) {
1710 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1711
1712 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1713 printf ("hfs(1): Journal replay fail. Writing lastMountVersion as FSK!\n");
1714 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1715 buf_bwrite(bp);
1716 } else {
1717 buf_brelse(bp);
1718 }
1719 bp = NULL;
1720 } else if (bp) {
1721 buf_brelse(bp);
1722 // clear this so the error exit path won't try to use it
1723 bp = NULL;
1724 }
1725 }
1726
1727 // if this isn't the root device just bail out.
1728 // If it is the root device we just continue on
1729 // in the hopes that fsck_hfs will be able to
1730 // fix any damage that exists on the volume.
1731 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1732 if (HFS_MOUNT_DEBUG) {
1733 printf("hfs_mountfs: hfs_early_journal_init failed, erroring out \n");
1734 }
1735 retval = EINVAL;
1736 goto error_exit;
1737 }
1738 }
1739 }
1740 // XXXdbg
1741
1742 /* Either the journal is replayed successfully, or there
1743 * was nothing to replay, or no journal exists. In any case,
1744 * return success.
1745 */
1746 if (journal_replay_only) {
1747 retval = 0;
1748 goto error_exit;
1749 }
1750
1751 (void) hfs_getconverter(0, &hfsmp->hfs_get_unicode, &hfsmp->hfs_get_hfsname);
1752
1753 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1754 /*
1755 * If the backend didn't like our physical blocksize
1756 * then retry with physical blocksize of 512.
1757 */
1758 if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) {
1759 printf("hfs_mountfs: could not use physical block size "
1760 "(%d) switching to 512\n", log_blksize);
1761 log_blksize = 512;
1762 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1763 if (HFS_MOUNT_DEBUG) {
1764 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (4) failed \n");
1765 }
1766 retval = ENXIO;
1767 goto error_exit;
1768 }
1769 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1770 if (HFS_MOUNT_DEBUG) {
1771 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (4) failed \n");
1772 }
1773 retval = ENXIO;
1774 goto error_exit;
1775 }
1776 devvp->v_specsize = log_blksize;
1777 /* Note: relative block count adjustment (in case this is an embedded volume). */
1778 hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize;
1779 hfsmp->hfs_logical_block_size = log_blksize;
1780 hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize;
1781
1782 if (hfsmp->jnl && hfsmp->jvp == devvp) {
1783 // close and re-open this with the new block size
1784 journal_close(hfsmp->jnl);
1785 hfsmp->jnl = NULL;
1786 if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) {
1787 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1788 } else {
1789 // if the journal failed to open, then set the lastMountedVersion
1790 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1791 // of just bailing out because the volume is journaled.
1792 if (!ronly) {
1793 if (HFS_MOUNT_DEBUG) {
1794 printf("hfs_mountfs: hfs_early_journal_init (2) resetting.. \n");
1795 }
1796 HFSPlusVolumeHeader *jvhp;
1797
1798 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1799
1800 if (mdb_offset == 0) {
1801 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1802 }
1803
1804 bp = NULL;
1805 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1806 phys_blksize, cred, &bp);
1807 if (retval == 0) {
1808 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1809
1810 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1811 printf ("hfs(2): Journal replay fail. Writing lastMountVersion as FSK!\n");
1812 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1813 buf_bwrite(bp);
1814 } else {
1815 buf_brelse(bp);
1816 }
1817 bp = NULL;
1818 } else if (bp) {
1819 buf_brelse(bp);
1820 // clear this so the error exit path won't try to use it
1821 bp = NULL;
1822 }
1823 }
1824
1825 // if this isn't the root device just bail out.
1826 // If it is the root device we just continue on
1827 // in the hopes that fsck_hfs will be able to
1828 // fix any damage that exists on the volume.
1829 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1830 if (HFS_MOUNT_DEBUG) {
1831 printf("hfs_mountfs: hfs_early_journal_init (2) failed \n");
1832 }
1833 retval = EINVAL;
1834 goto error_exit;
1835 }
1836 }
1837 }
1838
1839 /* Try again with a smaller block size... */
1840 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1841 if (retval && HFS_MOUNT_DEBUG) {
1842 printf("hfs_MountHFSPlusVolume (late) returned %d\n",retval);
1843 }
1844 }
1845 if (retval)
1846 (void) hfs_relconverter(0);
1847 }
1848
1849 // save off a snapshot of the mtime from the previous mount
1850 // (for matador).
1851 hfsmp->hfs_last_mounted_mtime = hfsmp->hfs_mtime;
1852
1853 if ( retval ) {
1854 if (HFS_MOUNT_DEBUG) {
1855 printf("hfs_mountfs: encountered failure %d \n", retval);
1856 }
1857 goto error_exit;
1858 }
1859
1860 mp->mnt_vfsstat.f_fsid.val[0] = (long)dev;
1861 mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
1862 vfs_setmaxsymlen(mp, 0);
1863
1864 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSNATIVEXATTR;
1865 #if NAMEDSTREAMS
1866 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1867 #endif
1868 if (!(hfsmp->hfs_flags & HFS_STANDARD)) {
1869 /* Tell VFS that we support directory hard links. */
1870 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSDIRLINKS;
1871 } else {
1872 /* HFS standard doesn't support extended readdir! */
1873 mount_set_noreaddirext (mp);
1874 }
1875
1876 if (args) {
1877 /*
1878 * Set the free space warning levels for a non-root volume:
1879 *
1880 * Set the "danger" limit to 1% of the volume size or 100MB, whichever
1881 * is less. Set the "warning" limit to 2% of the volume size or 150MB,
1882 * whichever is less. And last, set the "desired" freespace level to
1883 * to 3% of the volume size or 200MB, whichever is less.
1884 */
1885 hfsmp->hfs_freespace_notify_dangerlimit =
1886 MIN(HFS_VERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1887 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_VERYLOWDISKTRIGGERFRACTION);
1888 hfsmp->hfs_freespace_notify_warninglimit =
1889 MIN(HFS_LOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1890 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKTRIGGERFRACTION);
1891 hfsmp->hfs_freespace_notify_desiredlevel =
1892 MIN(HFS_LOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1893 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKSHUTOFFFRACTION);
1894 } else {
1895 /*
1896 * Set the free space warning levels for the root volume:
1897 *
1898 * Set the "danger" limit to 5% of the volume size or 512MB, whichever
1899 * is less. Set the "warning" limit to 10% of the volume size or 1GB,
1900 * whichever is less. And last, set the "desired" freespace level to
1901 * to 11% of the volume size or 1.25GB, whichever is less.
1902 */
1903 hfsmp->hfs_freespace_notify_dangerlimit =
1904 MIN(HFS_ROOTVERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1905 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTVERYLOWDISKTRIGGERFRACTION);
1906 hfsmp->hfs_freespace_notify_warninglimit =
1907 MIN(HFS_ROOTLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1908 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKTRIGGERFRACTION);
1909 hfsmp->hfs_freespace_notify_desiredlevel =
1910 MIN(HFS_ROOTLOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1911 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKSHUTOFFFRACTION);
1912 };
1913
1914 /* Check if the file system exists on virtual device, like disk image */
1915 if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, context) == 0) {
1916 if (isvirtual) {
1917 hfsmp->hfs_flags |= HFS_VIRTUAL_DEVICE;
1918 }
1919 }
1920
1921 /* do not allow ejectability checks on the root device */
1922 if (isroot == 0) {
1923 if ((hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) == 0 &&
1924 IOBSDIsMediaEjectable(mp->mnt_vfsstat.f_mntfromname)) {
1925 hfsmp->hfs_max_pending_io = 4096*1024; // a reasonable value to start with.
1926 hfsmp->hfs_syncer = thread_call_allocate(hfs_syncer, hfsmp);
1927 if (hfsmp->hfs_syncer == NULL) {
1928 printf("hfs: failed to allocate syncer thread callback for %s (%s)\n",
1929 mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname);
1930 }
1931 }
1932 }
1933
1934 #if CONFIG_HFS_ALLOC_RBTREE
1935 /*
1936 * We spawn a thread to create the pair of red-black trees for this volume.
1937 * However, in so doing, we must be careful to ensure that if this thread is still
1938 * running after mount has finished, it doesn't interfere with an unmount. Specifically,
1939 * we'll need to set a bit that indicates we're in progress building the trees here.
1940 * Unmount will check for this bit, and then if it's set, mark a corresponding bit that
1941 * notifies the tree generation code that an unmount is waiting. Also mark the bit that
1942 * indicates the tree is live and operating.
1943 *
1944 * Only do this if we're operating on a read-write mount (we wouldn't care for read-only).
1945 */
1946
1947 if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
1948 hfsmp->extent_tree_flags |= (HFS_ALLOC_TREEBUILD_INFLIGHT | HFS_ALLOC_RB_ENABLED);
1949
1950 /* Initialize EOF counter so that the thread can assume it started at initial values */
1951 hfsmp->offset_block_end = 0;
1952 InitTree(hfsmp);
1953
1954 kernel_thread_start ((thread_continue_t) hfs_initialize_allocator , hfsmp, &allocator_thread);
1955 thread_deallocate(allocator_thread);
1956 }
1957
1958 #endif
1959
1960 /*
1961 * Start looking for free space to drop below this level and generate a
1962 * warning immediately if needed:
1963 */
1964 hfsmp->hfs_notification_conditions = 0;
1965 hfs_generate_volume_notifications(hfsmp);
1966
1967 if (ronly == 0) {
1968 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1969 }
1970 FREE(mdbp, M_TEMP);
1971 return (0);
1972
1973 error_exit:
1974 if (bp)
1975 buf_brelse(bp);
1976 if (mdbp)
1977 FREE(mdbp, M_TEMP);
1978
1979 if (hfsmp && hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
1980 vnode_clearmountedon(hfsmp->jvp);
1981 (void)VNOP_CLOSE(hfsmp->jvp, ronly ? FREAD : FREAD|FWRITE, vfs_context_kernel());
1982 hfsmp->jvp = NULL;
1983 }
1984 if (hfsmp) {
1985 if (hfsmp->hfs_devvp) {
1986 vnode_rele(hfsmp->hfs_devvp);
1987 }
1988 hfs_delete_chash(hfsmp);
1989
1990 FREE(hfsmp, M_HFSMNT);
1991 vfs_setfsprivate(mp, NULL);
1992 }
1993 return (retval);
1994 }
1995
1996
1997 /*
1998 * Make a filesystem operational.
1999 * Nothing to do at the moment.
2000 */
2001 /* ARGSUSED */
2002 static int
2003 hfs_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context)
2004 {
2005 return (0);
2006 }
2007
2008
/*
 * Unmount an HFS volume (VFS unmount entry point).
 *
 * Teardown order: flush user files, stop the background syncer and wait
 * for in-flight callbacks, fsync each metadata B-tree file inside a
 * transaction, record clean/dirty state in the volume attributes, flush
 * the volume header and journal, release metadata vnodes and the
 * journal device, and finally free the mount structure.
 *
 * With MNT_FORCE most intermediate failures are ignored so teardown
 * proceeds regardless; without it, the first failure aborts the unmount
 * and returns that error.
 */
int
hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	int retval = E_NONE;
	int flags;
	int force;
	int started_tr = 0;	/* nonzero once hfs_start_transaction succeeded */
	int rb_used = 0;	/* nonzero if the red-black allocator tree was live */

	flags = 0;
	force = 0;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		force = 1;
	}

	/* Push out all user files; bail early on failure unless forced. */
	if ((retval = hfs_flushfiles(mp, flags, p)) && !force)
		return (retval);

	if (hfsmp->hfs_flags & HFS_METADATA_ZONE)
		(void) hfs_recording_suspend(hfsmp);

	/*
	 * Cancel any pending timers for this volume.  Then wait for any timers
	 * which have fired, but whose callbacks have not yet completed.
	 */
	if (hfsmp->hfs_syncer)
	{
		struct timespec ts = {0, 100000000};	/* 0.1 seconds */

		/*
		 * Cancel any timers that have been scheduled, but have not
		 * fired yet.  NOTE: The kernel considers a timer complete as
		 * soon as it starts your callback, so the kernel does not
		 * keep track of the number of callbacks in progress.
		 */
		if (thread_call_cancel(hfsmp->hfs_syncer))
			OSDecrementAtomic((volatile SInt32 *)&hfsmp->hfs_sync_incomplete);
		thread_call_free(hfsmp->hfs_syncer);
		hfsmp->hfs_syncer = NULL;

		/*
		 * This waits for all of the callbacks that were entered before
		 * we did thread_call_cancel above, but have not completed yet.
		 */
		while(hfsmp->hfs_sync_incomplete > 0)
		{
			/* Sleep in 0.1s slices until the counter drains to zero. */
			msleep((caddr_t)&hfsmp->hfs_sync_incomplete, NULL, PWAIT, "hfs_unmount", &ts);
		}

		if (hfsmp->hfs_sync_incomplete < 0)
			panic("hfs_unmount: pm_sync_incomplete underflow!\n");
	}

#if CONFIG_HFS_ALLOC_RBTREE
	rb_used = hfs_teardown_allocator(hfsmp);
#endif

	/*
	 * Flush out the b-trees, volume bitmap and Volume Header
	 */
	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
		retval = hfs_start_transaction(hfsmp);
		if (retval == 0) {
			started_tr = 1;
		} else if (!force) {
			goto err_exit;
		}

		/* Optional startup file. */
		if (hfsmp->hfs_startup_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_startup_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_startup_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_startup_vp));
			if (retval && !force)
				goto err_exit;
		}

		/* Optional attributes B-tree. */
		if (hfsmp->hfs_attribute_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_attribute_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_attribute_vp));
			if (retval && !force)
				goto err_exit;
		}

		(void) hfs_lock(VTOC(hfsmp->hfs_catalog_vp), HFS_EXCLUSIVE_LOCK);
		retval = hfs_fsync(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, p);
		hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
		if (retval && !force)
			goto err_exit;

		(void) hfs_lock(VTOC(hfsmp->hfs_extents_vp), HFS_EXCLUSIVE_LOCK);
		retval = hfs_fsync(hfsmp->hfs_extents_vp, MNT_WAIT, 0, p);
		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
		if (retval && !force)
			goto err_exit;

		/* Optional allocation (bitmap) file. */
		if (hfsmp->hfs_allocation_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK);
			retval = hfs_fsync(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
			if (retval && !force)
				goto err_exit;
		}

		/* Hot-file clustering metadata, if present and system-owned. */
		if (hfsmp->hfc_filevp && vnode_issystem(hfsmp->hfc_filevp)) {
			retval = hfs_fsync(hfsmp->hfc_filevp, MNT_WAIT, 0, p);
			if (retval && !force)
				goto err_exit;
		}

		/* If runtime corruption was detected, indicate that the volume
		 * was not unmounted cleanly.
		 */
		if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
			HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
		} else {
			HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask;
		}


		if (rb_used) {
			/* If the rb-tree was live, just set min_start to 0 */
			hfsmp->nextAllocation = 0;
		}
		else {
			if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
				int i;
				u_int32_t min_start = hfsmp->totalBlocks;

				// set the nextAllocation pointer to the smallest free block number
				// we've seen so on the next mount we won't rescan unnecessarily
				lck_spin_lock(&hfsmp->vcbFreeExtLock);
				for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
					if (hfsmp->vcbFreeExt[i].startBlock < min_start) {
						min_start = hfsmp->vcbFreeExt[i].startBlock;
					}
				}
				lck_spin_unlock(&hfsmp->vcbFreeExtLock);
				if (min_start < hfsmp->nextAllocation) {
					hfsmp->nextAllocation = min_start;
				}
			}
		}


		retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
		if (retval) {
			/* Header flush failed: undo the "cleanly unmounted" claim. */
			HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
			if (!force)
				goto err_exit;	/* could not flush everything */
		}

		if (started_tr) {
			hfs_end_transaction(hfsmp);
			started_tr = 0;
		}
	}

	if (hfsmp->jnl) {
		hfs_journal_flush(hfsmp, FALSE);
	}

	/*
	 * Invalidate our caches and release metadata vnodes
	 */
	(void) hfsUnmount(hfsmp, p);

	if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
		(void) hfs_relconverter(hfsmp->hfs_encoding);

	// XXXdbg
	if (hfsmp->jnl) {
		journal_close(hfsmp->jnl);
		hfsmp->jnl = NULL;
	}

	VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);

	/* Release the journal device if it is separate from the data device. */
	if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
		vnode_clearmountedon(hfsmp->jvp);
		retval = VNOP_CLOSE(hfsmp->jvp,
		                    hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE,
		                    vfs_context_kernel());
		vnode_put(hfsmp->jvp);
		hfsmp->jvp = NULL;
	}
	// XXXdbg

	/*
	 * Last chance to dump unreferenced system files.
	 */
	(void) vflush(mp, NULLVP, FORCECLOSE);

#if HFS_SPARSE_DEV
	/* Drop our reference on the backing fs (if any). */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
		struct vnode * tmpvp;

		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
		tmpvp = hfsmp->hfs_backingfs_rootvp;
		hfsmp->hfs_backingfs_rootvp = NULLVP;
		vnode_rele(tmpvp);
	}
#endif /* HFS_SPARSE_DEV */
	lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group);
	lck_spin_destroy(&hfsmp->vcbFreeExtLock, hfs_spinlock_group);
	vnode_rele(hfsmp->hfs_devvp);

	hfs_delete_chash(hfsmp);
	FREE(hfsmp, M_HFSMNT);

	return (0);

err_exit:
	if (started_tr) {
		hfs_end_transaction(hfsmp);
	}
	return retval;
}
2234
2235
2236 /*
2237 * Return the root of a filesystem.
2238 */
2239 static int
2240 hfs_vfs_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context)
2241 {
2242 return hfs_vget(VFSTOHFS(mp), (cnid_t)kHFSRootFolderID, vpp, 1, 0);
2243 }
2244
2245
2246 /*
2247 * Do operations associated with quotas
2248 */
2249 #if !QUOTA
2250 static int
2251 hfs_quotactl(__unused struct mount *mp, __unused int cmds, __unused uid_t uid, __unused caddr_t datap, __unused vfs_context_t context)
2252 {
2253 return (ENOTSUP);
2254 }
2255 #else
/*
 * Dispatch a quotactl(2) request for an HFS volume.
 *
 * 'cmds' packs a major command (high bits, >> SUBCMDSHIFT) and a quota
 * type (low bits, & SUBCMDMASK).  Q_SYNC and Q_QUOTASTAT need no
 * privilege; Q_GETQUOTA is allowed when a user queries their own quota;
 * everything else requires superuser.
 *
 * Returns 0 on success or an errno.  NOTE: if the mount is busy, the
 * call returns 0 without performing the operation.
 */
static int
hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	int cmd, type, error;

	/* uid == ~0 means "the calling user". */
	if (uid == ~0U)
		uid = kauth_cred_getuid(vfs_context_ucred(context));
	cmd = cmds >> SUBCMDSHIFT;

	/* Privilege check: non-exempt commands require superuser. */
	switch (cmd) {
	case Q_SYNC:
	case Q_QUOTASTAT:
		break;
	case Q_GETQUOTA:
		if (uid == kauth_cred_getuid(vfs_context_ucred(context)))
			break;
		/* fall through */
	default:
		if ( (error = vfs_context_suser(context)) )
			return (error);
	}

	type = cmds & SUBCMDMASK;
	if ((u_int)type >= MAXQUOTAS)
		return (EINVAL);
	if (vfs_busy(mp, LK_NOWAIT))
		return (0);

	switch (cmd) {

	case Q_QUOTAON:
		error = hfs_quotaon(p, mp, type, datap);
		break;

	case Q_QUOTAOFF:
		error = hfs_quotaoff(p, mp, type);
		break;

	case Q_SETQUOTA:
		error = hfs_setquota(mp, uid, type, datap);
		break;

	case Q_SETUSE:
		error = hfs_setuse(mp, uid, type, datap);
		break;

	case Q_GETQUOTA:
		error = hfs_getquota(mp, uid, type, datap);
		break;

	case Q_SYNC:
		error = hfs_qsync(mp);
		break;

	case Q_QUOTASTAT:
		error = hfs_quotastat(mp, type, datap);
		break;

	default:
		error = EINVAL;
		break;
	}
	vfs_unbusy(mp);

	return (error);
}
2323 #endif /* QUOTA */
2324
2325 /* Subtype is composite of bits */
2326 #define HFS_SUBTYPE_JOURNALED 0x01
2327 #define HFS_SUBTYPE_CASESENSITIVE 0x02
2328 /* bits 2 - 6 reserved */
2329 #define HFS_SUBTYPE_STANDARDHFS 0x80
2330
2331 /*
2332 * Get file system statistics.
2333 */
2334 int
2335 hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_context_t context)
2336 {
2337 ExtendedVCB *vcb = VFSTOVCB(mp);
2338 struct hfsmount *hfsmp = VFSTOHFS(mp);
2339 u_int32_t freeCNIDs;
2340 u_int16_t subtype = 0;
2341
2342 freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)vcb->vcbNxtCNID;
2343
2344 sbp->f_bsize = (u_int32_t)vcb->blockSize;
2345 sbp->f_iosize = (size_t)cluster_max_io_size(mp, 0);
2346 sbp->f_blocks = (u_int64_t)((u_int32_t)vcb->totalBlocks);
2347 sbp->f_bfree = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 0));
2348 sbp->f_bavail = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 1));
2349 sbp->f_files = (u_int64_t)((u_int32_t )(vcb->totalBlocks - 2)); /* max files is constrained by total blocks */
2350 sbp->f_ffree = (u_int64_t)((u_int32_t )(MIN(freeCNIDs, sbp->f_bavail)));
2351
2352 /*
2353 * Subtypes (flavors) for HFS
2354 * 0: Mac OS Extended
2355 * 1: Mac OS Extended (Journaled)
2356 * 2: Mac OS Extended (Case Sensitive)
2357 * 3: Mac OS Extended (Case Sensitive, Journaled)
2358 * 4 - 127: Reserved
2359 * 128: Mac OS Standard
2360 *
2361 */
2362 if (hfsmp->hfs_flags & HFS_STANDARD) {
2363 subtype = HFS_SUBTYPE_STANDARDHFS;
2364 } else /* HFS Plus */ {
2365 if (hfsmp->jnl)
2366 subtype |= HFS_SUBTYPE_JOURNALED;
2367 if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
2368 subtype |= HFS_SUBTYPE_CASESENSITIVE;
2369 }
2370 sbp->f_fssubtype = subtype;
2371
2372 return (0);
2373 }
2374
2375
//
// XXXdbg -- this is a callback to be used by the journal to
//           get meta data blocks flushed out to disk.
//
// XXXdbg -- be smarter and don't flush *every* block on each
//           call.  try to only flush some so we don't wind up
//           being too synchronous.
//
/*
 * Write the primary (and alternate, if present) volume header buffer
 * to disk when it is dirty.  Registered with the journal layer (see
 * the journal_create call in hfs_sysctl) so metadata reaches disk.
 */
__private_extern__
void
hfs_sync_metadata(void *arg)
{
	struct mount *mp = (struct mount *)arg;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	buf_t bp;
	int retval;
	daddr64_t priIDSector;	/* logical sector of the primary volume header */
	hfsmp = VFSTOHFS(mp);
	vcb = HFSTOVCB(hfsmp);

	// now make sure the super block is flushed
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
				  HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	/* ENXIO is deliberately not logged — NOTE(review): presumably the
	 * device-has-vanished case (e.g. eject); confirm. */
	if ((retval != 0 ) && (retval != ENXIO)) {
		printf("hfs_sync_metadata: can't read volume header at %d! (retval 0x%x)\n",
		       (int)priIDSector, retval);
	}

	/* Write the buffer only if it is delayed-write and not locked. */
	if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
		buf_bwrite(bp);
	} else if (bp) {
		buf_brelse(bp);
	}

	// the alternate super block...
	// XXXdbg - we probably don't need to do this each and every time.
	//          hfs_btreeio.c:FlushAlternate() should flag when it was
	//          written...
	if (hfsmp->hfs_alt_id_sector) {
		retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp);
		if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
			buf_bwrite(bp);
		} else if (bp) {
			buf_brelse(bp);
		}
	}
}
2430
2431
/* Arguments threaded through vnode_iterate() to hfs_sync_callback(). */
struct hfs_sync_cargs {
	kauth_cred_t cred;	/* caller's credentials (not read by the callback) */
	struct proc *p;		/* requesting process, forwarded to hfs_fsync() */
	int waitfor;		/* sync mode, forwarded to hfs_fsync() */
	int error;		/* most recent fsync error seen by the callback */
};
2438
2439
2440 static int
2441 hfs_sync_callback(struct vnode *vp, void *cargs)
2442 {
2443 struct cnode *cp;
2444 struct hfs_sync_cargs *args;
2445 int error;
2446
2447 args = (struct hfs_sync_cargs *)cargs;
2448
2449 if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK) != 0) {
2450 return (VNODE_RETURNED);
2451 }
2452 cp = VTOC(vp);
2453
2454 if ((cp->c_flag & C_MODIFIED) ||
2455 (cp->c_touch_acctime | cp->c_touch_chgtime | cp->c_touch_modtime) ||
2456 vnode_hasdirtyblks(vp)) {
2457 error = hfs_fsync(vp, args->waitfor, 0, args->p);
2458
2459 if (error)
2460 args->error = error;
2461 }
2462 hfs_unlock(cp);
2463 return (VNODE_RETURNED);
2464 }
2465
2466
2467
/*
 * Synchronize an HFS volume (VFS sync entry point).
 *
 * Writes back every modified vnode on the mount, then the metadata
 * B-tree files, quota data, hot-file state, the volume header, and
 * finally flushes the journal.  Work is best-effort: a failure on one
 * file does not stop the rest; the last error seen is returned.
 *
 * Note: we are always called with the filesystem marked `MPBUSY'.
 */
int
hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct cnode *cp;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	struct vnode *meta_vp[4];
	int i;
	int error, allerror = 0;
	struct hfs_sync_cargs args;

	hfsmp = VFSTOHFS(mp);

	/*
	 * hfs_changefs might be manipulating vnodes so back off
	 */
	if (hfsmp->hfs_flags & HFS_IN_CHANGEFS)
		return (0);

	if (hfsmp->hfs_flags & HFS_READ_ONLY)
		return (EROFS);

	/* skip over frozen volumes */
	if (!lck_rw_try_lock_shared(&hfsmp->hfs_insync))
		return 0;

	args.cred = kauth_cred_get();
	args.waitfor = waitfor;
	args.p = p;
	args.error = 0;
	/*
	 * hfs_sync_callback will be called for each vnode
	 * hung off of this mount point... the vnode will be
	 * properly referenced and unreferenced around the callback
	 */
	vnode_iterate(mp, 0, hfs_sync_callback, (void *)&args);

	if (args.error)
		allerror = args.error;

	vcb = HFSTOVCB(hfsmp);

	meta_vp[0] = vcb->extentsRefNum;
	meta_vp[1] = vcb->catalogRefNum;
	meta_vp[2] = vcb->allocationsRefNum;	/* This is NULL for standard HFS */
	meta_vp[3] = hfsmp->hfs_attribute_vp;	/* Optional file */

	/* Now sync our three metadata files */
	for (i = 0; i < 4; ++i) {
		struct vnode *btvp;

		btvp = meta_vp[i];;
		if ((btvp==0) || (vnode_mount(btvp) != mp))
			continue;

		/* XXX use hfs_systemfile_lock instead ? */
		(void) hfs_lock(VTOC(btvp), HFS_EXCLUSIVE_LOCK);
		cp = VTOC(btvp);

		/* Skip B-tree files with no pending changes or dirty buffers. */
		if (((cp->c_flag & C_MODIFIED) == 0) &&
		    (cp->c_touch_acctime == 0) &&
		    (cp->c_touch_chgtime == 0) &&
		    (cp->c_touch_modtime == 0) &&
		    vnode_hasdirtyblks(btvp) == 0) {
			hfs_unlock(VTOC(btvp));
			continue;
		}
		error = vnode_get(btvp);
		if (error) {
			hfs_unlock(VTOC(btvp));
			continue;
		}
		if ((error = hfs_fsync(btvp, waitfor, 0, p)))
			allerror = error;

		hfs_unlock(cp);
		vnode_put(btvp);
	};

	/*
	 * Force stale file system control information to be flushed.
	 */
	if (vcb->vcbSigWord == kHFSSigWord) {
		if ((error = VNOP_FSYNC(hfsmp->hfs_devvp, waitfor, context))) {
			allerror = error;
		}
	}
#if QUOTA
	hfs_qsync(mp);
#endif /* QUOTA */

	hfs_hotfilesync(hfsmp, vfs_context_kernel());

	/*
	 * Write back modified superblock.
	 */
	if (IsVCBDirty(vcb)) {
		error = hfs_flushvolumeheader(hfsmp, waitfor, 0);
		if (error)
			allerror = error;
	}

	if (hfsmp->jnl) {
		hfs_journal_flush(hfsmp, FALSE);
	}

	/* Record the wall-clock time of this sync. */
	{
		clock_sec_t secs;
		clock_usec_t usecs;
		uint64_t now;

		clock_get_calendar_microtime(&secs, &usecs);
		now = ((uint64_t)secs * 1000000ULL) + (uint64_t)usecs;
		hfsmp->hfs_last_sync_time = now;
	}

	lck_rw_unlock_shared(&hfsmp->hfs_insync);
	return (allerror);
}
2595
2596
2597 /*
2598 * File handle to vnode
2599 *
2600 * Have to be really careful about stale file handles:
2601 * - check that the cnode id is valid
2602 * - call hfs_vget() to get the locked cnode
2603 * - check for an unallocated cnode (i_mode == 0)
2604 * - check that the given client host has export rights and return
2605 * those rights via. exflagsp and credanonp
2606 */
2607 static int
2608 hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, __unused vfs_context_t context)
2609 {
2610 struct hfsfid *hfsfhp;
2611 struct vnode *nvp;
2612 int result;
2613
2614 *vpp = NULL;
2615 hfsfhp = (struct hfsfid *)fhp;
2616
2617 if (fhlen < (int)sizeof(struct hfsfid))
2618 return (EINVAL);
2619
2620 result = hfs_vget(VFSTOHFS(mp), ntohl(hfsfhp->hfsfid_cnid), &nvp, 0, 0);
2621 if (result) {
2622 if (result == ENOENT)
2623 result = ESTALE;
2624 return result;
2625 }
2626
2627 /*
2628 * We used to use the create time as the gen id of the file handle,
2629 * but it is not static enough because it can change at any point
2630 * via system calls. We still don't have another volume ID or other
2631 * unique identifier to use for a generation ID across reboots that
2632 * persists until the file is removed. Using only the CNID exposes
2633 * us to the potential wrap-around case, but as of 2/2008, it would take
2634 * over 2 months to wrap around if the machine did nothing but allocate
2635 * CNIDs. Using some kind of wrap counter would only be effective if
2636 * each file had the wrap counter associated with it. For now,
2637 * we use only the CNID to identify the file as it's good enough.
2638 */
2639
2640 *vpp = nvp;
2641
2642 hfs_unlock(VTOC(nvp));
2643 return (0);
2644 }
2645
2646
2647 /*
2648 * Vnode pointer to File handle
2649 */
2650 /* ARGSUSED */
2651 static int
2652 hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_context_t context)
2653 {
2654 struct cnode *cp;
2655 struct hfsfid *hfsfhp;
2656
2657 if (ISHFS(VTOVCB(vp)))
2658 return (ENOTSUP); /* hfs standard is not exportable */
2659
2660 if (*fhlenp < (int)sizeof(struct hfsfid))
2661 return (EOVERFLOW);
2662
2663 cp = VTOC(vp);
2664 hfsfhp = (struct hfsfid *)fhp;
2665 /* only the CNID is used to identify the file now */
2666 hfsfhp->hfsfid_cnid = htonl(cp->c_fileid);
2667 hfsfhp->hfsfid_gen = htonl(cp->c_fileid);
2668 *fhlenp = sizeof(struct hfsfid);
2669
2670 return (0);
2671 }
2672
2673
2674 /*
2675 * Initial HFS filesystems, done only once.
2676 */
2677 static int
2678 hfs_init(__unused struct vfsconf *vfsp)
2679 {
2680 static int done = 0;
2681
2682 if (done)
2683 return (0);
2684 done = 1;
2685 hfs_chashinit();
2686 hfs_converterinit();
2687
2688 BTReserveSetup();
2689
2690
2691 hfs_lock_attr = lck_attr_alloc_init();
2692 hfs_group_attr = lck_grp_attr_alloc_init();
2693 hfs_mutex_group = lck_grp_alloc_init("hfs-mutex", hfs_group_attr);
2694 hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr);
2695 hfs_spinlock_group = lck_grp_alloc_init("hfs-spinlock", hfs_group_attr);
2696
2697 #if HFS_COMPRESSION
2698 decmpfs_init();
2699 #endif
2700
2701 return (0);
2702 }
2703
2704 static int
2705 hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp)
2706 {
2707 struct hfsmount * hfsmp;
2708 char fstypename[MFSNAMELEN];
2709
2710 if (vp == NULL)
2711 return (EINVAL);
2712
2713 if (!vnode_isvroot(vp))
2714 return (EINVAL);
2715
2716 vnode_vfsname(vp, fstypename);
2717 if (strncmp(fstypename, "hfs", sizeof(fstypename)) != 0)
2718 return (EINVAL);
2719
2720 hfsmp = VTOHFS(vp);
2721
2722 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
2723 return (EINVAL);
2724
2725 *hfsmpp = hfsmp;
2726
2727 return (0);
2728 }
2729
2730 // XXXdbg
2731 #include <sys/filedesc.h>
2732
2733 /*
2734 * HFS filesystem related variables.
2735 */
2736 int
2737 hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp,
2738 user_addr_t newp, size_t newlen, vfs_context_t context)
2739 {
2740 struct proc *p = vfs_context_proc(context);
2741 int error;
2742 struct hfsmount *hfsmp;
2743
2744 /* all sysctl names at this level are terminal */
2745
2746 if (name[0] == HFS_ENCODINGBIAS) {
2747 int bias;
2748
2749 bias = hfs_getencodingbias();
2750 error = sysctl_int(oldp, oldlenp, newp, newlen, &bias);
2751 if (error == 0 && newp)
2752 hfs_setencodingbias(bias);
2753 return (error);
2754
2755 } else if (name[0] == HFS_EXTEND_FS) {
2756 u_int64_t newsize;
2757 vnode_t vp = vfs_context_cwd(context);
2758
2759 if (newp == USER_ADDR_NULL || vp == NULLVP)
2760 return (EINVAL);
2761 if ((error = hfs_getmountpoint(vp, &hfsmp)))
2762 return (error);
2763 error = sysctl_quad(oldp, oldlenp, newp, newlen, (quad_t *)&newsize);
2764 if (error)
2765 return (error);
2766
2767 error = hfs_extendfs(hfsmp, newsize, context);
2768 return (error);
2769
2770 } else if (name[0] == HFS_ENCODINGHINT) {
2771 size_t bufsize;
2772 size_t bytes;
2773 u_int32_t hint;
2774 u_int16_t *unicode_name = NULL;
2775 char *filename = NULL;
2776
2777 if ((newlen <= 0) || (newlen > MAXPATHLEN))
2778 return (EINVAL);
2779
2780 bufsize = MAX(newlen * 3, MAXPATHLEN);
2781 MALLOC(filename, char *, newlen, M_TEMP, M_WAITOK);
2782 if (filename == NULL) {
2783 error = ENOMEM;
2784 goto encodinghint_exit;
2785 }
2786 MALLOC(unicode_name, u_int16_t *, bufsize, M_TEMP, M_WAITOK);
2787 if (filename == NULL) {
2788 error = ENOMEM;
2789 goto encodinghint_exit;
2790 }
2791
2792 error = copyin(newp, (caddr_t)filename, newlen);
2793 if (error == 0) {
2794 error = utf8_decodestr((u_int8_t *)filename, newlen - 1, unicode_name,
2795 &bytes, bufsize, 0, UTF_DECOMPOSED);
2796 if (error == 0) {
2797 hint = hfs_pickencoding(unicode_name, bytes / 2);
2798 error = sysctl_int(oldp, oldlenp, USER_ADDR_NULL, 0, (int32_t *)&hint);
2799 }
2800 }
2801
2802 encodinghint_exit:
2803 if (unicode_name)
2804 FREE(unicode_name, M_TEMP);
2805 if (filename)
2806 FREE(filename, M_TEMP);
2807 return (error);
2808
2809 } else if (name[0] == HFS_ENABLE_JOURNALING) {
2810 // make the file system journaled...
2811 vnode_t vp = vfs_context_cwd(context);
2812 vnode_t jvp;
2813 ExtendedVCB *vcb;
2814 struct cat_attr jnl_attr, jinfo_attr;
2815 struct cat_fork jnl_fork, jinfo_fork;
2816 void *jnl = NULL;
2817 int lockflags;
2818
2819 /* Only root can enable journaling */
2820 if (!is_suser()) {
2821 return (EPERM);
2822 }
2823 if (vp == NULLVP)
2824 return EINVAL;
2825
2826 hfsmp = VTOHFS(vp);
2827 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2828 return EROFS;
2829 }
2830 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) {
2831 printf("hfs: can't make a plain hfs volume journaled.\n");
2832 return EINVAL;
2833 }
2834
2835 if (hfsmp->jnl) {
2836 printf("hfs: volume @ mp %p is already journaled!\n", vnode_mount(vp));
2837 return EAGAIN;
2838 }
2839
2840 vcb = HFSTOVCB(hfsmp);
2841 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
2842 if (BTHasContiguousNodes(VTOF(vcb->catalogRefNum)) == 0 ||
2843 BTHasContiguousNodes(VTOF(vcb->extentsRefNum)) == 0) {
2844
2845 printf("hfs: volume has a btree w/non-contiguous nodes. can not enable journaling.\n");
2846 hfs_systemfile_unlock(hfsmp, lockflags);
2847 return EINVAL;
2848 }
2849 hfs_systemfile_unlock(hfsmp, lockflags);
2850
2851 // make sure these both exist!
2852 if ( GetFileInfo(vcb, kHFSRootFolderID, ".journal_info_block", &jinfo_attr, &jinfo_fork) == 0
2853 || GetFileInfo(vcb, kHFSRootFolderID, ".journal", &jnl_attr, &jnl_fork) == 0) {
2854
2855 return EINVAL;
2856 }
2857
2858 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, context);
2859
2860 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2861 (off_t)name[2], (off_t)name[3]);
2862
2863 //
2864 // XXXdbg - note that currently (Sept, 08) hfs_util does not support
2865 // enabling the journal on a separate device so it is safe
2866 // to just copy hfs_devvp here. If hfs_util gets the ability
2867 // to dynamically enable the journal on a separate device then
2868 // we will have to do the same thing as hfs_early_journal_init()
2869 // to locate and open the journal device.
2870 //
2871 jvp = hfsmp->hfs_devvp;
2872 jnl = journal_create(jvp,
2873 (off_t)name[2] * (off_t)HFSTOVCB(hfsmp)->blockSize
2874 + HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
2875 (off_t)((unsigned)name[3]),
2876 hfsmp->hfs_devvp,
2877 hfsmp->hfs_logical_block_size,
2878 0,
2879 0,
2880 hfs_sync_metadata, hfsmp->hfs_mp);
2881
2882 /*
2883 * Set up the trim callback function so that we can add
2884 * recently freed extents to the free extent cache once
2885 * the transaction that freed them is written to the
2886 * journal on disk.
2887 */
2888 if (jnl)
2889 journal_trim_set_callback(jnl, hfs_trim_callback, hfsmp);
2890
2891 if (jnl == NULL) {
2892 printf("hfs: FAILED to create the journal!\n");
2893 if (jvp && jvp != hfsmp->hfs_devvp) {
2894 vnode_clearmountedon(jvp);
2895 VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
2896 }
2897 jvp = NULL;
2898
2899 return EINVAL;
2900 }
2901
2902 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
2903
2904 /*
2905 * Flush all dirty metadata buffers.
2906 */
2907 buf_flushdirtyblks(hfsmp->hfs_devvp, TRUE, 0, "hfs_sysctl");
2908 buf_flushdirtyblks(hfsmp->hfs_extents_vp, TRUE, 0, "hfs_sysctl");
2909 buf_flushdirtyblks(hfsmp->hfs_catalog_vp, TRUE, 0, "hfs_sysctl");
2910 buf_flushdirtyblks(hfsmp->hfs_allocation_vp, TRUE, 0, "hfs_sysctl");
2911 if (hfsmp->hfs_attribute_vp)
2912 buf_flushdirtyblks(hfsmp->hfs_attribute_vp, TRUE, 0, "hfs_sysctl");
2913
2914 HFSTOVCB(hfsmp)->vcbJinfoBlock = name[1];
2915 HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeJournaledMask;
2916 hfsmp->jvp = jvp;
2917 hfsmp->jnl = jnl;
2918
2919 // save this off for the hack-y check in hfs_remove()
2920 hfsmp->jnl_start = (u_int32_t)name[2];
2921 hfsmp->jnl_size = (off_t)((unsigned)name[3]);
2922 hfsmp->hfs_jnlinfoblkid = jinfo_attr.ca_fileid;
2923 hfsmp->hfs_jnlfileid = jnl_attr.ca_fileid;
2924
2925 vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
2926
2927 hfs_unlock_global (hfsmp);
2928 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
2929
2930 {
2931 fsid_t fsid;
2932
2933 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
2934 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
2935 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
2936 }
2937 return 0;
2938 } else if (name[0] == HFS_DISABLE_JOURNALING) {
2939 // clear the journaling bit
2940 vnode_t vp = vfs_context_cwd(context);
2941
2942 /* Only root can disable journaling */
2943 if (!is_suser()) {
2944 return (EPERM);
2945 }
2946 if (vp == NULLVP)
2947 return EINVAL;
2948
2949 hfsmp = VTOHFS(vp);
2950
2951 /*
2952 * Disabling journaling is disallowed on volumes with directory hard links
2953 * because we have not tested the relevant code path.
2954 */
2955 if (hfsmp->hfs_private_attr[DIR_HARDLINKS].ca_entries != 0){
2956 printf("hfs: cannot disable journaling on volumes with directory hardlinks\n");
2957 return EPERM;
2958 }
2959
2960 printf("hfs: disabling journaling for mount @ %p\n", vnode_mount(vp));
2961
2962 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
2963
2964 // Lights out for you buddy!
2965 journal_close(hfsmp->jnl);
2966 hfsmp->jnl = NULL;
2967
2968 if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
2969 vnode_clearmountedon(hfsmp->jvp);
2970 VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
2971 vnode_put(hfsmp->jvp);
2972 }
2973 hfsmp->jvp = NULL;
2974 vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
2975 hfsmp->jnl_start = 0;
2976 hfsmp->hfs_jnlinfoblkid = 0;
2977 hfsmp->hfs_jnlfileid = 0;
2978
2979 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeJournaledMask;
2980
2981 hfs_unlock_global (hfsmp);
2982
2983 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
2984
2985 {
2986 fsid_t fsid;
2987
2988 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
2989 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
2990 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
2991 }
2992 return 0;
2993 } else if (name[0] == HFS_GET_JOURNAL_INFO) {
2994 vnode_t vp = vfs_context_cwd(context);
2995 off_t jnl_start, jnl_size;
2996
2997 if (vp == NULLVP)
2998 return EINVAL;
2999
3000 /* 64-bit processes won't work with this sysctl -- can't fit a pointer into an int! */
3001 if (proc_is64bit(current_proc()))
3002 return EINVAL;
3003
3004 hfsmp = VTOHFS(vp);
3005 if (hfsmp->jnl == NULL) {
3006 jnl_start = 0;
3007 jnl_size = 0;
3008 } else {
3009 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
3010 jnl_size = (off_t)hfsmp->jnl_size;
3011 }
3012
3013 if ((error = copyout((caddr_t)&jnl_start, CAST_USER_ADDR_T(name[1]), sizeof(off_t))) != 0) {
3014 return error;
3015 }
3016 if ((error = copyout((caddr_t)&jnl_size, CAST_USER_ADDR_T(name[2]), sizeof(off_t))) != 0) {
3017 return error;
3018 }
3019
3020 return 0;
3021 } else if (name[0] == HFS_SET_PKG_EXTENSIONS) {
3022
3023 return set_package_extensions_table((user_addr_t)((unsigned)name[1]), name[2], name[3]);
3024
3025 } else if (name[0] == VFS_CTL_QUERY) {
3026 struct sysctl_req *req;
3027 union union_vfsidctl vc;
3028 struct mount *mp;
3029 struct vfsquery vq;
3030
3031 req = CAST_DOWN(struct sysctl_req *, oldp); /* we're new style vfs sysctl. */
3032
3033 error = SYSCTL_IN(req, &vc, proc_is64bit(p)? sizeof(vc.vc64):sizeof(vc.vc32));
3034 if (error) return (error);
3035
3036 mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */
3037 if (mp == NULL) return (ENOENT);
3038
3039 hfsmp = VFSTOHFS(mp);
3040 bzero(&vq, sizeof(vq));
3041 vq.vq_flags = hfsmp->hfs_notification_conditions;
3042 return SYSCTL_OUT(req, &vq, sizeof(vq));;
3043 } else if (name[0] == HFS_REPLAY_JOURNAL) {
3044 vnode_t devvp = NULL;
3045 int device_fd;
3046 if (namelen != 2) {
3047 return (EINVAL);
3048 }
3049 device_fd = name[1];
3050 error = file_vnode(device_fd, &devvp);
3051 if (error) {
3052 return error;
3053 }
3054 error = vnode_getwithref(devvp);
3055 if (error) {
3056 file_drop(device_fd);
3057 return error;
3058 }
3059 error = hfs_journal_replay(devvp, context);
3060 file_drop(device_fd);
3061 vnode_put(devvp);
3062 return error;
3063 } else if (name[0] == HFS_ENABLE_RESIZE_DEBUG) {
3064 hfs_resize_debug = 1;
3065 printf ("hfs_sysctl: Enabled volume resize debugging.\n");
3066 return 0;
3067 }
3068
3069 return (ENOTSUP);
3070 }
3071
3072 /*
3073 * hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support
3074 * the build_path ioctl. We use it to leverage the code below that updates
3075 * the origin list cache if necessary
3076 */
3077
int
hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context)
{
	int error;
	int lockflags;
	struct hfsmount *hfsmp;

	hfsmp = VFSTOHFS(mp);

	/*
	 * Look up the cnode by ID.  skiplock=1: hfs_vget returns the vnode
	 * with an iocount but with the cnode UNLOCKED, so we can safely take
	 * the cnode lock ourselves below.
	 */
	error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1, 0);
	if (error)
		return (error);

	/*
	 * ADLs may need to have their origin state updated
	 * since build_path needs a valid parent.  The same is true
	 * for hardlinked files as well.  There isn't a race window here
	 * in re-acquiring the cnode lock since we aren't pulling any data
	 * out of the cnode; instead, we're going to the catalog.
	 */
	if ((VTOC(*vpp)->c_flag & C_HARDLINK) &&
	    (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK) == 0)) {
		cnode_t *cp = VTOC(*vpp);
		struct cat_desc cdesc;

		/* Only consult the catalog if no origin is cached yet. */
		if (!hfs_haslinkorigin(cp)) {
			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
			error = cat_findname(hfsmp, (cnid_t)ino, &cdesc);
			hfs_systemfile_unlock(hfsmp, lockflags);
			if (error == 0) {
				/*
				 * Don't record the private metadata directories
				 * as an origin; only a real, visible parent is
				 * useful to build_path.
				 */
				if ((cdesc.cd_parentcnid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
				    (cdesc.cd_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) {
					hfs_savelinkorigin(cp, cdesc.cd_parentcnid);
				}
				cat_releasedesc(&cdesc);
			}
		}
		hfs_unlock(cp);
	}
	/*
	 * The origin update is best-effort: a cat_findname failure is not
	 * propagated — the vnode from hfs_vget is still returned as success.
	 */
	return (0);
}
3119
3120
3121 /*
3122 * Look up an HFS object by ID.
3123 *
3124 * The object is returned with an iocount reference and the cnode locked.
3125 *
3126 * If the object is a file then it will represent the data fork.
3127 */
int
hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock, int allow_deleted)
{
	struct vnode *vp = NULLVP;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct cat_fork cnfork;
	u_int32_t linkref = 0;
	int error;

	/* Check for cnids that shouldn't be exported. */
	if ((cnid < kHFSFirstUserCatalogNodeID) &&
	    (cnid != kHFSRootFolderID && cnid != kHFSRootParentID)) {
		return (ENOENT);
	}
	/* Don't export our private directories. */
	if (cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid ||
	    cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) {
		return (ENOENT);
	}
	/*
	 * Check the cnode hash first; a hit returns the vnode directly
	 * (locked unless skiplock was requested).
	 */
	vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock, allow_deleted);
	if (vp) {
		*vpp = vp;
		return(0);
	}

	bzero(&cndesc, sizeof(cndesc));
	bzero(&cnattr, sizeof(cnattr));
	bzero(&cnfork, sizeof(cnfork));

	/*
	 * Not in hash, lookup in catalog
	 */
	if (cnid == kHFSRootParentID) {
		/*
		 * The root parent has no catalog record; synthesize a
		 * descriptor/attributes for it by hand.
		 */
		static char hfs_rootname[] = "/";

		cndesc.cd_nameptr = (const u_int8_t *)&hfs_rootname[0];
		cndesc.cd_namelen = 1;
		cndesc.cd_parentcnid = kHFSRootParentID;
		cndesc.cd_cnid = kHFSRootFolderID;
		cndesc.cd_flags = CD_ISDIR;

		cnattr.ca_fileid = kHFSRootFolderID;
		cnattr.ca_linkcount = 1;
		cnattr.ca_entries = 1;
		cnattr.ca_dircount = 1;
		cnattr.ca_mode = (S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO);
	} else {
		int lockflags;
		cnid_t pid;
		const char *nameptr;

		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		error = cat_idlookup(hfsmp, cnid, 0, &cndesc, &cnattr, &cnfork);
		hfs_systemfile_unlock(hfsmp, lockflags);

		if (error) {
			*vpp = NULL;
			return (error);
		}

		/*
		 * Check for a raw hardlink inode and save its linkref.
		 * These live in the private metadata directories and are
		 * recognized by their name prefix.
		 */
		pid = cndesc.cd_parentcnid;
		nameptr = (const char *)cndesc.cd_nameptr;

		if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		    (bcmp(nameptr, HFS_INODE_PREFIX, HFS_INODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_INODE_PREFIX_LEN], NULL, 10);

		} else if ((pid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DIRINODE_PREFIX, HFS_DIRINODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_DIRINODE_PREFIX_LEN], NULL, 10);

		} else if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DELETE_PREFIX, HFS_DELETE_PREFIX_LEN) == 0)) {
			*vpp = NULL;
			cat_releasedesc(&cndesc);
			return (ENOENT);	/* open unlinked file */
		}
	}

	/*
	 * Finish initializing cnode descriptor for hardlinks.
	 *
	 * We need a valid name and parent for reverse lookups.
	 */
	if (linkref) {
		cnid_t nextlinkid;
		cnid_t prevlinkid;
		struct cat_desc linkdesc;
		int lockflags;

		cnattr.ca_linkref = linkref;

		/*
		 * Pick up the first link in the chain and get a descriptor for it.
		 * This allows blind volfs paths to work for hardlinks.
		 */
		if ((hfs_lookup_siblinglinks(hfsmp, linkref, &prevlinkid, &nextlinkid) == 0) &&
		    (nextlinkid != 0)) {
			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
			error = cat_findname(hfsmp, nextlinkid, &linkdesc);
			hfs_systemfile_unlock(hfsmp, lockflags);
			if (error == 0) {
				/* Replace the raw-inode descriptor with the link's. */
				cat_releasedesc(&cndesc);
				bcopy(&linkdesc, &cndesc, sizeof(linkdesc));
			}
		}
	}

	if (linkref) {
		int newvnode_flags = 0;

		error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr,
		                        &cnfork, &vp, &newvnode_flags);
		if (error == 0) {
			VTOC(vp)->c_flag |= C_HARDLINK;
			vnode_setmultipath(vp);
		}
	} else {
		struct componentname cn;
		int newvnode_flags = 0;

		/* Supply hfs_getnewvnode with a component name. */
		MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
		cn.cn_nameiop = LOOKUP;
		cn.cn_flags = ISLASTCN | HASBUF;
		cn.cn_context = NULL;
		cn.cn_pnlen = MAXPATHLEN;
		cn.cn_nameptr = cn.cn_pnbuf;
		cn.cn_namelen = cndesc.cd_namelen;
		cn.cn_hash = 0;
		cn.cn_consume = 0;
		/* +1 copies the NUL terminator along with the name. */
		bcopy(cndesc.cd_nameptr, cn.cn_nameptr, cndesc.cd_namelen + 1);

		error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr,
		                        &cnfork, &vp, &newvnode_flags);

		if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK)) {
			hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid);
		}
		FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
	}
	cat_releasedesc(&cndesc);

	*vpp = vp;
	/* hfs_getnewvnode returns the cnode locked; drop it if asked to. */
	if (vp && skiplock) {
		hfs_unlock(VTOC(vp));
	}
	return (error);
}
3284
3285
3286 /*
3287 * Flush out all the files in a filesystem.
3288 */
/*
 * Flush (vflush) all user files on the mount.  System files are always
 * skipped; with quotas enabled, the root vnode is skipped on the first
 * passes because the open quota files hold indirect references on it.
 * Returns 0 or an errno (EBUSY if the root is still in use and
 * FORCECLOSE was not requested).
 */
static int
#if QUOTA
hfs_flushfiles(struct mount *mp, int flags, struct proc *p)
#else
hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p)
#endif /* QUOTA */
{
	struct hfsmount *hfsmp;
	struct vnode *skipvp = NULLVP;
	int error;
#if QUOTA
	int quotafilecnt;
	int i;
#endif

	hfsmp = VFSTOHFS(mp);

#if QUOTA
	/*
	 * The open quota files have an indirect reference on
	 * the root directory vnode.  We must account for this
	 * extra reference when doing the intial vflush.
	 */
	quotafilecnt = 0;
	if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {

		/* Find out how many quota files we have open. */
		for (i = 0; i < MAXQUOTAS; i++) {
			if (hfsmp->hfs_qfiles[i].qf_vp != NULLVP)
				++quotafilecnt;
		}

		/* Obtain the root vnode so we can skip over it. */
		skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0, 0);
	}
#endif /* QUOTA */

	/* First pass: also skip swap files; second pass includes them. */
	error = vflush(mp, skipvp, SKIPSYSTEM | SKIPSWAP | flags);
	if (error != 0)
		return(error);

	error = vflush(mp, skipvp, SKIPSYSTEM | flags);

#if QUOTA
	if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {
		if (skipvp) {
			/*
			 * See if there are additional references on the
			 * root vp besides the ones obtained from the open
			 * quota files and the hfs_chash_getvnode call above.
			 */
			if ((error == 0) &&
			    (vnode_isinuse(skipvp, quotafilecnt))) {
				error = EBUSY;  /* root directory is still open */
			}
			hfs_unlock(VTOC(skipvp));
			vnode_put(skipvp);
		}
		if (error && (flags & FORCECLOSE) == 0)
			return (error);

		/* Close the quota files, then flush whatever remains. */
		for (i = 0; i < MAXQUOTAS; i++) {
			if (hfsmp->hfs_qfiles[i].qf_vp == NULLVP)
				continue;
			hfs_quotaoff(p, mp, i);
		}
		error = vflush(mp, NULLVP, SKIPSYSTEM | flags);
	}
#endif /* QUOTA */

	return (error);
}
3361
3362 /*
3363 * Update volume encoding bitmap (HFS Plus only)
3364 */
3365 __private_extern__
3366 void
3367 hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding)
3368 {
3369 #define kIndexMacUkrainian 48 /* MacUkrainian encoding is 152 */
3370 #define kIndexMacFarsi 49 /* MacFarsi encoding is 140 */
3371
3372 u_int32_t index;
3373
3374 switch (encoding) {
3375 case kTextEncodingMacUkrainian:
3376 index = kIndexMacUkrainian;
3377 break;
3378 case kTextEncodingMacFarsi:
3379 index = kIndexMacFarsi;
3380 break;
3381 default:
3382 index = encoding;
3383 break;
3384 }
3385
3386 if (index < 64 && (hfsmp->encodingsBitmap & (u_int64_t)(1ULL << index)) == 0) {
3387 HFS_MOUNT_LOCK(hfsmp, TRUE)
3388 hfsmp->encodingsBitmap |= (u_int64_t)(1ULL << index);
3389 MarkVCBDirty(hfsmp);
3390 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3391 }
3392 }
3393
3394 /*
3395 * Update volume stats
3396 *
3397 * On journal volumes this will cause a volume header flush
3398 */
3399 int
3400 hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot)
3401 {
3402 struct timeval tv;
3403
3404 microtime(&tv);
3405
3406 lck_mtx_lock(&hfsmp->hfs_mutex);
3407
3408 MarkVCBDirty(hfsmp);
3409 hfsmp->hfs_mtime = tv.tv_sec;
3410
3411 switch (op) {
3412 case VOL_UPDATE:
3413 break;
3414 case VOL_MKDIR:
3415 if (hfsmp->hfs_dircount != 0xFFFFFFFF)
3416 ++hfsmp->hfs_dircount;
3417 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3418 ++hfsmp->vcbNmRtDirs;
3419 break;
3420 case VOL_RMDIR:
3421 if (hfsmp->hfs_dircount != 0)
3422 --hfsmp->hfs_dircount;
3423 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3424 --hfsmp->vcbNmRtDirs;
3425 break;
3426 case VOL_MKFILE:
3427 if (hfsmp->hfs_filecount != 0xFFFFFFFF)
3428 ++hfsmp->hfs_filecount;
3429 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3430 ++hfsmp->vcbNmFls;
3431 break;
3432 case VOL_RMFILE:
3433 if (hfsmp->hfs_filecount != 0)
3434 --hfsmp->hfs_filecount;
3435 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3436 --hfsmp->vcbNmFls;
3437 break;
3438 }
3439
3440 lck_mtx_unlock(&hfsmp->hfs_mutex);
3441
3442 if (hfsmp->jnl) {
3443 hfs_flushvolumeheader(hfsmp, 0, 0);
3444 }
3445
3446 return (0);
3447 }
3448
3449
/*
 * Flush the in-memory VCB state of an HFS (standard) volume out to its
 * on-disk Master Directory Block, byte-swapping each field to big-endian.
 * If 'altflush' is set, the alternate MDB is rewritten as well.
 * With waitfor == MNT_WAIT the primary write is synchronous and its
 * status is returned; otherwise the write is asynchronous.
 */
static int
hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush)
{
	ExtendedVCB *vcb = HFSTOVCB(hfsmp);
	struct filefork *fp;
	HFSMasterDirectoryBlock	*mdb;
	struct buf *bp = NULL;
	int retval;
	int sectorsize;
	ByteCount namelen;

	sectorsize = hfsmp->hfs_logical_block_size;
	retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sectorsize), sectorsize, NOCRED, &bp);
	if (retval) {
		if (bp)
			buf_brelse(bp);
		return retval;
	}

	/* Hold the mount mutex while copying VCB fields into the buffer. */
	lck_mtx_lock(&hfsmp->hfs_mutex);

	mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sectorsize));

	/* HFS-standard on-disk dates are local time, not UTC. */
	mdb->drCrDate	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->hfs_itime)));
	mdb->drLsMod	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbLsMod)));
	mdb->drAtrb	= SWAP_BE16 (vcb->vcbAtrb);
	mdb->drNmFls	= SWAP_BE16 (vcb->vcbNmFls);
	mdb->drAllocPtr	= SWAP_BE16 (vcb->nextAllocation);
	mdb->drClpSiz	= SWAP_BE32 (vcb->vcbClpSiz);
	mdb->drNxtCNID	= SWAP_BE32 (vcb->vcbNxtCNID);
	mdb->drFreeBks	= SWAP_BE16 (vcb->freeBlocks);

	/* Volume name: convert UTF-8 back to the on-disk Mac encoding. */
	namelen = strlen((char *)vcb->vcbVN);
	retval = utf8_to_hfs(vcb, namelen, vcb->vcbVN, mdb->drVN);
	/* Retry with MacRoman in case that's how it was exported. */
	if (retval)
		retval = utf8_to_mac_roman(namelen, vcb->vcbVN, mdb->drVN);

	mdb->drVolBkUp	= SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbVolBkUp)));
	mdb->drWrCnt	= SWAP_BE32 (vcb->vcbWrCnt);
	mdb->drNmRtDirs	= SWAP_BE16 (vcb->vcbNmRtDirs);
	mdb->drFilCnt	= SWAP_BE32 (vcb->vcbFilCnt);
	mdb->drDirCnt	= SWAP_BE32 (vcb->vcbDirCnt);

	bcopy(vcb->vcbFndrInfo, mdb->drFndrInfo, sizeof(mdb->drFndrInfo));

	/* Sync the extents overflow file's first 3 extents and sizes. */
	fp = VTOF(vcb->extentsRefNum);
	mdb->drXTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drXTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drXTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drXTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drXTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drXTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drXTFlSize	= SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drXTClpSiz	= SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	/* Sync the catalog file's first 3 extents and sizes. */
	fp = VTOF(vcb->catalogRefNum);
	mdb->drCTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drCTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drCTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drCTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drCTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drCTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drCTFlSize	= SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drCTClpSiz	= SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	MarkVCBClean( vcb );

	lck_mtx_unlock(&hfsmp->hfs_mutex);

	/* If requested, flush out the alternate MDB */
	if (altflush) {
		struct buf *alt_bp = NULL;

		if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sectorsize, NOCRED, &alt_bp) == 0) {
			bcopy(mdb, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sectorsize), kMDBSize);

			(void) VNOP_BWRITE(alt_bp);
		} else if (alt_bp)
			buf_brelse(alt_bp);
	}

	if (waitfor != MNT_WAIT)
		buf_bawrite(bp);
	else
		retval = VNOP_BWRITE(bp);

	return (retval);
}
3541
3542 /*
3543 * Flush any dirty in-memory mount data to the on-disk
3544 * volume header.
3545 *
3546 * Note: the on-disk volume signature is intentionally
3547 * not flushed since the on-disk "H+" and "HX" signatures
3548 * are always stored in-memory as "H+".
3549 */
3550 int
3551 hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
3552 {
3553 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3554 struct filefork *fp;
3555 HFSPlusVolumeHeader *volumeHeader, *altVH;
3556 int retval;
3557 struct buf *bp, *alt_bp;
3558 int i;
3559 daddr64_t priIDSector;
3560 int critical;
3561 u_int16_t signature;
3562 u_int16_t hfsversion;
3563
3564 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
3565 return(0);
3566 }
3567 if (hfsmp->hfs_flags & HFS_STANDARD) {
3568 return hfs_flushMDB(hfsmp, waitfor, altflush);
3569 }
3570 critical = altflush;
3571 priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
3572 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
3573
3574 if (hfs_start_transaction(hfsmp) != 0) {
3575 return EINVAL;
3576 }
3577
3578 bp = NULL;
3579 alt_bp = NULL;
3580
3581 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3582 HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
3583 hfsmp->hfs_physical_block_size, NOCRED, &bp);
3584 if (retval) {
3585 printf("hfs: err %d reading VH blk (%s)\n", retval, vcb->vcbVN);
3586 goto err_exit;
3587 }
3588
3589 volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) +
3590 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3591
3592 /*
3593 * Sanity check what we just read. If it's bad, try the alternate
3594 * instead.
3595 */
3596 signature = SWAP_BE16 (volumeHeader->signature);
3597 hfsversion = SWAP_BE16 (volumeHeader->version);
3598 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3599 (hfsversion < kHFSPlusVersion) || (hfsversion > 100) ||
3600 (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize)) {
3601 printf("hfs: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d%s\n",
3602 vcb->vcbVN, signature, hfsversion,
3603 SWAP_BE32 (volumeHeader->blockSize),
3604 hfsmp->hfs_alt_id_sector ? "; trying alternate" : "");
3605 hfs_mark_volume_inconsistent(hfsmp);
3606
3607 if (hfsmp->hfs_alt_id_sector) {
3608 retval = buf_meta_bread(hfsmp->hfs_devvp,
3609 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3610 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp);
3611 if (retval) {
3612 printf("hfs: err %d reading alternate VH (%s)\n", retval, vcb->vcbVN);
3613 goto err_exit;
3614 }
3615
3616 altVH = (HFSPlusVolumeHeader *)((char *)buf_dataptr(alt_bp) +
3617 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size));
3618 signature = SWAP_BE16(altVH->signature);
3619 hfsversion = SWAP_BE16(altVH->version);
3620
3621 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3622 (hfsversion < kHFSPlusVersion) || (kHFSPlusVersion > 100) ||
3623 (SWAP_BE32(altVH->blockSize) != vcb->blockSize)) {
3624 printf("hfs: corrupt alternate VH on %s, sig 0x%04x, ver %d, blksize %d\n",
3625 vcb->vcbVN, signature, hfsversion,
3626 SWAP_BE32(altVH->blockSize));
3627 retval = EIO;
3628 goto err_exit;
3629 }
3630
3631 /* The alternate is plausible, so use it. */
3632 bcopy(altVH, volumeHeader, kMDBSize);
3633 buf_brelse(alt_bp);
3634 alt_bp = NULL;
3635 } else {
3636 /* No alternate VH, nothing more we can do. */
3637 retval = EIO;
3638 goto err_exit;
3639 }
3640 }
3641
3642 if (hfsmp->jnl) {
3643 journal_modify_block_start(hfsmp->jnl, bp);
3644 }
3645
3646 /*
3647 * For embedded HFS+ volumes, update create date if it changed
3648 * (ie from a setattrlist call)
3649 */
3650 if ((vcb->hfsPlusIOPosOffset != 0) &&
3651 (SWAP_BE32 (volumeHeader->createDate) != vcb->localCreateDate)) {
3652 struct buf *bp2;
3653 HFSMasterDirectoryBlock *mdb;
3654
3655 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3656 HFS_PHYSBLK_ROUNDDOWN(HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size), hfsmp->hfs_log_per_phys),
3657 hfsmp->hfs_physical_block_size, NOCRED, &bp2);
3658 if (retval) {
3659 if (bp2)
3660 buf_brelse(bp2);
3661 retval = 0;
3662 } else {
3663 mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp2) +
3664 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3665
3666 if ( SWAP_BE32 (mdb->drCrDate) != vcb->localCreateDate )
3667 {
3668 if (hfsmp->jnl) {
3669 journal_modify_block_start(hfsmp->jnl, bp2);
3670 }
3671
3672 mdb->drCrDate = SWAP_BE32 (vcb->localCreateDate); /* pick up the new create date */
3673
3674 if (hfsmp->jnl) {
3675 journal_modify_block_end(hfsmp->jnl, bp2, NULL, NULL);
3676 } else {
3677 (void) VNOP_BWRITE(bp2); /* write out the changes */
3678 }
3679 }
3680 else
3681 {
3682 buf_brelse(bp2); /* just release it */
3683 }
3684 }
3685 }
3686
3687 lck_mtx_lock(&hfsmp->hfs_mutex);
3688
3689 /* Note: only update the lower 16 bits worth of attributes */
3690 volumeHeader->attributes = SWAP_BE32 (vcb->vcbAtrb);
3691 volumeHeader->journalInfoBlock = SWAP_BE32 (vcb->vcbJinfoBlock);
3692 if (hfsmp->jnl) {
3693 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSJMountVersion);
3694 } else {
3695 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSPlusMountVersion);
3696 }
3697 volumeHeader->createDate = SWAP_BE32 (vcb->localCreateDate); /* volume create date is in local time */
3698 volumeHeader->modifyDate = SWAP_BE32 (to_hfs_time(vcb->vcbLsMod));
3699 volumeHeader->backupDate = SWAP_BE32 (to_hfs_time(vcb->vcbVolBkUp));
3700 volumeHeader->fileCount = SWAP_BE32 (vcb->vcbFilCnt);
3701 volumeHeader->folderCount = SWAP_BE32 (vcb->vcbDirCnt);
3702 volumeHeader->totalBlocks = SWAP_BE32 (vcb->totalBlocks);
3703 volumeHeader->freeBlocks = SWAP_BE32 (vcb->freeBlocks);
3704 volumeHeader->nextAllocation = SWAP_BE32 (vcb->nextAllocation);
3705 volumeHeader->rsrcClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3706 volumeHeader->dataClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3707 volumeHeader->nextCatalogID = SWAP_BE32 (vcb->vcbNxtCNID);
3708 volumeHeader->writeCount = SWAP_BE32 (vcb->vcbWrCnt);
3709 volumeHeader->encodingsBitmap = SWAP_BE64 (vcb->encodingsBitmap);
3710
3711 if (bcmp(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)) != 0) {
3712 bcopy(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo));
3713 critical = 1;
3714 }
3715
3716 /*
3717 * System files are only dirty when altflush is set.
3718 */
3719 if (altflush == 0) {
3720 goto done;
3721 }
3722
3723 /* Sync Extents over-flow file meta data */
3724 fp = VTOF(vcb->extentsRefNum);
3725 if (FTOC(fp)->c_flag & C_MODIFIED) {
3726 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3727 volumeHeader->extentsFile.extents[i].startBlock =
3728 SWAP_BE32 (fp->ff_extents[i].startBlock);
3729 volumeHeader->extentsFile.extents[i].blockCount =
3730 SWAP_BE32 (fp->ff_extents[i].blockCount);
3731 }
3732 volumeHeader->extentsFile.logicalSize = SWAP_BE64 (fp->ff_size);
3733 volumeHeader->extentsFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3734 volumeHeader->extentsFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3735 FTOC(fp)->c_flag &= ~C_MODIFIED;
3736 }
3737
3738 /* Sync Catalog file meta data */
3739 fp = VTOF(vcb->catalogRefNum);
3740 if (FTOC(fp)->c_flag & C_MODIFIED) {
3741 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3742 volumeHeader->catalogFile.extents[i].startBlock =
3743 SWAP_BE32 (fp->ff_extents[i].startBlock);
3744 volumeHeader->catalogFile.extents[i].blockCount =
3745 SWAP_BE32 (fp->ff_extents[i].blockCount);
3746 }
3747 volumeHeader->catalogFile.logicalSize = SWAP_BE64 (fp->ff_size);
3748 volumeHeader->catalogFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3749 volumeHeader->catalogFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3750 FTOC(fp)->c_flag &= ~C_MODIFIED;
3751 }
3752
3753 /* Sync Allocation file meta data */
3754 fp = VTOF(vcb->allocationsRefNum);
3755 if (FTOC(fp)->c_flag & C_MODIFIED) {
3756 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3757 volumeHeader->allocationFile.extents[i].startBlock =
3758 SWAP_BE32 (fp->ff_extents[i].startBlock);
3759 volumeHeader->allocationFile.extents[i].blockCount =
3760 SWAP_BE32 (fp->ff_extents[i].blockCount);
3761 }
3762 volumeHeader->allocationFile.logicalSize = SWAP_BE64 (fp->ff_size);
3763 volumeHeader->allocationFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3764 volumeHeader->allocationFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3765 FTOC(fp)->c_flag &= ~C_MODIFIED;
3766 }
3767
3768 /* Sync Attribute file meta data */
3769 if (hfsmp->hfs_attribute_vp) {
3770 fp = VTOF(hfsmp->hfs_attribute_vp);
3771 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3772 volumeHeader->attributesFile.extents[i].startBlock =
3773 SWAP_BE32 (fp->ff_extents[i].startBlock);
3774 volumeHeader->attributesFile.extents[i].blockCount =
3775 SWAP_BE32 (fp->ff_extents[i].blockCount);
3776 }
3777 FTOC(fp)->c_flag &= ~C_MODIFIED;
3778 volumeHeader->attributesFile.logicalSize = SWAP_BE64 (fp->ff_size);
3779 volumeHeader->attributesFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3780 volumeHeader->attributesFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3781 }
3782
3783 /* Sync Startup file meta data */
3784 if (hfsmp->hfs_startup_vp) {
3785 fp = VTOF(hfsmp->hfs_startup_vp);
3786 if (FTOC(fp)->c_flag & C_MODIFIED) {
3787 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3788 volumeHeader->startupFile.extents[i].startBlock =
3789 SWAP_BE32 (fp->ff_extents[i].startBlock);
3790 volumeHeader->startupFile.extents[i].blockCount =
3791 SWAP_BE32 (fp->ff_extents[i].blockCount);
3792 }
3793 volumeHeader->startupFile.logicalSize = SWAP_BE64 (fp->ff_size);
3794 volumeHeader->startupFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3795 volumeHeader->startupFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3796 FTOC(fp)->c_flag &= ~C_MODIFIED;
3797 }
3798 }
3799
3800 done:
3801 MarkVCBClean(hfsmp);
3802 lck_mtx_unlock(&hfsmp->hfs_mutex);
3803
3804 /* If requested, flush out the alternate volume header */
3805 if (altflush && hfsmp->hfs_alt_id_sector) {
3806 if (buf_meta_bread(hfsmp->hfs_devvp,
3807 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3808 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) {
3809 if (hfsmp->jnl) {
3810 journal_modify_block_start(hfsmp->jnl, alt_bp);
3811 }
3812
3813 bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) +
3814 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size),
3815 kMDBSize);
3816
3817 if (hfsmp->jnl) {
3818 journal_modify_block_end(hfsmp->jnl, alt_bp, NULL, NULL);
3819 } else {
3820 (void) VNOP_BWRITE(alt_bp);
3821 }
3822 } else if (alt_bp)
3823 buf_brelse(alt_bp);
3824 }
3825
3826 if (hfsmp->jnl) {
3827 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
3828 } else {
3829 if (waitfor != MNT_WAIT)
3830 buf_bawrite(bp);
3831 else {
3832 retval = VNOP_BWRITE(bp);
3833 /* When critical data changes, flush the device cache */
3834 if (critical && (retval == 0)) {
3835 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE,
3836 NULL, FWRITE, NULL);
3837 }
3838 }
3839 }
3840 hfs_end_transaction(hfsmp);
3841
3842 return (retval);
3843
3844 err_exit:
3845 if (alt_bp)
3846 buf_brelse(alt_bp);
3847 if (bp)
3848 buf_brelse(bp);
3849 hfs_end_transaction(hfsmp);
3850 return retval;
3851 }
3852
3853
3854 /*
3855 * Extend a file system.
3856 */
3857 int
hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	kauth_cred_t cred = vfs_context_ucred(context);
	struct vnode *vp;
	struct vnode *devvp;
	struct buf *bp;
	struct filefork *fp = NULL;
	ExtendedVCB *vcb;
	struct cat_fork forkdata;		/* snapshot of the allocation fork, for rollback on error */
	u_int64_t oldsize;
	u_int64_t newblkcnt;
	u_int64_t prev_phys_block_count;
	u_int32_t addblks;
	u_int64_t sectorcnt;
	u_int32_t sectorsize;
	u_int32_t phys_sectorsize;
	daddr64_t prev_alt_sector;
	daddr_t bitmapblks;
	int lockflags = 0;
	int error;
	int64_t oldBitmapSize;
	Boolean usedExtendFileC = false;
	int transaction_begun = 0;

	devvp = hfsmp->hfs_devvp;
	vcb = HFSTOVCB(hfsmp);

	/*
	 * - HFS Plus file systems only.
	 * - Journaling must be enabled.
	 * - No embedded volumes.
	 */
	if ((vcb->vcbSigWord == kHFSSigWord) ||
	     (hfsmp->jnl == NULL) ||
	     (vcb->hfsPlusIOPosOffset != 0)) {
		return (EPERM);
	}
	/*
	 * If extending file system by non-root, then verify
	 * ownership and check permissions.
	 */
	if (suser(cred, NULL)) {
		/* Caller is not superuser: they must own the volume root and
		 * have write access to it, plus R/W access to the raw device. */
		error = hfs_vget(hfsmp, kHFSRootFolderID, &vp, 0, 0);

		if (error)
			return (error);
		error = hfs_owner_rights(hfsmp, VTOC(vp)->c_uid, cred, p, 0);
		if (error == 0) {
			error = hfs_write_access(vp, cred, p, false);
		}
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		if (error)
			return (error);

		error = vnode_authorize(devvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, context);
		if (error)
			return (error);
	}
	/* The device's logical block size must match the size this volume
	 * was mounted with; otherwise our sector arithmetic would be wrong. */
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sectorsize, 0, context)) {
		return (ENXIO);
	}
	if (sectorsize != hfsmp->hfs_logical_block_size) {
		return (ENXIO);
	}
	if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sectorcnt, 0, context)) {
		return (ENXIO);
	}
	/* The underlying device must actually be big enough for the new size. */
	if ((sectorsize * sectorcnt) < newsize) {
		printf("hfs_extendfs: not enough space on device\n");
		return (ENOSPC);
	}
	error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sectorsize, 0, context);
	if (error) {
		if ((error != ENOTSUP) && (error != ENOTTY)) {
			return (ENXIO);
		}
		/* If ioctl is not supported, force physical and logical sector size to be same */
		phys_sectorsize = sectorsize;
	}
	oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;

	/*
	 * Validate new size: must grow the volume and be a multiple of both
	 * the logical and physical sector sizes.
	 */
	if ((newsize <= oldsize) || (newsize % sectorsize) || (newsize % phys_sectorsize)) {
		printf("hfs_extendfs: invalid size\n");
		return (EINVAL);
	}
	newblkcnt = newsize / vcb->blockSize;
	/* The allocation block count is a 32-bit field on disk. */
	if (newblkcnt > (u_int64_t)0xFFFFFFFF)
		return (EOVERFLOW);

	addblks = newblkcnt - vcb->totalBlocks;

	if (hfs_resize_debug) {
		printf ("hfs_extendfs: old: size=%qu, blkcnt=%u\n", oldsize, hfsmp->totalBlocks);
		printf ("hfs_extendfs: new: size=%qu, blkcnt=%u, addblks=%u\n", newsize, (u_int32_t)newblkcnt, addblks);
	}
	printf("hfs_extendfs: will extend \"%s\" by %d blocks\n", vcb->vcbVN, addblks);

	/* Only one resize (grow or shrink) may be in flight at a time. */
	HFS_MOUNT_LOCK(hfsmp, TRUE);
	if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		error = EALREADY;
		goto out;
	}
	hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	/* Start with a clean journal. */
	hfs_journal_flush(hfsmp, TRUE);

	/*
	 * Enclose changes inside a transaction.
	 */
	if (hfs_start_transaction(hfsmp) != 0) {
		error = EINVAL;
		goto out;
	}
	transaction_begun = 1;

	/*
	 * Note: we take the attributes lock in case we have an attribute data vnode
	 * which needs to change size.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
	vp = vcb->allocationsRefNum;
	fp = VTOF(vp);
	/* Save the allocation fork so it can be restored if we fail below. */
	bcopy(&fp->ff_data, &forkdata, sizeof(forkdata));

	/*
	 * Calculate additional space required (if any) by allocation bitmap.
	 */
	oldBitmapSize = fp->ff_size;
	bitmapblks = roundup((newblkcnt+7) / 8, vcb->vcbVBMIOSize) / vcb->blockSize;
	if (bitmapblks > (daddr_t)fp->ff_blocks)
		bitmapblks -= fp->ff_blocks;
	else
		bitmapblks = 0;

	/*
	 * The allocation bitmap can contain unused bits that are beyond end of
	 * current volume's allocation blocks.  Usually they are supposed to be
	 * zero'ed out but there can be cases where they might be marked as used.
	 * After extending the file system, those bits can represent valid
	 * allocation blocks, so we mark all the bits from the end of current
	 * volume to end of allocation bitmap as "free".
	 */
	BlockMarkFreeUnused(vcb, vcb->totalBlocks,
			(fp->ff_blocks * vcb->blockSize * 8) - vcb->totalBlocks);

	if (bitmapblks > 0) {
		daddr64_t blkno;
		daddr_t blkcnt;
		off_t bytesAdded;

		/*
		 * Get the bitmap's current size (in allocation blocks) so we know
		 * where to start zero filling once the new space is added.  We've
		 * got to do this before the bitmap is grown.
		 */
		blkno  = (daddr64_t)fp->ff_blocks;

		/*
		 * Try to grow the allocation file in the normal way, using allocation
		 * blocks already existing in the file system.  This way, we might be
		 * able to grow the bitmap contiguously, or at least in the metadata
		 * zone.
		 */
		error = ExtendFileC(vcb, fp, bitmapblks * vcb->blockSize, 0,
				kEFAllMask | kEFNoClumpMask | kEFReserveMask
				| kEFMetadataMask | kEFContigMask, &bytesAdded);

		if (error == 0) {
			usedExtendFileC = true;
		} else {
			/*
			 * If the above allocation failed, fall back to allocating the new
			 * extent of the bitmap from the space we're going to add.  Since those
			 * blocks don't yet belong to the file system, we have to update the
			 * extent list directly, and manually adjust the file size.
			 */
			bytesAdded = 0;
			error = AddFileExtent(vcb, fp, vcb->totalBlocks, bitmapblks);
			if (error) {
				printf("hfs_extendfs: error %d adding extents\n", error);
				goto out;
			}
			fp->ff_blocks += bitmapblks;
			VTOC(vp)->c_blocks = fp->ff_blocks;
			VTOC(vp)->c_flag |= C_MODIFIED;
		}

		/*
		 * Update the allocation file's size to include the newly allocated
		 * blocks.  Note that ExtendFileC doesn't do this, which is why this
		 * statement is outside the above "if" statement.
		 */
		fp->ff_size += (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;

		/*
		 * Zero out the new bitmap blocks.
		 */
		{

			bp = NULL;
			blkcnt = bitmapblks;
			while (blkcnt > 0) {
				error = (int)buf_meta_bread(vp, blkno, vcb->blockSize, NOCRED, &bp);
				if (error) {
					if (bp) {
						buf_brelse(bp);
					}
					break;
				}
				bzero((char *)buf_dataptr(bp), vcb->blockSize);
				/* Aged buffers are recycled quickly; we won't re-read these. */
				buf_markaged(bp);
				error = (int)buf_bwrite(bp);
				if (error)
					break;
				--blkcnt;
				++blkno;
			}
		}
		if (error) {
			printf("hfs_extendfs: error %d clearing blocks\n", error);
			goto out;
		}
		/*
		 * Mark the new bitmap space as allocated.
		 *
		 * Note that ExtendFileC will have marked any blocks it allocated, so
		 * this is only needed if we used AddFileExtent.  Also note that this
		 * has to come *after* the zero filling of new blocks in the case where
		 * we used AddFileExtent (since the part of the bitmap we're touching
		 * is in those newly allocated blocks).
		 */
		if (!usedExtendFileC) {
			error = BlockMarkAllocated(vcb, vcb->totalBlocks, bitmapblks);
			if (error) {
				printf("hfs_extendfs: error %d setting bitmap\n", error);
				goto out;
			}
			vcb->freeBlocks -= bitmapblks;
		}
	}
	/*
	 * Mark the new alternate VH as allocated.
	 * The alternate volume header occupies the last 1KB of the volume,
	 * which is two allocation blocks when blockSize is 512, else one.
	 */
	if (vcb->blockSize == 512)
		error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 2, 2);
	else
		error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 1, 1);
	if (error) {
		printf("hfs_extendfs: error %d setting bitmap (VH)\n", error);
		goto out;
	}
	/*
	 * Mark the old alternate VH as free.
	 */
	if (vcb->blockSize == 512)
		(void) BlockMarkFree(vcb, vcb->totalBlocks - 2, 2);
	else
		(void) BlockMarkFree(vcb, vcb->totalBlocks - 1, 1);
	/*
	 * Adjust file system variables for new space.
	 * Save previous values first so the error path below can roll back.
	 */
	prev_phys_block_count = hfsmp->hfs_logical_block_count;
	prev_alt_sector = hfsmp->hfs_alt_id_sector;

	vcb->totalBlocks += addblks;
	vcb->freeBlocks += addblks;
	hfsmp->hfs_logical_block_count = newsize / sectorsize;
	hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / sectorsize) +
	                          HFS_ALT_SECTOR(sectorsize, hfsmp->hfs_logical_block_count);
	MarkVCBDirty(vcb);
	error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	if (error) {
		/* NOTE(review): format string lacks a trailing newline. */
		printf("hfs_extendfs: couldn't flush volume headers (%d)", error);
		/*
		 * Restore to old state.
		 */
		if (usedExtendFileC) {
			(void) TruncateFileC(vcb, fp, oldBitmapSize, 0, FORK_IS_RSRC(fp),
								 FTOC(fp)->c_fileid, false);
		} else {
			fp->ff_blocks -= bitmapblks;
			fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
			/*
			 * No need to mark the excess blocks free since those bitmap blocks
			 * are no longer part of the bitmap.  But we do need to undo the
			 * effect of the "vcb->freeBlocks -= bitmapblks" above.
			 */
			vcb->freeBlocks += bitmapblks;
		}
		vcb->totalBlocks -= addblks;
		vcb->freeBlocks -= addblks;
		hfsmp->hfs_logical_block_count = prev_phys_block_count;
		hfsmp->hfs_alt_id_sector = prev_alt_sector;
		MarkVCBDirty(vcb);
		/* Re-mark the old alternate VH (freed above) as allocated again;
		 * if that fails the bitmap no longer matches reality. */
		if (vcb->blockSize == 512) {
			if (BlockMarkAllocated(vcb, vcb->totalBlocks - 2, 2)) {
				hfs_mark_volume_inconsistent(hfsmp);
			}
		} else {
			if (BlockMarkAllocated(vcb, vcb->totalBlocks - 1, 1)) {
				hfs_mark_volume_inconsistent(hfsmp);
			}
		}
		goto out;
	}
	/*
	 * Invalidate the old alternate volume header.
	 */
	bp = NULL;
	if (prev_alt_sector) {
		if (buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(prev_alt_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) {
			journal_modify_block_start(hfsmp->jnl, bp);

			bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize);

			journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
		} else if (bp) {
			buf_brelse(bp);
		}
	}

	/*
	 * Update the metadata zone size based on current volume size
	 */
	hfs_metadatazone_init(hfsmp, false);

	/*
	 * Adjust the size of hfsmp->hfs_attrdata_vp
	 */
	if (hfsmp->hfs_attrdata_vp) {
		struct cnode *attr_cp;
		struct filefork *attr_fp;

		if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
			attr_cp = VTOC(hfsmp->hfs_attrdata_vp);
			attr_fp = VTOF(hfsmp->hfs_attrdata_vp);

			attr_cp->c_blocks = newblkcnt;
			attr_fp->ff_blocks = newblkcnt;
			attr_fp->ff_extents[0].blockCount = newblkcnt;
			attr_fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
			ubc_setsize(hfsmp->hfs_attrdata_vp, attr_fp->ff_size);
			vnode_put(hfsmp->hfs_attrdata_vp);
		}
	}

	/*
	 * Update the R/B Tree if necessary.  Since we don't have to drop the systemfile
	 * locks in the middle of these operations like we do in the truncate case
	 * where we have to relocate files, we can only update the red-black tree
	 * if there were actual changes made to the bitmap.  Also, we can't really scan the
	 * new portion of the bitmap before it has been allocated. The BlockMarkAllocated
	 * routines are smart enough to avoid the r/b tree if the portion they are manipulating is
	 * not currently controlled by the tree.
	 *
	 * We only update hfsmp->allocLimit if totalBlocks actually increased.
	 */

	if (error == 0) {
		UpdateAllocLimit(hfsmp, hfsmp->totalBlocks);
	}

	/* Log successful extending */
	printf("hfs_extendfs: extended \"%s\" to %d blocks (was %d blocks)\n",
	       hfsmp->vcbVN, hfsmp->totalBlocks, (u_int32_t)(oldsize/hfsmp->blockSize));

out:
	if (error && fp) {
		/* Restore allocation fork. */
		bcopy(&forkdata, &fp->ff_data, sizeof(forkdata));
		VTOC(vp)->c_blocks = fp->ff_blocks;

	}

	/* Clear the resize flag, drop locks, and close out the transaction
	 * (followed by a journal flush and a device cache flush so everything
	 * is durably on disk before we return). */
	HFS_MOUNT_LOCK(hfsmp, TRUE);
	hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (transaction_begun) {
		hfs_end_transaction(hfsmp);
		hfs_journal_flush(hfsmp, FALSE);
		/* Just to be sure, sync all data to the disk */
		(void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
	}

	return MacToVFSError(error);
}
4257
4258 #define HFS_MIN_SIZE (32LL * 1024LL * 1024LL)
4259
4260 /*
4261 * Truncate a file system (while still mounted).
4262 */
int
hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
{
	struct buf *bp = NULL;
	u_int64_t oldsize;
	u_int32_t newblkcnt;
	u_int32_t reclaimblks = 0;
	int lockflags = 0;
	int transaction_begun = 0;
	Boolean updateFreeBlocks = false;
	Boolean disable_sparse = false;
	int error = 0;

	/* Only one resize (grow or shrink) may be in flight at a time.
	 * Reset the resize-progress counters under the mount lock. */
	lck_mtx_lock(&hfsmp->hfs_mutex);
	if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
		lck_mtx_unlock(&hfsmp->hfs_mutex);
		return (EALREADY);
	}
	hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
	hfsmp->hfs_resize_blocksmoved = 0;
	hfsmp->hfs_resize_totalblocks = 0;
	hfsmp->hfs_resize_progress = 0;
	lck_mtx_unlock(&hfsmp->hfs_mutex);

	/*
	 * - Journaled HFS Plus volumes only.
	 * - No embedded volumes.
	 */
	if ((hfsmp->jnl == NULL) ||
	    (hfsmp->hfsPlusIOPosOffset != 0)) {
		error = EPERM;
		goto out;
	}
	oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
	newblkcnt = newsize / hfsmp->blockSize;
	reclaimblks = hfsmp->totalBlocks - newblkcnt;

	if (hfs_resize_debug) {
		printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1));
		printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks);
	}

	/* Make sure new size is valid: not below the minimum, actually
	 * shrinking, and a multiple of both sector sizes. */
	if ((newsize < HFS_MIN_SIZE) ||
	    (newsize >= oldsize) ||
	    (newsize % hfsmp->hfs_logical_block_size) ||
	    (newsize % hfsmp->hfs_physical_block_size)) {
		printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
		error = EINVAL;
		goto out;
	}

	/*
	 * Make sure that the file system has enough free blocks reclaim.
	 *
	 * Before resize, the disk is divided into four zones -
	 * 	A. Allocated_Stationary - These are allocated blocks that exist
	 * 	   before the new end of disk.  These blocks will not be
	 * 	   relocated or modified during resize.
	 * 	B. Free_Stationary - These are free blocks that exist before the
	 * 	   new end of disk.  These blocks can be used for any new
	 * 	   allocations during resize, including allocation for relocating
	 * 	   data from the area of disk being reclaimed.
	 * 	C. Allocated_To-Reclaim - These are allocated blocks that exist
	 *	   beyond the new end of disk.  These blocks need to be reclaimed
	 * 	   during resize by allocating equal number of blocks in Free
	 * 	   Stationary zone and copying the data.
	 * 	D. Free_To-Reclaim - These are free blocks that exist beyond the
	 * 	   new end of disk.  Nothing special needs to be done to reclaim
	 * 	   them.
	 *
	 * Total number of blocks on the disk before resize:
	 * ------------------------------------------------
	 * 	Total Blocks = Allocated_Stationary + Free_Stationary +
	 * 	               Allocated_To-Reclaim + Free_To-Reclaim
	 *
	 * Total number of blocks that need to be reclaimed:
	 * ------------------------------------------------
	 *	Blocks to Reclaim = Allocated_To-Reclaim + Free_To-Reclaim
	 *
	 * Note that the check below also makes sure that we have enough space
	 * to relocate data from Allocated_To-Reclaim to Free_Stationary.
	 * Therefore we do not need to check total number of blocks to relocate
	 * later in the code.
	 *
	 * The condition below gets converted to:
	 *
	 * Allocated To-Reclaim + Free To-Reclaim >= Free Stationary + Free To-Reclaim
	 *
	 * which is equivalent to:
	 *
	 *              Allocated To-Reclaim >= Free Stationary
	 */
	if (reclaimblks >= hfs_freeblks(hfsmp, 1)) {
		printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
		error = ENOSPC;
		goto out;
	}

	/* Start with a clean journal. */
	hfs_journal_flush(hfsmp, TRUE);

	if (hfs_start_transaction(hfsmp) != 0) {
		error = EINVAL;
		goto out;
	}
	transaction_begun = 1;

	/* Take the bitmap lock to update the alloc limit field */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * Prevent new allocations from using the part we're trying to truncate.
	 *
	 * NOTE: allocLimit is set to the allocation block number where the new
	 * alternate volume header will be.  That way there will be no files to
	 * interfere with allocating the new alternate volume header, and no files
	 * in the allocation blocks beyond (i.e. the blocks we're trying to
	 * truncate away).
	 *
	 * Also shrink the red-black tree if needed.
	 */
	if (hfsmp->blockSize == 512) {
		error = UpdateAllocLimit (hfsmp, newblkcnt - 2);
	}
	else {
		error = UpdateAllocLimit (hfsmp, newblkcnt - 1);
	}

	/* Sparse devices use first fit allocation which is not ideal
	 * for volume resize which requires best fit allocation.  If a
	 * sparse device is being truncated, disable the sparse device
	 * property temporarily for the duration of resize.  Also reset
	 * the free extent cache so that it is rebuilt as sorted by
	 * totalBlocks instead of startBlock.
	 *
	 * Note that this will affect all allocations on the volume and
	 * ideal fix would be just to modify resize-related allocations,
	 * but it will result in complexity like handling of two free
	 * extent caches sorted differently, etc.  So we stick to this
	 * solution for now.
	 */
	HFS_MOUNT_LOCK(hfsmp, TRUE);
	if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
		ResetVCBFreeExtCache(hfsmp);
		disable_sparse = true;
	}

	/*
	 * Update the volume free block count to reflect the total number
	 * of free blocks that will exist after a successful resize.
	 * Relocation of extents will result in no net change in the total
	 * free space on the disk.  Therefore the code that allocates
	 * space for new extent and deallocates the old extent explicitly
	 * prevents updating the volume free block count.  It will also
	 * prevent false disk full error when the number of blocks in
	 * an extent being relocated is more than the free blocks that
	 * will exist after the volume is resized.
	 */
	hfsmp->freeBlocks -= reclaimblks;
	updateFreeBlocks = true;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/*
	 * Update the metadata zone size to match the new volume size,
	 * and if the new size is too small, the metadata zone might be
	 * disabled entirely.
	 */
	hfs_metadatazone_init(hfsmp, false);

	/*
	 * If some files have blocks at or beyond the location of the
	 * new alternate volume header, recalculate free blocks and
	 * reclaim blocks.  Otherwise just update free blocks count.
	 *
	 * The current allocLimit is set to the location of new alternate
	 * volume header, and reclaimblks are the total number of blocks
	 * that need to be reclaimed.  So the check below is really
	 * ignoring the blocks allocated for old alternate volume header.
	 */
	if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) {
		/*
		 * hfs_reclaimspace will use separate transactions when
		 * relocating files (so we don't overwhelm the journal).
		 */
		hfs_end_transaction(hfsmp);
		transaction_begun = 0;

		/* Attempt to reclaim some space. */
		error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context);
		if (error != 0) {
			printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error);
			error = ENOSPC;
			goto out;
		}
		if (hfs_start_transaction(hfsmp) != 0) {
			error = EINVAL;
			goto out;
		}
		transaction_begun = 1;

		/* Check if we're clear now. */
		error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks);
		if (error != 0) {
			printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error);
			error = EAGAIN;  /* tell client to try again */
			goto out;
		}
	}

	/*
	 * Note: we take the attributes lock in case we have an attribute data vnode
	 * which needs to change size.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * Allocate last 1KB for alternate volume header.
	 * (Two allocation blocks when blockSize is 512, else one.)
	 */
	error = BlockMarkAllocated(hfsmp, hfsmp->allocLimit, (hfsmp->blockSize == 512) ? 2 : 1);
	if (error) {
		printf("hfs_truncatefs: Error %d allocating new alternate volume header\n", error);
		goto out;
	}

	/*
	 * Mark the old alternate volume header as free.
	 * We don't bother shrinking allocation bitmap file.
	 */
	if (hfsmp->blockSize == 512)
		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2);
	else
		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1);

	/*
	 * Invalidate the existing alternate volume header.
	 *
	 * Don't include this in a transaction (don't call journal_modify_block)
	 * since this block will be outside of the truncated file system!
	 */
	if (hfsmp->hfs_alt_id_sector) {
		error = buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp);
		if (error == 0) {
			bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)), kMDBSize);
			(void) VNOP_BWRITE(bp);
		} else {
			if (bp) {
				buf_brelse(bp);
			}
		}
		bp = NULL;
	}

	/* Log successful shrinking. */
	printf("hfs_truncatefs: shrank \"%s\" to %d blocks (was %d blocks)\n",
	       hfsmp->vcbVN, newblkcnt, hfsmp->totalBlocks);

	/*
	 * Adjust file system variables and flush them to disk.
	 */
	hfsmp->totalBlocks = newblkcnt;
	hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size;
	hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
	MarkVCBDirty(hfsmp);
	error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	if (error)
		/* At this point the on-disk state is past the point of no return;
		 * failure to flush the header is unrecoverable. */
		panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error);

	/*
	 * Adjust the size of hfsmp->hfs_attrdata_vp
	 */
	if (hfsmp->hfs_attrdata_vp) {
		struct cnode *cp;
		struct filefork *fp;

		if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
			cp = VTOC(hfsmp->hfs_attrdata_vp);
			fp = VTOF(hfsmp->hfs_attrdata_vp);

			cp->c_blocks = newblkcnt;
			fp->ff_blocks = newblkcnt;
			fp->ff_extents[0].blockCount = newblkcnt;
			fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
			ubc_setsize(hfsmp->hfs_attrdata_vp, fp->ff_size);
			vnode_put(hfsmp->hfs_attrdata_vp);
		}
	}

out:
	/*
	 * Update the allocLimit to acknowledge the last one or two blocks now.
	 * Add it to the tree as well if necessary.
	 */
	UpdateAllocLimit (hfsmp, hfsmp->totalBlocks);

	HFS_MOUNT_LOCK(hfsmp, TRUE);
	if (disable_sparse == true) {
		/* Now that resize is completed, set the volume to be sparse
		 * device again so that all further allocations will be first
		 * fit instead of best fit.  Reset free extent cache so that
		 * it is rebuilt.
		 */
		hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
		ResetVCBFreeExtCache(hfsmp);
	}

	/* On failure, undo the speculative free-block reduction made above. */
	if (error && (updateFreeBlocks == true)) {
		hfsmp->freeBlocks += reclaimblks;
	}

	if (hfsmp->nextAllocation >= hfsmp->allocLimit) {
		hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1;
	}
	hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
	HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	/* On error, reset the metadata zone for original volume size */
	if (error && (updateFreeBlocks == true)) {
		hfs_metadatazone_init(hfsmp, false);
	}

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (transaction_begun) {
		hfs_end_transaction(hfsmp);
		hfs_journal_flush(hfsmp, FALSE);
		/* Just to be sure, sync all data to the disk */
		(void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
	}

	return MacToVFSError(error);
}
4603
4604
4605 /*
4606 * Invalidate the physical block numbers associated with buffer cache blocks
4607 * in the given extent of the given vnode.
4608 */
/* Argument block passed through buf_iterate() to
 * hfs_invalidate_block_numbers_callback: the range of device sectors
 * whose cached buffers should have their physical block numbers reset. */
struct hfs_inval_blk_no {
	daddr64_t sectorStart;	/* first device sector of the range */
	daddr64_t sectorCount;	/* number of device sectors in the range */
};
4613 static int
4614 hfs_invalidate_block_numbers_callback(buf_t bp, void *args_in)
4615 {
4616 daddr64_t blkno;
4617 struct hfs_inval_blk_no *args;
4618
4619 blkno = buf_blkno(bp);
4620 args = args_in;
4621
4622 if (blkno >= args->sectorStart && blkno < args->sectorStart+args->sectorCount)
4623 buf_setblkno(bp, buf_lblkno(bp));
4624
4625 return BUF_RETURNED;
4626 }
4627 static void
4628 hfs_invalidate_sectors(struct vnode *vp, daddr64_t sectorStart, daddr64_t sectorCount)
4629 {
4630 struct hfs_inval_blk_no args;
4631 args.sectorStart = sectorStart;
4632 args.sectorCount = sectorCount;
4633
4634 buf_iterate(vp, hfs_invalidate_block_numbers_callback, BUF_SCAN_DIRTY|BUF_SCAN_CLEAN, &args);
4635 }
4636
4637
4638 /*
4639 * Copy the contents of an extent to a new location. Also invalidates the
4640 * physical block number of any buffer cache block in the copied extent
4641 * (so that if the block is written, it will go through VNOP_BLOCKMAP to
4642 * determine the new physical block number).
4643 */
static int
hfs_copy_extent(
	struct hfsmount *hfsmp,
	struct vnode *vp,		/* The file whose extent is being copied. */
	u_int32_t oldStart,		/* The start of the source extent. */
	u_int32_t newStart,		/* The start of the destination extent. */
	u_int32_t blockCount,		/* The number of allocation blocks to copy. */
	vfs_context_t context)
{
	int err = 0;
	size_t bufferSize;
	void *buffer = NULL;
	struct vfsioattr ioattr;
	buf_t bp = NULL;
	off_t resid;			/* bytes remaining to copy */
	size_t ioSize;
	u_int32_t ioSizeSectors;	/* Device sectors in this I/O */
	daddr64_t srcSector, destSector;
	u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size;
#if CONFIG_PROTECT
	int cpenabled = 0;
#endif

	/*
	 * Sanity check that we have locked the vnode of the file we're copying.
	 *
	 * But since hfs_systemfile_lock() doesn't actually take the lock on
	 * the allocation file if a journal is active, ignore the check if the
	 * file being copied is the allocation file.
	 */
	struct cnode *cp = VTOC(vp);
	if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread())
		panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp);

#if CONFIG_PROTECT
	/* Prepare the CP blob and get it ready for use */
	if (!vnode_issystem (vp) && vnode_isreg(vp) &&
			cp_fs_protected (hfsmp->hfs_mp)) {
		int cp_err = 0;
		cp_err = cp_handle_relocate (cp);
		if (cp_err) {
			/*
			 * can't copy the file because we couldn't set up keys.
			 * bail out
			 */
			return cp_err;
		}
		else {
			cpenabled = 1;
		}
	}
#endif

	/*
	 * Determine the I/O size to use
	 *
	 * NOTE: Many external drives will result in an ioSize of 128KB.
	 * TODO: Should we use a larger buffer, doing several consecutive
	 * reads, then several consecutive writes?
	 */
	vfs_ioattr(hfsmp->hfs_mp, &ioattr);
	bufferSize = MIN(ioattr.io_maxreadcnt, ioattr.io_maxwritecnt);
	if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize))
		return ENOMEM;

	/* Get a buffer for doing the I/O */
	bp = buf_alloc(hfsmp->hfs_devvp);
	buf_setdataptr(bp, (uintptr_t)buffer);

	/* Convert allocation-block addresses to device-sector addresses. */
	resid  = (off_t) blockCount * (off_t) hfsmp->blockSize;
	srcSector  = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
	destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
	while (resid > 0) {
		ioSize = MIN(bufferSize, (size_t) resid);
		ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size;

		/* Prepare the buffer for reading */
		buf_reset(bp, B_READ);
		buf_setsize(bp, ioSize);
		buf_setcount(bp, ioSize);
		buf_setblkno(bp, srcSector);
		buf_setlblkno(bp, srcSector);

		/* Attach the CP to the buffer */
#if CONFIG_PROTECT
		if (cpenabled) {
			buf_setcpaddr (bp, cp->c_cpentry);
		}
#endif

		/* Do the read */
		err = VNOP_STRATEGY(bp);
		if (!err)
			err = buf_biowait(bp);
		if (err) {
			printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (read)\n", err);
			break;
		}

		/* Prepare the buffer for writing */
		buf_reset(bp, B_WRITE);
		buf_setsize(bp, ioSize);
		buf_setcount(bp, ioSize);
		buf_setblkno(bp, destSector);
		buf_setlblkno(bp, destSector);
		/* For system files, force-unit-access keeps the copy durable when
		 * the journal relies on FUA instead of explicit cache flushes. */
		if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl))
			buf_markfua(bp);

#if CONFIG_PROTECT
		/* Attach the CP to the buffer */
		if (cpenabled) {
			buf_setcpaddr (bp, cp->c_cpentry);
		}
#endif

		/* Do the write */
		vnode_startwrite(hfsmp->hfs_devvp);
		err = VNOP_STRATEGY(bp);
		if (!err)
			err = buf_biowait(bp);
		if (err) {
			printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (write)\n", err);
			break;
		}

		resid -= ioSize;
		srcSector += ioSizeSectors;
		destSector += ioSizeSectors;
	}
	if (bp)
		buf_free(bp);
	if (buffer)
		kmem_free(kernel_map, (vm_offset_t)buffer, bufferSize);

	/* Make sure all writes have been flushed to disk. */
	if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) {
		err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
		if (err) {
			printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err);
			err = 0;	/* Don't fail the copy. */
		}
	}

	/* Detach cached buffers from the old physical location so future
	 * writes re-map to the new one. */
	if (!err)
		hfs_invalidate_sectors(vp, (daddr64_t)oldStart*sectorsPerBlock, (daddr64_t)blockCount*sectorsPerBlock);

	return err;
}
4792
4793
4794 /* Structure to store state of reclaiming extents from a
4795 * given file. hfs_reclaim_file()/hfs_reclaim_xattr()
4796 * initializes the values in this structure which are then
4797 * used by code that reclaims and splits the extents.
4798 */
struct hfs_reclaim_extent_info {
	struct vnode *vp;		/* Vnode of the file whose extents are being reclaimed */
	u_int32_t fileID;		/* Catalog node ID of the file */
	u_int8_t forkType;		/* Data or resource fork */
	u_int8_t is_dirlink;             /* Extent belongs to directory hard link */
	u_int8_t is_sysfile;             /* Extent belongs to system file */
	u_int8_t is_xattr;               /* Extent belongs to extent-based xattr */
	u_int8_t extent_index;		/* Index of current extent within the extent record */
	int lockflags;                   /* Locks that reclaim and split code should grab before modifying the extent record */
	u_int32_t blocks_relocated;      /* Total blocks relocated for this file till now */
	u_int32_t recStartBlock;         /* File allocation block number (FABN) for current extent record */
	u_int32_t cur_blockCount;        /* Number of allocation blocks that have been checked for reclaim */
	struct filefork *catalog_fp;     /* If non-NULL, extent is from catalog record */
	union record {
		HFSPlusExtentRecord overflow;/* Extent record from overflow extents btree */
		HFSPlusAttrRecord xattr;     /* Attribute record for large EAs */
	} record;
	HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being processed.
					   * For catalog extent record, points to the correct
					   * extent information in filefork.  For overflow extent
					   * record, or xattr record, points to extent record
					   * in the structure above
					   */
	struct cat_desc *dirlink_desc;	/* Catalog descriptor for directory hard links */
	struct cat_attr *dirlink_attr;	/* Catalog attributes for directory hard links */
	struct filefork *dirlink_fork;	 /* For directory hard links, fp points actually to this */
	struct BTreeIterator *iterator;  /* Shared read/write iterator, hfs_reclaim_file/xattr()
					  * use it for reading and hfs_reclaim_extent()/hfs_split_extent()
					  * use it for writing updated extent record
					  */
	struct FSBufferDescriptor btdata; /* Shared btdata for reading/writing extent record, same as iterator above */
	u_int16_t recordlen;		/* Length of the btree record read into the union above */
	int overflow_count;              /* For debugging, counter for overflow extent record */
	FCB *fcb;                        /* Pointer to the current btree being traversed */
};
4834
4835 /*
4836 * Split the current extent into two extents, with first extent
4837 * to contain given number of allocation blocks. Splitting of
4838 * extent creates one new extent entry which can result in
4839 * shifting of many entries through all the extent records of a
4840 * file, and/or creating a new extent record in the overflow
4841 * extent btree.
4842 *
4843 * Example:
4844 * The diagram below represents two consecutive extent records,
4845 * for simplicity, lets call them record X and X+1 respectively.
4846 * Interesting extent entries have been denoted by letters.
4847 * If the letter is unchanged before and after split, it means
4848 * that the extent entry was not modified during the split.
4849 * A '.' means that the entry remains unchanged after the split
4850 * and is not relevant for our example. A '0' means that the
4851 * extent entry is empty.
4852 *
4853 * If there isn't sufficient contiguous free space to relocate
4854 * an extent (extent "C" below), we will have to break the one
4855 * extent into multiple smaller extents, and relocate each of
4856 * the smaller extents individually. The way we do this is by
4857 * finding the largest contiguous free space that is currently
4858 * available (N allocation blocks), and then convert extent "C"
4859 * into two extents, C1 and C2, that occupy exactly the same
4860 * allocation blocks as extent C. Extent C1 is the first
4861 * N allocation blocks of extent C, and extent C2 is the remainder
4862 * of extent C. Then we can relocate extent C1 since we know
4863 * we have enough contiguous free space to relocate it in its
4864 * entirety. We then repeat the process starting with extent C2.
4865 *
4866 * In record X, only the entries following entry C are shifted, and
4867 * the original entry C is replaced with two entries C1 and C2 which
4868 * are actually two extent entries for contiguous allocation blocks.
4869 *
4870 * Note that the entry E from record X is shifted into record X+1 as
4871 * the new first entry. Since the first entry of record X+1 is updated,
4872 * the FABN will also get updated with the blockCount of entry E.
4873 * This also results in shifting of all extent entries in record X+1.
4874 * Note that the number of empty entries after the split has been
4875 * changed from 3 to 2.
4876 *
4877 * Before:
4878 * record X record X+1
4879 * ---------------------===--------- ---------------------------------
4880 * | A | . | . | . | B | C | D | E | | F | . | . | . | G | 0 | 0 | 0 |
4881 * ---------------------===--------- ---------------------------------
4882 *
4883 * After:
4884 * ---------------------=======----- ---------------------------------
4885 * | A | . | . | . | B | C1| C2| D | | E | F | . | . | . | G | 0 | 0 |
4886 * ---------------------=======----- ---------------------------------
4887 *
4888 * C1.startBlock = C.startBlock
4889 * C1.blockCount = N
4890 *
4891 * C2.startBlock = C.startBlock + N
4892 * C2.blockCount = C.blockCount - N
4893 *
4894 * FABN = old FABN - E.blockCount
4895 *
4896 * Inputs:
4897 * extent_info - This is the structure that contains state about
4898 * the current file, extent, and extent record that
4899 * is being relocated. This structure is shared
4900 * among code that traverses through all the extents
4901 * of the file, code that relocates extents, and
4902 * code that splits the extent.
4903 * Output:
4904 * Zero on success, non-zero on failure.
4905 */
static int
hfs_split_extent(struct hfs_reclaim_extent_info *extent_info, uint32_t newBlockCount)
{
	int error = 0;
	int index = extent_info->extent_index;
	int i;
	HFSPlusExtentDescriptor shift_extent;	/* Entry overflowing into the following record */
	HFSPlusExtentDescriptor last_extent;
	HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being manipulated */
	HFSPlusExtentRecord *extents_rec = NULL;
	HFSPlusExtentKey *extents_key = NULL;
	HFSPlusAttrRecord *xattr_rec = NULL;
	HFSPlusAttrKey *xattr_key = NULL;
	struct BTreeIterator iterator;		/* Local iterator; distinct from the shared extent_info->iterator */
	struct FSBufferDescriptor btdata;
	uint16_t reclen;
	uint32_t read_recStartBlock;	/* Starting allocation block number to read old extent record */
	uint32_t write_recStartBlock;	/* Starting allocation block number to insert newly updated extent record */
	Boolean create_record = false;
	Boolean is_xattr;

	is_xattr = extent_info->is_xattr;
	extents = extent_info->extents;

	if (hfs_resize_debug) {
		printf ("hfs_split_extent: Split record:%u recStartBlock=%u %u:(%u,%u) for %u blocks\n", extent_info->overflow_count, extent_info->recStartBlock, index, extents[index].startBlock, extents[index].blockCount, newBlockCount);
	}

	/* Determine the starting allocation block number for the following
	 * overflow extent record, if any, before the current record
	 * gets modified.
	 */
	read_recStartBlock = extent_info->recStartBlock;
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		if (extents[i].blockCount == 0) {
			break;
		}
		read_recStartBlock += extents[i].blockCount;
	}

	/* Shift and split */
	if (index == kHFSPlusExtentDensity-1) {
		/* The new extent created after split will go into following overflow extent record */
		shift_extent.startBlock = extents[index].startBlock + newBlockCount;
		shift_extent.blockCount = extents[index].blockCount - newBlockCount;

		/* Last extent in the record will be split, so nothing to shift */
	} else {
		/* Splitting of extents can result in at most of one
		 * extent entry to be shifted into following overflow extent
		 * record.  So, store the last extent entry for later.
		 */
		shift_extent = extents[kHFSPlusExtentDensity-1];

		/* Start shifting extent information from the end of the extent
		 * record to the index where we want to insert the new extent.
		 * Note that kHFSPlusExtentDensity-1 is already saved above, and
		 * does not need to be shifted.  The extent entry that is being
		 * split does not get shifted.
		 */
		for (i = kHFSPlusExtentDensity-2; i > index; i--) {
			if (hfs_resize_debug) {
				if (extents[i].blockCount) {
					printf ("hfs_split_extent: Shift %u:(%u,%u) to %u:(%u,%u)\n", i, extents[i].startBlock, extents[i].blockCount, i+1, extents[i].startBlock, extents[i].blockCount);
				}
			}
			extents[i+1] = extents[i];
		}
	}

	if (index == kHFSPlusExtentDensity-1) {
		/* The second half of the extent being split will be the overflow
		 * entry that will go into following overflow extent record.  The
		 * value has been stored in 'shift_extent' above, so there is
		 * nothing to be done here.
		 */
	} else {
		/* Update the values in the second half of the extent being split
		 * before updating the first half of the split.  Note that the
		 * extent to split or first half of the split is at index 'index'
		 * and a new extent or second half of the split will be inserted at
		 * 'index+1' or into following overflow extent record.
		 */
		extents[index+1].startBlock = extents[index].startBlock + newBlockCount;
		extents[index+1].blockCount = extents[index].blockCount - newBlockCount;
	}
	/* Update the extent being split, only the block count will change */
	extents[index].blockCount = newBlockCount;

	if (hfs_resize_debug) {
		printf ("hfs_split_extent: Split %u:(%u,%u) and ", index, extents[index].startBlock, extents[index].blockCount);
		if (index != kHFSPlusExtentDensity-1) {
			printf ("%u:(%u,%u)\n", index+1, extents[index+1].startBlock, extents[index+1].blockCount);
		} else {
			printf ("overflow:(%u,%u)\n", shift_extent.startBlock, shift_extent.blockCount);
		}
	}

	/* If the newly split extent belongs to a large EA or lives in an
	 * overflow extent record, update it directly in the btree using the
	 * iterator information from the shared extent_info structure.
	 * (A NULL catalog_fp means the extent did not come from the catalog
	 * record.)
	 */
	if (extent_info->catalog_fp == NULL) {
		error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
				&(extent_info->btdata), extent_info->recordlen);
		if (error) {
			printf ("hfs_split_extent: fileID=%u BTReplaceRecord returned error=%d\n", extent_info->fileID, error);
			goto out;
		}
	}

	/* No extent entry to be shifted into another extent overflow record */
	if (shift_extent.blockCount == 0) {
		if (hfs_resize_debug) {
			printf ("hfs_split_extent: No extent entry to be shifted into overflow records\n");
		}
		error = 0;
		goto out;
	}

	/* The overflow extent entry has to be shifted into an extent
	 * overflow record.  This would mean that we have to shift
	 * extent entries from all overflow records by one.  We will
	 * start iteration from the first record to the last record,
	 * and shift the extent entry from one record to another.
	 * We might have to create a new record for the last extent
	 * entry for the file.
	 */

	/* Initialize iterator to search the next record */
	bzero(&iterator, sizeof(iterator));
	if (is_xattr) {
		/* Copy the key from the iterator that was to update the modified attribute record. */
		xattr_key = (HFSPlusAttrKey *)&(iterator.key);
		bcopy((HFSPlusAttrKey *)&(extent_info->iterator->key), xattr_key, sizeof(HFSPlusAttrKey));
		/* Note: xattr_key->startBlock will be initialized later in the iteration loop */

		MALLOC(xattr_rec, HFSPlusAttrRecord *,
				sizeof(HFSPlusAttrRecord), M_TEMP, M_WAITOK);
		if (xattr_rec == NULL) {
			error = ENOMEM;
			goto out;
		}
		btdata.bufferAddress = xattr_rec;
		btdata.itemSize = sizeof(HFSPlusAttrRecord);
		btdata.itemCount = 1;
		extents = xattr_rec->overflowExtents.extents;
	} else {
		extents_key = (HFSPlusExtentKey *) &(iterator.key);
		extents_key->keyLength = kHFSPlusExtentKeyMaximumLength;
		extents_key->forkType = extent_info->forkType;
		extents_key->fileID = extent_info->fileID;
		/* Note: extents_key->startBlock will be initialized later in the iteration loop */

		MALLOC(extents_rec, HFSPlusExtentRecord *,
				sizeof(HFSPlusExtentRecord), M_TEMP, M_WAITOK);
		if (extents_rec == NULL) {
			error = ENOMEM;
			goto out;
		}
		btdata.bufferAddress = extents_rec;
		btdata.itemSize = sizeof(HFSPlusExtentRecord);
		btdata.itemCount = 1;
		extents = extents_rec[0];
	}

	/* An extent entry still needs to be shifted into following overflow
	 * extent record.  This will result in the starting allocation block
	 * number of the extent record being changed which is part of the key
	 * for the extent record.  Since the extent record key is changing,
	 * the record can not be updated, instead has to be deleted and
	 * inserted again.
	 */
	while (shift_extent.blockCount) {
		if (hfs_resize_debug) {
			printf ("hfs_split_extent: Will shift (%u,%u) into record with startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, read_recStartBlock);
		}

		/* Search if there is any existing overflow extent record.
		 * For this, the logical start block number in the key is
		 * the value calculated based on the logical start block
		 * number of the current extent record and the total number
		 * of blocks existing in the current extent record.
		 */
		if (is_xattr) {
			xattr_key->startBlock = read_recStartBlock;
		} else {
			extents_key->startBlock = read_recStartBlock;
		}
		error = BTSearchRecord(extent_info->fcb, &iterator, &btdata, &reclen, &iterator);
		if (error) {
			if (error != btNotFound) {
				printf ("hfs_split_extent: fileID=%u startBlock=%u BTSearchRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
				goto out;
			}
			/* No record exists at this key; the loop body below
			 * creates a brand-new last record for the file.
			 */
			create_record = true;
		}

		/* The extra extent entry from the previous record is being inserted
		 * as the first entry in the current extent record.  This will change
		 * the file allocation block number (FABN) of the current extent
		 * record, which is the startBlock value from the extent record key.
		 * Since one extra entry is being inserted in the record, the new
		 * FABN for the record will be less than the old FABN by the number
		 * of blocks in the new extent entry being inserted at the start.
		 * We have to do this before we update read_recStartBlock to point
		 * at the startBlock of the following record.
		 */
		write_recStartBlock = read_recStartBlock - shift_extent.blockCount;
		if (hfs_resize_debug) {
			if (create_record) {
				printf ("hfs_split_extent: No records found for startBlock=%u, will create new with startBlock=%u\n", read_recStartBlock, write_recStartBlock);
			}
		}

		/* Now update the read_recStartBlock to account for total number
		 * of blocks in this extent record.  It will now point to the
		 * starting allocation block number for the next extent record.
		 */
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			if (extents[i].blockCount == 0) {
				break;
			}
			read_recStartBlock += extents[i].blockCount;
		}

		if (create_record == true) {
			/* Initialize new record content with only one extent entry */
			bzero(extents, sizeof(HFSPlusExtentRecord));
			/* The new record will contain only one extent entry */
			extents[0] = shift_extent;
			/* There are no more overflow extents to be shifted;
			 * zeroing blockCount terminates the while loop after
			 * the record is inserted below.
			 */
			shift_extent.startBlock = shift_extent.blockCount = 0;

			if (is_xattr) {
				xattr_rec->recordType = kHFSPlusAttrExtents;
				xattr_rec->overflowExtents.reserved = 0;
				reclen = sizeof(HFSPlusAttrExtents);
			} else {
				reclen = sizeof(HFSPlusExtentRecord);
			}
		} else {
			/* The overflow extent entry from previous record will be
			 * the first entry in this extent record.  If the last
			 * extent entry in this record is valid, it will be shifted
			 * into the following extent record as its first entry.  So
			 * save the last entry before shifting entries in current
			 * record.
			 */
			last_extent = extents[kHFSPlusExtentDensity-1];

			/* Shift all entries by one index towards the end */
			for (i = kHFSPlusExtentDensity-2; i >= 0; i--) {
				extents[i+1] = extents[i];
			}

			/* Overflow extent entry saved from previous record
			 * is now the first entry in the current record.
			 */
			extents[0] = shift_extent;

			if (hfs_resize_debug) {
				printf ("hfs_split_extent: Shift overflow=(%u,%u) to record with updated startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, write_recStartBlock);
			}

			/* The last entry from current record will be the
			 * overflow entry which will be the first entry for
			 * the following extent record.
			 */
			shift_extent = last_extent;

			/* Since the key->startBlock is being changed for this record,
			 * it should be deleted and inserted with the new key.
			 */
			error = BTDeleteRecord(extent_info->fcb, &iterator);
			if (error) {
				printf ("hfs_split_extent: fileID=%u startBlock=%u BTDeleteRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
				goto out;
			}
			if (hfs_resize_debug) {
				printf ("hfs_split_extent: Deleted record with startBlock=%u\n", (is_xattr ? xattr_key->startBlock : extents_key->startBlock));
			}
		}

		/* Insert the newly created or modified extent record */
		bzero(&iterator.hint, sizeof(iterator.hint));
		if (is_xattr) {
			xattr_key->startBlock = write_recStartBlock;
		} else {
			extents_key->startBlock = write_recStartBlock;
		}
		error = BTInsertRecord(extent_info->fcb, &iterator, &btdata, reclen);
		if (error) {
			printf ("hfs_split_extent: fileID=%u, startBlock=%u BTInsertRecord error=%d\n", extent_info->fileID, write_recStartBlock, error);
			goto out;
		}
		if (hfs_resize_debug) {
			printf ("hfs_split_extent: Inserted extent record with startBlock=%u\n", write_recStartBlock);
		}
	}
	/* Flush the btree so the on-disk state matches what we just wrote */
	BTFlushPath(extent_info->fcb);
out:
	if (extents_rec) {
		FREE (extents_rec, M_TEMP);
	}
	if (xattr_rec) {
		FREE (xattr_rec, M_TEMP);
	}
	return error;
}
5216
5217
5218 /*
5219 * Relocate an extent if it lies beyond the expected end of volume.
5220 *
5221 * This function is called for every extent of the file being relocated.
5222 * It allocates space for relocation, copies the data, deallocates
 * the old extent, and updates the corresponding on-disk extent. If the function
5224 * does not find contiguous space to relocate an extent, it splits the
5225 * extent in smaller size to be able to relocate it out of the area of
5226 * disk being reclaimed. As an optimization, if an extent lies partially
5227 * in the area of the disk being reclaimed, it is split so that we only
5228 * have to relocate the area that was overlapping with the area of disk
5229 * being reclaimed.
5230 *
5231 * Note that every extent is relocated in its own transaction so that
5232 * they do not overwhelm the journal. This function handles the extent
5233 * record that exists in the catalog record, extent record from overflow
5234 * extents btree, and extents for large EAs.
5235 *
5236 * Inputs:
5237 * extent_info - This is the structure that contains state about
5238 * the current file, extent, and extent record that
5239 * is being relocated. This structure is shared
5240 * among code that traverses through all the extents
5241 * of the file, code that relocates extents, and
5242 * code that splits the extent.
5243 */
5244 static int
5245 hfs_reclaim_extent(struct hfsmount *hfsmp, const u_long allocLimit, struct hfs_reclaim_extent_info *extent_info, vfs_context_t context)
5246 {
5247 int error = 0;
5248 int index;
5249 struct cnode *cp;
5250 u_int32_t oldStartBlock;
5251 u_int32_t oldBlockCount;
5252 u_int32_t newStartBlock;
5253 u_int32_t newBlockCount;
5254 u_int32_t alloc_flags;
5255 int blocks_allocated = false;
5256
5257 index = extent_info->extent_index;
5258 cp = VTOC(extent_info->vp);
5259
5260 oldStartBlock = extent_info->extents[index].startBlock;
5261 oldBlockCount = extent_info->extents[index].blockCount;
5262
5263 if (0 && hfs_resize_debug) {
5264 printf ("hfs_reclaim_extent: Examine record:%u recStartBlock=%u, %u:(%u,%u)\n", extent_info->overflow_count, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount);
5265 }
5266
5267 /* Check if the current extent lies completely within allocLimit */
5268 if ((oldStartBlock + oldBlockCount) <= allocLimit) {
5269 extent_info->cur_blockCount += oldBlockCount;
5270 return error;
5271 }
5272
5273 /* Every extent should be relocated in its own transaction
5274 * to make sure that we don't overflow the journal buffer.
5275 */
5276 error = hfs_start_transaction(hfsmp);
5277 if (error) {
5278 return error;
5279 }
5280 extent_info->lockflags = hfs_systemfile_lock(hfsmp, extent_info->lockflags, HFS_EXCLUSIVE_LOCK);
5281
5282 /* Check if the extent lies partially in the area to reclaim,
5283 * i.e. it starts before allocLimit and ends beyond allocLimit.
5284 * We have already skipped extents that lie completely within
5285 * allocLimit in the check above, so we only check for the
5286 * startBlock. If it lies partially, split it so that we
5287 * only relocate part of the extent.
5288 */
5289 if (oldStartBlock < allocLimit) {
5290 newBlockCount = allocLimit - oldStartBlock;
5291 error = hfs_split_extent(extent_info, newBlockCount);
5292 if (error == 0) {
5293 /* After successful split, the current extent does not
5294 * need relocation, so just return back.
5295 */
5296 goto out;
5297 }
5298 /* Ignore error and try relocating the entire extent instead */
5299 }
5300
5301 alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS;
5302 if (extent_info->is_sysfile) {
5303 alloc_flags |= HFS_ALLOC_METAZONE;
5304 }
5305
5306 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags,
5307 &newStartBlock, &newBlockCount);
5308 if ((extent_info->is_sysfile == false) &&
5309 ((error == dskFulErr) || (error == ENOSPC))) {
5310 /* For non-system files, try reallocating space in metadata zone */
5311 alloc_flags |= HFS_ALLOC_METAZONE;
5312 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5313 alloc_flags, &newStartBlock, &newBlockCount);
5314 }
5315 if ((error == dskFulErr) || (error == ENOSPC)) {
5316 /* We did not find desired contiguous space for this extent.
5317 * So try to allocate the maximum contiguous space available.
5318 */
5319 alloc_flags &= ~HFS_ALLOC_FORCECONTIG;
5320
5321 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5322 alloc_flags, &newStartBlock, &newBlockCount);
5323 if (error) {
5324 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5325 goto out;
5326 }
5327 blocks_allocated = true;
5328
5329 error = hfs_split_extent(extent_info, newBlockCount);
5330 if (error) {
5331 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) split error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5332 goto out;
5333 }
5334 oldBlockCount = newBlockCount;
5335 }
5336 if (error) {
5337 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) contig BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5338 goto out;
5339 }
5340 blocks_allocated = true;
5341
5342 /* Copy data from old location to new location */
5343 error = hfs_copy_extent(hfsmp, extent_info->vp, oldStartBlock,
5344 newStartBlock, newBlockCount, context);
5345 if (error) {
5346 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u)=>(%u,%u) hfs_copy_extent error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount, error);
5347 goto out;
5348 }
5349
5350 /* Update the extent record with the new start block information */
5351 extent_info->extents[index].startBlock = newStartBlock;
5352
5353 /* Sync the content back to the disk */
5354 if (extent_info->catalog_fp) {
5355 /* Update the extents in catalog record */
5356 if (extent_info->is_dirlink) {
5357 error = cat_update_dirlink(hfsmp, extent_info->forkType,
5358 extent_info->dirlink_desc, extent_info->dirlink_attr,
5359 &(extent_info->dirlink_fork->ff_data));
5360 } else {
5361 cp->c_flag |= C_MODIFIED;
5362 /* If this is a system file, sync volume headers on disk */
5363 if (extent_info->is_sysfile) {
5364 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
5365 }
5366 }
5367 } else {
5368 /* Replace record for extents overflow or extents-based xattrs */
5369 error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
5370 &(extent_info->btdata), extent_info->recordlen);
5371 }
5372 if (error) {
5373 printf ("hfs_reclaim_extent: fileID=%u, update record error=%u\n", extent_info->fileID, error);
5374 goto out;
5375 }
5376
5377 /* Deallocate the old extent */
5378 error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5379 if (error) {
5380 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockDeallocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
5381 goto out;
5382 }
5383 extent_info->blocks_relocated += newBlockCount;
5384
5385 if (hfs_resize_debug) {
5386 printf ("hfs_reclaim_extent: Relocated record:%u %u:(%u,%u) to (%u,%u)\n", extent_info->overflow_count, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
5387 }
5388
5389 out:
5390 if (error != 0) {
5391 if (blocks_allocated == true) {
5392 BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5393 }
5394 } else {
5395 /* On success, increment the total allocation blocks processed */
5396 extent_info->cur_blockCount += newBlockCount;
5397 }
5398
5399 hfs_systemfile_unlock(hfsmp, extent_info->lockflags);
5400
5401 /* For a non-system file, if an extent entry from catalog record
5402 * was modified, sync the in-memory changes to the catalog record
5403 * on disk before ending the transaction.
5404 */
5405 if ((error == 0) &&
5406 (extent_info->overflow_count < kHFSPlusExtentDensity) &&
5407 (extent_info->is_sysfile == false)) {
5408 (void) hfs_update(extent_info->vp, MNT_WAIT);
5409 }
5410
5411 hfs_end_transaction(hfsmp);
5412
5413 return error;
5414 }
5415
5416 /* Report intermediate progress during volume resize */
5417 static void
5418 hfs_truncatefs_progress(struct hfsmount *hfsmp)
5419 {
5420 u_int32_t cur_progress;
5421
5422 hfs_resize_progress(hfsmp, &cur_progress);
5423 if (cur_progress > (hfsmp->hfs_resize_progress + 9)) {
5424 printf("hfs_truncatefs: %d%% done...\n", cur_progress);
5425 hfsmp->hfs_resize_progress = cur_progress;
5426 }
5427 return;
5428 }
5429
5430 /*
5431 * Reclaim space at the end of a volume for given file and forktype.
5432 *
5433 * This routine attempts to move any extent which contains allocation blocks
5434 * at or after "allocLimit." A separate transaction is used for every extent
5435 * that needs to be moved. If there is not contiguous space available for
5436 * moving an extent, it can be split into smaller extents. The contents of
5437 * any moved extents are read and written via the volume's device vnode --
5438 * NOT via "vp." During the move, moved blocks which are part of a transaction
5439 * have their physical block numbers invalidated so they will eventually be
5440 * written to their new locations.
5441 *
5442 * This function is also called for directory hard links. Directory hard links
5443 * are regular files with no data fork and resource fork that contains alias
5444 * information for backward compatibility with pre-Leopard systems. However
5445 * non-Mac OS X implementation can add/modify data fork or resource fork
5446 * information to directory hard links, so we check, and if required, relocate
5447 * both data fork and resource fork.
5448 *
5449 * Inputs:
5450 * hfsmp The volume being resized.
5451 * vp The vnode for the system file.
5452 * fileID ID of the catalog record that needs to be relocated
 * forktype	The type of fork that needs to be relocated,
5454 * kHFSResourceForkType for resource fork,
5455 * kHFSDataForkType for data fork
5456 * allocLimit Allocation limit for the new volume size,
5457 * do not use this block or beyond. All extents
5458 * that use this block or any blocks beyond this limit
5459 * will be relocated.
5460 *
5461 * Side Effects:
5462 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
5463 * blocks that were relocated.
5464 */
5465 static int
5466 hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID,
5467 u_int8_t forktype, u_long allocLimit, vfs_context_t context)
5468 {
5469 int error = 0;
5470 struct hfs_reclaim_extent_info *extent_info;
5471 int i;
5472 int lockflags = 0;
5473 struct cnode *cp;
5474 struct filefork *fp;
5475 int took_truncate_lock = false;
5476 int release_desc = false;
5477 HFSPlusExtentKey *key;
5478
5479 /* If there is no vnode for this file, then there's nothing to do. */
5480 if (vp == NULL) {
5481 return 0;
5482 }
5483
5484 cp = VTOC(vp);
5485
5486 MALLOC(extent_info, struct hfs_reclaim_extent_info *,
5487 sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
5488 if (extent_info == NULL) {
5489 return ENOMEM;
5490 }
5491 bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
5492 extent_info->vp = vp;
5493 extent_info->fileID = fileID;
5494 extent_info->forkType = forktype;
5495 extent_info->is_sysfile = vnode_issystem(vp);
5496 if (vnode_isdir(vp) && (cp->c_flag & C_HARDLINK)) {
5497 extent_info->is_dirlink = true;
5498 }
5499 /* We always need allocation bitmap and extent btree lock */
5500 lockflags = SFL_BITMAP | SFL_EXTENTS;
5501 if ((fileID == kHFSCatalogFileID) || (extent_info->is_dirlink == true)) {
5502 lockflags |= SFL_CATALOG;
5503 } else if (fileID == kHFSAttributesFileID) {
5504 lockflags |= SFL_ATTRIBUTE;
5505 } else if (fileID == kHFSStartupFileID) {
5506 lockflags |= SFL_STARTUP;
5507 }
5508 extent_info->lockflags = lockflags;
5509 extent_info->fcb = VTOF(hfsmp->hfs_extents_vp);
5510
5511 /* Flush data associated with current file on disk.
5512 *
5513 * If the current vnode is directory hard link, no flushing of
5514 * journal or vnode is required. The current kernel does not
5515 * modify data/resource fork of directory hard links, so nothing
5516 * will be in the cache. If a directory hard link is newly created,
5517 * the resource fork data is written directly using devvp and
5518 * the code that actually relocates data (hfs_copy_extent()) also
5519 * uses devvp for its I/O --- so they will see a consistent copy.
5520 */
5521 if (extent_info->is_sysfile) {
5522 /* If the current vnode is system vnode, flush journal
5523 * to make sure that all data is written to the disk.
5524 */
5525 error = hfs_journal_flush(hfsmp, TRUE);
5526 if (error) {
5527 printf ("hfs_reclaim_file: journal_flush returned %d\n", error);
5528 goto out;
5529 }
5530 } else if (extent_info->is_dirlink == false) {
5531 /* Flush all blocks associated with this regular file vnode.
5532 * Normally there should not be buffer cache blocks for regular
5533 * files, but for objects like symlinks, we can have buffer cache
5534 * blocks associated with the vnode. Therefore we call
5535 * buf_flushdirtyblks() also.
5536 */
5537 buf_flushdirtyblks(vp, 0, BUF_SKIP_LOCKED, "hfs_reclaim_file");
5538
5539 hfs_unlock(cp);
5540 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
5541 took_truncate_lock = true;
5542 (void) cluster_push(vp, 0);
5543 error = hfs_lock(cp, HFS_FORCE_LOCK);
5544 if (error) {
5545 goto out;
5546 }
5547
5548 /* If the file no longer exists, nothing left to do */
5549 if (cp->c_flag & C_NOEXISTS) {
5550 error = 0;
5551 goto out;
5552 }
5553
5554 /* Wait for any in-progress writes to this vnode to complete, so that we'll
5555 * be copying consistent bits. (Otherwise, it's possible that an async
5556 * write will complete to the old extent after we read from it. That
5557 * could lead to corruption.)
5558 */
5559 error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file");
5560 if (error) {
5561 goto out;
5562 }
5563 }
5564
5565 if (hfs_resize_debug) {
5566 printf("hfs_reclaim_file: === Start reclaiming %sfork for %sid=%u ===\n", (forktype ? "rsrc" : "data"), (extent_info->is_dirlink ? "dirlink" : "file"), fileID);
5567 }
5568
5569 if (extent_info->is_dirlink) {
5570 MALLOC(extent_info->dirlink_desc, struct cat_desc *,
5571 sizeof(struct cat_desc), M_TEMP, M_WAITOK);
5572 MALLOC(extent_info->dirlink_attr, struct cat_attr *,
5573 sizeof(struct cat_attr), M_TEMP, M_WAITOK);
5574 MALLOC(extent_info->dirlink_fork, struct filefork *,
5575 sizeof(struct filefork), M_TEMP, M_WAITOK);
5576 if ((extent_info->dirlink_desc == NULL) ||
5577 (extent_info->dirlink_attr == NULL) ||
5578 (extent_info->dirlink_fork == NULL)) {
5579 error = ENOMEM;
5580 goto out;
5581 }
5582
5583 /* Lookup catalog record for directory hard link and
5584 * create a fake filefork for the value looked up from
5585 * the disk.
5586 */
5587 fp = extent_info->dirlink_fork;
5588 bzero(extent_info->dirlink_fork, sizeof(struct filefork));
5589 extent_info->dirlink_fork->ff_cp = cp;
5590 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5591 error = cat_lookup_dirlink(hfsmp, fileID, forktype,
5592 extent_info->dirlink_desc, extent_info->dirlink_attr,
5593 &(extent_info->dirlink_fork->ff_data));
5594 hfs_systemfile_unlock(hfsmp, lockflags);
5595 if (error) {
5596 printf ("hfs_reclaim_file: cat_lookup_dirlink for fileID=%u returned error=%u\n", fileID, error);
5597 goto out;
5598 }
5599 release_desc = true;
5600 } else {
5601 fp = VTOF(vp);
5602 }
5603
5604 extent_info->catalog_fp = fp;
5605 extent_info->recStartBlock = 0;
5606 extent_info->extents = extent_info->catalog_fp->ff_extents;
5607 /* Relocate extents from the catalog record */
5608 for (i = 0; i < kHFSPlusExtentDensity; ++i) {
5609 if (fp->ff_extents[i].blockCount == 0) {
5610 break;
5611 }
5612 extent_info->extent_index = i;
5613 error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
5614 if (error) {
5615 printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, error);
5616 goto out;
5617 }
5618 }
5619
5620 /* If the number of allocation blocks processed for reclaiming
5621 * are less than total number of blocks for the file, continuing
5622 * working on overflow extents record.
5623 */
5624 if (fp->ff_blocks <= extent_info->cur_blockCount) {
5625 if (0 && hfs_resize_debug) {
5626 printf ("hfs_reclaim_file: Nothing more to relocate, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
5627 }
5628 goto out;
5629 }
5630
5631 if (hfs_resize_debug) {
5632 printf ("hfs_reclaim_file: Will check overflow records, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
5633 }
5634
5635 MALLOC(extent_info->iterator, struct BTreeIterator *, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
5636 if (extent_info->iterator == NULL) {
5637 error = ENOMEM;
5638 goto out;
5639 }
5640 bzero(extent_info->iterator, sizeof(struct BTreeIterator));
5641 key = (HFSPlusExtentKey *) &(extent_info->iterator->key);
5642 key->keyLength = kHFSPlusExtentKeyMaximumLength;
5643 key->forkType = forktype;
5644 key->fileID = fileID;
5645 key->startBlock = extent_info->cur_blockCount;
5646
5647 extent_info->btdata.bufferAddress = extent_info->record.overflow;
5648 extent_info->btdata.itemSize = sizeof(HFSPlusExtentRecord);
5649 extent_info->btdata.itemCount = 1;
5650
5651 extent_info->catalog_fp = NULL;
5652
5653 /* Search the first overflow extent with expected startBlock as 'cur_blockCount' */
5654 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5655 error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
5656 &(extent_info->btdata), &(extent_info->recordlen),
5657 extent_info->iterator);
5658 hfs_systemfile_unlock(hfsmp, lockflags);
5659 while (error == 0) {
5660 extent_info->overflow_count++;
5661 extent_info->recStartBlock = key->startBlock;
5662 extent_info->extents = extent_info->record.overflow;
5663 for (i = 0; i < kHFSPlusExtentDensity; i++) {
5664 if (extent_info->record.overflow[i].blockCount == 0) {
5665 goto out;
5666 }
5667 extent_info->extent_index = i;
5668 error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
5669 if (error) {
5670 printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, extent_info->record.overflow[i].startBlock, extent_info->record.overflow[i].blockCount, error);
5671 goto out;
5672 }
5673 }
5674
5675 /* Look for more overflow records */
5676 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5677 error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
5678 extent_info->iterator, &(extent_info->btdata),
5679 &(extent_info->recordlen));
5680 hfs_systemfile_unlock(hfsmp, lockflags);
5681 if (error) {
5682 break;
5683 }
5684 /* Stop when we encounter a different file or fork. */
5685 if ((key->fileID != fileID) || (key->forkType != forktype)) {
5686 break;
5687 }
5688 }
5689 if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
5690 error = 0;
5691 }
5692
5693 out:
5694 /* If any blocks were relocated, account them and report progress */
5695 if (extent_info->blocks_relocated) {
5696 hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
5697 hfs_truncatefs_progress(hfsmp);
5698 if (fileID < kHFSFirstUserCatalogNodeID) {
5699 printf ("hfs_reclaim_file: Relocated %u blocks from fileID=%u on \"%s\"\n",
5700 extent_info->blocks_relocated, fileID, hfsmp->vcbVN);
5701 }
5702 }
5703 if (extent_info->iterator) {
5704 FREE(extent_info->iterator, M_TEMP);
5705 }
5706 if (release_desc == true) {
5707 cat_releasedesc(extent_info->dirlink_desc);
5708 }
5709 if (extent_info->dirlink_desc) {
5710 FREE(extent_info->dirlink_desc, M_TEMP);
5711 }
5712 if (extent_info->dirlink_attr) {
5713 FREE(extent_info->dirlink_attr, M_TEMP);
5714 }
5715 if (extent_info->dirlink_fork) {
5716 FREE(extent_info->dirlink_fork, M_TEMP);
5717 }
5718 if ((extent_info->blocks_relocated != 0) && (extent_info->is_sysfile == false)) {
5719 (void) hfs_update(vp, MNT_WAIT);
5720 }
5721 if (took_truncate_lock) {
5722 hfs_unlock_truncate(cp, 0);
5723 }
5724 if (extent_info) {
5725 FREE(extent_info, M_TEMP);
5726 }
5727 if (hfs_resize_debug) {
5728 printf("hfs_reclaim_file: === Finished relocating %sfork for fileid=%u (error=%d) ===\n", (forktype ? "rsrc" : "data"), fileID, error);
5729 }
5730
5731 return error;
5732 }
5733
5734
5735 /*
5736 * This journal_relocate callback updates the journal info block to point
5737 * at the new journal location. This write must NOT be done using the
5738 * transaction. We must write the block immediately. We must also force
5739 * it to get to the media so that the new journal location will be seen by
5740 * the replay code before we can safely let journaled blocks be written
5741 * to their normal locations.
5742 *
5743 * The tests for journal_uses_fua below are mildly hacky. Since the journal
5744 * and the file system are both on the same device, I'm leveraging what
5745 * the journal has decided about FUA.
5746 */
/*
 * Argument bundle handed to hfs_journal_relocate_callback() through
 * journal_relocate().  Bundles everything the callback needs to rewrite
 * the journal info block at the journal's new location.
 */
struct hfs_journal_relocate_args {
	struct hfsmount *hfsmp;		/* volume whose journal is being moved */
	vfs_context_t context;		/* caller's context; supplies credentials for buffer I/O */
	u_int32_t newStartBlock;	/* new journal start, in allocation blocks */
};
5752
5753 static errno_t
5754 hfs_journal_relocate_callback(void *_args)
5755 {
5756 int error;
5757 struct hfs_journal_relocate_args *args = _args;
5758 struct hfsmount *hfsmp = args->hfsmp;
5759 buf_t bp;
5760 JournalInfoBlock *jibp;
5761
5762 error = buf_meta_bread(hfsmp->hfs_devvp,
5763 hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
5764 hfsmp->blockSize, vfs_context_ucred(args->context), &bp);
5765 if (error) {
5766 printf("hfs_reclaim_journal_file: failed to read JIB (%d)\n", error);
5767 if (bp) {
5768 buf_brelse(bp);
5769 }
5770 return error;
5771 }
5772 jibp = (JournalInfoBlock*) buf_dataptr(bp);
5773 jibp->offset = SWAP_BE64((u_int64_t)args->newStartBlock * hfsmp->blockSize);
5774 jibp->size = SWAP_BE64(hfsmp->jnl_size);
5775 if (journal_uses_fua(hfsmp->jnl))
5776 buf_markfua(bp);
5777 error = buf_bwrite(bp);
5778 if (error) {
5779 printf("hfs_reclaim_journal_file: failed to write JIB (%d)\n", error);
5780 return error;
5781 }
5782 if (!journal_uses_fua(hfsmp->jnl)) {
5783 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, args->context);
5784 if (error) {
5785 printf("hfs_reclaim_journal_file: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
5786 error = 0; /* Don't fail the operation. */
5787 }
5788 }
5789
5790 return error;
5791 }
5792
5793
5794 static int
5795 hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
5796 {
5797 int error;
5798 int journal_err;
5799 int lockflags;
5800 u_int32_t oldStartBlock;
5801 u_int32_t newStartBlock;
5802 u_int32_t oldBlockCount;
5803 u_int32_t newBlockCount;
5804 struct cat_desc journal_desc;
5805 struct cat_attr journal_attr;
5806 struct cat_fork journal_fork;
5807 struct hfs_journal_relocate_args callback_args;
5808
5809 if (hfsmp->jnl_start + (hfsmp->jnl_size / hfsmp->blockSize) <= allocLimit) {
5810 /* The journal does not require relocation */
5811 return 0;
5812 }
5813
5814 error = hfs_start_transaction(hfsmp);
5815 if (error) {
5816 printf("hfs_reclaim_journal_file: hfs_start_transaction returned %d\n", error);
5817 return error;
5818 }
5819 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
5820
5821 oldBlockCount = hfsmp->jnl_size / hfsmp->blockSize;
5822
5823 /* TODO: Allow the journal to change size based on the new volume size. */
5824 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
5825 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS,
5826 &newStartBlock, &newBlockCount);
5827 if (error) {
5828 printf("hfs_reclaim_journal_file: BlockAllocate returned %d\n", error);
5829 goto fail;
5830 }
5831 if (newBlockCount != oldBlockCount) {
5832 printf("hfs_reclaim_journal_file: newBlockCount != oldBlockCount (%u, %u)\n", newBlockCount, oldBlockCount);
5833 goto free_fail;
5834 }
5835
5836 error = BlockDeallocate(hfsmp, hfsmp->jnl_start, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5837 if (error) {
5838 printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error);
5839 goto free_fail;
5840 }
5841
5842 /* Update the catalog record for .journal */
5843 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlfileid, 1, &journal_desc, &journal_attr, &journal_fork);
5844 if (error) {
5845 printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
5846 goto free_fail;
5847 }
5848 oldStartBlock = journal_fork.cf_extents[0].startBlock;
5849 journal_fork.cf_size = newBlockCount * hfsmp->blockSize;
5850 journal_fork.cf_extents[0].startBlock = newStartBlock;
5851 journal_fork.cf_extents[0].blockCount = newBlockCount;
5852 journal_fork.cf_blocks = newBlockCount;
5853 error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL);
5854 cat_releasedesc(&journal_desc); /* all done with cat descriptor */
5855 if (error) {
5856 printf("hfs_reclaim_journal_file: cat_update returned %d\n", error);
5857 goto free_fail;
5858 }
5859 callback_args.hfsmp = hfsmp;
5860 callback_args.context = context;
5861 callback_args.newStartBlock = newStartBlock;
5862
5863 error = journal_relocate(hfsmp->jnl, (off_t)newStartBlock*hfsmp->blockSize,
5864 (off_t)newBlockCount*hfsmp->blockSize, 0,
5865 hfs_journal_relocate_callback, &callback_args);
5866 if (error) {
5867 /* NOTE: journal_relocate will mark the journal invalid. */
5868 printf("hfs_reclaim_journal_file: journal_relocate returned %d\n", error);
5869 goto fail;
5870 }
5871 hfsmp->jnl_start = newStartBlock;
5872 hfsmp->jnl_size = (off_t)newBlockCount * hfsmp->blockSize;
5873
5874 hfs_systemfile_unlock(hfsmp, lockflags);
5875 error = hfs_end_transaction(hfsmp);
5876 if (error) {
5877 printf("hfs_reclaim_journal_file: hfs_end_transaction returned %d\n", error);
5878 }
5879
5880 /* Account for the blocks relocated and print progress */
5881 hfsmp->hfs_resize_blocksmoved += oldBlockCount;
5882 hfs_truncatefs_progress(hfsmp);
5883 if (!error) {
5884 printf ("hfs_reclaim_journal_file: Relocated %u blocks from journal on \"%s\"\n",
5885 oldBlockCount, hfsmp->vcbVN);
5886 if (hfs_resize_debug) {
5887 printf ("hfs_reclaim_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
5888 }
5889 }
5890 return error;
5891
5892 free_fail:
5893 journal_err = BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
5894 if (journal_err) {
5895 printf("hfs_reclaim_journal_file: BlockDeallocate returned %d\n", error);
5896 hfs_mark_volume_inconsistent(hfsmp);
5897 }
5898 fail:
5899 hfs_systemfile_unlock(hfsmp, lockflags);
5900 (void) hfs_end_transaction(hfsmp);
5901 if (hfs_resize_debug) {
5902 printf ("hfs_reclaim_journal_file: Error relocating journal file (error=%d)\n", error);
5903 }
5904 return error;
5905 }
5906
5907
5908 /*
5909 * Move the journal info block to a new location. We have to make sure the
5910 * new copy of the journal info block gets to the media first, then change
5911 * the field in the volume header and the catalog record.
5912 */
5913 static int
5914 hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
5915 {
5916 int error;
5917 int journal_err;
5918 int lockflags;
5919 u_int32_t oldBlock;
5920 u_int32_t newBlock;
5921 u_int32_t blockCount;
5922 struct cat_desc jib_desc;
5923 struct cat_attr jib_attr;
5924 struct cat_fork jib_fork;
5925 buf_t old_bp, new_bp;
5926
5927 if (hfsmp->vcbJinfoBlock <= allocLimit) {
5928 /* The journal info block does not require relocation */
5929 return 0;
5930 }
5931
5932 error = hfs_start_transaction(hfsmp);
5933 if (error) {
5934 printf("hfs_reclaim_journal_info_block: hfs_start_transaction returned %d\n", error);
5935 return error;
5936 }
5937 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
5938
5939 error = BlockAllocate(hfsmp, 1, 1, 1,
5940 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS,
5941 &newBlock, &blockCount);
5942 if (error) {
5943 printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error);
5944 goto fail;
5945 }
5946 if (blockCount != 1) {
5947 printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount);
5948 goto free_fail;
5949 }
5950 error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS);
5951 if (error) {
5952 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
5953 goto free_fail;
5954 }
5955
5956 /* Copy the old journal info block content to the new location */
5957 error = buf_meta_bread(hfsmp->hfs_devvp,
5958 hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
5959 hfsmp->blockSize, vfs_context_ucred(context), &old_bp);
5960 if (error) {
5961 printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error);
5962 if (old_bp) {
5963 buf_brelse(old_bp);
5964 }
5965 goto free_fail;
5966 }
5967 new_bp = buf_getblk(hfsmp->hfs_devvp,
5968 newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
5969 hfsmp->blockSize, 0, 0, BLK_META);
5970 bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize);
5971 buf_brelse(old_bp);
5972 if (journal_uses_fua(hfsmp->jnl))
5973 buf_markfua(new_bp);
5974 error = buf_bwrite(new_bp);
5975 if (error) {
5976 printf("hfs_reclaim_journal_info_block: failed to write new JIB (%d)\n", error);
5977 goto free_fail;
5978 }
5979 if (!journal_uses_fua(hfsmp->jnl)) {
5980 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
5981 if (error) {
5982 printf("hfs_reclaim_journal_info_block: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
5983 /* Don't fail the operation. */
5984 }
5985 }
5986
5987 /* Update the catalog record for .journal_info_block */
5988 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlinfoblkid, 1, &jib_desc, &jib_attr, &jib_fork);
5989 if (error) {
5990 printf("hfs_reclaim_journal_file: cat_idlookup returned %d\n", error);
5991 goto fail;
5992 }
5993 oldBlock = jib_fork.cf_extents[0].startBlock;
5994 jib_fork.cf_size = hfsmp->blockSize;
5995 jib_fork.cf_extents[0].startBlock = newBlock;
5996 jib_fork.cf_extents[0].blockCount = 1;
5997 jib_fork.cf_blocks = 1;
5998 error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL);
5999 cat_releasedesc(&jib_desc); /* all done with cat descriptor */
6000 if (error) {
6001 printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error);
6002 goto fail;
6003 }
6004
6005 /* Update the pointer to the journal info block in the volume header. */
6006 hfsmp->vcbJinfoBlock = newBlock;
6007 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
6008 if (error) {
6009 printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error);
6010 goto fail;
6011 }
6012 hfs_systemfile_unlock(hfsmp, lockflags);
6013 error = hfs_end_transaction(hfsmp);
6014 if (error) {
6015 printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error);
6016 }
6017 error = hfs_journal_flush(hfsmp, FALSE);
6018 if (error) {
6019 printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error);
6020 }
6021
6022 /* Account for the block relocated and print progress */
6023 hfsmp->hfs_resize_blocksmoved += 1;
6024 hfs_truncatefs_progress(hfsmp);
6025 if (!error) {
6026 printf ("hfs_reclaim_journal_info: Relocated 1 block from journal info on \"%s\"\n",
6027 hfsmp->vcbVN);
6028 if (hfs_resize_debug) {
6029 printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount);
6030 }
6031 }
6032 return error;
6033
6034 free_fail:
6035 journal_err = BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS);
6036 if (journal_err) {
6037 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
6038 hfs_mark_volume_inconsistent(hfsmp);
6039 }
6040
6041 fail:
6042 hfs_systemfile_unlock(hfsmp, lockflags);
6043 (void) hfs_end_transaction(hfsmp);
6044 if (hfs_resize_debug) {
6045 printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error);
6046 }
6047 return error;
6048 }
6049
6050
6051 /*
6052 * This function traverses through all extended attribute records for a given
6053 * fileID, and calls function that reclaims data blocks that exist in the
6054 * area of the disk being reclaimed which in turn is responsible for allocating
 * new space, copying extent data, deallocating old space, and if required,
6056 * splitting the extent.
6057 *
6058 * Note: The caller has already acquired the cnode lock on the file. Therefore
6059 * we are assured that no other thread would be creating/deleting/modifying
6060 * extended attributes for this file.
6061 *
6062 * Side Effects:
6063 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
6064 * blocks that were relocated.
6065 *
6066 * Returns:
6067 * 0 on success, non-zero on failure.
6068 */
static int
hfs_reclaim_xattr(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, u_int32_t allocLimit, vfs_context_t context)
{
	int error = 0;
	struct hfs_reclaim_extent_info *extent_info;
	int i;
	HFSPlusAttrKey *key;
	int *lockflags;	/* points into extent_info so lock state stays shared
			 * with hfs_reclaim_extent -- NOTE(review): confirm the
			 * helper updates extent_info->lockflags */

	if (hfs_resize_debug) {
		printf("hfs_reclaim_xattr: === Start reclaiming xattr for id=%u ===\n", fileID);
	}

	/* Per-call scratch state; freed unconditionally at 'out'. */
	MALLOC(extent_info, struct hfs_reclaim_extent_info *,
	       sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
	if (extent_info == NULL) {
		return ENOMEM;
	}
	bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
	extent_info->vp = vp;
	extent_info->fileID = fileID;
	extent_info->is_xattr = true;
	extent_info->is_sysfile = vnode_issystem(vp);
	/* All xattr extents live in the attributes B-tree file. */
	extent_info->fcb = VTOF(hfsmp->hfs_attribute_vp);
	lockflags = &(extent_info->lockflags);
	*lockflags = SFL_ATTRIBUTE | SFL_BITMAP;

	/* Initialize iterator from the extent_info structure */
	MALLOC(extent_info->iterator, struct BTreeIterator *,
	       sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
	if (extent_info->iterator == NULL) {
		error = ENOMEM;
		goto out;
	}
	bzero(extent_info->iterator, sizeof(struct BTreeIterator));

	/* Build attribute key: fileID with a NULL name positions the search
	 * at the start of this file's attribute records. */
	key = (HFSPlusAttrKey *)&(extent_info->iterator->key);
	error = hfs_buildattrkey(fileID, NULL, key);
	if (error) {
		goto out;
	}

	/* Initialize btdata from extent_info structure. Note that the
	 * buffer pointer actually points to the xattr record from the
	 * extent_info structure itself.
	 */
	extent_info->btdata.bufferAddress = &(extent_info->record.xattr);
	extent_info->btdata.itemSize = sizeof(HFSPlusAttrRecord);
	extent_info->btdata.itemCount = 1;

	/*
	 * Sync all extent-based attribute data to the disk.
	 *
	 * All extent-based attribute data I/O is performed via cluster
	 * I/O using a virtual file that spans across entire file system
	 * space.
	 */
	hfs_lock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_EXCLUSIVE_LOCK);
	(void)cluster_push(hfsmp->hfs_attrdata_vp, 0);
	error = vnode_waitforwrites(hfsmp->hfs_attrdata_vp, 0, 0, 0, "hfs_reclaim_xattr");
	hfs_unlock_truncate(VTOC(hfsmp->hfs_attrdata_vp), 0);
	if (error) {
		goto out;
	}

	/* Search for extended attribute for current file. This
	 * will place the iterator before the first matching record.
	 */
	*lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
	error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
			&(extent_info->btdata), &(extent_info->recordlen),
			extent_info->iterator);
	hfs_systemfile_unlock(hfsmp, *lockflags);
	if (error) {
		if (error != btNotFound) {
			goto out;
		}
		/* btNotFound is expected here, so just mask it */
		error = 0;
	}

	/* Walk every attribute record for this fileID, relocating any
	 * extent that reaches past allocLimit.  The system-file lock is
	 * dropped between records so other work can interleave. */
	while (1) {
		/* Iterate to the next record */
		*lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
		error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
				extent_info->iterator, &(extent_info->btdata),
				&(extent_info->recordlen));
		hfs_systemfile_unlock(hfsmp, *lockflags);

		/* Stop the iteration if we encounter end of btree or xattr with different fileID */
		if (error || key->fileID != fileID) {
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}

		/* We only care about extent-based EAs */
		if ((extent_info->record.xattr.recordType != kHFSPlusAttrForkData) &&
		    (extent_info->record.xattr.recordType != kHFSPlusAttrExtents)) {
			continue;
		}

		/* ForkData carries the first eight extents; each Extents
		 * record carries an overflow group of eight more. */
		if (extent_info->record.xattr.recordType == kHFSPlusAttrForkData) {
			extent_info->overflow_count = 0;
			extent_info->extents = extent_info->record.xattr.forkData.theFork.extents;
		} else if (extent_info->record.xattr.recordType == kHFSPlusAttrExtents) {
			extent_info->overflow_count++;
			extent_info->extents = extent_info->record.xattr.overflowExtents.extents;
		}

		extent_info->recStartBlock = key->startBlock;
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			/* A zero-length extent terminates the record early. */
			if (extent_info->extents[i].blockCount == 0) {
				break;
			}
			extent_info->extent_index = i;
			error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
			if (error) {
				printf ("hfs_reclaim_xattr: fileID=%u hfs_reclaim_extent error=%d\n", fileID, error);
				goto out;
			}
		}
	}

out:
	/* If any blocks were relocated, account them and report progress */
	if (extent_info->blocks_relocated) {
		hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
		hfs_truncatefs_progress(hfsmp);
	}
	if (extent_info->iterator) {
		FREE(extent_info->iterator, M_TEMP);
	}
	if (extent_info) {
		FREE(extent_info, M_TEMP);
	}
	if (hfs_resize_debug) {
		printf("hfs_reclaim_xattr: === Finished relocating xattr for fileid=%u (error=%d) ===\n", fileID, error);
	}
	return error;
}
6212
6213 /*
6214 * Reclaim any extent-based extended attributes allocation blocks from
6215 * the area of the disk that is being truncated.
6216 *
6217 * The function traverses the attribute btree to find out the fileIDs
6218 * of the extended attributes that need to be relocated. For every
6219 * file whose large EA requires relocation, it looks up the cnode and
6220 * calls hfs_reclaim_xattr() to do all the work for allocating
6221 * new space, copying data, deallocating old space, and if required,
6222 * splitting the extents.
6223 *
6224 * Inputs:
6225 * allocLimit - starting block of the area being reclaimed
6226 *
6227 * Returns:
6228 * returns 0 on success, non-zero on failure.
6229 */
static int
hfs_reclaim_xattrspace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
{
	int error = 0;
	FCB *fcb;
	struct BTreeIterator *iterator = NULL;
	struct FSBufferDescriptor btdata;
	HFSPlusAttrKey *key;
	HFSPlusAttrRecord rec;
	int lockflags = 0;
	cnid_t prev_fileid = 0;	/* last fileID already handled; used to skip duplicates */
	struct vnode *vp;
	int need_relocate;
	int btree_operation;
	u_int32_t files_moved = 0;
	u_int32_t prev_blocksmoved;
	int i;

	fcb = VTOF(hfsmp->hfs_attribute_vp);
	/* Store the value to print total blocks moved by this function in end */
	prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return ENOMEM;
	}
	/* Zeroed iterator => BTIterateRecord starts from the first record. */
	bzero(iterator, sizeof(*iterator));
	key = (HFSPlusAttrKey *)&iterator->key;
	btdata.bufferAddress = &rec;
	btdata.itemSize = sizeof(rec);
	btdata.itemCount = 1;

	need_relocate = false;
	btree_operation = kBTreeFirstRecord;
	/* Traverse the attribute btree to find extent-based EAs to reclaim.
	 * The shared lock is taken per record so the tree isn't held across
	 * the (potentially slow) relocation of a file's xattrs. */
	while (1) {
		lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
		error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			/* End-of-tree conditions are a normal termination. */
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}
		btree_operation = kBTreeNextRecord;

		/* If the extents of current fileID were already relocated, skip it */
		if (prev_fileid == key->fileID) {
			continue;
		}

		/* Check if any of the extents in the current record need to be relocated.
		 * Only ForkData and (overflow) Extents records carry extents; any other
		 * record type leaves need_relocate false and is skipped below. */
		need_relocate = false;
		switch(rec.recordType) {
			case kHFSPlusAttrForkData:
				for (i = 0; i < kHFSPlusExtentDensity; i++) {
					if (rec.forkData.theFork.extents[i].blockCount == 0) {
						break;
					}
					/* Extent crosses allocLimit => must move. */
					if ((rec.forkData.theFork.extents[i].startBlock +
					     rec.forkData.theFork.extents[i].blockCount) > allocLimit) {
						need_relocate = true;
						break;
					}
				}
				break;

			case kHFSPlusAttrExtents:
				for (i = 0; i < kHFSPlusExtentDensity; i++) {
					if (rec.overflowExtents.extents[i].blockCount == 0) {
						break;
					}
					if ((rec.overflowExtents.extents[i].startBlock +
					     rec.overflowExtents.extents[i].blockCount) > allocLimit) {
						need_relocate = true;
						break;
					}
				}
				break;
		};

		/* Continue iterating to next attribute record */
		if (need_relocate == false) {
			continue;
		}

		/* Look up the vnode for corresponding file. The cnode
		 * will be locked which will ensure that no one modifies
		 * the xattrs when we are relocating them.
		 *
		 * We want to allow open-unlinked files to be moved,
		 * so provide allow_deleted == 1 for hfs_vget().
		 */
		if (hfs_vget(hfsmp, key->fileID, &vp, 0, 1) != 0) {
			continue;
		}

		error = hfs_reclaim_xattr(hfsmp, vp, key->fileID, allocLimit, context);
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		if (error) {
			printf ("hfs_reclaim_xattrspace: Error relocating xattrs for fileid=%u (error=%d)\n", key->fileID, error);
			break;
		}
		/* hfs_reclaim_xattr handled every record for this fileID,
		 * so remember it and skip its remaining records. */
		prev_fileid = key->fileID;
		files_moved++;
	}

	if (files_moved) {
		printf("hfs_reclaim_xattrspace: Relocated %u xattr blocks from %u files on \"%s\"\n",
		       (hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
		       files_moved, hfsmp->vcbVN);
	}

	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	return error;
}
6347
6348 /*
6349 * Reclaim blocks from regular files.
6350 *
6351 * This function iterates over all the record in catalog btree looking
6352 * for files with extents that overlap into the space we're trying to
6353 * free up. If a file extent requires relocation, it looks up the vnode
6354 * and calls function to relocate the data.
6355 *
6356 * Returns:
6357 * Zero on success, non-zero on failure.
6358 */
static int
hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
{
	int error;
	FCB *fcb;
	struct BTreeIterator *iterator = NULL;
	struct FSBufferDescriptor btdata;
	int btree_operation;
	int lockflags;
	struct HFSPlusCatalogFile filerec;
	struct vnode *vp;
	struct vnode *rvp;
	struct filefork *datafork;
	u_int32_t files_moved = 0;
	u_int32_t prev_blocksmoved;

	fcb = VTOF(hfsmp->hfs_catalog_vp);
	/* Store the value to print total blocks moved by this function at the end */
	prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return ENOMEM;
	}
	/* Zeroed iterator => BTIterateRecord starts from the first record. */
	bzero(iterator, sizeof(*iterator));

	btdata.bufferAddress = &filerec;
	btdata.itemSize = sizeof(filerec);
	btdata.itemCount = 1;

	/* Walk every catalog record; the catalog lock is dropped per record
	 * so it is not held across the relocation of a file's forks. */
	btree_operation = kBTreeFirstRecord;
	while (1) {
		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			/* End-of-tree conditions are a normal termination. */
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}
		btree_operation = kBTreeNextRecord;

		/* Only file records carry extents worth checking here. */
		if (filerec.recordType != kHFSPlusFileRecord) {
			continue;
		}

		/* Check if any of the extents require relocation */
		if (hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec) == false) {
			continue;
		}

		/* We want to allow open-unlinked files to be moved, so allow_deleted == 1 */
		if (hfs_vget(hfsmp, filerec.fileID, &vp, 0, 1) != 0) {
			continue;
		}

		/* If data fork exists or item is a directory hard link, relocate blocks */
		datafork = VTOF(vp);
		if ((datafork && datafork->ff_blocks > 0) || vnode_isdir(vp)) {
			error = hfs_reclaim_file(hfsmp, vp, filerec.fileID,
					kHFSDataForkType, allocLimit, context);
			if (error) {
				printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
				hfs_unlock(VTOC(vp));
				vnode_put(vp);
				break;
			}
		}

		/* If resource fork exists or item is a directory hard link, relocate blocks.
		 * c_blocks minus the data fork's blocks leaves the resource fork's share. */
		if (((VTOC(vp)->c_blocks - (datafork ? datafork->ff_blocks : 0)) > 0) || vnode_isdir(vp)) {
			if (vnode_isdir(vp)) {
				/* Resource fork vnode lookup is invalid for directory hard link.
				 * So we fake data fork vnode as resource fork vnode.
				 */
				rvp = vp;
			} else {
				error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE);
				if (error) {
					printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", filerec.fileID, error);
					hfs_unlock(VTOC(vp));
					vnode_put(vp);
					break;
				}
				/* Defer releasing the rsrc vnode reference; the flag is
				 * consumed elsewhere -- NOTE(review): confirm against
				 * the C_NEED_RVNODE_PUT handling in this file. */
				VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT;
			}

			error = hfs_reclaim_file(hfsmp, rvp, filerec.fileID,
					kHFSResourceForkType, allocLimit, context);
			if (error) {
				printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
				hfs_unlock(VTOC(vp));
				vnode_put(vp);
				break;
			}
		}

		/* The file forks were relocated successfully, now drop the
		 * cnode lock and vnode reference, and continue iterating to
		 * next catalog record.
		 */
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		files_moved++;
	}

	if (files_moved) {
		printf("hfs_reclaim_filespace: Relocated %u blocks from %u files on \"%s\"\n",
		       (hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
		       files_moved, hfsmp->vcbVN);
	}

	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	return error;
}
6474
6475 /*
6476 * Reclaim space at the end of a file system.
6477 *
6478 * Inputs -
6479 * allocLimit - start block of the space being reclaimed
6480 * reclaimblks - number of allocation blocks to reclaim
6481 */
static int
hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context)
{
	int error = 0;

	/*
	 * Preflight the bitmap to find out total number of blocks that need
	 * relocation.
	 *
	 * Note: Since allocLimit is set to the location of new alternate volume
	 * header, the check below does not account for blocks allocated for old
	 * alternate volume header.
	 */
	error = hfs_count_allocated(hfsmp, allocLimit, reclaimblks, &(hfsmp->hfs_resize_totalblocks));
	if (error) {
		printf ("hfs_reclaimspace: Unable to determine total blocks to reclaim error=%d\n", error);
		return error;
	}
	if (hfs_resize_debug) {
		printf ("hfs_reclaimspace: Total number of blocks to reclaim = %u\n", hfsmp->hfs_resize_totalblocks);
	}

	/*
	 * The relocation order below is deliberate: the special/system files
	 * (allocation, extents, catalog, attributes, startup) are moved first,
	 * then the journal is flushed before the journal file and journal info
	 * block are moved, and finally ordinary file and xattr extents are
	 * reclaimed.  Any error aborts the whole sequence and is returned to
	 * the caller.
	 */

	/* Relocate extents of the Allocation file if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, kHFSAllocationFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Extents B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, kHFSExtentsFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Catalog B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, kHFSCatalogFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Attributes B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, kHFSAttributesFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Startup File if there is one and they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, kHFSStartupFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim startup file returned %d\n", error);
		return error;
	}

	/*
	 * We need to make sure the alternate volume header gets flushed if we moved
	 * any extents in the volume header.  But we need to do that before
	 * shrinking the size of the volume, or else the journal code will panic
	 * with an invalid (too large) block number.
	 *
	 * Note that blks_moved will be set if ANY extent was moved, even
	 * if it was just an overflow extent.  In this case, the journal_flush isn't
	 * strictly required, but shouldn't hurt.
	 */
	if (hfsmp->hfs_resize_blocksmoved) {
		hfs_journal_flush(hfsmp, FALSE);
	}

	/* Relocate journal file blocks if they're in the way. */
	error = hfs_reclaim_journal_file(hfsmp, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: hfs_reclaim_journal_file failed (%d)\n", error);
		return error;
	}

	/* Relocate journal info block blocks if they're in the way. */
	error = hfs_reclaim_journal_info_block(hfsmp, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: hfs_reclaim_journal_info_block failed (%d)\n", error);
		return error;
	}

	/* Reclaim extents from catalog file records */
	error = hfs_reclaim_filespace(hfsmp, allocLimit, context);
	if (error) {
		printf ("hfs_reclaimspace: hfs_reclaim_filespace returned error=%d\n", error);
		return error;
	}

	/* Reclaim extents from extent-based extended attributes, if any */
	error = hfs_reclaim_xattrspace(hfsmp, allocLimit, context);
	if (error) {
		printf ("hfs_reclaimspace: hfs_reclaim_xattrspace returned error=%d\n", error);
		return error;
	}

	return error;
}
6588
6589
6590 /*
6591 * Check if there are any extents (including overflow extents) that overlap
6592 * into the disk space that is being reclaimed.
6593 *
6594 * Output -
6595 * true - One of the extents need to be relocated
6596 * false - No overflow extents need to be relocated, or there was an error
6597 */
static int
hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec)
{
	struct BTreeIterator * iterator = NULL;
	struct FSBufferDescriptor btdata;
	HFSPlusExtentRecord extrec;
	HFSPlusExtentKey *extkeyptr;
	FCB *fcb;
	int overlapped = false;
	int i, j;
	int error;
	int lockflags = 0;
	u_int32_t endblock;

	/* Check if data fork overlaps the target space */
	for (i = 0; i < kHFSPlusExtentDensity; ++i) {
		if (filerec->dataFork.extents[i].blockCount == 0) {
			break;
		}
		endblock = filerec->dataFork.extents[i].startBlock +
			filerec->dataFork.extents[i].blockCount;
		/* An extent that ends past allocLimit lies (at least partly)
		 * in the space being reclaimed.
		 */
		if (endblock > allocLimit) {
			overlapped = true;
			goto out;
		}
	}

	/* Check if resource fork overlaps the target space */
	for (j = 0; j < kHFSPlusExtentDensity; ++j) {
		if (filerec->resourceFork.extents[j].blockCount == 0) {
			break;
		}
		endblock = filerec->resourceFork.extents[j].startBlock +
			filerec->resourceFork.extents[j].blockCount;
		if (endblock > allocLimit) {
			overlapped = true;
			goto out;
		}
	}

	/* Return back if there are no overflow extents for this file.
	 * A fork can only have overflow extents when all eight in-catalog
	 * extent slots are in use, i.e. when its loop above ran to
	 * completion without hitting an empty slot.
	 */
	if ((i < kHFSPlusExtentDensity) && (j < kHFSPlusExtentDensity)) {
		goto out;
	}

	/* Allocation failure is reported as "no overlap" (0), consistent
	 * with the documented behavior of returning false on error.
	 */
	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return 0;
	}
	bzero(iterator, sizeof(*iterator));

	/* Key for the first possible overflow extent of this file:
	 * forkType 0 and startBlock 0 sort before any real record.
	 */
	extkeyptr = (HFSPlusExtentKey *)&iterator->key;
	extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength;
	extkeyptr->forkType = 0;
	extkeyptr->fileID = filerec->fileID;
	extkeyptr->startBlock = 0;

	btdata.bufferAddress = &extrec;
	btdata.itemSize = sizeof(extrec);
	btdata.itemCount = 1;

	fcb = VTOF(hfsmp->hfs_extents_vp);

	lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);

	/* This will position the iterator just before the first overflow
	 * extent record for given fileID.  It will always return btNotFound,
	 * so we special case the error code.
	 */
	error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
	if (error && (error != btNotFound)) {
		goto out;
	}

	/* BTIterateRecord() might return error if the btree is empty, and
	 * therefore we return that the extent does not overflow to the caller
	 */
	error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	while (error == 0) {
		/* Stop when we encounter a different file. */
		if (extkeyptr->fileID != filerec->fileID) {
			break;
		}
		/* Check if any of the forks exist in the target space. */
		for (i = 0; i < kHFSPlusExtentDensity; ++i) {
			if (extrec[i].blockCount == 0) {
				break;
			}
			endblock = extrec[i].startBlock + extrec[i].blockCount;
			if (endblock > allocLimit) {
				overlapped = true;
				goto out;
			}
		}
		/* Look for more records. */
		error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	}

out:
	/* Common exit: drop the extents-tree lock and free the iterator
	 * if they were acquired.
	 */
	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (iterator) {
		kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	}
	return overlapped;
}
6703
6704
6705 /*
6706 * Calculate the progress of a file system resize operation.
6707 */
6708 __private_extern__
6709 int
6710 hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress)
6711 {
6712 if ((hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) == 0) {
6713 return (ENXIO);
6714 }
6715
6716 if (hfsmp->hfs_resize_totalblocks > 0) {
6717 *progress = (u_int32_t)((hfsmp->hfs_resize_blocksmoved * 100ULL) / hfsmp->hfs_resize_totalblocks);
6718 } else {
6719 *progress = 0;
6720 }
6721
6722 return (0);
6723 }
6724
6725
6726 /*
6727 * Creates a UUID from a unique "name" in the HFS UUID Name space.
6728 * See version 3 UUID.
6729 */
6730 static void
6731 hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result)
6732 {
6733 MD5_CTX md5c;
6734 uint8_t rawUUID[8];
6735
6736 ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6];
6737 ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7];
6738
6739 MD5Init( &md5c );
6740 MD5Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) );
6741 MD5Update( &md5c, rawUUID, sizeof (rawUUID) );
6742 MD5Final( result, &md5c );
6743
6744 result[6] = 0x30 | ( result[6] & 0x0F );
6745 result[8] = 0x80 | ( result[8] & 0x3F );
6746 }
6747
6748 /*
6749 * Get file system attributes.
6750 */
static int
hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
{
/* Attribute masks HFS can report: everything except named-attribute
 * counts/lists (common), fork info (file), and access time (volume).
 */
#define HFS_ATTR_CMN_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST))
#define HFS_ATTR_FILE_VALIDMASK (ATTR_FILE_VALIDMASK & ~(ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST))
#define HFS_ATTR_CMN_VOL_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_NAMEDATTRCOUNT | ATTR_CMN_NAMEDATTRLIST | ATTR_CMN_ACCTIME))

	ExtendedVCB *vcb = VFSTOVCB(mp);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	u_int32_t freeCNIDs;

	/* Catalog node IDs remaining before the 32-bit CNID space is exhausted. */
	freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)hfsmp->vcbNxtCNID;

	/* Object / block / size counters, taken from the in-memory VCB. */
	VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_filecount, (u_int64_t)hfsmp->vcbFilCnt);
	VFSATTR_RETURN(fsap, f_dircount, (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_maxobjcount, (u_int64_t)0xFFFFFFFF);
	VFSATTR_RETURN(fsap, f_iosize, (size_t)cluster_max_io_size(mp, 0));
	VFSATTR_RETURN(fsap, f_blocks, (u_int64_t)hfsmp->totalBlocks);
	VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)hfs_freeblks(hfsmp, 0));
	VFSATTR_RETURN(fsap, f_bavail, (u_int64_t)hfs_freeblks(hfsmp, 1));
	VFSATTR_RETURN(fsap, f_bsize, (u_int32_t)vcb->blockSize);
	/* XXX needs clarification */
	VFSATTR_RETURN(fsap, f_bused, hfsmp->totalBlocks - hfs_freeblks(hfsmp, 1));
	/* Maximum files is constrained by total blocks. */
	VFSATTR_RETURN(fsap, f_files, (u_int64_t)(hfsmp->totalBlocks - 2));
	/* Free-file count is the tighter of free CNIDs and free blocks. */
	VFSATTR_RETURN(fsap, f_ffree, MIN((u_int64_t)freeCNIDs, (u_int64_t)hfs_freeblks(hfsmp, 1)));

	fsap->f_fsid.val[0] = hfsmp->hfs_raw_dev;
	fsap->f_fsid.val[1] = vfs_typenum(mp);
	VFSATTR_SET_SUPPORTED(fsap, f_fsid);

	VFSATTR_RETURN(fsap, f_signature, vcb->vcbSigWord);
	VFSATTR_RETURN(fsap, f_carbon_fsid, 0);

	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
		vol_capabilities_attr_t *cap;
	
		cap = &fsap->f_capabilities;

		/* Plain HFS (standard) volumes support a much smaller
		 * feature set than HFS Plus.
		 */
		if (hfsmp->hfs_flags & HFS_STANDARD) {
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS |
				VOL_CAP_FMT_HIDDEN_FILES |
				VOL_CAP_FMT_PATH_FROM_ID;
		} else {
			/* Journal-active and case-sensitivity bits reflect the
			 * current state of this particular mount.
			 */
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_SYMBOLICLINKS |
				VOL_CAP_FMT_HARDLINKS |
				VOL_CAP_FMT_JOURNAL |
				VOL_CAP_FMT_ZERO_RUNS |
				(hfsmp->jnl ? VOL_CAP_FMT_JOURNAL_ACTIVE : 0) |
				(hfsmp->hfs_flags & HFS_CASE_SENSITIVE ? VOL_CAP_FMT_CASE_SENSITIVE : 0) |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS | 
				VOL_CAP_FMT_2TB_FILESIZE |
				VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
				VOL_CAP_FMT_PATH_FROM_ID |
				VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
				VOL_CAP_FMT_PATH_FROM_ID;
#endif
		}
		/* Note: VOL_CAP_INT_COPYFILE and VOL_CAP_INT_MANLOCK appear in
		 * the 'valid' mask below but not here, i.e. they are reported
		 * as known-but-unsupported.
		 */
		cap->capabilities[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_SEARCHFS |
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_EXCHANGEDATA |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif
		cap->capabilities[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->capabilities[VOL_CAPABILITIES_RESERVED2] = 0;

		/* The 'valid' masks advertise which capability bits HFS knows
		 * how to report, independent of whether they are set above.
		 */
		cap->valid[VOL_CAPABILITIES_FORMAT] =
			VOL_CAP_FMT_PERSISTENTOBJECTIDS |
			VOL_CAP_FMT_SYMBOLICLINKS |
			VOL_CAP_FMT_HARDLINKS |
			VOL_CAP_FMT_JOURNAL |
			VOL_CAP_FMT_JOURNAL_ACTIVE |
			VOL_CAP_FMT_NO_ROOT_TIMES |
			VOL_CAP_FMT_SPARSE_FILES |
			VOL_CAP_FMT_ZERO_RUNS |
			VOL_CAP_FMT_CASE_SENSITIVE |
			VOL_CAP_FMT_CASE_PRESERVING |
			VOL_CAP_FMT_FAST_STATFS |
			VOL_CAP_FMT_2TB_FILESIZE |
			VOL_CAP_FMT_OPENDENYMODES |
			VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
			VOL_CAP_FMT_PATH_FROM_ID |
			VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
			VOL_CAP_FMT_PATH_FROM_ID;
#endif
		cap->valid[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_SEARCHFS |
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_EXCHANGEDATA |
			VOL_CAP_INT_COPYFILE |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
			VOL_CAP_INT_MANLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif
		cap->valid[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->valid[VOL_CAPABILITIES_RESERVED2] = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
		vol_attributes_attr_t *attrp = &fsap->f_attributes;

		/* validattr and nativeattr are identical: everything HFS
		 * reports, it reports natively.
		 */
		attrp->validattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
		attrp->validattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->validattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->validattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->validattr.forkattr = 0;

		attrp->nativeattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
		attrp->nativeattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->nativeattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->nativeattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->nativeattr.forkattr = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
	}
	/* Timestamps come straight from the mount/VCB fields (whole seconds). */
	fsap->f_create_time.tv_sec = hfsmp->hfs_itime;
	fsap->f_create_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_create_time);
	fsap->f_modify_time.tv_sec = hfsmp->vcbLsMod;
	fsap->f_modify_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_modify_time);

	fsap->f_backup_time.tv_sec = hfsmp->vcbVolBkUp;
	fsap->f_backup_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_backup_time);
	if (VFSATTR_IS_ACTIVE(fsap, f_fssubtype)) {
		u_int16_t subtype = 0;

		/*
		 * Subtypes (flavors) for HFS
		 *   0:   Mac OS Extended
		 *   1:   Mac OS Extended (Journaled)
		 *   2:   Mac OS Extended (Case Sensitive)
		 *   3:   Mac OS Extended (Case Sensitive, Journaled)
		 *   4 - 127:   Reserved
		 *   128:   Mac OS Standard
		 *
		 */
		if (hfsmp->hfs_flags & HFS_STANDARD) {
			subtype = HFS_SUBTYPE_STANDARDHFS;
		} else /* HFS Plus */ {
			if (hfsmp->jnl)
				subtype |= HFS_SUBTYPE_JOURNALED;
			if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE)
				subtype |= HFS_SUBTYPE_CASESENSITIVE;
		}
		fsap->f_fssubtype = subtype;
		VFSATTR_SET_SUPPORTED(fsap, f_fssubtype);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		strlcpy(fsap->f_vol_name, (char *) hfsmp->vcbVN, MAXPATHLEN);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_uuid)) {
		hfs_getvoluuid(hfsmp, fsap->f_uuid);
		VFSATTR_SET_SUPPORTED(fsap, f_uuid);
	}
	return (0);
}
6941
6942 /*
6943 * Perform a volume rename. Requires the FS' root vp.
6944 */
static int
hfs_rename_volume(struct vnode *vp, const char *name, proc_t p)
{
	ExtendedVCB *vcb = VTOVCB(vp);
	struct cnode *cp = VTOC(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	struct cat_desc to_desc;
	struct cat_desc todir_desc;
	struct cat_desc new_desc;
	cat_cookie_t cookie;
	int lockflags;
	int error = 0;
	char converted_volname[256];
	size_t volname_length = 0;
	size_t conv_volname_length = 0;


	/*
	 * Ignore attempts to rename a volume to a zero-length name.
	 */
	if (name[0] == 0)
		return(0);

	bzero(&to_desc, sizeof(to_desc));
	bzero(&todir_desc, sizeof(todir_desc));
	bzero(&new_desc, sizeof(new_desc));
	bzero(&cookie, sizeof(cookie));

	/* Destination directory: the (virtual) root parent. */
	todir_desc.cd_parentcnid = kHFSRootParentID;
	todir_desc.cd_cnid = kHFSRootFolderID;
	todir_desc.cd_flags = CD_ISDIR;

	/* Target descriptor: the root folder under its new name. */
	to_desc.cd_nameptr = (const u_int8_t *)name;
	to_desc.cd_namelen = strlen(name);
	to_desc.cd_parentcnid = kHFSRootParentID;
	to_desc.cd_cnid = cp->c_cnid;
	to_desc.cd_flags = CD_ISDIR;

	/* Rename under cnode lock -> transaction -> catalog preflight -> catalog lock. */
	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK)) == 0) {
		if ((error = hfs_start_transaction(hfsmp)) == 0) {
			if ((error = cat_preflight(hfsmp, CAT_RENAME, &cookie, p)) == 0) {
				lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

				error = cat_rename(hfsmp, &cp->c_desc, &todir_desc, &to_desc, &new_desc);

				/*
				 * If successful, update the name in the VCB, ensure it's terminated.
				 */
				if (!error) {
					strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN));
					volname_length = strlen ((const char*)vcb->vcbVN);
#define DKIOCCSSETLVNAME _IOW('d', 198, char[1024])
					/* Send the volume name down to CoreStorage if necessary */
					error = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
					if (error == 0) {
						(void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
					}
					/* The CoreStorage notification is best-effort:
					 * a normalization failure is deliberately
					 * cleared so it does not fail the rename.
					 */
					error = 0;
				}

				hfs_systemfile_unlock(hfsmp, lockflags);
				cat_postflight(hfsmp, &cookie, p);

				/* NOTE(review): the VCB is marked dirty only when
				 * error is non-zero, which looks inverted given
				 * the header flush below -- confirm intent.
				 */
				if (error)
					MarkVCBDirty(vcb);
				(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			}
			hfs_end_transaction(hfsmp);
		}
		if (!error) {
			/* Release old allocated name buffer */
			if (cp->c_desc.cd_flags & CD_HASBUF) {
				const char *tmp_name = (const char *)cp->c_desc.cd_nameptr;

				cp->c_desc.cd_nameptr = 0;
				cp->c_desc.cd_namelen = 0;
				cp->c_desc.cd_flags &= ~CD_HASBUF;
				vfs_removename(tmp_name);
			}
			/* Update cnode's catalog descriptor */
			replace_desc(cp, &new_desc);
			vcb->volumeNameEncodingHint = new_desc.cd_encoding;
			cp->c_touch_chgtime = TRUE;
		}

		hfs_unlock(cp);
	}

	return(error);
}
7035
7036 /*
 * Set file system attributes.
7038 */
7039 static int
7040 hfs_vfs_setattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
7041 {
7042 kauth_cred_t cred = vfs_context_ucred(context);
7043 int error = 0;
7044
7045 /*
7046 * Must be superuser or owner of filesystem to change volume attributes
7047 */
7048 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(mp)->f_owner))
7049 return(EACCES);
7050
7051 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
7052 vnode_t root_vp;
7053
7054 error = hfs_vfs_root(mp, &root_vp, context);
7055 if (error)
7056 goto out;
7057
7058 error = hfs_rename_volume(root_vp, fsap->f_vol_name, vfs_context_proc(context));
7059 (void) vnode_put(root_vp);
7060 if (error)
7061 goto out;
7062
7063 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
7064 }
7065
7066 out:
7067 return error;
7068 }
7069
7070 /* If a runtime corruption is detected, set the volume inconsistent
7071 * bit in the volume attributes. The volume inconsistent bit is a persistent
7072 * bit which represents that the volume is corrupt and needs repair.
7073 * The volume inconsistent bit can be set from the kernel when it detects
7074 * runtime corruption or from file system repair utilities like fsck_hfs when
7075 * a repair operation fails. The bit should be cleared only from file system
7076 * verify/repair utility like fsck_hfs when a verify/repair succeeds.
7077 */
7078 void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp)
7079 {
7080 HFS_MOUNT_LOCK(hfsmp, TRUE);
7081 if ((hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) == 0) {
7082 hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask;
7083 MarkVCBDirty(hfsmp);
7084 }
7085 if ((hfsmp->hfs_flags & HFS_READ_ONLY)==0) {
7086 /* Log information to ASL log */
7087 fslog_fs_corrupt(hfsmp->hfs_mp);
7088 printf("hfs: Runtime corruption detected on %s, fsck will be forced on next mount.\n", hfsmp->vcbVN);
7089 }
7090 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
7091 }
7092
7093 /* Replay the journal on the device node provided. Returns zero if
7094 * journal replay succeeded or no journal was supposed to be replayed.
7095 */
7096 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context)
7097 {
7098 int retval = 0;
7099 struct mount *mp = NULL;
7100 struct hfs_mount_args *args = NULL;
7101
7102 /* Replay allowed only on raw devices */
7103 if (!vnode_ischr(devvp) && !vnode_isblk(devvp)) {
7104 retval = EINVAL;
7105 goto out;
7106 }
7107
7108 /* Create dummy mount structures */
7109 MALLOC(mp, struct mount *, sizeof(struct mount), M_TEMP, M_WAITOK);
7110 if (mp == NULL) {
7111 retval = ENOMEM;
7112 goto out;
7113 }
7114 bzero(mp, sizeof(struct mount));
7115 mount_lock_init(mp);
7116
7117 MALLOC(args, struct hfs_mount_args *, sizeof(struct hfs_mount_args), M_TEMP, M_WAITOK);
7118 if (args == NULL) {
7119 retval = ENOMEM;
7120 goto out;
7121 }
7122 bzero(args, sizeof(struct hfs_mount_args));
7123
7124 retval = hfs_mountfs(devvp, mp, args, 1, context);
7125 buf_flushdirtyblks(devvp, TRUE, 0, "hfs_journal_replay");
7126
7127 /* FSYNC the devnode to be sure all data has been flushed */
7128 retval = VNOP_FSYNC(devvp, MNT_WAIT, context);
7129
7130 out:
7131 if (mp) {
7132 mount_lock_destroy(mp);
7133 FREE(mp, M_TEMP);
7134 }
7135 if (args) {
7136 FREE(args, M_TEMP);
7137 }
7138 return retval;
7139 }
7140
7141 /*
7142 * hfs vfs operations.
7143 */
struct vfsops hfs_vfsops = {
	hfs_mount,		/* vfs_mount */
	hfs_start,		/* vfs_start */
	hfs_unmount,		/* vfs_unmount */
	hfs_vfs_root,		/* vfs_root */
	hfs_quotactl,		/* vfs_quotactl */
	hfs_vfs_getattr,	/* vfs_getattr; was hfs_statfs */
	hfs_sync,		/* vfs_sync */
	hfs_vfs_vget,		/* vfs_vget */
	hfs_fhtovp,		/* vfs_fhtovp */
	hfs_vptofh,		/* vfs_vptofh */
	hfs_init,		/* vfs_init */
	hfs_sysctl,		/* vfs_sysctl */
	hfs_vfs_setattr,	/* vfs_setattr */
	{NULL}			/* remaining (reserved) slots unimplemented */
};