/*
 * bsd/hfs/hfs_vfsops.c — Apple XNU release xnu-2422.110.17
 * (retrieved from the git.saurik.com mirror of apple/xnu.git)
 */
1 /*
2 * Copyright (c) 1999-2014 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1991, 1993, 1994
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * hfs_vfsops.c
66 * derived from @(#)ufs_vfsops.c 8.8 (Berkeley) 5/20/95
67 *
68 * (c) Copyright 1997-2002 Apple Computer, Inc. All rights reserved.
69 *
70 * hfs_vfsops.c -- VFS layer for loadable HFS file system.
71 *
72 */
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/kauth.h>
76
77 #include <sys/ubc.h>
78 #include <sys/ubc_internal.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/mount_internal.h>
81 #include <sys/sysctl.h>
82 #include <sys/malloc.h>
83 #include <sys/stat.h>
84 #include <sys/quota.h>
85 #include <sys/disk.h>
86 #include <sys/paths.h>
87 #include <sys/utfconv.h>
88 #include <sys/kdebug.h>
89 #include <sys/fslog.h>
90 #include <sys/ubc.h>
91 #include <sys/buf_internal.h>
92
93 /* for parsing boot-args */
94 #include <pexpert/pexpert.h>
95
96
97 #include <kern/locks.h>
98
99 #include <vfs/vfs_journal.h>
100
101 #include <miscfs/specfs/specdev.h>
102 #include <hfs/hfs_mount.h>
103
104 #include <libkern/crypto/md5.h>
105 #include <uuid/uuid.h>
106
107 #include "hfs.h"
108 #include "hfs_catalog.h"
109 #include "hfs_cnode.h"
110 #include "hfs_dbg.h"
111 #include "hfs_endian.h"
112 #include "hfs_hotfiles.h"
113 #include "hfs_quota.h"
114 #include "hfs_btreeio.h"
115 #include "hfs_kdebug.h"
116
117 #include "hfscommon/headers/FileMgrInternal.h"
118 #include "hfscommon/headers/BTreesInternal.h"
119
120 #if CONFIG_PROTECT
121 #include <sys/cprotect.h>
122 #endif
123
/* Non-zero enables verbose printf() diagnostics in the mount/unmount paths. */
#define HFS_MOUNT_DEBUG 1

#if HFS_DIAGNOSTIC
int hfs_dbg_all = 0;	/* enable all diagnostic output */
int hfs_dbg_err = 0;	/* enable error diagnostic output */
#endif

/* Enable/disable debugging code for live volume resizing */
int hfs_resize_debug = 0;

/* Lock group/attribute state shared by all HFS mounts; set up in hfs_init(). */
lck_grp_attr_t *  hfs_group_attr;
lck_attr_t *  hfs_lock_attr;
lck_grp_t *  hfs_mutex_group;
lck_grp_t *  hfs_rwlock_group;
lck_grp_t *  hfs_spinlock_group;

extern struct vnodeopv_desc hfs_vnodeop_opv_desc;

#if CONFIG_HFS_STD
extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc;
static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush);
#endif

/* not static so we can re-use in hfs_readwrite.c for build_path calls */
int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

/* Forward declarations for the VFS operation implementations below. */
static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args);
static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context);
static int hfs_flushfiles(struct mount *, int, struct proc *);
static int hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp);
static int hfs_init(struct vfsconf *vfsp);
static void hfs_locks_destroy(struct hfsmount *hfsmp);
static int hfs_vfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t context);
static int hfs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t context);
static int hfs_start(struct mount *mp, int flags, vfs_context_t context);
static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context);
static int hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec);
static int hfs_journal_replay(vnode_t devvp, vfs_context_t context);
static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context);
static int hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context);

void hfs_initialize_allocator (struct hfsmount *hfsmp);
int hfs_teardown_allocator (struct hfsmount *hfsmp);

/* Entry points referenced from outside this file (e.g. the vfsops table). */
int hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context);
int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int journal_replay_only, vfs_context_t context);
int hfs_reload(struct mount *mp);
int hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, vfs_context_t context);
int hfs_sync(struct mount *mp, int waitfor, vfs_context_t context);
int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
               user_addr_t newp, size_t newlen, vfs_context_t context);
int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context);
177 /*
178 * Called by vfs_mountroot when mounting HFS Plus as root.
179 */
180
181 int
182 hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
183 {
184 struct hfsmount *hfsmp;
185 ExtendedVCB *vcb;
186 struct vfsstatfs *vfsp;
187 int error;
188
189 if ((error = hfs_mountfs(rvp, mp, NULL, 0, context))) {
190 if (HFS_MOUNT_DEBUG) {
191 printf("hfs_mountroot: hfs_mountfs returned %d, rvp (%p) name (%s) \n",
192 error, rvp, (rvp->v_name ? rvp->v_name : "unknown device"));
193 }
194 return (error);
195 }
196
197 /* Init hfsmp */
198 hfsmp = VFSTOHFS(mp);
199
200 hfsmp->hfs_uid = UNKNOWNUID;
201 hfsmp->hfs_gid = UNKNOWNGID;
202 hfsmp->hfs_dir_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
203 hfsmp->hfs_file_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
204
205 /* Establish the free block reserve. */
206 vcb = HFSTOVCB(hfsmp);
207 vcb->reserveBlocks = ((u_int64_t)vcb->totalBlocks * HFS_MINFREE) / 100;
208 vcb->reserveBlocks = MIN(vcb->reserveBlocks, HFS_MAXRESERVE / vcb->blockSize);
209
210 vfsp = vfs_statfs(mp);
211 (void)hfs_statfs(mp, vfsp, NULL);
212
213 return (0);
214 }
215
216
/*
 * VFS Operations.
 *
 * mount system call
 */

/*
 * hfs_mount - handle both first-time mounts and MNT_UPDATE remounts.
 *
 * For MNT_UPDATE this covers: reload after fsck (MNT_RELOAD, read-only
 * mounts only), downgrade to read-only, upgrade back to read-write, and
 * mount-parameter changes via hfs_changefs().  For a fresh mount it
 * delegates to hfs_mountfs() and then registers the root vnode with
 * CoreStorage when present.
 *
 * Returns 0 on success or an errno value.
 */
int
hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct hfsmount *hfsmp = NULL;
	struct hfs_mount_args args;	/* caller-supplied mount arguments (copied in below) */
	int retval = E_NONE;
	u_int32_t cmdflags;

	/* Copy the user-space mount arguments into the kernel. */
	if ((retval = copyin(data, (caddr_t)&args, sizeof(args)))) {
		if (HFS_MOUNT_DEBUG) {
			printf("hfs_mount: copyin returned %d for fs\n", retval);
		}
		return (retval);
	}
	cmdflags = (u_int32_t)vfs_flags(mp) & MNT_CMDFLAGS;
	if (cmdflags & MNT_UPDATE) {
		hfsmp = VFSTOHFS(mp);

		/* Reload incore data after an fsck. */
		if (cmdflags & MNT_RELOAD) {
			if (vfs_isrdonly(mp)) {
				int error = hfs_reload(mp);
				if (error && HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_reload returned %d on %s \n", error, hfsmp->vcbVN);
				}
				return error;
			}
			else {
				/* Reload is only safe when nothing can be writing the volume. */
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: MNT_RELOAD not supported on rdwr filesystem %s\n", hfsmp->vcbVN);
				}
				return (EINVAL);
			}
		}

		/* Change to a read-only file system. */
		if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
		    vfs_isrdonly(mp)) {
			int flags;

			/* Set flag to indicate that a downgrade to read-only
			 * is in progress and therefore block any further
			 * modifications to the file system.
			 */
			hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
			hfsmp->hfs_flags |= HFS_RDONLY_DOWNGRADE;
			hfsmp->hfs_downgrading_proc = current_thread();
			hfs_unlock_global (hfsmp);

			/* use VFS_SYNC to push out System (btree) files */
			retval = VFS_SYNC(mp, MNT_WAIT, context);
			if (retval && ((cmdflags & MNT_FORCE) == 0)) {
				/* Sync failed and not forced: roll back the downgrade state. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: VFS_SYNC returned %d during b-tree sync of %s \n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			flags = WRITECLOSE;
			if (cmdflags & MNT_FORCE)
				flags |= FORCECLOSE;

			if ((retval = hfs_flushfiles(mp, flags, p))) {
				/* Could not flush open files: roll back the downgrade state. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_flushfiles returned %d on %s \n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* mark the volume cleanly unmounted */
			hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask;
			retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			hfsmp->hfs_flags |= HFS_READ_ONLY;

			/*
			 * Close down the journal.
			 *
			 * NOTE: It is critically important to close down the journal
			 * and have it issue all pending I/O prior to calling VNOP_FSYNC below.
			 * In a journaled environment it is expected that the journal be
			 * the only actor permitted to issue I/O for metadata blocks in HFS.
			 * If we were to call VNOP_FSYNC prior to closing down the journal,
			 * we would inadvertantly issue (and wait for) the I/O we just
			 * initiated above as part of the flushvolumeheader call.
			 *
			 * To avoid this, we follow the same order of operations as in
			 * unmount and issue the journal_close prior to calling VNOP_FSYNC.
			 */

			if (hfsmp->jnl) {
				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

				journal_close(hfsmp->jnl);
				hfsmp->jnl = NULL;

				// Note: we explicitly don't want to shutdown
				//       access to the jvp because we may need
				//       it later if we go back to being read-write.

				hfs_unlock_global (hfsmp);
			}


			/*
			 * Write out any pending I/O still outstanding against the device node
			 * now that the journal has been closed.
			 */
			if (retval == 0) {
				vnode_get(hfsmp->hfs_devvp);
				retval = VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);
				vnode_put(hfsmp->hfs_devvp);
			}

			if (retval) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: FSYNC on devvp returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				/* Downgrade failed: clear the downgrade and read-only state. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_proc = NULL;
				hfsmp->hfs_flags &= ~HFS_READ_ONLY;
				goto out;
			}

			/* Read-only volumes do not need the in-memory summary table. */
			if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
				if (hfsmp->hfs_summary_table) {
					int err = 0;
					/*
					 * Take the bitmap lock to serialize against a concurrent bitmap scan still in progress
					 */
					if (hfsmp->hfs_allocation_vp) {
						err = hfs_lock (VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
					}
					FREE (hfsmp->hfs_summary_table, M_TEMP);
					hfsmp->hfs_summary_table = NULL;
					hfsmp->hfs_flags &= ~HFS_SUMMARY_TABLE;
					if (err == 0 && hfsmp->hfs_allocation_vp){
						hfs_unlock (VTOC(hfsmp->hfs_allocation_vp));
					}
				}
			}

			/* Downgrade complete; no longer "in progress". */
			hfsmp->hfs_downgrading_proc = NULL;
		}

		/* Change to a writable file system. */
		if (vfs_iswriteupgrade(mp)) {
			/*
			 * On inconsistent disks, do not allow read-write mount
			 * unless it is the boot volume being mounted.
			 */
			if (!(vfs_flags(mp) & MNT_ROOTFS) &&
			    (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask)) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: attempting to mount inconsistent non-root volume %s\n", (hfsmp->vcbVN));
				}
				retval = EINVAL;
				goto out;
			}

			// If the journal was shut-down previously because we were
			// asked to be read-only, let's start it back up again now

			if (   (HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask)
			    && hfsmp->jnl == NULL
			    && hfsmp->jvp != NULL) {
				int jflags;

				if (hfsmp->hfs_flags & HFS_NEED_JNL_RESET) {
					jflags = JOURNAL_RESET;
				} else {
					jflags = 0;
				}

				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

				/* We provide the mount point twice here: The first is used as
				 * an opaque argument to be passed back when hfs_sync_metadata
				 * is called.  The second is provided to the throttling code to
				 * indicate which mount's device should be used when accounting
				 * for metadata writes.
				 */
				hfsmp->jnl = journal_open(hfsmp->jvp,
						(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
						hfsmp->jnl_size,
						hfsmp->hfs_devvp,
						hfsmp->hfs_logical_block_size,
						jflags,
						0,
						hfs_sync_metadata, hfsmp->hfs_mp,
						hfsmp->hfs_mp);

				/*
				 * Set up the trim callback function so that we can add
				 * recently freed extents to the free extent cache once
				 * the transaction that freed them is written to the
				 * journal on disk.
				 */
				if (hfsmp->jnl)
					journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);

				hfs_unlock_global (hfsmp);

				if (hfsmp->jnl == NULL) {
					if (HFS_MOUNT_DEBUG) {
						printf("hfs_mount: journal_open == NULL; couldn't be opened on %s \n", (hfsmp->vcbVN));
					}
					retval = EINVAL;
					goto out;
				} else {
					hfsmp->hfs_flags &= ~HFS_NEED_JNL_RESET;
				}

			}

			/* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
			retval = hfs_erase_unused_nodes(hfsmp);
			if (retval != E_NONE) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_erase_unused_nodes returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* If this mount point was downgraded from read-write
			 * to read-only, clear that information as we are now
			 * moving back to read-write.
			 */
			hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
			hfsmp->hfs_downgrading_proc = NULL;

			/* mark the volume dirty (clear clean unmount bit) */
			hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask;

			retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			if (retval != E_NONE) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_flushvolumeheader returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* Only clear HFS_READ_ONLY after a successful write */
			hfsmp->hfs_flags &= ~HFS_READ_ONLY;


			if (!(hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_STANDARD))) {
				/* Setup private/hidden directories for hardlinks. */
				hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
				hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

				hfs_remove_orphans(hfsmp);

				/*
				 * Allow hot file clustering if conditions allow.
				 */
				if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) &&
				    ((hfsmp->hfs_mp->mnt_kern_flag & MNTK_SSD) == 0)) {
					(void) hfs_recording_init(hfsmp);
				}
				/* Force ACLs on HFS+ file systems. */
				if (vfs_extendedsecurity(HFSTOVFS(hfsmp)) == 0) {
					vfs_setextendedsecurity(HFSTOVFS(hfsmp));
				}
			}
		}

		/* Update file system parameters. */
		retval = hfs_changefs(mp, &args);
		if (retval && HFS_MOUNT_DEBUG) {
			printf("hfs_mount: hfs_changefs returned %d for %s\n", retval, hfsmp->vcbVN);
		}

	} else /* not an update request */ {

		/* Set the mount flag to indicate that we support volfs  */
		vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DOVOLFS));

		retval = hfs_mountfs(devvp, mp, &args, 0, context);
		if (retval) {
			const char *name = vnode_getname(devvp);
			printf("hfs_mount: hfs_mountfs returned error=%d for device %s\n", retval, (name ? name : "unknown-dev"));
			if (name) {
				vnode_putname(name);
			}
			goto out;
		}

		/* After hfs_mountfs succeeds, we should have valid hfsmp */
		hfsmp = VFSTOHFS(mp);

		/*
		 * Check to see if the file system exists on CoreStorage.
		 *
		 * This must be done after examining the root folder's CP EA since
		 * hfs_vfs_root will create a vnode (which must not occur until after
		 * we've established the CP level of the FS).
		 */
		if (retval == 0) {
			errno_t err;
			vnode_t root_vnode;
			err = hfs_vfs_root(mp, &root_vnode, context);
			if (err == 0) {
				/* Hand the root vnode to CoreStorage; on success keep a ref and flag it. */
				if (VNOP_IOCTL(devvp, _DKIOCCSSETFSVNODE,
				    (caddr_t)&root_vnode, 0, context) == 0) {
					err = vnode_ref(root_vnode);
					if (err == 0) {
						hfsmp->hfs_flags |= HFS_CS;
					}
				}

				err = vnode_put(root_vnode);
				if (err) {
					printf("hfs: could not release io count on root vnode with error: %d\n",
						err);
				}
			} else {
				printf("hfs: could not get root vnode with error: %d\n",
					err);
			}
		}
	}

out:
	if (retval == 0) {
		(void)hfs_statfs(mp, vfs_statfs(mp), context);
	}
	return (retval);
}
556
557
/*
 * Arguments threaded through vnode_iterate() to hfs_changefs_callback().
 */
struct hfs_changefs_cargs {
	struct hfsmount *hfsmp;	/* mount whose parameters are changing */
	int		namefix;	/* non-zero: name encoding converter changed */
	int		permfix;	/* non-zero: default uid/gid/mask changed */
	int		permswitch;	/* non-zero: MNT_UNKNOWNPERMISSIONS toggled */
};
564
/*
 * Per-vnode callback for hfs_changefs(), invoked via vnode_iterate().
 *
 * Re-reads the cnode's catalog record and applies any pending
 * permission (uid/gid/mode) or name-encoding fixups.  Always returns
 * VNODE_RETURNED so the iteration continues over every vnode.
 */
static int
hfs_changefs_callback(struct vnode *vp, void *cargs)
{
	ExtendedVCB *vcb;
	struct cnode *cp;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct hfs_changefs_cargs *args;
	int lockflags;
	int error;

	args = (struct hfs_changefs_cargs *)cargs;

	cp = VTOC(vp);
	vcb = HFSTOVCB(args->hfsmp);

	/* Look up this cnode's current catalog record under the catalog lock. */
	lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
	error = cat_lookup(args->hfsmp, &cp->c_desc, 0, 0, &cndesc, &cnattr, NULL, NULL);
	hfs_systemfile_unlock(args->hfsmp, lockflags);
	if (error) {
	        /*
	         * If we couldn't find this guy skip to the next one
	         */
		if (args->namefix)
			cache_purge(vp);

		return (VNODE_RETURNED);
	}
	/*
	 * Get the real uid/gid and perm mask from disk.
	 */
	if (args->permswitch || args->permfix) {
		cp->c_uid = cnattr.ca_uid;
		cp->c_gid = cnattr.ca_gid;
		cp->c_mode = cnattr.ca_mode;
	}
	/*
	 * If we're switching name converters then...
	 *   Remove the existing entry from the namei cache.
	 *   Update name to one based on new encoder.
	 */
	if (args->namefix) {
		cache_purge(vp);
		/* replace_desc takes ownership of cndesc; no release needed here. */
		replace_desc(cp, &cndesc);

		if (cndesc.cd_cnid == kHFSRootFolderID) {
			/* Root folder: keep the in-core volume name in sync. */
			strlcpy((char *)vcb->vcbVN, (const char *)cp->c_desc.cd_nameptr, NAME_MAX+1);
			cp->c_desc.cd_encoding = args->hfsmp->hfs_encoding;
		}
	} else {
		/* Descriptor was not consumed by replace_desc; release it. */
		cat_releasedesc(&cndesc);
	}
	return (VNODE_RETURNED);
}
619
/* Change fs mount parameters */
/*
 * hfs_changefs - apply updated mount arguments to an existing mount.
 *
 * Handles: toggling MNT_UNKNOWNPERMISSIONS, timezone changes, default
 * uid/gid/mask changes, and (HFS standard only) switching the name
 * encoding converter.  When any per-vnode state is affected, every
 * active vnode is visited via hfs_changefs_callback().
 *
 * Returns 0 on success or an errno value.
 */
static int
hfs_changefs(struct mount *mp, struct hfs_mount_args *args)
{
	int retval = 0;
	int namefix, permfix, permswitch;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	struct hfs_changefs_cargs cargs;
	u_int32_t mount_flags;

#if CONFIG_HFS_STD
	u_int32_t old_encoding = 0;
	hfs_to_unicode_func_t	get_unicode_func;
	unicode_to_hfs_func_t	get_hfsname_func;
#endif

	hfsmp = VFSTOHFS(mp);
	vcb = HFSTOVCB(hfsmp);
	mount_flags = (unsigned int)vfs_flags(mp);

	/* Flag that a parameter change is in progress for this mount. */
	hfsmp->hfs_flags |= HFS_IN_CHANGEFS;

	/* True when the MNT_UNKNOWNPERMISSIONS setting is being flipped. */
	permswitch = (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) &&
	               ((mount_flags & MNT_UNKNOWNPERMISSIONS) == 0)) ||
	              (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) == 0) &&
	               (mount_flags & MNT_UNKNOWNPERMISSIONS)));

	/* The root filesystem must operate with actual permissions: */
	if (permswitch && (mount_flags & MNT_ROOTFS) && (mount_flags & MNT_UNKNOWNPERMISSIONS)) {
		vfs_clearflags(mp, (u_int64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS));	/* Just say "No". */
		retval = EINVAL;
		goto exit;
	}
	if (mount_flags & MNT_UNKNOWNPERMISSIONS)
		hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
	else
		hfsmp->hfs_flags &= ~HFS_UNKNOWN_PERMS;

	namefix = permfix = 0;

	/*
	 * Tracking of hot files requires up-to-date access times.  So if
	 * access time updates are disabled, we must also disable hot files.
	 */
	if (mount_flags & MNT_NOATIME) {
		(void) hfs_recording_suspend(hfsmp);
	}

	/* Change the timezone (Note: this affects all hfs volumes and hfs+ volume create dates) */
	if (args->hfs_timezone.tz_minuteswest != VNOVAL) {
		gTimeZone = args->hfs_timezone;
	}

	/* Change the default uid, gid and/or mask */
	if ((args->hfs_uid != (uid_t)VNOVAL) && (hfsmp->hfs_uid != args->hfs_uid)) {
		hfsmp->hfs_uid = args->hfs_uid;
		if (vcb->vcbSigWord == kHFSPlusSigWord)
			++permfix;
	}
	if ((args->hfs_gid != (gid_t)VNOVAL) && (hfsmp->hfs_gid != args->hfs_gid)) {
		hfsmp->hfs_gid = args->hfs_gid;
		if (vcb->vcbSigWord == kHFSPlusSigWord)
			++permfix;
	}
	if (args->hfs_mask != (mode_t)VNOVAL) {
		if (hfsmp->hfs_dir_mask != (args->hfs_mask & ALLPERMS)) {
			hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
			hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
			/* HFSFSMNT_NOXONFILES strips execute bits from plain files. */
			if ((args->flags != VNOVAL) && (args->flags & HFSFSMNT_NOXONFILES))
				hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
			if (vcb->vcbSigWord == kHFSPlusSigWord)
				++permfix;
		}
	}

#if CONFIG_HFS_STD
	/* Change the hfs encoding value (hfs only) */
	if ((vcb->vcbSigWord == kHFSSigWord)	&&
	    (args->hfs_encoding != (u_int32_t)VNOVAL)	&&
	    (hfsmp->hfs_encoding != args->hfs_encoding)) {

		retval = hfs_getconverter(args->hfs_encoding, &get_unicode_func, &get_hfsname_func);
		if (retval)
			goto exit;

		/*
		 * Connect the new hfs_get_unicode converter but leave
		 * the old hfs_get_hfsname converter in place so that
		 * we can lookup existing vnodes to get their correctly
		 * encoded names.
		 *
		 * When we're all finished, we can then connect the new
		 * hfs_get_hfsname converter and release our interest
		 * in the old converters.
		 */
		hfsmp->hfs_get_unicode = get_unicode_func;
		old_encoding = hfsmp->hfs_encoding;
		hfsmp->hfs_encoding = args->hfs_encoding;
		++namefix;
	}
#endif

	/* Nothing changed that affects per-vnode state; done. */
	if (!(namefix || permfix || permswitch))
		goto exit;

	/* XXX 3762912 hack to support HFS filesystem 'owner' */
	if (permfix)
		vfs_setowner(mp,
		    hfsmp->hfs_uid == UNKNOWNUID ? KAUTH_UID_NONE : hfsmp->hfs_uid,
		    hfsmp->hfs_gid == UNKNOWNGID ? KAUTH_GID_NONE : hfsmp->hfs_gid);

	/*
	 * For each active vnode fix things that changed
	 *
	 * Note that we can visit a vnode more than once
	 * and we can race with fsync.
	 *
	 * hfs_changefs_callback will be called for each vnode
	 * hung off of this mount point
	 *
	 * The vnode will be properly referenced and unreferenced
	 * around the callback
	 */
	cargs.hfsmp = hfsmp;
	cargs.namefix = namefix;
	cargs.permfix = permfix;
	cargs.permswitch = permswitch;

	vnode_iterate(mp, 0, hfs_changefs_callback, (void *)&cargs);

#if CONFIG_HFS_STD
	/*
	 * If we're switching name converters we can now
	 * connect the new hfs_get_hfsname converter and
	 * release our interest in the old converters.
	 */
	if (namefix) {
		/* HFS standard only */
		hfsmp->hfs_get_hfsname = get_hfsname_func;
		vcb->volumeNameEncodingHint = args->hfs_encoding;
		(void) hfs_relconverter(old_encoding);
	}
#endif

exit:
	hfsmp->hfs_flags &= ~HFS_IN_CHANGEFS;
	return (retval);
}
769
770
/*
 * Arguments threaded through vnode_iterate() to hfs_reload_callback().
 */
struct hfs_reload_cargs {
	struct hfsmount *hfsmp;	/* mount being reloaded */
	int		error;	/* first error seen by the callback (0 if none) */
};
775
/*
 * Per-vnode callback for hfs_reload(), invoked via vnode_iterate().
 *
 * Invalidates cached buffers and directory hints, then re-reads catalog
 * data for regular (non-system, non-resource-fork) cnodes.  On a catalog
 * lookup failure the error is stored in args->error and the iteration is
 * stopped by returning VNODE_RETURNED_DONE.
 */
static int
hfs_reload_callback(struct vnode *vp, void *cargs)
{
	struct cnode *cp;
	struct hfs_reload_cargs *args;
	int lockflags;

	args = (struct hfs_reload_cargs *)cargs;
	/*
	 * flush all the buffers associated with this node
	 */
	(void) buf_invalidateblks(vp, 0, 0, 0);

	cp = VTOC(vp);
	/*
	 * Remove any directory hints
	 */
	if (vnode_isdir(vp))
		hfs_reldirhints(cp, 0);

	/*
	 * Re-read cnode data for all active vnodes (non-metadata files).
	 */
	if (!vnode_issystem(vp) && !VNODE_IS_RSRC(vp) && (cp->c_fileid >= kHFSFirstUserCatalogNodeID)) {
		struct cat_fork *datafork;
		struct cat_desc desc;

		datafork = cp->c_datafork ? &cp->c_datafork->ff_data : NULL;

		/* lookup by fileID since name could have changed */
		lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		args->error = cat_idlookup(args->hfsmp, cp->c_fileid, 0, 0, &desc, &cp->c_attr, datafork);
		hfs_systemfile_unlock(args->hfsmp, lockflags);
		if (args->error) {
			return (VNODE_RETURNED_DONE);
		}

		/* update cnode's catalog descriptor */
		(void) replace_desc(cp, &desc);
	}
	return (VNODE_RETURNED);
}
818
/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix). The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 *	invalidate all cached meta-data.
 *	invalidate all inactive vnodes.
 *	invalidate all cached file data.
 *	re-read volume header from disk.
 *	re-load meta-file info (extents, file size).
 *	re-load B-tree header data.
 *	re-read cnode data for all active vnodes.
 *
 * Returns 0 on success or an errno value; EINVAL for HFS standard
 * volumes, EIO if the re-read volume header fails its sanity check.
 */
int
hfs_reload(struct mount *mountp)
{
	register struct vnode *devvp;
	struct buf *bp;
	int error, i;
	struct hfsmount *hfsmp;
	struct HFSPlusVolumeHeader *vhp;
	ExtendedVCB *vcb;
	struct filefork *forkp;
	struct cat_desc cndesc;
	struct hfs_reload_cargs args;
	daddr64_t priIDSector;

	hfsmp = VFSTOHFS(mountp);
	vcb = HFSTOVCB(hfsmp);

	if (vcb->vcbSigWord == kHFSSigWord)
		return (EINVAL);	/* rooting from HFS is not supported! */

	/*
	 * Invalidate all cached meta-data.
	 */
	devvp = hfsmp->hfs_devvp;
	if (buf_invalidateblks(devvp, 0, 0, 0))
		panic("hfs_reload: dirty1");

	args.hfsmp = hfsmp;
	args.error = 0;
	/*
	 * hfs_reload_callback will be called for each vnode
	 * hung off of this mount point that can't be recycled...
	 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
	 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
	 * properly referenced and unreferenced around the callback
	 */
	vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, hfs_reload_callback, (void *)&args);

	if (args.error)
		return (args.error);

	/*
	 * Re-read VolumeHeader from disk.
	 */
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
			HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	error = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	if (error) {
		if (bp != NULL)
			buf_brelse(bp);
		return (error);
	}

	vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));

	/* Do a quick sanity check: signature, version and block size must match. */
	if ((SWAP_BE16(vhp->signature) != kHFSPlusSigWord &&
	     SWAP_BE16(vhp->signature) != kHFSXSigWord) ||
	    (SWAP_BE16(vhp->version) != kHFSPlusVersion &&
	     SWAP_BE16(vhp->version) != kHFSXVersion) ||
	    SWAP_BE32(vhp->blockSize) != vcb->blockSize) {
		buf_brelse(bp);
		return (EIO);
	}

	/* Copy the (big-endian) on-disk header fields into the in-core VCB. */
	vcb->vcbLsMod		= to_bsd_time(SWAP_BE32(vhp->modifyDate));
	vcb->vcbAtrb		= SWAP_BE32 (vhp->attributes);
	vcb->vcbJinfoBlock  = SWAP_BE32(vhp->journalInfoBlock);
	vcb->vcbClpSiz		= SWAP_BE32 (vhp->rsrcClumpSize);
	vcb->vcbNxtCNID		= SWAP_BE32 (vhp->nextCatalogID);
	vcb->vcbVolBkUp		= to_bsd_time(SWAP_BE32(vhp->backupDate));
	vcb->vcbWrCnt		= SWAP_BE32 (vhp->writeCount);
	vcb->vcbFilCnt		= SWAP_BE32 (vhp->fileCount);
	vcb->vcbDirCnt		= SWAP_BE32 (vhp->folderCount);
	HFS_UPDATE_NEXT_ALLOCATION(vcb, SWAP_BE32 (vhp->nextAllocation));
	vcb->totalBlocks	= SWAP_BE32 (vhp->totalBlocks);
	vcb->freeBlocks		= SWAP_BE32 (vhp->freeBlocks);
	vcb->encodingsBitmap	= SWAP_BE64 (vhp->encodingsBitmap);
	bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
	vcb->localCreateDate	= SWAP_BE32 (vhp->createDate);	/* hfs+ create date is in local time */

	/*
	 * Re-load meta-file vnode data (extent info, file size, etc).
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->extentsFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->extentsFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->extentsFile.clumpSize);


	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock	=
			SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount	=
			SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->catalogFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->catalogFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->catalogFile.clumpSize);

	/* The attributes B-tree is optional; only reload it if present. */
	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			forkp->ff_extents[i].startBlock	=
				SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
			forkp->ff_extents[i].blockCount	=
				SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
		}
		forkp->ff_size      = SWAP_BE64 (vhp->attributesFile.logicalSize);
		forkp->ff_blocks    = SWAP_BE32 (vhp->attributesFile.totalBlocks);
		forkp->ff_clumpsize = SWAP_BE32 (vhp->attributesFile.clumpSize);
	}

	forkp = VTOF((struct vnode *)vcb->allocationsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock	=
			SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount	=
			SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->allocationFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->allocationFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->allocationFile.clumpSize);

	buf_brelse(bp);
	vhp = NULL;	/* header pointed into bp's data; invalid after release */

	/*
	 * Re-load B-tree header data
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
			return (error);
	}

	/* Reload the volume name */
	if ((error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, NULL, NULL)))
		return (error);
	vcb->volumeNameEncodingHint = cndesc.cd_encoding;
	bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
	cat_releasedesc(&cndesc);

	/* Re-establish private/hidden directories. */
	hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
	hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

	/* In case any volume information changed to trigger a notification */
	hfs_generate_volume_notifications(hfsmp);

	return (0);
}
1003
1004 __unused
static uint64_t tv_to_usecs(struct timeval *tv)
{
	/* Fold the seconds and microseconds fields of a timeval into a
	   single whole-microsecond count. */
	uint64_t total = (uint64_t)tv->tv_sec * 1000000ULL;
	total += (uint64_t)tv->tv_usec;
	return total;
}
1009
1010 // Returns TRUE if b - a >= usecs
1011 static boolean_t hfs_has_elapsed (const struct timeval *a,
1012 const struct timeval *b,
1013 uint64_t usecs)
1014 {
1015 struct timeval diff;
1016 timersub(b, a, &diff);
1017 return diff.tv_sec * 1000000ULL + diff.tv_usec >= usecs;
1018 }
1019
/*
 * hfs_syncer - thread_call callback that flushes dirty HFS metadata.
 *
 * arg0:   the struct hfsmount of the volume to sync.
 * unused: second thread_call parameter (ignored).
 *
 * Scheduled via hfs_syncer_queue() after sync requests are recorded in
 * hfsmp->hfs_sync_req_oldest.  Decides whether enough time has passed to
 * flush now; if not, it re-arms itself and returns.  When it does flush,
 * it performs a synchronous journal flush (journaled volumes) or a full
 * hfs_sync (non-journaled volumes) inside a transaction.
 *
 * Shutdown protocol: hfs_unmount clears hfsmp->hfs_syncer and then waits
 * on hfs_sync_incomplete.  Every exit path here must therefore either
 * re-queue the syncer or clear hfs_sync_incomplete and call
 * hfs_syncer_wakeup().
 */
static void
hfs_syncer(void *arg0, void *unused)
{
#pragma unused(unused)

	struct hfsmount *hfsmp = arg0;
	struct timeval now;

	microuptime(&now);

	KERNEL_DEBUG_CONSTANT(HFSDBG_SYNCER | DBG_FUNC_START, hfsmp,
	                      tv_to_usecs(&now),
	                      tv_to_usecs(&hfsmp->hfs_mp->mnt_last_write_completed_timestamp),
	                      hfsmp->hfs_mp->mnt_pending_write_size, 0);

	hfs_syncer_lock(hfsmp);

	if (!hfsmp->hfs_syncer) {
		// hfs_unmount cleared hfs_syncer and is waiting for us: leave
		// now and let it do the sync itself.
		hfsmp->hfs_sync_incomplete = FALSE;
		hfs_syncer_unlock(hfsmp);
		hfs_syncer_wakeup(hfsmp);
		return;
	}

	/* Check to see whether we should flush now: either the oldest is
	   > HFS_MAX_META_DELAY or HFS_META_DELAY has elapsed since the
	   request and there are no pending writes. */

	boolean_t flush_now = FALSE;

	if (hfs_has_elapsed(&hfsmp->hfs_sync_req_oldest, &now, HFS_MAX_META_DELAY))
		flush_now = TRUE;
	else if (!hfsmp->hfs_mp->mnt_pending_write_size) {
		/* N.B. accessing mnt_last_write_completed_timestamp is not thread safe, but
		   it won't matter for what we're using it for. */
		if (hfs_has_elapsed(&hfsmp->hfs_mp->mnt_last_write_completed_timestamp,
		                    &now,
		                    HFS_META_DELAY)) {
			flush_now = TRUE;
		}
	}

	if (!flush_now) {
		// Too early to flush: re-arm and check again later.  Capture
		// the thread_call pointer while the syncer lock is still held.
		thread_call_t syncer = hfsmp->hfs_syncer;

		hfs_syncer_unlock(hfsmp);

		hfs_syncer_queue(syncer);

		return;
	}

	// Committed to flushing: clear the oldest-request timestamp so new
	// sync requests start a fresh delay window.
	timerclear(&hfsmp->hfs_sync_req_oldest);

	hfs_syncer_unlock(hfsmp);

	KERNEL_DEBUG_CONSTANT(HFSDBG_SYNCER_TIMED | DBG_FUNC_START,
	                      tv_to_usecs(&now),
	                      tv_to_usecs(&hfsmp->hfs_mp->mnt_last_write_completed_timestamp),
	                      tv_to_usecs(&hfsmp->hfs_mp->mnt_last_write_issued_timestamp),
	                      hfsmp->hfs_mp->mnt_pending_write_size, 0);

	// Defensive: only one syncer instance should ever be flushing.
	// NOTE(review): this early return leaves hfs_sync_incomplete set and
	// performs no hfs_syncer_wakeup, which would stall an hfs_unmount
	// waiter -- presumably this path is unreachable in practice; verify.
	if (hfsmp->hfs_syncer_thread) {
		printf("hfs: syncer already running!");
		return;
	}

	hfsmp->hfs_syncer_thread = current_thread();

	hfs_start_transaction(hfsmp);    // so we hold off any new writes

	/*
	 * We intentionally do a synchronous flush (of the journal or entire volume) here.
	 * For journaled volumes, this means we wait until the metadata blocks are written
	 * to both the journal and their final locations (in the B-trees, etc.).
	 *
	 * This tends to avoid interleaving the metadata writes with other writes (for
	 * example, user data, or to the journal when a later transaction notices that
	 * an earlier transaction has finished its async writes, and then updates the
	 * journal start in the journal header).  Avoiding interleaving of writes is
	 * very good for performance on simple flash devices like SD cards, thumb drives;
	 * and on devices like floppies.  Since removable devices tend to be this kind of
	 * simple device, doing a synchronous flush actually improves performance in
	 * practice.
	 *
	 * NOTE: For non-journaled volumes, the call to hfs_sync will also cause dirty
	 * user data to be written.
	 */
	if (hfsmp->jnl) {
		hfs_journal_flush(hfsmp, TRUE);
	} else {
		hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
	}

	KERNEL_DEBUG_CONSTANT(HFSDBG_SYNCER_TIMED | DBG_FUNC_END,
	                      (microuptime(&now), tv_to_usecs(&now)),    // re-sample 'now' after the flush
	                      tv_to_usecs(&hfsmp->hfs_mp->mnt_last_write_completed_timestamp),
	                      tv_to_usecs(&hfsmp->hfs_mp->mnt_last_write_issued_timestamp),
	                      hfsmp->hfs_mp->mnt_pending_write_size, 0);

	hfs_end_transaction(hfsmp);

	hfsmp->hfs_syncer_thread = NULL;

	hfs_syncer_lock(hfsmp);

	// If hfs_unmount lets us and we missed a sync, schedule again
	if (hfsmp->hfs_syncer && timerisset(&hfsmp->hfs_sync_req_oldest)) {
		thread_call_t syncer = hfsmp->hfs_syncer;

		hfs_syncer_unlock(hfsmp);

		hfs_syncer_queue(syncer);
	} else {
		// Either unmount is in progress or no sync was missed: signal
		// completion so any hfs_unmount waiter can proceed.
		hfsmp->hfs_sync_incomplete = FALSE;
		hfs_syncer_unlock(hfsmp);
		hfs_syncer_wakeup(hfsmp);
	}

	/* BE CAREFUL WHAT YOU ADD HERE: at this point hfs_unmount is free
	   to continue and therefore hfsmp might be invalid. */

	KERNEL_DEBUG_CONSTANT(HFSDBG_SYNCER | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
1145
1146
1147 extern int IOBSDIsMediaEjectable( const char *cdev_name );
1148
1149 /*
1150 * Call into the allocator code and perform a full scan of the bitmap file.
1151 *
1152 * This allows us to TRIM unallocated ranges if needed, and also to build up
1153 * an in-memory summary table of the state of the allocated blocks.
1154 */
1155 void hfs_scan_blocks (struct hfsmount *hfsmp) {
1156 /*
1157 * Take the allocation file lock. Journal transactions will block until
1158 * we're done here.
1159 */
1160
1161 int flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1162
1163 /*
1164 * We serialize here with the HFS mount lock as we're mounting.
1165 *
1166 * The mount can only proceed once this thread has acquired the bitmap
1167 * lock, since we absolutely do not want someone else racing in and
1168 * getting the bitmap lock, doing a read/write of the bitmap file,
1169 * then us getting the bitmap lock.
1170 *
1171 * To prevent this, the mount thread takes the HFS mount mutex, starts us
1172 * up, then immediately msleeps on the scan_var variable in the mount
1173 * point as a condition variable. This serialization is safe since
1174 * if we race in and try to proceed while they're still holding the lock,
1175 * we'll block trying to acquire the global lock. Since the mount thread
1176 * acquires the HFS mutex before starting this function in a new thread,
1177 * any lock acquisition on our part must be linearizably AFTER the mount thread's.
1178 *
1179 * Note that the HFS mount mutex is always taken last, and always for only
1180 * a short time. In this case, we just take it long enough to mark the
1181 * scan-in-flight bit.
1182 */
1183 (void) hfs_lock_mount (hfsmp);
1184 hfsmp->scan_var |= HFS_ALLOCATOR_SCAN_INFLIGHT;
1185 wakeup((caddr_t) &hfsmp->scan_var);
1186 hfs_unlock_mount (hfsmp);
1187
1188 /* Initialize the summary table */
1189 if (hfs_init_summary (hfsmp)) {
1190 printf("hfs: could not initialize summary table for %s\n", hfsmp->vcbVN);
1191 }
1192
1193 /*
1194 * ScanUnmapBlocks assumes that the bitmap lock is held when you
1195 * call the function. We don't care if there were any errors issuing unmaps.
1196 *
1197 * It will also attempt to build up the summary table for subsequent
1198 * allocator use, as configured.
1199 */
1200 (void) ScanUnmapBlocks(hfsmp);
1201
1202 hfs_systemfile_unlock(hfsmp, flags);
1203 }
1204
/* Set during hfs_mountfs for the root volume: non-zero when the HFS+
 * volume header's kHFSVolumeUnmountedMask attribute bit was set (i.e.
 * the root filesystem was unmounted cleanly last time). */
static int hfs_root_unmounted_cleanly = 0;

SYSCTL_DECL(_vfs_generic);
/* Exported read-only as vfs.generic.root_unmounted_cleanly. */
SYSCTL_INT(_vfs_generic, OID_AUTO, root_unmounted_cleanly, CTLFLAG_RD, &hfs_root_unmounted_cleanly, 0, "Root filesystem was unmounted cleanly");
1209
1210 /*
1211 * Common code for mount and mountroot
1212 */
1213 int
1214 hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
1215 int journal_replay_only, vfs_context_t context)
1216 {
1217 struct proc *p = vfs_context_proc(context);
1218 int retval = E_NONE;
1219 struct hfsmount *hfsmp = NULL;
1220 struct buf *bp;
1221 dev_t dev;
1222 HFSMasterDirectoryBlock *mdbp = NULL;
1223 int ronly;
1224 #if QUOTA
1225 int i;
1226 #endif
1227 int mntwrapper;
1228 kauth_cred_t cred;
1229 u_int64_t disksize;
1230 daddr64_t log_blkcnt;
1231 u_int32_t log_blksize;
1232 u_int32_t phys_blksize;
1233 u_int32_t minblksize;
1234 u_int32_t iswritable;
1235 daddr64_t mdb_offset;
1236 int isvirtual = 0;
1237 int isroot = 0;
1238 u_int32_t device_features = 0;
1239 int isssd;
1240
1241 if (args == NULL) {
1242 /* only hfs_mountroot passes us NULL as the 'args' argument */
1243 isroot = 1;
1244 }
1245
1246 ronly = vfs_isrdonly(mp);
1247 dev = vnode_specrdev(devvp);
1248 cred = p ? vfs_context_ucred(context) : NOCRED;
1249 mntwrapper = 0;
1250
1251 bp = NULL;
1252 hfsmp = NULL;
1253 mdbp = NULL;
1254 minblksize = kHFSBlockSize;
1255
1256 /* Advisory locking should be handled at the VFS layer */
1257 vfs_setlocklocal(mp);
1258
1259 /* Get the logical block size (treated as physical block size everywhere) */
1260 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&log_blksize, 0, context)) {
1261 if (HFS_MOUNT_DEBUG) {
1262 printf("hfs_mountfs: DKIOCGETBLOCKSIZE failed\n");
1263 }
1264 retval = ENXIO;
1265 goto error_exit;
1266 }
1267 if (log_blksize == 0 || log_blksize > 1024*1024*1024) {
1268 printf("hfs: logical block size 0x%x looks bad. Not mounting.\n", log_blksize);
1269 retval = ENXIO;
1270 goto error_exit;
1271 }
1272
1273 /* Get the physical block size. */
1274 retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context);
1275 if (retval) {
1276 if ((retval != ENOTSUP) && (retval != ENOTTY)) {
1277 if (HFS_MOUNT_DEBUG) {
1278 printf("hfs_mountfs: DKIOCGETPHYSICALBLOCKSIZE failed\n");
1279 }
1280 retval = ENXIO;
1281 goto error_exit;
1282 }
1283 /* If device does not support this ioctl, assume that physical
1284 * block size is same as logical block size
1285 */
1286 phys_blksize = log_blksize;
1287 }
1288 if (phys_blksize == 0 || phys_blksize > MAXBSIZE) {
1289 printf("hfs: physical block size 0x%x looks bad. Not mounting.\n", phys_blksize);
1290 retval = ENXIO;
1291 goto error_exit;
1292 }
1293
1294 /* Switch to 512 byte sectors (temporarily) */
1295 if (log_blksize > 512) {
1296 u_int32_t size512 = 512;
1297
1298 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, context)) {
1299 if (HFS_MOUNT_DEBUG) {
1300 printf("hfs_mountfs: DKIOCSETBLOCKSIZE failed \n");
1301 }
1302 retval = ENXIO;
1303 goto error_exit;
1304 }
1305 }
1306 /* Get the number of 512 byte physical blocks. */
1307 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1308 /* resetting block size may fail if getting block count did */
1309 (void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context);
1310 if (HFS_MOUNT_DEBUG) {
1311 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT failed\n");
1312 }
1313 retval = ENXIO;
1314 goto error_exit;
1315 }
1316 /* Compute an accurate disk size (i.e. within 512 bytes) */
1317 disksize = (u_int64_t)log_blkcnt * (u_int64_t)512;
1318
1319 /*
1320 * On Tiger it is not necessary to switch the device
1321 * block size to be 4k if there are more than 31-bits
1322 * worth of blocks but to insure compatibility with
1323 * pre-Tiger systems we have to do it.
1324 *
1325 * If the device size is not a multiple of 4K (8 * 512), then
1326 * switching the logical block size isn't going to help because
1327 * we will be unable to write the alternate volume header.
1328 * In this case, just leave the logical block size unchanged.
1329 */
1330 if (log_blkcnt > 0x000000007fffffff && (log_blkcnt & 7) == 0) {
1331 minblksize = log_blksize = 4096;
1332 if (phys_blksize < log_blksize)
1333 phys_blksize = log_blksize;
1334 }
1335
1336 /*
1337 * The cluster layer is not currently prepared to deal with a logical
1338 * block size larger than the system's page size. (It can handle
1339 * blocks per page, but not multiple pages per block.) So limit the
1340 * logical block size to the page size.
1341 */
1342 if (log_blksize > PAGE_SIZE) {
1343 log_blksize = PAGE_SIZE;
1344 }
1345
1346 /* Now switch to our preferred physical block size. */
1347 if (log_blksize > 512) {
1348 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1349 if (HFS_MOUNT_DEBUG) {
1350 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (2) failed\n");
1351 }
1352 retval = ENXIO;
1353 goto error_exit;
1354 }
1355 /* Get the count of physical blocks. */
1356 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1357 if (HFS_MOUNT_DEBUG) {
1358 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (2) failed\n");
1359 }
1360 retval = ENXIO;
1361 goto error_exit;
1362 }
1363 }
1364 /*
1365 * At this point:
1366 * minblksize is the minimum physical block size
1367 * log_blksize has our preferred physical block size
1368 * log_blkcnt has the total number of physical blocks
1369 */
1370
1371 mdb_offset = (daddr64_t)HFS_PRI_SECTOR(log_blksize);
1372 if ((retval = (int)buf_meta_bread(devvp,
1373 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)),
1374 phys_blksize, cred, &bp))) {
1375 if (HFS_MOUNT_DEBUG) {
1376 printf("hfs_mountfs: buf_meta_bread failed with %d\n", retval);
1377 }
1378 goto error_exit;
1379 }
1380 MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK);
1381 if (mdbp == NULL) {
1382 retval = ENOMEM;
1383 if (HFS_MOUNT_DEBUG) {
1384 printf("hfs_mountfs: MALLOC failed\n");
1385 }
1386 goto error_exit;
1387 }
1388 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize);
1389 buf_brelse(bp);
1390 bp = NULL;
1391
1392 MALLOC(hfsmp, struct hfsmount *, sizeof(struct hfsmount), M_HFSMNT, M_WAITOK);
1393 if (hfsmp == NULL) {
1394 if (HFS_MOUNT_DEBUG) {
1395 printf("hfs_mountfs: MALLOC (2) failed\n");
1396 }
1397 retval = ENOMEM;
1398 goto error_exit;
1399 }
1400 bzero(hfsmp, sizeof(struct hfsmount));
1401
1402 hfs_chashinit_finish(hfsmp);
1403
1404 /* Init the ID lookup hashtable */
1405 hfs_idhash_init (hfsmp);
1406
1407 /*
1408 * See if the disk supports unmap (trim).
1409 *
1410 * NOTE: vfs_init_io_attributes has not been called yet, so we can't use the io_flags field
1411 * returned by vfs_ioattr. We need to call VNOP_IOCTL ourselves.
1412 */
1413 if (VNOP_IOCTL(devvp, DKIOCGETFEATURES, (caddr_t)&device_features, 0, context) == 0) {
1414 if (device_features & DK_FEATURE_UNMAP) {
1415 hfsmp->hfs_flags |= HFS_UNMAP;
1416 }
1417 }
1418
1419 /*
1420 * See if the disk is a solid state device, too. We need this to decide what to do about
1421 * hotfiles.
1422 */
1423 if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, context) == 0) {
1424 if (isssd) {
1425 hfsmp->hfs_flags |= HFS_SSD;
1426 }
1427 }
1428
1429
1430 /*
1431 * Init the volume information structure
1432 */
1433
1434 lck_mtx_init(&hfsmp->hfs_mutex, hfs_mutex_group, hfs_lock_attr);
1435 lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr);
1436 lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr);
1437 lck_rw_init(&hfsmp->hfs_insync, hfs_rwlock_group, hfs_lock_attr);
1438 lck_spin_init(&hfsmp->vcbFreeExtLock, hfs_spinlock_group, hfs_lock_attr);
1439
1440 vfs_setfsprivate(mp, hfsmp);
1441 hfsmp->hfs_mp = mp; /* Make VFSTOHFS work */
1442 hfsmp->hfs_raw_dev = vnode_specrdev(devvp);
1443 hfsmp->hfs_devvp = devvp;
1444 vnode_ref(devvp); /* Hold a ref on the device, dropped when hfsmp is freed. */
1445 hfsmp->hfs_logical_block_size = log_blksize;
1446 hfsmp->hfs_logical_block_count = log_blkcnt;
1447 hfsmp->hfs_logical_bytes = (uint64_t) log_blksize * (uint64_t) log_blkcnt;
1448 hfsmp->hfs_physical_block_size = phys_blksize;
1449 hfsmp->hfs_log_per_phys = (phys_blksize / log_blksize);
1450 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1451 if (ronly)
1452 hfsmp->hfs_flags |= HFS_READ_ONLY;
1453 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS)
1454 hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
1455
1456 #if QUOTA
1457 for (i = 0; i < MAXQUOTAS; i++)
1458 dqfileinit(&hfsmp->hfs_qfiles[i]);
1459 #endif
1460
1461 if (args) {
1462 hfsmp->hfs_uid = (args->hfs_uid == (uid_t)VNOVAL) ? UNKNOWNUID : args->hfs_uid;
1463 if (hfsmp->hfs_uid == 0xfffffffd) hfsmp->hfs_uid = UNKNOWNUID;
1464 hfsmp->hfs_gid = (args->hfs_gid == (gid_t)VNOVAL) ? UNKNOWNGID : args->hfs_gid;
1465 if (hfsmp->hfs_gid == 0xfffffffd) hfsmp->hfs_gid = UNKNOWNGID;
1466 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1467 if (args->hfs_mask != (mode_t)VNOVAL) {
1468 hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
1469 if (args->flags & HFSFSMNT_NOXONFILES) {
1470 hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
1471 } else {
1472 hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
1473 }
1474 } else {
1475 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1476 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1477 }
1478 if ((args->flags != (int)VNOVAL) && (args->flags & HFSFSMNT_WRAPPER))
1479 mntwrapper = 1;
1480 } else {
1481 /* Even w/o explicit mount arguments, MNT_UNKNOWNPERMISSIONS requires setting up uid, gid, and mask: */
1482 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS) {
1483 hfsmp->hfs_uid = UNKNOWNUID;
1484 hfsmp->hfs_gid = UNKNOWNGID;
1485 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1486 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1487 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1488 }
1489 }
1490
1491 /* Find out if disk media is writable. */
1492 if (VNOP_IOCTL(devvp, DKIOCISWRITABLE, (caddr_t)&iswritable, 0, context) == 0) {
1493 if (iswritable)
1494 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1495 else
1496 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1497 }
1498
1499 // record the current time at which we're mounting this volume
1500 struct timeval tv;
1501 microtime(&tv);
1502 hfsmp->hfs_mount_time = tv.tv_sec;
1503
1504 /* Mount a standard HFS disk */
1505 if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) &&
1506 (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord))) {
1507 #if CONFIG_HFS_STD
1508 /* On 10.6 and beyond, non read-only mounts for HFS standard vols get rejected */
1509 if (vfs_isrdwr(mp)) {
1510 retval = EROFS;
1511 goto error_exit;
1512 }
1513
1514 printf("hfs_mountfs: Mounting HFS Standard volumes was deprecated in Mac OS 10.7 \n");
1515
1516 /* Treat it as if it's read-only and not writeable */
1517 hfsmp->hfs_flags |= HFS_READ_ONLY;
1518 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1519
1520 /* If only journal replay is requested, exit immediately */
1521 if (journal_replay_only) {
1522 retval = 0;
1523 goto error_exit;
1524 }
1525
1526 if ((vfs_flags(mp) & MNT_ROOTFS)) {
1527 retval = EINVAL; /* Cannot root from HFS standard disks */
1528 goto error_exit;
1529 }
1530 /* HFS disks can only use 512 byte physical blocks */
1531 if (log_blksize > kHFSBlockSize) {
1532 log_blksize = kHFSBlockSize;
1533 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1534 retval = ENXIO;
1535 goto error_exit;
1536 }
1537 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1538 retval = ENXIO;
1539 goto error_exit;
1540 }
1541 hfsmp->hfs_logical_block_size = log_blksize;
1542 hfsmp->hfs_logical_block_count = log_blkcnt;
1543 hfsmp->hfs_logical_bytes = (uint64_t) log_blksize * (uint64_t) log_blkcnt;
1544 hfsmp->hfs_physical_block_size = log_blksize;
1545 hfsmp->hfs_log_per_phys = 1;
1546 }
1547 if (args) {
1548 hfsmp->hfs_encoding = args->hfs_encoding;
1549 HFSTOVCB(hfsmp)->volumeNameEncodingHint = args->hfs_encoding;
1550
1551 /* establish the timezone */
1552 gTimeZone = args->hfs_timezone;
1553 }
1554
1555 retval = hfs_getconverter(hfsmp->hfs_encoding, &hfsmp->hfs_get_unicode,
1556 &hfsmp->hfs_get_hfsname);
1557 if (retval)
1558 goto error_exit;
1559
1560 retval = hfs_MountHFSVolume(hfsmp, mdbp, p);
1561 if (retval)
1562 (void) hfs_relconverter(hfsmp->hfs_encoding);
1563 #else
1564 /* On platforms where HFS Standard is not supported, deny the mount altogether */
1565 retval = EINVAL;
1566 goto error_exit;
1567 #endif
1568
1569 }
1570 else { /* Mount an HFS Plus disk */
1571 HFSPlusVolumeHeader *vhp;
1572 off_t embeddedOffset;
1573 int jnl_disable = 0;
1574
1575 /* Get the embedded Volume Header */
1576 if (SWAP_BE16(mdbp->drEmbedSigWord) == kHFSPlusSigWord) {
1577 embeddedOffset = SWAP_BE16(mdbp->drAlBlSt) * kHFSBlockSize;
1578 embeddedOffset += (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.startBlock) *
1579 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1580
1581 /*
1582 * If the embedded volume doesn't start on a block
1583 * boundary, then switch the device to a 512-byte
1584 * block size so everything will line up on a block
1585 * boundary.
1586 */
1587 if ((embeddedOffset % log_blksize) != 0) {
1588 printf("hfs_mountfs: embedded volume offset not"
1589 " a multiple of physical block size (%d);"
1590 " switching to 512\n", log_blksize);
1591 log_blksize = 512;
1592 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE,
1593 (caddr_t)&log_blksize, FWRITE, context)) {
1594
1595 if (HFS_MOUNT_DEBUG) {
1596 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (3) failed\n");
1597 }
1598 retval = ENXIO;
1599 goto error_exit;
1600 }
1601 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT,
1602 (caddr_t)&log_blkcnt, 0, context)) {
1603 if (HFS_MOUNT_DEBUG) {
1604 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (3) failed\n");
1605 }
1606 retval = ENXIO;
1607 goto error_exit;
1608 }
1609 /* Note: relative block count adjustment */
1610 hfsmp->hfs_logical_block_count *=
1611 hfsmp->hfs_logical_block_size / log_blksize;
1612
1613 /* Update logical /physical block size */
1614 hfsmp->hfs_logical_block_size = log_blksize;
1615 hfsmp->hfs_physical_block_size = log_blksize;
1616
1617 phys_blksize = log_blksize;
1618 hfsmp->hfs_log_per_phys = 1;
1619 }
1620
1621 disksize = (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.blockCount) *
1622 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1623
1624 hfsmp->hfs_logical_block_count = disksize / log_blksize;
1625
1626 hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size;
1627
1628 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1629 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1630 phys_blksize, cred, &bp);
1631 if (retval) {
1632 if (HFS_MOUNT_DEBUG) {
1633 printf("hfs_mountfs: buf_meta_bread (2) failed with %d\n", retval);
1634 }
1635 goto error_exit;
1636 }
1637 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, 512);
1638 buf_brelse(bp);
1639 bp = NULL;
1640 vhp = (HFSPlusVolumeHeader*) mdbp;
1641
1642 }
1643 else { /* pure HFS+ */
1644 embeddedOffset = 0;
1645 vhp = (HFSPlusVolumeHeader*) mdbp;
1646 }
1647
1648 if (isroot) {
1649 hfs_root_unmounted_cleanly = ((SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) != 0);
1650 }
1651
1652 /*
1653 * On inconsistent disks, do not allow read-write mount
1654 * unless it is the boot volume being mounted. We also
1655 * always want to replay the journal if the journal_replay_only
1656 * flag is set because that will (most likely) get the
1657 * disk into a consistent state before fsck_hfs starts
1658 * looking at it.
1659 */
1660 if ( !(vfs_flags(mp) & MNT_ROOTFS)
1661 && (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask)
1662 && !journal_replay_only
1663 && !(hfsmp->hfs_flags & HFS_READ_ONLY)) {
1664
1665 if (HFS_MOUNT_DEBUG) {
1666 printf("hfs_mountfs: failed to mount non-root inconsistent disk\n");
1667 }
1668 retval = EINVAL;
1669 goto error_exit;
1670 }
1671
1672
1673 // XXXdbg
1674 //
1675 hfsmp->jnl = NULL;
1676 hfsmp->jvp = NULL;
1677 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS) &&
1678 args->journal_disable) {
1679 jnl_disable = 1;
1680 }
1681
1682 //
1683 // We only initialize the journal here if the last person
1684 // to mount this volume was journaling aware. Otherwise
1685 // we delay journal initialization until later at the end
1686 // of hfs_MountHFSPlusVolume() because the last person who
1687 // mounted it could have messed things up behind our back
1688 // (so we need to go find the .journal file, make sure it's
1689 // the right size, re-sync up if it was moved, etc).
1690 //
1691 if ( (SWAP_BE32(vhp->lastMountedVersion) == kHFSJMountVersion)
1692 && (SWAP_BE32(vhp->attributes) & kHFSVolumeJournaledMask)
1693 && !jnl_disable) {
1694
1695 // if we're able to init the journal, mark the mount
1696 // point as journaled.
1697 //
1698 if ((retval = hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred)) == 0) {
1699 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1700 } else {
1701 if (retval == EROFS) {
1702 // EROFS is a special error code that means the volume has an external
1703 // journal which we couldn't find. in that case we do not want to
1704 // rewrite the volume header - we'll just refuse to mount the volume.
1705 if (HFS_MOUNT_DEBUG) {
1706 printf("hfs_mountfs: hfs_early_journal_init indicated external jnl \n");
1707 }
1708 retval = EINVAL;
1709 goto error_exit;
1710 }
1711
1712 // if the journal failed to open, then set the lastMountedVersion
1713 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1714 // of just bailing out because the volume is journaled.
1715 if (!ronly) {
1716 if (HFS_MOUNT_DEBUG) {
1717 printf("hfs_mountfs: hfs_early_journal_init failed, setting to FSK \n");
1718 }
1719
1720 HFSPlusVolumeHeader *jvhp;
1721
1722 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1723
1724 if (mdb_offset == 0) {
1725 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1726 }
1727
1728 bp = NULL;
1729 retval = (int)buf_meta_bread(devvp,
1730 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1731 phys_blksize, cred, &bp);
1732 if (retval == 0) {
1733 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1734
1735 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1736 printf ("hfs(1): Journal replay fail. Writing lastMountVersion as FSK!\n");
1737 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1738 buf_bwrite(bp);
1739 } else {
1740 buf_brelse(bp);
1741 }
1742 bp = NULL;
1743 } else if (bp) {
1744 buf_brelse(bp);
1745 // clear this so the error exit path won't try to use it
1746 bp = NULL;
1747 }
1748 }
1749
1750 // if this isn't the root device just bail out.
1751 // If it is the root device we just continue on
1752 // in the hopes that fsck_hfs will be able to
1753 // fix any damage that exists on the volume.
1754 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1755 if (HFS_MOUNT_DEBUG) {
1756 printf("hfs_mountfs: hfs_early_journal_init failed, erroring out \n");
1757 }
1758 retval = EINVAL;
1759 goto error_exit;
1760 }
1761 }
1762 }
1763 // XXXdbg
1764
1765 /* Either the journal is replayed successfully, or there
1766 * was nothing to replay, or no journal exists. In any case,
1767 * return success.
1768 */
1769 if (journal_replay_only) {
1770 retval = 0;
1771 goto error_exit;
1772 }
1773
1774 (void) hfs_getconverter(0, &hfsmp->hfs_get_unicode, &hfsmp->hfs_get_hfsname);
1775
1776 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1777 /*
1778 * If the backend didn't like our physical blocksize
1779 * then retry with physical blocksize of 512.
1780 */
1781 if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) {
1782 printf("hfs_mountfs: could not use physical block size "
1783 "(%d) switching to 512\n", log_blksize);
1784 log_blksize = 512;
1785 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1786 if (HFS_MOUNT_DEBUG) {
1787 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (4) failed \n");
1788 }
1789 retval = ENXIO;
1790 goto error_exit;
1791 }
1792 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1793 if (HFS_MOUNT_DEBUG) {
1794 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (4) failed \n");
1795 }
1796 retval = ENXIO;
1797 goto error_exit;
1798 }
1799 devvp->v_specsize = log_blksize;
1800 /* Note: relative block count adjustment (in case this is an embedded volume). */
1801 hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize;
1802 hfsmp->hfs_logical_block_size = log_blksize;
1803 hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize;
1804
1805 hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size;
1806
1807 if (hfsmp->jnl && hfsmp->jvp == devvp) {
1808 // close and re-open this with the new block size
1809 journal_close(hfsmp->jnl);
1810 hfsmp->jnl = NULL;
1811 if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) {
1812 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1813 } else {
1814 // if the journal failed to open, then set the lastMountedVersion
1815 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1816 // of just bailing out because the volume is journaled.
1817 if (!ronly) {
1818 if (HFS_MOUNT_DEBUG) {
1819 printf("hfs_mountfs: hfs_early_journal_init (2) resetting.. \n");
1820 }
1821 HFSPlusVolumeHeader *jvhp;
1822
1823 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1824
1825 if (mdb_offset == 0) {
1826 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1827 }
1828
1829 bp = NULL;
1830 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1831 phys_blksize, cred, &bp);
1832 if (retval == 0) {
1833 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1834
1835 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1836 printf ("hfs(2): Journal replay fail. Writing lastMountVersion as FSK!\n");
1837 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1838 buf_bwrite(bp);
1839 } else {
1840 buf_brelse(bp);
1841 }
1842 bp = NULL;
1843 } else if (bp) {
1844 buf_brelse(bp);
1845 // clear this so the error exit path won't try to use it
1846 bp = NULL;
1847 }
1848 }
1849
1850 // if this isn't the root device just bail out.
1851 // If it is the root device we just continue on
1852 // in the hopes that fsck_hfs will be able to
1853 // fix any damage that exists on the volume.
1854 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1855 if (HFS_MOUNT_DEBUG) {
1856 printf("hfs_mountfs: hfs_early_journal_init (2) failed \n");
1857 }
1858 retval = EINVAL;
1859 goto error_exit;
1860 }
1861 }
1862 }
1863
1864 /* Try again with a smaller block size... */
1865 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1866 if (retval && HFS_MOUNT_DEBUG) {
1867 printf("hfs_MountHFSPlusVolume (late) returned %d\n",retval);
1868 }
1869 }
1870 if (retval)
1871 (void) hfs_relconverter(0);
1872 }
1873
1874 // save off a snapshot of the mtime from the previous mount
1875 // (for matador).
1876 hfsmp->hfs_last_mounted_mtime = hfsmp->hfs_mtime;
1877
1878 if ( retval ) {
1879 if (HFS_MOUNT_DEBUG) {
1880 printf("hfs_mountfs: encountered failure %d \n", retval);
1881 }
1882 goto error_exit;
1883 }
1884
1885 mp->mnt_vfsstat.f_fsid.val[0] = dev;
1886 mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
1887 vfs_setmaxsymlen(mp, 0);
1888
1889 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSNATIVEXATTR;
1890 #if NAMEDSTREAMS
1891 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1892 #endif
1893 if ((hfsmp->hfs_flags & HFS_STANDARD) == 0 ) {
1894 /* Tell VFS that we support directory hard links. */
1895 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSDIRLINKS;
1896 }
1897 #if CONFIG_HFS_STD
1898 else {
1899 /* HFS standard doesn't support extended readdir! */
1900 mount_set_noreaddirext (mp);
1901 }
1902 #endif
1903
1904 if (args) {
1905 /*
1906 * Set the free space warning levels for a non-root volume:
1907 *
1908 * Set the "danger" limit to 1% of the volume size or 100MB, whichever
1909 * is less. Set the "warning" limit to 2% of the volume size or 150MB,
1910 * whichever is less. And last, set the "desired" freespace level to
1911 * to 3% of the volume size or 200MB, whichever is less.
1912 */
1913 hfsmp->hfs_freespace_notify_dangerlimit =
1914 MIN(HFS_VERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1915 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_VERYLOWDISKTRIGGERFRACTION);
1916 hfsmp->hfs_freespace_notify_warninglimit =
1917 MIN(HFS_LOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1918 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKTRIGGERFRACTION);
1919 hfsmp->hfs_freespace_notify_desiredlevel =
1920 MIN(HFS_LOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1921 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKSHUTOFFFRACTION);
1922 } else {
1923 /*
1924 * Set the free space warning levels for the root volume:
1925 *
1926 * Set the "danger" limit to 5% of the volume size or 512MB, whichever
1927 * is less. Set the "warning" limit to 10% of the volume size or 1GB,
1928 * whichever is less. And last, set the "desired" freespace level to
1929 * to 11% of the volume size or 1.25GB, whichever is less.
1930 */
1931 hfsmp->hfs_freespace_notify_dangerlimit =
1932 MIN(HFS_ROOTVERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1933 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTVERYLOWDISKTRIGGERFRACTION);
1934 hfsmp->hfs_freespace_notify_warninglimit =
1935 MIN(HFS_ROOTLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1936 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKTRIGGERFRACTION);
1937 hfsmp->hfs_freespace_notify_desiredlevel =
1938 MIN(HFS_ROOTLOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1939 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKSHUTOFFFRACTION);
1940 };
1941
1942 /* Check if the file system exists on virtual device, like disk image */
1943 if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, context) == 0) {
1944 if (isvirtual) {
1945 hfsmp->hfs_flags |= HFS_VIRTUAL_DEVICE;
1946 }
1947 }
1948
1949 /* do not allow ejectability checks on the root device */
1950 if (isroot == 0) {
1951 if ((hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) == 0 &&
1952 IOBSDIsMediaEjectable(mp->mnt_vfsstat.f_mntfromname)) {
1953 hfsmp->hfs_syncer = thread_call_allocate(hfs_syncer, hfsmp);
1954 if (hfsmp->hfs_syncer == NULL) {
1955 printf("hfs: failed to allocate syncer thread callback for %s (%s)\n",
1956 mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname);
1957 }
1958 }
1959 }
1960
1961 printf("hfs: mounted %s on device %s\n", (hfsmp->vcbVN ? (const char*) hfsmp->vcbVN : "unknown"),
1962 (devvp->v_name ? devvp->v_name : (isroot ? "root_device": "unknown device")));
1963
1964 /*
1965 * Start looking for free space to drop below this level and generate a
1966 * warning immediately if needed:
1967 */
1968 hfsmp->hfs_notification_conditions = 0;
1969 hfs_generate_volume_notifications(hfsmp);
1970
1971 if (ronly == 0) {
1972 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1973 }
1974 FREE(mdbp, M_TEMP);
1975 return (0);
1976
1977 error_exit:
1978 if (bp)
1979 buf_brelse(bp);
1980 if (mdbp)
1981 FREE(mdbp, M_TEMP);
1982
1983 if (hfsmp && hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
1984 vnode_clearmountedon(hfsmp->jvp);
1985 (void)VNOP_CLOSE(hfsmp->jvp, ronly ? FREAD : FREAD|FWRITE, vfs_context_kernel());
1986 hfsmp->jvp = NULL;
1987 }
1988 if (hfsmp) {
1989 if (hfsmp->hfs_devvp) {
1990 vnode_rele(hfsmp->hfs_devvp);
1991 }
1992 hfs_locks_destroy(hfsmp);
1993 hfs_delete_chash(hfsmp);
1994 hfs_idhash_destroy (hfsmp);
1995
1996 FREE(hfsmp, M_HFSMNT);
1997 vfs_setfsprivate(mp, NULL);
1998 }
1999 return (retval);
2000 }
2001
2002
2003 /*
2004 * Make a filesystem operational.
2005 * Nothing to do at the moment.
2006 */
2007 /* ARGSUSED */
2008 static int
2009 hfs_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context)
2010 {
2011 return (0);
2012 }
2013
2014
/*
 * unmount system call
 *
 * Tears down an HFS mount: flushes all files, stops the syncer thread,
 * pushes the b-trees / bitmap / volume header to disk inside a transaction,
 * flushes and closes the journal, and finally releases the device vnode and
 * frees the hfsmount.  With MNT_FORCE, most intermediate errors are ignored
 * so that the unmount proceeds regardless.
 */
int
hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	int retval = E_NONE;
	int flags;		/* flags handed to hfs_flushfiles() */
	int force;		/* non-zero when MNT_FORCE was requested */
	int started_tr = 0;	/* non-zero once hfs_start_transaction() succeeded */

	flags = 0;
	force = 0;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		force = 1;
	}

	printf("hfs: unmount initiated on %s on device %s\n",
		(hfsmp->vcbVN ? (const char*) hfsmp->vcbVN : "unknown"),
		(hfsmp->hfs_devvp ? ((hfsmp->hfs_devvp->v_name ? hfsmp->hfs_devvp->v_name : "unknown device")) : "unknown device"));

	/* A flush failure aborts the unmount unless it is forced. */
	if ((retval = hfs_flushfiles(mp, flags, p)) && !force)
 		return (retval);

	if (hfsmp->hfs_flags & HFS_METADATA_ZONE)
		(void) hfs_recording_suspend(hfsmp);

	// Tidy up the syncer
	if (hfsmp->hfs_syncer)
	{
		hfs_syncer_lock(hfsmp);

		/* First, make sure everything else knows we don't want any more
		   requests queued. */
		thread_call_t syncer = hfsmp->hfs_syncer;
		hfsmp->hfs_syncer = NULL;

		hfs_syncer_unlock(hfsmp);

		// Now deal with requests that are outstanding
		if (hfsmp->hfs_sync_incomplete) {
			if (thread_call_cancel(syncer)) {
				// We managed to cancel the timer so we're done
				hfsmp->hfs_sync_incomplete = FALSE;
			} else {
				// Syncer must be running right now so we have to wait
				hfs_syncer_lock(hfsmp);
				while (hfsmp->hfs_sync_incomplete)
					hfs_syncer_wait(hfsmp);
				hfs_syncer_unlock(hfsmp);
			}
		}

		// Now we're safe to free the syncer
		thread_call_free(syncer);
	}

	/* Release the in-memory summary table, if one was built. */
	if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
		if (hfsmp->hfs_summary_table) {
			int err = 0;
			/*
			 * Take the bitmap lock to serialize against a concurrent bitmap scan still in progress
			 */
			if (hfsmp->hfs_allocation_vp) {
				err = hfs_lock (VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
			}
			FREE (hfsmp->hfs_summary_table, M_TEMP);
			hfsmp->hfs_summary_table = NULL;
			hfsmp->hfs_flags &= ~HFS_SUMMARY_TABLE;

			if (err == 0 && hfsmp->hfs_allocation_vp){
				hfs_unlock (VTOC(hfsmp->hfs_allocation_vp));
			}

		}
	}

	/*
	 * Flush out the b-trees, volume bitmap and Volume Header
	 */
	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
		retval = hfs_start_transaction(hfsmp);
		if (retval == 0) {
			started_tr = 1;
		} else if (!force) {
			goto err_exit;
		}

		/* Each system file is fsync'd under its cnode lock; any failure
		 * aborts the unmount unless forced. */
		if (hfsmp->hfs_startup_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_startup_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
			retval = hfs_fsync(hfsmp->hfs_startup_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_startup_vp));
			if (retval && !force)
				goto err_exit;
		}

		if (hfsmp->hfs_attribute_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_attribute_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
			retval = hfs_fsync(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_attribute_vp));
			if (retval && !force)
				goto err_exit;
		}

		(void) hfs_lock(VTOC(hfsmp->hfs_catalog_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		retval = hfs_fsync(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, p);
		hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
		if (retval && !force)
			goto err_exit;

		(void) hfs_lock(VTOC(hfsmp->hfs_extents_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		retval = hfs_fsync(hfsmp->hfs_extents_vp, MNT_WAIT, 0, p);
		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
		if (retval && !force)
			goto err_exit;

		if (hfsmp->hfs_allocation_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
			retval = hfs_fsync(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
			if (retval && !force)
				goto err_exit;
		}

		if (hfsmp->hfc_filevp && vnode_issystem(hfsmp->hfc_filevp)) {
			retval = hfs_fsync(hfsmp->hfc_filevp, MNT_WAIT, 0, p);
			if (retval && !force)
				goto err_exit;
		}

		/* If runtime corruption was detected, indicate that the volume
		 * was not unmounted cleanly.
		 */
		if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
			HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
		} else {
			HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask;
		}

		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			int i;
			u_int32_t min_start = hfsmp->totalBlocks;

			// set the nextAllocation pointer to the smallest free block number
			// we've seen so on the next mount we won't rescan unnecessarily
			lck_spin_lock(&hfsmp->vcbFreeExtLock);
			for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
				if (hfsmp->vcbFreeExt[i].startBlock < min_start) {
					min_start = hfsmp->vcbFreeExt[i].startBlock;
				}
			}
			lck_spin_unlock(&hfsmp->vcbFreeExtLock);
			if (min_start < hfsmp->nextAllocation) {
				hfsmp->nextAllocation = min_start;
			}
		}

		retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
		if (retval) {
			/* Could not write the header: clear the clean-unmount bit again. */
			HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
			if (!force)
				goto err_exit;	/* could not flush everything */
		}

		if (started_tr) {
			hfs_end_transaction(hfsmp);
			started_tr = 0;
		}
	}

	if (hfsmp->jnl) {
		hfs_journal_flush(hfsmp, FALSE);
	}

	/*
	 *	Invalidate our caches and release metadata vnodes
	 */
	(void) hfsUnmount(hfsmp, p);

#if CONFIG_HFS_STD
	if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) {
		(void) hfs_relconverter(hfsmp->hfs_encoding);
	}
#endif

	// XXXdbg
	if (hfsmp->jnl) {
		journal_close(hfsmp->jnl);
		hfsmp->jnl = NULL;
	}

	VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);

	/* Close the separate journal device, if one was in use. */
	if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
		vnode_clearmountedon(hfsmp->jvp);
		retval = VNOP_CLOSE(hfsmp->jvp,
		                    hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE,
		                    vfs_context_kernel());
		vnode_put(hfsmp->jvp);
		hfsmp->jvp = NULL;
	}
	// XXXdbg

	/*
	 * Last chance to dump unreferenced system files.
	 */
	(void) vflush(mp, NULLVP, FORCECLOSE);

#if HFS_SPARSE_DEV
	/* Drop our reference on the backing fs (if any). */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
		struct vnode * tmpvp;

		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
		tmpvp = hfsmp->hfs_backingfs_rootvp;
		hfsmp->hfs_backingfs_rootvp = NULLVP;
		vnode_rele(tmpvp);
	}
#endif /* HFS_SPARSE_DEV */

	vnode_rele(hfsmp->hfs_devvp);

	/* Final teardown of locks, hash tables and the mount structure itself. */
	hfs_locks_destroy(hfsmp);
	hfs_delete_chash(hfsmp);
	hfs_idhash_destroy(hfsmp);
	FREE(hfsmp, M_HFSMNT);

	return (0);

err_exit:
	if (started_tr) {
		hfs_end_transaction(hfsmp);
	}
	return retval;
}
2253
2254
2255 /*
2256 * Return the root of a filesystem.
2257 */
2258 static int
2259 hfs_vfs_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context)
2260 {
2261 return hfs_vget(VFSTOHFS(mp), (cnid_t)kHFSRootFolderID, vpp, 1, 0);
2262 }
2263
2264
/*
 * Do operations associated with quotas
 *
 * When the kernel is built without QUOTA support this is a stub that
 * reports ENOTSUP.  Otherwise it decodes the composite command word
 * (command in the high bits, quota type in the low bits), performs a
 * privilege check for the privileged sub-commands, and dispatches to
 * the matching hfs_quota* helper.
 */
#if !QUOTA
static int
hfs_quotactl(__unused struct mount *mp, __unused int cmds, __unused uid_t uid, __unused caddr_t datap, __unused vfs_context_t context)
{
	return (ENOTSUP);
}
#else
static int
hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	int cmd, type, error;

	/* ~0U means "the calling user" — substitute the caller's uid. */
	if (uid == ~0U)
		uid = kauth_cred_getuid(vfs_context_ucred(context));
	cmd = cmds >> SUBCMDSHIFT;

	/*
	 * Privilege screening: Q_SYNC and Q_QUOTASTAT are unprivileged, and
	 * a user may query (Q_GETQUOTA) their own quota; everything else
	 * requires superuser.
	 */
	switch (cmd) {
	case Q_SYNC:
	case Q_QUOTASTAT:
		break;
	case Q_GETQUOTA:
		if (uid == kauth_cred_getuid(vfs_context_ucred(context)))
			break;
		/* fall through */
	default:
		if ( (error = vfs_context_suser(context)) )
			return (error);
	}

	type = cmds & SUBCMDMASK;
	if ((u_int)type >= MAXQUOTAS)
		return (EINVAL);
	/* NOTE(review): a busy mount makes this silently return success (0)
	 * rather than EBUSY — this mirrors the classic BSD quotactl shape;
	 * confirm before changing. */
	if (vfs_busy(mp, LK_NOWAIT))
		return (0);

	/* Dispatch to the individual quota operations. */
	switch (cmd) {

	case Q_QUOTAON:
		error = hfs_quotaon(p, mp, type, datap);
		break;

	case Q_QUOTAOFF:
		error = hfs_quotaoff(p, mp, type);
		break;

	case Q_SETQUOTA:
		error = hfs_setquota(mp, uid, type, datap);
		break;

	case Q_SETUSE:
		error = hfs_setuse(mp, uid, type, datap);
		break;

	case Q_GETQUOTA:
		error = hfs_getquota(mp, uid, type, datap);
		break;

	case Q_SYNC:
		error = hfs_qsync(mp);
		break;

	case Q_QUOTASTAT:
		error = hfs_quotastat(mp, type, datap);
		break;

	default:
		error = EINVAL;
		break;
	}
	vfs_unbusy(mp);

	return (error);
}
#endif /* QUOTA */
2343
2344 /* Subtype is composite of bits */
2345 #define HFS_SUBTYPE_JOURNALED 0x01
2346 #define HFS_SUBTYPE_CASESENSITIVE 0x02
2347 /* bits 2 - 6 reserved */
2348 #define HFS_SUBTYPE_STANDARDHFS 0x80
2349
2350 /*
2351 * Get file system statistics.
2352 */
2353 int
2354 hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_context_t context)
2355 {
2356 ExtendedVCB *vcb = VFSTOVCB(mp);
2357 struct hfsmount *hfsmp = VFSTOHFS(mp);
2358 u_int32_t freeCNIDs;
2359 u_int16_t subtype = 0;
2360
2361 freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)vcb->vcbNxtCNID;
2362
2363 sbp->f_bsize = (u_int32_t)vcb->blockSize;
2364 sbp->f_iosize = (size_t)cluster_max_io_size(mp, 0);
2365 sbp->f_blocks = (u_int64_t)((u_int32_t)vcb->totalBlocks);
2366 sbp->f_bfree = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 0));
2367 sbp->f_bavail = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 1));
2368 sbp->f_files = (u_int64_t)((u_int32_t )(vcb->totalBlocks - 2)); /* max files is constrained by total blocks */
2369 sbp->f_ffree = (u_int64_t)((u_int32_t )(MIN(freeCNIDs, sbp->f_bavail)));
2370
2371 /*
2372 * Subtypes (flavors) for HFS
2373 * 0: Mac OS Extended
2374 * 1: Mac OS Extended (Journaled)
2375 * 2: Mac OS Extended (Case Sensitive)
2376 * 3: Mac OS Extended (Case Sensitive, Journaled)
2377 * 4 - 127: Reserved
2378 * 128: Mac OS Standard
2379 *
2380 */
2381 if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) {
2382 /* HFS+ & variants */
2383 if (hfsmp->jnl) {
2384 subtype |= HFS_SUBTYPE_JOURNALED;
2385 }
2386 if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) {
2387 subtype |= HFS_SUBTYPE_CASESENSITIVE;
2388 }
2389 }
2390 #if CONFIG_HFS_STD
2391 else {
2392 /* HFS standard */
2393 subtype = HFS_SUBTYPE_STANDARDHFS;
2394 }
2395 #endif
2396 sbp->f_fssubtype = subtype;
2397
2398 return (0);
2399 }
2400
2401
//
// XXXdbg -- this is a callback to be used by the journal to
//           get meta data blocks flushed out to disk.
//
// XXXdbg -- be smarter and don't flush *every* block on each
//           call.  try to only flush some so we don't wind up
//           being too synchronous.
//
/*
 * Push the primary and alternate volume headers to disk if their
 * buffers are sitting dirty (delayed-write) in the buffer cache.
 * Called with the mount point as the opaque journal-callback argument.
 */
__private_extern__
void
hfs_sync_metadata(void *arg)
{
	struct mount *mp = (struct mount *)arg;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	buf_t bp;
	int retval;
	daddr64_t priIDSector;	/* logical block of the primary volume header */
	hfsmp = VFSTOHFS(mp);
	vcb = HFSTOVCB(hfsmp);

	// now make sure the super block is flushed
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
				  HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	/* ENXIO (device gone) is expected during forced ejects; only log
	 * other read failures. */
	if ((retval != 0 ) && (retval != ENXIO)) {
		printf("hfs_sync_metadata: can't read volume header at %d! (retval 0x%x)\n",
		       (int)priIDSector, retval);
	}

	/* Write the buffer only if it is dirty (B_DELWRI) and not locked;
	 * otherwise just release it back to the cache. */
	if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
		buf_bwrite(bp);
	} else if (bp) {
		buf_brelse(bp);
	}

	// the alternate super block...
	// XXXdbg - we probably don't need to do this each and every time.
	//          hfs_btreeio.c:FlushAlternate() should flag when it was
	//          written...
	if (hfsmp->hfs_alt_id_sector) {
		retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp);
		if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
			buf_bwrite(bp);
		} else if (bp) {
			buf_brelse(bp);
		}
	}
}
2456
2457
/*
 * Per-call context handed to hfs_sync_callback() via vnode_iterate()
 * during hfs_sync().
 */
struct hfs_sync_cargs {
	kauth_cred_t cred;	/* caller's credential (set from kauth_cred_get()) */
	struct proc *p;		/* calling process; passed through to hfs_fsync() */
	int waitfor;		/* sync mode (e.g. MNT_WAIT) forwarded to hfs_fsync() */
	int error;		/* last non-zero error reported by the callback */
};
2464
2465
2466 static int
2467 hfs_sync_callback(struct vnode *vp, void *cargs)
2468 {
2469 struct cnode *cp;
2470 struct hfs_sync_cargs *args;
2471 int error;
2472
2473 args = (struct hfs_sync_cargs *)cargs;
2474
2475 if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
2476 return (VNODE_RETURNED);
2477 }
2478 cp = VTOC(vp);
2479
2480 if ((cp->c_flag & C_MODIFIED) ||
2481 (cp->c_touch_acctime | cp->c_touch_chgtime | cp->c_touch_modtime) ||
2482 vnode_hasdirtyblks(vp)) {
2483 error = hfs_fsync(vp, args->waitfor, 0, args->p);
2484
2485 if (error)
2486 args->error = error;
2487 }
2488 hfs_unlock(cp);
2489 return (VNODE_RETURNED);
2490 }
2491
2492
2493
2494 /*
2495 * Go through the disk queues to initiate sandbagged IO;
2496 * go through the inodes to write those that have been modified;
2497 * initiate the writing of the super block if it has been modified.
2498 *
2499 * Note: we are always called with the filesystem marked `MPBUSY'.
2500 */
2501 int
2502 hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
2503 {
2504 struct proc *p = vfs_context_proc(context);
2505 struct cnode *cp;
2506 struct hfsmount *hfsmp;
2507 ExtendedVCB *vcb;
2508 struct vnode *meta_vp[4];
2509 int i;
2510 int error, allerror = 0;
2511 struct hfs_sync_cargs args;
2512
2513 hfsmp = VFSTOHFS(mp);
2514
2515 /*
2516 * hfs_changefs might be manipulating vnodes so back off
2517 */
2518 if (hfsmp->hfs_flags & HFS_IN_CHANGEFS)
2519 return (0);
2520
2521 if (hfsmp->hfs_flags & HFS_READ_ONLY)
2522 return (EROFS);
2523
2524 /* skip over frozen volumes */
2525 if (!lck_rw_try_lock_shared(&hfsmp->hfs_insync))
2526 return 0;
2527
2528 args.cred = kauth_cred_get();
2529 args.waitfor = waitfor;
2530 args.p = p;
2531 args.error = 0;
2532 /*
2533 * hfs_sync_callback will be called for each vnode
2534 * hung off of this mount point... the vnode will be
2535 * properly referenced and unreferenced around the callback
2536 */
2537 vnode_iterate(mp, 0, hfs_sync_callback, (void *)&args);
2538
2539 if (args.error)
2540 allerror = args.error;
2541
2542 vcb = HFSTOVCB(hfsmp);
2543
2544 meta_vp[0] = vcb->extentsRefNum;
2545 meta_vp[1] = vcb->catalogRefNum;
2546 meta_vp[2] = vcb->allocationsRefNum; /* This is NULL for standard HFS */
2547 meta_vp[3] = hfsmp->hfs_attribute_vp; /* Optional file */
2548
2549 /* Now sync our three metadata files */
2550 for (i = 0; i < 4; ++i) {
2551 struct vnode *btvp;
2552
2553 btvp = meta_vp[i];;
2554 if ((btvp==0) || (vnode_mount(btvp) != mp))
2555 continue;
2556
2557 /* XXX use hfs_systemfile_lock instead ? */
2558 (void) hfs_lock(VTOC(btvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2559 cp = VTOC(btvp);
2560
2561 if (((cp->c_flag & C_MODIFIED) == 0) &&
2562 (cp->c_touch_acctime == 0) &&
2563 (cp->c_touch_chgtime == 0) &&
2564 (cp->c_touch_modtime == 0) &&
2565 vnode_hasdirtyblks(btvp) == 0) {
2566 hfs_unlock(VTOC(btvp));
2567 continue;
2568 }
2569 error = vnode_get(btvp);
2570 if (error) {
2571 hfs_unlock(VTOC(btvp));
2572 continue;
2573 }
2574 if ((error = hfs_fsync(btvp, waitfor, 0, p)))
2575 allerror = error;
2576
2577 hfs_unlock(cp);
2578 vnode_put(btvp);
2579 };
2580
2581
2582 #if CONFIG_HFS_STD
2583 /*
2584 * Force stale file system control information to be flushed.
2585 */
2586 if (vcb->vcbSigWord == kHFSSigWord) {
2587 if ((error = VNOP_FSYNC(hfsmp->hfs_devvp, waitfor, context))) {
2588 allerror = error;
2589 }
2590 }
2591 #endif
2592
2593 #if QUOTA
2594 hfs_qsync(mp);
2595 #endif /* QUOTA */
2596
2597 hfs_hotfilesync(hfsmp, vfs_context_kernel());
2598
2599 /*
2600 * Write back modified superblock.
2601 */
2602 if (IsVCBDirty(vcb)) {
2603 error = hfs_flushvolumeheader(hfsmp, waitfor, 0);
2604 if (error)
2605 allerror = error;
2606 }
2607
2608 if (hfsmp->jnl) {
2609 hfs_journal_flush(hfsmp, FALSE);
2610 }
2611
2612 lck_rw_unlock_shared(&hfsmp->hfs_insync);
2613 return (allerror);
2614 }
2615
2616
2617 /*
2618 * File handle to vnode
2619 *
2620 * Have to be really careful about stale file handles:
2621 * - check that the cnode id is valid
2622 * - call hfs_vget() to get the locked cnode
2623 * - check for an unallocated cnode (i_mode == 0)
2624 * - check that the given client host has export rights and return
2625 * those rights via. exflagsp and credanonp
2626 */
2627 static int
2628 hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, __unused vfs_context_t context)
2629 {
2630 struct hfsfid *hfsfhp;
2631 struct vnode *nvp;
2632 int result;
2633
2634 *vpp = NULL;
2635 hfsfhp = (struct hfsfid *)fhp;
2636
2637 if (fhlen < (int)sizeof(struct hfsfid))
2638 return (EINVAL);
2639
2640 result = hfs_vget(VFSTOHFS(mp), ntohl(hfsfhp->hfsfid_cnid), &nvp, 0, 0);
2641 if (result) {
2642 if (result == ENOENT)
2643 result = ESTALE;
2644 return result;
2645 }
2646
2647 /*
2648 * We used to use the create time as the gen id of the file handle,
2649 * but it is not static enough because it can change at any point
2650 * via system calls. We still don't have another volume ID or other
2651 * unique identifier to use for a generation ID across reboots that
2652 * persists until the file is removed. Using only the CNID exposes
2653 * us to the potential wrap-around case, but as of 2/2008, it would take
2654 * over 2 months to wrap around if the machine did nothing but allocate
2655 * CNIDs. Using some kind of wrap counter would only be effective if
2656 * each file had the wrap counter associated with it. For now,
2657 * we use only the CNID to identify the file as it's good enough.
2658 */
2659
2660 *vpp = nvp;
2661
2662 hfs_unlock(VTOC(nvp));
2663 return (0);
2664 }
2665
2666
2667 /*
2668 * Vnode pointer to File handle
2669 */
2670 /* ARGSUSED */
2671 static int
2672 hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_context_t context)
2673 {
2674 struct cnode *cp;
2675 struct hfsfid *hfsfhp;
2676
2677 if (ISHFS(VTOVCB(vp)))
2678 return (ENOTSUP); /* hfs standard is not exportable */
2679
2680 if (*fhlenp < (int)sizeof(struct hfsfid))
2681 return (EOVERFLOW);
2682
2683 cp = VTOC(vp);
2684 hfsfhp = (struct hfsfid *)fhp;
2685 /* only the CNID is used to identify the file now */
2686 hfsfhp->hfsfid_cnid = htonl(cp->c_fileid);
2687 hfsfhp->hfsfid_gen = htonl(cp->c_fileid);
2688 *fhlenp = sizeof(struct hfsfid);
2689
2690 return (0);
2691 }
2692
2693
2694 /*
2695 * Initialize HFS filesystems, done only once per boot.
2696 *
2697 * HFS is not a kext-based file system. This makes it difficult to find
2698 * out when the last HFS file system was unmounted and call hfs_uninit()
2699 * to deallocate data structures allocated in hfs_init(). Therefore we
2700 * never deallocate memory allocated by lock attribute and group initializations
2701 * in this function.
2702 */
2703 static int
2704 hfs_init(__unused struct vfsconf *vfsp)
2705 {
2706 static int done = 0;
2707
2708 if (done)
2709 return (0);
2710 done = 1;
2711 hfs_chashinit();
2712 hfs_converterinit();
2713
2714 BTReserveSetup();
2715
2716 hfs_lock_attr = lck_attr_alloc_init();
2717 hfs_group_attr = lck_grp_attr_alloc_init();
2718 hfs_mutex_group = lck_grp_alloc_init("hfs-mutex", hfs_group_attr);
2719 hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr);
2720 hfs_spinlock_group = lck_grp_alloc_init("hfs-spinlock", hfs_group_attr);
2721
2722 #if HFS_COMPRESSION
2723 decmpfs_init();
2724 #endif
2725
2726 return (0);
2727 }
2728
2729
2730 /*
2731 * Destroy all locks, mutexes and spinlocks in hfsmp on unmount or failed mount
2732 */
2733 static void
2734 hfs_locks_destroy(struct hfsmount *hfsmp)
2735 {
2736
2737 lck_mtx_destroy(&hfsmp->hfs_mutex, hfs_mutex_group);
2738 lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group);
2739 lck_rw_destroy(&hfsmp->hfs_global_lock, hfs_rwlock_group);
2740 lck_rw_destroy(&hfsmp->hfs_insync, hfs_rwlock_group);
2741 lck_spin_destroy(&hfsmp->vcbFreeExtLock, hfs_spinlock_group);
2742
2743 return;
2744 }
2745
2746
2747 static int
2748 hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp)
2749 {
2750 struct hfsmount * hfsmp;
2751 char fstypename[MFSNAMELEN];
2752
2753 if (vp == NULL)
2754 return (EINVAL);
2755
2756 if (!vnode_isvroot(vp))
2757 return (EINVAL);
2758
2759 vnode_vfsname(vp, fstypename);
2760 if (strncmp(fstypename, "hfs", sizeof(fstypename)) != 0)
2761 return (EINVAL);
2762
2763 hfsmp = VTOHFS(vp);
2764
2765 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
2766 return (EINVAL);
2767
2768 *hfsmpp = hfsmp;
2769
2770 return (0);
2771 }
2772
2773 // XXXdbg
2774 #include <sys/filedesc.h>
2775
2776 /*
2777 * HFS filesystem related variables.
2778 */
2779 int
2780 hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp,
2781 user_addr_t newp, size_t newlen, vfs_context_t context)
2782 {
2783 struct proc *p = vfs_context_proc(context);
2784 int error;
2785 struct hfsmount *hfsmp;
2786
2787 /* all sysctl names at this level are terminal */
2788
2789 if (name[0] == HFS_ENCODINGBIAS) {
2790 int bias;
2791
2792 bias = hfs_getencodingbias();
2793 error = sysctl_int(oldp, oldlenp, newp, newlen, &bias);
2794 if (error == 0 && newp)
2795 hfs_setencodingbias(bias);
2796 return (error);
2797
2798 } else if (name[0] == HFS_EXTEND_FS) {
2799 u_int64_t newsize;
2800 vnode_t vp = vfs_context_cwd(context);
2801
2802 if (newp == USER_ADDR_NULL || vp == NULLVP)
2803 return (EINVAL);
2804 if ((error = hfs_getmountpoint(vp, &hfsmp)))
2805 return (error);
2806 error = sysctl_quad(oldp, oldlenp, newp, newlen, (quad_t *)&newsize);
2807 if (error)
2808 return (error);
2809
2810 error = hfs_extendfs(hfsmp, newsize, context);
2811 return (error);
2812
2813 } else if (name[0] == HFS_ENCODINGHINT) {
2814 size_t bufsize;
2815 size_t bytes;
2816 u_int32_t hint;
2817 u_int16_t *unicode_name = NULL;
2818 char *filename = NULL;
2819
2820 if ((newlen <= 0) || (newlen > MAXPATHLEN))
2821 return (EINVAL);
2822
2823 bufsize = MAX(newlen * 3, MAXPATHLEN);
2824 MALLOC(filename, char *, newlen, M_TEMP, M_WAITOK);
2825 if (filename == NULL) {
2826 error = ENOMEM;
2827 goto encodinghint_exit;
2828 }
2829 MALLOC(unicode_name, u_int16_t *, bufsize, M_TEMP, M_WAITOK);
2830 if (filename == NULL) {
2831 error = ENOMEM;
2832 goto encodinghint_exit;
2833 }
2834
2835 error = copyin(newp, (caddr_t)filename, newlen);
2836 if (error == 0) {
2837 error = utf8_decodestr((u_int8_t *)filename, newlen - 1, unicode_name,
2838 &bytes, bufsize, 0, UTF_DECOMPOSED);
2839 if (error == 0) {
2840 hint = hfs_pickencoding(unicode_name, bytes / 2);
2841 error = sysctl_int(oldp, oldlenp, USER_ADDR_NULL, 0, (int32_t *)&hint);
2842 }
2843 }
2844
2845 encodinghint_exit:
2846 if (unicode_name)
2847 FREE(unicode_name, M_TEMP);
2848 if (filename)
2849 FREE(filename, M_TEMP);
2850 return (error);
2851
2852 } else if (name[0] == HFS_ENABLE_JOURNALING) {
2853 // make the file system journaled...
2854 vnode_t vp = vfs_context_cwd(context);
2855 vnode_t jvp;
2856 ExtendedVCB *vcb;
2857 struct cat_attr jnl_attr;
2858 struct cat_attr jinfo_attr;
2859 struct cat_fork jnl_fork;
2860 struct cat_fork jinfo_fork;
2861 buf_t jib_buf;
2862 uint64_t jib_blkno;
2863 uint32_t tmpblkno;
2864 uint64_t journal_byte_offset;
2865 uint64_t journal_size;
2866 vnode_t jib_vp = NULLVP;
2867 struct JournalInfoBlock local_jib;
2868 int err = 0;
2869 void *jnl = NULL;
2870 int lockflags;
2871
2872 /* Only root can enable journaling */
2873 if (!kauth_cred_issuser(kauth_cred_get())) {
2874 return (EPERM);
2875 }
2876 if (vp == NULLVP)
2877 return EINVAL;
2878
2879 hfsmp = VTOHFS(vp);
2880 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2881 return EROFS;
2882 }
2883 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) {
2884 printf("hfs: can't make a plain hfs volume journaled.\n");
2885 return EINVAL;
2886 }
2887
2888 if (hfsmp->jnl) {
2889 printf("hfs: volume @ mp %p is already journaled!\n", vnode_mount(vp));
2890 return EAGAIN;
2891 }
2892 vcb = HFSTOVCB(hfsmp);
2893
2894 /* Set up local copies of the initialization info */
2895 tmpblkno = (uint32_t) name[1];
2896 jib_blkno = (uint64_t) tmpblkno;
2897 journal_byte_offset = (uint64_t) name[2];
2898 journal_byte_offset *= hfsmp->blockSize;
2899 journal_byte_offset += hfsmp->hfsPlusIOPosOffset;
2900 journal_size = (uint64_t)((unsigned)name[3]);
2901
2902 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
2903 if (BTHasContiguousNodes(VTOF(vcb->catalogRefNum)) == 0 ||
2904 BTHasContiguousNodes(VTOF(vcb->extentsRefNum)) == 0) {
2905
2906 printf("hfs: volume has a btree w/non-contiguous nodes. can not enable journaling.\n");
2907 hfs_systemfile_unlock(hfsmp, lockflags);
2908 return EINVAL;
2909 }
2910 hfs_systemfile_unlock(hfsmp, lockflags);
2911
2912 // make sure these both exist!
2913 if ( GetFileInfo(vcb, kHFSRootFolderID, ".journal_info_block", &jinfo_attr, &jinfo_fork) == 0
2914 || GetFileInfo(vcb, kHFSRootFolderID, ".journal", &jnl_attr, &jnl_fork) == 0) {
2915
2916 return EINVAL;
2917 }
2918
2919 /*
2920 * At this point, we have a copy of the metadata that lives in the catalog for the
2921 * journal info block. Compare that the journal info block's single extent matches
2922 * that which was passed into this sysctl.
2923 *
2924 * If it is different, deny the journal enable call.
2925 */
2926 if (jinfo_fork.cf_blocks > 1) {
2927 /* too many blocks */
2928 return EINVAL;
2929 }
2930
2931 if (jinfo_fork.cf_extents[0].startBlock != jib_blkno) {
2932 /* Wrong block */
2933 return EINVAL;
2934 }
2935
2936 /*
2937 * We want to immediately purge the vnode for the JIB.
2938 *
2939 * Because it was written to from userland, there's probably
2940 * a vnode somewhere in the vnode cache (possibly with UBC backed blocks).
2941 * So we bring the vnode into core, then immediately do whatever
2942 * we can to flush/vclean it out. This is because those blocks will be
2943 * interpreted as user data, which may be treated separately on some platforms
2944 * than metadata. If the vnode is gone, then there cannot be backing blocks
2945 * in the UBC.
2946 */
2947 if (hfs_vget (hfsmp, jinfo_attr.ca_fileid, &jib_vp, 1, 0)) {
2948 return EINVAL;
2949 }
2950 /*
2951 * Now we have a vnode for the JIB. recycle it. Because we hold an iocount
2952 * on the vnode, we'll just mark it for termination when the last iocount
2953 * (hopefully ours), is dropped.
2954 */
2955 vnode_recycle (jib_vp);
2956 err = vnode_put (jib_vp);
2957 if (err) {
2958 return EINVAL;
2959 }
2960
2961 /* Initialize the local copy of the JIB (just like hfs.util) */
2962 memset (&local_jib, 'Z', sizeof(struct JournalInfoBlock));
2963 local_jib.flags = SWAP_BE32(kJIJournalInFSMask);
2964 /* Note that the JIB's offset is in bytes */
2965 local_jib.offset = SWAP_BE64(journal_byte_offset);
2966 local_jib.size = SWAP_BE64(journal_size);
2967
2968 /*
2969 * Now write out the local JIB. This essentially overwrites the userland
2970 * copy of the JIB. Read it as BLK_META to treat it as a metadata read/write.
2971 */
2972 jib_buf = buf_getblk (hfsmp->hfs_devvp,
2973 jib_blkno * (hfsmp->blockSize / hfsmp->hfs_logical_block_size),
2974 hfsmp->blockSize, 0, 0, BLK_META);
2975 char* buf_ptr = (char*) buf_dataptr (jib_buf);
2976
2977 /* Zero out the portion of the block that won't contain JIB data */
2978 memset (buf_ptr, 0, hfsmp->blockSize);
2979
2980 bcopy(&local_jib, buf_ptr, sizeof(local_jib));
2981 if (buf_bwrite (jib_buf)) {
2982 return EIO;
2983 }
2984
2985 /* Force a flush track cache */
2986 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
2987
2988
2989 /* Now proceed with full volume sync */
2990 hfs_sync(hfsmp->hfs_mp, MNT_WAIT, context);
2991
2992 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2993 (off_t)name[2], (off_t)name[3]);
2994
2995 //
2996 // XXXdbg - note that currently (Sept, 08) hfs_util does not support
2997 // enabling the journal on a separate device so it is safe
2998 // to just copy hfs_devvp here. If hfs_util gets the ability
2999 // to dynamically enable the journal on a separate device then
3000 // we will have to do the same thing as hfs_early_journal_init()
3001 // to locate and open the journal device.
3002 //
3003 jvp = hfsmp->hfs_devvp;
3004 jnl = journal_create(jvp, journal_byte_offset, journal_size,
3005 hfsmp->hfs_devvp,
3006 hfsmp->hfs_logical_block_size,
3007 0,
3008 0,
3009 hfs_sync_metadata, hfsmp->hfs_mp,
3010 hfsmp->hfs_mp);
3011
3012 /*
3013 * Set up the trim callback function so that we can add
3014 * recently freed extents to the free extent cache once
3015 * the transaction that freed them is written to the
3016 * journal on disk.
3017 */
3018 if (jnl)
3019 journal_trim_set_callback(jnl, hfs_trim_callback, hfsmp);
3020
3021 if (jnl == NULL) {
3022 printf("hfs: FAILED to create the journal!\n");
3023 if (jvp && jvp != hfsmp->hfs_devvp) {
3024 vnode_clearmountedon(jvp);
3025 VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
3026 }
3027 jvp = NULL;
3028
3029 return EINVAL;
3030 }
3031
3032 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
3033
3034 /*
3035 * Flush all dirty metadata buffers.
3036 */
3037 buf_flushdirtyblks(hfsmp->hfs_devvp, TRUE, 0, "hfs_sysctl");
3038 buf_flushdirtyblks(hfsmp->hfs_extents_vp, TRUE, 0, "hfs_sysctl");
3039 buf_flushdirtyblks(hfsmp->hfs_catalog_vp, TRUE, 0, "hfs_sysctl");
3040 buf_flushdirtyblks(hfsmp->hfs_allocation_vp, TRUE, 0, "hfs_sysctl");
3041 if (hfsmp->hfs_attribute_vp)
3042 buf_flushdirtyblks(hfsmp->hfs_attribute_vp, TRUE, 0, "hfs_sysctl");
3043
3044 HFSTOVCB(hfsmp)->vcbJinfoBlock = name[1];
3045 HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeJournaledMask;
3046 hfsmp->jvp = jvp;
3047 hfsmp->jnl = jnl;
3048
3049 // save this off for the hack-y check in hfs_remove()
3050 hfsmp->jnl_start = (u_int32_t)name[2];
3051 hfsmp->jnl_size = (off_t)((unsigned)name[3]);
3052 hfsmp->hfs_jnlinfoblkid = jinfo_attr.ca_fileid;
3053 hfsmp->hfs_jnlfileid = jnl_attr.ca_fileid;
3054
3055 vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
3056
3057 hfs_unlock_global (hfsmp);
3058 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
3059
3060 {
3061 fsid_t fsid;
3062
3063 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
3064 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
3065 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
3066 }
3067 return 0;
3068 } else if (name[0] == HFS_DISABLE_JOURNALING) {
3069 // clear the journaling bit
3070 vnode_t vp = vfs_context_cwd(context);
3071
3072 /* Only root can disable journaling */
3073 if (!kauth_cred_issuser(kauth_cred_get())) {
3074 return (EPERM);
3075 }
3076 if (vp == NULLVP)
3077 return EINVAL;
3078
3079 hfsmp = VTOHFS(vp);
3080
3081 /*
3082 * Disabling journaling is disallowed on volumes with directory hard links
3083 * because we have not tested the relevant code path.
3084 */
3085 if (hfsmp->hfs_private_attr[DIR_HARDLINKS].ca_entries != 0){
3086 printf("hfs: cannot disable journaling on volumes with directory hardlinks\n");
3087 return EPERM;
3088 }
3089
3090 printf("hfs: disabling journaling for mount @ %p\n", vnode_mount(vp));
3091
3092 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
3093
3094 // Lights out for you buddy!
3095 journal_close(hfsmp->jnl);
3096 hfsmp->jnl = NULL;
3097
3098 if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
3099 vnode_clearmountedon(hfsmp->jvp);
3100 VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
3101 vnode_put(hfsmp->jvp);
3102 }
3103 hfsmp->jvp = NULL;
3104 vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
3105 hfsmp->jnl_start = 0;
3106 hfsmp->hfs_jnlinfoblkid = 0;
3107 hfsmp->hfs_jnlfileid = 0;
3108
3109 HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeJournaledMask;
3110
3111 hfs_unlock_global (hfsmp);
3112
3113 hfs_flushvolumeheader(hfsmp, MNT_WAIT, 1);
3114
3115 {
3116 fsid_t fsid;
3117
3118 fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
3119 fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
3120 vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
3121 }
3122 return 0;
3123 } else if (name[0] == HFS_GET_JOURNAL_INFO) {
3124 vnode_t vp = vfs_context_cwd(context);
3125 off_t jnl_start, jnl_size;
3126
3127 if (vp == NULLVP)
3128 return EINVAL;
3129
3130 /* 64-bit processes won't work with this sysctl -- can't fit a pointer into an int! */
3131 if (proc_is64bit(current_proc()))
3132 return EINVAL;
3133
3134 hfsmp = VTOHFS(vp);
3135 if (hfsmp->jnl == NULL) {
3136 jnl_start = 0;
3137 jnl_size = 0;
3138 } else {
3139 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
3140 jnl_size = (off_t)hfsmp->jnl_size;
3141 }
3142
3143 if ((error = copyout((caddr_t)&jnl_start, CAST_USER_ADDR_T(name[1]), sizeof(off_t))) != 0) {
3144 return error;
3145 }
3146 if ((error = copyout((caddr_t)&jnl_size, CAST_USER_ADDR_T(name[2]), sizeof(off_t))) != 0) {
3147 return error;
3148 }
3149
3150 return 0;
3151 } else if (name[0] == HFS_SET_PKG_EXTENSIONS) {
3152
3153 return set_package_extensions_table((user_addr_t)((unsigned)name[1]), name[2], name[3]);
3154
3155 } else if (name[0] == VFS_CTL_QUERY) {
3156 struct sysctl_req *req;
3157 union union_vfsidctl vc;
3158 struct mount *mp;
3159 struct vfsquery vq;
3160
3161 req = CAST_DOWN(struct sysctl_req *, oldp); /* we're new style vfs sysctl. */
3162
3163 error = SYSCTL_IN(req, &vc, proc_is64bit(p)? sizeof(vc.vc64):sizeof(vc.vc32));
3164 if (error) return (error);
3165
3166 mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */
3167 if (mp == NULL) return (ENOENT);
3168
3169 hfsmp = VFSTOHFS(mp);
3170 bzero(&vq, sizeof(vq));
3171 vq.vq_flags = hfsmp->hfs_notification_conditions;
3172 return SYSCTL_OUT(req, &vq, sizeof(vq));;
3173 } else if (name[0] == HFS_REPLAY_JOURNAL) {
3174 vnode_t devvp = NULL;
3175 int device_fd;
3176 if (namelen != 2) {
3177 return (EINVAL);
3178 }
3179 device_fd = name[1];
3180 error = file_vnode(device_fd, &devvp);
3181 if (error) {
3182 return error;
3183 }
3184 error = vnode_getwithref(devvp);
3185 if (error) {
3186 file_drop(device_fd);
3187 return error;
3188 }
3189 error = hfs_journal_replay(devvp, context);
3190 file_drop(device_fd);
3191 vnode_put(devvp);
3192 return error;
3193 } else if (name[0] == HFS_ENABLE_RESIZE_DEBUG) {
3194 hfs_resize_debug = 1;
3195 printf ("hfs_sysctl: Enabled volume resize debugging.\n");
3196 return 0;
3197 }
3198
3199 return (ENOTSUP);
3200 }
3201
3202 /*
3203 * hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support
3204 * the build_path ioctl. We use it to leverage the code below that updates
3205 * the origin list cache if necessary
3206 */
3207
3208 int
3209 hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context)
3210 {
3211 int error;
3212 int lockflags;
3213 struct hfsmount *hfsmp;
3214
3215 hfsmp = VFSTOHFS(mp);
3216
3217 error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1, 0);
3218 if (error)
3219 return (error);
3220
3221 /*
3222 * ADLs may need to have their origin state updated
3223 * since build_path needs a valid parent. The same is true
3224 * for hardlinked files as well. There isn't a race window here
3225 * in re-acquiring the cnode lock since we aren't pulling any data
3226 * out of the cnode; instead, we're going to the catalog.
3227 */
3228 if ((VTOC(*vpp)->c_flag & C_HARDLINK) &&
3229 (hfs_lock(VTOC(*vpp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) == 0)) {
3230 cnode_t *cp = VTOC(*vpp);
3231 struct cat_desc cdesc;
3232
3233 if (!hfs_haslinkorigin(cp)) {
3234 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
3235 error = cat_findname(hfsmp, (cnid_t)ino, &cdesc);
3236 hfs_systemfile_unlock(hfsmp, lockflags);
3237 if (error == 0) {
3238 if ((cdesc.cd_parentcnid != hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
3239 (cdesc.cd_parentcnid != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)) {
3240 hfs_savelinkorigin(cp, cdesc.cd_parentcnid);
3241 }
3242 cat_releasedesc(&cdesc);
3243 }
3244 }
3245 hfs_unlock(cp);
3246 }
3247 return (0);
3248 }
3249
3250
3251 /*
3252 * Look up an HFS object by ID.
3253 *
3254 * The object is returned with an iocount reference and the cnode locked.
3255 *
3256 * If the object is a file then it will represent the data fork.
3257 */
int
hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock, int allow_deleted)
{
	struct vnode *vp = NULLVP;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct cat_fork cnfork;
	u_int32_t linkref = 0;
	int error;

	/* Check for cnids that shouldn't be exported. */
	if ((cnid < kHFSFirstUserCatalogNodeID) &&
	    (cnid != kHFSRootFolderID && cnid != kHFSRootParentID)) {
		return (ENOENT);
	}
	/* Don't export our private directories. */
	if (cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid ||
	    cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) {
		return (ENOENT);
	}
	/*
	 * Check the cnode hash first: if the cnode is already in core we get
	 * it back with an iocount (and locked, unless skiplock was passed).
	 */
	vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock, allow_deleted);
	if (vp) {
		*vpp = vp;
		return(0);
	}

	bzero(&cndesc, sizeof(cndesc));
	bzero(&cnattr, sizeof(cnattr));
	bzero(&cnfork, sizeof(cnfork));

	/*
	 * Not in hash, lookup in catalog
	 */
	if (cnid == kHFSRootParentID) {
		/*
		 * The root's parent is synthetic; fabricate a descriptor and
		 * attributes that alias it to the root folder itself.
		 */
		static char hfs_rootname[] = "/";

		cndesc.cd_nameptr = (const u_int8_t *)&hfs_rootname[0];
		cndesc.cd_namelen = 1;
		cndesc.cd_parentcnid = kHFSRootParentID;
		cndesc.cd_cnid = kHFSRootFolderID;
		cndesc.cd_flags = CD_ISDIR;

		cnattr.ca_fileid = kHFSRootFolderID;
		cnattr.ca_linkcount = 1;
		cnattr.ca_entries = 1;
		cnattr.ca_dircount = 1;
		cnattr.ca_mode = (S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO);
	} else {
		int lockflags;
		cnid_t pid;
		const char *nameptr;

		/* Look the ID up in the catalog under a shared catalog lock. */
		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		error = cat_idlookup(hfsmp, cnid, 0, 0, &cndesc, &cnattr, &cnfork);
		hfs_systemfile_unlock(hfsmp, lockflags);

		if (error) {
			*vpp = NULL;
			return (error);
		}

		/*
		 * Check for a raw hardlink inode and save its linkref.
		 * Raw inodes live in the private metadata directories and are
		 * named "<prefix><linkref>", so the numeric suffix is parsed
		 * out of the catalog name.
		 */
		pid = cndesc.cd_parentcnid;
		nameptr = (const char *)cndesc.cd_nameptr;

		if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		    (bcmp(nameptr, HFS_INODE_PREFIX, HFS_INODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_INODE_PREFIX_LEN], NULL, 10);

		} else if ((pid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DIRINODE_PREFIX, HFS_DIRINODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_DIRINODE_PREFIX_LEN], NULL, 10);

		} else if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DELETE_PREFIX, HFS_DELETE_PREFIX_LEN) == 0)) {
			*vpp = NULL;
			cat_releasedesc(&cndesc);
			return (ENOENT);	/* open unlinked file */
		}
	}

	/*
	 * Finish initializing cnode descriptor for hardlinks.
	 *
	 * We need a valid name and parent for reverse lookups.
	 */
	if (linkref) {
		cnid_t lastid;
		struct cat_desc linkdesc;
		int linkerr = 0;

		cnattr.ca_linkref = linkref;
		bzero (&linkdesc, sizeof (linkdesc));

		/*
		 * If the caller supplied the raw inode value, then we don't know exactly
		 * which hardlink they wanted. It's likely that they acquired the raw inode
		 * value BEFORE the item became a hardlink, in which case, they probably
		 * want the oldest link. So request the oldest link from the catalog.
		 *
		 * Unfortunately, this requires that we iterate through all N hardlinks. On the plus
		 * side, since we know that we want the last linkID, we can also have this one
		 * call give us back the name of the last ID, since it's going to have it in-hand...
		 */
		linkerr = hfs_lookup_lastlink (hfsmp, linkref, &lastid, &linkdesc);
		if ((linkerr == 0) && (lastid != 0)) {
			/*
			 * Release any lingering buffers attached to our local descriptor.
			 * Then copy the name and other business into the cndesc
			 */
			cat_releasedesc (&cndesc);
			bcopy (&linkdesc, &cndesc, sizeof(linkdesc));
		}
		/* If it failed, the linkref code will just use whatever it had in-hand below. */
	}

	if (linkref) {
		int newvnode_flags = 0;

		/* Build the vnode for a hardlink member (no componentname). */
		error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr,
								&cnfork, &vp, &newvnode_flags);
		if (error == 0) {
			VTOC(vp)->c_flag |= C_HARDLINK;
			vnode_setmultipath(vp);
		}
	} else {
		struct componentname cn;
		int newvnode_flags = 0;

		/* Supply hfs_getnewvnode with a component name. */
		MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
		cn.cn_nameiop = LOOKUP;
		cn.cn_flags = ISLASTCN | HASBUF;
		cn.cn_context = NULL;
		cn.cn_pnlen = MAXPATHLEN;
		cn.cn_nameptr = cn.cn_pnbuf;
		cn.cn_namelen = cndesc.cd_namelen;
		cn.cn_hash = 0;
		cn.cn_consume = 0;
		/* +1 copies the catalog name's NUL terminator as well. */
		bcopy(cndesc.cd_nameptr, cn.cn_nameptr, cndesc.cd_namelen + 1);

		error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr,
								&cnfork, &vp, &newvnode_flags);

		if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK)) {
			hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid);
		}
		FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
	}
	cat_releasedesc(&cndesc);

	/* On success the vnode carries an iocount; honor skiplock on the way out. */
	*vpp = vp;
	if (vp && skiplock) {
		hfs_unlock(VTOC(vp));
	}
	return (error);
}
3420
3421
3422 /*
3423 * Flush out all the files in a filesystem.
3424 */
3425 static int
3426 #if QUOTA
3427 hfs_flushfiles(struct mount *mp, int flags, struct proc *p)
3428 #else
3429 hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p)
3430 #endif /* QUOTA */
3431 {
3432 struct hfsmount *hfsmp;
3433 struct vnode *skipvp = NULLVP;
3434 int error;
3435 int accounted_root_usecounts;
3436 #if QUOTA
3437 int i;
3438 #endif
3439
3440 hfsmp = VFSTOHFS(mp);
3441
3442 accounted_root_usecounts = 0;
3443 #if QUOTA
3444 /*
3445 * The open quota files have an indirect reference on
3446 * the root directory vnode. We must account for this
3447 * extra reference when doing the intial vflush.
3448 */
3449 if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {
3450 /* Find out how many quota files we have open. */
3451 for (i = 0; i < MAXQUOTAS; i++) {
3452 if (hfsmp->hfs_qfiles[i].qf_vp != NULLVP)
3453 ++accounted_root_usecounts;
3454 }
3455 }
3456 #endif /* QUOTA */
3457 if (hfsmp->hfs_flags & HFS_CS) {
3458 ++accounted_root_usecounts;
3459 }
3460
3461 if (accounted_root_usecounts > 0) {
3462 /* Obtain the root vnode so we can skip over it. */
3463 skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0, 0);
3464 }
3465
3466 error = vflush(mp, skipvp, SKIPSYSTEM | SKIPSWAP | flags);
3467 if (error != 0)
3468 return(error);
3469
3470 error = vflush(mp, skipvp, SKIPSYSTEM | flags);
3471
3472 if (skipvp) {
3473 /*
3474 * See if there are additional references on the
3475 * root vp besides the ones obtained from the open
3476 * quota files and CoreStorage.
3477 */
3478 if ((error == 0) &&
3479 (vnode_isinuse(skipvp, accounted_root_usecounts))) {
3480 error = EBUSY; /* root directory is still open */
3481 }
3482 hfs_unlock(VTOC(skipvp));
3483 /* release the iocount from the hfs_chash_getvnode call above. */
3484 vnode_put(skipvp);
3485 }
3486 if (error && (flags & FORCECLOSE) == 0)
3487 return (error);
3488
3489 #if QUOTA
3490 if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {
3491 for (i = 0; i < MAXQUOTAS; i++) {
3492 if (hfsmp->hfs_qfiles[i].qf_vp == NULLVP)
3493 continue;
3494 hfs_quotaoff(p, mp, i);
3495 }
3496 }
3497 #endif /* QUOTA */
3498 if (hfsmp->hfs_flags & HFS_CS) {
3499 error = VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSSETFSVNODE,
3500 (caddr_t)NULL, 0, vfs_context_kernel());
3501 vnode_rele(skipvp);
3502 printf("hfs_flushfiles: VNOP_IOCTL(_DKIOCCSSETFSVNODE) failed with error code %d\n",
3503 error);
3504
3505 /* ignore the CS error and proceed with the unmount. */
3506 error = 0;
3507 }
3508 if (skipvp) {
3509 error = vflush(mp, NULLVP, SKIPSYSTEM | flags);
3510 }
3511
3512 return (error);
3513 }
3514
3515 /*
3516 * Update volume encoding bitmap (HFS Plus only)
3517 *
3518 * Mark a legacy text encoding as in-use (as needed)
3519 * in the volume header of this HFS+ filesystem.
3520 */
3521 __private_extern__
3522 void
3523 hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding)
3524 {
3525 #define kIndexMacUkrainian 48 /* MacUkrainian encoding is 152 */
3526 #define kIndexMacFarsi 49 /* MacFarsi encoding is 140 */
3527
3528 u_int32_t index;
3529
3530 switch (encoding) {
3531 case kTextEncodingMacUkrainian:
3532 index = kIndexMacUkrainian;
3533 break;
3534 case kTextEncodingMacFarsi:
3535 index = kIndexMacFarsi;
3536 break;
3537 default:
3538 index = encoding;
3539 break;
3540 }
3541
3542 /* Only mark the encoding as in-use if it wasn't already set */
3543 if (index < 64 && (hfsmp->encodingsBitmap & (u_int64_t)(1ULL << index)) == 0) {
3544 hfs_lock_mount (hfsmp);
3545 hfsmp->encodingsBitmap |= (u_int64_t)(1ULL << index);
3546 MarkVCBDirty(hfsmp);
3547 hfs_unlock_mount(hfsmp);
3548 }
3549 }
3550
3551 /*
3552 * Update volume stats
3553 *
3554 * On journal volumes this will cause a volume header flush
3555 */
3556 int
3557 hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot)
3558 {
3559 struct timeval tv;
3560
3561 microtime(&tv);
3562
3563 hfs_lock_mount (hfsmp);
3564
3565 MarkVCBDirty(hfsmp);
3566 hfsmp->hfs_mtime = tv.tv_sec;
3567
3568 switch (op) {
3569 case VOL_UPDATE:
3570 break;
3571 case VOL_MKDIR:
3572 if (hfsmp->hfs_dircount != 0xFFFFFFFF)
3573 ++hfsmp->hfs_dircount;
3574 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3575 ++hfsmp->vcbNmRtDirs;
3576 break;
3577 case VOL_RMDIR:
3578 if (hfsmp->hfs_dircount != 0)
3579 --hfsmp->hfs_dircount;
3580 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3581 --hfsmp->vcbNmRtDirs;
3582 break;
3583 case VOL_MKFILE:
3584 if (hfsmp->hfs_filecount != 0xFFFFFFFF)
3585 ++hfsmp->hfs_filecount;
3586 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3587 ++hfsmp->vcbNmFls;
3588 break;
3589 case VOL_RMFILE:
3590 if (hfsmp->hfs_filecount != 0)
3591 --hfsmp->hfs_filecount;
3592 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3593 --hfsmp->vcbNmFls;
3594 break;
3595 }
3596
3597 hfs_unlock_mount (hfsmp);
3598
3599 if (hfsmp->jnl) {
3600 hfs_flushvolumeheader(hfsmp, 0, 0);
3601 }
3602
3603 return (0);
3604 }
3605
3606
#if CONFIG_HFS_STD
/*
 * Flush the in-memory VCB state of an HFS Standard volume out to its
 * on-disk Master Directory Block (big-endian, local-time dates), and
 * optionally to the alternate MDB as well.
 */
static int
hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush)
{
	ExtendedVCB *vcb = HFSTOVCB(hfsmp);
	struct filefork *fp;
	HFSMasterDirectoryBlock *mdb;
	struct buf *bp = NULL;
	int retval;
	int sector_size;
	ByteCount namelen;

	/* Read the primary MDB sector into a buffer we will modify in place. */
	sector_size = hfsmp->hfs_logical_block_size;
	retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sector_size), sector_size, NOCRED, &bp);
	if (retval) {
		if (bp)
			buf_brelse(bp);
		return retval;
	}

	/* Hold the mount lock while copying VCB fields so they stay consistent. */
	hfs_lock_mount (hfsmp);

	mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sector_size));

	/* HFS Standard stores dates in local time, byte-swapped to big-endian. */
	mdb->drCrDate = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->hfs_itime)));
	mdb->drLsMod = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbLsMod)));
	mdb->drAtrb = SWAP_BE16 (vcb->vcbAtrb);
	mdb->drNmFls = SWAP_BE16 (vcb->vcbNmFls);
	mdb->drAllocPtr = SWAP_BE16 (vcb->nextAllocation);
	mdb->drClpSiz = SWAP_BE32 (vcb->vcbClpSiz);
	mdb->drNxtCNID = SWAP_BE32 (vcb->vcbNxtCNID);
	mdb->drFreeBks = SWAP_BE16 (vcb->freeBlocks);

	/* Convert the UTF-8 volume name back to its legacy encoding. */
	namelen = strlen((char *)vcb->vcbVN);
	retval = utf8_to_hfs(vcb, namelen, vcb->vcbVN, mdb->drVN);
	/* Retry with MacRoman in case that's how it was exported. */
	if (retval)
		retval = utf8_to_mac_roman(namelen, vcb->vcbVN, mdb->drVN);

	mdb->drVolBkUp = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbVolBkUp)));
	mdb->drWrCnt = SWAP_BE32 (vcb->vcbWrCnt);
	mdb->drNmRtDirs = SWAP_BE16 (vcb->vcbNmRtDirs);
	mdb->drFilCnt = SWAP_BE32 (vcb->vcbFilCnt);
	mdb->drDirCnt = SWAP_BE32 (vcb->vcbDirCnt);

	bcopy(vcb->vcbFndrInfo, mdb->drFndrInfo, sizeof(mdb->drFndrInfo));

	/* Sync the extents overflow B-tree file's metadata (3 extents max on HFS). */
	fp = VTOF(vcb->extentsRefNum);
	mdb->drXTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drXTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drXTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drXTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drXTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drXTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drXTFlSize = SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drXTClpSiz = SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	/* Sync the catalog B-tree file's metadata. */
	fp = VTOF(vcb->catalogRefNum);
	mdb->drCTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drCTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drCTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drCTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drCTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drCTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drCTFlSize = SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drCTClpSiz = SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	MarkVCBClean( vcb );

	hfs_unlock_mount (hfsmp);

	/* If requested, flush out the alternate MDB */
	if (altflush) {
		struct buf *alt_bp = NULL;

		if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sector_size, NOCRED, &alt_bp) == 0) {
			bcopy(mdb, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sector_size), kMDBSize);

			/* Alternate MDB is always written synchronously; result ignored. */
			(void) VNOP_BWRITE(alt_bp);
		} else if (alt_bp)
			buf_brelse(alt_bp);
	}

	/* Async write unless the caller asked to wait for the I/O. */
	if (waitfor != MNT_WAIT)
		buf_bawrite(bp);
	else
		retval = VNOP_BWRITE(bp);

	return (retval);
}
#endif
3700
3701 /*
3702 * Flush any dirty in-memory mount data to the on-disk
3703 * volume header.
3704 *
3705 * Note: the on-disk volume signature is intentionally
3706 * not flushed since the on-disk "H+" and "HX" signatures
3707 * are always stored in-memory as "H+".
3708 */
3709 int
3710 hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
3711 {
3712 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3713 struct filefork *fp;
3714 HFSPlusVolumeHeader *volumeHeader, *altVH;
3715 int retval;
3716 struct buf *bp, *alt_bp;
3717 int i;
3718 daddr64_t priIDSector;
3719 int critical;
3720 u_int16_t signature;
3721 u_int16_t hfsversion;
3722
3723 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
3724 return(0);
3725 }
3726 #if CONFIG_HFS_STD
3727 if (hfsmp->hfs_flags & HFS_STANDARD) {
3728 return hfs_flushMDB(hfsmp, waitfor, altflush);
3729 }
3730 #endif
3731 critical = altflush;
3732 priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
3733 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
3734
3735 if (hfs_start_transaction(hfsmp) != 0) {
3736 return EINVAL;
3737 }
3738
3739 bp = NULL;
3740 alt_bp = NULL;
3741
3742 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3743 HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
3744 hfsmp->hfs_physical_block_size, NOCRED, &bp);
3745 if (retval) {
3746 printf("hfs: err %d reading VH blk (vol=%s)\n", retval, vcb->vcbVN);
3747 goto err_exit;
3748 }
3749
3750 volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) +
3751 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3752
3753 /*
3754 * Sanity check what we just read. If it's bad, try the alternate
3755 * instead.
3756 */
3757 signature = SWAP_BE16 (volumeHeader->signature);
3758 hfsversion = SWAP_BE16 (volumeHeader->version);
3759 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3760 (hfsversion < kHFSPlusVersion) || (hfsversion > 100) ||
3761 (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize)) {
3762 printf("hfs: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d%s\n",
3763 vcb->vcbVN, signature, hfsversion,
3764 SWAP_BE32 (volumeHeader->blockSize),
3765 hfsmp->hfs_alt_id_sector ? "; trying alternate" : "");
3766 hfs_mark_volume_inconsistent(hfsmp);
3767
3768 if (hfsmp->hfs_alt_id_sector) {
3769 retval = buf_meta_bread(hfsmp->hfs_devvp,
3770 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3771 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp);
3772 if (retval) {
3773 printf("hfs: err %d reading alternate VH (%s)\n", retval, vcb->vcbVN);
3774 goto err_exit;
3775 }
3776
3777 altVH = (HFSPlusVolumeHeader *)((char *)buf_dataptr(alt_bp) +
3778 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size));
3779 signature = SWAP_BE16(altVH->signature);
3780 hfsversion = SWAP_BE16(altVH->version);
3781
3782 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3783 (hfsversion < kHFSPlusVersion) || (kHFSPlusVersion > 100) ||
3784 (SWAP_BE32(altVH->blockSize) != vcb->blockSize)) {
3785 printf("hfs: corrupt alternate VH on %s, sig 0x%04x, ver %d, blksize %d\n",
3786 vcb->vcbVN, signature, hfsversion,
3787 SWAP_BE32(altVH->blockSize));
3788 retval = EIO;
3789 goto err_exit;
3790 }
3791
3792 /* The alternate is plausible, so use it. */
3793 bcopy(altVH, volumeHeader, kMDBSize);
3794 buf_brelse(alt_bp);
3795 alt_bp = NULL;
3796 } else {
3797 /* No alternate VH, nothing more we can do. */
3798 retval = EIO;
3799 goto err_exit;
3800 }
3801 }
3802
3803 if (hfsmp->jnl) {
3804 journal_modify_block_start(hfsmp->jnl, bp);
3805 }
3806
3807 /*
3808 * For embedded HFS+ volumes, update create date if it changed
3809 * (ie from a setattrlist call)
3810 */
3811 if ((vcb->hfsPlusIOPosOffset != 0) &&
3812 (SWAP_BE32 (volumeHeader->createDate) != vcb->localCreateDate)) {
3813 struct buf *bp2;
3814 HFSMasterDirectoryBlock *mdb;
3815
3816 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3817 HFS_PHYSBLK_ROUNDDOWN(HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size), hfsmp->hfs_log_per_phys),
3818 hfsmp->hfs_physical_block_size, NOCRED, &bp2);
3819 if (retval) {
3820 if (bp2)
3821 buf_brelse(bp2);
3822 retval = 0;
3823 } else {
3824 mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp2) +
3825 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3826
3827 if ( SWAP_BE32 (mdb->drCrDate) != vcb->localCreateDate )
3828 {
3829 if (hfsmp->jnl) {
3830 journal_modify_block_start(hfsmp->jnl, bp2);
3831 }
3832
3833 mdb->drCrDate = SWAP_BE32 (vcb->localCreateDate); /* pick up the new create date */
3834
3835 if (hfsmp->jnl) {
3836 journal_modify_block_end(hfsmp->jnl, bp2, NULL, NULL);
3837 } else {
3838 (void) VNOP_BWRITE(bp2); /* write out the changes */
3839 }
3840 }
3841 else
3842 {
3843 buf_brelse(bp2); /* just release it */
3844 }
3845 }
3846 }
3847
3848 hfs_lock_mount (hfsmp);
3849
3850 /* Note: only update the lower 16 bits worth of attributes */
3851 volumeHeader->attributes = SWAP_BE32 (vcb->vcbAtrb);
3852 volumeHeader->journalInfoBlock = SWAP_BE32 (vcb->vcbJinfoBlock);
3853 if (hfsmp->jnl) {
3854 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSJMountVersion);
3855 } else {
3856 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSPlusMountVersion);
3857 }
3858 volumeHeader->createDate = SWAP_BE32 (vcb->localCreateDate); /* volume create date is in local time */
3859 volumeHeader->modifyDate = SWAP_BE32 (to_hfs_time(vcb->vcbLsMod));
3860 volumeHeader->backupDate = SWAP_BE32 (to_hfs_time(vcb->vcbVolBkUp));
3861 volumeHeader->fileCount = SWAP_BE32 (vcb->vcbFilCnt);
3862 volumeHeader->folderCount = SWAP_BE32 (vcb->vcbDirCnt);
3863 volumeHeader->totalBlocks = SWAP_BE32 (vcb->totalBlocks);
3864 volumeHeader->freeBlocks = SWAP_BE32 (vcb->freeBlocks);
3865 volumeHeader->nextAllocation = SWAP_BE32 (vcb->nextAllocation);
3866 volumeHeader->rsrcClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3867 volumeHeader->dataClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
3868 volumeHeader->nextCatalogID = SWAP_BE32 (vcb->vcbNxtCNID);
3869 volumeHeader->writeCount = SWAP_BE32 (vcb->vcbWrCnt);
3870 volumeHeader->encodingsBitmap = SWAP_BE64 (vcb->encodingsBitmap);
3871
3872 if (bcmp(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)) != 0) {
3873 bcopy(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo));
3874 critical = 1;
3875 }
3876
3877 /*
3878 * System files are only dirty when altflush is set.
3879 */
3880 if (altflush == 0) {
3881 goto done;
3882 }
3883
3884 /* Sync Extents over-flow file meta data */
3885 fp = VTOF(vcb->extentsRefNum);
3886 if (FTOC(fp)->c_flag & C_MODIFIED) {
3887 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3888 volumeHeader->extentsFile.extents[i].startBlock =
3889 SWAP_BE32 (fp->ff_extents[i].startBlock);
3890 volumeHeader->extentsFile.extents[i].blockCount =
3891 SWAP_BE32 (fp->ff_extents[i].blockCount);
3892 }
3893 volumeHeader->extentsFile.logicalSize = SWAP_BE64 (fp->ff_size);
3894 volumeHeader->extentsFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3895 volumeHeader->extentsFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3896 FTOC(fp)->c_flag &= ~C_MODIFIED;
3897 }
3898
3899 /* Sync Catalog file meta data */
3900 fp = VTOF(vcb->catalogRefNum);
3901 if (FTOC(fp)->c_flag & C_MODIFIED) {
3902 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3903 volumeHeader->catalogFile.extents[i].startBlock =
3904 SWAP_BE32 (fp->ff_extents[i].startBlock);
3905 volumeHeader->catalogFile.extents[i].blockCount =
3906 SWAP_BE32 (fp->ff_extents[i].blockCount);
3907 }
3908 volumeHeader->catalogFile.logicalSize = SWAP_BE64 (fp->ff_size);
3909 volumeHeader->catalogFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3910 volumeHeader->catalogFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3911 FTOC(fp)->c_flag &= ~C_MODIFIED;
3912 }
3913
3914 /* Sync Allocation file meta data */
3915 fp = VTOF(vcb->allocationsRefNum);
3916 if (FTOC(fp)->c_flag & C_MODIFIED) {
3917 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3918 volumeHeader->allocationFile.extents[i].startBlock =
3919 SWAP_BE32 (fp->ff_extents[i].startBlock);
3920 volumeHeader->allocationFile.extents[i].blockCount =
3921 SWAP_BE32 (fp->ff_extents[i].blockCount);
3922 }
3923 volumeHeader->allocationFile.logicalSize = SWAP_BE64 (fp->ff_size);
3924 volumeHeader->allocationFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3925 volumeHeader->allocationFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3926 FTOC(fp)->c_flag &= ~C_MODIFIED;
3927 }
3928
3929 /* Sync Attribute file meta data */
3930 if (hfsmp->hfs_attribute_vp) {
3931 fp = VTOF(hfsmp->hfs_attribute_vp);
3932 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3933 volumeHeader->attributesFile.extents[i].startBlock =
3934 SWAP_BE32 (fp->ff_extents[i].startBlock);
3935 volumeHeader->attributesFile.extents[i].blockCount =
3936 SWAP_BE32 (fp->ff_extents[i].blockCount);
3937 }
3938 FTOC(fp)->c_flag &= ~C_MODIFIED;
3939 volumeHeader->attributesFile.logicalSize = SWAP_BE64 (fp->ff_size);
3940 volumeHeader->attributesFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3941 volumeHeader->attributesFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3942 }
3943
3944 /* Sync Startup file meta data */
3945 if (hfsmp->hfs_startup_vp) {
3946 fp = VTOF(hfsmp->hfs_startup_vp);
3947 if (FTOC(fp)->c_flag & C_MODIFIED) {
3948 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3949 volumeHeader->startupFile.extents[i].startBlock =
3950 SWAP_BE32 (fp->ff_extents[i].startBlock);
3951 volumeHeader->startupFile.extents[i].blockCount =
3952 SWAP_BE32 (fp->ff_extents[i].blockCount);
3953 }
3954 volumeHeader->startupFile.logicalSize = SWAP_BE64 (fp->ff_size);
3955 volumeHeader->startupFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
3956 volumeHeader->startupFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
3957 FTOC(fp)->c_flag &= ~C_MODIFIED;
3958 }
3959 }
3960
3961 done:
3962 MarkVCBClean(hfsmp);
3963 hfs_unlock_mount (hfsmp);
3964
3965 /* If requested, flush out the alternate volume header */
3966 if (altflush && hfsmp->hfs_alt_id_sector) {
3967 if (buf_meta_bread(hfsmp->hfs_devvp,
3968 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
3969 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) {
3970 if (hfsmp->jnl) {
3971 journal_modify_block_start(hfsmp->jnl, alt_bp);
3972 }
3973
3974 bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) +
3975 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size),
3976 kMDBSize);
3977
3978 if (hfsmp->jnl) {
3979 journal_modify_block_end(hfsmp->jnl, alt_bp, NULL, NULL);
3980 } else {
3981 (void) VNOP_BWRITE(alt_bp);
3982 }
3983 } else if (alt_bp)
3984 buf_brelse(alt_bp);
3985 }
3986
3987 if (hfsmp->jnl) {
3988 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
3989 } else {
3990 if (waitfor != MNT_WAIT)
3991 buf_bawrite(bp);
3992 else {
3993 retval = VNOP_BWRITE(bp);
3994 /* When critical data changes, flush the device cache */
3995 if (critical && (retval == 0)) {
3996 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE,
3997 NULL, FWRITE, NULL);
3998 }
3999 }
4000 }
4001 hfs_end_transaction(hfsmp);
4002
4003 return (retval);
4004
4005 err_exit:
4006 if (alt_bp)
4007 buf_brelse(alt_bp);
4008 if (bp)
4009 buf_brelse(bp);
4010 hfs_end_transaction(hfsmp);
4011 return retval;
4012 }
4013
4014
4015 /*
4016 * Extend a file system.
4017 */
4018 int
4019 hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
4020 {
4021 struct proc *p = vfs_context_proc(context);
4022 kauth_cred_t cred = vfs_context_ucred(context);
4023 struct vnode *vp;
4024 struct vnode *devvp;
4025 struct buf *bp;
4026 struct filefork *fp = NULL;
4027 ExtendedVCB *vcb;
4028 struct cat_fork forkdata;
4029 u_int64_t oldsize;
4030 u_int64_t newblkcnt;
4031 u_int64_t prev_phys_block_count;
4032 u_int32_t addblks;
4033 u_int64_t sector_count;
4034 u_int32_t sector_size;
4035 u_int32_t phys_sector_size;
4036 u_int32_t overage_blocks;
4037 daddr64_t prev_alt_sector;
4038 daddr_t bitmapblks;
4039 int lockflags = 0;
4040 int error;
4041 int64_t oldBitmapSize;
4042 Boolean usedExtendFileC = false;
4043 int transaction_begun = 0;
4044
4045 devvp = hfsmp->hfs_devvp;
4046 vcb = HFSTOVCB(hfsmp);
4047
4048 /*
4049 * - HFS Plus file systems only.
4050 * - Journaling must be enabled.
4051 * - No embedded volumes.
4052 */
4053 if ((vcb->vcbSigWord == kHFSSigWord) ||
4054 (hfsmp->jnl == NULL) ||
4055 (vcb->hfsPlusIOPosOffset != 0)) {
4056 return (EPERM);
4057 }
4058 /*
4059 * If extending file system by non-root, then verify
4060 * ownership and check permissions.
4061 */
4062 if (suser(cred, NULL)) {
4063 error = hfs_vget(hfsmp, kHFSRootFolderID, &vp, 0, 0);
4064
4065 if (error)
4066 return (error);
4067 error = hfs_owner_rights(hfsmp, VTOC(vp)->c_uid, cred, p, 0);
4068 if (error == 0) {
4069 error = hfs_write_access(vp, cred, p, false);
4070 }
4071 hfs_unlock(VTOC(vp));
4072 vnode_put(vp);
4073 if (error)
4074 return (error);
4075
4076 error = vnode_authorize(devvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, context);
4077 if (error)
4078 return (error);
4079 }
4080 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sector_size, 0, context)) {
4081 return (ENXIO);
4082 }
4083 if (sector_size != hfsmp->hfs_logical_block_size) {
4084 return (ENXIO);
4085 }
4086 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sector_count, 0, context)) {
4087 return (ENXIO);
4088 }
4089 if ((sector_size * sector_count) < newsize) {
4090 printf("hfs_extendfs: not enough space on device (vol=%s)\n", hfsmp->vcbVN);
4091 return (ENOSPC);
4092 }
4093 error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sector_size, 0, context);
4094 if (error) {
4095 if ((error != ENOTSUP) && (error != ENOTTY)) {
4096 return (ENXIO);
4097 }
4098 /* If ioctl is not supported, force physical and logical sector size to be same */
4099 phys_sector_size = sector_size;
4100 }
4101 oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
4102
4103 /*
4104 * Validate new size.
4105 */
4106 if ((newsize <= oldsize) || (newsize % sector_size) || (newsize % phys_sector_size)) {
4107 printf("hfs_extendfs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
4108 return (EINVAL);
4109 }
4110 newblkcnt = newsize / vcb->blockSize;
4111 if (newblkcnt > (u_int64_t)0xFFFFFFFF) {
4112 printf ("hfs_extendfs: current blockSize=%u too small for newsize=%qu\n", hfsmp->blockSize, newsize);
4113 return (EOVERFLOW);
4114 }
4115
4116 addblks = newblkcnt - vcb->totalBlocks;
4117
4118 if (hfs_resize_debug) {
4119 printf ("hfs_extendfs: old: size=%qu, blkcnt=%u\n", oldsize, hfsmp->totalBlocks);
4120 printf ("hfs_extendfs: new: size=%qu, blkcnt=%u, addblks=%u\n", newsize, (u_int32_t)newblkcnt, addblks);
4121 }
4122 printf("hfs_extendfs: will extend \"%s\" by %d blocks\n", vcb->vcbVN, addblks);
4123
4124 hfs_lock_mount (hfsmp);
4125 if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
4126 hfs_unlock_mount(hfsmp);
4127 error = EALREADY;
4128 goto out;
4129 }
4130 hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
4131 hfs_unlock_mount (hfsmp);
4132
4133 /* Start with a clean journal. */
4134 hfs_journal_flush(hfsmp, TRUE);
4135
4136 /*
4137 * Enclose changes inside a transaction.
4138 */
4139 if (hfs_start_transaction(hfsmp) != 0) {
4140 error = EINVAL;
4141 goto out;
4142 }
4143 transaction_begun = 1;
4144
4145
4146 /* Update the hfsmp fields for the physical information about the device */
4147 prev_phys_block_count = hfsmp->hfs_logical_block_count;
4148 prev_alt_sector = hfsmp->hfs_alt_id_sector;
4149
4150 hfsmp->hfs_logical_block_count = sector_count;
4151 /*
4152 * Note that the new AltVH location must be based on the device's EOF rather than the new
4153 * filesystem's EOF, so we use logical_block_count here rather than newsize.
4154 */
4155 hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / sector_size) +
4156 HFS_ALT_SECTOR(sector_size, hfsmp->hfs_logical_block_count);
4157 hfsmp->hfs_logical_bytes = (uint64_t) sector_count * (uint64_t) sector_size;
4158
4159
4160 /*
4161 * Note: we take the attributes lock in case we have an attribute data vnode
4162 * which needs to change size.
4163 */
4164 lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
4165 vp = vcb->allocationsRefNum;
4166 fp = VTOF(vp);
4167 bcopy(&fp->ff_data, &forkdata, sizeof(forkdata));
4168
4169 /*
4170 * Calculate additional space required (if any) by allocation bitmap.
4171 */
4172 oldBitmapSize = fp->ff_size;
4173 bitmapblks = roundup((newblkcnt+7) / 8, vcb->vcbVBMIOSize) / vcb->blockSize;
4174 if (bitmapblks > (daddr_t)fp->ff_blocks)
4175 bitmapblks -= fp->ff_blocks;
4176 else
4177 bitmapblks = 0;
4178
4179 /*
4180 * The allocation bitmap can contain unused bits that are beyond end of
4181 * current volume's allocation blocks. Usually they are supposed to be
4182 * zero'ed out but there can be cases where they might be marked as used.
4183 * After extending the file system, those bits can represent valid
4184 * allocation blocks, so we mark all the bits from the end of current
4185 * volume to end of allocation bitmap as "free".
4186 *
4187 * Figure out the number of overage blocks before proceeding though,
4188 * so we don't add more bytes to our I/O than necessary.
4189 * First figure out the total number of blocks representable by the
4190 * end of the bitmap file vs. the total number of blocks in the new FS.
4191 * Then subtract away the number of blocks in the current FS. This is how much
4192 * we can mark as free right now without having to grow the bitmap file.
4193 */
4194 overage_blocks = fp->ff_blocks * vcb->blockSize * 8;
4195 overage_blocks = MIN (overage_blocks, newblkcnt);
4196 overage_blocks -= vcb->totalBlocks;
4197
4198 BlockMarkFreeUnused(vcb, vcb->totalBlocks, overage_blocks);
4199
4200 if (bitmapblks > 0) {
4201 daddr64_t blkno;
4202 daddr_t blkcnt;
4203 off_t bytesAdded;
4204
4205 /*
4206 * Get the bitmap's current size (in allocation blocks) so we know
4207 * where to start zero filling once the new space is added. We've
4208 * got to do this before the bitmap is grown.
4209 */
4210 blkno = (daddr64_t)fp->ff_blocks;
4211
4212 /*
4213 * Try to grow the allocation file in the normal way, using allocation
4214 * blocks already existing in the file system. This way, we might be
4215 * able to grow the bitmap contiguously, or at least in the metadata
4216 * zone.
4217 */
4218 error = ExtendFileC(vcb, fp, bitmapblks * vcb->blockSize, 0,
4219 kEFAllMask | kEFNoClumpMask | kEFReserveMask
4220 | kEFMetadataMask | kEFContigMask, &bytesAdded);
4221
4222 if (error == 0) {
4223 usedExtendFileC = true;
4224 } else {
4225 /*
4226 * If the above allocation failed, fall back to allocating the new
4227 * extent of the bitmap from the space we're going to add. Since those
4228 * blocks don't yet belong to the file system, we have to update the
4229 * extent list directly, and manually adjust the file size.
4230 */
4231 bytesAdded = 0;
4232 error = AddFileExtent(vcb, fp, vcb->totalBlocks, bitmapblks);
4233 if (error) {
4234 printf("hfs_extendfs: error %d adding extents\n", error);
4235 goto out;
4236 }
4237 fp->ff_blocks += bitmapblks;
4238 VTOC(vp)->c_blocks = fp->ff_blocks;
4239 VTOC(vp)->c_flag |= C_MODIFIED;
4240 }
4241
4242 /*
4243 * Update the allocation file's size to include the newly allocated
4244 * blocks. Note that ExtendFileC doesn't do this, which is why this
4245 * statement is outside the above "if" statement.
4246 */
4247 fp->ff_size += (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
4248
4249 /*
4250 * Zero out the new bitmap blocks.
4251 */
4252 {
4253
4254 bp = NULL;
4255 blkcnt = bitmapblks;
4256 while (blkcnt > 0) {
4257 error = (int)buf_meta_bread(vp, blkno, vcb->blockSize, NOCRED, &bp);
4258 if (error) {
4259 if (bp) {
4260 buf_brelse(bp);
4261 }
4262 break;
4263 }
4264 bzero((char *)buf_dataptr(bp), vcb->blockSize);
4265 buf_markaged(bp);
4266 error = (int)buf_bwrite(bp);
4267 if (error)
4268 break;
4269 --blkcnt;
4270 ++blkno;
4271 }
4272 }
4273 if (error) {
4274 printf("hfs_extendfs: error %d clearing blocks\n", error);
4275 goto out;
4276 }
4277 /*
4278 * Mark the new bitmap space as allocated.
4279 *
4280 * Note that ExtendFileC will have marked any blocks it allocated, so
4281 * this is only needed if we used AddFileExtent. Also note that this
4282 * has to come *after* the zero filling of new blocks in the case where
4283 * we used AddFileExtent (since the part of the bitmap we're touching
4284 * is in those newly allocated blocks).
4285 */
4286 if (!usedExtendFileC) {
4287 error = BlockMarkAllocated(vcb, vcb->totalBlocks, bitmapblks);
4288 if (error) {
4289 printf("hfs_extendfs: error %d setting bitmap\n", error);
4290 goto out;
4291 }
4292 vcb->freeBlocks -= bitmapblks;
4293 }
4294 }
4295 /*
4296 * Mark the new alternate VH as allocated.
4297 */
4298 if (vcb->blockSize == 512)
4299 error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 2, 2);
4300 else
4301 error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 1, 1);
4302 if (error) {
4303 printf("hfs_extendfs: error %d setting bitmap (VH)\n", error);
4304 goto out;
4305 }
4306 /*
4307 * Mark the old alternate VH as free.
4308 */
4309 if (vcb->blockSize == 512)
4310 (void) BlockMarkFree(vcb, vcb->totalBlocks - 2, 2);
4311 else
4312 (void) BlockMarkFree(vcb, vcb->totalBlocks - 1, 1);
4313 /*
4314 * Adjust file system variables for new space.
4315 */
4316 vcb->totalBlocks += addblks;
4317 vcb->freeBlocks += addblks;
4318 MarkVCBDirty(vcb);
4319 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
4320 if (error) {
4321 printf("hfs_extendfs: couldn't flush volume headers (%d)", error);
4322 /*
4323 * Restore to old state.
4324 */
4325 if (usedExtendFileC) {
4326 (void) TruncateFileC(vcb, fp, oldBitmapSize, 0, FORK_IS_RSRC(fp),
4327 FTOC(fp)->c_fileid, false);
4328 } else {
4329 fp->ff_blocks -= bitmapblks;
4330 fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
4331 /*
4332 * No need to mark the excess blocks free since those bitmap blocks
4333 * are no longer part of the bitmap. But we do need to undo the
4334 * effect of the "vcb->freeBlocks -= bitmapblks" above.
4335 */
4336 vcb->freeBlocks += bitmapblks;
4337 }
4338 vcb->totalBlocks -= addblks;
4339 vcb->freeBlocks -= addblks;
4340 hfsmp->hfs_logical_block_count = prev_phys_block_count;
4341 hfsmp->hfs_alt_id_sector = prev_alt_sector;
4342 MarkVCBDirty(vcb);
4343 if (vcb->blockSize == 512) {
4344 if (BlockMarkAllocated(vcb, vcb->totalBlocks - 2, 2)) {
4345 hfs_mark_volume_inconsistent(hfsmp);
4346 }
4347 } else {
4348 if (BlockMarkAllocated(vcb, vcb->totalBlocks - 1, 1)) {
4349 hfs_mark_volume_inconsistent(hfsmp);
4350 }
4351 }
4352 goto out;
4353 }
4354 /*
4355 * Invalidate the old alternate volume header.
4356 */
4357 bp = NULL;
4358 if (prev_alt_sector) {
4359 if (buf_meta_bread(hfsmp->hfs_devvp,
4360 HFS_PHYSBLK_ROUNDDOWN(prev_alt_sector, hfsmp->hfs_log_per_phys),
4361 hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) {
4362 journal_modify_block_start(hfsmp->jnl, bp);
4363
4364 bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize);
4365
4366 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
4367 } else if (bp) {
4368 buf_brelse(bp);
4369 }
4370 }
4371
4372 /*
4373 * Update the metadata zone size based on current volume size
4374 */
4375 hfs_metadatazone_init(hfsmp, false);
4376
4377 /*
4378 * Adjust the size of hfsmp->hfs_attrdata_vp
4379 */
4380 if (hfsmp->hfs_attrdata_vp) {
4381 struct cnode *attr_cp;
4382 struct filefork *attr_fp;
4383
4384 if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
4385 attr_cp = VTOC(hfsmp->hfs_attrdata_vp);
4386 attr_fp = VTOF(hfsmp->hfs_attrdata_vp);
4387
4388 attr_cp->c_blocks = newblkcnt;
4389 attr_fp->ff_blocks = newblkcnt;
4390 attr_fp->ff_extents[0].blockCount = newblkcnt;
4391 attr_fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
4392 ubc_setsize(hfsmp->hfs_attrdata_vp, attr_fp->ff_size);
4393 vnode_put(hfsmp->hfs_attrdata_vp);
4394 }
4395 }
4396
4397 /*
4398 * Update the R/B Tree if necessary. Since we don't have to drop the systemfile
4399 * locks in the middle of these operations like we do in the truncate case
4400 * where we have to relocate files, we can only update the red-black tree
4401 * if there were actual changes made to the bitmap. Also, we can't really scan the
4402 * new portion of the bitmap before it has been allocated. The BlockMarkAllocated
4403 * routines are smart enough to avoid the r/b tree if the portion they are manipulating is
4404 * not currently controlled by the tree.
4405 *
4406 * We only update hfsmp->allocLimit if totalBlocks actually increased.
4407 */
4408 if (error == 0) {
4409 UpdateAllocLimit(hfsmp, hfsmp->totalBlocks);
4410 }
4411
4412 /* Release all locks and sync up journal content before
4413 * checking and extending, if required, the journal
4414 */
4415 if (lockflags) {
4416 hfs_systemfile_unlock(hfsmp, lockflags);
4417 lockflags = 0;
4418 }
4419 if (transaction_begun) {
4420 hfs_end_transaction(hfsmp);
4421 hfs_journal_flush(hfsmp, TRUE);
4422 transaction_begun = 0;
4423 }
4424
4425 /* Increase the journal size, if required. */
4426 error = hfs_extend_journal(hfsmp, sector_size, sector_count, context);
4427 if (error) {
4428 printf ("hfs_extendfs: Could not extend journal size\n");
4429 goto out_noalloc;
4430 }
4431
4432 /* Log successful extending */
4433 printf("hfs_extendfs: extended \"%s\" to %d blocks (was %d blocks)\n",
4434 hfsmp->vcbVN, hfsmp->totalBlocks, (u_int32_t)(oldsize/hfsmp->blockSize));
4435
4436 out:
4437 if (error && fp) {
4438 /* Restore allocation fork. */
4439 bcopy(&forkdata, &fp->ff_data, sizeof(forkdata));
4440 VTOC(vp)->c_blocks = fp->ff_blocks;
4441
4442 }
4443
4444 out_noalloc:
4445 hfs_lock_mount (hfsmp);
4446 hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
4447 hfs_unlock_mount (hfsmp);
4448 if (lockflags) {
4449 hfs_systemfile_unlock(hfsmp, lockflags);
4450 }
4451 if (transaction_begun) {
4452 hfs_end_transaction(hfsmp);
4453 hfs_journal_flush(hfsmp, FALSE);
4454 /* Just to be sure, sync all data to the disk */
4455 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
4456 }
4457 if (error) {
4458 printf ("hfs_extentfs: failed error=%d on vol=%s\n", MacToVFSError(error), hfsmp->vcbVN);
4459 }
4460
4461 return MacToVFSError(error);
4462 }
4463
/* Smallest volume size (32 MiB) that hfs_truncatefs will shrink down to. */
#define HFS_MIN_SIZE (32LL * 1024LL * 1024LL)
4465
/*
 * Truncate a file system (while still mounted).
 *
 * Shrink the mounted, journaled HFS Plus volume identified by 'hfsmp' to
 * 'newsize' bytes.  Allocated blocks beyond the new end of volume are
 * relocated (via hfs_reclaimspace) before the volume header is rewritten.
 * Returns 0 on success or a VFS errno (EALREADY, EPERM, EINVAL, ENOSPC,
 * EAGAIN, ...) on failure; failure restores the in-memory free block count.
 */
int
hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
{
	struct buf *bp = NULL;
	u_int64_t oldsize;
	u_int32_t newblkcnt;
	u_int32_t reclaimblks = 0;
	int lockflags = 0;
	int transaction_begun = 0;
	Boolean updateFreeBlocks = false;
	Boolean disable_sparse = false;
	int error = 0;

	/* Only one resize may be in flight; also reset resize progress counters. */
	hfs_lock_mount (hfsmp);
	if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
		hfs_unlock_mount (hfsmp);
		return (EALREADY);
	}
	hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
	hfsmp->hfs_resize_blocksmoved = 0;
	hfsmp->hfs_resize_totalblocks = 0;
	hfsmp->hfs_resize_progress = 0;
	hfs_unlock_mount (hfsmp);

	/*
	 * - Journaled HFS Plus volumes only.
	 * - No embedded volumes.
	 */
	if ((hfsmp->jnl == NULL) ||
	    (hfsmp->hfsPlusIOPosOffset != 0)) {
		error = EPERM;
		goto out;
	}
	oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
	newblkcnt = newsize / hfsmp->blockSize;
	reclaimblks = hfsmp->totalBlocks - newblkcnt;

	if (hfs_resize_debug) {
		printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1));
		printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks);
	}

	/* Make sure new size is valid. */
	if ((newsize < HFS_MIN_SIZE) ||
	    (newsize >= oldsize) ||
	    (newsize % hfsmp->hfs_logical_block_size) ||
	    (newsize % hfsmp->hfs_physical_block_size)) {
		printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
		error = EINVAL;
		goto out;
	}

	/*
	 * Make sure that the file system has enough free blocks reclaim.
	 *
	 * Before resize, the disk is divided into four zones -
	 * 	A. Allocated_Stationary - These are allocated blocks that exist
	 * 	   before the new end of disk.  These blocks will not be
	 * 	   relocated or modified during resize.
	 * 	B. Free_Stationary - These are free blocks that exist before the
	 * 	   new end of disk.  These blocks can be used for any new
	 * 	   allocations during resize, including allocation for relocating
	 * 	   data from the area of disk being reclaimed.
	 * 	C. Allocated_To-Reclaim - These are allocated blocks that exist
	 * 	   beyond the new end of disk.  These blocks need to be reclaimed
	 * 	   during resize by allocating equal number of blocks in Free
	 * 	   Stationary zone and copying the data.
	 * 	D. Free_To-Reclaim - These are free blocks that exist beyond the
	 * 	   new end of disk.  Nothing special needs to be done to reclaim
	 * 	   them.
	 *
	 * Total number of blocks on the disk before resize:
	 * ------------------------------------------------
	 * 	Total Blocks = Allocated_Stationary + Free_Stationary +
	 * 	               Allocated_To-Reclaim + Free_To-Reclaim
	 *
	 * Total number of blocks that need to be reclaimed:
	 * ------------------------------------------------
	 *	Blocks to Reclaim = Allocated_To-Reclaim + Free_To-Reclaim
	 *
	 * Note that the check below also makes sure that we have enough space
	 * to relocate data from Allocated_To-Reclaim to Free_Stationary.
	 * Therefore we do not need to check total number of blocks to relocate
	 * later in the code.
	 *
	 * The condition below gets converted to:
	 *
	 * Allocated To-Reclaim + Free To-Reclaim >= Free Stationary + Free To-Reclaim
	 *
	 * which is equivalent to:
	 *
	 *              Allocated To-Reclaim >= Free Stationary
	 */
	if (reclaimblks >= hfs_freeblks(hfsmp, 1)) {
		printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
		error = ENOSPC;
		goto out;
	}

	/* Start with a clean journal. */
	hfs_journal_flush(hfsmp, TRUE);

	if (hfs_start_transaction(hfsmp) != 0) {
		error = EINVAL;
		goto out;
	}
	transaction_begun = 1;

	/* Take the bitmap lock to update the alloc limit field */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * Prevent new allocations from using the part we're trying to truncate.
	 *
	 * NOTE: allocLimit is set to the allocation block number where the new
	 * alternate volume header will be.  That way there will be no files to
	 * interfere with allocating the new alternate volume header, and no files
	 * in the allocation blocks beyond (i.e. the blocks we're trying to
	 * truncate away.
	 *
	 * Also shrink the red-black tree if needed.
	 *
	 * NOTE(review): the UpdateAllocLimit return value stored in 'error' is
	 * not checked here before it is overwritten below — confirm this is
	 * intentional (failure would only be caught indirectly).
	 */
	if (hfsmp->blockSize == 512) {
		error = UpdateAllocLimit (hfsmp, newblkcnt - 2);
	}
	else {
		error = UpdateAllocLimit (hfsmp, newblkcnt - 1);
	}

	/* Sparse devices use first fit allocation which is not ideal
	 * for volume resize which requires best fit allocation.  If a
	 * sparse device is being truncated, disable the sparse device
	 * property temporarily for the duration of resize.  Also reset
	 * the free extent cache so that it is rebuilt as sorted by
	 * totalBlocks instead of startBlock.
	 *
	 * Note that this will affect all allocations on the volume and
	 * ideal fix would be just to modify resize-related allocations,
	 * but it will result in complexity like handling of two free
	 * extent caches sorted differently, etc.  So we stick to this
	 * solution for now.
	 */
	hfs_lock_mount (hfsmp);
	if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
		ResetVCBFreeExtCache(hfsmp);
		disable_sparse = true;
	}

	/*
	 * Update the volume free block count to reflect the total number
	 * of free blocks that will exist after a successful resize.
	 * Relocation of extents will result in no net change in the total
	 * free space on the disk.  Therefore the code that allocates
	 * space for new extent and deallocates the old extent explicitly
	 * prevents updating the volume free block count.  It will also
	 * prevent false disk full error when the number of blocks in
	 * an extent being relocated is more than the free blocks that
	 * will exist after the volume is resized.
	 */
	hfsmp->freeBlocks -= reclaimblks;
	updateFreeBlocks = true;	/* remember to undo this on any error path */
	hfs_unlock_mount(hfsmp);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/*
	 * Update the metadata zone size to match the new volume size,
	 * and if it too less, metadata zone might be disabled.
	 */
	hfs_metadatazone_init(hfsmp, false);

	/*
	 * If some files have blocks at or beyond the location of the
	 * new alternate volume header, recalculate free blocks and
	 * reclaim blocks.  Otherwise just update free blocks count.
	 *
	 * The current allocLimit is set to the location of new alternate
	 * volume header, and reclaimblks are the total number of blocks
	 * that need to be reclaimed.  So the check below is really
	 * ignoring the blocks allocated for old alternate volume header.
	 */
	if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) {
		/*
		 * hfs_reclaimspace will use separate transactions when
		 * relocating files (so we don't overwhelm the journal).
		 */
		hfs_end_transaction(hfsmp);
		transaction_begun = 0;

		/* Attempt to reclaim some space. */
		error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context);
		if (error != 0) {
			printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error);
			error = ENOSPC;
			goto out;
		}
		if (hfs_start_transaction(hfsmp) != 0) {
			error = EINVAL;
			goto out;
		}
		transaction_begun = 1;

		/* Check if we're clear now. */
		error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks);
		if (error != 0) {
			printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error);
			error = EAGAIN;	/* tell client to try again */
			goto out;
		}
	}

	/*
	 * Note: we take the attributes lock in case we have an attribute data vnode
	 * which needs to change size.
	 */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

	/*
	 * Allocate last 1KB for alternate volume header.
	 */
	error = BlockMarkAllocated(hfsmp, hfsmp->allocLimit, (hfsmp->blockSize == 512) ? 2 : 1);
	if (error) {
		printf("hfs_truncatefs: Error %d allocating new alternate volume header\n", error);
		goto out;
	}

	/*
	 * Mark the old alternate volume header as free.
	 * We don't bother shrinking allocation bitmap file.
	 */
	if (hfsmp->blockSize == 512)
		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2);
	else
		(void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1);

	/*
	 * Invalidate the existing alternate volume header.
	 *
	 * Don't include this in a transaction (don't call journal_modify_block)
	 * since this block will be outside of the truncated file system!
	 */
	if (hfsmp->hfs_alt_id_sector) {
		error = buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp);
		if (error == 0) {
			bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)), kMDBSize);
			(void) VNOP_BWRITE(bp);
		} else {
			if (bp) {
				buf_brelse(bp);
			}
		}
		bp = NULL;
	}

	/* Log successful shrinking. */
	printf("hfs_truncatefs: shrank \"%s\" to %d blocks (was %d blocks)\n",
	       hfsmp->vcbVN, newblkcnt, hfsmp->totalBlocks);

	/*
	 * Adjust file system variables and flush them to disk.
	 */
	hfsmp->totalBlocks = newblkcnt;
	hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size;
	hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size;

	/*
	 * Note that although the logical block size is updated here, it is only done for
	 * the benefit of the partition management software.  The logical block count change
	 * has not yet actually been propagated to the disk device yet.
	 */

	hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
	MarkVCBDirty(hfsmp);
	error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	if (error)
		panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error);

	/*
	 * Adjust the size of hfsmp->hfs_attrdata_vp
	 */
	if (hfsmp->hfs_attrdata_vp) {
		struct cnode *cp;
		struct filefork *fp;

		if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
			cp = VTOC(hfsmp->hfs_attrdata_vp);
			fp = VTOF(hfsmp->hfs_attrdata_vp);

			cp->c_blocks = newblkcnt;
			fp->ff_blocks = newblkcnt;
			fp->ff_extents[0].blockCount = newblkcnt;
			fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
			ubc_setsize(hfsmp->hfs_attrdata_vp, fp->ff_size);
			vnode_put(hfsmp->hfs_attrdata_vp);
		}
	}

out:
	/*
	 * Update the allocLimit to acknowledge the last one or two blocks now.
	 * Add it to the tree as well if necessary.
	 */
	UpdateAllocLimit (hfsmp, hfsmp->totalBlocks);

	hfs_lock_mount (hfsmp);
	if (disable_sparse == true) {
		/* Now that resize is completed, set the volume to be sparse
		 * device again so that all further allocations will be first
		 * fit instead of best fit.  Reset free extent cache so that
		 * it is rebuilt.
		 */
		hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
		ResetVCBFreeExtCache(hfsmp);
	}

	/* Undo the speculative freeBlocks reduction made before reclaiming. */
	if (error && (updateFreeBlocks == true)) {
		hfsmp->freeBlocks += reclaimblks;
	}

	if (hfsmp->nextAllocation >= hfsmp->allocLimit) {
		hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1;
	}
	hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
	hfs_unlock_mount (hfsmp);

	/* On error, reset the metadata zone for original volume size */
	if (error && (updateFreeBlocks == true)) {
		hfs_metadatazone_init(hfsmp, false);
	}

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (transaction_begun) {
		hfs_end_transaction(hfsmp);
		hfs_journal_flush(hfsmp, FALSE);
		/* Just to be sure, sync all data to the disk */
		(void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
	}

	if (error) {
		printf ("hfs_truncatefs: failed error=%d on vol=%s\n", MacToVFSError(error), hfsmp->vcbVN);
	}

	return MacToVFSError(error);
}
4821
4822
/*
 * Invalidate the physical block numbers associated with buffer cache blocks
 * in the given extent of the given vnode.
 */
struct hfs_inval_blk_no {
	daddr64_t sectorStart;	/* first device sector of the extent */
	daddr64_t sectorCount;	/* number of device sectors in the extent */
};
4831 static int
4832 hfs_invalidate_block_numbers_callback(buf_t bp, void *args_in)
4833 {
4834 daddr64_t blkno;
4835 struct hfs_inval_blk_no *args;
4836
4837 blkno = buf_blkno(bp);
4838 args = args_in;
4839
4840 if (blkno >= args->sectorStart && blkno < args->sectorStart+args->sectorCount)
4841 buf_setblkno(bp, buf_lblkno(bp));
4842
4843 return BUF_RETURNED;
4844 }
4845 static void
4846 hfs_invalidate_sectors(struct vnode *vp, daddr64_t sectorStart, daddr64_t sectorCount)
4847 {
4848 struct hfs_inval_blk_no args;
4849 args.sectorStart = sectorStart;
4850 args.sectorCount = sectorCount;
4851
4852 buf_iterate(vp, hfs_invalidate_block_numbers_callback, BUF_SCAN_DIRTY|BUF_SCAN_CLEAN, &args);
4853 }
4854
4855
4856 /*
4857 * Copy the contents of an extent to a new location. Also invalidates the
4858 * physical block number of any buffer cache block in the copied extent
4859 * (so that if the block is written, it will go through VNOP_BLOCKMAP to
4860 * determine the new physical block number).
4861 *
4862 * At this point, for regular files, we hold the truncate lock exclusive
4863 * and the cnode lock exclusive.
4864 */
4865 static int
4866 hfs_copy_extent(
4867 struct hfsmount *hfsmp,
4868 struct vnode *vp, /* The file whose extent is being copied. */
4869 u_int32_t oldStart, /* The start of the source extent. */
4870 u_int32_t newStart, /* The start of the destination extent. */
4871 u_int32_t blockCount, /* The number of allocation blocks to copy. */
4872 vfs_context_t context)
4873 {
4874 int err = 0;
4875 size_t bufferSize;
4876 void *buffer = NULL;
4877 struct vfsioattr ioattr;
4878 buf_t bp = NULL;
4879 off_t resid;
4880 size_t ioSize;
4881 u_int32_t ioSizeSectors; /* Device sectors in this I/O */
4882 daddr64_t srcSector, destSector;
4883 u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size;
4884 #if CONFIG_PROTECT
4885 int cpenabled = 0;
4886 #endif
4887
4888 /*
4889 * Sanity check that we have locked the vnode of the file we're copying.
4890 *
4891 * But since hfs_systemfile_lock() doesn't actually take the lock on
4892 * the allocation file if a journal is active, ignore the check if the
4893 * file being copied is the allocation file.
4894 */
4895 struct cnode *cp = VTOC(vp);
4896 if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread())
4897 panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp);
4898
4899 #if CONFIG_PROTECT
4900 /*
4901 * Prepare the CP blob and get it ready for use, if necessary.
4902 *
4903 * Note that we specifically *exclude* system vnodes (catalog, bitmap, extents, EAs),
4904 * because they are implicitly protected via the media key on iOS. As such, they
4905 * must not be relocated except with the media key. So it is OK to not pass down
4906 * a special cpentry to the IOMedia/LwVM code for handling.
4907 */
4908 if (!vnode_issystem (vp) && vnode_isreg(vp) && cp_fs_protected (hfsmp->hfs_mp)) {
4909 int cp_err = 0;
4910 /*
4911 * Ideally, the file whose extents we are about to manipulate is using the
4912 * newer offset-based IVs so that we can manipulate it regardless of the
4913 * current lock state. However, we must maintain support for older-style
4914 * EAs.
4915 *
4916 * For the older EA case, the IV was tied to the device LBA for file content.
4917 * This means that encrypted data cannot be moved from one location to another
4918 * in the filesystem without garbling the IV data. As a result, we need to
4919 * access the file's plaintext because we cannot do our AES-symmetry trick
4920 * here. This requires that we attempt a key-unwrap here (via cp_handle_relocate)
4921 * to make forward progress. If the keys are unavailable then we will
4922 * simply stop the resize in its tracks here since we cannot move
4923 * this extent at this time.
4924 */
4925 if ((cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) == 0) {
4926 cp_err = cp_handle_relocate(cp, hfsmp);
4927 }
4928
4929 if (cp_err) {
4930 printf ("hfs_copy_extent: cp_handle_relocate failed (%d) \n", cp_err);
4931 return cp_err;
4932 }
4933
4934 cpenabled = 1;
4935 }
4936 #endif
4937
4938
4939 /*
4940 * Determine the I/O size to use
4941 *
4942 * NOTE: Many external drives will result in an ioSize of 128KB.
4943 * TODO: Should we use a larger buffer, doing several consecutive
4944 * reads, then several consecutive writes?
4945 */
4946 vfs_ioattr(hfsmp->hfs_mp, &ioattr);
4947 bufferSize = MIN(ioattr.io_maxreadcnt, ioattr.io_maxwritecnt); /* Largest transfer both read and write paths accept */
4948 if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize))
4949 return ENOMEM;
4950
4951 /* Get a buffer for doing the I/O */
4952 bp = buf_alloc(hfsmp->hfs_devvp);
4953 buf_setdataptr(bp, (uintptr_t)buffer);
4954
4955 resid = (off_t) blockCount * (off_t) hfsmp->blockSize;
4956 srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
4957 destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
/*
 * Copy loop: each iteration reads up to bufferSize bytes from the old
 * location into the bounce buffer, then writes those same bytes out to
 * the new location, reusing the single buf_t for both directions.
 */
4958 while (resid > 0) {
4959 ioSize = MIN(bufferSize, (size_t) resid);
4960 ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size;
4961
4962 /* Prepare the buffer for reading */
4963 buf_reset(bp, B_READ);
4964 buf_setsize(bp, ioSize);
4965 buf_setcount(bp, ioSize);
4966 buf_setblkno(bp, srcSector);
4967 buf_setlblkno(bp, srcSector);
4968
4969 /*
4970 * Note that because this is an I/O to the device vp
4971 * it is correct to have lblkno and blkno both point to the
4972 * start sector being read from. If it were being issued against the
4973 * underlying file then that would be different.
4974 */
4975
4976 /* Attach the new CP blob to the buffer if needed */
4977 #if CONFIG_PROTECT
4978 if (cpenabled) {
4979 if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) {
4980 /* attach the RELOCATION_INFLIGHT flag for the underlying call to VNOP_STRATEGY */
4981 cp->c_cpentry->cp_flags |= CP_RELOCATION_INFLIGHT;
4982 buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry);
4983 }
4984 else {
4985 /*
4986 * Use the cnode's cp key. This file is tied to the
4987 * LBAs of the physical blocks that it occupies.
4988 */
4989 buf_setcpaddr (bp, cp->c_cpentry);
4990 }
4991
4992 /* Initialize the content protection file offset to start at 0 */
4993 buf_setcpoff (bp, 0);
4994 }
4995 #endif
4996
4997 /* Do the read */
4998 err = VNOP_STRATEGY(bp);
4999 if (!err)
5000 err = buf_biowait(bp);
5001 if (err) {
5002 #if CONFIG_PROTECT
5003 /* Turn the flag off in error cases. */
5004 if (cpenabled) {
5005 cp->c_cpentry->cp_flags &= ~CP_RELOCATION_INFLIGHT;
5006 }
5007 #endif
5008 printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (read)\n", err);
5009 break;
5010 }
5011
5012 /* Prepare the buffer for writing */
5013 buf_reset(bp, B_WRITE);
5014 buf_setsize(bp, ioSize);
5015 buf_setcount(bp, ioSize);
5016 buf_setblkno(bp, destSector);
5017 buf_setlblkno(bp, destSector);
5018 if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl))
5019 buf_markfua(bp);
5020
5021 #if CONFIG_PROTECT
5022 /* Attach the CP to the buffer if needed */
5023 if (cpenabled) {
5024 if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) {
5025 buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry);
5026 }
5027 else {
5028 /*
5029 * Use the cnode's CP key. This file is still tied
5030 * to the LBAs of the physical blocks that it occupies.
5031 */
5032 buf_setcpaddr (bp, cp->c_cpentry);
5033 }
5034 /*
5035 * The last STRATEGY call may have updated the cp file offset behind our
5036 * back, so we cannot trust it. Re-initialize the content protection
5037 * file offset back to 0 before initiating the write portion of this I/O.
5038 */
5039 buf_setcpoff (bp, 0);
5040 }
5041 #endif
5042
5043 /* Do the write */
5044 vnode_startwrite(hfsmp->hfs_devvp);
5045 err = VNOP_STRATEGY(bp);
5046 if (!err) {
5047 err = buf_biowait(bp);
5048 }
5049 #if CONFIG_PROTECT
5050 /* Turn the flag off regardless once the strategy call finishes. */
5051 if (cpenabled) {
5052 cp->c_cpentry->cp_flags &= ~CP_RELOCATION_INFLIGHT;
5053 }
5054 #endif
5055 if (err) {
5056 printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (write)\n", err);
5057 break;
5058 }
5059
5060 resid -= ioSize;
5061 srcSector += ioSizeSectors;
5062 destSector += ioSizeSectors;
5063 }
5064 if (bp)
5065 buf_free(bp);
5066 if (buffer)
5067 kmem_free(kernel_map, (vm_offset_t)buffer, bufferSize);
5068
5069 /* Make sure all writes have been flushed to disk. */
5070 if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) {
5071 err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
5072 if (err) {
5073 printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err);
5074 err = 0; /* Don't fail the copy. */
5075 }
5076 }
5077
/* On success, drop stale cached physical block numbers for the moved extent. */
5078 if (!err)
5079 hfs_invalidate_sectors(vp, (daddr64_t)oldStart*sectorsPerBlock, (daddr64_t)blockCount*sectorsPerBlock);
5080
5081 return err;
5082 }
5083
5084
5085 /* Structure to store state of reclaiming extents from a
5086 * given file. hfs_reclaim_file()/hfs_reclaim_xattr()
5087 * initializes the values in this structure which are then
5088 * used by code that reclaims and splits the extents.
5089 */
5090 struct hfs_reclaim_extent_info {
5091 struct vnode *vp; /* Vnode of the file whose extents are being reclaimed */
5092 u_int32_t fileID; /* Catalog node ID of the file that owns the extents */
5093 u_int8_t forkType; /* Data fork or resource fork of the file */
5094 u_int8_t is_dirlink; /* Extent belongs to directory hard link */
5095 u_int8_t is_sysfile; /* Extent belongs to system file */
5096 u_int8_t is_xattr; /* Extent belongs to extent-based xattr */
5097 u_int8_t extent_index; /* Index of the current extent within the current extent record */
5098 int lockflags; /* Locks that reclaim and split code should grab before modifying the extent record */
5099 u_int32_t blocks_relocated; /* Total blocks relocated for this file till now */
5100 u_int32_t recStartBlock; /* File allocation block number (FABN) for current extent record */
5101 u_int32_t cur_blockCount; /* Number of allocation blocks that have been checked for reclaim */
5102 struct filefork *catalog_fp; /* If non-NULL, extent is from catalog record */
5103 union record {
5104 HFSPlusExtentRecord overflow;/* Extent record from overflow extents btree */
5105 HFSPlusAttrRecord xattr; /* Attribute record for large EAs */
5106 } record;
5107 HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being processed.
5108 * For catalog extent record, points to the correct
5109 * extent information in filefork. For overflow extent
5110 * record, or xattr record, points to extent record
5111 * in the structure above
5112 */
5113 struct cat_desc *dirlink_desc;
5114 struct cat_attr *dirlink_attr;
5115 struct filefork *dirlink_fork; /* For directory hard links, fp points actually to this */
5116 struct BTreeIterator *iterator; /* Shared read/write iterator, hfs_reclaim_file/xattr()
5117 * use it for reading and hfs_reclaim_extent()/hfs_split_extent()
5118 * use it for writing updated extent record
5119 */
5120 struct FSBufferDescriptor btdata; /* Shared btdata for reading/writing extent record, same as iterator above */
5121 u_int16_t recordlen;
5122 int overflow_count; /* For debugging, counter for overflow extent record */
5123 FCB *fcb; /* Pointer to the current btree being traversed */
5124 };
5125
5126 /*
5127 * Split the current extent into two extents, with first extent
5128 * to contain given number of allocation blocks. Splitting of
5129 * extent creates one new extent entry which can result in
5130 * shifting of many entries through all the extent records of a
5131 * file, and/or creating a new extent record in the overflow
5132 * extent btree.
5133 *
5134 * Example:
5135 * The diagram below represents two consecutive extent records,
5136 * for simplicity, lets call them record X and X+1 respectively.
5137 * Interesting extent entries have been denoted by letters.
5138 * If the letter is unchanged before and after split, it means
5139 * that the extent entry was not modified during the split.
5140 * A '.' means that the entry remains unchanged after the split
5141 * and is not relevant for our example. A '0' means that the
5142 * extent entry is empty.
5143 *
5144 * If there isn't sufficient contiguous free space to relocate
5145 * an extent (extent "C" below), we will have to break the one
5146 * extent into multiple smaller extents, and relocate each of
5147 * the smaller extents individually. The way we do this is by
5148 * finding the largest contiguous free space that is currently
5149 * available (N allocation blocks), and then convert extent "C"
5150 * into two extents, C1 and C2, that occupy exactly the same
5151 * allocation blocks as extent C. Extent C1 is the first
5152 * N allocation blocks of extent C, and extent C2 is the remainder
5153 * of extent C. Then we can relocate extent C1 since we know
5154 * we have enough contiguous free space to relocate it in its
5155 * entirety. We then repeat the process starting with extent C2.
5156 *
5157 * In record X, only the entries following entry C are shifted, and
5158 * the original entry C is replaced with two entries C1 and C2 which
5159 * are actually two extent entries for contiguous allocation blocks.
5160 *
5161 * Note that the entry E from record X is shifted into record X+1 as
5162 * the new first entry. Since the first entry of record X+1 is updated,
5163 * the FABN will also get updated with the blockCount of entry E.
5164 * This also results in shifting of all extent entries in record X+1.
5165 * Note that the number of empty entries after the split has been
5166 * changed from 3 to 2.
5167 *
5168 * Before:
5169 * record X record X+1
5170 * ---------------------===--------- ---------------------------------
5171 * | A | . | . | . | B | C | D | E | | F | . | . | . | G | 0 | 0 | 0 |
5172 * ---------------------===--------- ---------------------------------
5173 *
5174 * After:
5175 * ---------------------=======----- ---------------------------------
5176 * | A | . | . | . | B | C1| C2| D | | E | F | . | . | . | G | 0 | 0 |
5177 * ---------------------=======----- ---------------------------------
5178 *
5179 * C1.startBlock = C.startBlock
5180 * C1.blockCount = N
5181 *
5182 * C2.startBlock = C.startBlock + N
5183 * C2.blockCount = C.blockCount - N
5184 *
5185 * FABN = old FABN - E.blockCount
5186 *
5187 * Inputs:
5188 * extent_info - This is the structure that contains state about
5189 * the current file, extent, and extent record that
5190 * is being relocated. This structure is shared
5191 * among code that traverses through all the extents
5192 * of the file, code that relocates extents, and
5193 * code that splits the extent.
5194 * newBlockCount - The desired blockCount of the first extent after
5195 * a successful split operation.
5196 * Output:
5197 * Zero on success, non-zero on failure.
5198 */
5199 static int
5200 hfs_split_extent(struct hfs_reclaim_extent_info *extent_info, uint32_t newBlockCount)
5201 {
5202 int error = 0;
5203 int index = extent_info->extent_index; /* Index of the extent being split within the current record */
5204 int i;
5205 HFSPlusExtentDescriptor shift_extent; /* Extent entry that should be shifted into next extent record */
5206 HFSPlusExtentDescriptor last_extent;
5207 HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being manipulated */
5208 HFSPlusExtentRecord *extents_rec = NULL;
5209 HFSPlusExtentKey *extents_key = NULL;
5210 HFSPlusAttrRecord *xattr_rec = NULL;
5211 HFSPlusAttrKey *xattr_key = NULL;
5212 struct BTreeIterator iterator;
5213 struct FSBufferDescriptor btdata;
5214 uint16_t reclen;
5215 uint32_t read_recStartBlock; /* Starting allocation block number to read old extent record */
5216 uint32_t write_recStartBlock; /* Starting allocation block number to insert newly updated extent record */
5217 Boolean create_record = false;
5218 Boolean is_xattr;
5219 struct cnode *cp;
5220
5221 is_xattr = extent_info->is_xattr;
5222 extents = extent_info->extents;
5223 cp = VTOC(extent_info->vp);
5224
/* A zero-length first half means there is nothing to split. */
5225 if (newBlockCount == 0) {
5226 if (hfs_resize_debug) {
5227 printf ("hfs_split_extent: No splitting required for newBlockCount=0\n");
5228 }
5229 return error;
5230 }
5231
5232 if (hfs_resize_debug) {
5233 printf ("hfs_split_extent: Split record:%u recStartBlock=%u %u:(%u,%u) for %u blocks\n", extent_info->overflow_count, extent_info->recStartBlock, index, extents[index].startBlock, extents[index].blockCount, newBlockCount);
5234 }
5235
5236 /* Extents overflow btree can not have more than 8 extents.
5237 * No split allowed if the 8th extent is already used.
5238 */
5239 if ((extent_info->fileID == kHFSExtentsFileID) && (extents[kHFSPlusExtentDensity - 1].blockCount != 0)) {
5240 printf ("hfs_split_extent: Maximum 8 extents allowed for extents overflow btree, cannot split further.\n");
5241 error = ENOSPC;
5242 goto out;
5243 }
5244
5245 /* Determine the starting allocation block number for the following
5246 * overflow extent record, if any, before the current record
5247 * gets modified.
5248 */
5249 read_recStartBlock = extent_info->recStartBlock;
5250 for (i = 0; i < kHFSPlusExtentDensity; i++) {
5251 if (extents[i].blockCount == 0) {
5252 break;
5253 }
5254 read_recStartBlock += extents[i].blockCount;
5255 }
5256
5257 /* Shift and split */
5258 if (index == kHFSPlusExtentDensity-1) {
5259 /* The new extent created after split will go into following overflow extent record */
5260 shift_extent.startBlock = extents[index].startBlock + newBlockCount;
5261 shift_extent.blockCount = extents[index].blockCount - newBlockCount;
5262
5263 /* Last extent in the record will be split, so nothing to shift */
5264 } else {
5265 /* Splitting of extents can result in at most of one
5266 * extent entry to be shifted into following overflow extent
5267 * record. So, store the last extent entry for later.
5268 */
5269 shift_extent = extents[kHFSPlusExtentDensity-1];
5270 if ((hfs_resize_debug) && (shift_extent.blockCount != 0)) {
5271 printf ("hfs_split_extent: Save 7:(%u,%u) to shift into overflow record\n", shift_extent.startBlock, shift_extent.blockCount);
5272 }
5273
5274 /* Start shifting extent information from the end of the extent
5275 * record to the index where we want to insert the new extent.
5276 * Note that kHFSPlusExtentDensity-1 is already saved above, and
5277 * does not need to be shifted. The extent entry that is being
5278 * split does not get shifted.
5279 */
5280 for (i = kHFSPlusExtentDensity-2; i > index; i--) {
5281 if (hfs_resize_debug) {
5282 if (extents[i].blockCount) {
5283 printf ("hfs_split_extent: Shift %u:(%u,%u) to %u:(%u,%u)\n", i, extents[i].startBlock, extents[i].blockCount, i+1, extents[i].startBlock, extents[i].blockCount);
5284 }
5285 }
5286 extents[i+1] = extents[i];
5287 }
5288 }
5289
5290 if (index == kHFSPlusExtentDensity-1) {
5291 /* The second half of the extent being split will be the overflow
5292 * entry that will go into following overflow extent record. The
5293 * value has been stored in 'shift_extent' above, so there is
5294 * nothing to be done here.
5295 */
5296 } else {
5297 /* Update the values in the second half of the extent being split
5298 * before updating the first half of the split. Note that the
5299 * extent to split or first half of the split is at index 'index'
5300 * and a new extent or second half of the split will be inserted at
5301 * 'index+1' or into following overflow extent record.
5302 */
5303 extents[index+1].startBlock = extents[index].startBlock + newBlockCount;
5304 extents[index+1].blockCount = extents[index].blockCount - newBlockCount;
5305 }
5306 /* Update the extent being split, only the block count will change */
5307 extents[index].blockCount = newBlockCount;
5308
5309 if (hfs_resize_debug) {
5310 printf ("hfs_split_extent: Split %u:(%u,%u) and ", index, extents[index].startBlock, extents[index].blockCount);
5311 if (index != kHFSPlusExtentDensity-1) {
5312 printf ("%u:(%u,%u)\n", index+1, extents[index+1].startBlock, extents[index+1].blockCount);
5313 } else {
5314 printf ("overflow:(%u,%u)\n", shift_extent.startBlock, shift_extent.blockCount);
5315 }
5316 }
5317
5318 /* Write out information about the newly split extent to the disk */
5319 if (extent_info->catalog_fp) {
5320 /* (extent_info->catalog_fp != NULL) means the newly split
5321 * extent exists in the catalog record. This means that
5322 * the cnode was updated. Therefore, to write out the changes,
5323 * mark the cnode as modified. We cannot call hfs_update()
5324 * in this function because the caller hfs_reclaim_extent()
5325 * is holding the catalog lock currently.
5326 */
5327 cp->c_flag |= C_MODIFIED;
5328 } else {
5329 /* The newly split extent is for large EAs or is in overflow
5330 * extent record, so update it directly in the btree using the
5331 * iterator information from the shared extent_info structure
5332 */
5333 error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
5334 &(extent_info->btdata), extent_info->recordlen);
5335 if (error) {
5336 printf ("hfs_split_extent: fileID=%u BTReplaceRecord returned error=%d\n", extent_info->fileID, error);
5337 goto out;
5338 }
5339 }
5340
5341 /* No extent entry to be shifted into another extent overflow record */
5342 if (shift_extent.blockCount == 0) {
5343 if (hfs_resize_debug) {
5344 printf ("hfs_split_extent: No extent entry to be shifted into overflow records\n");
5345 }
5346 error = 0;
5347 goto out;
5348 }
5349
5350 /* The overflow extent entry has to be shifted into an extent
5351 * overflow record. This means that we might have to shift
5352 * extent entries from all subsequent overflow records by one.
5353 * We start iteration from the first record to the last record,
5354 * and shift the extent entry from one record to another.
5355 * We might have to create a new extent record for the last
5356 * extent entry for the file.
5357 */
5358
5359 /* Initialize iterator to search the next record */
5360 bzero(&iterator, sizeof(iterator));
5361 if (is_xattr) {
5362 /* Copy the key from the iterator that was used to update the modified attribute record. */
5363 xattr_key = (HFSPlusAttrKey *)&(iterator.key);
5364 bcopy((HFSPlusAttrKey *)&(extent_info->iterator->key), xattr_key, sizeof(HFSPlusAttrKey));
5365 /* Note: xattr_key->startBlock will be initialized later in the iteration loop */
5366
5367 MALLOC(xattr_rec, HFSPlusAttrRecord *,
5368 sizeof(HFSPlusAttrRecord), M_TEMP, M_WAITOK);
5369 if (xattr_rec == NULL) {
5370 error = ENOMEM;
5371 goto out;
5372 }
5373 btdata.bufferAddress = xattr_rec;
5374 btdata.itemSize = sizeof(HFSPlusAttrRecord);
5375 btdata.itemCount = 1;
5376 extents = xattr_rec->overflowExtents.extents;
5377 } else {
5378 /* Initialize the extent key for the current file */
5379 extents_key = (HFSPlusExtentKey *) &(iterator.key);
5380 extents_key->keyLength = kHFSPlusExtentKeyMaximumLength;
5381 extents_key->forkType = extent_info->forkType;
5382 extents_key->fileID = extent_info->fileID;
5383 /* Note: extents_key->startBlock will be initialized later in the iteration loop */
5384
5385 MALLOC(extents_rec, HFSPlusExtentRecord *,
5386 sizeof(HFSPlusExtentRecord), M_TEMP, M_WAITOK);
5387 if (extents_rec == NULL) {
5388 error = ENOMEM;
5389 goto out;
5390 }
5391 btdata.bufferAddress = extents_rec;
5392 btdata.itemSize = sizeof(HFSPlusExtentRecord);
5393 btdata.itemCount = 1;
5394 extents = extents_rec[0];
5395 }
5396
5397 /* The overflow extent entry has to be shifted into an extent
5398 * overflow record. This means that we might have to shift
5399 * extent entries from all subsequent overflow records by one.
5400 * We start iteration from the first record to the last record,
5401 * examine one extent record in each iteration and shift one
5402 * extent entry from one record to another. We might have to
5403 * create a new extent record for the last extent entry for the
5404 * file.
5405 *
5406 * If shift_extent.blockCount is non-zero, it means that there is
5407 * an extent entry that needs to be shifted into the next
5408 * overflow extent record. We keep on going till there are no such
5409 * entries left to be shifted. This will also change the starting
5410 * allocation block number of the extent record which is part of
5411 * the key for the extent record in each iteration. Note that
5412 * because the extent record key is changing while we are searching,
5413 * the record can not be updated directly, instead it has to be
5414 * deleted and inserted again.
5415 */
5416 while (shift_extent.blockCount) {
5417 if (hfs_resize_debug) {
5418 printf ("hfs_split_extent: Will shift (%u,%u) into overflow record with startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, read_recStartBlock);
5419 }
5420
5421 /* Search if there is any existing overflow extent record
5422 * that matches the current file and the logical start block
5423 * number.
5424 *
5425 * For this, the logical start block number in the key is
5426 * the value calculated based on the logical start block
5427 * number of the current extent record and the total number
5428 * of blocks existing in the current extent record.
5429 */
5430 if (is_xattr) {
5431 xattr_key->startBlock = read_recStartBlock;
5432 } else {
5433 extents_key->startBlock = read_recStartBlock;
5434 }
5435 error = BTSearchRecord(extent_info->fcb, &iterator, &btdata, &reclen, &iterator);
5436 if (error) {
5437 if (error != btNotFound) {
5438 printf ("hfs_split_extent: fileID=%u startBlock=%u BTSearchRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
5439 goto out;
5440 }
5441 /* No matching record was found, so create a new extent record.
5442 * Note: Since no record was found, we can't rely on the
5443 * btree key in the iterator any longer. This will be initialized
5444 * later before we insert the record.
5445 */
5446 create_record = true;
5447 }
5448
5449 /* The extra extent entry from the previous record is being inserted
5450 * as the first entry in the current extent record. This will change
5451 * the file allocation block number (FABN) of the current extent
5452 * record, which is the startBlock value from the extent record key.
5453 * Since one extra entry is being inserted in the record, the new
5454 * FABN for the record will be less than the old FABN by the number of blocks
5455 * in the new extent entry being inserted at the start. We have to
5456 * do this before we update read_recStartBlock to point at the
5457 * startBlock of the following record.
5458 */
5459 write_recStartBlock = read_recStartBlock - shift_extent.blockCount;
5460 if (hfs_resize_debug) {
5461 if (create_record) {
5462 printf ("hfs_split_extent: No records found for startBlock=%u, will create new with startBlock=%u\n", read_recStartBlock, write_recStartBlock);
5463 }
5464 }
5465
5466 /* Now update the read_recStartBlock to account for total number
5467 * of blocks in this extent record. It will now point to the
5468 * starting allocation block number for the next extent record.
5469 */
5470 for (i = 0; i < kHFSPlusExtentDensity; i++) {
5471 if (extents[i].blockCount == 0) {
5472 break;
5473 }
5474 read_recStartBlock += extents[i].blockCount;
5475 }
5476
5477 if (create_record == true) {
5478 /* Initialize new record content with only one extent entry */
5479 bzero(extents, sizeof(HFSPlusExtentRecord));
5480 /* The new record will contain only one extent entry */
5481 extents[0] = shift_extent;
5482 /* There are no more overflow extents to be shifted */
5483 shift_extent.startBlock = shift_extent.blockCount = 0;
5484
5485 if (is_xattr) {
5486 /* BTSearchRecord above returned btNotFound,
5487 * but since the attribute btree is never empty
5488 * if we are trying to insert new overflow
5489 * record for the xattrs, the extents_key will
5490 * contain correct data. So we don't need to
5491 * re-initialize it again like below.
5492 */
5493
5494 /* Initialize the new xattr record */
5495 xattr_rec->recordType = kHFSPlusAttrExtents;
5496 xattr_rec->overflowExtents.reserved = 0;
5497 reclen = sizeof(HFSPlusAttrExtents);
5498 } else {
5499 /* BTSearchRecord above returned btNotFound,
5500 * which means that extents_key content might
5501 * not correspond to the record that we are
5502 * trying to create, especially when the extents
5503 * overflow btree is empty. So we reinitialize
5504 * the extents_key again always.
5505 */
5506 extents_key->keyLength = kHFSPlusExtentKeyMaximumLength;
5507 extents_key->forkType = extent_info->forkType;
5508 extents_key->fileID = extent_info->fileID;
5509
5510 /* Initialize the new extent record */
5511 reclen = sizeof(HFSPlusExtentRecord);
5512 }
5513 } else {
5514 /* The overflow extent entry from previous record will be
5515 * the first entry in this extent record. If the last
5516 * extent entry in this record is valid, it will be shifted
5517 * into the following extent record as its first entry. So
5518 * save the last entry before shifting entries in current
5519 * record.
5520 */
5521 last_extent = extents[kHFSPlusExtentDensity-1];
5522
5523 /* Shift all entries by one index towards the end */
5524 for (i = kHFSPlusExtentDensity-2; i >= 0; i--) {
5525 extents[i+1] = extents[i];
5526 }
5527
5528 /* Overflow extent entry saved from previous record
5529 * is now the first entry in the current record.
5530 */
5531 extents[0] = shift_extent;
5532
5533 if (hfs_resize_debug) {
5534 printf ("hfs_split_extent: Shift overflow=(%u,%u) to record with updated startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, write_recStartBlock);
5535 }
5536
5537 /* The last entry from current record will be the
5538 * overflow entry which will be the first entry for
5539 * the following extent record.
5540 */
5541 shift_extent = last_extent;
5542
5543 /* Since the key->startBlock is being changed for this record,
5544 * it should be deleted and inserted with the new key.
5545 */
5546 error = BTDeleteRecord(extent_info->fcb, &iterator);
5547 if (error) {
5548 printf ("hfs_split_extent: fileID=%u startBlock=%u BTDeleteRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
5549 goto out;
5550 }
5551 if (hfs_resize_debug) {
5552 printf ("hfs_split_extent: Deleted extent record with startBlock=%u\n", (is_xattr ? xattr_key->startBlock : extents_key->startBlock));
5553 }
5554 }
5555
5556 /* Insert the newly created or modified extent record */
5557 bzero(&iterator.hint, sizeof(iterator.hint));
5558 if (is_xattr) {
5559 xattr_key->startBlock = write_recStartBlock;
5560 } else {
5561 extents_key->startBlock = write_recStartBlock;
5562 }
5563 error = BTInsertRecord(extent_info->fcb, &iterator, &btdata, reclen);
5564 if (error) {
5565 printf ("hfs_split_extent: fileID=%u, startBlock=%u BTInsertRecord error=%d\n", extent_info->fileID, write_recStartBlock, error);
5566 goto out;
5567 }
5568 if (hfs_resize_debug) {
5569 printf ("hfs_split_extent: Inserted extent record with startBlock=%u\n", write_recStartBlock);
5570 }
5571 }
5572
5573 out:
5574 /*
5575 * Extents overflow btree or attributes btree headers might have
5576 * been modified during the split/shift operation, so flush the
5577 * changes to the disk while we are inside journal transaction.
5578 * We should only be able to generate I/O that modifies the B-Tree
5579 * header nodes while we're in the middle of a journal transaction.
5580 * Otherwise it might result in panic during unmount.
5581 */
5582 BTFlushPath(extent_info->fcb);
5583
5584 if (extents_rec) {
5585 FREE (extents_rec, M_TEMP);
5586 }
5587 if (xattr_rec) {
5588 FREE (xattr_rec, M_TEMP);
5589 }
5590 return error;
5591 }
5592
5593
5594 /*
5595 * Relocate an extent if it lies beyond the expected end of volume.
5596 *
5597 * This function is called for every extent of the file being relocated.
5598 * It allocates space for relocation, copies the data, deallocates
5599 * the old extent, and updates the corresponding on-disk extent. If the function
5600 * does not find contiguous space to relocate an extent, it splits the
5601 * extent in smaller size to be able to relocate it out of the area of
5602 * disk being reclaimed. As an optimization, if an extent lies partially
5603 * in the area of the disk being reclaimed, it is split so that we only
5604 * have to relocate the area that was overlapping with the area of disk
5605 * being reclaimed.
5606 *
5607 * Note that every extent is relocated in its own transaction so that
5608 * they do not overwhelm the journal. This function handles the extent
5609 * record that exists in the catalog record, extent record from overflow
5610 * extents btree, and extents for large EAs.
5611 *
5612 * Inputs:
5613 * extent_info - This is the structure that contains state about
5614 * the current file, extent, and extent record that
5615 * is being relocated. This structure is shared
5616 * among code that traverses through all the extents
5617 * of the file, code that relocates extents, and
5618 * code that splits the extent.
5619 */
/*
 * Relocate one extent (extent_info->extents[extent_info->extent_index])
 * if any of its blocks lie at or beyond allocLimit.  Runs inside its own
 * journal transaction with the system-file locks given by
 * extent_info->lockflags held exclusive.  Returns 0 on success or an
 * errno/HFS error code; on failure any newly allocated blocks are freed.
 */
static int
hfs_reclaim_extent(struct hfsmount *hfsmp, const u_long allocLimit, struct hfs_reclaim_extent_info *extent_info, vfs_context_t context)
{
	int error = 0;
	int index;
	struct cnode *cp;
	u_int32_t oldStartBlock;
	u_int32_t oldBlockCount;
	u_int32_t newStartBlock;
	u_int32_t newBlockCount;
	u_int32_t roundedBlockCount;
	uint16_t node_size;
	uint32_t remainder_blocks;
	u_int32_t alloc_flags;
	int blocks_allocated = false;

	index = extent_info->extent_index;
	cp = VTOC(extent_info->vp);

	oldStartBlock = extent_info->extents[index].startBlock;
	oldBlockCount = extent_info->extents[index].blockCount;

	/* Extra-verbose trace, deliberately compiled out via "0 &&" */
	if (0 && hfs_resize_debug) {
		printf ("hfs_reclaim_extent: Examine record:%u recStartBlock=%u, %u:(%u,%u)\n", extent_info->overflow_count, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount);
	}

	/* If the current extent lies completely within allocLimit,
	 * it does not require any relocation.
	 */
	if ((oldStartBlock + oldBlockCount) <= allocLimit) {
		extent_info->cur_blockCount += oldBlockCount;
		return error;
	}

	/* Every extent should be relocated in its own transaction
	 * to make sure that we don't overflow the journal buffer.
	 */
	error = hfs_start_transaction(hfsmp);
	if (error) {
		return error;
	}
	extent_info->lockflags = hfs_systemfile_lock(hfsmp, extent_info->lockflags, HFS_EXCLUSIVE_LOCK);

	/* Check if the extent lies partially in the area to reclaim,
	 * i.e. it starts before allocLimit and ends beyond allocLimit.
	 * We have already skipped extents that lie completely within
	 * allocLimit in the check above, so we only check for the
	 * startBlock.  If it lies partially, split it so that we
	 * only relocate part of the extent.
	 */
	if (oldStartBlock < allocLimit) {
		newBlockCount = allocLimit - oldStartBlock;

		if (hfs_resize_debug) {
			int idx = extent_info->extent_index;
			printf ("hfs_reclaim_extent: Split straddling extent %u:(%u,%u) for %u blocks\n", idx, extent_info->extents[idx].startBlock, extent_info->extents[idx].blockCount, newBlockCount);
		}

		/* If the extent belongs to a btree, check and trim
		 * it to be multiple of the node size.
		 */
		if (extent_info->is_sysfile) {
			node_size = get_btree_nodesize(extent_info->vp);
			/* If the btree node size is less than the block size,
			 * splitting this extent will not split a node across
			 * different extents.  So we only check and trim if
			 * node size is more than the allocation block size.
			 */
			if (node_size > hfsmp->blockSize) {
				remainder_blocks = newBlockCount % (node_size / hfsmp->blockSize);
				if (remainder_blocks) {
					newBlockCount -= remainder_blocks;
					if (hfs_resize_debug) {
						printf ("hfs_reclaim_extent: Round-down newBlockCount to be multiple of nodeSize, node_allocblks=%u, old=%u, new=%u\n", node_size/hfsmp->blockSize, newBlockCount + remainder_blocks, newBlockCount);
					}
				}
			}
			/* The newBlockCount is zero because of rounding-down so that
			 * btree nodes are not split across extents.  Therefore this
			 * straddling extent across resize-boundary does not require
			 * splitting.  Skip over to relocating of complete extent.
			 */
			if (newBlockCount == 0) {
				if (hfs_resize_debug) {
					printf ("hfs_reclaim_extent: After round-down newBlockCount=0, skip split, relocate full extent\n");
				}
				goto relocate_full_extent;
			}
		}

		/* Split the extents into two parts --- the first extent lies
		 * completely within allocLimit and therefore does not require
		 * relocation.  The second extent will require relocation which
		 * will be handled when the caller calls this function again
		 * for the next extent.
		 */
		error = hfs_split_extent(extent_info, newBlockCount);
		if (error == 0) {
			/* Split success, no relocation required */
			goto out;
		}
		/* Split failed, so try to relocate entire extent */
		if (hfs_resize_debug) {
			int idx = extent_info->extent_index;
			printf ("hfs_reclaim_extent: Split straddling extent %u:(%u,%u) for %u blocks failed, relocate full extent\n", idx, extent_info->extents[idx].startBlock, extent_info->extents[idx].blockCount, newBlockCount);
		}
	}

relocate_full_extent:
	/* At this point, the current extent requires relocation.
	 * We will try to allocate space equal to the size of the extent
	 * being relocated first to try to relocate it without splitting.
	 * If the allocation fails, we will try to allocate contiguous
	 * blocks out of metadata zone.  If that allocation also fails,
	 * then we will take a whatever contiguous block run is returned
	 * by the allocation, split the extent into two parts, and then
	 * relocate the first splitted extent.
	 */
	alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS;
	if (extent_info->is_sysfile) {
		alloc_flags |= HFS_ALLOC_METAZONE;
	}

	error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags,
			&newStartBlock, &newBlockCount);
	if ((extent_info->is_sysfile == false) &&
	    ((error == dskFulErr) || (error == ENOSPC))) {
		/* For non-system files, try reallocating space in metadata zone */
		alloc_flags |= HFS_ALLOC_METAZONE;
		error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
				alloc_flags, &newStartBlock, &newBlockCount);
	}
	if ((error == dskFulErr) || (error == ENOSPC)) {
		/* We did not find desired contiguous space for this extent.
		 * So don't worry about getting contiguity anymore.  Also, allow using
		 * blocks that were recently deallocated.
		 */
		alloc_flags &= ~HFS_ALLOC_FORCECONTIG;
		alloc_flags |= HFS_ALLOC_FLUSHTXN;

		error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
				alloc_flags, &newStartBlock, &newBlockCount);
		if (error) {
			printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
			goto out;
		}
		blocks_allocated = true;

		/* The number of blocks allocated is less than the requested
		 * number of blocks.  For btree extents, check and trim the
		 * extent to be multiple of the node size.
		 */
		if (extent_info->is_sysfile) {
			node_size = get_btree_nodesize(extent_info->vp);
			if (node_size > hfsmp->blockSize) {
				remainder_blocks = newBlockCount % (node_size / hfsmp->blockSize);
				if (remainder_blocks) {
					roundedBlockCount = newBlockCount - remainder_blocks;
					/* Free tail-end blocks of the newly allocated extent */
					BlockDeallocate(hfsmp, newStartBlock + roundedBlockCount,
							newBlockCount - roundedBlockCount,
							HFS_ALLOC_SKIPFREEBLKS);
					newBlockCount = roundedBlockCount;
					if (hfs_resize_debug) {
						printf ("hfs_reclaim_extent: Fixing extent block count, node_blks=%u, old=%u, new=%u\n", node_size/hfsmp->blockSize, newBlockCount + remainder_blocks, newBlockCount);
					}
					if (newBlockCount == 0) {
						printf ("hfs_reclaim_extent: Not enough contiguous blocks available to relocate fileID=%d\n", extent_info->fileID);
						error = ENOSPC;
						goto out;
					}
				}
			}
		}

		/* The number of blocks allocated is less than the number of
		 * blocks requested, so split this extent --- the first extent
		 * will be relocated as part of this function call and the caller
		 * will handle relocating the second extent by calling this
		 * function again for the second extent.
		 */
		error = hfs_split_extent(extent_info, newBlockCount);
		if (error) {
			printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) split error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
			goto out;
		}
		oldBlockCount = newBlockCount;
	}
	if (error) {
		printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) contig BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
		goto out;
	}
	blocks_allocated = true;

	/* Copy data from old location to new location */
	error = hfs_copy_extent(hfsmp, extent_info->vp, oldStartBlock,
			newStartBlock, newBlockCount, context);
	if (error) {
		printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u)=>(%u,%u) hfs_copy_extent error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount, error);
		goto out;
	}

	/* Update the extent record with the new start block information */
	extent_info->extents[index].startBlock = newStartBlock;

	/* Sync the content back to the disk */
	if (extent_info->catalog_fp) {
		/* Update the extents in catalog record */
		if (extent_info->is_dirlink) {
			error = cat_update_dirlink(hfsmp, extent_info->forkType,
					extent_info->dirlink_desc, extent_info->dirlink_attr,
					&(extent_info->dirlink_fork->ff_data));
		} else {
			cp->c_flag |= C_MODIFIED;
			/* If this is a system file, sync volume headers on disk */
			if (extent_info->is_sysfile) {
				error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
			}
		}
	} else {
		/* Replace record for extents overflow or extents-based xattrs */
		error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
				&(extent_info->btdata), extent_info->recordlen);
	}
	if (error) {
		printf ("hfs_reclaim_extent: fileID=%u, update record error=%u\n", extent_info->fileID, error);
		goto out;
	}

	/* Deallocate the old extent */
	error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
	if (error) {
		printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockDeallocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
		goto out;
	}
	extent_info->blocks_relocated += newBlockCount;

	if (hfs_resize_debug) {
		printf ("hfs_reclaim_extent: Relocated record:%u %u:(%u,%u) to (%u,%u)\n", extent_info->overflow_count, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
	}

out:
	if (error != 0) {
		/* Undo the new allocation; the old extent is still intact */
		if (blocks_allocated == true) {
			BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
		}
	} else {
		/* On success, increment the total allocation blocks processed.
		 * Note: after a successful split (no relocation), newBlockCount
		 * holds the size of the first (retained) part of the extent. */
		extent_info->cur_blockCount += newBlockCount;
	}

	hfs_systemfile_unlock(hfsmp, extent_info->lockflags);

	/* For a non-system file, if an extent entry from catalog record
	 * was modified, sync the in-memory changes to the catalog record
	 * on disk before ending the transaction.
	 */
	if ((extent_info->catalog_fp) &&
	    (extent_info->is_sysfile == false)) {
		(void) hfs_update(extent_info->vp, MNT_WAIT);
	}

	hfs_end_transaction(hfsmp);

	return error;
}
5886
5887 /* Report intermediate progress during volume resize */
5888 static void
5889 hfs_truncatefs_progress(struct hfsmount *hfsmp)
5890 {
5891 u_int32_t cur_progress = 0;
5892
5893 hfs_resize_progress(hfsmp, &cur_progress);
5894 if (cur_progress > (hfsmp->hfs_resize_progress + 9)) {
5895 printf("hfs_truncatefs: %d%% done...\n", cur_progress);
5896 hfsmp->hfs_resize_progress = cur_progress;
5897 }
5898 return;
5899 }
5900
5901 /*
5902 * Reclaim space at the end of a volume for given file and forktype.
5903 *
5904 * This routine attempts to move any extent which contains allocation blocks
5905 * at or after "allocLimit." A separate transaction is used for every extent
5906 * that needs to be moved. If there is not contiguous space available for
5907 * moving an extent, it can be split into smaller extents. The contents of
5908 * any moved extents are read and written via the volume's device vnode --
5909 * NOT via "vp." During the move, moved blocks which are part of a transaction
5910 * have their physical block numbers invalidated so they will eventually be
5911 * written to their new locations.
5912 *
5913 * This function is also called for directory hard links. Directory hard links
5914 * are regular files with no data fork and resource fork that contains alias
5915 * information for backward compatibility with pre-Leopard systems. However
5916 * non-Mac OS X implementation can add/modify data fork or resource fork
5917 * information to directory hard links, so we check, and if required, relocate
5918 * both data fork and resource fork.
5919 *
5920 * Inputs:
5921 * hfsmp The volume being resized.
5922 * vp The vnode for the system file.
5923 * fileID ID of the catalog record that needs to be relocated
5924 * forktype The type of fork that needs relocated,
5925 * kHFSResourceForkType for resource fork,
5926 * kHFSDataForkType for data fork
5927 * allocLimit Allocation limit for the new volume size,
5928 * do not use this block or beyond. All extents
5929 * that use this block or any blocks beyond this limit
5930 * will be relocated.
5931 *
5932 * Side Effects:
5933 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
5934 * blocks that were relocated.
5935 */
static int
hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID,
		u_int8_t forktype, u_long allocLimit, vfs_context_t context)
{
	int error = 0;
	struct hfs_reclaim_extent_info *extent_info;
	int i;
	int lockflags = 0;
	struct cnode *cp;
	struct filefork *fp;
	int took_truncate_lock = false;
	int release_desc = false;
	HFSPlusExtentKey *key;

	/* If there is no vnode for this file, then there's nothing to do. */
	if (vp == NULL) {
		return 0;
	}

	cp = VTOC(vp);

	if (hfs_resize_debug) {
		const char *filename = (const char *) cp->c_desc.cd_nameptr;
		int namelen = cp->c_desc.cd_namelen;

		if (filename == NULL) {
			filename = "";
			namelen = 0;
		}
		printf("hfs_reclaim_file: reclaiming '%.*s'\n", namelen, filename);
	}

	/* Shared state for the per-extent relocation helpers; zeroed so all
	 * optional fields (iterator, dirlink_*) start out NULL for cleanup. */
	MALLOC(extent_info, struct hfs_reclaim_extent_info *,
	       sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
	if (extent_info == NULL) {
		return ENOMEM;
	}
	bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
	extent_info->vp = vp;
	extent_info->fileID = fileID;
	extent_info->forkType = forktype;
	extent_info->is_sysfile = vnode_issystem(vp);
	if (vnode_isdir(vp) && (cp->c_flag & C_HARDLINK)) {
		extent_info->is_dirlink = true;
	}
	/* We always need allocation bitmap and extent btree lock */
	lockflags = SFL_BITMAP | SFL_EXTENTS;
	if ((fileID == kHFSCatalogFileID) || (extent_info->is_dirlink == true)) {
		lockflags |= SFL_CATALOG;
	} else if (fileID == kHFSAttributesFileID) {
		lockflags |= SFL_ATTRIBUTE;
	} else if (fileID == kHFSStartupFileID) {
		lockflags |= SFL_STARTUP;
	}
	extent_info->lockflags = lockflags;
	extent_info->fcb = VTOF(hfsmp->hfs_extents_vp);

	/* Flush data associated with current file on disk.
	 *
	 * If the current vnode is directory hard link, no flushing of
	 * journal or vnode is required.  The current kernel does not
	 * modify data/resource fork of directory hard links, so nothing
	 * will be in the cache.  If a directory hard link is newly created,
	 * the resource fork data is written directly using devvp and
	 * the code that actually relocates data (hfs_copy_extent()) also
	 * uses devvp for its I/O --- so they will see a consistent copy.
	 */
	if (extent_info->is_sysfile) {
		/* If the current vnode is system vnode, flush journal
		 * to make sure that all data is written to the disk.
		 */
		error = hfs_journal_flush(hfsmp, TRUE);
		if (error) {
			printf ("hfs_reclaim_file: journal_flush returned %d\n", error);
			goto out;
		}
	} else if (extent_info->is_dirlink == false) {
		/* Flush all blocks associated with this regular file vnode.
		 * Normally there should not be buffer cache blocks for regular
		 * files, but for objects like symlinks, we can have buffer cache
		 * blocks associated with the vnode.  Therefore we call
		 * buf_flushdirtyblks() also.
		 */
		buf_flushdirtyblks(vp, 0, BUF_SKIP_LOCKED, "hfs_reclaim_file");

		/* Drop the cnode lock while taking the truncate lock to
		 * respect lock ordering, then re-acquire the cnode lock. */
		hfs_unlock(cp);
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		took_truncate_lock = true;
		(void) cluster_push(vp, 0);
		error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
		if (error) {
			goto out;
		}

		/* If the file no longer exists, nothing left to do */
		if (cp->c_flag & C_NOEXISTS) {
			error = 0;
			goto out;
		}

		/* Wait for any in-progress writes to this vnode to complete, so that we'll
		 * be copying consistent bits.  (Otherwise, it's possible that an async
		 * write will complete to the old extent after we read from it.  That
		 * could lead to corruption.)
		 */
		error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file");
		if (error) {
			goto out;
		}
	}

	if (hfs_resize_debug) {
		printf("hfs_reclaim_file: === Start reclaiming %sfork for %sid=%u ===\n", (forktype ? "rsrc" : "data"), (extent_info->is_dirlink ? "dirlink" : "file"), fileID);
	}

	if (extent_info->is_dirlink) {
		MALLOC(extent_info->dirlink_desc, struct cat_desc *,
		       sizeof(struct cat_desc), M_TEMP, M_WAITOK);
		MALLOC(extent_info->dirlink_attr, struct cat_attr *,
		       sizeof(struct cat_attr), M_TEMP, M_WAITOK);
		MALLOC(extent_info->dirlink_fork, struct filefork *,
		       sizeof(struct filefork), M_TEMP, M_WAITOK);
		if ((extent_info->dirlink_desc == NULL) ||
		    (extent_info->dirlink_attr == NULL) ||
		    (extent_info->dirlink_fork == NULL)) {
			error = ENOMEM;
			goto out;
		}

		/* Lookup catalog record for directory hard link and
		 * create a fake filefork for the value looked up from
		 * the disk.
		 */
		fp = extent_info->dirlink_fork;
		bzero(extent_info->dirlink_fork, sizeof(struct filefork));
		extent_info->dirlink_fork->ff_cp = cp;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
		error = cat_lookup_dirlink(hfsmp, fileID, forktype,
				extent_info->dirlink_desc, extent_info->dirlink_attr,
				&(extent_info->dirlink_fork->ff_data));
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			printf ("hfs_reclaim_file: cat_lookup_dirlink for fileID=%u returned error=%u\n", fileID, error);
			goto out;
		}
		release_desc = true;
	} else {
		fp = VTOF(vp);
	}

	extent_info->catalog_fp = fp;
	extent_info->recStartBlock = 0;
	extent_info->extents = extent_info->catalog_fp->ff_extents;
	/* Relocate extents from the catalog record */
	for (i = 0; i < kHFSPlusExtentDensity; ++i) {
		if (fp->ff_extents[i].blockCount == 0) {
			break;
		}
		extent_info->extent_index = i;
		error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
		if (error) {
			printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, error);
			goto out;
		}
	}

	/* If the number of allocation blocks processed for reclaiming
	 * are less than total number of blocks for the file, continuing
	 * working on overflow extents record.
	 */
	if (fp->ff_blocks <= extent_info->cur_blockCount) {
		/* Extra-verbose trace, deliberately compiled out via "0 &&" */
		if (0 && hfs_resize_debug) {
			printf ("hfs_reclaim_file: Nothing more to relocate, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
		}
		goto out;
	}

	if (hfs_resize_debug) {
		printf ("hfs_reclaim_file: Will check overflow records, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
	}

	MALLOC(extent_info->iterator, struct BTreeIterator *, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
	if (extent_info->iterator == NULL) {
		error = ENOMEM;
		goto out;
	}
	bzero(extent_info->iterator, sizeof(struct BTreeIterator));
	key = (HFSPlusExtentKey *) &(extent_info->iterator->key);
	key->keyLength = kHFSPlusExtentKeyMaximumLength;
	key->forkType = forktype;
	key->fileID = fileID;
	key->startBlock = extent_info->cur_blockCount;

	extent_info->btdata.bufferAddress = extent_info->record.overflow;
	extent_info->btdata.itemSize = sizeof(HFSPlusExtentRecord);
	extent_info->btdata.itemCount = 1;

	/* NULL catalog_fp tells hfs_reclaim_extent() to update the overflow
	 * btree record instead of the catalog record. */
	extent_info->catalog_fp = NULL;

	/* Search the first overflow extent with expected startBlock as 'cur_blockCount' */
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
			&(extent_info->btdata), &(extent_info->recordlen),
			extent_info->iterator);
	hfs_systemfile_unlock(hfsmp, lockflags);
	while (error == 0) {
		extent_info->overflow_count++;
		extent_info->recStartBlock = key->startBlock;
		extent_info->extents = extent_info->record.overflow;
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			if (extent_info->record.overflow[i].blockCount == 0) {
				goto out;
			}
			extent_info->extent_index = i;
			error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
			if (error) {
				printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, extent_info->record.overflow[i].startBlock, extent_info->record.overflow[i].blockCount, error);
				goto out;
			}
		}

		/* Look for more overflow records */
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
		error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
				extent_info->iterator, &(extent_info->btdata),
				&(extent_info->recordlen));
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			break;
		}
		/* Stop when we encounter a different file or fork. */
		if ((key->fileID != fileID) || (key->forkType != forktype)) {
			break;
		}
	}
	if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
		/* Running out of records is the normal end of iteration */
		error = 0;
	}

out:
	/* If any blocks were relocated, account them and report progress */
	if (extent_info->blocks_relocated) {
		hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
		hfs_truncatefs_progress(hfsmp);
		if (fileID < kHFSFirstUserCatalogNodeID) {
			printf ("hfs_reclaim_file: Relocated %u blocks from fileID=%u on \"%s\"\n",
					extent_info->blocks_relocated, fileID, hfsmp->vcbVN);
		}
	}
	if (extent_info->iterator) {
		FREE(extent_info->iterator, M_TEMP);
	}
	if (release_desc == true) {
		cat_releasedesc(extent_info->dirlink_desc);
	}
	if (extent_info->dirlink_desc) {
		FREE(extent_info->dirlink_desc, M_TEMP);
	}
	if (extent_info->dirlink_attr) {
		FREE(extent_info->dirlink_attr, M_TEMP);
	}
	if (extent_info->dirlink_fork) {
		FREE(extent_info->dirlink_fork, M_TEMP);
	}
	if ((extent_info->blocks_relocated != 0) && (extent_info->is_sysfile == false)) {
		(void) hfs_update(vp, MNT_WAIT);
	}
	if (took_truncate_lock) {
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
	}
	if (extent_info) {
		FREE(extent_info, M_TEMP);
	}
	if (hfs_resize_debug) {
		printf("hfs_reclaim_file: === Finished relocating %sfork for fileid=%u (error=%d) ===\n", (forktype ? "rsrc" : "data"), fileID, error);
	}

	return error;
}
6215
6216
6217 /*
6218 * This journal_relocate callback updates the journal info block to point
6219 * at the new journal location. This write must NOT be done using the
6220 * transaction. We must write the block immediately. We must also force
6221 * it to get to the media so that the new journal location will be seen by
6222 * the replay code before we can safely let journaled blocks be written
6223 * to their normal locations.
6224 *
6225 * The tests for journal_uses_fua below are mildly hacky. Since the journal
6226 * and the file system are both on the same device, I'm leveraging what
6227 * the journal has decided about FUA.
6228 */
/* Argument bundle passed to hfs_journal_relocate_callback() */
struct hfs_journal_relocate_args {
	struct hfsmount *hfsmp;		/* volume whose journal is being moved */
	vfs_context_t context;		/* caller's context, used for I/O credentials */
	u_int32_t newStartBlock;	/* new journal start, in allocation blocks */
	u_int32_t newBlockCount;	/* new journal length, in allocation blocks */
};
6235
/*
 * Rewrite the journal info block (JIB) so it points at the journal's new
 * location.  Invoked by journal_relocate(); see the block comment above
 * for why this write bypasses the transaction and must reach the media
 * before journaled blocks are written to their normal locations.
 * Returns 0 on success or an errno value.
 */
static errno_t
hfs_journal_relocate_callback(void *_args)
{
	int error;
	struct hfs_journal_relocate_args *args = _args;
	struct hfsmount *hfsmp = args->hfsmp;
	buf_t bp;
	JournalInfoBlock *jibp;

	/* Read the JIB directly from the device, converting the allocation
	 * block number to logical device blocks. */
	error = buf_meta_bread(hfsmp->hfs_devvp,
			hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
			hfsmp->blockSize, vfs_context_ucred(args->context), &bp);
	if (error) {
		printf("hfs_journal_relocate_callback: failed to read JIB (%d)\n", error);
		if (bp) {
			buf_brelse(bp);
		}
		return error;
	}
	/* On-disk JIB fields are big-endian byte offsets/lengths */
	jibp = (JournalInfoBlock*) buf_dataptr(bp);
	jibp->offset = SWAP_BE64((u_int64_t)args->newStartBlock * hfsmp->blockSize);
	jibp->size = SWAP_BE64((u_int64_t)args->newBlockCount * hfsmp->blockSize);
	if (journal_uses_fua(hfsmp->jnl))
		buf_markfua(bp);
	/* Synchronous write; buf_bwrite releases the buffer either way */
	error = buf_bwrite(bp);
	if (error) {
		printf("hfs_journal_relocate_callback: failed to write JIB (%d)\n", error);
		return error;
	}
	/* Without FUA, force the drive cache so the new JIB is durable before
	 * journaled blocks can land in their normal locations. */
	if (!journal_uses_fua(hfsmp->jnl)) {
		error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, args->context);
		if (error) {
			printf("hfs_journal_relocate_callback: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
			error = 0; /* Don't fail the operation. */
		}
	}

	return error;
}
6275
6276
6277 /* Type of resize operation in progress */
6278 #define HFS_RESIZE_TRUNCATE 1
6279 #define HFS_RESIZE_EXTEND 2
6280
6281 /*
6282 * Core function to relocate the journal file. This function takes the
6283 * journal size of the newly relocated journal --- the caller can
6284 * provide a new journal size if they want to change the size of
6285 * the journal. The function takes care of updating the journal info
6286 * block and all other data structures correctly.
6287 *
6288 * Note: This function starts a transaction and grabs the btree locks.
6289 */
6290 static int
6291 hfs_relocate_journal_file(struct hfsmount *hfsmp, u_int32_t jnl_size, int resize_type, vfs_context_t context)
6292 {
6293 int error;
6294 int journal_err;
6295 int lockflags;
6296 u_int32_t oldStartBlock;
6297 u_int32_t newStartBlock;
6298 u_int32_t oldBlockCount;
6299 u_int32_t newBlockCount;
6300 u_int32_t jnlBlockCount;
6301 u_int32_t alloc_skipfreeblks;
6302 struct cat_desc journal_desc;
6303 struct cat_attr journal_attr;
6304 struct cat_fork journal_fork;
6305 struct hfs_journal_relocate_args callback_args;
6306
6307 /* Calculate the number of allocation blocks required for the journal */
6308 jnlBlockCount = howmany(jnl_size, hfsmp->blockSize);
6309
6310 /*
6311 * During truncatefs(), the volume free block count is updated
6312 * before relocating data and reflects the total number of free
6313 * blocks that will exist on volume after the resize is successful.
6314 * This means that the allocation blocks required for relocation
6315 * have already been reserved and accounted for in the free block
6316 * count. Therefore, block allocation and deallocation routines
6317 * can skip the free block check by passing HFS_ALLOC_SKIPFREEBLKS
6318 * flag.
6319 *
6320 * This special handling is not required when the file system
6321 * is being extended as we want all the allocated and deallocated
6322 * blocks to be accounted for correctly.
6323 */
6324 if (resize_type == HFS_RESIZE_TRUNCATE) {
6325 alloc_skipfreeblks = HFS_ALLOC_SKIPFREEBLKS;
6326 } else {
6327 alloc_skipfreeblks = 0;
6328 }
6329
6330 error = hfs_start_transaction(hfsmp);
6331 if (error) {
6332 printf("hfs_relocate_journal_file: hfs_start_transaction returned %d\n", error);
6333 return error;
6334 }
6335 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
6336
6337 error = BlockAllocate(hfsmp, 1, jnlBlockCount, jnlBlockCount,
6338 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_FLUSHTXN | alloc_skipfreeblks,
6339 &newStartBlock, &newBlockCount);
6340 if (error) {
6341 printf("hfs_relocate_journal_file: BlockAllocate returned %d\n", error);
6342 goto fail;
6343 }
6344 if (newBlockCount != jnlBlockCount) {
6345 printf("hfs_relocate_journal_file: newBlockCount != jnlBlockCount (%u, %u)\n", newBlockCount, jnlBlockCount);
6346 goto free_fail;
6347 }
6348
6349 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlfileid, 1, 0, &journal_desc, &journal_attr, &journal_fork);
6350 if (error) {
6351 printf("hfs_relocate_journal_file: cat_idlookup returned %d\n", error);
6352 goto free_fail;
6353 }
6354
6355 oldStartBlock = journal_fork.cf_extents[0].startBlock;
6356 oldBlockCount = journal_fork.cf_extents[0].blockCount;
6357 error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, alloc_skipfreeblks);
6358 if (error) {
6359 printf("hfs_relocate_journal_file: BlockDeallocate returned %d\n", error);
6360 goto free_fail;
6361 }
6362
6363 /* Update the catalog record for .journal */
6364 journal_fork.cf_size = newBlockCount * hfsmp->blockSize;
6365 journal_fork.cf_extents[0].startBlock = newStartBlock;
6366 journal_fork.cf_extents[0].blockCount = newBlockCount;
6367 journal_fork.cf_blocks = newBlockCount;
6368 error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL);
6369 cat_releasedesc(&journal_desc); /* all done with cat descriptor */
6370 if (error) {
6371 printf("hfs_relocate_journal_file: cat_update returned %d\n", error);
6372 goto free_fail;
6373 }
6374
6375 /*
6376 * If the journal is part of the file system, then tell the journal
6377 * code about the new location. If the journal is on an external
6378 * device, then just keep using it as-is.
6379 */
6380 if (hfsmp->jvp == hfsmp->hfs_devvp) {
6381 callback_args.hfsmp = hfsmp;
6382 callback_args.context = context;
6383 callback_args.newStartBlock = newStartBlock;
6384 callback_args.newBlockCount = newBlockCount;
6385
6386 error = journal_relocate(hfsmp->jnl, (off_t)newStartBlock*hfsmp->blockSize,
6387 (off_t)newBlockCount*hfsmp->blockSize, 0,
6388 hfs_journal_relocate_callback, &callback_args);
6389 if (error) {
6390 /* NOTE: journal_relocate will mark the journal invalid. */
6391 printf("hfs_relocate_journal_file: journal_relocate returned %d\n", error);
6392 goto fail;
6393 }
6394 if (hfs_resize_debug) {
6395 printf ("hfs_relocate_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
6396 }
6397 hfsmp->jnl_start = newStartBlock;
6398 hfsmp->jnl_size = (off_t)newBlockCount * hfsmp->blockSize;
6399 }
6400
6401 hfs_systemfile_unlock(hfsmp, lockflags);
6402 error = hfs_end_transaction(hfsmp);
6403 if (error) {
6404 printf("hfs_relocate_journal_file: hfs_end_transaction returned %d\n", error);
6405 }
6406
6407 return error;
6408
6409 free_fail:
6410 journal_err = BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
6411 if (journal_err) {
6412 printf("hfs_relocate_journal_file: BlockDeallocate returned %d\n", error);
6413 hfs_mark_volume_inconsistent(hfsmp);
6414 }
6415 fail:
6416 hfs_systemfile_unlock(hfsmp, lockflags);
6417 (void) hfs_end_transaction(hfsmp);
6418 if (hfs_resize_debug) {
6419 printf ("hfs_relocate_journal_file: Error relocating journal file (error=%d)\n", error);
6420 }
6421 return error;
6422 }
6423
6424
6425 /*
6426 * Relocate the journal file when the file system is being truncated.
6427 * We do not down-size the journal when the file system size is
6428 * reduced, so we always provide the current journal size to the
6429 * relocate code.
6430 */
6431 static int
6432 hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
6433 {
6434 int error = 0;
6435 u_int32_t startBlock;
6436 u_int32_t blockCount = hfsmp->jnl_size / hfsmp->blockSize;
6437
6438 /*
6439 * Figure out the location of the .journal file. When the journal
6440 * is on an external device, we need to look up the .journal file.
6441 */
6442 if (hfsmp->jvp == hfsmp->hfs_devvp) {
6443 startBlock = hfsmp->jnl_start;
6444 blockCount = hfsmp->jnl_size / hfsmp->blockSize;
6445 } else {
6446 u_int32_t fileid;
6447 u_int32_t old_jnlfileid;
6448 struct cat_attr attr;
6449 struct cat_fork fork;
6450
6451 /*
6452 * The cat_lookup inside GetFileInfo will fail because hfs_jnlfileid
6453 * is set, and it is trying to hide the .journal file. So temporarily
6454 * unset the field while calling GetFileInfo.
6455 */
6456 old_jnlfileid = hfsmp->hfs_jnlfileid;
6457 hfsmp->hfs_jnlfileid = 0;
6458 fileid = GetFileInfo(hfsmp, kHFSRootFolderID, ".journal", &attr, &fork);
6459 hfsmp->hfs_jnlfileid = old_jnlfileid;
6460 if (fileid != old_jnlfileid) {
6461 printf("hfs_reclaim_journal_file: cannot find .journal file!\n");
6462 return EIO;
6463 }
6464
6465 startBlock = fork.cf_extents[0].startBlock;
6466 blockCount = fork.cf_extents[0].blockCount;
6467 }
6468
6469 if (startBlock + blockCount <= allocLimit) {
6470 /* The journal file does not require relocation */
6471 return 0;
6472 }
6473
6474 error = hfs_relocate_journal_file(hfsmp, blockCount * hfsmp->blockSize, HFS_RESIZE_TRUNCATE, context);
6475 if (error == 0) {
6476 hfsmp->hfs_resize_blocksmoved += blockCount;
6477 hfs_truncatefs_progress(hfsmp);
6478 printf ("hfs_reclaim_journal_file: Relocated %u blocks from journal on \"%s\"\n",
6479 blockCount, hfsmp->vcbVN);
6480 }
6481
6482 return error;
6483 }
6484
6485
6486 /*
6487 * Move the journal info block to a new location. We have to make sure the
6488 * new copy of the journal info block gets to the media first, then change
6489 * the field in the volume header and the catalog record.
6490 */
6491 static int
6492 hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
6493 {
6494 int error;
6495 int journal_err;
6496 int lockflags;
6497 u_int32_t oldBlock;
6498 u_int32_t newBlock;
6499 u_int32_t blockCount;
6500 struct cat_desc jib_desc;
6501 struct cat_attr jib_attr;
6502 struct cat_fork jib_fork;
6503 buf_t old_bp, new_bp;
6504
6505 if (hfsmp->vcbJinfoBlock <= allocLimit) {
6506 /* The journal info block does not require relocation */
6507 return 0;
6508 }
6509
6510 error = hfs_start_transaction(hfsmp);
6511 if (error) {
6512 printf("hfs_reclaim_journal_info_block: hfs_start_transaction returned %d\n", error);
6513 return error;
6514 }
6515 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
6516
6517 error = BlockAllocate(hfsmp, 1, 1, 1,
6518 HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS | HFS_ALLOC_FLUSHTXN,
6519 &newBlock, &blockCount);
6520 if (error) {
6521 printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error);
6522 goto fail;
6523 }
6524 if (blockCount != 1) {
6525 printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount);
6526 goto free_fail;
6527 }
6528
6529 /* Copy the old journal info block content to the new location */
6530 error = buf_meta_bread(hfsmp->hfs_devvp,
6531 hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
6532 hfsmp->blockSize, vfs_context_ucred(context), &old_bp);
6533 if (error) {
6534 printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error);
6535 if (old_bp) {
6536 buf_brelse(old_bp);
6537 }
6538 goto free_fail;
6539 }
6540 new_bp = buf_getblk(hfsmp->hfs_devvp,
6541 newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
6542 hfsmp->blockSize, 0, 0, BLK_META);
6543 bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize);
6544 buf_brelse(old_bp);
6545 if (journal_uses_fua(hfsmp->jnl))
6546 buf_markfua(new_bp);
6547 error = buf_bwrite(new_bp);
6548 if (error) {
6549 printf("hfs_reclaim_journal_info_block: failed to write new JIB (%d)\n", error);
6550 goto free_fail;
6551 }
6552 if (!journal_uses_fua(hfsmp->jnl)) {
6553 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
6554 if (error) {
6555 printf("hfs_reclaim_journal_info_block: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
6556 /* Don't fail the operation. */
6557 }
6558 }
6559
6560 /* Deallocate the old block once the new one has the new valid content */
6561 error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS);
6562 if (error) {
6563 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
6564 goto free_fail;
6565 }
6566
6567
6568 /* Update the catalog record for .journal_info_block */
6569 error = cat_idlookup(hfsmp, hfsmp->hfs_jnlinfoblkid, 1, 0, &jib_desc, &jib_attr, &jib_fork);
6570 if (error) {
6571 printf("hfs_reclaim_journal_info_block: cat_idlookup returned %d\n", error);
6572 goto fail;
6573 }
6574 oldBlock = jib_fork.cf_extents[0].startBlock;
6575 jib_fork.cf_size = hfsmp->blockSize;
6576 jib_fork.cf_extents[0].startBlock = newBlock;
6577 jib_fork.cf_extents[0].blockCount = 1;
6578 jib_fork.cf_blocks = 1;
6579 error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL);
6580 cat_releasedesc(&jib_desc); /* all done with cat descriptor */
6581 if (error) {
6582 printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error);
6583 goto fail;
6584 }
6585
6586 /* Update the pointer to the journal info block in the volume header. */
6587 hfsmp->vcbJinfoBlock = newBlock;
6588 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
6589 if (error) {
6590 printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error);
6591 goto fail;
6592 }
6593 hfs_systemfile_unlock(hfsmp, lockflags);
6594 error = hfs_end_transaction(hfsmp);
6595 if (error) {
6596 printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error);
6597 }
6598 error = hfs_journal_flush(hfsmp, FALSE);
6599 if (error) {
6600 printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error);
6601 }
6602
6603 /* Account for the block relocated and print progress */
6604 hfsmp->hfs_resize_blocksmoved += 1;
6605 hfs_truncatefs_progress(hfsmp);
6606 if (!error) {
6607 printf ("hfs_reclaim_journal_info: Relocated 1 block from journal info on \"%s\"\n",
6608 hfsmp->vcbVN);
6609 if (hfs_resize_debug) {
6610 printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount);
6611 }
6612 }
6613 return error;
6614
6615 free_fail:
6616 journal_err = BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS);
6617 if (journal_err) {
6618 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
6619 hfs_mark_volume_inconsistent(hfsmp);
6620 }
6621
6622 fail:
6623 hfs_systemfile_unlock(hfsmp, lockflags);
6624 (void) hfs_end_transaction(hfsmp);
6625 if (hfs_resize_debug) {
6626 printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error);
6627 }
6628 return error;
6629 }
6630
6631
6632 static u_int64_t
6633 calculate_journal_size(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count)
6634 {
6635 u_int64_t journal_size;
6636 u_int32_t journal_scale;
6637
6638 #define DEFAULT_JOURNAL_SIZE (8*1024*1024)
6639 #define MAX_JOURNAL_SIZE (512*1024*1024)
6640
6641 /* Calculate the journal size for this volume. We want
6642 * at least 8 MB of journal for each 100 GB of disk space.
6643 * We cap the size at 512 MB, unless the allocation block
6644 * size is larger, in which case, we use one allocation
6645 * block.
6646 */
6647 journal_scale = (sector_size * sector_count) / ((u_int64_t)100 * 1024 * 1024 * 1024);
6648 journal_size = DEFAULT_JOURNAL_SIZE * (journal_scale + 1);
6649 if (journal_size > MAX_JOURNAL_SIZE) {
6650 journal_size = MAX_JOURNAL_SIZE;
6651 }
6652 if (journal_size < hfsmp->blockSize) {
6653 journal_size = hfsmp->blockSize;
6654 }
6655 return journal_size;
6656 }
6657
6658
6659 /*
6660 * Calculate the expected journal size based on current partition size.
6661 * If the size of the current journal is less than the calculated size,
6662 * force journal relocation with the new journal size.
6663 */
6664 static int
6665 hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context)
6666 {
6667 int error = 0;
6668 u_int64_t calc_journal_size;
6669
6670 if (hfsmp->jvp != hfsmp->hfs_devvp) {
6671 if (hfs_resize_debug) {
6672 printf("hfs_extend_journal: not resizing the journal because it is on an external device.\n");
6673 }
6674 return 0;
6675 }
6676
6677 calc_journal_size = calculate_journal_size(hfsmp, sector_size, sector_count);
6678 if (calc_journal_size <= hfsmp->jnl_size) {
6679 /* The journal size requires no modification */
6680 goto out;
6681 }
6682
6683 if (hfs_resize_debug) {
6684 printf ("hfs_extend_journal: journal old=%u, new=%qd\n", hfsmp->jnl_size, calc_journal_size);
6685 }
6686
6687 /* Extend the journal to the new calculated size */
6688 error = hfs_relocate_journal_file(hfsmp, calc_journal_size, HFS_RESIZE_EXTEND, context);
6689 if (error == 0) {
6690 printf ("hfs_extend_journal: Extended journal size to %u bytes on \"%s\"\n",
6691 hfsmp->jnl_size, hfsmp->vcbVN);
6692 }
6693 out:
6694 return error;
6695 }
6696
6697
/*
 * This function traverses through all extended attribute records for a given
 * fileID, and calls function that reclaims data blocks that exist in the
 * area of the disk being reclaimed which in turn is responsible for allocating
 * new space, copying extent data, deallocating new space, and if required,
 * splitting the extent.
 *
 * Note: The caller has already acquired the cnode lock on the file. Therefore
 * we are assured that no other thread would be creating/deleting/modifying
 * extended attributes for this file.
 *
 * Side Effects:
 * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
 * blocks that were relocated.
 *
 * Returns:
 * 0 on success, non-zero on failure.
 */
static int
hfs_reclaim_xattr(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, u_int32_t allocLimit, vfs_context_t context)
{
	int error = 0;
	struct hfs_reclaim_extent_info *extent_info;
	int i;
	HFSPlusAttrKey *key;
	int *lockflags;

	if (hfs_resize_debug) {
		printf("hfs_reclaim_xattr: === Start reclaiming xattr for id=%u ===\n", fileID);
	}

	/* State shared with hfs_reclaim_extent(): current record, extents
	 * array, iterator, and lock flags all live in this one structure.
	 */
	MALLOC(extent_info, struct hfs_reclaim_extent_info *,
	       sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
	if (extent_info == NULL) {
		return ENOMEM;
	}
	bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
	extent_info->vp = vp;
	extent_info->fileID = fileID;
	extent_info->is_xattr = true;
	extent_info->is_sysfile = vnode_issystem(vp);
	extent_info->fcb = VTOF(hfsmp->hfs_attribute_vp);
	/* hfs_reclaim_extent() drops/retakes these locks around copying data,
	 * so the flags are shared via a pointer into extent_info.
	 */
	lockflags = &(extent_info->lockflags);
	*lockflags = SFL_ATTRIBUTE | SFL_BITMAP;

	/* Initialize iterator from the extent_info structure */
	MALLOC(extent_info->iterator, struct BTreeIterator *,
	       sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
	if (extent_info->iterator == NULL) {
		error = ENOMEM;
		goto out;
	}
	bzero(extent_info->iterator, sizeof(struct BTreeIterator));

	/* Build attribute key */
	key = (HFSPlusAttrKey *)&(extent_info->iterator->key);
	error = hfs_buildattrkey(fileID, NULL, key);
	if (error) {
		goto out;
	}

	/* Initialize btdata from extent_info structure.  Note that the
	 * buffer pointer actually points to the xattr record from the
	 * extent_info structure itself.
	 */
	extent_info->btdata.bufferAddress = &(extent_info->record.xattr);
	extent_info->btdata.itemSize = sizeof(HFSPlusAttrRecord);
	extent_info->btdata.itemCount = 1;

	/*
	 * Sync all extent-based attribute data to the disk.
	 *
	 * All extent-based attribute data I/O is performed via cluster
	 * I/O using a virtual file that spans across entire file system
	 * space.
	 */
	hfs_lock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
	(void)cluster_push(hfsmp->hfs_attrdata_vp, 0);
	error = vnode_waitforwrites(hfsmp->hfs_attrdata_vp, 0, 0, 0, "hfs_reclaim_xattr");
	hfs_unlock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_LOCK_DEFAULT);
	if (error) {
		goto out;
	}

	/* Search for extended attribute for current file.  This
	 * will place the iterator before the first matching record.
	 */
	*lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
	error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
			&(extent_info->btdata), &(extent_info->recordlen),
			extent_info->iterator);
	hfs_systemfile_unlock(hfsmp, *lockflags);
	if (error) {
		if (error != btNotFound) {
			goto out;
		}
		/* btNotFound is expected here, so just mask it */
		error = 0;
	}

	while (1) {
		/* Iterate to the next record */
		*lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
		error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
				extent_info->iterator, &(extent_info->btdata),
				&(extent_info->recordlen));
		hfs_systemfile_unlock(hfsmp, *lockflags);

		/* Stop the iteration if we encounter end of btree or xattr with different fileID */
		if (error || key->fileID != fileID) {
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}

		/* We only care about extent-based EAs */
		if ((extent_info->record.xattr.recordType != kHFSPlusAttrForkData) &&
		    (extent_info->record.xattr.recordType != kHFSPlusAttrExtents)) {
			continue;
		}

		/* Point extent_info->extents at the extent array embedded in the
		 * record just read; hfs_reclaim_extent() updates it in place.
		 */
		if (extent_info->record.xattr.recordType == kHFSPlusAttrForkData) {
			extent_info->overflow_count = 0;
			extent_info->extents = extent_info->record.xattr.forkData.theFork.extents;
		} else if (extent_info->record.xattr.recordType == kHFSPlusAttrExtents) {
			extent_info->overflow_count++;
			extent_info->extents = extent_info->record.xattr.overflowExtents.extents;
		}

		extent_info->recStartBlock = key->startBlock;
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			/* A zero-length extent terminates the array. */
			if (extent_info->extents[i].blockCount == 0) {
				break;
			}
			extent_info->extent_index = i;
			error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
			if (error) {
				printf ("hfs_reclaim_xattr: fileID=%u hfs_reclaim_extent error=%d\n", fileID, error);
				goto out;
			}
		}
	}

out:
	/* If any blocks were relocated, account them and report progress */
	if (extent_info->blocks_relocated) {
		hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
		hfs_truncatefs_progress(hfsmp);
	}
	if (extent_info->iterator) {
		FREE(extent_info->iterator, M_TEMP);
	}
	if (extent_info) {
		FREE(extent_info, M_TEMP);
	}
	if (hfs_resize_debug) {
		printf("hfs_reclaim_xattr: === Finished relocating xattr for fileid=%u (error=%d) ===\n", fileID, error);
	}
	return error;
}
6859
/*
 * Reclaim any extent-based extended attributes allocation blocks from
 * the area of the disk that is being truncated.
 *
 * The function traverses the attribute btree to find out the fileIDs
 * of the extended attributes that need to be relocated.  For every
 * file whose large EA requires relocation, it looks up the cnode and
 * calls hfs_reclaim_xattr() to do all the work for allocating
 * new space, copying data, deallocating old space, and if required,
 * splitting the extents.
 *
 * Inputs:
 * allocLimit - starting block of the area being reclaimed
 *
 * Returns:
 * returns 0 on success, non-zero on failure.
 */
static int
hfs_reclaim_xattrspace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
{
	int error = 0;
	FCB *fcb;
	struct BTreeIterator *iterator = NULL;
	struct FSBufferDescriptor btdata;
	HFSPlusAttrKey *key;
	HFSPlusAttrRecord rec;
	int lockflags = 0;
	cnid_t prev_fileid = 0;	/* last fileID whose xattrs were relocated */
	struct vnode *vp;
	int need_relocate;
	int btree_operation;
	u_int32_t files_moved = 0;
	u_int32_t prev_blocksmoved;
	int i;

	fcb = VTOF(hfsmp->hfs_attribute_vp);
	/* Store the value to print total blocks moved by this function in end */
	prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return ENOMEM;
	}
	bzero(iterator, sizeof(*iterator));
	key = (HFSPlusAttrKey *)&iterator->key;
	btdata.bufferAddress = &rec;
	btdata.itemSize = sizeof(rec);
	btdata.itemCount = 1;

	need_relocate = false;
	btree_operation = kBTreeFirstRecord;
	/* Traverse the attribute btree to find extent-based EAs to reclaim */
	while (1) {
		/* Shared lock is enough to read; hfs_reclaim_xattr() takes the
		 * exclusive lock itself when it actually moves blocks.
		 */
		lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
		error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			/* End-of-tree conditions are the normal loop exit. */
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}
		btree_operation = kBTreeNextRecord;

		/* If the extents of current fileID were already relocated, skip it */
		if (prev_fileid == key->fileID) {
			continue;
		}

		/* Check if any of the extents in the current record need to be relocated */
		need_relocate = false;
		switch(rec.recordType) {
		case kHFSPlusAttrForkData:
			for (i = 0; i < kHFSPlusExtentDensity; i++) {
				if (rec.forkData.theFork.extents[i].blockCount == 0) {
					break;
				}
				if ((rec.forkData.theFork.extents[i].startBlock +
				     rec.forkData.theFork.extents[i].blockCount) > allocLimit) {
					need_relocate = true;
					break;
				}
			}
			break;

		case kHFSPlusAttrExtents:
			for (i = 0; i < kHFSPlusExtentDensity; i++) {
				if (rec.overflowExtents.extents[i].blockCount == 0) {
					break;
				}
				if ((rec.overflowExtents.extents[i].startBlock +
				     rec.overflowExtents.extents[i].blockCount) > allocLimit) {
					need_relocate = true;
					break;
				}
			}
			break;
		};

		/* Continue iterating to next attribute record */
		if (need_relocate == false) {
			continue;
		}

		/* Look up the vnode for corresponding file.  The cnode
		 * will be locked which will ensure that no one modifies
		 * the xattrs when we are relocating them.
		 *
		 * We want to allow open-unlinked files to be moved,
		 * so provide allow_deleted == 1 for hfs_vget().
		 */
		if (hfs_vget(hfsmp, key->fileID, &vp, 0, 1) != 0) {
			/* Best effort: a file we cannot look up is simply skipped. */
			continue;
		}

		error = hfs_reclaim_xattr(hfsmp, vp, key->fileID, allocLimit, context);
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		if (error) {
			printf ("hfs_reclaim_xattrspace: Error relocating xattrs for fileid=%u (error=%d)\n", key->fileID, error);
			break;
		}
		prev_fileid = key->fileID;
		files_moved++;
	}

	if (files_moved) {
		printf("hfs_reclaim_xattrspace: Relocated %u xattr blocks from %u files on \"%s\"\n",
				(hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
				files_moved, hfsmp->vcbVN);
	}

	kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	return error;
}
6994
/*
 * Reclaim blocks from regular files.
 *
 * This function iterates over all the record in catalog btree looking
 * for files with extents that overlap into the space we're trying to
 * free up.  If a file extent requires relocation, it looks up the vnode
 * and calls function to relocate the data.
 *
 * Returns:
 * Zero on success, non-zero on failure.
 */
static int
hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
{
	int error;
	FCB *fcb;
	struct BTreeIterator *iterator = NULL;
	struct FSBufferDescriptor btdata;
	int btree_operation;
	int lockflags;
	struct HFSPlusCatalogFile filerec;
	struct vnode *vp;
	struct vnode *rvp;
	struct filefork *datafork;
	u_int32_t files_moved = 0;
	u_int32_t prev_blocksmoved;

#if CONFIG_PROTECT
	int keys_generated = 0;
#endif

	fcb = VTOF(hfsmp->hfs_catalog_vp);
	/* Store the value to print total blocks moved by this function at the end */
	prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		error = ENOMEM;
		goto reclaim_filespace_done;
	}

#if CONFIG_PROTECT
	/*
	 * For content-protected filesystems, we may need to relocate files that
	 * are encrypted.  If they use the new-style offset-based IVs, then
	 * we can move them regardless of the lock state.  We create a temporary
	 * key here that we use to read/write the data, then we discard it at the
	 * end of the function.
	 */
	if (cp_fs_protected (hfsmp->hfs_mp)) {
		int needs = 0;
		error = cp_needs_tempkeys(hfsmp, &needs);

		if ((error == 0) && (needs)) {
			error = cp_entry_gentempkeys(&hfsmp->hfs_resize_cpentry, hfsmp);
			if (error == 0) {
				keys_generated = 1;
			}
		}

		if (error) {
			printf("hfs_reclaimspace: Error generating temporary keys for resize (%d)\n", error);
			goto reclaim_filespace_done;
		}
	}

#endif

	bzero(iterator, sizeof(*iterator));

	btdata.bufferAddress = &filerec;
	btdata.itemSize = sizeof(filerec);
	btdata.itemCount = 1;

	btree_operation = kBTreeFirstRecord;
	while (1) {
		/* Shared lock only while reading the catalog record; the heavy
		 * lifting in hfs_reclaim_file() takes its own locks.
		 */
		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			/* End-of-tree conditions are the normal loop exit. */
			if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
				error = 0;
			}
			break;
		}
		btree_operation = kBTreeNextRecord;

		if (filerec.recordType != kHFSPlusFileRecord) {
			continue;
		}

		/* Check if any of the extents require relocation */
		if (hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec) == false) {
			continue;
		}

		/* We want to allow open-unlinked files to be moved, so allow_deleted == 1 */
		if (hfs_vget(hfsmp, filerec.fileID, &vp, 0, 1) != 0) {
			if (hfs_resize_debug) {
				printf("hfs_reclaim_filespace: hfs_vget(%u) failed.\n", filerec.fileID);
			}
			continue;
		}

		/* If data fork exists or item is a directory hard link, relocate blocks */
		datafork = VTOF(vp);
		if ((datafork && datafork->ff_blocks > 0) || vnode_isdir(vp)) {
			error = hfs_reclaim_file(hfsmp, vp, filerec.fileID,
					kHFSDataForkType, allocLimit, context);
			if (error) {
				printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
				hfs_unlock(VTOC(vp));
				vnode_put(vp);
				break;
			}
		}

		/* If resource fork exists or item is a directory hard link, relocate blocks */
		if (((VTOC(vp)->c_blocks - (datafork ? datafork->ff_blocks : 0)) > 0) || vnode_isdir(vp)) {
			if (vnode_isdir(vp)) {
				/* Resource fork vnode lookup is invalid for directory hard link.
				 * So we fake data fork vnode as resource fork vnode.
				 */
				rvp = vp;
			} else {
				error = hfs_vgetrsrc(hfsmp, vp, &rvp, TRUE, FALSE);
				if (error) {
					printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", filerec.fileID, error);
					hfs_unlock(VTOC(vp));
					vnode_put(vp);
					break;
				}
				/* Let reclaim of the cnode drop the rsrc vnode iocount later. */
				VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT;
			}

			error = hfs_reclaim_file(hfsmp, rvp, filerec.fileID,
					kHFSResourceForkType, allocLimit, context);
			if (error) {
				printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
				hfs_unlock(VTOC(vp));
				vnode_put(vp);
				break;
			}
		}

		/* The file forks were relocated successfully, now drop the
		 * cnode lock and vnode reference, and continue iterating to
		 * next catalog record.
		 */
		hfs_unlock(VTOC(vp));
		vnode_put(vp);
		files_moved++;
	}

	if (files_moved) {
		printf("hfs_reclaim_filespace: Relocated %u blocks from %u files on \"%s\"\n",
				(hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
				files_moved, hfsmp->vcbVN);
	}

reclaim_filespace_done:
	if (iterator) {
		kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	}

#if CONFIG_PROTECT
	if (keys_generated) {
		cp_entry_destroy(hfsmp->hfs_resize_cpentry);
		hfsmp->hfs_resize_cpentry = NULL;
	}
#endif
	return error;
}
7167
/*
 * Reclaim space at the end of a file system.
 *
 * Inputs -
 * allocLimit - start block of the space being reclaimed
 * reclaimblks - number of allocation blocks to reclaim
 *
 * NOTE: The order of relocation below is deliberate and must be
 * preserved: journal first (it must remain contiguous), then the
 * system B-trees (extents first, so it gets the largest contiguous
 * free range), then regular file data, then extent-based xattrs.
 *
 * Returns 0 on success, non-zero on the first failure.
 */
static int
hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context)
{
	int error = 0;

	/*
	 * Preflight the bitmap to find out total number of blocks that need
	 * relocation.
	 *
	 * Note: Since allocLimit is set to the location of new alternate volume
	 * header, the check below does not account for blocks allocated for old
	 * alternate volume header.
	 */
	error = hfs_count_allocated(hfsmp, allocLimit, reclaimblks, &(hfsmp->hfs_resize_totalblocks));
	if (error) {
		printf ("hfs_reclaimspace: Unable to determine total blocks to reclaim error=%d\n", error);
		return error;
	}
	if (hfs_resize_debug) {
		printf ("hfs_reclaimspace: Total number of blocks to reclaim = %u\n", hfsmp->hfs_resize_totalblocks);
	}

	/* Just to be safe, sync the content of the journal to the disk before we proceed */
	hfs_journal_flush(hfsmp, TRUE);

	/* First, relocate journal file blocks if they're in the way.
	 * Doing this first will make sure that journal relocate code
	 * gets access to contiguous blocks on disk first.  The journal
	 * file has to be contiguous on the disk, otherwise resize will
	 * fail.
	 */
	error = hfs_reclaim_journal_file(hfsmp, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: hfs_reclaim_journal_file failed (%d)\n", error);
		return error;
	}

	/* Relocate journal info block blocks if they're in the way. */
	error = hfs_reclaim_journal_info_block(hfsmp, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: hfs_reclaim_journal_info_block failed (%d)\n", error);
		return error;
	}

	/* Relocate extents of the Extents B-tree if they're in the way.
	 * Relocating extents btree before other btrees is important as
	 * this will provide access to largest contiguous block range on
	 * the disk for relocating extents btree.  Note that extents btree
	 * can only have maximum of 8 extents.
	 */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, kHFSExtentsFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Allocation file if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, kHFSAllocationFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Catalog B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, kHFSCatalogFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Attributes B-tree if they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, kHFSAttributesFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error);
		return error;
	}

	/* Relocate extents of the Startup File if there is one and they're in the way. */
	error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, kHFSStartupFileID,
			kHFSDataForkType, allocLimit, context);
	if (error) {
		printf("hfs_reclaimspace: reclaim startup file returned %d\n", error);
		return error;
	}

	/*
	 * We need to make sure the alternate volume header gets flushed if we moved
	 * any extents in the volume header.  But we need to do that before
	 * shrinking the size of the volume, or else the journal code will panic
	 * with an invalid (too large) block number.
	 *
	 * Note that blks_moved will be set if ANY extent was moved, even
	 * if it was just an overflow extent.  In this case, the journal_flush isn't
	 * strictly required, but shouldn't hurt.
	 */
	if (hfsmp->hfs_resize_blocksmoved) {
		hfs_journal_flush(hfsmp, TRUE);
	}

	/* Reclaim extents from catalog file records */
	error = hfs_reclaim_filespace(hfsmp, allocLimit, context);
	if (error) {
		printf ("hfs_reclaimspace: hfs_reclaim_filespace returned error=%d\n", error);
		return error;
	}

	/* Reclaim extents from extent-based extended attributes, if any */
	error = hfs_reclaim_xattrspace(hfsmp, allocLimit, context);
	if (error) {
		printf ("hfs_reclaimspace: hfs_reclaim_xattrspace returned error=%d\n", error);
		return error;
	}

	return error;
}
7294
7295
/*
 * Check if there are any extents (including overflow extents) that overlap
 * into the disk space that is being reclaimed.
 *
 * Output -
 * true - One of the extents need to be relocated
 * false - No overflow extents need to be relocated, or there was an error
 */
static int
hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec)
{
	struct BTreeIterator * iterator = NULL;
	struct FSBufferDescriptor btdata;
	HFSPlusExtentRecord extrec;
	HFSPlusExtentKey *extkeyptr;
	FCB *fcb;
	int overlapped = false;
	int i, j;
	int error;
	int lockflags = 0;		/* 0 means "extents B-tree not locked" */
	u_int32_t endblock;

	/* Check if data fork overlaps the target space */
	for (i = 0; i < kHFSPlusExtentDensity; ++i) {
		/* A zero blockCount terminates the extent list early. */
		if (filerec->dataFork.extents[i].blockCount == 0) {
			break;
		}
		endblock = filerec->dataFork.extents[i].startBlock +
			filerec->dataFork.extents[i].blockCount;
		/* Any block at or beyond allocLimit is in the reclaimed region. */
		if (endblock > allocLimit) {
			overlapped = true;
			goto out;
		}
	}

	/* Check if resource fork overlaps the target space */
	for (j = 0; j < kHFSPlusExtentDensity; ++j) {
		if (filerec->resourceFork.extents[j].blockCount == 0) {
			break;
		}
		endblock = filerec->resourceFork.extents[j].startBlock +
			filerec->resourceFork.extents[j].blockCount;
		if (endblock > allocLimit) {
			overlapped = true;
			goto out;
		}
	}

	/* Return back if there are no overflow extents for this file.
	 * Both loops ended before exhausting the in-record extent slots,
	 * so neither fork can have records in the overflow extents B-tree.
	 */
	if ((i < kHFSPlusExtentDensity) && (j < kHFSPlusExtentDensity)) {
		goto out;
	}

	/* NOTE(review): an allocation failure here reports "no overlap"
	 * (returns 0/false) rather than an error — confirm this best-effort
	 * behavior is acceptable to all callers.
	 */
	if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
		return 0;
	}
	bzero(iterator, sizeof(*iterator));

	/* Build a key for the lowest possible overflow record of this file:
	 * forkType 0, startBlock 0.
	 */
	extkeyptr = (HFSPlusExtentKey *)&iterator->key;
	extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength;
	extkeyptr->forkType = 0;
	extkeyptr->fileID = filerec->fileID;
	extkeyptr->startBlock = 0;

	btdata.bufferAddress = &extrec;
	btdata.itemSize = sizeof(extrec);
	btdata.itemCount = 1;

	fcb = VTOF(hfsmp->hfs_extents_vp);

	/* Shared lock is sufficient: we only read the extents B-tree. */
	lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);

	/* This will position the iterator just before the first overflow
	 * extent record for given fileID. It will always return btNotFound,
	 * so we special case the error code.
	 */
	error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
	if (error && (error != btNotFound)) {
		goto out;
	}

	/* BTIterateRecord() might return error if the btree is empty, and
	 * therefore we return that the extent does not overflow to the caller
	 */
	error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	while (error == 0) {
		/* Stop when we encounter a different file. */
		if (extkeyptr->fileID != filerec->fileID) {
			break;
		}
		/* Check if any of the forks exist in the target space. */
		for (i = 0; i < kHFSPlusExtentDensity; ++i) {
			if (extrec[i].blockCount == 0) {
				break;
			}
			endblock = extrec[i].startBlock + extrec[i].blockCount;
			if (endblock > allocLimit) {
				overlapped = true;
				goto out;
			}
		}
		/* Look for more records. */
		error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	}

out:
	/* lockflags is non-zero only if the extents lock was taken above. */
	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
	}
	if (iterator) {
		kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
	}
	return overlapped;
}
7409
7410
7411 /*
7412 * Calculate the progress of a file system resize operation.
7413 */
7414 __private_extern__
7415 int
7416 hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress)
7417 {
7418 if ((hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) == 0) {
7419 return (ENXIO);
7420 }
7421
7422 if (hfsmp->hfs_resize_totalblocks > 0) {
7423 *progress = (u_int32_t)((hfsmp->hfs_resize_blocksmoved * 100ULL) / hfsmp->hfs_resize_totalblocks);
7424 } else {
7425 *progress = 0;
7426 }
7427
7428 return (0);
7429 }
7430
7431
7432 /*
7433 * Creates a UUID from a unique "name" in the HFS UUID Name space.
7434 * See version 3 UUID.
7435 */
7436 static void
7437 hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result)
7438 {
7439 MD5_CTX md5c;
7440 uint8_t rawUUID[8];
7441
7442 ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6];
7443 ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7];
7444
7445 MD5Init( &md5c );
7446 MD5Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) );
7447 MD5Update( &md5c, rawUUID, sizeof (rawUUID) );
7448 MD5Final( result, &md5c );
7449
7450 result[6] = 0x30 | ( result[6] & 0x0F );
7451 result[8] = 0x80 | ( result[8] & 0x3F );
7452 }
7453
/*
 * Get file system attributes.
 *
 * Fills in the requested fields of *fsap from the mounted volume's
 * in-memory VCB/hfsmount state.  Each field set is flagged via
 * VFSATTR_RETURN / VFSATTR_SET_SUPPORTED so the VFS layer knows which
 * attributes were actually supplied.
 */
static int
hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
{
#define HFS_ATTR_CMN_VALIDMASK ATTR_CMN_VALIDMASK
#define HFS_ATTR_FILE_VALIDMASK (ATTR_FILE_VALIDMASK & ~(ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST))
#define HFS_ATTR_CMN_VOL_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_ACCTIME))

	ExtendedVCB *vcb = VFSTOVCB(mp);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	u_int32_t freeCNIDs;

	/* Runtime-conditional interface capabilities (see below). */
	int searchfs_on = 0;
	int exchangedata_on = 1;

#if CONFIG_SEARCHFS
	searchfs_on = 1;
#endif

#if CONFIG_PROTECT
	/* exchangedata is disabled on content-protected volumes. */
	if (cp_fs_protected(mp)) {
		exchangedata_on = 0;
	}
#endif

	/* CNIDs are 32-bit and allocated sequentially; everything above the
	 * next id to be assigned is considered free.
	 */
	freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)hfsmp->vcbNxtCNID;

	VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_filecount, (u_int64_t)hfsmp->vcbFilCnt);
	VFSATTR_RETURN(fsap, f_dircount, (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_maxobjcount, (u_int64_t)0xFFFFFFFF);
	VFSATTR_RETURN(fsap, f_iosize, (size_t)cluster_max_io_size(mp, 0));
	VFSATTR_RETURN(fsap, f_blocks, (u_int64_t)hfsmp->totalBlocks);
	VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)hfs_freeblks(hfsmp, 0));
	VFSATTR_RETURN(fsap, f_bavail, (u_int64_t)hfs_freeblks(hfsmp, 1));
	VFSATTR_RETURN(fsap, f_bsize, (u_int32_t)vcb->blockSize);
	/* XXX needs clarification */
	VFSATTR_RETURN(fsap, f_bused, hfsmp->totalBlocks - hfs_freeblks(hfsmp, 1));
	/* Maximum files is constrained by total blocks. */
	VFSATTR_RETURN(fsap, f_files, (u_int64_t)(hfsmp->totalBlocks - 2));
	VFSATTR_RETURN(fsap, f_ffree, MIN((u_int64_t)freeCNIDs, (u_int64_t)hfs_freeblks(hfsmp, 1)));

	fsap->f_fsid.val[0] = hfsmp->hfs_raw_dev;
	fsap->f_fsid.val[1] = vfs_typenum(mp);
	VFSATTR_SET_SUPPORTED(fsap, f_fsid);

	VFSATTR_RETURN(fsap, f_signature, vcb->vcbSigWord);
	VFSATTR_RETURN(fsap, f_carbon_fsid, 0);

	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
		vol_capabilities_attr_t *cap;

		cap = &fsap->f_capabilities;

		if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) {
			/* HFS+ & variants */
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_SYMBOLICLINKS |
				VOL_CAP_FMT_HARDLINKS |
				VOL_CAP_FMT_JOURNAL |
				VOL_CAP_FMT_ZERO_RUNS |
				(hfsmp->jnl ? VOL_CAP_FMT_JOURNAL_ACTIVE : 0) |
				(hfsmp->hfs_flags & HFS_CASE_SENSITIVE ? VOL_CAP_FMT_CASE_SENSITIVE : 0) |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS |
				VOL_CAP_FMT_2TB_FILESIZE |
				VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
				VOL_CAP_FMT_PATH_FROM_ID |
				VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
				VOL_CAP_FMT_PATH_FROM_ID;
#endif
		}
#if CONFIG_HFS_STD
		else {
			/* HFS standard */
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS |
				VOL_CAP_FMT_HIDDEN_FILES |
				VOL_CAP_FMT_PATH_FROM_ID;
		}
#endif

		/*
		 * The capabilities word in 'cap' tell you whether or not
		 * this particular filesystem instance has feature X enabled.
		 */

		cap->capabilities[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif

		/* HFS may conditionally support searchfs and exchangedata depending on the runtime */

		if (searchfs_on) {
			cap->capabilities[VOL_CAPABILITIES_INTERFACES] |= VOL_CAP_INT_SEARCHFS;
		}
		if (exchangedata_on) {
			cap->capabilities[VOL_CAPABILITIES_INTERFACES] |= VOL_CAP_INT_EXCHANGEDATA;
		}

		cap->capabilities[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->capabilities[VOL_CAPABILITIES_RESERVED2] = 0;

		cap->valid[VOL_CAPABILITIES_FORMAT] =
			VOL_CAP_FMT_PERSISTENTOBJECTIDS |
			VOL_CAP_FMT_SYMBOLICLINKS |
			VOL_CAP_FMT_HARDLINKS |
			VOL_CAP_FMT_JOURNAL |
			VOL_CAP_FMT_JOURNAL_ACTIVE |
			VOL_CAP_FMT_NO_ROOT_TIMES |
			VOL_CAP_FMT_SPARSE_FILES |
			VOL_CAP_FMT_ZERO_RUNS |
			VOL_CAP_FMT_CASE_SENSITIVE |
			VOL_CAP_FMT_CASE_PRESERVING |
			VOL_CAP_FMT_FAST_STATFS |
			VOL_CAP_FMT_2TB_FILESIZE |
			VOL_CAP_FMT_OPENDENYMODES |
			VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
			VOL_CAP_FMT_PATH_FROM_ID |
			VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
			VOL_CAP_FMT_PATH_FROM_ID;
#endif

		/*
		 * Bits in the "valid" field tell you whether or not the on-disk
		 * format supports feature X.
		 */

		cap->valid[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_COPYFILE |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
			VOL_CAP_INT_MANLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif

		/* HFS always supports exchangedata and searchfs in the on-disk format natively */
		cap->valid[VOL_CAPABILITIES_INTERFACES] |= (VOL_CAP_INT_SEARCHFS | VOL_CAP_INT_EXCHANGEDATA);


		cap->valid[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->valid[VOL_CAPABILITIES_RESERVED2] = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
		vol_attributes_attr_t *attrp = &fsap->f_attributes;

		/* "valid" = attributes getattrlist can return for this volume */
		attrp->validattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
		attrp->validattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->validattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->validattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->validattr.forkattr = 0;

		/* "native" = attributes stored natively by the file system */
		attrp->nativeattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
		attrp->nativeattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->nativeattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->nativeattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->nativeattr.forkattr = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
	}
	fsap->f_create_time.tv_sec = hfsmp->hfs_itime;
	fsap->f_create_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_create_time);
	fsap->f_modify_time.tv_sec = hfsmp->vcbLsMod;
	fsap->f_modify_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_modify_time);

	fsap->f_backup_time.tv_sec = hfsmp->vcbVolBkUp;
	fsap->f_backup_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_backup_time);
	if (VFSATTR_IS_ACTIVE(fsap, f_fssubtype)) {
		u_int16_t subtype = 0;

		/*
		 * Subtypes (flavors) for HFS
		 * 0: Mac OS Extended
		 * 1: Mac OS Extended (Journaled)
		 * 2: Mac OS Extended (Case Sensitive)
		 * 3: Mac OS Extended (Case Sensitive, Journaled)
		 * 4 - 127: Reserved
		 * 128: Mac OS Standard
		 *
		 */
		if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) {
			if (hfsmp->jnl) {
				subtype |= HFS_SUBTYPE_JOURNALED;
			}
			if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) {
				subtype |= HFS_SUBTYPE_CASESENSITIVE;
			}
		}
#if CONFIG_HFS_STD
		else {
			subtype = HFS_SUBTYPE_STANDARDHFS;
		}
#endif
		fsap->f_fssubtype = subtype;
		VFSATTR_SET_SUPPORTED(fsap, f_fssubtype);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		strlcpy(fsap->f_vol_name, (char *) hfsmp->vcbVN, MAXPATHLEN);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_uuid)) {
		hfs_getvoluuid(hfsmp, fsap->f_uuid);
		VFSATTR_SET_SUPPORTED(fsap, f_uuid);
	}
	return (0);
}
7693
/*
 * Perform a volume rename. Requires the FS' root vp.
 *
 * Renames the root folder in the catalog, updates the name in the VCB,
 * and (best-effort) propagates the name to CoreStorage.  On success the
 * root cnode's catalog descriptor is swapped for the new one.
 */
static int
hfs_rename_volume(struct vnode *vp, const char *name, proc_t p)
{
	ExtendedVCB *vcb = VTOVCB(vp);
	struct cnode *cp = VTOC(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	struct cat_desc to_desc;
	struct cat_desc todir_desc;
	struct cat_desc new_desc;
	cat_cookie_t cookie;
	int lockflags;
	int error = 0;
	char converted_volname[256];
	size_t volname_length = 0;
	size_t conv_volname_length = 0;


	/*
	 * Ignore attempts to rename a volume to a zero-length name.
	 */
	if (name[0] == 0)
		return(0);

	bzero(&to_desc, sizeof(to_desc));
	bzero(&todir_desc, sizeof(todir_desc));
	bzero(&new_desc, sizeof(new_desc));
	bzero(&cookie, sizeof(cookie));

	/* Destination directory descriptor: the volume's root folder. */
	todir_desc.cd_parentcnid = kHFSRootParentID;
	todir_desc.cd_cnid = kHFSRootFolderID;
	todir_desc.cd_flags = CD_ISDIR;

	/* Destination descriptor: same cnid as the root, but the new name. */
	to_desc.cd_nameptr = (const u_int8_t *)name;
	to_desc.cd_namelen = strlen(name);
	to_desc.cd_parentcnid = kHFSRootParentID;
	to_desc.cd_cnid = cp->c_cnid;
	to_desc.cd_flags = CD_ISDIR;

	/* Lock ordering: cnode lock, then transaction, then catalog lock. */
	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) == 0) {
		if ((error = hfs_start_transaction(hfsmp)) == 0) {
			if ((error = cat_preflight(hfsmp, CAT_RENAME, &cookie, p)) == 0) {
				lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

				error = cat_rename(hfsmp, &cp->c_desc, &todir_desc, &to_desc, &new_desc);

				/*
				 * If successful, update the name in the VCB, ensure it's terminated.
				 */
				if (error == 0) {
					strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN));

					volname_length = strlen ((const char*)vcb->vcbVN);
#define DKIOCCSSETLVNAME _IOW('d', 198, char[256])
					/* Send the volume name down to CoreStorage if necessary */
					error = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
					if (error == 0) {
						/* Best effort; the ioctl result is deliberately ignored. */
						(void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
					}
					/* A normalization failure must not fail the rename itself. */
					error = 0;
				}

				hfs_systemfile_unlock(hfsmp, lockflags);
				cat_postflight(hfsmp, &cookie, p);

				/*
				 * NOTE(review): the VCB is marked dirty only on the error
				 * path; on success the unconditional flush below writes the
				 * header.  Confirm the on-error-only MarkVCBDirty is
				 * intentional and not an inverted condition.
				 */
				if (error)
					MarkVCBDirty(vcb);
				(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
			}
			hfs_end_transaction(hfsmp);
		}
		if (!error) {
			/* Release old allocated name buffer */
			if (cp->c_desc.cd_flags & CD_HASBUF) {
				const char *tmp_name = (const char *)cp->c_desc.cd_nameptr;

				cp->c_desc.cd_nameptr = 0;
				cp->c_desc.cd_namelen = 0;
				cp->c_desc.cd_flags &= ~CD_HASBUF;
				vfs_removename(tmp_name);
			}
			/* Update cnode's catalog descriptor */
			replace_desc(cp, &new_desc);
			vcb->volumeNameEncodingHint = new_desc.cd_encoding;
			cp->c_touch_chgtime = TRUE;
		}

		hfs_unlock(cp);
	}

	return(error);
}
7788
7789 /*
7790 * Get file system attributes.
7791 */
7792 static int
7793 hfs_vfs_setattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
7794 {
7795 kauth_cred_t cred = vfs_context_ucred(context);
7796 int error = 0;
7797
7798 /*
7799 * Must be superuser or owner of filesystem to change volume attributes
7800 */
7801 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(mp)->f_owner))
7802 return(EACCES);
7803
7804 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
7805 vnode_t root_vp;
7806
7807 error = hfs_vfs_root(mp, &root_vp, context);
7808 if (error)
7809 goto out;
7810
7811 error = hfs_rename_volume(root_vp, fsap->f_vol_name, vfs_context_proc(context));
7812 (void) vnode_put(root_vp);
7813 if (error)
7814 goto out;
7815
7816 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
7817 }
7818
7819 out:
7820 return error;
7821 }
7822
7823 /* If a runtime corruption is detected, set the volume inconsistent
7824 * bit in the volume attributes. The volume inconsistent bit is a persistent
7825 * bit which represents that the volume is corrupt and needs repair.
7826 * The volume inconsistent bit can be set from the kernel when it detects
7827 * runtime corruption or from file system repair utilities like fsck_hfs when
7828 * a repair operation fails. The bit should be cleared only from file system
7829 * verify/repair utility like fsck_hfs when a verify/repair succeeds.
7830 */
7831 void hfs_mark_volume_inconsistent(struct hfsmount *hfsmp)
7832 {
7833 hfs_lock_mount (hfsmp);
7834 if ((hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) == 0) {
7835 hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask;
7836 MarkVCBDirty(hfsmp);
7837 }
7838 if ((hfsmp->hfs_flags & HFS_READ_ONLY)==0) {
7839 /* Log information to ASL log */
7840 fslog_fs_corrupt(hfsmp->hfs_mp);
7841 printf("hfs: Runtime corruption detected on %s, fsck will be forced on next mount.\n", hfsmp->vcbVN);
7842 }
7843 hfs_unlock_mount (hfsmp);
7844 }
7845
7846 /* Replay the journal on the device node provided. Returns zero if
7847 * journal replay succeeded or no journal was supposed to be replayed.
7848 */
7849 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context)
7850 {
7851 int retval = 0;
7852 int error = 0;
7853 struct mount *mp = NULL;
7854 struct hfs_mount_args *args = NULL;
7855
7856 /* Replay allowed only on raw devices */
7857 if (!vnode_ischr(devvp) && !vnode_isblk(devvp)) {
7858 retval = EINVAL;
7859 goto out;
7860 }
7861
7862 /* Create dummy mount structures */
7863 MALLOC(mp, struct mount *, sizeof(struct mount), M_TEMP, M_WAITOK);
7864 if (mp == NULL) {
7865 retval = ENOMEM;
7866 goto out;
7867 }
7868 bzero(mp, sizeof(struct mount));
7869 mount_lock_init(mp);
7870
7871 MALLOC(args, struct hfs_mount_args *, sizeof(struct hfs_mount_args), M_TEMP, M_WAITOK);
7872 if (args == NULL) {
7873 retval = ENOMEM;
7874 goto out;
7875 }
7876 bzero(args, sizeof(struct hfs_mount_args));
7877
7878 retval = hfs_mountfs(devvp, mp, args, 1, context);
7879 buf_flushdirtyblks(devvp, TRUE, 0, "hfs_journal_replay");
7880
7881 /* FSYNC the devnode to be sure all data has been flushed */
7882 error = VNOP_FSYNC(devvp, MNT_WAIT, context);
7883 if (error) {
7884 retval = error;
7885 }
7886
7887 out:
7888 if (mp) {
7889 mount_lock_destroy(mp);
7890 FREE(mp, M_TEMP);
7891 }
7892 if (args) {
7893 FREE(args, M_TEMP);
7894 }
7895 return retval;
7896 }
7897
/*
 * hfs vfs operations.
 *
 * NOTE: this is a positional initializer; the entry order must match the
 * field order of struct vfsops (see <sys/mount.h>).
 */
struct vfsops hfs_vfsops = {
	hfs_mount,
	hfs_start,
	hfs_unmount,
	hfs_vfs_root,
	hfs_quotactl,
	hfs_vfs_getattr, 	/* was hfs_statfs */
	hfs_sync,
	hfs_vfs_vget,
	hfs_fhtovp,
	hfs_vptofh,
	hfs_init,
	hfs_sysctl,
	hfs_vfs_setattr,
	{NULL}			/* remaining entries unused */
};