]> git.saurik.com Git - apple/xnu.git/blob - bsd/hfs/hfs_vfsops.c
a819362bb5eca73af012c12804f1fa6e8e9fee13
[apple/xnu.git] / bsd / hfs / hfs_vfsops.c
1 /*
2 * Copyright (c) 1999-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /*
29 * Copyright (c) 1991, 1993, 1994
30 * The Regents of the University of California. All rights reserved.
31 * (c) UNIX System Laboratories, Inc.
32 * All or some portions of this file are derived from material licensed
33 * to the University of California by American Telephone and Telegraph
34 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
35 * the permission of UNIX System Laboratories, Inc.
36 *
37 * Redistribution and use in source and binary forms, with or without
38 * modification, are permitted provided that the following conditions
39 * are met:
40 * 1. Redistributions of source code must retain the above copyright
41 * notice, this list of conditions and the following disclaimer.
42 * 2. Redistributions in binary form must reproduce the above copyright
43 * notice, this list of conditions and the following disclaimer in the
44 * documentation and/or other materials provided with the distribution.
45 * 3. All advertising materials mentioning features or use of this software
46 * must display the following acknowledgement:
47 * This product includes software developed by the University of
48 * California, Berkeley and its contributors.
49 * 4. Neither the name of the University nor the names of its contributors
50 * may be used to endorse or promote products derived from this software
51 * without specific prior written permission.
52 *
53 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
54 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
55 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
56 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
57 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
58 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
59 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
60 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
61 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
62 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
63 * SUCH DAMAGE.
64 *
65 * hfs_vfsops.c
66 * derived from @(#)ufs_vfsops.c 8.8 (Berkeley) 5/20/95
67 *
68 * (c) Copyright 1997-2002 Apple Computer, Inc. All rights reserved.
69 *
70 * hfs_vfsops.c -- VFS layer for loadable HFS file system.
71 *
72 */
73 #include <sys/param.h>
74 #include <sys/systm.h>
75 #include <sys/kauth.h>
76
77 #include <sys/ubc.h>
78 #include <sys/ubc_internal.h>
79 #include <sys/vnode_internal.h>
80 #include <sys/mount_internal.h>
81 #include <sys/sysctl.h>
82 #include <sys/malloc.h>
83 #include <sys/stat.h>
84 #include <sys/quota.h>
85 #include <sys/disk.h>
86 #include <sys/paths.h>
87 #include <sys/utfconv.h>
88 #include <sys/kdebug.h>
89 #include <sys/fslog.h>
90 #include <sys/ubc.h>
91 #include <sys/buf_internal.h>
92
93 /* for parsing boot-args */
94 #include <pexpert/pexpert.h>
95
96
97 #include <kern/locks.h>
98
99 #include <vfs/vfs_journal.h>
100
101 #include <miscfs/specfs/specdev.h>
102 #include <hfs/hfs_mount.h>
103
104 #include <libkern/crypto/md5.h>
105 #include <uuid/uuid.h>
106
107 #include "hfs.h"
108 #include "hfs_catalog.h"
109 #include "hfs_cnode.h"
110 #include "hfs_dbg.h"
111 #include "hfs_endian.h"
112 #include "hfs_hotfiles.h"
113 #include "hfs_quota.h"
114 #include "hfs_btreeio.h"
115 #include "hfs_kdebug.h"
116 #include "hfs_cprotect.h"
117
118 #include "hfscommon/headers/FileMgrInternal.h"
119 #include "hfscommon/headers/BTreesInternal.h"
120
121 #define HFS_MOUNT_DEBUG 1
122
123 #if HFS_DIAGNOSTIC
124 int hfs_dbg_all = 0;
125 int hfs_dbg_err = 0;
126 #endif
127
128 /* Enable/disable debugging code for live volume resizing, defined in hfs_resize.c */
129 extern int hfs_resize_debug;
130
131 lck_grp_attr_t * hfs_group_attr;
132 lck_attr_t * hfs_lock_attr;
133 lck_grp_t * hfs_mutex_group;
134 lck_grp_t * hfs_rwlock_group;
135 lck_grp_t * hfs_spinlock_group;
136
137 extern struct vnodeopv_desc hfs_vnodeop_opv_desc;
138
139 #if CONFIG_HFS_STD
140 extern struct vnodeopv_desc hfs_std_vnodeop_opv_desc;
141 static int hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush);
142 #endif
143
144 /* not static so we can re-use in hfs_readwrite.c for build_path calls */
145 int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
146
147 static int hfs_changefs(struct mount *mp, struct hfs_mount_args *args);
148 static int hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, vfs_context_t context);
149 static int hfs_flushfiles(struct mount *, int, struct proc *);
150 static int hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp);
151 static int hfs_init(struct vfsconf *vfsp);
152 static void hfs_locks_destroy(struct hfsmount *hfsmp);
153 static int hfs_vfs_root(struct mount *mp, struct vnode **vpp, vfs_context_t context);
154 static int hfs_quotactl(struct mount *, int, uid_t, caddr_t, vfs_context_t context);
155 static int hfs_start(struct mount *mp, int flags, vfs_context_t context);
156 static int hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, vfs_context_t context);
157 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context);
158 static void hfs_syncer_free(struct hfsmount *hfsmp);
159
160 void hfs_initialize_allocator (struct hfsmount *hfsmp);
161 int hfs_teardown_allocator (struct hfsmount *hfsmp);
162
163 int hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context);
164 int hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args, int journal_replay_only, vfs_context_t context);
165 int hfs_reload(struct mount *mp);
166 int hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, vfs_context_t context);
167 int hfs_sync(struct mount *mp, int waitfor, vfs_context_t context);
168 int hfs_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
169 user_addr_t newp, size_t newlen, vfs_context_t context);
170 int hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context);
171
172 /*
173 * Called by vfs_mountroot when mounting HFS Plus as root.
174 */
175
176 int
177 hfs_mountroot(mount_t mp, vnode_t rvp, vfs_context_t context)
178 {
179 struct hfsmount *hfsmp;
180 ExtendedVCB *vcb;
181 struct vfsstatfs *vfsp;
182 int error;
183
184 if ((error = hfs_mountfs(rvp, mp, NULL, 0, context))) {
185 if (HFS_MOUNT_DEBUG) {
186 printf("hfs_mountroot: hfs_mountfs returned %d, rvp (%p) name (%s) \n",
187 error, rvp, (rvp->v_name ? rvp->v_name : "unknown device"));
188 }
189 return (error);
190 }
191
192 /* Init hfsmp */
193 hfsmp = VFSTOHFS(mp);
194
195 hfsmp->hfs_uid = UNKNOWNUID;
196 hfsmp->hfs_gid = UNKNOWNGID;
197 hfsmp->hfs_dir_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
198 hfsmp->hfs_file_mask = (S_IRWXU | S_IRGRP|S_IXGRP | S_IROTH|S_IXOTH); /* 0755 */
199
200 /* Establish the free block reserve. */
201 vcb = HFSTOVCB(hfsmp);
202 vcb->reserveBlocks = ((u_int64_t)vcb->totalBlocks * HFS_MINFREE) / 100;
203 vcb->reserveBlocks = MIN(vcb->reserveBlocks, HFS_MAXRESERVE / vcb->blockSize);
204
205 vfsp = vfs_statfs(mp);
206 (void)hfs_statfs(mp, vfsp, NULL);
207
208 return (0);
209 }
210
211
212 /*
213 * VFS Operations.
214 *
215 * mount system call
216 */
217
/*
 * VFS mount entry point for HFS.
 *
 * Handles three cases, selected by the command flags on mp:
 *   1. MNT_UPDATE | MNT_RELOAD: re-read in-core metadata after fsck
 *      (read-only mounts only).
 *   2. MNT_UPDATE: remount — either a read-write -> read-only
 *      downgrade or a read-only -> read-write upgrade — followed by
 *      applying any changed mount arguments via hfs_changefs().
 *   3. Fresh mount: delegate to hfs_mountfs().
 *
 * mp      - mount point being (re)mounted
 * devvp   - device vnode (fresh mounts only)
 * data    - user-space pointer to a struct hfs_mount_args
 * context - caller's VFS context
 *
 * Returns 0 on success or an errno value.
 */
int
hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct hfsmount *hfsmp = NULL;
	struct hfs_mount_args args;
	int retval = E_NONE;
	u_int32_t cmdflags;

	/* Copy the mount arguments in from user space. */
	if ((retval = copyin(data, (caddr_t)&args, sizeof(args)))) {
		if (HFS_MOUNT_DEBUG) {
			printf("hfs_mount: copyin returned %d for fs\n", retval);
		}
		return (retval);
	}
	cmdflags = (u_int32_t)vfs_flags(mp) & MNT_CMDFLAGS;
	if (cmdflags & MNT_UPDATE) {
		hfsmp = VFSTOHFS(mp);

		/* Reload incore data after an fsck. */
		if (cmdflags & MNT_RELOAD) {
			if (vfs_isrdonly(mp)) {
				int error = hfs_reload(mp);
				if (error && HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_reload returned %d on %s \n", error, hfsmp->vcbVN);
				}
				return error;
			}
			else {
				/* Reload of a writable volume is not supported. */
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: MNT_RELOAD not supported on rdwr filesystem %s\n", hfsmp->vcbVN);
				}
				return (EINVAL);
			}
		}

		/* Change to a read-only file system. */
		if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
		    vfs_isrdonly(mp)) {
			int flags;

			/* Set flag to indicate that a downgrade to read-only
			 * is in progress and therefore block any further
			 * modifications to the file system.
			 */
			hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
			hfsmp->hfs_flags |= HFS_RDONLY_DOWNGRADE;
			hfsmp->hfs_downgrading_thread = current_thread();
			hfs_unlock_global (hfsmp);
			hfs_syncer_free(hfsmp);

			/* use VFS_SYNC to push out System (btree) files */
			retval = VFS_SYNC(mp, MNT_WAIT, context);
			if (retval && ((cmdflags & MNT_FORCE) == 0)) {
				/* Sync failed and no force: abort the downgrade. */
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_thread = NULL;
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: VFS_SYNC returned %d during b-tree sync of %s \n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			flags = WRITECLOSE;
			if (cmdflags & MNT_FORCE)
				flags |= FORCECLOSE;

			if ((retval = hfs_flushfiles(mp, flags, p))) {
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_thread = NULL;
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_flushfiles returned %d on %s \n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* mark the volume cleanly unmounted */
			hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask;
			retval = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
			hfsmp->hfs_flags |= HFS_READ_ONLY;

			/*
			 * Close down the journal.
			 *
			 * NOTE: It is critically important to close down the journal
			 * and have it issue all pending I/O prior to calling VNOP_FSYNC below.
			 * In a journaled environment it is expected that the journal be
			 * the only actor permitted to issue I/O for metadata blocks in HFS.
			 * If we were to call VNOP_FSYNC prior to closing down the journal,
			 * we would inadvertently issue (and wait for) the I/O we just
			 * initiated above as part of the flushvolumeheader call.
			 *
			 * To avoid this, we follow the same order of operations as in
			 * unmount and issue the journal_close prior to calling VNOP_FSYNC.
			 */

			if (hfsmp->jnl) {
				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

				journal_close(hfsmp->jnl);
				hfsmp->jnl = NULL;

				// Note: we explicitly don't want to shutdown
				//       access to the jvp because we may need
				//       it later if we go back to being read-write.

				hfs_unlock_global (hfsmp);

				vfs_clearflags(hfsmp->hfs_mp, MNT_JOURNALED);
			}

			/*
			 * Write out any pending I/O still outstanding against the device node
			 * now that the journal has been closed.
			 */
			if (retval == 0) {
				vnode_get(hfsmp->hfs_devvp);
				retval = VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);
				vnode_put(hfsmp->hfs_devvp);
			}

			if (retval) {
				/* Device flush failed: undo the downgrade state. */
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: FSYNC on devvp returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
				hfsmp->hfs_downgrading_thread = NULL;
				hfsmp->hfs_flags &= ~HFS_READ_ONLY;
				goto out;
			}

			/* Read-only volumes don't need the summary table; free it. */
			if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
				if (hfsmp->hfs_summary_table) {
					int err = 0;
					/*
					 * Take the bitmap lock to serialize against a concurrent bitmap scan still in progress
					 */
					if (hfsmp->hfs_allocation_vp) {
						err = hfs_lock (VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
					}
					FREE (hfsmp->hfs_summary_table, M_TEMP);
					hfsmp->hfs_summary_table = NULL;
					hfsmp->hfs_flags &= ~HFS_SUMMARY_TABLE;
					if (err == 0 && hfsmp->hfs_allocation_vp){
						hfs_unlock (VTOC(hfsmp->hfs_allocation_vp));
					}
				}
			}

			hfsmp->hfs_downgrading_thread = NULL;
		}

		/* Change to a writable file system. */
		if (vfs_iswriteupgrade(mp)) {
			/*
			 * On inconsistent disks, do not allow read-write mount
			 * unless it is the boot volume being mounted.
			 */
			if (!(vfs_flags(mp) & MNT_ROOTFS) &&
					(hfsmp->vcbAtrb & kHFSVolumeInconsistentMask)) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: attempting to mount inconsistent non-root volume %s\n",  (hfsmp->vcbVN));
				}
				retval = EINVAL;
				goto out;
			}

			// If the journal was shut-down previously because we were
			// asked to be read-only, let's start it back up again now

			if (   (HFSTOVCB(hfsmp)->vcbAtrb & kHFSVolumeJournaledMask)
			    && hfsmp->jnl == NULL
			    && hfsmp->jvp != NULL) {
				int jflags;

				if (hfsmp->hfs_flags & HFS_NEED_JNL_RESET) {
					jflags = JOURNAL_RESET;
				} else {
					jflags = 0;
				}

				hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

				/* We provide the mount point twice here: The first is used as
				 * an opaque argument to be passed back when hfs_sync_metadata
				 * is called. The second is provided to the throttling code to
				 * indicate which mount's device should be used when accounting
				 * for metadata writes.
				 */
				hfsmp->jnl = journal_open(hfsmp->jvp,
						hfs_blk_to_bytes(hfsmp->jnl_start, HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
						hfsmp->jnl_size,
						hfsmp->hfs_devvp,
						hfsmp->hfs_logical_block_size,
						jflags,
						0,
						hfs_sync_metadata, hfsmp->hfs_mp,
						hfsmp->hfs_mp);

				/*
				 * Set up the trim callback function so that we can add
				 * recently freed extents to the free extent cache once
				 * the transaction that freed them is written to the
				 * journal on disk.
				 */
				if (hfsmp->jnl)
					journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);

				hfs_unlock_global (hfsmp);

				if (hfsmp->jnl == NULL) {
					if (HFS_MOUNT_DEBUG) {
						printf("hfs_mount: journal_open == NULL; couldn't be opened on %s \n", (hfsmp->vcbVN));
					}
					retval = EINVAL;
					goto out;
				} else {
					hfsmp->hfs_flags &= ~HFS_NEED_JNL_RESET;
					vfs_setflags(hfsmp->hfs_mp, MNT_JOURNALED);
				}
			}

			/* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
			retval = hfs_erase_unused_nodes(hfsmp);
			if (retval != E_NONE) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_erase_unused_nodes returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* If this mount point was downgraded from read-write
			 * to read-only, clear that information as we are now
			 * moving back to read-write.
			 */
			hfsmp->hfs_flags &= ~HFS_RDONLY_DOWNGRADE;
			hfsmp->hfs_downgrading_thread = NULL;

			/* mark the volume dirty (clear clean unmount bit) */
			hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask;

			retval = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
			if (retval != E_NONE) {
				if (HFS_MOUNT_DEBUG) {
					printf("hfs_mount: hfs_flushvolumeheader returned %d for fs %s\n", retval, hfsmp->vcbVN);
				}
				goto out;
			}

			/* Only clear HFS_READ_ONLY after a successful write */
			hfsmp->hfs_flags &= ~HFS_READ_ONLY;


			if (!(hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_STANDARD))) {
				/* Setup private/hidden directories for hardlinks. */
				hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
				hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

				hfs_remove_orphans(hfsmp);

				/*
				 * Since we're upgrading to a read-write mount, allow
				 * hot file clustering if conditions allow.
				 *
				 * Note: this normally only would happen if you booted
				 *       single-user and upgraded the mount to read-write
				 *
				 * Note: at this point we are not allowed to fail the
				 *       mount operation because the HotFile init code
				 *       in hfs_recording_init() will lookup vnodes with
				 *       VNOP_LOOKUP() which hangs vnodes off the mount
				 *       (and if we were to fail, VFS is not prepared to
				 *       clean that up at this point.  Since HotFiles are
				 *       optional, this is not a big deal.
				 */
				if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) &&
				    (((hfsmp->hfs_mp->mnt_kern_flag & MNTK_SSD) == 0) || (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) ) {
					(void) hfs_recording_init(hfsmp);
				}
				/* Force ACLs on HFS+ file systems. */
				if (vfs_extendedsecurity(HFSTOVFS(hfsmp)) == 0) {
					vfs_setextendedsecurity(HFSTOVFS(hfsmp));
				}
			}
		}

		/* Update file system parameters. */
		retval = hfs_changefs(mp, &args);
		if (retval &&  HFS_MOUNT_DEBUG) {
			printf("hfs_mount: hfs_changefs returned %d for %s\n", retval, hfsmp->vcbVN);
		}

	} else /* not an update request */ {

		/* Set the mount flag to indicate that we support volfs  */
		vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_DOVOLFS));

		retval = hfs_mountfs(devvp, mp, &args, 0, context);
		if (retval) {
			const char *name = vnode_getname(devvp);
			printf("hfs_mount: hfs_mountfs returned error=%d for device %s\n", retval, (name ? name : "unknown-dev"));
			if (name) {
				vnode_putname(name);
			}
			goto out;
		}

		/* After hfs_mountfs succeeds, we should have valid hfsmp */
		hfsmp = VFSTOHFS(mp);

	}

out:
	if (retval == 0) {
		/* Refresh the cached statfs data on success. */
		(void)hfs_statfs(mp, vfs_statfs(mp), context);
	}
	return (retval);
}
535
536
/*
 * Argument package handed to hfs_changefs_callback() through
 * vnode_iterate() when mount parameters change.
 */
struct hfs_changefs_cargs {
	struct hfsmount *hfsmp;	/* mount whose parameters changed */
	int		namefix;	/* non-zero: name encoding changed; refresh cached names */
	int		permfix;	/* non-zero: default uid/gid/mask changed */
	int		permswitch;	/* non-zero: MNT_UNKNOWNPERMISSIONS was toggled */
};
543
/*
 * Per-vnode callback for hfs_changefs(), invoked by vnode_iterate().
 *
 * Re-reads the cnode's catalog record and, depending on the flags in
 * the hfs_changefs_cargs package: refreshes on-disk uid/gid/mode
 * (permswitch/permfix), and/or purges the name cache and replaces the
 * cached descriptor with one produced by the new name encoder
 * (namefix).  Always returns VNODE_RETURNED so iteration continues.
 */
static int
hfs_changefs_callback(struct vnode *vp, void *cargs)
{
	ExtendedVCB *vcb;
	struct cnode *cp;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct hfs_changefs_cargs *args;
	int lockflags;
	int error;

	args = (struct hfs_changefs_cargs *)cargs;

	cp = VTOC(vp);
	vcb = HFSTOVCB(args->hfsmp);

	/* Look up the current on-disk record under a shared catalog lock. */
	lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
	error = cat_lookup(args->hfsmp, &cp->c_desc, 0, 0, &cndesc, &cnattr, NULL, NULL);
	hfs_systemfile_unlock(args->hfsmp, lockflags);
	if (error) {
	        /*
	         * If we couldn't find this guy skip to the next one
	         */
	        if (args->namefix)
	            cache_purge(vp);

		return (VNODE_RETURNED);
	}
	/*
	 * Get the real uid/gid and perm mask from disk.
	 */
	if (args->permswitch || args->permfix) {
		cp->c_uid = cnattr.ca_uid;
		cp->c_gid = cnattr.ca_gid;
		cp->c_mode = cnattr.ca_mode;
	}
	/*
	 * If we're switching name converters then...
	 *   Remove the existing entry from the namei cache.
	 *   Update name to one based on new encoder.
	 */
	if (args->namefix) {
		cache_purge(vp);
		replace_desc(cp, &cndesc);

		/* Root folder: keep the in-core volume name in sync too. */
		if (cndesc.cd_cnid == kHFSRootFolderID) {
			strlcpy((char *)vcb->vcbVN, (const char *)cp->c_desc.cd_nameptr, NAME_MAX+1);
			cp->c_desc.cd_encoding = args->hfsmp->hfs_encoding;
		}
	} else {
		/* Descriptor not adopted by the cnode; release our copy. */
		cat_releasedesc(&cndesc);
	}
	return (VNODE_RETURNED);
}
598
599 /* Change fs mount parameters */
/*
 * Change fs mount parameters on a live (MNT_UPDATE) mount.
 *
 * Applies the fields of *args that differ from the current state:
 * timezone, default uid/gid/mask, and (HFS standard only, under
 * CONFIG_HFS_STD) the text encoding.  Also tracks toggling of
 * MNT_UNKNOWNPERMISSIONS.  If any of name encoding, permissions, or
 * the unknown-permissions switch changed, every live vnode on the
 * mount is visited via hfs_changefs_callback() to bring its cached
 * state up to date.
 *
 * Sets HFS_IN_CHANGEFS for the duration of the operation.
 * Returns 0 on success or an errno value.
 */
static int
hfs_changefs(struct mount *mp, struct hfs_mount_args *args)
{
	int retval = 0;
	int namefix, permfix, permswitch;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	struct hfs_changefs_cargs cargs;
	u_int32_t mount_flags;

#if CONFIG_HFS_STD
	u_int32_t old_encoding = 0;
	hfs_to_unicode_func_t	get_unicode_func;
	unicode_to_hfs_func_t	get_hfsname_func;
#endif

	hfsmp = VFSTOHFS(mp);
	vcb = HFSTOVCB(hfsmp);
	mount_flags = (unsigned int)vfs_flags(mp);

	hfsmp->hfs_flags |= HFS_IN_CHANGEFS;

	/* True iff MNT_UNKNOWNPERMISSIONS is being turned on or off. */
	permswitch = (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) &&
	               ((mount_flags & MNT_UNKNOWNPERMISSIONS) == 0)) ||
	              (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) == 0) &&
	               (mount_flags & MNT_UNKNOWNPERMISSIONS)));

	/* The root filesystem must operate with actual permissions: */
	if (permswitch && (mount_flags & MNT_ROOTFS) && (mount_flags & MNT_UNKNOWNPERMISSIONS)) {
		vfs_clearflags(mp, (u_int64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS));	/* Just say "No". */
		retval = EINVAL;
		goto exit;
	}
	if (mount_flags & MNT_UNKNOWNPERMISSIONS)
		hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
	else
		hfsmp->hfs_flags &= ~HFS_UNKNOWN_PERMS;

	namefix = permfix = 0;

	/*
	 * Tracking of hot files requires up-to-date access times.  So if
	 * access time updates are disabled, we must also disable hot files.
	 */
	if (mount_flags & MNT_NOATIME) {
		(void) hfs_recording_suspend(hfsmp);
	}

	/* Change the timezone (Note: this affects all hfs volumes and hfs+ volume create dates) */
	if (args->hfs_timezone.tz_minuteswest != VNOVAL) {
		gTimeZone = args->hfs_timezone;
	}

	/* Change the default uid, gid and/or mask */
	if ((args->hfs_uid != (uid_t)VNOVAL) && (hfsmp->hfs_uid != args->hfs_uid)) {
		hfsmp->hfs_uid = args->hfs_uid;
		if (vcb->vcbSigWord == kHFSPlusSigWord)
			++permfix;
	}
	if ((args->hfs_gid != (gid_t)VNOVAL) && (hfsmp->hfs_gid != args->hfs_gid)) {
		hfsmp->hfs_gid = args->hfs_gid;
		if (vcb->vcbSigWord == kHFSPlusSigWord)
			++permfix;
	}
	if (args->hfs_mask != (mode_t)VNOVAL) {
		if (hfsmp->hfs_dir_mask != (args->hfs_mask & ALLPERMS)) {
			hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
			hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
			if ((args->flags != VNOVAL) && (args->flags & HFSFSMNT_NOXONFILES))
				hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
			if (vcb->vcbSigWord == kHFSPlusSigWord)
				++permfix;
		}
	}

#if CONFIG_HFS_STD
	/* Change the hfs encoding value (hfs only) */
	if ((vcb->vcbSigWord == kHFSSigWord)	&&
	    (args->hfs_encoding != (u_int32_t)VNOVAL)              &&
	    (hfsmp->hfs_encoding != args->hfs_encoding)) {

		retval = hfs_getconverter(args->hfs_encoding, &get_unicode_func, &get_hfsname_func);
		if (retval)
			goto exit;

		/*
		 * Connect the new hfs_get_unicode converter but leave
		 * the old hfs_get_hfsname converter in place so that
		 * we can lookup existing vnodes to get their correctly
		 * encoded names.
		 *
		 * When we're all finished, we can then connect the new
		 * hfs_get_hfsname converter and release our interest
		 * in the old converters.
		 */
		hfsmp->hfs_get_unicode = get_unicode_func;
		old_encoding = hfsmp->hfs_encoding;
		hfsmp->hfs_encoding = args->hfs_encoding;
		++namefix;
	}
#endif

	/* Nothing changed that affects live vnodes; we're done. */
	if (!(namefix || permfix || permswitch))
		goto exit;

	/* XXX 3762912 hack to support HFS filesystem 'owner' */
	if (permfix)
		vfs_setowner(mp,
		    hfsmp->hfs_uid == UNKNOWNUID ? KAUTH_UID_NONE : hfsmp->hfs_uid,
		    hfsmp->hfs_gid == UNKNOWNGID ? KAUTH_GID_NONE : hfsmp->hfs_gid);

	/*
	 * For each active vnode fix things that changed
	 *
	 * Note that we can visit a vnode more than once
	 * and we can race with fsync.
	 *
	 * hfs_changefs_callback will be called for each vnode
	 * hung off of this mount point
	 *
	 * The vnode will be properly referenced and unreferenced
	 * around the callback
	 */
	cargs.hfsmp = hfsmp;
	cargs.namefix = namefix;
	cargs.permfix = permfix;
	cargs.permswitch = permswitch;

	vnode_iterate(mp, 0, hfs_changefs_callback, (void *)&cargs);

#if CONFIG_HFS_STD
	/*
	 * If we're switching name converters we can now
	 * connect the new hfs_get_hfsname converter and
	 * release our interest in the old converters.
	 */
	if (namefix) {
		/* HFS standard only */
		hfsmp->hfs_get_hfsname = get_hfsname_func;
		vcb->volumeNameEncodingHint = args->hfs_encoding;
		(void) hfs_relconverter(old_encoding);
	}
#endif

exit:
	hfsmp->hfs_flags &= ~HFS_IN_CHANGEFS;
	return (retval);
}
748
749
/*
 * Argument package handed to hfs_reload_callback() through
 * vnode_iterate() by hfs_reload().
 */
struct hfs_reload_cargs {
	struct hfsmount *hfsmp;	/* mount being reloaded */
	int		error;	/* first error hit by the callback; 0 if none */
};
754
/*
 * Per-vnode callback for hfs_reload(), invoked by vnode_iterate().
 *
 * Invalidates the vnode's buffers and directory hints, then — for
 * regular user files (non-system, non-resource-fork, user catalog
 * IDs) — re-reads the catalog record by file ID and installs the
 * fresh descriptor/attributes/data-fork info on the cnode.
 *
 * Returns VNODE_RETURNED to continue iterating, or
 * VNODE_RETURNED_DONE (with args->error set) to stop on a catalog
 * lookup failure.
 */
static int
hfs_reload_callback(struct vnode *vp, void *cargs)
{
	struct cnode *cp;
	struct hfs_reload_cargs *args;
	int lockflags;

	args = (struct hfs_reload_cargs *)cargs;
	/*
	 * flush all the buffers associated with this node
	 */
	(void) buf_invalidateblks(vp, 0, 0, 0);

	cp = VTOC(vp);
	/*
	 * Remove any directory hints
	 */
	if (vnode_isdir(vp))
		hfs_reldirhints(cp, 0);

	/*
	 * Re-read cnode data for all active vnodes (non-metadata files).
	 */
	if (!vnode_issystem(vp) && !VNODE_IS_RSRC(vp) && (cp->c_fileid >= kHFSFirstUserCatalogNodeID)) {
		struct cat_fork *datafork;
		struct cat_desc desc;

		datafork = cp->c_datafork ? &cp->c_datafork->ff_data : NULL;

		/* lookup by fileID since name could have changed */
		lockflags = hfs_systemfile_lock(args->hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		args->error = cat_idlookup(args->hfsmp, cp->c_fileid, 0, 0, &desc, &cp->c_attr, datafork);
		hfs_systemfile_unlock(args->hfsmp, lockflags);
		if (args->error) {
			return (VNODE_RETURNED_DONE);
		}

		/* update cnode's catalog descriptor */
		(void) replace_desc(cp, &desc);
	}
	return (VNODE_RETURNED);
}
797
798 /*
799 * Reload all incore data for a filesystem (used after running fsck on
800 * the root filesystem and finding things to fix). The filesystem must
801 * be mounted read-only.
802 *
803 * Things to do to update the mount:
804 * invalidate all cached meta-data.
805 * invalidate all inactive vnodes.
806 * invalidate all cached file data.
807 * re-read volume header from disk.
808 * re-load meta-file info (extents, file size).
809 * re-load B-tree header data.
810 * re-read cnode data for all active vnodes.
811 */
/*
 * Reload all incore data for a filesystem (used after running fsck on
 * the root filesystem and finding things to fix).  The filesystem must
 * be mounted read-only.
 *
 * Things to do to update the mount:
 *	invalidate all cached meta-data.
 *	invalidate all inactive vnodes.
 *	invalidate all cached file data.
 *	re-read volume header from disk.
 *	re-load meta-file info (extents, file size).
 *	re-load B-tree header data.
 *	re-read cnode data for all active vnodes.
 *
 * mountp - the mount to reload; must be HFS Plus (HFS standard is
 *          rejected with EINVAL).
 * Returns 0 on success or an errno value (EIO if the re-read volume
 * header fails sanity checks).
 */
int
hfs_reload(struct mount *mountp)
{
	register struct vnode *devvp;
	struct buf *bp;
	int error, i;
	struct hfsmount *hfsmp;
	struct HFSPlusVolumeHeader *vhp;
	ExtendedVCB *vcb;
	struct filefork *forkp;
	struct cat_desc cndesc;
	struct hfs_reload_cargs args;
	daddr64_t priIDSector;

	hfsmp = VFSTOHFS(mountp);
	vcb = HFSTOVCB(hfsmp);

	if (vcb->vcbSigWord == kHFSSigWord)
		return (EINVAL);	/* rooting from HFS is not supported! */

	/*
	 * Invalidate all cached meta-data.
	 */
	devvp = hfsmp->hfs_devvp;
	if (buf_invalidateblks(devvp, 0, 0, 0))
		panic("hfs_reload: dirty1");

	args.hfsmp = hfsmp;
	args.error = 0;
	/*
	 * hfs_reload_callback will be called for each vnode
	 * hung off of this mount point that can't be recycled...
	 * vnode_iterate will recycle those that it can (the VNODE_RELOAD option)
	 * the vnode will be in an 'unbusy' state (VNODE_WAIT) and
	 * properly referenced and unreferenced around the callback
	 */
	vnode_iterate(mountp, VNODE_RELOAD | VNODE_WAIT, hfs_reload_callback, (void *)&args);

	if (args.error)
		return (args.error);

	/*
	 * Re-read VolumeHeader from disk.
	 */
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
			HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	error = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	if (error) {
		if (bp != NULL)
			buf_brelse(bp);
		return (error);
	}

	vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));

	/* Do a quick sanity check */
	if ((SWAP_BE16(vhp->signature) != kHFSPlusSigWord &&
	     SWAP_BE16(vhp->signature) != kHFSXSigWord) ||
	    (SWAP_BE16(vhp->version) != kHFSPlusVersion &&
	     SWAP_BE16(vhp->version) != kHFSXVersion) ||
	    SWAP_BE32(vhp->blockSize) != vcb->blockSize) {
		buf_brelse(bp);
		return (EIO);
	}

	/* Copy the (big-endian) on-disk header fields into the in-core VCB. */
	vcb->vcbLsMod		= to_bsd_time(SWAP_BE32(vhp->modifyDate));
	vcb->vcbAtrb		= SWAP_BE32 (vhp->attributes);
	vcb->vcbJinfoBlock  = SWAP_BE32(vhp->journalInfoBlock);
	vcb->vcbClpSiz		= SWAP_BE32 (vhp->rsrcClumpSize);
	vcb->vcbNxtCNID		= SWAP_BE32 (vhp->nextCatalogID);
	vcb->vcbVolBkUp		= to_bsd_time(SWAP_BE32(vhp->backupDate));
	vcb->vcbWrCnt		= SWAP_BE32 (vhp->writeCount);
	vcb->vcbFilCnt		= SWAP_BE32 (vhp->fileCount);
	vcb->vcbDirCnt		= SWAP_BE32 (vhp->folderCount);
	HFS_UPDATE_NEXT_ALLOCATION(vcb, SWAP_BE32 (vhp->nextAllocation));
	vcb->totalBlocks	= SWAP_BE32 (vhp->totalBlocks);
	vcb->freeBlocks		= SWAP_BE32 (vhp->freeBlocks);
	vcb->encodingsBitmap	= SWAP_BE64 (vhp->encodingsBitmap);
	bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));
	vcb->localCreateDate	= SWAP_BE32 (vhp->createDate); /* hfs+ create date is in local time */

	/*
	 * Re-load meta-file vnode data (extent info, file size, etc).
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock =
			SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount =
			SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->extentsFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->extentsFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->extentsFile.clumpSize);


	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock	=
			SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount	=
			SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->catalogFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->catalogFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->catalogFile.clumpSize);

	/* Attributes B-tree is optional; only reload if it is open. */
	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		for (i = 0; i < kHFSPlusExtentDensity; i++) {
			forkp->ff_extents[i].startBlock	=
				SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
			forkp->ff_extents[i].blockCount	=
				SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
		}
		forkp->ff_size      = SWAP_BE64 (vhp->attributesFile.logicalSize);
		forkp->ff_blocks    = SWAP_BE32 (vhp->attributesFile.totalBlocks);
		forkp->ff_clumpsize = SWAP_BE32 (vhp->attributesFile.clumpSize);
	}

	forkp = VTOF((struct vnode *)vcb->allocationsRefNum);
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		forkp->ff_extents[i].startBlock	=
			SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
		forkp->ff_extents[i].blockCount	=
			SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
	}
	forkp->ff_size      = SWAP_BE64 (vhp->allocationFile.logicalSize);
	forkp->ff_blocks    = SWAP_BE32 (vhp->allocationFile.totalBlocks);
	forkp->ff_clumpsize = SWAP_BE32 (vhp->allocationFile.clumpSize);

	buf_brelse(bp);
	vhp = NULL;

	/*
	 * Re-load B-tree header data
	 */
	forkp = VTOF((struct vnode *)vcb->extentsRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	forkp = VTOF((struct vnode *)vcb->catalogRefNum);
	if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
		return (error);

	if (hfsmp->hfs_attribute_vp) {
		forkp = VTOF(hfsmp->hfs_attribute_vp);
		if ( (error = MacToVFSError( BTReloadData((FCB*)forkp) )) )
			return (error);
	}

	/* Reload the volume name */
	if ((error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, NULL, NULL)))
		return (error);
	vcb->volumeNameEncodingHint = cndesc.cd_encoding;
	bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
	cat_releasedesc(&cndesc);

	/* Re-establish private/hidden directories. */
	hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
	hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

	/* In case any volume information changed to trigger a notification */
	hfs_generate_volume_notifications(hfsmp);

	return (0);
}
982
/*
 * Convert a struct timeval to a total count of microseconds.
 *
 * The input is read-only; the pointer is const-qualified for consistency
 * with hfs_has_elapsed() below (callers passing non-const pointers are
 * unaffected).  Marked __unused because some configurations compile out
 * the KERNEL_DEBUG_CONSTANT call sites that reference it.
 */
__unused
static uint64_t tv_to_usecs(const struct timeval *tv)
{
	/* 1000000ULL forces the multiply into unsigned 64-bit arithmetic. */
	return tv->tv_sec * 1000000ULL + tv->tv_usec;
}
988
989 // Returns TRUE if b - a >= usecs
990 static boolean_t hfs_has_elapsed (const struct timeval *a,
991 const struct timeval *b,
992 uint64_t usecs)
993 {
994 struct timeval diff;
995 timersub(b, a, &diff);
996 return diff.tv_sec * 1000000ULL + diff.tv_usec >= usecs;
997 }
998
/*
 * hfs_syncer
 *
 * Deferred-flush callback for a mounted HFS volume (scheduled via
 * hfs_syncer_queue).  Decides whether the metadata-flush delay has
 * elapsed; if not, it reschedules itself, otherwise it performs a
 * synchronous flush of the journal (journaled volumes) or of the whole
 * volume via hfs_sync (non-journaled volumes).
 *
 * arg0 is the struct hfsmount of the volume; the second thread_call
 * parameter is unused.
 *
 * Synchronization with hfs_unmount: hfsmp->hfs_syncer being NULL means
 * hfs_unmount is tearing down and waiting on hfs_sync_incomplete; every
 * exit path that will not run again must clear hfs_sync_incomplete and
 * wake the waiter.  After the final hfs_syncer_unlock below, hfsmp may
 * already be freed.
 */
static void
hfs_syncer(void *arg0, __unused void *unused)
{
	struct hfsmount *hfsmp = arg0;
	struct timeval now;

	microuptime(&now);

	KERNEL_DEBUG_CONSTANT(HFSDBG_SYNCER | DBG_FUNC_START, hfsmp,
	                      tv_to_usecs(&now),
	                      tv_to_usecs(&hfsmp->hfs_mp->mnt_last_write_completed_timestamp),
	                      hfsmp->hfs_mp->mnt_pending_write_size, 0);

	hfs_syncer_lock(hfsmp);

	if (!hfsmp->hfs_syncer) {
		// hfs_unmount is waiting for us; leave now and let it do the sync.
		hfsmp->hfs_sync_incomplete = FALSE;
		hfs_syncer_unlock(hfsmp);
		hfs_syncer_wakeup(hfsmp);
		return;
	}

	/* Check to see whether we should flush now: either the oldest is
	   > HFS_MAX_META_DELAY or HFS_META_DELAY has elapsed since the
	   request and there are no pending writes. */

	boolean_t flush_now = FALSE;

	if (hfs_has_elapsed(&hfsmp->hfs_sync_req_oldest, &now, HFS_MAX_META_DELAY))
		flush_now = TRUE;
	else if (!hfsmp->hfs_mp->mnt_pending_write_size) {
		/* N.B. accessing mnt_last_write_completed_timestamp is not thread safe, but
		   it won't matter for what we're using it for. */
		if (hfs_has_elapsed(&hfsmp->hfs_mp->mnt_last_write_completed_timestamp,
		                    &now,
		                    HFS_META_DELAY)) {
			flush_now = TRUE;
		}
	}

	if (!flush_now) {
		/* Not time yet: grab the thread_call handle while still under the
		   syncer lock, then reschedule ourselves after dropping it. */
		thread_call_t syncer = hfsmp->hfs_syncer;

		hfs_syncer_unlock(hfsmp);

		hfs_syncer_queue(syncer);

		return;
	}

	/* Consume the pending request before dropping the lock; a request
	   arriving after this point will be picked up by the re-check at the
	   bottom of this function. */
	timerclear(&hfsmp->hfs_sync_req_oldest);

	hfs_syncer_unlock(hfsmp);

	KERNEL_DEBUG_CONSTANT(HFSDBG_SYNCER_TIMED | DBG_FUNC_START,
	                      tv_to_usecs(&now),
	                      tv_to_usecs(&hfsmp->hfs_mp->mnt_last_write_completed_timestamp),
	                      tv_to_usecs(&hfsmp->hfs_mp->mnt_last_write_issued_timestamp),
	                      hfsmp->hfs_mp->mnt_pending_write_size, 0);

	/* NOTE(review): this early return skips the reschedule/wakeup logic
	   below, leaving hfs_sync_incomplete untouched — it appears to be a
	   should-never-happen guard; confirm against callers. */
	if (hfsmp->hfs_syncer_thread) {
		printf("hfs: syncer already running!\n");
		return;
	}

	hfsmp->hfs_syncer_thread = current_thread();

	/*
	 * We intentionally do a synchronous flush (of the journal or entire volume) here.
	 * For journaled volumes, this means we wait until the metadata blocks are written
	 * to both the journal and their final locations (in the B-trees, etc.).
	 *
	 * This tends to avoid interleaving the metadata writes with other writes (for
	 * example, user data, or to the journal when a later transaction notices that
	 * an earlier transaction has finished its async writes, and then updates the
	 * journal start in the journal header). Avoiding interleaving of writes is
	 * very good for performance on simple flash devices like SD cards, thumb drives;
	 * and on devices like floppies. Since removable devices tend to be this kind of
	 * simple device, doing a synchronous flush actually improves performance in
	 * practice.
	 *
	 * NOTE: For non-journaled volumes, the call to hfs_sync will also cause dirty
	 * user data to be written.
	 */
	if (hfsmp->jnl) {
		hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
	} else {
		hfs_sync(hfsmp->hfs_mp, MNT_WAIT, vfs_context_kernel());
	}

	/* The comma expression refreshes `now` so the END tracepoint logs the
	   post-flush time. */
	KERNEL_DEBUG_CONSTANT(HFSDBG_SYNCER_TIMED | DBG_FUNC_END,
	                      (microuptime(&now), tv_to_usecs(&now)),
	                      tv_to_usecs(&hfsmp->hfs_mp->mnt_last_write_completed_timestamp),
	                      tv_to_usecs(&hfsmp->hfs_mp->mnt_last_write_issued_timestamp),
	                      hfsmp->hfs_mp->mnt_pending_write_size, 0);

	hfsmp->hfs_syncer_thread = NULL;

	hfs_syncer_lock(hfsmp);

	// If hfs_unmount lets us and we missed a sync, schedule again
	if (hfsmp->hfs_syncer && timerisset(&hfsmp->hfs_sync_req_oldest)) {
		thread_call_t syncer = hfsmp->hfs_syncer;

		hfs_syncer_unlock(hfsmp);

		hfs_syncer_queue(syncer);
	} else {
		/* Either unmount is in progress or no new request arrived: report
		   completion and wake anyone (hfs_unmount) waiting on it. */
		hfsmp->hfs_sync_incomplete = FALSE;
		hfs_syncer_unlock(hfsmp);
		hfs_syncer_wakeup(hfsmp);
	}

	/* BE CAREFUL WHAT YOU ADD HERE: at this point hfs_unmount is free
	   to continue and therefore hfsmp might be invalid. */

	KERNEL_DEBUG_CONSTANT(HFSDBG_SYNCER | DBG_FUNC_END, 0, 0, 0, 0, 0);
}
1118
1119
1120 extern int IOBSDIsMediaEjectable( const char *cdev_name );
1121
1122 /*
1123 * Call into the allocator code and perform a full scan of the bitmap file.
1124 *
1125 * This allows us to TRIM unallocated ranges if needed, and also to build up
1126 * an in-memory summary table of the state of the allocated blocks.
1127 */
1128 void hfs_scan_blocks (struct hfsmount *hfsmp) {
1129 /*
1130 * Take the allocation file lock. Journal transactions will block until
1131 * we're done here.
1132 */
1133
1134 int flags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1135
1136 /*
1137 * We serialize here with the HFS mount lock as we're mounting.
1138 *
1139 * The mount can only proceed once this thread has acquired the bitmap
1140 * lock, since we absolutely do not want someone else racing in and
1141 * getting the bitmap lock, doing a read/write of the bitmap file,
1142 * then us getting the bitmap lock.
1143 *
1144 * To prevent this, the mount thread takes the HFS mount mutex, starts us
1145 * up, then immediately msleeps on the scan_var variable in the mount
1146 * point as a condition variable. This serialization is safe since
1147 * if we race in and try to proceed while they're still holding the lock,
1148 * we'll block trying to acquire the global lock. Since the mount thread
1149 * acquires the HFS mutex before starting this function in a new thread,
1150 * any lock acquisition on our part must be linearizably AFTER the mount thread's.
1151 *
1152 * Note that the HFS mount mutex is always taken last, and always for only
1153 * a short time. In this case, we just take it long enough to mark the
1154 * scan-in-flight bit.
1155 */
1156 (void) hfs_lock_mount (hfsmp);
1157 hfsmp->scan_var |= HFS_ALLOCATOR_SCAN_INFLIGHT;
1158 wakeup((caddr_t) &hfsmp->scan_var);
1159 hfs_unlock_mount (hfsmp);
1160
1161 /* Initialize the summary table */
1162 if (hfs_init_summary (hfsmp)) {
1163 printf("hfs: could not initialize summary table for %s\n", hfsmp->vcbVN);
1164 }
1165
1166 /*
1167 * ScanUnmapBlocks assumes that the bitmap lock is held when you
1168 * call the function. We don't care if there were any errors issuing unmaps.
1169 *
1170 * It will also attempt to build up the summary table for subsequent
1171 * allocator use, as configured.
1172 */
1173 (void) ScanUnmapBlocks(hfsmp);
1174
1175 (void) hfs_lock_mount (hfsmp);
1176 hfsmp->scan_var &= ~HFS_ALLOCATOR_SCAN_INFLIGHT;
1177 hfsmp->scan_var |= HFS_ALLOCATOR_SCAN_COMPLETED;
1178 wakeup((caddr_t) &hfsmp->scan_var);
1179 hfs_unlock_mount (hfsmp);
1180
1181 buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
1182
1183 hfs_systemfile_unlock(hfsmp, flags);
1184
1185 }
1186
/*
 * Whether the root filesystem was unmounted cleanly on the previous boot.
 * Set in hfs_mountfs() for the root volume from the volume header's
 * kHFSVolumeUnmountedMask attribute bit, and exported read-only as the
 * vfs.generic.root_unmounted_cleanly sysctl.
 */
static int hfs_root_unmounted_cleanly = 0;

SYSCTL_DECL(_vfs_generic);
SYSCTL_INT(_vfs_generic, OID_AUTO, root_unmounted_cleanly, CTLFLAG_RD, &hfs_root_unmounted_cleanly, 0, "Root filesystem was unmounted cleanly");
1191
1192 /*
1193 * Common code for mount and mountroot
1194 */
1195 int
1196 hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
1197 int journal_replay_only, vfs_context_t context)
1198 {
1199 struct proc *p = vfs_context_proc(context);
1200 int retval = E_NONE;
1201 struct hfsmount *hfsmp = NULL;
1202 struct buf *bp;
1203 dev_t dev;
1204 HFSMasterDirectoryBlock *mdbp = NULL;
1205 int ronly;
1206 #if QUOTA
1207 int i;
1208 #endif
1209 int mntwrapper;
1210 kauth_cred_t cred;
1211 u_int64_t disksize;
1212 daddr64_t log_blkcnt;
1213 u_int32_t log_blksize;
1214 u_int32_t phys_blksize;
1215 u_int32_t minblksize;
1216 u_int32_t iswritable;
1217 daddr64_t mdb_offset;
1218 int isvirtual = 0;
1219 int isroot = 0;
1220 u_int32_t device_features = 0;
1221 int isssd;
1222
1223 if (args == NULL) {
1224 /* only hfs_mountroot passes us NULL as the 'args' argument */
1225 isroot = 1;
1226 }
1227
1228 ronly = vfs_isrdonly(mp);
1229 dev = vnode_specrdev(devvp);
1230 cred = p ? vfs_context_ucred(context) : NOCRED;
1231 mntwrapper = 0;
1232
1233 bp = NULL;
1234 hfsmp = NULL;
1235 mdbp = NULL;
1236 minblksize = kHFSBlockSize;
1237
1238 /* Advisory locking should be handled at the VFS layer */
1239 vfs_setlocklocal(mp);
1240
1241 /* Get the logical block size (treated as physical block size everywhere) */
1242 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&log_blksize, 0, context)) {
1243 if (HFS_MOUNT_DEBUG) {
1244 printf("hfs_mountfs: DKIOCGETBLOCKSIZE failed\n");
1245 }
1246 retval = ENXIO;
1247 goto error_exit;
1248 }
1249 if (log_blksize == 0 || log_blksize > 1024*1024*1024) {
1250 printf("hfs: logical block size 0x%x looks bad. Not mounting.\n", log_blksize);
1251 retval = ENXIO;
1252 goto error_exit;
1253 }
1254
1255 /* Get the physical block size. */
1256 retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context);
1257 if (retval) {
1258 if ((retval != ENOTSUP) && (retval != ENOTTY)) {
1259 if (HFS_MOUNT_DEBUG) {
1260 printf("hfs_mountfs: DKIOCGETPHYSICALBLOCKSIZE failed\n");
1261 }
1262 retval = ENXIO;
1263 goto error_exit;
1264 }
1265 /* If device does not support this ioctl, assume that physical
1266 * block size is same as logical block size
1267 */
1268 phys_blksize = log_blksize;
1269 }
1270 if (phys_blksize == 0 || phys_blksize > MAXBSIZE) {
1271 printf("hfs: physical block size 0x%x looks bad. Not mounting.\n", phys_blksize);
1272 retval = ENXIO;
1273 goto error_exit;
1274 }
1275
1276 /* Switch to 512 byte sectors (temporarily) */
1277 if (log_blksize > 512) {
1278 u_int32_t size512 = 512;
1279
1280 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, context)) {
1281 if (HFS_MOUNT_DEBUG) {
1282 printf("hfs_mountfs: DKIOCSETBLOCKSIZE failed \n");
1283 }
1284 retval = ENXIO;
1285 goto error_exit;
1286 }
1287 }
1288 /* Get the number of 512 byte physical blocks. */
1289 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1290 /* resetting block size may fail if getting block count did */
1291 (void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context);
1292 if (HFS_MOUNT_DEBUG) {
1293 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT failed\n");
1294 }
1295 retval = ENXIO;
1296 goto error_exit;
1297 }
1298 /* Compute an accurate disk size (i.e. within 512 bytes) */
1299 disksize = (u_int64_t)log_blkcnt * (u_int64_t)512;
1300
1301 /*
1302 * On Tiger it is not necessary to switch the device
1303 * block size to be 4k if there are more than 31-bits
1304 * worth of blocks but to insure compatibility with
1305 * pre-Tiger systems we have to do it.
1306 *
1307 * If the device size is not a multiple of 4K (8 * 512), then
1308 * switching the logical block size isn't going to help because
1309 * we will be unable to write the alternate volume header.
1310 * In this case, just leave the logical block size unchanged.
1311 */
1312 if (log_blkcnt > 0x000000007fffffff && (log_blkcnt & 7) == 0) {
1313 minblksize = log_blksize = 4096;
1314 if (phys_blksize < log_blksize)
1315 phys_blksize = log_blksize;
1316 }
1317
1318 /*
1319 * The cluster layer is not currently prepared to deal with a logical
1320 * block size larger than the system's page size. (It can handle
1321 * blocks per page, but not multiple pages per block.) So limit the
1322 * logical block size to the page size.
1323 */
1324 if (log_blksize > PAGE_SIZE) {
1325 log_blksize = PAGE_SIZE;
1326 }
1327
1328 /* Now switch to our preferred physical block size. */
1329 if (log_blksize > 512) {
1330 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1331 if (HFS_MOUNT_DEBUG) {
1332 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (2) failed\n");
1333 }
1334 retval = ENXIO;
1335 goto error_exit;
1336 }
1337 /* Get the count of physical blocks. */
1338 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1339 if (HFS_MOUNT_DEBUG) {
1340 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (2) failed\n");
1341 }
1342 retval = ENXIO;
1343 goto error_exit;
1344 }
1345 }
1346 /*
1347 * At this point:
1348 * minblksize is the minimum physical block size
1349 * log_blksize has our preferred physical block size
1350 * log_blkcnt has the total number of physical blocks
1351 */
1352
1353 mdb_offset = (daddr64_t)HFS_PRI_SECTOR(log_blksize);
1354 if ((retval = (int)buf_meta_bread(devvp,
1355 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)),
1356 phys_blksize, cred, &bp))) {
1357 if (HFS_MOUNT_DEBUG) {
1358 printf("hfs_mountfs: buf_meta_bread failed with %d\n", retval);
1359 }
1360 goto error_exit;
1361 }
1362 MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK);
1363 if (mdbp == NULL) {
1364 retval = ENOMEM;
1365 if (HFS_MOUNT_DEBUG) {
1366 printf("hfs_mountfs: MALLOC failed\n");
1367 }
1368 goto error_exit;
1369 }
1370 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize);
1371 buf_brelse(bp);
1372 bp = NULL;
1373
1374 MALLOC(hfsmp, struct hfsmount *, sizeof(struct hfsmount), M_HFSMNT, M_WAITOK);
1375 if (hfsmp == NULL) {
1376 if (HFS_MOUNT_DEBUG) {
1377 printf("hfs_mountfs: MALLOC (2) failed\n");
1378 }
1379 retval = ENOMEM;
1380 goto error_exit;
1381 }
1382 bzero(hfsmp, sizeof(struct hfsmount));
1383
1384 hfs_chashinit_finish(hfsmp);
1385
1386 /* Init the ID lookup hashtable */
1387 hfs_idhash_init (hfsmp);
1388
1389 /*
1390 * See if the disk supports unmap (trim).
1391 *
1392 * NOTE: vfs_init_io_attributes has not been called yet, so we can't use the io_flags field
1393 * returned by vfs_ioattr. We need to call VNOP_IOCTL ourselves.
1394 */
1395 if (VNOP_IOCTL(devvp, DKIOCGETFEATURES, (caddr_t)&device_features, 0, context) == 0) {
1396 if (device_features & DK_FEATURE_UNMAP) {
1397 hfsmp->hfs_flags |= HFS_UNMAP;
1398 }
1399
1400 if(device_features & DK_FEATURE_BARRIER)
1401 hfsmp->hfs_flags |= HFS_FEATURE_BARRIER;
1402 }
1403
1404 /*
1405 * See if the disk is a solid state device, too. We need this to decide what to do about
1406 * hotfiles.
1407 */
1408 if (VNOP_IOCTL(devvp, DKIOCISSOLIDSTATE, (caddr_t)&isssd, 0, context) == 0) {
1409 if (isssd) {
1410 hfsmp->hfs_flags |= HFS_SSD;
1411 }
1412 }
1413
1414 /* See if the underlying device is Core Storage or not */
1415 dk_corestorage_info_t cs_info;
1416 memset(&cs_info, 0, sizeof(dk_corestorage_info_t));
1417 if (VNOP_IOCTL(devvp, DKIOCCORESTORAGE, (caddr_t)&cs_info, 0, context) == 0) {
1418 hfsmp->hfs_flags |= HFS_CS;
1419 if (isroot && (cs_info.flags & DK_CORESTORAGE_PIN_YOUR_METADATA)) {
1420 hfsmp->hfs_flags |= HFS_CS_METADATA_PIN;
1421 }
1422 if (isroot && (cs_info.flags & DK_CORESTORAGE_ENABLE_HOTFILES)) {
1423 hfsmp->hfs_flags |= HFS_CS_HOTFILE_PIN;
1424 hfsmp->hfs_cs_hotfile_size = cs_info.hotfile_size;
1425 }
1426 if ((cs_info.flags & DK_CORESTORAGE_PIN_YOUR_SWAPFILE)) {
1427 hfsmp->hfs_flags |= HFS_CS_SWAPFILE_PIN;
1428
1429 mp->mnt_ioflags |= MNT_IOFLAGS_SWAPPIN_SUPPORTED;
1430 mp->mnt_max_swappin_available = cs_info.swapfile_pinning;
1431 }
1432 }
1433
1434 /*
1435 * Init the volume information structure
1436 */
1437
1438 lck_mtx_init(&hfsmp->hfs_mutex, hfs_mutex_group, hfs_lock_attr);
1439 lck_mtx_init(&hfsmp->hfc_mutex, hfs_mutex_group, hfs_lock_attr);
1440 lck_rw_init(&hfsmp->hfs_global_lock, hfs_rwlock_group, hfs_lock_attr);
1441 lck_spin_init(&hfsmp->vcbFreeExtLock, hfs_spinlock_group, hfs_lock_attr);
1442
1443 vfs_setfsprivate(mp, hfsmp);
1444 hfsmp->hfs_mp = mp; /* Make VFSTOHFS work */
1445 hfsmp->hfs_raw_dev = vnode_specrdev(devvp);
1446 hfsmp->hfs_devvp = devvp;
1447 vnode_ref(devvp); /* Hold a ref on the device, dropped when hfsmp is freed. */
1448 hfsmp->hfs_logical_block_size = log_blksize;
1449 hfsmp->hfs_logical_block_count = log_blkcnt;
1450 hfsmp->hfs_logical_bytes = (uint64_t) log_blksize * (uint64_t) log_blkcnt;
1451 hfsmp->hfs_physical_block_size = phys_blksize;
1452 hfsmp->hfs_log_per_phys = (phys_blksize / log_blksize);
1453 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1454 if (ronly)
1455 hfsmp->hfs_flags |= HFS_READ_ONLY;
1456 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS)
1457 hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
1458
1459 #if QUOTA
1460 for (i = 0; i < MAXQUOTAS; i++)
1461 dqfileinit(&hfsmp->hfs_qfiles[i]);
1462 #endif
1463
1464 if (args) {
1465 hfsmp->hfs_uid = (args->hfs_uid == (uid_t)VNOVAL) ? UNKNOWNUID : args->hfs_uid;
1466 if (hfsmp->hfs_uid == 0xfffffffd) hfsmp->hfs_uid = UNKNOWNUID;
1467 hfsmp->hfs_gid = (args->hfs_gid == (gid_t)VNOVAL) ? UNKNOWNGID : args->hfs_gid;
1468 if (hfsmp->hfs_gid == 0xfffffffd) hfsmp->hfs_gid = UNKNOWNGID;
1469 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1470 if (args->hfs_mask != (mode_t)VNOVAL) {
1471 hfsmp->hfs_dir_mask = args->hfs_mask & ALLPERMS;
1472 if (args->flags & HFSFSMNT_NOXONFILES) {
1473 hfsmp->hfs_file_mask = (args->hfs_mask & DEFFILEMODE);
1474 } else {
1475 hfsmp->hfs_file_mask = args->hfs_mask & ALLPERMS;
1476 }
1477 } else {
1478 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1479 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1480 }
1481 if ((args->flags != (int)VNOVAL) && (args->flags & HFSFSMNT_WRAPPER))
1482 mntwrapper = 1;
1483 } else {
1484 /* Even w/o explicit mount arguments, MNT_UNKNOWNPERMISSIONS requires setting up uid, gid, and mask: */
1485 if (((unsigned int)vfs_flags(mp)) & MNT_UNKNOWNPERMISSIONS) {
1486 hfsmp->hfs_uid = UNKNOWNUID;
1487 hfsmp->hfs_gid = UNKNOWNGID;
1488 vfs_setowner(mp, hfsmp->hfs_uid, hfsmp->hfs_gid); /* tell the VFS */
1489 hfsmp->hfs_dir_mask = UNKNOWNPERMISSIONS & ALLPERMS; /* 0777: rwx---rwx */
1490 hfsmp->hfs_file_mask = UNKNOWNPERMISSIONS & DEFFILEMODE; /* 0666: no --x by default? */
1491 }
1492 }
1493
1494 /* Find out if disk media is writable. */
1495 if (VNOP_IOCTL(devvp, DKIOCISWRITABLE, (caddr_t)&iswritable, 0, context) == 0) {
1496 if (iswritable)
1497 hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
1498 else
1499 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1500 }
1501
1502 // Reservations
1503 rl_init(&hfsmp->hfs_reserved_ranges[0]);
1504 rl_init(&hfsmp->hfs_reserved_ranges[1]);
1505
1506 // record the current time at which we're mounting this volume
1507 struct timeval tv;
1508 microtime(&tv);
1509 hfsmp->hfs_mount_time = tv.tv_sec;
1510
1511 /* Mount a standard HFS disk */
1512 if ((SWAP_BE16(mdbp->drSigWord) == kHFSSigWord) &&
1513 (mntwrapper || (SWAP_BE16(mdbp->drEmbedSigWord) != kHFSPlusSigWord))) {
1514 #if CONFIG_HFS_STD
1515 /* On 10.6 and beyond, non read-only mounts for HFS standard vols get rejected */
1516 if (vfs_isrdwr(mp)) {
1517 retval = EROFS;
1518 goto error_exit;
1519 }
1520
1521 printf("hfs_mountfs: Mounting HFS Standard volumes was deprecated in Mac OS 10.7 \n");
1522
1523 /* Treat it as if it's read-only and not writeable */
1524 hfsmp->hfs_flags |= HFS_READ_ONLY;
1525 hfsmp->hfs_flags &= ~HFS_WRITEABLE_MEDIA;
1526
1527 /* If only journal replay is requested, exit immediately */
1528 if (journal_replay_only) {
1529 retval = 0;
1530 goto error_exit;
1531 }
1532
1533 if ((vfs_flags(mp) & MNT_ROOTFS)) {
1534 retval = EINVAL; /* Cannot root from HFS standard disks */
1535 goto error_exit;
1536 }
1537 /* HFS disks can only use 512 byte physical blocks */
1538 if (log_blksize > kHFSBlockSize) {
1539 log_blksize = kHFSBlockSize;
1540 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1541 retval = ENXIO;
1542 goto error_exit;
1543 }
1544 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1545 retval = ENXIO;
1546 goto error_exit;
1547 }
1548 hfsmp->hfs_logical_block_size = log_blksize;
1549 hfsmp->hfs_logical_block_count = log_blkcnt;
1550 hfsmp->hfs_logical_bytes = (uint64_t) log_blksize * (uint64_t) log_blkcnt;
1551 hfsmp->hfs_physical_block_size = log_blksize;
1552 hfsmp->hfs_log_per_phys = 1;
1553 }
1554 if (args) {
1555 hfsmp->hfs_encoding = args->hfs_encoding;
1556 HFSTOVCB(hfsmp)->volumeNameEncodingHint = args->hfs_encoding;
1557
1558 /* establish the timezone */
1559 gTimeZone = args->hfs_timezone;
1560 }
1561
1562 retval = hfs_getconverter(hfsmp->hfs_encoding, &hfsmp->hfs_get_unicode,
1563 &hfsmp->hfs_get_hfsname);
1564 if (retval)
1565 goto error_exit;
1566
1567 retval = hfs_MountHFSVolume(hfsmp, mdbp, p);
1568 if (retval)
1569 (void) hfs_relconverter(hfsmp->hfs_encoding);
1570 #else
1571 /* On platforms where HFS Standard is not supported, deny the mount altogether */
1572 retval = EINVAL;
1573 goto error_exit;
1574 #endif
1575
1576 }
1577 else { /* Mount an HFS Plus disk */
1578 HFSPlusVolumeHeader *vhp;
1579 off_t embeddedOffset;
1580 int jnl_disable = 0;
1581
1582 /* Get the embedded Volume Header */
1583 if (SWAP_BE16(mdbp->drEmbedSigWord) == kHFSPlusSigWord) {
1584 embeddedOffset = SWAP_BE16(mdbp->drAlBlSt) * kHFSBlockSize;
1585 embeddedOffset += (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.startBlock) *
1586 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1587
1588 /*
1589 * Cooperative Fusion is not allowed on embedded HFS+
1590 * filesystems (HFS+ inside HFS standard wrapper)
1591 */
1592 hfsmp->hfs_flags &= ~HFS_CS_METADATA_PIN;
1593
1594 /*
1595 * If the embedded volume doesn't start on a block
1596 * boundary, then switch the device to a 512-byte
1597 * block size so everything will line up on a block
1598 * boundary.
1599 */
1600 if ((embeddedOffset % log_blksize) != 0) {
1601 printf("hfs_mountfs: embedded volume offset not"
1602 " a multiple of physical block size (%d);"
1603 " switching to 512\n", log_blksize);
1604 log_blksize = 512;
1605 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE,
1606 (caddr_t)&log_blksize, FWRITE, context)) {
1607
1608 if (HFS_MOUNT_DEBUG) {
1609 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (3) failed\n");
1610 }
1611 retval = ENXIO;
1612 goto error_exit;
1613 }
1614 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT,
1615 (caddr_t)&log_blkcnt, 0, context)) {
1616 if (HFS_MOUNT_DEBUG) {
1617 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (3) failed\n");
1618 }
1619 retval = ENXIO;
1620 goto error_exit;
1621 }
1622 /* Note: relative block count adjustment */
1623 hfsmp->hfs_logical_block_count *=
1624 hfsmp->hfs_logical_block_size / log_blksize;
1625
1626 /* Update logical /physical block size */
1627 hfsmp->hfs_logical_block_size = log_blksize;
1628 hfsmp->hfs_physical_block_size = log_blksize;
1629
1630 phys_blksize = log_blksize;
1631 hfsmp->hfs_log_per_phys = 1;
1632 }
1633
1634 disksize = (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.blockCount) *
1635 (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
1636
1637 hfsmp->hfs_logical_block_count = disksize / log_blksize;
1638
1639 hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size;
1640
1641 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1642
1643 if (bp) {
1644 buf_markinvalid(bp);
1645 buf_brelse(bp);
1646 bp = NULL;
1647 }
1648 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1649 phys_blksize, cred, &bp);
1650 if (retval) {
1651 if (HFS_MOUNT_DEBUG) {
1652 printf("hfs_mountfs: buf_meta_bread (2) failed with %d\n", retval);
1653 }
1654 goto error_exit;
1655 }
1656 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, 512);
1657 buf_brelse(bp);
1658 bp = NULL;
1659 vhp = (HFSPlusVolumeHeader*) mdbp;
1660
1661 }
1662 else { /* pure HFS+ */
1663 embeddedOffset = 0;
1664 vhp = (HFSPlusVolumeHeader*) mdbp;
1665 }
1666
1667 retval = hfs_ValidateHFSPlusVolumeHeader(hfsmp, vhp);
1668 if (retval)
1669 goto error_exit;
1670
1671 /*
1672 * If allocation block size is less than the physical block size,
1673 * invalidate the buffer read in using native physical block size
1674 * to ensure data consistency.
1675 *
1676 * HFS Plus reserves one allocation block for the Volume Header.
1677 * If the physical size is larger, then when we read the volume header,
1678 * we will also end up reading in the next allocation block(s).
1679 * If those other allocation block(s) is/are modified, and then the volume
1680 * header is modified, the write of the volume header's buffer will write
1681 * out the old contents of the other allocation blocks.
1682 *
1683 * We assume that the physical block size is same as logical block size.
1684 * The physical block size value is used to round down the offsets for
1685 * reading and writing the primary and alternate volume headers.
1686 *
1687 * The same logic is also in hfs_MountHFSPlusVolume to ensure that
1688 * hfs_mountfs, hfs_MountHFSPlusVolume and later are doing the I/Os
1689 * using same block size.
1690 */
1691 if (SWAP_BE32(vhp->blockSize) < hfsmp->hfs_physical_block_size) {
1692 phys_blksize = hfsmp->hfs_logical_block_size;
1693 hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
1694 hfsmp->hfs_log_per_phys = 1;
1695 // There should be one bp associated with devvp in buffer cache.
1696 retval = buf_invalidateblks(devvp, 0, 0, 0);
1697 if (retval)
1698 goto error_exit;
1699 }
1700
1701 if (isroot) {
1702 hfs_root_unmounted_cleanly = ((SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) != 0);
1703 }
1704
1705 /*
1706 * On inconsistent disks, do not allow read-write mount
1707 * unless it is the boot volume being mounted. We also
1708 * always want to replay the journal if the journal_replay_only
1709 * flag is set because that will (most likely) get the
1710 * disk into a consistent state before fsck_hfs starts
1711 * looking at it.
1712 */
1713 if ( !(vfs_flags(mp) & MNT_ROOTFS)
1714 && (SWAP_BE32(vhp->attributes) & kHFSVolumeInconsistentMask)
1715 && !journal_replay_only
1716 && !(hfsmp->hfs_flags & HFS_READ_ONLY)) {
1717
1718 if (HFS_MOUNT_DEBUG) {
1719 printf("hfs_mountfs: failed to mount non-root inconsistent disk\n");
1720 }
1721 retval = EINVAL;
1722 goto error_exit;
1723 }
1724
1725
1726 // XXXdbg
1727 //
1728 hfsmp->jnl = NULL;
1729 hfsmp->jvp = NULL;
1730 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS) &&
1731 args->journal_disable) {
1732 jnl_disable = 1;
1733 }
1734
1735 //
1736 // We only initialize the journal here if the last person
1737 // to mount this volume was journaling aware. Otherwise
1738 // we delay journal initialization until later at the end
1739 // of hfs_MountHFSPlusVolume() because the last person who
1740 // mounted it could have messed things up behind our back
1741 // (so we need to go find the .journal file, make sure it's
1742 // the right size, re-sync up if it was moved, etc).
1743 //
1744 if ( (SWAP_BE32(vhp->lastMountedVersion) == kHFSJMountVersion)
1745 && (SWAP_BE32(vhp->attributes) & kHFSVolumeJournaledMask)
1746 && !jnl_disable) {
1747
1748 // if we're able to init the journal, mark the mount
1749 // point as journaled.
1750 //
1751 if ((retval = hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred)) == 0) {
1752 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1753 } else {
1754 if (retval == EROFS) {
1755 // EROFS is a special error code that means the volume has an external
1756 // journal which we couldn't find. in that case we do not want to
1757 // rewrite the volume header - we'll just refuse to mount the volume.
1758 if (HFS_MOUNT_DEBUG) {
1759 printf("hfs_mountfs: hfs_early_journal_init indicated external jnl \n");
1760 }
1761 retval = EINVAL;
1762 goto error_exit;
1763 }
1764
1765 // if the journal failed to open, then set the lastMountedVersion
1766 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1767 // of just bailing out because the volume is journaled.
1768 if (!ronly) {
1769 if (HFS_MOUNT_DEBUG) {
1770 printf("hfs_mountfs: hfs_early_journal_init failed, setting to FSK \n");
1771 }
1772
1773 HFSPlusVolumeHeader *jvhp;
1774
1775 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1776
1777 if (mdb_offset == 0) {
1778 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1779 }
1780
1781 bp = NULL;
1782 retval = (int)buf_meta_bread(devvp,
1783 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1784 phys_blksize, cred, &bp);
1785 if (retval == 0) {
1786 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1787
1788 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1789 printf ("hfs(1): Journal replay fail. Writing lastMountVersion as FSK!\n");
1790 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1791 buf_bwrite(bp);
1792 } else {
1793 buf_brelse(bp);
1794 }
1795 bp = NULL;
1796 } else if (bp) {
1797 buf_brelse(bp);
1798 // clear this so the error exit path won't try to use it
1799 bp = NULL;
1800 }
1801 }
1802
1803 // if this isn't the root device just bail out.
1804 // If it is the root device we just continue on
1805 // in the hopes that fsck_hfs will be able to
1806 // fix any damage that exists on the volume.
1807 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1808 if (HFS_MOUNT_DEBUG) {
1809 printf("hfs_mountfs: hfs_early_journal_init failed, erroring out \n");
1810 }
1811 retval = EINVAL;
1812 goto error_exit;
1813 }
1814 }
1815 }
1816 // XXXdbg
1817
1818 /* Either the journal is replayed successfully, or there
1819 * was nothing to replay, or no journal exists. In any case,
1820 * return success.
1821 */
1822 if (journal_replay_only) {
1823 retval = 0;
1824 goto error_exit;
1825 }
1826
1827 (void) hfs_getconverter(0, &hfsmp->hfs_get_unicode, &hfsmp->hfs_get_hfsname);
1828
1829 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1830 /*
1831 * If the backend didn't like our physical blocksize
1832 * then retry with physical blocksize of 512.
1833 */
1834 if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) {
1835 printf("hfs_mountfs: could not use physical block size "
1836 "(%d) switching to 512\n", log_blksize);
1837 log_blksize = 512;
1838 if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
1839 if (HFS_MOUNT_DEBUG) {
1840 printf("hfs_mountfs: DKIOCSETBLOCKSIZE (4) failed \n");
1841 }
1842 retval = ENXIO;
1843 goto error_exit;
1844 }
1845 if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
1846 if (HFS_MOUNT_DEBUG) {
1847 printf("hfs_mountfs: DKIOCGETBLOCKCOUNT (4) failed \n");
1848 }
1849 retval = ENXIO;
1850 goto error_exit;
1851 }
1852 devvp->v_specsize = log_blksize;
1853 /* Note: relative block count adjustment (in case this is an embedded volume). */
1854 hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize;
1855 hfsmp->hfs_logical_block_size = log_blksize;
1856 hfsmp->hfs_log_per_phys = hfsmp->hfs_physical_block_size / log_blksize;
1857
1858 hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size;
1859
1860 if (hfsmp->jnl && hfsmp->jvp == devvp) {
1861 // close and re-open this with the new block size
1862 journal_close(hfsmp->jnl);
1863 hfsmp->jnl = NULL;
1864 if (hfs_early_journal_init(hfsmp, vhp, args, embeddedOffset, mdb_offset, mdbp, cred) == 0) {
1865 vfs_setflags(mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
1866 } else {
1867 // if the journal failed to open, then set the lastMountedVersion
1868 // to be "FSK!" which fsck_hfs will see and force the fsck instead
1869 // of just bailing out because the volume is journaled.
1870 if (!ronly) {
1871 if (HFS_MOUNT_DEBUG) {
1872 printf("hfs_mountfs: hfs_early_journal_init (2) resetting.. \n");
1873 }
1874 HFSPlusVolumeHeader *jvhp;
1875
1876 hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
1877
1878 if (mdb_offset == 0) {
1879 mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
1880 }
1881
1882 bp = NULL;
1883 retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
1884 phys_blksize, cred, &bp);
1885 if (retval == 0) {
1886 jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
1887
1888 if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
1889 printf ("hfs(2): Journal replay fail. Writing lastMountVersion as FSK!\n");
1890 jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
1891 buf_bwrite(bp);
1892 } else {
1893 buf_brelse(bp);
1894 }
1895 bp = NULL;
1896 } else if (bp) {
1897 buf_brelse(bp);
1898 // clear this so the error exit path won't try to use it
1899 bp = NULL;
1900 }
1901 }
1902
1903 // if this isn't the root device just bail out.
1904 // If it is the root device we just continue on
1905 // in the hopes that fsck_hfs will be able to
1906 // fix any damage that exists on the volume.
1907 if ( !(vfs_flags(mp) & MNT_ROOTFS)) {
1908 if (HFS_MOUNT_DEBUG) {
1909 printf("hfs_mountfs: hfs_early_journal_init (2) failed \n");
1910 }
1911 retval = EINVAL;
1912 goto error_exit;
1913 }
1914 }
1915 }
1916
1917 /* Try again with a smaller block size... */
1918 retval = hfs_MountHFSPlusVolume(hfsmp, vhp, embeddedOffset, disksize, p, args, cred);
1919 if (retval && HFS_MOUNT_DEBUG) {
1920 printf("hfs_MountHFSPlusVolume (late) returned %d\n",retval);
1921 }
1922 }
1923 if (retval)
1924 (void) hfs_relconverter(0);
1925 }
1926
1927 // save off a snapshot of the mtime from the previous mount
1928 // (for matador).
1929 hfsmp->hfs_last_mounted_mtime = hfsmp->hfs_mtime;
1930
1931 if ( retval ) {
1932 if (HFS_MOUNT_DEBUG) {
1933 printf("hfs_mountfs: encountered failure %d \n", retval);
1934 }
1935 goto error_exit;
1936 }
1937
1938 mp->mnt_vfsstat.f_fsid.val[0] = dev;
1939 mp->mnt_vfsstat.f_fsid.val[1] = vfs_typenum(mp);
1940 vfs_setmaxsymlen(mp, 0);
1941
1942 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSNATIVEXATTR;
1943 #if NAMEDSTREAMS
1944 mp->mnt_kern_flag |= MNTK_NAMED_STREAMS;
1945 #endif
1946 if ((hfsmp->hfs_flags & HFS_STANDARD) == 0 ) {
1947 /* Tell VFS that we support directory hard links. */
1948 mp->mnt_vtable->vfc_vfsflags |= VFC_VFSDIRLINKS;
1949 }
1950 #if CONFIG_HFS_STD
1951 else {
1952 /* HFS standard doesn't support extended readdir! */
1953 mount_set_noreaddirext (mp);
1954 }
1955 #endif
1956
1957 if (args) {
1958 /*
1959 * Set the free space warning levels for a non-root volume:
1960 *
1961 * Set the "danger" limit to 1% of the volume size or 100MB, whichever
1962 * is less. Set the "warning" limit to 2% of the volume size or 150MB,
1963 * whichever is less. And last, set the "desired" freespace level to
1964 * to 3% of the volume size or 200MB, whichever is less.
1965 */
1966 hfsmp->hfs_freespace_notify_dangerlimit =
1967 MIN(HFS_VERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1968 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_VERYLOWDISKTRIGGERFRACTION);
1969 hfsmp->hfs_freespace_notify_warninglimit =
1970 MIN(HFS_LOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1971 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKTRIGGERFRACTION);
1972 hfsmp->hfs_freespace_notify_desiredlevel =
1973 MIN(HFS_LOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1974 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_LOWDISKSHUTOFFFRACTION);
1975 } else {
1976 /*
1977 * Set the free space warning levels for the root volume:
1978 *
1979 * Set the "danger" limit to 5% of the volume size or 512MB, whichever
1980 * is less. Set the "warning" limit to 10% of the volume size or 1GB,
1981 * whichever is less. And last, set the "desired" freespace level to
1982 * to 11% of the volume size or 1.25GB, whichever is less.
1983 */
1984 hfsmp->hfs_freespace_notify_dangerlimit =
1985 MIN(HFS_ROOTVERYLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1986 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTVERYLOWDISKTRIGGERFRACTION);
1987 hfsmp->hfs_freespace_notify_warninglimit =
1988 MIN(HFS_ROOTLOWDISKTRIGGERLEVEL / HFSTOVCB(hfsmp)->blockSize,
1989 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKTRIGGERFRACTION);
1990 hfsmp->hfs_freespace_notify_desiredlevel =
1991 MIN(HFS_ROOTLOWDISKSHUTOFFLEVEL / HFSTOVCB(hfsmp)->blockSize,
1992 (HFSTOVCB(hfsmp)->totalBlocks / 100) * HFS_ROOTLOWDISKSHUTOFFFRACTION);
1993 };
1994
1995 /* Check if the file system exists on virtual device, like disk image */
1996 if (VNOP_IOCTL(devvp, DKIOCISVIRTUAL, (caddr_t)&isvirtual, 0, context) == 0) {
1997 if (isvirtual) {
1998 hfsmp->hfs_flags |= HFS_VIRTUAL_DEVICE;
1999 }
2000 }
2001
2002 /* do not allow ejectability checks on the root device */
2003 if (isroot == 0) {
2004 if ((hfsmp->hfs_flags & HFS_VIRTUAL_DEVICE) == 0 &&
2005 IOBSDIsMediaEjectable(mp->mnt_vfsstat.f_mntfromname)) {
2006 hfsmp->hfs_syncer = thread_call_allocate(hfs_syncer, hfsmp);
2007 if (hfsmp->hfs_syncer == NULL) {
2008 printf("hfs: failed to allocate syncer thread callback for %s (%s)\n",
2009 mp->mnt_vfsstat.f_mntfromname, mp->mnt_vfsstat.f_mntonname);
2010 }
2011 }
2012 }
2013
2014 printf("hfs: mounted %s on device %s\n", (hfsmp->vcbVN ? (const char*) hfsmp->vcbVN : "unknown"),
2015 (devvp->v_name ? devvp->v_name : (isroot ? "root_device": "unknown device")));
2016
2017 /*
2018 * Start looking for free space to drop below this level and generate a
2019 * warning immediately if needed:
2020 */
2021 hfsmp->hfs_notification_conditions = 0;
2022 hfs_generate_volume_notifications(hfsmp);
2023
2024 if (ronly == 0) {
2025 (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
2026 }
2027 FREE(mdbp, M_TEMP);
2028 return (0);
2029
2030 error_exit:
2031 if (bp)
2032 buf_brelse(bp);
2033 if (mdbp)
2034 FREE(mdbp, M_TEMP);
2035
2036 if (hfsmp && hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
2037 vnode_clearmountedon(hfsmp->jvp);
2038 (void)VNOP_CLOSE(hfsmp->jvp, ronly ? FREAD : FREAD|FWRITE, vfs_context_kernel());
2039 hfsmp->jvp = NULL;
2040 }
2041 if (hfsmp) {
2042 if (hfsmp->hfs_devvp) {
2043 vnode_rele(hfsmp->hfs_devvp);
2044 }
2045 hfs_locks_destroy(hfsmp);
2046 hfs_delete_chash(hfsmp);
2047 hfs_idhash_destroy (hfsmp);
2048
2049 FREE(hfsmp, M_HFSMNT);
2050 vfs_setfsprivate(mp, NULL);
2051 }
2052 return (retval);
2053 }
2054
2055
2056 /*
2057 * Make a filesystem operational.
2058 * Nothing to do at the moment.
2059 */
2060 /* ARGSUSED */
2061 static int
2062 hfs_start(__unused struct mount *mp, __unused int flags, __unused vfs_context_t context)
2063 {
2064 return (0);
2065 }
2066
2067
2068 /*
2069 * unmount system call
2070 */
/*
 * hfs_unmount - unmount system call entry point for HFS.
 *
 * Flushes user files, syncs each metadata B-tree file inside a single
 * transaction, marks the volume clean (unless runtime corruption was
 * detected), closes the journal and the external journal device (if any),
 * and finally tears down all per-mount state and frees the hfsmount.
 *
 * With MNT_FORCE set, individual flush failures are ignored and the
 * unmount proceeds to completion; otherwise the first failure aborts
 * the unmount and returns the error.
 */
int
hfs_unmount(struct mount *mp, int mntflags, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	int retval = E_NONE;
	int flags;		/* flags handed to hfs_flushfiles() */
	int force;		/* nonzero => ignore flush errors and keep going */
	int started_tr = 0;	/* nonzero => we opened a transaction that must be ended */

	flags = 0;
	force = 0;
	if (mntflags & MNT_FORCE) {
		flags |= FORCECLOSE;
		force = 1;
	}

	printf("hfs: unmount initiated on %s on device %s\n",
	       (hfsmp->vcbVN ? (const char*) hfsmp->vcbVN : "unknown"),
	       (hfsmp->hfs_devvp ? ((hfsmp->hfs_devvp->v_name ? hfsmp->hfs_devvp->v_name : "unknown device")) : "unknown device"));

	/* Push out all user file data first; a failure here is fatal unless forced. */
	if ((retval = hfs_flushfiles(mp, flags, p)) && !force)
		return (retval);

	if (hfsmp->hfs_flags & HFS_METADATA_ZONE)
		(void) hfs_recording_suspend(hfsmp);

	/* Stop and release the periodic syncer before tearing anything down. */
	hfs_syncer_free(hfsmp);

	if (hfsmp->hfs_flags & HFS_SUMMARY_TABLE) {
		if (hfsmp->hfs_summary_table) {
			int err = 0;
			/*
			 * Take the bitmap lock to serialize against a concurrent bitmap scan still in progress
			 */
			if (hfsmp->hfs_allocation_vp) {
				err = hfs_lock (VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
			}
			FREE (hfsmp->hfs_summary_table, M_TEMP);
			hfsmp->hfs_summary_table = NULL;
			hfsmp->hfs_flags &= ~HFS_SUMMARY_TABLE;

			/* Only unlock if the lock above actually succeeded. */
			if (err == 0 && hfsmp->hfs_allocation_vp){
				hfs_unlock (VTOC(hfsmp->hfs_allocation_vp));
			}

		}
	}

	/*
	 * Flush out the b-trees, volume bitmap and Volume Header
	 */
	if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
		retval = hfs_start_transaction(hfsmp);
		if (retval == 0) {
			started_tr = 1;
		} else if (!force) {
			goto err_exit;
		}

		/* Startup file is optional. */
		if (hfsmp->hfs_startup_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_startup_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
			retval = hfs_fsync(hfsmp->hfs_startup_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_startup_vp));
			if (retval && !force)
				goto err_exit;
		}

		/* Attributes B-tree is optional. */
		if (hfsmp->hfs_attribute_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_attribute_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
			retval = hfs_fsync(hfsmp->hfs_attribute_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_attribute_vp));
			if (retval && !force)
				goto err_exit;
		}

		/* Catalog and extents B-trees always exist. */
		(void) hfs_lock(VTOC(hfsmp->hfs_catalog_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		retval = hfs_fsync(hfsmp->hfs_catalog_vp, MNT_WAIT, 0, p);
		hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
		if (retval && !force)
			goto err_exit;

		(void) hfs_lock(VTOC(hfsmp->hfs_extents_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		retval = hfs_fsync(hfsmp->hfs_extents_vp, MNT_WAIT, 0, p);
		hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
		if (retval && !force)
			goto err_exit;

		/* Allocation bitmap file (NULL on plain HFS). */
		if (hfsmp->hfs_allocation_vp) {
			(void) hfs_lock(VTOC(hfsmp->hfs_allocation_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
			retval = hfs_fsync(hfsmp->hfs_allocation_vp, MNT_WAIT, 0, p);
			hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
			if (retval && !force)
				goto err_exit;
		}

		/* Hot-files clustering metadata, if present and still a system file. */
		if (hfsmp->hfc_filevp && vnode_issystem(hfsmp->hfc_filevp)) {
			retval = hfs_fsync(hfsmp->hfc_filevp, MNT_WAIT, 0, p);
			if (retval && !force)
				goto err_exit;
		}

		/* If runtime corruption was detected, indicate that the volume
		 * was not unmounted cleanly.
		 */
		if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
			HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
		} else {
			HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeUnmountedMask;
		}

		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			int i;
			u_int32_t min_start = hfsmp->totalBlocks;

			// set the nextAllocation pointer to the smallest free block number
			// we've seen so on the next mount we won't rescan unnecessarily
			lck_spin_lock(&hfsmp->vcbFreeExtLock);
			for(i=0; i < (int)hfsmp->vcbFreeExtCnt; i++) {
				if (hfsmp->vcbFreeExt[i].startBlock < min_start) {
					min_start = hfsmp->vcbFreeExt[i].startBlock;
				}
			}
			lck_spin_unlock(&hfsmp->vcbFreeExtLock);
			if (min_start < hfsmp->nextAllocation) {
				hfsmp->nextAllocation = min_start;
			}
		}

		retval = hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
		if (retval) {
			/* Could not write the header: take back the "clean" mark. */
			HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeUnmountedMask;
			if (!force)
				goto err_exit;	/* could not flush everything */
		}

		if (started_tr) {
			hfs_end_transaction(hfsmp);
			started_tr = 0;
		}
	}

	if (hfsmp->jnl) {
		hfs_flush(hfsmp, HFS_FLUSH_FULL);
	}

	/*
	 * Invalidate our caches and release metadata vnodes
	 */
	(void) hfsUnmount(hfsmp, p);

#if CONFIG_HFS_STD
	if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) {
		(void) hfs_relconverter(hfsmp->hfs_encoding);
	}
#endif

	// XXXdbg
	if (hfsmp->jnl) {
		journal_close(hfsmp->jnl);
		hfsmp->jnl = NULL;
	}

	VNOP_FSYNC(hfsmp->hfs_devvp, MNT_WAIT, context);

	/* Close the external journal device, if one is in use. */
	if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
		vnode_clearmountedon(hfsmp->jvp);
		/* NOTE(review): retval from VNOP_CLOSE is stored here but the
		 * function unconditionally returns 0 below, so a close failure
		 * is silently dropped — confirm this is intentional. */
		retval = VNOP_CLOSE(hfsmp->jvp,
		                    hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE,
		                    vfs_context_kernel());
		vnode_put(hfsmp->jvp);
		hfsmp->jvp = NULL;
	}
	// XXXdbg

	/*
	 * Last chance to dump unreferenced system files.
	 */
	(void) vflush(mp, NULLVP, FORCECLOSE);

#if HFS_SPARSE_DEV
	/* Drop our reference on the backing fs (if any). */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) && hfsmp->hfs_backingfs_rootvp) {
		struct vnode * tmpvp;

		hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
		tmpvp = hfsmp->hfs_backingfs_rootvp;
		hfsmp->hfs_backingfs_rootvp = NULLVP;
		vnode_rele(tmpvp);
	}
#endif /* HFS_SPARSE_DEV */

	vnode_rele(hfsmp->hfs_devvp);

	/* Destroy locks and hash tables, then free the mount structure itself. */
	hfs_locks_destroy(hfsmp);
	hfs_delete_chash(hfsmp);
	hfs_idhash_destroy(hfsmp);

	/* All block reservations must have been released by now. */
	assert(TAILQ_EMPTY(&hfsmp->hfs_reserved_ranges[HFS_TENTATIVE_BLOCKS])
	       && TAILQ_EMPTY(&hfsmp->hfs_reserved_ranges[HFS_LOCKED_BLOCKS]));
	assert(!hfsmp->lockedBlocks);

	FREE(hfsmp, M_HFSMNT);

	return (0);

err_exit:
	if (started_tr) {
		hfs_end_transaction(hfsmp);
	}
	return retval;
}
2283
2284
2285 /*
2286 * Return the root of a filesystem.
2287 */
2288 static int
2289 hfs_vfs_root(struct mount *mp, struct vnode **vpp, __unused vfs_context_t context)
2290 {
2291 return hfs_vget(VFSTOHFS(mp), (cnid_t)kHFSRootFolderID, vpp, 1, 0);
2292 }
2293
2294
2295 /*
2296 * Do operations associated with quotas
2297 */
2298 #if !QUOTA
2299 static int
2300 hfs_quotactl(__unused struct mount *mp, __unused int cmds, __unused uid_t uid, __unused caddr_t datap, __unused vfs_context_t context)
2301 {
2302 return (ENOTSUP);
2303 }
2304 #else
2305 static int
2306 hfs_quotactl(struct mount *mp, int cmds, uid_t uid, caddr_t datap, vfs_context_t context)
2307 {
2308 struct proc *p = vfs_context_proc(context);
2309 int cmd, type, error;
2310
2311 if (uid == ~0U)
2312 uid = kauth_cred_getuid(vfs_context_ucred(context));
2313 cmd = cmds >> SUBCMDSHIFT;
2314
2315 switch (cmd) {
2316 case Q_SYNC:
2317 case Q_QUOTASTAT:
2318 break;
2319 case Q_GETQUOTA:
2320 if (uid == kauth_cred_getuid(vfs_context_ucred(context)))
2321 break;
2322 /* fall through */
2323 default:
2324 if ( (error = vfs_context_suser(context)) )
2325 return (error);
2326 }
2327
2328 type = cmds & SUBCMDMASK;
2329 if ((u_int)type >= MAXQUOTAS)
2330 return (EINVAL);
2331 if (vfs_busy(mp, LK_NOWAIT))
2332 return (0);
2333
2334 switch (cmd) {
2335
2336 case Q_QUOTAON:
2337 error = hfs_quotaon(p, mp, type, datap);
2338 break;
2339
2340 case Q_QUOTAOFF:
2341 error = hfs_quotaoff(p, mp, type);
2342 break;
2343
2344 case Q_SETQUOTA:
2345 error = hfs_setquota(mp, uid, type, datap);
2346 break;
2347
2348 case Q_SETUSE:
2349 error = hfs_setuse(mp, uid, type, datap);
2350 break;
2351
2352 case Q_GETQUOTA:
2353 error = hfs_getquota(mp, uid, type, datap);
2354 break;
2355
2356 case Q_SYNC:
2357 error = hfs_qsync(mp);
2358 break;
2359
2360 case Q_QUOTASTAT:
2361 error = hfs_quotastat(mp, type, datap);
2362 break;
2363
2364 default:
2365 error = EINVAL;
2366 break;
2367 }
2368 vfs_unbusy(mp);
2369
2370 return (error);
2371 }
2372 #endif /* QUOTA */
2373
2374 /* Subtype is composite of bits */
2375 #define HFS_SUBTYPE_JOURNALED 0x01
2376 #define HFS_SUBTYPE_CASESENSITIVE 0x02
2377 /* bits 2 - 6 reserved */
2378 #define HFS_SUBTYPE_STANDARDHFS 0x80
2379
2380 /*
2381 * Get file system statistics.
2382 */
2383 int
2384 hfs_statfs(struct mount *mp, register struct vfsstatfs *sbp, __unused vfs_context_t context)
2385 {
2386 ExtendedVCB *vcb = VFSTOVCB(mp);
2387 struct hfsmount *hfsmp = VFSTOHFS(mp);
2388 u_int32_t freeCNIDs;
2389 u_int16_t subtype = 0;
2390
2391 freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)vcb->vcbNxtCNID;
2392
2393 sbp->f_bsize = (u_int32_t)vcb->blockSize;
2394 sbp->f_iosize = (size_t)cluster_max_io_size(mp, 0);
2395 sbp->f_blocks = (u_int64_t)((u_int32_t)vcb->totalBlocks);
2396 sbp->f_bfree = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 0));
2397 sbp->f_bavail = (u_int64_t)((u_int32_t )hfs_freeblks(hfsmp, 1));
2398 sbp->f_files = (u_int64_t)((u_int32_t )(vcb->totalBlocks - 2)); /* max files is constrained by total blocks */
2399 sbp->f_ffree = (u_int64_t)((u_int32_t )(MIN(freeCNIDs, sbp->f_bavail)));
2400
2401 /*
2402 * Subtypes (flavors) for HFS
2403 * 0: Mac OS Extended
2404 * 1: Mac OS Extended (Journaled)
2405 * 2: Mac OS Extended (Case Sensitive)
2406 * 3: Mac OS Extended (Case Sensitive, Journaled)
2407 * 4 - 127: Reserved
2408 * 128: Mac OS Standard
2409 *
2410 */
2411 if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) {
2412 /* HFS+ & variants */
2413 if (hfsmp->jnl) {
2414 subtype |= HFS_SUBTYPE_JOURNALED;
2415 }
2416 if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) {
2417 subtype |= HFS_SUBTYPE_CASESENSITIVE;
2418 }
2419 }
2420 #if CONFIG_HFS_STD
2421 else {
2422 /* HFS standard */
2423 subtype = HFS_SUBTYPE_STANDARDHFS;
2424 }
2425 #endif
2426 sbp->f_fssubtype = subtype;
2427
2428 return (0);
2429 }
2430
2431
2432 //
2433 // XXXdbg -- this is a callback to be used by the journal to
2434 // get meta data blocks flushed out to disk.
2435 //
2436 // XXXdbg -- be smarter and don't flush *every* block on each
2437 // call. try to only flush some so we don't wind up
2438 // being too synchronous.
2439 //
/*
 * hfs_sync_metadata - journal callback used to push metadata blocks to disk.
 *
 * Reads the primary volume header and the alternate volume header(s)
 * through the buffer cache and writes back any that are delayed-write
 * (B_DELWRI) but not locked.  These writes deliberately bypass the
 * journal (no journal_start_modify_block calls).
 */
__private_extern__
void
hfs_sync_metadata(void *arg)
{
	struct mount *mp = (struct mount *)arg;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;
	buf_t bp;
	int retval;
	daddr64_t priIDSector;	/* logical block of the primary volume header */
	hfsmp = VFSTOHFS(mp);
	vcb = HFSTOVCB(hfsmp);

	// now make sure the super block is flushed
	priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
				  HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));

	retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
			HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
			hfsmp->hfs_physical_block_size, NOCRED, &bp);
	/* ENXIO is tolerated silently (e.g. device going away). */
	if ((retval != 0 ) && (retval != ENXIO)) {
		printf("hfs_sync_metadata: can't read volume header at %d! (retval 0x%x)\n",
		       (int)priIDSector, retval);
	}

	/* Write only if dirty (delayed-write) and not locked; otherwise release. */
	if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
		buf_bwrite(bp);
	} else if (bp) {
		buf_brelse(bp);
	}

	/* Note that these I/Os bypass the journal (no calls to journal_start_modify_block) */

	// the alternate super block...
	// XXXdbg - we probably don't need to do this each and every time.
	//          hfs_btreeio.c:FlushAlternate() should flag when it was
	//          written...
	if (hfsmp->hfs_partition_avh_sector) {
		retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_partition_avh_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp);
		if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
			/*
			 * note this I/O can fail if the partition shrank behind our backs!
			 * So failure should be OK here.
			 */
			buf_bwrite(bp);
		} else if (bp) {
			buf_brelse(bp);
		}
	}

	/* Is the FS's idea of the AVH different than the partition ? */
	if ((hfsmp->hfs_fs_avh_sector) && (hfsmp->hfs_partition_avh_sector != hfsmp->hfs_fs_avh_sector)) {
		retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
				HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_fs_avh_sector, hfsmp->hfs_log_per_phys),
				hfsmp->hfs_physical_block_size, NOCRED, &bp);
		if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
			buf_bwrite(bp);
		} else if (bp) {
			buf_brelse(bp);
		}
	}

}
2505
2506
/*
 * Context passed from hfs_sync() to hfs_sync_callback() for each vnode
 * visited by vnode_iterate().
 */
struct hfs_sync_cargs {
	kauth_cred_t cred;	/* caller's credential (not referenced by hfs_sync_callback in this file) */
	struct proc *p;		/* process issuing the sync */
	int waitfor;		/* wait flag forwarded to hfs_fsync() */
	int error;		/* sticky error from hfs_fsync() failures (last failure wins) */
	int atime_only_syncs;	/* number of atime-only flushes performed this pass (capped at 256) */
	time_t sync_start_time;	/* time the sync pass started; used to age atime updates */
};
2515
2516
2517 static int
2518 hfs_sync_callback(struct vnode *vp, void *cargs)
2519 {
2520 struct cnode *cp = VTOC(vp);
2521 struct hfs_sync_cargs *args;
2522 int error;
2523
2524 args = (struct hfs_sync_cargs *)cargs;
2525
2526 if (hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
2527 return (VNODE_RETURNED);
2528 }
2529
2530 hfs_dirty_t dirty_state = hfs_is_dirty(cp);
2531
2532 bool sync = dirty_state == HFS_DIRTY || vnode_hasdirtyblks(vp);
2533
2534 if (!sync && dirty_state == HFS_DIRTY_ATIME
2535 && args->atime_only_syncs < 256) {
2536 // We only update if the atime changed more than 60s ago
2537 if (args->sync_start_time - cp->c_attr.ca_atime > 60) {
2538 sync = true;
2539 ++args->atime_only_syncs;
2540 }
2541 }
2542
2543 if (sync) {
2544 error = hfs_fsync(vp, args->waitfor, 0, args->p);
2545
2546 if (error)
2547 args->error = error;
2548 } else if (cp->c_touch_acctime)
2549 hfs_touchtimes(VTOHFS(vp), cp);
2550
2551 hfs_unlock(cp);
2552 return (VNODE_RETURNED);
2553 }
2554
2555
2556
2557 /*
2558 * Go through the disk queues to initiate sandbagged IO;
2559 * go through the inodes to write those that have been modified;
2560 * initiate the writing of the super block if it has been modified.
2561 *
2562 * Note: we are always called with the filesystem marked `MPBUSY'.
2563 */
2564 int
2565 hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
2566 {
2567 struct proc *p = vfs_context_proc(context);
2568 struct cnode *cp;
2569 struct hfsmount *hfsmp;
2570 ExtendedVCB *vcb;
2571 struct vnode *meta_vp[4];
2572 int i;
2573 int error, allerror = 0;
2574 struct hfs_sync_cargs args;
2575
2576 hfsmp = VFSTOHFS(mp);
2577
2578 // Back off if hfs_changefs or a freeze is underway
2579 hfs_lock_mount(hfsmp);
2580 if ((hfsmp->hfs_flags & HFS_IN_CHANGEFS)
2581 || hfsmp->hfs_freeze_state != HFS_THAWED) {
2582 hfs_unlock_mount(hfsmp);
2583 return 0;
2584 }
2585
2586 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2587 hfs_unlock_mount(hfsmp);
2588 return (EROFS);
2589 }
2590
2591 ++hfsmp->hfs_syncers;
2592 hfs_unlock_mount(hfsmp);
2593
2594 args.cred = kauth_cred_get();
2595 args.waitfor = waitfor;
2596 args.p = p;
2597 args.error = 0;
2598 args.atime_only_syncs = 0;
2599
2600 struct timeval tv;
2601 microtime(&tv);
2602
2603 args.sync_start_time = tv.tv_sec;
2604
2605 /*
2606 * hfs_sync_callback will be called for each vnode
2607 * hung off of this mount point... the vnode will be
2608 * properly referenced and unreferenced around the callback
2609 */
2610 vnode_iterate(mp, 0, hfs_sync_callback, (void *)&args);
2611
2612 if (args.error)
2613 allerror = args.error;
2614
2615 vcb = HFSTOVCB(hfsmp);
2616
2617 meta_vp[0] = vcb->extentsRefNum;
2618 meta_vp[1] = vcb->catalogRefNum;
2619 meta_vp[2] = vcb->allocationsRefNum; /* This is NULL for standard HFS */
2620 meta_vp[3] = hfsmp->hfs_attribute_vp; /* Optional file */
2621
2622 /* Now sync our three metadata files */
2623 for (i = 0; i < 4; ++i) {
2624 struct vnode *btvp;
2625
2626 btvp = meta_vp[i];;
2627 if ((btvp==0) || (vnode_mount(btvp) != mp))
2628 continue;
2629
2630 /* XXX use hfs_systemfile_lock instead ? */
2631 (void) hfs_lock(VTOC(btvp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2632 cp = VTOC(btvp);
2633
2634 if (!hfs_is_dirty(cp) && !vnode_hasdirtyblks(btvp)) {
2635 hfs_unlock(VTOC(btvp));
2636 continue;
2637 }
2638 error = vnode_get(btvp);
2639 if (error) {
2640 hfs_unlock(VTOC(btvp));
2641 continue;
2642 }
2643 if ((error = hfs_fsync(btvp, waitfor, 0, p)))
2644 allerror = error;
2645
2646 hfs_unlock(cp);
2647 vnode_put(btvp);
2648 };
2649
2650
2651 #if CONFIG_HFS_STD
2652 /*
2653 * Force stale file system control information to be flushed.
2654 */
2655 if (vcb->vcbSigWord == kHFSSigWord) {
2656 if ((error = VNOP_FSYNC(hfsmp->hfs_devvp, waitfor, context))) {
2657 allerror = error;
2658 }
2659 }
2660 #endif
2661
2662 #if QUOTA
2663 hfs_qsync(mp);
2664 #endif /* QUOTA */
2665
2666 hfs_hotfilesync(hfsmp, vfs_context_kernel());
2667
2668 /*
2669 * Write back modified superblock.
2670 */
2671 if (IsVCBDirty(vcb)) {
2672 error = hfs_flushvolumeheader(hfsmp, waitfor == MNT_WAIT ? HFS_FVH_WAIT : 0);
2673 if (error)
2674 allerror = error;
2675 }
2676
2677 if (hfsmp->jnl) {
2678 hfs_flush(hfsmp, HFS_FLUSH_JOURNAL);
2679 }
2680
2681 hfs_lock_mount(hfsmp);
2682 boolean_t wake = (!--hfsmp->hfs_syncers
2683 && hfsmp->hfs_freeze_state == HFS_WANT_TO_FREEZE);
2684 hfs_unlock_mount(hfsmp);
2685 if (wake)
2686 wakeup(&hfsmp->hfs_freeze_state);
2687
2688 return (allerror);
2689 }
2690
2691
2692 /*
2693 * File handle to vnode
2694 *
2695 * Have to be really careful about stale file handles:
2696 * - check that the cnode id is valid
2697 * - call hfs_vget() to get the locked cnode
2698 * - check for an unallocated cnode (i_mode == 0)
2699 * - check that the given client host has export rights and return
2700 * those rights via. exflagsp and credanonp
2701 */
2702 static int
2703 hfs_fhtovp(struct mount *mp, int fhlen, unsigned char *fhp, struct vnode **vpp, __unused vfs_context_t context)
2704 {
2705 struct hfsfid *hfsfhp;
2706 struct vnode *nvp;
2707 int result;
2708
2709 *vpp = NULL;
2710 hfsfhp = (struct hfsfid *)fhp;
2711
2712 if (fhlen < (int)sizeof(struct hfsfid))
2713 return (EINVAL);
2714
2715 result = hfs_vget(VFSTOHFS(mp), ntohl(hfsfhp->hfsfid_cnid), &nvp, 0, 0);
2716 if (result) {
2717 if (result == ENOENT)
2718 result = ESTALE;
2719 return result;
2720 }
2721
2722 /*
2723 * We used to use the create time as the gen id of the file handle,
2724 * but it is not static enough because it can change at any point
2725 * via system calls. We still don't have another volume ID or other
2726 * unique identifier to use for a generation ID across reboots that
2727 * persists until the file is removed. Using only the CNID exposes
2728 * us to the potential wrap-around case, but as of 2/2008, it would take
2729 * over 2 months to wrap around if the machine did nothing but allocate
2730 * CNIDs. Using some kind of wrap counter would only be effective if
2731 * each file had the wrap counter associated with it. For now,
2732 * we use only the CNID to identify the file as it's good enough.
2733 */
2734
2735 *vpp = nvp;
2736
2737 hfs_unlock(VTOC(nvp));
2738 return (0);
2739 }
2740
2741
2742 /*
2743 * Vnode pointer to File handle
2744 */
2745 /* ARGSUSED */
2746 static int
2747 hfs_vptofh(struct vnode *vp, int *fhlenp, unsigned char *fhp, __unused vfs_context_t context)
2748 {
2749 struct cnode *cp;
2750 struct hfsfid *hfsfhp;
2751
2752 if (ISHFS(VTOVCB(vp)))
2753 return (ENOTSUP); /* hfs standard is not exportable */
2754
2755 if (*fhlenp < (int)sizeof(struct hfsfid))
2756 return (EOVERFLOW);
2757
2758 cp = VTOC(vp);
2759 hfsfhp = (struct hfsfid *)fhp;
2760 /* only the CNID is used to identify the file now */
2761 hfsfhp->hfsfid_cnid = htonl(cp->c_fileid);
2762 hfsfhp->hfsfid_gen = htonl(cp->c_fileid);
2763 *fhlenp = sizeof(struct hfsfid);
2764
2765 return (0);
2766 }
2767
2768
2769 /*
2770 * Initialize HFS filesystems, done only once per boot.
2771 *
2772 * HFS is not a kext-based file system. This makes it difficult to find
2773 * out when the last HFS file system was unmounted and call hfs_uninit()
2774 * to deallocate data structures allocated in hfs_init(). Therefore we
2775 * never deallocate memory allocated by lock attribute and group initializations
2776 * in this function.
2777 */
2778 static int
2779 hfs_init(__unused struct vfsconf *vfsp)
2780 {
2781 static int done = 0;
2782
2783 if (done)
2784 return (0);
2785 done = 1;
2786 hfs_chashinit();
2787 hfs_converterinit();
2788
2789 BTReserveSetup();
2790
2791 hfs_lock_attr = lck_attr_alloc_init();
2792 hfs_group_attr = lck_grp_attr_alloc_init();
2793 hfs_mutex_group = lck_grp_alloc_init("hfs-mutex", hfs_group_attr);
2794 hfs_rwlock_group = lck_grp_alloc_init("hfs-rwlock", hfs_group_attr);
2795 hfs_spinlock_group = lck_grp_alloc_init("hfs-spinlock", hfs_group_attr);
2796
2797 #if HFS_COMPRESSION
2798 decmpfs_init();
2799 #endif
2800
2801 return (0);
2802 }
2803
2804
2805 /*
2806 * Destroy all locks, mutexes and spinlocks in hfsmp on unmount or failed mount
2807 */
2808 static void
2809 hfs_locks_destroy(struct hfsmount *hfsmp)
2810 {
2811
2812 lck_mtx_destroy(&hfsmp->hfs_mutex, hfs_mutex_group);
2813 lck_mtx_destroy(&hfsmp->hfc_mutex, hfs_mutex_group);
2814 lck_rw_destroy(&hfsmp->hfs_global_lock, hfs_rwlock_group);
2815 lck_spin_destroy(&hfsmp->vcbFreeExtLock, hfs_spinlock_group);
2816
2817 return;
2818 }
2819
2820
2821 static int
2822 hfs_getmountpoint(struct vnode *vp, struct hfsmount **hfsmpp)
2823 {
2824 struct hfsmount * hfsmp;
2825 char fstypename[MFSNAMELEN];
2826
2827 if (vp == NULL)
2828 return (EINVAL);
2829
2830 if (!vnode_isvroot(vp))
2831 return (EINVAL);
2832
2833 vnode_vfsname(vp, fstypename);
2834 if (strncmp(fstypename, "hfs", sizeof(fstypename)) != 0)
2835 return (EINVAL);
2836
2837 hfsmp = VTOHFS(vp);
2838
2839 if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord)
2840 return (EINVAL);
2841
2842 *hfsmpp = hfsmp;
2843
2844 return (0);
2845 }
2846
2847 // XXXdbg
2848 #include <sys/filedesc.h>
2849
2850 /*
2851 * HFS filesystem related variables.
2852 */
/*
 * hfs_sysctl - handle the HFS-private sysctl selectors.
 *
 * name[0] selects the operation; the remaining name[] slots plus the
 * oldp/newp user buffers carry per-selector arguments.  All names at
 * this level are terminal.  Returns 0 on success, an errno on failure,
 * or ENOTSUP for an unrecognized selector.
 */
int
hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp,
           user_addr_t newp, size_t newlen, vfs_context_t context)
{
	struct proc *p = vfs_context_proc(context);
	int error;
	struct hfsmount *hfsmp;

	/* all sysctl names at this level are terminal */

	if (name[0] == HFS_ENCODINGBIAS) {
		/* Get and/or set the global text-encoding bias. */
		int bias;

		bias = hfs_getencodingbias();
		error = sysctl_int(oldp, oldlenp, newp, newlen, &bias);
		/* Only push the new value back if userland actually supplied one. */
		if (error == 0 && newp)
			hfs_setencodingbias(bias);
		return (error);

	} else if (name[0] == HFS_EXTEND_FS) {
		/* Grow the filesystem that contains the caller's cwd to a new byte size. */
		u_int64_t  newsize = 0;
		vnode_t vp = vfs_context_cwd(context);

		if (newp == USER_ADDR_NULL || vp == NULLVP)
			return (EINVAL);
		if ((error = hfs_getmountpoint(vp, &hfsmp)))
			return (error);

		/* Start with the 'size' set to the current number of bytes in the filesystem */
		newsize = ((uint64_t)hfsmp->totalBlocks) * ((uint64_t)hfsmp->blockSize);

		/* now get the new size from userland and over-write our stored value */
		error = sysctl_quad(oldp, oldlenp, newp, newlen, (quad_t *)&newsize);
		if (error)
			return (error);

		error = hfs_extendfs(hfsmp, newsize, context);
		return (error);

	} else if (name[0] == HFS_ENCODINGHINT) {
		/*
		 * Given a UTF-8 filename in the newp buffer, return the legacy
		 * Mac text encoding that best represents it.
		 */
		size_t bufsize;
		size_t bytes;
		u_int32_t hint;
		u_int16_t *unicode_name = NULL;
		char *filename = NULL;

		/* newlen is the UTF-8 name length including its NUL terminator. */
		if ((newlen <= 0) || (newlen > MAXPATHLEN))
			return (EINVAL);

		/* UTF-16 decomposed output can be up to 3x the UTF-8 input size. */
		bufsize = MAX(newlen * 3, MAXPATHLEN);
		MALLOC(filename, char *, newlen, M_TEMP, M_WAITOK);
		if (filename == NULL) {
			error = ENOMEM;
			goto encodinghint_exit;
		}
		MALLOC(unicode_name, u_int16_t *, bufsize, M_TEMP, M_WAITOK);
		if (unicode_name == NULL) {
			error = ENOMEM;
			goto encodinghint_exit;
		}

		error = copyin(newp, (caddr_t)filename, newlen);
		if (error == 0) {
			/* newlen - 1: decode without the trailing NUL. */
			error = utf8_decodestr((u_int8_t *)filename, newlen - 1, unicode_name,
			                       &bytes, bufsize, 0, UTF_DECOMPOSED);
			if (error == 0) {
				/* bytes / 2: utf8_decodestr returns a byte count, pickencoding wants UniChars. */
				hint = hfs_pickencoding(unicode_name, bytes / 2);
				error = sysctl_int(oldp, oldlenp, USER_ADDR_NULL, 0, (int32_t *)&hint);
			}
		}

encodinghint_exit:
		/* Common cleanup for all ENCODINGHINT exit paths. */
		if (unicode_name)
			FREE(unicode_name, M_TEMP);
		if (filename)
			FREE(filename, M_TEMP);
		return (error);

	} else if (name[0] == HFS_ENABLE_JOURNALING) {
		// make the file system journaled...
		// name[1] = journal info block allocation block, name[2] = journal
		// start block, name[3] = journal size in bytes.
		vnode_t vp = vfs_context_cwd(context);
		vnode_t jvp;
		ExtendedVCB *vcb;
		struct cat_attr jnl_attr;
		struct cat_attr	jinfo_attr;
		struct cat_fork jnl_fork;
		struct cat_fork jinfo_fork;
		buf_t jib_buf;
		uint64_t jib_blkno;
		uint32_t tmpblkno;
		uint64_t journal_byte_offset;
		uint64_t journal_size;
		vnode_t jib_vp = NULLVP;
		struct JournalInfoBlock local_jib;
		int err = 0;
		void *jnl = NULL;
		int lockflags;

		/* Only root can enable journaling */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return (EPERM);
		}
		if (vp == NULLVP)
		        return EINVAL;

		hfsmp = VTOHFS(vp);
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return EROFS;
		}
		if (HFSTOVCB(hfsmp)->vcbSigWord == kHFSSigWord) {
			printf("hfs: can't make a plain hfs volume journaled.\n");
			return EINVAL;
		}

		if (hfsmp->jnl) {
			printf("hfs: volume @ mp %p is already journaled!\n", vnode_mount(vp));
			return EAGAIN;
		}
		vcb = HFSTOVCB(hfsmp);

		/* Set up local copies of the initialization info */
		tmpblkno = (uint32_t) name[1];
		jib_blkno = (uint64_t) tmpblkno;
		journal_byte_offset = (uint64_t) name[2];
		journal_byte_offset *= hfsmp->blockSize;
		journal_byte_offset += hfsmp->hfsPlusIOPosOffset;
		journal_size = (uint64_t)((unsigned)name[3]);

		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
		if (BTHasContiguousNodes(VTOF(vcb->catalogRefNum)) == 0 ||
			BTHasContiguousNodes(VTOF(vcb->extentsRefNum)) == 0) {

			/* Journal replay requires physically contiguous btree nodes. */
			printf("hfs: volume has a btree w/non-contiguous nodes.  can not enable journaling.\n");
			hfs_systemfile_unlock(hfsmp, lockflags);
			return EINVAL;
		}
		hfs_systemfile_unlock(hfsmp, lockflags);

		// make sure these both exist!
		if (   GetFileInfo(vcb, kHFSRootFolderID, ".journal_info_block", &jinfo_attr, &jinfo_fork) == 0
			|| GetFileInfo(vcb, kHFSRootFolderID, ".journal", &jnl_attr, &jnl_fork) == 0) {

			return EINVAL;
		}

		/*
		 * At this point, we have a copy of the metadata that lives in the catalog for the
		 * journal info block.  Compare that the journal info block's single extent matches
		 * that which was passed into this sysctl.
		 *
		 * If it is different, deny the journal enable call.
		 */
		if (jinfo_fork.cf_blocks > 1) {
			/* too many blocks */
			return EINVAL;
		}

		if (jinfo_fork.cf_extents[0].startBlock != jib_blkno) {
			/* Wrong block */
			return EINVAL;
		}

		/*
		 * We want to immediately purge the vnode for the JIB.
		 *
		 * Because it was written to from userland, there's probably
		 * a vnode somewhere in the vnode cache (possibly with UBC backed blocks).
		 * So we bring the vnode into core, then immediately do whatever
		 * we can to flush/vclean it out.  This is because those blocks will be
		 * interpreted as user data, which may be treated separately on some platforms
		 * than metadata.  If the vnode is gone, then there cannot be backing blocks
		 * in the UBC.
		 */
		if (hfs_vget (hfsmp, jinfo_attr.ca_fileid, &jib_vp, 1, 0)) {
			return EINVAL;
		}
		/*
		 * Now we have a vnode for the JIB. recycle it. Because we hold an iocount
		 * on the vnode, we'll just mark it for termination when the last iocount
		 * (hopefully ours), is dropped.
		 */
		vnode_recycle (jib_vp);
		err = vnode_put (jib_vp);
		if (err) {
			return EINVAL;
		}

		/* Initialize the local copy of the JIB (just like hfs.util) */
		memset (&local_jib, 'Z', sizeof(struct JournalInfoBlock));
		local_jib.flags = SWAP_BE32(kJIJournalInFSMask);
		/* Note that the JIB's offset is in bytes */
		local_jib.offset = SWAP_BE64(journal_byte_offset);
		local_jib.size = SWAP_BE64(journal_size);

		/*
		 * Now write out the local JIB. This essentially overwrites the userland
		 * copy of the JIB.  Read it as BLK_META to treat it as a metadata read/write.
		 */
		jib_buf = buf_getblk (hfsmp->hfs_devvp,
				jib_blkno * (hfsmp->blockSize / hfsmp->hfs_logical_block_size),
				hfsmp->blockSize, 0, 0, BLK_META);
		char* buf_ptr = (char*) buf_dataptr (jib_buf);

		/* Zero out the portion of the block that won't contain JIB data */
		memset (buf_ptr, 0, hfsmp->blockSize);

		bcopy(&local_jib, buf_ptr, sizeof(local_jib));
		if (buf_bwrite (jib_buf)) {
			return EIO;
		}

		/* Force a flush track cache */
		hfs_flush(hfsmp, HFS_FLUSH_CACHE);

		/* Now proceed with full volume sync */
		hfs_sync(hfsmp->hfs_mp, MNT_WAIT, context);

		printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
			   (off_t)name[2], (off_t)name[3]);

		//
		// XXXdbg - note that currently (Sept, 08) hfs_util does not support
		//          enabling the journal on a separate device so it is safe
		//          to just copy hfs_devvp here.  If hfs_util gets the ability
		//          to dynamically enable the journal on a separate device then
		//          we will have to do the same thing as hfs_early_journal_init()
		//          to locate and open the journal device.
		//
		jvp = hfsmp->hfs_devvp;
		jnl = journal_create(jvp, journal_byte_offset, journal_size,
							 hfsmp->hfs_devvp,
							 hfsmp->hfs_logical_block_size,
							 0,
							 0,
							 hfs_sync_metadata, hfsmp->hfs_mp,
							 hfsmp->hfs_mp);

		/*
		 * Set up the trim callback function so that we can add
		 * recently freed extents to the free extent cache once
		 * the transaction that freed them is written to the
		 * journal on disk.
		 */
		if (jnl)
			journal_trim_set_callback(jnl, hfs_trim_callback, hfsmp);

		if (jnl == NULL) {
			printf("hfs: FAILED to create the journal!\n");
			if (jvp && jvp != hfsmp->hfs_devvp) {
				vnode_clearmountedon(jvp);
				VNOP_CLOSE(jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
			}
			jvp = NULL;

			return EINVAL;
		}

		hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

		/*
		 * Flush all dirty metadata buffers.
		 */
		buf_flushdirtyblks(hfsmp->hfs_devvp, TRUE, 0, "hfs_sysctl");
		buf_flushdirtyblks(hfsmp->hfs_extents_vp, TRUE, 0, "hfs_sysctl");
		buf_flushdirtyblks(hfsmp->hfs_catalog_vp, TRUE, 0, "hfs_sysctl");
		buf_flushdirtyblks(hfsmp->hfs_allocation_vp, TRUE, 0, "hfs_sysctl");
		if (hfsmp->hfs_attribute_vp)
			buf_flushdirtyblks(hfsmp->hfs_attribute_vp, TRUE, 0, "hfs_sysctl");

		/* Publish the new journal state in the VCB and mount. */
		HFSTOVCB(hfsmp)->vcbJinfoBlock = name[1];
		HFSTOVCB(hfsmp)->vcbAtrb |= kHFSVolumeJournaledMask;
		hfsmp->jvp = jvp;
		hfsmp->jnl = jnl;

		// save this off for the hack-y check in hfs_remove()
		hfsmp->jnl_start = (u_int32_t)name[2];
		hfsmp->jnl_size  = (off_t)((unsigned)name[3]);
		hfsmp->hfs_jnlinfoblkid = jinfo_attr.ca_fileid;
		hfsmp->hfs_jnlfileid    = jnl_attr.ca_fileid;

		vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));

		hfs_unlock_global (hfsmp);
		hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);

		{
			/* Tell interested parties (e.g. Disk Arbitration) the volume changed. */
			fsid_t fsid;

			fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
			fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
			vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
		}
		return 0;
	} else if (name[0] == HFS_DISABLE_JOURNALING) {
		// clear the journaling bit
		vnode_t vp = vfs_context_cwd(context);

		/* Only root can disable journaling */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return (EPERM);
		}
		if (vp == NULLVP)
		        return EINVAL;

		hfsmp = VTOHFS(vp);

		/*
		 * Disabling journaling is disallowed on volumes with directory hard links
		 * because we have not tested the relevant code path.
		 */
		if (hfsmp->hfs_private_attr[DIR_HARDLINKS].ca_entries != 0){
			printf("hfs: cannot disable journaling on volumes with directory hardlinks\n");
			return EPERM;
		}

		printf("hfs: disabling journaling for mount @ %p\n", vnode_mount(vp));

		hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

		// Lights out for you buddy!
		journal_close(hfsmp->jnl);
		hfsmp->jnl = NULL;

		/* Release the journal device if it was separate from the volume device. */
		if (hfsmp->jvp && hfsmp->jvp != hfsmp->hfs_devvp) {
			vnode_clearmountedon(hfsmp->jvp);
			VNOP_CLOSE(hfsmp->jvp, hfsmp->hfs_flags & HFS_READ_ONLY ? FREAD : FREAD|FWRITE, vfs_context_kernel());
			vnode_put(hfsmp->jvp);
		}
		hfsmp->jvp = NULL;
		vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
		hfsmp->jnl_start        = 0;
		hfsmp->hfs_jnlinfoblkid = 0;
		hfsmp->hfs_jnlfileid    = 0;

		HFSTOVCB(hfsmp)->vcbAtrb &= ~kHFSVolumeJournaledMask;

		hfs_unlock_global (hfsmp);

		hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);

		{
			/* Notify listeners that the volume attributes changed. */
			fsid_t fsid;

			fsid.val[0] = (int32_t)hfsmp->hfs_raw_dev;
			fsid.val[1] = (int32_t)vfs_typenum(HFSTOVFS(hfsmp));
			vfs_event_signal(&fsid, VQ_UPDATE, (intptr_t)NULL);
		}
		return 0;
	} else if (name[0] == HFS_GET_JOURNAL_INFO) {
		/* Copy the journal's byte offset and size out to the 32-bit
		 * user addresses passed in name[1]/name[2]. */
		vnode_t vp = vfs_context_cwd(context);
		off_t jnl_start, jnl_size;

		if (vp == NULLVP)
		        return EINVAL;

		/* 64-bit processes won't work with this sysctl -- can't fit a pointer into an int! */
		if (proc_is64bit(current_proc()))
			return EINVAL;

		hfsmp = VTOHFS(vp);
		if (hfsmp->jnl == NULL) {
			jnl_start = 0;
			jnl_size  = 0;
		} else {
			jnl_start = hfs_blk_to_bytes(hfsmp->jnl_start, HFSTOVCB(hfsmp)->blockSize) + HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
			jnl_size  = hfsmp->jnl_size;
		}

		if ((error = copyout((caddr_t)&jnl_start, CAST_USER_ADDR_T(name[1]), sizeof(off_t))) != 0) {
			return error;
		}
		if ((error = copyout((caddr_t)&jnl_size, CAST_USER_ADDR_T(name[2]), sizeof(off_t))) != 0) {
			return error;
		}

		return 0;
	} else if (name[0] == HFS_SET_PKG_EXTENSIONS) {

		/* name[1] is a 32-bit user pointer to the extension table. */
		return set_package_extensions_table((user_addr_t)((unsigned)name[1]), name[2], name[3]);

	} else if (name[0] == VFS_CTL_QUERY) {
		/* Report pending notification conditions for a mount given by fsid. */
		struct sysctl_req *req;
		union union_vfsidctl vc;
		struct mount *mp;
		struct vfsquery vq;

		req = CAST_DOWN(struct sysctl_req *, oldp);	/* we're new style vfs sysctl. */
		if (req == NULL) {
			return EFAULT;
		}

		error = SYSCTL_IN(req, &vc, proc_is64bit(p)? sizeof(vc.vc64):sizeof(vc.vc32));
		if (error) return (error);

		mp = vfs_getvfs(&vc.vc32.vc_fsid); /* works for 32 and 64 */
		if (mp == NULL) return (ENOENT);

		hfsmp = VFSTOHFS(mp);
		bzero(&vq, sizeof(vq));
		vq.vq_flags = hfsmp->hfs_notification_conditions;
		return SYSCTL_OUT(req, &vq, sizeof(vq));;
	} else if (name[0] == HFS_REPLAY_JOURNAL) {
		/* Replay the journal on the block device open at fd name[1]. */
		vnode_t devvp = NULL;
		int device_fd;
		if (namelen != 2) {
			return (EINVAL);
		}
		device_fd = name[1];
		error = file_vnode(device_fd, &devvp);
		if (error) {
			return error;
		}
		error = vnode_getwithref(devvp);
		if (error) {
			file_drop(device_fd);
			return error;
		}
		error = hfs_journal_replay(devvp, context);
		file_drop(device_fd);
		vnode_put(devvp);
		return error;
	} else if (name[0] == HFS_ENABLE_RESIZE_DEBUG) {
		/* One-way switch: there is no selector to turn this back off. */
		hfs_resize_debug = 1;
		printf ("hfs_sysctl: Enabled volume resize debugging.\n");
		return 0;
	}

	return (ENOTSUP);
}
3282
3283 /*
3284 * hfs_vfs_vget is not static since it is used in hfs_readwrite.c to support
3285 * the build_path ioctl. We use it to leverage the code below that updates
3286 * the origin list cache if necessary
3287 */
3288
int
hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, __unused vfs_context_t context)
{
	int error;
	int lockflags;
	struct hfsmount *hfsmp;

	hfsmp = VFSTOHFS(mp);

	/* skiplock=1: the cnode comes back with an iocount but unlocked */
	error = hfs_vget(hfsmp, (cnid_t)ino, vpp, 1, 0);
	if (error)
		return error;

	/*
	 * If the look-up was via the object ID (rather than the link ID),
	 * then we make sure there's a parent here.  We can't leave this
	 * until hfs_vnop_getattr because if there's a problem getting the
	 * parent at that point, all the caller will do is call
	 * hfs_vfs_vget again and we'll end up in an infinite loop.
	 */

	cnode_t *cp = VTOC(*vpp);

	if (ISSET(cp->c_flag, C_HARDLINK) && ino == cp->c_fileid) {
		hfs_lock_always(cp, HFS_SHARED_LOCK);

		if (!hfs_haslinkorigin(cp)) {
			/* Need the exclusive lock to refresh the descriptor/origin */
			if (!hfs_lock_upgrade(cp))
				hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);

			if (cp->c_cnid == cp->c_fileid) {
				/*
				 * Descriptor is stale, so we need to refresh it.  We
				 * pick the first link.
				 */
				cnid_t link_id;

				error = hfs_first_link(hfsmp, cp, &link_id);

				if (!error) {
					/* Catalog lookup of that link's name/parent */
					lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
					error = cat_findname(hfsmp, link_id, &cp->c_desc);
					hfs_systemfile_unlock(hfsmp, lockflags);
				}
			} else {
				// We'll use whatever link the descriptor happens to have
				error = 0;
			}
			if (!error)
				hfs_savelinkorigin(cp, cp->c_parentcnid);
		}

		hfs_unlock(cp);

		if (error) {
			/* Drop the vnode rather than return one with no usable parent. */
			vnode_put(*vpp);
			*vpp = NULL;
		}
	}

	return error;
}
3351
3352
3353 /*
3354 * Look up an HFS object by ID.
3355 *
3356 * The object is returned with an iocount reference and the cnode locked.
3357 *
3358 * If the object is a file then it will represent the data fork.
3359 */
int
hfs_vget(struct hfsmount *hfsmp, cnid_t cnid, struct vnode **vpp, int skiplock, int allow_deleted)
{
	struct vnode *vp = NULLVP;
	struct cat_desc cndesc;
	struct cat_attr cnattr;
	struct cat_fork cnfork;
	u_int32_t linkref = 0;
	int error;

	/* Check for cnids that shouldn't be exported. */
	if ((cnid < kHFSFirstUserCatalogNodeID) &&
	    (cnid != kHFSRootFolderID && cnid != kHFSRootParentID)) {
		return (ENOENT);
	}
	/* Don't export our private directories. */
	if (cnid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid ||
	    cnid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) {
		return (ENOENT);
	}
	/*
	 * Check the hash first
	 */
	vp = hfs_chash_getvnode(hfsmp, cnid, 0, skiplock, allow_deleted);
	if (vp) {
		*vpp = vp;
		return(0);
	}

	bzero(&cndesc, sizeof(cndesc));
	bzero(&cnattr, sizeof(cnattr));
	bzero(&cnfork, sizeof(cnfork));

	/*
	 * Not in hash, lookup in catalog
	 */
	if (cnid == kHFSRootParentID) {
		/* The root parent is synthetic -- build its descriptor by hand. */
		static char hfs_rootname[] = "/";

		cndesc.cd_nameptr = (const u_int8_t *)&hfs_rootname[0];
		cndesc.cd_namelen = 1;
		cndesc.cd_parentcnid = kHFSRootParentID;
		cndesc.cd_cnid = kHFSRootFolderID;
		cndesc.cd_flags = CD_ISDIR;

		cnattr.ca_fileid = kHFSRootFolderID;
		cnattr.ca_linkcount = 1;
		cnattr.ca_entries = 1;
		cnattr.ca_dircount = 1;
		cnattr.ca_mode = (S_IFDIR | S_IRWXU | S_IRWXG | S_IRWXO);
	} else {
		int lockflags;
		cnid_t pid;
		const char *nameptr;

		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
		error = cat_idlookup(hfsmp, cnid, 0, 0, &cndesc, &cnattr, &cnfork);
		hfs_systemfile_unlock(hfsmp, lockflags);

		if (error) {
			*vpp = NULL;
			return (error);
		}

		/*
		 * Check for a raw hardlink inode and save its linkref.
		 */
		pid = cndesc.cd_parentcnid;
		nameptr = (const char *)cndesc.cd_nameptr;

		/* "iNode<n>" in the file-hardlink private dir: a file hardlink inode */
		if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		    (bcmp(nameptr, HFS_INODE_PREFIX, HFS_INODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_INODE_PREFIX_LEN], NULL, 10);

		/* "dir_<n>" in the dir-hardlink private dir: a directory hardlink inode */
		} else if ((pid == hfsmp->hfs_private_desc[DIR_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DIRINODE_PREFIX, HFS_DIRINODE_PREFIX_LEN) == 0)) {
			linkref = strtoul(&nameptr[HFS_DIRINODE_PREFIX_LEN], NULL, 10);

		/* "temp<n>": an open-unlinked file awaiting reclaim -- don't export it */
		} else if ((pid == hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid) &&
		           (bcmp(nameptr, HFS_DELETE_PREFIX, HFS_DELETE_PREFIX_LEN) == 0)) {
			*vpp = NULL;
			cat_releasedesc(&cndesc);
			return (ENOENT);  /* open unlinked file */
		}
	}

	/*
	 * Finish initializing cnode descriptor for hardlinks.
	 *
	 * We need a valid name and parent for reverse lookups.
	 */
	if (linkref) {
		cnid_t lastid;
		struct cat_desc linkdesc;
		int linkerr = 0;

		cnattr.ca_linkref = linkref;
		bzero (&linkdesc, sizeof (linkdesc));

		/*
		 * If the caller supplied the raw inode value, then we don't know exactly
		 * which hardlink they wanted. It's likely that they acquired the raw inode
		 * value BEFORE the item became a hardlink, in which case, they probably
		 * want the oldest link. So request the oldest link from the catalog.
		 *
		 * Unfortunately, this requires that we iterate through all N hardlinks. On the plus
		 * side, since we know that we want the last linkID, we can also have this one
		 * call give us back the name of the last ID, since it's going to have it in-hand...
		 */
		linkerr = hfs_lookup_lastlink (hfsmp, linkref, &lastid, &linkdesc);
		if ((linkerr == 0) && (lastid != 0)) {
			/*
			 * Release any lingering buffers attached to our local descriptor.
			 * Then copy the name and other business into the cndesc
			 */
			cat_releasedesc (&cndesc);
			bcopy (&linkdesc, &cndesc, sizeof(linkdesc));
		}
		/* If it failed, the linkref code will just use whatever it had in-hand below. */
	}

	if (linkref) {
		int newvnode_flags = 0;

		/* Hardlink inode: no component name is supplied to hfs_getnewvnode. */
		error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr,
								&cnfork, &vp, &newvnode_flags);
		if (error == 0) {
			VTOC(vp)->c_flag |= C_HARDLINK;
			/* Multiple paths may reach this vnode -- disable path caching. */
			vnode_setmultipath(vp);
		}
	} else {
		struct componentname cn;
		int newvnode_flags = 0;

		/* Supply hfs_getnewvnode with a component name. */
		MALLOC_ZONE(cn.cn_pnbuf, caddr_t, MAXPATHLEN, M_NAMEI, M_WAITOK);
		cn.cn_nameiop = LOOKUP;
		cn.cn_flags = ISLASTCN | HASBUF;
		cn.cn_context = NULL;
		cn.cn_pnlen = MAXPATHLEN;
		cn.cn_nameptr = cn.cn_pnbuf;
		cn.cn_namelen = cndesc.cd_namelen;
		cn.cn_hash = 0;
		cn.cn_consume = 0;
		/* +1 copies the NUL terminator along with the name bytes. */
		bcopy(cndesc.cd_nameptr, cn.cn_nameptr, cndesc.cd_namelen + 1);

		error = hfs_getnewvnode(hfsmp, NULLVP, &cn, &cndesc, 0, &cnattr,
								&cnfork, &vp, &newvnode_flags);

		if (error == 0 && (VTOC(vp)->c_flag & C_HARDLINK)) {
			hfs_savelinkorigin(VTOC(vp), cndesc.cd_parentcnid);
		}
		FREE_ZONE(cn.cn_pnbuf, cn.cn_pnlen, M_NAMEI);
	}
	cat_releasedesc(&cndesc);

	*vpp = vp;
	/* Caller asked for an unlocked cnode; drop the lock before returning. */
	if (vp && skiplock) {
		hfs_unlock(VTOC(vp));
	}
	return (error);
}
3522
3523
3524 /*
3525 * Flush out all the files in a filesystem.
3526 */
static int
#if QUOTA
hfs_flushfiles(struct mount *mp, int flags, struct proc *p)
#else
hfs_flushfiles(struct mount *mp, int flags, __unused struct proc *p)
#endif /* QUOTA */
{
	struct hfsmount *hfsmp;
	struct vnode *skipvp = NULLVP;
	int error;
	int accounted_root_usecounts;
#if QUOTA
	int i;
#endif

	hfsmp = VFSTOHFS(mp);

	accounted_root_usecounts = 0;
#if QUOTA
	/*
	 * The open quota files have an indirect reference on
	 * the root directory vnode.  We must account for this
	 * extra reference when doing the intial vflush.
	 */
	if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {
		/* Find out how many quota files we have open. */
		for (i = 0; i < MAXQUOTAS; i++) {
			if (hfsmp->hfs_qfiles[i].qf_vp != NULLVP)
				++accounted_root_usecounts;
		}
	}
#endif /* QUOTA */

	if (accounted_root_usecounts > 0) {
		/* Obtain the root vnode so we can skip over it. */
		skipvp = hfs_chash_getvnode(hfsmp, kHFSRootFolderID, 0, 0, 0);
	}

	/* First pass: flush swap-backed vnodes separately (SKIPSWAP set). */
	error = vflush(mp, skipvp, SKIPSYSTEM | SKIPSWAP | flags);
	if (error != 0)
		return(error);

	/* Second pass: everything else except system files (and skipvp). */
	error = vflush(mp, skipvp, SKIPSYSTEM | flags);

	if (skipvp) {
		/*
		 * See if there are additional references on the
		 * root vp besides the ones obtained from the open
		 * quota files and CoreStorage.
		 */
		if ((error == 0) &&
		    (vnode_isinuse(skipvp, accounted_root_usecounts))) {
			error = EBUSY;  /* root directory is still open */
		}
		hfs_unlock(VTOC(skipvp));
		/* release the iocount from the hfs_chash_getvnode call above. */
		vnode_put(skipvp);
	}
	if (error && (flags & FORCECLOSE) == 0)
		return (error);

#if QUOTA
	/* Quota files can now be closed; this drops their root references. */
	if (((unsigned int)vfs_flags(mp)) & MNT_QUOTA) {
		for (i = 0; i < MAXQUOTAS; i++) {
			if (hfsmp->hfs_qfiles[i].qf_vp == NULLVP)
				continue;
			hfs_quotaoff(p, mp, i);
		}
	}
#endif /* QUOTA */

	if (skipvp) {
		/* Final pass now that the root's extra references are gone. */
		error = vflush(mp, NULLVP, SKIPSYSTEM | flags);
	}

	return (error);
}
3604
3605 /*
3606 * Update volume encoding bitmap (HFS Plus only)
3607 *
3608 * Mark a legacy text encoding as in-use (as needed)
3609 * in the volume header of this HFS+ filesystem.
3610 */
3611 __private_extern__
3612 void
3613 hfs_setencodingbits(struct hfsmount *hfsmp, u_int32_t encoding)
3614 {
3615 #define kIndexMacUkrainian 48 /* MacUkrainian encoding is 152 */
3616 #define kIndexMacFarsi 49 /* MacFarsi encoding is 140 */
3617
3618 u_int32_t index;
3619
3620 switch (encoding) {
3621 case kTextEncodingMacUkrainian:
3622 index = kIndexMacUkrainian;
3623 break;
3624 case kTextEncodingMacFarsi:
3625 index = kIndexMacFarsi;
3626 break;
3627 default:
3628 index = encoding;
3629 break;
3630 }
3631
3632 /* Only mark the encoding as in-use if it wasn't already set */
3633 if (index < 64 && (hfsmp->encodingsBitmap & (u_int64_t)(1ULL << index)) == 0) {
3634 hfs_lock_mount (hfsmp);
3635 hfsmp->encodingsBitmap |= (u_int64_t)(1ULL << index);
3636 MarkVCBDirty(hfsmp);
3637 hfs_unlock_mount(hfsmp);
3638 }
3639 }
3640
3641 /*
3642 * Update volume stats
3643 *
3644 * On journal volumes this will cause a volume header flush
3645 */
3646 int
3647 hfs_volupdate(struct hfsmount *hfsmp, enum volop op, int inroot)
3648 {
3649 struct timeval tv;
3650
3651 microtime(&tv);
3652
3653 hfs_lock_mount (hfsmp);
3654
3655 MarkVCBDirty(hfsmp);
3656 hfsmp->hfs_mtime = tv.tv_sec;
3657
3658 switch (op) {
3659 case VOL_UPDATE:
3660 break;
3661 case VOL_MKDIR:
3662 if (hfsmp->hfs_dircount != 0xFFFFFFFF)
3663 ++hfsmp->hfs_dircount;
3664 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3665 ++hfsmp->vcbNmRtDirs;
3666 break;
3667 case VOL_RMDIR:
3668 if (hfsmp->hfs_dircount != 0)
3669 --hfsmp->hfs_dircount;
3670 if (inroot && hfsmp->vcbNmRtDirs != 0xFFFF)
3671 --hfsmp->vcbNmRtDirs;
3672 break;
3673 case VOL_MKFILE:
3674 if (hfsmp->hfs_filecount != 0xFFFFFFFF)
3675 ++hfsmp->hfs_filecount;
3676 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3677 ++hfsmp->vcbNmFls;
3678 break;
3679 case VOL_RMFILE:
3680 if (hfsmp->hfs_filecount != 0)
3681 --hfsmp->hfs_filecount;
3682 if (inroot && hfsmp->vcbNmFls != 0xFFFF)
3683 --hfsmp->vcbNmFls;
3684 break;
3685 }
3686
3687 hfs_unlock_mount (hfsmp);
3688
3689 if (hfsmp->jnl) {
3690 hfs_flushvolumeheader(hfsmp, 0);
3691 }
3692
3693 return (0);
3694 }
3695
3696
3697 #if CONFIG_HFS_STD
3698 /* HFS Standard MDB flush */
static int
hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush)
{
	ExtendedVCB *vcb = HFSTOVCB(hfsmp);
	struct filefork *fp;
	HFSMasterDirectoryBlock	*mdb;
	struct buf *bp = NULL;
	int retval;
	int sector_size;
	ByteCount namelen;

	/* Read the sector holding the primary MDB. */
	sector_size = hfsmp->hfs_logical_block_size;
	retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sector_size), sector_size, NOCRED, &bp);
	if (retval) {
		if (bp)
			buf_brelse(bp);
		return retval;
	}

	hfs_lock_mount (hfsmp);

	mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp) + HFS_PRI_OFFSET(sector_size));

	/*
	 * Copy the in-memory VCB fields into the on-disk MDB, converting
	 * each field to big-endian; on-disk HFS dates are local time.
	 */
	mdb->drCrDate = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->hfs_itime)));
	mdb->drLsMod = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbLsMod)));
	mdb->drAtrb = SWAP_BE16 (vcb->vcbAtrb);
	mdb->drNmFls = SWAP_BE16 (vcb->vcbNmFls);
	mdb->drAllocPtr = SWAP_BE16 (vcb->nextAllocation);
	mdb->drClpSiz = SWAP_BE32 (vcb->vcbClpSiz);
	mdb->drNxtCNID = SWAP_BE32 (vcb->vcbNxtCNID);
	mdb->drFreeBks = SWAP_BE16 (vcb->freeBlocks);

	/* Convert the UTF-8 volume name to the MDB's legacy encoding. */
	namelen = strlen((char *)vcb->vcbVN);
	retval = utf8_to_hfs(vcb, namelen, vcb->vcbVN, mdb->drVN);
	/* Retry with MacRoman in case that's how it was exported. */
	if (retval)
		retval = utf8_to_mac_roman(namelen, vcb->vcbVN, mdb->drVN);

	mdb->drVolBkUp = SWAP_BE32 (UTCToLocal(to_hfs_time(vcb->vcbVolBkUp)));
	mdb->drWrCnt = SWAP_BE32 (vcb->vcbWrCnt);
	mdb->drNmRtDirs = SWAP_BE16 (vcb->vcbNmRtDirs);
	mdb->drFilCnt = SWAP_BE32 (vcb->vcbFilCnt);
	mdb->drDirCnt = SWAP_BE32 (vcb->vcbDirCnt);

	bcopy(vcb->vcbFndrInfo, mdb->drFndrInfo, sizeof(mdb->drFndrInfo));

	/* Extents overflow file: first three extents, size, and clump size. */
	fp = VTOF(vcb->extentsRefNum);
	mdb->drXTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drXTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drXTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drXTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drXTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drXTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drXTFlSize = SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drXTClpSiz = SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	/* Catalog file: same treatment. */
	fp = VTOF(vcb->catalogRefNum);
	mdb->drCTExtRec[0].startBlock = SWAP_BE16 (fp->ff_extents[0].startBlock);
	mdb->drCTExtRec[0].blockCount = SWAP_BE16 (fp->ff_extents[0].blockCount);
	mdb->drCTExtRec[1].startBlock = SWAP_BE16 (fp->ff_extents[1].startBlock);
	mdb->drCTExtRec[1].blockCount = SWAP_BE16 (fp->ff_extents[1].blockCount);
	mdb->drCTExtRec[2].startBlock = SWAP_BE16 (fp->ff_extents[2].startBlock);
	mdb->drCTExtRec[2].blockCount = SWAP_BE16 (fp->ff_extents[2].blockCount);
	mdb->drCTFlSize = SWAP_BE32 (fp->ff_blocks * vcb->blockSize);
	mdb->drCTClpSiz = SWAP_BE32 (fp->ff_clumpsize);
	FTOC(fp)->c_flag &= ~C_MODIFIED;

	MarkVCBClean( vcb );

	hfs_unlock_mount (hfsmp);

	/* If requested, flush out the alternate MDB */
	if (altflush) {
		struct buf *alt_bp = NULL;

		if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_partition_avh_sector, sector_size, NOCRED, &alt_bp) == 0) {
			bcopy(mdb, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sector_size), kMDBSize);

			(void) VNOP_BWRITE(alt_bp);
		} else if (alt_bp)
			buf_brelse(alt_bp);
	}

	/* MNT_WAIT: synchronous write; otherwise fire-and-forget. */
	if (waitfor != MNT_WAIT)
		buf_bawrite(bp);
	else
		retval = VNOP_BWRITE(bp);

	return (retval);
}
3790 #endif
3791
3792 /*
3793 * Flush any dirty in-memory mount data to the on-disk
3794 * volume header.
3795 *
3796 * Note: the on-disk volume signature is intentionally
3797 * not flushed since the on-disk "H+" and "HX" signatures
3798 * are always stored in-memory as "H+".
3799 */
3800 int
3801 hfs_flushvolumeheader(struct hfsmount *hfsmp,
3802 hfs_flush_volume_header_options_t options)
3803 {
3804 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3805 struct filefork *fp;
3806 HFSPlusVolumeHeader *volumeHeader, *altVH;
3807 int retval;
3808 struct buf *bp, *alt_bp;
3809 int i;
3810 daddr64_t priIDSector;
3811 bool critical = false;
3812 u_int16_t signature;
3813 u_int16_t hfsversion;
3814 daddr64_t avh_sector;
3815 bool altflush = ISSET(options, HFS_FVH_WRITE_ALT);
3816
3817 if (ISSET(options, HFS_FVH_FLUSH_IF_DIRTY)
3818 && !hfs_header_needs_flushing(hfsmp)) {
3819 return 0;
3820 }
3821
3822 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
3823 return(0);
3824 }
3825 #if CONFIG_HFS_STD
3826 if (hfsmp->hfs_flags & HFS_STANDARD) {
3827 return hfs_flushMDB(hfsmp, ISSET(options, HFS_FVH_WAIT) ? MNT_WAIT : 0, altflush);
3828 }
3829 #endif
3830 priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
3831 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
3832
3833 if (hfs_start_transaction(hfsmp) != 0) {
3834 return EINVAL;
3835 }
3836
3837 bp = NULL;
3838 alt_bp = NULL;
3839
3840 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3841 HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
3842 hfsmp->hfs_physical_block_size, NOCRED, &bp);
3843 if (retval) {
3844 printf("hfs: err %d reading VH blk (vol=%s)\n", retval, vcb->vcbVN);
3845 goto err_exit;
3846 }
3847
3848 volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) +
3849 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3850
3851 /*
3852 * Sanity check what we just read. If it's bad, try the alternate
3853 * instead.
3854 */
3855 signature = SWAP_BE16 (volumeHeader->signature);
3856 hfsversion = SWAP_BE16 (volumeHeader->version);
3857 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3858 (hfsversion < kHFSPlusVersion) || (hfsversion > 100) ||
3859 (SWAP_BE32 (volumeHeader->blockSize) != vcb->blockSize)) {
3860 printf("hfs: corrupt VH on %s, sig 0x%04x, ver %d, blksize %d\n",
3861 vcb->vcbVN, signature, hfsversion,
3862 SWAP_BE32 (volumeHeader->blockSize));
3863 hfs_mark_inconsistent(hfsmp, HFS_INCONSISTENCY_DETECTED);
3864
3865 /* Almost always we read AVH relative to the partition size */
3866 avh_sector = hfsmp->hfs_partition_avh_sector;
3867
3868 if (hfsmp->hfs_partition_avh_sector != hfsmp->hfs_fs_avh_sector) {
3869 /*
3870 * The two altVH offsets do not match --- which means that a smaller file
3871 * system exists in a larger partition. Verify that we have the correct
3872 * alternate volume header sector as per the current parititon size.
3873 * The GPT device that we are mounted on top could have changed sizes
3874 * without us knowing.
3875 *
3876 * We're in a transaction, so it's safe to modify the partition_avh_sector
3877 * field if necessary.
3878 */
3879
3880 uint64_t sector_count;
3881
3882 /* Get underlying device block count */
3883 if ((retval = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCGETBLOCKCOUNT,
3884 (caddr_t)&sector_count, 0, vfs_context_current()))) {
3885 printf("hfs_flushVH: err %d getting block count (%s) \n", retval, vcb->vcbVN);
3886 retval = ENXIO;
3887 goto err_exit;
3888 }
3889
3890 /* Partition size was changed without our knowledge */
3891 if (sector_count != (uint64_t)hfsmp->hfs_logical_block_count) {
3892 hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
3893 HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, sector_count);
3894 /* Note: hfs_fs_avh_sector will remain unchanged */
3895 printf ("hfs_flushVH: partition size changed, partition_avh_sector=%qu, fs_avh_sector=%qu\n",
3896 hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector);
3897
3898 /*
3899 * We just updated the offset for AVH relative to
3900 * the partition size, so the content of that AVH
3901 * will be invalid. But since we are also maintaining
3902 * a valid AVH relative to the file system size, we
3903 * can read it since primary VH and partition AVH
3904 * are not valid.
3905 */
3906 avh_sector = hfsmp->hfs_fs_avh_sector;
3907 }
3908 }
3909
3910 printf ("hfs: trying alternate (for %s) avh_sector=%qu\n",
3911 (avh_sector == hfsmp->hfs_fs_avh_sector) ? "file system" : "partition", avh_sector);
3912
3913 if (avh_sector) {
3914 retval = buf_meta_bread(hfsmp->hfs_devvp,
3915 HFS_PHYSBLK_ROUNDDOWN(avh_sector, hfsmp->hfs_log_per_phys),
3916 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp);
3917 if (retval) {
3918 printf("hfs: err %d reading alternate VH (%s)\n", retval, vcb->vcbVN);
3919 goto err_exit;
3920 }
3921
3922 altVH = (HFSPlusVolumeHeader *)((char *)buf_dataptr(alt_bp) +
3923 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size));
3924 signature = SWAP_BE16(altVH->signature);
3925 hfsversion = SWAP_BE16(altVH->version);
3926
3927 if ((signature != kHFSPlusSigWord && signature != kHFSXSigWord) ||
3928 (hfsversion < kHFSPlusVersion) || (kHFSPlusVersion > 100) ||
3929 (SWAP_BE32(altVH->blockSize) != vcb->blockSize)) {
3930 printf("hfs: corrupt alternate VH on %s, sig 0x%04x, ver %d, blksize %d\n",
3931 vcb->vcbVN, signature, hfsversion,
3932 SWAP_BE32(altVH->blockSize));
3933 retval = EIO;
3934 goto err_exit;
3935 }
3936
3937 /* The alternate is plausible, so use it. */
3938 bcopy(altVH, volumeHeader, kMDBSize);
3939 buf_brelse(alt_bp);
3940 alt_bp = NULL;
3941 } else {
3942 /* No alternate VH, nothing more we can do. */
3943 retval = EIO;
3944 goto err_exit;
3945 }
3946 }
3947
3948 if (hfsmp->jnl) {
3949 journal_modify_block_start(hfsmp->jnl, bp);
3950 }
3951
3952 /*
3953 * For embedded HFS+ volumes, update create date if it changed
3954 * (ie from a setattrlist call)
3955 */
3956 if ((vcb->hfsPlusIOPosOffset != 0) &&
3957 (SWAP_BE32 (volumeHeader->createDate) != vcb->localCreateDate)) {
3958 struct buf *bp2;
3959 HFSMasterDirectoryBlock *mdb;
3960
3961 retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
3962 HFS_PHYSBLK_ROUNDDOWN(HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size), hfsmp->hfs_log_per_phys),
3963 hfsmp->hfs_physical_block_size, NOCRED, &bp2);
3964 if (retval) {
3965 if (bp2)
3966 buf_brelse(bp2);
3967 retval = 0;
3968 } else {
3969 mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp2) +
3970 HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
3971
3972 if ( SWAP_BE32 (mdb->drCrDate) != vcb->localCreateDate )
3973 {
3974 if (hfsmp->jnl) {
3975 journal_modify_block_start(hfsmp->jnl, bp2);
3976 }
3977
3978 mdb->drCrDate = SWAP_BE32 (vcb->localCreateDate); /* pick up the new create date */
3979
3980 if (hfsmp->jnl) {
3981 journal_modify_block_end(hfsmp->jnl, bp2, NULL, NULL);
3982 } else {
3983 (void) VNOP_BWRITE(bp2); /* write out the changes */
3984 }
3985 }
3986 else
3987 {
3988 buf_brelse(bp2); /* just release it */
3989 }
3990 }
3991 }
3992
3993 hfs_lock_mount (hfsmp);
3994
3995 /* Note: only update the lower 16 bits worth of attributes */
3996 volumeHeader->attributes = SWAP_BE32 (vcb->vcbAtrb);
3997 volumeHeader->journalInfoBlock = SWAP_BE32 (vcb->vcbJinfoBlock);
3998 if (hfsmp->jnl) {
3999 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSJMountVersion);
4000 } else {
4001 volumeHeader->lastMountedVersion = SWAP_BE32 (kHFSPlusMountVersion);
4002 }
4003 volumeHeader->createDate = SWAP_BE32 (vcb->localCreateDate); /* volume create date is in local time */
4004 volumeHeader->modifyDate = SWAP_BE32 (to_hfs_time(vcb->vcbLsMod));
4005 volumeHeader->backupDate = SWAP_BE32 (to_hfs_time(vcb->vcbVolBkUp));
4006 volumeHeader->fileCount = SWAP_BE32 (vcb->vcbFilCnt);
4007 volumeHeader->folderCount = SWAP_BE32 (vcb->vcbDirCnt);
4008 volumeHeader->totalBlocks = SWAP_BE32 (vcb->totalBlocks);
4009 volumeHeader->freeBlocks = SWAP_BE32 (vcb->freeBlocks + vcb->reclaimBlocks);
4010 volumeHeader->nextAllocation = SWAP_BE32 (vcb->nextAllocation);
4011 volumeHeader->rsrcClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
4012 volumeHeader->dataClumpSize = SWAP_BE32 (vcb->vcbClpSiz);
4013 volumeHeader->nextCatalogID = SWAP_BE32 (vcb->vcbNxtCNID);
4014 volumeHeader->writeCount = SWAP_BE32 (vcb->vcbWrCnt);
4015 volumeHeader->encodingsBitmap = SWAP_BE64 (vcb->encodingsBitmap);
4016
4017 if (bcmp(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo)) != 0) {
4018 bcopy(vcb->vcbFndrInfo, volumeHeader->finderInfo, sizeof(volumeHeader->finderInfo));
4019 critical = true;
4020 }
4021
4022 if (!altflush && !ISSET(options, HFS_FVH_FLUSH_IF_DIRTY)) {
4023 goto done;
4024 }
4025
4026 /* Sync Extents over-flow file meta data */
4027 fp = VTOF(vcb->extentsRefNum);
4028 if (FTOC(fp)->c_flag & C_MODIFIED) {
4029 for (i = 0; i < kHFSPlusExtentDensity; i++) {
4030 volumeHeader->extentsFile.extents[i].startBlock =
4031 SWAP_BE32 (fp->ff_extents[i].startBlock);
4032 volumeHeader->extentsFile.extents[i].blockCount =
4033 SWAP_BE32 (fp->ff_extents[i].blockCount);
4034 }
4035 volumeHeader->extentsFile.logicalSize = SWAP_BE64 (fp->ff_size);
4036 volumeHeader->extentsFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
4037 volumeHeader->extentsFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
4038 FTOC(fp)->c_flag &= ~C_MODIFIED;
4039 altflush = true;
4040 }
4041
4042 /* Sync Catalog file meta data */
4043 fp = VTOF(vcb->catalogRefNum);
4044 if (FTOC(fp)->c_flag & C_MODIFIED) {
4045 for (i = 0; i < kHFSPlusExtentDensity; i++) {
4046 volumeHeader->catalogFile.extents[i].startBlock =
4047 SWAP_BE32 (fp->ff_extents[i].startBlock);
4048 volumeHeader->catalogFile.extents[i].blockCount =
4049 SWAP_BE32 (fp->ff_extents[i].blockCount);
4050 }
4051 volumeHeader->catalogFile.logicalSize = SWAP_BE64 (fp->ff_size);
4052 volumeHeader->catalogFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
4053 volumeHeader->catalogFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
4054 FTOC(fp)->c_flag &= ~C_MODIFIED;
4055 altflush = true;
4056 }
4057
4058 /* Sync Allocation file meta data */
4059 fp = VTOF(vcb->allocationsRefNum);
4060 if (FTOC(fp)->c_flag & C_MODIFIED) {
4061 for (i = 0; i < kHFSPlusExtentDensity; i++) {
4062 volumeHeader->allocationFile.extents[i].startBlock =
4063 SWAP_BE32 (fp->ff_extents[i].startBlock);
4064 volumeHeader->allocationFile.extents[i].blockCount =
4065 SWAP_BE32 (fp->ff_extents[i].blockCount);
4066 }
4067 volumeHeader->allocationFile.logicalSize = SWAP_BE64 (fp->ff_size);
4068 volumeHeader->allocationFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
4069 volumeHeader->allocationFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
4070 FTOC(fp)->c_flag &= ~C_MODIFIED;
4071 altflush = true;
4072 }
4073
4074 /* Sync Attribute file meta data */
4075 if (hfsmp->hfs_attribute_vp) {
4076 fp = VTOF(hfsmp->hfs_attribute_vp);
4077 for (i = 0; i < kHFSPlusExtentDensity; i++) {
4078 volumeHeader->attributesFile.extents[i].startBlock =
4079 SWAP_BE32 (fp->ff_extents[i].startBlock);
4080 volumeHeader->attributesFile.extents[i].blockCount =
4081 SWAP_BE32 (fp->ff_extents[i].blockCount);
4082 }
4083 if (ISSET(FTOC(fp)->c_flag, C_MODIFIED)) {
4084 FTOC(fp)->c_flag &= ~C_MODIFIED;
4085 altflush = true;
4086 }
4087 volumeHeader->attributesFile.logicalSize = SWAP_BE64 (fp->ff_size);
4088 volumeHeader->attributesFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
4089 volumeHeader->attributesFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
4090 }
4091
4092 /* Sync Startup file meta data */
4093 if (hfsmp->hfs_startup_vp) {
4094 fp = VTOF(hfsmp->hfs_startup_vp);
4095 if (FTOC(fp)->c_flag & C_MODIFIED) {
4096 for (i = 0; i < kHFSPlusExtentDensity; i++) {
4097 volumeHeader->startupFile.extents[i].startBlock =
4098 SWAP_BE32 (fp->ff_extents[i].startBlock);
4099 volumeHeader->startupFile.extents[i].blockCount =
4100 SWAP_BE32 (fp->ff_extents[i].blockCount);
4101 }
4102 volumeHeader->startupFile.logicalSize = SWAP_BE64 (fp->ff_size);
4103 volumeHeader->startupFile.totalBlocks = SWAP_BE32 (fp->ff_blocks);
4104 volumeHeader->startupFile.clumpSize = SWAP_BE32 (fp->ff_clumpsize);
4105 FTOC(fp)->c_flag &= ~C_MODIFIED;
4106 altflush = true;
4107 }
4108 }
4109
4110 if (altflush)
4111 critical = true;
4112
4113 done:
4114 MarkVCBClean(hfsmp);
4115 hfs_unlock_mount (hfsmp);
4116
4117 /* If requested, flush out the alternate volume header */
4118 if (altflush) {
4119 /*
4120 * The two altVH offsets do not match --- which means that a smaller file
4121 * system exists in a larger partition. Verify that we have the correct
4122 * alternate volume header sector as per the current partition size.
4123 * The GPT device that we are mounted on top could have changed sizes
4124 * without us knowing.
4125 *
4126 * We're in a transaction, so it's safe to modify the partition_avh_sector
4127 * field if necessary.
4128 */
4129 if (hfsmp->hfs_partition_avh_sector != hfsmp->hfs_fs_avh_sector) {
4130 uint64_t sector_count;
4131
4132 /* Get underlying device block count */
4133 if ((retval = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCGETBLOCKCOUNT,
4134 (caddr_t)&sector_count, 0, vfs_context_current()))) {
4135 printf("hfs_flushVH: err %d getting block count (%s) \n", retval, vcb->vcbVN);
4136 retval = ENXIO;
4137 goto err_exit;
4138 }
4139
4140 /* Partition size was changed without our knowledge */
4141 if (sector_count != (uint64_t)hfsmp->hfs_logical_block_count) {
4142 hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
4143 HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, sector_count);
4144 /* Note: hfs_fs_avh_sector will remain unchanged */
4145 printf ("hfs_flushVH: altflush: partition size changed, partition_avh_sector=%qu, fs_avh_sector=%qu\n",
4146 hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector);
4147 }
4148 }
4149
4150 /*
4151 * First see if we need to write I/O to the "secondary" AVH
4152 * located at FS Size - 1024 bytes, because this one will
4153 * always go into the journal. We put this AVH into the journal
4154 * because even if the filesystem size has shrunk, this LBA should be
4155 * reachable after the partition-size modification has occurred.
4156 * The one where we need to be careful is partitionsize-1024, since the
4157 * partition size should hopefully shrink.
4158 *
4159 * Most of the time this block will not execute.
4160 */
4161 if ((hfsmp->hfs_fs_avh_sector) &&
4162 (hfsmp->hfs_partition_avh_sector != hfsmp->hfs_fs_avh_sector)) {
4163 if (buf_meta_bread(hfsmp->hfs_devvp,
4164 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_fs_avh_sector, hfsmp->hfs_log_per_phys),
4165 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) {
4166 if (hfsmp->jnl) {
4167 journal_modify_block_start(hfsmp->jnl, alt_bp);
4168 }
4169
4170 bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) +
4171 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size),
4172 kMDBSize);
4173
4174 if (hfsmp->jnl) {
4175 journal_modify_block_end(hfsmp->jnl, alt_bp, NULL, NULL);
4176 } else {
4177 (void) VNOP_BWRITE(alt_bp);
4178 }
4179 } else if (alt_bp) {
4180 buf_brelse(alt_bp);
4181 }
4182 }
4183
4184 /*
4185 * Flush out alternate volume header located at 1024 bytes before
4186 * end of the partition as part of journal transaction. In
4187 * most cases, this will be the only alternate volume header
4188 * that we need to worry about because the file system size is
4189 * same as the partition size, therefore hfs_fs_avh_sector is
4190 * same as hfs_partition_avh_sector. This is the "priority" AVH.
4191 *
4192 * However, do not always put this I/O into the journal. If we skipped the
4193 * FS-Size AVH write above, then we will put this I/O into the journal as
4194 * that indicates the two were in sync. However, if the FS size is
4195 * not the same as the partition size, we are tracking two. We don't
4196 * put it in the journal in that case, since if the partition
4197 * size changes between uptimes, and we need to replay the journal,
4198 * this I/O could generate an EIO if during replay it is now trying
4199 * to access blocks beyond the device EOF.
4200 */
4201 if (hfsmp->hfs_partition_avh_sector) {
4202 if (buf_meta_bread(hfsmp->hfs_devvp,
4203 HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_partition_avh_sector, hfsmp->hfs_log_per_phys),
4204 hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) {
4205
4206 /* only one AVH, put this I/O in the journal. */
4207 if ((hfsmp->jnl) && (hfsmp->hfs_partition_avh_sector == hfsmp->hfs_fs_avh_sector)) {
4208 journal_modify_block_start(hfsmp->jnl, alt_bp);
4209 }
4210
4211 bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) +
4212 HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size),
4213 kMDBSize);
4214
4215 /* If journaled and we only have one AVH to track */
4216 if ((hfsmp->jnl) && (hfsmp->hfs_partition_avh_sector == hfsmp->hfs_fs_avh_sector)) {
4217 journal_modify_block_end (hfsmp->jnl, alt_bp, NULL, NULL);
4218 } else {
4219 /*
4220 * If we don't have a journal or there are two AVH's at the
4221 * moment, then this one doesn't go in the journal. Note that
4222 * this one may generate I/O errors, since the partition
4223 * can be resized behind our backs at any moment and this I/O
4224 * may now appear to be beyond the device EOF.
4225 */
4226 (void) VNOP_BWRITE(alt_bp);
4227 hfs_flush(hfsmp, HFS_FLUSH_CACHE);
4228 }
4229 } else if (alt_bp) {
4230 buf_brelse(alt_bp);
4231 }
4232 }
4233 }
4234
4235 /* Finish modifying the block for the primary VH */
4236 if (hfsmp->jnl) {
4237 journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
4238 } else {
4239 if (!ISSET(options, HFS_FVH_WAIT)) {
4240 buf_bawrite(bp);
4241 } else {
4242 retval = VNOP_BWRITE(bp);
4243 /* When critical data changes, flush the device cache */
4244 if (critical && (retval == 0)) {
4245 hfs_flush(hfsmp, HFS_FLUSH_CACHE);
4246 }
4247 }
4248 }
4249 hfs_end_transaction(hfsmp);
4250
4251 return (retval);
4252
4253 err_exit:
4254 if (alt_bp)
4255 buf_brelse(alt_bp);
4256 if (bp)
4257 buf_brelse(bp);
4258 hfs_end_transaction(hfsmp);
4259 return retval;
4260 }
4261
4262
4263 /*
4264 * Creates a UUID from a unique "name" in the HFS UUID Name space.
4265 * See version 3 UUID.
4266 */
4267 static void
4268 hfs_getvoluuid(struct hfsmount *hfsmp, uuid_t result)
4269 {
4270 MD5_CTX md5c;
4271 uint8_t rawUUID[8];
4272
4273 ((uint32_t *)rawUUID)[0] = hfsmp->vcbFndrInfo[6];
4274 ((uint32_t *)rawUUID)[1] = hfsmp->vcbFndrInfo[7];
4275
4276 MD5Init( &md5c );
4277 MD5Update( &md5c, HFS_UUID_NAMESPACE_ID, sizeof( uuid_t ) );
4278 MD5Update( &md5c, rawUUID, sizeof (rawUUID) );
4279 MD5Final( result, &md5c );
4280
4281 result[6] = 0x30 | ( result[6] & 0x0F );
4282 result[8] = 0x80 | ( result[8] & 0x3F );
4283 }
4284
4285 /*
4286 * Get file system attributes.
4287 */
/*
 * VFS_GETATTR entry point: report volume-level attributes for an HFS
 * mount -- object/block counts, capability masks, timestamps, subtype,
 * volume name and UUID.  Each attribute filled in is marked supported
 * via VFSATTR_RETURN / VFSATTR_SET_SUPPORTED; attributes the caller did
 * not request under an IS_ACTIVE guard are skipped.  Always returns 0.
 */
static int
hfs_vfs_getattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
{
#define HFS_ATTR_CMN_VALIDMASK ATTR_CMN_VALIDMASK
#define HFS_ATTR_FILE_VALIDMASK (ATTR_FILE_VALIDMASK & ~(ATTR_FILE_FILETYPE | ATTR_FILE_FORKCOUNT | ATTR_FILE_FORKLIST))
#define HFS_ATTR_CMN_VOL_VALIDMASK (ATTR_CMN_VALIDMASK & ~(ATTR_CMN_ACCTIME))

	ExtendedVCB *vcb = VFSTOVCB(mp);
	struct hfsmount *hfsmp = VFSTOHFS(mp);
	u_int32_t freeCNIDs;

	int searchfs_on = 0;
	int exchangedata_on = 1;

#if CONFIG_SEARCHFS
	searchfs_on = 1;
#endif

#if CONFIG_PROTECT
	/* exchangedata is not advertised on content-protected volumes */
	if (cp_fs_protected(mp)) {
		exchangedata_on = 0;
	}
#endif

	/* CNIDs remaining before the 32-bit catalog-node-ID space is exhausted */
	freeCNIDs = (u_int32_t)0xFFFFFFFF - (u_int32_t)hfsmp->vcbNxtCNID;

	VFSATTR_RETURN(fsap, f_objcount, (u_int64_t)hfsmp->vcbFilCnt + (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_filecount, (u_int64_t)hfsmp->vcbFilCnt);
	VFSATTR_RETURN(fsap, f_dircount, (u_int64_t)hfsmp->vcbDirCnt);
	VFSATTR_RETURN(fsap, f_maxobjcount, (u_int64_t)0xFFFFFFFF);
	VFSATTR_RETURN(fsap, f_iosize, (size_t)cluster_max_io_size(mp, 0));
	VFSATTR_RETURN(fsap, f_blocks, (u_int64_t)hfsmp->totalBlocks);
	VFSATTR_RETURN(fsap, f_bfree, (u_int64_t)hfs_freeblks(hfsmp, 0));
	VFSATTR_RETURN(fsap, f_bavail, (u_int64_t)hfs_freeblks(hfsmp, 1));
	VFSATTR_RETURN(fsap, f_bsize, (u_int32_t)vcb->blockSize);
	/* XXX needs clarification */
	VFSATTR_RETURN(fsap, f_bused, hfsmp->totalBlocks - hfs_freeblks(hfsmp, 1));
	/* Maximum files is constrained by total blocks. */
	VFSATTR_RETURN(fsap, f_files, (u_int64_t)(hfsmp->totalBlocks - 2));
	VFSATTR_RETURN(fsap, f_ffree, MIN((u_int64_t)freeCNIDs, (u_int64_t)hfs_freeblks(hfsmp, 1)));

	fsap->f_fsid.val[0] = hfsmp->hfs_raw_dev;
	fsap->f_fsid.val[1] = vfs_typenum(mp);
	VFSATTR_SET_SUPPORTED(fsap, f_fsid);

	VFSATTR_RETURN(fsap, f_signature, vcb->vcbSigWord);
	VFSATTR_RETURN(fsap, f_carbon_fsid, 0);

	if (VFSATTR_IS_ACTIVE(fsap, f_capabilities)) {
		vol_capabilities_attr_t *cap;

		cap = &fsap->f_capabilities;

		if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) {
			/* HFS+ & variants */
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_SYMBOLICLINKS |
				VOL_CAP_FMT_HARDLINKS |
				VOL_CAP_FMT_JOURNAL |
				VOL_CAP_FMT_ZERO_RUNS |
				(hfsmp->jnl ? VOL_CAP_FMT_JOURNAL_ACTIVE : 0) |
				(hfsmp->hfs_flags & HFS_CASE_SENSITIVE ? VOL_CAP_FMT_CASE_SENSITIVE : 0) |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS |
				VOL_CAP_FMT_2TB_FILESIZE |
				VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
				VOL_CAP_FMT_PATH_FROM_ID |
				VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
				VOL_CAP_FMT_PATH_FROM_ID;
#endif
		}
#if CONFIG_HFS_STD
		else {
			/* HFS standard */
			cap->capabilities[VOL_CAPABILITIES_FORMAT] =
				VOL_CAP_FMT_PERSISTENTOBJECTIDS |
				VOL_CAP_FMT_CASE_PRESERVING |
				VOL_CAP_FMT_FAST_STATFS |
				VOL_CAP_FMT_HIDDEN_FILES |
				VOL_CAP_FMT_PATH_FROM_ID;
		}
#endif

		/*
		 * The capabilities word in 'cap' tell you whether or not
		 * this particular filesystem instance has feature X enabled.
		 */

		cap->capabilities[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif

		/* HFS may conditionally support searchfs and exchangedata depending on the runtime */

		if (searchfs_on) {
			cap->capabilities[VOL_CAPABILITIES_INTERFACES] |= VOL_CAP_INT_SEARCHFS;
		}
		if (exchangedata_on) {
			cap->capabilities[VOL_CAPABILITIES_INTERFACES] |= VOL_CAP_INT_EXCHANGEDATA;
		}

		cap->capabilities[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->capabilities[VOL_CAPABILITIES_RESERVED2] = 0;

		cap->valid[VOL_CAPABILITIES_FORMAT] =
			VOL_CAP_FMT_PERSISTENTOBJECTIDS |
			VOL_CAP_FMT_SYMBOLICLINKS |
			VOL_CAP_FMT_HARDLINKS |
			VOL_CAP_FMT_JOURNAL |
			VOL_CAP_FMT_JOURNAL_ACTIVE |
			VOL_CAP_FMT_NO_ROOT_TIMES |
			VOL_CAP_FMT_SPARSE_FILES |
			VOL_CAP_FMT_ZERO_RUNS |
			VOL_CAP_FMT_CASE_SENSITIVE |
			VOL_CAP_FMT_CASE_PRESERVING |
			VOL_CAP_FMT_FAST_STATFS |
			VOL_CAP_FMT_2TB_FILESIZE |
			VOL_CAP_FMT_OPENDENYMODES |
			VOL_CAP_FMT_HIDDEN_FILES |
#if HFS_COMPRESSION
			VOL_CAP_FMT_PATH_FROM_ID |
			VOL_CAP_FMT_DECMPFS_COMPRESSION;
#else
			VOL_CAP_FMT_PATH_FROM_ID;
#endif

		/*
		 * Bits in the "valid" field tell you whether or not the on-disk
		 * format supports feature X.
		 */

		cap->valid[VOL_CAPABILITIES_INTERFACES] =
			VOL_CAP_INT_ATTRLIST |
			VOL_CAP_INT_NFSEXPORT |
			VOL_CAP_INT_READDIRATTR |
			VOL_CAP_INT_COPYFILE |
			VOL_CAP_INT_ALLOCATE |
			VOL_CAP_INT_VOL_RENAME |
			VOL_CAP_INT_ADVLOCK |
			VOL_CAP_INT_FLOCK |
			VOL_CAP_INT_MANLOCK |
#if NAMEDSTREAMS
			VOL_CAP_INT_EXTENDED_ATTR |
			VOL_CAP_INT_NAMEDSTREAMS;
#else
			VOL_CAP_INT_EXTENDED_ATTR;
#endif

		/* HFS always supports exchangedata and searchfs in the on-disk format natively */
		cap->valid[VOL_CAPABILITIES_INTERFACES] |= (VOL_CAP_INT_SEARCHFS | VOL_CAP_INT_EXCHANGEDATA);


		cap->valid[VOL_CAPABILITIES_RESERVED1] = 0;
		cap->valid[VOL_CAPABILITIES_RESERVED2] = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_capabilities);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_attributes)) {
		vol_attributes_attr_t *attrp = &fsap->f_attributes;

		attrp->validattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
		attrp->validattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->validattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->validattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->validattr.forkattr = 0;

		attrp->nativeattr.commonattr = HFS_ATTR_CMN_VOL_VALIDMASK;
		attrp->nativeattr.volattr = ATTR_VOL_VALIDMASK & ~ATTR_VOL_INFO;
		attrp->nativeattr.dirattr = ATTR_DIR_VALIDMASK;
		attrp->nativeattr.fileattr = HFS_ATTR_FILE_VALIDMASK;
		attrp->nativeattr.forkattr = 0;
		VFSATTR_SET_SUPPORTED(fsap, f_attributes);
	}
	fsap->f_create_time.tv_sec = hfsmp->hfs_itime;
	fsap->f_create_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_create_time);
	fsap->f_modify_time.tv_sec = hfsmp->vcbLsMod;
	fsap->f_modify_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_modify_time);

	fsap->f_backup_time.tv_sec = hfsmp->vcbVolBkUp;
	fsap->f_backup_time.tv_nsec = 0;
	VFSATTR_SET_SUPPORTED(fsap, f_backup_time);
	if (VFSATTR_IS_ACTIVE(fsap, f_fssubtype)) {
		u_int16_t subtype = 0;

		/*
		 * Subtypes (flavors) for HFS
		 *   0:   Mac OS Extended
		 *   1:   Mac OS Extended (Journaled)
		 *   2:   Mac OS Extended (Case Sensitive)
		 *   3:   Mac OS Extended (Case Sensitive, Journaled)
		 *   4 - 127:   Reserved
		 * 128:   Mac OS Standard
		 *
		 */
		if ((hfsmp->hfs_flags & HFS_STANDARD) == 0) {
			if (hfsmp->jnl) {
				subtype |= HFS_SUBTYPE_JOURNALED;
			}
			if (hfsmp->hfs_flags & HFS_CASE_SENSITIVE) {
				subtype |= HFS_SUBTYPE_CASESENSITIVE;
			}
		}
#if CONFIG_HFS_STD
		else {
			subtype = HFS_SUBTYPE_STANDARDHFS;
		}
#endif
		fsap->f_fssubtype = subtype;
		VFSATTR_SET_SUPPORTED(fsap, f_fssubtype);
	}

	if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
		strlcpy(fsap->f_vol_name, (char *) hfsmp->vcbVN, MAXPATHLEN);
		VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
	}
	if (VFSATTR_IS_ACTIVE(fsap, f_uuid)) {
		hfs_getvoluuid(hfsmp, fsap->f_uuid);
		VFSATTR_SET_SUPPORTED(fsap, f_uuid);
	}
	return (0);
}
4524
/*
 * Perform a volume rename.  Requires the FS' root vp.
 *
 * Inside a catalog transaction, renames the root folder's catalog
 * record to 'name', copies the new name into the VCB, and notifies
 * CoreStorage of the new label (best effort).  On success the cnode's
 * catalog descriptor is swapped for the new one.  A zero-length name
 * is silently ignored (returns 0).
 */
static int
hfs_rename_volume(struct vnode *vp, const char *name, proc_t p)
{
	ExtendedVCB *vcb = VTOVCB(vp);
	struct cnode *cp = VTOC(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	struct cat_desc to_desc;
	struct cat_desc todir_desc;
	struct cat_desc new_desc;
	cat_cookie_t cookie;
	int lockflags;
	int error = 0;
	char converted_volname[256];
	size_t volname_length = 0;
	size_t conv_volname_length = 0;


	/*
	 * Ignore attempts to rename a volume to a zero-length name.
	 */
	if (name[0] == 0)
		return(0);

	bzero(&to_desc, sizeof(to_desc));
	bzero(&todir_desc, sizeof(todir_desc));
	bzero(&new_desc, sizeof(new_desc));
	bzero(&cookie, sizeof(cookie));

	/* Destination directory descriptor: the root folder's parent. */
	todir_desc.cd_parentcnid = kHFSRootParentID;
	todir_desc.cd_cnid = kHFSRootFolderID;
	todir_desc.cd_flags = CD_ISDIR;

	/* Target descriptor: same root folder CNID, new name. */
	to_desc.cd_nameptr = (const u_int8_t *)name;
	to_desc.cd_namelen = strlen(name);
	to_desc.cd_parentcnid = kHFSRootParentID;
	to_desc.cd_cnid = cp->c_cnid;
	to_desc.cd_flags = CD_ISDIR;

	/* Lock order: cnode, then transaction, then catalog system file. */
	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) == 0) {
		if ((error = hfs_start_transaction(hfsmp)) == 0) {
			if ((error = cat_preflight(hfsmp, CAT_RENAME, &cookie, p)) == 0) {
				lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

				error = cat_rename(hfsmp, &cp->c_desc, &todir_desc, &to_desc, &new_desc);

				/*
				 * If successful, update the name in the VCB, ensure it's terminated.
				 */
				if (error == 0) {
					strlcpy((char *)vcb->vcbVN, name, sizeof(vcb->vcbVN));

					volname_length = strlen ((const char*)vcb->vcbVN);
					/* Send the volume name down to CoreStorage if necessary */
					error = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
					if (error == 0) {
						(void) VNOP_IOCTL (hfsmp->hfs_devvp, _DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
					}
					/* CoreStorage notification is best effort; never fail the rename for it. */
					error = 0;
				}

				hfs_systemfile_unlock(hfsmp, lockflags);
				cat_postflight(hfsmp, &cookie, p);

				if (error)
					MarkVCBDirty(vcb);
				/* Push the (possibly updated) name to disk before ending the txn. */
				(void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
			}
			hfs_end_transaction(hfsmp);
		}
		if (!error) {
			/* Release old allocated name buffer */
			if (cp->c_desc.cd_flags & CD_HASBUF) {
				const char *tmp_name = (const char *)cp->c_desc.cd_nameptr;

				cp->c_desc.cd_nameptr = 0;
				cp->c_desc.cd_namelen = 0;
				cp->c_desc.cd_flags &= ~CD_HASBUF;
				vfs_removename(tmp_name);
			}
			/* Update cnode's catalog descriptor */
			replace_desc(cp, &new_desc);
			vcb->volumeNameEncodingHint = new_desc.cd_encoding;
			cp->c_touch_chgtime = TRUE;
		}

		hfs_unlock(cp);
	}

	return(error);
}
4618
4619 /*
4620 * Get file system attributes.
4621 */
4622 static int
4623 hfs_vfs_setattr(struct mount *mp, struct vfs_attr *fsap, __unused vfs_context_t context)
4624 {
4625 kauth_cred_t cred = vfs_context_ucred(context);
4626 int error = 0;
4627
4628 /*
4629 * Must be superuser or owner of filesystem to change volume attributes
4630 */
4631 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(mp)->f_owner))
4632 return(EACCES);
4633
4634 if (VFSATTR_IS_ACTIVE(fsap, f_vol_name)) {
4635 vnode_t root_vp;
4636
4637 error = hfs_vfs_root(mp, &root_vp, context);
4638 if (error)
4639 goto out;
4640
4641 error = hfs_rename_volume(root_vp, fsap->f_vol_name, vfs_context_proc(context));
4642 (void) vnode_put(root_vp);
4643 if (error)
4644 goto out;
4645
4646 VFSATTR_SET_SUPPORTED(fsap, f_vol_name);
4647 }
4648
4649 out:
4650 return error;
4651 }
4652
4653 /* If a runtime corruption is detected, set the volume inconsistent
4654 * bit in the volume attributes. The volume inconsistent bit is a persistent
4655 * bit which represents that the volume is corrupt and needs repair.
4656 * The volume inconsistent bit can be set from the kernel when it detects
4657 * runtime corruption or from file system repair utilities like fsck_hfs when
4658 * a repair operation fails. The bit should be cleared only from file system
4659 * verify/repair utility like fsck_hfs when a verify/repair succeeds.
4660 */
__private_extern__
void hfs_mark_inconsistent(struct hfsmount *hfsmp,
						   hfs_inconsistency_reason_t reason)
{
	hfs_lock_mount (hfsmp);
	/* Set the persistent inconsistent bit once; only fsck_hfs clears it. */
	if ((hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) == 0) {
		hfsmp->vcbAtrb |= kHFSVolumeInconsistentMask;
		MarkVCBDirty(hfsmp);
	}
	/* Log the cause only for read/write mounts. */
	if ((hfsmp->hfs_flags & HFS_READ_ONLY)==0) {
		switch (reason) {
		case HFS_INCONSISTENCY_DETECTED:
			printf("hfs_mark_inconsistent: Runtime corruption detected on %s, fsck will be forced on next mount.\n",
				   hfsmp->vcbVN);
			break;
		case HFS_ROLLBACK_FAILED:
			printf("hfs_mark_inconsistent: Failed to roll back; volume `%s' might be inconsistent; fsck will be forced on next mount.\n",
				   hfsmp->vcbVN);
			break;
		case HFS_OP_INCOMPLETE:
			printf("hfs_mark_inconsistent: Failed to complete operation; volume `%s' might be inconsistent; fsck will be forced on next mount.\n",
				   hfsmp->vcbVN);
			break;
		case HFS_FSCK_FORCED:
			printf("hfs_mark_inconsistent: fsck requested for `%s'; fsck will be forced on next mount.\n",
				   hfsmp->vcbVN);
			break;
		}
	}
	hfs_unlock_mount (hfsmp);
}
4692
4693 /* Replay the journal on the device node provided. Returns zero if
4694 * journal replay succeeded or no journal was supposed to be replayed.
4695 */
4696 static int hfs_journal_replay(vnode_t devvp, vfs_context_t context)
4697 {
4698 int retval = 0;
4699 int error = 0;
4700 struct mount *mp = NULL;
4701 struct hfs_mount_args *args = NULL;
4702
4703 /* Replay allowed only on raw devices */
4704 if (!vnode_ischr(devvp) && !vnode_isblk(devvp)) {
4705 retval = EINVAL;
4706 goto out;
4707 }
4708
4709 /* Create dummy mount structures */
4710 MALLOC(mp, struct mount *, sizeof(struct mount), M_TEMP, M_WAITOK);
4711 if (mp == NULL) {
4712 retval = ENOMEM;
4713 goto out;
4714 }
4715 bzero(mp, sizeof(struct mount));
4716 mount_lock_init(mp);
4717
4718 MALLOC(args, struct hfs_mount_args *, sizeof(struct hfs_mount_args), M_TEMP, M_WAITOK);
4719 if (args == NULL) {
4720 retval = ENOMEM;
4721 goto out;
4722 }
4723 bzero(args, sizeof(struct hfs_mount_args));
4724
4725 retval = hfs_mountfs(devvp, mp, args, 1, context);
4726 buf_flushdirtyblks(devvp, TRUE, 0, "hfs_journal_replay");
4727
4728 /* FSYNC the devnode to be sure all data has been flushed */
4729 error = VNOP_FSYNC(devvp, MNT_WAIT, context);
4730 if (error) {
4731 retval = error;
4732 }
4733
4734 out:
4735 if (mp) {
4736 mount_lock_destroy(mp);
4737 FREE(mp, M_TEMP);
4738 }
4739 if (args) {
4740 FREE(args, M_TEMP);
4741 }
4742 return retval;
4743 }
4744
4745
/*
 * Cancel the syncer.
 *
 * Detaches hfsmp->hfs_syncer under the syncer lock so no new callouts
 * are queued, then cancels any pending callout -- or, if the syncer is
 * already running, waits for it to finish -- before freeing the
 * thread_call object.
 */
static void
hfs_syncer_free(struct hfsmount *hfsmp)
{
	if (hfsmp && hfsmp->hfs_syncer) {
		hfs_syncer_lock(hfsmp);

		/*
		 * First, make sure everything else knows we don't want any more
		 * requests queued.
		 */
		thread_call_t syncer = hfsmp->hfs_syncer;
		hfsmp->hfs_syncer = NULL;

		hfs_syncer_unlock(hfsmp);

		// Now deal with requests that are outstanding
		if (hfsmp->hfs_sync_incomplete) {
			if (thread_call_cancel(syncer)) {
				// We managed to cancel the timer so we're done
				hfsmp->hfs_sync_incomplete = FALSE;
			} else {
				// Syncer must be running right now so we have to wait
				hfs_syncer_lock(hfsmp);
				while (hfsmp->hfs_sync_incomplete)
					hfs_syncer_wait(hfsmp);
				hfs_syncer_unlock(hfsmp);
			}
		}

		// Now we're safe to free the syncer
		thread_call_free(syncer);
	}
}
4782
/*
 * hfs vfs operations.
 *
 * Positional initializer: entries must stay in the order the fields
 * are declared in struct vfsops (<sys/mount.h>).
 */
struct vfsops hfs_vfsops = {
	hfs_mount,		/* mount */
	hfs_start,		/* start */
	hfs_unmount,		/* unmount */
	hfs_vfs_root,		/* root */
	hfs_quotactl,		/* quotactl */
	hfs_vfs_getattr,	/* getattr -- was hfs_statfs */
	hfs_sync,		/* sync */
	hfs_vfs_vget,		/* vget */
	hfs_fhtovp,		/* fhtovp */
	hfs_vptofh,		/* vptofh */
	hfs_init,		/* init */
	hfs_sysctl,		/* sysctl */
	hfs_vfs_setattr,	/* setattr */
	{NULL}			/* reserved */
};