/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* @(#)hfs_vfsutils.c 4.0
 *
 * (c) 1997-2002 Apple Inc. All Rights Reserved
 *
 * hfs_vfsutils.c -- Routines that go between the HFS layer and the VFS.
 *
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/vm.h>
#include <sys/buf.h>
#include <sys/ubc.h>
#include <sys/unistd.h>
#include <sys/utfconv.h>
#include <sys/kauth.h>
#include <sys/fcntl.h>
#include <sys/fsctl.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <kern/clock.h>
#include <stdbool.h>
#include <miscfs/specfs/specdev.h>
#include <libkern/OSAtomic.h>
#include <IOKit/IOLib.h>

/* for parsing boot-args */
#include <pexpert/pexpert.h>
#include <kern/kalloc.h>
#include <kern/zalloc.h>

#include "hfs_iokit.h"
#include "hfs.h"
#include "hfs_catalog.h"
#include "hfs_dbg.h"
#include "hfs_mount.h"
#include "hfs_endian.h"
#include "hfs_cnode.h"
#include "hfs_fsctl.h"
#include "hfs_cprotect.h"

#include "FileMgrInternal.h"
#include "BTreesInternal.h"
#include "HFSUnicodeWrappers.h"

/* Enable/disable debugging code for live volume resizing, defined in hfs_resize.c */
extern int hfs_resize_debug;

static void ReleaseMetaFileVNode(struct vnode *vp);
static int hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args);

static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *);
static void hfs_thaw_locked(struct hfsmount *hfsmp);

#define HFS_MOUNT_DEBUG 1

//*******************************************************************************
// Note: Finder information in the HFS/HFS+ metadata is considered opaque and
// hence is not in the right byte order on little endian machines. It is
// the responsibility of the Finder and other clients to swap the data.
//*******************************************************************************

//*******************************************************************************
// Routine: hfs_MountHFSVolume
//
//
//*******************************************************************************
unsigned char hfs_catname[] = "Catalog B-tree";
unsigned char hfs_extname[] = "Extents B-tree";
unsigned char hfs_vbmname[] = "Volume Bitmap";
unsigned char hfs_attrname[] = "Attribute B-tree";
unsigned char hfs_startupname[] = "Startup File";

#if CONFIG_HFS_STD
OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
        __unused struct proc *p)
{
    ExtendedVCB *vcb = HFSTOVCB(hfsmp);
    int error;
    ByteCount utf8chars;
    struct cat_desc cndesc;
    struct cat_attr cnattr;
    struct cat_fork fork;
    int newvnode_flags = 0;

    /* Block size must be a multiple of 512 */
    if (SWAP_BE32(mdb->drAlBlkSiz) == 0 ||
        (SWAP_BE32(mdb->drAlBlkSiz) & 0x01FF) != 0)
        return (EINVAL);

    /* don't mount a writable volume if it's dirty; it must be cleaned by fsck_hfs */
    if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
        ((SWAP_BE16(mdb->drAtrb) & kHFSVolumeUnmountedMask) == 0)) {
        return (EINVAL);
    }
    hfsmp->hfs_flags |= HFS_STANDARD;
    /*
     * The MDB seems OK: transfer info from it into VCB
     * Note - the VCB starts out clear (all zeros)
     *
     */
    vcb->vcbSigWord = SWAP_BE16 (mdb->drSigWord);
    vcb->hfs_itime = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate)));
    vcb->localCreateDate = SWAP_BE32 (mdb->drCrDate);
    vcb->vcbLsMod = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod)));
    vcb->vcbAtrb = SWAP_BE16 (mdb->drAtrb);
    vcb->vcbNmFls = SWAP_BE16 (mdb->drNmFls);
    vcb->vcbVBMSt = SWAP_BE16 (mdb->drVBMSt);
    vcb->nextAllocation = SWAP_BE16 (mdb->drAllocPtr);
    vcb->totalBlocks = SWAP_BE16 (mdb->drNmAlBlks);
    vcb->allocLimit = vcb->totalBlocks;
    vcb->blockSize = SWAP_BE32 (mdb->drAlBlkSiz);
    vcb->vcbClpSiz = SWAP_BE32 (mdb->drClpSiz);
    vcb->vcbAlBlSt = SWAP_BE16 (mdb->drAlBlSt);
    vcb->vcbNxtCNID = SWAP_BE32 (mdb->drNxtCNID);
    vcb->freeBlocks = SWAP_BE16 (mdb->drFreeBks);
    vcb->vcbVolBkUp = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drVolBkUp)));
    vcb->vcbWrCnt = SWAP_BE32 (mdb->drWrCnt);
    vcb->vcbNmRtDirs = SWAP_BE16 (mdb->drNmRtDirs);
    vcb->vcbFilCnt = SWAP_BE32 (mdb->drFilCnt);
    vcb->vcbDirCnt = SWAP_BE32 (mdb->drDirCnt);
    bcopy(mdb->drFndrInfo, vcb->vcbFndrInfo, sizeof(vcb->vcbFndrInfo));
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
        vcb->vcbWrCnt++;    /* Compensate for write of MDB on last flush */
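
    /*
     * Note that the allocation-block counters above (drNmAlBlks, drFreeBks,
     * drAllocPtr) are 16-bit MDB fields, which is why they are swapped with
     * SWAP_BE16: a classic HFS volume can address at most 65,535 allocation
     * blocks, and larger disks scale only through a bigger drAlBlkSiz.
     */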

    /* convert hfs encoded name into UTF-8 string */
    error = hfs_to_utf8(vcb, mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
    /*
     * When an HFS name cannot be encoded with the current
     * volume encoding we use MacRoman as a fallback.
     */
    if (error || (utf8chars == 0)) {
        error = mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
        /* If we fail to encode to UTF8 from Mac Roman, the name is bad. Deny the mount */
        if (error) {
            goto MtVolErr;
        }
    }

    hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
    vcb->vcbVBMIOSize = kHFSBlockSize;

    /* Generate the partition-based AVH location */
    hfsmp->hfs_partition_avh_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
                                                     hfsmp->hfs_logical_block_count);

    /* HFS standard is read-only, so just stuff the FS location in here, too */
    hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector;
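
    /*
     * As with HFS+ below, the alternate volume header (AVH) lives 1024 bytes
     * before the end of the partition, so for 512-byte logical blocks
     * HFS_ALT_SECTOR() resolves to (hfs_logical_block_count - 2). A worked
     * example: a 1 GiB partition has 2,097,152 512-byte sectors, putting the
     * AVH at sector 2,097,150.
     */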

    bzero(&cndesc, sizeof(cndesc));
    cndesc.cd_parentcnid = kHFSRootParentID;
    cndesc.cd_flags |= CD_ISMETA;
    bzero(&cnattr, sizeof(cnattr));
    cnattr.ca_linkcount = 1;
    cnattr.ca_mode = S_IFREG;
    bzero(&fork, sizeof(fork));

    /*
     * Set up Extents B-tree vnode
     */
    cndesc.cd_nameptr = hfs_extname;
    cndesc.cd_namelen = strlen((char *)hfs_extname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
    fork.cf_size = SWAP_BE32(mdb->drXTFlSize);
    fork.cf_blocks = fork.cf_size / vcb->blockSize;
    fork.cf_clump = SWAP_BE32(mdb->drXTClpSiz);
    fork.cf_vblocks = 0;
    fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drXTExtRec[0].startBlock);
    fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drXTExtRec[0].blockCount);
    fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drXTExtRec[1].startBlock);
    fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drXTExtRec[1].blockCount);
    fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drXTExtRec[2].startBlock);
    fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drXTExtRec[2].blockCount);
    cnattr.ca_blocks = fork.cf_blocks;

    error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
                            &hfsmp->hfs_extents_vp, &newvnode_flags);
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error creating Ext Vnode (%d) \n", error);
        }
        goto MtVolErr;
    }
    error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
                                     (KeyCompareProcPtr)CompareExtentKeys));
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error opening Ext Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);

    /*
     * Set up Catalog B-tree vnode...
     */
    cndesc.cd_nameptr = hfs_catname;
    cndesc.cd_namelen = strlen((char *)hfs_catname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
    fork.cf_size = SWAP_BE32(mdb->drCTFlSize);
    fork.cf_blocks = fork.cf_size / vcb->blockSize;
    fork.cf_clump = SWAP_BE32(mdb->drCTClpSiz);
    fork.cf_vblocks = 0;
    fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drCTExtRec[0].startBlock);
    fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drCTExtRec[0].blockCount);
    fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drCTExtRec[1].startBlock);
    fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drCTExtRec[1].blockCount);
    fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drCTExtRec[2].startBlock);
    fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drCTExtRec[2].blockCount);
    cnattr.ca_blocks = fork.cf_blocks;

    error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
                            &hfsmp->hfs_catalog_vp, &newvnode_flags);
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error creating catalog Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                                     (KeyCompareProcPtr)CompareCatalogKeys));
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error opening catalog Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);

    /*
     * Set up dummy Allocation file vnode (used only for locking bitmap)
     */
    cndesc.cd_nameptr = hfs_vbmname;
    cndesc.cd_namelen = strlen((char *)hfs_vbmname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
    bzero(&fork, sizeof(fork));
    cnattr.ca_blocks = 0;

    error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
                            &hfsmp->hfs_allocation_vp, &newvnode_flags);
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error creating bitmap Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);

    /* mark the volume dirty (clear clean unmount bit) */
    vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;

    if (error == noErr) {
        error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, NULL, NULL, NULL);
        if (error && HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error looking up root folder (%d) \n", error);
        }
    }

    if (error == noErr) {
        /* If the disk isn't write protected.. */
        if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask)) {
            MarkVCBDirty (vcb);     // mark VCB dirty so it will be written
        }
    }

    /*
     * all done with system files so we can unlock now...
     */
    hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
    hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
    hfs_unlock(VTOC(hfsmp->hfs_extents_vp));

    if (error == noErr) {
        /* If successful, then we can just return once we've unlocked the cnodes */
        return error;
    }

    //-- Release any resources allocated so far before exiting with an error:
MtVolErr:
    hfsUnmount(hfsmp, NULL);

    return (error);
}

#endif

//*******************************************************************************
//
// Sanity check Volume Header Block:
//   Input argument *vhp is a pointer to a HFSPlusVolumeHeader block that has
//   not been endian-swapped and represents the on-disk contents of this sector.
//   This routine will not change the endianness of the vhp block.
//
//*******************************************************************************
OSErr hfs_ValidateHFSPlusVolumeHeader(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp)
{
    u_int16_t signature;
    u_int16_t hfs_version;
    u_int32_t blockSize;

    signature = SWAP_BE16(vhp->signature);
    hfs_version = SWAP_BE16(vhp->version);

    if (signature == kHFSPlusSigWord) {
        if (hfs_version != kHFSPlusVersion) {
            printf("hfs_ValidateHFSPlusVolumeHeader: invalid HFS+ version: %x\n", hfs_version);
            return (EINVAL);
        }
    } else if (signature == kHFSXSigWord) {
        if (hfs_version != kHFSXVersion) {
            printf("hfs_ValidateHFSPlusVolumeHeader: invalid HFSX version: %x\n", hfs_version);
            return (EINVAL);
        }
    } else {
        /* Removed the printf for an invalid HFS+ signature because it gives
         * a false error for a UFS root volume
         */
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_ValidateHFSPlusVolumeHeader: unknown Volume Signature : %x\n", signature);
        }
        return (EINVAL);
    }

    /* Block size must be at least 512 and a power of 2 */
    blockSize = SWAP_BE32(vhp->blockSize);
    if (blockSize < 512 || !powerof2(blockSize)) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_ValidateHFSPlusVolumeHeader: invalid blocksize (%d) \n", blockSize);
        }
        return (EINVAL);
    }
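
    /*
     * For example, 512, 4096, and 65536 all pass the check above, while 0,
     * 256 (smaller than a sector), and 24576 (not a power of two) are
     * rejected.
     */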

    if (blockSize < hfsmp->hfs_logical_block_size) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_ValidateHFSPlusVolumeHeader: invalid physical blocksize (%d), hfs_logical_blocksize (%d) \n",
                   blockSize, hfsmp->hfs_logical_block_size);
        }
        return (EINVAL);
    }
    return 0;
}

//*******************************************************************************
// Routine: hfs_MountHFSPlusVolume
//
//
//*******************************************************************************

OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
        off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred)
{
    register ExtendedVCB *vcb;
    struct cat_desc cndesc;
    struct cat_attr cnattr;
    struct cat_fork cfork;
    u_int32_t blockSize;
    daddr64_t spare_sectors;
    struct BTreeInfoRec btinfo;
    u_int16_t signature;
    u_int16_t hfs_version;
    int newvnode_flags = 0;
    int i;
    OSErr retval;
    char converted_volname[256];
    size_t volname_length = 0;
    size_t conv_volname_length = 0;
    bool async_bitmap_scan;

    signature = SWAP_BE16(vhp->signature);
    hfs_version = SWAP_BE16(vhp->version);

    retval = hfs_ValidateHFSPlusVolumeHeader(hfsmp, vhp);
    if (retval)
        return retval;

    if (signature == kHFSXSigWord) {
        /* The in-memory signature is always 'H+'. */
        signature = kHFSPlusSigWord;
        hfsmp->hfs_flags |= HFS_X;
    }

    blockSize = SWAP_BE32(vhp->blockSize);
    /* don't mount a writable volume if it's dirty; it must be cleaned by fsck_hfs */
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL &&
        (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: cannot mount dirty non-journaled volumes\n");
        }
        return (EINVAL);
    }

    /* Make sure we can live with the physical block size. */
    if ((disksize & (hfsmp->hfs_logical_block_size - 1)) ||
        (embeddedOffset & (hfsmp->hfs_logical_block_size - 1))) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_logical_blocksize (%d) \n",
                   hfsmp->hfs_logical_block_size);
        }
        return (ENXIO);
    }

    /*
     * If the allocation block size is less than the physical block size,
     * the same data could be cached in two places, which leads to corruption.
     *
     * HFS Plus reserves one allocation block for the Volume Header.
     * If the physical size is larger, then when we read the volume header,
     * we will also end up reading in the next allocation block(s).
     * If those other allocation block(s) are modified, and then the volume
     * header is modified, the write of the volume header's buffer will write
     * out the old contents of the other allocation blocks.
     *
     * We assume that the physical block size is the same as the logical block
     * size. The physical block size value is used to round down the offsets
     * for reading and writing the primary and alternate volume headers.
     *
     * The same logic to ensure a good hfs_physical_block_size is also in
     * hfs_mountfs so that hfs_mountfs, hfs_MountHFSPlusVolume and
     * later are doing the I/Os using the same block size.
     */
    if (blockSize < hfsmp->hfs_physical_block_size) {
        hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
        hfsmp->hfs_log_per_phys = 1;
    }

    /*
     * The VolumeHeader seems OK: transfer info from it into VCB
     * Note - the VCB starts out clear (all zeros)
     */
    vcb = HFSTOVCB(hfsmp);

    vcb->vcbSigWord = signature;
    vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
    vcb->vcbLsMod = to_bsd_time(SWAP_BE32(vhp->modifyDate));
    vcb->vcbAtrb = SWAP_BE32(vhp->attributes);
    vcb->vcbClpSiz = SWAP_BE32(vhp->rsrcClumpSize);
    vcb->vcbNxtCNID = SWAP_BE32(vhp->nextCatalogID);
    vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate));
    vcb->vcbWrCnt = SWAP_BE32(vhp->writeCount);
    vcb->vcbFilCnt = SWAP_BE32(vhp->fileCount);
    vcb->vcbDirCnt = SWAP_BE32(vhp->folderCount);

    /* copy 32 bytes of Finder info */
    bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));

    vcb->vcbAlBlSt = 0;     /* hfs+ allocation blocks start at first block of volume */
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
        vcb->vcbWrCnt++;    /* compensate for write of Volume Header on last flush */

    /* Now fill in the Extended VCB info */
    vcb->nextAllocation = SWAP_BE32(vhp->nextAllocation);
    vcb->totalBlocks = SWAP_BE32(vhp->totalBlocks);
    vcb->allocLimit = vcb->totalBlocks;
    vcb->freeBlocks = SWAP_BE32(vhp->freeBlocks);
    vcb->blockSize = blockSize;
    vcb->encodingsBitmap = SWAP_BE64(vhp->encodingsBitmap);
    vcb->localCreateDate = SWAP_BE32(vhp->createDate);

    vcb->hfsPlusIOPosOffset = embeddedOffset;

    /* Default to no free block reserve */
    vcb->reserveBlocks = 0;

    /*
     * Update the logical block size in the mount struct
     * (currently set up from the wrapper MDB) using the
     * new blocksize value:
     */
    hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
    vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO);

    /*
     * Validate and initialize the location of the alternate volume header.
     *
     * Note that there may be spare sectors beyond the end of the filesystem that still
     * belong to our partition.
     */

    spare_sectors = hfsmp->hfs_logical_block_count -
                    (((daddr64_t)vcb->totalBlocks * blockSize) /
                     hfsmp->hfs_logical_block_size);

    /*
     * Differentiate between "innocuous" spare sectors and the more unusual
     * degenerate case:
     *
     * *** Innocuous spare sectors exist if:
     *
     * A) the number of bytes assigned to the partition (by multiplying logical
     * block size * logical block count) is greater than the filesystem size
     * (by multiplying allocation block count and allocation block size)
     *
     * and
     *
     * B) the remainder is less than the size of a full allocation block's worth of bytes.
     *
     * This handles the normal case where there may be a few extra sectors, but the two
     * are fundamentally in sync.
     *
     * *** Degenerate spare sectors exist if:
     * A) The number of bytes assigned to the partition (by multiplying logical
     * block size * logical block count) is greater than the filesystem size
     * (by multiplying allocation block count and block size).
     *
     * and
     *
     * B) the remainder is greater than a full allocation block's worth of bytes.
     * In this case, a smaller file system exists in a larger partition.
     * This can happen in various ways, including when the volume is resized but the
     * partition is yet to be resized. Under this condition, we have to assume that
     * partition management software may resize the partition to match
     * the file system size in the future. Therefore we should update the
     * alternate volume header at two locations on the disk:
     *   a. 1024 bytes before the end of the partition
     *   b. 1024 bytes before the end of the file system
     */

    if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) {
        /*
         * Handle the degenerate case above. FS < partition size.
         * AVH located at 1024 bytes from the end of the partition
         */
        hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
                HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);

        /* AVH located at 1024 bytes from the end of the filesystem */
        hfsmp->hfs_fs_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
                HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
                               (((daddr64_t)vcb->totalBlocks * blockSize) / hfsmp->hfs_logical_block_size));
    }
    else {
        /* Innocuous spare sectors; Partition & FS notion are in sync */
        hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
                HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);

        hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector;
    }
    if (hfs_resize_debug) {
        printf ("hfs_MountHFSPlusVolume: partition_avh_sector=%qu, fs_avh_sector=%qu\n",
                hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector);
    }
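
    /*
     * A worked example of the test above: with 512-byte logical blocks and
     * 4096-byte allocation blocks, a partition of 1,000,000 sectors holding
     * a 100,000-block filesystem yields
     *     spare_sectors = 1,000,000 - (100,000 * 4096) / 512 = 200,000,
     * which exceeds 4096/512 = 8 sectors, so the degenerate path is taken
     * and the two AVH locations diverge. Had the filesystem spanned 124,999
     * allocation blocks, spare_sectors would be exactly 8 and both AVH
     * locations would coincide.
     */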

    bzero(&cndesc, sizeof(cndesc));
    cndesc.cd_parentcnid = kHFSRootParentID;
    cndesc.cd_flags |= CD_ISMETA;
    bzero(&cnattr, sizeof(cnattr));
    cnattr.ca_linkcount = 1;
    cnattr.ca_mode = S_IFREG;

    /*
     * Set up Extents B-tree vnode
     */
    cndesc.cd_nameptr = hfs_extname;
    cndesc.cd_namelen = strlen((char *)hfs_extname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;

    cfork.cf_size    = SWAP_BE64 (vhp->extentsFile.logicalSize);
    cfork.cf_new_size= 0;
    cfork.cf_clump   = SWAP_BE32 (vhp->extentsFile.clumpSize);
    cfork.cf_blocks  = SWAP_BE32 (vhp->extentsFile.totalBlocks);
    cfork.cf_vblocks = 0;
    cnattr.ca_blocks = cfork.cf_blocks;
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        cfork.cf_extents[i].startBlock =
                SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
        cfork.cf_extents[i].blockCount =
                SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
    }
    retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                             &hfsmp->hfs_extents_vp, &newvnode_flags);
    if (retval)
    {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting extentoverflow BT\n", retval);
        }
        goto ErrorExit;
    }

    hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);

    retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
                                      (KeyCompareProcPtr) CompareExtentKeysPlus));

    hfs_unlock(hfsmp->hfs_extents_cp);

    if (retval)
    {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting extentoverflow BT\n", retval);
        }
        goto ErrorExit;
    }
    /*
     * Set up Catalog B-tree vnode
     */
    cndesc.cd_nameptr = hfs_catname;
    cndesc.cd_namelen = strlen((char *)hfs_catname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;

    cfork.cf_size    = SWAP_BE64 (vhp->catalogFile.logicalSize);
    cfork.cf_clump   = SWAP_BE32 (vhp->catalogFile.clumpSize);
    cfork.cf_blocks  = SWAP_BE32 (vhp->catalogFile.totalBlocks);
    cfork.cf_vblocks = 0;
    cnattr.ca_blocks = cfork.cf_blocks;
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        cfork.cf_extents[i].startBlock =
                SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
        cfork.cf_extents[i].blockCount =
                SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
    }
    retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                             &hfsmp->hfs_catalog_vp, &newvnode_flags);
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting catalog BT\n", retval);
        }
        goto ErrorExit;
    }
    hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);

    retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                                      (KeyCompareProcPtr) CompareExtendedCatalogKeys));

    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting catalog BT\n", retval);
        }
        hfs_unlock(hfsmp->hfs_catalog_cp);
        goto ErrorExit;
    }
    if ((hfsmp->hfs_flags & HFS_X) &&
        BTGetInformation(VTOF(hfsmp->hfs_catalog_vp), 0, &btinfo) == 0) {
        if (btinfo.keyCompareType == kHFSBinaryCompare) {
            hfsmp->hfs_flags |= HFS_CASE_SENSITIVE;
            /* Install a case-sensitive key compare */
            (void) BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                              (KeyCompareProcPtr)cat_binarykeycompare);
        }
    }

    hfs_unlock(hfsmp->hfs_catalog_cp);

    /*
     * Set up Allocation file vnode
     */
    cndesc.cd_nameptr = hfs_vbmname;
    cndesc.cd_namelen = strlen((char *)hfs_vbmname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;

    cfork.cf_size    = SWAP_BE64 (vhp->allocationFile.logicalSize);
    cfork.cf_clump   = SWAP_BE32 (vhp->allocationFile.clumpSize);
    cfork.cf_blocks  = SWAP_BE32 (vhp->allocationFile.totalBlocks);
    cfork.cf_vblocks = 0;
    cnattr.ca_blocks = cfork.cf_blocks;
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        cfork.cf_extents[i].startBlock =
                SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
        cfork.cf_extents[i].blockCount =
                SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
    }
    retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                             &hfsmp->hfs_allocation_vp, &newvnode_flags);
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting bitmap\n", retval);
        }
        goto ErrorExit;
    }
    hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
    hfs_unlock(hfsmp->hfs_allocation_cp);

    /*
     * Set up Attribute B-tree vnode
     */
    if (vhp->attributesFile.totalBlocks != 0) {
        cndesc.cd_nameptr = hfs_attrname;
        cndesc.cd_namelen = strlen((char *)hfs_attrname);
        cndesc.cd_cnid = cnattr.ca_fileid = kHFSAttributesFileID;

        cfork.cf_size    = SWAP_BE64 (vhp->attributesFile.logicalSize);
        cfork.cf_clump   = SWAP_BE32 (vhp->attributesFile.clumpSize);
        cfork.cf_blocks  = SWAP_BE32 (vhp->attributesFile.totalBlocks);
        cfork.cf_vblocks = 0;
        cnattr.ca_blocks = cfork.cf_blocks;
        for (i = 0; i < kHFSPlusExtentDensity; i++) {
            cfork.cf_extents[i].startBlock =
                    SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
            cfork.cf_extents[i].blockCount =
                    SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
        }
        retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                                 &hfsmp->hfs_attribute_vp, &newvnode_flags);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting EA BT\n", retval);
            }
            goto ErrorExit;
        }
        hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp);
        retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp),
                                          (KeyCompareProcPtr) hfs_attrkeycompare));
        hfs_unlock(hfsmp->hfs_attribute_cp);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting EA BT\n", retval);
            }
            goto ErrorExit;
        }

        /* Initialize the vnode for the virtual attribute data file that spans
         * the entire file system space, for performing I/O to the attribute
         * b-tree. We hold an iocount on the attrdata vnode for the entire
         * duration of mount (similar to the btree vnodes).
         */
        retval = init_attrdata_vnode(hfsmp);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: init_attrdata_vnode returned (%d) for virtual EA file\n", retval);
            }
            goto ErrorExit;
        }
    }

    /*
     * Set up Startup file vnode
     */
    if (vhp->startupFile.totalBlocks != 0) {
        cndesc.cd_nameptr = hfs_startupname;
        cndesc.cd_namelen = strlen((char *)hfs_startupname);
        cndesc.cd_cnid = cnattr.ca_fileid = kHFSStartupFileID;

        cfork.cf_size    = SWAP_BE64 (vhp->startupFile.logicalSize);
        cfork.cf_clump   = SWAP_BE32 (vhp->startupFile.clumpSize);
        cfork.cf_blocks  = SWAP_BE32 (vhp->startupFile.totalBlocks);
        cfork.cf_vblocks = 0;
        cnattr.ca_blocks = cfork.cf_blocks;
        for (i = 0; i < kHFSPlusExtentDensity; i++) {
            cfork.cf_extents[i].startBlock =
                    SWAP_BE32 (vhp->startupFile.extents[i].startBlock);
            cfork.cf_extents[i].blockCount =
                    SWAP_BE32 (vhp->startupFile.extents[i].blockCount);
        }
        retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                                 &hfsmp->hfs_startup_vp, &newvnode_flags);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting startup file\n", retval);
            }
            goto ErrorExit;
        }
        hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp);
        hfs_unlock(hfsmp->hfs_startup_cp);
    }

    /*
     * Pick up volume name and create date
     *
     * Acquiring the volume name should not manipulate the bitmap, only the catalog
     * btree and possibly the extents overflow b-tree.
     */
    retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, &cnattr, NULL);
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: cat_idlookup returned (%d) getting rootfolder \n", retval);
        }
        goto ErrorExit;
    }
    vcb->hfs_itime = cnattr.ca_itime;
    vcb->volumeNameEncodingHint = cndesc.cd_encoding;
    bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
    volname_length = strlen ((const char*)vcb->vcbVN);
    cat_releasedesc(&cndesc);

    /* Send the volume name down to CoreStorage if necessary */
    retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
    if (retval == 0) {
        (void) VNOP_IOCTL (hfsmp->hfs_devvp, _DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
    }

    /* Reset retval to 0. We don't care about errors in volname conversion */
    retval = 0;

    /*
     * Pull in the volume UUID while we are still single-threaded.
     * This brings the volume UUID into the cached copy dangling off of the HFSMP.
     * Otherwise it would have to be computed on first access.
     */
    uuid_t throwaway;
    hfs_getvoluuid (hfsmp, throwaway);

    /*
     * We now always initiate a full bitmap scan even if the volume is read-only because this is
     * our only shot to do I/Os of dramatically different sizes than what the buffer cache ordinarily
     * expects. TRIMs will not be delivered to the underlying media if the volume is not
     * read-write though.
     */
    hfsmp->scan_var = 0;

    /*
     * We have to determine whether we can scan the bitmap allocation
     * file asynchronously. If the catalog file is fragmented such that it
     * has overflow extents and the volume needs a journal transaction, we
     * cannot scan the bitmap asynchronously. Doing so would cause the mount
     * thread to block on the bitmap lock inside a journal transaction, while
     * the scan thread, which holds the bitmap lock exclusively, performs disk
     * I/O to issue TRIMs to unallocated ranges and build the summary table.
     * The amount of time the mount thread is blocked depends on the size of
     * the volume, type of disk, etc. This blocking can cause the watchdog
     * timer to time out, resulting in panic. Thus, to ensure we don't time
     * out the watchdog in such cases, we scan the bitmap synchronously.
     *
     * Please NOTE: Currently this timeout only seems to happen for non-SSD
     * drives. Possibly reading a big fragmented allocation file to
     * construct the summary table takes enough time to time out the watchdog.
     * Thus we check whether we need to scan the bitmap synchronously only if
     * the disk is not an SSD.
     */
    async_bitmap_scan = true;
    if (!ISSET(hfsmp->hfs_flags, HFS_SSD) && hfsmp->hfs_catalog_cp) {
        bool catalog_has_overflow_extents;
        bool journal_transaction_needed;

        catalog_has_overflow_extents = false;
        if ((hfsmp->hfs_catalog_vp != NULL) &&
            (overflow_extents(VTOF(hfsmp->hfs_catalog_vp)))) {
            catalog_has_overflow_extents = true;
        }

        journal_transaction_needed = false;
        if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) &&
                           (hfsmp->hfs_flags & HFS_READ_ONLY))) {
            journal_transaction_needed = true;
        }

        if (catalog_has_overflow_extents && journal_transaction_needed)
            async_bitmap_scan = false;
    }
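
    /*
     * To summarize the decision above: on SSDs, or when the catalog has no
     * overflow extents, or when no journal transaction will be needed, the
     * bitmap is scanned asynchronously; only the combination of rotational
     * media, a fragmented catalog, and a journaled volume forces the
     * synchronous scan.
     */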

    if (async_bitmap_scan) {
        thread_t allocator_scanner;

        /* Take the HFS mount mutex and wait on scan_var */
        hfs_lock_mount (hfsmp);


        /*
         * Scan the bitmap asynchronously.
         */
        kernel_thread_start ((thread_continue_t)hfs_scan_blocks, hfsmp,
                             &allocator_scanner);

        /*
         * Wait until it registers that it's got the appropriate locks
         * (or that it is finished).
         */
        while ((hfsmp->scan_var & (HFS_ALLOCATOR_SCAN_INFLIGHT|
                                   HFS_ALLOCATOR_SCAN_COMPLETED)) == 0) {
            msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD,
                    "hfs_scan_blocks", 0);
        }

        hfs_unlock_mount(hfsmp);

        thread_deallocate (allocator_scanner);
    } else {

        /*
         * Initialize the summary table and then scan the bitmap
         * synchronously. Since we are scanning the bitmap
         * synchronously we don't need to hold the bitmap lock.
         */
        if (hfs_init_summary (hfsmp)) {
            printf ("hfs: could not initialize summary table for "
                    "%s\n", hfsmp->vcbVN);
        }

        (void)ScanUnmapBlocks (hfsmp);

        /*
         * We need to mark the allocator scan as completed because
         * hot file clustering waits for this condition later.
         */
        hfsmp->scan_var |= HFS_ALLOCATOR_SCAN_COMPLETED;
        buf_invalidateblks (hfsmp->hfs_allocation_vp, 0, 0, 0);
    }

    /* mark the volume dirty (clear clean unmount bit) */
    vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
    if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
        hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
    }

    /* kHFSHasFolderCount is only supported/updated on HFSX volumes */
    if ((hfsmp->hfs_flags & HFS_X) != 0) {
        hfsmp->hfs_flags |= HFS_FOLDERCOUNT;
    }

    //
    // Check if we need to do late journal initialization. This only
    // happens if a previous version of Mac OS X (or 9) touched the disk.
    // In that case hfs_late_journal_init() will go re-locate the journal
    // and journal_info_block files and validate that they're still kosher.
    //
    if ( (vcb->vcbAtrb & kHFSVolumeJournaledMask)
         && (SWAP_BE32(vhp->lastMountedVersion) != kHFSJMountVersion)
         && (hfsmp->jnl == NULL)) {

        retval = hfs_late_journal_init(hfsmp, vhp, args);
        if (retval != 0) {
            if (retval == EROFS) {
                // EROFS is a special error code that means the volume has an external
                // journal which we couldn't find. In that case we do not want to
                // rewrite the volume header - we'll just refuse to mount the volume.
                if (HFS_MOUNT_DEBUG) {
                    printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d), maybe an external jnl?\n", retval);
                }
                retval = EINVAL;
                goto ErrorExit;
            }

            hfsmp->jnl = NULL;

            // if the journal failed to open, then set the lastMountedVersion
            // to be "FSK!" which fsck_hfs will see and force the fsck instead
            // of just bailing out because the volume is journaled.
            if (!(hfsmp->hfs_flags & HFS_READ_ONLY)) {
                HFSPlusVolumeHeader *jvhp;
                daddr64_t mdb_offset;
                struct buf *bp = NULL;

                hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;

                mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize));

                bp = NULL;
                retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
                        HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
                        hfsmp->hfs_physical_block_size, cred, &bp);
                if (retval == 0) {
                    jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));

                    if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
                        printf ("hfs(3): Journal replay fail. Writing lastMountVersion as FSK!\n");
                        jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
                        buf_bwrite(bp);
                    } else {
                        buf_brelse(bp);
                    }
                    bp = NULL;
                } else if (bp) {
                    buf_brelse(bp);
                    // clear this so the error exit path won't try to use it
                    bp = NULL;
                }
            }

            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d)\n", retval);
            }
            retval = EINVAL;
            goto ErrorExit;
        } else if (hfsmp->jnl) {
            vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
        }
    } else if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) {
        struct cat_attr jinfo_attr, jnl_attr;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
        }

        // if we're here we need to fill in the fileid's for the
        // journal and journal_info_block.
        hfsmp->hfs_jnlinfoblkid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jinfo_attr, NULL);
        hfsmp->hfs_jnlfileid    = GetFileInfo(vcb, kRootDirID, ".journal", &jnl_attr, NULL);
        if (hfsmp->hfs_jnlinfoblkid == 0 || hfsmp->hfs_jnlfileid == 0) {
            printf("hfs: danger! couldn't find the file-id's for the journal or journal_info_block\n");
            printf("hfs: jnlfileid %d, jnlinfoblkid %d\n", hfsmp->hfs_jnlfileid, hfsmp->hfs_jnlinfoblkid);
        }

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            vcb->vcbAtrb |= kHFSVolumeJournaledMask;
        }

        if (hfsmp->jnl == NULL) {
            vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
        }
    }

    if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) )     // if the disk is not write protected
    {
        MarkVCBDirty( vcb );    // mark VCB dirty so it will be written
    }

    if (hfsmp->hfs_flags & HFS_CS_METADATA_PIN) {
        hfs_pin_fs_metadata(hfsmp);
    }
    /*
     * Distinguish 3 potential cases involving content protection:
     * 1. mount point bit set; vcbAtrb does not support it. Fail.
     * 2. mount point bit set; vcbAtrb supports it. We're good.
     * 3. mount point bit not set; vcbAtrb supports it, turn bit on, then good.
     */
    if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
        /* Does the mount point support it ? */
        if ((vcb->vcbAtrb & kHFSContentProtectionMask) == 0) {
            /* Case 1 above */
            retval = EINVAL;
            goto ErrorExit;
        }
    }
    else {
        /* not requested in the mount point. Is it in FS? */
        if (vcb->vcbAtrb & kHFSContentProtectionMask) {
            /* Case 3 above */
            vfs_setflags (hfsmp->hfs_mp, MNT_CPROTECT);
        }
    }

    /* At this point, if the mount point flag is set, we can enable it. */
    if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
        /* Cases 2+3 above */
#if CONFIG_PROTECT
        /* Get the EAs as needed. */
        int cperr = 0;
        struct cp_root_xattr *xattr = NULL;
        xattr = hfs_malloc(sizeof(*xattr));

        /* go get the EA to get the version information */
        cperr = cp_getrootxattr (hfsmp, xattr);
        /*
         * If there was no EA there, then write one out.
         * The EA being absent from the root means
         * this is an erase install or a very old FS.
         */

        if (cperr == 0) {
            /* Have to run a valid CP version. */
            if (!cp_is_supported_version(xattr->major_version)) {
                cperr = EINVAL;
            }
        }
        else if (cperr == ENOATTR) {
            printf("No root EA set, creating new EA with new version: %d\n", CP_CURRENT_VERS);
            bzero(xattr, sizeof(struct cp_root_xattr));
            xattr->major_version = CP_CURRENT_VERS;
            xattr->minor_version = CP_MINOR_VERS;
            cperr = cp_setrootxattr (hfsmp, xattr);
        }

        if (cperr) {
            hfs_free(xattr, sizeof(*xattr));
            retval = EPERM;
            goto ErrorExit;
        }

        /* If we got here, then the CP version is valid. Set it in the mount point */
        hfsmp->hfs_running_cp_major_vers = xattr->major_version;
        printf("Running with CP root xattr: %d.%d\n", xattr->major_version, xattr->minor_version);
        hfsmp->cproot_flags = xattr->flags;
        hfsmp->cp_crypto_generation = ISSET(xattr->flags, CP_ROOT_CRYPTOG1) ? 1 : 0;
#if HFS_CONFIG_KEY_ROLL
        hfsmp->hfs_auto_roll_min_key_os_version = xattr->auto_roll_min_version;
        hfsmp->hfs_auto_roll_max_key_os_version = xattr->auto_roll_max_version;
#endif

        hfs_free(xattr, sizeof(*xattr));

        /*
         * Acquire the boot-arg for the AKS default key; if invalid, obtain from the device tree.
         * Ensure that the boot-arg's value is valid for FILES (not directories),
         * since only files are actually protected for now.
         */

        PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));

        if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
            PE_get_default("kern.default_cp_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
        }

#if HFS_TMPDBG
#if !SECURE_KERNEL
        PE_parse_boot_argn("aks_verbose", &hfsmp->hfs_cp_verbose, sizeof(hfsmp->hfs_cp_verbose));
#endif
#endif

        if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
            hfsmp->default_cp_class = PROTECTION_CLASS_C;
        }

#else
        /* If CONFIG_PROTECT not built, ignore CP */
        vfs_clearflags(hfsmp->hfs_mp, MNT_CPROTECT);
#endif
    }

    /*
     * Establish a metadata allocation zone.
     */
    hfs_metadatazone_init(hfsmp, false);

    /*
     * Make any metadata zone adjustments.
     */
    if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
        /* Keep the roving allocator out of the metadata zone. */
        if (vcb->nextAllocation >= hfsmp->hfs_metazone_start &&
            vcb->nextAllocation <= hfsmp->hfs_metazone_end) {
            HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
        }
    } else {
        if (vcb->nextAllocation <= 1) {
            vcb->nextAllocation = hfsmp->hfs_min_alloc_start;
        }
    }
    vcb->sparseAllocation = hfsmp->hfs_min_alloc_start;

    /* Setup private/hidden directories for hardlinks. */
    hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
    hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
        hfs_remove_orphans(hfsmp);

    /* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
    {
        retval = hfs_erase_unused_nodes(hfsmp);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_erase_unused_nodes returned (%d) for %s \n", retval, hfsmp->vcbVN);
            }

            goto ErrorExit;
        }
    }

    /*
     * Allow hot file clustering if conditions allow.
     */
    if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) && !(hfsmp->hfs_flags & HFS_READ_ONLY) &&
        ((hfsmp->hfs_flags & HFS_SSD) == 0 || (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN))) {
        //
        // Wait until the bitmap scan completes before we initialize the
        // hotfile area so that we do not run into any issues with the
        // bitmap being read while hotfiles is initializing itself. On
        // some older/slower machines, without this interlock, the bitmap
        // would sometimes get corrupted at boot time.
        //
        hfs_lock_mount(hfsmp);
        while(!(hfsmp->scan_var & HFS_ALLOCATOR_SCAN_COMPLETED)) {
            (void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, PINOD, "hfs_hotfile_bitmap_interlock", 0);
        }
        hfs_unlock_mount(hfsmp);

        /*
         * Note: at this point we are not allowed to fail the
         *       mount operation because the HotFile init code
         *       in hfs_recording_init() will lookup vnodes with
         *       VNOP_LOOKUP() which hangs vnodes off the mount
         *       (and if we were to fail, VFS is not prepared to
         *       clean that up at this point). Since HotFiles are
         *       optional, this is not a big deal.
         */
        (void) hfs_recording_init(hfsmp);
    }

    /* Force ACLs on HFS+ file systems. */
    vfs_setextendedsecurity(HFSTOVFS(hfsmp));

    /* Enable extent-based extended attributes by default */
    hfsmp->hfs_flags |= HFS_XATTR_EXTENTS;

    return (0);

ErrorExit:
    /*
     * A fatal error occurred and the volume cannot be mounted, so
     * release any resources that we acquired...
     */
    hfsUnmount(hfsmp, NULL);

    if (HFS_MOUNT_DEBUG) {
        printf("hfs_mounthfsplus: encountered error (%d)\n", retval);
    }
    return (retval);
}

static int
_pin_metafile(struct hfsmount *hfsmp, vnode_t vp)
{
    int err;

    err = hfs_lock(VTOC(vp), HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
    if (err == 0) {
        err = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT, NULL);
        hfs_unlock(VTOC(vp));
    }

    return err;
}

void
hfs_pin_fs_metadata(struct hfsmount *hfsmp)
{
    ExtendedVCB *vcb;
    int err;

    vcb = HFSTOVCB(hfsmp);

    err = _pin_metafile(hfsmp, hfsmp->hfs_extents_vp);
    if (err != 0) {
        printf("hfs: failed to pin extents overflow file %d\n", err);
    }
    err = _pin_metafile(hfsmp, hfsmp->hfs_catalog_vp);
    if (err != 0) {
        printf("hfs: failed to pin catalog file %d\n", err);
    }
    err = _pin_metafile(hfsmp, hfsmp->hfs_allocation_vp);
    if (err != 0) {
        printf("hfs: failed to pin bitmap file %d\n", err);
    }
    err = _pin_metafile(hfsmp, hfsmp->hfs_attribute_vp);
    if (err != 0) {
        printf("hfs: failed to pin extended attr file %d\n", err);
    }

    hfs_pin_block_range(hfsmp, HFS_PIN_IT, 0, 1);
    hfs_pin_block_range(hfsmp, HFS_PIN_IT, vcb->totalBlocks-1, 1);

    if (vfs_flags(hfsmp->hfs_mp) & MNT_JOURNALED) {
        // and hey, if we've got a journal, let's pin that too!
        hfs_pin_block_range(hfsmp, HFS_PIN_IT, hfsmp->jnl_start, howmany(hfsmp->jnl_size, vcb->blockSize));
    }
}

/*
 * ReleaseMetaFileVNode
 *
 * vp    L - -
 */
static void ReleaseMetaFileVNode(struct vnode *vp)
{
    struct filefork *fp;

    if (vp && (fp = VTOF(vp))) {
        if (fp->fcbBTCBPtr != NULL) {
            (void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
            (void) BTClosePath(fp);
            hfs_unlock(VTOC(vp));
        }

        /* release the node even if BTClosePath fails */
        vnode_recycle(vp);
        vnode_put(vp);
    }
}


/*************************************************************
 *
 * Unmounts a hfs volume.
 * At this point vflush() has been called (to dump all non-metadata files)
 *
 *************************************************************/

int
hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p)
{
    /* Get rid of our attribute data vnode (if any). This is done
     * after the vflush() during mount, so we don't need to worry
     * about any locks.
     */
    if (hfsmp->hfs_attrdata_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_attrdata_vp);
        hfsmp->hfs_attrdata_vp = NULLVP;
    }

    if (hfsmp->hfs_startup_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_startup_vp);
        hfsmp->hfs_startup_cp = NULL;
        hfsmp->hfs_startup_vp = NULL;
    }

    if (hfsmp->hfs_attribute_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
        hfsmp->hfs_attribute_cp = NULL;
        hfsmp->hfs_attribute_vp = NULL;
    }

    if (hfsmp->hfs_catalog_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
        hfsmp->hfs_catalog_cp = NULL;
        hfsmp->hfs_catalog_vp = NULL;
    }

    if (hfsmp->hfs_extents_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
        hfsmp->hfs_extents_cp = NULL;
        hfsmp->hfs_extents_vp = NULL;
    }

    if (hfsmp->hfs_allocation_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
        hfsmp->hfs_allocation_cp = NULL;
        hfsmp->hfs_allocation_vp = NULL;
    }

    return (0);
}


/*
 * Test if fork has overflow extents.
 *
 * Returns:
 *    non-zero - overflow extents exist
 *    zero     - overflow extents do not exist
 */
bool overflow_extents(struct filefork *fp)
{
    u_int32_t blocks;

    //
    // If the vnode pointer is NULL then we're being called
    // from hfs_remove_orphans() with a faked-up filefork
    // and therefore it has to be an HFS+ volume. Otherwise
    // we check through the volume header to see what type
    // of volume we're on.
    //

#if CONFIG_HFS_STD
    if (FTOV(fp) && VTOVCB(FTOV(fp))->vcbSigWord == kHFSSigWord) {
        if (fp->ff_extents[2].blockCount == 0)
            return false;

        blocks = fp->ff_extents[0].blockCount +
                 fp->ff_extents[1].blockCount +
                 fp->ff_extents[2].blockCount;

        return fp->ff_blocks > blocks;
    }
#endif

    if (fp->ff_extents[7].blockCount == 0)
        return false;

    blocks = fp->ff_extents[0].blockCount +
             fp->ff_extents[1].blockCount +
             fp->ff_extents[2].blockCount +
             fp->ff_extents[3].blockCount +
             fp->ff_extents[4].blockCount +
             fp->ff_extents[5].blockCount +
             fp->ff_extents[6].blockCount +
             fp->ff_extents[7].blockCount;

    return fp->ff_blocks > blocks;
}
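
/*
 * For instance, an HFS+ fork whose catalog record holds eight inline extents
 * totaling 80 blocks while ff_blocks reports 100 must have the remaining 20
 * blocks described by extents-overflow b-tree records, so the test above
 * returns true.
 */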

static __attribute__((pure))
boolean_t hfs_is_frozen(struct hfsmount *hfsmp)
{
    return (hfsmp->hfs_freeze_state == HFS_FROZEN
            || (hfsmp->hfs_freeze_state == HFS_FREEZING
                && current_thread() != hfsmp->hfs_freezing_thread));
}

/*
 * Lock the HFS global journal lock
 */
int
hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype)
{
    thread_t thread = current_thread();

    if (hfsmp->hfs_global_lockowner == thread) {
        panic ("hfs_lock_global: locking against myself!");
    }

    /*
     * This check isn't really necessary but this stops us taking
     * the mount lock in most cases. The essential check is below.
     */
    if (hfs_is_frozen(hfsmp)) {
        /*
         * Unfortunately, there is no easy way of getting a notification
         * for when a process is exiting and it's possible for the exiting
         * process to get blocked somewhere else. To catch this, we
         * periodically monitor the frozen process here and thaw if
         * we spot that it's exiting.
         */
frozen:
        hfs_lock_mount(hfsmp);

        struct timespec ts = { 0, 500 * NSEC_PER_MSEC };

        while (hfs_is_frozen(hfsmp)) {
            if (hfsmp->hfs_freeze_state == HFS_FROZEN
                && proc_exiting(hfsmp->hfs_freezing_proc)) {
                hfs_thaw_locked(hfsmp);
                break;
            }

            msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
                   PWAIT, "hfs_lock_global (frozen)", &ts);
        }
        hfs_unlock_mount(hfsmp);
    }

    /* HFS_SHARED_LOCK */
    if (locktype == HFS_SHARED_LOCK) {
        lck_rw_lock_shared (&hfsmp->hfs_global_lock);
        hfsmp->hfs_global_lockowner = HFS_SHARED_OWNER;
    }
    /* HFS_EXCLUSIVE_LOCK */
    else {
        lck_rw_lock_exclusive (&hfsmp->hfs_global_lock);
        hfsmp->hfs_global_lockowner = thread;
    }

    /*
     * We have to check if we're frozen again because of the time
     * between when we checked and when we took the global lock.
     */
    if (hfs_is_frozen(hfsmp)) {
        hfs_unlock_global(hfsmp);
        goto frozen;
    }

    return 0;
}
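
/*
 * An illustrative pairing (sketch only, not called from anywhere in this
 * file): every successful hfs_lock_global() must be balanced by
 * hfs_unlock_global() on the same thread, e.g.
 *
 *     hfs_lock_global(hfsmp, HFS_SHARED_LOCK);
 *     ... work covered by the global journal lock ...
 *     hfs_unlock_global(hfsmp);
 */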


/*
 * Unlock the HFS global journal lock
 */
void
hfs_unlock_global (struct hfsmount *hfsmp)
{
    thread_t thread = current_thread();

    /* HFS_LOCK_EXCLUSIVE */
    if (hfsmp->hfs_global_lockowner == thread) {
        hfsmp->hfs_global_lockowner = NULL;
        lck_rw_unlock_exclusive (&hfsmp->hfs_global_lock);
    }
    /* HFS_LOCK_SHARED */
    else {
        lck_rw_unlock_shared (&hfsmp->hfs_global_lock);
    }
}

/*
 * Lock the HFS mount lock
 *
 * Note: this is a mutex, not a rw lock!
 */
inline
void hfs_lock_mount (struct hfsmount *hfsmp) {
    lck_mtx_lock (&(hfsmp->hfs_mutex));
}

/*
 * Unlock the HFS mount lock
 *
 * Note: this is a mutex, not a rw lock!
 */
inline
void hfs_unlock_mount (struct hfsmount *hfsmp) {
    lck_mtx_unlock (&(hfsmp->hfs_mutex));
}

/*
 * Lock HFS system file(s).
 *
 * This function accepts a @flags parameter which indicates which
 * system file locks are required. The value it returns should be
 * used in a subsequent call to hfs_systemfile_unlock. The caller
 * should treat this value as opaque; it may or may not have a
 * relation to the @flags field that is passed in. The *only*
 * guarantee that we make is that a value of zero means that no locks
 * were taken and that there is no need to call hfs_systemfile_unlock
 * (although it is harmless to do so). Recursion is supported but
 * care must still be taken to ensure correct lock ordering. Note
 * that requests for certain locks may cause other locks to also be
 * taken, including locks that are not possible to ask for via the
 * @flags parameter.
 */
1526 int
1527 hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktype)
1528 {
1529 /*
1530 * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file
1531 */
1532 if (flags & SFL_CATALOG) {
1533 if (hfsmp->hfs_catalog_cp
1534 && hfsmp->hfs_catalog_cp->c_lockowner != current_thread()) {
1535 #ifdef HFS_CHECK_LOCK_ORDER
1536 if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) {
1537 panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)");
1538 }
1539 if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
1540 panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)");
1541 }
1542 if (hfsmp-> hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1543 panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)");
1544 }
1545 #endif /* HFS_CHECK_LOCK_ORDER */
1546
1547 (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype, HFS_LOCK_DEFAULT);
1548 /*
1549 * When the catalog file has overflow extents then
1550 * also acquire the extents b-tree lock if its not
1551 * already requested.
1552 */
1553 if (((flags & SFL_EXTENTS) == 0) &&
1554 (hfsmp->hfs_catalog_vp != NULL) &&
1555 (overflow_extents(VTOF(hfsmp->hfs_catalog_vp)))) {
1556 flags |= SFL_EXTENTS;
1557 }
1558 } else {
1559 flags &= ~SFL_CATALOG;
1560 }
1561 }
1562
1563 if (flags & SFL_ATTRIBUTE) {
1564 if (hfsmp->hfs_attribute_cp
1565 && hfsmp->hfs_attribute_cp->c_lockowner != current_thread()) {
1566 #ifdef HFS_CHECK_LOCK_ORDER
1567 if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
1568 panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)");
1569 }
1570 if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1571 panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)");
1572 }
1573 #endif /* HFS_CHECK_LOCK_ORDER */
1574
1575 (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype, HFS_LOCK_DEFAULT);
1576 /*
1577 * When the attribute file has overflow extents then
1578 * also acquire the extents b-tree lock if its not
1579 * already requested.
1580 */
1581 if (((flags & SFL_EXTENTS) == 0) &&
1582 (hfsmp->hfs_attribute_vp != NULL) &&
1583 (overflow_extents(VTOF(hfsmp->hfs_attribute_vp)))) {
1584 flags |= SFL_EXTENTS;
1585 }
1586 } else {
1587 flags &= ~SFL_ATTRIBUTE;
1588 }
1589 }
1590
1591 if (flags & SFL_STARTUP) {
1592 if (hfsmp->hfs_startup_cp
1593 && hfsmp->hfs_startup_cp->c_lockowner != current_thread()) {
1594 #ifdef HFS_CHECK_LOCK_ORDER
1595 if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
1596 panic("hfs_systemfile_lock: bad lock order (Extents before Startup)");
1597 }
1598 #endif /* HFS_CHECK_LOCK_ORDER */
1599
1600 (void) hfs_lock(hfsmp->hfs_startup_cp, locktype, HFS_LOCK_DEFAULT);
1601 /*
1602 * When the startup file has overflow extents, also
1603 * acquire the extents b-tree lock if it has not
1604 * already been requested.
1605 */
1606 if (((flags & SFL_EXTENTS) == 0) &&
1607 (hfsmp->hfs_startup_vp != NULL) &&
1608 (overflow_extents(VTOF(hfsmp->hfs_startup_vp)))) {
1609 flags |= SFL_EXTENTS;
1610 }
1611 } else {
1612 flags &= ~SFL_STARTUP;
1613 }
1614 }
1615
1616 /*
1617 * To preserve lock ordering, a request for the extents lock
1618 * also takes the bitmap lock.
1619 */
1620 if (flags & (SFL_BITMAP | SFL_EXTENTS)) {
1621 if (hfsmp->hfs_allocation_cp) {
1622 (void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1623 /*
1624 * The bitmap lock is grabbed even when only the extents
1625 * lock was requested. Set the bitmap bit in the lock
1626 * flags, which callers will pass back during unlock.
1627 */
1628 flags |= SFL_BITMAP;
1629 } else {
1630 flags &= ~SFL_BITMAP;
1631 }
1632 }
1633
1634 if (flags & SFL_EXTENTS) {
1635 /*
1636 * Since the extents btree lock is recursive we always
1637 * need exclusive access.
1638 */
1639 if (hfsmp->hfs_extents_cp) {
1640 (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1641
1642 if (vfs_isswapmount(hfsmp->hfs_mp)) {
1643 /*
1644 * Because we may need this lock on the pageout path (if a swapfile
1645 * allocation spills into the extents overflow tree), we grant the holder
1646 * of this lock the privilege of dipping into the reserve free pool. This
1647 * prevents a deadlock that could otherwise occur if this thread had to
1648 * allocate memory while holding the lock, yet the pageouts needed to
1649 * replenish the free list could not complete without it.
1650 */
1651 if (set_vm_privilege(TRUE) == FALSE) {
1652 /*
1653 * indicate that we need to drop vm_privilege
1654 * when we unlock
1655 */
1656 flags |= SFL_VM_PRIV;
1657 }
1658 }
1659 } else {
1660 flags &= ~SFL_EXTENTS;
1661 }
1662 }
1663
1664 return (flags);
1665 }
1666
1667 /*
1668 * Unlock HFS system file(s).
1669 */
1670 void
1671 hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
1672 {
1673 if (!flags)
1674 return;
1675
1676 struct timeval tv;
1677 u_int32_t lastfsync;
1678 int numOfLockedBuffs;
1679
1680 if (hfsmp->jnl == NULL) {
1681 microuptime(&tv);
1682 lastfsync = tv.tv_sec;
1683 }
1684 if (flags & SFL_STARTUP && hfsmp->hfs_startup_cp) {
1685 hfs_unlock(hfsmp->hfs_startup_cp);
1686 }
1687 if (flags & SFL_ATTRIBUTE && hfsmp->hfs_attribute_cp) {
1688 if (hfsmp->jnl == NULL) {
1689 BTGetLastSync((FCB*)VTOF(hfsmp->hfs_attribute_vp), &lastfsync);
1690 numOfLockedBuffs = count_lock_queue();
1691 if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1692 ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1693 kMaxSecsForFsync))) {
1694 hfs_btsync(hfsmp->hfs_attribute_vp, HFS_SYNCTRANS);
1695 }
1696 }
1697 hfs_unlock(hfsmp->hfs_attribute_cp);
1698 }
1699 if (flags & SFL_CATALOG && hfsmp->hfs_catalog_cp) {
1700 if (hfsmp->jnl == NULL) {
1701 BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync);
1702 numOfLockedBuffs = count_lock_queue();
1703 if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1704 ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1705 kMaxSecsForFsync))) {
1706 hfs_btsync(hfsmp->hfs_catalog_vp, HFS_SYNCTRANS);
1707 }
1708 }
1709 hfs_unlock(hfsmp->hfs_catalog_cp);
1710 }
1711 if (flags & SFL_BITMAP && hfsmp->hfs_allocation_cp) {
1712 hfs_unlock(hfsmp->hfs_allocation_cp);
1713 }
1714 if (flags & SFL_EXTENTS && hfsmp->hfs_extents_cp) {
1715 if (hfsmp->jnl == NULL) {
1716 BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync);
1717 numOfLockedBuffs = count_lock_queue();
1718 if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
1719 ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
1720 kMaxSecsForFsync))) {
1721 hfs_btsync(hfsmp->hfs_extents_vp, HFS_SYNCTRANS);
1722 }
1723 }
1724 hfs_unlock(hfsmp->hfs_extents_cp);
1725
1726 if (flags & SFL_VM_PRIV) {
1727 /*
1728 * revoke the vm_privilege we granted this thread
1729 * now that we have unlocked the overflow extents
1730 */
1731 set_vm_privilege(FALSE);
1732 }
1733 }
1734 }
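
/*
 * Illustrative sketch (editor's example, not original source): the
 * canonical pairing of hfs_systemfile_lock/hfs_systemfile_unlock.
 * The return value is treated as opaque and handed back verbatim to
 * the unlock call; extra locks (extents, bitmap) may have been taken
 * on the caller's behalf, so the original request flags must never
 * be reused for unlock.
 */
#if 0 /* example only */
static int
example_catalog_lookup(struct hfsmount *hfsmp, struct cat_desc *descp,
		       struct cat_attr *attrp)
{
	int lockflags, error;

	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
	error = cat_lookup(hfsmp, descp, 0, 0, NULL, attrp, NULL, NULL);
	hfs_systemfile_unlock(hfsmp, lockflags);	/* the opaque value, not SFL_CATALOG */

	return error;
}
#endif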
1735
1736
1737 /*
1738 * RequireFileLock
1739 *
1740 * Check to see if a vnode is locked in the current context
1741 * This is to be used for debugging purposes only!!
1742 */
1743 #if DEBUG
1744 void RequireFileLock(FileReference vp, int shareable)
1745 {
1746 int locked;
1747
1748 /* The extents btree and allocation bitmap are always exclusive. */
1749 if (VTOC(vp)->c_fileid == kHFSExtentsFileID ||
1750 VTOC(vp)->c_fileid == kHFSAllocationFileID) {
1751 shareable = 0;
1752 }
1753
1754 locked = VTOC(vp)->c_lockowner == current_thread();
1755
1756 if (!locked && !shareable) {
1757 switch (VTOC(vp)->c_fileid) {
1758 case kHFSExtentsFileID:
1759 panic("hfs: extents btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1760 break;
1761 case kHFSCatalogFileID:
1762 panic("hfs: catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1763 break;
1764 case kHFSAllocationFileID:
1765 /* The allocation file can hide behind the journal lock. */
1766 if (VTOHFS(vp)->jnl == NULL)
1767 panic("hfs: allocation file not locked! v: 0x%08X\n #\n", (u_int)vp);
1768 break;
1769 case kHFSStartupFileID:
1770 panic("hfs: startup file not locked! v: 0x%08X\n #\n", (u_int)vp);
1771 case kHFSAttributesFileID:
1772 panic("hfs: attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp);
1773 break;
1774 }
1775 }
1776 }
1777 #endif // DEBUG
1778
1779
1780 /*
1781 * There are three ways to qualify for ownership rights on an object:
1782 *
1783 * 1. (a) Your UID matches the cnode's UID.
1784 * (b) The object in question is owned by "unknown"
1785 * 2. (a) Permissions on the filesystem are being ignored and
1786 * your UID matches the replacement UID.
1787 * (b) Permissions on the filesystem are being ignored and
1788 * the replacement UID is "unknown".
1789 * 3. You are root.
1790 *
1791 */
1792 int
1793 hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred,
1794 __unused struct proc *p, int invokesuperuserstatus)
1795 {
1796 if ((kauth_cred_getuid(cred) == cnode_uid) || /* [1a] */
1797 (cnode_uid == UNKNOWNUID) || /* [1b] */
1798 ((((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) && /* [2] */
1799 ((kauth_cred_getuid(cred) == hfsmp->hfs_uid) || /* [2a] */
1800 (hfsmp->hfs_uid == UNKNOWNUID))) || /* [2b] */
1801 (invokesuperuserstatus && (suser(cred, 0) == 0))) { /* [3] */
1802 return (0);
1803 } else {
1804 return (EPERM);
1805 }
1806 }
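
/*
 * Illustrative sketch (editor's example): a typical permission gate
 * built on hfs_owner_rights(). Passing a non-zero final argument lets
 * root qualify via rule 3 above. The vnop context "ctx" and the cnode
 * c_uid field access are assumptions for illustration.
 */
#if 0 /* example only */
	int error;

	error = hfs_owner_rights(hfsmp, cp->c_uid, vfs_context_ucred(ctx),
				 vfs_context_proc(ctx), 1);
	if (error)
		return error;	/* EPERM: not the owner and not root */
#endif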
1807
1808
1809 u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize,
1810 u_int32_t blockSizeLimit,
1811 u_int32_t baseMultiple) {
1812 /*
1813 Compute the optimal (largest) block size that evenly divides allocationBlockSize,
1814 is no larger than the specified limit, and is a multiple of baseMultiple.
1815 */
1816 int baseBlockCount, blockCount;
1817 u_int32_t trialBlockSize;
1818
1819 if (allocationBlockSize % baseMultiple != 0) {
1820 /*
1821 Whoops: the allocation blocks aren't even multiples of the specified base;
1822 no amount of dividing them into even parts will yield a multiple, either!
1823 */
1824 return 512; /* Hope for the best */
1825 }
1826
1827 /* Try the obvious winner first, to prevent 12K allocation blocks, for instance,
1828 from being handled as two 6K logical blocks instead of 3 4K logical blocks.
1829 Even though the former (the result of the loop below) is the larger allocation
1830 block size, the latter is more efficient: */
1831 if (allocationBlockSize % PAGE_SIZE == 0) return PAGE_SIZE;
1832
1833 /* No clear winner exists: pick the largest even fraction <= blockSizeLimit: */
1834 baseBlockCount = allocationBlockSize / baseMultiple; /* Now guaranteed to be an even multiple */
1835
1836 for (blockCount = baseBlockCount; blockCount > 0; --blockCount) {
1837 trialBlockSize = blockCount * baseMultiple;
1838 if (allocationBlockSize % trialBlockSize == 0) { /* An even multiple? */
1839 if ((trialBlockSize <= blockSizeLimit) &&
1840 (trialBlockSize % baseMultiple == 0)) {
1841 return trialBlockSize;
1842 }
1843 }
1844 }
1845
1846 /* Note: we should never get here, since blockCount = 1 should always work,
1847 but this is nice and safe and makes the compiler happy, too ... */
1848 return 512;
1849 }
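
/*
 * Worked example (illustrative): on a system with 4KB pages, a 12KB
 * allocation block takes the PAGE_SIZE short-circuit above, so
 * BestBlockSizeFit(12288, MAXBSIZE, 512) == 4096. A 3KB allocation
 * block is not page-aligned, so the loop runs with baseBlockCount = 6
 * and returns the first even divisor within the limit: 3072 itself.
 */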
1850
1851
1852 u_int32_t
1853 GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name,
1854 struct cat_attr *fattr, struct cat_fork *forkinfo)
1855 {
1856 struct hfsmount * hfsmp;
1857 struct cat_desc jdesc;
1858 int lockflags;
1859 int error;
1860
1861 if (vcb->vcbSigWord != kHFSPlusSigWord)
1862 return (0);
1863
1864 hfsmp = VCBTOHFS(vcb);
1865
1866 memset(&jdesc, 0, sizeof(struct cat_desc));
1867 jdesc.cd_parentcnid = kRootDirID;
1868 jdesc.cd_nameptr = (const u_int8_t *)name;
1869 jdesc.cd_namelen = strlen(name);
1870
1871 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1872 error = cat_lookup(hfsmp, &jdesc, 0, 0, NULL, fattr, forkinfo, NULL);
1873 hfs_systemfile_unlock(hfsmp, lockflags);
1874
1875 if (error == 0) {
1876 return (fattr->ca_fileid);
1877 }
1878
1879 return (0); /* XXX what callers expect on an error */
1882 }
1883
1884
1885 /*
1886 * On HFS Plus volumes, there can be orphaned files and directories:
1887 * these are files or directories that were unlinked while busy.
1888 * If the volume was not cleanly unmounted then some of these may
1889 * have persisted and need to be removed.
1890 */
1891 void
1892 hfs_remove_orphans(struct hfsmount * hfsmp)
1893 {
1894 struct BTreeIterator * iterator = NULL;
1895 struct FSBufferDescriptor btdata;
1896 struct HFSPlusCatalogFile filerec;
1897 struct HFSPlusCatalogKey * keyp;
1898 struct proc *p = current_proc();
1899 FCB *fcb;
1900 ExtendedVCB *vcb;
1901 char filename[32];
1902 char tempname[32];
1903 size_t namelen;
1904 cat_cookie_t cookie;
1905 int catlock = 0;
1906 int catreserve = 0;
1907 bool started_tr = false;
1908 int lockflags;
1909 int result;
1910 int orphaned_files = 0;
1911 int orphaned_dirs = 0;
1912
1913 bzero(&cookie, sizeof(cookie));
1914
1915 if (hfsmp->hfs_flags & HFS_CLEANED_ORPHANS)
1916 return;
1917
1918 vcb = HFSTOVCB(hfsmp);
1919 fcb = VTOF(hfsmp->hfs_catalog_vp);
1920
1921 btdata.bufferAddress = &filerec;
1922 btdata.itemSize = sizeof(filerec);
1923 btdata.itemCount = 1;
1924
1925 iterator = hfs_mallocz(sizeof(*iterator));
1926
1927 /* Build a key to "temp" */
1928 keyp = (HFSPlusCatalogKey*)&iterator->key;
1929 keyp->parentID = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1930 keyp->nodeName.length = 4; /* "temp" */
1931 keyp->keyLength = kHFSPlusCatalogKeyMinimumLength + keyp->nodeName.length * 2;
1932 keyp->nodeName.unicode[0] = 't';
1933 keyp->nodeName.unicode[1] = 'e';
1934 keyp->nodeName.unicode[2] = 'm';
1935 keyp->nodeName.unicode[3] = 'p';
1936
1937 /*
1938 * Position the iterator just before the first real temp file/dir.
1939 */
1940 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1941 (void) BTSearchRecord(fcb, iterator, NULL, NULL, iterator);
1942 hfs_systemfile_unlock(hfsmp, lockflags);
1943
1944 /* Visit all the temp files/dirs in the HFS+ private directory. */
1945 for (;;) {
1946 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1947 result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
1948 hfs_systemfile_unlock(hfsmp, lockflags);
1949 if (result)
1950 break;
1951 if (keyp->parentID != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)
1952 break;
1953
1954 (void) utf8_encodestr(keyp->nodeName.unicode, keyp->nodeName.length * 2,
1955 (u_int8_t *)filename, &namelen, sizeof(filename), 0, 0);
1956
1957 (void) snprintf(tempname, sizeof(tempname), "%s%d",
1958 HFS_DELETE_PREFIX, filerec.fileID);
1959
1960 /*
1961 * Delete all files (and directories) named "tempxxx",
1962 * where xxx is the file's cnid in decimal.
1963 *
1964 */
1965 if (bcmp(tempname, filename, namelen + 1) != 0)
1966 continue;
1967
1968 struct filefork dfork;
1969 struct filefork rfork;
1970 struct cnode cnode;
1971 int mode = 0;
1972
1973 bzero(&dfork, sizeof(dfork));
1974 bzero(&rfork, sizeof(rfork));
1975 bzero(&cnode, sizeof(cnode));
1976
1977 if (hfs_start_transaction(hfsmp) != 0) {
1978 printf("hfs_remove_orphans: failed to start transaction\n");
1979 goto exit;
1980 }
1981 started_tr = true;
1982
1983 /*
1984 * Reserve some space in the Catalog file.
1985 */
1986 if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) {
1987 printf("hfs_remove_orphans: cat_preflight failed\n");
1988 goto exit;
1989 }
1990 catreserve = 1;
1991
1992 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1993 catlock = 1;
1994
1995 /* Build a fake cnode */
1996 cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr,
1997 &dfork.ff_data, &rfork.ff_data);
1998 cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1999 cnode.c_desc.cd_nameptr = (const u_int8_t *)filename;
2000 cnode.c_desc.cd_namelen = namelen;
2001 cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid;
2002 cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks;
2003
2004 /* Position iterator at previous entry */
2005 if (BTIterateRecord(fcb, kBTreePrevRecord, iterator,
2006 NULL, NULL) != 0) {
2007 break;
2008 }
2009
2010 /* Truncate the file to zero (both forks) */
2011 if (dfork.ff_blocks > 0) {
2012 u_int64_t fsize;
2013
2014 dfork.ff_cp = &cnode;
2015 cnode.c_datafork = &dfork;
2016 cnode.c_rsrcfork = NULL;
2017 fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize;
2018 while (fsize > 0) {
2019 if (fsize > HFS_BIGFILE_SIZE) {
2020 fsize -= HFS_BIGFILE_SIZE;
2021 } else {
2022 fsize = 0;
2023 }
2024
2025 if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0,
2026 cnode.c_attr.ca_fileid, false) != 0) {
2027 printf("hfs: error truncating data fork!\n");
2028 break;
2029 }
2030
2031 //
2032 // if we're iteratively truncating this file down,
2033 // then end the transaction and start a new one so
2034 // that no one transaction gets too big.
2035 //
2036 if (fsize > 0) {
2037 /* Drop system file locks before starting
2038 * another transaction to preserve lock order.
2039 */
2040 hfs_systemfile_unlock(hfsmp, lockflags);
2041 catlock = 0;
2042 hfs_end_transaction(hfsmp);
2043
2044 if (hfs_start_transaction(hfsmp) != 0) {
2045 started_tr = false;
2046 goto exit;
2047 }
2048 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
2049 catlock = 1;
2050 }
2051 }
2052 }
2053
2054 if (rfork.ff_blocks > 0) {
2055 rfork.ff_cp = &cnode;
2056 cnode.c_datafork = NULL;
2057 cnode.c_rsrcfork = &rfork;
2058 if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) {
2059 printf("hfs: error truncating rsrc fork!\n");
2060 break;
2061 }
2062 }
2063
2064 // Deal with extended attributes
2065 if (ISSET(cnode.c_attr.ca_recflags, kHFSHasAttributesMask)) {
2066 // hfs_removeallattr uses its own transactions
2067 hfs_systemfile_unlock(hfsmp, lockflags);
2068 catlock = false;
2069 hfs_end_transaction(hfsmp);
2070
2071 hfs_removeallattr(hfsmp, cnode.c_attr.ca_fileid, &started_tr);
2072
2073 if (!started_tr) {
2074 if (hfs_start_transaction(hfsmp) != 0) {
2075 printf("hfs_remove_orphans: failed to start transaction\n");
2076 goto exit;
2077 }
2078 started_tr = true;
2079 }
2080
2081 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
2082 catlock = 1;
2083 }
2084
2085 /* Remove the file or folder record from the Catalog */
2086 if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) {
2087 printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid);
2088 hfs_systemfile_unlock(hfsmp, lockflags);
2089 catlock = 0;
2090 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2091 break;
2092 }
2093
2094 mode = cnode.c_attr.ca_mode & S_IFMT;
2095
2096 if (mode == S_IFDIR) {
2097 orphaned_dirs++;
2098 }
2099 else {
2100 orphaned_files++;
2101 }
2102
2103 /* Update parent and volume counts */
2104 hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
2105 if (mode == S_IFDIR) {
2106 DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
2107 }
2108
2109 (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
2110 &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
2111
2112 /* Drop locks and end the transaction */
2113 hfs_systemfile_unlock(hfsmp, lockflags);
2114 cat_postflight(hfsmp, &cookie, p);
2115 catlock = catreserve = 0;
2116
2117 /*
2118 Now that Catalog is unlocked, update the volume info, making
2119 sure to differentiate between files and directories
2120 */
2121 if (mode == S_IFDIR) {
2122 hfs_volupdate(hfsmp, VOL_RMDIR, 0);
2123 }
2124 else {
2125 hfs_volupdate(hfsmp, VOL_RMFILE, 0);
2126 }
2127
2128 hfs_end_transaction(hfsmp);
2129 started_tr = false;
2130 } /* end for */
2131
2132 exit:
2133
2134 if (orphaned_files > 0 || orphaned_dirs > 0)
2135 printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs);
2136 if (catlock) {
2137 hfs_systemfile_unlock(hfsmp, lockflags);
2138 }
2139 if (catreserve) {
2140 cat_postflight(hfsmp, &cookie, p);
2141 }
2142 if (started_tr) {
2143 hfs_end_transaction(hfsmp);
2144 }
2145
2146 hfs_free(iterator, sizeof(*iterator));
2147 hfsmp->hfs_flags |= HFS_CLEANED_ORPHANS;
2148 }
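
/*
 * Illustrative note: open-unlinked objects live in the private
 * metadata directory under names of the form HFS_DELETE_PREFIX
 * followed by the cnid in decimal (e.g. "temp123" for file ID 123,
 * matching the snprintf above), which is why the scan above starts
 * from a synthesized "temp" key.
 */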
2149
2150
2151 /*
2152 * This will return the correct logical block size for a given vnode.
2153 * For most files it is the allocation block size; for metadata files
2154 * like the B-trees, it is kept as part of the B-tree's private nodeSize.
2155 */
2156 u_int32_t
2157 GetLogicalBlockSize(struct vnode *vp)
2158 {
2159 u_int32_t logBlockSize;
2160
2161 hfs_assert(vp != NULL);
2162
2163 /* start with default */
2164 logBlockSize = VTOHFS(vp)->hfs_logBlockSize;
2165
2166 if (vnode_issystem(vp)) {
2167 if (VTOF(vp)->fcbBTCBPtr != NULL) {
2168 BTreeInfoRec bTreeInfo;
2169
2170 /*
2171 * We do not lock the B-trees: if we are fetching a block, the tree
2172 * should already be locked by the caller. All we want is the
2173 * nodeSize, which never changes once the tree is created, so there
2174 * is no need to take the lock just to read it.
2176 */
2177
2178 (void) BTGetInformation (VTOF(vp), kBTreeInfoVersion, &bTreeInfo);
2179
2180 logBlockSize = bTreeInfo.nodeSize;
2181
2182 } else if (VTOC(vp)->c_fileid == kHFSAllocationFileID) {
2183 logBlockSize = VTOVCB(vp)->vcbVBMIOSize;
2184 }
2185 }
2186
2187 hfs_assert(logBlockSize > 0);
2188
2189 return logBlockSize;
2190 }
2191
2192 #if HFS_SPARSE_DEV
2193 static bool hfs_get_backing_free_blks(hfsmount_t *hfsmp, uint64_t *pfree_blks)
2194 {
2195 struct vfsstatfs *vfsp; /* 272 bytes */
2196 uint64_t vfreeblks;
2197 struct timeval now;
2198
2199 hfs_lock_mount(hfsmp);
2200
2201 vnode_t backing_vp = hfsmp->hfs_backingvp;
2202 if (!backing_vp) {
2203 hfs_unlock_mount(hfsmp);
2204 return false;
2205 }
2206
2207 // usecount is not enough; we need iocount
2208 if (vnode_get(backing_vp)) {
2209 hfs_unlock_mount(hfsmp);
2210 *pfree_blks = 0;
2211 return true;
2212 }
2213
2214 uint32_t loanedblks = hfsmp->loanedBlocks + hfsmp->lockedBlocks;
2215 uint32_t bandblks = hfsmp->hfs_sparsebandblks;
2216 uint64_t maxblks = hfsmp->hfs_backingfs_maxblocks;
2217
2218 hfs_unlock_mount(hfsmp);
2219
2220 mount_t backingfs_mp = vnode_mount(backing_vp);
2221
2222 microtime(&now);
2223 if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) {
2224 vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT);
2225 hfsmp->hfs_last_backingstatfs = now.tv_sec;
2226 }
2227
2228 if (!(vfsp = vfs_statfs(backingfs_mp))) {
2229 vnode_put(backing_vp);
2230 return false;
2231 }
2232
2233 vfreeblks = vfsp->f_bavail;
2234 /* Normalize block count if needed. */
2235 if (vfsp->f_bsize != hfsmp->blockSize)
2236 vfreeblks = vfreeblks * vfsp->f_bsize / hfsmp->blockSize;
2237 if (vfreeblks > bandblks)
2238 vfreeblks -= bandblks;
2239 else
2240 vfreeblks = 0;
2241
2242 /*
2243 * Take into account any delayed allocations. It is not
2244 * certain what the original reason for the "2 *" is. Most
2245 * likely it is to allow for additional requirements in the
2246 * host file system and metadata required by disk images. The
2247 * number of loaned blocks is likely to be small and we will
2248 * stop using them as we get close to the limit.
2249 */
2250 loanedblks = 2 * loanedblks;
2251 if (vfreeblks > loanedblks)
2252 vfreeblks -= loanedblks;
2253 else
2254 vfreeblks = 0;
2255
2256 if (maxblks)
2257 vfreeblks = MIN(vfreeblks, maxblks);
2258
2259 vnode_put(backing_vp);
2260
2261 *pfree_blks = vfreeblks;
2262
2263 return true;
2264 }
2265 #endif
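
/*
 * Worked example (illustrative): normalizing the backing store's free
 * space to this volume's allocation block size. If the host file
 * system reports f_bavail = 1000 blocks of f_bsize = 512 bytes and the
 * sparse image uses 4096-byte allocation blocks, then
 *
 *	vfreeblks = 1000 * 512 / 4096 = 125 allocation blocks
 *
 * before the band and loaned-block deductions are applied.
 */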
2266
2267 u_int32_t
2268 hfs_free_cnids(struct hfsmount * hfsmp)
2269 {
2270 return HFS_MAX_FILES - hfsmp->hfs_filecount - hfsmp->hfs_dircount;
2271 }
2272
2273 u_int32_t
2274 hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
2275 {
2276 u_int32_t freeblks;
2277 u_int32_t rsrvblks;
2278 u_int32_t loanblks;
2279
2280 /*
2281 * We don't bother taking the mount lock
2282 * to look at these values since the values
2283 * themselves are each updated atomically
2284 * on aligned addresses.
2285 */
2286 freeblks = hfsmp->freeBlocks;
2287 rsrvblks = hfsmp->reserveBlocks;
2288 loanblks = hfsmp->loanedBlocks + hfsmp->lockedBlocks;
2289 if (wantreserve) {
2290 if (freeblks > rsrvblks)
2291 freeblks -= rsrvblks;
2292 else
2293 freeblks = 0;
2294 }
2295 if (freeblks > loanblks)
2296 freeblks -= loanblks;
2297 else
2298 freeblks = 0;
2299
2300 #if HFS_SPARSE_DEV
2301 /*
2302 * When the underlying device is sparse, check the
2303 * available space on the backing store volume.
2304 */
2305 uint64_t vfreeblks;
2306 if (hfs_get_backing_free_blks(hfsmp, &vfreeblks))
2307 freeblks = MIN(freeblks, vfreeblks);
2308 #endif /* HFS_SPARSE_DEV */
2309
2310 return (freeblks);
2311 }
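
/*
 * Worked example (illustrative): with freeBlocks = 1000,
 * reserveBlocks = 100 and loanedBlocks + lockedBlocks = 50,
 * hfs_freeblks(hfsmp, 1) reports 1000 - 100 - 50 = 850 blocks while
 * hfs_freeblks(hfsmp, 0) reports 950, since the reserve is skipped.
 * Both paths clamp at zero instead of underflowing the unsigned
 * counters.
 */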
2312
2313 /*
2314 * Map HFS Common errors (negative) to BSD error codes (positive).
2315 * Positive errors (i.e., BSD errors) are passed through unchanged.
2316 */
2317 short MacToVFSError(OSErr err)
2318 {
2319 if (err >= 0)
2320 return err;
2321
2322 /* BSD/VFS internal errnos */
2323 switch (err) {
2324 case HFS_ERESERVEDNAME: /* -8 */
2325 return err;
2326 }
2327
2328 switch (err) {
2329 case dskFulErr: /* -34 */
2330 case btNoSpaceAvail: /* -32733 */
2331 return ENOSPC;
2332 case fxOvFlErr: /* -32750 */
2333 return EOVERFLOW;
2334
2335 case btBadNode: /* -32731 */
2336 return EIO;
2337
2338 case memFullErr: /* -108 */
2339 return ENOMEM; /* +12 */
2340
2341 case cmExists: /* -32718 */
2342 case btExists: /* -32734 */
2343 return EEXIST; /* +17 */
2344
2345 case cmNotFound: /* -32719 */
2346 case btNotFound: /* -32735 */
2347 return ENOENT; /* +2 */
2348
2349 case cmNotEmpty: /* -32717 */
2350 return ENOTEMPTY; /* 66 */
2351
2352 case cmFThdDirErr: /* -32714 */
2353 return EISDIR; /* 21 */
2354
2355 case fxRangeErr: /* -32751 */
2356 return ERANGE;
2357
2358 case bdNamErr: /* -37 */
2359 return ENAMETOOLONG; /* 63 */
2360
2361 case paramErr: /* -50 */
2362 case fileBoundsErr: /* -1309 */
2363 return EINVAL; /* +22 */
2364
2365 case fsBTBadNodeSize:
2366 return ENXIO;
2367
2368 default:
2369 return EIO; /* +5 */
2370 }
2371 }
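
/*
 * Illustrative sketch (editor's example): B-tree and Catalog Manager
 * routines return classic Mac OS (negative) result codes, which the
 * VFS-facing code converts at the boundary, e.g.:
 */
#if 0 /* example only */
	result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
	if (result)
		return MacToVFSError(result);	/* e.g. btNotFound -> ENOENT */
#endif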
2372
2373
2374 /*
2375 * Find the current thread's directory hint for a given index.
2376 *
2377 * Requires an exclusive lock on directory cnode.
2378 *
2379 * Use detach if the cnode lock must be dropped while the hint is still active.
2380 */
2381 directoryhint_t *
2382 hfs_getdirhint(struct cnode *dcp, int index, int detach)
2383 {
2384 struct timeval tv;
2385 directoryhint_t *hint;
2386 boolean_t need_remove, need_init;
2387 const u_int8_t * name;
2388
2389 microuptime(&tv);
2390
2391 /*
2392 * Look for an existing hint first. If not found, create a new one (when
2393 * the list is not full) or recycle the oldest hint. Since new hints are
2394 * always added to the head of the list, the last hint is always the
2395 * oldest.
2396 */
2397 TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
2398 if (hint->dh_index == index)
2399 break;
2400 }
2401 if (hint != NULL) { /* found an existing hint */
2402 need_init = false;
2403 need_remove = true;
2404 } else { /* cannot find an existing hint */
2405 need_init = true;
2406 if (dcp->c_dirhintcnt < HFS_MAXDIRHINTS) { /* we don't need recycling */
2407 /* Create a default directory hint */
2408 hint = hfs_zalloc(HFS_DIRHINT_ZONE);
2409 ++dcp->c_dirhintcnt;
2410 need_remove = false;
2411 } else { /* recycle the last (i.e., the oldest) hint */
2412 hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead);
2413 if ((hint->dh_desc.cd_flags & CD_HASBUF) &&
2414 (name = hint->dh_desc.cd_nameptr)) {
2415 hint->dh_desc.cd_nameptr = NULL;
2416 hint->dh_desc.cd_namelen = 0;
2417 hint->dh_desc.cd_flags &= ~CD_HASBUF;
2418 vfs_removename((const char *)name);
2419 }
2420 need_remove = true;
2421 }
2422 }
2423
2424 if (need_remove)
2425 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
2426
2427 if (detach)
2428 --dcp->c_dirhintcnt;
2429 else
2430 TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
2431
2432 if (need_init) {
2433 hint->dh_index = index;
2434 hint->dh_desc.cd_flags = 0;
2435 hint->dh_desc.cd_encoding = 0;
2436 hint->dh_desc.cd_namelen = 0;
2437 hint->dh_desc.cd_nameptr = NULL;
2438 hint->dh_desc.cd_parentcnid = dcp->c_fileid;
2439 hint->dh_desc.cd_hint = dcp->c_childhint;
2440 hint->dh_desc.cd_cnid = 0;
2441 }
2442 hint->dh_time = tv.tv_sec;
2443 return (hint);
2444 }
2445
2446 /*
2447 * Release a single directory hint.
2448 *
2449 * Requires an exclusive lock on directory cnode.
2450 */
2451 void
2452 hfs_reldirhint(struct cnode *dcp, directoryhint_t * relhint)
2453 {
2454 const u_int8_t * name;
2455 directoryhint_t *hint;
2456
2457 /* Check if item is on list (could be detached) */
2458 TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
2459 if (hint == relhint) {
2460 TAILQ_REMOVE(&dcp->c_hintlist, relhint, dh_link);
2461 --dcp->c_dirhintcnt;
2462 break;
2463 }
2464 }
2465 name = relhint->dh_desc.cd_nameptr;
2466 if ((relhint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2467 relhint->dh_desc.cd_nameptr = NULL;
2468 relhint->dh_desc.cd_namelen = 0;
2469 relhint->dh_desc.cd_flags &= ~CD_HASBUF;
2470 vfs_removename((const char *)name);
2471 }
2472 hfs_zfree(relhint, HFS_DIRHINT_ZONE);
2473 }
2474
2475 /*
2476 * Release directory hints for given directory
2477 *
2478 * Requires an exclusive lock on directory cnode.
2479 */
2480 void
2481 hfs_reldirhints(struct cnode *dcp, int stale_hints_only)
2482 {
2483 struct timeval tv;
2484 directoryhint_t *hint, *prev;
2485 const u_int8_t * name;
2486
2487 if (stale_hints_only)
2488 microuptime(&tv);
2489
2490 /* searching from the oldest to the newest, so we can stop early when releasing stale hints only */
2491 for (hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); hint != NULL; hint = prev) {
2492 if (stale_hints_only && (tv.tv_sec - hint->dh_time) < HFS_DIRHINT_TTL)
2493 break; /* stop here if this entry is too new */
2494 name = hint->dh_desc.cd_nameptr;
2495 if ((hint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2496 hint->dh_desc.cd_nameptr = NULL;
2497 hint->dh_desc.cd_namelen = 0;
2498 hint->dh_desc.cd_flags &= ~CD_HASBUF;
2499 vfs_removename((const char *)name);
2500 }
2501 prev = TAILQ_PREV(hint, hfs_hinthead, dh_link); /* must save this pointer before freeing this node */
2502 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
2503 hfs_zfree(hint, HFS_DIRHINT_ZONE);
2504 --dcp->c_dirhintcnt;
2505 }
2506 }
2507
2508 /*
2509 * Insert a detached directory hint back into the list of dirhints.
2510 *
2511 * Requires an exclusive lock on directory cnode.
2512 */
2513 void
2514 hfs_insertdirhint(struct cnode *dcp, directoryhint_t * hint)
2515 {
2516 directoryhint_t *test;
2517
2518 TAILQ_FOREACH(test, &dcp->c_hintlist, dh_link) {
2519 if (test == hint)
2520 panic("hfs_insertdirhint: hint %p already on list!", hint);
2521 }
2522
2523 TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
2524 ++dcp->c_dirhintcnt;
2525 }
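
/*
 * Illustrative sketch (editor's example): the detach/insert pair lets
 * a caller keep a hint alive across a point where the directory cnode
 * lock must be dropped. dcp and index are assumed to come from the
 * caller, with dcp locked exclusive on entry.
 */
#if 0 /* example only */
	directoryhint_t *hint;

	hint = hfs_getdirhint(dcp, index, 1 /* detach */);
	hfs_unlock(dcp);
	/* ... work that must not be done under the cnode lock ... */
	hfs_lock(dcp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
	hfs_insertdirhint(dcp, hint);	/* or hfs_reldirhint(dcp, hint) */
#endif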
2526
2527 /*
2528 * Perform a case-insensitive compare of two UTF-8 filenames.
2529 *
2530 * Returns 0 if the strings match.
2531 */
2532 int
2533 hfs_namecmp(const u_int8_t *str1, size_t len1, const u_int8_t *str2, size_t len2)
2534 {
2535 u_int16_t *ustr1, *ustr2;
2536 size_t ulen1, ulen2;
2537 size_t maxbytes;
2538 int cmp = -1;
2539
2540 if (len1 != len2)
2541 return (cmp);
2542
2543 maxbytes = kHFSPlusMaxFileNameChars << 1;
2544 ustr1 = hfs_malloc(maxbytes << 1);
2545 ustr2 = ustr1 + (maxbytes >> 1);
2546
2547 if (utf8_decodestr(str1, len1, ustr1, &ulen1, maxbytes, ':', 0) != 0)
2548 goto out;
2549 if (utf8_decodestr(str2, len2, ustr2, &ulen2, maxbytes, ':', 0) != 0)
2550 goto out;
2551
2552 cmp = FastUnicodeCompare(ustr1, ulen1>>1, ustr2, ulen2>>1);
2553 out:
2554 hfs_free(ustr1, maxbytes << 1);
2555 return (cmp);
2556 }
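
/*
 * Illustrative sketch (editor's example): hfs_namecmp() applies HFS+
 * case-insensitive (FastUnicodeCompare) semantics to two UTF-8 names.
 */
#if 0 /* example only */
	if (hfs_namecmp((const u_int8_t *)"README", 6,
			(const u_int8_t *)"readme", 6) == 0) {
		/* treated as the same name on a case-insensitive volume */
	}
#endif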
2557
2558 typedef struct jopen_cb_info {
2559 mount_t mp;
2560 off_t jsize;
2561 char *desired_uuid;
2562 struct vnode *jvp;
2563 size_t blksize;
2564 int need_clean;
2565 int need_init;
2566 } jopen_cb_info;
2567
2568 static int
2569 journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg)
2570 {
2571 jopen_cb_info *ji = (jopen_cb_info *)arg;
2572 char bsd_name[256];
2573 int error;
2574
2575 strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name));
2576 strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5);
2577
2578 if ((error = vnode_lookup(bsd_name, VNODE_LOOKUP_NOFOLLOW, &ji->jvp,
2579 vfs_context_kernel()))) {
2580 printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str);
2581 return 1; // keep iterating
2582 }
2583
2584 struct vnop_open_args oargs = {
2585 .a_vp = ji->jvp,
2586 .a_mode = FREAD | FWRITE,
2587 .a_context = vfs_context_kernel(),
2588 };
2589
2590 if (spec_open(&oargs)) {
2591 vnode_put(ji->jvp);
2592 ji->jvp = NULL;
2593 return 1;
2594 }
2595
2596 // if the journal is dirty and we didn't specify a desired
2597 // journal device uuid, then do not use the journal. but
2598 // if the journal is just invalid (e.g. it hasn't been
2599 // initialized) then just set the need_init flag.
2600 if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2601 error = journal_is_clean(ji->jvp, 0, ji->jsize,
2602 (void *)1, ji->blksize);
2603 if (error == EBUSY) {
2604 struct vnop_close_args cargs = {
2605 .a_vp = ji->jvp,
2606 .a_fflag = FREAD | FWRITE,
2607 .a_context = vfs_context_kernel()
2608 };
2609 spec_close(&cargs);
2610 vnode_put(ji->jvp);
2611 ji->jvp = NULL;
2612 return 1; // keep iterating
2613 } else if (error == EINVAL) {
2614 ji->need_init = 1;
2615 }
2616 }
2617
2618 if (ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2619 strlcpy(ji->desired_uuid, uuid_str, 128);
2620 }
2621 vnode_setmountedon(ji->jvp);
2622 return 0; // stop iterating
2623 }
2624
2625 static vnode_t
2626 open_journal_dev(mount_t mp,
2627 const char *vol_device,
2628 int need_clean,
2629 char *uuid_str,
2630 char *machine_serial_num,
2631 off_t jsize,
2632 size_t blksize,
2633 int *need_init)
2634 {
2635 int retry_counter=0;
2636 jopen_cb_info ji;
2637
2638 ji.mp = mp;
2639 ji.jsize = jsize;
2640 ji.desired_uuid = uuid_str;
2641 ji.jvp = NULL;
2642 ji.blksize = blksize;
2643 ji.need_clean = need_clean;
2644 ji.need_init = 0;
2645
2646 // if (uuid_str[0] == '\0') {
2647 // printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device);
2648 // } else {
2649 // printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str);
2650 // }
2651 while (ji.jvp == NULL && retry_counter++ < 4) {
2652 if (retry_counter > 1) {
2653 if (uuid_str[0]) {
2654 printf("hfs: open_journal_dev: uuid %s not found. waiting 10sec.\n", uuid_str);
2655 } else {
2656 printf("hfs: open_journal_dev: no available external journal partition found. waiting 10sec.\n");
2657 }
2658 delay_for_interval(10 * 1000000, NSEC_PER_USEC); // wait for ten seconds and then try again
2659 }
2660
2661 hfs_iterate_media_with_content(EXTJNL_CONTENT_TYPE_UUID,
2662 journal_open_cb, &ji);
2663 }
2664
2665 if (ji.jvp == NULL) {
2666 printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n",
2667 vol_device, uuid_str, machine_serial_num);
2668 }
2669
2670 *need_init = ji.need_init;
2671
2672 return ji.jvp;
2673 }
2674
2675 void hfs_close_jvp(hfsmount_t *hfsmp)
2676 {
2677 if (!hfsmp || !hfsmp->jvp || hfsmp->jvp == hfsmp->hfs_devvp)
2678 return;
2679
2680 vnode_clearmountedon(hfsmp->jvp);
2681 struct vnop_close_args cargs = {
2682 .a_vp = hfsmp->jvp,
2683 .a_fflag = FREAD | FWRITE,
2684 .a_context = vfs_context_kernel()
2685 };
2686 spec_close(&cargs);
2687 vnode_put(hfsmp->jvp);
2688 hfsmp->jvp = NULL;
2689 }
2690
2691 int
2692 hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
2693 void *_args, off_t embeddedOffset, daddr64_t mdb_offset,
2694 HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred)
2695 {
2696 JournalInfoBlock *jibp;
2697 struct buf *jinfo_bp, *bp;
2698 int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2699 int retval, write_jibp = 0;
2700 uint32_t blksize = hfsmp->hfs_logical_block_size;
2701 struct vnode *devvp;
2702 struct hfs_mount_args *args = _args;
2703 u_int32_t jib_flags;
2704 u_int64_t jib_offset;
2705 u_int64_t jib_size;
2706 const char *dev_name;
2707
2708 devvp = hfsmp->hfs_devvp;
2709 dev_name = vnode_getname_printable(devvp);
2710
2711 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2712 arg_flags = args->journal_flags;
2713 arg_tbufsz = args->journal_tbuffer_size;
2714 }
2715
2716 sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize;
2717
2718 jinfo_bp = NULL;
2719 retval = (int)buf_meta_bread(devvp,
2720 (daddr64_t)((embeddedOffset/blksize) +
2721 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2722 hfsmp->hfs_physical_block_size, cred, &jinfo_bp);
2723 if (retval) {
2724 if (jinfo_bp) {
2725 buf_brelse(jinfo_bp);
2726 }
2727 goto cleanup_dev_name;
2728 }
2729
2730 jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2731 jib_flags = SWAP_BE32(jibp->flags);
2732 jib_size = SWAP_BE64(jibp->size);
2733
2734 if (jib_flags & kJIJournalInFSMask) {
2735 hfsmp->jvp = hfsmp->hfs_devvp;
2736 jib_offset = SWAP_BE64(jibp->offset);
2737 } else {
2738 int need_init=0;
2739
2740 // if the volume was unmounted cleanly then we'll pick any
2741 // available external journal partition
2742 //
2743 if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) {
2744 *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2745 }
2746
2747 hfsmp->jvp = open_journal_dev(hfsmp->hfs_mp,
2748 dev_name,
2749 !(jib_flags & kJIJournalNeedInitMask),
2750 (char *)&jibp->ext_jnl_uuid[0],
2751 (char *)&jibp->machine_serial_num[0],
2752 jib_size,
2753 hfsmp->hfs_logical_block_size,
2754 &need_init);
2755 if (hfsmp->jvp == NULL) {
2756 buf_brelse(jinfo_bp);
2757 retval = EROFS;
2758 goto cleanup_dev_name;
2759 } else {
2760 if (hfs_get_platform_serial_number(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2761 strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num));
2762 }
2763 }
2764
2765 jib_offset = 0;
2766 write_jibp = 1;
2767 if (need_init) {
2768 jib_flags |= kJIJournalNeedInitMask;
2769 }
2770 }
2771
2772 // save this off for the hack-y check in hfs_remove()
2773 hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2774 hfsmp->jnl_size = jib_size;
2775
2776 if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2777 // if the file system is read-only, check if the journal is empty.
2778 // if it is, then we can allow the mount. otherwise we have to
2779 // return failure.
2780 retval = journal_is_clean(hfsmp->jvp,
2781 jib_offset + embeddedOffset,
2782 jib_size,
2783 devvp,
2784 hfsmp->hfs_logical_block_size);
2785
2786 hfsmp->jnl = NULL;
2787
2788 buf_brelse(jinfo_bp);
2789
2790 if (retval) {
2791 const char *name = vnode_getname_printable(devvp);
2792 printf("hfs: early journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n",
2793 name);
2794 vnode_putname_printable(name);
2795 }
2796
2797 goto cleanup_dev_name;
2798 }
2799
2800 if (jib_flags & kJIJournalNeedInitMask) {
2801 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2802 jib_offset + embeddedOffset, jib_size);
2803 hfsmp->jnl = journal_create(hfsmp->jvp,
2804 jib_offset + embeddedOffset,
2805 jib_size,
2806 devvp,
2807 blksize,
2808 arg_flags,
2809 arg_tbufsz,
2810 hfs_sync_metadata, hfsmp->hfs_mp,
2811 hfsmp->hfs_mp);
2812 if (hfsmp->jnl)
2813 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2814
2815 // no need to start a transaction here... if this were to fail
2816 // we'd just re-init it on the next mount.
2817 jib_flags &= ~kJIJournalNeedInitMask;
2818 jibp->flags = SWAP_BE32(jib_flags);
2819 buf_bwrite(jinfo_bp);
2820 jinfo_bp = NULL;
2821 jibp = NULL;
2822 } else {
2823 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2824 // jib_offset + embeddedOffset,
2825 // jib_size, SWAP_BE32(vhp->blockSize));
2826
2827 hfsmp->jnl = journal_open(hfsmp->jvp,
2828 jib_offset + embeddedOffset,
2829 jib_size,
2830 devvp,
2831 blksize,
2832 arg_flags,
2833 arg_tbufsz,
2834 hfs_sync_metadata, hfsmp->hfs_mp,
2835 hfsmp->hfs_mp);
2836 if (hfsmp->jnl)
2837 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2838
2839 if (write_jibp) {
2840 buf_bwrite(jinfo_bp);
2841 } else {
2842 buf_brelse(jinfo_bp);
2843 }
2844 jinfo_bp = NULL;
2845 jibp = NULL;
2846
2847 if (hfsmp->jnl && mdbp) {
2848 // reload the mdb because it could have changed
2849 // if the journal had to be replayed.
2850 if (mdb_offset == 0) {
2851 mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
2852 }
2853 bp = NULL;
2854 retval = (int)buf_meta_bread(devvp,
2855 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
2856 hfsmp->hfs_physical_block_size, cred, &bp);
2857 if (retval) {
2858 if (bp) {
2859 buf_brelse(bp);
2860 }
2861 printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n",
2862 retval);
2863 goto cleanup_dev_name;
2864 }
2865 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512);
2866 buf_brelse(bp);
2867 bp = NULL;
2868 }
2869 }
2870
2871 // if we expected the journal to be there and we couldn't
2872 // create it or open it then we have to bail out.
2873 if (hfsmp->jnl == NULL) {
2874 printf("hfs: early jnl init: failed to open/create the journal (retval %d).\n", retval);
2875 retval = EINVAL;
2876 goto cleanup_dev_name;
2877 }
2878
2879 retval = 0;
2880
2881 cleanup_dev_name:
2882 vnode_putname_printable(dev_name);
2883 return retval;
2884 }
2885
2886
2887 //
2888 // This function will go and re-locate the .journal_info_block and
2889 // the .journal files in case they moved (which can happen if you
2890 // run Norton SpeedDisk). If we fail to find either file we just
2891 // disable journaling for this volume and return. We turn off the
2892 // journaling bit in the vcb and assume it will get written to disk
2893 // later (if it doesn't on the next mount we'd do the same thing
2894 // again which is harmless). If we disable journaling we don't
2895 // return an error so that the volume is still mountable.
2896 //
2897 // If the info we find for the .journal_info_block and .journal files
2898 // isn't what we had stored, we re-set our cached info and proceed
2899 // with opening the journal normally.
2900 //
2901 static int
2902 hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args)
2903 {
2904 JournalInfoBlock *jibp;
2905 struct buf *jinfo_bp;
2906 int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2907 int retval, write_jibp = 0, recreate_journal = 0;
2908 struct vnode *devvp;
2909 struct cat_attr jib_attr, jattr;
2910 struct cat_fork jib_fork, jfork;
2911 ExtendedVCB *vcb;
2912 u_int32_t fid;
2913 struct hfs_mount_args *args = _args;
2914 u_int32_t jib_flags;
2915 u_int64_t jib_offset;
2916 u_int64_t jib_size;
2917
2918 devvp = hfsmp->hfs_devvp;
2919 vcb = HFSTOVCB(hfsmp);
2920
2921 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2922 if (args->journal_disable) {
2923 return 0;
2924 }
2925
2926 arg_flags = args->journal_flags;
2927 arg_tbufsz = args->journal_tbuffer_size;
2928 }
2929
2930 fid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jib_attr, &jib_fork);
2931 if (fid == 0 || jib_fork.cf_extents[0].startBlock == 0 || jib_fork.cf_size == 0) {
2932 printf("hfs: can't find the .journal_info_block! disabling journaling (start: %d).\n",
2933 fid ? jib_fork.cf_extents[0].startBlock : 0);
2934 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2935 return 0;
2936 }
2937 hfsmp->hfs_jnlinfoblkid = fid;
2938
2939 // make sure the journal_info_block begins where we think it should.
2940 if (SWAP_BE32(vhp->journalInfoBlock) != jib_fork.cf_extents[0].startBlock) {
2941 printf("hfs: The journal_info_block moved (was: %d; is: %d). Fixing up\n",
2942 SWAP_BE32(vhp->journalInfoBlock), jib_fork.cf_extents[0].startBlock);
2943
2944 vcb->vcbJinfoBlock = jib_fork.cf_extents[0].startBlock;
2945 vhp->journalInfoBlock = SWAP_BE32(jib_fork.cf_extents[0].startBlock);
2946 recreate_journal = 1;
2947 }
2948
2949
2950 sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;
2951 jinfo_bp = NULL;
2952 retval = (int)buf_meta_bread(devvp,
2953 (vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size +
2954 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2955 hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp);
2956 if (retval) {
2957 if (jinfo_bp) {
2958 buf_brelse(jinfo_bp);
2959 }
2960 printf("hfs: can't read journal info block. disabling journaling.\n");
2961 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2962 return 0;
2963 }
2964
2965 jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2966 jib_flags = SWAP_BE32(jibp->flags);
2967 jib_offset = SWAP_BE64(jibp->offset);
2968 jib_size = SWAP_BE64(jibp->size);
2969
2970 fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork);
2971 if (fid == 0 || jfork.cf_extents[0].startBlock == 0 || jfork.cf_size == 0) {
2972 printf("hfs: can't find the journal file! disabling journaling (start: %d)\n",
2973 fid ? jfork.cf_extents[0].startBlock : 0);
2974 buf_brelse(jinfo_bp);
2975 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2976 return 0;
2977 }
2978 hfsmp->hfs_jnlfileid = fid;
2979
2980 // make sure the journal file begins where we think it should.
2981 if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
2982 printf("hfs: The journal file moved (was: %lld; is: %d). Fixing up\n",
2983 (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock);
2984
2985 jib_offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize;
2986 write_jibp = 1;
2987 recreate_journal = 1;
2988 }
2989
2990 // check the size of the journal file.
2991 if (jib_size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) {
2992 printf("hfs: The journal file changed size! (was %lld; is %lld). Fixing up.\n",
2993 jib_size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize);
2994
2995 jib_size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize;
2996 write_jibp = 1;
2997 recreate_journal = 1;
2998 }
2999
3000 if (jib_flags & kJIJournalInFSMask) {
3001 hfsmp->jvp = hfsmp->hfs_devvp;
3002 jib_offset += (off_t)vcb->hfsPlusIOPosOffset;
3003 } else {
3004 const char *dev_name;
3005 int need_init = 0;
3006
3007 dev_name = vnode_getname_printable(devvp);
3008
3009 // since the journal is empty, just use any available external journal
3010 *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
3011
3012 // this fills in the uuid of the device we actually get
3013 hfsmp->jvp = open_journal_dev(hfsmp->hfs_mp,
3014 dev_name,
3015 !(jib_flags & kJIJournalNeedInitMask),
3016 (char *)&jibp->ext_jnl_uuid[0],
3017 (char *)&jibp->machine_serial_num[0],
3018 jib_size,
3019 hfsmp->hfs_logical_block_size,
3020 &need_init);
3021 if (hfsmp->jvp == NULL) {
3022 buf_brelse(jinfo_bp);
3023 vnode_putname_printable(dev_name);
3024 return EROFS;
3025 } else {
3026 if (hfs_get_platform_serial_number(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
3027 strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num));
3028 }
3029 }
3030 jib_offset = 0;
3031 recreate_journal = 1;
3032 write_jibp = 1;
3033 if (need_init) {
3034 jib_flags |= kJIJournalNeedInitMask;
3035 }
3036 vnode_putname_printable(dev_name);
3037 }
3038
3039 // save this off for the hack-y check in hfs_remove()
3040 hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
3041 hfsmp->jnl_size = jib_size;
3042
3043 if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
3044 // if the file system is read-only, check if the journal is empty.
3045 // if it is, then we can allow the mount. otherwise we have to
3046 // return failure.
3047 retval = journal_is_clean(hfsmp->jvp,
3048 jib_offset,
3049 jib_size,
3050 devvp,
3051 hfsmp->hfs_logical_block_size);
3052
3053 hfsmp->jnl = NULL;
3054
3055 buf_brelse(jinfo_bp);
3056
3057 if (retval) {
3058 const char *name = vnode_getname_printable(devvp);
3059 printf("hfs: late journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n",
3060 name);
3061 vnode_putname_printable(name);
3062 }
3063
3064 return retval;
3065 }
3066
3067 if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) {
3068 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
3069 jib_offset, jib_size);
3070 hfsmp->jnl = journal_create(hfsmp->jvp,
3071 jib_offset,
3072 jib_size,
3073 devvp,
3074 hfsmp->hfs_logical_block_size,
3075 arg_flags,
3076 arg_tbufsz,
3077 hfs_sync_metadata, hfsmp->hfs_mp,
3078 hfsmp->hfs_mp);
3079 if (hfsmp->jnl)
3080 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
3081
3082 // no need to start a transaction here... if this were to fail
3083 // we'd just re-init it on the next mount.
3084 jib_flags &= ~kJIJournalNeedInitMask;
3085 write_jibp = 1;
3086
3087 } else {
3088 //
3089 // if we weren't the last person to mount this volume
3090 // then we need to throw away the journal because it
3091 // is likely that someone else mucked with the disk.
3092 // if the journal is empty this is no big deal. if the
3093 // disk is dirty this prevents us from replaying the
3094 // journal over top of changes that someone else made.
3095 //
3096 arg_flags |= JOURNAL_RESET;
3097
3098 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
3099 // jib_offset,
3100 // jib_size, SWAP_BE32(vhp->blockSize));
3101
3102 hfsmp->jnl = journal_open(hfsmp->jvp,
3103 jib_offset,
3104 jib_size,
3105 devvp,
3106 hfsmp->hfs_logical_block_size,
3107 arg_flags,
3108 arg_tbufsz,
3109 hfs_sync_metadata, hfsmp->hfs_mp,
3110 hfsmp->hfs_mp);
3111 if (hfsmp->jnl)
3112 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
3113 }
3114
3115
3116 if (write_jibp) {
3117 jibp->flags = SWAP_BE32(jib_flags);
3118 jibp->offset = SWAP_BE64(jib_offset);
3119 jibp->size = SWAP_BE64(jib_size);
3120
3121 buf_bwrite(jinfo_bp);
3122 } else {
3123 buf_brelse(jinfo_bp);
3124 }
3125 jinfo_bp = NULL;
3126 jibp = NULL;
3127
3128 // if we expected the journal to be there and we couldn't
3129 // create it or open it then we have to bail out.
3130 if (hfsmp->jnl == NULL) {
3131 printf("hfs: late jnl init: failed to open/create the journal (retval %d).\n", retval);
3132 return EINVAL;
3133 }
3134
3135 return 0;
3136 }
3137
3138 /*
3139 * Calculate the allocation zone for metadata.
3140 *
3141 * This zone includes the following:
3142 * Allocation Bitmap file
3143 * Overflow Extents file
3144 * Journal file
3145 * Quota files
3146 * Clustered Hot files
3147 * Catalog file
3148 *
3149 * METADATA ALLOCATION ZONE
3150 * ____________________________________________________________________________
3151 * | | | | | | |
3152 * | BM | JF | OEF | CATALOG |---> | HOT FILES |
3153 * |____|____|_____|_______________|______________________________|___________|
3154 *
3155 * <------------------------------- N * 128 MB ------------------------------->
3156 *
3157 */
3158 #define GIGABYTE (u_int64_t)(1024*1024*1024)
3159
3160 #define HOTBAND_MINIMUM_SIZE (10*1024*1024)
3161 #define HOTBAND_MAXIMUM_SIZE (512*1024*1024)
3162
3163 /* Initialize the metadata zone.
3164 *
3165 * If the size of the volume is less than the minimum required for
3166 * a metadata zone, the metadata zone is disabled.
3167 *
3168 * If disable is true, disable metadata zone unconditionally.
3169 */
3170 void
3171 hfs_metadatazone_init(struct hfsmount *hfsmp, int disable)
3172 {
3173 ExtendedVCB *vcb;
3174 u_int64_t fs_size;
3175 u_int64_t zonesize;
3176 u_int64_t temp;
3177 u_int64_t filesize;
3178 u_int32_t blk;
3179 int items, really_do_it=1;
3180
3181 vcb = HFSTOVCB(hfsmp);
3182 fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit;
3183
3184 /*
3185 * For volumes less than 10 GB, don't bother.
3186 */
3187 if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
3188 really_do_it = 0;
3189 }
3190
3191 /*
3192 * Skip non-journaled volumes as well.
3193 */
3194 if (hfsmp->jnl == NULL) {
3195 really_do_it = 0;
3196 }
3197
3198 /* If caller wants to disable metadata zone, do it */
3199 if (disable == true) {
3200 really_do_it = 0;
3201 }
3202
3203 /*
3204 * Start with space for the boot blocks and Volume Header.
3205 * 1536 = byte offset from start of volume to end of volume header:
3206 * 1024 bytes is the offset from the start of the volume to the
3207 * start of the volume header (defined by the volume format)
3208 * + 512 bytes (the size of the volume header).
3209 */
3210 zonesize = roundup(1536, hfsmp->blockSize);
3211
3212 /*
3213 * Add the on-disk size of allocation bitmap.
3214 */
3215 zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;
3216
3217 /*
3218 * Add space for the Journal Info Block and Journal (if they're in
3219 * this file system).
3220 */
3221 if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
3222 zonesize += hfsmp->blockSize + hfsmp->jnl_size;
3223 }
3224
3225 /*
3226 * Add the existing size of the Extents Overflow B-tree.
3227 * (It rarely grows, so don't bother reserving additional room for it.)
3228 */
3229 zonesize += hfs_blk_to_bytes(hfsmp->hfs_extents_cp->c_datafork->ff_blocks, hfsmp->blockSize);
3230
3231 /*
3232 * If there is an Attributes B-tree, leave room for 11 clumps worth.
3233 * newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
3234 * When installing a full OS install onto a 20GB volume, we use
3235 * 7 to 8 clumps worth of space (depending on packages), so that leaves
3236 * us with another 3 or 4 clumps worth before we need another extent.
3237 */
3238 if (hfsmp->hfs_attribute_cp) {
3239 zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
3240 }
3241
3242 /*
3243 * Leave room for 11 clumps of the Catalog B-tree.
3244 * Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
3245 * When installing a full OS install onto a 20GB volume, we use
3246 * 7 to 8 clumps worth of space (depending on packages), so that leaves
3247 * us with another 3 or 4 clumps worth before we need another extent.
3248 */
3249 zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;
3250
3251 /*
3252 * Add space for hot file region.
3253 *
3254 * ...for now, use 5 MB per 1 GB (0.5 %)
3255 */
3256 filesize = (fs_size / 1024) * 5;
3257 if (filesize > HOTBAND_MAXIMUM_SIZE)
3258 filesize = HOTBAND_MAXIMUM_SIZE;
3259 else if (filesize < HOTBAND_MINIMUM_SIZE)
3260 filesize = HOTBAND_MINIMUM_SIZE;
3261 /*
3262 * Calculate user quota file requirements.
3263 */
3264 if (hfsmp->hfs_flags & HFS_QUOTAS) {
3265 items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
3266 if (items < QF_MIN_USERS)
3267 items = QF_MIN_USERS;
3268 else if (items > QF_MAX_USERS)
3269 items = QF_MAX_USERS;
3270 if (!powerof2(items)) {
3271 int x = items;
3272 items = 4;
3273 while (x>>1 != 1) {
3274 x = x >> 1;
3275 items = items << 1;
3276 }
3277 }
3278 filesize += (items + 1) * sizeof(struct dqblk);
3279 /*
3280 * Calculate group quota file requirements.
3281 *
3282 */
3283 items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
3284 if (items < QF_MIN_GROUPS)
3285 items = QF_MIN_GROUPS;
3286 else if (items > QF_MAX_GROUPS)
3287 items = QF_MAX_GROUPS;
3288 if (!powerof2(items)) {
3289 int x = items;
3290 items = 4;
3291 while (x>>1 != 1) {
3292 x = x >> 1;
3293 items = items << 1;
3294 }
3295 }
3296 filesize += (items + 1) * sizeof(struct dqblk);
3297 }
3298 zonesize += filesize;
3299
3300 /*
3301 * Round up entire zone to a bitmap block's worth.
3302 * The extra space goes to the catalog file and hot file area.
3303 */
3304 temp = zonesize;
3305 zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
3306 hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;
3307 /*
3308 * If doing the round up for hfs_min_alloc_start would push us past
3309 * allocLimit, then just reset it back to 0. Though using a value
3310 * bigger than allocLimit would not cause damage in the block allocator
3311 * code, this value could get stored in the volume header and make it out
3312 * to disk, making the volume header technically corrupt.
3313 */
3314 if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) {
3315 hfsmp->hfs_min_alloc_start = 0;
3316 }
3317
3318 if (really_do_it == 0) {
3319 /* If metadata zone needs to be disabled because the
3320 * volume was truncated, clear the bit and zero out
3321 * the values that are no longer needed.
3322 */
3323 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
3324 /* Disable metadata zone */
3325 hfsmp->hfs_flags &= ~HFS_METADATA_ZONE;
3326
3327 /* Zero out mount point values that are not required */
3328 hfsmp->hfs_catalog_maxblks = 0;
3329 hfsmp->hfs_hotfile_maxblks = 0;
3330 hfsmp->hfs_hotfile_start = 0;
3331 hfsmp->hfs_hotfile_end = 0;
3332 hfsmp->hfs_hotfile_freeblks = 0;
3333 hfsmp->hfs_metazone_start = 0;
3334 hfsmp->hfs_metazone_end = 0;
3335 }
3336
3337 return;
3338 }
3339
3340 temp = zonesize - temp; /* temp has extra space */
3341 filesize += temp / 3;
3342 hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;
3343
3344 if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
3345 hfsmp->hfs_hotfile_maxblks = (uint32_t) (hfsmp->hfs_cs_hotfile_size / HFSTOVCB(hfsmp)->blockSize);
3346 } else {
3347 hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
3348 }
3349
3350 /* Convert to allocation blocks. */
3351 blk = zonesize / vcb->blockSize;
3352
3353 /* The default metadata zone location is at the start of volume. */
3354 hfsmp->hfs_metazone_start = 1;
3355 hfsmp->hfs_metazone_end = blk - 1;
3356
3357 /* The default hotfile area is at the end of the zone. */
3358 if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) {
3359 hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize);
3360 hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
3361 hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
3362 }
3363 else {
3364 hfsmp->hfs_hotfile_start = 0;
3365 hfsmp->hfs_hotfile_end = 0;
3366 hfsmp->hfs_hotfile_freeblks = 0;
3367 }
3368 #if DEBUG
3369 printf("hfs:%s: metadata zone is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
3370 printf("hfs:%s: hot file band is %d to %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
3371 printf("hfs:%s: hot file band free blocks = %d\n", hfsmp->vcbVN, hfsmp->hfs_hotfile_freeblks);
3372 #endif
3373
3374 hfsmp->hfs_flags |= HFS_METADATA_ZONE;
3375 }
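
/*
 * Worked example (illustrative): with 4KB allocation blocks and the
 * common vcbVBMIOSize of 4096 bytes, the roundup granularity above is
 * 4096 * 8 * 4096 bytes = 128MB, which is where the "N * 128 MB" in
 * the zone diagram comes from.
 */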
3376
3377
3378 static u_int32_t
3379 hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
3380 {
3381 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3382 int lockflags;
3383 int freeblocks;
3384
3385 if (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN) {
3386 //
3387 // This is only used at initialization time; on an SSD
3388 // we'll get the real info from the hotfile b-tree's
3389 // user info record.
3390 //
3391 return 0;
3392 }
3393
3394 lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
3395 freeblocks = MetaZoneFreeBlocks(vcb);
3396 hfs_systemfile_unlock(hfsmp, lockflags);
3397
3398 /* Minus Extents overflow file reserve. */
3399 if ((uint32_t)hfsmp->hfs_overflow_maxblks >= VTOF(hfsmp->hfs_extents_vp)->ff_blocks) {
3400 freeblocks -= hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
3401 }
3402
3403 /* Minus catalog file reserve. */
3404 if ((uint32_t)hfsmp->hfs_catalog_maxblks >= VTOF(hfsmp->hfs_catalog_vp)->ff_blocks) {
3405 freeblocks -= hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
3406 }
3407
3408 if (freeblocks < 0)
3409 freeblocks = 0;
3410
3411 // printf("hfs: hotfile_freeblocks: MIN(%d, %d) = %d\n", freeblocks, hfsmp->hfs_hotfile_maxblks, MIN(freeblocks, hfsmp->hfs_hotfile_maxblks));
3412 return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks);
3413 }
3414
3415 /*
3416 * Determine if a file is a "virtual" metadata file.
3417 * This includes journal and quota files.
3418 */
3419 int
3420 hfs_virtualmetafile(struct cnode *cp)
3421 {
3422 const char * filename;
3423
3424
3426 return (0);
3427
3428 filename = (const char *)cp->c_desc.cd_nameptr;
3429 if (filename == NULL)
3430 return (0);
3431
3432 if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) ||
3433 (strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) ||
3434 (strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) ||
3435 (strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) ||
3436 (strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0))
3437 return (1);
3438
3439 return (0);
3440 }
3441
3442 void hfs_syncer_lock(struct hfsmount *hfsmp)
3443 {
3444 hfs_lock_mount(hfsmp);
3445 }
3446
3447 void hfs_syncer_unlock(struct hfsmount *hfsmp)
3448 {
3449 hfs_unlock_mount(hfsmp);
3450 }
3451
3452 void hfs_syncer_wait(struct hfsmount *hfsmp, struct timespec *ts)
3453 {
3454 msleep(&hfsmp->hfs_syncer_thread, &hfsmp->hfs_mutex, PWAIT,
3455 "hfs_syncer_wait", ts);
3456 }
3457
3458 void hfs_syncer_wakeup(struct hfsmount *hfsmp)
3459 {
3460 wakeup(&hfsmp->hfs_syncer_thread);
3461 }
3462
3463 uint64_t hfs_usecs_to_deadline(uint64_t usecs)
3464 {
3465 uint64_t deadline;
3466 clock_interval_to_deadline(usecs, NSEC_PER_USEC, &deadline);
3467 return deadline;
3468 }
3469
3470 //
3471 // Fire off a timed callback to sync the disk if the
3472 // volume is on ejectable media.
3473 //
3474 void hfs_sync_ejectable(struct hfsmount *hfsmp)
3475 {
3476 // If we don't have a syncer or we get called by the syncer, just return
3477 if (!ISSET(hfsmp->hfs_flags, HFS_RUN_SYNCER)
3478 || current_thread() == hfsmp->hfs_syncer_thread) {
3479 return;
3480 }
3481
3482 hfs_syncer_lock(hfsmp);
3483
3484 if (!timerisset(&hfsmp->hfs_sync_req_oldest))
3485 microuptime(&hfsmp->hfs_sync_req_oldest);
3486
3487 /* If hfs_unmount is running, it will clear the HFS_RUN_SYNCER
3488 flag. Also, we don't want to queue again if there is a sync
3489 outstanding. */
3490 if (!ISSET(hfsmp->hfs_flags, HFS_RUN_SYNCER)
3491 || hfsmp->hfs_syncer_thread) {
3492 hfs_syncer_unlock(hfsmp);
3493 return;
3494 }
3495
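/*
 * Placeholder value: marks a sync as queued so that concurrent callers
 * bail out above, until kernel_thread_start() below stores the real
 * thread pointer.
 */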
3496 hfsmp->hfs_syncer_thread = (void *)1;
3497
3498 hfs_syncer_unlock(hfsmp);
3499
3500 kernel_thread_start(hfs_syncer, hfsmp, &hfsmp->hfs_syncer_thread);
3501 thread_deallocate(hfsmp->hfs_syncer_thread);
3502 }
3503
3504 int
3505 hfs_start_transaction(struct hfsmount *hfsmp)
3506 {
3507 int ret = 0, unlock_on_err = 0;
3508 thread_t thread = current_thread();
3509
3510 #ifdef HFS_CHECK_LOCK_ORDER
3511 /*
3512 * You cannot start a transaction while holding a system
3513 * file lock. (unless the transaction is nested.)
3514 */
3515 if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) {
3516 if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
3517 panic("hfs_start_transaction: bad lock order (cat before jnl)\n");
3518 }
3519 if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
3520 panic("hfs_start_transaction: bad lock order (attr before jnl)\n");
3521 }
3522 if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
3523 panic("hfs_start_transaction: bad lock order (ext before jnl)\n");
3524 }
3525 }
3526 #endif /* HFS_CHECK_LOCK_ORDER */
3527
3528 again:
3529
3530 if (hfsmp->jnl) {
3531 if (journal_owner(hfsmp->jnl) != thread) {
3532 /*
3533 * The global lock should be held shared if the journal is
3534 * active, to prevent the journal from being disabled. If
3535 * we're not the owner of the journal lock, verify that we're
3536 * not already holding the global lock exclusive before moving on.
3537 */
3538 if (hfsmp->hfs_global_lockowner == thread) {
3539 ret = EBUSY;
3540 goto out;
3541 }
3542
3543 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3544
3545 // Things could have changed
3546 if (!hfsmp->jnl) {
3547 hfs_unlock_global(hfsmp);
3548 goto again;
3549 }
3550
3551 OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
3552 unlock_on_err = 1;
3553 }
3554 } else {
3555 // No journal
3556 if (hfsmp->hfs_global_lockowner != thread) {
3557 hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);
3558
3559 // Things could have changed
3560 if (hfsmp->jnl) {
3561 hfs_unlock_global(hfsmp);
3562 goto again;
3563 }
3564
3565 OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
3566 unlock_on_err = 1;
3567 }
3568 }
3569
3570 /* If a downgrade to read-only mount is in progress, no thread
3571 * other than the downgrade thread is allowed to modify
3572 * the file system.
3573 */
3574 if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) &&
3575 hfsmp->hfs_downgrading_thread != thread) {
3576 ret = EROFS;
3577 goto out;
3578 }
3579
3580 if (hfsmp->jnl) {
3581 ret = journal_start_transaction(hfsmp->jnl);
3582 } else {
3583 ret = 0;
3584 }
3585
3586 if (ret == 0)
3587 ++hfsmp->hfs_transaction_nesting;
3588
3589 out:
3590 if (ret != 0 && unlock_on_err) {
3591 hfs_unlock_global (hfsmp);
3592 OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
3593 }
3594
3595 return ret;
3596 }
3597
3598 int
3599 hfs_end_transaction(struct hfsmount *hfsmp)
3600 {
3601 int ret;
3602
3603 hfs_assert(!hfsmp->jnl || journal_owner(hfsmp->jnl) == current_thread());
3604 hfs_assert(hfsmp->hfs_transaction_nesting > 0);
3605
3606 if (hfsmp->jnl && hfsmp->hfs_transaction_nesting == 1)
3607 hfs_flushvolumeheader(hfsmp, HFS_FVH_FLUSH_IF_DIRTY);
3608
3609 bool need_unlock = !--hfsmp->hfs_transaction_nesting;
3610
3611 if (hfsmp->jnl) {
3612 ret = journal_end_transaction(hfsmp->jnl);
3613 } else {
3614 ret = 0;
3615 }
3616
3617 if (need_unlock) {
3618 OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
3619 hfs_unlock_global (hfsmp);
3620 hfs_sync_ejectable(hfsmp);
3621 }
3622
3623 return ret;
3624 }
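
/*
 * Illustrative usage sketch (not part of the original file; assumes a
 * valid hfsmp and that the caller holds no system-file locks; see
 * hfs_erase_unused_nodes below for a real caller):
 */
#if 0
	if (hfs_start_transaction(hfsmp) == 0) {
		int lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
		/* ... journalled metadata changes go here ... */
		hfs_systemfile_unlock(hfsmp, lockflags);
		hfs_end_transaction(hfsmp);
	}
#endif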
3625
3626
3627 void
3628 hfs_journal_lock(struct hfsmount *hfsmp)
3629 {
3630 /* Only peek at hfsmp->jnl while holding the global lock */
3631 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3632 if (hfsmp->jnl) {
3633 journal_lock(hfsmp->jnl);
3634 }
3635 hfs_unlock_global (hfsmp);
3636 }
3637
3638 void
3639 hfs_journal_unlock(struct hfsmount *hfsmp)
3640 {
3641 /* Only peek at hfsmp->jnl while holding the global lock */
3642 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3643 if (hfsmp->jnl) {
3644 journal_unlock(hfsmp->jnl);
3645 }
3646 hfs_unlock_global (hfsmp);
3647 }
3648
3649 /*
3650 * Flush the contents of the journal to the disk.
3651 *
3652 * - HFS_FLUSH_JOURNAL
3653 * Wait for the in-memory journal to be written to disk
3654 * consistently. The journal may still contain uncommitted
3655 * transactions, and the file system metadata blocks in
3656 * those transactions might be written asynchronously to
3657 * the disk; there is no guarantee that they reach the
3658 * disk before this call returns to the caller.
3659 * Note that this option is sufficient for file system
3660 * data integrity as it guarantees consistent journal
3661 * content on the disk.
3662 *
3663 * - HFS_FLUSH_JOURNAL_META
3664 * Wait for the in-memory journal to be written to disk
3665 * consistently, and also wait for all asynchronous
3666 * metadata blocks to be written consistently to their
3667 * corresponding locations on disk. This is overkill in normal
3668 * scenarios but is useful whenever the metadata blocks
3669 * are required to be consistent on-disk instead of
3670 * just the journal being consistent, e.g. before live
3671 * verification and live volume resizing. The metadata
3672 * update does not include a barrier or track cache flush.
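 *
 * - HFS_FLUSH_JOURNAL_BARRIER
 * HFS_FLUSH_JOURNAL plus a barrier issued to the media;
 * falls back to HFS_FLUSH_FULL below when the device does
 * not support barriers.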
3673 *
3674 * - HFS_FLUSH_FULL
3675 * HFS_FLUSH_JOURNAL + force a track cache flush to media
3676 *
3677 * - HFS_FLUSH_CACHE
3678 * Force a track cache flush to media.
3679 *
3680 * - HFS_FLUSH_BARRIER
3681 * Barrier-only flush to ensure write order
3682 *
3683 */
3684 errno_t hfs_flush(struct hfsmount *hfsmp, hfs_flush_mode_t mode)
3685 {
3686 errno_t error = 0;
3687 int options = 0;
3688 dk_synchronize_t sync_req = { .options = DK_SYNCHRONIZE_OPTION_BARRIER };
3689
3690 switch (mode) {
3691 case HFS_FLUSH_JOURNAL_META:
3692 // wait for journal, metadata blocks and previous async flush to finish
3693 SET(options, JOURNAL_WAIT_FOR_IO);
3694
3695 // no break
3696
3697 case HFS_FLUSH_JOURNAL:
3698 case HFS_FLUSH_JOURNAL_BARRIER:
3699 case HFS_FLUSH_FULL:
3700
3701 if (mode == HFS_FLUSH_JOURNAL_BARRIER &&
3702 !(hfsmp->hfs_flags & HFS_FEATURE_BARRIER))
3703 mode = HFS_FLUSH_FULL;
3704
3705 if (mode == HFS_FLUSH_FULL)
3706 SET(options, JOURNAL_FLUSH_FULL);
3707
3708 /* Only peek at hfsmp->jnl while holding the global lock */
3709 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3710
3711 if (hfsmp->jnl)
3712 error = journal_flush(hfsmp->jnl, options);
3713
3714 hfs_unlock_global (hfsmp);
3715
3716 /*
3717 * This may result in a double barrier as
3718 * journal_flush may have issued a barrier itself
3719 */
3720 if (mode == HFS_FLUSH_JOURNAL_BARRIER)
3721 error = VNOP_IOCTL(hfsmp->hfs_devvp,
3722 DKIOCSYNCHRONIZE, (caddr_t)&sync_req,
3723 FWRITE, NULL);
3724
3725 break;
3726
3727 case HFS_FLUSH_CACHE:
3728 // Do a full sync
3729 sync_req.options = 0;
3730
3731 // no break
3732
3733 case HFS_FLUSH_BARRIER:
3734 // If a barrier-only flush isn't supported, fall back to a full flush.
3735 if (!(hfsmp->hfs_flags & HFS_FEATURE_BARRIER))
3736 sync_req.options = 0;
3737
3738 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZE, (caddr_t)&sync_req,
3739 FWRITE, NULL);
3740 break;
3741
3742 default:
3743 error = EINVAL;
3744 }
3745
3746 return error;
3747 }
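
/*
 * Illustrative sketch (not part of the original file): a caller that
 * wants a durable, ordered flush can use the journal-barrier mode;
 * hfs_flush() itself downgrades it when the device lacks barrier
 * support, as shown above.
 */
#if 0
static errno_t example_durable_flush(struct hfsmount *hfsmp)
{
	/* Flush the journal and order it ahead of later writes. */
	return hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_BARRIER);
}
#endif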
3748
3749 /*
3750 * hfs_erase_unused_nodes
3751 *
3752 * Check whether a volume may suffer from unused Catalog B-tree nodes that
3753 * are not zeroed (due to <rdar://problem/6947811>). If so, just write
3754 * zeroes to the unused nodes.
3755 *
3756 * How do we detect when a volume needs this repair? We can't always be
3757 * certain. If a volume was created after a certain date, then it may have
3758 * been created with the faulty newfs_hfs. Since newfs_hfs only created one
3759 * clump, a Catalog B-tree larger than its clump size implies that the
3760 * entire first clump has been written to; in that case there shouldn't be
3761 * unused, unwritten nodes in the first clump, and this repair is not needed.
3763 *
3764 * We have defined a bit in the Volume Header's attributes to indicate when the
3765 * unused nodes have been repaired. A newer newfs_hfs will set this bit,
3766 * as will fsck_hfs when it repairs the unused nodes.
3767 */
3768 int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
3769 {
3770 int result;
3771 struct filefork *catalog;
3772 int lockflags;
3773
3774 if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
3775 {
3776 /* This volume has already been checked and repaired. */
3777 return 0;
3778 }
3779
3780 if ((hfsmp->localCreateDate < kHFSUnusedNodesFixDate))
3781 {
3782 /* This volume is too old to have had the problem. */
3783 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3784 return 0;
3785 }
3786
3787 catalog = hfsmp->hfs_catalog_cp->c_datafork;
3788 if (catalog->ff_size > catalog->ff_clumpsize)
3789 {
3790 /* The entire first clump must have been in use at some point. */
3791 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3792 return 0;
3793 }
3794
3795 /*
3796 * If we get here, we need to zero out those unused nodes.
3797 *
3798 * We start a transaction and lock the catalog since we're going to be
3799 * making on-disk changes. But note that BTZeroUnusedNodes doesn't actually
3800 * do its writing via the journal, because that would be too much I/O
3801 * to fit in a transaction, and it's a pain to break it up into multiple
3802 * transactions. (It behaves more like growing a B-tree would.)
3803 */
3804 printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
3805 result = hfs_start_transaction(hfsmp);
3806 if (result)
3807 goto done;
3808 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3809 result = BTZeroUnusedNodes(catalog);
3810 vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
3811 hfs_systemfile_unlock(hfsmp, lockflags);
3812 hfs_end_transaction(hfsmp);
3813 if (result == 0)
3814 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3815 printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);
3816
3817 done:
3818 return result;
3819 }
3820
3821
3822 int
3823 check_for_dataless_file(struct vnode *vp, uint64_t op_type)
3824 {
3825 int error;
3826
3827 if (vp == NULL || (VTOC(vp)->c_bsdflags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || decmpfs_cnode_cmp_type(VTOCMP(vp)) != DATALESS_CMPFS_TYPE) {
3828 // there's nothing to do, it's not dataless
3829 return 0;
3830 }
3831
3832 /* Swap files are special; ignore them */
3833 if (vnode_isswap(vp)) {
3834 return 0;
3835 }
3836
3837 // printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp);
3838 error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT);
3839 if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) {
3840 error = 0;
3841 } else if (error) {
3842 if (error == EAGAIN) {
3843 printf("hfs: dataless: timed out waiting for namespace handler...\n");
3844 // XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)?
3845 return 0;
3846 } else if (error == EINTR) {
3847 // printf("hfs: dataless: got a signal while waiting for namespace handler...\n");
3848 return EINTR;
3849 }
3850 } else if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) {
3851 //
3852 // if we're here, the dataless bit is still set on the file
3853 // which means it didn't get handled. we return an error
3854 // but it's presently ignored by all callers of this function.
3855 //
3856 // XXXdbg - EDATANOTPRESENT is what we really need...
3857 //
3858 return EBADF;
3859 }
3860
3861 return error;
3862 }
3863
3864
3865 //
3866 // NOTE: this function takes care of starting a transaction and
3867 // acquiring the systemfile lock so that it can call
3868 // cat_update().
3869 //
3870 // NOTE: do NOT hold any cnode locks while calling this function
3871 // to avoid deadlocks (because we take a lock on the root
3872 // cnode)
3873 //
3874 int
3875 hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid)
3876 {
3877 struct vnode *rvp;
3878 struct cnode *cp;
3879 int error;
3880
3881 error = hfs_vfs_root(HFSTOVFS(hfsmp), &rvp, vfs_context_kernel());
3882 if (error) {
3883 return error;
3884 }
3885
3886 cp = VTOC(rvp);
3887 if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) {
vnode_put(rvp); /* fix: release the root vnode iocount taken above */
3888 return error;
3889 }
3890 struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((void *)((char *)&cp->c_attr.ca_finderinfo + 16));
3891
3892 int lockflags;
3893 if ((error = hfs_start_transaction(hfsmp)) != 0) {
hfs_unlock(cp); /* fix: drop the cnode lock taken above */
vnode_put(rvp); /* fix: release the root vnode iocount */
3894 return error;
3895 }
3896 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3897
3898 if (extinfo->document_id == 0) {
3899 // initialize this to start at 3 (one greater than the root-dir id)
3900 extinfo->document_id = 3;
3901 }
3902
3903 *docid = extinfo->document_id++;
3904
3905 // mark the root cnode dirty
3906 cp->c_flag |= C_MODIFIED;
3907 hfs_update(cp->c_vp, 0);
3908
3909 hfs_systemfile_unlock (hfsmp, lockflags);
3910 (void) hfs_end_transaction(hfsmp);
3911
3912 (void) hfs_unlock(cp);
3913
3914 vnode_put(rvp);
3915 rvp = NULL;
3916
3917 return 0;
3918 }
3919
3920
3921 /*
3922 * Return information about number of file system allocation blocks
3923 * taken by metadata on a volume.
3924 *
3925 * This function populates struct hfsinfo_metadata with allocation blocks
3926 * used by the extents overflow btree, catalog btree, bitmap, attribute
3927 * btree, and journal file, as well as the sum of all of the above.
3928 */
3929 int
3930 hfs_getinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfsinfo_metadata *hinfo)
3931 {
3932 int lockflags = 0;
3933 int ret_lockflags = 0;
3934
3935 /* Zero out the output buffer */
3936 bzero(hinfo, sizeof(struct hfsinfo_metadata));
3937
3938 /*
3939 * Getting the number of allocation blocks for all btrees
3940 * should be a quick operation, so we grab locks for
3941 * all of them at the same time.
3942 */
3943 lockflags = SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE;
3944 ret_lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3945 /*
3946 * Make sure that we were able to acquire all locks requested
3947 * to protect us against conditions like unmount in progress.
3948 */
3949 if ((lockflags & ret_lockflags) != lockflags) {
3950 /* Release any locks that were acquired */
3951 hfs_systemfile_unlock(hfsmp, ret_lockflags);
3952 return EPERM;
3953 }
3954
3955 /* Get information about all the btrees */
3956 hinfo->extents = hfsmp->hfs_extents_cp->c_datafork->ff_blocks;
3957 hinfo->catalog = hfsmp->hfs_catalog_cp->c_datafork->ff_blocks;
3958 hinfo->allocation = hfsmp->hfs_allocation_cp->c_datafork->ff_blocks;
3959 hinfo->attribute = hfsmp->hfs_attribute_cp->c_datafork->ff_blocks;
3960
3961 /* Done with btrees, give up the locks */
3962 hfs_systemfile_unlock(hfsmp, ret_lockflags);
3963
3964 /* Get information about journal file */
3965 hinfo->journal = howmany(hfsmp->jnl_size, hfsmp->blockSize);
3966
3967 /* Calculate total number of metadata blocks */
3968 hinfo->total = hinfo->extents + hinfo->catalog +
3969 hinfo->allocation + hinfo->attribute +
3970 hinfo->journal;
3971
3972 return 0;
3973 }
3974
3975 static int
3976 hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
3977 {
3978 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze 8");
3979
3980 return 0;
3981 }
3982
3983 int hfs_freeze(struct hfsmount *hfsmp)
3984 {
3985 // First make sure some other process isn't freezing
3986 hfs_lock_mount(hfsmp);
3987 while (hfsmp->hfs_freeze_state != HFS_THAWED) {
3988 if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
3989 PWAIT | PCATCH, "hfs freeze 1", NULL) == EINTR) {
3990 hfs_unlock_mount(hfsmp);
3991 return EINTR;
3992 }
3993 }
3994
3995 // Stop new syncers from starting
3996 hfsmp->hfs_freeze_state = HFS_WANT_TO_FREEZE;
3997
3998 // Now wait for all syncers to finish
3999 while (hfsmp->hfs_syncers) {
4000 if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
4001 PWAIT | PCATCH, "hfs freeze 2", NULL) == EINTR) {
4002 hfs_thaw_locked(hfsmp);
4003 hfs_unlock_mount(hfsmp);
4004 return EINTR;
4005 }
4006 }
4007 hfs_unlock_mount(hfsmp);
4008
4009 // flush things before we get started to try and prevent
4010 // dirty data from being paged out while we're frozen.
4011 // note: we can't do this once we're in the freezing state because
4012 // other threads will need to take the global lock
4013 vnode_iterate(hfsmp->hfs_mp, 0, hfs_freezewrite_callback, NULL);
4014
4015 // Block everything in hfs_lock_global now
4016 hfs_lock_mount(hfsmp);
4017 hfsmp->hfs_freeze_state = HFS_FREEZING;
4018 hfsmp->hfs_freezing_thread = current_thread();
4019 hfs_unlock_mount(hfsmp);
4020
4021 /* Take the exclusive lock to flush out anything else that
4022 might have the global lock at the moment and also so we
4023 can flush the journal. */
4024 hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);
4025 journal_flush(hfsmp->jnl, JOURNAL_WAIT_FOR_IO);
4026 hfs_unlock_global(hfsmp);
4027
4028 // don't need to iterate on all vnodes, we just need to
4029 // wait for writes to the system files and the device vnode
4030 //
4031 // Now that journal flush waits for all metadata blocks to
4032 // be written out, waiting for btree writes is probably no
4033 // longer required.
4034 if (HFSTOVCB(hfsmp)->extentsRefNum)
4035 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze 3");
4036 if (HFSTOVCB(hfsmp)->catalogRefNum)
4037 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze 4");
4038 if (HFSTOVCB(hfsmp)->allocationsRefNum)
4039 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze 5");
4040 if (hfsmp->hfs_attribute_vp)
4041 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze 6");
4042 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze 7");
4043
4044 // We're done, mark frozen
4045 hfs_lock_mount(hfsmp);
4046 hfsmp->hfs_freeze_state = HFS_FROZEN;
4047 hfsmp->hfs_freezing_proc = current_proc();
4048 hfs_unlock_mount(hfsmp);
4049
4050 return 0;
4051 }
4052
4053 int hfs_thaw(struct hfsmount *hfsmp, const struct proc *process)
4054 {
4055 hfs_lock_mount(hfsmp);
4056
4057 if (hfsmp->hfs_freeze_state != HFS_FROZEN) {
4058 hfs_unlock_mount(hfsmp);
4059 return EINVAL;
4060 }
4061 if (process && hfsmp->hfs_freezing_proc != process) {
4062 hfs_unlock_mount(hfsmp);
4063 return EPERM;
4064 }
4065
4066 hfs_thaw_locked(hfsmp);
4067
4068 hfs_unlock_mount(hfsmp);
4069
4070 return 0;
4071 }
4072
4073 static void hfs_thaw_locked(struct hfsmount *hfsmp)
4074 {
4075 hfsmp->hfs_freezing_proc = NULL;
4076 hfsmp->hfs_freeze_state = HFS_THAWED;
4077
4078 wakeup(&hfsmp->hfs_freeze_state);
4079 }
4080
4081 uintptr_t obfuscate_addr(void *addr)
4082 {
4083 vm_offset_t new_addr;
4084 vm_kernel_addrperm_external((vm_offset_t)addr, &new_addr);
4085 return new_addr;
4086 }
4087
4088 #if CONFIG_HFS_STD
4089 /*
4090 * Convert HFS encoded string into UTF-8
4091 *
4092 * Unicode output is fully decomposed
4093 * '/' chars are converted to ':'
4094 */
4095 int
4096 hfs_to_utf8(ExtendedVCB *vcb, const Str31 hfs_str, ByteCount maxDstLen, ByteCount *actualDstLen, unsigned char* dstStr)
4097 {
4098 int error;
4099 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
4100 ItemCount uniCount;
4101 size_t utf8len;
4102 hfs_to_unicode_func_t hfs_get_unicode = VCBTOHFS(vcb)->hfs_get_unicode;
4103 u_int8_t pascal_length = 0;
4104
4105 /*
4106 * Validate the length of the Pascal-style string before passing it
4107 * down to the decoding engine.
4108 */
4109 pascal_length = *((const u_int8_t*)(hfs_str));
4110 if (pascal_length > 31) {
4111 /* invalid string; longer than 31 bytes */
4112 error = EINVAL;
4113 return error;
4114 }
4115
4116 error = hfs_get_unicode(hfs_str, uniStr, MAX_HFS_UNICODE_CHARS, &uniCount);
4117
4118 if (uniCount == 0)
4119 error = EINVAL;
4120
4121 if (error == 0) {
4122 error = utf8_encodestr(uniStr, uniCount * sizeof(UniChar), dstStr, &utf8len, maxDstLen , ':', 0);
4123 if (error == ENAMETOOLONG)
4124 *actualDstLen = utf8_encodelen(uniStr, uniCount * sizeof(UniChar), ':', 0);
4125 else
4126 *actualDstLen = utf8len;
4127 }
4128
4129 return error;
4130 }
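
/*
 * Illustrative only (assumes a valid vcb and a Pascal-style Str31 name
 * in hfs_str): converting an HFS name to a NUL-terminated UTF-8 string.
 */
#if 0
	unsigned char utf8name[128];
	ByteCount utf8len = 0;
	if (hfs_to_utf8(vcb, hfs_str, sizeof(utf8name) - 1, &utf8len, utf8name) == 0)
		utf8name[utf8len] = '\0';
#endif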
4131
4132 /*
4133 * Convert UTF-8 string into HFS encoding
4134 *
4135 * ':' chars are converted to '/'
4136 * Assumes input represents fully decomposed Unicode
4137 */
4138 int
4139 utf8_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, const unsigned char* srcStr, Str31 dstStr/*, int retry*/)
4140 {
4141 int error;
4142 UniChar uniStr[MAX_HFS_UNICODE_CHARS];
4143 size_t ucslen;
4144
4145 error = utf8_decodestr(srcStr, srcLen, uniStr, &ucslen, sizeof(uniStr), ':', 0);
4146 if (error == 0)
4147 error = unicode_to_hfs(vcb, ucslen, uniStr, dstStr, 1);
4148
4149 return error;
4150 }
4151
4152 /*
4153 * Convert Unicode string into HFS encoding
4154 *
4155 * ':' chars are converted to '/'
4156 * Assumes input represents fully decomposed Unicode
4157 */
4158 int
4159 unicode_to_hfs(ExtendedVCB *vcb, ByteCount srcLen, u_int16_t* srcStr, Str31 dstStr, int retry)
4160 {
4161 int error;
4162 unicode_to_hfs_func_t hfs_get_hfsname = VCBTOHFS(vcb)->hfs_get_hfsname;
4163
4164 error = hfs_get_hfsname(srcStr, srcLen/sizeof(UniChar), dstStr);
4165 if (error && retry) {
4166 error = unicode_to_mac_roman(srcStr, srcLen/sizeof(UniChar), dstStr);
4167 }
4168 return error;
4169 }
4170
4171 #endif // CONFIG_HFS_STD
4172
4173 static uint64_t hfs_allocated __attribute__((aligned(8)));
4174
4175 #if HFS_MALLOC_DEBUG
4176
4177 #warning HFS_MALLOC_DEBUG is on
4178
4179 #include <libkern/OSDebug.h>
4180 #include "hfs_alloc_trace.h"
4181
4182 struct alloc_debug_header {
4183 uint32_t magic;
4184 uint32_t size;
4185 uint64_t sequence;
4186 LIST_ENTRY(alloc_debug_header) chain;
4187 void *backtrace[HFS_ALLOC_BACKTRACE_LEN];
4188 };
4189
4190 enum {
4191 HFS_ALLOC_MAGIC = 0x68667361, // "hfsa"
4192 HFS_ALLOC_DEAD = 0x68667364, // "hfsd"
4193 };
4194
4195 static LIST_HEAD(, alloc_debug_header) hfs_alloc_list;
4196 static lck_mtx_t *hfs_alloc_mtx;
4197 static int hfs_alloc_tracing;
4198 static uint64_t hfs_alloc_sequence;
4199
4200 void hfs_alloc_trace_enable(void)
4201 {
4202 if (hfs_alloc_tracing)
4203 return;
4204
4205 // Not thread-safe, but this is debug so who cares
4206 extern lck_grp_t *hfs_mutex_group;
4207 extern lck_attr_t *hfs_lock_attr;
4208
4209 if (!hfs_alloc_mtx) {
4210 hfs_alloc_mtx = lck_mtx_alloc_init(hfs_mutex_group, hfs_lock_attr);
4211 LIST_INIT(&hfs_alloc_list);
4212 }
4213
4214 // Using OSCompareAndSwap in lieu of a barrier
4215 OSCompareAndSwap(hfs_alloc_tracing, true, &hfs_alloc_tracing);
4216 }
4217
4218 void hfs_alloc_trace_disable(void)
4219 {
4220 if (!hfs_alloc_tracing)
4221 return;
4222
4223 hfs_alloc_tracing = false;
4224
4225 lck_mtx_lock_spin(hfs_alloc_mtx);
4226
4227 struct alloc_debug_header *hdr;
4228 LIST_FOREACH(hdr, &hfs_alloc_list, chain) {
4229 hdr->chain.le_prev = NULL;
4230 }
4231 LIST_INIT(&hfs_alloc_list);
4232
4233 lck_mtx_unlock(hfs_alloc_mtx);
4234 }
4235
4236 static int hfs_handle_alloc_tracing SYSCTL_HANDLER_ARGS
4237 {
4238 int v = hfs_alloc_tracing;
4239
4240 int err = sysctl_handle_int(oidp, &v, 0, req);
4241 if (err || req->newptr == USER_ADDR_NULL || v == hfs_alloc_tracing)
4242 return err;
4243
4244 if (v)
4245 hfs_alloc_trace_enable();
4246 else
4247 hfs_alloc_trace_disable();
4248
4249 return 0;
4250 }
4251
4252 HFS_SYSCTL(PROC, _vfs_generic_hfs, OID_AUTO, alloc_tracing,
4253 CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED, NULL, 0,
4254 hfs_handle_alloc_tracing, "I", "Allocation tracing")
4255
4256 static int hfs_handle_alloc_trace_info SYSCTL_HANDLER_ARGS
4257 {
4258 if (!hfs_alloc_tracing) {
4259 struct hfs_alloc_trace_info info = {};
4260 return sysctl_handle_opaque(oidp, &info, sizeof(info), req);
4261 }
4262
4263 const int size = 128 * 1024;
4264 struct hfs_alloc_trace_info *info = kalloc(size);
4265
4266 const int max_entries = ((size - sizeof(*info))
4267 / sizeof(struct hfs_alloc_info_entry));
4268
4269 info->entry_count = 0;
4270 info->more = false;
4271
4272 lck_mtx_lock_spin(hfs_alloc_mtx);
4273
4274 struct alloc_debug_header *hdr;
4275 LIST_FOREACH(hdr, &hfs_alloc_list, chain) {
4276 if (info->entry_count == max_entries) {
4277 info->more = true;
4278 break;
4279 }
4280 vm_offset_t o;
4281 vm_kernel_addrperm_external((vm_offset_t)hdr, &o);
4282 info->entries[info->entry_count].ptr = o;
4283 info->entries[info->entry_count].size = hdr->size;
4284 info->entries[info->entry_count].sequence = hdr->sequence;
4285 for (int i = 0; i < HFS_ALLOC_BACKTRACE_LEN; ++i) {
4286 vm_kernel_unslide_or_perm_external((vm_offset_t)hdr->backtrace[i], &o);
4287 info->entries[info->entry_count].backtrace[i] = o;
4288 }
4289 ++info->entry_count;
4290 }
4291
4292 lck_mtx_unlock(hfs_alloc_mtx);
4293
4294 int err = sysctl_handle_opaque(oidp, info,
4295 sizeof(*info) + info->entry_count
4296 * sizeof(struct hfs_alloc_info_entry),
4297 req);
4298
4299 kfree(info, size);
4300
4301 return err;
4302 }
4303
4304 HFS_SYSCTL(PROC, _vfs_generic_hfs, OID_AUTO, alloc_trace_info,
4305 CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_LOCKED, NULL, 0,
4306 hfs_handle_alloc_trace_info, "-", "Allocation trace info")
4307
4308 bool hfs_dump_allocations(void)
4309 {
4310 if (!hfs_allocated)
4311 return false;
4312
4313 lck_mtx_lock(hfs_alloc_mtx);
4314
4315 struct alloc_debug_header *hdr;
4316 LIST_FOREACH(hdr, &hfs_alloc_list, chain) {
4317 vm_offset_t o;
4318 vm_kernel_addrperm_external((vm_offset_t)hdr, &o);
4319 printf(" -- 0x%lx:%llu <%u> --\n", o, hdr->sequence, hdr->size);
4320 for (int j = 0; j < HFS_ALLOC_BACKTRACE_LEN && hdr->backtrace[j]; ++j) {
4321 vm_kernel_unslide_or_perm_external((vm_offset_t)hdr->backtrace[j], &o);
4322 printf("0x%lx\n", o);
4323 }
4324 }
4325
4326 lck_mtx_unlock(hfs_alloc_mtx);
4327
4328 return true;
4329 }
4330
4331 #endif
4332
4333 HFS_SYSCTL(QUAD, _vfs_generic_hfs, OID_AUTO, allocated,
4334 CTLFLAG_RD | CTLFLAG_LOCKED, &hfs_allocated, "Memory allocated")
4335
4336 void *hfs_malloc(size_t size)
4337 {
4338 #if HFS_MALLOC_DEBUG
4339 hfs_assert(size <= 0xffffffff);
4340
4341 struct alloc_debug_header *hdr;
4342
4343 void *ptr;
4344 ptr = kalloc(size + sizeof(*hdr));
4345
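/*
 * Debug layout note: the header is stored as a trailer, i.e.
 * [size payload bytes][struct alloc_debug_header].
 */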
4346 hdr = ptr + size;
4347
4348 hdr->magic = HFS_ALLOC_MAGIC;
4349 hdr->size = size;
4350
4351 if (hfs_alloc_tracing) {
4352 OSBacktrace(hdr->backtrace, HFS_ALLOC_BACKTRACE_LEN);
4353 lck_mtx_lock_spin(hfs_alloc_mtx);
4354 LIST_INSERT_HEAD(&hfs_alloc_list, hdr, chain);
4355 hdr->sequence = ++hfs_alloc_sequence;
4356 lck_mtx_unlock(hfs_alloc_mtx);
4357 } else
4358 hdr->chain.le_prev = NULL;
4359 #else
4360 void *ptr;
4361 ptr = kalloc(size);
4362 #endif
4363
4364 OSAddAtomic64(size, &hfs_allocated);
4365
4366 return ptr;
4367 }
4368
4369 void hfs_free(void *ptr, size_t size)
4370 {
4371 if (!ptr)
4372 return;
4373
4374 OSAddAtomic64(-(int64_t)size, &hfs_allocated);
4375
4376 #if HFS_MALLOC_DEBUG
4377 struct alloc_debug_header *hdr = ptr + size;
4378
4379 hfs_assert(hdr->magic == HFS_ALLOC_MAGIC);
4380 hfs_assert(hdr->size == size);
4381
4382 hdr->magic = HFS_ALLOC_DEAD;
4383
4384 if (hdr->chain.le_prev) {
4385 lck_mtx_lock_spin(hfs_alloc_mtx);
4386 LIST_REMOVE(hdr, chain);
4387 lck_mtx_unlock(hfs_alloc_mtx);
4388 }
4389
4390 kfree(ptr, size + sizeof(*hdr));
4391 #else
4392 kfree(ptr, size);
4393 #endif
4394 }
4395
4396 void *hfs_mallocz(size_t size)
4397 {
4398 void *ptr = hfs_malloc(size);
4399 bzero(ptr, size);
4400 return ptr;
4401 }
4402
4403 // -- Zone allocator-related structures and routines --
4404
4405 hfs_zone_entry_t hfs_zone_entries[HFS_NUM_ZONES] = {
4406 { HFS_CNODE_ZONE, sizeof(struct cnode), "HFS node" },
4407 { HFS_FILEFORK_ZONE, sizeof(struct filefork), "HFS fork" },
4408 { HFS_DIRHINT_ZONE, sizeof(struct directoryhint), "HFS dirhint" }
4409 };
4410
4411 hfs_zone_t hfs_zones[HFS_NUM_ZONES];
4412
4413 void hfs_init_zones(void) {
4414 for (int i = 0; i < HFS_NUM_ZONES; i++) {
4415 hfs_zones[i].hz_zone = zone_create(hfs_zone_entries[i].hze_name,
4416 hfs_zone_entries[i].hze_elem_size, ZC_NOENCRYPT);
4417 }
4418 }
4419
4420 void *hfs_zalloc(hfs_zone_kind_t zone)
4421 {
4422 OSAddAtomic64(hfs_zones[zone].hz_elem_size, &hfs_allocated);
4423
4424 return zalloc(hfs_zones[zone].hz_zone);
4425 }
4426
4427 void hfs_zfree(void *ptr, hfs_zone_kind_t zone)
4428 {
4429 OSAddAtomic64(-(int64_t)hfs_zones[zone].hz_elem_size, &hfs_allocated);
4430
4431 zfree(hfs_zones[zone].hz_zone, ptr);
4432 }
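
/*
 * Illustrative only: an object allocated from one of the zones above
 * must be freed back to the same zone.
 */
#if 0
	struct cnode *cp = hfs_zalloc(HFS_CNODE_ZONE);
	/* ... use cp ... */
	hfs_zfree(cp, HFS_CNODE_ZONE);
#endif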
4433
4434 struct hfs_sysctl_chain *sysctl_list;
4435
4436 void hfs_sysctl_register(void)
4437 {
4438 struct hfs_sysctl_chain *e = sysctl_list;
4439 while (e) {
4440 sysctl_register_oid(e->oid);
4441 e = e->next;
4442 }
4443 }
4444
4445 void hfs_sysctl_unregister(void)
4446 {
4447 struct hfs_sysctl_chain *e = sysctl_list;
4448 while (e) {
4449 sysctl_unregister_oid(e->oid);
4450 e = e->next;
4451 }
4452 }
4453
4454 void hfs_assert_fail(const char *file, unsigned line, const char *expr)
4455 {
4456 Assert(file, line, expr);
4457 __builtin_unreachable();
4458 }