/*
 * Copyright (c) 2000-2014 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* @(#)hfs_vfsutils.c 4.0
 *
 * (c) 1997-2002 Apple Computer, Inc. All Rights Reserved
 *
 * hfs_vfsutils.c -- Routines that go between the HFS layer and the VFS.
 *
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/stat.h>
#include <sys/mount.h>
#include <sys/mount_internal.h>
#include <sys/buf.h>
#include <sys/buf_internal.h>
#include <sys/ubc.h>
#include <sys/unistd.h>
#include <sys/utfconv.h>
#include <sys/kauth.h>
#include <sys/fcntl.h>
#include <sys/fsctl.h>
#include <sys/vnode_internal.h>
#include <kern/clock.h>
#include <stdbool.h>

#include <libkern/OSAtomic.h>

/* for parsing boot-args */
#include <pexpert/pexpert.h>

#if CONFIG_PROTECT
#include <sys/cprotect.h>
#endif

#include "hfs.h"
#include "hfs_catalog.h"
#include "hfs_dbg.h"
#include "hfs_mount.h"
#include "hfs_endian.h"
#include "hfs_cnode.h"
#include "hfs_fsctl.h"

#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfscommon/headers/HFSUnicodeWrappers.h"

/* Enable/disable debugging code for live volume resizing, defined in hfs_resize.c */
extern int hfs_resize_debug;

static void ReleaseMetaFileVNode(struct vnode *vp);
static int hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args);

static u_int32_t hfs_hotfile_freeblocks(struct hfsmount *);
static void hfs_thaw_locked(struct hfsmount *hfsmp);

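/* When set, mount-time failures below log verbose diagnostics via printf. */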
#define HFS_MOUNT_DEBUG 1


//*******************************************************************************
// Note: Finder information in the HFS/HFS+ metadata is considered opaque and
// hence is not in the right byte order on little-endian machines. It is
// the responsibility of the Finder and other clients to swap the data.
//*******************************************************************************

//*******************************************************************************
// Routine: hfs_MountHFSVolume
//
//
//*******************************************************************************
unsigned char hfs_catname[] = "Catalog B-tree";
unsigned char hfs_extname[] = "Extents B-tree";
unsigned char hfs_vbmname[] = "Volume Bitmap";
unsigned char hfs_attrname[] = "Attribute B-tree";
unsigned char hfs_startupname[] = "Startup File";

#if CONFIG_HFS_STD
OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
        __unused struct proc *p)
{
    ExtendedVCB *vcb = HFSTOVCB(hfsmp);
    int error;
    ByteCount utf8chars;
    struct cat_desc cndesc;
    struct cat_attr cnattr;
    struct cat_fork fork;
    int newvnode_flags = 0;

    /* Block size must be a multiple of 512 */
    if (SWAP_BE32(mdb->drAlBlkSiz) == 0 ||
        (SWAP_BE32(mdb->drAlBlkSiz) & 0x01FF) != 0)
        return (EINVAL);

    /* Don't mount a writable volume if it's dirty; it must be cleaned by fsck_hfs. */
    if (((hfsmp->hfs_flags & HFS_READ_ONLY) == 0) &&
        ((SWAP_BE16(mdb->drAtrb) & kHFSVolumeUnmountedMask) == 0)) {
        return (EINVAL);
    }
    hfsmp->hfs_flags |= HFS_STANDARD;
    /*
     * The MDB seems OK: transfer info from it into VCB
     * Note - the VCB starts out clear (all zeros)
     *
     */
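    /* All on-disk MDB fields are big-endian; SWAP_BE* converts them to host byte order. */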
    vcb->vcbSigWord = SWAP_BE16 (mdb->drSigWord);
    vcb->hfs_itime = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drCrDate)));
    vcb->localCreateDate = SWAP_BE32 (mdb->drCrDate);
    vcb->vcbLsMod = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drLsMod)));
    vcb->vcbAtrb = SWAP_BE16 (mdb->drAtrb);
    vcb->vcbNmFls = SWAP_BE16 (mdb->drNmFls);
    vcb->vcbVBMSt = SWAP_BE16 (mdb->drVBMSt);
    vcb->nextAllocation = SWAP_BE16 (mdb->drAllocPtr);
    vcb->totalBlocks = SWAP_BE16 (mdb->drNmAlBlks);
    vcb->allocLimit = vcb->totalBlocks;
    vcb->blockSize = SWAP_BE32 (mdb->drAlBlkSiz);
    vcb->vcbClpSiz = SWAP_BE32 (mdb->drClpSiz);
    vcb->vcbAlBlSt = SWAP_BE16 (mdb->drAlBlSt);
    vcb->vcbNxtCNID = SWAP_BE32 (mdb->drNxtCNID);
    vcb->freeBlocks = SWAP_BE16 (mdb->drFreeBks);
    vcb->vcbVolBkUp = to_bsd_time(LocalToUTC(SWAP_BE32(mdb->drVolBkUp)));
    vcb->vcbWrCnt = SWAP_BE32 (mdb->drWrCnt);
    vcb->vcbNmRtDirs = SWAP_BE16 (mdb->drNmRtDirs);
    vcb->vcbFilCnt = SWAP_BE32 (mdb->drFilCnt);
    vcb->vcbDirCnt = SWAP_BE32 (mdb->drDirCnt);
    bcopy(mdb->drFndrInfo, vcb->vcbFndrInfo, sizeof(vcb->vcbFndrInfo));
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
        vcb->vcbWrCnt++;    /* Compensate for write of MDB on last flush */

    /* convert hfs encoded name into UTF-8 string */
    error = hfs_to_utf8(vcb, mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
    /*
     * When an HFS name cannot be encoded with the current
     * volume encoding we use MacRoman as a fallback.
     */
    if (error || (utf8chars == 0)) {
        error = mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
        /* If we fail to encode to UTF8 from Mac Roman, the name is bad.  Deny the mount */
        if (error) {
            goto MtVolErr;
        }
    }

    hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
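    /* HFS standard does volume-bitmap I/O in 512-byte (kHFSBlockSize) chunks. */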
    vcb->vcbVBMIOSize = kHFSBlockSize;

    /* Generate the partition-based AVH location */
    hfsmp->hfs_partition_avh_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
            hfsmp->hfs_logical_block_count);

    /* HFS standard is read-only, so just stuff the FS location in here, too */
    hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector;

    bzero(&cndesc, sizeof(cndesc));
    cndesc.cd_parentcnid = kHFSRootParentID;
    cndesc.cd_flags |= CD_ISMETA;
    bzero(&cnattr, sizeof(cnattr));
    cnattr.ca_linkcount = 1;
    cnattr.ca_mode = S_IFREG;
    bzero(&fork, sizeof(fork));

    /*
     * Set up Extents B-tree vnode
     */
    cndesc.cd_nameptr = hfs_extname;
    cndesc.cd_namelen = strlen((char *)hfs_extname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
    fork.cf_size = SWAP_BE32(mdb->drXTFlSize);
    fork.cf_blocks = fork.cf_size / vcb->blockSize;
    fork.cf_clump = SWAP_BE32(mdb->drXTClpSiz);
    fork.cf_vblocks = 0;
    fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drXTExtRec[0].startBlock);
    fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drXTExtRec[0].blockCount);
    fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drXTExtRec[1].startBlock);
    fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drXTExtRec[1].blockCount);
    fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drXTExtRec[2].startBlock);
    fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drXTExtRec[2].blockCount);
    cnattr.ca_blocks = fork.cf_blocks;

    error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
                            &hfsmp->hfs_extents_vp, &newvnode_flags);
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error creating Ext Vnode (%d) \n", error);
        }
        goto MtVolErr;
    }
    error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
                                     (KeyCompareProcPtr)CompareExtentKeys));
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error opening Ext Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);

    /*
     * Set up Catalog B-tree vnode...
     */
    cndesc.cd_nameptr = hfs_catname;
    cndesc.cd_namelen = strlen((char *)hfs_catname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;
    fork.cf_size = SWAP_BE32(mdb->drCTFlSize);
    fork.cf_blocks = fork.cf_size / vcb->blockSize;
    fork.cf_clump = SWAP_BE32(mdb->drCTClpSiz);
    fork.cf_vblocks = 0;
    fork.cf_extents[0].startBlock = SWAP_BE16(mdb->drCTExtRec[0].startBlock);
    fork.cf_extents[0].blockCount = SWAP_BE16(mdb->drCTExtRec[0].blockCount);
    fork.cf_extents[1].startBlock = SWAP_BE16(mdb->drCTExtRec[1].startBlock);
    fork.cf_extents[1].blockCount = SWAP_BE16(mdb->drCTExtRec[1].blockCount);
    fork.cf_extents[2].startBlock = SWAP_BE16(mdb->drCTExtRec[2].startBlock);
    fork.cf_extents[2].blockCount = SWAP_BE16(mdb->drCTExtRec[2].blockCount);
    cnattr.ca_blocks = fork.cf_blocks;

    error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
                            &hfsmp->hfs_catalog_vp, &newvnode_flags);
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error creating catalog Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    error = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                                     (KeyCompareProcPtr)CompareCatalogKeys));
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error opening catalog Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);

    /*
     * Set up dummy Allocation file vnode (used only for locking bitmap)
     */
    cndesc.cd_nameptr = hfs_vbmname;
    cndesc.cd_namelen = strlen((char *)hfs_vbmname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;
    bzero(&fork, sizeof(fork));
    cnattr.ca_blocks = 0;

    error = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &fork,
                            &hfsmp->hfs_allocation_vp, &newvnode_flags);
    if (error) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error creating bitmap Vnode (%d) \n", error);
        }
        hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
        hfs_unlock(VTOC(hfsmp->hfs_extents_vp));
        goto MtVolErr;
    }
    hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);

    /* mark the volume dirty (clear clean unmount bit) */
    vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;

    if (error == noErr) {
        error = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, NULL, NULL, NULL);
        /* Only log when the lookup actually failed. */
        if (error && HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfs (std): error looking up root folder (%d) \n", error);
        }
    }

    if (error == noErr) {
        /* If the disk isn't write protected.. */
        if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask)) {
            MarkVCBDirty (vcb);     // mark VCB dirty so it will be written
        }
    }

    /*
     * all done with system files so we can unlock now...
     */
    hfs_unlock(VTOC(hfsmp->hfs_allocation_vp));
    hfs_unlock(VTOC(hfsmp->hfs_catalog_vp));
    hfs_unlock(VTOC(hfsmp->hfs_extents_vp));

    if (error == noErr) {
        /* If successful, then we can just return once we've unlocked the cnodes */
        return error;
    }

    //-- Release any resources allocated so far before exiting with an error:
MtVolErr:
    hfsUnmount(hfsmp, NULL);

    return (error);
}

#endif

//*******************************************************************************
// Routine: hfs_MountHFSPlusVolume
//
//
//*******************************************************************************

OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
        off_t embeddedOffset, u_int64_t disksize, __unused struct proc *p, void *args, kauth_cred_t cred)
{
    register ExtendedVCB *vcb;
    struct cat_desc cndesc;
    struct cat_attr cnattr;
    struct cat_fork cfork;
    u_int32_t blockSize;
    daddr64_t spare_sectors;
    struct BTreeInfoRec btinfo;
    u_int16_t signature;
    u_int16_t hfs_version;
    int newvnode_flags = 0;
    int i;
    OSErr retval;
    char converted_volname[256];
    size_t volname_length = 0;
    size_t conv_volname_length = 0;

    signature = SWAP_BE16(vhp->signature);
    hfs_version = SWAP_BE16(vhp->version);

    if (signature == kHFSPlusSigWord) {
        if (hfs_version != kHFSPlusVersion) {
            printf("hfs_mount: invalid HFS+ version: %x\n", hfs_version);
            return (EINVAL);
        }
    } else if (signature == kHFSXSigWord) {
        if (hfs_version != kHFSXVersion) {
            printf("hfs_mount: invalid HFSX version: %x\n", hfs_version);
            return (EINVAL);
        }
        /* The in-memory signature is always 'H+'. */
        signature = kHFSPlusSigWord;
        hfsmp->hfs_flags |= HFS_X;
    } else {
        /* Removed printf for invalid HFS+ signature because it gives
         * a false error for a UFS root volume.
         */
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: unknown Volume Signature : %x\n", signature);
        }
        return (EINVAL);
    }

    /* Block size must be at least 512 and a power of 2 */
    blockSize = SWAP_BE32(vhp->blockSize);
    if (blockSize < 512 || !powerof2(blockSize)) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: invalid blocksize (%d) \n", blockSize);
        }
        return (EINVAL);
    }

    /* Don't mount a writable volume if it's dirty; it must be cleaned by fsck_hfs. */
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0 && hfsmp->jnl == NULL &&
        (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) == 0) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: cannot mount dirty non-journaled volumes\n");
        }
        return (EINVAL);
    }

    /* Make sure we can live with the physical block size. */
    if ((disksize & (hfsmp->hfs_logical_block_size - 1)) ||
        (embeddedOffset & (hfsmp->hfs_logical_block_size - 1)) ||
        (blockSize < hfsmp->hfs_logical_block_size)) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: invalid physical blocksize (%d), hfs_logical_blocksize (%d) \n",
                    blockSize, hfsmp->hfs_logical_block_size);
        }
        return (ENXIO);
    }

    /* If the allocation block size is less than the physical
     * block size, we assume that the physical block size
     * is the same as the logical block size. The physical block
     * size value is used to round down the offsets for
     * reading and writing the primary and alternate volume
     * headers at a physical block boundary, and it will cause
     * problems if it is less than the block size.
     */
    if (blockSize < hfsmp->hfs_physical_block_size) {
        hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
        hfsmp->hfs_log_per_phys = 1;
    }

    /*
     * The VolumeHeader seems OK: transfer info from it into VCB
     * Note - the VCB starts out clear (all zeros)
     */
    vcb = HFSTOVCB(hfsmp);

    vcb->vcbSigWord = signature;
    vcb->vcbJinfoBlock = SWAP_BE32(vhp->journalInfoBlock);
    vcb->vcbLsMod = to_bsd_time(SWAP_BE32(vhp->modifyDate));
    vcb->vcbAtrb = SWAP_BE32(vhp->attributes);
    vcb->vcbClpSiz = SWAP_BE32(vhp->rsrcClumpSize);
    vcb->vcbNxtCNID = SWAP_BE32(vhp->nextCatalogID);
    vcb->vcbVolBkUp = to_bsd_time(SWAP_BE32(vhp->backupDate));
    vcb->vcbWrCnt = SWAP_BE32(vhp->writeCount);
    vcb->vcbFilCnt = SWAP_BE32(vhp->fileCount);
    vcb->vcbDirCnt = SWAP_BE32(vhp->folderCount);

    /* copy 32 bytes of Finder info */
    bcopy(vhp->finderInfo, vcb->vcbFndrInfo, sizeof(vhp->finderInfo));

    vcb->vcbAlBlSt = 0;     /* hfs+ allocation blocks start at first block of volume */
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
        vcb->vcbWrCnt++;    /* compensate for write of Volume Header on last flush */

    /* Now fill in the Extended VCB info */
    vcb->nextAllocation = SWAP_BE32(vhp->nextAllocation);
    vcb->totalBlocks = SWAP_BE32(vhp->totalBlocks);
    vcb->allocLimit = vcb->totalBlocks;
    vcb->freeBlocks = SWAP_BE32(vhp->freeBlocks);
    vcb->blockSize = blockSize;
    vcb->encodingsBitmap = SWAP_BE64(vhp->encodingsBitmap);
    vcb->localCreateDate = SWAP_BE32(vhp->createDate);

    vcb->hfsPlusIOPosOffset = embeddedOffset;

    /* Default to no free block reserve */
    vcb->reserveBlocks = 0;

    /*
     * Update the logical block size in the mount struct
     * (currently set up from the wrapper MDB) using the
     * new blocksize value:
     */
    hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
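    /* vcbVBMIOSize bounds the size of each bitmap I/O; cap it at MAXPHYSIO so
     * a single bitmap read never exceeds the maximum physical transfer size. */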
    vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO);

    /*
     * Validate and initialize the location of the alternate volume header.
     *
     * Note that there may be spare sectors beyond the end of the filesystem that still
     * belong to our partition.
     */

    spare_sectors = hfsmp->hfs_logical_block_count -
                    (((daddr64_t)vcb->totalBlocks * blockSize) /
                       hfsmp->hfs_logical_block_size);

    /*
     * Differentiate between "innocuous" spare sectors and the more unusual
     * degenerate case:
     *
     * *** Innocuous spare sectors exist if:
     *
     * A) the number of bytes assigned to the partition (by multiplying logical
     * block size * logical block count) is greater than the filesystem size
     * (by multiplying allocation block count and allocation block size)
     *
     * and
     *
     * B) the remainder is less than the size of a full allocation block's worth of bytes.
     *
     * This handles the normal case where there may be a few extra sectors, but the two
     * are fundamentally in sync.
     *
     * *** Degenerate spare sectors exist if:
     * A) The number of bytes assigned to the partition (by multiplying logical
     * block size * logical block count) is greater than the filesystem size
     * (by multiplying allocation block count and block size).
     *
     * and
     *
     * B) the remainder is greater than a full allocation block's worth of bytes.
     * In this case, a smaller file system exists in a larger partition.
     * This can happen in various ways, including when a volume is resized but the
     * partition is yet to be resized. Under this condition, we have to assume that
     * partition management software may resize the partition to match
     * the file system size in the future. Therefore we should update the
     * alternate volume header at two locations on the disk:
     *   a. 1024 bytes before the end of the partition
     *   b. 1024 bytes before the end of the file system
     */

    if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) {
        /*
         * Handle the degenerate case above. FS < partition size.
         * AVH located at 1024 bytes from the end of the partition
         */
        hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
                HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);

        /* AVH located at 1024 bytes from the end of the filesystem */
        hfsmp->hfs_fs_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
                HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
                        (((daddr64_t)vcb->totalBlocks * blockSize) / hfsmp->hfs_logical_block_size));
    }
    else {
        /* Innocuous spare sectors; Partition & FS notion are in sync */
        hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
                HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);

        hfsmp->hfs_fs_avh_sector = hfsmp->hfs_partition_avh_sector;
    }
    if (hfs_resize_debug) {
        printf ("hfs_MountHFSPlusVolume: partition_avh_sector=%qu, fs_avh_sector=%qu\n",
                hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector);
    }

    bzero(&cndesc, sizeof(cndesc));
    cndesc.cd_parentcnid = kHFSRootParentID;
    cndesc.cd_flags |= CD_ISMETA;
    bzero(&cnattr, sizeof(cnattr));
    cnattr.ca_linkcount = 1;
    cnattr.ca_mode = S_IFREG;

    /*
     * Set up Extents B-tree vnode
     */
    cndesc.cd_nameptr = hfs_extname;
    cndesc.cd_namelen = strlen((char *)hfs_extname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;

    cfork.cf_size = SWAP_BE64 (vhp->extentsFile.logicalSize);
    cfork.cf_new_size = 0;
    cfork.cf_clump = SWAP_BE32 (vhp->extentsFile.clumpSize);
    cfork.cf_blocks = SWAP_BE32 (vhp->extentsFile.totalBlocks);
    cfork.cf_vblocks = 0;
    cnattr.ca_blocks = cfork.cf_blocks;
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        cfork.cf_extents[i].startBlock =
                SWAP_BE32 (vhp->extentsFile.extents[i].startBlock);
        cfork.cf_extents[i].blockCount =
                SWAP_BE32 (vhp->extentsFile.extents[i].blockCount);
    }
    retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                             &hfsmp->hfs_extents_vp, &newvnode_flags);
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting extentoverflow BT\n", retval);
        }
        goto ErrorExit;
    }
    hfsmp->hfs_extents_cp = VTOC(hfsmp->hfs_extents_vp);
    hfs_unlock(hfsmp->hfs_extents_cp);

    retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_extents_vp),
                                      (KeyCompareProcPtr) CompareExtentKeysPlus));
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting extentoverflow BT\n", retval);
        }
        goto ErrorExit;
    }
    /*
     * Set up Catalog B-tree vnode
     */
    cndesc.cd_nameptr = hfs_catname;
    cndesc.cd_namelen = strlen((char *)hfs_catname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSCatalogFileID;

    cfork.cf_size = SWAP_BE64 (vhp->catalogFile.logicalSize);
    cfork.cf_clump = SWAP_BE32 (vhp->catalogFile.clumpSize);
    cfork.cf_blocks = SWAP_BE32 (vhp->catalogFile.totalBlocks);
    cfork.cf_vblocks = 0;
    cnattr.ca_blocks = cfork.cf_blocks;
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        cfork.cf_extents[i].startBlock =
                SWAP_BE32 (vhp->catalogFile.extents[i].startBlock);
        cfork.cf_extents[i].blockCount =
                SWAP_BE32 (vhp->catalogFile.extents[i].blockCount);
    }
    retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                             &hfsmp->hfs_catalog_vp, &newvnode_flags);
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting catalog BT\n", retval);
        }
        goto ErrorExit;
    }
    hfsmp->hfs_catalog_cp = VTOC(hfsmp->hfs_catalog_vp);
    hfs_unlock(hfsmp->hfs_catalog_cp);

    retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                                      (KeyCompareProcPtr) CompareExtendedCatalogKeys));
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting catalog BT\n", retval);
        }
        goto ErrorExit;
    }
    if ((hfsmp->hfs_flags & HFS_X) &&
        BTGetInformation(VTOF(hfsmp->hfs_catalog_vp), 0, &btinfo) == 0) {
        if (btinfo.keyCompareType == kHFSBinaryCompare) {
            hfsmp->hfs_flags |= HFS_CASE_SENSITIVE;
            /* Install a case-sensitive key compare */
            (void) BTOpenPath(VTOF(hfsmp->hfs_catalog_vp),
                              (KeyCompareProcPtr)cat_binarykeycompare);
        }
    }

    /*
     * Set up Allocation file vnode
     */
    cndesc.cd_nameptr = hfs_vbmname;
    cndesc.cd_namelen = strlen((char *)hfs_vbmname);
    cndesc.cd_cnid = cnattr.ca_fileid = kHFSAllocationFileID;

    cfork.cf_size = SWAP_BE64 (vhp->allocationFile.logicalSize);
    cfork.cf_clump = SWAP_BE32 (vhp->allocationFile.clumpSize);
    cfork.cf_blocks = SWAP_BE32 (vhp->allocationFile.totalBlocks);
    cfork.cf_vblocks = 0;
    cnattr.ca_blocks = cfork.cf_blocks;
    for (i = 0; i < kHFSPlusExtentDensity; i++) {
        cfork.cf_extents[i].startBlock =
                SWAP_BE32 (vhp->allocationFile.extents[i].startBlock);
        cfork.cf_extents[i].blockCount =
                SWAP_BE32 (vhp->allocationFile.extents[i].blockCount);
    }
    retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                             &hfsmp->hfs_allocation_vp, &newvnode_flags);
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting bitmap\n", retval);
        }
        goto ErrorExit;
    }
    hfsmp->hfs_allocation_cp = VTOC(hfsmp->hfs_allocation_vp);
    hfs_unlock(hfsmp->hfs_allocation_cp);

    /*
     * Set up Attribute B-tree vnode
     */
    if (vhp->attributesFile.totalBlocks != 0) {
        cndesc.cd_nameptr = hfs_attrname;
        cndesc.cd_namelen = strlen((char *)hfs_attrname);
        cndesc.cd_cnid = cnattr.ca_fileid = kHFSAttributesFileID;

        cfork.cf_size = SWAP_BE64 (vhp->attributesFile.logicalSize);
        cfork.cf_clump = SWAP_BE32 (vhp->attributesFile.clumpSize);
        cfork.cf_blocks = SWAP_BE32 (vhp->attributesFile.totalBlocks);
        cfork.cf_vblocks = 0;
        cnattr.ca_blocks = cfork.cf_blocks;
        for (i = 0; i < kHFSPlusExtentDensity; i++) {
            cfork.cf_extents[i].startBlock =
                    SWAP_BE32 (vhp->attributesFile.extents[i].startBlock);
            cfork.cf_extents[i].blockCount =
                    SWAP_BE32 (vhp->attributesFile.extents[i].blockCount);
        }
        retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                                 &hfsmp->hfs_attribute_vp, &newvnode_flags);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting EA BT\n", retval);
            }
            goto ErrorExit;
        }
        hfsmp->hfs_attribute_cp = VTOC(hfsmp->hfs_attribute_vp);
        hfs_unlock(hfsmp->hfs_attribute_cp);
        retval = MacToVFSError(BTOpenPath(VTOF(hfsmp->hfs_attribute_vp),
                                          (KeyCompareProcPtr) hfs_attrkeycompare));
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: BTOpenPath returned (%d) getting EA BT\n", retval);
            }
            goto ErrorExit;
        }

        /* Initialize a vnode for the virtual attribute data file that spans
         * the entire file system space, used for performing I/O to the
         * attribute B-tree. We hold an iocount on the attrdata vnode for the
         * entire duration of the mount (similar to the B-tree vnodes).
         */
        retval = init_attrdata_vnode(hfsmp);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: init_attrdata_vnode returned (%d) for virtual EA file\n", retval);
            }
            goto ErrorExit;
        }
    }

    /*
     * Set up Startup file vnode
     */
    if (vhp->startupFile.totalBlocks != 0) {
        cndesc.cd_nameptr = hfs_startupname;
        cndesc.cd_namelen = strlen((char *)hfs_startupname);
        cndesc.cd_cnid = cnattr.ca_fileid = kHFSStartupFileID;

        cfork.cf_size = SWAP_BE64 (vhp->startupFile.logicalSize);
        cfork.cf_clump = SWAP_BE32 (vhp->startupFile.clumpSize);
        cfork.cf_blocks = SWAP_BE32 (vhp->startupFile.totalBlocks);
        cfork.cf_vblocks = 0;
        cnattr.ca_blocks = cfork.cf_blocks;
        for (i = 0; i < kHFSPlusExtentDensity; i++) {
            cfork.cf_extents[i].startBlock =
                    SWAP_BE32 (vhp->startupFile.extents[i].startBlock);
            cfork.cf_extents[i].blockCount =
                    SWAP_BE32 (vhp->startupFile.extents[i].blockCount);
        }
        retval = hfs_getnewvnode(hfsmp, NULL, NULL, &cndesc, 0, &cnattr, &cfork,
                                 &hfsmp->hfs_startup_vp, &newvnode_flags);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_getnewvnode returned (%d) getting startup file\n", retval);
            }
            goto ErrorExit;
        }
        hfsmp->hfs_startup_cp = VTOC(hfsmp->hfs_startup_vp);
        hfs_unlock(hfsmp->hfs_startup_cp);
    }

    /*
     * Pick up volume name and create date
     *
     * Acquiring the volume name should not manipulate the bitmap, only the catalog
     * btree and possibly the extents overflow b-tree.
     */
    retval = cat_idlookup(hfsmp, kHFSRootFolderID, 0, 0, &cndesc, &cnattr, NULL);
    if (retval) {
        if (HFS_MOUNT_DEBUG) {
            printf("hfs_mounthfsplus: cat_idlookup returned (%d) getting rootfolder \n", retval);
        }
        goto ErrorExit;
    }
    vcb->hfs_itime = cnattr.ca_itime;
    vcb->volumeNameEncodingHint = cndesc.cd_encoding;
    bcopy(cndesc.cd_nameptr, vcb->vcbVN, min(255, cndesc.cd_namelen));
    volname_length = strlen ((const char*)vcb->vcbVN);
    cat_releasedesc(&cndesc);

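/* Private ioctl understood by CoreStorage to set the logical volume name;
 * defined locally here rather than included from a header. */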
#define DKIOCCSSETLVNAME _IOW('d', 198, char[256])


    /* Send the volume name down to CoreStorage if necessary */
    retval = utf8_normalizestr(vcb->vcbVN, volname_length, (u_int8_t*)converted_volname, &conv_volname_length, 256, UTF_PRECOMPOSED);
    if (retval == 0) {
        (void) VNOP_IOCTL (hfsmp->hfs_devvp, DKIOCCSSETLVNAME, converted_volname, 0, vfs_context_current());
    }

    /* Reset retval to 0; we don't care about errors in volname conversion. */
    retval = 0;


    /*
     * We now always initiate a full bitmap scan even if the volume is read-only because this is
     * our only shot to do I/Os of dramatically different sizes than what the buffer cache ordinarily
     * expects. TRIMs will not be delivered to the underlying media if the volume is not
     * read-write though.
     */
    thread_t allocator_scanner;
    hfsmp->scan_var = 0;

    /* Take the HFS mount mutex and wait on scan_var */
    hfs_lock_mount (hfsmp);

    kernel_thread_start ((thread_continue_t) hfs_scan_blocks, hfsmp, &allocator_scanner);
    /* Wait until it registers that it's got the appropriate locks */
    while ((hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) == 0) {
        (void) msleep (&hfsmp->scan_var, &hfsmp->hfs_mutex, (PDROP | PINOD), "hfs_scan_blocks", 0);
        if (hfsmp->scan_var & HFS_ALLOCATOR_SCAN_INFLIGHT) {
            break;
        }
        else {
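            /* msleep() was called with PDROP, so hfs_mutex was released on
             * wakeup; re-take it before checking and sleeping again. */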
            hfs_lock_mount (hfsmp);
        }
    }

    thread_deallocate (allocator_scanner);

    /* mark the volume dirty (clear clean unmount bit) */
    vcb->vcbAtrb &= ~kHFSVolumeUnmountedMask;
    if (hfsmp->jnl && (hfsmp->hfs_flags & HFS_READ_ONLY) == 0) {
        hfs_flushvolumeheader(hfsmp, TRUE, 0);
    }

    /* kHFSHasFolderCount is only supported/updated on HFSX volumes */
    if ((hfsmp->hfs_flags & HFS_X) != 0) {
        hfsmp->hfs_flags |= HFS_FOLDERCOUNT;
    }

    //
    // Check if we need to do late journal initialization.  This only
    // happens if a previous version of MacOS X (or 9) touched the disk.
    // In that case hfs_late_journal_init() will go re-locate the journal
    // and journal_info_block files and validate that they're still kosher.
    //
    if (   (vcb->vcbAtrb & kHFSVolumeJournaledMask)
        && (SWAP_BE32(vhp->lastMountedVersion) != kHFSJMountVersion)
        && (hfsmp->jnl == NULL)) {

        retval = hfs_late_journal_init(hfsmp, vhp, args);
        if (retval != 0) {
            if (retval == EROFS) {
                // EROFS is a special error code that means the volume has an external
                // journal which we couldn't find.  in that case we do not want to
                // rewrite the volume header - we'll just refuse to mount the volume.
                if (HFS_MOUNT_DEBUG) {
                    printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d), maybe an external jnl?\n", retval);
                }
                retval = EINVAL;
                goto ErrorExit;
            }

            hfsmp->jnl = NULL;

            // if the journal failed to open, then set the lastMountedVersion
            // to be "FSK!" which fsck_hfs will see and force the fsck instead
            // of just bailing out because the volume is journaled.
            if (!(hfsmp->hfs_flags & HFS_READ_ONLY)) {
                HFSPlusVolumeHeader *jvhp;
                daddr64_t mdb_offset;
                struct buf *bp = NULL;

                hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;

                mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize));

                bp = NULL;
                retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
                        HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
                        hfsmp->hfs_physical_block_size, cred, &bp);
                if (retval == 0) {
                    jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));

                    if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
                        printf ("hfs(3): Journal replay fail.  Writing lastMountVersion as FSK!\n");
                        jvhp->lastMountedVersion = SWAP_BE32(kFSKMountVersion);
                        buf_bwrite(bp);
                    } else {
                        buf_brelse(bp);
                    }
                    bp = NULL;
                } else if (bp) {
                    buf_brelse(bp);
                    // clear this so the error exit path won't try to use it
                    bp = NULL;
                }
            }

            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_late_journal_init returned (%d)\n", retval);
            }
            retval = EINVAL;
            goto ErrorExit;
        } else if (hfsmp->jnl) {
            vfs_setflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
        }
    } else if (hfsmp->jnl || ((vcb->vcbAtrb & kHFSVolumeJournaledMask) && (hfsmp->hfs_flags & HFS_READ_ONLY))) {
        struct cat_attr jinfo_attr, jnl_attr;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
        }

        // if we're here we need to fill in the fileid's for the
        // journal and journal_info_block.
        hfsmp->hfs_jnlinfoblkid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jinfo_attr, NULL);
        hfsmp->hfs_jnlfileid = GetFileInfo(vcb, kRootDirID, ".journal", &jnl_attr, NULL);
        if (hfsmp->hfs_jnlinfoblkid == 0 || hfsmp->hfs_jnlfileid == 0) {
            printf("hfs: danger! couldn't find the file-id's for the journal or journal_info_block\n");
            printf("hfs: jnlfileid %d, jnlinfoblkid %d\n", hfsmp->hfs_jnlfileid, hfsmp->hfs_jnlinfoblkid);
        }

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            vcb->vcbAtrb |= kHFSVolumeJournaledMask;
        }

        if (hfsmp->jnl == NULL) {
            vfs_clearflags(hfsmp->hfs_mp, (u_int64_t)((unsigned int)MNT_JOURNALED));
        }
    }

    if ( !(vcb->vcbAtrb & kHFSVolumeHardwareLockMask) )     // if the disk is not write protected
    {
        MarkVCBDirty( vcb );    // mark VCB dirty so it will be written
    }

    /*
     * Distinguish three potential cases involving content protection:
     * 1. mount point bit set; vcbAtrb does not support it. Fail.
     * 2. mount point bit set; vcbAtrb supports it. We're good.
     * 3. mount point bit not set; vcbAtrb supports it. Turn the bit on, then we're good.
     */
    if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
        /* Does the filesystem support it? */
        if ((vcb->vcbAtrb & kHFSContentProtectionMask) == 0) {
            /* Case 1 above */
            retval = EINVAL;
            goto ErrorExit;
        }
    }
    else {
        /* not requested in the mount point. Is it in FS? */
        if (vcb->vcbAtrb & kHFSContentProtectionMask) {
            /* Case 3 above */
            vfs_setflags (hfsmp->hfs_mp, MNT_CPROTECT);
        }
    }

    /* At this point, if the mount point flag is set, we can enable it. */
    if (vfs_flags(hfsmp->hfs_mp) & MNT_CPROTECT) {
        /* Cases 2+3 above */
#if CONFIG_PROTECT
        /* Get the EAs as needed. */
        int cperr = 0;
        uint16_t majorversion;
        uint16_t minorversion;
        uint64_t flags;
        uint8_t cryptogen = 0;
        struct cp_root_xattr *xattr = NULL;
        MALLOC (xattr, struct cp_root_xattr*, sizeof(struct cp_root_xattr), M_TEMP, M_WAITOK);
        if (xattr == NULL) {
            retval = ENOMEM;
            goto ErrorExit;
        }
        bzero (xattr, sizeof(struct cp_root_xattr));

        /* go get the EA to get the version information */
        cperr = cp_getrootxattr (hfsmp, xattr);
        /*
         * If there was no EA there, then write one out.
         * A missing EA on the root means this is an erase
         * install or a very old FS.
         */

        if (cperr == 0) {
            /* Have to run a valid CP version. */
            if ((xattr->major_version < CP_PREV_MAJOR_VERS) || (xattr->major_version > CP_NEW_MAJOR_VERS)) {
                cperr = EINVAL;
            }
        }
        else if (cperr == ENOATTR) {
            printf("No root EA set, creating new EA with new version: %d\n", CP_NEW_MAJOR_VERS);
            bzero(xattr, sizeof(struct cp_root_xattr));
            xattr->major_version = CP_NEW_MAJOR_VERS;
            xattr->minor_version = CP_MINOR_VERS;
            cperr = cp_setrootxattr (hfsmp, xattr);
        }
        majorversion = xattr->major_version;
        minorversion = xattr->minor_version;
        flags = xattr->flags;
        if (xattr->flags & CP_ROOT_CRYPTOG1) {
            cryptogen = 1;
        }

        if (xattr) {
            FREE(xattr, M_TEMP);
        }

        /* Recheck for good status */
        if (cperr == 0) {
            /* If we got here, then the CP version is valid. Set it in the mount point */
            hfsmp->hfs_running_cp_major_vers = majorversion;
            printf("Running with CP root xattr: %d.%d\n", majorversion, minorversion);
            hfsmp->cproot_flags = flags;
            hfsmp->cp_crypto_generation = cryptogen;

            /*
             * Acquire the boot-arg for the AKS default key; if invalid, obtain from the device tree.
             * Ensure that the boot-arg's value is valid for FILES (not directories),
             * since only files are actually protected for now.
             */

            PE_parse_boot_argn("aks_default_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));

            if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
                PE_get_default("kern.default_cp_class", &hfsmp->default_cp_class, sizeof(hfsmp->default_cp_class));
            }

            if (cp_is_valid_class(0, hfsmp->default_cp_class) == 0) {
                hfsmp->default_cp_class = PROTECTION_CLASS_C;
            }
        }
        else {
            retval = EPERM;
            goto ErrorExit;
        }
#else
        /* If CONFIG_PROTECT is not built in, ignore CP */
        vfs_clearflags(hfsmp->hfs_mp, MNT_CPROTECT);
#endif
    }

    /*
     * Establish a metadata allocation zone.
     */
    hfs_metadatazone_init(hfsmp, false);

    /*
     * Make any metadata zone adjustments.
     */
    if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
        /* Keep the roving allocator out of the metadata zone. */
        if (vcb->nextAllocation >= hfsmp->hfs_metazone_start &&
            vcb->nextAllocation <= hfsmp->hfs_metazone_end) {
            HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
        }
    } else {
        if (vcb->nextAllocation <= 1) {
            vcb->nextAllocation = hfsmp->hfs_min_alloc_start;
        }
    }
    vcb->sparseAllocation = hfsmp->hfs_min_alloc_start;

    /* Setup private/hidden directories for hardlinks. */
    hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
    hfs_privatedir_init(hfsmp, DIR_HARDLINKS);

    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
        hfs_remove_orphans(hfsmp);

    /* See if we need to erase unused Catalog nodes due to <rdar://problem/6947811>. */
    if ((hfsmp->hfs_flags & HFS_READ_ONLY) == 0)
    {
        retval = hfs_erase_unused_nodes(hfsmp);
        if (retval) {
            if (HFS_MOUNT_DEBUG) {
                printf("hfs_mounthfsplus: hfs_erase_unused_nodes returned (%d) for %s \n", retval, hfsmp->vcbVN);
            }

            goto ErrorExit;
        }
    }

    /*
     * Allow hot file clustering if conditions allow.
     */
    if ((hfsmp->hfs_flags & HFS_METADATA_ZONE) &&
        ((hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_SSD)) == 0)) {
        (void) hfs_recording_init(hfsmp);
    }

    /* Force ACLs on HFS+ file systems. */
    vfs_setextendedsecurity(HFSTOVFS(hfsmp));

    /* Enable extent-based extended attributes by default */
    hfsmp->hfs_flags |= HFS_XATTR_EXTENTS;

    return (0);

ErrorExit:
    /*
     * A fatal error occurred and the volume cannot be mounted, so
     * release any resources that we acquired...
     */
    hfsUnmount(hfsmp, NULL);

    if (HFS_MOUNT_DEBUG) {
        printf("hfs_mounthfsplus: encountered error (%d)\n", retval);
    }
    return (retval);
}


/*
 * ReleaseMetaFileVNode
 *
 * vp	L - -
 */
static void ReleaseMetaFileVNode(struct vnode *vp)
{
    struct filefork *fp;

    if (vp && (fp = VTOF(vp))) {
        if (fp->fcbBTCBPtr != NULL) {
            (void)hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
            (void) BTClosePath(fp);
            hfs_unlock(VTOC(vp));
        }

        /* release the node even if BTClosePath fails */
        vnode_recycle(vp);
        vnode_put(vp);
    }
}


/*************************************************************
 *
 * Unmounts an HFS volume.
 * At this point vflush() has been called (to dump all non-metadata files)
 *
 *************************************************************/

int
hfsUnmount( register struct hfsmount *hfsmp, __unused struct proc *p)
{
    /* Get rid of our attribute data vnode (if any).  This is done
     * after the vflush() during unmount, so we don't need to worry
     * about any locks.
     */
    if (hfsmp->hfs_attrdata_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_attrdata_vp);
        hfsmp->hfs_attrdata_vp = NULLVP;
    }

    if (hfsmp->hfs_startup_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_startup_vp);
        hfsmp->hfs_startup_cp = NULL;
        hfsmp->hfs_startup_vp = NULL;
    }

    if (hfsmp->hfs_attribute_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_attribute_vp);
        hfsmp->hfs_attribute_cp = NULL;
        hfsmp->hfs_attribute_vp = NULL;
    }

    if (hfsmp->hfs_catalog_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_catalog_vp);
        hfsmp->hfs_catalog_cp = NULL;
        hfsmp->hfs_catalog_vp = NULL;
    }

    if (hfsmp->hfs_extents_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_extents_vp);
        hfsmp->hfs_extents_cp = NULL;
        hfsmp->hfs_extents_vp = NULL;
    }

    if (hfsmp->hfs_allocation_vp) {
        ReleaseMetaFileVNode(hfsmp->hfs_allocation_vp);
        hfsmp->hfs_allocation_cp = NULL;
        hfsmp->hfs_allocation_vp = NULL;
    }

    return (0);
}


/*
 * Test if fork has overflow extents.
 *
 * Returns:
 *    true  - overflow extents exist
 *    false - overflow extents do not exist
 */
__private_extern__
bool overflow_extents(struct filefork *fp)
{
    u_int32_t blocks;

    //
    // If the vnode pointer is NULL then we're being called
    // from hfs_remove_orphans() with a faked-up filefork
    // and therefore it has to be an HFS+ volume.  Otherwise
    // we check through the volume header to see what type
    // of volume we're on.
    //

#if CONFIG_HFS_STD
    if (FTOV(fp) && VTOVCB(FTOV(fp))->vcbSigWord == kHFSSigWord) {
        if (fp->ff_extents[2].blockCount == 0)
            return false;

        blocks = fp->ff_extents[0].blockCount +
                 fp->ff_extents[1].blockCount +
                 fp->ff_extents[2].blockCount;

        return fp->ff_blocks > blocks;
    }
#endif

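    /* HFS+ records hold eight extents inline (kHFSPlusExtentDensity);
     * any blocks beyond those live in the extents overflow B-tree. */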
    if (fp->ff_extents[7].blockCount == 0)
        return false;

    blocks = fp->ff_extents[0].blockCount +
             fp->ff_extents[1].blockCount +
             fp->ff_extents[2].blockCount +
             fp->ff_extents[3].blockCount +
             fp->ff_extents[4].blockCount +
             fp->ff_extents[5].blockCount +
             fp->ff_extents[6].blockCount +
             fp->ff_extents[7].blockCount;

    return fp->ff_blocks > blocks;
}

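/*
 * A volume counts as frozen if it is fully frozen, or if another thread is
 * part-way through freezing it; the freezing thread itself is exempt so
 * that it can complete the freeze.
 */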
static __attribute__((pure))
boolean_t hfs_is_frozen(struct hfsmount *hfsmp)
{
    return (hfsmp->hfs_freeze_state == HFS_FROZEN
            || (hfsmp->hfs_freeze_state == HFS_FREEZING
                && current_thread() != hfsmp->hfs_freezing_thread));
}

/*
 * Lock the HFS global journal lock
 */
int
hfs_lock_global (struct hfsmount *hfsmp, enum hfs_locktype locktype)
{
    thread_t thread = current_thread();

    if (hfsmp->hfs_global_lockowner == thread) {
        panic ("hfs_lock_global: locking against myself!");
    }

    /*
     * This check isn't really necessary, but it stops us from taking
     * the mount lock in most cases.  The essential check is below.
     */
    if (hfs_is_frozen(hfsmp)) {
        /*
         * Unfortunately, there is no easy way of getting a notification
         * for when a process is exiting and it's possible for the exiting
         * process to get blocked somewhere else.  To catch this, we
         * periodically monitor the frozen process here and thaw if
         * we spot that it's exiting.
         */
frozen:
        hfs_lock_mount(hfsmp);

        struct timespec ts = { 0, 500 * NSEC_PER_MSEC };

        while (hfs_is_frozen(hfsmp)) {
            if (hfsmp->hfs_freeze_state == HFS_FROZEN
                && proc_exiting(hfsmp->hfs_freezing_proc)) {
                hfs_thaw_locked(hfsmp);
                break;
            }

            msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
                   PWAIT, "hfs_lock_global (frozen)", &ts);
        }
        hfs_unlock_mount(hfsmp);
    }

    /* HFS_SHARED_LOCK */
    if (locktype == HFS_SHARED_LOCK) {
        lck_rw_lock_shared (&hfsmp->hfs_global_lock);
        hfsmp->hfs_global_lockowner = HFS_SHARED_OWNER;
    }
    /* HFS_EXCLUSIVE_LOCK */
    else {
        lck_rw_lock_exclusive (&hfsmp->hfs_global_lock);
        hfsmp->hfs_global_lockowner = thread;
    }

    /*
     * We have to check if we're frozen again because of the time
     * between when we checked and when we took the global lock.
     */
    if (hfs_is_frozen(hfsmp)) {
        hfs_unlock_global(hfsmp);
        goto frozen;
    }

    return 0;
}


/*
 * Unlock the HFS global journal lock
 */
void
hfs_unlock_global (struct hfsmount *hfsmp)
{
    thread_t thread = current_thread();

    /* HFS_LOCK_EXCLUSIVE */
    if (hfsmp->hfs_global_lockowner == thread) {
        hfsmp->hfs_global_lockowner = NULL;
        lck_rw_unlock_exclusive (&hfsmp->hfs_global_lock);
    }
    /* HFS_LOCK_SHARED */
    else {
        lck_rw_unlock_shared (&hfsmp->hfs_global_lock);
    }
}

/*
 * Lock the HFS mount lock
 *
 * Note: this is a mutex, not a rw lock!
 */
inline
void hfs_lock_mount (struct hfsmount *hfsmp) {
    lck_mtx_lock (&(hfsmp->hfs_mutex));
}

/*
 * Unlock the HFS mount lock
 *
 * Note: this is a mutex, not a rw lock!
 */
inline
void hfs_unlock_mount (struct hfsmount *hfsmp) {
    lck_mtx_unlock (&(hfsmp->hfs_mutex));
}

/*
 * Lock HFS system file(s).
 */
int
hfs_systemfile_lock(struct hfsmount *hfsmp, int flags, enum hfs_locktype locktype)
{
    /*
     * Locking order is Catalog file, Attributes file, Startup file, Bitmap file, Extents file
     */
    if (flags & SFL_CATALOG) {
#ifdef HFS_CHECK_LOCK_ORDER
        if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == current_thread()) {
            panic("hfs_systemfile_lock: bad lock order (Attributes before Catalog)");
        }
        if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
            panic("hfs_systemfile_lock: bad lock order (Startup before Catalog)");
        }
        if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
            panic("hfs_systemfile_lock: bad lock order (Extents before Catalog)");
        }
#endif /* HFS_CHECK_LOCK_ORDER */

        if (hfsmp->hfs_catalog_cp) {
            (void) hfs_lock(hfsmp->hfs_catalog_cp, locktype, HFS_LOCK_DEFAULT);
            /*
             * When the catalog file has overflow extents then
             * also acquire the extents b-tree lock if it's not
             * already requested.
             */
            if (((flags & SFL_EXTENTS) == 0) &&
                (hfsmp->hfs_catalog_vp != NULL) &&
                (overflow_extents(VTOF(hfsmp->hfs_catalog_vp)))) {
                flags |= SFL_EXTENTS;
            }
        } else {
            flags &= ~SFL_CATALOG;
        }
    }

    if (flags & SFL_ATTRIBUTE) {
#ifdef HFS_CHECK_LOCK_ORDER
        if (hfsmp->hfs_startup_cp && hfsmp->hfs_startup_cp->c_lockowner == current_thread()) {
            panic("hfs_systemfile_lock: bad lock order (Startup before Attributes)");
        }
        if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
            panic("hfs_systemfile_lock: bad lock order (Extents before Attributes)");
        }
#endif /* HFS_CHECK_LOCK_ORDER */

        if (hfsmp->hfs_attribute_cp) {
            (void) hfs_lock(hfsmp->hfs_attribute_cp, locktype, HFS_LOCK_DEFAULT);
            /*
             * When the attribute file has overflow extents then
             * also acquire the extents b-tree lock if it's not
             * already requested.
             */
            if (((flags & SFL_EXTENTS) == 0) &&
                (hfsmp->hfs_attribute_vp != NULL) &&
                (overflow_extents(VTOF(hfsmp->hfs_attribute_vp)))) {
                flags |= SFL_EXTENTS;
            }
        } else {
            flags &= ~SFL_ATTRIBUTE;
        }
    }

    if (flags & SFL_STARTUP) {
#ifdef HFS_CHECK_LOCK_ORDER
        if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == current_thread()) {
            panic("hfs_systemfile_lock: bad lock order (Extents before Startup)");
        }
#endif /* HFS_CHECK_LOCK_ORDER */

        if (hfsmp->hfs_startup_cp) {
            (void) hfs_lock(hfsmp->hfs_startup_cp, locktype, HFS_LOCK_DEFAULT);
            /*
             * When the startup file has overflow extents then
             * also acquire the extents b-tree lock if it's not
             * already requested.
             */
            if (((flags & SFL_EXTENTS) == 0) &&
                (hfsmp->hfs_startup_vp != NULL) &&
                (overflow_extents(VTOF(hfsmp->hfs_startup_vp)))) {
                flags |= SFL_EXTENTS;
            }
        } else {
            flags &= ~SFL_STARTUP;
        }
    }

    /*
     * To prevent locks being taken in the wrong order, taking the
     * extents lock also takes the bitmap lock.
     */
    if (flags & (SFL_BITMAP | SFL_EXTENTS)) {
        if (hfsmp->hfs_allocation_cp) {
            (void) hfs_lock(hfsmp->hfs_allocation_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
            /*
             * The bitmap lock is also grabbed when only the extent lock
             * was requested. Set the bitmap lock bit in the lock
             * flags which callers will use during unlock.
             */
            flags |= SFL_BITMAP;
        } else {
            flags &= ~SFL_BITMAP;
        }
    }

    if (flags & SFL_EXTENTS) {
        /*
         * Since the extents btree lock is recursive we always
         * need exclusive access.
         */
        if (hfsmp->hfs_extents_cp) {
            (void) hfs_lock(hfsmp->hfs_extents_cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

            if (hfsmp->hfs_mp->mnt_kern_flag & MNTK_SWAP_MOUNT) {
                /*
                 * Because we may need this lock on the pageout path (if a swapfile allocation
                 * spills into the extents overflow tree), we will grant the holder of this
                 * lock the privilege of dipping into the reserve free pool in order to prevent
                 * a deadlock from occurring if we need those pageouts to complete before we
                 * will make any new pages available on the free list... the deadlock can occur
                 * if this thread needs to allocate memory while this lock is held.
                 */
                if (set_vm_privilege(TRUE) == FALSE) {
                    /*
                     * indicate that we need to drop vm_privilege
                     * when we unlock
                     */
                    flags |= SFL_VM_PRIV;
                }
            }
        } else {
            flags &= ~SFL_EXTENTS;
        }
    }

    return (flags);
}

/*
 * unlock HFS system file(s).
 */
void
hfs_systemfile_unlock(struct hfsmount *hfsmp, int flags)
{
    struct timeval tv;
    u_int32_t lastfsync;
    int numOfLockedBuffs;

    if (hfsmp->jnl == NULL) {
        microuptime(&tv);
        lastfsync = tv.tv_sec;
    }
    if (flags & SFL_STARTUP && hfsmp->hfs_startup_cp) {
        hfs_unlock(hfsmp->hfs_startup_cp);
    }
    if (flags & SFL_ATTRIBUTE && hfsmp->hfs_attribute_cp) {
        if (hfsmp->jnl == NULL) {
            BTGetLastSync((FCB*)VTOF(hfsmp->hfs_attribute_vp), &lastfsync);
            numOfLockedBuffs = count_lock_queue();
            if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
                ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
                  kMaxSecsForFsync))) {
                hfs_btsync(hfsmp->hfs_attribute_vp, HFS_SYNCTRANS);
            }
        }
        hfs_unlock(hfsmp->hfs_attribute_cp);
    }
    if (flags & SFL_CATALOG && hfsmp->hfs_catalog_cp) {
        if (hfsmp->jnl == NULL) {
            BTGetLastSync((FCB*)VTOF(hfsmp->hfs_catalog_vp), &lastfsync);
            numOfLockedBuffs = count_lock_queue();
            if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
                ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
                  kMaxSecsForFsync))) {
                hfs_btsync(hfsmp->hfs_catalog_vp, HFS_SYNCTRANS);
            }
        }
        hfs_unlock(hfsmp->hfs_catalog_cp);
    }
    if (flags & SFL_BITMAP && hfsmp->hfs_allocation_cp) {
        hfs_unlock(hfsmp->hfs_allocation_cp);
    }
    if (flags & SFL_EXTENTS && hfsmp->hfs_extents_cp) {
        if (hfsmp->jnl == NULL) {
            BTGetLastSync((FCB*)VTOF(hfsmp->hfs_extents_vp), &lastfsync);
            numOfLockedBuffs = count_lock_queue();
            if ((numOfLockedBuffs > kMaxLockedMetaBuffers) ||
                ((numOfLockedBuffs > 1) && ((tv.tv_sec - lastfsync) >
                  kMaxSecsForFsync))) {
                hfs_btsync(hfsmp->hfs_extents_vp, HFS_SYNCTRANS);
            }
        }
        hfs_unlock(hfsmp->hfs_extents_cp);

        if (flags & SFL_VM_PRIV) {
            /*
             * revoke the vm_privilege we granted this thread
             * now that we have unlocked the overflow extents
             */
            set_vm_privilege(FALSE);
        }
    }
}


/*
 * RequireFileLock
 *
 * Check to see if a vnode is locked in the current context
 * This is to be used for debugging purposes only!!
 */
#if HFS_DIAGNOSTIC
void RequireFileLock(FileReference vp, int shareable)
{
    int locked;

    /* The extents btree and allocation bitmap are always exclusive. */
    if (VTOC(vp)->c_fileid == kHFSExtentsFileID ||
        VTOC(vp)->c_fileid == kHFSAllocationFileID) {
        shareable = 0;
    }

    locked = VTOC(vp)->c_lockowner == current_thread();

    if (!locked && !shareable) {
        switch (VTOC(vp)->c_fileid) {
        case kHFSExtentsFileID:
            panic("hfs: extents btree not locked! v: 0x%08X\n #\n", (u_int)vp);
            break;
        case kHFSCatalogFileID:
            panic("hfs: catalog btree not locked! v: 0x%08X\n #\n", (u_int)vp);
            break;
        case kHFSAllocationFileID:
            /* The allocation file can hide behind the journal lock. */
            if (VTOHFS(vp)->jnl == NULL)
                panic("hfs: allocation file not locked! v: 0x%08X\n #\n", (u_int)vp);
            break;
        case kHFSStartupFileID:
            panic("hfs: startup file not locked! v: 0x%08X\n #\n", (u_int)vp);
            break;
        case kHFSAttributesFileID:
            panic("hfs: attributes btree not locked! v: 0x%08X\n #\n", (u_int)vp);
            break;
        }
    }
}
#endif


/*
 * There are three ways to qualify for ownership rights on an object:
 *
 * 1. (a) Your UID matches the cnode's UID.
 *    (b) The object in question is owned by "unknown"
 * 2. (a) Permissions on the filesystem are being ignored and
 *        your UID matches the replacement UID.
 *    (b) Permissions on the filesystem are being ignored and
 *        the replacement UID is "unknown".
 * 3. You are root.
 *
 */
int
hfs_owner_rights(struct hfsmount *hfsmp, uid_t cnode_uid, kauth_cred_t cred,
        __unused struct proc *p, int invokesuperuserstatus)
{
    if ((kauth_cred_getuid(cred) == cnode_uid) ||                                  /* [1a] */
        (cnode_uid == UNKNOWNUID) ||                                               /* [1b] */
        ((((unsigned int)vfs_flags(HFSTOVFS(hfsmp))) & MNT_UNKNOWNPERMISSIONS) &&  /* [2] */
         ((kauth_cred_getuid(cred) == hfsmp->hfs_uid) ||                           /* [2a] */
          (hfsmp->hfs_uid == UNKNOWNUID))) ||                                      /* [2b] */
        (invokesuperuserstatus && (suser(cred, 0) == 0))) {                        /* [3] */
        return (0);
    } else {
        return (EPERM);
    }
}
1597
1598
1599 u_int32_t BestBlockSizeFit(u_int32_t allocationBlockSize,
1600 u_int32_t blockSizeLimit,
1601 u_int32_t baseMultiple) {
1602 /*
1603 Compute the optimal (largest) block size (no larger than allocationBlockSize) that is less than the
1604 specified limit but still an even multiple of the baseMultiple.
1605 */
1606 int baseBlockCount, blockCount;
1607 u_int32_t trialBlockSize;
1608
1609 if (allocationBlockSize % baseMultiple != 0) {
1610 /*
1611 Whoops: the allocation blocks aren't even multiples of the specified base:
1612 no amount of dividing them into even parts will be a multiple, either then!
1613 */
1614 return 512; /* Hope for the best */
1615 };
1616
1617 /* Try the obvious winner first, to prevent 12K allocation blocks, for instance,
1618 from being handled as two 6K logical blocks instead of 3 4K logical blocks.
1619 Even though the former (the result of the loop below) is the larger allocation
1620 block size, the latter is more efficient: */
1621 if (allocationBlockSize % PAGE_SIZE == 0) return PAGE_SIZE;
1622
1623 /* No clear winner exists: pick the largest even fraction <= MAXBSIZE: */
1624 baseBlockCount = allocationBlockSize / baseMultiple; /* Now guaranteed to be an even multiple */
1625
1626 for (blockCount = baseBlockCount; blockCount > 0; --blockCount) {
1627 trialBlockSize = blockCount * baseMultiple;
1628 if (allocationBlockSize % trialBlockSize == 0) { /* An even multiple? */
1629 if ((trialBlockSize <= blockSizeLimit) &&
1630 (trialBlockSize % baseMultiple == 0)) {
1631 return trialBlockSize;
1632 };
1633 };
1634 };
1635
1636 /* Note: we should never get here, since blockCount = 1 should always work,
1637 but this is nice and safe and makes the compiler happy, too ... */
1638 return 512;
1639 }
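
/*
 * Worked example (hypothetical sizes, assuming a 4K PAGE_SIZE): with
 * allocationBlockSize = 10240, blockSizeLimit = 8192 and
 * baseMultiple = 512, the PAGE_SIZE shortcut does not apply
 * (10240 % 4096 != 0), so the loop walks down from 10240 in steps of
 * 512 and returns 5120, the largest divisor of 10240 that is a
 * multiple of 512 and <= 8192.
 */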
1640
1641
1642 u_int32_t
1643 GetFileInfo(ExtendedVCB *vcb, __unused u_int32_t dirid, const char *name,
1644 struct cat_attr *fattr, struct cat_fork *forkinfo)
1645 {
1646 struct hfsmount * hfsmp;
1647 struct cat_desc jdesc;
1648 int lockflags;
1649 int error;
1650
1651 if (vcb->vcbSigWord != kHFSPlusSigWord)
1652 return (0);
1653
1654 hfsmp = VCBTOHFS(vcb);
1655
1656 memset(&jdesc, 0, sizeof(struct cat_desc));
1657 jdesc.cd_parentcnid = kRootDirID;
1658 jdesc.cd_nameptr = (const u_int8_t *)name;
1659 jdesc.cd_namelen = strlen(name);
1660
1661 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1662 error = cat_lookup(hfsmp, &jdesc, 0, 0, NULL, fattr, forkinfo, NULL);
1663 hfs_systemfile_unlock(hfsmp, lockflags);
1664
1665 if (error == 0) {
1666 return (fattr->ca_fileid);
1667 } else if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1668 return (0);
1669 }
1670
1671 return (0); /* XXX what callers expect on an error */
1672 }
1673
1674
1675 /*
1676 * On HFS Plus volumes, there can be orphaned files or directories.
1677 * These are files or directories that were unlinked while busy.
1678 * If the volume was not cleanly unmounted then some of these may
1679 * have persisted and need to be removed.
1680 */
1681 void
1682 hfs_remove_orphans(struct hfsmount * hfsmp)
1683 {
1684 struct BTreeIterator * iterator = NULL;
1685 struct FSBufferDescriptor btdata;
1686 struct HFSPlusCatalogFile filerec;
1687 struct HFSPlusCatalogKey * keyp;
1688 struct proc *p = current_proc();
1689 FCB *fcb;
1690 ExtendedVCB *vcb;
1691 char filename[32];
1692 char tempname[32];
1693 size_t namelen;
1694 cat_cookie_t cookie;
1695 int catlock = 0;
1696 int catreserve = 0;
1697 int started_tr = 0;
1698 int lockflags;
1699 int result;
1700 int orphaned_files = 0;
1701 int orphaned_dirs = 0;
1702
1703 bzero(&cookie, sizeof(cookie));
1704
1705 if (hfsmp->hfs_flags & HFS_CLEANED_ORPHANS)
1706 return;
1707
1708 vcb = HFSTOVCB(hfsmp);
1709 fcb = VTOF(hfsmp->hfs_catalog_vp);
1710
1711 btdata.bufferAddress = &filerec;
1712 btdata.itemSize = sizeof(filerec);
1713 btdata.itemCount = 1;
1714
1715 MALLOC(iterator, struct BTreeIterator *, sizeof(*iterator), M_TEMP, M_WAITOK);
1716 bzero(iterator, sizeof(*iterator));
1717
1718 /* Build a key to "temp" */
1719 keyp = (HFSPlusCatalogKey*)&iterator->key;
1720 keyp->parentID = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1721 keyp->nodeName.length = 4; /* "temp" */
1722 keyp->keyLength = kHFSPlusCatalogKeyMinimumLength + keyp->nodeName.length * 2;
1723 keyp->nodeName.unicode[0] = 't';
1724 keyp->nodeName.unicode[1] = 'e';
1725 keyp->nodeName.unicode[2] = 'm';
1726 keyp->nodeName.unicode[3] = 'p';
1727
1728 /*
1729 * Position the iterator just before the first real temp file/dir.
1730 */
1731 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1732 (void) BTSearchRecord(fcb, iterator, NULL, NULL, iterator);
1733 hfs_systemfile_unlock(hfsmp, lockflags);
1734
1735 /* Visit all the temp files/dirs in the HFS+ private directory. */
1736 for (;;) {
1737 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1738 result = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
1739 hfs_systemfile_unlock(hfsmp, lockflags);
1740 if (result)
1741 break;
1742 if (keyp->parentID != hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid)
1743 break;
1744
1745 (void) utf8_encodestr(keyp->nodeName.unicode, keyp->nodeName.length * 2,
1746 (u_int8_t *)filename, &namelen, sizeof(filename), 0, 0);
1747
1748 (void) snprintf(tempname, sizeof(tempname), "%s%d",
1749 HFS_DELETE_PREFIX, filerec.fileID);
1750
1751 /*
1752 * Delete all files (and directories) named "tempxxx",
1753 * where xxx is the file's cnid in decimal.
1754 *
1755 */
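/* For example, a busy file with cnid 1234 appears here as "temp1234". */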
1756 if (bcmp(tempname, filename, namelen) == 0) {
1757 struct filefork dfork;
1758 struct filefork rfork;
1759 struct cnode cnode;
1760 int mode = 0;
1761
1762 bzero(&dfork, sizeof(dfork));
1763 bzero(&rfork, sizeof(rfork));
1764 bzero(&cnode, sizeof(cnode));
1765
1766 /* Delete any attributes, ignore errors */
1767 (void) hfs_removeallattr(hfsmp, filerec.fileID);
1768
1769 if (hfs_start_transaction(hfsmp) != 0) {
1770 printf("hfs_remove_orphans: failed to start transaction\n");
1771 goto exit;
1772 }
1773 started_tr = 1;
1774
1775 /*
1776 * Reserve some space in the Catalog file.
1777 */
1778 if (cat_preflight(hfsmp, CAT_DELETE, &cookie, p) != 0) {
1779 printf("hfs_remove_orphans: cat_preflight failed\n");
1780 goto exit;
1781 }
1782 catreserve = 1;
1783
1784 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1785 catlock = 1;
1786
1787 /* Build a fake cnode */
1788 cat_convertattr(hfsmp, (CatalogRecord *)&filerec, &cnode.c_attr,
1789 &dfork.ff_data, &rfork.ff_data);
1790 cnode.c_desc.cd_parentcnid = hfsmp->hfs_private_desc[FILE_HARDLINKS].cd_cnid;
1791 cnode.c_desc.cd_nameptr = (const u_int8_t *)filename;
1792 cnode.c_desc.cd_namelen = namelen;
1793 cnode.c_desc.cd_cnid = cnode.c_attr.ca_fileid;
1794 cnode.c_blocks = dfork.ff_blocks + rfork.ff_blocks;
1795
1796 /* Position iterator at previous entry */
1797 if (BTIterateRecord(fcb, kBTreePrevRecord, iterator,
1798 NULL, NULL) != 0) {
1799 break;
1800 }
1801
1802 /* Truncate the file to zero (both forks) */
1803 if (dfork.ff_blocks > 0) {
1804 u_int64_t fsize;
1805
1806 dfork.ff_cp = &cnode;
1807 cnode.c_datafork = &dfork;
1808 cnode.c_rsrcfork = NULL;
1809 fsize = (u_int64_t)dfork.ff_blocks * (u_int64_t)HFSTOVCB(hfsmp)->blockSize;
1810 while (fsize > 0) {
1811 if (fsize > HFS_BIGFILE_SIZE) {
1812 fsize -= HFS_BIGFILE_SIZE;
1813 } else {
1814 fsize = 0;
1815 }
1816
1817 if (TruncateFileC(vcb, (FCB*)&dfork, fsize, 1, 0,
1818 cnode.c_attr.ca_fileid, false) != 0) {
1819 printf("hfs: error truncating data fork!\n");
1820 break;
1821 }
1822
1823 //
1824 // if we're iteratively truncating this file down,
1825 // then end the transaction and start a new one so
1826 // that no one transaction gets too big.
1827 //
1828 if (fsize > 0 && started_tr) {
1829 /* Drop system file locks before starting
1830 * another transaction to preserve lock order.
1831 */
1832 hfs_systemfile_unlock(hfsmp, lockflags);
1833 catlock = 0;
1834 hfs_end_transaction(hfsmp);
1835
1836 if (hfs_start_transaction(hfsmp) != 0) {
1837 started_tr = 0;
1838 break;
1839 }
1840 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
1841 catlock = 1;
1842 }
1843 }
1844 }
1845
1846 if (rfork.ff_blocks > 0) {
1847 rfork.ff_cp = &cnode;
1848 cnode.c_datafork = NULL;
1849 cnode.c_rsrcfork = &rfork;
1850 if (TruncateFileC(vcb, (FCB*)&rfork, 0, 1, 1, cnode.c_attr.ca_fileid, false) != 0) {
1851 printf("hfs: error truncating rsrc fork!\n");
1852 break;
1853 }
1854 }
1855
1856 /* Remove the file or folder record from the Catalog */
1857 if (cat_delete(hfsmp, &cnode.c_desc, &cnode.c_attr) != 0) {
1858 printf("hfs_remove_orphans: error deleting cat rec for id %d!\n", cnode.c_desc.cd_cnid);
1859 hfs_systemfile_unlock(hfsmp, lockflags);
1860 catlock = 0;
1861 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1862 break;
1863 }
1864
1865 mode = cnode.c_attr.ca_mode & S_IFMT;
1866
1867 if (mode == S_IFDIR) {
1868 orphaned_dirs++;
1869 }
1870 else {
1871 orphaned_files++;
1872 }
1873
1874 /* Update parent and volume counts */
1875 hfsmp->hfs_private_attr[FILE_HARDLINKS].ca_entries--;
1876 if (mode == S_IFDIR) {
1877 DEC_FOLDERCOUNT(hfsmp, hfsmp->hfs_private_attr[FILE_HARDLINKS]);
1878 }
1879
1880 (void)cat_update(hfsmp, &hfsmp->hfs_private_desc[FILE_HARDLINKS],
1881 &hfsmp->hfs_private_attr[FILE_HARDLINKS], NULL, NULL);
1882
1883 /* Drop locks and end the transaction */
1884 hfs_systemfile_unlock(hfsmp, lockflags);
1885 cat_postflight(hfsmp, &cookie, p);
1886 catlock = catreserve = 0;
1887
1888 /*
1889 Now that Catalog is unlocked, update the volume info, making
1890 sure to differentiate between files and directories
1891 */
1892 if (mode == S_IFDIR) {
1893 hfs_volupdate(hfsmp, VOL_RMDIR, 0);
1894 }
1895 else{
1896 hfs_volupdate(hfsmp, VOL_RMFILE, 0);
1897 }
1898
1899 if (started_tr) {
1900 hfs_end_transaction(hfsmp);
1901 started_tr = 0;
1902 }
1903
1904 } /* end if */
1905 } /* end for */
1906 if (orphaned_files > 0 || orphaned_dirs > 0)
1907 printf("hfs: Removed %d orphaned / unlinked files and %d directories \n", orphaned_files, orphaned_dirs);
1908 exit:
1909 if (catlock) {
1910 hfs_systemfile_unlock(hfsmp, lockflags);
1911 }
1912 if (catreserve) {
1913 cat_postflight(hfsmp, &cookie, p);
1914 }
1915 if (started_tr) {
1916 hfs_end_transaction(hfsmp);
1917 }
1918
1919 FREE(iterator, M_TEMP);
1920 hfsmp->hfs_flags |= HFS_CLEANED_ORPHANS;
1921 }
1922
1923
1924 /*
1925 * This will return the correct logical block size for a given vnode.
1926 * For most files, it is the allocation block size, for meta data like
1927 * BTrees, this is kept as part of the BTree private nodeSize
1928 */
1929 u_int32_t
1930 GetLogicalBlockSize(struct vnode *vp)
1931 {
1932 u_int32_t logBlockSize;
1933
1934 DBG_ASSERT(vp != NULL);
1935
1936 /* start with default */
1937 logBlockSize = VTOHFS(vp)->hfs_logBlockSize;
1938
1939 if (vnode_issystem(vp)) {
1940 if (VTOF(vp)->fcbBTCBPtr != NULL) {
1941 BTreeInfoRec bTreeInfo;
1942
1943 /*
1944 * We do not lock the BTrees, because if we are asking for the block size
1945 * then the tree should already be locked.
1946 * We just want the nodeSize, which will NEVER change, so even if the world
1947 * is changing, the nodeSize remains the same; which argues against locking
1948 * it in the first place.
1949 */
1950
1951 (void) BTGetInformation (VTOF(vp), kBTreeInfoVersion, &bTreeInfo);
1952
1953 logBlockSize = bTreeInfo.nodeSize;
1954
1955 } else if (VTOC(vp)->c_fileid == kHFSAllocationFileID) {
1956 logBlockSize = VTOVCB(vp)->vcbVBMIOSize;
1957 }
1958 }
1959
1960 DBG_ASSERT(logBlockSize > 0);
1961
1962 return logBlockSize;
1963 }
1964
1965 #if HFS_SPARSE_DEV
1966 static bool hfs_get_backing_free_blks(hfsmount_t *hfsmp, uint64_t *pfree_blks)
1967 {
1968 struct vfsstatfs *vfsp; /* 272 bytes */
1969 uint64_t vfreeblks;
1970 struct timeval now;
1971
1972 hfs_lock_mount(hfsmp);
1973
1974 vnode_t backing_vp = hfsmp->hfs_backingfs_rootvp;
1975 if (!backing_vp) {
1976 hfs_unlock_mount(hfsmp);
1977 return false;
1978 }
1979
1980 // usecount is not enough; we need iocount
1981 if (vnode_get(backing_vp)) {
1982 hfs_unlock_mount(hfsmp);
1983 *pfree_blks = 0;
1984 return true;
1985 }
1986
1987 uint32_t loanedblks = hfsmp->loanedBlocks;
1988 uint32_t bandblks = hfsmp->hfs_sparsebandblks;
1989 uint64_t maxblks = hfsmp->hfs_backingfs_maxblocks;
1990
1991 hfs_unlock_mount(hfsmp);
1992
1993 mount_t backingfs_mp = vnode_mount(backing_vp);
1994
1995 microtime(&now);
1996 if ((now.tv_sec - hfsmp->hfs_last_backingstatfs) >= 1) {
1997 vfs_update_vfsstat(backingfs_mp, vfs_context_kernel(), VFS_KERNEL_EVENT);
1998 hfsmp->hfs_last_backingstatfs = now.tv_sec;
1999 }
2000
2001 if (!(vfsp = vfs_statfs(backingfs_mp))) {
2002 vnode_put(backing_vp);
2003 return false;
2004 }
2005
2006 vfreeblks = vfsp->f_bavail;
2007 /* Normalize block count if needed. */
2008 if (vfsp->f_bsize != hfsmp->blockSize)
2009 vfreeblks = vfreeblks * vfsp->f_bsize / hfsmp->blockSize;
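/* e.g. 1000 free 8192-byte backing blocks become 2000 4096-byte HFS blocks */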
2010 if (vfreeblks > bandblks)
2011 vfreeblks -= bandblks;
2012 else
2013 vfreeblks = 0;
2014
2015 /*
2016 * Take into account any delayed allocations. The original
2017 * reason for the "2 *" is not certain. Most
2018 * likely it is to allow for additional requirements in the
2019 * host file system and metadata required by disk images. The
2020 * number of loaned blocks is likely to be small and we will
2021 * stop using them as we get close to the limit.
2022 */
2023 loanedblks = 2 * loanedblks;
2024 if (vfreeblks > loanedblks)
2025 vfreeblks -= loanedblks;
2026 else
2027 vfreeblks = 0;
2028
2029 if (maxblks)
2030 vfreeblks = MIN(vfreeblks, maxblks);
2031
2032 vnode_put(backing_vp);
2033
2034 *pfree_blks = vfreeblks;
2035
2036 return true;
2037 }
2038 #endif
2039
2040 u_int32_t
2041 hfs_freeblks(struct hfsmount * hfsmp, int wantreserve)
2042 {
2043 u_int32_t freeblks;
2044 u_int32_t rsrvblks;
2045 u_int32_t loanblks;
2046
2047 /*
2048 * We don't bother taking the mount lock
2049 * to look at these values since the values
2050 * themselves are each updated atomically
2051 * on aligned addresses.
2052 */
2053 freeblks = hfsmp->freeBlocks;
2054 rsrvblks = hfsmp->reserveBlocks;
2055 loanblks = hfsmp->loanedBlocks;
2056 if (wantreserve) {
2057 if (freeblks > rsrvblks)
2058 freeblks -= rsrvblks;
2059 else
2060 freeblks = 0;
2061 }
2062 if (freeblks > loanblks)
2063 freeblks -= loanblks;
2064 else
2065 freeblks = 0;
2066
2067 #if HFS_SPARSE_DEV
2068 /*
2069 * When the underlying device is sparse, check the
2070 * available space on the backing store volume.
2071 */
2072 uint64_t vfreeblks;
2073 if (hfs_get_backing_free_blks(hfsmp, &vfreeblks))
2074 freeblks = MIN(freeblks, vfreeblks);
2075 #endif /* HFS_SPARSE_DEV */
2076
2077 if (hfsmp->hfs_flags & HFS_CS) {
2078 uint64_t cs_free_bytes;
2079 uint64_t cs_free_blks;
2080 if (VNOP_IOCTL(hfsmp->hfs_devvp, _DKIOCCSGETFREEBYTES,
2081 (caddr_t)&cs_free_bytes, 0, vfs_context_kernel()) == 0) {
2082 cs_free_blks = cs_free_bytes / hfsmp->blockSize;
2083 if (cs_free_blks > loanblks)
2084 cs_free_blks -= loanblks;
2085 else
2086 cs_free_blks = 0;
2087 freeblks = MIN(cs_free_blks, freeblks);
2088 }
2089 }
2090
2091 return (freeblks);
2092 }
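
/*
 * Worked example (hypothetical numbers): with freeBlocks = 1000,
 * reserveBlocks = 100 and loanedBlocks = 50, a caller passing
 * wantreserve sees 1000 - 100 - 50 = 850 blocks; without wantreserve
 * it sees 950 (before any sparse-device or CS clamping above).
 */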
2093
2094 /*
2095 * Map HFS Common errors (negative) to BSD error codes (positive).
2096 * Positive errors (ie BSD errors) are passed through unchanged.
2097 */
2098 short MacToVFSError(OSErr err)
2099 {
2100 if (err >= 0)
2101 return err;
2102
2103 /* BSD/VFS internal errnos */
2104 switch (err) {
2105 case ERESERVEDNAME: /* -8 */
2106 return err;
2107 }
2108
2109 switch (err) {
2110 case dskFulErr: /* -34 */
2111 case btNoSpaceAvail: /* -32733 */
2112 return ENOSPC;
2113 case fxOvFlErr: /* -32750 */
2114 return EOVERFLOW;
2115
2116 case btBadNode: /* -32731 */
2117 return EIO;
2118
2119 case memFullErr: /* -108 */
2120 return ENOMEM; /* +12 */
2121
2122 case cmExists: /* -32718 */
2123 case btExists: /* -32734 */
2124 return EEXIST; /* +17 */
2125
2126 case cmNotFound: /* -32719 */
2127 case btNotFound: /* -32735 */
2128 return ENOENT; /* +2 */
2129
2130 case cmNotEmpty: /* -32717 */
2131 return ENOTEMPTY; /* 66 */
2132
2133 case cmFThdDirErr: /* -32714 */
2134 return EISDIR; /* 21 */
2135
2136 case fxRangeErr: /* -32751 */
2137 return ERANGE;
2138
2139 case bdNamErr: /* -37 */
2140 return ENAMETOOLONG; /* 63 */
2141
2142 case paramErr: /* -50 */
2143 case fileBoundsErr: /* -1309 */
2144 return EINVAL; /* +22 */
2145
2146 case fsBTBadNodeSize:
2147 return ENXIO;
2148
2149 default:
2150 return EIO; /* +5 */
2151 }
2152 }
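
/*
 * Typical use, sketched (illustrative fragment; any BTree or catalog
 * routine that returns an OSErr is handled the same way):
 */
#if 0
	int err = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
	if (err)
		return MacToVFSError(err);	/* e.g. btNotFound becomes ENOENT */
#endif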
2153
2154
2155 /*
2156 * Find the current thread's directory hint for a given index.
2157 *
2158 * Requires an exclusive lock on directory cnode.
2159 *
2160 * Use detach if the cnode lock must be dropped while the hint is still active.
2161 */
2162 __private_extern__
2163 directoryhint_t *
2164 hfs_getdirhint(struct cnode *dcp, int index, int detach)
2165 {
2166 struct timeval tv;
2167 directoryhint_t *hint;
2168 boolean_t need_remove, need_init;
2169 const u_int8_t * name;
2170
2171 microuptime(&tv);
2172
2173 /*
2174 * Look for an existing hint first. If not found, create a new one (when
2175 * the list is not full) or recycle the oldest hint. Since new hints are
2176 * always added to the head of the list, the last hint is always the
2177 * oldest.
2178 */
2179 TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
2180 if (hint->dh_index == index)
2181 break;
2182 }
2183 if (hint != NULL) { /* found an existing hint */
2184 need_init = false;
2185 need_remove = true;
2186 } else { /* cannot find an existing hint */
2187 need_init = true;
2188 if (dcp->c_dirhintcnt < HFS_MAXDIRHINTS) { /* we don't need recycling */
2189 /* Create a default directory hint */
2190 MALLOC_ZONE(hint, directoryhint_t *, sizeof(directoryhint_t), M_HFSDIRHINT, M_WAITOK);
2191 ++dcp->c_dirhintcnt;
2192 need_remove = false;
2193 } else { /* recycle the last (i.e., the oldest) hint */
2194 hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead);
2195 if ((hint->dh_desc.cd_flags & CD_HASBUF) &&
2196 (name = hint->dh_desc.cd_nameptr)) {
2197 hint->dh_desc.cd_nameptr = NULL;
2198 hint->dh_desc.cd_namelen = 0;
2199 hint->dh_desc.cd_flags &= ~CD_HASBUF;
2200 vfs_removename((const char *)name);
2201 }
2202 need_remove = true;
2203 }
2204 }
2205
2206 if (need_remove)
2207 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
2208
2209 if (detach)
2210 --dcp->c_dirhintcnt;
2211 else
2212 TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
2213
2214 if (need_init) {
2215 hint->dh_index = index;
2216 hint->dh_desc.cd_flags = 0;
2217 hint->dh_desc.cd_encoding = 0;
2218 hint->dh_desc.cd_namelen = 0;
2219 hint->dh_desc.cd_nameptr = NULL;
2220 hint->dh_desc.cd_parentcnid = dcp->c_fileid;
2221 hint->dh_desc.cd_hint = dcp->c_childhint;
2222 hint->dh_desc.cd_cnid = 0;
2223 }
2224 hint->dh_time = tv.tv_sec;
2225 return (hint);
2226 }
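
/*
 * Sketch of the usual get/release pattern (illustrative fragment; the
 * directory cnode is assumed to be held exclusive, as required above):
 */
#if 0
	directoryhint_t *hint;

	hint = hfs_getdirhint(dcp, index, 0);	/* find, create, or recycle */
	/* ... resume the catalog scan from hint->dh_desc ... */
	hfs_reldirhint(dcp, hint);		/* release when finished */
#endif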
2227
2228 /*
2229 * Release a single directory hint.
2230 *
2231 * Requires an exclusive lock on directory cnode.
2232 */
2233 __private_extern__
2234 void
2235 hfs_reldirhint(struct cnode *dcp, directoryhint_t * relhint)
2236 {
2237 const u_int8_t * name;
2238 directoryhint_t *hint;
2239
2240 /* Check if item is on list (could be detached) */
2241 TAILQ_FOREACH(hint, &dcp->c_hintlist, dh_link) {
2242 if (hint == relhint) {
2243 TAILQ_REMOVE(&dcp->c_hintlist, relhint, dh_link);
2244 --dcp->c_dirhintcnt;
2245 break;
2246 }
2247 }
2248 name = relhint->dh_desc.cd_nameptr;
2249 if ((relhint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2250 relhint->dh_desc.cd_nameptr = NULL;
2251 relhint->dh_desc.cd_namelen = 0;
2252 relhint->dh_desc.cd_flags &= ~CD_HASBUF;
2253 vfs_removename((const char *)name);
2254 }
2255 FREE_ZONE(relhint, sizeof(directoryhint_t), M_HFSDIRHINT);
2256 }
2257
2258 /*
2259 * Release directory hints for given directory
2260 *
2261 * Requires an exclusive lock on directory cnode.
2262 */
2263 __private_extern__
2264 void
2265 hfs_reldirhints(struct cnode *dcp, int stale_hints_only)
2266 {
2267 struct timeval tv;
2268 directoryhint_t *hint, *prev;
2269 const u_int8_t * name;
2270
2271 if (stale_hints_only)
2272 microuptime(&tv);
2273
2274 /* searching from the oldest to the newest, so we can stop early when releasing stale hints only */
2275 for (hint = TAILQ_LAST(&dcp->c_hintlist, hfs_hinthead); hint != NULL; hint = prev) {
2276 if (stale_hints_only && (tv.tv_sec - hint->dh_time) < HFS_DIRHINT_TTL)
2277 break; /* stop here if this entry is too new */
2278 name = hint->dh_desc.cd_nameptr;
2279 if ((hint->dh_desc.cd_flags & CD_HASBUF) && (name != NULL)) {
2280 hint->dh_desc.cd_nameptr = NULL;
2281 hint->dh_desc.cd_namelen = 0;
2282 hint->dh_desc.cd_flags &= ~CD_HASBUF;
2283 vfs_removename((const char *)name);
2284 }
2285 prev = TAILQ_PREV(hint, hfs_hinthead, dh_link); /* must save this pointer before calling FREE_ZONE on this node */
2286 TAILQ_REMOVE(&dcp->c_hintlist, hint, dh_link);
2287 FREE_ZONE(hint, sizeof(directoryhint_t), M_HFSDIRHINT);
2288 --dcp->c_dirhintcnt;
2289 }
2290 }
2291
2292 /*
2293 * Insert a detached directory hint back into the list of dirhints.
2294 *
2295 * Requires an exclusive lock on directory cnode.
2296 */
2297 __private_extern__
2298 void
2299 hfs_insertdirhint(struct cnode *dcp, directoryhint_t * hint)
2300 {
2301 directoryhint_t *test;
2302
2303 TAILQ_FOREACH(test, &dcp->c_hintlist, dh_link) {
2304 if (test == hint)
2305 panic("hfs_insertdirhint: hint %p already on list!", hint);
2306 }
2307
2308 TAILQ_INSERT_HEAD(&dcp->c_hintlist, hint, dh_link);
2309 ++dcp->c_dirhintcnt;
2310 }
2311
2312 /*
2313 * Perform a case-insensitive compare of two UTF-8 filenames.
2314 *
2315 * Returns 0 if the strings match.
2316 */
2317 __private_extern__
2318 int
2319 hfs_namecmp(const u_int8_t *str1, size_t len1, const u_int8_t *str2, size_t len2)
2320 {
2321 u_int16_t *ustr1, *ustr2;
2322 size_t ulen1, ulen2;
2323 size_t maxbytes;
2324 int cmp = -1;
2325
2326 if (len1 != len2)
2327 return (cmp);
2328
2329 maxbytes = kHFSPlusMaxFileNameChars << 1;
2330 MALLOC(ustr1, u_int16_t *, maxbytes << 1, M_TEMP, M_WAITOK);
2331 ustr2 = ustr1 + (maxbytes >> 1);
2332
2333 if (utf8_decodestr(str1, len1, ustr1, &ulen1, maxbytes, ':', 0) != 0)
2334 goto out;
2335 if (utf8_decodestr(str2, len2, ustr2, &ulen2, maxbytes, ':', 0) != 0)
2336 goto out;
2337
2338 cmp = FastUnicodeCompare(ustr1, ulen1>>1, ustr2, ulen2>>1);
2339 out:
2340 FREE(ustr1, M_TEMP);
2341 return (cmp);
2342 }
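
/*
 * For illustration: HFS+ name comparison is case-insensitive, so the
 * two literals below compare equal.
 */
#if 0
	if (hfs_namecmp((const u_int8_t *)"README", 6,
	                (const u_int8_t *)"ReadMe", 6) == 0) {
		/* treated as the same name */
	}
#endif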
2343
2344
2345 typedef struct jopen_cb_info {
2346 off_t jsize;
2347 char *desired_uuid;
2348 struct vnode *jvp;
2349 size_t blksize;
2350 int need_clean;
2351 int need_init;
2352 } jopen_cb_info;
2353
2354 static int
2355 journal_open_cb(const char *bsd_dev_name, const char *uuid_str, void *arg)
2356 {
2357 struct nameidata nd;
2358 jopen_cb_info *ji = (jopen_cb_info *)arg;
2359 char bsd_name[256];
2360 int error;
2361
2362 strlcpy(&bsd_name[0], "/dev/", sizeof(bsd_name));
2363 strlcpy(&bsd_name[5], bsd_dev_name, sizeof(bsd_name)-5);
2364
2365 if (ji->desired_uuid && ji->desired_uuid[0] && strcmp(uuid_str, ji->desired_uuid) != 0) {
2366 return 1; // keep iterating
2367 }
2368
2369 // if we're here, either the desired uuid matched or there was no
2370 // desired uuid so let's try to open the device for writing and
2371 // see if it works. if it does, we'll use it.
2372
2373 NDINIT(&nd, LOOKUP, OP_LOOKUP, LOCKLEAF, UIO_SYSSPACE32, CAST_USER_ADDR_T(bsd_name), vfs_context_kernel());
2374 if ((error = namei(&nd))) {
2375 printf("hfs: journal open cb: error %d looking up device %s (dev uuid %s)\n", error, bsd_name, uuid_str);
2376 return 1; // keep iterating
2377 }
2378
2379 ji->jvp = nd.ni_vp;
2380 nameidone(&nd);
2381
2382 if (ji->jvp == NULL) {
2383 printf("hfs: journal open cb: did not find %s (error %d)\n", bsd_name, error);
2384 } else {
2385 error = VNOP_OPEN(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
2386 if (error == 0) {
2387 // if the journal is dirty and we didn't specify a desired
2388 // journal device uuid, then do not use the journal. but
2389 // if the journal is just invalid (e.g. it hasn't been
2390 // initialized) then just set the need_init flag.
2391 if (ji->need_clean && ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2392 error = journal_is_clean(ji->jvp, 0, ji->jsize, (void *)1, ji->blksize);
2393 if (error == EBUSY) {
2394 VNOP_CLOSE(ji->jvp, FREAD|FWRITE, vfs_context_kernel());
2395 vnode_put(ji->jvp);
2396 ji->jvp = NULL;
2397 return 1; // keep iterating
2398 } else if (error == EINVAL) {
2399 ji->need_init = 1;
2400 }
2401 }
2402
2403 if (ji->desired_uuid && ji->desired_uuid[0] == '\0') {
2404 strlcpy(ji->desired_uuid, uuid_str, 128);
2405 }
2406 vnode_setmountedon(ji->jvp);
2407 return 0; // stop iterating
2408 } else {
2409 vnode_put(ji->jvp);
2410 ji->jvp = NULL;
2411 }
2412 }
2413
2414 return 1; // keep iterating
2415 }
2416
2417 extern void IOBSDIterateMediaWithContent(const char *uuid_cstring, int (*func)(const char *bsd_dev_name, const char *uuid_str, void *arg), void *arg);
2418 kern_return_t IOBSDGetPlatformSerialNumber(char *serial_number_str, u_int32_t len);
2419
2420
2421 static vnode_t
2422 open_journal_dev(const char *vol_device,
2423 int need_clean,
2424 char *uuid_str,
2425 char *machine_serial_num,
2426 off_t jsize,
2427 size_t blksize,
2428 int *need_init)
2429 {
2430 int retry_counter=0;
2431 jopen_cb_info ji;
2432
2433 ji.jsize = jsize;
2434 ji.desired_uuid = uuid_str;
2435 ji.jvp = NULL;
2436 ji.blksize = blksize;
2437 ji.need_clean = need_clean;
2438 ji.need_init = 0;
2439
2440 // if (uuid_str[0] == '\0') {
2441 // printf("hfs: open journal dev: %s: locating any available non-dirty external journal partition\n", vol_device);
2442 // } else {
2443 // printf("hfs: open journal dev: %s: trying to find the external journal partition w/uuid %s\n", vol_device, uuid_str);
2444 // }
2445 while (ji.jvp == NULL && retry_counter++ < 4) {
2446 if (retry_counter > 1) {
2447 if (uuid_str[0]) {
2448 printf("hfs: open_journal_dev: uuid %s not found. waiting 10sec.\n", uuid_str);
2449 } else {
2450 printf("hfs: open_journal_dev: no available external journal partition found. waiting 10sec.\n");
2451 }
2452 delay_for_interval(10 * 1000000, NSEC_PER_USEC); // wait for ten seconds and then try again
2453 }
2454
2455 IOBSDIterateMediaWithContent(EXTJNL_CONTENT_TYPE_UUID, journal_open_cb, &ji);
2456 }
2457
2458 if (ji.jvp == NULL) {
2459 printf("hfs: volume: %s: did not find jnl device uuid: %s from machine serial number: %s\n",
2460 vol_device, uuid_str, machine_serial_num);
2461 }
2462
2463 *need_init = ji.need_init;
2464
2465 return ji.jvp;
2466 }
2467
2468
2469 int
2470 hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
2471 void *_args, off_t embeddedOffset, daddr64_t mdb_offset,
2472 HFSMasterDirectoryBlock *mdbp, kauth_cred_t cred)
2473 {
2474 JournalInfoBlock *jibp;
2475 struct buf *jinfo_bp, *bp;
2476 int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2477 int retval, write_jibp = 0;
2478 uint32_t blksize = hfsmp->hfs_logical_block_size;
2479 struct vnode *devvp;
2480 struct hfs_mount_args *args = _args;
2481 u_int32_t jib_flags;
2482 u_int64_t jib_offset;
2483 u_int64_t jib_size;
2484 const char *dev_name;
2485
2486 devvp = hfsmp->hfs_devvp;
2487 dev_name = vnode_getname_printable(devvp);
2488
2489 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2490 arg_flags = args->journal_flags;
2491 arg_tbufsz = args->journal_tbuffer_size;
2492 }
2493
2494 sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / blksize;
2495
2496 jinfo_bp = NULL;
2497 retval = (int)buf_meta_bread(devvp,
2498 (daddr64_t)((embeddedOffset/blksize) +
2499 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2500 hfsmp->hfs_physical_block_size, cred, &jinfo_bp);
2501 if (retval) {
2502 if (jinfo_bp) {
2503 buf_brelse(jinfo_bp);
2504 }
2505 goto cleanup_dev_name;
2506 }
2507
2508 jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2509 jib_flags = SWAP_BE32(jibp->flags);
2510 jib_size = SWAP_BE64(jibp->size);
2511
2512 if (jib_flags & kJIJournalInFSMask) {
2513 hfsmp->jvp = hfsmp->hfs_devvp;
2514 jib_offset = SWAP_BE64(jibp->offset);
2515 } else {
2516 int need_init=0;
2517
2518 // if the volume was unmounted cleanly then we'll pick any
2519 // available external journal partition
2520 //
2521 if (SWAP_BE32(vhp->attributes) & kHFSVolumeUnmountedMask) {
2522 *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2523 }
2524
2525 hfsmp->jvp = open_journal_dev(dev_name,
2526 !(jib_flags & kJIJournalNeedInitMask),
2527 (char *)&jibp->ext_jnl_uuid[0],
2528 (char *)&jibp->machine_serial_num[0],
2529 jib_size,
2530 hfsmp->hfs_logical_block_size,
2531 &need_init);
2532 if (hfsmp->jvp == NULL) {
2533 buf_brelse(jinfo_bp);
2534 retval = EROFS;
2535 goto cleanup_dev_name;
2536 } else {
2537 if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2538 strlcpy(&jibp->machine_serial_num[0], "unknown-machine-uuid", sizeof(jibp->machine_serial_num));
2539 }
2540 }
2541
2542 jib_offset = 0;
2543 write_jibp = 1;
2544 if (need_init) {
2545 jib_flags |= kJIJournalNeedInitMask;
2546 }
2547 }
2548
2549 // save this off for the hack-y check in hfs_remove()
2550 hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2551 hfsmp->jnl_size = jib_size;
2552
2553 if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2554 // if the file system is read-only, check if the journal is empty.
2555 // if it is, then we can allow the mount. otherwise we have to
2556 // return failure.
2557 retval = journal_is_clean(hfsmp->jvp,
2558 jib_offset + embeddedOffset,
2559 jib_size,
2560 devvp,
2561 hfsmp->hfs_logical_block_size);
2562
2563 hfsmp->jnl = NULL;
2564
2565 buf_brelse(jinfo_bp);
2566
2567 if (retval) {
2568 const char *name = vnode_getname_printable(devvp);
2569 printf("hfs: early journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n",
2570 name);
2571 vnode_putname_printable(name);
2572 }
2573
2574 goto cleanup_dev_name;
2575 }
2576
2577 if (jib_flags & kJIJournalNeedInitMask) {
2578 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2579 jib_offset + embeddedOffset, jib_size);
2580 hfsmp->jnl = journal_create(hfsmp->jvp,
2581 jib_offset + embeddedOffset,
2582 jib_size,
2583 devvp,
2584 blksize,
2585 arg_flags,
2586 arg_tbufsz,
2587 hfs_sync_metadata, hfsmp->hfs_mp,
2588 hfsmp->hfs_mp);
2589 if (hfsmp->jnl)
2590 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2591
2592 // no need to start a transaction here... if this were to fail
2593 // we'd just re-init it on the next mount.
2594 jib_flags &= ~kJIJournalNeedInitMask;
2595 jibp->flags = SWAP_BE32(jib_flags);
2596 buf_bwrite(jinfo_bp);
2597 jinfo_bp = NULL;
2598 jibp = NULL;
2599 } else {
2600 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2601 // jib_offset + embeddedOffset,
2602 // jib_size, SWAP_BE32(vhp->blockSize));
2603
2604 hfsmp->jnl = journal_open(hfsmp->jvp,
2605 jib_offset + embeddedOffset,
2606 jib_size,
2607 devvp,
2608 blksize,
2609 arg_flags,
2610 arg_tbufsz,
2611 hfs_sync_metadata, hfsmp->hfs_mp,
2612 hfsmp->hfs_mp);
2613 if (hfsmp->jnl)
2614 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2615
2616 if (write_jibp) {
2617 buf_bwrite(jinfo_bp);
2618 } else {
2619 buf_brelse(jinfo_bp);
2620 }
2621 jinfo_bp = NULL;
2622 jibp = NULL;
2623
2624 if (hfsmp->jnl && mdbp) {
2625 // reload the mdb because it could have changed
2626 // if the journal had to be replayed.
2627 if (mdb_offset == 0) {
2628 mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
2629 }
2630 bp = NULL;
2631 retval = (int)buf_meta_bread(devvp,
2632 HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
2633 hfsmp->hfs_physical_block_size, cred, &bp);
2634 if (retval) {
2635 if (bp) {
2636 buf_brelse(bp);
2637 }
2638 printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n",
2639 retval);
2640 goto cleanup_dev_name;
2641 }
2642 bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512);
2643 buf_brelse(bp);
2644 bp = NULL;
2645 }
2646 }
2647
2648 // if we expected the journal to be there and we couldn't
2649 // create it or open it then we have to bail out.
2650 if (hfsmp->jnl == NULL) {
2651 printf("hfs: early jnl init: failed to open/create the journal (retval %d).\n", retval);
2652 retval = EINVAL;
2653 goto cleanup_dev_name;
2654 }
2655
2656 retval = 0;
2657
2658 cleanup_dev_name:
2659 vnode_putname_printable(dev_name);
2660 return retval;
2661 }
2662
2663
2664 //
2665 // This function will go and re-locate the .journal_info_block and
2666 // the .journal files in case they moved (which can happen if you
2667 // run Norton SpeedDisk). If we fail to find either file we just
2668 // disable journaling for this volume and return. We turn off the
2669 // journaling bit in the vcb and assume it will get written to disk
2670 // later (if it doesn't on the next mount we'd do the same thing
2671 // again which is harmless). If we disable journaling we don't
2672 // return an error so that the volume is still mountable.
2673 //
2674 // If the info we find for the .journal_info_block and .journal files
2675 // isn't what we had stored, we re-set our cached info and proceed
2676 // with opening the journal normally.
2677 //
2678 static int
2679 hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_args)
2680 {
2681 JournalInfoBlock *jibp;
2682 struct buf *jinfo_bp;
2683 int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
2684 int retval, write_jibp = 0, recreate_journal = 0;
2685 struct vnode *devvp;
2686 struct cat_attr jib_attr, jattr;
2687 struct cat_fork jib_fork, jfork;
2688 ExtendedVCB *vcb;
2689 u_int32_t fid;
2690 struct hfs_mount_args *args = _args;
2691 u_int32_t jib_flags;
2692 u_int64_t jib_offset;
2693 u_int64_t jib_size;
2694
2695 devvp = hfsmp->hfs_devvp;
2696 vcb = HFSTOVCB(hfsmp);
2697
2698 if (args != NULL && (args->flags & HFSFSMNT_EXTENDED_ARGS)) {
2699 if (args->journal_disable) {
2700 return 0;
2701 }
2702
2703 arg_flags = args->journal_flags;
2704 arg_tbufsz = args->journal_tbuffer_size;
2705 }
2706
2707 fid = GetFileInfo(vcb, kRootDirID, ".journal_info_block", &jib_attr, &jib_fork);
2708 if (fid == 0 || jib_fork.cf_extents[0].startBlock == 0 || jib_fork.cf_size == 0) {
2709 printf("hfs: can't find the .journal_info_block! disabling journaling (start: %d).\n",
2710 jib_fork.cf_extents[0].startBlock);
2711 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2712 return 0;
2713 }
2714 hfsmp->hfs_jnlinfoblkid = fid;
2715
2716 // make sure the journal_info_block begins where we think it should.
2717 if (SWAP_BE32(vhp->journalInfoBlock) != jib_fork.cf_extents[0].startBlock) {
2718 printf("hfs: The journal_info_block moved (was: %d; is: %d). Fixing up\n",
2719 SWAP_BE32(vhp->journalInfoBlock), jib_fork.cf_extents[0].startBlock);
2720
2721 vcb->vcbJinfoBlock = jib_fork.cf_extents[0].startBlock;
2722 vhp->journalInfoBlock = SWAP_BE32(jib_fork.cf_extents[0].startBlock);
2723 recreate_journal = 1;
2724 }
2725
2726
2727 sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;
2728 jinfo_bp = NULL;
2729 retval = (int)buf_meta_bread(devvp,
2730 (vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size +
2731 ((u_int64_t)SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
2732 hfsmp->hfs_physical_block_size, NOCRED, &jinfo_bp);
2733 if (retval) {
2734 if (jinfo_bp) {
2735 buf_brelse(jinfo_bp);
2736 }
2737 printf("hfs: can't read journal info block. disabling journaling.\n");
2738 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2739 return 0;
2740 }
2741
2742 jibp = (JournalInfoBlock *)buf_dataptr(jinfo_bp);
2743 jib_flags = SWAP_BE32(jibp->flags);
2744 jib_offset = SWAP_BE64(jibp->offset);
2745 jib_size = SWAP_BE64(jibp->size);
2746
2747 fid = GetFileInfo(vcb, kRootDirID, ".journal", &jattr, &jfork);
2748 if (fid == 0 || jfork.cf_extents[0].startBlock == 0 || jfork.cf_size == 0) {
2749 printf("hfs: can't find the journal file! disabling journaling (start: %d)\n",
2750 jfork.cf_extents[0].startBlock);
2751 buf_brelse(jinfo_bp);
2752 vcb->vcbAtrb &= ~kHFSVolumeJournaledMask;
2753 return 0;
2754 }
2755 hfsmp->hfs_jnlfileid = fid;
2756
2757 // make sure the journal file begins where we think it should.
2758 if ((jib_flags & kJIJournalInFSMask) && (jib_offset / (u_int64_t)vcb->blockSize) != jfork.cf_extents[0].startBlock) {
2759 printf("hfs: The journal file moved (was: %lld; is: %d). Fixing up\n",
2760 (jib_offset / (u_int64_t)vcb->blockSize), jfork.cf_extents[0].startBlock);
2761
2762 jib_offset = (u_int64_t)jfork.cf_extents[0].startBlock * (u_int64_t)vcb->blockSize;
2763 write_jibp = 1;
2764 recreate_journal = 1;
2765 }
2766
2767 // check the size of the journal file.
2768 if (jib_size != (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize) {
2769 printf("hfs: The journal file changed size! (was %lld; is %lld). Fixing up.\n",
2770 jib_size, (u_int64_t)jfork.cf_extents[0].blockCount*vcb->blockSize);
2771
2772 jib_size = (u_int64_t)jfork.cf_extents[0].blockCount * vcb->blockSize;
2773 write_jibp = 1;
2774 recreate_journal = 1;
2775 }
2776
2777 if (jib_flags & kJIJournalInFSMask) {
2778 hfsmp->jvp = hfsmp->hfs_devvp;
2779 jib_offset += (off_t)vcb->hfsPlusIOPosOffset;
2780 } else {
2781 const char *dev_name;
2782 int need_init = 0;
2783
2784 dev_name = vnode_getname_printable(devvp);
2785
2786 // since the journal is empty, just use any available external journal
2787 *((char *)&jibp->ext_jnl_uuid[0]) = '\0';
2788
2789 // this fills in the uuid of the device we actually get
2790 hfsmp->jvp = open_journal_dev(dev_name,
2791 !(jib_flags & kJIJournalNeedInitMask),
2792 (char *)&jibp->ext_jnl_uuid[0],
2793 (char *)&jibp->machine_serial_num[0],
2794 jib_size,
2795 hfsmp->hfs_logical_block_size,
2796 &need_init);
2797 if (hfsmp->jvp == NULL) {
2798 buf_brelse(jinfo_bp);
2799 vnode_putname_printable(dev_name);
2800 return EROFS;
2801 } else {
2802 if (IOBSDGetPlatformSerialNumber(&jibp->machine_serial_num[0], sizeof(jibp->machine_serial_num)) != KERN_SUCCESS) {
2803 strlcpy(&jibp->machine_serial_num[0], "unknown-machine-serial-num", sizeof(jibp->machine_serial_num));
2804 }
2805 }
2806 jib_offset = 0;
2807 recreate_journal = 1;
2808 write_jibp = 1;
2809 if (need_init) {
2810 jib_flags |= kJIJournalNeedInitMask;
2811 }
2812 vnode_putname_printable(dev_name);
2813 }
2814
2815 // save this off for the hack-y check in hfs_remove()
2816 hfsmp->jnl_start = jib_offset / SWAP_BE32(vhp->blockSize);
2817 hfsmp->jnl_size = jib_size;
2818
2819 if ((hfsmp->hfs_flags & HFS_READ_ONLY) && (vfs_flags(hfsmp->hfs_mp) & MNT_ROOTFS) == 0) {
2820 // if the file system is read-only, check if the journal is empty.
2821 // if it is, then we can allow the mount. otherwise we have to
2822 // return failure.
2823 retval = journal_is_clean(hfsmp->jvp,
2824 jib_offset,
2825 jib_size,
2826 devvp,
2827 hfsmp->hfs_logical_block_size);
2828
2829 hfsmp->jnl = NULL;
2830
2831 buf_brelse(jinfo_bp);
2832
2833 if (retval) {
2834 const char *name = vnode_getname_printable(devvp);
2835 printf("hfs: late journal init: volume on %s is read-only and journal is dirty. Can not mount volume.\n",
2836 name);
2837 vnode_putname_printable(name);
2838 }
2839
2840 return retval;
2841 }
2842
2843 if ((jib_flags & kJIJournalNeedInitMask) || recreate_journal) {
2844 printf("hfs: Initializing the journal (joffset 0x%llx sz 0x%llx)...\n",
2845 jib_offset, jib_size);
2846 hfsmp->jnl = journal_create(hfsmp->jvp,
2847 jib_offset,
2848 jib_size,
2849 devvp,
2850 hfsmp->hfs_logical_block_size,
2851 arg_flags,
2852 arg_tbufsz,
2853 hfs_sync_metadata, hfsmp->hfs_mp,
2854 hfsmp->hfs_mp);
2855 if (hfsmp->jnl)
2856 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2857
2858 // no need to start a transaction here... if this were to fail
2859 // we'd just re-init it on the next mount.
2860 jib_flags &= ~kJIJournalNeedInitMask;
2861 write_jibp = 1;
2862
2863 } else {
2864 //
2865 // if we weren't the last person to mount this volume
2866 // then we need to throw away the journal because it
2867 // is likely that someone else mucked with the disk.
2868 // if the journal is empty this is no big deal. if the
2869 // disk is dirty this prevents us from replaying the
2870 // journal over top of changes that someone else made.
2871 //
2872 arg_flags |= JOURNAL_RESET;
2873
2874 //printf("hfs: Opening the journal (joffset 0x%llx sz 0x%llx vhp_blksize %d)...\n",
2875 // jib_offset,
2876 // jib_size, SWAP_BE32(vhp->blockSize));
2877
2878 hfsmp->jnl = journal_open(hfsmp->jvp,
2879 jib_offset,
2880 jib_size,
2881 devvp,
2882 hfsmp->hfs_logical_block_size,
2883 arg_flags,
2884 arg_tbufsz,
2885 hfs_sync_metadata, hfsmp->hfs_mp,
2886 hfsmp->hfs_mp);
2887 if (hfsmp->jnl)
2888 journal_trim_set_callback(hfsmp->jnl, hfs_trim_callback, hfsmp);
2889 }
2890
2891
2892 if (write_jibp) {
2893 jibp->flags = SWAP_BE32(jib_flags);
2894 jibp->offset = SWAP_BE64(jib_offset);
2895 jibp->size = SWAP_BE64(jib_size);
2896
2897 buf_bwrite(jinfo_bp);
2898 } else {
2899 buf_brelse(jinfo_bp);
2900 }
2901 jinfo_bp = NULL;
2902 jibp = NULL;
2903
2904 // if we expected the journal to be there and we couldn't
2905 // create it or open it then we have to bail out.
2906 if (hfsmp->jnl == NULL) {
2907 printf("hfs: late jnl init: failed to open/create the journal (retval %d).\n", retval);
2908 return EINVAL;
2909 }
2910
2911 return 0;
2912 }
2913
2914 /*
2915 * Calculate the allocation zone for metadata.
2916 *
2917 * This zone includes the following:
2918 * Allocation Bitmap file
2919 * Overflow Extents file
2920 * Journal file
2921 * Quota files
2922 * Clustered Hot files
2923 * Catalog file
2924 *
2925 * METADATA ALLOCATION ZONE
2926 * ____________________________________________________________________________
2927 * | | | | | | |
2928 * | BM | JF | OEF | CATALOG |---> | HOT FILES |
2929 * |____|____|_____|_______________|______________________________|___________|
2930 *
2931 * <------------------------------- N * 128 MB ------------------------------->
2932 *
2933 */
2934 #define GIGABYTE (u_int64_t)(1024*1024*1024)
2935
2936 #define OVERFLOW_DEFAULT_SIZE (4*1024*1024)
2937 #define OVERFLOW_MAXIMUM_SIZE (128*1024*1024)
2938 #define JOURNAL_DEFAULT_SIZE (8*1024*1024)
2939 #define JOURNAL_MAXIMUM_SIZE (512*1024*1024)
2940 #define HOTBAND_MINIMUM_SIZE (10*1024*1024)
2941 #define HOTBAND_MAXIMUM_SIZE (512*1024*1024)
2942
2943 /* Initialize the metadata zone.
2944 *
2945 * If the size of the volume is less than the minimum size required
2946 * for a metadata zone, the metadata zone is disabled.
2947 *
2948 * If disable is true, disable metadata zone unconditionally.
2949 */
2950 void
2951 hfs_metadatazone_init(struct hfsmount *hfsmp, int disable)
2952 {
2953 ExtendedVCB *vcb;
2954 u_int64_t fs_size;
2955 u_int64_t zonesize;
2956 u_int64_t temp;
2957 u_int64_t filesize;
2958 u_int32_t blk;
2959 int items, really_do_it=1;
2960
2961 vcb = HFSTOVCB(hfsmp);
2962 fs_size = (u_int64_t)vcb->blockSize * (u_int64_t)vcb->allocLimit;
2963
2964 /*
2965 * For volumes less than 10 GB, don't bother.
2966 */
2967 if (fs_size < ((u_int64_t)10 * GIGABYTE)) {
2968 really_do_it = 0;
2969 }
2970
2971 /*
2972 * Skip non-journaled volumes as well.
2973 */
2974 if (hfsmp->jnl == NULL) {
2975 really_do_it = 0;
2976 }
2977
2978 /* If caller wants to disable metadata zone, do it */
2979 if (disable == true) {
2980 really_do_it = 0;
2981 }
2982
2983 /*
2984 * Start with space for the boot blocks and Volume Header.
2985 * 1536 = byte offset from start of volume to end of volume header:
2986 * 1024 bytes is the offset from the start of the volume to the
2987 * start of the volume header (defined by the volume format)
2988 * + 512 bytes (the size of the volume header).
2989 */
2990 zonesize = roundup(1536, hfsmp->blockSize);
2991
2992 /*
2993 * Add the on-disk size of allocation bitmap.
2994 */
2995 zonesize += hfsmp->hfs_allocation_cp->c_datafork->ff_blocks * hfsmp->blockSize;
2996
2997 /*
2998 * Add space for the Journal Info Block and Journal (if they're in
2999 * this file system).
3000 */
3001 if (hfsmp->jnl && hfsmp->jvp == hfsmp->hfs_devvp) {
3002 zonesize += hfsmp->blockSize + hfsmp->jnl_size;
3003 }
3004
3005 /*
3006 * Add the existing size of the Extents Overflow B-tree.
3007 * (It rarely grows, so don't bother reserving additional room for it.)
3008 */
3009 zonesize += hfsmp->hfs_extents_cp->c_datafork->ff_blocks * hfsmp->blockSize;
3010
3011 /*
3012 * If there is an Attributes B-tree, leave room for 11 clumps worth.
3013 * newfs_hfs allocates one clump, and leaves a gap of 10 clumps.
3014 * When installing a full OS install onto a 20GB volume, we use
3015 * 7 to 8 clumps worth of space (depending on packages), so that leaves
3016 * us with another 3 or 4 clumps worth before we need another extent.
3017 */
3018 if (hfsmp->hfs_attribute_cp) {
3019 zonesize += 11 * hfsmp->hfs_attribute_cp->c_datafork->ff_clumpsize;
3020 }
3021
3022 /*
3023 * Leave room for 11 clumps of the Catalog B-tree.
3024 * Again, newfs_hfs allocates one clump plus a gap of 10 clumps.
3025 * When installing a full OS install onto a 20GB volume, we use
3026 * 7 to 8 clumps worth of space (depending on packages), so that leaves
3027 * us with another 3 or 4 clumps worth before we need another extent.
3028 */
3029 zonesize += 11 * hfsmp->hfs_catalog_cp->c_datafork->ff_clumpsize;
3030
3031 /*
3032 * Add space for hot file region.
3033 *
3034 * ...for now, use 5 MB per 1 GB (0.5 %)
3035 */
3036 filesize = (fs_size / 1024) * 5;
3037 if (filesize > HOTBAND_MAXIMUM_SIZE)
3038 filesize = HOTBAND_MAXIMUM_SIZE;
3039 else if (filesize < HOTBAND_MINIMUM_SIZE)
3040 filesize = HOTBAND_MINIMUM_SIZE;
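/* e.g. a 100 GB volume yields a 500 MB hot file band, just under the 512 MB cap */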
3041 /*
3042 * Calculate user quota file requirements.
3043 */
3044 if (hfsmp->hfs_flags & HFS_QUOTAS) {
3045 items = QF_USERS_PER_GB * (fs_size / GIGABYTE);
3046 if (items < QF_MIN_USERS)
3047 items = QF_MIN_USERS;
3048 else if (items > QF_MAX_USERS)
3049 items = QF_MAX_USERS;
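/* Round a non-power-of-two item count up to the next power of two, e.g. 300 -> 512 */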
3050 if (!powerof2(items)) {
3051 int x = items;
3052 items = 4;
3053 while (x>>1 != 1) {
3054 x = x >> 1;
3055 items = items << 1;
3056 }
3057 }
3058 filesize += (items + 1) * sizeof(struct dqblk);
3059 /*
3060 * Calculate group quota file requirements.
3061 *
3062 */
3063 items = QF_GROUPS_PER_GB * (fs_size / GIGABYTE);
3064 if (items < QF_MIN_GROUPS)
3065 items = QF_MIN_GROUPS;
3066 else if (items > QF_MAX_GROUPS)
3067 items = QF_MAX_GROUPS;
3068 if (!powerof2(items)) {
3069 int x = items;
3070 items = 4;
3071 while (x>>1 != 1) {
3072 x = x >> 1;
3073 items = items << 1;
3074 }
3075 }
3076 filesize += (items + 1) * sizeof(struct dqblk);
3077 }
3078 zonesize += filesize;
3079
3080 /*
3081 * Round up entire zone to a bitmap block's worth.
3082 * The extra space goes to the catalog file and hot file area.
3083 */
3084 temp = zonesize;
3085 zonesize = roundup(zonesize, (u_int64_t)vcb->vcbVBMIOSize * 8 * vcb->blockSize);
3086 hfsmp->hfs_min_alloc_start = zonesize / vcb->blockSize;
3087 /*
3088 * If doing the round up for hfs_min_alloc_start would push us past
3089 * allocLimit, then just reset it back to 0. Though using a value
3090 * bigger than allocLimit would not cause damage in the block allocator
3091 * code, this value could get stored in the volume header and make it out
3092 * to disk, making the volume header technically corrupt.
3093 */
3094 if (hfsmp->hfs_min_alloc_start >= hfsmp->allocLimit) {
3095 hfsmp->hfs_min_alloc_start = 0;
3096 }
3097
3098 if (really_do_it == 0) {
3099 /* If metadata zone needs to be disabled because the
3100 * volume was truncated, clear the bit and zero out
3101 * the values that are no longer needed.
3102 */
3103 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
3104 /* Disable metadata zone */
3105 hfsmp->hfs_flags &= ~HFS_METADATA_ZONE;
3106
3107 /* Zero out mount point values that are not required */
3108 hfsmp->hfs_catalog_maxblks = 0;
3109 hfsmp->hfs_hotfile_maxblks = 0;
3110 hfsmp->hfs_hotfile_start = 0;
3111 hfsmp->hfs_hotfile_end = 0;
3112 hfsmp->hfs_hotfile_freeblks = 0;
3113 hfsmp->hfs_metazone_start = 0;
3114 hfsmp->hfs_metazone_end = 0;
3115 }
3116
3117 return;
3118 }
3119
3120 temp = zonesize - temp; /* temp has extra space */
3121 filesize += temp / 3;
3122 hfsmp->hfs_catalog_maxblks += (temp - (temp / 3)) / vcb->blockSize;
3123
3124 hfsmp->hfs_hotfile_maxblks = filesize / vcb->blockSize;
3125
3126 /* Convert to allocation blocks. */
3127 blk = zonesize / vcb->blockSize;
3128
3129 /* The default metadata zone location is at the start of volume. */
3130 hfsmp->hfs_metazone_start = 1;
3131 hfsmp->hfs_metazone_end = blk - 1;
3132
3133 /* The default hotfile area is at the end of the zone. */
3134 if (vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) {
3135 hfsmp->hfs_hotfile_start = blk - (filesize / vcb->blockSize);
3136 hfsmp->hfs_hotfile_end = hfsmp->hfs_metazone_end;
3137 hfsmp->hfs_hotfile_freeblks = hfs_hotfile_freeblocks(hfsmp);
3138 }
3139 else {
3140 hfsmp->hfs_hotfile_start = 0;
3141 hfsmp->hfs_hotfile_end = 0;
3142 hfsmp->hfs_hotfile_freeblks = 0;
3143 }
3144 #if 0
3145 printf("hfs: metadata zone is %d to %d\n", hfsmp->hfs_metazone_start, hfsmp->hfs_metazone_end);
3146 printf("hfs: hot file band is %d to %d\n", hfsmp->hfs_hotfile_start, hfsmp->hfs_hotfile_end);
3147 printf("hfs: hot file band free blocks = %d\n", hfsmp->hfs_hotfile_freeblks);
3148 #endif
3149 hfsmp->hfs_flags |= HFS_METADATA_ZONE;
3150 }
3151
3152
3153 static u_int32_t
3154 hfs_hotfile_freeblocks(struct hfsmount *hfsmp)
3155 {
3156 ExtendedVCB *vcb = HFSTOVCB(hfsmp);
3157 int lockflags;
3158 int freeblocks;
3159
3160 lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
3161 freeblocks = MetaZoneFreeBlocks(vcb);
3162 hfs_systemfile_unlock(hfsmp, lockflags);
3163
3164 /* Minus Extents overflow file reserve. */
3165 freeblocks -=
3166 hfsmp->hfs_overflow_maxblks - VTOF(hfsmp->hfs_extents_vp)->ff_blocks;
3167 /* Minus catalog file reserve. */
3168 freeblocks -=
3169 hfsmp->hfs_catalog_maxblks - VTOF(hfsmp->hfs_catalog_vp)->ff_blocks;
3170 if (freeblocks < 0)
3171 freeblocks = 0;
3172
3173 return MIN(freeblocks, hfsmp->hfs_hotfile_maxblks);
3174 }
3175
3176 /*
3177 * Determine if a file is a "virtual" metadata file.
3178 * This includes journal and quota files.
3179 */
3180 int
3181 hfs_virtualmetafile(struct cnode *cp)
3182 {
3183 const char * filename;
3184
3185
3186 if (cp->c_parentcnid != kHFSRootFolderID)
3187 return (0);
3188
3189 filename = (const char *)cp->c_desc.cd_nameptr;
3190 if (filename == NULL)
3191 return (0);
3192
3193 if ((strncmp(filename, ".journal", sizeof(".journal")) == 0) ||
3194 (strncmp(filename, ".journal_info_block", sizeof(".journal_info_block")) == 0) ||
3195 (strncmp(filename, ".quota.user", sizeof(".quota.user")) == 0) ||
3196 (strncmp(filename, ".quota.group", sizeof(".quota.group")) == 0) ||
3197 (strncmp(filename, ".hotfiles.btree", sizeof(".hotfiles.btree")) == 0))
3198 return (1);
3199
3200 return (0);
3201 }
3202
3203 __private_extern__
3204 void hfs_syncer_lock(struct hfsmount *hfsmp)
3205 {
3206 hfs_lock_mount(hfsmp);
3207 }
3208
3209 __private_extern__
3210 void hfs_syncer_unlock(struct hfsmount *hfsmp)
3211 {
3212 hfs_unlock_mount(hfsmp);
3213 }
3214
3215 __private_extern__
3216 void hfs_syncer_wait(struct hfsmount *hfsmp)
3217 {
3218 msleep(&hfsmp->hfs_sync_incomplete, &hfsmp->hfs_mutex, PWAIT,
3219 "hfs_syncer_wait", NULL);
3220 }
3221
3222 __private_extern__
3223 void hfs_syncer_wakeup(struct hfsmount *hfsmp)
3224 {
3225 wakeup(&hfsmp->hfs_sync_incomplete);
3226 }
3227
3228 __private_extern__
3229 uint64_t hfs_usecs_to_deadline(uint64_t usecs)
3230 {
3231 uint64_t deadline;
3232 clock_interval_to_deadline(usecs, NSEC_PER_USEC, &deadline);
3233 return deadline;
3234 }
3235
3236 __private_extern__
3237 void hfs_syncer_queue(thread_call_t syncer)
3238 {
3239 if (thread_call_enter_delayed_with_leeway(syncer,
3240 NULL,
3241 hfs_usecs_to_deadline(HFS_META_DELAY),
3242 0,
3243 THREAD_CALL_DELAY_SYS_BACKGROUND)) {
3244 printf("hfs: syncer already scheduled!\n");
3245 }
3246 }
3247
3248 //
3249 // Fire off a timed callback to sync the disk if the
3250 // volume is on ejectable media.
3251 //
3252 __private_extern__
3253 void
3254 hfs_sync_ejectable(struct hfsmount *hfsmp)
3255 {
3256 // If we don't have a syncer or we get called by the syncer, just return
3257 if (!hfsmp->hfs_syncer || current_thread() == hfsmp->hfs_syncer_thread)
3258 return;
3259
3260 hfs_syncer_lock(hfsmp);
3261
3262 if (!timerisset(&hfsmp->hfs_sync_req_oldest))
3263 microuptime(&hfsmp->hfs_sync_req_oldest);
3264
3265 /* If hfs_unmount is running, it will set hfs_syncer to NULL. Also we
3266 don't want to queue again if there is a sync outstanding. */
3267 if (!hfsmp->hfs_syncer || hfsmp->hfs_sync_incomplete) {
3268 hfs_syncer_unlock(hfsmp);
3269 return;
3270 }
3271
3272 hfsmp->hfs_sync_incomplete = TRUE;
3273
3274 thread_call_t syncer = hfsmp->hfs_syncer;
3275
3276 hfs_syncer_unlock(hfsmp);
3277
3278 hfs_syncer_queue(syncer);
3279 }
3280
3281 int
3282 hfs_start_transaction(struct hfsmount *hfsmp)
3283 {
3284 int ret = 0, unlock_on_err = 0;
3285 thread_t thread = current_thread();
3286
3287 #ifdef HFS_CHECK_LOCK_ORDER
3288 /*
3289 * You cannot start a transaction while holding a system
3290 * file lock. (unless the transaction is nested.)
3291 */
3292 if (hfsmp->jnl && journal_owner(hfsmp->jnl) != thread) {
3293 if (hfsmp->hfs_catalog_cp && hfsmp->hfs_catalog_cp->c_lockowner == thread) {
3294 panic("hfs_start_transaction: bad lock order (cat before jnl)\n");
3295 }
3296 if (hfsmp->hfs_attribute_cp && hfsmp->hfs_attribute_cp->c_lockowner == thread) {
3297 panic("hfs_start_transaction: bad lock order (attr before jnl)\n");
3298 }
3299 if (hfsmp->hfs_extents_cp && hfsmp->hfs_extents_cp->c_lockowner == thread) {
3300 panic("hfs_start_transaction: bad lock order (ext before jnl)\n");
3301 }
3302 }
3303 #endif /* HFS_CHECK_LOCK_ORDER */
3304
3305 if (hfsmp->jnl == NULL || journal_owner(hfsmp->jnl) != thread) {
3306 /*
3307 * The global lock should be held shared if the journal is
3308 * active, to prevent it being disabled. If we're not the owner
3309 * of the journal lock, verify that we're not already
3310 * holding the global lock exclusive before moving on.
3311 */
3312 if (hfsmp->hfs_global_lockowner == thread) {
3313 ret = EBUSY;
3314 goto out;
3315 }
3316
3317 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3318 OSAddAtomic(1, (SInt32 *)&hfsmp->hfs_active_threads);
3319 unlock_on_err = 1;
3320 }
3321
3322 /* If a downgrade to a read-only mount is in progress, no thread
3323 * other than the downgrading thread is allowed to modify
3324 * the file system.
3325 */
3326 if ((hfsmp->hfs_flags & HFS_RDONLY_DOWNGRADE) &&
3327 hfsmp->hfs_downgrading_thread != thread) {
3328 ret = EROFS;
3329 goto out;
3330 }
3331
3332 if (hfsmp->jnl) {
3333 ret = journal_start_transaction(hfsmp->jnl);
3334 if (ret == 0) {
3335 OSAddAtomic(1, &hfsmp->hfs_global_lock_nesting);
3336 }
3337 } else {
3338 ret = 0;
3339 }
3340
3341 out:
3342 if (ret != 0 && unlock_on_err) {
3343 hfs_unlock_global (hfsmp);
3344 OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
3345 }
3346
3347 return ret;
3348 }
3349
3350 int
3351 hfs_end_transaction(struct hfsmount *hfsmp)
3352 {
3353 int need_unlock=0, ret;
3354
3355 if ((hfsmp->jnl == NULL) || ( journal_owner(hfsmp->jnl) == current_thread()
3356 && (OSAddAtomic(-1, &hfsmp->hfs_global_lock_nesting) == 1)) ) {
3357 need_unlock = 1;
3358 }
3359
3360 if (hfsmp->jnl) {
3361 ret = journal_end_transaction(hfsmp->jnl);
3362 } else {
3363 ret = 0;
3364 }
3365
3366 if (need_unlock) {
3367 OSAddAtomic(-1, (SInt32 *)&hfsmp->hfs_active_threads);
3368 hfs_unlock_global (hfsmp);
3369 hfs_sync_ejectable(hfsmp);
3370 }
3371
3372 return ret;
3373 }
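
/*
 * The canonical calling pattern, sketched (illustrative fragment; real
 * callers also take the needed system-file locks inside the
 * transaction, as hfs_erase_unused_nodes below does):
 */
#if 0
	int error = hfs_start_transaction(hfsmp);
	if (error)
		return error;
	/* ... journaled metadata changes ... */
	hfs_end_transaction(hfsmp);
#endif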
3374
3375
3376 void
3377 hfs_journal_lock(struct hfsmount *hfsmp)
3378 {
3379 /* Only peek at hfsmp->jnl while holding the global lock */
3380 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3381 if (hfsmp->jnl) {
3382 journal_lock(hfsmp->jnl);
3383 }
3384 hfs_unlock_global (hfsmp);
3385 }
3386
3387 void
3388 hfs_journal_unlock(struct hfsmount *hfsmp)
3389 {
3390 /* Only peek at hfsmp->jnl while holding the global lock */
3391 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3392 if (hfsmp->jnl) {
3393 journal_unlock(hfsmp->jnl);
3394 }
3395 hfs_unlock_global (hfsmp);
3396 }
3397
3398 /*
3399 * Flush the contents of the journal to the disk.
3400 *
3401 * Input:
3402 * wait_for_IO -
3403 * If TRUE, wait to write in-memory journal to the disk
3404 * consistently, and also wait to write all asynchronous
3405 * metadata blocks to their corresponding locations
3406 * consistently on the disk. This means that the journal
3407 * is empty at this point and does not contain any
3408 * transactions. This is overkill in normal scenarios
3409 * but is useful whenever the metadata blocks are required
3410 * to be consistent on-disk instead of just the journal
3411 * being consistent; like before live verification
3412 * and live volume resizing.
3413 *
3414 * If FALSE, only wait to write in-memory journal to the
3415 * disk consistently. This means that the journal still
3416 * contains uncommitted transactions and the file system
3417 * metadata blocks in the journal transactions might be
3418 * written asynchronously to the disk. But there is no
3419 * guarantee that they are written to the disk before
3420 * returning to the caller. Note that this option is
3421 * sufficient for file system data integrity as it
3422 * guarantees consistent journal content on the disk.
3423 */
3424 int
3425 hfs_journal_flush(struct hfsmount *hfsmp, boolean_t wait_for_IO)
3426 {
3427 int ret;
3428
3429 /* Only peek at hfsmp->jnl while holding the global lock */
3430 hfs_lock_global (hfsmp, HFS_SHARED_LOCK);
3431 if (hfsmp->jnl) {
3432 ret = journal_flush(hfsmp->jnl, wait_for_IO);
3433 } else {
3434 ret = 0;
3435 }
3436 hfs_unlock_global (hfsmp);
3437
3438 return ret;
3439 }
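
/*
 * Sketched usage of the two modes described above (illustrative):
 */
#if 0
	/* Ordinary integrity: the journal content is consistent on disk. */
	(void) hfs_journal_flush(hfsmp, FALSE);

	/* Before live verification or resizing: the metadata blocks
	   themselves must be consistent, so also wait for the async I/O. */
	(void) hfs_journal_flush(hfsmp, TRUE);
#endif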
3440
3441
3442 /*
3443 * hfs_erase_unused_nodes
3444 *
3445 * Check whether a volume may suffer from unused Catalog B-tree nodes that
3446 * are not zeroed (due to <rdar://problem/6947811>). If so, just write
3447 * zeroes to the unused nodes.
3448 *
3449 * How do we detect when a volume needs this repair? We can't always be
3450 * certain. If a volume was created after a certain date, then it may have
3451 * been created with the faulty newfs_hfs. Since newfs_hfs only created one
3452 * clump, we can assume that if a Catalog B-tree is larger than its clump size,
3453 * the entire first clump must have been written to, which means
3454 * there shouldn't be unused and unwritten nodes in that first clump, and this
3455 * repair is not needed.
3456 *
3457 * We have defined a bit in the Volume Header's attributes to indicate when the
3458 * unused nodes have been repaired. A newer newfs_hfs will set this bit.
3459 * As will fsck_hfs when it repairs the unused nodes.
3460 */
3461 int hfs_erase_unused_nodes(struct hfsmount *hfsmp)
3462 {
3463 int result;
3464 struct filefork *catalog;
3465 int lockflags;
3466
3467 if (hfsmp->vcbAtrb & kHFSUnusedNodeFixMask)
3468 {
3469 /* This volume has already been checked and repaired. */
3470 return 0;
3471 }
3472
3473 if (hfsmp->localCreateDate < kHFSUnusedNodesFixDate)
3474 {
3475 /* This volume is too old to have had the problem. */
3476 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3477 return 0;
3478 }
3479
3480 catalog = hfsmp->hfs_catalog_cp->c_datafork;
3481 if (catalog->ff_size > catalog->ff_clumpsize)
3482 {
3483 /* The entire first clump must have been in use at some point. */
3484 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3485 return 0;
3486 }
3487
3488 /*
3489 * If we get here, we need to zero out those unused nodes.
3490 *
3491 * We start a transaction and lock the catalog since we're going to be
3492 * making on-disk changes. But note that BTZeroUnusedNodes doesn't actually
3493 * do its writing via the journal, because that would be too much I/O
3494 * to fit in a transaction, and it's a pain to break it up into multiple
3495 * transactions. (It behaves more like growing a B-tree would.)
3496 */
3497 printf("hfs_erase_unused_nodes: updating volume %s.\n", hfsmp->vcbVN);
3498 result = hfs_start_transaction(hfsmp);
3499 if (result)
3500 goto done;
3501 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3502 result = BTZeroUnusedNodes(catalog);
3503 vnode_waitforwrites(hfsmp->hfs_catalog_vp, 0, 0, 0, "hfs_erase_unused_nodes");
3504 hfs_systemfile_unlock(hfsmp, lockflags);
3505 hfs_end_transaction(hfsmp);
3506 if (result == 0)
3507 hfsmp->vcbAtrb |= kHFSUnusedNodeFixMask;
3508 printf("hfs_erase_unused_nodes: done updating volume %s.\n", hfsmp->vcbVN);
3509
3510 done:
3511 return result;
3512 }
3513
3514
3515 extern time_t snapshot_timestamp;
3516
3517 int
3518 check_for_tracked_file(struct vnode *vp, time_t ctime, uint64_t op_type, void *arg)
3519 {
3520 int snapshot_error = 0;
3521
3522 if (vp == NULL) {
3523 return 0;
3524 }
3525
3526 /* Swap files are special; skip them */
3527 if (vnode_isswap(vp)) {
3528 return 0;
3529 }
3530
3531 if (ctime != 0 && snapshot_timestamp != 0 && (ctime <= snapshot_timestamp || vnode_needssnapshots(vp))) {
3532 // the change time is within this epoch
3533 int error;
3534
3535 error = resolve_nspace_item_ext(vp, op_type | NAMESPACE_HANDLER_SNAPSHOT_EVENT, arg);
3536 if (error == EDEADLK) {
3537 snapshot_error = 0;
3538 } else if (error) {
3539 if (error == EAGAIN) {
3540 printf("hfs: cow-snapshot: timed out waiting for namespace handler...\n");
3541 } else if (error == EINTR) {
3542 // printf("hfs: cow-snapshot: got a signal while waiting for namespace handler...\n");
3543 snapshot_error = EINTR;
3544 }
3545 }
3546 }
3547
3548 if (snapshot_error) return snapshot_error;
3549
3550 return 0;
3551 }
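/*
 * Illustrative caller sketch (an assumption, not part of the original
 * source): write paths capture the cnode's change time before modifying
 * the file so the namespace handler can snapshot the old contents first:
 *
 *     time_t orig_ctime = VTOC(vp)->c_ctime;
 *     check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, NULL);
 */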
3552
3553 int
3554 check_for_dataless_file(struct vnode *vp, uint64_t op_type)
3555 {
3556 int error;
3557
3558 if (vp == NULL || (VTOC(vp)->c_bsdflags & UF_COMPRESSED) == 0 || VTOCMP(vp) == NULL || VTOCMP(vp)->cmp_type != DATALESS_CMPFS_TYPE) {
3559 // there's nothing to do, it's not dataless
3560 return 0;
3561 }
3562
3563 /* Swap files are special; ignore them */
3564 if (vnode_isswap(vp)) {
3565 return 0;
3566 }
3567
3568 // printf("hfs: dataless: encountered a file with the dataless bit set! (vp %p)\n", vp);
3569 error = resolve_nspace_item(vp, op_type | NAMESPACE_HANDLER_NSPACE_EVENT);
3570 if (error == EDEADLK && op_type == NAMESPACE_HANDLER_WRITE_OP) {
3571 error = 0;
3572 } else if (error) {
3573 if (error == EAGAIN) {
3574 printf("hfs: dataless: timed out waiting for namespace handler...\n");
3575 // XXXdbg - return the fabled ENOTPRESENT (i.e. EJUKEBOX)?
3576 return 0;
3577 } else if (error == EINTR) {
3578 // printf("hfs: dataless: got a signal while waiting for namespace handler...\n");
3579 return EINTR;
3580 }
3581 } else if (VTOC(vp)->c_bsdflags & UF_COMPRESSED) {
3582 //
3583 // if we're here, the dataless bit is still set on the file
3584 // which means it didn't get handled. we return an error
3585 // but it's presently ignored by all callers of this function.
3586 //
3587 // XXXdbg - EDATANOTPRESENT is what we really need...
3588 //
3589 return EBADF;
3590 }
3591
3592 return error;
3593 }
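/*
 * Illustrative caller sketch (an assumption, not part of the original
 * source): data paths call this before touching file contents so a
 * dataless file can be materialized by the namespace handler first:
 *
 *     error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
 *     if (error)
 *         return error;
 */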
3594
3595
3596 //
3597 // NOTE: this function takes care of starting a transaction and
3598 // acquiring the systemfile lock so that it can call
3599 // cat_update().
3600 //
3601 // NOTE: do NOT hold any cnode locks while calling this function
3602 // to avoid deadlocks (because we take a lock on the root
3603 // cnode)
3604 //
3605 int
3606 hfs_generate_document_id(struct hfsmount *hfsmp, uint32_t *docid)
3607 {
3608 struct vnode *rvp;
3609 struct cnode *cp;
3610 int error;
3611
3612 error = VFS_ROOT(HFSTOVFS(hfsmp), &rvp, vfs_context_kernel());
3613 if (error) {
3614 return error;
3615 }
3616
3617 cp = VTOC(rvp);
3618 if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) != 0) {
3619 vnode_put(rvp); return error; // don't leak the iocount from VFS_ROOT
3620 }
3621 struct FndrExtendedDirInfo *extinfo = (struct FndrExtendedDirInfo *)((void *)((char *)&cp->c_attr.ca_finderinfo + 16));
3622
3623 int lockflags;
3624 if ((error = hfs_start_transaction(hfsmp)) != 0) {
3625 hfs_unlock(cp); vnode_put(rvp); return error; // don't leak the cnode lock or iocount
3626 }
3627 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
3628
3629 if (extinfo->document_id == 0) {
3630 // initialize this to start at 3 (one greater than the root-dir id)
3631 extinfo->document_id = 3;
3632 }
3633
3634 *docid = extinfo->document_id++;
3635
3636 // mark the root cnode dirty
3637 cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
3638 (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
3639
3640 hfs_systemfile_unlock (hfsmp, lockflags);
3641 (void) hfs_end_transaction(hfsmp);
3642
3643 (void) hfs_unlock(cp);
3644
3645 vnode_put(rvp);
3646 rvp = NULL;
3647
3648 return 0;
3649 }
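/*
 * Illustrative usage (a sketch, not part of the original source):
 *
 *     uint32_t docid;
 *     if (hfs_generate_document_id(hfsmp, &docid) == 0) {
 *         // persist docid in the file's extended Finder info
 *     }
 */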
3650
3651
3652 /*
3653 * Return information about number of file system allocation blocks
3654 * taken by metadata on a volume.
3655 *
3656 * This function populates struct hfsinfo_metadata with allocation blocks
3657 * used by extents overflow btree, catalog btree, bitmap, attribute btree,
3658 * journal file, and sum of all of the above.
3659 */
3660 int
3661 hfs_getinfo_metadata_blocks(struct hfsmount *hfsmp, struct hfsinfo_metadata *hinfo)
3662 {
3663 int lockflags = 0;
3664 int ret_lockflags = 0;
3665
3666 /* Zero out the output buffer */
3667 bzero(hinfo, sizeof(struct hfsinfo_metadata));
3668
3669 /*
3670 * Getting the number of allocation blocks for all btrees
3671 * should be a quick operation, so we grab locks for
3672 * all of them at the same time
3673 */
3674 lockflags = SFL_CATALOG | SFL_EXTENTS | SFL_BITMAP | SFL_ATTRIBUTE;
3675 ret_lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3676 /*
3677 * Make sure that we were able to acquire all locks requested
3678 * to protect us against conditions like unmount in progress.
3679 */
3680 if ((lockflags & ret_lockflags) != lockflags) {
3681 /* Release any locks that were acquired */
3682 hfs_systemfile_unlock(hfsmp, ret_lockflags);
3683 return EPERM;
3684 }
3685
3686 /* Get information about all the btrees */
3687 hinfo->extents = hfsmp->hfs_extents_cp->c_datafork->ff_blocks;
3688 hinfo->catalog = hfsmp->hfs_catalog_cp->c_datafork->ff_blocks;
3689 hinfo->allocation = hfsmp->hfs_allocation_cp->c_datafork->ff_blocks;
3690 hinfo->attribute = hfsmp->hfs_attribute_cp->c_datafork->ff_blocks;
3691
3692 /* Done with btrees, give up the locks */
3693 hfs_systemfile_unlock(hfsmp, ret_lockflags);
3694
3695 /* Get information about journal file */
3696 hinfo->journal = howmany(hfsmp->jnl_size, hfsmp->blockSize);
3697
3698 /* Calculate total number of metadata blocks */
3699 hinfo->total = hinfo->extents + hinfo->catalog +
3700 hinfo->allocation + hinfo->attribute +
3701 hinfo->journal;
3702
3703 return 0;
3704 }
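/*
 * Illustrative usage (a sketch, not part of the original source); the
 * caller owns the output buffer and all counts are in file system
 * allocation blocks:
 *
 *     struct hfsinfo_metadata hinfo;
 *     if (hfs_getinfo_metadata_blocks(hfsmp, &hinfo) == 0)
 *         printf("hfs: %u allocation blocks used by metadata\n",
 *                hinfo.total);
 */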
3705
3706 static int
3707 hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
3708 {
3709 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze 8");
3710
3711 return 0;
3712 }
3713
3714 __private_extern__
3715 int hfs_freeze(struct hfsmount *hfsmp)
3716 {
3717 // First make sure some other process isn't freezing
3718 hfs_lock_mount(hfsmp);
3719 while (hfsmp->hfs_freeze_state != HFS_THAWED) {
3720 if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
3721 PWAIT | PCATCH, "hfs freeze 1", NULL) == EINTR) {
3722 hfs_unlock_mount(hfsmp);
3723 return EINTR;
3724 }
3725 }
3726
3727 // Stop new syncers from starting
3728 hfsmp->hfs_freeze_state = HFS_WANT_TO_FREEZE;
3729
3730 // Now wait for all syncers to finish
3731 while (hfsmp->hfs_syncers) {
3732 if (msleep(&hfsmp->hfs_freeze_state, &hfsmp->hfs_mutex,
3733 PWAIT | PCATCH, "hfs freeze 2", NULL) == EINTR) {
3734 hfs_thaw_locked(hfsmp);
3735 hfs_unlock_mount(hfsmp);
3736 return EINTR;
3737 }
3738 }
3739 hfs_unlock_mount(hfsmp);
3740
3741 // flush things before we get started to try to prevent
3742 // dirty data from being paged out while we're frozen.
3743 // note: we can't do this once we're in the freezing state because
3744 // other threads will need to take the global lock
3745 vnode_iterate(hfsmp->hfs_mp, 0, hfs_freezewrite_callback, NULL);
3746
3747 // Block everything in hfs_lock_global now
3748 hfs_lock_mount(hfsmp);
3749 hfsmp->hfs_freeze_state = HFS_FREEZING;
3750 hfsmp->hfs_freezing_thread = current_thread();
3751 hfs_unlock_mount(hfsmp);
3752
3753 /* Take the exclusive lock to flush out anything else that
3754 might have the global lock at the moment and also so we
3755 can flush the journal. */
3756 hfs_lock_global(hfsmp, HFS_EXCLUSIVE_LOCK);
3757 journal_flush(hfsmp->jnl, TRUE);
3758 hfs_unlock_global(hfsmp);
3759
3760 // don't need to iterate on all vnodes, we just need to
3761 // wait for writes to the system files and the device vnode
3762 //
3763 // Now that journal flush waits for all metadata blocks to
3764 // be written out, waiting for btree writes is probably no
3765 // longer required.
3766 if (HFSTOVCB(hfsmp)->extentsRefNum)
3767 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze 3");
3768 if (HFSTOVCB(hfsmp)->catalogRefNum)
3769 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze 4");
3770 if (HFSTOVCB(hfsmp)->allocationsRefNum)
3771 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze 5");
3772 if (hfsmp->hfs_attribute_vp)
3773 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze 6");
3774 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze 7");
3775
3776 // We're done, mark frozen
3777 hfs_lock_mount(hfsmp);
3778 hfsmp->hfs_freeze_state = HFS_FROZEN;
3779 hfsmp->hfs_freezing_proc = current_proc();
3780 hfs_unlock_mount(hfsmp);
3781
3782 return 0;
3783 }
3784
3785 __private_extern__
3786 int hfs_thaw(struct hfsmount *hfsmp, const struct proc *process)
3787 {
3788 hfs_lock_mount(hfsmp);
3789
3790 if (hfsmp->hfs_freeze_state != HFS_FROZEN) {
3791 hfs_unlock_mount(hfsmp);
3792 return EINVAL;
3793 }
3794 if (process && hfsmp->hfs_freezing_proc != process) {
3795 hfs_unlock_mount(hfsmp);
3796 return EPERM;
3797 }
3798
3799 hfs_thaw_locked(hfsmp);
3800
3801 hfs_unlock_mount(hfsmp);
3802
3803 return 0;
3804 }
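/*
 * Illustrative pairing (a sketch, not part of the original source): a
 * freeze is held across a snapshot-style operation and must be released
 * by the same process that took it:
 *
 *     if (hfs_freeze(hfsmp) == 0) {
 *         // ... capture a block-level snapshot of the device ...
 *         hfs_thaw(hfsmp, current_proc());
 *     }
 */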
3805
3806 static void hfs_thaw_locked(struct hfsmount *hfsmp)
3807 {
3808 hfsmp->hfs_freezing_proc = NULL;
3809 hfsmp->hfs_freeze_state = HFS_THAWED;
3810
3811 wakeup(&hfsmp->hfs_freeze_state);
3812 }