/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*	@(#)hfs_readwrite.c	1.0
 *
 *	(c) 1998-2001 Apple Computer, Inc.  All Rights Reserved
 *
 *	hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vnode_internal.h>
#include <sys/vfs_context.h>
#include <sys/fsevents.h>
#include <kern/kalloc.h>
#include <sys/sysctl.h>
#include <sys/fsctl.h>

#include <miscfs/specfs/specdev.h>

#include <sys/ubc_internal.h>

#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>

#include <sys/kdebug.h>

#include "hfs_attrlist.h"
#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
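/*
 * can_cluster(size) is true when the transfer size is an exact multiple of
 * 4 KB and is no larger than half of MAXPHYSIO, i.e. when the request is a
 * good candidate for the cluster I/O layer.
 */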
enum {
	MAXHFSFILESIZE = 0x7FFFFFFF		/* this needs to go in the mount structure */
};

/* from bsd/hfs/hfs_vfsops.c */
extern int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
static int hfs_clonefile(struct vnode *, int, int, int);
static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
static int hfs_minorupdate(struct vnode *vp);
static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);

int flush_cache_on_write = 0;
SYSCTL_INT(_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW | CTLFLAG_LOCKED, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
/*
 * Read data from a file.
 */
int
hfs_vnop_read(struct vnop_read_args *ap)
{
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct hfsmount *hfsmp;
	off_t start_resid = uio_resid(uio);
	off_t offset = uio_offset(uio);
	int took_truncate_lock = 0;

	/* Preflight checks */
	if (!vnode_isreg(vp)) {
		/* can only read regular files */
	if (start_resid == 0)
		return (0);		/* Nothing left to do */
		return (EINVAL);	/* can't read from a negative offset */
127 if (VNODE_IS_RSRC(vp
)) {
128 if (hfs_hides_rsrc(ap
->a_context
, VTOC(vp
), 1)) { /* 1 == don't take the cnode lock */
131 /* otherwise read the resource fork normally */
133 int compressed
= hfs_file_is_compressed(VTOC(vp
), 1); /* 1 == don't take the cnode lock */
135 retval
= decmpfs_read_compressed(ap
, &compressed
, VTOCMP(vp
));
138 /* successful read, update the access time */
139 VTOC(vp
)->c_touch_acctime
= TRUE
;
141 /* compressed files are not hot file candidates */
142 if (VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
) {
143 VTOF(vp
)->ff_bytesread
= 0;
148 /* otherwise the file was converted back to a regular file while we were reading it */
150 } else if ((VTOC(vp
)->c_flags
& UF_COMPRESSED
)) {
153 error
= check_for_dataless_file(vp
, NAMESPACE_HANDLER_READ_OP
);
160 #endif /* HFS_COMPRESSION */
167 if ((retval
= cp_handle_vnop (cp
, CP_READ_ACCESS
)) != 0) {
172 /* Protect against a size change. */
173 hfs_lock_truncate(cp
, HFS_SHARED_LOCK
);
174 took_truncate_lock
= 1;
176 filesize
= fp
->ff_size
;
177 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
178 if (offset
> filesize
) {
179 if ((hfsmp
->hfs_flags
& HFS_STANDARD
) &&
180 (offset
> (off_t
)MAXHFSFILESIZE
)) {
186 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 12)) | DBG_FUNC_START
,
187 (int)uio_offset(uio
), uio_resid(uio
), (int)filesize
, (int)filebytes
, 0);
189 retval
= cluster_read(vp
, uio
, filesize
, ap
->a_ioflag
);
191 cp
->c_touch_acctime
= TRUE
;
193 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 12)) | DBG_FUNC_END
,
194 (int)uio_offset(uio
), uio_resid(uio
), (int)filesize
, (int)filebytes
, 0);
	/* Keep track of blocks read. */
199 if (hfsmp
->hfc_stage
== HFC_RECORDING
&& retval
== 0) {
200 int took_cnode_lock
= 0;
203 bytesread
= start_resid
- uio_resid(uio
);
205 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
206 if ((fp
->ff_bytesread
+ bytesread
) > 0x00000000ffffffff) {
207 hfs_lock(cp
, HFS_FORCE_LOCK
);
211 * If this file hasn't been seen since the start of
212 * the current sampling period then start over.
214 if (cp
->c_atime
< hfsmp
->hfc_timebase
) {
217 fp
->ff_bytesread
= bytesread
;
219 cp
->c_atime
= tv
.tv_sec
;
221 fp
->ff_bytesread
+= bytesread
;
227 if (took_truncate_lock
) {
228 hfs_unlock_truncate(cp
, 0);
/*
 * Write data to a file.
 */
int
hfs_vnop_write(struct vnop_write_args *ap)
{
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct hfsmount *hfsmp;
	kauth_cred_t cred = NULL;
	off_t bytesToAdd = 0;
	off_t actualBytesAdded;
	int ioflag = ap->a_ioflag;
	int cnode_locked = 0;
	int partialwrite = 0;
	time_t orig_ctime = VTOC(vp)->c_ctime;
	int took_truncate_lock = 0;
	struct rl_entry *invalid_range;
265 if ( hfs_file_is_compressed(VTOC(vp
), 1) ) { /* 1 == don't take the cnode lock */
266 int state
= decmpfs_cnode_get_vnode_state(VTOCMP(vp
));
268 case FILE_IS_COMPRESSED
:
270 case FILE_IS_CONVERTING
:
271 /* if FILE_IS_CONVERTING, we allow writes but do not
272 bother with snapshots or else we will deadlock.
277 printf("invalid state %d for compressed file\n", state
);
280 } else if ((VTOC(vp
)->c_flags
& UF_COMPRESSED
)) {
283 error
= check_for_dataless_file(vp
, NAMESPACE_HANDLER_WRITE_OP
);
290 check_for_tracked_file(vp
, orig_ctime
, NAMESPACE_HANDLER_WRITE_OP
, uio
);
295 // LP64todo - fix this! uio_resid may be 64-bit value
296 resid
= uio_resid(uio
);
297 offset
= uio_offset(uio
);
303 if (!vnode_isreg(vp
))
304 return (EPERM
); /* Can only write regular files */
311 if ((retval
= cp_handle_vnop (cp
, CP_WRITE_ACCESS
)) != 0) {
316 eflags
= kEFDeferMask
; /* defer file block allocations */
	/*
	 * When the underlying device is sparse and space
	 * is low (< 8MB), stop doing delayed allocations
	 * and begin doing synchronous I/O.
	 */
323 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
324 (hfs_freeblks(hfsmp
, 0) < 2048)) {
325 eflags
&= ~kEFDeferMask
;
328 #endif /* HFS_SPARSE_DEV */
	/*
	 * Protect against a size change.
	 *
	 * Note: If took_truncate_lock is true, then we previously got the lock shared
	 * but needed to upgrade to exclusive.  So try getting it exclusive from the
	 * start.
	 */
339 if (ioflag
& IO_APPEND
|| took_truncate_lock
) {
340 hfs_lock_truncate(cp
, HFS_EXCLUSIVE_LOCK
);
343 hfs_lock_truncate(cp
, HFS_SHARED_LOCK
);
345 took_truncate_lock
= 1;
348 if (ioflag
& IO_APPEND
) {
349 uio_setoffset(uio
, fp
->ff_size
);
350 offset
= fp
->ff_size
;
352 if ((cp
->c_flags
& APPEND
) && offset
!= fp
->ff_size
) {
357 origFileSize
= fp
->ff_size
;
358 writelimit
= offset
+ resid
;
359 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
	/*
	 * We may need an exclusive truncate lock for several reasons, all
	 * of which are because we may be writing to a (portion of a) block
	 * for the first time, and we need to make sure no readers see the
	 * prior, uninitialized contents of the block.  The cases are:
	 *
	 * 1. We have unallocated (delayed allocation) blocks.  We may be
	 *    allocating new blocks to the file and writing to them.
	 *    (A more precise check would be whether the range we're writing
	 *    to contains delayed allocation blocks.)
	 * 2. We need to extend the file.  The bytes between the old EOF
	 *    and the new EOF are not yet initialized.  This is important
	 *    even if we're not allocating new blocks to the file.  If the
	 *    old EOF and new EOF are in the same block, we still need to
	 *    protect that range of bytes until they are written for the
	 *    first time.
	 * 3. The write overlaps some invalid ranges (delayed zero fill; that
	 *    part of the file has been allocated, but not yet written).
	 *
	 * If we had a shared lock with the above cases, we need to try to upgrade
	 * to an exclusive lock.  If the upgrade fails, we will lose the shared
	 * lock, and will need to take the truncate lock again; the took_truncate_lock
	 * flag will still be set, causing us to try for an exclusive lock next time.
	 *
	 * NOTE: Testing for #3 (delayed zero fill) needs to be done while the cnode
	 * lock is held, since it protects the range lists.
	 */
388 if ((cp
->c_truncatelockowner
== HFS_SHARED_OWNER
) &&
389 ((fp
->ff_unallocblocks
!= 0) ||
390 (writelimit
> origFileSize
))) {
391 if (lck_rw_lock_shared_to_exclusive(&cp
->c_truncatelock
) == FALSE
) {
			/*
			 * Lock upgrade failed and we lost our shared lock, try again.
			 * Note: we do not set took_truncate_lock=0 here.  Leaving it
			 * set to 1 will cause us to try to get the lock exclusive.
			 */
		/* Store the owner in the c_truncatelockowner field if we successfully upgrade */
401 cp
->c_truncatelockowner
= current_thread();
405 if ( (retval
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
))) {
	/*
	 * Now that we have the cnode lock, see if there are delayed zero fill ranges
	 * overlapping our write.  If so, we need the truncate lock exclusive (see above).
	 */
414 if ((cp
->c_truncatelockowner
== HFS_SHARED_OWNER
) &&
415 (rl_scan(&fp
->ff_invalidranges
, offset
, writelimit
-1, &invalid_range
) != RL_NOOVERLAP
)) {
		/*
		 * When testing, it appeared that calling lck_rw_lock_shared_to_exclusive() causes
		 * a deadlock, rather than simply returning failure.  (That is, it apparently does
		 * not behave like a "try_lock").  Since this condition is rare, just drop the
		 * cnode lock and try again.  Since took_truncate_lock is set, we will
		 * automatically take the truncate lock exclusive.
		 */
425 hfs_unlock_truncate(cp
, 0);
429 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_START
,
430 (int)offset
, uio_resid(uio
), (int)fp
->ff_size
,
433 /* Check if we do not need to extend the file */
434 if (writelimit
<= filebytes
) {
438 cred
= vfs_context_ucred(ap
->a_context
);
439 bytesToAdd
= writelimit
- filebytes
;
442 retval
= hfs_chkdq(cp
, (int64_t)(roundup(bytesToAdd
, hfsmp
->blockSize
)),
448 if (hfs_start_transaction(hfsmp
) != 0) {
453 while (writelimit
> filebytes
) {
454 bytesToAdd
= writelimit
- filebytes
;
455 if (cred
&& suser(cred
, NULL
) != 0)
456 eflags
|= kEFReserveMask
;
458 /* Protect extents b-tree and allocation bitmap */
459 lockflags
= SFL_BITMAP
;
460 if (overflow_extents(fp
))
461 lockflags
|= SFL_EXTENTS
;
462 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
464 /* Files that are changing size are not hot file candidates. */
465 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
466 fp
->ff_bytesread
= 0;
468 retval
= MacToVFSError(ExtendFileC (hfsmp
, (FCB
*)fp
, bytesToAdd
,
469 0, eflags
, &actualBytesAdded
));
471 hfs_systemfile_unlock(hfsmp
, lockflags
);
473 if ((actualBytesAdded
== 0) && (retval
== E_NONE
))
475 if (retval
!= E_NONE
)
477 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
478 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_NONE
,
479 (int)offset
, uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
481 (void) hfs_update(vp
, TRUE
);
482 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
483 (void) hfs_end_transaction(hfsmp
);
	/*
	 * If we didn't grow the file enough, try a partial write.
	 * POSIX expects this behavior.
	 */
489 if ((retval
== ENOSPC
) && (filebytes
> offset
)) {
492 uio_setresid(uio
, (uio_resid(uio
) - bytesToAdd
));
494 writelimit
= filebytes
;
497 if (retval
== E_NONE
) {
506 if (writelimit
> fp
->ff_size
)
507 filesize
= writelimit
;
509 filesize
= fp
->ff_size
;
511 lflag
= ioflag
& ~(IO_TAILZEROFILL
| IO_HEADZEROFILL
| IO_NOZEROVALID
| IO_NOZERODIRTY
);
513 if (offset
<= fp
->ff_size
) {
514 zero_off
= offset
& ~PAGE_MASK_64
;
			/*
			 * Check to see whether the area between the zero_offset and the start
			 * of the transfer is invalid and should be zero-filled
			 * as part of the transfer:
			 */
520 if (offset
> zero_off
) {
521 if (rl_scan(&fp
->ff_invalidranges
, zero_off
, offset
- 1, &invalid_range
) != RL_NOOVERLAP
)
522 lflag
|= IO_HEADZEROFILL
;
525 off_t eof_page_base
= fp
->ff_size
& ~PAGE_MASK_64
;
			/*
			 * The bytes between fp->ff_size and uio->uio_offset must never be
			 * read without being zeroed.  The current last block is filled with zeroes
			 * if it holds valid data, but in all cases merely do a little bookkeeping
			 * to track the area from the end of the current last page to the start of
			 * the area actually written.  For the same reason only the bytes up to the
			 * start of the page where this write will start is invalidated; any remainder
			 * before uio->uio_offset is explicitly zeroed as part of the cluster_write.
			 *
			 * Note that inval_start, the start of the page after the current EOF,
			 * may be past the start of the write, in which case the zeroing
			 * will be handled by the cluster_write of the actual data.
			 */
539 inval_start
= (fp
->ff_size
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
540 inval_end
= offset
& ~PAGE_MASK_64
;
541 zero_off
= fp
->ff_size
;
543 if ((fp
->ff_size
& PAGE_MASK_64
) &&
544 (rl_scan(&fp
->ff_invalidranges
,
547 &invalid_range
) != RL_NOOVERLAP
)) {
				/*
				 * The page containing the EOF is not valid, so the
				 * entire page must be made inaccessible now.  If the write
				 * starts on a page beyond the page containing the eof
				 * (inval_end > eof_page_base), add the
				 * whole page to the range to be invalidated.  Otherwise
				 * (i.e. if the write starts on the same page), zero-fill
				 * the entire page explicitly now:
				 */
556 if (inval_end
> eof_page_base
) {
557 inval_start
= eof_page_base
;
559 zero_off
= eof_page_base
;
563 if (inval_start
< inval_end
) {
565 /* There's some range of data that's going to be marked invalid */
567 if (zero_off
< inval_start
) {
					/*
					 * The pages between inval_start and inval_end are going to be invalidated,
					 * and the actual write will start on a page past inval_end.  Now's the last
					 * chance to zero-fill the page containing the EOF:
					 */
574 retval
= cluster_write(vp
, (uio_t
) 0,
575 fp
->ff_size
, inval_start
,
577 lflag
| IO_HEADZEROFILL
| IO_NOZERODIRTY
);
578 hfs_lock(cp
, HFS_FORCE_LOCK
);
580 if (retval
) goto ioerr_exit
;
581 offset
= uio_offset(uio
);
584 /* Mark the remaining area of the newly allocated space as invalid: */
585 rl_add(inval_start
, inval_end
- 1 , &fp
->ff_invalidranges
);
587 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
588 zero_off
= fp
->ff_size
= inval_end
;
591 if (offset
> zero_off
) lflag
|= IO_HEADZEROFILL
;
		/*
		 * Check to see whether the area between the end of the write and the end of
		 * the page it falls in is invalid and should be zero-filled as part of the transfer:
		 */
597 tail_off
= (writelimit
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
598 if (tail_off
> filesize
) tail_off
= filesize
;
599 if (tail_off
> writelimit
) {
600 if (rl_scan(&fp
->ff_invalidranges
, writelimit
, tail_off
- 1, &invalid_range
) != RL_NOOVERLAP
) {
601 lflag
|= IO_TAILZEROFILL
;
	/*
	 * if the write starts beyond the current EOF (possibly advanced in the
	 * zeroing of the last block, above), then we'll zero fill from the current EOF
	 * to where the write begins:
	 *
	 * NOTE: If (and ONLY if) the portion of the file about to be written is
	 * before the current EOF it might be marked as invalid now and must be
	 * made readable (removed from the invalid ranges) before cluster_write is called:
	 */
615 io_start
= (lflag
& IO_HEADZEROFILL
) ? zero_off
: offset
;
616 if (io_start
< fp
->ff_size
) {
619 io_end
= (lflag
& IO_TAILZEROFILL
) ? tail_off
: writelimit
;
620 rl_remove(io_start
, io_end
- 1, &fp
->ff_invalidranges
);
	/*
	 * We need to tell UBC the fork's new size BEFORE calling
	 * cluster_write, in case any of the new pages need to be
	 * paged out before cluster_write completes (which does happen
	 * in embedded systems due to extreme memory pressure).
	 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
	 * will be, so that it can pass that on to cluster_pageout, and
	 * allow those pageouts.
	 *
	 * We don't update ff_size yet since we don't want pageins to
	 * be able to see uninitialized data between the old and new
	 * EOF, until cluster_write has completed and initialized that
	 * part of the file.
	 *
	 * The vnode pager relies on the file size last given to UBC via
	 * ubc_setsize.  hfs_vnop_pageout relies on fp->ff_new_size or
	 * ff_size (whichever is larger).  NOTE: ff_new_size is always
	 * zero, unless we are extending the file via write.
	 */
645 if (filesize
> fp
->ff_size
) {
646 fp
->ff_new_size
= filesize
;
647 ubc_setsize(vp
, filesize
);
649 retval
= cluster_write(vp
, uio
, fp
->ff_size
, filesize
, zero_off
,
650 tail_off
, lflag
| IO_NOZERODIRTY
);
652 fp
->ff_new_size
= 0; /* no longer extending; use ff_size */
653 if (filesize
> origFileSize
) {
654 ubc_setsize(vp
, origFileSize
);
659 if (filesize
> origFileSize
) {
660 fp
->ff_size
= filesize
;
662 /* Files that are changing size are not hot file candidates. */
663 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
664 fp
->ff_bytesread
= 0;
667 fp
->ff_new_size
= 0; /* ff_size now has the correct size */
669 /* If we wrote some bytes, then touch the change and mod times */
670 if (resid
> uio_resid(uio
)) {
671 cp
->c_touch_chgtime
= TRUE
;
672 cp
->c_touch_modtime
= TRUE
;
676 uio_setresid(uio
, (uio_resid(uio
) + bytesToAdd
));
680 // XXXdbg - see radar 4871353 for more info
682 if (flush_cache_on_write
&& ((ioflag
& IO_NOCACHE
) || vnode_isnocache(vp
))) {
683 VNOP_IOCTL(hfsmp
->hfs_devvp
, DKIOCSYNCHRONIZECACHE
, NULL
, FWRITE
, NULL
);
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
693 if (cp
->c_mode
& (S_ISUID
| S_ISGID
)) {
694 cred
= vfs_context_ucred(ap
->a_context
);
695 if (resid
> uio_resid(uio
) && cred
&& suser(cred
, NULL
)) {
697 hfs_lock(cp
, HFS_FORCE_LOCK
);
700 cp
->c_mode
&= ~(S_ISUID
| S_ISGID
);
704 if (ioflag
& IO_UNIT
) {
706 hfs_lock(cp
, HFS_FORCE_LOCK
);
709 (void)hfs_truncate(vp
, origFileSize
, ioflag
& IO_SYNC
,
710 0, 0, ap
->a_context
);
711 // LP64todo - fix this! resid needs to by user_ssize_t
712 uio_setoffset(uio
, (uio_offset(uio
) - (resid
- uio_resid(uio
))));
713 uio_setresid(uio
, resid
);
714 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
716 } else if ((ioflag
& IO_SYNC
) && (resid
> uio_resid(uio
))) {
718 hfs_lock(cp
, HFS_FORCE_LOCK
);
721 retval
= hfs_update(vp
, TRUE
);
723 /* Updating vcbWrCnt doesn't need to be atomic. */
726 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_END
,
727 (int)uio_offset(uio
), uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
732 if (took_truncate_lock
) {
733 hfs_unlock_truncate(cp
, 0);
/* support for the "bulk-access" fcntl */

#define CACHE_LEVELS 16
#define NUM_CACHE_ENTRIES (64*16)
#define PARENT_IDS_FLAG 0x100
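/*
 * Note on the sizes above: NUM_CACHE_ENTRIES works out to 1024 directory
 * ids, which matches the 1024-file and 1024-parent limits enforced in
 * do_bulk_access_check() below, while CACHE_LEVELS bounds how many ancestors
 * of a single item are remembered while walking towards the root.
 */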
struct access_cache {
	int numcached;
	int cachehits;		/* these two for statistics gathering */
	int lookups;
	unsigned int *acache;
	unsigned char *haveaccess;
};

struct access_t {
	uid_t		uid;		/* IN: effective user id */
	short		flags;		/* IN: access requested (i.e. R_OK) */
	short		num_groups;	/* IN: number of groups user belongs to */
	int		num_files;	/* IN: number of files to process */
	int		*file_ids;	/* IN: array of file ids */
	gid_t		*groups;	/* IN: array of groups */
	short		*access;	/* OUT: access info for each file (0 for 'has access') */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_access_t {
	uid_t		uid;		/* IN: effective user id */
	short		flags;		/* IN: access requested (i.e. R_OK) */
	short		num_groups;	/* IN: number of groups user belongs to */
	int		num_files;	/* IN: number of files to process */
	user32_addr_t	file_ids;	/* IN: array of file ids */
	user32_addr_t	groups;		/* IN: array of groups */
	user32_addr_t	access;		/* OUT: access info for each file (0 for 'has access') */
};

struct user64_access_t {
	uid_t		uid;		/* IN: effective user id */
	short		flags;		/* IN: access requested (i.e. R_OK) */
	short		num_groups;	/* IN: number of groups user belongs to */
	int		num_files;	/* IN: number of files to process */
	user64_addr_t	file_ids;	/* IN: array of file ids */
	user64_addr_t	groups;		/* IN: array of groups */
	user64_addr_t	access;		/* OUT: access info for each file (0 for 'has access') */
};

// these are the "extended" versions of the above structures
// note that it is crucial that they be different sized than
// the regular version
struct ext_access_t {
	uint32_t	flags;		/* IN: access requested (i.e. R_OK) */
	uint32_t	num_files;	/* IN: number of files to process */
	uint32_t	map_size;	/* IN: size of the bit map */
	uint32_t	*file_ids;	/* IN: Array of file ids */
	char		*bitmap;	/* OUT: hash-bitmap of interesting directory ids */
	short		*access;	/* OUT: access info for each file (0 for 'has access') */
	uint32_t	num_parents;	/* future use */
	cnid_t		*parents;	/* future use */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_ext_access_t {
	uint32_t	flags;		/* IN: access requested (i.e. R_OK) */
	uint32_t	num_files;	/* IN: number of files to process */
	uint32_t	map_size;	/* IN: size of the bit map */
	user32_addr_t	file_ids;	/* IN: Array of file ids */
	user32_addr_t	bitmap;		/* OUT: hash-bitmap of interesting directory ids */
	user32_addr_t	access;		/* OUT: access info for each file (0 for 'has access') */
	uint32_t	num_parents;	/* future use */
	user32_addr_t	parents;	/* future use */
};
struct user64_ext_access_t {
	uint32_t	flags;		/* IN: access requested (i.e. R_OK) */
	uint32_t	num_files;	/* IN: number of files to process */
	uint32_t	map_size;	/* IN: size of the bit map */
	user64_addr_t	file_ids;	/* IN: array of file ids */
	user64_addr_t	bitmap;		/* OUT: hash-bitmap of interesting directory ids */
	user64_addr_t	access;		/* OUT: access info for each file (0 for 'has access') */
	uint32_t	num_parents;	/* future use */
	user64_addr_t	parents;	/* future use */
};
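/*
 * Illustrative sketch (not part of this file): a 32-bit user-space caller
 * would fill in a user32_ext_access_t-shaped request and hand it to the
 * volume via the bulk-access fsctl.  The user-visible constant and header
 * are assumptions here; only the field layout comes from the structures
 * above.
 *
 *	struct user32_ext_access_t req = { 0 };
 *	int ids[2] = { 1234, 5678 };            // example catalog node ids to test
 *	short results[2];
 *	req.flags     = R_OK;                   // access being requested
 *	req.num_files = 2;
 *	req.file_ids  = (user32_addr_t)(uintptr_t)ids;
 *	req.access    = (user32_addr_t)(uintptr_t)results;
 *	// issue the HFS_EXT_BULKACCESS_FSCTL-equivalent fsctl on a file of the
 *	// volume; on success, results[i] == 0 means "has access".
 */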
/*
 * Perform a binary search for the given parent_id.  Return value is
 * the index if there is a match.  If no_match_indexp is non-NULL it
 * will be assigned with the index to insert the item (even if it was
 * not found).
 */
static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
{
832 unsigned int mid
= ((hi
- lo
)/2) + lo
;
833 unsigned int this_id
= array
[mid
];
835 if (parent_id
== this_id
) {
840 if (parent_id
< this_id
) {
845 if (parent_id
> this_id
) {
851 /* check if lo and hi converged on the match */
852 if (parent_id
== array
[hi
]) {
856 if (no_match_indexp
) {
857 *no_match_indexp
= hi
;
865 lookup_bucket(struct access_cache
*cache
, int *indexp
, cnid_t parent_id
)
869 int index
, no_match_index
;
871 if (cache
->numcached
== 0) {
873 return 0; // table is empty, so insert at index=0 and report no match
876 if (cache
->numcached
> NUM_CACHE_ENTRIES
) {
877 /*printf("hfs: EGAD! numcached is %d... cut our losses and trim to %d\n",
878 cache->numcached, NUM_CACHE_ENTRIES);*/
879 cache
->numcached
= NUM_CACHE_ENTRIES
;
882 hi
= cache
->numcached
- 1;
884 index
= cache_binSearch(cache
->acache
, hi
, parent_id
, &no_match_index
);
886 /* if no existing entry found, find index for new one */
888 index
= no_match_index
;
/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in).  We currently do a replace rather
 * than an insert if the cache is full.
 */
static void
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
	int lookup_index = -1;
908 /* need to do a lookup first if -1 passed for index */
910 if (lookup_bucket(cache
, &lookup_index
, nodeID
)) {
911 if (cache
->haveaccess
[lookup_index
] != access
&& cache
->haveaccess
[lookup_index
] == ESRCH
) {
912 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
913 cache
->haveaccess
[lookup_index
] = access
;
916 /* mission accomplished */
919 index
= lookup_index
;
924 /* if the cache is full, do a replace rather than an insert */
925 if (cache
->numcached
>= NUM_CACHE_ENTRIES
) {
926 //printf("hfs: cache is full (%d). replace at index %d\n", cache->numcached, index);
927 cache
->numcached
= NUM_CACHE_ENTRIES
-1;
929 if (index
> cache
->numcached
) {
930 // printf("hfs: index %d pinned to %d\n", index, cache->numcached);
931 index
= cache
->numcached
;
935 if (index
< cache
->numcached
&& index
< NUM_CACHE_ENTRIES
&& nodeID
> cache
->acache
[index
]) {
939 if (index
>= 0 && index
< cache
->numcached
) {
940 /* only do bcopy if we're inserting */
941 bcopy( cache
->acache
+index
, cache
->acache
+(index
+1), (cache
->numcached
- index
)*sizeof(int) );
942 bcopy( cache
->haveaccess
+index
, cache
->haveaccess
+(index
+1), (cache
->numcached
- index
)*sizeof(unsigned char) );
945 cache
->acache
[index
] = nodeID
;
946 cache
->haveaccess
[index
] = access
;
960 snoop_callback(const struct cat_desc
*descp
, const struct cat_attr
*attrp
, void * arg
)
962 struct cinfo
*cip
= (struct cinfo
*)arg
;
964 cip
->uid
= attrp
->ca_uid
;
965 cip
->gid
= attrp
->ca_gid
;
966 cip
->mode
= attrp
->ca_mode
;
967 cip
->parentcnid
= descp
->cd_parentcnid
;
968 cip
->recflags
= attrp
->ca_recflags
;
/*
 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id.  If the item
 * isn't incore, then go to the catalog.
 */
static int
do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
    struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
{
983 /* if this id matches the one the fsctl was called with, skip the lookup */
984 if (cnid
== skip_cp
->c_cnid
) {
985 cnattrp
->ca_uid
= skip_cp
->c_uid
;
986 cnattrp
->ca_gid
= skip_cp
->c_gid
;
987 cnattrp
->ca_mode
= skip_cp
->c_mode
;
988 cnattrp
->ca_recflags
= skip_cp
->c_attr
.ca_recflags
;
989 keyp
->hfsPlus
.parentID
= skip_cp
->c_parentcnid
;
	/* otherwise, check the cnode hash in case the file/dir is incore */
994 if (hfs_chash_snoop(hfsmp
, cnid
, 0, snoop_callback
, &c_info
) == 0) {
995 cnattrp
->ca_uid
= c_info
.uid
;
996 cnattrp
->ca_gid
= c_info
.gid
;
997 cnattrp
->ca_mode
= c_info
.mode
;
998 cnattrp
->ca_recflags
= c_info
.recflags
;
999 keyp
->hfsPlus
.parentID
= c_info
.parentcnid
;
1003 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_CATALOG
, HFS_SHARED_LOCK
);
1005 /* lookup this cnid in the catalog */
1006 error
= cat_getkeyplusattr(hfsmp
, cnid
, keyp
, cnattrp
);
1008 hfs_systemfile_unlock(hfsmp
, lockflags
);
/*
 * Compute whether we have access to the given directory (nodeID) and all its parents.  Cache
 * up to CACHE_LEVELS as we progress towards the root.
 */
static int
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
    struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
    struct vfs_context *my_context,
    char *bitmap,
    uint32_t map_size,
    cnid_t *parents,
    uint32_t num_parents)
{
1033 HFSCatalogNodeID thisNodeID
;
1034 unsigned int myPerms
;
1035 struct cat_attr cnattr
;
1036 int cache_index
= -1, scope_index
= -1, scope_idx_start
= -1;
1039 int i
= 0, ids_to_cache
= 0;
1040 int parent_ids
[CACHE_LEVELS
];
1042 thisNodeID
= nodeID
;
1043 while (thisNodeID
>= kRootDirID
) {
1044 myResult
= 0; /* default to "no access" */
1046 /* check the cache before resorting to hitting the catalog */
1048 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
1049 * to look any further after hitting cached dir */
1051 if (lookup_bucket(cache
, &cache_index
, thisNodeID
)) {
1053 myErr
= cache
->haveaccess
[cache_index
];
1054 if (scope_index
!= -1) {
1055 if (myErr
== ESRCH
) {
1059 scope_index
= 0; // so we'll just use the cache result
1060 scope_idx_start
= ids_to_cache
;
1062 myResult
= (myErr
== 0) ? 1 : 0;
1063 goto ExitThisRoutine
;
1069 tmp
= cache_binSearch(parents
, num_parents
-1, thisNodeID
, NULL
);
1070 if (scope_index
== -1)
1072 if (tmp
!= -1 && scope_idx_start
== -1 && ids_to_cache
< CACHE_LEVELS
) {
1073 scope_idx_start
= ids_to_cache
;
1077 /* remember which parents we want to cache */
1078 if (ids_to_cache
< CACHE_LEVELS
) {
1079 parent_ids
[ids_to_cache
] = thisNodeID
;
1082 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
1083 if (bitmap
&& map_size
) {
1084 bitmap
[(thisNodeID
/8)%(map_size
)]|=(1<<(thisNodeID
&7));
1088 /* do the lookup (checks the cnode hash, then the catalog) */
1089 myErr
= do_attr_lookup(hfsmp
, cache
, thisNodeID
, skip_cp
, &catkey
, &cnattr
);
1091 goto ExitThisRoutine
; /* no access */
1094 /* Root always gets access. */
1095 if (suser(myp_ucred
, NULL
) == 0) {
1096 thisNodeID
= catkey
.hfsPlus
.parentID
;
1101 // if the thing has acl's, do the full permission check
1102 if ((cnattr
.ca_recflags
& kHFSHasSecurityMask
) != 0) {
1105 /* get the vnode for this cnid */
1106 myErr
= hfs_vget(hfsmp
, thisNodeID
, &vp
, 0, 0);
1109 goto ExitThisRoutine
;
1112 thisNodeID
= VTOC(vp
)->c_parentcnid
;
1114 hfs_unlock(VTOC(vp
));
1116 if (vnode_vtype(vp
) == VDIR
) {
1117 myErr
= vnode_authorize(vp
, NULL
, (KAUTH_VNODE_SEARCH
| KAUTH_VNODE_LIST_DIRECTORY
), my_context
);
1119 myErr
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
, my_context
);
1125 goto ExitThisRoutine
;
1129 int mode
= cnattr
.ca_mode
& S_IFMT
;
1130 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
, cnattr
.ca_mode
, hfsmp
->hfs_mp
,myp_ucred
, theProcPtr
);
1132 if (mode
== S_IFDIR
) {
1133 flags
= R_OK
| X_OK
;
1137 if ( (myPerms
& flags
) != flags
) {
1140 goto ExitThisRoutine
; /* no access */
1143 /* up the hierarchy we go */
1144 thisNodeID
= catkey
.hfsPlus
.parentID
;
1148 /* if here, we have access to this node */
1152 if (parents
&& myErr
== 0 && scope_index
== -1) {
1161 /* cache the parent directory(ies) */
1162 for (i
= 0; i
< ids_to_cache
; i
++) {
1163 if (myErr
== 0 && parents
&& (scope_idx_start
== -1 || i
> scope_idx_start
)) {
1164 add_node(cache
, -1, parent_ids
[i
], ESRCH
);
1166 add_node(cache
, -1, parent_ids
[i
], myErr
);
1174 do_bulk_access_check(struct hfsmount
*hfsmp
, struct vnode
*vp
,
1175 struct vnop_ioctl_args
*ap
, int arg_size
, vfs_context_t context
)
	/*
	 * NOTE: on entry, the vnode is locked.  In case this vnode
	 * happens to be in our list of file_ids, we'll note it so we can
	 * avoid calling hfs_chashget_nowait() on that id, as that
	 * will cause a "locking against myself" panic.
	 */
1185 Boolean check_leaf
= true;
1187 struct user64_ext_access_t
*user_access_structp
;
1188 struct user64_ext_access_t tmp_user_access
;
1189 struct access_cache cache
;
1191 int error
= 0, prev_parent_check_ok
=1;
1195 unsigned int num_files
= 0;
1197 int num_parents
= 0;
1201 cnid_t
*parents
=NULL
;
1205 cnid_t prevParent_cnid
= 0;
1206 unsigned int myPerms
;
1208 struct cat_attr cnattr
;
1210 struct cnode
*skip_cp
= VTOC(vp
);
1211 kauth_cred_t cred
= vfs_context_ucred(context
);
1212 proc_t p
= vfs_context_proc(context
);
1214 is64bit
= proc_is64bit(p
);
1216 /* initialize the local cache and buffers */
1217 cache
.numcached
= 0;
1218 cache
.cachehits
= 0;
1220 cache
.acache
= NULL
;
1221 cache
.haveaccess
= NULL
;
1223 /* struct copyin done during dispatch... need to copy file_id array separately */
1224 if (ap
->a_data
== NULL
) {
1226 goto err_exit_bulk_access
;
1230 if (arg_size
!= sizeof(struct user64_ext_access_t
)) {
1232 goto err_exit_bulk_access
;
1235 user_access_structp
= (struct user64_ext_access_t
*)ap
->a_data
;
1237 } else if (arg_size
== sizeof(struct user32_access_t
)) {
1238 struct user32_access_t
*accessp
= (struct user32_access_t
*)ap
->a_data
;
1240 // convert an old style bulk-access struct to the new style
1241 tmp_user_access
.flags
= accessp
->flags
;
1242 tmp_user_access
.num_files
= accessp
->num_files
;
1243 tmp_user_access
.map_size
= 0;
1244 tmp_user_access
.file_ids
= CAST_USER_ADDR_T(accessp
->file_ids
);
1245 tmp_user_access
.bitmap
= USER_ADDR_NULL
;
1246 tmp_user_access
.access
= CAST_USER_ADDR_T(accessp
->access
);
1247 tmp_user_access
.num_parents
= 0;
1248 user_access_structp
= &tmp_user_access
;
1250 } else if (arg_size
== sizeof(struct user32_ext_access_t
)) {
1251 struct user32_ext_access_t
*accessp
= (struct user32_ext_access_t
*)ap
->a_data
;
1253 // up-cast from a 32-bit version of the struct
1254 tmp_user_access
.flags
= accessp
->flags
;
1255 tmp_user_access
.num_files
= accessp
->num_files
;
1256 tmp_user_access
.map_size
= accessp
->map_size
;
1257 tmp_user_access
.num_parents
= accessp
->num_parents
;
1259 tmp_user_access
.file_ids
= CAST_USER_ADDR_T(accessp
->file_ids
);
1260 tmp_user_access
.bitmap
= CAST_USER_ADDR_T(accessp
->bitmap
);
1261 tmp_user_access
.access
= CAST_USER_ADDR_T(accessp
->access
);
1262 tmp_user_access
.parents
= CAST_USER_ADDR_T(accessp
->parents
);
1264 user_access_structp
= &tmp_user_access
;
1267 goto err_exit_bulk_access
;
1270 map_size
= user_access_structp
->map_size
;
1272 num_files
= user_access_structp
->num_files
;
1274 num_parents
= user_access_structp
->num_parents
;
1276 if (num_files
< 1) {
1277 goto err_exit_bulk_access
;
1279 if (num_files
> 1024) {
1281 goto err_exit_bulk_access
;
1284 if (num_parents
> 1024) {
1286 goto err_exit_bulk_access
;
1289 file_ids
= (int *) kalloc(sizeof(int) * num_files
);
1290 access
= (short *) kalloc(sizeof(short) * num_files
);
1292 bitmap
= (char *) kalloc(sizeof(char) * map_size
);
1296 parents
= (cnid_t
*) kalloc(sizeof(cnid_t
) * num_parents
);
1299 cache
.acache
= (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES
);
1300 cache
.haveaccess
= (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES
);
1302 if (file_ids
== NULL
|| access
== NULL
|| (map_size
!= 0 && bitmap
== NULL
) || cache
.acache
== NULL
|| cache
.haveaccess
== NULL
) {
1304 kfree(file_ids
, sizeof(int) * num_files
);
1307 kfree(bitmap
, sizeof(char) * map_size
);
1310 kfree(access
, sizeof(short) * num_files
);
1313 kfree(cache
.acache
, sizeof(int) * NUM_CACHE_ENTRIES
);
1315 if (cache
.haveaccess
) {
1316 kfree(cache
.haveaccess
, sizeof(unsigned char) * NUM_CACHE_ENTRIES
);
1319 kfree(parents
, sizeof(cnid_t
) * num_parents
);
1324 // make sure the bitmap is zero'ed out...
1326 bzero(bitmap
, (sizeof(char) * map_size
));
1329 if ((error
= copyin(user_access_structp
->file_ids
, (caddr_t
)file_ids
,
1330 num_files
* sizeof(int)))) {
1331 goto err_exit_bulk_access
;
1335 if ((error
= copyin(user_access_structp
->parents
, (caddr_t
)parents
,
1336 num_parents
* sizeof(cnid_t
)))) {
1337 goto err_exit_bulk_access
;
1341 flags
= user_access_structp
->flags
;
1342 if ((flags
& (F_OK
| R_OK
| W_OK
| X_OK
)) == 0) {
1346 /* check if we've been passed leaf node ids or parent ids */
1347 if (flags
& PARENT_IDS_FLAG
) {
1351 /* Check access to each file_id passed in */
1352 for (i
= 0; i
< num_files
; i
++) {
1354 cnid
= (cnid_t
) file_ids
[i
];
1356 /* root always has access */
1357 if ((!parents
) && (!suser(cred
, NULL
))) {
1363 /* do the lookup (checks the cnode hash, then the catalog) */
1364 error
= do_attr_lookup(hfsmp
, &cache
, cnid
, skip_cp
, &catkey
, &cnattr
);
1366 access
[i
] = (short) error
;
1371 // Check if the leaf matches one of the parent scopes
1372 leaf_index
= cache_binSearch(parents
, num_parents
-1, cnid
, NULL
);
1373 if (leaf_index
>= 0 && parents
[leaf_index
] == cnid
)
1374 prev_parent_check_ok
= 0;
1375 else if (leaf_index
>= 0)
1376 prev_parent_check_ok
= 1;
1379 // if the thing has acl's, do the full permission check
1380 if ((cnattr
.ca_recflags
& kHFSHasSecurityMask
) != 0) {
1383 /* get the vnode for this cnid */
1384 myErr
= hfs_vget(hfsmp
, cnid
, &cvp
, 0, 0);
1390 hfs_unlock(VTOC(cvp
));
1392 if (vnode_vtype(cvp
) == VDIR
) {
1393 myErr
= vnode_authorize(cvp
, NULL
, (KAUTH_VNODE_SEARCH
| KAUTH_VNODE_LIST_DIRECTORY
), context
);
1395 myErr
= vnode_authorize(cvp
, NULL
, KAUTH_VNODE_READ_DATA
, context
);
1404 /* before calling CheckAccess(), check the target file for read access */
1405 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
,
1406 cnattr
.ca_mode
, hfsmp
->hfs_mp
, cred
, p
);
1408 /* fail fast if no access */
1409 if ((myPerms
& flags
) == 0) {
1415 /* we were passed an array of parent ids */
1416 catkey
.hfsPlus
.parentID
= cnid
;
1419 /* if the last guy had the same parent and had access, we're done */
1420 if (i
> 0 && catkey
.hfsPlus
.parentID
== prevParent_cnid
&& access
[i
-1] == 0 && prev_parent_check_ok
) {
1426 myaccess
= do_access_check(hfsmp
, &error
, &cache
, catkey
.hfsPlus
.parentID
,
1427 skip_cp
, p
, cred
, context
,bitmap
, map_size
, parents
, num_parents
);
1429 if (myaccess
|| (error
== ESRCH
&& leaf_index
!= -1)) {
1430 access
[i
] = 0; // have access.. no errors to report
1432 access
[i
] = (error
!= 0 ? (short) error
: EACCES
);
1435 prevParent_cnid
= catkey
.hfsPlus
.parentID
;
1438 /* copyout the access array */
1439 if ((error
= copyout((caddr_t
)access
, user_access_structp
->access
,
1440 num_files
* sizeof (short)))) {
1441 goto err_exit_bulk_access
;
1443 if (map_size
&& bitmap
) {
1444 if ((error
= copyout((caddr_t
)bitmap
, user_access_structp
->bitmap
,
1445 map_size
* sizeof (char)))) {
1446 goto err_exit_bulk_access
;
1451 err_exit_bulk_access
:
1453 //printf("hfs: on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1456 kfree(file_ids
, sizeof(int) * num_files
);
1458 kfree(parents
, sizeof(cnid_t
) * num_parents
);
1460 kfree(bitmap
, sizeof(char) * map_size
);
1462 kfree(access
, sizeof(short) * num_files
);
1464 kfree(cache
.acache
, sizeof(int) * NUM_CACHE_ENTRIES
);
1465 if (cache
.haveaccess
)
1466 kfree(cache
.haveaccess
, sizeof(unsigned char) * NUM_CACHE_ENTRIES
);
1472 /* end "bulk-access" support */
/*
 * Callback for use with freeze ioctl.
 */
static int
hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
{
	vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
/*
 * Control filesystem operating characteristics.
 */
int
hfs_vnop_ioctl( struct vnop_ioctl_args /* {
		vfs_context_t a_context;
	} */ *ap)
{
1498 struct vnode
* vp
= ap
->a_vp
;
1499 struct hfsmount
*hfsmp
= VTOHFS(vp
);
1500 vfs_context_t context
= ap
->a_context
;
1501 kauth_cred_t cred
= vfs_context_ucred(context
);
1502 proc_t p
= vfs_context_proc(context
);
1503 struct vfsstatfs
*vfsp
;
1505 off_t jnl_start
, jnl_size
;
1506 struct hfs_journal_info
*jip
;
1509 off_t uncompressed_size
= -1;
1510 int decmpfs_error
= 0;
1512 if (ap
->a_command
== F_RDADVISE
) {
1513 /* we need to inspect the decmpfs state of the file as early as possible */
1514 compressed
= hfs_file_is_compressed(VTOC(vp
), 0);
1516 if (VNODE_IS_RSRC(vp
)) {
1517 /* if this is the resource fork, treat it as if it were empty */
1518 uncompressed_size
= 0;
1520 decmpfs_error
= hfs_uncompressed_size_of_compressed_file(NULL
, vp
, 0, &uncompressed_size
, 0);
1521 if (decmpfs_error
!= 0) {
1522 /* failed to get the uncompressed size, we'll check for this later */
1523 uncompressed_size
= -1;
1528 #endif /* HFS_COMPRESSION */
1530 is64bit
= proc_is64bit(p
);
1535 if ((error
= cp_handle_vnop(VTOC(vp
), CP_WRITE_ACCESS
)) != 0) {
1539 #endif /* CONFIG_PROTECT */
1541 switch (ap
->a_command
) {
1545 struct vnode
*file_vp
;
1551 /* Caller must be owner of file system. */
1552 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1553 if (suser(cred
, NULL
) &&
1554 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1557 /* Target vnode must be file system's root. */
1558 if (!vnode_isvroot(vp
)) {
1561 bufptr
= (char *)ap
->a_data
;
1562 cnid
= strtoul(bufptr
, NULL
, 10);
1564 /* We need to call hfs_vfs_vget to leverage the code that will
1565 * fix the origin list for us if needed, as opposed to calling
1566 * hfs_vget, since we will need the parent for build_path call.
1569 if ((error
= hfs_vfs_vget(HFSTOVFS(hfsmp
), cnid
, &file_vp
, context
))) {
1572 error
= build_path(file_vp
, bufptr
, sizeof(pathname_t
), &outlen
, 0, context
);
1586 /* Caller must be owner of file system. */
1587 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1588 if (suser(cred
, NULL
) &&
1589 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1592 /* Target vnode must be file system's root. */
1593 if (!vnode_isvroot(vp
)) {
1596 linkfileid
= *(cnid_t
*)ap
->a_data
;
1597 if (linkfileid
< kHFSFirstUserCatalogNodeID
) {
1600 if ((error
= hfs_lookup_siblinglinks(hfsmp
, linkfileid
, &prevlinkid
, &nextlinkid
))) {
1603 if (ap
->a_command
== HFS_NEXT_LINK
) {
1604 *(cnid_t
*)ap
->a_data
= nextlinkid
;
1606 *(cnid_t
*)ap
->a_data
= prevlinkid
;
1611 case HFS_RESIZE_PROGRESS
: {
1613 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1614 if (suser(cred
, NULL
) &&
1615 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1616 return (EACCES
); /* must be owner of file system */
1618 if (!vnode_isvroot(vp
)) {
1621 /* file system must not be mounted read-only */
1622 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
1626 return hfs_resize_progress(hfsmp
, (u_int32_t
*)ap
->a_data
);
1629 case HFS_RESIZE_VOLUME
: {
1633 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1634 if (suser(cred
, NULL
) &&
1635 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1636 return (EACCES
); /* must be owner of file system */
1638 if (!vnode_isvroot(vp
)) {
1642 /* filesystem must not be mounted read only */
1643 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
1646 newsize
= *(u_int64_t
*)ap
->a_data
;
1647 cursize
= (u_int64_t
)hfsmp
->totalBlocks
* (u_int64_t
)hfsmp
->blockSize
;
1649 if (newsize
> cursize
) {
1650 return hfs_extendfs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
1651 } else if (newsize
< cursize
) {
1652 return hfs_truncatefs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
1657 case HFS_CHANGE_NEXT_ALLOCATION
: {
1658 int error
= 0; /* Assume success */
1661 if (vnode_vfsisrdonly(vp
)) {
1664 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1665 if (suser(cred
, NULL
) &&
1666 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1667 return (EACCES
); /* must be owner of file system */
1669 if (!vnode_isvroot(vp
)) {
1672 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1673 location
= *(u_int32_t
*)ap
->a_data
;
1674 if ((location
>= hfsmp
->allocLimit
) &&
1675 (location
!= HFS_NO_UPDATE_NEXT_ALLOCATION
)) {
1677 goto fail_change_next_allocation
;
1679 /* Return previous value. */
1680 *(u_int32_t
*)ap
->a_data
= hfsmp
->nextAllocation
;
1681 if (location
== HFS_NO_UPDATE_NEXT_ALLOCATION
) {
1682 /* On magic value for location, set nextAllocation to next block
1683 * after metadata zone and set flag in mount structure to indicate
1684 * that nextAllocation should not be updated again.
1686 if (hfsmp
->hfs_metazone_end
!= 0) {
1687 HFS_UPDATE_NEXT_ALLOCATION(hfsmp
, hfsmp
->hfs_metazone_end
+ 1);
1689 hfsmp
->hfs_flags
|= HFS_SKIP_UPDATE_NEXT_ALLOCATION
;
1691 hfsmp
->hfs_flags
&= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION
;
1692 HFS_UPDATE_NEXT_ALLOCATION(hfsmp
, location
);
1694 MarkVCBDirty(hfsmp
);
1695 fail_change_next_allocation
:
1696 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1701 case HFS_SETBACKINGSTOREINFO
: {
1702 struct vnode
* bsfs_rootvp
;
1703 struct vnode
* di_vp
;
1704 struct hfs_backingstoreinfo
*bsdata
;
1707 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
1710 if (hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) {
1713 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1714 if (suser(cred
, NULL
) &&
1715 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1716 return (EACCES
); /* must be owner of file system */
1718 bsdata
= (struct hfs_backingstoreinfo
*)ap
->a_data
;
1719 if (bsdata
== NULL
) {
1722 if ((error
= file_vnode(bsdata
->backingfd
, &di_vp
))) {
1725 if ((error
= vnode_getwithref(di_vp
))) {
1726 file_drop(bsdata
->backingfd
);
1730 if (vnode_mount(vp
) == vnode_mount(di_vp
)) {
1731 (void)vnode_put(di_vp
);
1732 file_drop(bsdata
->backingfd
);
1737 * Obtain the backing fs root vnode and keep a reference
1738 * on it. This reference will be dropped in hfs_unmount.
1740 error
= VFS_ROOT(vnode_mount(di_vp
), &bsfs_rootvp
, NULL
); /* XXX use context! */
1742 (void)vnode_put(di_vp
);
1743 file_drop(bsdata
->backingfd
);
1746 vnode_ref(bsfs_rootvp
);
1747 vnode_put(bsfs_rootvp
);
1749 hfsmp
->hfs_backingfs_rootvp
= bsfs_rootvp
;
1751 hfsmp
->hfs_flags
|= HFS_HAS_SPARSE_DEVICE
;
		/*
		 * The free extent cache is managed differently for sparse devices.
		 * There is a window between when the volume is mounted and when the
		 * device is marked as sparse, so the free extent cache for this
		 * volume is currently initialized as a normal volume (sorted by block
		 * count).  Reset the cache so that it will be rebuilt again
		 * for the sparse device (sorted by start block).
		 */
1759 ResetVCBFreeExtCache(hfsmp
);
1761 hfsmp
->hfs_sparsebandblks
= bsdata
->bandsize
/ HFSTOVCB(hfsmp
)->blockSize
;
1762 hfsmp
->hfs_sparsebandblks
*= 4;
1764 vfs_markdependency(hfsmp
->hfs_mp
);
		/*
		 * If the sparse image is on a sparse image file (as opposed to a sparse
		 * bundle), then we may need to limit the free space to the maximum size
		 * of a file on that volume.  So we query (using pathconf), and if we get
		 * a meaningful result, we cache the number of blocks for later use in
		 * hfs_freeblks().
		 */
1773 hfsmp
->hfs_backingfs_maxblocks
= 0;
1774 if (vnode_vtype(di_vp
) == VREG
) {
1777 terr
= vn_pathconf(di_vp
, _PC_FILESIZEBITS
, &hostbits
, context
);
1778 if (terr
== 0 && hostbits
!= 0 && hostbits
< 64) {
1779 u_int64_t hostfilesizemax
= ((u_int64_t
)1) << hostbits
;
1781 hfsmp
->hfs_backingfs_maxblocks
= hostfilesizemax
/ hfsmp
->blockSize
;
1785 (void)vnode_put(di_vp
);
1786 file_drop(bsdata
->backingfd
);
1789 case HFS_CLRBACKINGSTOREINFO
: {
1790 struct vnode
* tmpvp
;
1792 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1793 if (suser(cred
, NULL
) &&
1794 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1795 return (EACCES
); /* must be owner of file system */
1797 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
1801 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
1802 hfsmp
->hfs_backingfs_rootvp
) {
1804 hfsmp
->hfs_flags
&= ~HFS_HAS_SPARSE_DEVICE
;
1805 tmpvp
= hfsmp
->hfs_backingfs_rootvp
;
1806 hfsmp
->hfs_backingfs_rootvp
= NULLVP
;
1807 hfsmp
->hfs_sparsebandblks
= 0;
1812 #endif /* HFS_SPARSE_DEV */
1817 mp
= vnode_mount(vp
);
1818 hfsmp
= VFSTOHFS(mp
);
1823 vfsp
= vfs_statfs(mp
);
1825 if (kauth_cred_getuid(cred
) != vfsp
->f_owner
&&
1826 !kauth_cred_issuser(cred
))
1829 lck_rw_lock_exclusive(&hfsmp
->hfs_insync
);
1831 // flush things before we get started to try and prevent
1832 // dirty data from being paged out while we're frozen.
1833 // note: can't do this after taking the lock as it will
1834 // deadlock against ourselves.
1835 vnode_iterate(mp
, 0, hfs_freezewrite_callback
, NULL
);
1836 hfs_lock_global (hfsmp
, HFS_EXCLUSIVE_LOCK
);
1838 // DO NOT call hfs_journal_flush() because that takes a
1839 // shared lock on the global exclusive lock!
1840 journal_flush(hfsmp
->jnl
, TRUE
);
1842 // don't need to iterate on all vnodes, we just need to
1843 // wait for writes to the system files and the device vnode
1845 // Now that journal flush waits for all metadata blocks to
1846 // be written out, waiting for btree writes is probably no
1848 if (HFSTOVCB(hfsmp
)->extentsRefNum
)
1849 vnode_waitforwrites(HFSTOVCB(hfsmp
)->extentsRefNum
, 0, 0, 0, "hfs freeze");
1850 if (HFSTOVCB(hfsmp
)->catalogRefNum
)
1851 vnode_waitforwrites(HFSTOVCB(hfsmp
)->catalogRefNum
, 0, 0, 0, "hfs freeze");
1852 if (HFSTOVCB(hfsmp
)->allocationsRefNum
)
1853 vnode_waitforwrites(HFSTOVCB(hfsmp
)->allocationsRefNum
, 0, 0, 0, "hfs freeze");
1854 if (hfsmp
->hfs_attribute_vp
)
1855 vnode_waitforwrites(hfsmp
->hfs_attribute_vp
, 0, 0, 0, "hfs freeze");
1856 vnode_waitforwrites(hfsmp
->hfs_devvp
, 0, 0, 0, "hfs freeze");
1858 hfsmp
->hfs_freezing_proc
= current_proc();
1864 vfsp
= vfs_statfs(vnode_mount(vp
));
1865 if (kauth_cred_getuid(cred
) != vfsp
->f_owner
&&
1866 !kauth_cred_issuser(cred
))
1869 // if we're not the one who froze the fs then we
1871 if (hfsmp
->hfs_freezing_proc
!= current_proc()) {
1875 // NOTE: if you add code here, also go check the
1876 // code that "thaws" the fs in hfs_vnop_close()
1878 hfsmp
->hfs_freezing_proc
= NULL
;
1879 hfs_unlock_global (hfsmp
);
1880 lck_rw_unlock_exclusive(&hfsmp
->hfs_insync
);
1885 case HFS_BULKACCESS_FSCTL
: {
1888 if (hfsmp
->hfs_flags
& HFS_STANDARD
) {
1893 size
= sizeof(struct user64_access_t
);
1895 size
= sizeof(struct user32_access_t
);
1898 return do_bulk_access_check(hfsmp
, vp
, ap
, size
, context
);
1901 case HFS_EXT_BULKACCESS_FSCTL
: {
1904 if (hfsmp
->hfs_flags
& HFS_STANDARD
) {
1909 size
= sizeof(struct user64_ext_access_t
);
1911 size
= sizeof(struct user32_ext_access_t
);
1914 return do_bulk_access_check(hfsmp
, vp
, ap
, size
, context
);
1917 case HFS_SET_XATTREXTENTS_STATE
: {
1920 if (ap
->a_data
== NULL
) {
1924 state
= *(int *)ap
->a_data
;
1926 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
		/*
		 * Super-user can enable or disable extent-based extended
		 * attribute support on a volume.
		 * Note: Starting Mac OS X 10.7, extent-based extended attributes
		 * are enabled by default, so any change will be transient only
		 * till the volume is remounted.
		 */
1939 if (state
== 0 || state
== 1)
1940 return hfs_set_volxattr(hfsmp
, HFS_SET_XATTREXTENTS_STATE
, state
);
1948 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
1951 error
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
);
1953 error
= hfs_fsync(vp
, MNT_WAIT
, TRUE
, p
);
1954 hfs_unlock(VTOC(vp
));
1961 register struct cnode
*cp
;
1964 if (!vnode_isreg(vp
))
1967 error
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
);
		/*
		 * used by regression test to determine if
		 * all the dirty pages (via write) have been cleaned
		 * after a call to 'fsync'.
		 */
1975 error
= is_file_clean(vp
, VTOF(vp
)->ff_size
);
1982 register struct radvisory
*ra
;
1983 struct filefork
*fp
;
1986 if (!vnode_isreg(vp
))
1989 ra
= (struct radvisory
*)(ap
->a_data
);
1992 /* Protect against a size change. */
1993 hfs_lock_truncate(VTOC(vp
), HFS_EXCLUSIVE_LOCK
);
1996 if (compressed
&& (uncompressed_size
== -1)) {
1997 /* fetching the uncompressed size failed above, so return the error */
1998 error
= decmpfs_error
;
1999 } else if ((compressed
&& (ra
->ra_offset
>= uncompressed_size
)) ||
2000 (!compressed
&& (ra
->ra_offset
>= fp
->ff_size
))) {
2003 #else /* HFS_COMPRESSION */
2004 if (ra
->ra_offset
>= fp
->ff_size
) {
2007 #endif /* HFS_COMPRESSION */
2009 error
= advisory_read(vp
, fp
->ff_size
, ra
->ra_offset
, ra
->ra_count
);
2012 hfs_unlock_truncate(VTOC(vp
), 0);
2016 case F_READBOOTSTRAP
:
2017 case F_WRITEBOOTSTRAP
:
2020 case _IOC(IOC_OUT
,'h', 4, 0): /* Create date in local time */
2023 *(user_time_t
*)(ap
->a_data
) = (user_time_t
) (to_bsd_time(VTOVCB(vp
)->localCreateDate
));
2026 *(user32_time_t
*)(ap
->a_data
) = (user32_time_t
) (to_bsd_time(VTOVCB(vp
)->localCreateDate
));
2031 case SPOTLIGHT_FSCTL_GET_MOUNT_TIME
:
2032 *(uint32_t *)ap
->a_data
= hfsmp
->hfs_mount_time
;
2035 case SPOTLIGHT_FSCTL_GET_LAST_MTIME
:
2036 *(uint32_t *)ap
->a_data
= hfsmp
->hfs_last_mounted_mtime
;
2039 case HFS_FSCTL_SET_VERY_LOW_DISK
:
2040 if (*(uint32_t *)ap
->a_data
>= hfsmp
->hfs_freespace_notify_warninglimit
) {
2044 hfsmp
->hfs_freespace_notify_dangerlimit
= *(uint32_t *)ap
->a_data
;
2047 case HFS_FSCTL_SET_LOW_DISK
:
2048 if ( *(uint32_t *)ap
->a_data
>= hfsmp
->hfs_freespace_notify_desiredlevel
2049 || *(uint32_t *)ap
->a_data
<= hfsmp
->hfs_freespace_notify_dangerlimit
) {
2054 hfsmp
->hfs_freespace_notify_warninglimit
= *(uint32_t *)ap
->a_data
;
2057 case HFS_FSCTL_SET_DESIRED_DISK
:
2058 if (*(uint32_t *)ap
->a_data
<= hfsmp
->hfs_freespace_notify_warninglimit
) {
2062 hfsmp
->hfs_freespace_notify_desiredlevel
= *(uint32_t *)ap
->a_data
;
2065 case HFS_VOLUME_STATUS
:
2066 *(uint32_t *)ap
->a_data
= hfsmp
->hfs_notification_conditions
;
2069 case HFS_SET_BOOT_INFO
:
2070 if (!vnode_isvroot(vp
))
2072 if (!kauth_cred_issuser(cred
) && (kauth_cred_getuid(cred
) != vfs_statfs(HFSTOVFS(hfsmp
))->f_owner
))
2073 return(EACCES
); /* must be superuser or owner of filesystem */
2074 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2077 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
2078 bcopy(ap
->a_data
, &hfsmp
->vcbFndrInfo
, sizeof(hfsmp
->vcbFndrInfo
));
2079 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
2080 (void) hfs_flushvolumeheader(hfsmp
, MNT_WAIT
, 0);
2083 case HFS_GET_BOOT_INFO
:
2084 if (!vnode_isvroot(vp
))
2086 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
2087 bcopy(&hfsmp
->vcbFndrInfo
, ap
->a_data
, sizeof(hfsmp
->vcbFndrInfo
));
2088 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
2091 case HFS_MARK_BOOT_CORRUPT
:
		/*
		 * Mark the boot volume corrupt by setting
		 * kHFSVolumeInconsistentBit in the volume header.  This will
		 * force fsck_hfs on next mount.
		 */
2100 /* Allowed only on the root vnode of the boot volume */
2101 if (!(vfs_flags(HFSTOVFS(hfsmp
)) & MNT_ROOTFS
) ||
2102 !vnode_isvroot(vp
)) {
2105 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2108 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
2109 hfs_mark_volume_inconsistent(hfsmp
);
2112 case HFS_FSCTL_GET_JOURNAL_INFO
:
2113 jip
= (struct hfs_journal_info
*)ap
->a_data
;
2118 if (hfsmp
->jnl
== NULL
) {
2122 jnl_start
= (off_t
)(hfsmp
->jnl_start
* HFSTOVCB(hfsmp
)->blockSize
) + (off_t
)HFSTOVCB(hfsmp
)->hfsPlusIOPosOffset
;
2123 jnl_size
= (off_t
)hfsmp
->jnl_size
;
2126 jip
->jstart
= jnl_start
;
2127 jip
->jsize
= jnl_size
;
2130 case HFS_SET_ALWAYS_ZEROFILL
: {
2131 struct cnode
*cp
= VTOC(vp
);
2133 if (*(int *)ap
->a_data
) {
2134 cp
->c_flag
|= C_ALWAYS_ZEROFILL
;
2136 cp
->c_flag
&= ~C_ALWAYS_ZEROFILL
;
2141 case HFS_DISABLE_METAZONE
: {
2142 /* Only root can disable metadata zone */
2146 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2150 /* Disable metadata zone now */
2151 (void) hfs_metadatazone_init(hfsmp
, true);
2152 printf ("hfs: Disabling metadata zone on %s\n", hfsmp
->vcbVN
);
2167 hfs_vnop_select(__unused
struct vnop_select_args
*ap
)
2169 struct vnop_select_args {
2174 vfs_context_t a_context;
2179 * We should really check to see if I/O is possible.
/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run.  The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently it's 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks.
 */
int
hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
{
2193 struct filefork
*fp
= VTOF(vp
);
2194 struct hfsmount
*hfsmp
= VTOHFS(vp
);
2195 int retval
= E_NONE
;
2196 u_int32_t logBlockSize
;
2197 size_t bytesContAvail
= 0;
2198 off_t blockposition
;
2203 * Check for underlying vnode requests and ensure that logical
2204 * to physical mapping is requested.
2207 *vpp
= hfsmp
->hfs_devvp
;
2211 logBlockSize
= GetLogicalBlockSize(vp
);
2212 blockposition
= (off_t
)bn
* logBlockSize
;
2214 lockExtBtree
= overflow_extents(fp
);
2217 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_EXTENTS
, HFS_EXCLUSIVE_LOCK
);
2219 retval
= MacToVFSError(
2220 MapFileBlockC (HFSTOVCB(hfsmp
),
2228 hfs_systemfile_unlock(hfsmp
, lockflags
);
2230 if (retval
== E_NONE
) {
2231 /* Figure out how many read ahead blocks there are */
2233 if (can_cluster(logBlockSize
)) {
2234 /* Make sure this result never goes negative: */
2235 *runp
= (bytesContAvail
< logBlockSize
) ? 0 : (bytesContAvail
/ logBlockSize
) - 1;
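			/*
			 * Worked example (descriptive only): with logBlockSize = 4096 and
			 * MapFileBlockC() reporting bytesContAvail = 32768, the run is
			 * 32768/4096 - 1 = 7, i.e. seven more logical blocks are contiguous
			 * after the one being mapped.
			 */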
/*
 * Convert logical block number to file offset.
 */
hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
/*
	struct vnop_blktooff_args {
		vnode_t a_vp;
		daddr64_t a_lblkno;
		off_t *a_offset;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);

	*ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);

	return(0);
}

/*
 * Convert file offset to logical block number.
 */
hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
/*
	struct vnop_offtoblk_args {
		vnode_t a_vp;
		off_t a_offset;
		daddr64_t *a_lblkno;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);

	*ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));

	return(0);
}
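/*
 * Worked example (illustrative): with a 4096-byte logical block size,
 * hfs_vnop_blktooff maps block 3 to byte offset 3 * 4096 = 12288, and
 * hfs_vnop_offtoblk maps any offset in [12288, 16383] back to block 3,
 * since the integer division above truncates.
 */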
/*
 * Map file offset to physical block number.
 *
 * If this function is called for write operation, and if the file
 * had virtual blocks allocated (delayed allocation), real blocks
 * are allocated by calling ExtendFileC().
 *
 * If this function is called for read operation, and if the file
 * had virtual blocks allocated (delayed allocation), no change
 * to the size of file is done, and if required, rangelist is
 * searched for mapping.
 *
 * System file cnodes are expected to be locked (shared or exclusive).
 */
hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
/*
	struct vnop_blockmap_args {
		vfs_context_t a_context;
	};
*/
{
	struct vnode *vp = ap->a_vp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	size_t bytesContAvail = 0;
	int retval = E_NONE;
	struct rl_entry *invalid_range;
	enum rl_overlaptype overlaptype;
2327 if (VNODE_IS_RSRC(vp
)) {
2328 /* allow blockmaps to the resource fork */
2330 if ( hfs_file_is_compressed(VTOC(vp
), 1) ) { /* 1 == don't take the cnode lock */
2331 int state
= decmpfs_cnode_get_vnode_state(VTOCMP(vp
));
2333 case FILE_IS_COMPRESSED
:
2335 case FILE_IS_CONVERTING
:
2336 /* if FILE_IS_CONVERTING, we allow blockmap */
2339 printf("invalid state %d for compressed file\n", state
);
2344 #endif /* HFS_COMPRESSION */
2346 /* Do not allow blockmap operation on a directory */
2347 if (vnode_isdir(vp
)) {
2352 * Check for underlying vnode requests and ensure that logical
2353 * to physical mapping is requested.
2355 if (ap
->a_bpn
== NULL
)
2358 if ( !vnode_issystem(vp
) && !vnode_islnk(vp
) && !vnode_isswap(vp
)) {
2359 if (VTOC(vp
)->c_lockowner
!= current_thread()) {
2360 hfs_lock(VTOC(vp
), HFS_FORCE_LOCK
);
2369 /* Check virtual blocks only when performing write operation */
2370 if ((ap
->a_flags
& VNODE_WRITE
) && (fp
->ff_unallocblocks
!= 0)) {
2371 if (hfs_start_transaction(hfsmp
) != 0) {
2377 syslocks
= SFL_EXTENTS
| SFL_BITMAP
;
2379 } else if (overflow_extents(fp
)) {
2380 syslocks
= SFL_EXTENTS
;
2384 lockflags
= hfs_systemfile_lock(hfsmp
, syslocks
, HFS_EXCLUSIVE_LOCK
);
2387 * Check for any delayed allocations.
2389 if ((ap
->a_flags
& VNODE_WRITE
) && (fp
->ff_unallocblocks
!= 0)) {
2391 u_int32_t loanedBlocks
;
2394 // Make sure we have a transaction. It's possible
2395 // that we came in and fp->ff_unallocblocks was zero
2396 // but during the time we blocked acquiring the extents
2397 // btree, ff_unallocblocks became non-zero and so we
2398 // will need to start a transaction.
2400 if (started_tr
== 0) {
2402 hfs_systemfile_unlock(hfsmp
, lockflags
);
			/*
			 * Note: ExtendFileC will Release any blocks on loan and
			 * acquire real blocks.  So we ask to extend by zero bytes
			 * since ExtendFileC will account for the virtual blocks.
			 */
2414 loanedBlocks
= fp
->ff_unallocblocks
;
2415 retval
= ExtendFileC(hfsmp
, (FCB
*)fp
, 0, 0,
2416 kEFAllMask
| kEFNoClumpMask
, &actbytes
);
2419 fp
->ff_unallocblocks
= loanedBlocks
;
2420 cp
->c_blocks
+= loanedBlocks
;
2421 fp
->ff_blocks
+= loanedBlocks
;
2423 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
2424 hfsmp
->loanedBlocks
+= loanedBlocks
;
2425 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
2427 hfs_systemfile_unlock(hfsmp
, lockflags
);
2428 cp
->c_flag
|= C_MODIFIED
;
2430 (void) hfs_update(vp
, TRUE
);
2431 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
2433 hfs_end_transaction(hfsmp
);
2440 retval
= MapFileBlockC(hfsmp
, (FCB
*)fp
, ap
->a_size
, ap
->a_foffset
,
2441 ap
->a_bpn
, &bytesContAvail
);
2443 hfs_systemfile_unlock(hfsmp
, lockflags
);
2448 (void) hfs_update(vp
, TRUE
);
2449 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
2450 hfs_end_transaction(hfsmp
);
	/* On write, always return error because virtual blocks, if any,
	 * should have been allocated in ExtendFileC().  We do not
	 * allocate virtual blocks on read, therefore return error
	 * only if no virtual blocks are allocated.  Otherwise we search
	 * rangelist for zero-fills.
	 */
2460 if ((MacToVFSError(retval
) != ERANGE
) ||
2461 (ap
->a_flags
& VNODE_WRITE
) ||
2462 ((ap
->a_flags
& VNODE_READ
) && (fp
->ff_unallocblocks
== 0))) {
2466 /* Validate if the start offset is within logical file size */
2467 if (ap
->a_foffset
> fp
->ff_size
) {
2471 /* Searching file extents has failed for read operation, therefore
2472 * search rangelist for any uncommitted holes in the file.
2474 overlaptype
= rl_scan(&fp
->ff_invalidranges
, ap
->a_foffset
,
2475 ap
->a_foffset
+ (off_t
)(ap
->a_size
- 1),
2477 switch(overlaptype
) {
2478 case RL_OVERLAPISCONTAINED
:
2479 /* start_offset <= rl_start, end_offset >= rl_end */
2480 if (ap
->a_foffset
!= invalid_range
->rl_start
) {
2483 case RL_MATCHINGOVERLAP
:
2484 /* start_offset = rl_start, end_offset = rl_end */
2485 case RL_OVERLAPCONTAINSRANGE
:
2486 /* start_offset >= rl_start, end_offset <= rl_end */
2487 case RL_OVERLAPSTARTSBEFORE
:
2488 /* start_offset > rl_start, end_offset >= rl_start */
2489 if ((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) {
2490 bytesContAvail
= (invalid_range
->rl_end
+ 1) - ap
->a_foffset
;
2492 bytesContAvail
= fp
->ff_size
- ap
->a_foffset
;
2494 if (bytesContAvail
> ap
->a_size
) {
2495 bytesContAvail
= ap
->a_size
;
2497 *ap
->a_bpn
= (daddr64_t
)-1;
2500 case RL_OVERLAPENDSAFTER
:
2501 /* start_offset < rl_start, end_offset < rl_end */
2508 /* MapFileC() found a valid extent in the filefork. Search the
2509 * mapping information further for invalid file ranges
2511 overlaptype
= rl_scan(&fp
->ff_invalidranges
, ap
->a_foffset
,
2512 ap
->a_foffset
+ (off_t
)bytesContAvail
- 1,
2514 if (overlaptype
!= RL_NOOVERLAP
) {
2515 switch(overlaptype
) {
2516 case RL_MATCHINGOVERLAP
:
2517 case RL_OVERLAPCONTAINSRANGE
:
2518 case RL_OVERLAPSTARTSBEFORE
:
2519 /* There's no valid block for this byte offset */
2520 *ap
->a_bpn
= (daddr64_t
)-1;
2521 /* There's no point limiting the amount to be returned
2522 * if the invalid range that was hit extends all the way
2523 * to the EOF (i.e. there's no valid bytes between the
2524 * end of this range and the file's EOF):
2526 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
2527 ((size_t)(invalid_range
->rl_end
+ 1 - ap
->a_foffset
) < bytesContAvail
)) {
2528 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
2532 case RL_OVERLAPISCONTAINED
:
2533 case RL_OVERLAPENDSAFTER
:
2534 /* The range of interest hits an invalid block before the end: */
2535 if (invalid_range
->rl_start
== ap
->a_foffset
) {
2536 /* There's actually no valid information to be had starting here: */
2537 *ap
->a_bpn
= (daddr64_t
)-1;
2538 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
2539 ((size_t)(invalid_range
->rl_end
+ 1 - ap
->a_foffset
) < bytesContAvail
)) {
2540 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
2543 bytesContAvail
= invalid_range
->rl_start
- ap
->a_foffset
;
2550 if (bytesContAvail
> ap
->a_size
)
2551 bytesContAvail
= ap
->a_size
;
2557 *ap
->a_run
= bytesContAvail
;
2560 *(int *)ap
->a_poff
= 0;
2566 return (MacToVFSError(retval
));
/*
 * prepare and issue the I/O
 * buf_strategy knows how to deal
 * with requests that require
 * fragmented I/Os
 */
hfs_vnop_strategy(struct vnop_strategy_args *ap)
{
	buf_t bp = ap->a_bp;
	vnode_t vp = buf_vnode(bp);

#if CONFIG_PROTECT
	if ((cp = cp_get_protected_cnode(vp)) != NULL) {
		/*
		 * Some paths to hfs_vnop_strategy will take the cnode lock,
		 * and some won't. But since content protection is only enabled
		 * for files that (a) aren't system files and (b) are regular
		 * files, any valid cnode here will be unlocked.
		 */
		hfs_lock(cp, HFS_SHARED_LOCK);
		buf_setcpaddr(bp, cp->c_cpentry);
	}
#endif /* CONFIG_PROTECT */

	error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
hfs_minorupdate(struct vnode *vp) {
	struct cnode *cp = VTOC(vp);

	cp->c_flag &= ~C_MODIFIED;
	cp->c_touch_acctime = 0;
	cp->c_touch_chgtime = 0;
	cp->c_touch_modtime = 0;

	return 0;
}
do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context)
{
	register struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	struct proc *p = vfs_context_proc(context);
	kauth_cred_t cred = vfs_context_ucred(context);
	off_t actualBytesAdded;
	u_int32_t fileblocks;
	struct hfsmount *hfsmp;

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
		 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

	/* This should only happen with a corrupt filesystem */
	if ((off_t)fp->ff_size < 0)
		return (EINVAL);

	if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
		return (EFBIG);

	/* Files that are changing size are not hot file candidates. */
	if (hfsmp->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
	if ((retval = hfs_getinoquota(cp)))
		return (retval);
2672 * Lengthen the size of the file. We must ensure that the
2673 * last byte of the file is allocated. Since the smallest
2674 * value of ff_size is 0, length will be at least 1.
2676 if (length
> (off_t
)fp
->ff_size
) {
2678 retval
= hfs_chkdq(cp
, (int64_t)(roundup(length
- filebytes
, blksize
)),
2684 * If we don't have enough physical space then
2685 * we need to extend the physical size.
2687 if (length
> filebytes
) {
2689 u_int32_t blockHint
= 0;
2691 /* All or nothing and don't round up to clumpsize. */
2692 eflags
= kEFAllMask
| kEFNoClumpMask
;
2694 if (cred
&& suser(cred
, NULL
) != 0)
2695 eflags
|= kEFReserveMask
; /* keep a reserve */
2698 * Allocate Journal and Quota files in metadata zone.
2700 if (filebytes
== 0 &&
2701 hfsmp
->hfs_flags
& HFS_METADATA_ZONE
&&
2702 hfs_virtualmetafile(cp
)) {
2703 eflags
|= kEFMetadataMask
;
2704 blockHint
= hfsmp
->hfs_metazone_start
;
2706 if (hfs_start_transaction(hfsmp
) != 0) {
2711 /* Protect extents b-tree and allocation bitmap */
2712 lockflags
= SFL_BITMAP
;
2713 if (overflow_extents(fp
))
2714 lockflags
|= SFL_EXTENTS
;
2715 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
2717 while ((length
> filebytes
) && (retval
== E_NONE
)) {
2718 bytesToAdd
= length
- filebytes
;
2719 retval
= MacToVFSError(ExtendFileC(VTOVCB(vp
),
2724 &actualBytesAdded
));
2726 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
2727 if (actualBytesAdded
== 0 && retval
== E_NONE
) {
2728 if (length
> filebytes
)
2734 hfs_systemfile_unlock(hfsmp
, lockflags
);
2738 (void) hfs_minorupdate(vp
);
2741 (void) hfs_update(vp
, TRUE
);
2742 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
2746 hfs_end_transaction(hfsmp
);
2751 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_NONE
,
2752 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
2755 if (!(flags
& IO_NOZEROFILL
)) {
2756 if (UBCINFOEXISTS(vp
) && (vnode_issystem(vp
) == 0) && retval
== E_NONE
) {
2757 struct rl_entry
*invalid_range
;
2760 zero_limit
= (fp
->ff_size
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
2761 if (length
< zero_limit
) zero_limit
= length
;
2763 if (length
> (off_t
)fp
->ff_size
) {
2766 /* Extending the file: time to fill out the current last page w. zeroes? */
2767 if ((fp
->ff_size
& PAGE_MASK_64
) &&
2768 (rl_scan(&fp
->ff_invalidranges
, fp
->ff_size
& ~PAGE_MASK_64
,
2769 fp
->ff_size
- 1, &invalid_range
) == RL_NOOVERLAP
)) {
2771 /* There's some valid data at the start of the (current) last page
2772 of the file, so zero out the remainder of that page to ensure the
2773 entire page contains valid data. Since there is no invalid range
2774 possible past the (current) eof, there's no need to remove anything
2775 from the invalid range list before calling cluster_write(): */
2777 retval
= cluster_write(vp
, (struct uio
*) 0, fp
->ff_size
, zero_limit
,
2778 fp
->ff_size
, (off_t
)0,
2779 (flags
& IO_SYNC
) | IO_HEADZEROFILL
| IO_NOZERODIRTY
);
2780 hfs_lock(cp
, HFS_FORCE_LOCK
);
2781 if (retval
) goto Err_Exit
;
2783 /* Merely invalidate the remaining area, if necessary: */
2784 if (length
> zero_limit
) {
2786 rl_add(zero_limit
, length
- 1, &fp
->ff_invalidranges
);
2787 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
2790 /* The page containing the (current) eof is invalid: just add the
2791 remainder of the page to the invalid list, along with the area
2792 being newly allocated:
2795 rl_add(fp
->ff_size
, length
- 1, &fp
->ff_invalidranges
);
2796 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
2800 panic("hfs_truncate: invoked on non-UBC object?!");
2803 cp
->c_touch_modtime
= TRUE
;
2804 fp
->ff_size
= length
;
2806 } else { /* Shorten the size of the file */
2808 if ((off_t
)fp
->ff_size
> length
) {
2809 /* Any space previously marked as invalid is now irrelevant: */
2810 rl_remove(length
, fp
->ff_size
- 1, &fp
->ff_invalidranges
);
2814 * Account for any unmapped blocks. Note that the new
2815 * file length can still end up with unmapped blocks.
2817 if (fp
->ff_unallocblocks
> 0) {
2818 u_int32_t finalblks
;
2819 u_int32_t loanedBlocks
;
2821 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
2823 loanedBlocks
= fp
->ff_unallocblocks
;
2824 cp
->c_blocks
-= loanedBlocks
;
2825 fp
->ff_blocks
-= loanedBlocks
;
2826 fp
->ff_unallocblocks
= 0;
2828 hfsmp
->loanedBlocks
-= loanedBlocks
;
2830 finalblks
= (length
+ blksize
- 1) / blksize
;
2831 if (finalblks
> fp
->ff_blocks
) {
2832 /* calculate required unmapped blocks */
2833 loanedBlocks
= finalblks
- fp
->ff_blocks
;
2834 hfsmp
->loanedBlocks
+= loanedBlocks
;
2836 fp
->ff_unallocblocks
= loanedBlocks
;
2837 cp
->c_blocks
+= loanedBlocks
;
2838 fp
->ff_blocks
+= loanedBlocks
;
2840 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
2844 * For a TBE process the deallocation of the file blocks is
2845 * delayed until the file is closed. And hfs_close calls
2846 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2847 * isn't set, we make sure this isn't a TBE process.
2849 if ((flags
& IO_NDELAY
) || (proc_tbe(p
) == 0)) {
2851 off_t savedbytes
= ((off_t
)fp
->ff_blocks
* (off_t
)blksize
);
2853 if (hfs_start_transaction(hfsmp
) != 0) {
2858 if (fp
->ff_unallocblocks
== 0) {
2859 /* Protect extents b-tree and allocation bitmap */
2860 lockflags
= SFL_BITMAP
;
2861 if (overflow_extents(fp
))
2862 lockflags
|= SFL_EXTENTS
;
2863 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
2865 retval
= MacToVFSError(TruncateFileC(VTOVCB(vp
), (FCB
*)fp
, length
, 0,
2866 FORK_IS_RSRC (fp
), FTOC(fp
)->c_fileid
, false));
2868 hfs_systemfile_unlock(hfsmp
, lockflags
);
2872 fp
->ff_size
= length
;
2875 (void) hfs_minorupdate(vp
);
2878 (void) hfs_update(vp
, TRUE
);
2879 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
2882 hfs_end_transaction(hfsmp
);
2884 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
2888 /* These are bytesreleased */
2889 (void) hfs_chkdq(cp
, (int64_t)-(savedbytes
- filebytes
), NOCRED
, 0);
2892 /* Only set update flag if the logical length changes */
2893 if ((off_t
)fp
->ff_size
!= length
)
2894 cp
->c_touch_modtime
= TRUE
;
2895 fp
->ff_size
= length
;
2897 if (cp
->c_mode
& (S_ISUID
| S_ISGID
)) {
2898 if (!vfs_context_issuser(context
)) {
2899 cp
->c_mode
&= ~(S_ISUID
| S_ISGID
);
2904 retval
= hfs_minorupdate(vp
);
2907 cp
->c_touch_chgtime
= TRUE
; /* status changed */
2908 cp
->c_touch_modtime
= TRUE
; /* file data was modified */
2909 retval
= hfs_update(vp
, MNT_WAIT
);
2912 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_NONE
,
2913 -1, -1, -1, retval
, 0);
2918 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_END
,
2919 (int)length
, (int)fp
->ff_size
, (int)filebytes
, retval
, 0);
/*
 * Preparation which must be done prior to deleting the catalog record
 * of a file or directory.  In order to make the on-disk state as safe as
 * possible, we remove the catalog entry before releasing the bitmap blocks
 * and the overflow extent records.  However, some work must be done prior
 * to deleting the catalog record.
 *
 * When calling this function, the cnode must exist both in memory and on-disk.
 * If there are both resource fork and data fork vnodes, this function should
 * be called on both.
 */
hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {

	struct filefork *fp = VTOF(vp);
	struct cnode *cp = VTOC(vp);

	/* Cannot truncate an HFS directory! */
	if (vnode_isdir(vp)) {
		return (EISDIR);
	}

	/*
	 * See the comment below in hfs_truncate for why we need to call
	 * setsize here.  Essentially we want to avoid pending IO if we
	 * already know that the blocks are going to be released here.
	 * This function is only called when totally removing all storage for a file, so
	 * we can take a shortcut and immediately setsize (0);
	 */
	ubc_setsize(vp, 0);
2957 /* This should only happen with a corrupt filesystem */
2958 if ((off_t
)fp
->ff_size
< 0)
2962 * We cannot just check if fp->ff_size == length (as an optimization)
2963 * since there may be extra physical blocks that also need truncation.
2966 if ((retval
= hfs_getinoquota(cp
))) {
2971 /* Wipe out any invalid ranges which have yet to be backed by disk */
2972 rl_remove(0, fp
->ff_size
- 1, &fp
->ff_invalidranges
);
2975 * Account for any unmapped blocks. Since we're deleting the
2976 * entire file, we don't have to worry about just shrinking
2977 * to a smaller number of borrowed blocks.
2979 if (fp
->ff_unallocblocks
> 0) {
2980 u_int32_t loanedBlocks
;
2982 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
2984 loanedBlocks
= fp
->ff_unallocblocks
;
2985 cp
->c_blocks
-= loanedBlocks
;
2986 fp
->ff_blocks
-= loanedBlocks
;
2987 fp
->ff_unallocblocks
= 0;
2989 hfsmp
->loanedBlocks
-= loanedBlocks
;
2991 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
/*
 * Special wrapper around calling TruncateFileC.  This function is usable
 * even when the catalog record does not exist any longer, making it ideal
 * for use when deleting a file.  The simplification here is that we know
 * that we are releasing all blocks.
 *
 * The caller is responsible for saving off a copy of the filefork(s)
 * embedded within the cnode prior to calling this function.  The pointers
 * supplied as arguments must be valid even if the cnode is no longer valid.
 */
hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
		     struct filefork *rsrcfork, u_int32_t fileid) {
3014 u_int32_t fileblocks
;
3019 blksize
= hfsmp
->blockSize
;
3022 if (datafork
->ff_blocks
> 0) {
3023 fileblocks
= datafork
->ff_blocks
;
3024 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
3026 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3028 while (filebytes
> 0) {
3029 if (filebytes
> HFS_BIGFILE_SIZE
&& overflow_extents(datafork
)) {
3030 filebytes
-= HFS_BIGFILE_SIZE
;
3035 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3036 if (hfs_start_transaction(hfsmp
) != 0) {
3041 if (datafork
->ff_unallocblocks
== 0) {
3042 /* Protect extents b-tree and allocation bitmap */
3043 lockflags
= SFL_BITMAP
;
3044 if (overflow_extents(datafork
))
3045 lockflags
|= SFL_EXTENTS
;
3046 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3048 error
= MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp
), datafork
, filebytes
, 1, 0, fileid
, false));
3050 hfs_systemfile_unlock(hfsmp
, lockflags
);
3053 datafork
->ff_size
= filebytes
;
3055 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3057 /* Finish the transaction and start over if necessary */
3058 hfs_end_transaction(hfsmp
);
3067 if (error
== 0 && (rsrcfork
!= NULL
) && rsrcfork
->ff_blocks
> 0) {
3068 fileblocks
= rsrcfork
->ff_blocks
;
3069 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
3071 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3073 while (filebytes
> 0) {
3074 if (filebytes
> HFS_BIGFILE_SIZE
&& overflow_extents(rsrcfork
)) {
3075 filebytes
-= HFS_BIGFILE_SIZE
;
3080 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3081 if (hfs_start_transaction(hfsmp
) != 0) {
3086 if (rsrcfork
->ff_unallocblocks
== 0) {
3087 /* Protect extents b-tree and allocation bitmap */
3088 lockflags
= SFL_BITMAP
;
3089 if (overflow_extents(rsrcfork
))
3090 lockflags
|= SFL_EXTENTS
;
3091 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3093 error
= MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp
), rsrcfork
, filebytes
, 1, 1, fileid
, false));
3095 hfs_systemfile_unlock(hfsmp
, lockflags
);
3098 rsrcfork
->ff_size
= filebytes
;
3100 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3102 /* Finish the transaction and start over if necessary */
3103 hfs_end_transaction(hfsmp
);
/*
 * Truncate a cnode to at most length size, freeing (or adding) the
 * disk blocks.
 */
hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
             int skipupdate, vfs_context_t context)
{
	struct filefork *fp = VTOF(vp);
	u_int32_t fileblocks;
	int blksize, error = 0;
	struct cnode *cp = VTOC(vp);

	/* Cannot truncate an HFS directory! */
	if (vnode_isdir(vp)) {
		return (EISDIR);
	}
	/* A swap file cannot change size. */
	if (vnode_isswap(vp) && (length != 0)) {
		return (EPERM);
	}

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	//
	// Have to do this here so that we don't wind up with
	// i/o pending for blocks that are about to be released
	// if we truncate the file.
	//
	// If skipsetsize is set, then the caller is responsible
	// for the ubc_setsize.
	//
	// Even if skipsetsize is set, if the length is zero we
	// want to call ubc_setsize() because as of SnowLeopard
	// it will no longer cause any page-ins and it will drop
	// any dirty pages so that we don't do any i/o that we
	// don't have to.  This also prevents a race where i/o
	// for truncated blocks may overwrite later data if the
	// blocks get reallocated to a different file.
	//
	if (!skipsetsize || length == 0)
		ubc_setsize(vp, length);

	// have to loop truncating or growing files that are
	// really big because otherwise transactions can get
	// enormous and consume too many kernel resources.
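	//
	// Illustrative note: HFS_BIGFILE_SIZE caps the work done per journal
	// transaction, so shrinking a fragmented file by roughly three times
	// HFS_BIGFILE_SIZE results in three separate do_hfs_truncate() calls
	// below, each wrapped in its own transaction.
	//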
	if (length < filebytes) {
		while (filebytes > length) {
			if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes -= HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
			if (error)
				break;
		}
	} else if (length > filebytes) {
		while (filebytes < length) {
			if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes += HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
			if (error)
				break;
		}
	} else /* Same logical size */ {

		error = do_hfs_truncate(vp, length, flags, skipupdate, context);
	}
	/* Files that are changing size are not hot file candidates. */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}
3204 * Preallocate file storage space.
3207 hfs_vnop_allocate(struct vnop_allocate_args
/* {
3211 off_t *a_bytesallocated;
3213 vfs_context_t a_context;
3216 struct vnode
*vp
= ap
->a_vp
;
3218 struct filefork
*fp
;
3220 off_t length
= ap
->a_length
;
3222 off_t moreBytesRequested
;
3223 off_t actualBytesAdded
;
3225 u_int32_t fileblocks
;
3226 int retval
, retval2
;
3227 u_int32_t blockHint
;
3228 u_int32_t extendFlags
; /* For call to ExtendFileC */
3229 struct hfsmount
*hfsmp
;
3230 kauth_cred_t cred
= vfs_context_ucred(ap
->a_context
);
3234 *(ap
->a_bytesallocated
) = 0;
3236 if (!vnode_isreg(vp
))
3238 if (length
< (off_t
)0)
3243 orig_ctime
= VTOC(vp
)->c_ctime
;
3245 check_for_tracked_file(vp
, orig_ctime
, ap
->a_length
== 0 ? NAMESPACE_HANDLER_TRUNCATE_OP
|NAMESPACE_HANDLER_DELETE_OP
: NAMESPACE_HANDLER_TRUNCATE_OP
, NULL
);
3247 hfs_lock_truncate(cp
, HFS_EXCLUSIVE_LOCK
);
3249 if ((retval
= hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
))) {
3257 fileblocks
= fp
->ff_blocks
;
3258 filebytes
= (off_t
)fileblocks
* (off_t
)vcb
->blockSize
;
3260 if ((ap
->a_flags
& ALLOCATEFROMVOL
) && (length
< filebytes
)) {
3265 /* Fill in the flags word for the call to Extend the file */
3267 extendFlags
= kEFNoClumpMask
;
3268 if (ap
->a_flags
& ALLOCATECONTIG
)
3269 extendFlags
|= kEFContigMask
;
3270 if (ap
->a_flags
& ALLOCATEALL
)
3271 extendFlags
|= kEFAllMask
;
3272 if (cred
&& suser(cred
, NULL
) != 0)
3273 extendFlags
|= kEFReserveMask
;
3274 if (hfs_virtualmetafile(cp
))
3275 extendFlags
|= kEFMetadataMask
;
3279 startingPEOF
= filebytes
;
3281 if (ap
->a_flags
& ALLOCATEFROMPEOF
)
3282 length
+= filebytes
;
3283 else if (ap
->a_flags
& ALLOCATEFROMVOL
)
3284 blockHint
= ap
->a_offset
/ VTOVCB(vp
)->blockSize
;
	/* If no changes are necessary, then we're done */
3287 if (filebytes
== length
)
3291 * Lengthen the size of the file. We must ensure that the
3292 * last byte of the file is allocated. Since the smallest
3293 * value of filebytes is 0, length will be at least 1.
3295 if (length
> filebytes
) {
3296 off_t total_bytes_added
= 0, orig_request_size
;
3298 orig_request_size
= moreBytesRequested
= length
- filebytes
;
3301 retval
= hfs_chkdq(cp
,
3302 (int64_t)(roundup(moreBytesRequested
, vcb
->blockSize
)),
3309 * Metadata zone checks.
3311 if (hfsmp
->hfs_flags
& HFS_METADATA_ZONE
) {
3313 * Allocate Journal and Quota files in metadata zone.
3315 if (hfs_virtualmetafile(cp
)) {
3316 blockHint
= hfsmp
->hfs_metazone_start
;
3317 } else if ((blockHint
>= hfsmp
->hfs_metazone_start
) &&
3318 (blockHint
<= hfsmp
->hfs_metazone_end
)) {
3320 * Move blockHint outside metadata zone.
3322 blockHint
= hfsmp
->hfs_metazone_end
+ 1;
3327 while ((length
> filebytes
) && (retval
== E_NONE
)) {
3328 off_t bytesRequested
;
3330 if (hfs_start_transaction(hfsmp
) != 0) {
3335 /* Protect extents b-tree and allocation bitmap */
3336 lockflags
= SFL_BITMAP
;
3337 if (overflow_extents(fp
))
3338 lockflags
|= SFL_EXTENTS
;
3339 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3341 if (moreBytesRequested
>= HFS_BIGFILE_SIZE
) {
3342 bytesRequested
= HFS_BIGFILE_SIZE
;
3344 bytesRequested
= moreBytesRequested
;
3347 if (extendFlags
& kEFContigMask
) {
3348 // if we're on a sparse device, this will force it to do a
3349 // full scan to find the space needed.
3350 hfsmp
->hfs_flags
&= ~HFS_DID_CONTIG_SCAN
;
3353 retval
= MacToVFSError(ExtendFileC(vcb
,
3358 &actualBytesAdded
));
3360 if (retval
== E_NONE
) {
3361 *(ap
->a_bytesallocated
) += actualBytesAdded
;
3362 total_bytes_added
+= actualBytesAdded
;
3363 moreBytesRequested
-= actualBytesAdded
;
3364 if (blockHint
!= 0) {
3365 blockHint
+= actualBytesAdded
/ vcb
->blockSize
;
3368 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)vcb
->blockSize
;
3370 hfs_systemfile_unlock(hfsmp
, lockflags
);
3373 (void) hfs_update(vp
, TRUE
);
3374 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3377 hfs_end_transaction(hfsmp
);
3382 * if we get an error and no changes were made then exit
3383 * otherwise we must do the hfs_update to reflect the changes
3385 if (retval
&& (startingPEOF
== filebytes
))
3389 * Adjust actualBytesAdded to be allocation block aligned, not
3390 * clump size aligned.
3391 * NOTE: So what we are reporting does not affect reality
3392 * until the file is closed, when we truncate the file to allocation
3395 if (total_bytes_added
!= 0 && orig_request_size
< total_bytes_added
)
3396 *(ap
->a_bytesallocated
) =
3397 roundup(orig_request_size
, (off_t
)vcb
->blockSize
);
3399 } else { /* Shorten the size of the file */
3401 if (fp
->ff_size
> length
) {
3403 * Any buffers that are past the truncation point need to be
3404 * invalidated (to maintain buffer cache consistency).
3408 retval
= hfs_truncate(vp
, length
, 0, 0, 0, ap
->a_context
);
3409 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)vcb
->blockSize
;
3412 * if we get an error and no changes were made then exit
3413 * otherwise we must do the hfs_update to reflect the changes
3415 if (retval
&& (startingPEOF
== filebytes
)) goto Err_Exit
;
3417 /* These are bytesreleased */
3418 (void) hfs_chkdq(cp
, (int64_t)-((startingPEOF
- filebytes
)), NOCRED
,0);
3421 if (fp
->ff_size
> filebytes
) {
3422 fp
->ff_size
= filebytes
;
3425 ubc_setsize(vp
, fp
->ff_size
);
3426 hfs_lock(cp
, HFS_FORCE_LOCK
);
3431 cp
->c_touch_chgtime
= TRUE
;
3432 cp
->c_touch_modtime
= TRUE
;
3433 retval2
= hfs_update(vp
, MNT_WAIT
);
3438 hfs_unlock_truncate(cp
, 0);
3445 * Pagein for HFS filesystem
3448 hfs_vnop_pagein(struct vnop_pagein_args
*ap
)
3450 struct vnop_pagein_args {
3453 vm_offset_t a_pl_offset,
3457 vfs_context_t a_context;
3463 struct filefork
*fp
;
3466 upl_page_info_t
*pl
;
3471 boolean_t truncate_lock_held
= FALSE
;
3472 boolean_t file_converted
= FALSE
;
3480 if ((error
= cp_handle_vnop(cp
, CP_READ_ACCESS
| CP_WRITE_ACCESS
)) != 0) {
3483 #endif /* CONFIG_PROTECT */
3485 if (ap
->a_pl
!= NULL
) {
3487 * this can only happen for swap files now that
3488 * we're asking for V2 paging behavior...
3489 * so don't need to worry about decompression, or
3490 * keeping track of blocks read or taking the truncate lock
3492 error
= cluster_pagein(vp
, ap
->a_pl
, ap
->a_pl_offset
, ap
->a_f_offset
,
3493 ap
->a_size
, (off_t
)fp
->ff_size
, ap
->a_flags
);
3499 * take truncate lock (shared/recursive) to guard against
3500 * zero-fill thru fsync interfering, but only for v2
3502 * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
3503 * lock shared and we are allowed to recurse 1 level if this thread already
3504 * owns the lock exclusively... this can legally occur
3505 * if we are doing a shrinking ftruncate against a file
3506 * that is mapped private, and the pages being truncated
3507 * do not currently exist in the cache... in that case
3508 * we will have to page-in the missing pages in order
3509 * to provide them to the private mapping... we must
	 * also call hfs_unlock_truncate with a positive been_recursed
3511 * arg to indicate that if we have recursed, there is no need to drop
3512 * the lock. Allowing this simple recursion is necessary
3513 * in order to avoid a certain deadlock... since the ftruncate
3514 * already holds the truncate lock exclusively, if we try
3515 * to acquire it shared to protect the pagein path, we will
3518 * NOTE: The if () block below is a workaround in order to prevent a
3519 * VM deadlock. See rdar://7853471.
3521 * If we are in a forced unmount, then launchd will still have the
3522 * dyld_shared_cache file mapped as it is trying to reboot. If we
3523 * take the truncate lock here to service a page fault, then our
3524 * thread could deadlock with the forced-unmount. The forced unmount
3525 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
3526 * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount
3527 * thread will think it needs to copy all of the data out of the file
3528 * and into a VM copy object. If we hold the cnode lock here, then that
3529 * VM operation will not be able to proceed, because we'll set a busy page
3530 * before attempting to grab the lock. Note that this isn't as simple as "don't
3531 * call ubc_setsize" because doing that would just shift the problem to the
3532 * ubc_msync done before the vnode is reclaimed.
3534 * So, if a forced unmount on this volume is in flight AND the cnode is
3535 * marked C_DELETED, then just go ahead and do the page in without taking
3536 * the lock (thus suspending pagein_v2 semantics temporarily). Since it's on a file
	 * that is not going to be available on the next mount, this seems like an
	 * OK solution from a correctness point of view, even though it is hacky.
3540 if (vfs_isforce(vp
->v_mount
)) {
3541 if (cp
->c_flag
& C_DELETED
) {
3542 /* If we don't get it, then just go ahead and operate without the lock */
3543 truncate_lock_held
= hfs_try_trunclock(cp
, HFS_RECURSE_TRUNCLOCK
);
3547 hfs_lock_truncate(cp
, HFS_RECURSE_TRUNCLOCK
);
3548 truncate_lock_held
= TRUE
;
3551 kret
= ubc_create_upl(vp
, ap
->a_f_offset
, ap
->a_size
, &upl
, &pl
, UPL_UBC_PAGEIN
| UPL_RET_ONLY_ABSENT
);
3553 if ((kret
!= KERN_SUCCESS
) || (upl
== (upl_t
) NULL
)) {
3560 * Scan from the back to find the last page in the UPL, so that we
3561 * aren't looking at a UPL that may have already been freed by the
3562 * preceding aborts/completions.
3564 for (pg_index
= ((isize
) / PAGE_SIZE
); pg_index
> 0;) {
3565 if (upl_page_present(pl
, --pg_index
))
3567 if (pg_index
== 0) {
3569 * no absent pages were found in the range specified
3570 * just abort the UPL to get rid of it and then we're done
3572 ubc_upl_abort_range(upl
, 0, isize
, UPL_ABORT_FREE_ON_EMPTY
);
3577 * initialize the offset variables before we touch the UPL.
3578 * f_offset is the position into the file, in bytes
3579 * offset is the position into the UPL, in bytes
3580 * pg_index is the pg# of the UPL we're operating on
3581 * isize is the offset into the UPL of the last page that is present.
3583 isize
= ((pg_index
+ 1) * PAGE_SIZE
);
3586 f_offset
= ap
->a_f_offset
;
3592 if ( !upl_page_present(pl
, pg_index
)) {
3594 * we asked for RET_ONLY_ABSENT, so it's possible
3595 * to get back empty slots in the UPL.
3596 * just skip over them
3598 f_offset
+= PAGE_SIZE
;
3599 offset
+= PAGE_SIZE
;
3606 * We know that we have at least one absent page.
3607 * Now checking to see how many in a row we have
3610 xsize
= isize
- PAGE_SIZE
;
3613 if ( !upl_page_present(pl
, pg_index
+ num_of_pages
))
3618 xsize
= num_of_pages
* PAGE_SIZE
;
3621 if (VNODE_IS_RSRC(vp
)) {
3622 /* allow pageins of the resource fork */
3624 int compressed
= hfs_file_is_compressed(VTOC(vp
), 1); /* 1 == don't take the cnode lock */
3627 if (truncate_lock_held
) {
3629 * can't hold the truncate lock when calling into the decmpfs layer
3630 * since it calls back into this layer... even though we're only
3631 * holding the lock in shared mode, and the re-entrant path only
3632 * takes the lock shared, we can deadlock if some other thread
3633 * tries to grab the lock exclusively in between.
3635 hfs_unlock_truncate(cp
, 1);
3636 truncate_lock_held
= FALSE
;
3639 ap
->a_pl_offset
= offset
;
3640 ap
->a_f_offset
= f_offset
;
3643 error
= decmpfs_pagein_compressed(ap
, &compressed
, VTOCMP(vp
));
				 * note that decmpfs_pagein_compressed can change the state of
3646 * 'compressed'... it will set it to 0 if the file is no longer
3647 * compressed once the compression lock is successfully taken
3648 * i.e. we would block on that lock while the file is being inflated
3652 /* successful page-in, update the access time */
3653 VTOC(vp
)->c_touch_acctime
= TRUE
;
3655 /* compressed files are not hot file candidates */
3656 if (VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
) {
3657 fp
->ff_bytesread
= 0;
3659 } else if (error
== EAGAIN
) {
3661 * EAGAIN indicates someone else already holds the compression lock...
3662 * to avoid deadlocking, we'll abort this range of pages with an
3663 * indication that the pagein needs to be redriven
3665 ubc_upl_abort_range(upl
, (upl_offset_t
) offset
, xsize
, UPL_ABORT_FREE_ON_EMPTY
| UPL_ABORT_RESTART
);
3667 goto pagein_next_range
;
3671 * Set file_converted only if the file became decompressed while we were
3672 * paging in. If it were still compressed, we would re-start the loop using the goto
				 * in the above block.  This avoids overloading truncate_lock_held as our retry_pagein
3674 * condition below, since we could have avoided taking the truncate lock to prevent
3675 * a deadlock in the force unmount case.
3677 file_converted
= TRUE
;
3680 if (file_converted
== TRUE
) {
3682 * the file was converted back to a regular file after we first saw it as compressed
3683 * we need to abort the upl, retake the truncate lock, recreate the UPL and start over
3684 * reset a_size so that we consider what remains of the original request
3685 * and null out a_upl and a_pl_offset.
3687 * We should only be able to get into this block if the decmpfs_pagein_compressed
3688 * successfully decompressed the range in question for this file.
3690 ubc_upl_abort_range(upl
, (upl_offset_t
) offset
, isize
, UPL_ABORT_FREE_ON_EMPTY
);
3694 ap
->a_pl_offset
= 0;
3696 /* Reset file_converted back to false so that we don't infinite-loop. */
3697 file_converted
= FALSE
;
3702 error
= cluster_pagein(vp
, upl
, offset
, f_offset
, xsize
, (off_t
)fp
->ff_size
, ap
->a_flags
);
3705 * Keep track of blocks read.
3707 if ( !vnode_isswap(vp
) && VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
&& error
== 0) {
3709 int took_cnode_lock
= 0;
3711 if (ap
->a_f_offset
== 0 && fp
->ff_size
< PAGE_SIZE
)
3712 bytesread
= fp
->ff_size
;
3716 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
3717 if ((fp
->ff_bytesread
+ bytesread
) > 0x00000000ffffffff && cp
->c_lockowner
!= current_thread()) {
3718 hfs_lock(cp
, HFS_FORCE_LOCK
);
3719 took_cnode_lock
= 1;
3722 * If this file hasn't been seen since the start of
3723 * the current sampling period then start over.
3725 if (cp
->c_atime
< VTOHFS(vp
)->hfc_timebase
) {
3728 fp
->ff_bytesread
= bytesread
;
3730 cp
->c_atime
= tv
.tv_sec
;
3732 fp
->ff_bytesread
+= bytesread
;
3734 cp
->c_touch_acctime
= TRUE
;
3735 if (took_cnode_lock
)
3742 pg_index
+= num_of_pages
;
3748 if (truncate_lock_held
== TRUE
) {
3749 /* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */
3750 hfs_unlock_truncate(cp
, 1);
3757 * Pageout for HFS filesystem.
3760 hfs_vnop_pageout(struct vnop_pageout_args
*ap
)
3762 struct vnop_pageout_args {
3765 vm_offset_t a_pl_offset,
3769 vfs_context_t a_context;
3773 vnode_t vp
= ap
->a_vp
;
3775 struct filefork
*fp
;
3779 upl_page_info_t
* pl
;
3780 vm_offset_t a_pl_offset
;
3782 int is_pageoutv2
= 0;
3789 * Figure out where the file ends, for pageout purposes. If
3790 * ff_new_size > ff_size, then we're in the middle of extending the
3791 * file via a write, so it is safe (and necessary) that we be able
3792 * to pageout up to that point.
3794 filesize
= fp
->ff_size
;
3795 if (fp
->ff_new_size
> filesize
)
3796 filesize
= fp
->ff_new_size
;
3798 a_flags
= ap
->a_flags
;
3799 a_pl_offset
= ap
->a_pl_offset
;
3802 * we can tell if we're getting the new or old behavior from the UPL
3804 if ((upl
= ap
->a_pl
) == NULL
) {
3809 * we're in control of any UPL we commit
3810 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
3812 a_flags
&= ~UPL_NOCOMMIT
;
3816 * take truncate lock (shared) to guard against
3817 * zero-fill thru fsync interfering, but only for v2
3819 hfs_lock_truncate(cp
, HFS_SHARED_LOCK
);
3821 if (a_flags
& UPL_MSYNC
) {
3822 request_flags
= UPL_UBC_MSYNC
| UPL_RET_ONLY_DIRTY
;
3825 request_flags
= UPL_UBC_PAGEOUT
| UPL_RET_ONLY_DIRTY
;
3828 kret
= ubc_create_upl(vp
, ap
->a_f_offset
, ap
->a_size
, &upl
, &pl
, request_flags
);
3830 if ((kret
!= KERN_SUCCESS
) || (upl
== (upl_t
) NULL
)) {
3836 * from this point forward upl points at the UPL we're working with
	 * it was either passed in or we successfully created it
3841 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
3842 * UPL instead of relying on the UPL passed into us. We go ahead and do that here,
3843 * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for
3844 * N dirty ranges in the UPL. Note that this is almost a direct copy of the
3845 * logic in vnode_pageout except that we need to do it after grabbing the truncate
3846 * lock in HFS so that we don't lock invert ourselves.
3848 * Note that we can still get into this function on behalf of the default pager with
3849 * non-V2 behavior (swapfiles). However in that case, we did not grab locks above
3850 * since fsync and other writing threads will grab the locks, then mark the
3851 * relevant pages as busy. But the pageout codepath marks the pages as busy,
3852 * and THEN would attempt to grab the truncate lock, which would result in deadlock. So
3853 * we do not try to grab anything for the pre-V2 case, which should only be accessed
3854 * by the paging/VM system.
3866 f_offset
= ap
->a_f_offset
;
3869 * Scan from the back to find the last page in the UPL, so that we
3870 * aren't looking at a UPL that may have already been freed by the
3871 * preceding aborts/completions.
3873 for (pg_index
= ((isize
) / PAGE_SIZE
); pg_index
> 0;) {
3874 if (upl_page_present(pl
, --pg_index
))
3876 if (pg_index
== 0) {
3877 ubc_upl_abort_range(upl
, 0, isize
, UPL_ABORT_FREE_ON_EMPTY
);
3883 * initialize the offset variables before we touch the UPL.
3884 * a_f_offset is the position into the file, in bytes
3885 * offset is the position into the UPL, in bytes
3886 * pg_index is the pg# of the UPL we're operating on.
3887 * isize is the offset into the UPL of the last non-clean page.
3889 isize
= ((pg_index
+ 1) * PAGE_SIZE
);
3898 if ( !upl_page_present(pl
, pg_index
)) {
3900 * we asked for RET_ONLY_DIRTY, so it's possible
3901 * to get back empty slots in the UPL.
3902 * just skip over them
3904 f_offset
+= PAGE_SIZE
;
3905 offset
+= PAGE_SIZE
;
3911 if ( !upl_dirty_page(pl
, pg_index
)) {
3912 panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index
, upl
);
3916 * We know that we have at least one dirty page.
3917 * Now checking to see how many in a row we have
3920 xsize
= isize
- PAGE_SIZE
;
3923 if ( !upl_dirty_page(pl
, pg_index
+ num_of_pages
))
3928 xsize
= num_of_pages
* PAGE_SIZE
;
3930 if (!vnode_isswap(vp
)) {
3936 if (cp
->c_lockowner
!= current_thread()) {
3937 if ((retval
= hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
))) {
3939 * we're in the v2 path, so we are the
3940 * owner of the UPL... we may have already
3941 * processed some of the UPL, so abort it
3942 * from the current working offset to the
3945 ubc_upl_abort_range(upl
,
3947 ap
->a_size
- offset
,
3948 UPL_ABORT_FREE_ON_EMPTY
);
3953 end_of_range
= f_offset
+ xsize
- 1;
3955 if (end_of_range
>= filesize
) {
3956 end_of_range
= (off_t
)(filesize
- 1);
3958 if (f_offset
< filesize
) {
3959 rl_remove(f_offset
, end_of_range
, &fp
->ff_invalidranges
);
3960 cp
->c_flag
|= C_MODIFIED
; /* leof is dirty */
3966 if ((error
= cluster_pageout(vp
, upl
, offset
, f_offset
,
3967 xsize
, filesize
, a_flags
))) {
3974 pg_index
+= num_of_pages
;
3976 /* capture errnos bubbled out of cluster_pageout if they occurred */
3977 if (error_ret
!= 0) {
3980 } /* end block for v2 pageout behavior */
3982 if (!vnode_isswap(vp
)) {
3986 if (cp
->c_lockowner
!= current_thread()) {
3987 if ((retval
= hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
))) {
3988 if (!(a_flags
& UPL_NOCOMMIT
)) {
3989 ubc_upl_abort_range(upl
,
3992 UPL_ABORT_FREE_ON_EMPTY
);
3998 end_of_range
= ap
->a_f_offset
+ ap
->a_size
- 1;
4000 if (end_of_range
>= filesize
) {
4001 end_of_range
= (off_t
)(filesize
- 1);
4003 if (ap
->a_f_offset
< filesize
) {
4004 rl_remove(ap
->a_f_offset
, end_of_range
, &fp
->ff_invalidranges
);
4005 cp
->c_flag
|= C_MODIFIED
; /* leof is dirty */
4013 * just call cluster_pageout for old pre-v2 behavior
4015 retval
= cluster_pageout(vp
, upl
, a_pl_offset
, ap
->a_f_offset
,
4016 ap
->a_size
, filesize
, a_flags
);
4020 * If data was written, update the modification time of the file.
4021 * If setuid or setgid bits are set and this process is not the
4022 * superuser then clear the setuid and setgid bits as a precaution
4023 * against tampering.
4026 cp
->c_touch_modtime
= TRUE
;
4027 cp
->c_touch_chgtime
= TRUE
;
4028 if ((cp
->c_mode
& (S_ISUID
| S_ISGID
)) &&
4029 (vfs_context_suser(ap
->a_context
) != 0)) {
4030 hfs_lock(cp
, HFS_FORCE_LOCK
);
4031 cp
->c_mode
&= ~(S_ISUID
| S_ISGID
);
4038 /* release truncate lock (shared) */
4039 hfs_unlock_truncate(cp
, 0);
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
	    (vp == VTOHFS(vp)->hfc_filevp)) {
		/*
		 * Swap and validate the node if it is in native byte order.
		 * This is always true on big endian, so we always validate
		 * before writing here.  On little endian, the node typically has
		 * been swapped and validated when it was written to the journal,
		 * so we won't do anything here.
		 */
		if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
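			/*
			 * Explanatory note: the last two bytes of a B-tree node hold the
			 * offset of record 0, which is always 14 (sizeof(BTNodeDescriptor)).
			 * Reading 0x000e there as a host-order u_int16_t therefore indicates
			 * the node is still in host byte order and must be swapped to big
			 * endian before it goes to disk.
			 */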
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = (char *)buf_dataptr(bp);
			block.blockNum = buf_lblkno(bp);
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (buf_fromcache(bp) == 0);
			block.blockSize = buf_count(bp);

			/* Endian un-swap B-Tree node */
			retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
			if (retval)
				panic("hfs_vnop_bwrite: about to write corrupt node!\n");
		}
	}

	/* This buffer shouldn't be locked anymore but if it is clear it */
	if ((buf_flags(bp) & B_LOCKED)) {
		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp %p\n", bp);
		}
		buf_clearflags(bp, B_LOCKED);
	}
	retval = vn_bwrite (ap);
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 * -----------------     -----------------
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 * -----------------     -----------------
 *
 *                       -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                       -----------------
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
4132 hfs_relocate(struct vnode
*vp
, u_int32_t blockHint
, kauth_cred_t cred
,
4136 struct filefork
*fp
;
4137 struct hfsmount
*hfsmp
;
4142 u_int32_t nextallocsave
;
4143 daddr64_t sector_a
, sector_b
;
4148 int took_trunc_lock
= 0;
4150 enum vtype vnodetype
;
4152 vnodetype
= vnode_vtype(vp
);
4153 if (vnodetype
!= VREG
&& vnodetype
!= VLNK
) {
4158 if (hfsmp
->hfs_flags
& HFS_FRAGMENTED_FREESPACE
) {
4164 if (fp
->ff_unallocblocks
)
4169 * <rdar://problem/9118426>
4170 * Disable HFS file relocation on content-protected filesystems
4172 if (cp_fs_protected (hfsmp
->hfs_mp
)) {
4177 /* If it's an SSD, also disable HFS relocation */
4178 if (hfsmp
->hfs_flags
& HFS_SSD
) {
4182 blksize
= hfsmp
->blockSize
;
4184 blockHint
= hfsmp
->nextAllocation
;
4186 if ((fp
->ff_size
> 0x7fffffff) ||
4187 ((fp
->ff_size
> blksize
) && vnodetype
== VLNK
)) {
4192 // We do not believe that this call to hfs_fsync() is
4193 // necessary and it causes a journal transaction
4194 // deadlock so we are removing it.
4196 //if (vnodetype == VREG && !vnode_issystem(vp)) {
4197 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
4202 if (!vnode_issystem(vp
) && (vnodetype
!= VLNK
)) {
4204 hfs_lock_truncate(cp
, HFS_EXCLUSIVE_LOCK
);
4205 /* Force lock since callers expects lock to be held. */
4206 if ((retval
= hfs_lock(cp
, HFS_FORCE_LOCK
))) {
4207 hfs_unlock_truncate(cp
, 0);
4210 /* No need to continue if file was removed. */
4211 if (cp
->c_flag
& C_NOEXISTS
) {
4212 hfs_unlock_truncate(cp
, 0);
4215 took_trunc_lock
= 1;
4217 headblks
= fp
->ff_blocks
;
4218 datablks
= howmany(fp
->ff_size
, blksize
);
4219 growsize
= datablks
* blksize
;
4220 eflags
= kEFContigMask
| kEFAllMask
| kEFNoClumpMask
;
4221 if (blockHint
>= hfsmp
->hfs_metazone_start
&&
4222 blockHint
<= hfsmp
->hfs_metazone_end
)
4223 eflags
|= kEFMetadataMask
;
4225 if (hfs_start_transaction(hfsmp
) != 0) {
4226 if (took_trunc_lock
)
4227 hfs_unlock_truncate(cp
, 0);
4232 * Protect the extents b-tree and the allocation bitmap
4233 * during MapFileBlockC and ExtendFileC operations.
4235 lockflags
= SFL_BITMAP
;
4236 if (overflow_extents(fp
))
4237 lockflags
|= SFL_EXTENTS
;
4238 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
4240 retval
= MapFileBlockC(hfsmp
, (FCB
*)fp
, 1, growsize
- 1, &sector_a
, NULL
);
4242 retval
= MacToVFSError(retval
);
4247 * STEP 1 - acquire new allocation blocks.
4249 nextallocsave
= hfsmp
->nextAllocation
;
4250 retval
= ExtendFileC(hfsmp
, (FCB
*)fp
, growsize
, blockHint
, eflags
, &newbytes
);
4251 if (eflags
& kEFMetadataMask
) {
4252 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
4253 HFS_UPDATE_NEXT_ALLOCATION(hfsmp
, nextallocsave
);
4254 MarkVCBDirty(hfsmp
);
4255 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
4258 retval
= MacToVFSError(retval
);
4260 cp
->c_flag
|= C_MODIFIED
;
4261 if (newbytes
< growsize
) {
4264 } else if (fp
->ff_blocks
< (headblks
+ datablks
)) {
4265 printf("hfs_relocate: allocation failed");
4270 retval
= MapFileBlockC(hfsmp
, (FCB
*)fp
, 1, growsize
, §or_b
, NULL
);
4272 retval
= MacToVFSError(retval
);
4273 } else if ((sector_a
+ 1) == sector_b
) {
4276 } else if ((eflags
& kEFMetadataMask
) &&
4277 ((((u_int64_t
)sector_b
* hfsmp
->hfs_logical_block_size
) / blksize
) >
4278 hfsmp
->hfs_metazone_end
)) {
4280 const char * filestr
;
4281 char emptystr
= '\0';
4283 if (cp
->c_desc
.cd_nameptr
!= NULL
) {
4284 filestr
= (const char *)&cp
->c_desc
.cd_nameptr
[0];
4285 } else if (vnode_name(vp
) != NULL
) {
4286 filestr
= vnode_name(vp
);
4288 filestr
= &emptystr
;
4295 /* Done with system locks and journal for now. */
4296 hfs_systemfile_unlock(hfsmp
, lockflags
);
4298 hfs_end_transaction(hfsmp
);
4303 * Check to see if failure is due to excessive fragmentation.
4305 if ((retval
== ENOSPC
) &&
4306 (hfs_freeblks(hfsmp
, 0) > (datablks
* 2))) {
4307 hfsmp
->hfs_flags
|= HFS_FRAGMENTED_FREESPACE
;
4312 * STEP 2 - clone file data into the new allocation blocks.
4315 if (vnodetype
== VLNK
)
4316 retval
= hfs_clonelink(vp
, blksize
, cred
, p
);
4317 else if (vnode_issystem(vp
))
4318 retval
= hfs_clonesysfile(vp
, headblks
, datablks
, blksize
, cred
, p
);
4320 retval
= hfs_clonefile(vp
, headblks
, datablks
, blksize
);
4322 /* Start transaction for step 3 or for a restore. */
4323 if (hfs_start_transaction(hfsmp
) != 0) {
4332 * STEP 3 - switch to cloned data and remove old blocks.
4334 lockflags
= SFL_BITMAP
;
4335 if (overflow_extents(fp
))
4336 lockflags
|= SFL_EXTENTS
;
4337 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
4339 retval
= HeadTruncateFile(hfsmp
, (FCB
*)fp
, headblks
);
4341 hfs_systemfile_unlock(hfsmp
, lockflags
);
4346 if (took_trunc_lock
)
4347 hfs_unlock_truncate(cp
, 0);
4350 hfs_systemfile_unlock(hfsmp
, lockflags
);
4354 /* Push cnode's new extent data to disk. */
4356 (void) hfs_update(vp
, MNT_WAIT
);
4359 if (cp
->c_cnid
< kHFSFirstUserCatalogNodeID
)
4360 (void) hfs_flushvolumeheader(hfsmp
, MNT_WAIT
, HFS_ALTFLUSH
);
4362 (void) hfs_flushvolumeheader(hfsmp
, MNT_NOWAIT
, 0);
4366 hfs_end_transaction(hfsmp
);
4371 if (fp
->ff_blocks
== headblks
) {
4372 if (took_trunc_lock
)
4373 hfs_unlock_truncate(cp
, 0);
4377 * Give back any newly allocated space.
4379 if (lockflags
== 0) {
4380 lockflags
= SFL_BITMAP
;
4381 if (overflow_extents(fp
))
4382 lockflags
|= SFL_EXTENTS
;
4383 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
4386 (void) TruncateFileC(hfsmp
, (FCB
*)fp
, fp
->ff_size
, 0, FORK_IS_RSRC(fp
),
4387 FTOC(fp
)->c_fileid
, false);
4389 hfs_systemfile_unlock(hfsmp
, lockflags
);
4392 if (took_trunc_lock
)
4393 hfs_unlock_truncate(cp
, 0);
4403 hfs_clonelink(struct vnode
*vp
, int blksize
, kauth_cred_t cred
, __unused
struct proc
*p
)
4405 struct buf
*head_bp
= NULL
;
4406 struct buf
*tail_bp
= NULL
;
4410 error
= (int)buf_meta_bread(vp
, (daddr64_t
)0, blksize
, cred
, &head_bp
);
4414 tail_bp
= buf_getblk(vp
, (daddr64_t
)1, blksize
, 0, 0, BLK_META
);
4415 if (tail_bp
== NULL
) {
4419 bcopy((char *)buf_dataptr(head_bp
), (char *)buf_dataptr(tail_bp
), blksize
);
4420 error
= (int)buf_bwrite(tail_bp
);
4423 buf_markinvalid(head_bp
);
4424 buf_brelse(head_bp
);
4426 (void) buf_invalidateblks(vp
, BUF_WRITE_DATA
, 0, 0);
/*
 * Clone a file's data within the file.
 */
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 128 * 1024);
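	/*
	 * Example (illustrative): cloning a 1 MB fork proceeds in eight 128 KB
	 * passes; each pass below does an uncached cluster_read() of the old
	 * blocks followed by an uncached cluster_write() at writebase + offset.
	 */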
4452 hfs_unlock(VTOC(vp
));
4455 if ((error
= cp_handle_vnop(VTOC(vp
), CP_WRITE_ACCESS
)) != 0) {
4456 hfs_lock(VTOC(vp
), HFS_FORCE_LOCK
);
4459 #endif /* CONFIG_PROTECT */
4461 if (kmem_alloc(kernel_map
, (vm_offset_t
*)&bufp
, bufsize
)) {
4462 hfs_lock(VTOC(vp
), HFS_FORCE_LOCK
);
4466 auio
= uio_create(1, 0, UIO_SYSSPACE
, UIO_READ
);
4468 while (offset
< copysize
) {
4469 iosize
= MIN(copysize
- offset
, iosize
);
4471 uio_reset(auio
, offset
, UIO_SYSSPACE
, UIO_READ
);
4472 uio_addiov(auio
, (uintptr_t)bufp
, iosize
);
4474 error
= cluster_read(vp
, auio
, copysize
, IO_NOCACHE
);
4476 printf("hfs_clonefile: cluster_read failed - %d\n", error
);
4479 if (uio_resid(auio
) != 0) {
4480 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio
));
4485 uio_reset(auio
, writebase
+ offset
, UIO_SYSSPACE
, UIO_WRITE
);
4486 uio_addiov(auio
, (uintptr_t)bufp
, iosize
);
4488 error
= cluster_write(vp
, auio
, writebase
+ offset
,
4489 writebase
+ offset
+ iosize
,
4490 uio_offset(auio
), 0, IO_NOCACHE
| IO_SYNC
);
4492 printf("hfs_clonefile: cluster_write failed - %d\n", error
);
4495 if (uio_resid(auio
) != 0) {
4496 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
	if ((blksize & PAGE_MASK)) {
		/*
		 * since the copy may not have started on a PAGE
		 * boundary (or may not have ended on one), we
		 * may have pages left in the cache since NOCACHE
		 * will let partially written pages linger...
		 * let's just flush the entire range to make sure
		 * we don't have any pages left that are beyond
		 * (or intersect) the real LEOF of this file
		 */
		ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
	} else {
		/*
		 * No need to call ubc_sync_range or hfs_invalbuf
		 * since the file was copied using IO_NOCACHE and
		 * the copy was done starting and ending on a page
		 * boundary in the file.
		 */
	}
	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
4530 * Clone a system (metadata) file.
4534 hfs_clonesysfile(struct vnode
*vp
, int blkstart
, int blkcnt
, int blksize
,
4535 kauth_cred_t cred
, struct proc
*p
)
4541 struct buf
*bp
= NULL
;
4544 daddr64_t start_blk
;
4551 iosize
= GetLogicalBlockSize(vp
);
4552 bufsize
= MIN(blkcnt
* blksize
, 1024 * 1024) & ~(iosize
- 1);
4553 breadcnt
= bufsize
/ iosize
;
4555 if (kmem_alloc(kernel_map
, (vm_offset_t
*)&bufp
, bufsize
)) {
4558 start_blk
= ((daddr64_t
)blkstart
* blksize
) / iosize
;
4559 last_blk
= ((daddr64_t
)blkcnt
* blksize
) / iosize
;
4562 while (blkno
< last_blk
) {
4564 * Read up to a megabyte
4567 for (i
= 0, blk
= blkno
; (i
< breadcnt
) && (blk
< last_blk
); ++i
, ++blk
) {
4568 error
= (int)buf_meta_bread(vp
, blk
, iosize
, cred
, &bp
);
4570 printf("hfs_clonesysfile: meta_bread error %d\n", error
);
4573 if (buf_count(bp
) != iosize
) {
4574 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp
));
4577 bcopy((char *)buf_dataptr(bp
), offset
, iosize
);
4579 buf_markinvalid(bp
);
4587 * Write up to a megabyte
4590 for (i
= 0; (i
< breadcnt
) && (blkno
< last_blk
); ++i
, ++blkno
) {
4591 bp
= buf_getblk(vp
, start_blk
+ blkno
, iosize
, 0, 0, BLK_META
);
4593 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk
+ blkno
);
4597 bcopy(offset
, (char *)buf_dataptr(bp
), iosize
);
4598 error
= (int)buf_bwrite(bp
);
4610 kmem_free(kernel_map
, (vm_offset_t
)bufp
, bufsize
);
4612 error
= hfs_fsync(vp
, MNT_WAIT
, 0, p
);