/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* @(#)hfs_readwrite.c  1.0
 *
 * (c) 1998-2001 Apple Inc. All Rights Reserved
 *
 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vfs_context.h>
#include <sys/sysctl.h>
#include <sys/fsctl.h>
#include <sys/fsevents.h>
#include <uuid/uuid.h>

#include <libkern/OSDebug.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>

#include <IOKit/IOBSD.h>

#include <sys/kdebug.h>

#include "hfs_attrlist.h"
#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "FileMgrInternal.h"
#include "BTreesInternal.h"
#include "hfs_cnode.h"

#if HFS_CONFIG_KEY_ROLL
#include "hfs_key_roll.h"
#endif
#define can_cluster(size)  ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))

enum {
    MAXHFSFILESIZE = 0x7FFFFFFF   /* this needs to go in the mount structure */
};
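/*
 * Illustrative note (added for clarity; not in the original source): can_cluster()
 * only admits transfers that are both 4 KiB-aligned and no larger than half of
 * MAXPHYSIO.  For example, if MAXPHYSIO were 128 KiB (an assumed value here),
 * a 64 KiB request passes both tests, a 10,000-byte request fails the alignment
 * test, and a 96 KiB request fails the size test.
 */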
/* from bsd/hfs/hfs_vfsops.c */
extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
/* from hfs_hotfiles.c */
extern int hfs_pin_overflow_extents (struct hfsmount *hfsmp, uint32_t fileid,
                                     uint8_t forktype, uint32_t *pinned);

static int hfs_clonefile(struct vnode *, int, int, int);
static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
/*
 * Read data from a file.
 */
int
hfs_vnop_read(struct vnop_read_args *ap)
{
    /*
       struct vnop_read_args {
            struct vnodeop_desc *a_desc;
            vnode_t a_vp;
            struct uio *a_uio;
            int a_ioflag;
            vfs_context_t a_context;
       };
     */
    uio_t uio = ap->a_uio;
    struct vnode *vp = ap->a_vp;
    struct hfsmount *hfsmp;
    off_t start_resid = uio_resid(uio);
    off_t offset = uio_offset(uio);
    int took_truncate_lock = 0;
    int throttled_count = 0;

    /* Preflight checks */
    if (!vnode_isreg(vp)) {
        /* can only read regular files */
    }
    if (start_resid == 0)
        return (0);             /* Nothing left to do */
    if (offset < 0)
        return (EINVAL);        /* can't read from a negative offset */

    if ((ap->a_ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
            (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
        /* Don't allow unencrypted io request from user space */
    }

    if (VNODE_IS_RSRC(vp)) {
        if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
        }
        /* otherwise read the resource fork normally */
    } else {
        int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */

        retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
        if (retval == 0 && !(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
            (void) hfs_addhotfile(vp);
        }

        /* successful read, update the access time */
        VTOC(vp)->c_touch_acctime = TRUE;

        //
        // compressed files are not traditional hot file candidates
        // but they may be for CF (which ignores the ff_bytesread)
        //
        if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
            VTOF(vp)->ff_bytesread = 0;
        }

        /* otherwise the file was converted back to a regular file while we were reading it */
    } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
        error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
    }
#endif /* HFS_COMPRESSION */

    if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {
    }

#if HFS_CONFIG_KEY_ROLL
    if (ISSET(ap->a_ioflag, IO_ENCRYPTED)) {
        off_rsrc_t off_rsrc = off_rsrc_make(offset + start_resid,
                                            VNODE_IS_RSRC(vp));

        retval = hfs_key_roll_up_to(ap->a_context, vp, off_rsrc);
    }
#endif  // HFS_CONFIG_KEY_ROLL
#endif  // CONFIG_PROTECT

    /*
     * If this read request originated from a syscall (as opposed to
     * an in-kernel page fault or something), then set it up for
     * throttling.
     */
    if (ap->a_ioflag & IO_SYSCALL_DISPATCH) {
        io_throttle = IO_RETURN_ON_THROTTLE;
    }

    /* Protect against a size change. */
    hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
    took_truncate_lock = 1;

    filesize = fp->ff_size;
    filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;

    /*
     * Check the file size. Note that per POSIX spec, we return 0 at
     * file EOF, so attempting a read at an offset that is too big
     * should just return 0 on HFS+. Since the return value was initialized
     * to 0 above, we just jump to exit.  HFS Standard has its own behavior.
     */
    if (offset > filesize) {
        if ((hfsmp->hfs_flags & HFS_STANDARD) &&
            (offset > (off_t)MAXHFSFILESIZE)) {
        }
    }

    KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_START,
        (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

    retval = cluster_read(vp, uio, filesize, ap->a_ioflag | io_throttle);

    cp->c_touch_acctime = TRUE;

    KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_END,
        (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

    /*
     * Keep track of blocks read.
     */
    if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
        int took_cnode_lock = 0;

        bytesread = start_resid - uio_resid(uio);

        /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
        if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
            hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
        }
        /*
         * If this file hasn't been seen since the start of
         * the current sampling period then start over.
         */
        if (cp->c_atime < hfsmp->hfc_timebase) {
            fp->ff_bytesread = bytesread;
            cp->c_atime = tv.tv_sec;
        } else {
            fp->ff_bytesread += bytesread;
        }

        if (!(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
            //
            // We don't add hotfiles for processes doing IO_EVTONLY I/O
            // on the assumption that they're system processes such as
            // mdworker which scan everything in the system (and thus
            // do not represent user-initiated access to files)
            //
            (void) hfs_addhotfile(vp);
        }
    }

    if (took_truncate_lock) {
        hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
    }
    if (retval == EAGAIN) {
        throttle_lowpri_io(1);
    }
    throttle_info_reset_window(NULL);
}
/*
 * Ideally, this wouldn't be necessary; the cluster code should be
 * able to handle this on the read-side.  See <rdar://20420068>.
 */
static errno_t hfs_zero_eof_page(vnode_t vp, off_t zero_up_to)
{
    hfs_assert(VTOC(vp)->c_lockowner != current_thread());
    hfs_assert(VTOC(vp)->c_truncatelockowner == current_thread());

    struct filefork *fp = VTOF(vp);

    if (!(fp->ff_size & PAGE_MASK_64) || zero_up_to <= fp->ff_size) {
        return 0;
    }

    zero_up_to = MIN(zero_up_to, (off_t)round_page_64(fp->ff_size));

    /* N.B. At present, @zero_up_to is not important because the cluster
       code will always zero up to the end of the page anyway. */
    return cluster_write(vp, NULL, fp->ff_size, zero_up_to,
                         fp->ff_size, 0, IO_HEADZEROFILL);
}
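/*
 * Illustrative note (added for clarity; not in the original source):
 * hfs_zero_eof_page() only matters when the current EOF is not page-aligned
 * and the file is about to grow.  For example, with 4 KiB pages and
 * ff_size == 10,300 bytes, round_page_64(ff_size) == 12,288, so the stale
 * bytes 10,300-12,287 of the EOF page are zeroed (via cluster_write with
 * IO_HEADZEROFILL) before an extending write proceeds, ensuring old page
 * contents never become readable.
 */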
/*
 * Write data to a file.
 */
int
hfs_vnop_write(struct vnop_write_args *ap)
{
    uio_t uio = ap->a_uio;
    struct vnode *vp = ap->a_vp;
    struct hfsmount *hfsmp;
    kauth_cred_t cred = NULL;
    off_t bytesToAdd = 0;
    off_t actualBytesAdded;
    int ioflag = ap->a_ioflag;
    int cnode_locked = 0;
    int partialwrite = 0;
    time_t orig_ctime = VTOC(vp)->c_ctime;
    int took_truncate_lock = 0;
    int io_return_on_throttle = 0;
    int throttled_count = 0;

    if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
        int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
        switch (state) {
            case FILE_IS_COMPRESSED:
            case FILE_IS_CONVERTING:
                /* if FILE_IS_CONVERTING, we allow writes but do not
                   bother with snapshots or else we will deadlock. */
                break;
            default:
                printf("invalid state %d for compressed file\n", state);
        }
    } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
        error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
    }

    nspace_snapshot_event(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);

    if ((ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
            (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
        /* Don't allow unencrypted io request from user space */
    }

    resid = uio_resid(uio);
    offset = uio_offset(uio);

    if (!vnode_isreg(vp))
        return (EPERM);         /* Can only write regular files */

    if ((retval = cp_handle_vnop (vp, CP_WRITE_ACCESS, 0)) != 0) {
    }

    eflags = kEFDeferMask;      /* defer file block allocations */
#if HFS_SPARSE_DEV
    /*
     * When the underlying device is sparse and space
     * is low (< 8MB), stop doing delayed allocations
     * and begin doing synchronous I/O.
     */
    if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
        (hfs_freeblks(hfsmp, 0) < 2048)) {
        eflags &= ~kEFDeferMask;
    }
#endif /* HFS_SPARSE_DEV */

    if ((ioflag & (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) ==
            (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) {
        io_return_on_throttle = IO_RETURN_ON_THROTTLE;
    }

    /*
     * Protect against a size change.
     *
     * Note: If took_truncate_lock is true, then we previously got the lock shared
     * but needed to upgrade to exclusive.  So try getting it exclusive from the
     * start.
     */
    if (ioflag & IO_APPEND || took_truncate_lock) {
        hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
    } else {
        hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
    }
    took_truncate_lock = 1;

    if (ioflag & IO_APPEND) {
        uio_setoffset(uio, fp->ff_size);
        offset = fp->ff_size;
    }
    if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
    }

    cred = vfs_context_ucred(ap->a_context);
    if (cred && suser(cred, NULL) != 0)
        eflags |= kEFReserveMask;

    origFileSize = fp->ff_size;
    writelimit = offset + resid;

    /*
     * We may need an exclusive truncate lock for several reasons, all
     * of which are because we may be writing to a (portion of a) block
     * for the first time, and we need to make sure no readers see the
     * prior, uninitialized contents of the block.  The cases are:
     *
     * 1. We have unallocated (delayed allocation) blocks.  We may be
     *    allocating new blocks to the file and writing to them.
     *    (A more precise check would be whether the range we're writing
     *    to contains delayed allocation blocks.)
     * 2. We need to extend the file.  The bytes between the old EOF
     *    and the new EOF are not yet initialized.  This is important
     *    even if we're not allocating new blocks to the file.  If the
     *    old EOF and new EOF are in the same block, we still need to
     *    protect that range of bytes until they are written for the
     *    first time.
     *
     * If we had a shared lock with the above cases, we need to try to upgrade
     * to an exclusive lock.  If the upgrade fails, we will lose the shared
     * lock, and will need to take the truncate lock again; the took_truncate_lock
     * flag will still be set, causing us to try for an exclusive lock next time.
     */
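    /*
     * Concrete example for case 2 above (added for clarity; not in the original
     * source): suppose the old EOF is at byte 10,300 and this write ends at byte
     * 10,800.  Both offsets fall inside the same allocation block, so no new
     * blocks are needed, yet bytes 10,300-10,799 have never been written and
     * must not become visible to a concurrent reader until this write
     * initializes them -- hence the exclusive truncate lock taken below.
     */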
    if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
        ((fp->ff_unallocblocks != 0) ||
         (writelimit > origFileSize))) {
        if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
            /*
             * Lock upgrade failed and we lost our shared lock, try again.
             * Note: we do not set took_truncate_lock=0 here.  Leaving it
             * set to 1 will cause us to try to get the lock exclusive.
             */
        } else {
            /* Store the owner in the c_truncatelockowner field if we successfully upgrade */
            cp->c_truncatelockowner = current_thread();
        }
    }

    if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
    }

    filebytes = hfs_blk_to_bytes(fp->ff_blocks, hfsmp->blockSize);

    if (offset > filebytes
        && (hfs_blk_to_bytes(hfs_freeblks(hfsmp, ISSET(eflags, kEFReserveMask)),
                             hfsmp->blockSize) < offset - filebytes)) {
    }

    KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_START,
                 (int)offset, uio_resid(uio), (int)fp->ff_size,
                 (int)filebytes, 0);

    /* Check if we do not need to extend the file */
    if (writelimit <= filebytes) {
    }

    bytesToAdd = writelimit - filebytes;

    retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
                       cred, 0);

    if (hfs_start_transaction(hfsmp) != 0) {
    }

    while (writelimit > filebytes) {
        bytesToAdd = writelimit - filebytes;

        /* Protect extents b-tree and allocation bitmap */
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

        /* Files that are changing size are not hot file candidates. */
        if (hfsmp->hfc_stage == HFC_RECORDING) {
            fp->ff_bytesread = 0;
        }
        retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
                                            0, eflags, &actualBytesAdded));

        hfs_systemfile_unlock(hfsmp, lockflags);

        if ((actualBytesAdded == 0) && (retval == E_NONE))
            retval = ENOSPC;
        if (retval != E_NONE)
            break;
        filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
        KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_NONE,
                     (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
    }
    (void) hfs_update(vp, 0);
    (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
    (void) hfs_end_transaction(hfsmp);

    /*
     * If we didn't grow the file enough try a partial write.
     * POSIX expects this behavior.
     */
    if ((retval == ENOSPC) && (filebytes > offset)) {
        uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
        writelimit = filebytes;
    }

    if (retval == E_NONE) {
        if (writelimit > fp->ff_size) {
            filesize = writelimit;

            rl_add(fp->ff_size, writelimit - 1 , &fp->ff_invalidranges);
            cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
        } else {
            filesize = fp->ff_size;
        }

        lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);

        /*
         * We no longer use IO_HEADZEROFILL or IO_TAILZEROFILL (except
         * for one case below).  For the regions that lie before the
         * beginning and after the end of this write that are in the
         * same page, we let the cluster code handle zeroing that out
         * if necessary.  If those areas are not cached, the cluster
         * code will try and read those areas in, and in the case
         * where those regions have never been written to,
         * hfs_vnop_blockmap will consult the invalid ranges and then
         * indicate that.  The cluster code will zero out those areas.
         */

        head_off = trunc_page_64(offset);

        if (head_off < offset && head_off >= fp->ff_size) {
            /*
             * The first page is beyond current EOF, so as an
             * optimisation, we can pass IO_HEADZEROFILL.
             */
            lflag |= IO_HEADZEROFILL;
        }

        /*
         * We need to tell UBC the fork's new size BEFORE calling
         * cluster_write, in case any of the new pages need to be
         * paged out before cluster_write completes (which does happen
         * in embedded systems due to extreme memory pressure).
         * Similarly, we need to tell hfs_vnop_pageout what the new EOF
         * will be, so that it can pass that on to cluster_pageout, and
         * allow those pageouts.
         *
         * We don't update ff_size yet since we don't want pageins to
         * be able to see uninitialized data between the old and new
         * EOF, until cluster_write has completed and initialized that
         * part of the file.
         *
         * The vnode pager relies on the file size last given to UBC via
         * ubc_setsize.  hfs_vnop_pageout relies on fp->ff_new_size or
         * ff_size (whichever is larger).  NOTE: ff_new_size is always
         * zero, unless we are extending the file via write.
         */
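        /*
         * Illustrative note (added for clarity; not in the original source): the
         * ordering is ubc_setsize() first, cluster_write() second, ff_size update
         * last.  If a pageout races in between, hfs_vnop_pageout sees
         * max(ff_new_size, ff_size) and allows it; if a pagein races, it still
         * sees the old ff_size and so cannot expose the not-yet-initialized
         * bytes between the old and new EOF.
         */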
        if (filesize > fp->ff_size) {
            retval = hfs_zero_eof_page(vp, offset);

            fp->ff_new_size = filesize;
            ubc_setsize(vp, filesize);
        }
        retval = cluster_write(vp, uio, fp->ff_size, filesize, head_off,
                               0, lflag | IO_NOZERODIRTY | io_return_on_throttle);

        fp->ff_new_size = 0;    /* no longer extending; use ff_size */

        if (retval == EAGAIN) {
            /*
             * EAGAIN indicates that we still have I/O to do, but
             * that we now need to be throttled
             */
            if (resid != uio_resid(uio)) {
                /*
                 * did manage to do some I/O before returning EAGAIN
                 */
                resid = uio_resid(uio);
                offset = uio_offset(uio);

                cp->c_touch_chgtime = TRUE;
                cp->c_touch_modtime = TRUE;
                hfs_incr_gencount(cp);
            }
            if (filesize > fp->ff_size) {
                /*
                 * we called ubc_setsize before the call to
                 * cluster_write... since we only partially
                 * completed the I/O, we need to
                 * re-adjust our idea of the filesize based
                 * on what actually got written
                 */
                ubc_setsize(vp, offset);

                fp->ff_size = offset;
            }
        }
        if (filesize > origFileSize) {
            ubc_setsize(vp, origFileSize);
        }

        if (filesize > origFileSize) {
            fp->ff_size = filesize;

            /* Files that are changing size are not hot file candidates. */
            if (hfsmp->hfc_stage == HFC_RECORDING) {
                fp->ff_bytesread = 0;
            }
        }
        fp->ff_new_size = 0;    /* ff_size now has the correct size */
    }
    if (partialwrite) {
        uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
    }

    if (vnode_should_flush_after_write(vp, ioflag))
        hfs_flush(hfsmp, HFS_FLUSH_CACHE);

    hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);

    if (resid > uio_resid(uio)) {
        cp->c_touch_chgtime = TRUE;
        cp->c_touch_modtime = TRUE;
        hfs_incr_gencount(cp);

        /*
         * If we successfully wrote any data, and we are not the superuser
         * we clear the setuid and setgid bits as a precaution against
         * tampering.
         */
        if (cp->c_mode & (S_ISUID | S_ISGID)) {
            cred = vfs_context_ucred(ap->a_context);
            if (cred && suser(cred, NULL)) {
                cp->c_mode &= ~(S_ISUID | S_ISGID);
            }
        }
    }
    if (retval) {
        if (ioflag & IO_UNIT) {
            (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
                               0, ap->a_context);
            uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
            uio_setresid(uio, resid);
            filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
        }
    } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio)))
        retval = hfs_update(vp, 0);

    /* Updating vcbWrCnt doesn't need to be atomic. */

    KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_END,
        (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);

    if (retval && took_truncate_lock
        && cp->c_truncatelockowner == current_thread()) {
        rl_remove(fp->ff_size, RL_INFINITY, &fp->ff_invalidranges);
    }

    if (took_truncate_lock) {
        hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
    }
    if (retval == EAGAIN) {
        throttle_lowpri_io(1);
    }
    throttle_info_reset_window(NULL);
}
/* support for the "bulk-access" fcntl */

#define CACHE_LEVELS 16
#define NUM_CACHE_ENTRIES (64*16)
#define PARENT_IDS_FLAG 0x100

struct access_cache {
    int numcached;
    int cachehits;              /* these two for statistics gathering */
    int lookups;
    unsigned int *acache;
    unsigned char *haveaccess;
};

struct access_t {
    uid_t         uid;          /* IN: effective user id */
    short         flags;        /* IN: access requested (i.e. R_OK) */
    short         num_groups;   /* IN: number of groups user belongs to */
    int           num_files;    /* IN: number of files to process */
    int           *file_ids;    /* IN: array of file ids */
    gid_t         *groups;      /* IN: array of groups */
    short         *access;      /* OUT: access info for each file (0 for 'has access') */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_access_t {
    uid_t         uid;          /* IN: effective user id */
    short         flags;        /* IN: access requested (i.e. R_OK) */
    short         num_groups;   /* IN: number of groups user belongs to */
    int           num_files;    /* IN: number of files to process */
    user32_addr_t file_ids;     /* IN: array of file ids */
    user32_addr_t groups;       /* IN: array of groups */
    user32_addr_t access;       /* OUT: access info for each file (0 for 'has access') */
};

struct user64_access_t {
    uid_t         uid;          /* IN: effective user id */
    short         flags;        /* IN: access requested (i.e. R_OK) */
    short         num_groups;   /* IN: number of groups user belongs to */
    int           num_files;    /* IN: number of files to process */
    user64_addr_t file_ids;     /* IN: array of file ids */
    user64_addr_t groups;       /* IN: array of groups */
    user64_addr_t access;       /* OUT: access info for each file (0 for 'has access') */
};

// these are the "extended" versions of the above structures
// note that it is crucial that they be different sized than
// the regular version
struct ext_access_t {
    uint32_t      flags;        /* IN: access requested (i.e. R_OK) */
    uint32_t      num_files;    /* IN: number of files to process */
    uint32_t      map_size;     /* IN: size of the bit map */
    uint32_t     *file_ids;     /* IN: Array of file ids */
    char         *bitmap;       /* OUT: hash-bitmap of interesting directory ids */
    short        *access;       /* OUT: access info for each file (0 for 'has access') */
    uint32_t      num_parents;  /* future use */
    cnid_t       *parents;      /* future use */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_ext_access_t {
    uint32_t      flags;        /* IN: access requested (i.e. R_OK) */
    uint32_t      num_files;    /* IN: number of files to process */
    uint32_t      map_size;     /* IN: size of the bit map */
    user32_addr_t file_ids;     /* IN: Array of file ids */
    user32_addr_t bitmap;       /* OUT: hash-bitmap of interesting directory ids */
    user32_addr_t access;       /* OUT: access info for each file (0 for 'has access') */
    uint32_t      num_parents;  /* future use */
    user32_addr_t parents;      /* future use */
};

struct user64_ext_access_t {
    uint32_t      flags;        /* IN: access requested (i.e. R_OK) */
    uint32_t      num_files;    /* IN: number of files to process */
    uint32_t      map_size;     /* IN: size of the bit map */
    user64_addr_t file_ids;     /* IN: array of file ids */
    user64_addr_t bitmap;       /* OUT: hash-bitmap of interesting directory ids */
    user64_addr_t access;       /* OUT: access info for each file (0 for 'has access') */
    uint32_t      num_parents;  /* future use */
    user64_addr_t parents;      /* future use */
};
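/*
 * Illustrative sketch (added; not part of the original source): a user-space
 * caller typically populates one of the *_ext_access_t layouts above and hands
 * it to the file system through the bulk-access fsctl that is dispatched to
 * do_bulk_access_check() below (see HFSIOC_EXT_BULKACCESS32/64 later in this
 * file).  The exact user-level wrapper is an assumption here; conceptually:
 *
 *      uint32_t ids[2]   = { dir_id_1, dir_id_2 };   // catalog node IDs to test
 *      short    result[2];
 *      struct ext_access_t args = {
 *              .flags     = R_OK,        // access being asked about
 *              .num_files = 2,
 *              .map_size  = 0,           // no parent bitmap wanted
 *              .file_ids  = ids,
 *              .bitmap    = NULL,
 *              .access    = result,      // 0 on return means "has access"
 *      };
 *      // fsctl(path, <bulk-access selector>, &args, 0);
 *
 * Each result[i] comes back as 0 (access granted) or an errno value such as
 * EACCES, mirroring the access[] output documented in the structs above.
 */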
/*
 * Perform a binary search for the given parent_id. Return value is
 * the index if there is a match.  If no_match_indexp is non-NULL it
 * will be assigned with the index to insert the item (even if it was
 * not found).
 */
static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
{
    unsigned int lo = 0;

    do {
        unsigned int mid = ((hi - lo)/2) + lo;
        unsigned int this_id = array[mid];

        if (parent_id == this_id) {
            hi = mid;
            break;
        }

        if (parent_id < this_id) {
            hi = mid;
            continue;
        }

        if (parent_id > this_id) {
            lo = mid + 1;
            continue;
        }
    } while (lo < hi);

    /* check if lo and hi converged on the match */
    if (parent_id == array[hi]) {
        return hi;
    }

    if (no_match_indexp) {
        *no_match_indexp = hi;
    }

    return -1;
}
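/*
 * Worked example (added for clarity; not in the original source): searching
 * the sorted array {5, 9, 17} (hi == 2) for parent_id 9 returns index 1.
 * Searching for 12 finds no match, returns -1, and leaves *no_match_indexp
 * at 2, the slot where 12 would keep the array sorted; lookup_bucket() below
 * hands that hint back as the insertion point, and add_node() nudges it if
 * needed before shifting entries.
 */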
static int
lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
{
    unsigned int hi;
    int index, no_match_index;

    if (cache->numcached == 0) {
        *indexp = 0;
        return 0;       // table is empty, so insert at index=0 and report no match
    }

    if (cache->numcached > NUM_CACHE_ENTRIES) {
        cache->numcached = NUM_CACHE_ENTRIES;
    }

    hi = cache->numcached - 1;

    index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);

    /* if no existing entry found, find index for new one */
    if (index == -1) {
        index = no_match_index;
        *indexp = index;
        return 0;
    } else {
        *indexp = index;
        return 1;
    }
}
/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in).  We currently do a replace rather
 * than an insert if the cache is full.
 */
static void
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
    int lookup_index = -1;

    /* need to do a lookup first if -1 passed for index */
    if (index == -1) {
        if (lookup_bucket(cache, &lookup_index, nodeID)) {
            if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
                // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
                cache->haveaccess[lookup_index] = access;
            }

            /* mission accomplished */
            return;
        } else {
            index = lookup_index;
        }
    }

    /* if the cache is full, do a replace rather than an insert */
    if (cache->numcached >= NUM_CACHE_ENTRIES) {
        cache->numcached = NUM_CACHE_ENTRIES-1;

        if (index > cache->numcached) {
            index = cache->numcached;
        }
    }

    if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
        index++;
    }

    if (index >= 0 && index < cache->numcached) {
        /* only do bcopy if we're inserting */
        bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
        bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
    }

    cache->acache[index] = nodeID;
    cache->haveaccess[index] = access;
}
static int
snoop_callback(const cnode_t *cp, void *arg)
{
    struct cinfo *cip = arg;

    cip->uid = cp->c_uid;
    cip->gid = cp->c_gid;
    cip->mode = cp->c_mode;
    cip->parentcnid = cp->c_parentcnid;
    cip->recflags = cp->c_attr.ca_recflags;

    return (0);
}
/*
 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id.  If the item
 * isn't incore, then go to the catalog.
 */
static int
do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
               struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
{
    int error = 0;

    /* if this id matches the one the fsctl was called with, skip the lookup */
    if (cnid == skip_cp->c_cnid) {
        cnattrp->ca_uid = skip_cp->c_uid;
        cnattrp->ca_gid = skip_cp->c_gid;
        cnattrp->ca_mode = skip_cp->c_mode;
        cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
        keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
    } else {
        struct cinfo c_info;

        /* otherwise, check the cnode hash in case the file/dir is incore */
        error = hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info);

        if (error == EACCES) {
        } else if (!error) {
            cnattrp->ca_uid = c_info.uid;
            cnattrp->ca_gid = c_info.gid;
            cnattrp->ca_mode = c_info.mode;
            cnattrp->ca_recflags = c_info.recflags;
            keyp->hfsPlus.parentID = c_info.parentcnid;
        } else {
            if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
                throttle_lowpri_io(1);

            lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

            /* lookup this cnid in the catalog */
            error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);

            hfs_systemfile_unlock(hfsmp, lockflags);
        }
    }

    return (error);
}
/*
 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
 * up to CACHE_LEVELS as we progress towards the root.
 */
static int
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
                struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
                struct vfs_context *my_context,
                char *bitmap, uint32_t map_size, cnid_t *parents,
                uint32_t num_parents)
{
    HFSCatalogNodeID thisNodeID;
    unsigned int myPerms;
    struct cat_attr cnattr;
    int cache_index = -1, scope_index = -1, scope_idx_start = -1;
    int i = 0, ids_to_cache = 0;
    int parent_ids[CACHE_LEVELS];

    thisNodeID = nodeID;
    while (thisNodeID >= kRootDirID) {
        myResult = 0;   /* default to "no access" */

        /* check the cache before resorting to hitting the catalog */

        /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
         * to look any further after hitting cached dir */
        if (lookup_bucket(cache, &cache_index, thisNodeID)) {
            myErr = cache->haveaccess[cache_index];
            if (scope_index != -1) {
                if (myErr == ESRCH) {
                }
            } else {
                scope_index = 0;   // so we'll just use the cache result
                scope_idx_start = ids_to_cache;
            }
            myResult = (myErr == 0) ? 1 : 0;
            goto ExitThisRoutine;
        }

        if (parents) {
            int tmp;
            tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
            if (scope_index == -1)
                scope_index = tmp;
            if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
                scope_idx_start = ids_to_cache;
            }
        }

        /* remember which parents we want to cache */
        if (ids_to_cache < CACHE_LEVELS) {
            parent_ids[ids_to_cache] = thisNodeID;
            ids_to_cache++;
        }

        // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
        if (bitmap && map_size) {
            bitmap[(thisNodeID/8)%(map_size)] |= (1 << (thisNodeID&7));
        }
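        /*
         * Worked example (added for clarity; not in the original source): with
         * map_size == 16, directory ID 37 sets bit (37 & 7) == 5 in byte
         * (37 / 8) % 16 == 4, i.e. bitmap[4] |= 0x20.  Because of the modulo,
         * different IDs can land on the same byte and bit.
         */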
        /* do the lookup (checks the cnode hash, then the catalog) */
        myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
        if (myErr) {
            goto ExitThisRoutine;   /* no access */
        }

        /* Root always gets access. */
        if (suser(myp_ucred, NULL) == 0) {
            thisNodeID = catkey.hfsPlus.parentID;
            continue;
        }

        // if the thing has acl's, do the full permission check
        if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
            /* get the vnode for this cnid */
            myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
            if (myErr) {
                goto ExitThisRoutine;
            }

            thisNodeID = VTOC(vp)->c_parentcnid;

            hfs_unlock(VTOC(vp));

            if (vnode_vtype(vp) == VDIR) {
                myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
            } else {
                myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
            }

            if (myErr) {
                goto ExitThisRoutine;
            }
        } else {
            int mode = cnattr.ca_mode & S_IFMT;

            myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp, myp_ucred, theProcPtr);

            if (mode == S_IFDIR) {
                flags = R_OK | X_OK;
            }
            if ( (myPerms & flags) != flags) {
                goto ExitThisRoutine;   /* no access */
            }

            /* up the hierarchy we go */
            thisNodeID = catkey.hfsPlus.parentID;
        }
    }

    /* if here, we have access to this node */
    myResult = 1;

ExitThisRoutine:
    if (parents && myErr == 0 && scope_index == -1) {
        myErr = ESRCH;
    }

    /* cache the parent directory(ies) */
    for (i = 0; i < ids_to_cache; i++) {
        if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
            add_node(cache, -1, parent_ids[i], ESRCH);
        } else {
            add_node(cache, -1, parent_ids[i], myErr);
        }
    }

    return (myResult);
}
static int
do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
                     struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
{
    /*
     * NOTE: on entry, the vnode has an io_ref. In case this vnode
     * happens to be in our list of file_ids, we'll note it
     * avoid calling hfs_chashget_nowait() on that id as that
     * will cause a "locking against myself" panic.
     */
    Boolean check_leaf = true;

    struct user64_ext_access_t *user_access_structp;
    struct user64_ext_access_t tmp_user_access;
    struct access_cache cache;

    int error = 0, prev_parent_check_ok=1;
    unsigned int num_files = 0;
    int num_parents = 0;
    cnid_t *parents=NULL;
    cnid_t prevParent_cnid = 0;
    unsigned int myPerms;
    struct cat_attr cnattr;
    struct cnode *skip_cp = VTOC(vp);
    kauth_cred_t cred = vfs_context_ucred(context);
    proc_t p = vfs_context_proc(context);

    is64bit = proc_is64bit(p);

    /* initialize the local cache and buffers */
    cache.numcached = 0;
    cache.cachehits = 0;
    cache.acache = NULL;
    cache.haveaccess = NULL;

    /* struct copyin done during dispatch... need to copy file_id array separately */
    if (ap->a_data == NULL) {
        error = EINVAL;
        goto err_exit_bulk_access;
    }

    if (is64bit) {
        if (arg_size != sizeof(struct user64_ext_access_t)) {
            error = EINVAL;
            goto err_exit_bulk_access;
        }

        user_access_structp = (struct user64_ext_access_t *)ap->a_data;

    } else if (arg_size == sizeof(struct user32_access_t)) {
        struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;

        // convert an old style bulk-access struct to the new style
        tmp_user_access.flags = accessp->flags;
        tmp_user_access.num_files = accessp->num_files;
        tmp_user_access.map_size = 0;
        tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
        tmp_user_access.bitmap = USER_ADDR_NULL;
        tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
        tmp_user_access.num_parents = 0;
        user_access_structp = &tmp_user_access;

    } else if (arg_size == sizeof(struct user32_ext_access_t)) {
        struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;

        // up-cast from a 32-bit version of the struct
        tmp_user_access.flags = accessp->flags;
        tmp_user_access.num_files = accessp->num_files;
        tmp_user_access.map_size = accessp->map_size;
        tmp_user_access.num_parents = accessp->num_parents;
        tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
        tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
        tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
        tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);

        user_access_structp = &tmp_user_access;
    } else {
        error = EINVAL;
        goto err_exit_bulk_access;
    }
    map_size = user_access_structp->map_size;

    num_files = user_access_structp->num_files;

    num_parents = user_access_structp->num_parents;

    if (num_files < 1) {
        goto err_exit_bulk_access;
    }
    if (num_files > 1024) {
        error = EINVAL;
        goto err_exit_bulk_access;
    }

    if (num_parents > 1024) {
        error = EINVAL;
        goto err_exit_bulk_access;
    }

    file_ids = hfs_malloc(sizeof(int) * num_files);
    access = hfs_malloc(sizeof(short) * num_files);
    if (map_size) {
        bitmap = hfs_mallocz(sizeof(char) * map_size);
    }
    if (num_parents) {
        parents = hfs_malloc(sizeof(cnid_t) * num_parents);
    }

    cache.acache = hfs_malloc(sizeof(int) * NUM_CACHE_ENTRIES);
    cache.haveaccess = hfs_malloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);

    if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
                        num_files * sizeof(int)))) {
        goto err_exit_bulk_access;
    }

    if (num_parents) {
        if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
                            num_parents * sizeof(cnid_t)))) {
            goto err_exit_bulk_access;
        }
    }

    flags = user_access_structp->flags;
    if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
        flags = R_OK;
    }

    /* check if we've been passed leaf node ids or parent ids */
    if (flags & PARENT_IDS_FLAG) {
        check_leaf = false;
    }

    /* Check access to each file_id passed in */
    for (i = 0; i < num_files; i++) {
        leaf_index = -1;
        cnid = (cnid_t) file_ids[i];

        /* root always has access */
        if ((!parents) && (!suser(cred, NULL))) {
            access[i] = 0;
            continue;
        }

        if (check_leaf) {
            /* do the lookup (checks the cnode hash, then the catalog) */
            error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
            if (error) {
                access[i] = (short) error;
                continue;
            }

            if (parents) {
                // Check if the leaf matches one of the parent scopes
                leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
                if (leaf_index >= 0 && parents[leaf_index] == cnid)
                    prev_parent_check_ok = 0;
                else if (leaf_index >= 0)
                    prev_parent_check_ok = 1;
            }

            // if the thing has acl's, do the full permission check
            if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
                /* get the vnode for this cnid */
                myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);

                hfs_unlock(VTOC(cvp));

                if (vnode_vtype(cvp) == VDIR) {
                    myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
                } else {
                    myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
                }
            } else {
                /* before calling CheckAccess(), check the target file for read access */
                myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
                                                  cnattr.ca_mode, hfsmp->hfs_mp, cred, p);

                /* fail fast if no access */
                if ((myPerms & flags) == 0) {
                    access[i] = EACCES;
                    continue;
                }
            }
        } else {
            /* we were passed an array of parent ids */
            catkey.hfsPlus.parentID = cnid;
        }

        /* if the last guy had the same parent and had access, we're done */
        if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
            access[i] = 0;
            continue;
        }

        myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
                                   skip_cp, p, cred, context, bitmap, map_size, parents, num_parents);

        if (myaccess || (error == ESRCH && leaf_index != -1)) {
            access[i] = 0; // have access.. no errors to report
        } else {
            access[i] = (error != 0 ? (short) error : EACCES);
        }

        prevParent_cnid = catkey.hfsPlus.parentID;
    }

    /* copyout the access array */
    if ((error = copyout((caddr_t)access, user_access_structp->access,
                         num_files * sizeof (short)))) {
        goto err_exit_bulk_access;
    }
    if (map_size && bitmap) {
        if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
                             map_size * sizeof (char)))) {
            goto err_exit_bulk_access;
        }
    }

err_exit_bulk_access:

    hfs_free(file_ids, sizeof(int) * num_files);
    hfs_free(parents, sizeof(cnid_t) * num_parents);
    hfs_free(bitmap, sizeof(char) * map_size);
    hfs_free(access, sizeof(short) * num_files);
    hfs_free(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
    hfs_free(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);

    return (error);
}

/* end "bulk-access" support */
/*
 * Control filesystem operating characteristics.
 */
int
hfs_vnop_ioctl( struct vnop_ioctl_args /* {
                vnode_t a_vp;
                long  a_command;
                caddr_t  a_data;
                int  a_fflag;
                vfs_context_t a_context;
        } */ *ap)
{
    struct vnode * vp = ap->a_vp;
    struct hfsmount *hfsmp = VTOHFS(vp);
    vfs_context_t context = ap->a_context;
    kauth_cred_t cred = vfs_context_ucred(context);
    proc_t p = vfs_context_proc(context);
    struct vfsstatfs *vfsp;
    off_t jnl_start, jnl_size;
    struct hfs_journal_info *jip;
#if HFS_COMPRESSION
    off_t uncompressed_size = -1;
    int decmpfs_error = 0;

    if (ap->a_command == F_RDADVISE) {
        /* we need to inspect the decmpfs state of the file as early as possible */
        compressed = hfs_file_is_compressed(VTOC(vp), 0);
        if (VNODE_IS_RSRC(vp)) {
            /* if this is the resource fork, treat it as if it were empty */
            uncompressed_size = 0;
        } else {
            decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
            if (decmpfs_error != 0) {
                /* failed to get the uncompressed size, we'll check for this later */
                uncompressed_size = -1;
            }
        }
    }
#endif /* HFS_COMPRESSION */

    is64bit = proc_is64bit(p);

#if CONFIG_PROTECT
#if HFS_CONFIG_KEY_ROLL
    // The HFSIOC_KEY_ROLL fsctl does its own access checks
    if (ap->a_command != HFSIOC_KEY_ROLL)
#endif
    if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
    }
#endif /* CONFIG_PROTECT */

    switch (ap->a_command) {

    case HFSIOC_GETPATH:
    {
        struct vnode *file_vp;

        /* Caller must be owner of file system. */
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
        }
        /* Target vnode must be file system's root. */
        if (!vnode_isvroot(vp)) {
        }
        bufptr = (char *)ap->a_data;
        cnid = strtoul(bufptr, NULL, 10);
        if (ap->a_fflag & HFS_GETPATH_VOLUME_RELATIVE) {
            flags |= BUILDPATH_VOLUME_RELATIVE;
        }

        /* We need to call hfs_vfs_vget to leverage the code that will
         * fix the origin list for us if needed, as opposed to calling
         * hfs_vget, since we will need the parent for build_path call.
         */
        if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
        }

        error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, flags, context);
    }

    case HFSIOC_SET_MAX_DEFRAG_SIZE:
    {
        int error = 0;          /* Assume success */
        u_int32_t maxsize = 0;

        if (vnode_vfsisrdonly(vp)) {
        }
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (!kauth_cred_issuser(cred)) {
            return (EACCES);    /* must be root */
        }

        maxsize = *(u_int32_t *)ap->a_data;

        hfs_lock_mount(hfsmp);
        if (maxsize > HFS_MAX_DEFRAG_SIZE) {
        }
        hfsmp->hfs_defrag_max = maxsize;
        hfs_unlock_mount(hfsmp);
    }

    case HFSIOC_FORCE_ENABLE_DEFRAG:
    {
        int error = 0;          /* Assume success */
        u_int32_t do_enable = 0;

        if (vnode_vfsisrdonly(vp)) {
        }
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (!kauth_cred_issuser(cred)) {
            return (EACCES);    /* must be root */
        }

        do_enable = *(u_int32_t *)ap->a_data;

        hfs_lock_mount(hfsmp);
        if (do_enable != 0) {
            hfsmp->hfs_defrag_nowait = 1;
        }
        hfs_unlock_mount(hfsmp);
    }

    case HFSIOC_TRANSFER_DOCUMENT_ID:
    {
        struct cnode *cp = NULL;
        u_int32_t to_fd = *(u_int32_t *)ap->a_data;
        struct fileproc *to_fp;
        struct vnode *to_vp;
        struct cnode *to_cp;

        if ((error = fp_getfvp(p, to_fd, &to_fp, &to_vp)) != 0) {
            //printf("could not get the vnode for fd %d (err %d)\n", to_fd, error);
        }
        if ( (error = vnode_getwithref(to_vp)) ) {
        }

        if (VTOHFS(to_vp) != hfsmp) {
            goto transfer_cleanup;
        }

        int need_unlock = 1;
        to_cp = VTOC(to_vp);
        error = hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
        if (error) {
            //printf("could not lock the pair of cnodes (error %d)\n", error);
            goto transfer_cleanup;
        }

        if (!(cp->c_bsdflags & UF_TRACKED)) {
        } else if (to_cp->c_bsdflags & UF_TRACKED) {
            //
            // if the destination is already tracked, return an error
            // as otherwise it's a silent deletion of the target's
            // document-id
            //
        } else if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
            //
            // we can use the FndrExtendedFileInfo because the doc-id is the first
            // thing in both it and the ExtendedDirInfo struct which is fixed in
            // format and can not change layout
            //
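            //
            // Illustrative note (added for clarity; not in the original source):
            // the Finder info carried in the catalog record is 32 bytes; the
            // first 16 bytes hold the classic FndrFileInfo/FndrDirInfo and the
            // second 16 bytes hold the extended info, which is why the code
            // below casts c_finderinfo + 16 to struct FndrExtendedFileInfo
            // before touching document_id.
            //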
            struct FndrExtendedFileInfo *f_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16);
            struct FndrExtendedFileInfo *to_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)to_cp->c_finderinfo + 16);

            if (f_extinfo->document_id == 0) {
                uint32_t new_id;

                hfs_unlockpair(cp, to_cp);      // have to unlock to be able to get a new-id

                if ((error = hfs_generate_document_id(hfsmp, &new_id)) == 0) {
                    //
                    // re-lock the pair now that we have the document-id
                    //
                    hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
                    f_extinfo->document_id = new_id;
                } else {
                    goto transfer_cleanup;
                }
            }

            to_extinfo->document_id = f_extinfo->document_id;
            f_extinfo->document_id = 0;
            //printf("TRANSFERRING: doc-id %d from ino %d to ino %d\n", to_extinfo->document_id, cp->c_fileid, to_cp->c_fileid);

            // make sure the destination is also UF_TRACKED
            to_cp->c_bsdflags |= UF_TRACKED;
            cp->c_bsdflags &= ~UF_TRACKED;

            // mark the cnodes dirty
            cp->c_flag |= C_MODIFIED;
            to_cp->c_flag |= C_MODIFIED;

            if ((error = hfs_start_transaction(hfsmp)) == 0) {
                lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

                (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
                (void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, NULL, NULL);

                hfs_systemfile_unlock (hfsmp, lockflags);
                (void) hfs_end_transaction(hfsmp);
            }

            add_fsevent(FSE_DOCID_CHANGED, context,
                        FSE_ARG_DEV, hfsmp->hfs_raw_dev,
                        FSE_ARG_INO, (ino64_t)cp->c_fileid,       // src inode #
                        FSE_ARG_INO, (ino64_t)to_cp->c_fileid,    // dst inode #
                        FSE_ARG_INT32, to_extinfo->document_id,
                        FSE_ARG_DONE);

            hfs_unlockpair(cp, to_cp);          // unlock this so we can send the fsevents

            if (need_fsevent(FSE_STAT_CHANGED, vp)) {
                add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
            }
            if (need_fsevent(FSE_STAT_CHANGED, to_vp)) {
                add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, to_vp, FSE_ARG_DONE);
            }
        }

        hfs_unlockpair(cp, to_cp);
    }
    case HFSIOC_PREV_LINK:
    case HFSIOC_NEXT_LINK:
    {
        /* Caller must be owner of file system. */
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
        }
        /* Target vnode must be file system's root. */
        if (!vnode_isvroot(vp)) {
        }
        linkfileid = *(cnid_t *)ap->a_data;
        if (linkfileid < kHFSFirstUserCatalogNodeID) {
        }
        if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
        }
        if (ap->a_command == HFSIOC_NEXT_LINK) {
            *(cnid_t *)ap->a_data = nextlinkid;
        } else {
            *(cnid_t *)ap->a_data = prevlinkid;
        }
        break;
    }

    case HFSIOC_RESIZE_PROGRESS: {
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);    /* must be owner of file system */
        }
        if (!vnode_isvroot(vp)) {
        }
        /* file system must not be mounted read-only */
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }

        return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
    }

    case HFSIOC_RESIZE_VOLUME: {
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);    /* must be owner of file system */
        }
        if (!vnode_isvroot(vp)) {
        }

        /* filesystem must not be mounted read only */
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }
        newsize = *(u_int64_t *)ap->a_data;
        cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;

        if (newsize == cursize) {
        }
        IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeWillResize);
        if (newsize > cursize) {
            ret = hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
        } else {
            ret = hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
        }
        IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeDidResize);
        break;
    }

    case HFSIOC_CHANGE_NEXT_ALLOCATION: {
        int error = 0;          /* Assume success */

        if (vnode_vfsisrdonly(vp)) {
        }
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);    /* must be owner of file system */
        }
        if (!vnode_isvroot(vp)) {
        }
        hfs_lock_mount(hfsmp);
        location = *(u_int32_t *)ap->a_data;
        if ((location >= hfsmp->allocLimit) &&
            (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
            error = EINVAL;
            goto fail_change_next_allocation;
        }
        /* Return previous value. */
        *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
        if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
            /* On magic value for location, set nextAllocation to next block
             * after metadata zone and set flag in mount structure to indicate
             * that nextAllocation should not be updated again.
             */
            if (hfsmp->hfs_metazone_end != 0) {
                HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
            }
            hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
        } else {
            hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
            HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
        }
        MarkVCBDirty(hfsmp);
fail_change_next_allocation:
        hfs_unlock_mount(hfsmp);
        return (error);
    }

#if HFS_SPARSE_DEV
    case HFSIOC_SETBACKINGSTOREINFO: {
        struct vnode * di_vp;
        struct hfs_backingstoreinfo *bsdata;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }
        if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
        }
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);    /* must be owner of file system */
        }
        bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
        if (bsdata == NULL) {
        }
        if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
        }
        if ((error = vnode_getwithref(di_vp))) {
            file_drop(bsdata->backingfd);
        }

        if (vnode_mount(vp) == vnode_mount(di_vp)) {
            (void)vnode_put(di_vp);
            file_drop(bsdata->backingfd);
        }

        // Dropped in unmount

        hfs_lock_mount(hfsmp);
        hfsmp->hfs_backingvp = di_vp;
        hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
        hfsmp->hfs_sparsebandblks = bsdata->bandsize / hfsmp->blockSize * 4;
        hfs_unlock_mount(hfsmp);

        /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */

        /*
         * If the sparse image is on a sparse image file (as opposed to a sparse
         * bundle), then we may need to limit the free space to the maximum size
         * of a file on that volume.  So we query (using pathconf), and if we get
         * a meaningful result, we cache the number of blocks for later use in
         * hfs_freeblks().
         */
        hfsmp->hfs_backingfs_maxblocks = 0;
        if (vnode_vtype(di_vp) == VREG) {
            int terr;
            int hostbits;

            terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
            if (terr == 0 && hostbits != 0 && hostbits < 64) {
                u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;

                hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
            }
        }

        /* The free extent cache is managed differently for sparse devices.
         * There is a window between which the volume is mounted and the
         * device is marked as sparse, so the free extent cache for this
         * volume is currently initialized as normal volume (sorted by block
         * count). Reset the cache so that it will be rebuilt again
         * for sparse device (sorted by start block).
         */
        ResetVCBFreeExtCache(hfsmp);

        (void)vnode_put(di_vp);
        file_drop(bsdata->backingfd);
        break;
    }

    case HFSIOC_CLRBACKINGSTOREINFO: {
        struct vnode * tmpvp;

        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);    /* must be owner of file system */
        }
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }

        if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
            hfsmp->hfs_backingvp) {

            hfs_lock_mount(hfsmp);
            hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
            tmpvp = hfsmp->hfs_backingvp;
            hfsmp->hfs_backingvp = NULLVP;
            hfsmp->hfs_sparsebandblks = 0;
            hfs_unlock_mount(hfsmp);
        }
        break;
    }
#endif /* HFS_SPARSE_DEV */
2007 /* Change the next CNID stored in the VH */
2008 case HFSIOC_CHANGE_NEXTCNID
: {
2009 int error
= 0; /* Assume success */
2014 if (vnode_vfsisrdonly(vp
)) {
2017 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
2018 if (suser(cred
, NULL
) &&
2019 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
2020 return (EACCES
); /* must be owner of file system */
2023 fileid
= *(u_int32_t
*)ap
->a_data
;
2025 /* Must have catalog lock excl. to advance the CNID pointer */
2026 lockflags
= hfs_systemfile_lock (hfsmp
, SFL_CATALOG
, HFS_EXCLUSIVE_LOCK
);
2028 hfs_lock_mount(hfsmp
);
2030 /* If it is less than the current next CNID, force the wraparound bit to be set */
2031 if (fileid
< hfsmp
->vcbNxtCNID
) {
2035 /* Return previous value. */
2036 *(u_int32_t
*)ap
->a_data
= hfsmp
->vcbNxtCNID
;
2038 hfsmp
->vcbNxtCNID
= fileid
;
2041 hfsmp
->vcbAtrb
|= kHFSCatalogNodeIDsReusedMask
;
2044 MarkVCBDirty(hfsmp
);
2045 hfs_unlock_mount(hfsmp
);
2046 hfs_systemfile_unlock (hfsmp
, lockflags
);
2054 mp
= vnode_mount(vp
);
2055 hfsmp
= VFSTOHFS(mp
);
2060 vfsp
= vfs_statfs(mp
);
2062 if (kauth_cred_getuid(cred
) != vfsp
->f_owner
&&
2063 !kauth_cred_issuser(cred
))
2066 return hfs_freeze(hfsmp
);
2070 vfsp
= vfs_statfs(vnode_mount(vp
));
2071 if (kauth_cred_getuid(cred
) != vfsp
->f_owner
&&
2072 !kauth_cred_issuser(cred
))
2075 return hfs_thaw(hfsmp
, current_proc());
2078 case HFSIOC_EXT_BULKACCESS32
:
2079 case HFSIOC_EXT_BULKACCESS64
: {
2082 if (hfsmp
->hfs_flags
& HFS_STANDARD
) {
2088 size
= sizeof(struct user64_ext_access_t
);
2090 size
= sizeof(struct user32_ext_access_t
);
2093 return do_bulk_access_check(hfsmp
, vp
, ap
, size
, context
);
2096 case HFSIOC_SET_XATTREXTENTS_STATE
: {
2099 if (ap
->a_data
== NULL
) {
2103 state
= *(int *)ap
->a_data
;
2105 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2109 /* Super-user can enable or disable extent-based extended
2110 * attribute support on a volume
2111 * Note: Starting Mac OS X 10.7, extent-based extended attributes
2112 * are enabled by default, so any change will be transient only
2113 * till the volume is remounted.
2115 if (!kauth_cred_issuser(kauth_cred_get())) {
2118 if (state
== 0 || state
== 1)
2119 return hfs_set_volxattr(hfsmp
, HFSIOC_SET_XATTREXTENTS_STATE
, state
);
2124 case F_SETSTATICCONTENT
: {
2126 int enable_static
= 0;
2127 struct cnode
*cp
= NULL
;
2129 * lock the cnode, decorate the cnode flag, and bail out.
2130 * VFS should have already authenticated the caller for us.
2135 * Note that even though ap->a_data is of type caddr_t,
2136 * the fcntl layer at the syscall handler will pass in NULL
2137 * or 1 depending on what the argument supplied to the fcntl
2138 * was. So it is in fact correct to check the ap->a_data
2139 * argument for zero or non-zero value when deciding whether or not
2140 * to enable the static bit in the cnode.
2144 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2149 error
= hfs_lock (cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
2151 if (enable_static
) {
2152 cp
->c_flag
|= C_SSD_STATIC
;
2155 cp
->c_flag
&= ~C_SSD_STATIC
;
2162 case F_SET_GREEDY_MODE
: {
2164 int enable_greedy_mode
= 0;
2165 struct cnode
*cp
= NULL
;
2167 * lock the cnode, decorate the cnode flag, and bail out.
2168 * VFS should have already authenticated the caller for us.
2173 * Note that even though ap->a_data is of type caddr_t,
2174 * the fcntl layer at the syscall handler will pass in NULL
2175 * or 1 depending on what the argument supplied to the fcntl
2176 * was. So it is in fact correct to check the ap->a_data
2177 * argument for zero or non-zero value when deciding whether or not
2178 * to enable the greedy mode bit in the cnode.
2180 enable_greedy_mode
= 1;
2182 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2187 error
= hfs_lock (cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
2189 if (enable_greedy_mode
) {
2190 cp
->c_flag
|= C_SSD_GREEDY_MODE
;
2193 cp
->c_flag
&= ~C_SSD_GREEDY_MODE
;
2202 uint32_t iotypeflag
= 0;
2204 struct cnode
*cp
= NULL
;
2206 * lock the cnode, decorate the cnode flag, and bail out.
2207 * VFS should have already authenticated the caller for us.
2210 if (ap
->a_data
== NULL
) {
2215 * Note that even though ap->a_data is of type caddr_t, we
2216 * can only use 32 bits of flag values.
2218 iotypeflag
= (uint32_t) ap
->a_data
;
2219 switch (iotypeflag
) {
2220 case F_IOTYPE_ISOCHRONOUS
:
2227 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2232 error
= hfs_lock (cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
2234 switch (iotypeflag
) {
2235 case F_IOTYPE_ISOCHRONOUS
:
2236 cp
->c_flag
|= C_IO_ISOCHRONOUS
;
	case F_MAKECOMPRESSED: {
		int error = 0;
		uint32_t gen_counter;
		struct cnode *cp = NULL;
		int reset_decmp = 0;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		/*
		 * acquire & lock the cnode.
		 * VFS should have already authenticated the caller for us.
		 */
		if (ap->a_data) {
			/*
			 * Cast the pointer into a uint32_t so we can extract the
			 * supplied generation counter.
			 */
			gen_counter = *((uint32_t*)ap->a_data);
		} else {
			return (EINVAL);
		}

		cp = VTOC(vp);

		/* Grab truncate lock first; we may truncate the file */
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

		error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error) {
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
			return error;
		}

		/* Are there any other usecounts/FDs? */
		if (vnode_isinuse(vp, 1)) {
			hfs_unlock(cp);
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
			return (EBUSY);
		}

		/* now we have the cnode locked down; validate arguments */
		if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) {
			/* EINVAL if you are trying to manipulate an IMMUTABLE file */
			hfs_unlock(cp);
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
			return (EINVAL);
		}

		if ((hfs_get_gencount(cp)) == gen_counter) {
			/*
			 * OK, the gen_counter matched. Go for it:
			 * Toggle state bits, truncate file, and suppress mtime update.
			 */
			reset_decmp = 1;
			cp->c_bsdflags |= UF_COMPRESSED;

			error = hfs_truncate(vp, 0, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES,
			                     ap->a_context);
		} else {
			error = ESTALE;
		}

		/* Unlock cnode before executing decmpfs; they may need to get an EA */
		hfs_unlock(cp);

		/*
		 * Reset the decmp state while still holding the truncate lock. We need to
		 * serialize here against a listxattr on this node which may occur at any
		 * time.
		 *
		 * Even if '0/skiplock' is passed in the 2nd argument to hfs_file_is_compressed,
		 * that will still potentially require getting the com.apple.decmpfs EA. If the
		 * EA is required, then we can't hold the cnode lock, because the getxattr call is
		 * generic (through VFS), and can't pass along any info telling it that we're already
		 * holding it (the lock). If we don't serialize, then we risk listxattr stopping
		 * and trying to fill in the hfs_file_is_compressed info during the callback
		 * operation, which will result in deadlock against the b-tree node.
		 *
		 * So, to serialize against listxattr (which will grab buf_t meta references on
		 * the b-tree blocks), we hold the truncate lock as we're manipulating the
		 * decmpfs payload.
		 */
		if ((reset_decmp) && (error == 0)) {
			decmpfs_cnode *dp = VTOCMP(vp);
			if (dp != NULL) {
				decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
			}

			/* Initialize the decmpfs node as needed */
			(void) hfs_file_is_compressed(cp, 0); /* ok to take lock */
		}

		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
		return error;
	}
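	/*
	 * Typical usage (illustrative, not prescribed by this file): a
	 * userspace compression tool reads the file's current generation
	 * count (e.g. via getattrlist()), writes the com.apple.decmpfs EA
	 * and compressed payload, and only then issues F_MAKECOMPRESSED
	 * with that generation count. If the file was modified in the
	 * meantime, the counter no longer matches and the request is
	 * rejected instead of marking stale data as compressed.
	 */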
	case F_SETBACKINGSTORE: {
		int error = 0;

		/*
		 * See comment in F_SETSTATICCONTENT re: using
		 * a null check for a_data.
		 */
		if (ap->a_data) {
			error = hfs_set_backingstore(vp, 1);
		} else {
			error = hfs_set_backingstore(vp, 0);
		}
		return error;
	}

	case F_GETPATH_MTMINFO: {
		int error = 0;

		int *data = (int*) ap->a_data;

		/* Ask if this is a backingstore vnode */
		error = hfs_is_backingstore(vp, data);
		return error;
	}

	case F_FULLFSYNC: {
		int error;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error == 0) {
			error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_FULL, p);
			hfs_unlock(VTOC(vp));
		}
		return error;
	}

	case F_BARRIERFSYNC: {
		int error;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error == 0) {
			error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_BARRIER, p);
			hfs_unlock(VTOC(vp));
		}
		return error;
	}
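	/*
	 * Both fsync cases above call hfs_fsync() with MNT_WAIT; the
	 * difference is the mode: HFS_FSYNC_FULL additionally asks the
	 * device to flush its write cache to stable storage, while
	 * HFS_FSYNC_BARRIER only requests an ordering barrier, which is
	 * cheaper but provides a weaker durability guarantee.
	 */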
	case F_CHKCLEAN: {
		register struct cnode *cp;
		int error;

		if (!vnode_isreg(vp))
			return EINVAL;

		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error == 0) {
			cp = VTOC(vp);
			/*
			 * used by regression test to determine if
			 * all the dirty pages (via write) have been cleaned
			 * after a call to 'fsync'.
			 */
			error = is_file_clean(vp, VTOF(vp)->ff_size);
			hfs_unlock(cp);
		}
		return (error);
	}

	case F_RDADVISE: {
		register struct radvisory *ra;
		struct filefork *fp;
		int error;

		if (!vnode_isreg(vp))
			return EINVAL;

		ra = (struct radvisory *)(ap->a_data);
		fp = VTOF(vp);

		/* Protect against a size change. */
		hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

#if HFS_COMPRESSION
		if (compressed) {
			if (uncompressed_size == -1) {
				/* fetching the uncompressed size failed above, so return the error */
				error = decmpfs_error;
			} else if (ra->ra_offset >= uncompressed_size) {
				error = EFBIG;
			} else {
				error = advisory_read(vp, uncompressed_size, ra->ra_offset, ra->ra_count);
			}
		} else
#endif /* HFS_COMPRESSION */
		if (ra->ra_offset >= fp->ff_size) {
			error = EFBIG;
		} else {
			error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
		}

		hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
		return (error);
	}
	case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
	{
		if (is64bit) {
			*(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
		}
		else {
			*(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
		}
		return 0;
	}

	case SPOTLIGHT_IOC_GET_MOUNT_TIME:
		*(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
		break;

	case SPOTLIGHT_IOC_GET_LAST_MTIME:
		*(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
		break;

	case HFSIOC_GET_VERY_LOW_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
		break;

	case HFSIOC_SET_VERY_LOW_DISK:
		if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
			return EINVAL;
		}
		hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
		break;

	case HFSIOC_GET_LOW_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
		break;

	case HFSIOC_SET_LOW_DISK:
		if (   *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
		    || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
			return EINVAL;
		}
		hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
		break;

	/* The following two fsctls were ported from apfs. */
	case APFSIOC_GET_NEAR_LOW_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_nearwarninglimit;
		break;

	case APFSIOC_SET_NEAR_LOW_DISK:
		if (   *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
		    || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
			return EINVAL;
		}
		hfsmp->hfs_freespace_notify_nearwarninglimit = *(uint32_t *)ap->a_data;
		break;

	case HFSIOC_GET_DESIRED_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
		break;

	case HFSIOC_SET_DESIRED_DISK:
		if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
			return EINVAL;
		}
		hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
		break;

	case HFSIOC_VOLUME_STATUS:
		*(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
		break;
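	/*
	 * The checks above keep the free-space notification levels ordered:
	 * dangerlimit < warninglimit < desiredlevel, with the APFS-style
	 * near-warning limit expected to sit between the warning limit and
	 * the desired level. A setter that would violate this ordering is
	 * rejected rather than applied.
	 */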
	case HFS_SET_BOOT_INFO:
		if (!vnode_isvroot(vp))
			return(EINVAL);
		if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
			return(EACCES);	/* must be superuser or owner of filesystem */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		hfs_lock_mount(hfsmp);
		bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
		/* Null out the cached UUID, to be safe */
		uuid_clear(hfsmp->hfs_full_uuid);
		hfs_unlock_mount(hfsmp);
		(void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
		break;

	case HFS_GET_BOOT_INFO:
		if (!vnode_isvroot(vp))
			return(EINVAL);
		hfs_lock_mount(hfsmp);
		bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
		hfs_unlock_mount(hfsmp);
		break;

	/* case HFS_MARK_BOOT_CORRUPT: _IO are the same */
	case HFSIOC_MARK_BOOT_CORRUPT:
		/*
		 * Mark the boot volume corrupt by setting
		 * kHFSVolumeInconsistentBit in the volume header. This will
		 * force fsck_hfs on next mount.
		 */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return EACCES;
		}
		/* Allowed only on the root vnode of the boot volume */
		if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
		    !vnode_isvroot(vp)) {
			return EINVAL;
		}
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		printf("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
		hfs_mark_inconsistent(hfsmp, HFS_FSCK_FORCED);
		break;
2586 case HFSIOC_GET_JOURNAL_INFO
:
2587 jip
= (struct hfs_journal_info
*)ap
->a_data
;
2592 if (hfsmp
->jnl
== NULL
) {
2596 jnl_start
= hfs_blk_to_bytes(hfsmp
->jnl_start
, hfsmp
->blockSize
) + hfsmp
->hfsPlusIOPosOffset
;
2597 jnl_size
= hfsmp
->jnl_size
;
2600 jip
->jstart
= jnl_start
;
2601 jip
->jsize
= jnl_size
;
2604 case HFSIOC_SET_ALWAYS_ZEROFILL
: {
2605 struct cnode
*cp
= VTOC(vp
);
2607 if (*(int *)ap
->a_data
) {
2608 cp
->c_flag
|= C_ALWAYS_ZEROFILL
;
2610 cp
->c_flag
&= ~C_ALWAYS_ZEROFILL
;
	/* case HFS_DISABLE_METAZONE: _IO are the same */
	case HFSIOC_DISABLE_METAZONE: {
		/* Only root can disable metadata zone */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return EACCES;
		}
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}

		/* Disable metadata zone now */
		(void) hfs_metadatazone_init(hfsmp, true);
		printf("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
		break;
	}

	case HFSIOC_FSINFO_METADATA_BLOCKS: {
		int error;
		struct hfsinfo_metadata *hinfo;

		hinfo = (struct hfsinfo_metadata *)ap->a_data;

		/* Get information about number of metadata blocks */
		error = hfs_getinfo_metadata_blocks(hfsmp, hinfo);
		if (error) {
			return error;
		}
		break;
	}
	case HFSIOC_GET_FSINFO: {
		hfs_fsinfo *fsinfo = (hfs_fsinfo *)ap->a_data;

		/* Only root is allowed to get fsinfo */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return EACCES;
		}

		/*
		 * Make sure that the caller's version number matches with
		 * the kernel's version number. This will make sure that
		 * if the structures being read/written into are changed
		 * by the kernel, the caller will not read incorrect data.
		 *
		 * The first three fields --- request_type, version and
		 * flags --- are the same for all the hfs_fsinfo structures, so
		 * we can access the version number by assuming any
		 * structure for now.
		 */
		if (fsinfo->header.version != HFS_FSINFO_VERSION) {
			return ENOTSUP;
		}

		/* Make sure that the current file system is not marked inconsistent */
		if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
			return EIO;
		}

		return hfs_get_fsinfo(hfsmp, ap->a_data);
	}
	case HFSIOC_CS_FREESPACE_TRIM: {
		int error = 0;
		int lockflags = 0;

		/* Only root allowed */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return EACCES;
		}

		/*
		 * This core functionality is similar to hfs_scan_blocks().
		 * The main difference is that hfs_scan_blocks() is called
		 * as part of mount where we are assured that the journal is
		 * empty to start with. This fcntl() can be called on a
		 * mounted volume, therefore it has to flush the content of
		 * the journal as well as ensure the state of summary table.
		 *
		 * This fcntl scans over the entire allocation bitmap,
		 * creates a list of all the free blocks, and issues TRIM
		 * down to the underlying device. This can take a long time
		 * as it can generate up to 512MB of read I/O.
		 */

		if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) {
			error = hfs_init_summary(hfsmp);
			if (error) {
				printf("hfs: fsctl() could not initialize summary table for %s\n", hfsmp->vcbVN);
				return error;
			}
		}

		/*
		 * The journal maintains a list of recently deallocated blocks to
		 * issue DKIOCUNMAPs when the corresponding journal transaction is
		 * flushed to the disk. To avoid any race conditions, we only
		 * want one active trim list and only one thread issuing DKIOCUNMAPs.
		 * Therefore we make sure that the journal trim list is sync'ed,
		 * empty, and not modifiable for the duration of our scan.
		 *
		 * Take the journal lock before flushing the journal to the disk.
		 * We keep holding the journal lock until we have taken the
		 * bitmap lock, to make sure that no new journal transactions can
		 * start. This ensures that the journal trim list is not
		 * modified after the journal flush and before getting the bitmap
		 * lock. We can release the journal lock after we acquire the
		 * bitmap lock as it will prevent any further block deallocations.
		 */
		hfs_journal_lock(hfsmp);

		/* Flush the journal and wait for all I/Os to finish up */
		error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
		if (error) {
			hfs_journal_unlock(hfsmp);
			return error;
		}

		/* Take bitmap lock to ensure it is not being modified */
		lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

		/* Release the journal lock */
		hfs_journal_unlock(hfsmp);

		/*
		 * ScanUnmapBlocks reads the bitmap in large block size
		 * (up to 1MB) unlike the runtime which reads the bitmap
		 * in the 4K block size. This can cause buf_t collisions
		 * and potential data corruption. To avoid this, we
		 * invalidate all the existing buffers associated with
		 * the bitmap vnode before scanning it.
		 *
		 * Note: ScanUnmapBlocks() cleans up all the buffers
		 * after itself, so there won't be any large buffers left
		 * for us to clean up after it returns.
		 */
		error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
		if (error) {
			hfs_systemfile_unlock(hfsmp, lockflags);
			return error;
		}

		/* Traverse bitmap and issue DKIOCUNMAPs */
		error = ScanUnmapBlocks(hfsmp);
		hfs_systemfile_unlock(hfsmp, lockflags);
		if (error) {
			return error;
		}
		break;
	}
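	/*
	 * Lock-ordering summary for the case above: take the journal lock,
	 * flush the journal, take the bitmap lock, drop the journal lock,
	 * invalidate the bitmap buffers, run ScanUnmapBlocks(), then drop
	 * the bitmap lock. Holding the journal lock across the window
	 * between the flush and the bitmap-lock acquisition is what keeps
	 * the journal trim list frozen while the scan runs.
	 */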
	case HFSIOC_SET_HOTFILE_STATE: {
		int error;
		struct cnode *cp = VTOC(vp);
		uint32_t hf_state = *((uint32_t*)ap->a_data);
		uint32_t num_unpinned = 0;

		error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error) {
			return error;
		}

		// printf("hfs: setting hotfile state %d on %s\n", hf_state, vp->v_name);
		if (hf_state == HFS_MARK_FASTDEVCANDIDATE) {
			vnode_setfastdevicecandidate(vp);

			cp->c_attr.ca_recflags |= kHFSFastDevCandidateMask;
			cp->c_attr.ca_recflags &= ~kHFSDoNotFastDevPinMask;
			cp->c_flag |= C_MODIFIED;

		} else if (hf_state == HFS_UNMARK_FASTDEVCANDIDATE || hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
			vnode_clearfastdevicecandidate(vp);
			hfs_removehotfile(vp);

			if (cp->c_attr.ca_recflags & kHFSFastDevPinnedMask) {
				hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, &num_unpinned);
			}

			if (hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
				cp->c_attr.ca_recflags |= kHFSDoNotFastDevPinMask;
			}
			cp->c_attr.ca_recflags &= ~(kHFSFastDevCandidateMask|kHFSFastDevPinnedMask);
			cp->c_flag |= C_MODIFIED;
		}

		if (num_unpinned != 0) {
			lck_mtx_lock(&hfsmp->hfc_mutex);
			hfsmp->hfs_hotfile_freeblks += num_unpinned;
			lck_mtx_unlock(&hfsmp->hfc_mutex);
		}

		hfs_unlock(cp);
		return error;
	}
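	/*
	 * Note on the accounting above: unpinning a previously pinned hot
	 * file returns its blocks to the hot file area, so num_unpinned is
	 * credited back to hfs_hotfile_freeblks under hfc_mutex.
	 */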
2814 case HFSIOC_REPIN_HOTFILE_STATE
: {
2816 uint32_t repin_what
= *((uint32_t*)ap
->a_data
);
2818 /* Only root allowed */
2819 if (!kauth_cred_issuser(kauth_cred_get())) {
2823 if (!(hfsmp
->hfs_flags
& (HFS_CS_METADATA_PIN
| HFS_CS_HOTFILE_PIN
))) {
2824 // this system is neither regular Fusion or Cooperative Fusion
2825 // so this fsctl makes no sense.
2830 // After a converting a CoreStorage volume to be encrypted, the
2831 // extents could have moved around underneath us. This call
2832 // allows corestoraged to re-pin everything that should be
2833 // pinned (it would happen on the next reboot too but that could
2834 // be a long time away).
2836 if ((repin_what
& HFS_REPIN_METADATA
) && (hfsmp
->hfs_flags
& HFS_CS_METADATA_PIN
)) {
2837 hfs_pin_fs_metadata(hfsmp
);
2839 if ((repin_what
& HFS_REPIN_USERDATA
) && (hfsmp
->hfs_flags
& HFS_CS_HOTFILE_PIN
)) {
2840 hfs_repin_hotfiles(hfsmp
);
2842 if ((repin_what
& HFS_REPIN_USERDATA
) && (hfsmp
->hfs_flags
& HFS_CS_SWAPFILE_PIN
)) {
2843 //XXX Swapfiles (marked SWAP_PINNED) may have moved too.
2844 //XXX Do we care? They have a more transient/dynamic nature/lifetime.
#if HFS_CONFIG_KEY_ROLL

	case HFSIOC_KEY_ROLL: {
		if (!kauth_cred_issuser(kauth_cred_get()))
			return EACCES;

		hfs_key_roll_args_t *args = (hfs_key_roll_args_t *)ap->a_data;

		return hfs_key_roll_op(ap->a_context, ap->a_vp, args);
	}

	case HFSIOC_GET_KEY_AUTO_ROLL: {
		if (!kauth_cred_issuser(kauth_cred_get()))
			return EACCES;

		hfs_key_auto_roll_args_t *args = (hfs_key_auto_roll_args_t *)ap->a_data;
		if (args->api_version != HFS_KEY_AUTO_ROLL_API_VERSION_1)
			return ENOTSUP;
		args->flags = (ISSET(hfsmp->cproot_flags, CP_ROOT_AUTO_ROLL_OLD_CLASS_GENERATION)
			       ? HFS_KEY_AUTO_ROLL_OLD_CLASS_GENERATION : 0);
		args->min_key_os_version = hfsmp->hfs_auto_roll_min_key_os_version;
		args->max_key_os_version = hfsmp->hfs_auto_roll_max_key_os_version;
		break;
	}

	case HFSIOC_SET_KEY_AUTO_ROLL: {
		if (!kauth_cred_issuser(kauth_cred_get()))
			return EACCES;

		hfs_key_auto_roll_args_t *args = (hfs_key_auto_roll_args_t *)ap->a_data;
		if (args->api_version != HFS_KEY_AUTO_ROLL_API_VERSION_1)
			return ENOTSUP;
		return cp_set_auto_roll(hfsmp, args);
	}

#endif // HFS_CONFIG_KEY_ROLL

	case F_TRANSCODEKEY:
		/*
		 * This API is only supported when called via kernel so
		 * a_fflag must be set to 1 (it's not possible to get here
		 * with it set to 1 via fsctl).
		 */
		if (ap->a_fflag != 1)
			return ENOTTY;

		return cp_vnode_transcode(vp, (cp_key_t *)ap->a_data);

	case F_GETPROTECTIONLEVEL:
		return cp_get_root_major_vers(vp, (uint32_t *)ap->a_data);

	case F_GETDEFAULTPROTLEVEL:
		return cp_get_default_level(vp, (uint32_t *)ap->a_data);
#endif // CONFIG_PROTECT
2906 return hfs_pin_vnode(hfsmp
, vp
, HFS_PIN_IT
| HFS_DATALESS_PIN
,
int
hfs_vnop_select(__unused struct vnop_select_args *ap)
/*
	struct vnop_select_args {
		vnode_t a_vp;
		int a_which;
		int a_fflags;
		void *a_wql;
		vfs_context_t a_context;
	};
*/
{
	/*
	 * We should really check to see if I/O is possible.
	 */
	return (1);
}

/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently it's 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
 */
int
hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
{
	struct filefork *fp = VTOF(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	int retval = E_NONE;
	u_int32_t logBlockSize;
	size_t bytesContAvail = 0;
	off_t blockposition;
	int lockExtBtree;
	int lockflags = 0;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (vpp != NULL)
		*vpp = hfsmp->hfs_devvp;
	if (bnp == NULL)
		return (0);

	logBlockSize = GetLogicalBlockSize(vp);
	blockposition = (off_t)bn * logBlockSize;

	lockExtBtree = overflow_extents(fp);

	if (lockExtBtree)
		lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);

	retval = MacToVFSError(
			MapFileBlockC(HFSTOVCB(hfsmp),
				      (FCB *)fp,
				      MAXPHYSIO,
				      blockposition,
				      bnp,
				      &bytesContAvail));

	if (lockExtBtree)
		hfs_systemfile_unlock(hfsmp, lockflags);

	if (retval == E_NONE) {
		/* Figure out how many read ahead blocks there are */
		if (runp != NULL) {
			if (can_cluster(logBlockSize)) {
				/* Make sure this result never goes negative: */
				*runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
			} else {
				*runp = 0;
			}
		}
	}
	return (retval);
}
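/*
 * Worked example (illustrative numbers): with a logical block size of
 * 4096 bytes, a request for logical block 10 maps byte offset
 * 10 * 4096 = 40960 through MapFileBlockC(); if 32768 contiguous bytes
 * are available at that position, *runp reports 32768 / 4096 - 1 = 7
 * remaining read-ahead blocks.
 */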
/*
 * Convert logical block number to file offset.
 */
int
hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
/*
	struct vnop_blktooff_args {
		vnode_t a_vp;
		daddr64_t a_lblkno;
		off_t *a_offset;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);

	*ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);

	return (0);
}

/*
 * Convert file offset to logical block number.
 */
int
hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
/*
	struct vnop_offtoblk_args {
		vnode_t a_vp;
		off_t a_offset;
		daddr64_t *a_lblkno;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);

	*ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));

	return (0);
}
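/*
 * Example (illustrative numbers): with a 4096-byte logical block size,
 * hfs_vnop_offtoblk maps offset 10000 to logical block 10000 / 4096 = 2,
 * and hfs_vnop_blktooff maps block 2 back to offset 2 * 4096 = 8192; the
 * two are exact inverses only for block-aligned offsets.
 */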
/*
 * Map file offset to physical block number.
 *
 * If this function is called for write operation, and if the file
 * had virtual blocks allocated (delayed allocation), real blocks
 * are allocated by calling ExtendFileC().
 *
 * If this function is called for read operation, and if the file
 * had virtual blocks allocated (delayed allocation), no change
 * to the size of file is done, and if required, rangelist is
 * searched for mapping.
 *
 * System file cnodes are expected to be locked (shared or exclusive).
 *
 * -- INVALID RANGES --
 *
 * Invalid ranges are used to keep track of where we have extended a
 * file, but have not yet written that data to disk. In the past we
 * would clear up the invalid ranges as we wrote to those areas, but
 * before data was actually flushed to disk. The problem with that
 * approach is that the data can be left in the cache and is therefore
 * still not valid on disk. So now we clear up the ranges here, when
 * the flags field has VNODE_WRITE set, indicating a write is about to
 * occur. This isn't ideal (ideally we want to clear them up when we
 * know the data has been successfully written), but it's the best we
 * can do.
 *
 * For reads, we use the invalid ranges here in block map to indicate
 * to the caller that the data should be zeroed (a_bpn == -1). We
 * have to be careful about what ranges we return to the cluster code.
 * Currently the cluster code can only handle non-rounded values for
 * the EOF; it cannot handle funny sized ranges in the middle of the
 * file (the main problem is that it sends down odd sized I/Os to the
 * disk). Our code currently works because whilst the very first
 * offset and the last offset in the invalid ranges are not aligned,
 * gaps in the invalid ranges between the first and last have to be
 * aligned (because we always write page sized blocks). For example,
 * consider this arrangement:
 *
 *         +-------------+-----+-------+------+
 *         |             |XXXXX|       |XXXXXX|
 *         +-------------+-----+-------+------+
 *                       a     b       c      d
 *
 * This shows two invalid ranges <a, b> and <c, d>. Whilst a and d
 * are not necessarily aligned, b and c *must* be.
 *
 * Zero-filling occurs in a number of ways:
 *
 *   1. When a read occurs and we return with a_bpn == -1.
 *
 *   2. When hfs_fsync or hfs_filedone calls hfs_flush_invalid_ranges
 *      which will cause us to iterate over the ranges bringing in
 *      pages that are not present in the cache and zeroing them. Any
 *      pages that are already in the cache are left untouched. Note
 *      that hfs_fsync does not always flush invalid ranges.
 *
 *   3. When we extend a file we zero out from the old EOF to the end
 *      of the page. It would be nice if we didn't have to do this if
 *      the page wasn't present (and could defer it), but because of
 *      the problem described above, we have to.
 *
 * The invalid ranges are also used to restrict the size that we write
 * out on disk: see hfs_prepare_fork_for_update.
 *
 * Note that invalid ranges are ignored when neither the VNODE_READ nor
 * the VNODE_WRITE flag is specified. This is useful for the
 * F_LOG2PHYS* fcntls which are not interested in invalid ranges: they
 * just want to know whether blocks are physically allocated or not.
 */
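/*
 * Concrete read-side example (illustrative numbers): with an invalid
 * range covering bytes 8192..20479 and a VNODE_READ blockmap request for
 * 16384 bytes starting at offset 8192, the hit begins exactly at
 * a_foffset, so *a_bpn is set to -1 (zero-fill) and, provided the EOF
 * lies beyond the range, the returned run is clipped to
 * 20480 - 8192 = 12288 bytes instead of the full 16384.
 */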
3108 hfs_vnop_blockmap(struct vnop_blockmap_args
*ap
)
3110 struct vnop_blockmap_args {
3118 vfs_context_t a_context;
3122 struct vnode
*vp
= ap
->a_vp
;
3124 struct filefork
*fp
;
3125 struct hfsmount
*hfsmp
;
3126 size_t bytesContAvail
= ap
->a_size
;
3127 int retval
= E_NONE
;
3130 struct rl_entry
*invalid_range
;
3131 enum rl_overlaptype overlaptype
;
3136 if (VNODE_IS_RSRC(vp
)) {
3137 /* allow blockmaps to the resource fork */
3139 if ( hfs_file_is_compressed(VTOC(vp
), 1) ) { /* 1 == don't take the cnode lock */
3140 int state
= decmpfs_cnode_get_vnode_state(VTOCMP(vp
));
3142 case FILE_IS_COMPRESSED
:
3144 case FILE_IS_CONVERTING
:
3145 /* if FILE_IS_CONVERTING, we allow blockmap */
3148 printf("invalid state %d for compressed file\n", state
);
3153 #endif /* HFS_COMPRESSION */
3155 /* Do not allow blockmap operation on a directory */
3156 if (vnode_isdir(vp
)) {
3161 * Check for underlying vnode requests and ensure that logical
3162 * to physical mapping is requested.
3164 if (ap
->a_bpn
== NULL
)
3171 if ( !vnode_issystem(vp
) && !vnode_islnk(vp
) && !vnode_isswap(vp
)) {
3172 if (cp
->c_lockowner
!= current_thread()) {
3173 hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
3177 // For reads, check the invalid ranges
3178 if (ISSET(ap
->a_flags
, VNODE_READ
)) {
3179 if (ap
->a_foffset
>= fp
->ff_size
) {
3184 overlaptype
= rl_scan(&fp
->ff_invalidranges
, ap
->a_foffset
,
3185 ap
->a_foffset
+ (off_t
)bytesContAvail
- 1,
3187 switch(overlaptype
) {
3188 case RL_MATCHINGOVERLAP
:
3189 case RL_OVERLAPCONTAINSRANGE
:
3190 case RL_OVERLAPSTARTSBEFORE
:
3191 /* There's no valid block for this byte offset */
3192 *ap
->a_bpn
= (daddr64_t
)-1;
3193 /* There's no point limiting the amount to be returned
3194 * if the invalid range that was hit extends all the way
3195 * to the EOF (i.e. there's no valid bytes between the
3196 * end of this range and the file's EOF):
3198 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
3199 ((size_t)(invalid_range
->rl_end
+ 1 - ap
->a_foffset
) < bytesContAvail
)) {
3200 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
3206 case RL_OVERLAPISCONTAINED
:
3207 case RL_OVERLAPENDSAFTER
:
3208 /* The range of interest hits an invalid block before the end: */
3209 if (invalid_range
->rl_start
== ap
->a_foffset
) {
3210 /* There's actually no valid information to be had starting here: */
3211 *ap
->a_bpn
= (daddr64_t
)-1;
3212 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
3213 ((size_t)(invalid_range
->rl_end
+ 1 - ap
->a_foffset
) < bytesContAvail
)) {
3214 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
3221 * Sadly, the lower layers don't like us to
3222 * return unaligned ranges, so we skip over
3223 * any invalid ranges here that are less than
3224 * a page: zeroing of those bits is not our
3225 * responsibility (it's dealt with elsewhere).
3228 off_t rounded_start
= round_page_64(invalid_range
->rl_start
);
3229 if ((off_t
)bytesContAvail
< rounded_start
- ap
->a_foffset
)
3231 if (rounded_start
< invalid_range
->rl_end
+ 1) {
3232 bytesContAvail
= rounded_start
- ap
->a_foffset
;
3235 } while ((invalid_range
= TAILQ_NEXT(invalid_range
,
3247 if (cp
->c_cpentry
) {
3248 const int direction
= (ISSET(ap
->a_flags
, VNODE_WRITE
)
3249 ? VNODE_WRITE
: VNODE_READ
);
3251 cp_io_params_t io_params
;
3252 cp_io_params(hfsmp
, cp
->c_cpentry
,
3253 off_rsrc_make(ap
->a_foffset
, VNODE_IS_RSRC(vp
)),
3254 direction
, &io_params
);
3256 if (io_params
.max_len
< (off_t
)bytesContAvail
)
3257 bytesContAvail
= io_params
.max_len
;
3259 if (io_params
.phys_offset
!= -1) {
3260 *ap
->a_bpn
= ((io_params
.phys_offset
+ hfsmp
->hfsPlusIOPosOffset
)
3261 / hfsmp
->hfs_logical_block_size
);
3271 /* Check virtual blocks only when performing write operation */
3272 if ((ap
->a_flags
& VNODE_WRITE
) && (fp
->ff_unallocblocks
!= 0)) {
3273 if (hfs_start_transaction(hfsmp
) != 0) {
3279 syslocks
= SFL_EXTENTS
| SFL_BITMAP
;
3281 } else if (overflow_extents(fp
)) {
3282 syslocks
= SFL_EXTENTS
;
3286 lockflags
= hfs_systemfile_lock(hfsmp
, syslocks
, HFS_EXCLUSIVE_LOCK
);
3289 * Check for any delayed allocations.
3291 if ((ap
->a_flags
& VNODE_WRITE
) && (fp
->ff_unallocblocks
!= 0)) {
3293 u_int32_t loanedBlocks
;
3296 // Make sure we have a transaction. It's possible
3297 // that we came in and fp->ff_unallocblocks was zero
3298 // but during the time we blocked acquiring the extents
3299 // btree, ff_unallocblocks became non-zero and so we
3300 // will need to start a transaction.
3302 if (started_tr
== 0) {
3304 hfs_systemfile_unlock(hfsmp
, lockflags
);
3311 * Note: ExtendFileC will Release any blocks on loan and
3312 * aquire real blocks. So we ask to extend by zero bytes
3313 * since ExtendFileC will account for the virtual blocks.
3316 loanedBlocks
= fp
->ff_unallocblocks
;
3317 retval
= ExtendFileC(hfsmp
, (FCB
*)fp
, 0, 0,
3318 kEFAllMask
| kEFNoClumpMask
, &actbytes
);
3321 fp
->ff_unallocblocks
= loanedBlocks
;
3322 cp
->c_blocks
+= loanedBlocks
;
3323 fp
->ff_blocks
+= loanedBlocks
;
3325 hfs_lock_mount (hfsmp
);
3326 hfsmp
->loanedBlocks
+= loanedBlocks
;
3327 hfs_unlock_mount (hfsmp
);
3329 hfs_systemfile_unlock(hfsmp
, lockflags
);
3330 cp
->c_flag
|= C_MODIFIED
;
3332 (void) hfs_update(vp
, 0);
3333 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3335 hfs_end_transaction(hfsmp
);
3342 retval
= MapFileBlockC(hfsmp
, (FCB
*)fp
, bytesContAvail
, ap
->a_foffset
,
3343 ap
->a_bpn
, &bytesContAvail
);
3345 hfs_systemfile_unlock(hfsmp
, lockflags
);
3350 /* On write, always return error because virtual blocks, if any,
3351 * should have been allocated in ExtendFileC(). We do not
3352 * allocate virtual blocks on read, therefore return error
3353 * only if no virtual blocks are allocated. Otherwise we search
3354 * rangelist for zero-fills
3356 if ((MacToVFSError(retval
) != ERANGE
) ||
3357 (ap
->a_flags
& VNODE_WRITE
) ||
3358 ((ap
->a_flags
& VNODE_READ
) && (fp
->ff_unallocblocks
== 0))) {
3362 /* Validate if the start offset is within logical file size */
3363 if (ap
->a_foffset
>= fp
->ff_size
) {
3368 * At this point, we have encountered a failure during
3369 * MapFileBlockC that resulted in ERANGE, and we are not
3370 * servicing a write, and there are borrowed blocks.
3372 * However, the cluster layer will not call blockmap for
3373 * blocks that are borrowed and in-cache. We have to assume
3374 * that because we observed ERANGE being emitted from
3375 * MapFileBlockC, this extent range is not valid on-disk. So
3376 * we treat this as a mapping that needs to be zero-filled
3380 if (fp
->ff_size
- ap
->a_foffset
< (off_t
)bytesContAvail
)
3381 bytesContAvail
= fp
->ff_size
- ap
->a_foffset
;
3383 *ap
->a_bpn
= (daddr64_t
) -1;
3391 if (ISSET(ap
->a_flags
, VNODE_WRITE
)) {
3392 struct rl_entry
*r
= TAILQ_FIRST(&fp
->ff_invalidranges
);
3394 // See if we might be overlapping invalid ranges...
3395 if (r
&& (ap
->a_foffset
+ (off_t
)bytesContAvail
) > r
->rl_start
) {
3397 * Mark the file as needing an update if we think the
3398 * on-disk EOF has changed.
3400 if (ap
->a_foffset
<= r
->rl_start
)
3401 SET(cp
->c_flag
, C_MODIFIED
);
3404 * This isn't the ideal place to put this. Ideally, we
3405 * should do something *after* we have successfully
3406 * written to the range, but that's difficult to do
3407 * because we cannot take locks in the callback. At
3408 * present, the cluster code will call us with VNODE_WRITE
3409 * set just before it's about to write the data so we know
3410 * that data is about to be written. If we get an I/O
3411 * error at this point then chances are the metadata
3412 * update to follow will also have an I/O error so the
3413 * risk here is small.
3415 rl_remove(ap
->a_foffset
, ap
->a_foffset
+ bytesContAvail
- 1,
3416 &fp
->ff_invalidranges
);
3418 if (!TAILQ_FIRST(&fp
->ff_invalidranges
)) {
3419 cp
->c_flag
&= ~C_ZFWANTSYNC
;
3420 cp
->c_zftimeout
= 0;
3426 *ap
->a_run
= bytesContAvail
;
3429 *(int *)ap
->a_poff
= 0;
3433 hfs_update(vp
, TRUE
);
3434 hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3435 hfs_end_transaction(hfsmp
);
3442 return (MacToVFSError(retval
));
/*
 * Prepare and issue the I/O.
 * buf_strategy knows how to deal
 * with requests that require
 * fragmented I/Os.
 */
int
hfs_vnop_strategy(struct vnop_strategy_args *ap)
{
	buf_t bp = ap->a_bp;
	vnode_t vp = buf_vnode(bp);
	int error = 0;

	/* Mark buffer as containing static data if cnode flag set */
	if (VTOC(vp)->c_flag & C_SSD_STATIC) {
		buf_markstatic(bp);
	}

	/* Mark buffer as containing greedy mode data if cnode flag set */
	if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) {
		bufattr_markgreedymode(buf_attr(bp));
	}

	/* Mark buffer as containing burst mode data if cnode flag set */
	if (VTOC(vp)->c_flag & C_IO_ISOCHRONOUS) {
		bufattr_markisochronous(buf_attr(bp));
	}

#if CONFIG_PROTECT
	error = cp_handle_strategy(bp);
	if (error)
		return error;
#endif

	error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);

	return error;
}
3486 do_hfs_truncate(struct vnode
*vp
, off_t length
, int flags
, int truncateflags
, vfs_context_t context
)
3488 register struct cnode
*cp
= VTOC(vp
);
3489 struct filefork
*fp
= VTOF(vp
);
3490 kauth_cred_t cred
= vfs_context_ucred(context
);
3493 off_t actualBytesAdded
;
3495 u_int32_t fileblocks
;
3497 struct hfsmount
*hfsmp
;
3499 int suppress_times
= (truncateflags
& HFS_TRUNCATE_SKIPTIMES
);
3501 blksize
= VTOVCB(vp
)->blockSize
;
3502 fileblocks
= fp
->ff_blocks
;
3503 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
3505 KERNEL_DEBUG(HFSDBG_TRUNCATE
| DBG_FUNC_START
,
3506 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
3511 /* This should only happen with a corrupt filesystem */
3512 if ((off_t
)fp
->ff_size
< 0)
3515 if ((!ISHFSPLUS(VTOVCB(vp
))) && (length
> (off_t
)MAXHFSFILESIZE
))
3522 /* Files that are changing size are not hot file candidates. */
3523 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
3524 fp
->ff_bytesread
= 0;
3528 * We cannot just check if fp->ff_size == length (as an optimization)
3529 * since there may be extra physical blocks that also need truncation.
3532 if ((retval
= hfs_getinoquota(cp
)))
3537 * Lengthen the size of the file. We must ensure that the
3538 * last byte of the file is allocated. Since the smallest
3539 * value of ff_size is 0, length will be at least 1.
3541 if (length
> (off_t
)fp
->ff_size
) {
3543 retval
= hfs_chkdq(cp
, (int64_t)(roundup(length
- filebytes
, blksize
)),
3549 * If we don't have enough physical space then
3550 * we need to extend the physical size.
3552 if (length
> filebytes
) {
3554 u_int32_t blockHint
= 0;
3556 /* All or nothing and don't round up to clumpsize. */
3557 eflags
= kEFAllMask
| kEFNoClumpMask
;
3559 if (cred
&& (suser(cred
, NULL
) != 0)) {
3560 eflags
|= kEFReserveMask
; /* keep a reserve */
3564 * Allocate Journal and Quota files in metadata zone.
3566 if (filebytes
== 0 &&
3567 hfsmp
->hfs_flags
& HFS_METADATA_ZONE
&&
3568 hfs_virtualmetafile(cp
)) {
3569 eflags
|= kEFMetadataMask
;
3570 blockHint
= hfsmp
->hfs_metazone_start
;
3572 if (hfs_start_transaction(hfsmp
) != 0) {
3577 /* Protect extents b-tree and allocation bitmap */
3578 lockflags
= SFL_BITMAP
;
3579 if (overflow_extents(fp
))
3580 lockflags
|= SFL_EXTENTS
;
3581 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3584 * Keep growing the file as long as the current EOF is
3585 * less than the desired value.
3587 while ((length
> filebytes
) && (retval
== E_NONE
)) {
3588 bytesToAdd
= length
- filebytes
;
3589 retval
= MacToVFSError(ExtendFileC(VTOVCB(vp
),
3594 &actualBytesAdded
));
3596 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
3597 if (actualBytesAdded
== 0 && retval
== E_NONE
) {
3598 if (length
> filebytes
)
3604 hfs_systemfile_unlock(hfsmp
, lockflags
);
3608 hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3611 hfs_end_transaction(hfsmp
);
3616 KERNEL_DEBUG(HFSDBG_TRUNCATE
| DBG_FUNC_NONE
,
3617 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
3620 if (ISSET(flags
, IO_NOZEROFILL
)) {
3621 // An optimisation for the hibernation file
3622 if (vnode_isswap(vp
))
3623 rl_remove_all(&fp
->ff_invalidranges
);
3625 if (!vnode_issystem(vp
) && retval
== E_NONE
) {
3626 if (length
> (off_t
)fp
->ff_size
) {
3629 /* Extending the file: time to fill out the current last page w. zeroes? */
3630 if (fp
->ff_size
& PAGE_MASK_64
) {
3631 /* There might be some valid data at the start of the (current) last page
3632 of the file, so zero out the remainder of that page to ensure the
3633 entire page contains valid data. */
3635 retval
= hfs_zero_eof_page(vp
, length
);
3636 hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
3637 if (retval
) goto Err_Exit
;
3640 rl_add(fp
->ff_size
, length
- 1, &fp
->ff_invalidranges
);
3641 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
3644 panic("hfs_truncate: invoked on non-UBC object?!");
3647 if (suppress_times
== 0) {
3648 cp
->c_touch_modtime
= TRUE
;
3650 fp
->ff_size
= length
;
3652 } else { /* Shorten the size of the file */
3654 // An optimisation for the hibernation file
3655 if (ISSET(flags
, IO_NOZEROFILL
) && vnode_isswap(vp
)) {
3656 rl_remove_all(&fp
->ff_invalidranges
);
3657 } else if ((off_t
)fp
->ff_size
> length
) {
3658 /* Any space previously marked as invalid is now irrelevant: */
3659 rl_remove(length
, fp
->ff_size
- 1, &fp
->ff_invalidranges
);
3663 * Account for any unmapped blocks. Note that the new
3664 * file length can still end up with unmapped blocks.
3666 if (fp
->ff_unallocblocks
> 0) {
3667 u_int32_t finalblks
;
3668 u_int32_t loanedBlocks
;
3670 hfs_lock_mount(hfsmp
);
3671 loanedBlocks
= fp
->ff_unallocblocks
;
3672 cp
->c_blocks
-= loanedBlocks
;
3673 fp
->ff_blocks
-= loanedBlocks
;
3674 fp
->ff_unallocblocks
= 0;
3676 hfsmp
->loanedBlocks
-= loanedBlocks
;
3678 finalblks
= (length
+ blksize
- 1) / blksize
;
3679 if (finalblks
> fp
->ff_blocks
) {
3680 /* calculate required unmapped blocks */
3681 loanedBlocks
= finalblks
- fp
->ff_blocks
;
3682 hfsmp
->loanedBlocks
+= loanedBlocks
;
3684 fp
->ff_unallocblocks
= loanedBlocks
;
3685 cp
->c_blocks
+= loanedBlocks
;
3686 fp
->ff_blocks
+= loanedBlocks
;
3688 hfs_unlock_mount (hfsmp
);
3691 off_t savedbytes
= ((off_t
)fp
->ff_blocks
* (off_t
)blksize
);
3692 if (hfs_start_transaction(hfsmp
) != 0) {
3697 if (fp
->ff_unallocblocks
== 0) {
3698 /* Protect extents b-tree and allocation bitmap */
3699 lockflags
= SFL_BITMAP
;
3700 if (overflow_extents(fp
))
3701 lockflags
|= SFL_EXTENTS
;
3702 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3704 retval
= MacToVFSError(TruncateFileC(VTOVCB(vp
), (FCB
*)fp
, length
, 0,
3705 FORK_IS_RSRC (fp
), FTOC(fp
)->c_fileid
, false));
3707 hfs_systemfile_unlock(hfsmp
, lockflags
);
3711 fp
->ff_size
= length
;
3714 hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3716 hfs_end_transaction(hfsmp
);
3718 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
3722 /* These are bytesreleased */
3723 (void) hfs_chkdq(cp
, (int64_t)-(savedbytes
- filebytes
), NOCRED
, 0);
3727 // Unlike when growing a file, we adjust the hotfile block count here
3728 // instead of deeper down in the block allocation code because we do
3729 // not necessarily have a vnode or "fcb" at the time we're deleting
3730 // the file and so we wouldn't know if it was hotfile cached or not
3732 hfs_hotfile_adjust_blocks(vp
, (int64_t)((savedbytes
- filebytes
) / blksize
));
3736 * Only set update flag if the logical length changes & we aren't
3737 * suppressing modtime updates.
3739 if (((off_t
)fp
->ff_size
!= length
) && (suppress_times
== 0)) {
3740 cp
->c_touch_modtime
= TRUE
;
3742 fp
->ff_size
= length
;
3744 if (cp
->c_mode
& (S_ISUID
| S_ISGID
)) {
3745 if (!vfs_context_issuser(context
))
3746 cp
->c_mode
&= ~(S_ISUID
| S_ISGID
);
3748 cp
->c_flag
|= C_MODIFIED
;
3749 cp
->c_touch_chgtime
= TRUE
; /* status changed */
3750 if (suppress_times
== 0) {
3751 cp
->c_touch_modtime
= TRUE
; /* file data was modified */
3754 * If we are not suppressing the modtime update, then
3755 * update the gen count as well.
3757 if (S_ISREG(cp
->c_attr
.ca_mode
) || S_ISLNK (cp
->c_attr
.ca_mode
)) {
3758 hfs_incr_gencount(cp
);
3762 retval
= hfs_update(vp
, 0);
3764 KERNEL_DEBUG(HFSDBG_TRUNCATE
| DBG_FUNC_NONE
,
3765 -1, -1, -1, retval
, 0);
3770 KERNEL_DEBUG(HFSDBG_TRUNCATE
| DBG_FUNC_END
,
3771 (int)length
, (int)fp
->ff_size
, (int)filebytes
, retval
, 0);
3777 * Preparation which must be done prior to deleting the catalog record
3778 * of a file or directory. In order to make the on-disk as safe as possible,
3779 * we remove the catalog entry before releasing the bitmap blocks and the
3780 * overflow extent records. However, some work must be done prior to deleting
3781 * the catalog record.
3783 * When calling this function, the cnode must exist both in memory and on-disk.
3784 * If there are both resource fork and data fork vnodes, this function should
3785 * be called on both.
3789 hfs_prepare_release_storage (struct hfsmount
*hfsmp
, struct vnode
*vp
) {
3791 struct filefork
*fp
= VTOF(vp
);
3792 struct cnode
*cp
= VTOC(vp
);
3797 /* Cannot truncate an HFS directory! */
3798 if (vnode_isdir(vp
)) {
3803 * See the comment below in hfs_truncate for why we need to call
3804 * setsize here. Essentially we want to avoid pending IO if we
3805 * already know that the blocks are going to be released here.
3806 * This function is only called when totally removing all storage for a file, so
3807 * we can take a shortcut and immediately setsize (0);
3811 /* This should only happen with a corrupt filesystem */
3812 if ((off_t
)fp
->ff_size
< 0)
3816 * We cannot just check if fp->ff_size == length (as an optimization)
3817 * since there may be extra physical blocks that also need truncation.
3820 if ((retval
= hfs_getinoquota(cp
))) {
3825 /* Wipe out any invalid ranges which have yet to be backed by disk */
3826 rl_remove(0, fp
->ff_size
- 1, &fp
->ff_invalidranges
);
3829 * Account for any unmapped blocks. Since we're deleting the
3830 * entire file, we don't have to worry about just shrinking
3831 * to a smaller number of borrowed blocks.
3833 if (fp
->ff_unallocblocks
> 0) {
3834 u_int32_t loanedBlocks
;
3836 hfs_lock_mount (hfsmp
);
3837 loanedBlocks
= fp
->ff_unallocblocks
;
3838 cp
->c_blocks
-= loanedBlocks
;
3839 fp
->ff_blocks
-= loanedBlocks
;
3840 fp
->ff_unallocblocks
= 0;
3842 hfsmp
->loanedBlocks
-= loanedBlocks
;
3844 hfs_unlock_mount (hfsmp
);
3852 * Special wrapper around calling TruncateFileC. This function is useable
3853 * even when the catalog record does not exist any longer, making it ideal
3854 * for use when deleting a file. The simplification here is that we know
3855 * that we are releasing all blocks.
3857 * Note that this function may be called when there is no vnode backing
3858 * the file fork in question. We may call this from hfs_vnop_inactive
3859 * to clear out resource fork data (and may not want to clear out the data
3860 * fork yet). As a result, we pointer-check both sets of inputs before
3861 * doing anything with them.
3863 * The caller is responsible for saving off a copy of the filefork(s)
3864 * embedded within the cnode prior to calling this function. The pointers
3865 * supplied as arguments must be valid even if the cnode is no longer valid.
3869 hfs_release_storage (struct hfsmount
*hfsmp
, struct filefork
*datafork
,
3870 struct filefork
*rsrcfork
, u_int32_t fileid
) {
3873 u_int32_t fileblocks
;
3878 blksize
= hfsmp
->blockSize
;
3882 off_t prev_filebytes
;
3884 datafork
->ff_size
= 0;
3886 fileblocks
= datafork
->ff_blocks
;
3887 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
3888 prev_filebytes
= filebytes
;
3890 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3892 while (filebytes
> 0) {
3893 if (filebytes
> HFS_BIGFILE_SIZE
) {
3894 filebytes
-= HFS_BIGFILE_SIZE
;
3899 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3900 if (hfs_start_transaction(hfsmp
) != 0) {
3905 if (datafork
->ff_unallocblocks
== 0) {
3906 /* Protect extents b-tree and allocation bitmap */
3907 lockflags
= SFL_BITMAP
;
3908 if (overflow_extents(datafork
))
3909 lockflags
|= SFL_EXTENTS
;
3910 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3912 error
= MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp
), datafork
, filebytes
, 1, 0, fileid
, false));
3914 hfs_systemfile_unlock(hfsmp
, lockflags
);
3916 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3918 struct cnode
*cp
= datafork
? FTOC(datafork
) : NULL
;
3920 vp
= cp
? CTOV(cp
, 0) : NULL
;
3921 hfs_hotfile_adjust_blocks(vp
, (int64_t)((prev_filebytes
- filebytes
) / blksize
));
3922 prev_filebytes
= filebytes
;
3924 /* Finish the transaction and start over if necessary */
3925 hfs_end_transaction(hfsmp
);
3934 if (error
== 0 && rsrcfork
) {
3935 rsrcfork
->ff_size
= 0;
3937 fileblocks
= rsrcfork
->ff_blocks
;
3938 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
3940 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3942 while (filebytes
> 0) {
3943 if (filebytes
> HFS_BIGFILE_SIZE
) {
3944 filebytes
-= HFS_BIGFILE_SIZE
;
3949 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3950 if (hfs_start_transaction(hfsmp
) != 0) {
3955 if (rsrcfork
->ff_unallocblocks
== 0) {
3956 /* Protect extents b-tree and allocation bitmap */
3957 lockflags
= SFL_BITMAP
;
3958 if (overflow_extents(rsrcfork
))
3959 lockflags
|= SFL_EXTENTS
;
3960 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3962 error
= MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp
), rsrcfork
, filebytes
, 1, 1, fileid
, false));
3964 hfs_systemfile_unlock(hfsmp
, lockflags
);
3966 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3968 /* Finish the transaction and start over if necessary */
3969 hfs_end_transaction(hfsmp
);
errno_t hfs_ubc_setsize(vnode_t vp, off_t len, bool have_cnode_lock)
{
	errno_t error;

	/*
	 * Call ubc_setsize to give the VM subsystem a chance to do
	 * whatever it needs to with existing pages before we delete
	 * blocks. Note that symlinks don't use the UBC so we'll
	 * get back ENOENT in that case.
	 */
	if (have_cnode_lock) {
		error = ubc_setsize_ex(vp, len, UBC_SETSIZE_NO_FS_REENTRY);
		if (error == EAGAIN) {
			cnode_t *cp = VTOC(vp);

			if (cp->c_truncatelockowner != current_thread())
				hfs_warn("hfs: hfs_ubc_setsize called without exclusive truncate lock!");

			hfs_unlock(cp);
			error = ubc_setsize_ex(vp, len, 0);
			hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
		}
	} else {
		error = ubc_setsize_ex(vp, len, 0);
	}

	return error == ENOENT ? 0 : error;
}
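/*
 * Usage note: callers pass have_cnode_lock == true when they already hold
 * the cnode lock exclusively. The first attempt uses
 * UBC_SETSIZE_NO_FS_REENTRY; if the VM layer would have to re-enter the
 * filesystem (EAGAIN), the cnode lock is dropped around a plain
 * ubc_setsize_ex() call and re-acquired afterwards, which is only safe
 * because the caller still holds the exclusive truncate lock.
 */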
4009 * Truncate a cnode to at most length size, freeing (or adding) the
4013 hfs_truncate(struct vnode
*vp
, off_t length
, int flags
,
4014 int truncateflags
, vfs_context_t context
)
4016 struct filefork
*fp
= VTOF(vp
);
4018 u_int32_t fileblocks
;
4021 struct cnode
*cp
= VTOC(vp
);
4022 hfsmount_t
*hfsmp
= VTOHFS(vp
);
4024 /* Cannot truncate an HFS directory! */
4025 if (vnode_isdir(vp
)) {
4028 /* A swap file cannot change size. */
4029 if (vnode_isswap(vp
) && length
&& !ISSET(flags
, IO_NOAUTH
)) {
4033 blksize
= hfsmp
->blockSize
;
4034 fileblocks
= fp
->ff_blocks
;
4035 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
4037 bool caller_has_cnode_lock
= (cp
->c_lockowner
== current_thread());
4039 error
= hfs_ubc_setsize(vp
, length
, caller_has_cnode_lock
);
4043 if (!caller_has_cnode_lock
) {
4044 error
= hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
4049 if (vnode_islnk(vp
) && cp
->c_datafork
->ff_symlinkptr
) {
4050 hfs_free(cp
->c_datafork
->ff_symlinkptr
, cp
->c_datafork
->ff_size
);
4051 cp
->c_datafork
->ff_symlinkptr
= NULL
;
4054 // have to loop truncating or growing files that are
4055 // really big because otherwise transactions can get
4056 // enormous and consume too many kernel resources.
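	// For example (illustrative): shrinking a file by many gigabytes is
	// done at most HFS_BIGFILE_SIZE bytes per do_hfs_truncate() call in
	// the loop below, so each journal transaction stays bounded instead
	// of covering the entire size change at once.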
4058 if (length
< filebytes
) {
4059 while (filebytes
> length
) {
4060 if ((filebytes
- length
) > HFS_BIGFILE_SIZE
) {
4061 filebytes
-= HFS_BIGFILE_SIZE
;
4065 error
= do_hfs_truncate(vp
, filebytes
, flags
, truncateflags
, context
);
4069 } else if (length
> filebytes
) {
4070 kauth_cred_t cred
= vfs_context_ucred(context
);
4071 const bool keep_reserve
= cred
&& suser(cred
, NULL
) != 0;
4073 if (hfs_freeblks(hfsmp
, keep_reserve
)
4074 < howmany(length
- filebytes
, blksize
)) {
4077 while (filebytes
< length
) {
4078 if ((length
- filebytes
) > HFS_BIGFILE_SIZE
) {
4079 filebytes
+= HFS_BIGFILE_SIZE
;
4083 error
= do_hfs_truncate(vp
, filebytes
, flags
, truncateflags
, context
);
4088 } else /* Same logical size */ {
4090 error
= do_hfs_truncate(vp
, length
, flags
, truncateflags
, context
);
4092 /* Files that are changing size are not hot file candidates. */
4093 if (VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
) {
4094 fp
->ff_bytesread
= 0;
4097 #if HFS_CONFIG_KEY_ROLL
4098 if (!error
&& cp
->c_truncatelockowner
== current_thread()) {
4099 hfs_key_roll_check(cp
, true);
4103 if (!caller_has_cnode_lock
)
4106 // Make sure UBC's size matches up (in case we didn't completely succeed)
4107 errno_t err2
= hfs_ubc_setsize(vp
, fp
->ff_size
, caller_has_cnode_lock
);
4116 * Preallocate file storage space.
4119 hfs_vnop_allocate(struct vnop_allocate_args
/* {
4123 off_t *a_bytesallocated;
4125 vfs_context_t a_context;
4128 struct vnode
*vp
= ap
->a_vp
;
4130 struct filefork
*fp
;
4132 off_t length
= ap
->a_length
;
4134 off_t moreBytesRequested
;
4135 off_t actualBytesAdded
;
4137 u_int32_t fileblocks
;
4138 int retval
, retval2
;
4139 u_int32_t blockHint
;
4140 u_int32_t extendFlags
; /* For call to ExtendFileC */
4141 struct hfsmount
*hfsmp
;
4142 kauth_cred_t cred
= vfs_context_ucred(ap
->a_context
);
4146 *(ap
->a_bytesallocated
) = 0;
4148 if (!vnode_isreg(vp
))
4150 if (length
< (off_t
)0)
4155 orig_ctime
= VTOC(vp
)->c_ctime
;
4157 nspace_snapshot_event(vp
, orig_ctime
, ap
->a_length
== 0 ? NAMESPACE_HANDLER_TRUNCATE_OP
|NAMESPACE_HANDLER_DELETE_OP
: NAMESPACE_HANDLER_TRUNCATE_OP
, NULL
);
4159 hfs_lock_truncate(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
4161 if ((retval
= hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
))) {
4169 fileblocks
= fp
->ff_blocks
;
4170 filebytes
= (off_t
)fileblocks
* (off_t
)vcb
->blockSize
;
4172 if ((ap
->a_flags
& ALLOCATEFROMVOL
) && (length
< filebytes
)) {
4177 /* Fill in the flags word for the call to Extend the file */
4179 extendFlags
= kEFNoClumpMask
;
4180 if (ap
->a_flags
& ALLOCATECONTIG
)
4181 extendFlags
|= kEFContigMask
;
4182 if (ap
->a_flags
& ALLOCATEALL
)
4183 extendFlags
|= kEFAllMask
;
4184 if (cred
&& suser(cred
, NULL
) != 0)
4185 extendFlags
|= kEFReserveMask
;
4186 if (hfs_virtualmetafile(cp
))
4187 extendFlags
|= kEFMetadataMask
;
4191 startingPEOF
= filebytes
;
4193 if (ap
->a_flags
& ALLOCATEFROMPEOF
)
4194 length
+= filebytes
;
4195 else if (ap
->a_flags
& ALLOCATEFROMVOL
)
4196 blockHint
= ap
->a_offset
/ VTOVCB(vp
)->blockSize
;
4198 /* If no changes are necesary, then we're done */
4199 if (filebytes
== length
)
4203 * Lengthen the size of the file. We must ensure that the
4204 * last byte of the file is allocated. Since the smallest
4205 * value of filebytes is 0, length will be at least 1.
4207 if (length
> filebytes
) {
4208 if (ISSET(extendFlags
, kEFAllMask
)
4209 && (hfs_freeblks(hfsmp
, ISSET(extendFlags
, kEFReserveMask
))
4210 < howmany(length
- filebytes
, hfsmp
->blockSize
))) {
4215 off_t total_bytes_added
= 0, orig_request_size
;
4217 orig_request_size
= moreBytesRequested
= length
- filebytes
;
4220 retval
= hfs_chkdq(cp
,
4221 (int64_t)(roundup(moreBytesRequested
, vcb
->blockSize
)),
4228 * Metadata zone checks.
4230 if (hfsmp
->hfs_flags
& HFS_METADATA_ZONE
) {
4232 * Allocate Journal and Quota files in metadata zone.
4234 if (hfs_virtualmetafile(cp
)) {
4235 blockHint
= hfsmp
->hfs_metazone_start
;
4236 } else if ((blockHint
>= hfsmp
->hfs_metazone_start
) &&
4237 (blockHint
<= hfsmp
->hfs_metazone_end
)) {
4239 * Move blockHint outside metadata zone.
4241 blockHint
= hfsmp
->hfs_metazone_end
+ 1;
4246 while ((length
> filebytes
) && (retval
== E_NONE
)) {
4247 off_t bytesRequested
;
4249 if (hfs_start_transaction(hfsmp
) != 0) {
4254 /* Protect extents b-tree and allocation bitmap */
4255 lockflags
= SFL_BITMAP
;
4256 if (overflow_extents(fp
))
4257 lockflags
|= SFL_EXTENTS
;
4258 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
4260 if (moreBytesRequested
>= HFS_BIGFILE_SIZE
) {
4261 bytesRequested
= HFS_BIGFILE_SIZE
;
4263 bytesRequested
= moreBytesRequested
;
4266 if (extendFlags
& kEFContigMask
) {
4267 // if we're on a sparse device, this will force it to do a
4268 // full scan to find the space needed.
4269 hfsmp
->hfs_flags
&= ~HFS_DID_CONTIG_SCAN
;
4272 retval
= MacToVFSError(ExtendFileC(vcb
,
4277 &actualBytesAdded
));
4279 if (retval
== E_NONE
) {
4280 *(ap
->a_bytesallocated
) += actualBytesAdded
;
4281 total_bytes_added
+= actualBytesAdded
;
4282 moreBytesRequested
-= actualBytesAdded
;
4283 if (blockHint
!= 0) {
4284 blockHint
+= actualBytesAdded
/ vcb
->blockSize
;
4287 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)vcb
->blockSize
;
4289 hfs_systemfile_unlock(hfsmp
, lockflags
);
4292 (void) hfs_update(vp
, 0);
4293 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
4296 hfs_end_transaction(hfsmp
);
4301 * if we get an error and no changes were made then exit
4302 * otherwise we must do the hfs_update to reflect the changes
4304 if (retval
&& (startingPEOF
== filebytes
))
4308 * Adjust actualBytesAdded to be allocation block aligned, not
4309 * clump size aligned.
4310 * NOTE: So what we are reporting does not affect reality
4311 * until the file is closed, when we truncate the file to allocation
4314 if (total_bytes_added
!= 0 && orig_request_size
< total_bytes_added
)
4315 *(ap
->a_bytesallocated
) =
4316 roundup(orig_request_size
, (off_t
)vcb
->blockSize
);
4318 } else { /* Shorten the size of the file */
4321 * N.B. At present, this code is never called. If and when we
4322 * do start using it, it looks like there might be slightly
4323 * strange semantics with the file size: it's possible for the
4324 * file size to *increase* e.g. if current file size is 5,
4325 * length is 1024 and filebytes is 4096, the file size will
4326 * end up being 1024 bytes. This isn't necessarily a problem
4327 * but it's not consistent with the code above which doesn't
4328 * change the file size.
4331 retval
= hfs_truncate(vp
, length
, 0, 0, ap
->a_context
);
4332 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)vcb
->blockSize
;
4335 * if we get an error and no changes were made then exit
4336 * otherwise we must do the hfs_update to reflect the changes
4338 if (retval
&& (startingPEOF
== filebytes
)) goto Err_Exit
;
4340 /* These are bytesreleased */
4341 (void) hfs_chkdq(cp
, (int64_t)-((startingPEOF
- filebytes
)), NOCRED
,0);
4344 if (fp
->ff_size
> filebytes
) {
4345 fp
->ff_size
= filebytes
;
4347 hfs_ubc_setsize(vp
, fp
->ff_size
, true);
4352 cp
->c_flag
|= C_MODIFIED
;
4353 cp
->c_touch_chgtime
= TRUE
;
4354 cp
->c_touch_modtime
= TRUE
;
4355 retval2
= hfs_update(vp
, 0);
4360 hfs_unlock_truncate(cp
, HFS_LOCK_DEFAULT
);
4367 * Pagein for HFS filesystem
4370 hfs_vnop_pagein(struct vnop_pagein_args
*ap
)
4372 struct vnop_pagein_args {
4375 vm_offset_t a_pl_offset,
4379 vfs_context_t a_context;
4385 struct filefork
*fp
;
4388 upl_page_info_t
*pl
;
4390 off_t page_needed_f_offset
;
4395 boolean_t truncate_lock_held
= FALSE
;
4396 boolean_t file_converted
= FALSE
;
4404 if ((error
= cp_handle_vnop(vp
, CP_READ_ACCESS
| CP_WRITE_ACCESS
, 0)) != 0) {
4406 * If we errored here, then this means that one of two things occurred:
4407 * 1. there was a problem with the decryption of the key.
4408 * 2. the device is locked and we are not allowed to access this particular file.
4410 * Either way, this means that we need to shut down this upl now. As long as
4411 * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves)
4412 * then we create a upl and immediately abort it.
4414 if (ap
->a_pl
== NULL
) {
4415 /* create the upl */
4416 ubc_create_upl (vp
, ap
->a_f_offset
, ap
->a_size
, &upl
, &pl
,
4417 UPL_UBC_PAGEIN
| UPL_RET_ONLY_ABSENT
);
4418 /* mark the range as needed so it doesn't immediately get discarded upon abort */
4419 ubc_upl_range_needed (upl
, ap
->a_pl_offset
/ PAGE_SIZE
, 1);
4421 /* Abort the range */
4422 ubc_upl_abort_range (upl
, 0, ap
->a_size
, UPL_ABORT_FREE_ON_EMPTY
| UPL_ABORT_ERROR
);
4428 #endif /* CONFIG_PROTECT */
4430 if (ap
->a_pl
!= NULL
) {
4432 * this can only happen for swap files now that
4433 * we're asking for V2 paging behavior...
4434 * so don't need to worry about decompression, or
4435 * keeping track of blocks read or taking the truncate lock
4437 error
= cluster_pagein(vp
, ap
->a_pl
, ap
->a_pl_offset
, ap
->a_f_offset
,
4438 ap
->a_size
, (off_t
)fp
->ff_size
, ap
->a_flags
);
4442 page_needed_f_offset
= ap
->a_f_offset
+ ap
->a_pl_offset
;
4446 * take truncate lock (shared/recursive) to guard against
4447 * zero-fill thru fsync interfering, but only for v2
4449 * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
4450 * lock shared and we are allowed to recurse 1 level if this thread already
4451 * owns the lock exclusively... this can legally occur
4452 * if we are doing a shrinking ftruncate against a file
4453 * that is mapped private, and the pages being truncated
4454 * do not currently exist in the cache... in that case
4455 * we will have to page-in the missing pages in order
4456 * to provide them to the private mapping... we must
4457 * also call hfs_unlock_truncate with a postive been_recursed
4458 * arg to indicate that if we have recursed, there is no need to drop
4459 * the lock. Allowing this simple recursion is necessary
4460 * in order to avoid a certain deadlock... since the ftruncate
4461 * already holds the truncate lock exclusively, if we try
4462 * to acquire it shared to protect the pagein path, we will
4465 * NOTE: The if () block below is a workaround in order to prevent a
4466 * VM deadlock. See rdar://7853471.
4468 * If we are in a forced unmount, then launchd will still have the
4469 * dyld_shared_cache file mapped as it is trying to reboot. If we
4470 * take the truncate lock here to service a page fault, then our
4471 * thread could deadlock with the forced-unmount. The forced unmount
4472 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
4473 * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount
4474 * thread will think it needs to copy all of the data out of the file
4475 * and into a VM copy object. If we hold the cnode lock here, then that
4476 * VM operation will not be able to proceed, because we'll set a busy page
4477 * before attempting to grab the lock. Note that this isn't as simple as "don't
4478 * call ubc_setsize" because doing that would just shift the problem to the
4479 * ubc_msync done before the vnode is reclaimed.
4481 * So, if a forced unmount on this volume is in flight AND the cnode is
4482 * marked C_DELETED, then just go ahead and do the page in without taking
4483 * the lock (thus suspending pagein_v2 semantics temporarily). Since it's on a file
4484 * that is not going to be available on the next mount, this seems like a
4485 * OK solution from a correctness point of view, even though it is hacky.
4487 if (vfs_isforce(vnode_mount(vp
))) {
4488 if (cp
->c_flag
& C_DELETED
) {
4489 /* If we don't get it, then just go ahead and operate without the lock */
4490 truncate_lock_held
= hfs_try_trunclock(cp
, HFS_SHARED_LOCK
, HFS_LOCK_SKIP_IF_EXCLUSIVE
);
4494 hfs_lock_truncate(cp
, HFS_SHARED_LOCK
, HFS_LOCK_SKIP_IF_EXCLUSIVE
);
4495 truncate_lock_held
= TRUE
;
	kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);

	if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
		retval = EINVAL;
		goto pagein_done;
	}
	ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);

	upl_size = isize = ap->a_size;

	/*
	 * Scan from the back to find the last page in the UPL, so that we
	 * aren't looking at a UPL that may have already been freed by the
	 * preceding aborts/completions.
	 */
	for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
		if (upl_page_present(pl, --pg_index))
			break;
	}
	if (pg_index == 0) {
		/*
		 * no absent pages were found in the range specified
		 * just abort the UPL to get rid of it and then we're done
		 */
		ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
		goto pagein_done;
	}

	/*
	 * initialize the offset variables before we touch the UPL.
	 * f_offset is the position into the file, in bytes
	 * offset is the position into the UPL, in bytes
	 * pg_index is the pg# of the UPL we're operating on
	 * isize is the offset into the UPL of the last page that is present.
	 */
	isize = ((pg_index + 1) * PAGE_SIZE);
	pg_index = 0;
	offset = 0;
	f_offset = ap->a_f_offset;
	while (isize) {
		int  xsize;
		int  num_of_pages;

		if ( !upl_page_present(pl, pg_index)) {
			/*
			 * we asked for RET_ONLY_ABSENT, so it's possible
			 * to get back empty slots in the UPL.
			 * just skip over them
			 */
			f_offset += PAGE_SIZE;
			offset   += PAGE_SIZE;
			isize    -= PAGE_SIZE;
			pg_index++;

			continue;
		}
		/*
		 * We know that we have at least one absent page.
		 * Now checking to see how many in a row we have
		 */
		num_of_pages = 1;
		xsize = isize - PAGE_SIZE;

		while (xsize) {
			if ( !upl_page_present(pl, pg_index + num_of_pages))
				break;
			num_of_pages++;
			xsize -= PAGE_SIZE;
		}
		xsize = num_of_pages * PAGE_SIZE;
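		/*
		 * At this point xsize spans the contiguous run of absent pages that
		 * starts at pg_index; this run is what gets filled below, either via
		 * the decmpfs path for compressed files or with a single
		 * cluster_pagein for ordinary file data.
		 */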
		if (VNODE_IS_RSRC(vp)) {
			/* allow pageins of the resource fork */
		} else {
			int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */

			if (compressed) {
				if (truncate_lock_held) {
					/*
					 * can't hold the truncate lock when calling into the decmpfs layer
					 * since it calls back into this layer... even though we're only
					 * holding the lock in shared mode, and the re-entrant path only
					 * takes the lock shared, we can deadlock if some other thread
					 * tries to grab the lock exclusively in between.
					 */
					hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
					truncate_lock_held = FALSE;
				}
				ap->a_pl_offset = offset;
				ap->a_f_offset = f_offset;

				error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
				/*
				 * note that decmpfs_pagein_compressed can change the state of
				 * 'compressed'... it will set it to 0 if the file is no longer
				 * compressed once the compression lock is successfully taken
				 * i.e. we would block on that lock while the file is being inflated
				 */
				if (error == 0 && vnode_isfastdevicecandidate(vp)) {
					(void) hfs_addhotfile(vp);
				}
				if (compressed) {
					if (error == 0) {
						/* successful page-in, update the access time */
						VTOC(vp)->c_touch_acctime = TRUE;
						// compressed files are not traditional hot file candidates
						// but they may be for CF (which ignores the ff_bytesread
						if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
							fp->ff_bytesread = 0;
						}
					} else if (error == EAGAIN) {
						/*
						 * EAGAIN indicates someone else already holds the compression lock...
						 * to avoid deadlocking, we'll abort this range of pages with an
						 * indication that the pagein needs to be redriven
						 */
						ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
					} else if (error == ENOSPC) {

						if (upl_size == PAGE_SIZE)
							panic("decmpfs_pagein_compressed: couldn't ubc_upl_map a single page\n");

						ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);

						ap->a_size = PAGE_SIZE;
						ap->a_pl_offset = 0;
						ap->a_f_offset = page_needed_f_offset;

						goto retry_pagein;
					} else {
						ubc_upl_abort(upl, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
					}
					goto pagein_next_range;
				} else {
					/*
					 * Set file_converted only if the file became decompressed while we were
					 * paging in.  If it were still compressed, we would re-start the loop using the goto
					 * in the above block.  This avoids overloading truncate_lock_held as our retry_pagein
					 * condition below, since we could have avoided taking the truncate lock to prevent
					 * a deadlock in the force unmount case.
					 */
					file_converted = TRUE;
				}
			}
			if (file_converted == TRUE) {
				/*
				 * the file was converted back to a regular file after we first saw it as compressed
				 * we need to abort the upl, retake the truncate lock, recreate the UPL and start over
				 * reset a_size so that we consider what remains of the original request
				 * and null out a_upl and a_pl_offset.
				 *
				 * We should only be able to get into this block if the decmpfs_pagein_compressed
				 * successfully decompressed the range in question for this file.
				 */
				ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);

				ap->a_pl_offset = 0;

				/* Reset file_converted back to false so that we don't infinite-loop. */
				file_converted = FALSE;

				goto retry_pagein;
			}
		}
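		/*
		 * For ordinary (non-compressed) file data, cluster_pagein below
		 * issues the read for this run of absent pages; the (off_t)fp->ff_size
		 * argument bounds the I/O at the fork's current EOF.
		 */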
		error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);

		/*
		 * Keep track of blocks read.
		 */
		if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
			int bytesread;
			int took_cnode_lock = 0;

			if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
				bytesread = fp->ff_size;
			else
				bytesread = xsize;

			/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
			if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
				hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
				took_cnode_lock = 1;
			}
			/*
			 * If this file hasn't been seen since the start of
			 * the current sampling period then start over.
			 */
			if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
				struct timeval tv;

				fp->ff_bytesread = bytesread;
				microtime(&tv);
				cp->c_atime = tv.tv_sec;
			} else {
				fp->ff_bytesread += bytesread;
			}
			cp->c_touch_acctime = TRUE;

			if (vnode_isfastdevicecandidate(vp)) {
				(void) hfs_addhotfile(vp);
			}
			if (took_cnode_lock)
				hfs_unlock(cp);
		}
pagein_next_range:
		f_offset += xsize;
		offset   += xsize;
		isize    -= xsize;
		pg_index += num_of_pages;
	}

pagein_done:
	if (truncate_lock_held == TRUE) {
		/* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */
		hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
	}

	return (retval);
}
/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
	struct vnop_pageout_args {
	   vnode_t       a_vp,
	   upl_t         a_pl,
	   vm_offset_t   a_pl_offset,
	   off_t         a_f_offset,
	   size_t        a_size,
	   int           a_flags,
	   vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	int retval = 0;
	int error = 0;
	int error_ret = 0;
	off_t filesize;
	upl_t upl;
	upl_page_info_t *pl = NULL;
	vm_offset_t a_pl_offset;
	int a_flags;
	int is_pageoutv2 = 0;
	kern_return_t kret;

	cp = VTOC(vp);
	fp = VTOF(vp);

	a_flags = ap->a_flags;
	a_pl_offset = ap->a_pl_offset;

	/*
	 * we can tell if we're getting the new or old behavior from the UPL
	 */
	if ((upl = ap->a_pl) == NULL) {
		int request_flags;

		is_pageoutv2 = 1;
		/*
		 * we're in control of any UPL we commit
		 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
		 */
		a_flags &= ~UPL_NOCOMMIT;
		/*
		 * For V2 semantics, we want to take the cnode truncate lock
		 * shared to guard against the file size changing via zero-filling.
		 *
		 * However, we have to be careful because we may be invoked
		 * via the ubc_msync path to write out dirty mmap'd pages
		 * in response to a lock event on a content-protected
		 * filesystem (e.g. to write out class A files).
		 * As a result, we want to take the truncate lock 'SHARED' with
		 * the mini-recursion locktype so that we don't deadlock/panic
		 * because we may already be holding the truncate lock exclusive to force any other
		 * IOs to have blocked behind us.
		 */
		hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
		if (a_flags & UPL_MSYNC) {
			request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
		} else {
			request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
		}

		kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);
		if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
			retval = EINVAL;
			goto pageout_done;
		}
	}
	/*
	 * from this point forward upl points at the UPL we're working with
	 * it was either passed in or we successfully created it
	 */
	/*
	 * Figure out where the file ends, for pageout purposes.  If
	 * ff_new_size > ff_size, then we're in the middle of extending the
	 * file via a write, so it is safe (and necessary) that we be able
	 * to pageout up to that point.
	 */
	filesize = fp->ff_size;
	if (fp->ff_new_size > filesize)
		filesize = fp->ff_new_size;

	/*
	 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
	 * UPL instead of relying on the UPL passed into us.  We go ahead and do that here,
	 * scanning for dirty ranges.  We'll issue our own N cluster_pageout calls, for
	 * N dirty ranges in the UPL.  Note that this is almost a direct copy of the
	 * logic in vnode_pageout except that we need to do it after grabbing the truncate
	 * lock in HFS so that we don't lock invert ourselves.
	 *
	 * Note that we can still get into this function on behalf of the default pager with
	 * non-V2 behavior (swapfiles).  However in that case, we did not grab locks above
	 * since fsync and other writing threads will grab the locks, then mark the
	 * relevant pages as busy.  But the pageout codepath marks the pages as busy,
	 * and THEN would attempt to grab the truncate lock, which would result in deadlock.  So
	 * we do not try to grab anything for the pre-V2 case, which should only be accessed
	 * by the paging/VM system.
	 */
	if (is_pageoutv2) {
		off_t f_offset;
		int offset;
		int isize;
		int pg_index;

		isize = ap->a_size;
		f_offset = ap->a_f_offset;

		/*
		 * Scan from the back to find the last page in the UPL, so that we
		 * aren't looking at a UPL that may have already been freed by the
		 * preceding aborts/completions.
		 */
		for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
			if (upl_page_present(pl, --pg_index))
				break;
		}
		if (pg_index == 0) {
			ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
			goto pageout_done;
		}

		/*
		 * initialize the offset variables before we touch the UPL.
		 * a_f_offset is the position into the file, in bytes
		 * offset is the position into the UPL, in bytes
		 * pg_index is the pg# of the UPL we're operating on.
		 * isize is the offset into the UPL of the last non-clean page.
		 */
		isize = ((pg_index + 1) * PAGE_SIZE);
		offset = 0;
		pg_index = 0;

		while (isize) {
			int  xsize;
			int  num_of_pages;

			if ( !upl_page_present(pl, pg_index)) {
				/*
				 * we asked for RET_ONLY_DIRTY, so it's possible
				 * to get back empty slots in the UPL.
				 * just skip over them
				 */
				f_offset += PAGE_SIZE;
				offset   += PAGE_SIZE;
				isize    -= PAGE_SIZE;
				pg_index++;

				continue;
			}
			if ( !upl_dirty_page(pl, pg_index)) {
				panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
			}
			/*
			 * We know that we have at least one dirty page.
			 * Now checking to see how many in a row we have
			 */
			num_of_pages = 1;
			xsize = isize - PAGE_SIZE;

			while (xsize) {
				if ( !upl_dirty_page(pl, pg_index + num_of_pages))
					break;
				num_of_pages++;
				xsize -= PAGE_SIZE;
			}
			xsize = num_of_pages * PAGE_SIZE;
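			/*
			 * xsize now spans the contiguous run of dirty pages starting at
			 * pg_index; the entire run is flushed with one cluster_pageout
			 * call below, bounded by the filesize computed earlier.
			 */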
			if ((error = cluster_pageout(vp, upl, offset, f_offset,
						     xsize, filesize, a_flags))) {
				if (error_ret == 0)
					error_ret = error;
			}
			f_offset += xsize;
			offset   += xsize;
			isize    -= xsize;
			pg_index += num_of_pages;
		}
		/* capture errnos bubbled out of cluster_pageout if they occurred */
		if (error_ret != 0) {
			retval = error_ret;
		}
	} /* end block for v2 pageout behavior */
	else {
		/*
		 * just call cluster_pageout for old pre-v2 behavior
		 */
		retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
					 ap->a_size, filesize, a_flags);
	}
	/*
	 * If data was written, update the modification time of the file
	 * but only if it's mapped writable; we will have touched the
	 * modification time for direct writes.
	 */
	if (retval == 0 && (ubc_is_mapped_writable(vp)
			    || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING))) {
		hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
		// Check again with lock
		bool mapped_writable = ubc_is_mapped_writable(vp);
		if (mapped_writable
		    || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING)) {
			cp->c_touch_modtime = TRUE;
			cp->c_touch_chgtime = TRUE;

			/*
			 * We only need to increment the generation counter if
			 * it's currently mapped writable because we incremented
			 * the counter in hfs_vnop_mnomap.
			 */
			if (mapped_writable)
				hfs_incr_gencount(VTOC(vp));

			/*
			 * If setuid or setgid bits are set and this process is
			 * not the superuser then clear the setuid and setgid bits
			 * as a precaution against tampering.
			 */
			if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
			    (vfs_context_suser(ap->a_context) != 0)) {
				cp->c_mode &= ~(S_ISUID | S_ISGID);
			}
		}

		hfs_unlock(cp);
	}

pageout_done:
	if (is_pageoutv2) {
		/*
		 * Release the truncate lock.  Note that because
		 * we may have taken the lock recursively by
		 * being invoked via ubc_msync due to lockdown,
		 * we should release it recursively, too.
		 */
		hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
	}

	return (retval);
}
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
	int retval = 0;
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;
	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
	    (vp == VTOHFS(vp)->hfc_filevp)) {
		/*
		 * Swap and validate the node if it is in native byte order.
		 * This is always true on big endian, so we always validate
		 * before writing here.  On little endian, the node typically has
		 * been swapped and validated when it was written to the journal,
		 * so we won't do anything here.
		 */
		if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = (char *)buf_dataptr(bp);
			block.blockNum = buf_lblkno(bp);
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (buf_fromcache(bp) == 0);
			block.blockSize = buf_count(bp);

			/* Endian un-swap B-Tree node */
			retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
			if (retval)
				panic("hfs_vnop_bwrite: about to write corrupt node!\n");
		}
	}

	/* This buffer shouldn't be locked anymore but if it is clear it */
	if ((buf_flags(bp) & B_LOCKED)) {
		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp %p\n", bp);
		}
		buf_clearflags(bp, B_LOCKED);
	}
	retval = vn_bwrite (ap);

	return (retval);
}
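/*
 * Pin or unpin a range of allocation blocks on the underlying device.
 * The extent (converted to a byte offset/length using the volume's
 * allocation block size) is handed to the device driver through a
 * _DKIOCCSPINEXTENT / _DKIOCCSUNPINEXTENT ioctl on hfs_devvp; pin_state
 * selects which flags go into cp_flags (fast media, temporary pin,
 * swapfile pin, or eviction back to slow media).
 */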
int
hfs_pin_block_range(struct hfsmount *hfsmp, int pin_state, uint32_t start_block, uint32_t nblocks)
{
	_dk_cs_pin_t pin;
	unsigned ioc;
	int err;

	memset(&pin, 0, sizeof(pin));
	pin.cp_extent.offset = ((uint64_t)start_block) * HFSTOVCB(hfsmp)->blockSize;
	pin.cp_extent.length = ((uint64_t)nblocks) * HFSTOVCB(hfsmp)->blockSize;
	switch (pin_state) {
	case HFS_PIN_IT:
		ioc = _DKIOCCSPINEXTENT;
		pin.cp_flags = _DKIOCCSPINTOFASTMEDIA;
		break;
	case HFS_PIN_IT | HFS_TEMP_PIN:
		ioc = _DKIOCCSPINEXTENT;
		pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSTEMPORARYPIN;
		break;
	case HFS_PIN_IT | HFS_DATALESS_PIN:
		ioc = _DKIOCCSPINEXTENT;
		pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSPINFORSWAPFILE;
		break;
	case HFS_UNPIN_IT:
		ioc = _DKIOCCSUNPINEXTENT;
		pin.cp_flags = 0;
		break;
	case HFS_UNPIN_IT | HFS_EVICT_PIN:
		ioc = _DKIOCCSPINEXTENT;
		pin.cp_flags = _DKIOCCSPINTOSLOWMEDIA;
		break;
	default:
		return EINVAL;
	}

	err = VNOP_IOCTL(hfsmp->hfs_devvp, ioc, (caddr_t)&pin, 0, vfs_context_kernel());
	return err;
}
//
// The cnode lock should already be held on entry to this function
//
int
hfs_pin_vnode(struct hfsmount *hfsmp, struct vnode *vp, int pin_state, uint32_t *num_blocks_pinned)
{
	struct filefork *fp = VTOF(vp);
	int i, err = 0, need_put = 0;
	struct vnode *rsrc_vp = NULL;
	uint32_t npinned = 0;

	if (num_blocks_pinned) {
		*num_blocks_pinned = 0;
	}

	if (vnode_vtype(vp) != VREG) {
		/* Not allowed to pin directories or symlinks */
		printf("hfs: can't pin vnode of type %d\n", vnode_vtype(vp));
		return (EPERM);
	}

	if (fp->ff_unallocblocks) {
		printf("hfs: can't pin a vnode w/unalloced blocks (%d)\n", fp->ff_unallocblocks);
		return (EINVAL);
	}

	/*
	 * It is possible that if the caller unlocked/re-locked the cnode after checking
	 * for C_NOEXISTS|C_DELETED that the file could have been deleted while the
	 * cnode was unlocked.  So check the condition again and return ENOENT so that
	 * the caller knows why we failed to pin the vnode.
	 */
	if (VTOC(vp)->c_flag & (C_NOEXISTS|C_DELETED)) {
		// makes no sense to pin something that's pending deletion
		return ENOENT;
	}

	if (fp->ff_blocks == 0 && (VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
		if (!VNODE_IS_RSRC(vp) && hfs_vgetrsrc(hfsmp, vp, &rsrc_vp) == 0) {
			//printf("hfs: fileid %d resource fork nblocks: %d / size: %lld\n", VTOC(vp)->c_fileid,
			//       VTOC(rsrc_vp)->c_rsrcfork->ff_blocks,VTOC(rsrc_vp)->c_rsrcfork->ff_size);

			fp = VTOC(rsrc_vp)->c_rsrcfork;
			need_put = 1;
		}
		if (fp->ff_blocks == 0) {
			//
			// use a distinct error code for a compressed file that has no resource fork;
			// we return EALREADY to indicate that the data is already probably hot file
			// cached because it's in an EA and the attributes btree is on the ssd
			//
			err = EALREADY;
			goto out;
		}
	}
	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		if (fp->ff_extents[i].startBlock == 0) {
			break;
		}

		err = hfs_pin_block_range(hfsmp, pin_state, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount);
		if (err) {
			break;
		}
		npinned += fp->ff_extents[i].blockCount;
	}

	if (err || npinned == 0) {
		goto out;
	}

	if (fp->ff_extents[kHFSPlusExtentDensity-1].startBlock) {
		uint32_t pblocks;
		uint8_t forktype = 0;

		if (fp == VTOC(vp)->c_rsrcfork) {
			forktype = 0xff;
		}
		/*
		 * The file could have overflow extents, better pin them.
		 *
		 * We assume that since we are holding the cnode lock for this cnode,
		 * the file's extents cannot be manipulated, but the tree could, so we
		 * need to ensure that it doesn't change behind our back as we iterate it.
		 */
		int lockflags = hfs_systemfile_lock (hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
		err = hfs_pin_overflow_extents(hfsmp, VTOC(vp)->c_fileid, forktype, &pblocks);
		hfs_systemfile_unlock (hfsmp, lockflags);

		if (err) {
			goto out;
		}
		npinned += pblocks;
	}

out:
	if (num_blocks_pinned) {
		*num_blocks_pinned = npinned;
	}

	if (need_put && rsrc_vp) {
		//
		// have to unlock the cnode since it's shared between the
		// resource fork vnode and the data fork vnode (and the
		// vnode_put() may need to re-acquire the cnode lock to
		// reclaim the resource fork vnode)
		//
		hfs_unlock(VTOC(vp));
		vnode_put(rsrc_vp);
		hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
	}
	return err;
}
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 * -----------------     -----------------
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 * -----------------     -----------------
 *
 *                       -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                       -----------------
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
	     struct proc *p)
{
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	u_int32_t headblks;
	u_int32_t datablks;
	u_int32_t blksize;
	u_int32_t growsize;
	u_int32_t nextallocsave;
	daddr64_t sector_a, sector_b;
	int eflags;
	off_t newbytes;
	int retval;
	int lockflags = 0;
	int took_trunc_lock = 0;
	enum vtype vnodetype;

	vnodetype = vnode_vtype(vp);
	if (vnodetype != VREG) {
		/* Not allowed to move symlinks. */
		return (EPERM);
	}

	hfsmp = VTOHFS(vp);
	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
		return (ENOSPC);
	}

	cp = VTOC(vp);
	fp = VTOF(vp);
	if (fp->ff_unallocblocks)
		return (EINVAL);

	/*
	 * <rdar://problem/9118426>
	 * Disable HFS file relocation on content-protected filesystems
	 */
	if (cp_fs_protected (hfsmp->hfs_mp)) {
		return EINVAL;
	}

	/* If it's an SSD, also disable HFS relocation */
	if (hfsmp->hfs_flags & HFS_SSD) {
		return EINVAL;
	}

	blksize = hfsmp->blockSize;
	if (blockHint == 0)
		blockHint = hfsmp->nextAllocation;

	if (fp->ff_size > 0x7fffffff) {
		return (EFBIG);
	}
	if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		/* Force lock since caller expects lock to be held. */
		if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) {
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
			return (retval);
		}
		/* No need to continue if file was removed. */
		if (cp->c_flag & C_NOEXISTS) {
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
			return (ENOENT);
		}
		took_trunc_lock = 1;
	}
	headblks = fp->ff_blocks;
	datablks = howmany(fp->ff_size, blksize);
	growsize = datablks * blksize;
	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
	if (blockHint >= hfsmp->hfs_metazone_start &&
	    blockHint <= hfsmp->hfs_metazone_end)
		eflags |= kEFMetadataMask;

	if (hfs_start_transaction(hfsmp) != 0) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
		return (EINVAL);
	}

	/*
	 * Protect the extents b-tree and the allocation bitmap
	 * during MapFileBlockC and ExtendFileC operations.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
	if (retval) {
		retval = MacToVFSError(retval);
		goto out;
	}
	/*
	 * STEP 1 - acquire new allocation blocks.
	 */
	nextallocsave = hfsmp->nextAllocation;
	retval = ExtendFileC(hfsmp, (FCB *)fp, growsize, blockHint, eflags, &newbytes);
	if (eflags & kEFMetadataMask) {
		hfs_lock_mount(hfsmp);
		HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
		MarkVCBDirty(hfsmp);
		hfs_unlock_mount(hfsmp);
	}

	retval = MacToVFSError(retval);
	if (retval == 0) {
		cp->c_flag |= C_MODIFIED;
		if (newbytes < growsize) {
			retval = ENOSPC;
			goto restore;
		} else if (fp->ff_blocks < (headblks + datablks)) {
			printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN);
			retval = ENOSPC;
			goto restore;
		}

		retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
		if (retval) {
			retval = MacToVFSError(retval);
		} else if ((sector_a + 1) == sector_b) {
			retval = ENOSPC;
			goto restore;
		} else if ((eflags & kEFMetadataMask) &&
			   ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
			      hfsmp->hfs_metazone_end)) {
			const char * filestr;
			char emptystr = '\0';

			if (cp->c_desc.cd_nameptr != NULL) {
				filestr = (const char *)&cp->c_desc.cd_nameptr[0];
			} else if (vnode_name(vp) != NULL) {
				filestr = vnode_name(vp);
			} else {
				filestr = &emptystr;
			}
			retval = ENOSPC;
			goto restore;
		}
	}
	/* Done with system locks and journal for now. */
	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	hfs_end_transaction(hfsmp);

	if (retval) {
		/*
		 * Check to see if failure is due to excessive fragmentation.
		 */
		if ((retval == ENOSPC) &&
		    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
			hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
		}
		goto out;
	}
	/*
	 * STEP 2 - clone file data into the new allocation blocks.
	 */

	if (vnodetype == VLNK)
		retval = EPERM;
	else if (vnode_issystem(vp))
		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	else
		retval = hfs_clonefile(vp, headblks, datablks, blksize);

	/* Start transaction for step 3 or for a restore. */
	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto restore;
	}

	/*
	 * STEP 3 - switch to cloned data and remove old blocks.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = HeadTruncateFile(hfsmp, (FCB *)fp, headblks);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	if (retval)
		goto restore;
out:
	if (took_trunc_lock)
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/* Push cnode's new extent data to disk. */
	if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
		(void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
	else
		(void) hfs_flushvolumeheader(hfsmp, 0);

exit:
	hfs_end_transaction(hfsmp);

	return (retval);
restore:
	if (fp->ff_blocks == headblks) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
		goto exit;
	}
	/*
	 * Give back any newly allocated space.
	 */
	if (lockflags == 0) {
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	}

	(void) TruncateFileC(hfsmp, (FCB *)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
			     FTOC(fp)->c_fileid, false);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;

	if (took_trunc_lock)
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
	goto exit;
}
/*
 * Clone a file's data within the file.
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
	caddr_t bufp;
	size_t bufsize;
	size_t copysize;
	size_t iosize;
	size_t offset;
	off_t writebase;
	uio_t auio;
	int error = 0;

	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 128 * 1024);
	offset = 0;
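	/*
	 * The clone runs in chunks of at most 128 KB: each pass reads the next
	 * chunk of the original data with cluster_read and rewrites it at
	 * writebase (the start of the newly allocated blocks) with cluster_write,
	 * both using IO_NOCACHE so the copy doesn't pollute the cache.
	 */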
	hfs_unlock(VTOC(vp));

#if CONFIG_PROTECT
	if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
		hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
		return (error);
	}
#endif /* CONFIG_PROTECT */

	bufp = hfs_malloc(bufsize);

	auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);

	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);

		uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_read(vp, auio, copysize, IO_NOCACHE);
		if (error) {
			printf("hfs_clonefile: cluster_read failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
			error = EIO;
			break;
		}

		uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_write(vp, auio, writebase + offset,
				      writebase + offset + iosize,
				      uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
		if (error) {
			printf("hfs_clonefile: cluster_write failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
			error = EIO;
			break;
		}
		offset += iosize;
	}
	uio_free(auio);

	if ((blksize & PAGE_MASK)) {
		/*
		 * since the copy may not have started on a PAGE
		 * boundary (or may not have ended on one), we
		 * may have pages left in the cache since NOCACHE
		 * will let partially written pages linger...
		 * lets just flush the entire range to make sure
		 * we don't have any pages left that are beyond
		 * (or intersect) the real LEOF of this file
		 */
		ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
	} else {
		/*
		 * No need to call ubc_msync or hfs_invalbuf
		 * since the file was copied using IO_NOCACHE and
		 * the copy was done starting and ending on a page
		 * boundary in the file.
		 */
	}
	hfs_free(bufp, bufsize);

	hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
	return (error);
}
/*
 * Clone a system (metadata) file.
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
		 kauth_cred_t cred, struct proc *p)
{
	caddr_t  bufp;
	char * offset;
	size_t  bufsize;
	size_t  iosize;
	struct buf *bp = NULL;
	daddr64_t  blkno;
	daddr64_t  blk;
	daddr64_t  start_blk;
	daddr64_t  last_blk;
	uint32_t  breadcnt;
	uint32_t  i;
	int  error = 0;

	iosize = GetLogicalBlockSize(vp);
	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
	breadcnt = bufsize / iosize;

	bufp = hfs_malloc(bufsize);

	start_blk = ((daddr64_t)blkstart * blksize) / iosize;
	last_blk  = ((daddr64_t)blkcnt * blksize) / iosize;
	blkno = 0;
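	/*
	 * The copy proceeds through the buffer cache in chunks of up to
	 * breadcnt logical blocks: each outer pass reads up to bufsize worth
	 * of metadata blocks into bufp, then writes them back out starting at
	 * start_blk + blkno with buf_bwrite.
	 */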
	while (blkno < last_blk) {
		/*
		 * Read up to a megabyte
		 */
		offset = bufp;
		for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
			error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
			if (error) {
				printf("hfs_clonesysfile: meta_bread error %d\n", error);
				goto out;
			}
			if (buf_count(bp) != iosize) {
				printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
				goto out;
			}
			bcopy((char *)buf_dataptr(bp), offset, iosize);

			buf_markinvalid(bp);
			buf_brelse(bp);
			bp = NULL;

			offset += iosize;
		}

		/*
		 * Write up to a megabyte
		 */
		offset = bufp;
		for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
			bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
			if (bp == NULL) {
				printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
				error = EIO;
				goto out;
			}
			bcopy(offset, (char *)buf_dataptr(bp), iosize);
			error = (int)buf_bwrite(bp);
			bp = NULL;
			if (error)
				goto out;
			offset += iosize;
		}
	}
out:
	if (bp) {
		buf_brelse(bp);
	}

	hfs_free(bufp, bufsize);

	error = hfs_fsync(vp, MNT_WAIT, 0, p);

	return (error);
}
errno_t hfs_flush_invalid_ranges(vnode_t vp)
{
	cnode_t *cp = VTOC(vp);

	hfs_assert(cp->c_lockowner == current_thread());
	hfs_assert(cp->c_truncatelockowner == current_thread());

	if (!ISSET(cp->c_flag, C_ZFWANTSYNC) && !cp->c_zftimeout)
		return 0;

	filefork_t *fp = VTOF(vp);

	/*
	 * We can't hold the cnode lock whilst we call cluster_write so we
	 * need to copy the extents into a local buffer.
	 */
	int max_exts = 16;
	struct ext {
		off_t start, end;
	} exts_buf[max_exts];		// 256 bytes
	struct ext *exts = exts_buf;
	int ext_count = 0;
	errno_t ret = 0;

	struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges);

	while (r) {
		/* If we have more than can fit in our stack buffer, switch
		   to a heap buffer. */
		if (exts == exts_buf && ext_count == max_exts) {
			max_exts = 256;
			exts = hfs_malloc(sizeof(struct ext) * max_exts);
			memcpy(exts, exts_buf, ext_count * sizeof(struct ext));
		}

		struct rl_entry *next = TAILQ_NEXT(r, rl_link);

		exts[ext_count++] = (struct ext){ r->rl_start, r->rl_end };

		if (!next || (ext_count == max_exts && exts != exts_buf)) {
			hfs_unlock(cp);
			for (int i = 0; i < ext_count; ++i) {
				ret = cluster_write(vp, NULL, fp->ff_size, exts[i].end + 1,
						    exts[i].start, 0,
						    IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE);
				if (ret) {
					hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
					goto exit;
				}
			}

			if (!next) {
				hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
				break;
			}

			/* Push any existing clusters which should clean up our invalid
			   ranges as they go through hfs_vnop_blockmap. */
			cluster_push(vp, 0);

			hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);

			/*
			 * Get back to where we were (given we dropped the lock).
			 * This shouldn't be many because we pushed above.
			 */
			TAILQ_FOREACH(r, &fp->ff_invalidranges, rl_link) {
				if (r->rl_end > exts[ext_count - 1].end)
					break;
			}

			ext_count = 0;
		} else
			r = next;
	}

exit:
	if (exts != exts_buf)
		hfs_free(exts, sizeof(struct ext) * max_exts);

	return ret;
}