/*
 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*	@(#)hfs_readwrite.c	1.0
 *
 *	(c) 1998-2001 Apple Inc.  All Rights Reserved
 *
 *	hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vfs_context.h>
#include <sys/sysctl.h>
#include <sys/fsctl.h>
#include <sys/fsevents.h>
#include <libkern/OSDebug.h>
#include <miscfs/specfs/specdev.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <IOKit/IOBSD.h>
#include <sys/kdebug.h>

#include "hfs_attrlist.h"
#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "FileMgrInternal.h"
#include "BTreesInternal.h"
#include "hfs_cnode.h"

#if HFS_CONFIG_KEY_ROLL
#include "hfs_key_roll.h"
#endif
#define can_cluster(size)  ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))

enum {
	MAXHFSFILESIZE = 0x7FFFFFFF	/* this needs to go in the mount structure */
};
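/*
 * Illustrative note (not in the original source): can_cluster() admits only
 * I/O sizes that are an exact multiple of 4096 bytes and no more than half
 * of MAXPHYSIO.  For example, can_cluster(4096) and can_cluster(8192) are
 * true (assuming MAXPHYSIO is at least 16 KiB), while can_cluster(6144) is
 * false because 6144 is not a multiple of 4096.
 */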
/* from bsd/hfs/hfs_vfsops.c */
extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

/* from hfs_hotfiles.c */
extern int hfs_pin_overflow_extents (struct hfsmount *hfsmp, uint32_t fileid,
				     uint8_t forktype, uint32_t *pinned);

static int  hfs_clonefile(struct vnode *, int, int, int);
static int  hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
static int  do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
/*
 * Read data from a file.
 */
int
hfs_vnop_read(struct vnop_read_args *ap)
{
	/*
	   struct vnop_read_args {
		struct vnodeop_desc *a_desc;
		vnode_t a_vp;
		struct uio *a_uio;
		int a_ioflag;
		vfs_context_t a_context;
	   };
	 */

	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	off_t filesize;
	off_t filebytes;
	off_t start_resid = uio_resid(uio);
	off_t offset = uio_offset(uio);
	int retval = 0;
	int took_truncate_lock = 0;
	int io_throttle = 0;
	int throttled_count = 0;

	/* Preflight checks */
	if (!vnode_isreg(vp)) {
		/* can only read regular files */
		if (vnode_isdir(vp))
			return (EISDIR);
		else
			return (EPERM);
	}
	if (start_resid == 0)
		return (0);		/* Nothing left to do */
	if (offset < 0)
		return (EINVAL);	/* can't read from a negative offset */

	if ((ap->a_ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
	    (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
		/* Don't allow unencrypted io request from user space */
		return EPERM;
	}

#if HFS_COMPRESSION
	if (VNODE_IS_RSRC(vp)) {
		if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
			return 0;
		}
		/* otherwise read the resource fork normally */
	} else {
		int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
		if (compressed) {
			retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
			if (retval == 0 && !(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
				(void) hfs_addhotfile(vp);
			}
			if (compressed) {
				if (retval == 0) {
					/* successful read, update the access time */
					VTOC(vp)->c_touch_acctime = TRUE;

					//
					// compressed files are not traditional hot file candidates
					// but they may be for CF (which ignores the ff_bytesread
					// field)
					//
					if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
						VTOF(vp)->ff_bytesread = 0;
					}
				}
				return retval;
			}
			/* otherwise the file was converted back to a regular file while we were reading it */
			retval = 0;
		} else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
			int error;

			error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
			if (error) {
				return error;
			}
		}
	}
#endif /* HFS_COMPRESSION */
	cp = VTOC(vp);
	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);

#if CONFIG_PROTECT
	if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {
		goto exit;
	}

#if HFS_CONFIG_KEY_ROLL
	if (ISSET(ap->a_ioflag, IO_ENCRYPTED)) {
		off_rsrc_t off_rsrc = off_rsrc_make(offset + start_resid,
						    VNODE_IS_RSRC(vp));

		retval = hfs_key_roll_up_to(ap->a_context, vp, off_rsrc);
		if (retval)
			goto exit;
	}
#endif // HFS_CONFIG_KEY_ROLL
#endif // CONFIG_PROTECT

	/*
	 * If this read request originated from a syscall (as opposed to
	 * an in-kernel page fault or something), then set it up for
	 * throttle checks.
	 */
	if (ap->a_ioflag & IO_SYSCALL_DISPATCH) {
		io_throttle = IO_RETURN_ON_THROTTLE;
	}

read_again:

	/* Protect against a size change. */
	hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
	took_truncate_lock = 1;
	filesize = fp->ff_size;
	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;

	/*
	 * Check the file size. Note that per POSIX spec, we return 0 at
	 * file EOF, so attempting a read at an offset that is too big
	 * should just return 0 on HFS+. Since the return value was initialized
	 * to 0 above, we just jump to exit.  HFS Standard has its own behavior.
	 */
	if (offset > filesize) {
#if CONFIG_HFS_STD
		if ((hfsmp->hfs_flags & HFS_STANDARD) &&
		    (offset > (off_t)MAXHFSFILESIZE)) {
			retval = EFBIG;
		}
#endif
		goto exit;
	}

	KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_START,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

	retval = cluster_read(vp, uio, filesize, ap->a_ioflag | io_throttle);

	cp->c_touch_acctime = TRUE;

	KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

	/*
	 * Keep track of blocks read.
	 */
	if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
		int took_cnode_lock = 0;
		off_t bytesread;

		bytesread = start_resid - uio_resid(uio);

		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
			hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
			took_cnode_lock = 1;
		}
		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < hfsmp->hfc_timebase) {
			struct timeval tv;

			fp->ff_bytesread = bytesread;
			microtime(&tv);
			cp->c_atime = tv.tv_sec;
		} else {
			fp->ff_bytesread += bytesread;
		}

		if (!(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
			//
			// We don't add hotfiles for processes doing IO_EVTONLY I/O
			// on the assumption that they're system processes such as
			// mdworker which scan everything in the system (and thus
			// do not represent user-initiated access to files)
			//
			(void) hfs_addhotfile(vp);
		}
		if (took_cnode_lock)
			hfs_unlock(cp);
	}
exit:
	if (took_truncate_lock) {
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
	}
	if (retval == EAGAIN) {
		throttle_lowpri_io(1);
		throttled_count++;

		retval = 0;
		goto read_again;
	}
	if (throttled_count)
		throttle_info_reset_window(NULL);

	return (retval);
}
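/*
 * Illustrative note (not in the original source): when the read was
 * dispatched from a syscall, cluster_read() is invoked with
 * IO_RETURN_ON_THROTTLE, so a throttled thread gets EAGAIN back instead of
 * sleeping inside the cluster layer.  The exit path above then sleeps in
 * throttle_lowpri_io(1) and retries the whole locked section:
 *
 *	cluster_read -> EAGAIN -> throttle_lowpri_io(1) -> goto read_again
 *
 * throttle_info_reset_window(NULL) only runs when at least one retry
 * happened, so an unthrottled read pays no extra cost.
 */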
/*
 * Ideally, this wouldn't be necessary; the cluster code should be
 * able to handle this on the read-side.  See <rdar://20420068>.
 */
static errno_t hfs_zero_eof_page(vnode_t vp, off_t zero_up_to)
{
	hfs_assert(VTOC(vp)->c_lockowner != current_thread());
	hfs_assert(VTOC(vp)->c_truncatelockowner == current_thread());

	struct filefork *fp = VTOF(vp);

	if (!(fp->ff_size & PAGE_MASK_64) || zero_up_to <= fp->ff_size) {
		// Nothing to do
		return 0;
	}

	zero_up_to = MIN(zero_up_to, (off_t)round_page_64(fp->ff_size));

	/* N.B. At present, @zero_up_to is not important because the cluster
	   code will always zero up to the end of the page anyway. */
	return cluster_write(vp, NULL, fp->ff_size, zero_up_to,
			     fp->ff_size, 0, IO_HEADZEROFILL);
}
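/*
 * Illustrative sketch (not part of the original source): the situation
 * hfs_zero_eof_page() guards against, assuming a 4 KiB page size.
 *
 *	ff_size = 0x1900 -> EOF lands mid-page; the bytes from 0x1900 up to
 *	the page boundary 0x2000 have never been written.  Before a write
 *	that extends the file, cluster_write() is asked (IO_HEADZEROFILL)
 *	to zero [0x1900, 0x2000) so a later pagein cannot observe stale
 *	data in that tail.
 *
 *	ff_size = 0x2000 -> already page aligned; the function returns 0
 *	without issuing any I/O.
 */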
/*
 * Write data to a file.
 */
int
hfs_vnop_write(struct vnop_write_args *ap)
{
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	kauth_cred_t cred = NULL;
	off_t origFileSize;
	off_t writelimit;
	off_t bytesToAdd = 0;
	off_t actualBytesAdded;
	off_t filebytes;
	off_t filesize;
	off_t offset;
	off_t head_off;
	ssize_t resid;
	int eflags;
	int ioflag = ap->a_ioflag;
	int lflag;
	int retval = 0;
	int lockflags;
	int cnode_locked = 0;
	int partialwrite = 0;
	int do_snapshot = 1;
	time_t orig_ctime = VTOC(vp)->c_ctime;
	int took_truncate_lock = 0;
	int io_return_on_throttle = 0;
	int throttled_count = 0;

#if HFS_COMPRESSION
	if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
		int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
		switch(state) {
			case FILE_IS_COMPRESSED:
				return EACCES;
			case FILE_IS_CONVERTING:
				/* if FILE_IS_CONVERTING, we allow writes but do not
				   bother with snapshots or else we will deadlock */
				do_snapshot = 0;
				break;
			default:
				printf("invalid state %d for compressed file\n", state);
				/* fall through */
		}
	} else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
		int error;

		error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
		if (error != 0) {
			return error;
		}
	}

	if (do_snapshot) {
		nspace_snapshot_event(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);
	}
#endif

	if ((ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
			(IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
		/* Don't allow unencrypted io request from user space */
		return EPERM;
	}
	resid = uio_resid(uio);
	offset = uio_offset(uio);

	if (offset < 0)
		return (EINVAL);
	if (resid == 0)
		return (E_NONE);
	if (!vnode_isreg(vp))
		return (EPERM);		/* Can only write regular files */

	cp = VTOC(vp);
	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);

#if CONFIG_PROTECT
	if ((retval = cp_handle_vnop (vp, CP_WRITE_ACCESS, 0)) != 0) {
		goto exit;
	}
#endif

	eflags = kEFDeferMask;	/* defer file block allocations */
#if HFS_SPARSE_DEV
	/*
	 * When the underlying device is sparse and space
	 * is low (< 8MB), stop doing delayed allocations
	 * and begin doing synchronous I/O.
	 */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
	    (hfs_freeblks(hfsmp, 0) < 2048)) {
		eflags &= ~kEFDeferMask;
		ioflag |= IO_SYNC;
	}
#endif /* HFS_SPARSE_DEV */

	if ((ioflag & (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) ==
			(IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) {
		io_return_on_throttle = IO_RETURN_ON_THROTTLE;
	}

again:
	/*
	 * Protect against a size change.
	 *
	 * Note: If took_truncate_lock is true, then we previously got the lock shared
	 * but needed to upgrade to exclusive.  So try getting it exclusive from the
	 * start.
	 */
	if (ioflag & IO_APPEND || took_truncate_lock) {
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
	}
	else {
		hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
	}
	took_truncate_lock = 1;

	/* Update UIO */
	if (ioflag & IO_APPEND) {
		uio_setoffset(uio, fp->ff_size);
		offset = fp->ff_size;
	}
	if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
		retval = EPERM;
		goto exit;
	}

	cred = vfs_context_ucred(ap->a_context);
	if (cred && suser(cred, NULL) != 0)
		eflags |= kEFReserveMask;

	origFileSize = fp->ff_size;
	writelimit = offset + resid;
	/*
	 * We may need an exclusive truncate lock for several reasons, all
	 * of which are because we may be writing to a (portion of a) block
	 * for the first time, and we need to make sure no readers see the
	 * prior, uninitialized contents of the block.  The cases are:
	 *
	 * 1. We have unallocated (delayed allocation) blocks.  We may be
	 *    allocating new blocks to the file and writing to them.
	 *    (A more precise check would be whether the range we're writing
	 *    to contains delayed allocation blocks.)
	 * 2. We need to extend the file.  The bytes between the old EOF
	 *    and the new EOF are not yet initialized.  This is important
	 *    even if we're not allocating new blocks to the file.  If the
	 *    old EOF and new EOF are in the same block, we still need to
	 *    protect that range of bytes until they are written for the
	 *    first time.
	 *
	 * If we had a shared lock with the above cases, we need to try to upgrade
	 * to an exclusive lock.  If the upgrade fails, we will lose the shared
	 * lock, and will need to take the truncate lock again; the took_truncate_lock
	 * flag will still be set, causing us to try for an exclusive lock next time.
	 */
	if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
	    ((fp->ff_unallocblocks != 0) ||
	     (writelimit > origFileSize))) {
		if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
			/*
			 * Lock upgrade failed and we lost our shared lock, try again.
			 * Note: we do not set took_truncate_lock=0 here.  Leaving it
			 * set to 1 will cause us to try to get the lock exclusive.
			 */
			goto again;
		} else {
			/* Store the owner in the c_truncatelockowner field if we successfully upgrade */
			cp->c_truncatelockowner = current_thread();
		}
	}
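/*
 * Illustrative sketch (not in the original source): the general
 * try-upgrade-or-retake pattern used above, stripped of HFS specifics.
 * lck_rw_lock_shared_to_exclusive() drops the shared hold when the upgrade
 * fails, so the caller must loop back and reacquire the lock, exclusively
 * this time (took_truncate_lock stays set to force the exclusive path):
 *
 *	for (;;) {
 *		lock_shared_or_exclusive(lck, want_exclusive);
 *		if (!need_exclusive() || want_exclusive)
 *			break;
 *		if (lck_rw_lock_shared_to_exclusive(lck))
 *			break;			// upgraded in place
 *		want_exclusive = true;		// lost the lock; retry exclusive
 *	}
 */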
	if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
		goto exit;
	}
	cnode_locked = 1;

	filebytes = hfs_blk_to_bytes(fp->ff_blocks, hfsmp->blockSize);

	if (offset > filebytes
		&& (hfs_blk_to_bytes(hfs_freeblks(hfsmp, ISSET(eflags, kEFReserveMask)),
				     hfsmp->blockSize) < offset - filebytes)) {
		retval = ENOSPC;
		goto exit;
	}

	KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_START,
		     (int)offset, uio_resid(uio), (int)fp->ff_size,
		     (int)filebytes, 0);

	/* Check if we do not need to extend the file */
	if (writelimit <= filebytes) {
		goto sizeok;
	}

	/* Allocate space to hold the new data. */
	bytesToAdd = writelimit - filebytes;

#if QUOTA
	retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
			   cred, 0);
	if (retval)
		goto exit;
#endif /* QUOTA */

	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto exit;
	}

	while (writelimit > filebytes) {
		bytesToAdd = writelimit - filebytes;

		/* Protect extents b-tree and allocation bitmap */
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

		/* Files that are changing size are not hot file candidates. */
		if (hfsmp->hfc_stage == HFC_RECORDING) {
			fp->ff_bytesread = 0;
		}
		retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
				0, eflags, &actualBytesAdded));

		hfs_systemfile_unlock(hfsmp, lockflags);

		if ((actualBytesAdded == 0) && (retval == E_NONE))
			retval = ENOSPC;
		if (retval != E_NONE)
			break;
		filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
		KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_NONE,
			(int)offset, uio_resid(uio), (int)fp->ff_size,  (int)filebytes, 0);
	}
	(void) hfs_update(vp, 0);
	(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
	(void) hfs_end_transaction(hfsmp);

	/*
	 * If we didn't grow the file enough try a partial write.
	 * POSIX expects this behavior.
	 */
	if ((retval == ENOSPC) && (filebytes > offset)) {
		retval = 0;
		partialwrite = 1;
		uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
		resid -= bytesToAdd;
		writelimit = filebytes;
	}
sizeok:
	if (retval == E_NONE) {
		if (writelimit > fp->ff_size) {
			struct timeval tv;

			filesize = writelimit;
			rl_add(fp->ff_size, writelimit - 1 , &fp->ff_invalidranges);
			microuptime(&tv);
			cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
		} else
			filesize = fp->ff_size;

		lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);

		/*
		 * We no longer use IO_HEADZEROFILL or IO_TAILZEROFILL (except
		 * for one case below).  For the regions that lie before the
		 * beginning and after the end of this write that are in the
		 * same page, we let the cluster code handle zeroing that out
		 * if necessary.  If those areas are not cached, the cluster
		 * code will try and read those areas in, and in the case
		 * where those regions have never been written to,
		 * hfs_vnop_blockmap will consult the invalid ranges and then
		 * indicate that.  The cluster code will zero out those areas.
		 */

		head_off = trunc_page_64(offset);

		if (head_off < offset && head_off >= fp->ff_size) {
			/*
			 * The first page is beyond current EOF, so as an
			 * optimisation, we can pass IO_HEADZEROFILL.
			 */
			lflag |= IO_HEADZEROFILL;
		}
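		/*
		 * Illustrative numbers (not in the original source), assuming
		 * 4 KiB pages: for a write at offset 0x2345,
		 * head_off = trunc_page_64(0x2345) = 0x2000.  If ff_size is
		 * 0x1800, the page at 0x2000 lies entirely beyond EOF, so its
		 * head [0x2000, 0x2345) can simply be zero-filled via
		 * IO_HEADZEROFILL instead of being read from disk.  If
		 * ff_size were 0x2100 (EOF inside that page), the flag is not
		 * set and the cluster code plus the invalid-range list handle
		 * the zeroing as described above.
		 */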
		/*
		 * We need to tell UBC the fork's new size BEFORE calling
		 * cluster_write, in case any of the new pages need to be
		 * paged out before cluster_write completes (which does happen
		 * in embedded systems due to extreme memory pressure).
		 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
		 * will be, so that it can pass that on to cluster_pageout, and
		 * allow those pageouts.
		 *
		 * We don't update ff_size yet since we don't want pageins to
		 * be able to see uninitialized data between the old and new
		 * EOF, until cluster_write has completed and initialized that
		 * part of the file.
		 *
		 * The vnode pager relies on the file size last given to UBC via
		 * ubc_setsize.  hfs_vnop_pageout relies on fp->ff_new_size or
		 * ff_size (whichever is larger).  NOTE: ff_new_size is always
		 * zero, unless we are extending the file via write.
		 */
		if (filesize > fp->ff_size) {
			retval = hfs_zero_eof_page(vp, offset);
			if (retval)
				goto exit;
			fp->ff_new_size = filesize;
			ubc_setsize(vp, filesize);
		}
		retval = cluster_write(vp, uio, fp->ff_size, filesize, head_off,
				0, lflag | IO_NOZERODIRTY | io_return_on_throttle);
		if (retval) {
			fp->ff_new_size = 0;	/* no longer extending; use ff_size */

			if (retval == EAGAIN) {
				/*
				 * EAGAIN indicates that we still have I/O to do, but
				 * that we now need to be throttled
				 */
				if (resid != uio_resid(uio)) {
					/*
					 * did manage to do some I/O before returning EAGAIN
					 */
					resid = uio_resid(uio);
					offset = uio_offset(uio);

					cp->c_touch_chgtime = TRUE;
					cp->c_touch_modtime = TRUE;
					hfs_incr_gencount(cp);
				}
				if (filesize > fp->ff_size) {
					/*
					 * we called ubc_setsize before the call to
					 * cluster_write... since we only partially
					 * completed the I/O, we need to
					 * re-adjust our idea of the filesize based
					 * on what is actually on disk.
					 */
					ubc_setsize(vp, offset);

					fp->ff_size = offset;
				}
				goto exit;
			}
			if (filesize > origFileSize) {
				ubc_setsize(vp, origFileSize);
			}
			goto ioerr_exit;
		}

		if (filesize > origFileSize) {
			fp->ff_size = filesize;

			/* Files that are changing size are not hot file candidates. */
			if (hfsmp->hfc_stage == HFC_RECORDING) {
				fp->ff_bytesread = 0;
			}
		}
		fp->ff_new_size = 0;	/* ff_size now has the correct size */
	}
	if (partialwrite) {
		uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
		resid += bytesToAdd;
	}

	if (vnode_should_flush_after_write(vp, ioflag))
		hfs_flush(hfsmp, HFS_FLUSH_CACHE);
ioerr_exit:
	if (!cnode_locked) {
		hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
		cnode_locked = 1;
	}

	if (resid > uio_resid(uio)) {
		cp->c_touch_chgtime = TRUE;
		cp->c_touch_modtime = TRUE;
		hfs_incr_gencount(cp);

		/*
		 * If we successfully wrote any data, and we are not the superuser
		 * we clear the setuid and setgid bits as a precaution against
		 * tampering.
		 */
		if (cp->c_mode & (S_ISUID | S_ISGID)) {
			cred = vfs_context_ucred(ap->a_context);
			if (cred && suser(cred, NULL)) {
				cp->c_mode &= ~(S_ISUID | S_ISGID);
			}
		}
	}
	if (retval) {
		if (ioflag & IO_UNIT) {
			(void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
					   0, ap->a_context);
			uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
			uio_setresid(uio, resid);
			filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
		}
	} else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio)))
		retval = hfs_update(vp, 0);

	/* Updating vcbWrCnt doesn't need to be atomic. */
	hfsmp->vcbWrCnt++;

	KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
exit:
	if (retval && took_truncate_lock
	    && cp->c_truncatelockowner == current_thread()) {
		fp->ff_new_size = 0;
		rl_remove(fp->ff_size, RL_INFINITY, &fp->ff_invalidranges);
	}

	if (cnode_locked)
		hfs_unlock(cp);

	if (took_truncate_lock) {
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
	}
	if (retval == EAGAIN) {
		throttle_lowpri_io(1);
		throttled_count++;

		retval = 0;
		goto again;
	}
	if (throttled_count)
		throttle_info_reset_window(NULL);
	return (retval);
}
/* support for the "bulk-access" fcntl */

#define CACHE_LEVELS 16
#define NUM_CACHE_ENTRIES (64*16)
#define PARENT_IDS_FLAG 0x100

struct access_cache {
	int numcached;
	int cachehits;		/* these two for statistics gathering */
	int lookups;
	unsigned int *acache;
	unsigned char *haveaccess;
};

struct access_t {
	uid_t		uid;		/* IN: effective user id */
	short		flags;		/* IN: access requested (i.e. R_OK) */
	short		num_groups;	/* IN: number of groups user belongs to */
	int		num_files;	/* IN: number of files to process */
	int		*file_ids;	/* IN: array of file ids */
	gid_t		*groups;	/* IN: array of groups */
	short		*access;	/* OUT: access info for each file (0 for 'has access') */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_access_t {
	uid_t		uid;		/* IN: effective user id */
	short		flags;		/* IN: access requested (i.e. R_OK) */
	short		num_groups;	/* IN: number of groups user belongs to */
	int		num_files;	/* IN: number of files to process */
	user32_addr_t	file_ids;	/* IN: array of file ids */
	user32_addr_t	groups;		/* IN: array of groups */
	user32_addr_t	access;		/* OUT: access info for each file (0 for 'has access') */
};

struct user64_access_t {
	uid_t		uid;		/* IN: effective user id */
	short		flags;		/* IN: access requested (i.e. R_OK) */
	short		num_groups;	/* IN: number of groups user belongs to */
	int		num_files;	/* IN: number of files to process */
	user64_addr_t	file_ids;	/* IN: array of file ids */
	user64_addr_t	groups;		/* IN: array of groups */
	user64_addr_t	access;		/* OUT: access info for each file (0 for 'has access') */
};


// these are the "extended" versions of the above structures
// note that it is crucial that they be different sized than
// the regular version
struct ext_access_t {
	uint32_t	flags;		/* IN: access requested (i.e. R_OK) */
	uint32_t	num_files;	/* IN: number of files to process */
	uint32_t	map_size;	/* IN: size of the bit map */
	uint32_t	*file_ids;	/* IN: array of file ids */
	char		*bitmap;	/* OUT: hash-bitmap of interesting directory ids */
	short		*access;	/* OUT: access info for each file (0 for 'has access') */
	uint32_t	num_parents;	/* future use */
	cnid_t		*parents;	/* future use */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_ext_access_t {
	uint32_t	flags;		/* IN: access requested (i.e. R_OK) */
	uint32_t	num_files;	/* IN: number of files to process */
	uint32_t	map_size;	/* IN: size of the bit map */
	user32_addr_t	file_ids;	/* IN: array of file ids */
	user32_addr_t	bitmap;		/* OUT: hash-bitmap of interesting directory ids */
	user32_addr_t	access;		/* OUT: access info for each file (0 for 'has access') */
	uint32_t	num_parents;	/* future use */
	user32_addr_t	parents;	/* future use */
};

struct user64_ext_access_t {
	uint32_t	flags;		/* IN: access requested (i.e. R_OK) */
	uint32_t	num_files;	/* IN: number of files to process */
	uint32_t	map_size;	/* IN: size of the bit map */
	user64_addr_t	file_ids;	/* IN: array of file ids */
	user64_addr_t	bitmap;		/* OUT: hash-bitmap of interesting directory ids */
	user64_addr_t	access;		/* OUT: access info for each file (0 for 'has access') */
	uint32_t	num_parents;	/* future use */
	user64_addr_t	parents;	/* future use */
};
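/*
 * Illustrative userspace sketch (not part of this file): the bulk-access
 * fsctl takes one of the structures above and reports, per file id, whether
 * the caller could reach that file through its parent directories.  The
 * selector name and header below are assumptions; consult
 * <hfs/hfs_fsctl.h> for the real interface.
 *
 *	uint32_t ids[2]   = { 1234, 5678 };	// catalog node ids to test
 *	short    result[2];
 *	struct ext_access_t args = {
 *		.flags     = R_OK,
 *		.num_files = 2,
 *		.file_ids  = ids,
 *		.access    = result,
 *	};
 *	// fsctl() is issued against any path on the target volume
 *	if (fsctl("/Volumes/MyHFS", HFS_EXT_BULKACCESS, &args, 0) == 0) {
 *		// result[i] == 0 means "has access"; otherwise an errno value
 *	}
 */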
/*
 * Perform a binary search for the given parent_id. Return value is
 * the index if there is a match.  If no_match_indexp is non-NULL it
 * will be assigned with the index to insert the item (even if it was
 * not found).
 */
static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
{
	int index = -1;
	unsigned int lo = 0;

	do {
		unsigned int mid = ((hi - lo)/2) + lo;
		unsigned int this_id = array[mid];

		if (parent_id == this_id) {
			hi = mid;
			break;
		}

		if (parent_id < this_id) {
			hi = mid;
			continue;
		}

		if (parent_id > this_id) {
			lo = mid + 1;
			continue;
		}
	} while (lo < hi);

	/* check if lo and hi converged on the match */
	if (parent_id == array[hi]) {
		index = hi;
	}

	if (no_match_indexp) {
		*no_match_indexp = hi;
	}

	return index;
}


static int
lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
{
	unsigned int hi;
	int matches = 0;
	int index, no_match_index;

	if (cache->numcached == 0) {
		*indexp = 0;
		return 0; // table is empty, so insert at index=0 and report no match
	}

	if (cache->numcached > NUM_CACHE_ENTRIES) {
		cache->numcached = NUM_CACHE_ENTRIES;
	}

	hi = cache->numcached - 1;

	index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);

	/* if no existing entry found, find index for new one */
	if (index == -1) {
		index = no_match_index;
		matches = 0;
	} else {
		matches = 1;
	}

	*indexp = index;
	return matches;
}
/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in).  We currently do a replace rather
 * than an insert if the cache is full.
 */
static void
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
	int lookup_index = -1;

	/* need to do a lookup first if -1 passed for index */
	if (index == -1) {
		if (lookup_bucket(cache, &lookup_index, nodeID)) {
			if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
				// only update an entry if the previous access was ESRCH (i.e. a scope checking error)
				cache->haveaccess[lookup_index] = access;
			}

			/* mission accomplished */
			return;
		} else {
			index = lookup_index;
		}
	}

	/* if the cache is full, do a replace rather than an insert */
	if (cache->numcached >= NUM_CACHE_ENTRIES) {
		cache->numcached = NUM_CACHE_ENTRIES-1;

		if (index > cache->numcached) {
			index = cache->numcached;
		}
	}

	if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
		index++;
	}

	if (index >= 0 && index < cache->numcached) {
		/* only do bcopy if we're inserting */
		bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
		bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
	}

	cache->numcached++;
	cache->acache[index] = nodeID;
	cache->haveaccess[index] = access;
}
struct cinfo {
	uid_t		uid;
	gid_t		gid;
	mode_t		mode;
	cnid_t		parentcnid;
	u_int16_t	recflags;
};

static int
snoop_callback(const cnode_t *cp, void *arg)
{
	struct cinfo *cip = arg;

	cip->uid = cp->c_uid;
	cip->gid = cp->c_gid;
	cip->mode = cp->c_mode;
	cip->parentcnid = cp->c_parentcnid;
	cip->recflags = cp->c_attr.ca_recflags;

	return (0);
}

/*
 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id.  If the item
 * isn't incore, then go to the catalog.
 */
static int
do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
	struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
{
	int error = 0;

	/* if this id matches the one the fsctl was called with, skip the lookup */
	if (cnid == skip_cp->c_cnid) {
		cnattrp->ca_uid = skip_cp->c_uid;
		cnattrp->ca_gid = skip_cp->c_gid;
		cnattrp->ca_mode = skip_cp->c_mode;
		cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
		keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
	} else {
		struct cinfo c_info;

		/* otherwise, check the cnode hash in case the file/dir is incore */
		error = hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info);

		if (error == EACCES) {
			// file is deleted
			return ESRCH;
		} else if (!error) {
			cnattrp->ca_uid = c_info.uid;
			cnattrp->ca_gid = c_info.gid;
			cnattrp->ca_mode = c_info.mode;
			cnattrp->ca_recflags = c_info.recflags;
			keyp->hfsPlus.parentID = c_info.parentcnid;
		} else {
			int lockflags;

			if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
				throttle_lowpri_io(1);

			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

			/* lookup this cnid in the catalog */
			error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);

			hfs_systemfile_unlock(hfsmp, lockflags);

			cache->lookups++;
		}
	}

	return (error);
}
/*
 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
 * up to CACHE_LEVELS as we progress towards the root.
 */
static int
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
	struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
	struct vfs_context *my_context,
	char *bitmap,
	uint32_t map_size,
	cnid_t* parents,
	uint32_t num_parents)
{
	int myErr = 0;
	int myResult;
	HFSCatalogNodeID thisNodeID;
	unsigned int myPerms;
	struct cat_attr cnattr;
	CatalogKey catkey;
	unsigned int flags;
	int cache_index = -1, scope_index = -1, scope_idx_start = -1;
	int i = 0, ids_to_cache = 0;
	int parent_ids[CACHE_LEVELS];

	thisNodeID = nodeID;
	while (thisNodeID >= kRootDirID) {
		myResult = 0;	/* default to "no access" */

		/* check the cache before resorting to hitting the catalog */

		/* ASSUMPTION: access info of cached entries is "final"... i.e. no need
		 * to look any further after hitting cached dir */

		if (lookup_bucket(cache, &cache_index, thisNodeID)) {
			cache->cachehits++;
			myErr = cache->haveaccess[cache_index];
			if (scope_index != -1) {
				if (myErr == ESRCH) {
					myErr = 0;
				}
			} else {
				scope_index = 0;   // so we'll just use the cache result
				scope_idx_start = ids_to_cache;
			}
			myResult = (myErr == 0) ? 1 : 0;
			goto ExitThisRoutine;
		}

		if (parents) {
			int tmp;

			tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
			if (scope_index == -1)
				scope_index = tmp;
			if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
				scope_idx_start = ids_to_cache;
			}
		}

		/* remember which parents we want to cache */
		if (ids_to_cache < CACHE_LEVELS) {
			parent_ids[ids_to_cache] = thisNodeID;
			ids_to_cache++;
		}
		// Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
		if (bitmap && map_size) {
			bitmap[(thisNodeID/8)%(map_size)] |= (1 << (thisNodeID&7));
		}
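		/*
		 * Illustrative note (not in the original source): the bit set
		 * above is the one a userspace consumer of the ext_access_t
		 * bitmap would test for a directory id `dir`, e.g.
		 *
		 *	if (bitmap[(dir / 8) % map_size] & (1 << (dir & 7))) {
		 *		// `dir` was one of the parents the kernel visited
		 *	}
		 *
		 * Because the index is reduced modulo map_size, distinct
		 * directory ids can alias to the same bit; the map answers
		 * "possibly visited", not "certainly".
		 */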
		/* do the lookup (checks the cnode hash, then the catalog) */
		myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
		if (myErr) {
			goto ExitThisRoutine;	/* no access */
		}

		/* Root always gets access. */
		if (suser(myp_ucred, NULL) == 0) {
			thisNodeID = catkey.hfsPlus.parentID;
			myResult = 1;
			continue;
		}

		// if the thing has acl's, do the full permission check
		if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
			struct vnode *vp;

			/* get the vnode for this cnid */
			myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
			if ( myErr ) {
				myResult = 0;
				goto ExitThisRoutine;
			}

			thisNodeID = VTOC(vp)->c_parentcnid;

			hfs_unlock(VTOC(vp));

			if (vnode_vtype(vp) == VDIR) {
				myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
			} else {
				myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
			}

			vnode_put(vp);
			if (myErr) {
				myResult = 0;
				goto ExitThisRoutine;
			}
		} else {
			int mode = cnattr.ca_mode & S_IFMT;

			myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp, myp_ucred, theProcPtr);

			if (mode == S_IFDIR) {
				flags = R_OK | X_OK;
			} else {
				flags = R_OK;
			}
			if ( (myPerms & flags) != flags) {
				myResult = 0;
				myErr = EACCES;
				goto ExitThisRoutine;	/* no access */
			}

			/* up the hierarchy we go */
			thisNodeID = catkey.hfsPlus.parentID;
		}
	}

	/* if here, we have access to this node */
	myResult = 1;

 ExitThisRoutine:
	if (parents && myErr == 0 && scope_index == -1) {
		myErr = ESRCH;
		myResult = 0;
	}
	*err = myErr;

	/* cache the parent directory(ies) */
	for (i = 0; i < ids_to_cache; i++) {
		if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
			add_node(cache, -1, parent_ids[i], ESRCH);
		} else {
			add_node(cache, -1, parent_ids[i], myErr);
		}
	}

	return (myResult);
}
static int
do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
	struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
{
	int is64bit;

	/*
	 * NOTE: on entry, the vnode has an io_ref. In case this vnode
	 * happens to be in our list of file_ids, we'll note it so that
	 * we can avoid calling hfs_chashget_nowait() on that id as that
	 * will cause a "locking against myself" panic.
	 */
	Boolean check_leaf = true;

	struct user64_ext_access_t *user_access_structp;
	struct user64_ext_access_t tmp_user_access;
	struct access_cache cache;

	int error = 0, prev_parent_check_ok=1;
	unsigned int i;

	short flags;
	unsigned int num_files = 0;
	int map_size = 0;
	int num_parents = 0;
	int *file_ids=NULL;
	short *access=NULL;
	char *bitmap=NULL;
	cnid_t *parents=NULL;
	int leaf_index;

	cnid_t cnid;
	cnid_t prevParent_cnid = 0;
	unsigned int myPerms;
	short myaccess = 0;
	struct cat_attr cnattr;
	CatalogKey catkey;
	struct cnode *skip_cp = VTOC(vp);
	kauth_cred_t cred = vfs_context_ucred(context);
	proc_t p = vfs_context_proc(context);

	is64bit = proc_is64bit(p);

	/* initialize the local cache and buffers */
	cache.numcached = 0;
	cache.cachehits = 0;
	cache.lookups = 0;
	cache.acache = NULL;
	cache.haveaccess = NULL;

	/* struct copyin done during dispatch... need to copy file_id array separately */
	if (ap->a_data == NULL) {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	if (is64bit) {
		if (arg_size != sizeof(struct user64_ext_access_t)) {
			error = EINVAL;
			goto err_exit_bulk_access;
		}

		user_access_structp = (struct user64_ext_access_t *)ap->a_data;

	} else if (arg_size == sizeof(struct user32_access_t)) {
		struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;

		// convert an old style bulk-access struct to the new style
		tmp_user_access.flags = accessp->flags;
		tmp_user_access.num_files = accessp->num_files;
		tmp_user_access.map_size = 0;
		tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
		tmp_user_access.bitmap = USER_ADDR_NULL;
		tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
		tmp_user_access.num_parents = 0;
		user_access_structp = &tmp_user_access;

	} else if (arg_size == sizeof(struct user32_ext_access_t)) {
		struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;

		// up-cast from a 32-bit version of the struct
		tmp_user_access.flags = accessp->flags;
		tmp_user_access.num_files = accessp->num_files;
		tmp_user_access.map_size = accessp->map_size;
		tmp_user_access.num_parents = accessp->num_parents;

		tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
		tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
		tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
		tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);

		user_access_structp = &tmp_user_access;
	} else {
		error = EINVAL;
		goto err_exit_bulk_access;
	}
	map_size = user_access_structp->map_size;

	num_files = user_access_structp->num_files;

	num_parents = user_access_structp->num_parents;

	if (num_files < 1) {
		goto err_exit_bulk_access;
	}
	if (num_files > 1024) {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	if (num_parents > 1024) {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	file_ids = hfs_malloc(sizeof(int) * num_files);
	access = hfs_malloc(sizeof(short) * num_files);
	if (map_size) {
		bitmap = hfs_mallocz(sizeof(char) * map_size);
	}

	if (num_parents) {
		parents = hfs_malloc(sizeof(cnid_t) * num_parents);
	}

	cache.acache = hfs_malloc(sizeof(int) * NUM_CACHE_ENTRIES);
	cache.haveaccess = hfs_malloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);

	if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
			    num_files * sizeof(int)))) {
		goto err_exit_bulk_access;
	}

	if (num_parents) {
		if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
				    num_parents * sizeof(cnid_t)))) {
			goto err_exit_bulk_access;
		}
	}

	flags = user_access_structp->flags;
	if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
		flags = R_OK;
	}

	/* check if we've been passed leaf node ids or parent ids */
	if (flags & PARENT_IDS_FLAG) {
		check_leaf = false;
	}

	/* Check access to each file_id passed in */
	for (i = 0; i < num_files; i++) {
		leaf_index = -1;
		cnid = (cnid_t) file_ids[i];

		/* root always has access */
		if ((!parents) && (!suser(cred, NULL))) {
			access[i] = 0;
			continue;
		}

		if (check_leaf) {
			/* do the lookup (checks the cnode hash, then the catalog) */
			error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
			if (error) {
				access[i] = (short) error;
				continue;
			}

			if (parents) {
				// Check if the leaf matches one of the parent scopes
				leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
				if (leaf_index >= 0 && parents[leaf_index] == cnid)
					prev_parent_check_ok = 0;
				else if (leaf_index >= 0)
					prev_parent_check_ok = 1;
			}

			// if the thing has acl's, do the full permission check
			if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
				struct vnode *cvp;
				int myErr = 0;

				/* get the vnode for this cnid */
				myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);
				if ( myErr ) {
					access[i] = (short) myErr;
					continue;
				}

				hfs_unlock(VTOC(cvp));

				if (vnode_vtype(cvp) == VDIR) {
					myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
				} else {
					myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
				}

				vnode_put(cvp);
				if (myErr) {
					access[i] = (short) myErr;
					continue;
				}
			} else {
				/* before calling CheckAccess(), check the target file for read access */
				myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
							cnattr.ca_mode, hfsmp->hfs_mp, cred, p);

				/* fail fast if no access */
				if ((myPerms & flags) == 0) {
					access[i] = EACCES;
					continue;
				}
			}
		} else {
			/* we were passed an array of parent ids */
			catkey.hfsPlus.parentID = cnid;
		}

		/* if the last guy had the same parent and had access, we're done */
		if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
			cache.cachehits++;
			access[i] = 0;
			continue;
		}

		myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
					   skip_cp, p, cred, context, bitmap, map_size, parents, num_parents);

		if (myaccess || (error == ESRCH && leaf_index != -1)) {
			access[i] = 0; // have access.. no errors to report
		} else {
			access[i] = (error != 0 ? (short) error : EACCES);
		}

		prevParent_cnid = catkey.hfsPlus.parentID;
	}

	/* copyout the access array */
	if ((error = copyout((caddr_t)access, user_access_structp->access,
			     num_files * sizeof (short)))) {
		goto err_exit_bulk_access;
	}
	if (map_size && bitmap) {
		if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
				     map_size * sizeof (char)))) {
			goto err_exit_bulk_access;
		}
	}

err_exit_bulk_access:

	hfs_free(file_ids, sizeof(int) * num_files);
	hfs_free(parents, sizeof(cnid_t) * num_parents);
	hfs_free(bitmap, sizeof(char) * map_size);
	hfs_free(access, sizeof(short) * num_files);
	hfs_free(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
	hfs_free(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);

	return (error);
}

/* end "bulk-access" support */
/*
 * Control filesystem operating characteristics.
 */
int
hfs_vnop_ioctl( struct vnop_ioctl_args /* {
		vnode_t a_vp;
		long  a_command;
		caddr_t  a_data;
		int  a_fflag;
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode * vp = ap->a_vp;
	struct hfsmount *hfsmp = VTOHFS(vp);
	vfs_context_t context = ap->a_context;
	kauth_cred_t cred = vfs_context_ucred(context);
	proc_t p = vfs_context_proc(context);
	struct vfsstatfs *vfsp;
	int is64bit;
	off_t jnl_start, jnl_size;
	struct hfs_journal_info *jip;
#if HFS_COMPRESSION
	int compressed = 0;
	off_t uncompressed_size = -1;
	int decmpfs_error = 0;

	if (ap->a_command == F_RDADVISE) {
		/* we need to inspect the decmpfs state of the file as early as possible */
		compressed = hfs_file_is_compressed(VTOC(vp), 0);
		if (compressed) {
			if (VNODE_IS_RSRC(vp)) {
				/* if this is the resource fork, treat it as if it were empty */
				uncompressed_size = 0;
			} else {
				decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
				if (decmpfs_error != 0) {
					/* failed to get the uncompressed size, we'll check for this later */
					uncompressed_size = -1;
				}
			}
		}
	}
#endif /* HFS_COMPRESSION */

	is64bit = proc_is64bit(p);

#if CONFIG_PROTECT
#if HFS_CONFIG_KEY_ROLL
	// The HFS_KEY_ROLL fsctl does its own access checks
	if (ap->a_command != HFS_KEY_ROLL)
#endif
	{
		int error = 0;
		if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
			return error;
		}
	}
#endif /* CONFIG_PROTECT */

	switch (ap->a_command) {

	case HFS_GETPATH:
	{
		struct vnode *file_vp;
		cnid_t  cnid;
		int  outlen;
		char *bufptr;
		int error;
		int flags = 0;

		/* Caller must be owner of file system. */
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES);
		}
		/* Target vnode must be file system's root. */
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}
		bufptr = (char *)ap->a_data;
		cnid = strtoul(bufptr, NULL, 10);
		if (ap->a_fflag & HFS_GETPATH_VOLUME_RELATIVE) {
			flags |= BUILDPATH_VOLUME_RELATIVE;
		}

		/* We need to call hfs_vfs_vget to leverage the code that will
		 * fix the origin list for us if needed, as opposed to calling
		 * hfs_vget, since we will need the parent for build_path call.
		 */

		if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
			return (error);
		}

		error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, flags, context);
		vnode_put(file_vp);

		return (error);
	}
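	/*
	 * Illustrative userspace sketch (not part of this file): HFS_GETPATH
	 * is called against the volume root with the target CNID written into
	 * the buffer as a decimal string; on return the same buffer holds the
	 * path.  The selector name comes from <hfs/hfs_fsctl.h>.
	 *
	 *	char buf[MAXPATHLEN];
	 *	snprintf(buf, sizeof(buf), "%u", cnid);	// e.g. a cnid from an fsevent
	 *	if (fsctl("/Volumes/MyHFS", HFS_GETPATH, buf, 0) == 0) {
	 *		// buf now contains the path of the file with that CNID
	 *	}
	 */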
	case HFS_SET_MAX_DEFRAG_SIZE:
	{
		int error = 0;		/* Assume success */
		u_int32_t maxsize = 0;

		if (vnode_vfsisrdonly(vp)) {
			return (EROFS);
		}
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (!kauth_cred_issuser(cred)) {
			return (EACCES); /* must be root */
		}

		maxsize = *(u_int32_t *)ap->a_data;

		hfs_lock_mount(hfsmp);
		if (maxsize > HFS_MAX_DEFRAG_SIZE) {
			error = EINVAL;
		} else {
			hfsmp->hfs_defrag_max = maxsize;
		}
		hfs_unlock_mount(hfsmp);

		return (error);
	}

	case HFS_FORCE_ENABLE_DEFRAG:
	{
		int error = 0;		/* Assume success */
		u_int32_t do_enable = 0;

		if (vnode_vfsisrdonly(vp)) {
			return (EROFS);
		}
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (!kauth_cred_issuser(cred)) {
			return (EACCES); /* must be root */
		}

		do_enable = *(u_int32_t *)ap->a_data;

		hfs_lock_mount(hfsmp);
		if (do_enable != 0) {
			hfsmp->hfs_defrag_nowait = 1;
		} else {
			error = EINVAL;
		}
		hfs_unlock_mount(hfsmp);

		return (error);
	}
	case HFS_TRANSFER_DOCUMENT_ID:
	{
		struct cnode *cp = NULL;
		int error;
		u_int32_t to_fd = *(u_int32_t *)ap->a_data;
		struct fileproc *to_fp;
		struct vnode *to_vp;
		struct cnode *to_cp;

		cp = VTOC(vp);

		if ((error = fp_getfvp(p, to_fd, &to_fp, &to_vp)) != 0) {
			//printf("could not get the vnode for fd %d (err %d)\n", to_fd, error);
			return error;
		}
		if ( (error = vnode_getwithref(to_vp)) ) {
			file_drop(to_fd);
			return error;
		}

		if (VTOHFS(to_vp) != hfsmp) {
			error = EXDEV;
			goto transfer_cleanup;
		}

		int need_unlock = 1;
		to_cp = VTOC(to_vp);
		error = hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
		if (error != 0) {
			//printf("could not lock the pair of cnodes (error %d)\n", error);
			goto transfer_cleanup;
		}

		if (!(cp->c_bsdflags & UF_TRACKED)) {
			error = EINVAL;
		} else if (to_cp->c_bsdflags & UF_TRACKED) {
			//
			// if the destination is already tracked, return an error
			// as otherwise it's a silent deletion of the target's
			// document-id
			//
			error = EEXIST;
		} else if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
			//
			// we can use the FndrExtendedFileInfo because the doc-id is the first
			// thing in both it and the ExtendedDirInfo struct which is fixed in
			// format and can not change layout
			//
			struct FndrExtendedFileInfo *f_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16);
			struct FndrExtendedFileInfo *to_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)to_cp->c_finderinfo + 16);

			if (f_extinfo->document_id == 0) {
				uint32_t new_id;

				hfs_unlockpair(cp, to_cp);	// have to unlock to be able to get a new-id

				if ((error = hfs_generate_document_id(hfsmp, &new_id)) == 0) {
					//
					// re-lock the pair now that we have the document-id
					//
					hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
					f_extinfo->document_id = new_id;
				} else {
					goto transfer_cleanup;
				}
			}

			to_extinfo->document_id = f_extinfo->document_id;
			f_extinfo->document_id = 0;
			//printf("TRANSFERRING: doc-id %d from ino %d to ino %d\n", to_extinfo->document_id, cp->c_fileid, to_cp->c_fileid);

			// make sure the destination is also UF_TRACKED
			to_cp->c_bsdflags |= UF_TRACKED;
			cp->c_bsdflags &= ~UF_TRACKED;

			// mark the cnodes dirty
			cp->c_flag |= C_MODIFIED;
			to_cp->c_flag |= C_MODIFIED;

			int lockflags;
			if ((error = hfs_start_transaction(hfsmp)) == 0) {

				lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

				(void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
				(void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, NULL, NULL);

				hfs_systemfile_unlock (hfsmp, lockflags);
				(void) hfs_end_transaction(hfsmp);
			}

#if CONFIG_FSE
			add_fsevent(FSE_DOCID_CHANGED, context,
				    FSE_ARG_DEV,   hfsmp->hfs_raw_dev,
				    FSE_ARG_INO,   (ino64_t)cp->c_fileid,	// src inode #
				    FSE_ARG_INO,   (ino64_t)to_cp->c_fileid,	// dst inode #
				    FSE_ARG_INT32, to_extinfo->document_id,
				    FSE_ARG_DONE);

			hfs_unlockpair(cp, to_cp);	// unlock this so we can send the fsevents
			need_unlock = 0;

			if (need_fsevent(FSE_STAT_CHANGED, vp)) {
				add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
			}
			if (need_fsevent(FSE_STAT_CHANGED, to_vp)) {
				add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, to_vp, FSE_ARG_DONE);
			}
#endif
		}

		if (need_unlock) {
			hfs_unlockpair(cp, to_cp);
		}

transfer_cleanup:
		vnode_put(to_vp);
		file_drop(to_fd);

		return error;
	}
	case HFS_PREV_LINK:
	case HFS_NEXT_LINK:
	{
		cnid_t linkfileid;
		cnid_t nextlinkid;
		cnid_t prevlinkid;
		int error;

		/* Caller must be owner of file system. */
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES);
		}
		/* Target vnode must be file system's root. */
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}
		linkfileid = *(cnid_t *)ap->a_data;
		if (linkfileid < kHFSFirstUserCatalogNodeID) {
			return (EINVAL);
		}
		if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
			return (error);
		}
		if (ap->a_command == HFS_NEXT_LINK) {
			*(cnid_t *)ap->a_data = nextlinkid;
		} else {
			*(cnid_t *)ap->a_data = prevlinkid;
		}
		return (0);
	}
: {
1807 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1808 if (suser(cred
, NULL
) &&
1809 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1810 return (EACCES
); /* must be owner of file system */
1812 if (!vnode_isvroot(vp
)) {
1815 /* file system must not be mounted read-only */
1816 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
1820 return hfs_resize_progress(hfsmp
, (u_int32_t
*)ap
->a_data
);
	case HFS_RESIZE_VOLUME: {
		u_int64_t newsize;
		u_int64_t cursize;
		int ret;

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}

		/* filesystem must not be mounted read only */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		newsize = *(u_int64_t *)ap->a_data;
		cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;

		if (newsize == cursize) {
			return (0);
		}
		IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeWillResize);
		if (newsize > cursize) {
			ret = hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
		} else {
			ret = hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
		}
		IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeDidResize);
		return (ret);
	}
	case HFS_CHANGE_NEXT_ALLOCATION: {
		int error = 0;		/* Assume success */
		u_int32_t location;

		if (vnode_vfsisrdonly(vp)) {
			return (EROFS);
		}
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}
		hfs_lock_mount(hfsmp);
		location = *(u_int32_t *)ap->a_data;
		if ((location >= hfsmp->allocLimit) &&
			(location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
			error = EINVAL;
			goto fail_change_next_allocation;
		}
		/* Return previous value. */
		*(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
		if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
			/* On magic value for location, set nextAllocation to next block
			 * after metadata zone and set flag in mount structure to indicate
			 * that nextAllocation should not be updated again.
			 */
			if (hfsmp->hfs_metazone_end != 0) {
				HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
			}
			hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
		} else {
			hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
			HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
		}
		MarkVCBDirty(hfsmp);
fail_change_next_allocation:
		hfs_unlock_mount(hfsmp);
		return (error);
	}
1900 case HFS_SETBACKINGSTOREINFO
: {
1901 struct vnode
* di_vp
;
1902 struct hfs_backingstoreinfo
*bsdata
;
1905 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
1908 if (hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) {
1911 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1912 if (suser(cred
, NULL
) &&
1913 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1914 return (EACCES
); /* must be owner of file system */
1916 bsdata
= (struct hfs_backingstoreinfo
*)ap
->a_data
;
1917 if (bsdata
== NULL
) {
1920 if ((error
= file_vnode(bsdata
->backingfd
, &di_vp
))) {
1923 if ((error
= vnode_getwithref(di_vp
))) {
1924 file_drop(bsdata
->backingfd
);
1928 if (vnode_mount(vp
) == vnode_mount(di_vp
)) {
1929 (void)vnode_put(di_vp
);
1930 file_drop(bsdata
->backingfd
);
1934 // Dropped in unmount
1937 hfs_lock_mount(hfsmp
);
1938 hfsmp
->hfs_backingvp
= di_vp
;
1939 hfsmp
->hfs_flags
|= HFS_HAS_SPARSE_DEVICE
;
1940 hfsmp
->hfs_sparsebandblks
= bsdata
->bandsize
/ hfsmp
->blockSize
* 4;
1941 hfs_unlock_mount(hfsmp
);
1943 /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */
1946 * If the sparse image is on a sparse image file (as opposed to a sparse
1947 * bundle), then we may need to limit the free space to the maximum size
1948 * of a file on that volume. So we query (using pathconf), and if we get
1949 * a meaningful result, we cache the number of blocks for later use in
1952 hfsmp
->hfs_backingfs_maxblocks
= 0;
1953 if (vnode_vtype(di_vp
) == VREG
) {
1956 terr
= vn_pathconf(di_vp
, _PC_FILESIZEBITS
, &hostbits
, context
);
1957 if (terr
== 0 && hostbits
!= 0 && hostbits
< 64) {
1958 u_int64_t hostfilesizemax
= ((u_int64_t
)1) << hostbits
;
1960 hfsmp
->hfs_backingfs_maxblocks
= hostfilesizemax
/ hfsmp
->blockSize
;
1964 /* The free extent cache is managed differently for sparse devices.
1965 * There is a window between which the volume is mounted and the
1966 * device is marked as sparse, so the free extent cache for this
1967 * volume is currently initialized as normal volume (sorted by block
1968 * count). Reset the cache so that it will be rebuilt again
1969 * for sparse device (sorted by start block).
1971 ResetVCBFreeExtCache(hfsmp
);
1973 (void)vnode_put(di_vp
);
1974 file_drop(bsdata
->backingfd
);
1977 case HFS_CLRBACKINGSTOREINFO
: {
1978 struct vnode
* tmpvp
;
1980 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1981 if (suser(cred
, NULL
) &&
1982 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1983 return (EACCES
); /* must be owner of file system */
1985 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
1989 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
1990 hfsmp
->hfs_backingvp
) {
1992 hfs_lock_mount(hfsmp
);
1993 hfsmp
->hfs_flags
&= ~HFS_HAS_SPARSE_DEVICE
;
1994 tmpvp
= hfsmp
->hfs_backingvp
;
1995 hfsmp
->hfs_backingvp
= NULLVP
;
1996 hfsmp
->hfs_sparsebandblks
= 0;
1997 hfs_unlock_mount(hfsmp
);
2003 #endif /* HFS_SPARSE_DEV */
2005 /* Change the next CNID stored in the VH */
2006 case HFS_CHANGE_NEXTCNID
: {
2007 int error
= 0; /* Assume success */
2012 if (vnode_vfsisrdonly(vp
)) {
2015 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
2016 if (suser(cred
, NULL
) &&
2017 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
2018 return (EACCES
); /* must be owner of file system */
2021 fileid
= *(u_int32_t
*)ap
->a_data
;
2023 /* Must have catalog lock excl. to advance the CNID pointer */
2024 lockflags
= hfs_systemfile_lock (hfsmp
, SFL_CATALOG
, HFS_EXCLUSIVE_LOCK
);
2026 hfs_lock_mount(hfsmp
);
2028 /* If it is less than the current next CNID, force the wraparound bit to be set */
2029 if (fileid
< hfsmp
->vcbNxtCNID
) {
2033 /* Return previous value. */
2034 *(u_int32_t
*)ap
->a_data
= hfsmp
->vcbNxtCNID
;
2036 hfsmp
->vcbNxtCNID
= fileid
;
2039 hfsmp
->vcbAtrb
|= kHFSCatalogNodeIDsReusedMask
;
2042 MarkVCBDirty(hfsmp
);
2043 hfs_unlock_mount(hfsmp
);
2044 hfs_systemfile_unlock (hfsmp
, lockflags
);
2052 mp
= vnode_mount(vp
);
2053 hfsmp
= VFSTOHFS(mp
);
2058 vfsp
= vfs_statfs(mp
);
2060 if (kauth_cred_getuid(cred
) != vfsp
->f_owner
&&
2061 !kauth_cred_issuser(cred
))
2064 return hfs_freeze(hfsmp
);
2068 vfsp
= vfs_statfs(vnode_mount(vp
));
2069 if (kauth_cred_getuid(cred
) != vfsp
->f_owner
&&
2070 !kauth_cred_issuser(cred
))
2073 return hfs_thaw(hfsmp
, current_proc());
2076 case HFS_EXT_BULKACCESS_FSCTL
: {
2079 if (hfsmp
->hfs_flags
& HFS_STANDARD
) {
2085 size
= sizeof(struct user64_ext_access_t
);
2087 size
= sizeof(struct user32_ext_access_t
);
2090 return do_bulk_access_check(hfsmp
, vp
, ap
, size
, context
);
2093 case HFS_SET_XATTREXTENTS_STATE
: {
2096 if (ap
->a_data
== NULL
) {
2100 state
= *(int *)ap
->a_data
;
2102 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2106 /* Super-user can enable or disable extent-based extended
2107 * attribute support on a volume
2108 * Note: Starting Mac OS X 10.7, extent-based extended attributes
2109 * are enabled by default, so any change will be transient only
2110 * till the volume is remounted.
2112 if (!kauth_cred_issuser(kauth_cred_get())) {
2115 if (state
== 0 || state
== 1)
2116 return hfs_set_volxattr(hfsmp
, HFS_SET_XATTREXTENTS_STATE
, state
);
2121 case F_SETSTATICCONTENT
: {
2123 int enable_static
= 0;
2124 struct cnode
*cp
= NULL
;
2126 * lock the cnode, decorate the cnode flag, and bail out.
2127 * VFS should have already authenticated the caller for us.
2132 * Note that even though ap->a_data is of type caddr_t,
2133 * the fcntl layer at the syscall handler will pass in NULL
2134 * or 1 depending on what the argument supplied to the fcntl
2135 * was. So it is in fact correct to check the ap->a_data
2136 * argument for zero or non-zero value when deciding whether or not
2137 * to enable the static bit in the cnode.
2141 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2146 error
= hfs_lock (cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
2148 if (enable_static
) {
2149 cp
->c_flag
|= C_SSD_STATIC
;
2152 cp
->c_flag
&= ~C_SSD_STATIC
;
2159 case F_SET_GREEDY_MODE
: {
2161 int enable_greedy_mode
= 0;
2162 struct cnode
*cp
= NULL
;
2164 * lock the cnode, decorate the cnode flag, and bail out.
2165 * VFS should have already authenticated the caller for us.
2170 * Note that even though ap->a_data is of type caddr_t,
2171 * the fcntl layer at the syscall handler will pass in NULL
2172 * or 1 depending on what the argument supplied to the fcntl
2173 * was. So it is in fact correct to check the ap->a_data
2174 * argument for zero or non-zero value when deciding whether or not
2175 * to enable the greedy mode bit in the cnode.
2177 enable_greedy_mode
= 1;
2179 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2184 error
= hfs_lock (cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
2186 if (enable_greedy_mode
) {
2187 cp
->c_flag
|= C_SSD_GREEDY_MODE
;
2190 cp
->c_flag
&= ~C_SSD_GREEDY_MODE
;
2199 uint32_t iotypeflag
= 0;
2201 struct cnode
*cp
= NULL
;
2203 * lock the cnode, decorate the cnode flag, and bail out.
2204 * VFS should have already authenticated the caller for us.
2207 if (ap
->a_data
== NULL
) {
2212 * Note that even though ap->a_data is of type caddr_t, we
2213 * can only use 32 bits of flag values.
2215 iotypeflag
= (uint32_t) ap
->a_data
;
2216 switch (iotypeflag
) {
2217 case F_IOTYPE_ISOCHRONOUS
:
2224 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2229 error
= hfs_lock (cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
2231 switch (iotypeflag
) {
2232 case F_IOTYPE_ISOCHRONOUS
:
2233 cp
->c_flag
|= C_IO_ISOCHRONOUS
;
	case F_MAKECOMPRESSED: {
		uint32_t gen_counter;
		struct cnode *cp = NULL;
		int reset_decmp = 0;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return EROFS;
		}

		/*
		 * acquire & lock the cnode.
		 * VFS should have already authenticated the caller for us.
		 */
		cp = VTOC(vp);
		if (ap->a_data == NULL) {
			return EINVAL;
		}

		/*
		 * Cast the pointer into a uint32_t so we can extract the
		 * supplied generation counter.
		 */
		gen_counter = *((uint32_t*)ap->a_data);

		/* Grab truncate lock first; we may truncate the file */
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

		error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error) {
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
			return error;
		}

		/* Are there any other usecounts/FDs? */
		if (vnode_isinuse(vp, 1)) {
			hfs_unlock(cp);
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
			return EBUSY;
		}

		/* now we have the cnode locked down; Validate arguments */
		if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) {
			/* EINVAL if you are trying to manipulate an IMMUTABLE file */
			hfs_unlock(cp);
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
			return EINVAL;
		}

		if ((hfs_get_gencount(cp)) == gen_counter) {
			/*
			 * OK, the gen_counter matched.  Go for it:
			 * Toggle state bits, truncate file, and suppress mtime update
			 */
			reset_decmp = 1;
			cp->c_bsdflags |= UF_COMPRESSED;

			error = hfs_truncate(vp, 0, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES,
			                     ap->a_context);
		} else {
			error = ESTALE;
		}

		/* Unlock cnode before executing decmpfs ; they may need to get an EA */
		hfs_unlock(cp);

		/*
		 * Reset the decmp state while still holding the truncate lock. We need to
		 * serialize here against a listxattr on this node which may occur at any
		 * time.
		 *
		 * Even if '0/skiplock' is passed in 2nd argument to hfs_file_is_compressed,
		 * that will still potentially require getting the com.apple.decmpfs EA. If the
		 * EA is required, then we can't hold the cnode lock, because the getxattr call is
		 * generic (through VFS), and can't pass along any info telling it that we're already
		 * holding it (the lock). If we don't serialize, then we risk listxattr stopping
		 * and trying to fill in the hfs_file_is_compressed info during the callback
		 * operation, which will result in deadlock against the b-tree node.
		 *
		 * So, to serialize against listxattr (which will grab buf_t meta references on
		 * the b-tree blocks), we hold the truncate lock as we're manipulating the
		 * decmpfs state of the cnode.
		 */
		if ((reset_decmp) && (error == 0)) {
			decmpfs_cnode *dp = VTOCMP(vp);
			if (dp != NULL)
				decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);

			/* Initialize the decmpfs node as needed */
			(void) hfs_file_is_compressed(cp, 0);	/* ok to take lock */
		}

		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
		break;
	}
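
	/*
	 * Illustrative sketch (not part of the build) of the generation-counter
	 * handshake above: the caller samples the file's gen count, stores the
	 * compressed payload in the com.apple.decmpfs EA, then asks the kernel to
	 * flip UF_COMPRESSED with the sampled counter.  If the file changed in
	 * between, the counter no longer matches and the request fails (e.g.
	 * ESTALE) instead of truncating live data.  Helper names below are
	 * hypothetical.
	 */
#if 0
	static int make_compressed(int fd)
	{
		uint32_t gen;

		if (sample_gen_count(fd, &gen) != 0)       /* hypothetical helper */
			return -1;
		if (write_decmpfs_xattr(fd) != 0)          /* hypothetical helper */
			return -1;
		/* fails if the gen count moved underneath us */
		return fcntl(fd, F_MAKECOMPRESSED, &gen);
	}
#endif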
	case F_SETBACKINGSTORE: {
		/*
		 * See comment in F_SETSTATICCONTENT re: using
		 * a null check for a_data
		 */
		if (ap->a_data) {
			error = hfs_set_backingstore(vp, 1);
		} else {
			error = hfs_set_backingstore(vp, 0);
		}
		break;
	}

	case F_GETPATH_MTMINFO: {
		int *data = (int*) ap->a_data;

		/* Ask if this is a backingstore vnode */
		error = hfs_is_backingstore(vp, data);

		break;
	}
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error == 0) {
			error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_FULL, p);
			hfs_unlock(VTOC(vp));
		}

		break;
	}

	case F_BARRIERFSYNC: {
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error == 0) {
			error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_BARRIER, p);
			hfs_unlock(VTOC(vp));
		}

		break;
	}
	{
		register struct cnode *cp;

		if (!vnode_isreg(vp))
			return EINVAL;

		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error == 0) {
			cp = VTOC(vp);
			/*
			 * used by regression test to determine if
			 * all the dirty pages (via write) have been cleaned
			 * after a call to 'fsync'.
			 */
			error = is_file_clean(vp, VTOF(vp)->ff_size);
			hfs_unlock(cp);
		}
		break;
	}
	{
		register struct radvisory *ra;
		struct filefork *fp;

		if (!vnode_isreg(vp))
			return EINVAL;

		ra = (struct radvisory *)(ap->a_data);
		fp = VTOF(vp);

		/* Protect against a size change. */
		hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

#if HFS_COMPRESSION
		if (uncompressed_size == -1) {
			/* fetching the uncompressed size failed above, so return the error */
			error = decmpfs_error;
		} else if (ra->ra_offset >= uncompressed_size) {
			error = EFBIG;
		} else {
			error = advisory_read(vp, uncompressed_size, ra->ra_offset, ra->ra_count);
		}
#endif /* HFS_COMPRESSION */

		if (ra->ra_offset >= fp->ff_size) {
			error = EFBIG;
		} else {
			error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
		}

		hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
		break;
	}
	case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
	{
		if (is64bit) {
			*(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
		}
		else {
			*(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
		}
		break;
	}
	case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
		*(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
		break;

	case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
		*(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
		break;

	case HFS_FSCTL_GET_VERY_LOW_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
		break;

	case HFS_FSCTL_SET_VERY_LOW_DISK:
		if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
			return EINVAL;
		}

		hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
		break;

	case HFS_FSCTL_GET_LOW_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
		break;

	case HFS_FSCTL_SET_LOW_DISK:
		if (   *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
		    || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
			return EINVAL;
		}

		hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
		break;

	case HFS_FSCTL_GET_DESIRED_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
		break;

	case HFS_FSCTL_SET_DESIRED_DISK:
		if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
			return EINVAL;
		}

		hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
		break;

	case HFS_VOLUME_STATUS:
		*(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
		break;
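
	/*
	 * Illustrative sketch (not part of the build): taken together, the setter
	 * checks above keep the three free-space notification levels strictly
	 * ordered,
	 *
	 *     dangerlimit < warninglimit < desiredlevel
	 *
	 * (all in the same units as the hfsmp notification fields), so an update
	 * is rejected with EINVAL unless it preserves that ordering against its
	 * two neighbours.
	 */
#if 0
	static bool freespace_levels_valid(uint32_t danger, uint32_t warning, uint32_t desired)
	{
		return (danger < warning) && (warning < desired);
	}
#endif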
	case HFS_SET_BOOT_INFO:
		if (!vnode_isvroot(vp))
			return(EINVAL);
		if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
			return(EACCES);	/* must be superuser or owner of filesystem */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		hfs_lock_mount (hfsmp);
		bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
		hfs_unlock_mount (hfsmp);
		(void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
		break;

	case HFS_GET_BOOT_INFO:
		if (!vnode_isvroot(vp))
			return(EINVAL);
		hfs_lock_mount (hfsmp);
		bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
		hfs_unlock_mount(hfsmp);
		break;
	case HFS_MARK_BOOT_CORRUPT:
		/* Mark the boot volume corrupt by setting
		 * kHFSVolumeInconsistentBit in the volume header.  This will
		 * force fsck_hfs on next mount.
		 */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return EACCES;
		}

		/* Allowed only on the root vnode of the boot volume */
		if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
		    !vnode_isvroot(vp)) {
			return EINVAL;
		}
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
		hfs_mark_inconsistent(hfsmp, HFS_FSCK_FORCED);
		break;
	case HFS_FSCTL_GET_JOURNAL_INFO:
		jip = (struct hfs_journal_info*)ap->a_data;

		if (hfsmp->jnl == NULL) {
			jnl_start = 0;
			jnl_size  = 0;
		} else {
			jnl_start = hfs_blk_to_bytes(hfsmp->jnl_start, hfsmp->blockSize) + hfsmp->hfsPlusIOPosOffset;
			jnl_size  = hfsmp->jnl_size;
		}

		jip->jstart = jnl_start;
		jip->jsize = jnl_size;
		break;
	case HFS_SET_ALWAYS_ZEROFILL: {
		struct cnode *cp = VTOC(vp);

		if (*(int *)ap->a_data) {
			cp->c_flag |= C_ALWAYS_ZEROFILL;
		} else {
			cp->c_flag &= ~C_ALWAYS_ZEROFILL;
		}
		break;
	}

	case HFS_DISABLE_METAZONE: {
		/* Only root can disable metadata zone */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return EACCES;
		}
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}

		/* Disable metadata zone now */
		(void) hfs_metadatazone_init(hfsmp, true);
		printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
		break;
	}
	case HFS_FSINFO_METADATA_BLOCKS: {
		struct hfsinfo_metadata *hinfo;

		hinfo = (struct hfsinfo_metadata *)ap->a_data;

		/* Get information about number of metadata blocks */
		error = hfs_getinfo_metadata_blocks(hfsmp, hinfo);
		if (error) {
			return error;
		}

		break;
	}

	case HFS_GET_FSINFO: {
		hfs_fsinfo *fsinfo = (hfs_fsinfo *)ap->a_data;

		/* Only root is allowed to get fsinfo */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return EACCES;
		}

		/*
		 * Make sure that the caller's version number matches with
		 * the kernel's version number.  This will make sure that
		 * if the structures being read/written into are changed
		 * by the kernel, the caller will not read incorrect data.
		 *
		 * The first three fields --- request_type, version and
		 * flags are same for all the hfs_fsinfo structures, so
		 * we can access the version number by assuming any
		 * structure for now.
		 */
		if (fsinfo->header.version != HFS_FSINFO_VERSION) {
			return ENOTSUP;
		}

		/* Make sure that the current file system is not marked inconsistent */
		if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
			return EIO;
		}

		return hfs_get_fsinfo(hfsmp, ap->a_data);
	}
	case HFS_CS_FREESPACE_TRIM: {
		/* Only root allowed */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return EACCES;
		}

		/*
		 * This core functionality is similar to hfs_scan_blocks().
		 * The main difference is that hfs_scan_blocks() is called
		 * as part of mount where we are assured that the journal is
		 * empty to start with.  This fcntl() can be called on a
		 * mounted volume, therefore it has to flush the content of
		 * the journal as well as ensure the state of summary table.
		 *
		 * This fcntl scans over the entire allocation bitmap,
		 * creates list of all the free blocks, and issues TRIM
		 * down to the underlying device.  This can take a long time
		 * as it can generate up to 512MB of read I/O.
		 */

		if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) {
			error = hfs_init_summary(hfsmp);
			if (error) {
				printf("hfs: fsctl() could not initialize summary table for %s\n", hfsmp->vcbVN);
				return error;
			}
		}

		/*
		 * The journal maintains list of recently deallocated blocks to
		 * issue DKIOCUNMAPs when the corresponding journal transaction is
		 * flushed to the disk.  To avoid any race conditions, we only
		 * want one active trim list and only one thread issuing DKIOCUNMAPs.
		 * Therefore we make sure that the journal trim list is sync'ed,
		 * empty, and not modifiable for the duration of our scan.
		 *
		 * Take the journal lock before flushing the journal to the disk.
		 * We will keep on holding the journal lock till we don't get the
		 * bitmap lock to make sure that no new journal transactions can
		 * start.  This will make sure that the journal trim list is not
		 * modified after the journal flush and before getting bitmap lock.
		 * We can release the journal lock after we acquire the bitmap
		 * lock as it will prevent any further block deallocations.
		 */
		hfs_journal_lock(hfsmp);

		/* Flush the journal and wait for all I/Os to finish up */
		error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
		if (error) {
			hfs_journal_unlock(hfsmp);
			return error;
		}

		/* Take bitmap lock to ensure it is not being modified */
		lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);

		/* Release the journal lock */
		hfs_journal_unlock(hfsmp);

		/*
		 * ScanUnmapBlocks reads the bitmap in large block size
		 * (up to 1MB) unlike the runtime which reads the bitmap
		 * in the 4K block size.  This can cause buf_t collisions
		 * and potential data corruption.  To avoid this, we
		 * invalidate all the existing buffers associated with
		 * the bitmap vnode before scanning it.
		 *
		 * Note: ScanUnmapBlock() cleans up all the buffers
		 * after itself, so there won't be any large buffers left
		 * for us to clean up after it returns.
		 */
		error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
		if (error) {
			hfs_systemfile_unlock(hfsmp, lockflags);
			return error;
		}

		/* Traverse bitmap and issue DKIOCUNMAPs */
		error = ScanUnmapBlocks(hfsmp);

		hfs_systemfile_unlock(hfsmp, lockflags);
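
		/*
		 * Illustrative sketch (not part of the build): the lock dance above
		 * can be read as a small ordering protocol.  The journal lock is only
		 * needed to keep new transactions (and hence new trim-list entries)
		 * out until the bitmap lock is held; once the bitmap is frozen, block
		 * deallocations cannot proceed anyway and the journal lock can go.
		 */
#if 0
		/* pseudo-steps mirroring the case above */
		hfs_journal_lock(hfsmp);                              /* 1. stop new transactions         */
		hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);             /* 2. drain journal + trim list     */
		lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP,
		                                HFS_EXCLUSIVE_LOCK);  /* 3. freeze the allocation bitmap  */
		hfs_journal_unlock(hfsmp);                            /* 4. journal lock no longer needed */
		ScanUnmapBlocks(hfsmp);                               /* 5. issue DKIOCUNMAPs             */
		hfs_systemfile_unlock(hfsmp, lockflags);
#endif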
	case HFS_SET_HOTFILE_STATE: {
		struct cnode *cp = VTOC(vp);
		uint32_t hf_state = *((uint32_t*)ap->a_data);
		uint32_t num_unpinned = 0;

		error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (error) {
			return error;
		}

		// printf("hfs: setting hotfile state %d on %s\n", hf_state, vp->v_name);
		if (hf_state == HFS_MARK_FASTDEVCANDIDATE) {
			vnode_setfastdevicecandidate(vp);

			cp->c_attr.ca_recflags |= kHFSFastDevCandidateMask;
			cp->c_attr.ca_recflags &= ~kHFSDoNotFastDevPinMask;
			cp->c_flag |= C_MODIFIED;

		} else if (hf_state == HFS_UNMARK_FASTDEVCANDIDATE || hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
			vnode_clearfastdevicecandidate(vp);
			hfs_removehotfile(vp);

			if (cp->c_attr.ca_recflags & kHFSFastDevPinnedMask) {
				hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, &num_unpinned);
			}

			if (hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
				cp->c_attr.ca_recflags |= kHFSDoNotFastDevPinMask;
			}
			cp->c_attr.ca_recflags &= ~(kHFSFastDevCandidateMask|kHFSFastDevPinnedMask);
			cp->c_flag |= C_MODIFIED;

		} else {
			error = EINVAL;
		}

		hfs_unlock(cp);

		if (num_unpinned != 0) {
			lck_mtx_lock(&hfsmp->hfc_mutex);
			hfsmp->hfs_hotfile_freeblks += num_unpinned;
			lck_mtx_unlock(&hfsmp->hfc_mutex);
		}

		break;
	}
	case HFS_REPIN_HOTFILE_STATE: {
		uint32_t repin_what = *((uint32_t*)ap->a_data);

		/* Only root allowed */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return EACCES;
		}

		if (!(hfsmp->hfs_flags & (HFS_CS_METADATA_PIN | HFS_CS_HOTFILE_PIN))) {
			// this system is neither regular Fusion or Cooperative Fusion
			// so this fsctl makes no sense.
			return EINVAL;
		}

		//
		// After converting a CoreStorage volume to be encrypted, the
		// extents could have moved around underneath us.  This call
		// allows corestoraged to re-pin everything that should be
		// pinned (it would happen on the next reboot too but that could
		// be a long time away).
		//
		if ((repin_what & HFS_REPIN_METADATA) && (hfsmp->hfs_flags & HFS_CS_METADATA_PIN)) {
			hfs_pin_fs_metadata(hfsmp);
		}
		if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) {
			hfs_repin_hotfiles(hfsmp);
		}
		if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_SWAPFILE_PIN)) {
			//XXX Swapfiles (marked SWAP_PINNED) may have moved too.
			//XXX Do we care?  They have a more transient/dynamic nature/lifetime.
		}

		break;
	}
#if HFS_CONFIG_KEY_ROLL

	case HFS_KEY_ROLL: {
		if (!kauth_cred_issuser(kauth_cred_get()))
			return EACCES;

		hfs_key_roll_args_t *args = (hfs_key_roll_args_t *)ap->a_data;

		return hfs_key_roll_op(ap->a_context, ap->a_vp, args);
	}

	case HFS_GET_KEY_AUTO_ROLL: {
		if (!kauth_cred_issuser(kauth_cred_get()))
			return EACCES;

		hfs_key_auto_roll_args_t *args = (hfs_key_auto_roll_args_t *)ap->a_data;
		if (args->api_version != HFS_KEY_AUTO_ROLL_API_VERSION_1)
			return ENOTSUP;
		args->flags = (ISSET(hfsmp->cproot_flags, CP_ROOT_AUTO_ROLL_OLD_CLASS_GENERATION)
		               ? HFS_KEY_AUTO_ROLL_OLD_CLASS_GENERATION : 0);
		args->min_key_os_version = hfsmp->hfs_auto_roll_min_key_os_version;
		args->max_key_os_version = hfsmp->hfs_auto_roll_max_key_os_version;
		break;
	}

	case HFS_SET_KEY_AUTO_ROLL: {
		if (!kauth_cred_issuser(kauth_cred_get()))
			return EACCES;

		hfs_key_auto_roll_args_t *args = (hfs_key_auto_roll_args_t *)ap->a_data;
		if (args->api_version != HFS_KEY_AUTO_ROLL_API_VERSION_1)
			return ENOTSUP;
		return cp_set_auto_roll(hfsmp, args);
	}

#endif // HFS_CONFIG_KEY_ROLL
	case F_TRANSCODEKEY:
		/*
		 * This API is only supported when called via kernel so
		 * a_fflag must be set to 1 (it's not possible to get here
		 * with it set to 1 via fsctl).
		 */
		if (ap->a_fflag != 1)
			return EINVAL;

		return cp_vnode_transcode(vp, (cp_key_t *)ap->a_data);

	case F_GETPROTECTIONLEVEL:
		return cp_get_root_major_vers (vp, (uint32_t *)ap->a_data);

	case F_GETDEFAULTPROTLEVEL:
		return cp_get_default_level(vp, (uint32_t *)ap->a_data);

#endif // CONFIG_PROTECT
2885 return hfs_pin_vnode(hfsmp
, vp
, HFS_PIN_IT
| HFS_DATALESS_PIN
,
int
hfs_vnop_select(__unused struct vnop_select_args *ap)
/*
	struct vnop_select_args {
		vnode_t a_vp;
		int a_which;
		int a_fflags;
		void *a_wql;
		vfs_context_t a_context;
	};
*/
{
	/*
	 * We should really check to see if I/O is possible.
	 */
	return (1);
}

/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run.  The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size; currently it is 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
 * in the run.
 */
int
hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
{
	struct filefork *fp = VTOF(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	int retval = E_NONE;
	u_int32_t logBlockSize;
	size_t bytesContAvail = 0;
	off_t blockposition;
	int lockExtBtree;
	int lockflags;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (vpp != NULL)
		*vpp = hfsmp->hfs_devvp;
	if (bnp == NULL)
		return (0);

	logBlockSize = GetLogicalBlockSize(vp);
	blockposition = (off_t)bn * logBlockSize;

	lockExtBtree = overflow_extents(fp);

	if (lockExtBtree)
		lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);

	retval = MacToVFSError(
			MapFileBlockC(HFSTOVCB(hfsmp),
				(FCB*)fp,
				MAXPHYSIO,
				blockposition,
				bnp,
				&bytesContAvail));

	if (lockExtBtree)
		hfs_systemfile_unlock(hfsmp, lockflags);

	if (retval == E_NONE) {
		/* Figure out how many read ahead blocks there are */
		if (runp != NULL) {
			if (can_cluster(logBlockSize)) {
				/* Make sure this result never goes negative: */
				*runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
			} else {
				*runp = 0;
			}
		}
	}
	return (retval);
}
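
/*
 * Illustrative sketch (not part of the build): with a 4K logical block size, a
 * MapFileBlockC result of bytesContAvail = 64KB describes a 16-block run, so
 * the read-ahead hint written to *runp is (65536 / 4096) - 1 = 15 blocks
 * remaining after the one being mapped.  The numbers below are hypothetical.
 */
#if 0
	size_t   bytes_avail = 65536;   /* hypothetical contiguous bytes   */
	uint32_t log_blksz   = 4096;    /* hypothetical logical block size */
	unsigned run = (bytes_avail < log_blksz) ? 0 : (bytes_avail / log_blksz) - 1;   /* == 15 */
#endif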
/*
 * Convert logical block number to file offset.
 */
int
hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
/*
	struct vnop_blktooff_args {
		vnode_t a_vp;
		daddr64_t a_lblkno;
		off_t *a_offset;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);

	*ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);

	return(0);
}

/*
 * Convert file offset to logical block number.
 */
int
hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
/*
	struct vnop_offtoblk_args {
		vnode_t a_vp;
		off_t a_offset;
		daddr64_t *a_lblkno;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);

	*ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));

	return(0);
}
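
/*
 * Illustrative sketch (not part of the build): the two conversions above are
 * exact inverses only at logical-block boundaries; an arbitrary offset first
 * rounds down to its containing block.  With a hypothetical 4K logical block
 * size:
 *
 *     offset 12288 -> block 3 -> offset 12288   (block aligned, round trip exact)
 *     offset 13000 -> block 3 -> offset 12288   (rounded down to the block start)
 */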
/*
 * Map file offset to physical block number.
 *
 * If this function is called for write operation, and if the file
 * had virtual blocks allocated (delayed allocation), real blocks
 * are allocated by calling ExtendFileC().
 *
 * If this function is called for read operation, and if the file
 * had virtual blocks allocated (delayed allocation), no change
 * to the size of file is done, and if required, rangelist is
 * searched for mapping.
 *
 * System file cnodes are expected to be locked (shared or exclusive).
 *
 * -- INVALID RANGES --
 *
 * Invalid ranges are used to keep track of where we have extended a
 * file, but have not yet written that data to disk.  In the past we
 * would clear up the invalid ranges as we wrote to those areas, but
 * before data was actually flushed to disk.  The problem with that
 * approach is that the data can be left in the cache and is therefore
 * still not valid on disk.  So now we clear up the ranges here, when
 * the flags field has VNODE_WRITE set, indicating a write is about to
 * occur.  This isn't ideal (ideally we want to clear them up when we
 * know the data has been successfully written), but it's the best we
 * can do.
 *
 * For reads, we use the invalid ranges here in block map to indicate
 * to the caller that the data should be zeroed (a_bpn == -1).  We
 * have to be careful about what ranges we return to the cluster code.
 * Currently the cluster code can only handle non-rounded values for
 * the EOF; it cannot handle funny sized ranges in the middle of the
 * file (the main problem is that it sends down odd sized I/Os to the
 * disk).  Our code currently works because whilst the very first
 * offset and the last offset in the invalid ranges are not aligned,
 * gaps in the invalid ranges between the first and last, have to be
 * aligned (because we always write page sized blocks).  For example,
 * consider this arrangement:
 *
 *         +-------------+-----+-------+------+
 *         |             |XXXXX|       |XXXXXX|
 *         +-------------+-----+-------+------+
 *                       a     b       c      d
 *
 * This shows two invalid ranges <a, b> and <c, d>.  Whilst a and d
 * are not necessarily aligned, b and c *must* be.
 *
 * Zero-filling occurs in a number of ways:
 *
 *   1. When a read occurs and we return with a_bpn == -1.
 *
 *   2. When hfs_fsync or hfs_filedone calls hfs_flush_invalid_ranges
 *      which will cause us to iterate over the ranges bringing in
 *      pages that are not present in the cache and zeroing them.  Any
 *      pages that are already in the cache are left untouched.  Note
 *      that hfs_fsync does not always flush invalid ranges.
 *
 *   3. When we extend a file we zero out from the old EOF to the end
 *      of the page.  It would be nice if we didn't have to do this if
 *      the page wasn't present (and could defer it), but because of
 *      the problem described above, we have to.
 *
 * The invalid ranges are also used to restrict the size that we write
 * out on disk: see hfs_prepare_fork_for_update.
 *
 * Note that invalid ranges are ignored when neither the VNODE_READ nor
 * the VNODE_WRITE flag is specified.  This is useful for the
 * F_LOG2PHYS* fcntls which are not interested in invalid ranges: they
 * just want to know whether blocks are physically allocated or not.
 */
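
/*
 * Illustrative sketch (not part of the build) of how the read path below
 * consumes the diagram above: a blockmap request that starts inside <a, b>
 * comes back with *a_bpn == -1 (zero-fill), and unless the invalid range runs
 * all the way to EOF, bytesContAvail is clipped at b + 1 so the caller
 * re-enters for the valid data that follows.  The numbers are hypothetical.
 */
#if 0
	/* invalid range <a = 8192, b = 20479>, request at 8192 for 65536 bytes */
	off_t  a_foffset      = 8192;
	size_t bytesContAvail = 65536;
	off_t  rl_end         = 20479;                       /* b                            */

	daddr64_t bpn = -1;                                  /* caller must zero-fill        */
	bytesContAvail = rl_end + 1 - a_foffset;             /* clipped to 12288 bytes       */
#endif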
3087 hfs_vnop_blockmap(struct vnop_blockmap_args
*ap
)
3089 struct vnop_blockmap_args {
3097 vfs_context_t a_context;
3101 struct vnode
*vp
= ap
->a_vp
;
3103 struct filefork
*fp
;
3104 struct hfsmount
*hfsmp
;
3105 size_t bytesContAvail
= ap
->a_size
;
3106 int retval
= E_NONE
;
3109 struct rl_entry
*invalid_range
;
3110 enum rl_overlaptype overlaptype
;
3115 if (VNODE_IS_RSRC(vp
)) {
3116 /* allow blockmaps to the resource fork */
3118 if ( hfs_file_is_compressed(VTOC(vp
), 1) ) { /* 1 == don't take the cnode lock */
3119 int state
= decmpfs_cnode_get_vnode_state(VTOCMP(vp
));
3121 case FILE_IS_COMPRESSED
:
3123 case FILE_IS_CONVERTING
:
3124 /* if FILE_IS_CONVERTING, we allow blockmap */
3127 printf("invalid state %d for compressed file\n", state
);
3132 #endif /* HFS_COMPRESSION */
3134 /* Do not allow blockmap operation on a directory */
3135 if (vnode_isdir(vp
)) {
3140 * Check for underlying vnode requests and ensure that logical
3141 * to physical mapping is requested.
3143 if (ap
->a_bpn
== NULL
)
3150 if ( !vnode_issystem(vp
) && !vnode_islnk(vp
) && !vnode_isswap(vp
)) {
3151 if (cp
->c_lockowner
!= current_thread()) {
3152 hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
3156 // For reads, check the invalid ranges
3157 if (ISSET(ap
->a_flags
, VNODE_READ
)) {
3158 if (ap
->a_foffset
>= fp
->ff_size
) {
3163 overlaptype
= rl_scan(&fp
->ff_invalidranges
, ap
->a_foffset
,
3164 ap
->a_foffset
+ (off_t
)bytesContAvail
- 1,
3166 switch(overlaptype
) {
3167 case RL_MATCHINGOVERLAP
:
3168 case RL_OVERLAPCONTAINSRANGE
:
3169 case RL_OVERLAPSTARTSBEFORE
:
3170 /* There's no valid block for this byte offset */
3171 *ap
->a_bpn
= (daddr64_t
)-1;
3172 /* There's no point limiting the amount to be returned
3173 * if the invalid range that was hit extends all the way
3174 * to the EOF (i.e. there's no valid bytes between the
3175 * end of this range and the file's EOF):
3177 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
3178 ((size_t)(invalid_range
->rl_end
+ 1 - ap
->a_foffset
) < bytesContAvail
)) {
3179 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
3185 case RL_OVERLAPISCONTAINED
:
3186 case RL_OVERLAPENDSAFTER
:
3187 /* The range of interest hits an invalid block before the end: */
3188 if (invalid_range
->rl_start
== ap
->a_foffset
) {
3189 /* There's actually no valid information to be had starting here: */
3190 *ap
->a_bpn
= (daddr64_t
)-1;
3191 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
3192 ((size_t)(invalid_range
->rl_end
+ 1 - ap
->a_foffset
) < bytesContAvail
)) {
3193 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
3200 * Sadly, the lower layers don't like us to
3201 * return unaligned ranges, so we skip over
3202 * any invalid ranges here that are less than
3203 * a page: zeroing of those bits is not our
3204 * responsibility (it's dealt with elsewhere).
3207 off_t rounded_start
= round_page_64(invalid_range
->rl_start
);
3208 if ((off_t
)bytesContAvail
< rounded_start
- ap
->a_foffset
)
3210 if (rounded_start
< invalid_range
->rl_end
+ 1) {
3211 bytesContAvail
= rounded_start
- ap
->a_foffset
;
3214 } while ((invalid_range
= TAILQ_NEXT(invalid_range
,
3226 if (cp
->c_cpentry
) {
3227 const int direction
= (ISSET(ap
->a_flags
, VNODE_WRITE
)
3228 ? VNODE_WRITE
: VNODE_READ
);
3230 cp_io_params_t io_params
;
3231 cp_io_params(hfsmp
, cp
->c_cpentry
,
3232 off_rsrc_make(ap
->a_foffset
, VNODE_IS_RSRC(vp
)),
3233 direction
, &io_params
);
3235 if (io_params
.max_len
< (off_t
)bytesContAvail
)
3236 bytesContAvail
= io_params
.max_len
;
3238 if (io_params
.phys_offset
!= -1) {
3239 *ap
->a_bpn
= ((io_params
.phys_offset
+ hfsmp
->hfsPlusIOPosOffset
)
3240 / hfsmp
->hfs_logical_block_size
);
3250 /* Check virtual blocks only when performing write operation */
3251 if ((ap
->a_flags
& VNODE_WRITE
) && (fp
->ff_unallocblocks
!= 0)) {
3252 if (hfs_start_transaction(hfsmp
) != 0) {
3258 syslocks
= SFL_EXTENTS
| SFL_BITMAP
;
3260 } else if (overflow_extents(fp
)) {
3261 syslocks
= SFL_EXTENTS
;
3265 lockflags
= hfs_systemfile_lock(hfsmp
, syslocks
, HFS_EXCLUSIVE_LOCK
);
3268 * Check for any delayed allocations.
3270 if ((ap
->a_flags
& VNODE_WRITE
) && (fp
->ff_unallocblocks
!= 0)) {
3272 u_int32_t loanedBlocks
;
3275 // Make sure we have a transaction. It's possible
3276 // that we came in and fp->ff_unallocblocks was zero
3277 // but during the time we blocked acquiring the extents
3278 // btree, ff_unallocblocks became non-zero and so we
3279 // will need to start a transaction.
3281 if (started_tr
== 0) {
3283 hfs_systemfile_unlock(hfsmp
, lockflags
);
			/*
			 * Note: ExtendFileC will release any blocks on loan and
			 * acquire real blocks.  So we ask to extend by zero bytes
			 * since ExtendFileC will account for the virtual blocks.
			 */
3295 loanedBlocks
= fp
->ff_unallocblocks
;
3296 retval
= ExtendFileC(hfsmp
, (FCB
*)fp
, 0, 0,
3297 kEFAllMask
| kEFNoClumpMask
, &actbytes
);
3300 fp
->ff_unallocblocks
= loanedBlocks
;
3301 cp
->c_blocks
+= loanedBlocks
;
3302 fp
->ff_blocks
+= loanedBlocks
;
3304 hfs_lock_mount (hfsmp
);
3305 hfsmp
->loanedBlocks
+= loanedBlocks
;
3306 hfs_unlock_mount (hfsmp
);
3308 hfs_systemfile_unlock(hfsmp
, lockflags
);
3309 cp
->c_flag
|= C_MODIFIED
;
3311 (void) hfs_update(vp
, 0);
3312 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3314 hfs_end_transaction(hfsmp
);
3321 retval
= MapFileBlockC(hfsmp
, (FCB
*)fp
, bytesContAvail
, ap
->a_foffset
,
3322 ap
->a_bpn
, &bytesContAvail
);
3324 hfs_systemfile_unlock(hfsmp
, lockflags
);
3329 /* On write, always return error because virtual blocks, if any,
3330 * should have been allocated in ExtendFileC(). We do not
3331 * allocate virtual blocks on read, therefore return error
3332 * only if no virtual blocks are allocated. Otherwise we search
3333 * rangelist for zero-fills
3335 if ((MacToVFSError(retval
) != ERANGE
) ||
3336 (ap
->a_flags
& VNODE_WRITE
) ||
3337 ((ap
->a_flags
& VNODE_READ
) && (fp
->ff_unallocblocks
== 0))) {
3341 /* Validate if the start offset is within logical file size */
3342 if (ap
->a_foffset
>= fp
->ff_size
) {
3347 * At this point, we have encountered a failure during
3348 * MapFileBlockC that resulted in ERANGE, and we are not
3349 * servicing a write, and there are borrowed blocks.
3351 * However, the cluster layer will not call blockmap for
3352 * blocks that are borrowed and in-cache. We have to assume
3353 * that because we observed ERANGE being emitted from
3354 * MapFileBlockC, this extent range is not valid on-disk. So
3355 * we treat this as a mapping that needs to be zero-filled
3359 if (fp
->ff_size
- ap
->a_foffset
< (off_t
)bytesContAvail
)
3360 bytesContAvail
= fp
->ff_size
- ap
->a_foffset
;
3362 *ap
->a_bpn
= (daddr64_t
) -1;
3370 if (ISSET(ap
->a_flags
, VNODE_WRITE
)) {
3371 struct rl_entry
*r
= TAILQ_FIRST(&fp
->ff_invalidranges
);
3373 // See if we might be overlapping invalid ranges...
3374 if (r
&& (ap
->a_foffset
+ (off_t
)bytesContAvail
) > r
->rl_start
) {
3376 * Mark the file as needing an update if we think the
3377 * on-disk EOF has changed.
3379 if (ap
->a_foffset
<= r
->rl_start
)
3380 SET(cp
->c_flag
, C_MODIFIED
);
3383 * This isn't the ideal place to put this. Ideally, we
3384 * should do something *after* we have successfully
3385 * written to the range, but that's difficult to do
3386 * because we cannot take locks in the callback. At
3387 * present, the cluster code will call us with VNODE_WRITE
3388 * set just before it's about to write the data so we know
3389 * that data is about to be written. If we get an I/O
3390 * error at this point then chances are the metadata
3391 * update to follow will also have an I/O error so the
3392 * risk here is small.
3394 rl_remove(ap
->a_foffset
, ap
->a_foffset
+ bytesContAvail
- 1,
3395 &fp
->ff_invalidranges
);
3397 if (!TAILQ_FIRST(&fp
->ff_invalidranges
)) {
3398 cp
->c_flag
&= ~C_ZFWANTSYNC
;
3399 cp
->c_zftimeout
= 0;
3405 *ap
->a_run
= bytesContAvail
;
3408 *(int *)ap
->a_poff
= 0;
3412 hfs_update(vp
, TRUE
);
3413 hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3414 hfs_end_transaction(hfsmp
);
3421 return (MacToVFSError(retval
));
3425 * prepare and issue the I/O
3426 * buf_strategy knows how to deal
3427 * with requests that require
3431 hfs_vnop_strategy(struct vnop_strategy_args
*ap
)
3433 buf_t bp
= ap
->a_bp
;
3434 vnode_t vp
= buf_vnode(bp
);
3437 /* Mark buffer as containing static data if cnode flag set */
3438 if (VTOC(vp
)->c_flag
& C_SSD_STATIC
) {
3442 /* Mark buffer as containing static data if cnode flag set */
3443 if (VTOC(vp
)->c_flag
& C_SSD_GREEDY_MODE
) {
3444 bufattr_markgreedymode(buf_attr(bp
));
3447 /* mark buffer as containing burst mode data if cnode flag set */
3448 if (VTOC(vp
)->c_flag
& C_IO_ISOCHRONOUS
) {
3449 bufattr_markisochronous(buf_attr(bp
));
3453 error
= cp_handle_strategy(bp
);
3459 error
= buf_strategy(VTOHFS(vp
)->hfs_devvp
, ap
);
3465 do_hfs_truncate(struct vnode
*vp
, off_t length
, int flags
, int truncateflags
, vfs_context_t context
)
3467 register struct cnode
*cp
= VTOC(vp
);
3468 struct filefork
*fp
= VTOF(vp
);
3469 kauth_cred_t cred
= vfs_context_ucred(context
);
3472 off_t actualBytesAdded
;
3474 u_int32_t fileblocks
;
3476 struct hfsmount
*hfsmp
;
3478 int suppress_times
= (truncateflags
& HFS_TRUNCATE_SKIPTIMES
);
3480 blksize
= VTOVCB(vp
)->blockSize
;
3481 fileblocks
= fp
->ff_blocks
;
3482 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
3484 KERNEL_DEBUG(HFSDBG_TRUNCATE
| DBG_FUNC_START
,
3485 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
3490 /* This should only happen with a corrupt filesystem */
3491 if ((off_t
)fp
->ff_size
< 0)
3494 if ((!ISHFSPLUS(VTOVCB(vp
))) && (length
> (off_t
)MAXHFSFILESIZE
))
3501 /* Files that are changing size are not hot file candidates. */
3502 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
3503 fp
->ff_bytesread
= 0;
3507 * We cannot just check if fp->ff_size == length (as an optimization)
3508 * since there may be extra physical blocks that also need truncation.
3511 if ((retval
= hfs_getinoquota(cp
)))
3516 * Lengthen the size of the file. We must ensure that the
3517 * last byte of the file is allocated. Since the smallest
3518 * value of ff_size is 0, length will be at least 1.
3520 if (length
> (off_t
)fp
->ff_size
) {
3522 retval
= hfs_chkdq(cp
, (int64_t)(roundup(length
- filebytes
, blksize
)),
3528 * If we don't have enough physical space then
3529 * we need to extend the physical size.
3531 if (length
> filebytes
) {
3533 u_int32_t blockHint
= 0;
3535 /* All or nothing and don't round up to clumpsize. */
3536 eflags
= kEFAllMask
| kEFNoClumpMask
;
3538 if (cred
&& (suser(cred
, NULL
) != 0)) {
3539 eflags
|= kEFReserveMask
; /* keep a reserve */
3543 * Allocate Journal and Quota files in metadata zone.
3545 if (filebytes
== 0 &&
3546 hfsmp
->hfs_flags
& HFS_METADATA_ZONE
&&
3547 hfs_virtualmetafile(cp
)) {
3548 eflags
|= kEFMetadataMask
;
3549 blockHint
= hfsmp
->hfs_metazone_start
;
3551 if (hfs_start_transaction(hfsmp
) != 0) {
3556 /* Protect extents b-tree and allocation bitmap */
3557 lockflags
= SFL_BITMAP
;
3558 if (overflow_extents(fp
))
3559 lockflags
|= SFL_EXTENTS
;
3560 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3563 * Keep growing the file as long as the current EOF is
3564 * less than the desired value.
3566 while ((length
> filebytes
) && (retval
== E_NONE
)) {
3567 bytesToAdd
= length
- filebytes
;
3568 retval
= MacToVFSError(ExtendFileC(VTOVCB(vp
),
3573 &actualBytesAdded
));
3575 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
3576 if (actualBytesAdded
== 0 && retval
== E_NONE
) {
3577 if (length
> filebytes
)
3583 hfs_systemfile_unlock(hfsmp
, lockflags
);
3587 hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3590 hfs_end_transaction(hfsmp
);
3595 KERNEL_DEBUG(HFSDBG_TRUNCATE
| DBG_FUNC_NONE
,
3596 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
3599 if (ISSET(flags
, IO_NOZEROFILL
)) {
3600 // An optimisation for the hibernation file
3601 if (vnode_isswap(vp
))
3602 rl_remove_all(&fp
->ff_invalidranges
);
3604 if (!vnode_issystem(vp
) && retval
== E_NONE
) {
3605 if (length
> (off_t
)fp
->ff_size
) {
3608 /* Extending the file: time to fill out the current last page w. zeroes? */
3609 if (fp
->ff_size
& PAGE_MASK_64
) {
3610 /* There might be some valid data at the start of the (current) last page
3611 of the file, so zero out the remainder of that page to ensure the
3612 entire page contains valid data. */
3614 retval
= hfs_zero_eof_page(vp
, length
);
3615 hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
3616 if (retval
) goto Err_Exit
;
3619 rl_add(fp
->ff_size
, length
- 1, &fp
->ff_invalidranges
);
3620 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
3623 panic("hfs_truncate: invoked on non-UBC object?!");
3626 if (suppress_times
== 0) {
3627 cp
->c_touch_modtime
= TRUE
;
3629 fp
->ff_size
= length
;
3631 } else { /* Shorten the size of the file */
3633 // An optimisation for the hibernation file
3634 if (ISSET(flags
, IO_NOZEROFILL
) && vnode_isswap(vp
)) {
3635 rl_remove_all(&fp
->ff_invalidranges
);
3636 } else if ((off_t
)fp
->ff_size
> length
) {
3637 /* Any space previously marked as invalid is now irrelevant: */
3638 rl_remove(length
, fp
->ff_size
- 1, &fp
->ff_invalidranges
);
3642 * Account for any unmapped blocks. Note that the new
3643 * file length can still end up with unmapped blocks.
3645 if (fp
->ff_unallocblocks
> 0) {
3646 u_int32_t finalblks
;
3647 u_int32_t loanedBlocks
;
3649 hfs_lock_mount(hfsmp
);
3650 loanedBlocks
= fp
->ff_unallocblocks
;
3651 cp
->c_blocks
-= loanedBlocks
;
3652 fp
->ff_blocks
-= loanedBlocks
;
3653 fp
->ff_unallocblocks
= 0;
3655 hfsmp
->loanedBlocks
-= loanedBlocks
;
3657 finalblks
= (length
+ blksize
- 1) / blksize
;
3658 if (finalblks
> fp
->ff_blocks
) {
3659 /* calculate required unmapped blocks */
3660 loanedBlocks
= finalblks
- fp
->ff_blocks
;
3661 hfsmp
->loanedBlocks
+= loanedBlocks
;
3663 fp
->ff_unallocblocks
= loanedBlocks
;
3664 cp
->c_blocks
+= loanedBlocks
;
3665 fp
->ff_blocks
+= loanedBlocks
;
3667 hfs_unlock_mount (hfsmp
);
3670 off_t savedbytes
= ((off_t
)fp
->ff_blocks
* (off_t
)blksize
);
3671 if (hfs_start_transaction(hfsmp
) != 0) {
3676 if (fp
->ff_unallocblocks
== 0) {
3677 /* Protect extents b-tree and allocation bitmap */
3678 lockflags
= SFL_BITMAP
;
3679 if (overflow_extents(fp
))
3680 lockflags
|= SFL_EXTENTS
;
3681 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3683 retval
= MacToVFSError(TruncateFileC(VTOVCB(vp
), (FCB
*)fp
, length
, 0,
3684 FORK_IS_RSRC (fp
), FTOC(fp
)->c_fileid
, false));
3686 hfs_systemfile_unlock(hfsmp
, lockflags
);
3690 fp
->ff_size
= length
;
3693 hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3695 hfs_end_transaction(hfsmp
);
3697 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
3701 /* These are bytesreleased */
3702 (void) hfs_chkdq(cp
, (int64_t)-(savedbytes
- filebytes
), NOCRED
, 0);
3706 // Unlike when growing a file, we adjust the hotfile block count here
3707 // instead of deeper down in the block allocation code because we do
3708 // not necessarily have a vnode or "fcb" at the time we're deleting
3709 // the file and so we wouldn't know if it was hotfile cached or not
3711 hfs_hotfile_adjust_blocks(vp
, (int64_t)((savedbytes
- filebytes
) / blksize
));
3715 * Only set update flag if the logical length changes & we aren't
3716 * suppressing modtime updates.
3718 if (((off_t
)fp
->ff_size
!= length
) && (suppress_times
== 0)) {
3719 cp
->c_touch_modtime
= TRUE
;
3721 fp
->ff_size
= length
;
3723 if (cp
->c_mode
& (S_ISUID
| S_ISGID
)) {
3724 if (!vfs_context_issuser(context
))
3725 cp
->c_mode
&= ~(S_ISUID
| S_ISGID
);
3727 cp
->c_flag
|= C_MODIFIED
;
3728 cp
->c_touch_chgtime
= TRUE
; /* status changed */
3729 if (suppress_times
== 0) {
3730 cp
->c_touch_modtime
= TRUE
; /* file data was modified */
3733 * If we are not suppressing the modtime update, then
3734 * update the gen count as well.
3736 if (S_ISREG(cp
->c_attr
.ca_mode
) || S_ISLNK (cp
->c_attr
.ca_mode
)) {
3737 hfs_incr_gencount(cp
);
3741 retval
= hfs_update(vp
, 0);
3743 KERNEL_DEBUG(HFSDBG_TRUNCATE
| DBG_FUNC_NONE
,
3744 -1, -1, -1, retval
, 0);
3749 KERNEL_DEBUG(HFSDBG_TRUNCATE
| DBG_FUNC_END
,
3750 (int)length
, (int)fp
->ff_size
, (int)filebytes
, retval
, 0);
/*
 * Preparation which must be done prior to deleting the catalog record
 * of a file or directory.  In order to make the on-disk state as safe as
 * possible, we remove the catalog entry before releasing the bitmap blocks
 * and the overflow extent records.  However, some work must be done prior
 * to deleting the catalog record.
 *
 * When calling this function, the cnode must exist both in memory and on-disk.
 * If there are both resource fork and data fork vnodes, this function should
 * be called on both.
 */
3768 hfs_prepare_release_storage (struct hfsmount
*hfsmp
, struct vnode
*vp
) {
3770 struct filefork
*fp
= VTOF(vp
);
3771 struct cnode
*cp
= VTOC(vp
);
3776 /* Cannot truncate an HFS directory! */
3777 if (vnode_isdir(vp
)) {
3782 * See the comment below in hfs_truncate for why we need to call
3783 * setsize here. Essentially we want to avoid pending IO if we
3784 * already know that the blocks are going to be released here.
3785 * This function is only called when totally removing all storage for a file, so
3786 * we can take a shortcut and immediately setsize (0);
3790 /* This should only happen with a corrupt filesystem */
3791 if ((off_t
)fp
->ff_size
< 0)
3795 * We cannot just check if fp->ff_size == length (as an optimization)
3796 * since there may be extra physical blocks that also need truncation.
3799 if ((retval
= hfs_getinoquota(cp
))) {
3804 /* Wipe out any invalid ranges which have yet to be backed by disk */
3805 rl_remove(0, fp
->ff_size
- 1, &fp
->ff_invalidranges
);
3808 * Account for any unmapped blocks. Since we're deleting the
3809 * entire file, we don't have to worry about just shrinking
3810 * to a smaller number of borrowed blocks.
3812 if (fp
->ff_unallocblocks
> 0) {
3813 u_int32_t loanedBlocks
;
3815 hfs_lock_mount (hfsmp
);
3816 loanedBlocks
= fp
->ff_unallocblocks
;
3817 cp
->c_blocks
-= loanedBlocks
;
3818 fp
->ff_blocks
-= loanedBlocks
;
3819 fp
->ff_unallocblocks
= 0;
3821 hfsmp
->loanedBlocks
-= loanedBlocks
;
3823 hfs_unlock_mount (hfsmp
);
/*
 * Special wrapper around calling TruncateFileC.  This function is usable
 * even when the catalog record does not exist any longer, making it ideal
 * for use when deleting a file.  The simplification here is that we know
 * that we are releasing all blocks.
 *
 * Note that this function may be called when there is no vnode backing
 * the file fork in question.  We may call this from hfs_vnop_inactive
 * to clear out resource fork data (and may not want to clear out the data
 * fork yet).  As a result, we pointer-check both sets of inputs before
 * doing anything with them.
 *
 * The caller is responsible for saving off a copy of the filefork(s)
 * embedded within the cnode prior to calling this function.  The pointers
 * supplied as arguments must be valid even if the cnode is no longer valid.
 */
3848 hfs_release_storage (struct hfsmount
*hfsmp
, struct filefork
*datafork
,
3849 struct filefork
*rsrcfork
, u_int32_t fileid
) {
3852 u_int32_t fileblocks
;
3857 blksize
= hfsmp
->blockSize
;
3861 off_t prev_filebytes
;
3863 datafork
->ff_size
= 0;
3865 fileblocks
= datafork
->ff_blocks
;
3866 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
3867 prev_filebytes
= filebytes
;
3869 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3871 while (filebytes
> 0) {
3872 if (filebytes
> HFS_BIGFILE_SIZE
) {
3873 filebytes
-= HFS_BIGFILE_SIZE
;
3878 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3879 if (hfs_start_transaction(hfsmp
) != 0) {
3884 if (datafork
->ff_unallocblocks
== 0) {
3885 /* Protect extents b-tree and allocation bitmap */
3886 lockflags
= SFL_BITMAP
;
3887 if (overflow_extents(datafork
))
3888 lockflags
|= SFL_EXTENTS
;
3889 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3891 error
= MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp
), datafork
, filebytes
, 1, 0, fileid
, false));
3893 hfs_systemfile_unlock(hfsmp
, lockflags
);
3895 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3897 struct cnode
*cp
= datafork
? FTOC(datafork
) : NULL
;
3899 vp
= cp
? CTOV(cp
, 0) : NULL
;
3900 hfs_hotfile_adjust_blocks(vp
, (int64_t)((prev_filebytes
- filebytes
) / blksize
));
3901 prev_filebytes
= filebytes
;
3903 /* Finish the transaction and start over if necessary */
3904 hfs_end_transaction(hfsmp
);
3913 if (error
== 0 && rsrcfork
) {
3914 rsrcfork
->ff_size
= 0;
3916 fileblocks
= rsrcfork
->ff_blocks
;
3917 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
3919 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3921 while (filebytes
> 0) {
3922 if (filebytes
> HFS_BIGFILE_SIZE
) {
3923 filebytes
-= HFS_BIGFILE_SIZE
;
3928 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3929 if (hfs_start_transaction(hfsmp
) != 0) {
3934 if (rsrcfork
->ff_unallocblocks
== 0) {
3935 /* Protect extents b-tree and allocation bitmap */
3936 lockflags
= SFL_BITMAP
;
3937 if (overflow_extents(rsrcfork
))
3938 lockflags
|= SFL_EXTENTS
;
3939 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3941 error
= MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp
), rsrcfork
, filebytes
, 1, 1, fileid
, false));
3943 hfs_systemfile_unlock(hfsmp
, lockflags
);
3945 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3947 /* Finish the transaction and start over if necessary */
3948 hfs_end_transaction(hfsmp
);
3959 errno_t
hfs_ubc_setsize(vnode_t vp
, off_t len
, bool have_cnode_lock
)
3964 * Call ubc_setsize to give the VM subsystem a chance to do
3965 * whatever it needs to with existing pages before we delete
3966 * blocks. Note that symlinks don't use the UBC so we'll
3967 * get back ENOENT in that case.
3969 if (have_cnode_lock
) {
3970 error
= ubc_setsize_ex(vp
, len
, UBC_SETSIZE_NO_FS_REENTRY
);
3971 if (error
== EAGAIN
) {
3972 cnode_t
*cp
= VTOC(vp
);
3974 if (cp
->c_truncatelockowner
!= current_thread())
3975 hfs_warn("hfs: hfs_ubc_setsize called without exclusive truncate lock!");
3978 error
= ubc_setsize_ex(vp
, len
, 0);
3979 hfs_lock_always(cp
, HFS_EXCLUSIVE_LOCK
);
3982 error
= ubc_setsize_ex(vp
, len
, 0);
3984 return error
== ENOENT
? 0 : error
;
3988 * Truncate a cnode to at most length size, freeing (or adding) the
3992 hfs_truncate(struct vnode
*vp
, off_t length
, int flags
,
3993 int truncateflags
, vfs_context_t context
)
3995 struct filefork
*fp
= VTOF(vp
);
3997 u_int32_t fileblocks
;
4000 struct cnode
*cp
= VTOC(vp
);
4001 hfsmount_t
*hfsmp
= VTOHFS(vp
);
4003 /* Cannot truncate an HFS directory! */
4004 if (vnode_isdir(vp
)) {
4007 /* A swap file cannot change size. */
4008 if (vnode_isswap(vp
) && length
&& !ISSET(flags
, IO_NOAUTH
)) {
4012 blksize
= hfsmp
->blockSize
;
4013 fileblocks
= fp
->ff_blocks
;
4014 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
4016 bool caller_has_cnode_lock
= (cp
->c_lockowner
== current_thread());
4018 error
= hfs_ubc_setsize(vp
, length
, caller_has_cnode_lock
);
4022 if (!caller_has_cnode_lock
) {
4023 error
= hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
4028 if (vnode_islnk(vp
) && cp
->c_datafork
->ff_symlinkptr
) {
4029 hfs_free(cp
->c_datafork
->ff_symlinkptr
, cp
->c_datafork
->ff_size
);
4030 cp
->c_datafork
->ff_symlinkptr
= NULL
;
4033 // have to loop truncating or growing files that are
4034 // really big because otherwise transactions can get
4035 // enormous and consume too many kernel resources.
4037 if (length
< filebytes
) {
4038 while (filebytes
> length
) {
4039 if ((filebytes
- length
) > HFS_BIGFILE_SIZE
) {
4040 filebytes
-= HFS_BIGFILE_SIZE
;
4044 error
= do_hfs_truncate(vp
, filebytes
, flags
, truncateflags
, context
);
4048 } else if (length
> filebytes
) {
4049 kauth_cred_t cred
= vfs_context_ucred(context
);
4050 const bool keep_reserve
= cred
&& suser(cred
, NULL
) != 0;
4052 if (hfs_freeblks(hfsmp
, keep_reserve
)
4053 < howmany(length
- filebytes
, blksize
)) {
4056 while (filebytes
< length
) {
4057 if ((length
- filebytes
) > HFS_BIGFILE_SIZE
) {
4058 filebytes
+= HFS_BIGFILE_SIZE
;
4062 error
= do_hfs_truncate(vp
, filebytes
, flags
, truncateflags
, context
);
4067 } else /* Same logical size */ {
4069 error
= do_hfs_truncate(vp
, length
, flags
, truncateflags
, context
);
4071 /* Files that are changing size are not hot file candidates. */
4072 if (VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
) {
4073 fp
->ff_bytesread
= 0;
4076 #if HFS_CONFIG_KEY_ROLL
4077 if (!error
&& cp
->c_truncatelockowner
== current_thread()) {
4078 hfs_key_roll_check(cp
, true);
4082 if (!caller_has_cnode_lock
)
4085 // Make sure UBC's size matches up (in case we didn't completely succeed)
4086 errno_t err2
= hfs_ubc_setsize(vp
, fp
->ff_size
, caller_has_cnode_lock
);
4095 * Preallocate file storage space.
4098 hfs_vnop_allocate(struct vnop_allocate_args
/* {
4102 off_t *a_bytesallocated;
4104 vfs_context_t a_context;
4107 struct vnode
*vp
= ap
->a_vp
;
4109 struct filefork
*fp
;
4111 off_t length
= ap
->a_length
;
4113 off_t moreBytesRequested
;
4114 off_t actualBytesAdded
;
4116 u_int32_t fileblocks
;
4117 int retval
, retval2
;
4118 u_int32_t blockHint
;
4119 u_int32_t extendFlags
; /* For call to ExtendFileC */
4120 struct hfsmount
*hfsmp
;
4121 kauth_cred_t cred
= vfs_context_ucred(ap
->a_context
);
4125 *(ap
->a_bytesallocated
) = 0;
4127 if (!vnode_isreg(vp
))
4129 if (length
< (off_t
)0)
4134 orig_ctime
= VTOC(vp
)->c_ctime
;
4136 nspace_snapshot_event(vp
, orig_ctime
, ap
->a_length
== 0 ? NAMESPACE_HANDLER_TRUNCATE_OP
|NAMESPACE_HANDLER_DELETE_OP
: NAMESPACE_HANDLER_TRUNCATE_OP
, NULL
);
4138 hfs_lock_truncate(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
4140 if ((retval
= hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
))) {
4148 fileblocks
= fp
->ff_blocks
;
4149 filebytes
= (off_t
)fileblocks
* (off_t
)vcb
->blockSize
;
4151 if ((ap
->a_flags
& ALLOCATEFROMVOL
) && (length
< filebytes
)) {
4156 /* Fill in the flags word for the call to Extend the file */
4158 extendFlags
= kEFNoClumpMask
;
4159 if (ap
->a_flags
& ALLOCATECONTIG
)
4160 extendFlags
|= kEFContigMask
;
4161 if (ap
->a_flags
& ALLOCATEALL
)
4162 extendFlags
|= kEFAllMask
;
4163 if (cred
&& suser(cred
, NULL
) != 0)
4164 extendFlags
|= kEFReserveMask
;
4165 if (hfs_virtualmetafile(cp
))
4166 extendFlags
|= kEFMetadataMask
;
4170 startingPEOF
= filebytes
;
4172 if (ap
->a_flags
& ALLOCATEFROMPEOF
)
4173 length
+= filebytes
;
4174 else if (ap
->a_flags
& ALLOCATEFROMVOL
)
4175 blockHint
= ap
->a_offset
/ VTOVCB(vp
)->blockSize
;
	/* If no changes are necessary, then we're done */
4178 if (filebytes
== length
)
4182 * Lengthen the size of the file. We must ensure that the
4183 * last byte of the file is allocated. Since the smallest
4184 * value of filebytes is 0, length will be at least 1.
4186 if (length
> filebytes
) {
4187 if (ISSET(extendFlags
, kEFAllMask
)
4188 && (hfs_freeblks(hfsmp
, ISSET(extendFlags
, kEFReserveMask
))
4189 < howmany(length
- filebytes
, hfsmp
->blockSize
))) {
4194 off_t total_bytes_added
= 0, orig_request_size
;
4196 orig_request_size
= moreBytesRequested
= length
- filebytes
;
4199 retval
= hfs_chkdq(cp
,
4200 (int64_t)(roundup(moreBytesRequested
, vcb
->blockSize
)),
4207 * Metadata zone checks.
4209 if (hfsmp
->hfs_flags
& HFS_METADATA_ZONE
) {
4211 * Allocate Journal and Quota files in metadata zone.
4213 if (hfs_virtualmetafile(cp
)) {
4214 blockHint
= hfsmp
->hfs_metazone_start
;
4215 } else if ((blockHint
>= hfsmp
->hfs_metazone_start
) &&
4216 (blockHint
<= hfsmp
->hfs_metazone_end
)) {
4218 * Move blockHint outside metadata zone.
4220 blockHint
= hfsmp
->hfs_metazone_end
+ 1;
4225 while ((length
> filebytes
) && (retval
== E_NONE
)) {
4226 off_t bytesRequested
;
4228 if (hfs_start_transaction(hfsmp
) != 0) {
4233 /* Protect extents b-tree and allocation bitmap */
4234 lockflags
= SFL_BITMAP
;
4235 if (overflow_extents(fp
))
4236 lockflags
|= SFL_EXTENTS
;
4237 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
4239 if (moreBytesRequested
>= HFS_BIGFILE_SIZE
) {
4240 bytesRequested
= HFS_BIGFILE_SIZE
;
4242 bytesRequested
= moreBytesRequested
;
4245 if (extendFlags
& kEFContigMask
) {
4246 // if we're on a sparse device, this will force it to do a
4247 // full scan to find the space needed.
4248 hfsmp
->hfs_flags
&= ~HFS_DID_CONTIG_SCAN
;
4251 retval
= MacToVFSError(ExtendFileC(vcb
,
4256 &actualBytesAdded
));
4258 if (retval
== E_NONE
) {
4259 *(ap
->a_bytesallocated
) += actualBytesAdded
;
4260 total_bytes_added
+= actualBytesAdded
;
4261 moreBytesRequested
-= actualBytesAdded
;
4262 if (blockHint
!= 0) {
4263 blockHint
+= actualBytesAdded
/ vcb
->blockSize
;
4266 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)vcb
->blockSize
;
4268 hfs_systemfile_unlock(hfsmp
, lockflags
);
4271 (void) hfs_update(vp
, 0);
4272 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
4275 hfs_end_transaction(hfsmp
);
4280 * if we get an error and no changes were made then exit
4281 * otherwise we must do the hfs_update to reflect the changes
4283 if (retval
&& (startingPEOF
== filebytes
))
4287 * Adjust actualBytesAdded to be allocation block aligned, not
4288 * clump size aligned.
4289 * NOTE: So what we are reporting does not affect reality
4290 * until the file is closed, when we truncate the file to allocation
4293 if (total_bytes_added
!= 0 && orig_request_size
< total_bytes_added
)
4294 *(ap
->a_bytesallocated
) =
4295 roundup(orig_request_size
, (off_t
)vcb
->blockSize
);
4297 } else { /* Shorten the size of the file */
4300 * N.B. At present, this code is never called. If and when we
4301 * do start using it, it looks like there might be slightly
4302 * strange semantics with the file size: it's possible for the
4303 * file size to *increase* e.g. if current file size is 5,
4304 * length is 1024 and filebytes is 4096, the file size will
4305 * end up being 1024 bytes. This isn't necessarily a problem
4306 * but it's not consistent with the code above which doesn't
4307 * change the file size.
4310 retval
= hfs_truncate(vp
, length
, 0, 0, ap
->a_context
);
4311 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)vcb
->blockSize
;
4314 * if we get an error and no changes were made then exit
4315 * otherwise we must do the hfs_update to reflect the changes
4317 if (retval
&& (startingPEOF
== filebytes
)) goto Err_Exit
;
4319 /* These are bytesreleased */
4320 (void) hfs_chkdq(cp
, (int64_t)-((startingPEOF
- filebytes
)), NOCRED
,0);
4323 if (fp
->ff_size
> filebytes
) {
4324 fp
->ff_size
= filebytes
;
4326 hfs_ubc_setsize(vp
, fp
->ff_size
, true);
4331 cp
->c_flag
|= C_MODIFIED
;
4332 cp
->c_touch_chgtime
= TRUE
;
4333 cp
->c_touch_modtime
= TRUE
;
4334 retval2
= hfs_update(vp
, 0);
4339 hfs_unlock_truncate(cp
, HFS_LOCK_DEFAULT
);
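
/*
 * Illustrative sketch (not part of the build): hfs_vnop_allocate is normally
 * reached through the F_PREALLOCATE fcntl, whose fstore_t flags map onto the
 * kEF* masks above (F_ALLOCATECONTIG -> kEFContigMask, F_ALLOCATEALL ->
 * kEFAllMask).  Treat the exact mapping as an assumption rather than something
 * this excerpt guarantees.
 */
#if 0
	#include <fcntl.h>

	fstore_t fst = {
		.fst_flags   = F_ALLOCATECONTIG | F_ALLOCATEALL,
		.fst_posmode = F_PEOFPOSMODE,        /* allocate from the physical EOF */
		.fst_offset  = 0,
		.fst_length  = 16 * 1024 * 1024,     /* hypothetical 16MB reservation  */
	};

	if (fcntl(fd, F_PREALLOCATE, &fst) == 0) {
		/* fst.fst_bytesalloc reports how much was actually reserved */
	}
#endif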
4346 * Pagein for HFS filesystem
4349 hfs_vnop_pagein(struct vnop_pagein_args
*ap
)
4351 struct vnop_pagein_args {
4354 vm_offset_t a_pl_offset,
4358 vfs_context_t a_context;
4364 struct filefork
*fp
;
4367 upl_page_info_t
*pl
;
4369 off_t page_needed_f_offset
;
4374 boolean_t truncate_lock_held
= FALSE
;
4375 boolean_t file_converted
= FALSE
;
4383 if ((error
= cp_handle_vnop(vp
, CP_READ_ACCESS
| CP_WRITE_ACCESS
, 0)) != 0) {
4385 * If we errored here, then this means that one of two things occurred:
4386 * 1. there was a problem with the decryption of the key.
4387 * 2. the device is locked and we are not allowed to access this particular file.
4389 * Either way, this means that we need to shut down this upl now. As long as
4390 * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves)
4391 * then we create a upl and immediately abort it.
4393 if (ap
->a_pl
== NULL
) {
4394 /* create the upl */
4395 ubc_create_upl (vp
, ap
->a_f_offset
, ap
->a_size
, &upl
, &pl
,
4396 UPL_UBC_PAGEIN
| UPL_RET_ONLY_ABSENT
);
4397 /* mark the range as needed so it doesn't immediately get discarded upon abort */
4398 ubc_upl_range_needed (upl
, ap
->a_pl_offset
/ PAGE_SIZE
, 1);
4400 /* Abort the range */
4401 ubc_upl_abort_range (upl
, 0, ap
->a_size
, UPL_ABORT_FREE_ON_EMPTY
| UPL_ABORT_ERROR
);
4407 #endif /* CONFIG_PROTECT */
4409 if (ap
->a_pl
!= NULL
) {
4411 * this can only happen for swap files now that
4412 * we're asking for V2 paging behavior...
4413 * so don't need to worry about decompression, or
4414 * keeping track of blocks read or taking the truncate lock
4416 error
= cluster_pagein(vp
, ap
->a_pl
, ap
->a_pl_offset
, ap
->a_f_offset
,
4417 ap
->a_size
, (off_t
)fp
->ff_size
, ap
->a_flags
);
4421 page_needed_f_offset
= ap
->a_f_offset
+ ap
->a_pl_offset
;
4425 * take truncate lock (shared/recursive) to guard against
4426 * zero-fill thru fsync interfering, but only for v2
4428 * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
4429 * lock shared and we are allowed to recurse 1 level if this thread already
4430 * owns the lock exclusively... this can legally occur
4431 * if we are doing a shrinking ftruncate against a file
4432 * that is mapped private, and the pages being truncated
4433 * do not currently exist in the cache... in that case
4434 * we will have to page-in the missing pages in order
4435 * to provide them to the private mapping... we must
 * also call hfs_unlock_truncate with a positive been_recursed
4437 * arg to indicate that if we have recursed, there is no need to drop
4438 * the lock. Allowing this simple recursion is necessary
4439 * in order to avoid a certain deadlock... since the ftruncate
4440 * already holds the truncate lock exclusively, if we try
4441 * to acquire it shared to protect the pagein path, we will
4444 * NOTE: The if () block below is a workaround in order to prevent a
4445 * VM deadlock. See rdar://7853471.
4447 * If we are in a forced unmount, then launchd will still have the
4448 * dyld_shared_cache file mapped as it is trying to reboot. If we
4449 * take the truncate lock here to service a page fault, then our
4450 * thread could deadlock with the forced-unmount. The forced unmount
4451 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
4452 * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount
4453 * thread will think it needs to copy all of the data out of the file
4454 * and into a VM copy object. If we hold the cnode lock here, then that
4455 * VM operation will not be able to proceed, because we'll set a busy page
4456 * before attempting to grab the lock. Note that this isn't as simple as "don't
4457 * call ubc_setsize" because doing that would just shift the problem to the
4458 * ubc_msync done before the vnode is reclaimed.
4460 * So, if a forced unmount on this volume is in flight AND the cnode is
4461 * marked C_DELETED, then just go ahead and do the page in without taking
4462 * the lock (thus suspending pagein_v2 semantics temporarily). Since it's on a file
 * that is not going to be available on the next mount, this seems like an
 * OK solution from a correctness point of view, even though it is hacky.
4466 if (vfs_isforce(vnode_mount(vp
))) {
4467 if (cp
->c_flag
& C_DELETED
) {
4468 /* If we don't get it, then just go ahead and operate without the lock */
4469 truncate_lock_held
= hfs_try_trunclock(cp
, HFS_SHARED_LOCK
, HFS_LOCK_SKIP_IF_EXCLUSIVE
);
4473 hfs_lock_truncate(cp
, HFS_SHARED_LOCK
, HFS_LOCK_SKIP_IF_EXCLUSIVE
);
4474 truncate_lock_held
= TRUE
;
4477 kret
= ubc_create_upl(vp
, ap
->a_f_offset
, ap
->a_size
, &upl
, &pl
, UPL_UBC_PAGEIN
| UPL_RET_ONLY_ABSENT
);
4479 if ((kret
!= KERN_SUCCESS
) || (upl
== (upl_t
) NULL
)) {
4483 ubc_upl_range_needed(upl
, ap
->a_pl_offset
/ PAGE_SIZE
, 1);
4485 upl_size
= isize
= ap
->a_size
;
4488 * Scan from the back to find the last page in the UPL, so that we
4489 * aren't looking at a UPL that may have already been freed by the
4490 * preceding aborts/completions.
4492 for (pg_index
= ((isize
) / PAGE_SIZE
); pg_index
> 0;) {
4493 if (upl_page_present(pl
, --pg_index
))
4495 if (pg_index
== 0) {
4497 * no absent pages were found in the range specified
4498 * just abort the UPL to get rid of it and then we're done
4500 ubc_upl_abort_range(upl
, 0, isize
, UPL_ABORT_FREE_ON_EMPTY
);
4505 * initialize the offset variables before we touch the UPL.
4506 * f_offset is the position into the file, in bytes
4507 * offset is the position into the UPL, in bytes
4508 * pg_index is the pg# of the UPL we're operating on
4509 * isize is the offset into the UPL of the last page that is present.
4511 isize
= ((pg_index
+ 1) * PAGE_SIZE
);
4514 f_offset
= ap
->a_f_offset
;
4520 if ( !upl_page_present(pl
, pg_index
)) {
4522 * we asked for RET_ONLY_ABSENT, so it's possible
4523 * to get back empty slots in the UPL.
4524 * just skip over them
4526 f_offset
+= PAGE_SIZE
;
4527 offset
+= PAGE_SIZE
;
4534 * We know that we have at least one absent page.
4535 * Now checking to see how many in a row we have
4538 xsize
= isize
- PAGE_SIZE
;
4541 if ( !upl_page_present(pl
, pg_index
+ num_of_pages
))
4546 xsize
= num_of_pages
* PAGE_SIZE
;
4549 if (VNODE_IS_RSRC(vp
)) {
4550 /* allow pageins of the resource fork */
4552 int compressed
= hfs_file_is_compressed(VTOC(vp
), 1); /* 1 == don't take the cnode lock */
4556 if (truncate_lock_held
) {
4558 * can't hold the truncate lock when calling into the decmpfs layer
4559 * since it calls back into this layer... even though we're only
4560 * holding the lock in shared mode, and the re-entrant path only
4561 * takes the lock shared, we can deadlock if some other thread
4562 * tries to grab the lock exclusively in between.
4564 hfs_unlock_truncate(cp
, HFS_LOCK_SKIP_IF_EXCLUSIVE
);
4565 truncate_lock_held
= FALSE
;
4568 ap
->a_pl_offset
= offset
;
4569 ap
->a_f_offset
= f_offset
;
4572 error
= decmpfs_pagein_compressed(ap
, &compressed
, VTOCMP(vp
));
4574 * note that decpfs_pagein_compressed can change the state of
4575 * 'compressed'... it will set it to 0 if the file is no longer
4576 * compressed once the compression lock is successfully taken
4577 * i.e. we would block on that lock while the file is being inflated
4579 if (error
== 0 && vnode_isfastdevicecandidate(vp
)) {
4580 (void) hfs_addhotfile(vp
);
4584 /* successful page-in, update the access time */
4585 VTOC(vp
)->c_touch_acctime
= TRUE
;
4588 // compressed files are not traditional hot file candidates
4589 // but they may be for CF (which ignores the ff_bytesread
4592 if (VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
) {
4593 fp
->ff_bytesread
= 0;
4595 } else if (error
== EAGAIN
) {
4597 * EAGAIN indicates someone else already holds the compression lock...
4598 * to avoid deadlocking, we'll abort this range of pages with an
4599 * indication that the pagein needs to be redriven
4601 ubc_upl_abort_range(upl
, (upl_offset_t
) offset
, xsize
, UPL_ABORT_FREE_ON_EMPTY
| UPL_ABORT_RESTART
);
4602 } else if (error
== ENOSPC
) {
4604 if (upl_size
== PAGE_SIZE
)
4605 panic("decmpfs_pagein_compressed: couldn't ubc_upl_map a single page\n");
4607 ubc_upl_abort_range(upl
, (upl_offset_t
) offset
, isize
, UPL_ABORT_FREE_ON_EMPTY
);
4609 ap
->a_size
= PAGE_SIZE
;
4611 ap
->a_pl_offset
= 0;
4612 ap
->a_f_offset
= page_needed_f_offset
;
4616 ubc_upl_abort(upl
, UPL_ABORT_FREE_ON_EMPTY
| UPL_ABORT_ERROR
);
4619 goto pagein_next_range
;
4623 * Set file_converted only if the file became decompressed while we were
4624 * paging in. If it were still compressed, we would re-start the loop using the goto
4625 * in the above block. This avoid us overloading truncate_lock_held as our retry_pagein
4626 * condition below, since we could have avoided taking the truncate lock to prevent
4627 * a deadlock in the force unmount case.
4629 file_converted
= TRUE
;
4632 if (file_converted
== TRUE
) {
4634 * the file was converted back to a regular file after we first saw it as compressed
4635 * we need to abort the upl, retake the truncate lock, recreate the UPL and start over
4636 * reset a_size so that we consider what remains of the original request
4637 * and null out a_upl and a_pl_offset.
4639 * We should only be able to get into this block if the decmpfs_pagein_compressed
4640 * successfully decompressed the range in question for this file.
4642 ubc_upl_abort_range(upl
, (upl_offset_t
) offset
, isize
, UPL_ABORT_FREE_ON_EMPTY
);
4646 ap
->a_pl_offset
= 0;
4648 /* Reset file_converted back to false so that we don't infinite-loop. */
4649 file_converted
= FALSE
;
4654 error
= cluster_pagein(vp
, upl
, offset
, f_offset
, xsize
, (off_t
)fp
->ff_size
, ap
->a_flags
);
4657 * Keep track of blocks read.
4659 if ( !vnode_isswap(vp
) && VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
&& error
== 0) {
4661 int took_cnode_lock
= 0;
4663 if (ap
->a_f_offset
== 0 && fp
->ff_size
< PAGE_SIZE
)
4664 bytesread
= fp
->ff_size
;
4668 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
4669 if ((fp
->ff_bytesread
+ bytesread
) > 0x00000000ffffffff && cp
->c_lockowner
!= current_thread()) {
4670 hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
4671 took_cnode_lock
= 1;
4674 * If this file hasn't been seen since the start of
4675 * the current sampling period then start over.
4677 if (cp
->c_atime
< VTOHFS(vp
)->hfc_timebase
) {
4680 fp
->ff_bytesread
= bytesread
;
4682 cp
->c_atime
= tv
.tv_sec
;
4684 fp
->ff_bytesread
+= bytesread
;
4686 cp
->c_touch_acctime
= TRUE
;
4688 if (vnode_isfastdevicecandidate(vp
)) {
4689 (void) hfs_addhotfile(vp
);
4691 if (took_cnode_lock
)
4698 pg_index
+= num_of_pages
;
4704 if (truncate_lock_held
== TRUE
) {
4705 /* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */
4706 hfs_unlock_truncate(cp
, HFS_LOCK_SKIP_IF_EXCLUSIVE
);
/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
	struct vnop_pageout_args {
	   vnode_t       a_vp,
	   upl_t         a_pl,
	   vm_offset_t   a_pl_offset,
	   off_t         a_f_offset,
	   size_t        a_size,
	   int           a_flags
	   vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	int retval = 0;
	off_t filesize;
	upl_t upl;
	upl_page_info_t* pl = NULL;
	vm_offset_t a_pl_offset;
	int a_flags;
	int is_pageoutv2 = 0;
	kern_return_t kret;

	cp = VTOC(vp);
	fp = VTOF(vp);

	a_flags = ap->a_flags;
	a_pl_offset = ap->a_pl_offset;

	/*
	 * we can tell if we're getting the new or old behavior from the UPL
	 */
	if ((upl = ap->a_pl) == NULL) {
		int request_flags;

		is_pageoutv2 = 1;
		/*
		 * we're in control of any UPL we commit
		 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
		 */
		a_flags &= ~UPL_NOCOMMIT;
		a_pl_offset = 0;

		/*
		 * For V2 semantics, we want to take the cnode truncate lock
		 * shared to guard against the file size changing via zero-filling.
		 *
		 * However, we have to be careful because we may be invoked
		 * via the ubc_msync path to write out dirty mmap'd pages
		 * in response to a lock event on a content-protected
		 * filesystem (e.g. to write out class A files).
		 * As a result, we want to take the truncate lock 'SHARED' with
		 * the mini-recursion locktype so that we don't deadlock/panic
		 * because we may be already holding the truncate lock exclusive to force any other
		 * IOs to have blocked behind us.
		 */
		hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);

		if (a_flags & UPL_MSYNC) {
			request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
		} else {
			request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
		}

		kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);

		if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
			retval = EINVAL;
			goto pageout_done;
		}
	}
	/*
	 * from this point forward upl points at the UPL we're working with
	 * it was either passed in or we successfully created it
	 */

	/*
	 * Figure out where the file ends, for pageout purposes.  If
	 * ff_new_size > ff_size, then we're in the middle of extending the
	 * file via a write, so it is safe (and necessary) that we be able
	 * to pageout up to that point.
	 */
	filesize = fp->ff_size;
	if (fp->ff_new_size > filesize)
		filesize = fp->ff_new_size;

	/*
	 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
	 * UPL instead of relying on the UPL passed into us.  We go ahead and do that here,
	 * scanning for dirty ranges.  We'll issue our own N cluster_pageout calls, for
	 * N dirty ranges in the UPL.  Note that this is almost a direct copy of the
	 * logic in vnode_pageout except that we need to do it after grabbing the truncate
	 * lock in HFS so that we don't lock invert ourselves.
	 *
	 * Note that we can still get into this function on behalf of the default pager with
	 * non-V2 behavior (swapfiles).  However in that case, we did not grab locks above
	 * since fsync and other writing threads will grab the locks, then mark the
	 * relevant pages as busy.  But the pageout codepath marks the pages as busy,
	 * and THEN would attempt to grab the truncate lock, which would result in deadlock.  So
	 * we do not try to grab anything for the pre-V2 case, which should only be accessed
	 * by the paging/VM system.
	 */

	if (is_pageoutv2) {
		off_t f_offset;
		int offset;
		int isize;
		int pg_index;
		int error;
		int error_ret = 0;

		isize = ap->a_size;
		f_offset = ap->a_f_offset;

		/*
		 * Scan from the back to find the last page in the UPL, so that we
		 * aren't looking at a UPL that may have already been freed by the
		 * preceding aborts/completions.
		 */
		for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
			if (upl_page_present(pl, --pg_index))
				break;
			if (pg_index == 0) {
				ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
				goto pageout_done;
			}
		}

		/*
		 * initialize the offset variables before we touch the UPL.
		 * a_f_offset is the position into the file, in bytes
		 * offset is the position into the UPL, in bytes
		 * pg_index is the pg# of the UPL we're operating on.
		 * isize is the offset into the UPL of the last non-clean page.
		 */
		isize = ((pg_index + 1) * PAGE_SIZE);

		offset = 0;
		pg_index = 0;

		while (isize) {
			int  xsize;
			int  num_of_pages;

			if ( !upl_page_present(pl, pg_index)) {
				/*
				 * we asked for RET_ONLY_DIRTY, so it's possible
				 * to get back empty slots in the UPL.
				 * just skip over them
				 */
				f_offset += PAGE_SIZE;
				offset   += PAGE_SIZE;
				isize    -= PAGE_SIZE;
				pg_index++;

				continue;
			}
			if ( !upl_dirty_page(pl, pg_index)) {
				panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
			}

			/*
			 * We know that we have at least one dirty page.
			 * Now checking to see how many in a row we have
			 */
			num_of_pages = 1;
			xsize = isize - PAGE_SIZE;

			while (xsize) {
				if ( !upl_dirty_page(pl, pg_index + num_of_pages))
					break;
				num_of_pages++;
				xsize -= PAGE_SIZE;
			}
			xsize = num_of_pages * PAGE_SIZE;

			if ((error = cluster_pageout(vp, upl, offset, f_offset,
			                             xsize, filesize, a_flags))) {
				if (error_ret == 0)
					error_ret = error;
			}
			f_offset += xsize;
			offset   += xsize;
			isize    -= xsize;
			pg_index += num_of_pages;
		}
		/* capture errnos bubbled out of cluster_pageout if they occurred */
		if (error_ret != 0) {
			retval = error_ret;
		}
	} /* end block for v2 pageout behavior */
	else {
		/*
		 * just call cluster_pageout for old pre-v2 behavior
		 */
		retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
		                         ap->a_size, filesize, a_flags);
	}

	/*
	 * If data was written, update the modification time of the file
	 * but only if it's mapped writable; we will have touched the
	 * modification time for direct writes.
	 */
	if (retval == 0 && (ubc_is_mapped_writable(vp)
	                    || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING))) {
		hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);

		// Check again with lock
		bool mapped_writable = ubc_is_mapped_writable(vp);
		if (mapped_writable
		    || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING)) {
			cp->c_touch_modtime = TRUE;
			cp->c_touch_chgtime = TRUE;

			/*
			 * We only need to increment the generation counter if
			 * it's currently mapped writable because we incremented
			 * the counter in hfs_vnop_mnomap.
			 */
			if (mapped_writable)
				hfs_incr_gencount(VTOC(vp));

			/*
			 * If setuid or setgid bits are set and this process is
			 * not the superuser then clear the setuid and setgid bits
			 * as a precaution against tampering.
			 */
			if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
			    (vfs_context_suser(ap->a_context) != 0)) {
				cp->c_mode &= ~(S_ISUID | S_ISGID);
			}
		}

		hfs_unlock(cp);
	}

pageout_done:
	if (is_pageoutv2) {
		/*
		 * Release the truncate lock.  Note that because
		 * we may have taken the lock recursively by
		 * being invoked via ubc_msync due to lockdown,
		 * we should release it recursively, too.
		 */
		hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
	}

	return (retval);
}
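
/*
 * Note (summary, not original source commentary): in the V2 path above,
 * hfs_vnop_pageout builds and owns its own UPL (ubc_create_upl with
 * UPL_RET_ONLY_DIRTY) and issues one cluster_pageout per run of dirty pages,
 * whereas the pre-V2 path (e.g. swapfiles driven by the default pager) reuses
 * the caller-supplied UPL and deliberately takes no cnode/truncate locks, for
 * the deadlock reasons described in the comment before the loop.
 */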
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
	int retval = 0;
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
	    (vp == VTOHFS(vp)->hfc_filevp)) {

		/*
		 * Swap and validate the node if it is in native byte order.
		 * This is always true on big endian, so we always validate
		 * before writing here.  On little endian, the node typically has
		 * been swapped and validated when it was written to the journal,
		 * so we won't do anything here.
		 */
		if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
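			/*
			 * Note (added explanation, not original source commentary): in an
			 * HFS+ B-tree node the record offsets live at the end of the node,
			 * and the last u_int16_t is the offset of record 0, which is always
			 * sizeof(BTNodeDescriptor), i.e. 14 (0x000e).  Reading 0x000e here
			 * means the node is still in host byte order and needs the swap and
			 * validation below; a node already in big-endian disk order on a
			 * little-endian host would read 0x0e00 and is skipped.
			 */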
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = (char *)buf_dataptr(bp);
			block.blockNum = buf_lblkno(bp);
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (buf_fromcache(bp) == 0);
			block.blockSize = buf_count(bp);

			/* Endian un-swap B-Tree node */
			retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
			if (retval)
				panic("hfs_vnop_bwrite: about to write corrupt node!\n");
		}
	}

	/* This buffer shouldn't be locked anymore but if it is clear it */
	if ((buf_flags(bp) & B_LOCKED)) {
		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp %p\n", bp);
		}
		buf_clearflags(bp, B_LOCKED);
	}
	retval = vn_bwrite (ap);

	return (retval);
}
int
hfs_pin_block_range(struct hfsmount *hfsmp, int pin_state, uint32_t start_block, uint32_t nblocks)
{
	_dk_cs_pin_t pin;
	unsigned ioc;
	int err;

	memset(&pin, 0, sizeof(pin));
	pin.cp_extent.offset = ((uint64_t)start_block) * HFSTOVCB(hfsmp)->blockSize;
	pin.cp_extent.length = ((uint64_t)nblocks) * HFSTOVCB(hfsmp)->blockSize;
	switch (pin_state) {
	case HFS_PIN_IT:
		ioc = _DKIOCCSPINEXTENT;
		pin.cp_flags = _DKIOCCSPINTOFASTMEDIA;
		break;
	case HFS_PIN_IT | HFS_TEMP_PIN:
		ioc = _DKIOCCSPINEXTENT;
		pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSTEMPORARYPIN;
		break;
	case HFS_PIN_IT | HFS_DATALESS_PIN:
		ioc = _DKIOCCSPINEXTENT;
		pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSPINFORSWAPFILE;
		break;
	case HFS_UNPIN_IT:
		ioc = _DKIOCCSUNPINEXTENT;
		pin.cp_flags = 0;
		break;
	case HFS_UNPIN_IT | HFS_EVICT_PIN:
		ioc = _DKIOCCSPINEXTENT;
		pin.cp_flags = _DKIOCCSPINTOSLOWMEDIA;
		break;
	default:
		return EINVAL;
	}

	err = VNOP_IOCTL(hfsmp->hfs_devvp, ioc, (caddr_t)&pin, 0, vfs_context_kernel());
	return err;
}
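
/*
 * Illustrative usage sketch (assumption, not from the original source):
 * hfs_pin_vnode below drives this helper once per extent record, roughly
 *
 *	err = hfs_pin_block_range(hfsmp, HFS_PIN_IT | HFS_TEMP_PIN,
 *	                          extent.startBlock, extent.blockCount);
 *
 * The pin state only selects which CoreStorage ioctl (_DKIOCCSPINEXTENT or
 * _DKIOCCSUNPINEXTENT) and cp_flags get sent to hfs_devvp; on media whose
 * driver does not implement these ioctls the VNOP_IOCTL presumably fails and
 * that error is simply handed back to the caller.
 */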
//
// The cnode lock should already be held on entry to this function
//
int
hfs_pin_vnode(struct hfsmount *hfsmp, struct vnode *vp, int pin_state, uint32_t *num_blocks_pinned)
{
	struct filefork *fp = VTOF(vp);
	int i, err=0, need_put=0;
	struct vnode *rsrc_vp=NULL;
	uint32_t npinned = 0;

	if (num_blocks_pinned) {
		*num_blocks_pinned = 0;
	}

	if (vnode_vtype(vp) != VREG) {
		/* Not allowed to pin directories or symlinks */
		printf("hfs: can't pin vnode of type %d\n", vnode_vtype(vp));
		return (EPERM);
	}

	if (fp->ff_unallocblocks) {
		printf("hfs: can't pin a vnode w/unalloced blocks (%d)\n", fp->ff_unallocblocks);
		return (EINVAL);
	}

	/*
	 * It is possible that if the caller unlocked/re-locked the cnode after checking
	 * for C_NOEXISTS|C_DELETED that the file could have been deleted while the
	 * cnode was unlocked.  So check the condition again and return ENOENT so that
	 * the caller knows why we failed to pin the vnode.
	 */
	if (VTOC(vp)->c_flag & (C_NOEXISTS|C_DELETED)) {
		// makes no sense to pin something that's pending deletion
		return ENOENT;
	}

	if (fp->ff_blocks == 0 && (VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
		if (!VNODE_IS_RSRC(vp) && hfs_vgetrsrc(hfsmp, vp, &rsrc_vp) == 0) {
			//printf("hfs: fileid %d resource fork nblocks: %d / size: %lld\n", VTOC(vp)->c_fileid,
			//       VTOC(rsrc_vp)->c_rsrcfork->ff_blocks,VTOC(rsrc_vp)->c_rsrcfork->ff_size);

			fp = VTOC(rsrc_vp)->c_rsrcfork;
			need_put = 1;
		}
	}
	if (fp->ff_blocks == 0) {
		if (need_put) {
			//
			// use a distinct error code for a compressed file that has no resource fork;
			// we return EALREADY to indicate that the data is already probably hot file
			// cached because it's in an EA and the attributes btree is on the ssd
			//
			err = EALREADY;
		} else {
			err = EINVAL;
		}
		goto out;
	}

	for (i = 0; i < kHFSPlusExtentDensity; i++) {
		if (fp->ff_extents[i].startBlock == 0) {
			break;
		}

		err = hfs_pin_block_range(hfsmp, pin_state, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount);
		if (err) {
			break;
		}
		npinned += fp->ff_extents[i].blockCount;
	}

	if (err || npinned == 0) {
		goto out;
	}

	if (fp->ff_extents[kHFSPlusExtentDensity-1].startBlock) {
		uint32_t pblocks = 0;
		uint8_t forktype = 0;

		if (fp == VTOC(vp)->c_rsrcfork) {
			forktype = 0xff;
		}
		/*
		 * The file could have overflow extents, better pin them.
		 *
		 * We assume that since we are holding the cnode lock for this cnode,
		 * the file's extents cannot be manipulated, but the tree could, so we
		 * need to ensure that it doesn't change behind our back as we iterate it.
		 */
		int lockflags = hfs_systemfile_lock (hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
		err = hfs_pin_overflow_extents(hfsmp, VTOC(vp)->c_fileid, forktype, &pblocks);
		hfs_systemfile_unlock (hfsmp, lockflags);

		if (err) {
			goto out;
		}
		npinned += pblocks;
	}

out:
	if (num_blocks_pinned) {
		*num_blocks_pinned = npinned;
	}

	if (need_put && rsrc_vp) {
		//
		// have to unlock the cnode since it's shared between the
		// resource fork vnode and the data fork vnode (and the
		// vnode_put() may need to re-acquire the cnode lock to
		// reclaim the resource fork vnode)
		//
		hfs_unlock(VTOC(vp));
		vnode_put(rsrc_vp);
		hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
	}
	return err;
}
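
/*
 * Illustrative sketch (assumption, not from the original source): a caller is
 * expected to hold the cnode lock across the call, along the lines of
 *
 *	uint32_t pinned = 0;
 *	if (hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) == 0) {
 *		error = hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT | HFS_TEMP_PIN, &pinned);
 *		hfs_unlock(VTOC(vp));
 *	}
 *
 * The lock requirement comes from the comment above; whether a shared grab is
 * sufficient is not spelled out here.
 */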
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------
 * |///////////////|
 * -----------------
 * 0               N (file offset)
 *
 * -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 * -----------------     -----------------
 * 0               N     N+1              2N
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 * -----------------     -----------------
 * 0               N     N+1              2N
 *
 *                       -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                       -----------------
 *                       0               N
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
	struct proc *p)
{
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	u_int32_t headblks;
	u_int32_t datablks;
	u_int32_t blksize;
	u_int32_t growsize;
	u_int32_t nextallocsave;
	daddr64_t sector_a, sector_b;
	int eflags;
	off_t newbytes;
	int retval;
	int lockflags = 0;
	int took_trunc_lock = 0;
	int started_tr = 0;
	enum vtype vnodetype;

	vnodetype = vnode_vtype(vp);
	if (vnodetype != VREG) {
		/* Not allowed to move symlinks. */
		return (EPERM);
	}

	hfsmp = VTOHFS(vp);
	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
		return (ENOSPC);
	}

	cp = VTOC(vp);
	fp = VTOF(vp);
	if (fp->ff_unallocblocks)
		return (EINVAL);

#if CONFIG_PROTECT
	/*
	 * <rdar://problem/9118426>
	 * Disable HFS file relocation on content-protected filesystems
	 */
	if (cp_fs_protected (hfsmp->hfs_mp)) {
		return EINVAL;
	}
#endif
	/* If it's an SSD, also disable HFS relocation */
	if (hfsmp->hfs_flags & HFS_SSD) {
		return EINVAL;
	}

	blksize = hfsmp->blockSize;
	if (blockHint == 0)
		blockHint = hfsmp->nextAllocation;

	if (fp->ff_size > 0x7fffffff) {
		return (EFBIG);
	}

	if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
		hfs_unlock(cp);
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		/* Force lock since caller expects lock to be held. */
		if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) {
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
			return (retval);
		}
		/* No need to continue if file was removed. */
		if (cp->c_flag & C_NOEXISTS) {
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
			return (ENOENT);
		}
		took_trunc_lock = 1;
	}
	headblks = fp->ff_blocks;
	datablks = howmany(fp->ff_size, blksize);
	growsize = datablks * blksize;
	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
	if (blockHint >= hfsmp->hfs_metazone_start &&
	    blockHint <= hfsmp->hfs_metazone_end)
		eflags |= kEFMetadataMask;

	if (hfs_start_transaction(hfsmp) != 0) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
		return (EINVAL);
	}
	started_tr = 1;
	/*
	 * Protect the extents b-tree and the allocation bitmap
	 * during MapFileBlockC and ExtendFileC operations.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
	if (retval) {
		retval = MacToVFSError(retval);
		goto out;
	}

	/*
	 * STEP 1 - acquire new allocation blocks.
	 */
	nextallocsave = hfsmp->nextAllocation;
	retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
	if (eflags & kEFMetadataMask) {
		hfs_lock_mount(hfsmp);
		HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
		MarkVCBDirty(hfsmp);
		hfs_unlock_mount(hfsmp);
	}

	retval = MacToVFSError(retval);
	if (retval == 0) {
		cp->c_flag |= C_MODIFIED;
		if (newbytes < growsize) {
			retval = ENOSPC;
			goto restore;
		} else if (fp->ff_blocks < (headblks + datablks)) {
			printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN);
			retval = ENOSPC;
			goto restore;
		}

		retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
		if (retval) {
			retval = MacToVFSError(retval);
		} else if ((sector_a + 1) == sector_b) {
			retval = ENOSPC;
			goto restore;
		} else if ((eflags & kEFMetadataMask) &&
		           ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
		            hfsmp->hfs_metazone_end)) {
			const char * filestr;
			char emptystr = '\0';

			if (cp->c_desc.cd_nameptr != NULL) {
				filestr = (const char *)&cp->c_desc.cd_nameptr[0];
			} else if (vnode_name(vp) != NULL) {
				filestr = vnode_name(vp);
			} else {
				filestr = &emptystr;
			}
			retval = ENOSPC;
			goto restore;
		}
	}
	/* Done with system locks and journal for now. */
	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	hfs_end_transaction(hfsmp);
	started_tr = 0;

	if (retval) {
		/*
		 * Check to see if failure is due to excessive fragmentation.
		 */
		if ((retval == ENOSPC) &&
		    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
			hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
		}
		goto out;
	}
	/*
	 * STEP 2 - clone file data into the new allocation blocks.
	 */

	if (vnodetype == VLNK)
		retval = EPERM;
	else if (vnode_issystem(vp))
		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	else
		retval = hfs_clonefile(vp, headblks, datablks, blksize);

	/* Start transaction for step 3 or for a restore. */
	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto out;
	}
	started_tr = 1;
	if (retval)
		goto restore;

	/*
	 * STEP 3 - switch to cloned data and remove old blocks.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = HeadTruncateFile(hfsmp, (FCB *)fp, headblks);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	if (retval)
		goto restore;
out:
	if (took_trunc_lock)
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/* Push cnode's new extent data to disk. */
	if (retval == 0) {
		hfs_update(vp, 0);
	}
	if (hfsmp->jnl) {
		if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
			(void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
		else
			(void) hfs_flushvolumeheader(hfsmp, 0);
	}
exit:
	if (started_tr)
		hfs_end_transaction(hfsmp);

	return (retval);

restore:
	if (fp->ff_blocks == headblks) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
		goto exit;
	}
	/*
	 * Give back any newly allocated space.
	 */
	if (lockflags == 0) {
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	}

	(void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
	                     FTOC(fp)->c_fileid, false);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;

	if (took_trunc_lock)
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
	goto exit;
}
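
/*
 * Illustrative sketch (assumption, not from the original source): hfs_relocate
 * is called with the cnode already locked exclusive, e.g.
 *
 *	hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
 *	error = hfs_relocate(vp, hfsmp->hfs_metazone_start, cred, p);
 *	hfs_unlock(VTOC(vp));
 *
 * Passing a blockHint that falls inside the metadata zone turns on
 * kEFMetadataMask above, so the replacement blocks are allocated (and the
 * volume's nextAllocation pointer restored) with the metadata zone in mind.
 */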
/*
 * Clone a file's data within the file.
 *
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
	caddr_t  bufp;
	size_t  bufsize;
	size_t  copysize;
	size_t  iosize;
	size_t  offset;
	off_t  writebase;
	uio_t auio;
	int  error = 0;

	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 128 * 1024);
	offset = 0;

	hfs_unlock(VTOC(vp));

#if CONFIG_PROTECT
	if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
		hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
		return (error);
	}
#endif /* CONFIG_PROTECT */

	bufp = hfs_malloc(bufsize);

	auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);

	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);

		uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_read(vp, auio, copysize, IO_NOCACHE);
		if (error) {
			printf("hfs_clonefile: cluster_read failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
			error = EIO;
			break;
		}

		uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_write(vp, auio, writebase + offset,
		                      writebase + offset + iosize,
		                      uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
		if (error) {
			printf("hfs_clonefile: cluster_write failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
			error = EIO;
			break;
		}
		offset += iosize;
	}
	uio_free(auio);

	if ((blksize & PAGE_MASK)) {
		/*
		 * since the copy may not have started on a PAGE
		 * boundary (or may not have ended on one), we
		 * may have pages left in the cache since NOCACHE
		 * will let partially written pages linger...
		 * lets just flush the entire range to make sure
		 * we don't have any pages left that are beyond
		 * (or intersect) the real LEOF of this file
		 */
		ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
	} else {
		/*
		 * No need to call ubc_msync or hfs_invalbuf
		 * since the file was copied using IO_NOCACHE and
		 * the copy was done starting and ending on a page
		 * boundary in the file.
		 */
	}
	hfs_free(bufp, bufsize);

	hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
	return (error);
}
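
/*
 * Note (summary, not original source commentary): hfs_clonefile drops the
 * cnode lock for the duration of the copy and re-takes it just before
 * returning, so the caller's locking state is preserved; the data itself
 * moves in MIN(copysize, 128K) chunks through cluster_read/cluster_write with
 * IO_NOCACHE (and IO_SYNC on the write side) so little of it lingers in the
 * unified buffer cache.
 */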
/*
 * Clone a system (metadata) file.
 *
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
	caddr_t  bufp;
	char * offset;
	size_t  bufsize;
	size_t  iosize;
	struct buf *bp = NULL;
	daddr64_t  blkno;
	daddr64_t  blk;
	daddr64_t  start_blk;
	daddr64_t  last_blk;
	int  breadcnt;
	int  i;
	int  error = 0;

	iosize = GetLogicalBlockSize(vp);
	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
	breadcnt = bufsize / iosize;

	bufp = hfs_malloc(bufsize);

	start_blk = ((daddr64_t)blkstart * blksize) / iosize;
	last_blk  = ((daddr64_t)blkcnt * blksize) / iosize;
	blkno = 0;

	while (blkno < last_blk) {
		/*
		 * Read up to a megabyte
		 */
		offset = bufp;
		for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
			error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
			if (error) {
				printf("hfs_clonesysfile: meta_bread error %d\n", error);
				goto out;
			}
			if (buf_count(bp) != iosize) {
				printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
				goto out;
			}
			bcopy((char *)buf_dataptr(bp), offset, iosize);

			buf_markinvalid(bp);
			buf_brelse(bp);
			bp = NULL;

			offset += iosize;
		}

		/*
		 * Write up to a megabyte
		 */
		offset = bufp;
		for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
			bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
			if (bp == NULL) {
				printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
				error = EIO;
				goto out;
			}
			bcopy(offset, (char *)buf_dataptr(bp), iosize);
			error = (int)buf_bwrite(bp);
			bp = NULL;
			if (error)
				goto out;
			offset += iosize;
		}
	}
out:
	if (bp) {
		buf_brelse(bp);
	}

	hfs_free(bufp, bufsize);

	error = hfs_fsync(vp, MNT_WAIT, 0, p);

	return (error);
}
errno_t hfs_flush_invalid_ranges(vnode_t vp)
{
	cnode_t *cp = VTOC(vp);

	hfs_assert(cp->c_lockowner == current_thread());
	hfs_assert(cp->c_truncatelockowner == current_thread());

	if (!ISSET(cp->c_flag, C_ZFWANTSYNC) && !cp->c_zftimeout)
		return 0;

	filefork_t *fp = VTOF(vp);

	/*
	 * We can't hold the cnode lock whilst we call cluster_write so we
	 * need to copy the extents into a local buffer.
	 */
	int max_exts = 16;
	struct ext {
		off_t start, end;
	} exts_buf[max_exts];		// 256 bytes
	struct ext *exts = exts_buf;
	int ext_count = 0;
	errno_t ret;

	struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges);

	while (r) {
		/* If we have more than can fit in our stack buffer, switch
		   to a heap buffer. */
		if (exts == exts_buf && ext_count == max_exts) {
			max_exts = 256;
			exts = hfs_malloc(sizeof(struct ext) * max_exts);
			memcpy(exts, exts_buf, ext_count * sizeof(struct ext));
		}

		struct rl_entry *next = TAILQ_NEXT(r, rl_link);

		exts[ext_count++] = (struct ext){ r->rl_start, r->rl_end };

		if (!next || (ext_count == max_exts && exts != exts_buf)) {
			hfs_unlock(cp);

			for (int i = 0; i < ext_count; ++i) {
				ret = cluster_write(vp, NULL, fp->ff_size, exts[i].end + 1,
				                    exts[i].start, 0,
				                    IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE);
				if (ret) {
					hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
					goto exit;
				}
			}

			if (!next) {
				hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
				break;
			}

			/* Push any existing clusters which should clean up our invalid
			   ranges as they go through hfs_vnop_blockmap. */
			cluster_push(vp, 0);

			hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);

			/*
			 * Get back to where we were (given we dropped the lock).
			 * This shouldn't be many because we pushed above.
			 */
			TAILQ_FOREACH(r, &fp->ff_invalidranges, rl_link) {
				if (r->rl_end > exts[ext_count - 1].end)
					break;
			}

			ext_count = 0;
		} else
			r = next;
	}

	ret = 0;

exit:

	if (exts != exts_buf)
		hfs_free(exts, sizeof(struct ext) * max_exts);

	return ret;
}