/*
 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*	@(#)hfs_readwrite.c	1.0
 *
 *	(c) 1998-2001 Apple Computer, Inc.  All Rights Reserved
 *
 *	hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 *
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/buf_internal.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vnode_internal.h>
#include <sys/vfs_context.h>
#include <sys/fsevents.h>
#include <kern/kalloc.h>
#include <sys/sysctl.h>
#include <sys/fsctl.h>
#include <sys/mount_internal.h>
#include <miscfs/specfs/specdev.h>
#include <sys/ubc_internal.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <sys/kdebug.h>

#include "hfs_attrlist.h"
#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
#define can_cluster(size)  ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))

enum {
	MAXHFSFILESIZE = 0x7FFFFFFF	/* this needs to go in the mount structure */
};

/* from bsd/hfs/hfs_vfsops.c */
extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

static int  hfs_clonefile(struct vnode *, int, int, int);
static int  hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
static int  hfs_minorupdate(struct vnode *vp);
static int  do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);

/* from bsd/hfs/hfs_vnops.c */
extern decmpfs_cnode * hfs_lazy_init_decmpfs_cnode (struct cnode *cp);

int flush_cache_on_write = 0;
SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW | CTLFLAG_LOCKED, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
/*
 * Read data from a file.
 */
int
hfs_vnop_read(struct vnop_read_args *ap)
{
	/*
	   struct vnop_read_args {
		struct vnodeop_desc *a_desc;
		...
		vfs_context_t a_context;
	   };
	 */
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct hfsmount *hfsmp;
	off_t start_resid = uio_resid(uio);
	off_t offset = uio_offset(uio);
	int took_truncate_lock = 0;
	/* Preflight checks */
	if (!vnode_isreg(vp)) {
		/* can only read regular files */
	if (start_resid == 0)
		return (0);		/* Nothing left to do */
		return (EINVAL);	/* can't read from a negative offset */

	if (VNODE_IS_RSRC(vp)) {
		if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
		/* otherwise read the resource fork normally */
		int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
		retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
			/* successful read, update the access time */
			VTOC(vp)->c_touch_acctime = TRUE;

			/* compressed files are not hot file candidates */
			if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
				VTOF(vp)->ff_bytesread = 0;
		/* otherwise the file was converted back to a regular file while we were reading it */
	} else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
		error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
#endif /* HFS_COMPRESSION */
	if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {

	/*
	 * If this read request originated from a syscall (as opposed to
	 * an in-kernel page fault or something), then set it up for
	 * throttle checking.
	 */
	if (ap->a_ioflag & IO_SYSCALL_DISPATCH) {
		io_throttle = IO_RETURN_ON_THROTTLE;

	/* Protect against a size change. */
	hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
	took_truncate_lock = 1;

	filesize = fp->ff_size;
	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
	if (offset > filesize) {
		if ((hfsmp->hfs_flags & HFS_STANDARD) &&
		    (offset > (off_t)MAXHFSFILESIZE)) {

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

	retval = cluster_read(vp, uio, filesize, ap->a_ioflag | io_throttle);

	cp->c_touch_acctime = TRUE;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
	/*
	 * Keep track of blocks read.
	 */
	if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
		int took_cnode_lock = 0;

		bytesread = start_resid - uio_resid(uio);

		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
			hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);

		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < hfsmp->hfc_timebase) {
			fp->ff_bytesread = bytesread;
			cp->c_atime = tv.tv_sec;
		fp->ff_bytesread += bytesread;

	if (took_truncate_lock) {
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
	if (retval == EAGAIN) {
		throttle_lowpri_io(1);
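	/*
	 * Illustrative note (assumption): EAGAIN can only come back from
	 * cluster_read() here when IO_RETURN_ON_THROTTLE was requested above
	 * (i.e. the read arrived via IO_SYSCALL_DISPATCH), so throttle_lowpri_io(1)
	 * is expected to block for the throttling window before the remainder of
	 * the read is re-driven.
	 */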
/*
 * Write data to a file.
 */
int
hfs_vnop_write(struct vnop_write_args *ap)
{
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct hfsmount *hfsmp;
	kauth_cred_t cred = NULL;
	off_t bytesToAdd = 0;
	off_t actualBytesAdded;
	int ioflag = ap->a_ioflag;
	int cnode_locked = 0;
	int partialwrite = 0;
	time_t orig_ctime = VTOC(vp)->c_ctime;
	int took_truncate_lock = 0;
	int io_return_on_throttle = 0;
	struct rl_entry *invalid_range;
	if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
		int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
			case FILE_IS_COMPRESSED:
			case FILE_IS_CONVERTING:
				/* if FILE_IS_CONVERTING, we allow writes but do not
				   bother with snapshots or else we will deadlock. */
				printf("invalid state %d for compressed file\n", state);
	} else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
		error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);

	check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);
	resid = uio_resid(uio);
	offset = uio_offset(uio);

	if (!vnode_isreg(vp))
		return (EPERM);		/* Can only write regular files */

	if ((retval = cp_handle_vnop (vp, CP_WRITE_ACCESS, 0)) != 0) {

	eflags = kEFDeferMask;	/* defer file block allocations */
#if HFS_SPARSE_DEV
	/*
	 * When the underlying device is sparse and space
	 * is low (< 8MB), stop doing delayed allocations
	 * and begin doing synchronous I/O.
	 */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
	    (hfs_freeblks(hfsmp, 0) < 2048)) {
		eflags &= ~kEFDeferMask;
#endif /* HFS_SPARSE_DEV */

	if ((ioflag & (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) ==
		      (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) {
		io_return_on_throttle = IO_RETURN_ON_THROTTLE;
	/* Protect against a size change. */
	/*
	 * Protect against a size change.
	 *
	 * Note: If took_truncate_lock is true, then we previously got the lock shared
	 * but needed to upgrade to exclusive.  So try getting it exclusive from the
	 * start.
	 */
	if (ioflag & IO_APPEND || took_truncate_lock) {
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
	took_truncate_lock = 1;

	if (ioflag & IO_APPEND) {
		uio_setoffset(uio, fp->ff_size);
		offset = fp->ff_size;
	if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
	origFileSize = fp->ff_size;
	writelimit = offset + resid;
	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;

	/*
	 * We may need an exclusive truncate lock for several reasons, all
	 * of which are because we may be writing to a (portion of a) block
	 * for the first time, and we need to make sure no readers see the
	 * prior, uninitialized contents of the block.  The cases are:
	 *
	 * 1. We have unallocated (delayed allocation) blocks.  We may be
	 *    allocating new blocks to the file and writing to them.
	 *    (A more precise check would be whether the range we're writing
	 *    to contains delayed allocation blocks.)
	 * 2. We need to extend the file.  The bytes between the old EOF
	 *    and the new EOF are not yet initialized.  This is important
	 *    even if we're not allocating new blocks to the file.  If the
	 *    old EOF and new EOF are in the same block, we still need to
	 *    protect that range of bytes until they are written for the
	 *    first time.
	 * 3. The write overlaps some invalid ranges (delayed zero fill; that
	 *    part of the file has been allocated, but not yet written).
	 *
	 * If we had a shared lock with the above cases, we need to try to upgrade
	 * to an exclusive lock.  If the upgrade fails, we will lose the shared
	 * lock, and will need to take the truncate lock again; the took_truncate_lock
	 * flag will still be set, causing us to try for an exclusive lock next time.
	 *
	 * NOTE: Testing for #3 (delayed zero fill) needs to be done while the cnode
	 * lock is held, since it protects the range lists.
	 */
	if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
	    ((fp->ff_unallocblocks != 0) ||
	     (writelimit > origFileSize))) {
		if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
			/*
			 * Lock upgrade failed and we lost our shared lock, try again.
			 * Note: we do not set took_truncate_lock=0 here.  Leaving it
			 * set to 1 will cause us to try to get the lock exclusive.
			 */
		/* Store the owner in the c_truncatelockowner field if we successfully upgrade */
		cp->c_truncatelockowner = current_thread();
	if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {

	if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
		hfs_incr_gencount (cp);

	/*
	 * Now that we have the cnode lock, see if there are delayed zero fill ranges
	 * overlapping our write.  If so, we need the truncate lock exclusive (see above).
	 */
	if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
	    (rl_scan(&fp->ff_invalidranges, offset, writelimit-1, &invalid_range) != RL_NOOVERLAP)) {
		/*
		 * When testing, it appeared that calling lck_rw_lock_shared_to_exclusive() causes
		 * a deadlock, rather than simply returning failure.  (That is, it apparently does
		 * not behave like a "try_lock").  Since this condition is rare, just drop the
		 * cnode lock and try again.  Since took_truncate_lock is set, we will
		 * automatically take the truncate lock exclusive.
		 */
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
		(int)offset, uio_resid(uio), (int)fp->ff_size,
	/* Check if we do not need to extend the file */
	if (writelimit <= filebytes) {

	cred = vfs_context_ucred(ap->a_context);
	bytesToAdd = writelimit - filebytes;

	retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),

	if (hfs_start_transaction(hfsmp) != 0) {

	while (writelimit > filebytes) {
		bytesToAdd = writelimit - filebytes;
		if (cred && suser(cred, NULL) != 0)
			eflags |= kEFReserveMask;

		/* Protect extents b-tree and allocation bitmap */
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

		/* Files that are changing size are not hot file candidates. */
		if (hfsmp->hfc_stage == HFC_RECORDING) {
			fp->ff_bytesread = 0;
		retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
						    0, eflags, &actualBytesAdded));

		hfs_systemfile_unlock(hfsmp, lockflags);

		if ((actualBytesAdded == 0) && (retval == E_NONE))
		if (retval != E_NONE)
		filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
			(int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
	(void) hfs_update(vp, TRUE);
	(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
	(void) hfs_end_transaction(hfsmp);

	/*
	 * If we didn't grow the file enough try a partial write.
	 * POSIX expects this behavior.
	 */
	if ((retval == ENOSPC) && (filebytes > offset)) {
		uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
		writelimit = filebytes;

	if (retval == E_NONE) {
		if (writelimit > fp->ff_size)
			filesize = writelimit;
			filesize = fp->ff_size;

		lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);

		if (offset <= fp->ff_size) {
			zero_off = offset & ~PAGE_MASK_64;

			/* Check to see whether the area between the zero_offset and the start
			   of the transfer is invalid and should be zero-filled
			   as part of the transfer:
			 */
			if (offset > zero_off) {
				if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
					lflag |= IO_HEADZEROFILL;
			off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;

			/* The bytes between fp->ff_size and uio->uio_offset must never be
			   read without being zeroed.  The current last block is filled with zeroes
			   if it holds valid data but in all cases merely do a little bookkeeping
			   to track the area from the end of the current last page to the start of
			   the area actually written.  For the same reason only the bytes up to the
			   start of the page where this write will start are invalidated; any remainder
			   before uio->uio_offset is explicitly zeroed as part of the cluster_write.

			   Note that inval_start, the start of the page after the current EOF,
			   may be past the start of the write, in which case the zeroing
			   will be handled by the cluster_write of the actual data.
			 */
			inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
			inval_end = offset & ~PAGE_MASK_64;
			zero_off = fp->ff_size;

			if ((fp->ff_size & PAGE_MASK_64) &&
			    (rl_scan(&fp->ff_invalidranges,
				     &invalid_range) != RL_NOOVERLAP)) {
				/* The page containing the EOF is not valid, so the
				   entire page must be made inaccessible now.  If the write
				   starts on a page beyond the page containing the eof
				   (inval_end > eof_page_base), add the
				   whole page to the range to be invalidated.  Otherwise
				   (i.e. if the write starts on the same page), zero-fill
				   the entire page explicitly now:
				 */
				if (inval_end > eof_page_base) {
					inval_start = eof_page_base;
					zero_off = eof_page_base;

			if (inval_start < inval_end) {
				/* There's some range of data that's going to be marked invalid */

				if (zero_off < inval_start) {
					/* The pages between inval_start and inval_end are going to be invalidated,
					   and the actual write will start on a page past inval_end.  Now's the last
					   chance to zero-fill the page containing the EOF:
					 */
					retval = cluster_write(vp, (uio_t) 0,
							fp->ff_size, inval_start,
							lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
					hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
					if (retval) goto ioerr_exit;
					offset = uio_offset(uio);

				/* Mark the remaining area of the newly allocated space as invalid: */
				rl_add(inval_start, inval_end - 1, &fp->ff_invalidranges);
				cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
				zero_off = fp->ff_size = inval_end;

			if (offset > zero_off) lflag |= IO_HEADZEROFILL;
		/* Check to see whether the area between the end of the write and the end of
		   the page it falls in is invalid and should be zero-filled as part of the transfer:
		 */
		tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
		if (tail_off > filesize) tail_off = filesize;
		if (tail_off > writelimit) {
			if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
				lflag |= IO_TAILZEROFILL;

		/*
		 * if the write starts beyond the current EOF (possibly advanced in the
		 * zeroing of the last block, above), then we'll zero fill from the current EOF
		 * to where the write begins:
		 *
		 * NOTE: If (and ONLY if) the portion of the file about to be written is
		 * before the current EOF it might be marked as invalid now and must be
		 * made readable (removed from the invalid ranges) before cluster_write
		 * tries to write it.
		 */
		io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
		if (io_start < fp->ff_size) {
			io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
			rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
		/*
		 * We need to tell UBC the fork's new size BEFORE calling
		 * cluster_write, in case any of the new pages need to be
		 * paged out before cluster_write completes (which does happen
		 * in embedded systems due to extreme memory pressure).
		 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
		 * will be, so that it can pass that on to cluster_pageout, and
		 * allow those pageouts.
		 *
		 * We don't update ff_size yet since we don't want pageins to
		 * be able to see uninitialized data between the old and new
		 * EOF, until cluster_write has completed and initialized that
		 * part of the file.
		 *
		 * The vnode pager relies on the file size last given to UBC via
		 * ubc_setsize.  hfs_vnop_pageout relies on fp->ff_new_size or
		 * ff_size (whichever is larger).  NOTE: ff_new_size is always
		 * zero, unless we are extending the file via write.
		 */
		if (filesize > fp->ff_size) {
			fp->ff_new_size = filesize;
			ubc_setsize(vp, filesize);
		retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
				tail_off, lflag | IO_NOZERODIRTY | io_return_on_throttle);

		fp->ff_new_size = 0;	/* no longer extending; use ff_size */
		if (retval == EAGAIN) {
			/*
			 * EAGAIN indicates that we still have I/O to do, but
			 * that we now need to be throttled
			 */
			if (resid != uio_resid(uio)) {
				/*
				 * did manage to do some I/O before returning EAGAIN
				 */
				resid = uio_resid(uio);
				offset = uio_offset(uio);

				cp->c_touch_chgtime = TRUE;
				cp->c_touch_modtime = TRUE;
			if (filesize > fp->ff_size) {
				/*
				 * we called ubc_setsize before the call to
				 * cluster_write... since we only partially
				 * completed the I/O, we need to
				 * re-adjust our idea of the filesize based
				 * on what was actually transferred.
				 */
				ubc_setsize(vp, offset);

				fp->ff_size = offset;
			if (filesize > origFileSize) {
				ubc_setsize(vp, origFileSize);
		if (filesize > origFileSize) {
			fp->ff_size = filesize;

			/* Files that are changing size are not hot file candidates. */
			if (hfsmp->hfc_stage == HFC_RECORDING) {
				fp->ff_bytesread = 0;
		fp->ff_new_size = 0;	/* ff_size now has the correct size */

	/* If we wrote some bytes, then touch the change and mod times */
	if (resid > uio_resid(uio)) {
		cp->c_touch_chgtime = TRUE;
		cp->c_touch_modtime = TRUE;

	uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
	// XXXdbg - see radar 4871353 for more info
	if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
		VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);

	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (cp->c_mode & (S_ISUID | S_ISGID)) {
		cred = vfs_context_ucred(ap->a_context);
		if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
			hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
			cp->c_mode &= ~(S_ISUID | S_ISGID);
	if (ioflag & IO_UNIT) {
		hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
		(void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
				   0, 0, ap->a_context);
		uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
		uio_setresid(uio, resid);
		filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
	} else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
		hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
		retval = hfs_update(vp, TRUE);

	/* Updating vcbWrCnt doesn't need to be atomic. */

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);

	if (took_truncate_lock) {
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
	if (retval == EAGAIN) {
		throttle_lowpri_io(1);
/* support for the "bulk-access" fcntl */

#define CACHE_LEVELS 16
#define NUM_CACHE_ENTRIES (64*16)
#define PARENT_IDS_FLAG 0x100
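
/*
 * Illustrative note: NUM_CACHE_ENTRIES works out to 64*16 == 1024 slots, so
 * the access cache below can remember results for up to 1024 directory cnids
 * before add_node() starts replacing entries.
 */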
struct access_cache {
	int cachehits; /* these two for statistics gathering */
	unsigned int *acache;
	unsigned char *haveaccess;
};

struct access_t {
	uid_t     uid;         /* IN: effective user id */
	short     flags;       /* IN: access requested (i.e. R_OK) */
	short     num_groups;  /* IN: number of groups user belongs to */
	int       num_files;   /* IN: number of files to process */
	int       *file_ids;   /* IN: array of file ids */
	gid_t     *groups;     /* IN: array of groups */
	short     *access;     /* OUT: access info for each file (0 for 'has access') */
} __attribute__((unavailable)); // this structure is for reference purposes only
struct user32_access_t {
	uid_t          uid;         /* IN: effective user id */
	short          flags;       /* IN: access requested (i.e. R_OK) */
	short          num_groups;  /* IN: number of groups user belongs to */
	int            num_files;   /* IN: number of files to process */
	user32_addr_t  file_ids;    /* IN: array of file ids */
	user32_addr_t  groups;      /* IN: array of groups */
	user32_addr_t  access;      /* OUT: access info for each file (0 for 'has access') */
};

struct user64_access_t {
	uid_t          uid;         /* IN: effective user id */
	short          flags;       /* IN: access requested (i.e. R_OK) */
	short          num_groups;  /* IN: number of groups user belongs to */
	int            num_files;   /* IN: number of files to process */
	user64_addr_t  file_ids;    /* IN: array of file ids */
	user64_addr_t  groups;      /* IN: array of groups */
	user64_addr_t  access;      /* OUT: access info for each file (0 for 'has access') */
};
// these are the "extended" versions of the above structures
// note that it is crucial that they be different sized than
// the regular version
struct ext_access_t {
	uint32_t   flags;        /* IN: access requested (i.e. R_OK) */
	uint32_t   num_files;    /* IN: number of files to process */
	uint32_t   map_size;     /* IN: size of the bit map */
	uint32_t   *file_ids;    /* IN: Array of file ids */
	char       *bitmap;      /* OUT: hash-bitmap of interesting directory ids */
	short      *access;      /* OUT: access info for each file (0 for 'has access') */
	uint32_t   num_parents;  /* future use */
	cnid_t     *parents;     /* future use */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_ext_access_t {
	uint32_t       flags;        /* IN: access requested (i.e. R_OK) */
	uint32_t       num_files;    /* IN: number of files to process */
	uint32_t       map_size;     /* IN: size of the bit map */
	user32_addr_t  file_ids;     /* IN: Array of file ids */
	user32_addr_t  bitmap;       /* OUT: hash-bitmap of interesting directory ids */
	user32_addr_t  access;       /* OUT: access info for each file (0 for 'has access') */
	uint32_t       num_parents;  /* future use */
	user32_addr_t  parents;      /* future use */
};

struct user64_ext_access_t {
	uint32_t       flags;        /* IN: access requested (i.e. R_OK) */
	uint32_t       num_files;    /* IN: number of files to process */
	uint32_t       map_size;     /* IN: size of the bit map */
	user64_addr_t  file_ids;     /* IN: array of file ids */
	user64_addr_t  bitmap;       /* OUT: hash-bitmap of interesting directory ids */
	user64_addr_t  access;       /* OUT: access info for each file (0 for 'has access') */
	uint32_t       num_parents;  /* future use */
	user64_addr_t  parents;      /* future use */
};
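
/*
 * Illustrative sketch (assumption -- the exact user-level spelling of the
 * request is defined in hfs_fsctl.h and may differ): user space reaches the
 * bulk-access check through fsctl(2), handing in one of the structures above.
 * Roughly:
 *
 *	struct user32_ext_access_t args = { 0 };
 *	args.flags     = R_OK;
 *	args.num_files = nfiles;
 *	args.file_ids  = (user32_addr_t)(uintptr_t)ids;      // cnid array
 *	args.access    = (user32_addr_t)(uintptr_t)results;  // short per file, 0 == has access
 *	// fsctl(path, <bulk-access command>, &args, 0);
 *
 * do_bulk_access_check() below tells the plain and extended forms apart purely
 * by the size of the structure passed to it (arg_size).
 */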
/*
 * Perform a binary search for the given parent_id.  Return value is
 * the index if there is a match.  If no_match_indexp is non-NULL it
 * will be assigned with the index to insert the item (even if it was
 * not found).
 */
static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
{
		unsigned int mid = ((hi - lo)/2) + lo;
		unsigned int this_id = array[mid];

		if (parent_id == this_id) {
		if (parent_id < this_id) {
		if (parent_id > this_id) {

	/* check if lo and hi converged on the match */
	if (parent_id == array[hi]) {
	if (no_match_indexp) {
		*no_match_indexp = hi;
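
/*
 * Illustrative example: with array = { 3, 9, 17 } (hi == 2), a search for
 * parent_id 9 matches at index 1.  A search for 10 finds no match; per the
 * contract above, *no_match_indexp (when non-NULL) receives the index at
 * which 10 would have to be inserted to keep the array sorted (2 here).
 */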
static int
lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
{
	int index, no_match_index;

	if (cache->numcached == 0) {
		return 0; // table is empty, so insert at index=0 and report no match

	if (cache->numcached > NUM_CACHE_ENTRIES) {
		cache->numcached = NUM_CACHE_ENTRIES;

	hi = cache->numcached - 1;

	index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);

	/* if no existing entry found, find index for new one */
		index = no_match_index;
/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in).  We currently do a replace rather
 * than an insert if the cache is full.
 */
static void
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
	int lookup_index = -1;

	/* need to do a lookup first if -1 passed for index */
		if (lookup_bucket(cache, &lookup_index, nodeID)) {
			if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
				// only update an entry if the previous access was ESRCH (i.e. a scope checking error)
				cache->haveaccess[lookup_index] = access;

			/* mission accomplished */
			index = lookup_index;

	/* if the cache is full, do a replace rather than an insert */
	if (cache->numcached >= NUM_CACHE_ENTRIES) {
		cache->numcached = NUM_CACHE_ENTRIES-1;

		if (index > cache->numcached) {
			index = cache->numcached;

	if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {

	if (index >= 0 && index < cache->numcached) {
		/* only do bcopy if we're inserting */
		bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
		bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );

	cache->acache[index] = nodeID;
	cache->haveaccess[index] = access;
static int
snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
{
	struct cinfo *cip = (struct cinfo *)arg;

	cip->uid = attrp->ca_uid;
	cip->gid = attrp->ca_gid;
	cip->mode = attrp->ca_mode;
	cip->parentcnid = descp->cd_parentcnid;
	cip->recflags = attrp->ca_recflags;
/*
 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id.  If the item
 * isn't incore, then go to the catalog.
 */
static int
do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
	       struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
{
	/* if this id matches the one the fsctl was called with, skip the lookup */
	if (cnid == skip_cp->c_cnid) {
		cnattrp->ca_uid = skip_cp->c_uid;
		cnattrp->ca_gid = skip_cp->c_gid;
		cnattrp->ca_mode = skip_cp->c_mode;
		cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
		keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
		struct cinfo c_info;

		/* otherwise, check the cnode hash in case the file/dir is incore */
		if (hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info) == 0) {
			cnattrp->ca_uid = c_info.uid;
			cnattrp->ca_gid = c_info.gid;
			cnattrp->ca_mode = c_info.mode;
			cnattrp->ca_recflags = c_info.recflags;
			keyp->hfsPlus.parentID = c_info.parentcnid;

		if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
			throttle_lowpri_io(1);

		lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

		/* lookup this cnid in the catalog */
		error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);

		hfs_systemfile_unlock(hfsmp, lockflags);
1096 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
1097 * up to CACHE_LEVELS as we progress towards the root.
1100 do_access_check(struct hfsmount
*hfsmp
, int *err
, struct access_cache
*cache
, HFSCatalogNodeID nodeID
,
1101 struct cnode
*skip_cp
, struct proc
*theProcPtr
, kauth_cred_t myp_ucred
,
1102 struct vfs_context
*my_context
,
1106 uint32_t num_parents
)
1110 HFSCatalogNodeID thisNodeID
;
1111 unsigned int myPerms
;
1112 struct cat_attr cnattr
;
1113 int cache_index
= -1, scope_index
= -1, scope_idx_start
= -1;
1116 int i
= 0, ids_to_cache
= 0;
1117 int parent_ids
[CACHE_LEVELS
];
1119 thisNodeID
= nodeID
;
1120 while (thisNodeID
>= kRootDirID
) {
1121 myResult
= 0; /* default to "no access" */
1123 /* check the cache before resorting to hitting the catalog */
1125 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
1126 * to look any further after hitting cached dir */
1128 if (lookup_bucket(cache
, &cache_index
, thisNodeID
)) {
1130 myErr
= cache
->haveaccess
[cache_index
];
1131 if (scope_index
!= -1) {
1132 if (myErr
== ESRCH
) {
1136 scope_index
= 0; // so we'll just use the cache result
1137 scope_idx_start
= ids_to_cache
;
1139 myResult
= (myErr
== 0) ? 1 : 0;
1140 goto ExitThisRoutine
;
1146 tmp
= cache_binSearch(parents
, num_parents
-1, thisNodeID
, NULL
);
1147 if (scope_index
== -1)
1149 if (tmp
!= -1 && scope_idx_start
== -1 && ids_to_cache
< CACHE_LEVELS
) {
1150 scope_idx_start
= ids_to_cache
;
1154 /* remember which parents we want to cache */
1155 if (ids_to_cache
< CACHE_LEVELS
) {
1156 parent_ids
[ids_to_cache
] = thisNodeID
;
1159 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
1160 if (bitmap
&& map_size
) {
1161 bitmap
[(thisNodeID
/8)%(map_size
)]|=(1<<(thisNodeID
&7));
1165 /* do the lookup (checks the cnode hash, then the catalog) */
1166 myErr
= do_attr_lookup(hfsmp
, cache
, thisNodeID
, skip_cp
, &catkey
, &cnattr
);
1168 goto ExitThisRoutine
; /* no access */
1171 /* Root always gets access. */
1172 if (suser(myp_ucred
, NULL
) == 0) {
1173 thisNodeID
= catkey
.hfsPlus
.parentID
;
1178 // if the thing has acl's, do the full permission check
1179 if ((cnattr
.ca_recflags
& kHFSHasSecurityMask
) != 0) {
1182 /* get the vnode for this cnid */
1183 myErr
= hfs_vget(hfsmp
, thisNodeID
, &vp
, 0, 0);
1186 goto ExitThisRoutine
;
1189 thisNodeID
= VTOC(vp
)->c_parentcnid
;
1191 hfs_unlock(VTOC(vp
));
1193 if (vnode_vtype(vp
) == VDIR
) {
1194 myErr
= vnode_authorize(vp
, NULL
, (KAUTH_VNODE_SEARCH
| KAUTH_VNODE_LIST_DIRECTORY
), my_context
);
1196 myErr
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
, my_context
);
1202 goto ExitThisRoutine
;
1206 int mode
= cnattr
.ca_mode
& S_IFMT
;
1207 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
, cnattr
.ca_mode
, hfsmp
->hfs_mp
,myp_ucred
, theProcPtr
);
1209 if (mode
== S_IFDIR
) {
1210 flags
= R_OK
| X_OK
;
1214 if ( (myPerms
& flags
) != flags
) {
1217 goto ExitThisRoutine
; /* no access */
1220 /* up the hierarchy we go */
1221 thisNodeID
= catkey
.hfsPlus
.parentID
;
1225 /* if here, we have access to this node */
1229 if (parents
&& myErr
== 0 && scope_index
== -1) {
1238 /* cache the parent directory(ies) */
1239 for (i
= 0; i
< ids_to_cache
; i
++) {
1240 if (myErr
== 0 && parents
&& (scope_idx_start
== -1 || i
> scope_idx_start
)) {
1241 add_node(cache
, -1, parent_ids
[i
], ESRCH
);
1243 add_node(cache
, -1, parent_ids
[i
], myErr
);
static int
do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
		     struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
{
	/*
	 * NOTE: on entry, the vnode has an io_ref.  In case this vnode
	 * happens to be in our list of file_ids, we'll note it to
	 * avoid calling hfs_chashget_nowait() on that id as that
	 * will cause a "locking against myself" panic.
	 */
1262 Boolean check_leaf
= true;
1264 struct user64_ext_access_t
*user_access_structp
;
1265 struct user64_ext_access_t tmp_user_access
;
1266 struct access_cache cache
;
1268 int error
= 0, prev_parent_check_ok
=1;
1272 unsigned int num_files
= 0;
1274 int num_parents
= 0;
1278 cnid_t
*parents
=NULL
;
1282 cnid_t prevParent_cnid
= 0;
1283 unsigned int myPerms
;
1285 struct cat_attr cnattr
;
1287 struct cnode
*skip_cp
= VTOC(vp
);
1288 kauth_cred_t cred
= vfs_context_ucred(context
);
1289 proc_t p
= vfs_context_proc(context
);
1291 is64bit
= proc_is64bit(p
);
1293 /* initialize the local cache and buffers */
1294 cache
.numcached
= 0;
1295 cache
.cachehits
= 0;
1297 cache
.acache
= NULL
;
1298 cache
.haveaccess
= NULL
;
1300 /* struct copyin done during dispatch... need to copy file_id array separately */
1301 if (ap
->a_data
== NULL
) {
1303 goto err_exit_bulk_access
;
1307 if (arg_size
!= sizeof(struct user64_ext_access_t
)) {
1309 goto err_exit_bulk_access
;
1312 user_access_structp
= (struct user64_ext_access_t
*)ap
->a_data
;
1314 } else if (arg_size
== sizeof(struct user32_access_t
)) {
1315 struct user32_access_t
*accessp
= (struct user32_access_t
*)ap
->a_data
;
1317 // convert an old style bulk-access struct to the new style
1318 tmp_user_access
.flags
= accessp
->flags
;
1319 tmp_user_access
.num_files
= accessp
->num_files
;
1320 tmp_user_access
.map_size
= 0;
1321 tmp_user_access
.file_ids
= CAST_USER_ADDR_T(accessp
->file_ids
);
1322 tmp_user_access
.bitmap
= USER_ADDR_NULL
;
1323 tmp_user_access
.access
= CAST_USER_ADDR_T(accessp
->access
);
1324 tmp_user_access
.num_parents
= 0;
1325 user_access_structp
= &tmp_user_access
;
1327 } else if (arg_size
== sizeof(struct user32_ext_access_t
)) {
1328 struct user32_ext_access_t
*accessp
= (struct user32_ext_access_t
*)ap
->a_data
;
1330 // up-cast from a 32-bit version of the struct
1331 tmp_user_access
.flags
= accessp
->flags
;
1332 tmp_user_access
.num_files
= accessp
->num_files
;
1333 tmp_user_access
.map_size
= accessp
->map_size
;
1334 tmp_user_access
.num_parents
= accessp
->num_parents
;
1336 tmp_user_access
.file_ids
= CAST_USER_ADDR_T(accessp
->file_ids
);
1337 tmp_user_access
.bitmap
= CAST_USER_ADDR_T(accessp
->bitmap
);
1338 tmp_user_access
.access
= CAST_USER_ADDR_T(accessp
->access
);
1339 tmp_user_access
.parents
= CAST_USER_ADDR_T(accessp
->parents
);
1341 user_access_structp
= &tmp_user_access
;
1344 goto err_exit_bulk_access
;
1347 map_size
= user_access_structp
->map_size
;
1349 num_files
= user_access_structp
->num_files
;
1351 num_parents
= user_access_structp
->num_parents
;
1353 if (num_files
< 1) {
1354 goto err_exit_bulk_access
;
1356 if (num_files
> 1024) {
1358 goto err_exit_bulk_access
;
1361 if (num_parents
> 1024) {
1363 goto err_exit_bulk_access
;
1366 file_ids
= (int *) kalloc(sizeof(int) * num_files
);
1367 access
= (short *) kalloc(sizeof(short) * num_files
);
1369 bitmap
= (char *) kalloc(sizeof(char) * map_size
);
1373 parents
= (cnid_t
*) kalloc(sizeof(cnid_t
) * num_parents
);
1376 cache
.acache
= (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES
);
1377 cache
.haveaccess
= (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES
);
1379 if (file_ids
== NULL
|| access
== NULL
|| (map_size
!= 0 && bitmap
== NULL
) || cache
.acache
== NULL
|| cache
.haveaccess
== NULL
) {
1381 kfree(file_ids
, sizeof(int) * num_files
);
1384 kfree(bitmap
, sizeof(char) * map_size
);
1387 kfree(access
, sizeof(short) * num_files
);
1390 kfree(cache
.acache
, sizeof(int) * NUM_CACHE_ENTRIES
);
1392 if (cache
.haveaccess
) {
1393 kfree(cache
.haveaccess
, sizeof(unsigned char) * NUM_CACHE_ENTRIES
);
1396 kfree(parents
, sizeof(cnid_t
) * num_parents
);
1401 // make sure the bitmap is zero'ed out...
1403 bzero(bitmap
, (sizeof(char) * map_size
));
1406 if ((error
= copyin(user_access_structp
->file_ids
, (caddr_t
)file_ids
,
1407 num_files
* sizeof(int)))) {
1408 goto err_exit_bulk_access
;
1412 if ((error
= copyin(user_access_structp
->parents
, (caddr_t
)parents
,
1413 num_parents
* sizeof(cnid_t
)))) {
1414 goto err_exit_bulk_access
;
1418 flags
= user_access_structp
->flags
;
1419 if ((flags
& (F_OK
| R_OK
| W_OK
| X_OK
)) == 0) {
1423 /* check if we've been passed leaf node ids or parent ids */
1424 if (flags
& PARENT_IDS_FLAG
) {
1428 /* Check access to each file_id passed in */
1429 for (i
= 0; i
< num_files
; i
++) {
1431 cnid
= (cnid_t
) file_ids
[i
];
1433 /* root always has access */
1434 if ((!parents
) && (!suser(cred
, NULL
))) {
1440 /* do the lookup (checks the cnode hash, then the catalog) */
1441 error
= do_attr_lookup(hfsmp
, &cache
, cnid
, skip_cp
, &catkey
, &cnattr
);
1443 access
[i
] = (short) error
;
1448 // Check if the leaf matches one of the parent scopes
1449 leaf_index
= cache_binSearch(parents
, num_parents
-1, cnid
, NULL
);
1450 if (leaf_index
>= 0 && parents
[leaf_index
] == cnid
)
1451 prev_parent_check_ok
= 0;
1452 else if (leaf_index
>= 0)
1453 prev_parent_check_ok
= 1;
1456 // if the thing has acl's, do the full permission check
1457 if ((cnattr
.ca_recflags
& kHFSHasSecurityMask
) != 0) {
1460 /* get the vnode for this cnid */
1461 myErr
= hfs_vget(hfsmp
, cnid
, &cvp
, 0, 0);
1467 hfs_unlock(VTOC(cvp
));
1469 if (vnode_vtype(cvp
) == VDIR
) {
1470 myErr
= vnode_authorize(cvp
, NULL
, (KAUTH_VNODE_SEARCH
| KAUTH_VNODE_LIST_DIRECTORY
), context
);
1472 myErr
= vnode_authorize(cvp
, NULL
, KAUTH_VNODE_READ_DATA
, context
);
1481 /* before calling CheckAccess(), check the target file for read access */
1482 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
,
1483 cnattr
.ca_mode
, hfsmp
->hfs_mp
, cred
, p
);
1485 /* fail fast if no access */
1486 if ((myPerms
& flags
) == 0) {
1492 /* we were passed an array of parent ids */
1493 catkey
.hfsPlus
.parentID
= cnid
;
1496 /* if the last guy had the same parent and had access, we're done */
1497 if (i
> 0 && catkey
.hfsPlus
.parentID
== prevParent_cnid
&& access
[i
-1] == 0 && prev_parent_check_ok
) {
1503 myaccess
= do_access_check(hfsmp
, &error
, &cache
, catkey
.hfsPlus
.parentID
,
1504 skip_cp
, p
, cred
, context
,bitmap
, map_size
, parents
, num_parents
);
1506 if (myaccess
|| (error
== ESRCH
&& leaf_index
!= -1)) {
1507 access
[i
] = 0; // have access.. no errors to report
1509 access
[i
] = (error
!= 0 ? (short) error
: EACCES
);
1512 prevParent_cnid
= catkey
.hfsPlus
.parentID
;
1515 /* copyout the access array */
1516 if ((error
= copyout((caddr_t
)access
, user_access_structp
->access
,
1517 num_files
* sizeof (short)))) {
1518 goto err_exit_bulk_access
;
1520 if (map_size
&& bitmap
) {
1521 if ((error
= copyout((caddr_t
)bitmap
, user_access_structp
->bitmap
,
1522 map_size
* sizeof (char)))) {
1523 goto err_exit_bulk_access
;
1528 err_exit_bulk_access
:
1531 kfree(file_ids
, sizeof(int) * num_files
);
1533 kfree(parents
, sizeof(cnid_t
) * num_parents
);
1535 kfree(bitmap
, sizeof(char) * map_size
);
1537 kfree(access
, sizeof(short) * num_files
);
1539 kfree(cache
.acache
, sizeof(int) * NUM_CACHE_ENTRIES
);
1540 if (cache
.haveaccess
)
1541 kfree(cache
.haveaccess
, sizeof(unsigned char) * NUM_CACHE_ENTRIES
);
1547 /* end "bulk-access" support */
1551 * Callback for use with freeze ioctl.
1554 hfs_freezewrite_callback(struct vnode
*vp
, __unused
void *cargs
)
1556 vnode_waitforwrites(vp
, 0, 0, 0, "hfs freeze");
1562 * Control filesystem operating characteristics.
1565 hfs_vnop_ioctl( struct vnop_ioctl_args
/* {
1570 vfs_context_t a_context;
1573 struct vnode
* vp
= ap
->a_vp
;
1574 struct hfsmount
*hfsmp
= VTOHFS(vp
);
1575 vfs_context_t context
= ap
->a_context
;
1576 kauth_cred_t cred
= vfs_context_ucred(context
);
1577 proc_t p
= vfs_context_proc(context
);
1578 struct vfsstatfs
*vfsp
;
1580 off_t jnl_start
, jnl_size
;
1581 struct hfs_journal_info
*jip
;
1584 off_t uncompressed_size
= -1;
1585 int decmpfs_error
= 0;
1587 if (ap
->a_command
== F_RDADVISE
) {
1588 /* we need to inspect the decmpfs state of the file as early as possible */
1589 compressed
= hfs_file_is_compressed(VTOC(vp
), 0);
1591 if (VNODE_IS_RSRC(vp
)) {
1592 /* if this is the resource fork, treat it as if it were empty */
1593 uncompressed_size
= 0;
1595 decmpfs_error
= hfs_uncompressed_size_of_compressed_file(NULL
, vp
, 0, &uncompressed_size
, 0);
1596 if (decmpfs_error
!= 0) {
1597 /* failed to get the uncompressed size, we'll check for this later */
1598 uncompressed_size
= -1;
1603 #endif /* HFS_COMPRESSION */
1605 is64bit
= proc_is64bit(p
);
1610 if ((error
= cp_handle_vnop(vp
, CP_WRITE_ACCESS
, 0)) != 0) {
1614 #endif /* CONFIG_PROTECT */
1616 switch (ap
->a_command
) {
1620 struct vnode
*file_vp
;
1627 /* Caller must be owner of file system. */
1628 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1629 if (suser(cred
, NULL
) &&
1630 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1633 /* Target vnode must be file system's root. */
1634 if (!vnode_isvroot(vp
)) {
1637 bufptr
= (char *)ap
->a_data
;
1638 cnid
= strtoul(bufptr
, NULL
, 10);
1639 if (ap
->a_fflag
& HFS_GETPATH_VOLUME_RELATIVE
) {
1640 flags
|= BUILDPATH_VOLUME_RELATIVE
;
1643 /* We need to call hfs_vfs_vget to leverage the code that will
1644 * fix the origin list for us if needed, as opposed to calling
1645 * hfs_vget, since we will need the parent for build_path call.
1648 if ((error
= hfs_vfs_vget(HFSTOVFS(hfsmp
), cnid
, &file_vp
, context
))) {
1651 error
= build_path(file_vp
, bufptr
, sizeof(pathname_t
), &outlen
, flags
, context
);
1657 case HFS_GET_WRITE_GEN_COUNTER
:
1659 struct cnode
*cp
= NULL
;
1661 u_int32_t
*counter
= (u_int32_t
*)ap
->a_data
;
1665 if (vnode_isdir (vp
)) {
1671 error
= hfs_lock (cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
1673 struct ubc_info
*uip
;
1676 if (UBCINFOEXISTS(vp
)) {
1677 uip
= vp
->v_ubcinfo
;
1678 if (uip
->ui_flags
& UI_ISMAPPED
) {
1684 if (S_ISREG(cp
->c_attr
.ca_mode
) || S_ISLNK(cp
->c_attr
.ca_mode
)) {
1685 uint32_t gcount
= hfs_get_gencount(cp
);
1687 // Even though we return EBUSY for files that are mmap'ed
1688 // we also want to bump the value so that the write-gen
1689 // counter will always be different once the file is unmapped
1690 // (since the file may be unmapped but the pageouts have not
1694 hfs_incr_gencount (cp
);
1695 gcount
= hfs_get_gencount(cp
);
1702 /* not a file or dir? silently return */
1723 /* Caller must be owner of file system. */
1724 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1725 if (suser(cred
, NULL
) &&
1726 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1729 /* Target vnode must be file system's root. */
1730 if (!vnode_isvroot(vp
)) {
1733 linkfileid
= *(cnid_t
*)ap
->a_data
;
1734 if (linkfileid
< kHFSFirstUserCatalogNodeID
) {
1737 if ((error
= hfs_lookup_siblinglinks(hfsmp
, linkfileid
, &prevlinkid
, &nextlinkid
))) {
1740 if (ap
->a_command
== HFS_NEXT_LINK
) {
1741 *(cnid_t
*)ap
->a_data
= nextlinkid
;
1743 *(cnid_t
*)ap
->a_data
= prevlinkid
;
1748 case HFS_RESIZE_PROGRESS
: {
1750 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1751 if (suser(cred
, NULL
) &&
1752 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1753 return (EACCES
); /* must be owner of file system */
1755 if (!vnode_isvroot(vp
)) {
1758 /* file system must not be mounted read-only */
1759 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
1763 return hfs_resize_progress(hfsmp
, (u_int32_t
*)ap
->a_data
);
1766 case HFS_RESIZE_VOLUME
: {
1770 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1771 if (suser(cred
, NULL
) &&
1772 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1773 return (EACCES
); /* must be owner of file system */
1775 if (!vnode_isvroot(vp
)) {
1779 /* filesystem must not be mounted read only */
1780 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
1783 newsize
= *(u_int64_t
*)ap
->a_data
;
1784 cursize
= (u_int64_t
)hfsmp
->totalBlocks
* (u_int64_t
)hfsmp
->blockSize
;
1786 if (newsize
> cursize
) {
1787 return hfs_extendfs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
1788 } else if (newsize
< cursize
) {
1789 return hfs_truncatefs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
1794 case HFS_CHANGE_NEXT_ALLOCATION
: {
1795 int error
= 0; /* Assume success */
1798 if (vnode_vfsisrdonly(vp
)) {
1801 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1802 if (suser(cred
, NULL
) &&
1803 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1804 return (EACCES
); /* must be owner of file system */
1806 if (!vnode_isvroot(vp
)) {
1809 hfs_lock_mount(hfsmp
);
1810 location
= *(u_int32_t
*)ap
->a_data
;
1811 if ((location
>= hfsmp
->allocLimit
) &&
1812 (location
!= HFS_NO_UPDATE_NEXT_ALLOCATION
)) {
1814 goto fail_change_next_allocation
;
1816 /* Return previous value. */
1817 *(u_int32_t
*)ap
->a_data
= hfsmp
->nextAllocation
;
1818 if (location
== HFS_NO_UPDATE_NEXT_ALLOCATION
) {
1819 /* On magic value for location, set nextAllocation to next block
1820 * after metadata zone and set flag in mount structure to indicate
1821 * that nextAllocation should not be updated again.
1823 if (hfsmp
->hfs_metazone_end
!= 0) {
1824 HFS_UPDATE_NEXT_ALLOCATION(hfsmp
, hfsmp
->hfs_metazone_end
+ 1);
1826 hfsmp
->hfs_flags
|= HFS_SKIP_UPDATE_NEXT_ALLOCATION
;
1828 hfsmp
->hfs_flags
&= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION
;
1829 HFS_UPDATE_NEXT_ALLOCATION(hfsmp
, location
);
1831 MarkVCBDirty(hfsmp
);
1832 fail_change_next_allocation
:
1833 hfs_unlock_mount(hfsmp
);
1838 case HFS_SETBACKINGSTOREINFO
: {
1839 struct vnode
* bsfs_rootvp
;
1840 struct vnode
* di_vp
;
1841 struct hfs_backingstoreinfo
*bsdata
;
1844 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
1847 if (hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) {
1850 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1851 if (suser(cred
, NULL
) &&
1852 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1853 return (EACCES
); /* must be owner of file system */
1855 bsdata
= (struct hfs_backingstoreinfo
*)ap
->a_data
;
1856 if (bsdata
== NULL
) {
1859 if ((error
= file_vnode(bsdata
->backingfd
, &di_vp
))) {
1862 if ((error
= vnode_getwithref(di_vp
))) {
1863 file_drop(bsdata
->backingfd
);
1867 if (vnode_mount(vp
) == vnode_mount(di_vp
)) {
1868 (void)vnode_put(di_vp
);
1869 file_drop(bsdata
->backingfd
);
1874 * Obtain the backing fs root vnode and keep a reference
1875 * on it. This reference will be dropped in hfs_unmount.
1877 error
= VFS_ROOT(vnode_mount(di_vp
), &bsfs_rootvp
, NULL
); /* XXX use context! */
1879 (void)vnode_put(di_vp
);
1880 file_drop(bsdata
->backingfd
);
1883 vnode_ref(bsfs_rootvp
);
1884 vnode_put(bsfs_rootvp
);
1886 hfsmp
->hfs_backingfs_rootvp
= bsfs_rootvp
;
1888 hfsmp
->hfs_flags
|= HFS_HAS_SPARSE_DEVICE
;
1889 /* The free extent cache is managed differently for sparse devices.
1890 * There is a window between which the volume is mounted and the
1891 * device is marked as sparse, so the free extent cache for this
1892 * volume is currently initialized as normal volume (sorted by block
1893 * count). Reset the cache so that it will be rebuilt again
1894 * for sparse device (sorted by start block).
1896 ResetVCBFreeExtCache(hfsmp
);
1898 hfsmp
->hfs_sparsebandblks
= bsdata
->bandsize
/ HFSTOVCB(hfsmp
)->blockSize
;
1899 hfsmp
->hfs_sparsebandblks
*= 4;
1901 /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */
1904 * If the sparse image is on a sparse image file (as opposed to a sparse
1905 * bundle), then we may need to limit the free space to the maximum size
1906 * of a file on that volume. So we query (using pathconf), and if we get
1907 * a meaningful result, we cache the number of blocks for later use in
1910 hfsmp
->hfs_backingfs_maxblocks
= 0;
1911 if (vnode_vtype(di_vp
) == VREG
) {
1914 terr
= vn_pathconf(di_vp
, _PC_FILESIZEBITS
, &hostbits
, context
);
1915 if (terr
== 0 && hostbits
!= 0 && hostbits
< 64) {
1916 u_int64_t hostfilesizemax
= ((u_int64_t
)1) << hostbits
;
1918 hfsmp
->hfs_backingfs_maxblocks
= hostfilesizemax
/ hfsmp
->blockSize
;
1922 (void)vnode_put(di_vp
);
1923 file_drop(bsdata
->backingfd
);
1926 case HFS_CLRBACKINGSTOREINFO
: {
1927 struct vnode
* tmpvp
;
1929 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1930 if (suser(cred
, NULL
) &&
1931 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1932 return (EACCES
); /* must be owner of file system */
1934 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
1938 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
1939 hfsmp
->hfs_backingfs_rootvp
) {
1941 hfsmp
->hfs_flags
&= ~HFS_HAS_SPARSE_DEVICE
;
1942 tmpvp
= hfsmp
->hfs_backingfs_rootvp
;
1943 hfsmp
->hfs_backingfs_rootvp
= NULLVP
;
1944 hfsmp
->hfs_sparsebandblks
= 0;
1949 #endif /* HFS_SPARSE_DEV */
1951 /* Change the next CNID stored in the VH */
1952 case HFS_CHANGE_NEXTCNID
: {
1953 int error
= 0; /* Assume success */
1958 if (vnode_vfsisrdonly(vp
)) {
1961 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1962 if (suser(cred
, NULL
) &&
1963 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1964 return (EACCES
); /* must be owner of file system */
1967 fileid
= *(u_int32_t
*)ap
->a_data
;
1969 /* Must have catalog lock excl. to advance the CNID pointer */
1970 lockflags
= hfs_systemfile_lock (hfsmp
, SFL_CATALOG
, HFS_EXCLUSIVE_LOCK
);
1972 hfs_lock_mount(hfsmp
);
1974 /* If it is less than the current next CNID, force the wraparound bit to be set */
1975 if (fileid
< hfsmp
->vcbNxtCNID
) {
1979 /* Return previous value. */
1980 *(u_int32_t
*)ap
->a_data
= hfsmp
->vcbNxtCNID
;
1982 hfsmp
->vcbNxtCNID
= fileid
;
1985 hfsmp
->vcbAtrb
|= kHFSCatalogNodeIDsReusedMask
;
1988 MarkVCBDirty(hfsmp
);
1989 hfs_unlock_mount(hfsmp
);
1990 hfs_systemfile_unlock (hfsmp
, lockflags
);
1998 mp
= vnode_mount(vp
);
1999 hfsmp
= VFSTOHFS(mp
);
2004 vfsp
= vfs_statfs(mp
);
2006 if (kauth_cred_getuid(cred
) != vfsp
->f_owner
&&
2007 !kauth_cred_issuser(cred
))
2010 lck_rw_lock_exclusive(&hfsmp
->hfs_insync
);
2012 // flush things before we get started to try and prevent
2013 // dirty data from being paged out while we're frozen.
2014 // note: can't do this after taking the lock as it will
2015 // deadlock against ourselves.
2016 vnode_iterate(mp
, 0, hfs_freezewrite_callback
, NULL
);
2017 hfs_lock_global (hfsmp
, HFS_EXCLUSIVE_LOCK
);
2019 // DO NOT call hfs_journal_flush() because that takes a
2020 // shared lock on the global exclusive lock!
2021 journal_flush(hfsmp
->jnl
, TRUE
);
2023 // don't need to iterate on all vnodes, we just need to
2024 // wait for writes to the system files and the device vnode
2026 // Now that journal flush waits for all metadata blocks to
2027 // be written out, waiting for btree writes is probably no
2029 if (HFSTOVCB(hfsmp
)->extentsRefNum
)
2030 vnode_waitforwrites(HFSTOVCB(hfsmp
)->extentsRefNum
, 0, 0, 0, "hfs freeze");
2031 if (HFSTOVCB(hfsmp
)->catalogRefNum
)
2032 vnode_waitforwrites(HFSTOVCB(hfsmp
)->catalogRefNum
, 0, 0, 0, "hfs freeze");
2033 if (HFSTOVCB(hfsmp
)->allocationsRefNum
)
2034 vnode_waitforwrites(HFSTOVCB(hfsmp
)->allocationsRefNum
, 0, 0, 0, "hfs freeze");
2035 if (hfsmp
->hfs_attribute_vp
)
2036 vnode_waitforwrites(hfsmp
->hfs_attribute_vp
, 0, 0, 0, "hfs freeze");
2037 vnode_waitforwrites(hfsmp
->hfs_devvp
, 0, 0, 0, "hfs freeze");
2039 hfsmp
->hfs_freezing_proc
= current_proc();
2045 vfsp
= vfs_statfs(vnode_mount(vp
));
2046 if (kauth_cred_getuid(cred
) != vfsp
->f_owner
&&
2047 !kauth_cred_issuser(cred
))
2050 // if we're not the one who froze the fs then we
2052 if (hfsmp
->hfs_freezing_proc
!= current_proc()) {
2056 // NOTE: if you add code here, also go check the
2057 // code that "thaws" the fs in hfs_vnop_close()
2059 hfsmp
->hfs_freezing_proc
= NULL
;
2060 hfs_unlock_global (hfsmp
);
2061 lck_rw_unlock_exclusive(&hfsmp
->hfs_insync
);
2066 case HFS_BULKACCESS_FSCTL
: {
2069 if (hfsmp
->hfs_flags
& HFS_STANDARD
) {
2074 size
= sizeof(struct user64_access_t
);
2076 size
= sizeof(struct user32_access_t
);
2079 return do_bulk_access_check(hfsmp
, vp
, ap
, size
, context
);
2082 case HFS_EXT_BULKACCESS_FSCTL
: {
2085 if (hfsmp
->hfs_flags
& HFS_STANDARD
) {
2090 size
= sizeof(struct user64_ext_access_t
);
2092 size
= sizeof(struct user32_ext_access_t
);
2095 return do_bulk_access_check(hfsmp
, vp
, ap
, size
, context
);
2098 case HFS_SET_XATTREXTENTS_STATE
: {
2101 if (ap
->a_data
== NULL
) {
2105 state
= *(int *)ap
->a_data
;
2107 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2111 /* Super-user can enable or disable extent-based extended
2112 * attribute support on a volume
2113 * Note: Starting Mac OS X 10.7, extent-based extended attributes
2114 * are enabled by default, so any change will be transient only
2115 * till the volume is remounted.
2117 if (!kauth_cred_issuser(kauth_cred_get())) {
2120 if (state
== 0 || state
== 1)
2121 return hfs_set_volxattr(hfsmp
, HFS_SET_XATTREXTENTS_STATE
, state
);
2126 case F_SETSTATICCONTENT
: {
2128 int enable_static
= 0;
2129 struct cnode
*cp
= NULL
;
2131 * lock the cnode, decorate the cnode flag, and bail out.
2132 * VFS should have already authenticated the caller for us.
2137 * Note that even though ap->a_data is of type caddr_t,
2138 * the fcntl layer at the syscall handler will pass in NULL
2139 * or 1 depending on what the argument supplied to the fcntl
2140 * was. So it is in fact correct to check the ap->a_data
2141 * argument for zero or non-zero value when deciding whether or not
2142 * to enable the static bit in the cnode.
2146 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2151 error
= hfs_lock (cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
2153 if (enable_static
) {
2154 cp
->c_flag
|= C_SSD_STATIC
;
2157 cp
->c_flag
&= ~C_SSD_STATIC
;
2164 case F_SET_GREEDY_MODE
: {
2166 int enable_greedy_mode
= 0;
2167 struct cnode
*cp
= NULL
;
2169 * lock the cnode, decorate the cnode flag, and bail out.
2170 * VFS should have already authenticated the caller for us.
2175 * Note that even though ap->a_data is of type caddr_t,
2176 * the fcntl layer at the syscall handler will pass in NULL
2177 * or 1 depending on what the argument supplied to the fcntl
2178 * was. So it is in fact correct to check the ap->a_data
2179 * argument for zero or non-zero value when deciding whether or not
2180 * to enable the greedy mode bit in the cnode.
2182 enable_greedy_mode
= 1;
2184 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2189 error
= hfs_lock (cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
2191 if (enable_greedy_mode
) {
2192 cp
->c_flag
|= C_SSD_GREEDY_MODE
;
2195 cp
->c_flag
&= ~C_SSD_GREEDY_MODE
;
2202 case F_MAKECOMPRESSED
: {
2204 uint32_t gen_counter
;
2205 struct cnode
*cp
= NULL
;
2206 int reset_decmp
= 0;
2208 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2213 * acquire & lock the cnode.
2214 * VFS should have already authenticated the caller for us.
2219 * Cast the pointer into a uint32_t so we can extract the
2220 * supplied generation counter.
2222 gen_counter
= *((uint32_t*)ap
->a_data
);
2230 /* Grab truncate lock first; we may truncate the file */
2231 hfs_lock_truncate (cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
2233 error
= hfs_lock (cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
2235 hfs_unlock_truncate(cp
, HFS_LOCK_DEFAULT
);
2239 /* Are there any other usecounts/FDs? */
2240 if (vnode_isinuse(vp
, 1)) {
2242 hfs_unlock_truncate(cp
, HFS_LOCK_DEFAULT
);
2247 /* now we have the cnode locked down; Validate arguments */
2248 if (cp
->c_attr
.ca_flags
& (UF_IMMUTABLE
| UF_COMPRESSED
)) {
2249 /* EINVAL if you are trying to manipulate an IMMUTABLE file */
2251 hfs_unlock_truncate (cp
, HFS_LOCK_DEFAULT
);
2255 if ((hfs_get_gencount (cp
)) == gen_counter
) {
2257 * OK, the gen_counter matched. Go for it:
2258 * Toggle state bits, truncate file, and suppress mtime update
2261 cp
->c_bsdflags
|= UF_COMPRESSED
;
2263 error
= hfs_truncate(vp
, 0, IO_NDELAY
, 0, (HFS_TRUNCATE_SKIPTIMES
), ap
->a_context
);
2269 /* Unlock cnode before executing decmpfs ; they may need to get an EA */
2273 * Reset the decmp state while still holding the truncate lock. We need to
2274 * serialize here against a listxattr on this node which may occur at any
2277 * Even if '0/skiplock' is passed in 2nd argument to hfs_file_is_compressed,
2278 * that will still potentially require getting the com.apple.decmpfs EA. If the
2279 * EA is required, then we can't hold the cnode lock, because the getxattr call is
2280 * generic(through VFS), and can't pass along any info telling it that we're already
2281 * holding it (the lock). If we don't serialize, then we risk listxattr stopping
2282 * and trying to fill in the hfs_file_is_compressed info during the callback
2283 * operation, which will result in deadlock against the b-tree node.
2285 * So, to serialize against listxattr (which will grab buf_t meta references on
2286 * the b-tree blocks), we hold the truncate lock as we're manipulating the
2289 if ((reset_decmp
) && (error
== 0)) {
2290 decmpfs_cnode
*dp
= VTOCMP (vp
);
2292 decmpfs_cnode_set_vnode_state(dp
, FILE_TYPE_UNKNOWN
, 0);
2295 /* Initialize the decmpfs node as needed */
2296 (void) hfs_file_is_compressed (cp
, 0); /* ok to take lock */
2299 hfs_unlock_truncate (cp
, HFS_LOCK_DEFAULT
);
	case F_SETBACKINGSTORE: {
		int error = 0;

		/*
		 * See comment in F_SETSTATICCONTENT re: using
		 * a null check for a_data
		 */
		if (ap->a_data) {
			error = hfs_set_backingstore (vp, 1);
		} else {
			error = hfs_set_backingstore (vp, 0);
		}

		return error;
	}

	case F_GETPATH_MTMINFO: {
		int error = 0;

		int *data = (int*) ap->a_data;

		/* Ask if this is a backingstore vnode */
		error = hfs_is_backingstore (vp, data);

		return error;
	}
2337 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2340 error
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
2342 error
= hfs_fsync(vp
, MNT_WAIT
, TRUE
, p
);
2343 hfs_unlock(VTOC(vp
));
2350 register struct cnode
*cp
;
2353 if (!vnode_isreg(vp
))
2356 error
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
2360 * used by regression test to determine if
2361 * all the dirty pages (via write) have been cleaned
2362 * after a call to 'fsysnc'.
2364 error
= is_file_clean(vp
, VTOF(vp
)->ff_size
);
	case F_RDADVISE: {
		register struct radvisory *ra;
		struct filefork *fp;
		int error;

		if (!vnode_isreg(vp))
			return (EINVAL);

		ra = (struct radvisory *)(ap->a_data);
		fp = VTOF(vp);

		/* Protect against a size change. */
		hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

#if HFS_COMPRESSION
		if (compressed && (uncompressed_size == -1)) {
			/* fetching the uncompressed size failed above, so return the error */
			error = decmpfs_error;
		} else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
				(!compressed && (ra->ra_offset >= fp->ff_size))) {
			error = EFBIG;
		}
#else /* HFS_COMPRESSION */
		if (ra->ra_offset >= fp->ff_size) {
			error = EFBIG;
		}
#endif /* HFS_COMPRESSION */
		else {
			error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
		}

		hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
		return (error);
	}
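	/*
	 * Illustrative sketch (not part of this driver): F_RDADVISE is issued
	 * from userland with a struct radvisory describing the byte range to
	 * pre-read, assuming a regular file descriptor on an HFS volume.
	 *
	 *	#include <fcntl.h>
	 *
	 *	// Hint that [offset, offset+count) will be read soon.
	 *	int readahead_hint(int fd, off_t offset, int count)
	 *	{
	 *		struct radvisory ra = { .ra_offset = offset, .ra_count = count };
	 *		return fcntl(fd, F_RDADVISE, &ra);
	 *	}
	 */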
2405 case _IOC(IOC_OUT
,'h', 4, 0): /* Create date in local time */
2408 *(user_time_t
*)(ap
->a_data
) = (user_time_t
) (to_bsd_time(VTOVCB(vp
)->localCreateDate
));
2411 *(user32_time_t
*)(ap
->a_data
) = (user32_time_t
) (to_bsd_time(VTOVCB(vp
)->localCreateDate
));
	case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
		*(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
		break;

	case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
		*(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
		break;

	case HFS_FSCTL_GET_VERY_LOW_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
		break;

	case HFS_FSCTL_SET_VERY_LOW_DISK:
		if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
			return (EINVAL);
		}

		hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
		break;

	case HFS_FSCTL_GET_LOW_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
		break;

	case HFS_FSCTL_SET_LOW_DISK:
		if (   *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
		    || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
			return (EINVAL);
		}

		hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
		break;

	case HFS_FSCTL_GET_DESIRED_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
		break;

	case HFS_FSCTL_SET_DESIRED_DISK:
		if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
			return (EINVAL);
		}

		hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
		break;
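	/*
	 * The three notification levels above are kept strictly ordered,
	 * dangerlimit < warninglimit < desiredlevel, which is why each setter
	 * rejects a value that would cross a neighbouring threshold.
	 * Illustrative sketch (assumption: these selectors are reachable from
	 * userland via fsctl(2) on a path inside the volume):
	 *
	 *	#include <sys/fsctl.h>
	 *
	 *	// Read back the current low-disk warning threshold for a volume.
	 *	int get_low_disk_limit(const char *volpath, uint32_t *value)
	 *	{
	 *		return fsctl(volpath, HFS_FSCTL_GET_LOW_DISK, value, 0);
	 *	}
	 */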
	case HFS_VOLUME_STATUS:
		*(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
		break;

	case HFS_SET_BOOT_INFO:
		if (!vnode_isvroot(vp))
			return (EINVAL);
		if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
			return (EACCES);	/* must be superuser or owner of filesystem */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		hfs_lock_mount (hfsmp);
		bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
		hfs_unlock_mount (hfsmp);
		(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
		break;
	case HFS_GET_BOOT_INFO:
		if (!vnode_isvroot(vp))
			return (EINVAL);
		hfs_lock_mount (hfsmp);
		bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
		hfs_unlock_mount(hfsmp);
		break;

	case HFS_MARK_BOOT_CORRUPT:
		/* Mark the boot volume corrupt by setting
		 * kHFSVolumeInconsistentBit in the volume header.  This will
		 * force fsck_hfs on next mount.
		 */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return (EACCES);
		}

		/* Allowed only on the root vnode of the boot volume */
		if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
		    !vnode_isvroot(vp)) {
			return (EINVAL);
		}
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
		hfs_mark_volume_inconsistent(hfsmp);
		break;
	case HFS_FSCTL_GET_JOURNAL_INFO:
		jip = (struct hfs_journal_info*)ap->a_data;

		if (hfsmp->jnl == NULL) {
			jnl_start = 0;
			jnl_size  = 0;
		} else {
			jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
			jnl_size  = (off_t)hfsmp->jnl_size;
		}

		jip->jstart = jnl_start;
		jip->jsize = jnl_size;
		break;
	case HFS_SET_ALWAYS_ZEROFILL: {
		struct cnode *cp = VTOC(vp);

		if (*(int *)ap->a_data) {
			cp->c_flag |= C_ALWAYS_ZEROFILL;
		} else {
			cp->c_flag &= ~C_ALWAYS_ZEROFILL;
		}
		break;
	}

	case HFS_DISABLE_METAZONE: {
		/* Only root can disable metadata zone */
		if (!kauth_cred_issuser(kauth_cred_get())) {
			return (EACCES);
		}
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}

		/* Disable metadata zone now */
		(void) hfs_metadatazone_init(hfsmp, true);
		printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
		break;
	}

	default:
		return (ENOTTY);
	}

	return 0;
}
int
hfs_vnop_select(__unused struct vnop_select_args *ap)
/*
	struct vnop_select_args {
		vnode_t a_vp;
		int a_which;
		int a_fflags;
		void *a_wql;
		vfs_context_t a_context;
	};
*/
{
	/*
	 * We should really check to see if I/O is possible.
	 */
	return (1);
}

/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently it's 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks.
 */
int
hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
{
	struct filefork *fp = VTOF(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	int retval = E_NONE;
	u_int32_t logBlockSize;
	size_t bytesContAvail = 0;
	off_t blockposition;
	int lockExtBtree;
	int lockflags;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (vpp != NULL)
		*vpp = hfsmp->hfs_devvp;
	if (bnp == NULL)
		return (0);

	logBlockSize = GetLogicalBlockSize(vp);
	blockposition = (off_t)bn * logBlockSize;

	lockExtBtree = overflow_extents(fp);

	if (lockExtBtree)
		lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);

	retval = MacToVFSError(
			MapFileBlockC(HFSTOVCB(hfsmp), (FCB *)fp, MAXPHYSIO,
				blockposition, bnp, &bytesContAvail));

	if (lockExtBtree)
		hfs_systemfile_unlock(hfsmp, lockflags);

	if (retval == E_NONE) {
		/* Figure out how many read ahead blocks there are */
		if (runp != NULL) {
			if (can_cluster(logBlockSize)) {
				/* Make sure this result never goes negative: */
				*runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
			} else {
				*runp = 0;
			}
		}
	}
	return (retval);
}
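/*
 * Worked example of the read-ahead calculation above: with a 4096-byte
 * logical block size and MapFileBlockC reporting bytesContAvail = 16384,
 * the run is (16384 / 4096) - 1 = 3, i.e. three more contiguous logical
 * blocks follow the one just mapped.  If bytesContAvail were smaller than
 * one logical block, the clamp yields 0 rather than wrapping negative.
 */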
/*
 * Convert logical block number to file offset.
 */
int
hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
/*
	struct vnop_blktooff_args {
		vnode_t a_vp;
		daddr64_t a_lblkno;
		off_t *a_offset;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);

	*ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);

	return (0);
}
/*
 * Convert file offset to logical block number.
 */
int
hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
/*
	struct vnop_offtoblk_args {
		vnode_t a_vp;
		off_t a_offset;
		daddr64_t *a_lblkno;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);

	*ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));

	return (0);
}
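/*
 * Worked example for the two conversions above, assuming a 4096-byte
 * logical block size: blktooff maps logical block 10 to byte offset
 * 10 * 4096 = 40960, and offtoblk maps any offset in [40960, 45055]
 * back to logical block 40960 / 4096 = 10 (integer division).
 */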
2682 * Map file offset to physical block number.
2684 * If this function is called for write operation, and if the file
2685 * had virtual blocks allocated (delayed allocation), real blocks
2686 * are allocated by calling ExtendFileC().
2688 * If this function is called for read operation, and if the file
2689 * had virtual blocks allocated (delayed allocation), no change
2690 * to the size of file is done, and if required, rangelist is
2691 * searched for mapping.
2693 * System file cnodes are expected to be locked (shared or exclusive).
2696 hfs_vnop_blockmap(struct vnop_blockmap_args
*ap
)
2698 struct vnop_blockmap_args {
2706 vfs_context_t a_context;
2710 struct vnode
*vp
= ap
->a_vp
;
2712 struct filefork
*fp
;
2713 struct hfsmount
*hfsmp
;
2714 size_t bytesContAvail
= 0;
2715 int retval
= E_NONE
;
2718 struct rl_entry
*invalid_range
;
2719 enum rl_overlaptype overlaptype
;
2724 if (VNODE_IS_RSRC(vp
)) {
2725 /* allow blockmaps to the resource fork */
2727 if ( hfs_file_is_compressed(VTOC(vp
), 1) ) { /* 1 == don't take the cnode lock */
2728 int state
= decmpfs_cnode_get_vnode_state(VTOCMP(vp
));
2730 case FILE_IS_COMPRESSED
:
2732 case FILE_IS_CONVERTING
:
2733 /* if FILE_IS_CONVERTING, we allow blockmap */
2736 printf("invalid state %d for compressed file\n", state
);
2741 #endif /* HFS_COMPRESSION */
2743 /* Do not allow blockmap operation on a directory */
2744 if (vnode_isdir(vp
)) {
2749 * Check for underlying vnode requests and ensure that logical
2750 * to physical mapping is requested.
2752 if (ap
->a_bpn
== NULL
)
2755 if ( !vnode_issystem(vp
) && !vnode_islnk(vp
) && !vnode_isswap(vp
)) {
2756 if (VTOC(vp
)->c_lockowner
!= current_thread()) {
2757 hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
2766 /* Check virtual blocks only when performing write operation */
2767 if ((ap
->a_flags
& VNODE_WRITE
) && (fp
->ff_unallocblocks
!= 0)) {
2768 if (hfs_start_transaction(hfsmp
) != 0) {
2774 syslocks
= SFL_EXTENTS
| SFL_BITMAP
;
2776 } else if (overflow_extents(fp
)) {
2777 syslocks
= SFL_EXTENTS
;
2781 lockflags
= hfs_systemfile_lock(hfsmp
, syslocks
, HFS_EXCLUSIVE_LOCK
);
2784 * Check for any delayed allocations.
2786 if ((ap
->a_flags
& VNODE_WRITE
) && (fp
->ff_unallocblocks
!= 0)) {
2788 u_int32_t loanedBlocks
;
2791 // Make sure we have a transaction. It's possible
2792 // that we came in and fp->ff_unallocblocks was zero
2793 // but during the time we blocked acquiring the extents
2794 // btree, ff_unallocblocks became non-zero and so we
2795 // will need to start a transaction.
2797 if (started_tr
== 0) {
2799 hfs_systemfile_unlock(hfsmp
, lockflags
);
2806 * Note: ExtendFileC will Release any blocks on loan and
2807 * aquire real blocks. So we ask to extend by zero bytes
2808 * since ExtendFileC will account for the virtual blocks.
2811 loanedBlocks
= fp
->ff_unallocblocks
;
2812 retval
= ExtendFileC(hfsmp
, (FCB
*)fp
, 0, 0,
2813 kEFAllMask
| kEFNoClumpMask
, &actbytes
);
2816 fp
->ff_unallocblocks
= loanedBlocks
;
2817 cp
->c_blocks
+= loanedBlocks
;
2818 fp
->ff_blocks
+= loanedBlocks
;
2820 hfs_lock_mount (hfsmp
);
2821 hfsmp
->loanedBlocks
+= loanedBlocks
;
2822 hfs_unlock_mount (hfsmp
);
2824 hfs_systemfile_unlock(hfsmp
, lockflags
);
2825 cp
->c_flag
|= C_MODIFIED
;
2827 (void) hfs_update(vp
, TRUE
);
2828 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
2830 hfs_end_transaction(hfsmp
);
2837 retval
= MapFileBlockC(hfsmp
, (FCB
*)fp
, ap
->a_size
, ap
->a_foffset
,
2838 ap
->a_bpn
, &bytesContAvail
);
2840 hfs_systemfile_unlock(hfsmp
, lockflags
);
2845 (void) hfs_update(vp
, TRUE
);
2846 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
2847 hfs_end_transaction(hfsmp
);
2851 /* On write, always return error because virtual blocks, if any,
2852 * should have been allocated in ExtendFileC(). We do not
2853 * allocate virtual blocks on read, therefore return error
2854 * only if no virtual blocks are allocated. Otherwise we search
2855 * rangelist for zero-fills
2857 if ((MacToVFSError(retval
) != ERANGE
) ||
2858 (ap
->a_flags
& VNODE_WRITE
) ||
2859 ((ap
->a_flags
& VNODE_READ
) && (fp
->ff_unallocblocks
== 0))) {
2863 /* Validate if the start offset is within logical file size */
2864 if (ap
->a_foffset
>= fp
->ff_size
) {
2869 * At this point, we have encountered a failure during
2870 * MapFileBlockC that resulted in ERANGE, and we are not servicing
2871 * a write, and there are borrowed blocks.
2873 * However, the cluster layer will not call blockmap for
2874 * blocks that are borrowed and in-cache. We have to assume that
2875 * because we observed ERANGE being emitted from MapFileBlockC, this
2876 * extent range is not valid on-disk. So we treat this as a
2877 * mapping that needs to be zero-filled prior to reading.
2879 * Note that under certain circumstances (such as non-contiguous
2880 * userland VM mappings in the calling process), cluster_io
2881 * may be forced to split a large I/O driven by hfs_vnop_write
2882 * into multiple sub-I/Os that necessitate a RMW cycle. If this is
2883 * the case here, then we have already removed the invalid range list
2884 * mapping prior to getting to this blockmap call, so we should not
2885 * search the invalid rangelist for this byte range.
2888 bytesContAvail
= fp
->ff_size
- ap
->a_foffset
;
2890 * Clip the contiguous available bytes to, at most, the allowable
2891 * maximum or the amount requested.
2894 if (bytesContAvail
> ap
->a_size
) {
2895 bytesContAvail
= ap
->a_size
;
2898 *ap
->a_bpn
= (daddr64_t
) -1;
2904 /* MapFileC() found a valid extent in the filefork. Search the
2905 * mapping information further for invalid file ranges
2907 overlaptype
= rl_scan(&fp
->ff_invalidranges
, ap
->a_foffset
,
2908 ap
->a_foffset
+ (off_t
)bytesContAvail
- 1,
2910 if (overlaptype
!= RL_NOOVERLAP
) {
2911 switch(overlaptype
) {
2912 case RL_MATCHINGOVERLAP
:
2913 case RL_OVERLAPCONTAINSRANGE
:
2914 case RL_OVERLAPSTARTSBEFORE
:
2915 /* There's no valid block for this byte offset */
2916 *ap
->a_bpn
= (daddr64_t
)-1;
2917 /* There's no point limiting the amount to be returned
2918 * if the invalid range that was hit extends all the way
2919 * to the EOF (i.e. there's no valid bytes between the
2920 * end of this range and the file's EOF):
2922 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
2923 ((size_t)(invalid_range
->rl_end
+ 1 - ap
->a_foffset
) < bytesContAvail
)) {
2924 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
2928 case RL_OVERLAPISCONTAINED
:
2929 case RL_OVERLAPENDSAFTER
:
2930 /* The range of interest hits an invalid block before the end: */
2931 if (invalid_range
->rl_start
== ap
->a_foffset
) {
2932 /* There's actually no valid information to be had starting here: */
2933 *ap
->a_bpn
= (daddr64_t
)-1;
2934 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
2935 ((size_t)(invalid_range
->rl_end
+ 1 - ap
->a_foffset
) < bytesContAvail
)) {
2936 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
2939 bytesContAvail
= invalid_range
->rl_start
- ap
->a_foffset
;
2946 if (bytesContAvail
> ap
->a_size
)
2947 bytesContAvail
= ap
->a_size
;
2953 *ap
->a_run
= bytesContAvail
;
2956 *(int *)ap
->a_poff
= 0;
2962 return (MacToVFSError(retval
));
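/*
 * Summary of the contract implemented above, for callers of VNOP_BLOCKMAP:
 * on success *a_bpn holds the device block backing a_foffset and *a_run the
 * number of contiguous bytes mapped (clipped to a_size); a *a_bpn of -1
 * means "no valid on-disk data here yet", so the caller must zero-fill that
 * range instead of issuing a read.  A rough sketch of how a hypothetical
 * in-kernel caller might consume it (the scenario is illustrative only):
 *
 *	daddr64_t blkno;
 *	size_t run;
 *	if (VNOP_BLOCKMAP(vp, foffset, size, &blkno, &run, NULL,
 *	                  VNODE_READ, ctx) == 0) {
 *		if (blkno == -1) {
 *			// zero-fill 'run' bytes at 'foffset'
 *		} else {
 *			// read 'run' bytes starting at device block 'blkno'
 *		}
 *	}
 */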
2966 * prepare and issue the I/O
2967 * buf_strategy knows how to deal
2968 * with requests that require
2972 hfs_vnop_strategy(struct vnop_strategy_args
*ap
)
2974 buf_t bp
= ap
->a_bp
;
2975 vnode_t vp
= buf_vnode(bp
);
2978 /* Mark buffer as containing static data if cnode flag set */
2979 if (VTOC(vp
)->c_flag
& C_SSD_STATIC
) {
2983 /* Mark buffer as containing static data if cnode flag set */
2984 if (VTOC(vp
)->c_flag
& C_SSD_GREEDY_MODE
) {
2985 bufattr_markgreedymode((bufattr_t
)(&bp
->b_attr
));
2991 if ((cp
= cp_get_protected_cnode(vp
)) != NULL
) {
2993 * We rely upon the truncate lock to protect the
2994 * CP cache key from getting tossed prior to our IO finishing here.
2995 * Nearly all cluster io calls to manipulate file payload from HFS
2996 * take the truncate lock before calling into the cluster
2997 * layer to ensure the file size does not change, or that they
2998 * have exclusive right to change the EOF of the file.
2999 * That same guarantee protects us here since the code that
3000 * deals with CP lock events must now take the truncate lock
3001 * before doing anything.
3003 * There is 1 exception here:
3004 * 1) One exception should be the VM swapfile IO, because HFS will
3005 * funnel the VNOP_PAGEOUT directly into a cluster_pageout call for the
3006 * swapfile code only without holding the truncate lock. This is because
3007 * individual swapfiles are maintained at fixed-length sizes by the VM code.
3008 * In non-swapfile IO we use PAGEOUT_V2 semantics which allow us to
3009 * create our own UPL and thus take the truncate lock before calling
3010 * into the cluster layer. In that case, however, we are not concerned
3011 * with the CP blob being wiped out in the middle of the IO
3012 * because there isn't anything to toss; the VM swapfile key stays
3013 * in-core as long as the file is open.
3016 * For filesystem resize, we may not have access to the underlying
3017 * file's cache key for whatever reason (device may be locked). However,
3018 * we do not need it since we are going to use the temporary HFS-wide resize key
3019 * which is generated once we start relocating file content. If this file's I/O
3020 * should be done using the resize key, it will have been supplied already, so
3021 * do not attach the file's cp blob to the buffer.
3023 if ((cp
->c_cpentry
->cp_flags
& CP_RELOCATION_INFLIGHT
) == 0) {
3024 buf_setcpaddr(bp
, cp
->c_cpentry
);
3027 #endif /* CONFIG_PROTECT */
3029 error
= buf_strategy(VTOHFS(vp
)->hfs_devvp
, ap
);
static int
hfs_minorupdate(struct vnode *vp) {
	struct cnode *cp = VTOC(vp);
	cp->c_flag &= ~C_MODIFIED;
	cp->c_touch_acctime = 0;
	cp->c_touch_chgtime = 0;
	cp->c_touch_modtime = 0;

	return 0;
}
3046 do_hfs_truncate(struct vnode
*vp
, off_t length
, int flags
, int truncateflags
, vfs_context_t context
)
3048 register struct cnode
*cp
= VTOC(vp
);
3049 struct filefork
*fp
= VTOF(vp
);
3050 struct proc
*p
= vfs_context_proc(context
);;
3051 kauth_cred_t cred
= vfs_context_ucred(context
);
3054 off_t actualBytesAdded
;
3056 u_int32_t fileblocks
;
3058 struct hfsmount
*hfsmp
;
3060 int skipupdate
= (truncateflags
& HFS_TRUNCATE_SKIPUPDATE
);
3061 int suppress_times
= (truncateflags
& HFS_TRUNCATE_SKIPTIMES
);
3063 blksize
= VTOVCB(vp
)->blockSize
;
3064 fileblocks
= fp
->ff_blocks
;
3065 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
3067 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_START
,
3068 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
3073 /* This should only happen with a corrupt filesystem */
3074 if ((off_t
)fp
->ff_size
< 0)
3077 if ((!ISHFSPLUS(VTOVCB(vp
))) && (length
> (off_t
)MAXHFSFILESIZE
))
3084 /* Files that are changing size are not hot file candidates. */
3085 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
3086 fp
->ff_bytesread
= 0;
3090 * We cannot just check if fp->ff_size == length (as an optimization)
3091 * since there may be extra physical blocks that also need truncation.
3094 if ((retval
= hfs_getinoquota(cp
)))
3099 * Lengthen the size of the file. We must ensure that the
3100 * last byte of the file is allocated. Since the smallest
3101 * value of ff_size is 0, length will be at least 1.
3103 if (length
> (off_t
)fp
->ff_size
) {
3105 retval
= hfs_chkdq(cp
, (int64_t)(roundup(length
- filebytes
, blksize
)),
3111 * If we don't have enough physical space then
3112 * we need to extend the physical size.
3114 if (length
> filebytes
) {
3116 u_int32_t blockHint
= 0;
3118 /* All or nothing and don't round up to clumpsize. */
3119 eflags
= kEFAllMask
| kEFNoClumpMask
;
3121 if (cred
&& suser(cred
, NULL
) != 0)
3122 eflags
|= kEFReserveMask
; /* keep a reserve */
3125 * Allocate Journal and Quota files in metadata zone.
3127 if (filebytes
== 0 &&
3128 hfsmp
->hfs_flags
& HFS_METADATA_ZONE
&&
3129 hfs_virtualmetafile(cp
)) {
3130 eflags
|= kEFMetadataMask
;
3131 blockHint
= hfsmp
->hfs_metazone_start
;
3133 if (hfs_start_transaction(hfsmp
) != 0) {
3138 /* Protect extents b-tree and allocation bitmap */
3139 lockflags
= SFL_BITMAP
;
3140 if (overflow_extents(fp
))
3141 lockflags
|= SFL_EXTENTS
;
3142 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3144 while ((length
> filebytes
) && (retval
== E_NONE
)) {
3145 bytesToAdd
= length
- filebytes
;
3146 retval
= MacToVFSError(ExtendFileC(VTOVCB(vp
),
3151 &actualBytesAdded
));
3153 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
3154 if (actualBytesAdded
== 0 && retval
== E_NONE
) {
3155 if (length
> filebytes
)
3161 hfs_systemfile_unlock(hfsmp
, lockflags
);
3165 (void) hfs_minorupdate(vp
);
3168 (void) hfs_update(vp
, TRUE
);
3169 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3173 hfs_end_transaction(hfsmp
);
3178 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_NONE
,
3179 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
3182 if (!(flags
& IO_NOZEROFILL
)) {
3183 if (UBCINFOEXISTS(vp
) && (vnode_issystem(vp
) == 0) && retval
== E_NONE
) {
3184 struct rl_entry
*invalid_range
;
3187 zero_limit
= (fp
->ff_size
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
3188 if (length
< zero_limit
) zero_limit
= length
;
3190 if (length
> (off_t
)fp
->ff_size
) {
3193 /* Extending the file: time to fill out the current last page w. zeroes? */
3194 if ((fp
->ff_size
& PAGE_MASK_64
) &&
3195 (rl_scan(&fp
->ff_invalidranges
, fp
->ff_size
& ~PAGE_MASK_64
,
3196 fp
->ff_size
- 1, &invalid_range
) == RL_NOOVERLAP
)) {
3198 /* There's some valid data at the start of the (current) last page
3199 of the file, so zero out the remainder of that page to ensure the
3200 entire page contains valid data. Since there is no invalid range
3201 possible past the (current) eof, there's no need to remove anything
3202 from the invalid range list before calling cluster_write(): */
3204 retval
= cluster_write(vp
, (struct uio
*) 0, fp
->ff_size
, zero_limit
,
3205 fp
->ff_size
, (off_t
)0,
3206 (flags
& IO_SYNC
) | IO_HEADZEROFILL
| IO_NOZERODIRTY
);
3207 hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
3208 if (retval
) goto Err_Exit
;
3210 /* Merely invalidate the remaining area, if necessary: */
3211 if (length
> zero_limit
) {
3213 rl_add(zero_limit
, length
- 1, &fp
->ff_invalidranges
);
3214 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
3217 /* The page containing the (current) eof is invalid: just add the
3218 remainder of the page to the invalid list, along with the area
3219 being newly allocated:
3222 rl_add(fp
->ff_size
, length
- 1, &fp
->ff_invalidranges
);
3223 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
3227 panic("hfs_truncate: invoked on non-UBC object?!");
3230 if (suppress_times
== 0) {
3231 cp
->c_touch_modtime
= TRUE
;
3233 fp
->ff_size
= length
;
3235 } else { /* Shorten the size of the file */
3237 if ((off_t
)fp
->ff_size
> length
) {
3238 /* Any space previously marked as invalid is now irrelevant: */
3239 rl_remove(length
, fp
->ff_size
- 1, &fp
->ff_invalidranges
);
3243 * Account for any unmapped blocks. Note that the new
3244 * file length can still end up with unmapped blocks.
3246 if (fp
->ff_unallocblocks
> 0) {
3247 u_int32_t finalblks
;
3248 u_int32_t loanedBlocks
;
3250 hfs_lock_mount(hfsmp
);
3251 loanedBlocks
= fp
->ff_unallocblocks
;
3252 cp
->c_blocks
-= loanedBlocks
;
3253 fp
->ff_blocks
-= loanedBlocks
;
3254 fp
->ff_unallocblocks
= 0;
3256 hfsmp
->loanedBlocks
-= loanedBlocks
;
3258 finalblks
= (length
+ blksize
- 1) / blksize
;
3259 if (finalblks
> fp
->ff_blocks
) {
3260 /* calculate required unmapped blocks */
3261 loanedBlocks
= finalblks
- fp
->ff_blocks
;
3262 hfsmp
->loanedBlocks
+= loanedBlocks
;
3264 fp
->ff_unallocblocks
= loanedBlocks
;
3265 cp
->c_blocks
+= loanedBlocks
;
3266 fp
->ff_blocks
+= loanedBlocks
;
3268 hfs_unlock_mount (hfsmp
);
3272 * For a TBE process the deallocation of the file blocks is
3273 * delayed until the file is closed. And hfs_close calls
3274 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
3275 * isn't set, we make sure this isn't a TBE process.
3277 if ((flags
& IO_NDELAY
) || (proc_tbe(p
) == 0)) {
3279 off_t savedbytes
= ((off_t
)fp
->ff_blocks
* (off_t
)blksize
);
3281 if (hfs_start_transaction(hfsmp
) != 0) {
3286 if (fp
->ff_unallocblocks
== 0) {
3287 /* Protect extents b-tree and allocation bitmap */
3288 lockflags
= SFL_BITMAP
;
3289 if (overflow_extents(fp
))
3290 lockflags
|= SFL_EXTENTS
;
3291 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3293 retval
= MacToVFSError(TruncateFileC(VTOVCB(vp
), (FCB
*)fp
, length
, 0,
3294 FORK_IS_RSRC (fp
), FTOC(fp
)->c_fileid
, false));
3296 hfs_systemfile_unlock(hfsmp
, lockflags
);
3300 fp
->ff_size
= length
;
3303 (void) hfs_minorupdate(vp
);
3306 (void) hfs_update(vp
, TRUE
);
3307 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3310 hfs_end_transaction(hfsmp
);
3312 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
3316 /* These are bytesreleased */
3317 (void) hfs_chkdq(cp
, (int64_t)-(savedbytes
- filebytes
), NOCRED
, 0);
3321 * Only set update flag if the logical length changes & we aren't
3322 * suppressing modtime updates.
3324 if (((off_t
)fp
->ff_size
!= length
) && (suppress_times
== 0)) {
3325 cp
->c_touch_modtime
= TRUE
;
3327 fp
->ff_size
= length
;
3329 if (cp
->c_mode
& (S_ISUID
| S_ISGID
)) {
3330 if (!vfs_context_issuser(context
)) {
3331 cp
->c_mode
&= ~(S_ISUID
| S_ISGID
);
3336 retval
= hfs_minorupdate(vp
);
3339 cp
->c_touch_chgtime
= TRUE
; /* status changed */
3340 if (suppress_times
== 0) {
3341 cp
->c_touch_modtime
= TRUE
; /* file data was modified */
3344 * If we are not suppressing the modtime update, then
3345 * update the gen count as well.
3347 if (S_ISREG(cp
->c_attr
.ca_mode
) || S_ISLNK (cp
->c_attr
.ca_mode
)) {
3348 hfs_incr_gencount(cp
);
3352 retval
= hfs_update(vp
, MNT_WAIT
);
3355 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_NONE
,
3356 -1, -1, -1, retval
, 0);
3361 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_END
,
3362 (int)length
, (int)fp
->ff_size
, (int)filebytes
, retval
, 0);
3368 * Preparation which must be done prior to deleting the catalog record
3369 * of a file or directory. In order to make the on-disk as safe as possible,
3370 * we remove the catalog entry before releasing the bitmap blocks and the
3371 * overflow extent records. However, some work must be done prior to deleting
3372 * the catalog record.
3374 * When calling this function, the cnode must exist both in memory and on-disk.
3375 * If there are both resource fork and data fork vnodes, this function should
3376 * be called on both.
3380 hfs_prepare_release_storage (struct hfsmount
*hfsmp
, struct vnode
*vp
) {
3382 struct filefork
*fp
= VTOF(vp
);
3383 struct cnode
*cp
= VTOC(vp
);
3388 /* Cannot truncate an HFS directory! */
3389 if (vnode_isdir(vp
)) {
3394 * See the comment below in hfs_truncate for why we need to call
3395 * setsize here. Essentially we want to avoid pending IO if we
3396 * already know that the blocks are going to be released here.
3397 * This function is only called when totally removing all storage for a file, so
3398 * we can take a shortcut and immediately setsize (0);
3402 /* This should only happen with a corrupt filesystem */
3403 if ((off_t
)fp
->ff_size
< 0)
3407 * We cannot just check if fp->ff_size == length (as an optimization)
3408 * since there may be extra physical blocks that also need truncation.
3411 if ((retval
= hfs_getinoquota(cp
))) {
3416 /* Wipe out any invalid ranges which have yet to be backed by disk */
3417 rl_remove(0, fp
->ff_size
- 1, &fp
->ff_invalidranges
);
3420 * Account for any unmapped blocks. Since we're deleting the
3421 * entire file, we don't have to worry about just shrinking
3422 * to a smaller number of borrowed blocks.
3424 if (fp
->ff_unallocblocks
> 0) {
3425 u_int32_t loanedBlocks
;
3427 hfs_lock_mount (hfsmp
);
3428 loanedBlocks
= fp
->ff_unallocblocks
;
3429 cp
->c_blocks
-= loanedBlocks
;
3430 fp
->ff_blocks
-= loanedBlocks
;
3431 fp
->ff_unallocblocks
= 0;
3433 hfsmp
->loanedBlocks
-= loanedBlocks
;
3435 hfs_unlock_mount (hfsmp
);
3443 * Special wrapper around calling TruncateFileC. This function is useable
3444 * even when the catalog record does not exist any longer, making it ideal
3445 * for use when deleting a file. The simplification here is that we know
3446 * that we are releasing all blocks.
3448 * Note that this function may be called when there is no vnode backing
3449 * the file fork in question. We may call this from hfs_vnop_inactive
3450 * to clear out resource fork data (and may not want to clear out the data
3451 * fork yet). As a result, we pointer-check both sets of inputs before
3452 * doing anything with them.
3454 * The caller is responsible for saving off a copy of the filefork(s)
3455 * embedded within the cnode prior to calling this function. The pointers
3456 * supplied as arguments must be valid even if the cnode is no longer valid.
3460 hfs_release_storage (struct hfsmount
*hfsmp
, struct filefork
*datafork
,
3461 struct filefork
*rsrcfork
, u_int32_t fileid
) {
3464 u_int32_t fileblocks
;
3469 blksize
= hfsmp
->blockSize
;
3472 if ((datafork
!= NULL
) && (datafork
->ff_blocks
> 0)) {
3473 fileblocks
= datafork
->ff_blocks
;
3474 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
3476 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3478 while (filebytes
> 0) {
3479 if (filebytes
> HFS_BIGFILE_SIZE
&& overflow_extents(datafork
)) {
3480 filebytes
-= HFS_BIGFILE_SIZE
;
3485 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3486 if (hfs_start_transaction(hfsmp
) != 0) {
3491 if (datafork
->ff_unallocblocks
== 0) {
3492 /* Protect extents b-tree and allocation bitmap */
3493 lockflags
= SFL_BITMAP
;
3494 if (overflow_extents(datafork
))
3495 lockflags
|= SFL_EXTENTS
;
3496 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3498 error
= MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp
), datafork
, filebytes
, 1, 0, fileid
, false));
3500 hfs_systemfile_unlock(hfsmp
, lockflags
);
3503 datafork
->ff_size
= filebytes
;
3505 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3507 /* Finish the transaction and start over if necessary */
3508 hfs_end_transaction(hfsmp
);
3517 if (error
== 0 && (rsrcfork
!= NULL
) && rsrcfork
->ff_blocks
> 0) {
3518 fileblocks
= rsrcfork
->ff_blocks
;
3519 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
3521 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3523 while (filebytes
> 0) {
3524 if (filebytes
> HFS_BIGFILE_SIZE
&& overflow_extents(rsrcfork
)) {
3525 filebytes
-= HFS_BIGFILE_SIZE
;
3530 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3531 if (hfs_start_transaction(hfsmp
) != 0) {
3536 if (rsrcfork
->ff_unallocblocks
== 0) {
3537 /* Protect extents b-tree and allocation bitmap */
3538 lockflags
= SFL_BITMAP
;
3539 if (overflow_extents(rsrcfork
))
3540 lockflags
|= SFL_EXTENTS
;
3541 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3543 error
= MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp
), rsrcfork
, filebytes
, 1, 1, fileid
, false));
3545 hfs_systemfile_unlock(hfsmp
, lockflags
);
3548 rsrcfork
->ff_size
= filebytes
;
3550 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3552 /* Finish the transaction and start over if necessary */
3553 hfs_end_transaction(hfsmp
);
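		/*
		 * The loops above intentionally release space in HFS_BIGFILE_SIZE
		 * chunks so that no single journal transaction has to cover the
		 * whole fork.  As a rough illustration, if HFS_BIGFILE_SIZE is
		 * 2 GB (an assumption about the constant's value), releasing a
		 * 20 GB fork proceeds as ten TruncateFileC calls, each bracketed
		 * by its own hfs_start_transaction/hfs_end_transaction pair.
		 */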
3566 * Truncate a cnode to at most length size, freeing (or adding) the
3570 hfs_truncate(struct vnode
*vp
, off_t length
, int flags
, int skipsetsize
,
3571 int truncateflags
, vfs_context_t context
)
3573 struct filefork
*fp
= VTOF(vp
);
3575 u_int32_t fileblocks
;
3576 int blksize
, error
= 0;
3577 struct cnode
*cp
= VTOC(vp
);
3579 /* Cannot truncate an HFS directory! */
3580 if (vnode_isdir(vp
)) {
3583 /* A swap file cannot change size. */
3584 if (vnode_isswap(vp
) && (length
!= 0)) {
3588 blksize
= VTOVCB(vp
)->blockSize
;
3589 fileblocks
= fp
->ff_blocks
;
3590 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
3593 // Have to do this here so that we don't wind up with
3594 // i/o pending for blocks that are about to be released
3595 // if we truncate the file.
3597 // If skipsetsize is set, then the caller is responsible
3598 // for the ubc_setsize.
3600 // Even if skipsetsize is set, if the length is zero we
3601 // want to call ubc_setsize() because as of SnowLeopard
3602 // it will no longer cause any page-ins and it will drop
3603 // any dirty pages so that we don't do any i/o that we
3604 // don't have to. This also prevents a race where i/o
3605 // for truncated blocks may overwrite later data if the
3606 // blocks get reallocated to a different file.
3608 if (!skipsetsize
|| length
== 0)
3609 ubc_setsize(vp
, length
);
3611 // have to loop truncating or growing files that are
3612 // really big because otherwise transactions can get
3613 // enormous and consume too many kernel resources.
3615 if (length
< filebytes
) {
3616 while (filebytes
> length
) {
3617 if ((filebytes
- length
) > HFS_BIGFILE_SIZE
&& overflow_extents(fp
)) {
3618 filebytes
-= HFS_BIGFILE_SIZE
;
3622 cp
->c_flag
|= C_FORCEUPDATE
;
3623 error
= do_hfs_truncate(vp
, filebytes
, flags
, truncateflags
, context
);
3627 } else if (length
> filebytes
) {
3628 while (filebytes
< length
) {
3629 if ((length
- filebytes
) > HFS_BIGFILE_SIZE
&& overflow_extents(fp
)) {
3630 filebytes
+= HFS_BIGFILE_SIZE
;
3634 cp
->c_flag
|= C_FORCEUPDATE
;
3635 error
= do_hfs_truncate(vp
, filebytes
, flags
, truncateflags
, context
);
3639 } else /* Same logical size */ {
3641 error
= do_hfs_truncate(vp
, length
, flags
, truncateflags
, context
);
3643 /* Files that are changing size are not hot file candidates. */
3644 if (VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
) {
3645 fp
->ff_bytesread
= 0;
3654 * Preallocate file storage space.
3657 hfs_vnop_allocate(struct vnop_allocate_args
/* {
3661 off_t *a_bytesallocated;
3663 vfs_context_t a_context;
3666 struct vnode
*vp
= ap
->a_vp
;
3668 struct filefork
*fp
;
3670 off_t length
= ap
->a_length
;
3672 off_t moreBytesRequested
;
3673 off_t actualBytesAdded
;
3675 u_int32_t fileblocks
;
3676 int retval
, retval2
;
3677 u_int32_t blockHint
;
3678 u_int32_t extendFlags
; /* For call to ExtendFileC */
3679 struct hfsmount
*hfsmp
;
3680 kauth_cred_t cred
= vfs_context_ucred(ap
->a_context
);
3684 *(ap
->a_bytesallocated
) = 0;
3686 if (!vnode_isreg(vp
))
3688 if (length
< (off_t
)0)
3693 orig_ctime
= VTOC(vp
)->c_ctime
;
3695 check_for_tracked_file(vp
, orig_ctime
, ap
->a_length
== 0 ? NAMESPACE_HANDLER_TRUNCATE_OP
|NAMESPACE_HANDLER_DELETE_OP
: NAMESPACE_HANDLER_TRUNCATE_OP
, NULL
);
3697 hfs_lock_truncate(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
3699 if ((retval
= hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
))) {
3707 fileblocks
= fp
->ff_blocks
;
3708 filebytes
= (off_t
)fileblocks
* (off_t
)vcb
->blockSize
;
3710 if ((ap
->a_flags
& ALLOCATEFROMVOL
) && (length
< filebytes
)) {
3715 /* Fill in the flags word for the call to Extend the file */
3717 extendFlags
= kEFNoClumpMask
;
3718 if (ap
->a_flags
& ALLOCATECONTIG
)
3719 extendFlags
|= kEFContigMask
;
3720 if (ap
->a_flags
& ALLOCATEALL
)
3721 extendFlags
|= kEFAllMask
;
3722 if (cred
&& suser(cred
, NULL
) != 0)
3723 extendFlags
|= kEFReserveMask
;
3724 if (hfs_virtualmetafile(cp
))
3725 extendFlags
|= kEFMetadataMask
;
3729 startingPEOF
= filebytes
;
3731 if (ap
->a_flags
& ALLOCATEFROMPEOF
)
3732 length
+= filebytes
;
3733 else if (ap
->a_flags
& ALLOCATEFROMVOL
)
3734 blockHint
= ap
->a_offset
/ VTOVCB(vp
)->blockSize
;
3736 /* If no changes are necesary, then we're done */
3737 if (filebytes
== length
)
3741 * Lengthen the size of the file. We must ensure that the
3742 * last byte of the file is allocated. Since the smallest
3743 * value of filebytes is 0, length will be at least 1.
3745 if (length
> filebytes
) {
3746 off_t total_bytes_added
= 0, orig_request_size
;
3748 orig_request_size
= moreBytesRequested
= length
- filebytes
;
3751 retval
= hfs_chkdq(cp
,
3752 (int64_t)(roundup(moreBytesRequested
, vcb
->blockSize
)),
3759 * Metadata zone checks.
3761 if (hfsmp
->hfs_flags
& HFS_METADATA_ZONE
) {
3763 * Allocate Journal and Quota files in metadata zone.
3765 if (hfs_virtualmetafile(cp
)) {
3766 blockHint
= hfsmp
->hfs_metazone_start
;
3767 } else if ((blockHint
>= hfsmp
->hfs_metazone_start
) &&
3768 (blockHint
<= hfsmp
->hfs_metazone_end
)) {
3770 * Move blockHint outside metadata zone.
3772 blockHint
= hfsmp
->hfs_metazone_end
+ 1;
3777 while ((length
> filebytes
) && (retval
== E_NONE
)) {
3778 off_t bytesRequested
;
3780 if (hfs_start_transaction(hfsmp
) != 0) {
3785 /* Protect extents b-tree and allocation bitmap */
3786 lockflags
= SFL_BITMAP
;
3787 if (overflow_extents(fp
))
3788 lockflags
|= SFL_EXTENTS
;
3789 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
3791 if (moreBytesRequested
>= HFS_BIGFILE_SIZE
) {
3792 bytesRequested
= HFS_BIGFILE_SIZE
;
3794 bytesRequested
= moreBytesRequested
;
3797 if (extendFlags
& kEFContigMask
) {
3798 // if we're on a sparse device, this will force it to do a
3799 // full scan to find the space needed.
3800 hfsmp
->hfs_flags
&= ~HFS_DID_CONTIG_SCAN
;
3803 retval
= MacToVFSError(ExtendFileC(vcb
,
3808 &actualBytesAdded
));
3810 if (retval
== E_NONE
) {
3811 *(ap
->a_bytesallocated
) += actualBytesAdded
;
3812 total_bytes_added
+= actualBytesAdded
;
3813 moreBytesRequested
-= actualBytesAdded
;
3814 if (blockHint
!= 0) {
3815 blockHint
+= actualBytesAdded
/ vcb
->blockSize
;
3818 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)vcb
->blockSize
;
3820 hfs_systemfile_unlock(hfsmp
, lockflags
);
3823 (void) hfs_update(vp
, TRUE
);
3824 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
3827 hfs_end_transaction(hfsmp
);
3832 * if we get an error and no changes were made then exit
3833 * otherwise we must do the hfs_update to reflect the changes
3835 if (retval
&& (startingPEOF
== filebytes
))
3839 * Adjust actualBytesAdded to be allocation block aligned, not
3840 * clump size aligned.
3841 * NOTE: So what we are reporting does not affect reality
3842 * until the file is closed, when we truncate the file to allocation
3845 if (total_bytes_added
!= 0 && orig_request_size
< total_bytes_added
)
3846 *(ap
->a_bytesallocated
) =
3847 roundup(orig_request_size
, (off_t
)vcb
->blockSize
);
3849 } else { /* Shorten the size of the file */
3851 if (fp
->ff_size
> length
) {
3853 * Any buffers that are past the truncation point need to be
3854 * invalidated (to maintain buffer cache consistency).
3858 retval
= hfs_truncate(vp
, length
, 0, 0, 0, ap
->a_context
);
3859 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)vcb
->blockSize
;
3862 * if we get an error and no changes were made then exit
3863 * otherwise we must do the hfs_update to reflect the changes
3865 if (retval
&& (startingPEOF
== filebytes
)) goto Err_Exit
;
3867 /* These are bytesreleased */
3868 (void) hfs_chkdq(cp
, (int64_t)-((startingPEOF
- filebytes
)), NOCRED
,0);
3871 if (fp
->ff_size
> filebytes
) {
3872 fp
->ff_size
= filebytes
;
3875 ubc_setsize(vp
, fp
->ff_size
);
3876 hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
3881 cp
->c_touch_chgtime
= TRUE
;
3882 cp
->c_touch_modtime
= TRUE
;
3883 retval2
= hfs_update(vp
, MNT_WAIT
);
3888 hfs_unlock_truncate(cp
, HFS_LOCK_DEFAULT
);
3895 * Pagein for HFS filesystem
3898 hfs_vnop_pagein(struct vnop_pagein_args
*ap
)
3900 struct vnop_pagein_args {
3903 vm_offset_t a_pl_offset,
3907 vfs_context_t a_context;
3913 struct filefork
*fp
;
3916 upl_page_info_t
*pl
;
3921 boolean_t truncate_lock_held
= FALSE
;
3922 boolean_t file_converted
= FALSE
;
3930 if ((error
= cp_handle_vnop(vp
, CP_READ_ACCESS
| CP_WRITE_ACCESS
, 0)) != 0) {
3932 * If we errored here, then this means that one of two things occurred:
3933 * 1. there was a problem with the decryption of the key.
3934 * 2. the device is locked and we are not allowed to access this particular file.
3936 * Either way, this means that we need to shut down this upl now. As long as
3937 * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves)
3938 * then we create a upl and immediately abort it.
3940 if (ap
->a_pl
== NULL
) {
3941 /* create the upl */
3942 ubc_create_upl (vp
, ap
->a_f_offset
, ap
->a_size
, &upl
, &pl
,
3943 UPL_UBC_PAGEIN
| UPL_RET_ONLY_ABSENT
);
3944 /* mark the range as needed so it doesn't immediately get discarded upon abort */
3945 ubc_upl_range_needed (upl
, ap
->a_pl_offset
/ PAGE_SIZE
, 1);
3947 /* Abort the range */
3948 ubc_upl_abort_range (upl
, 0, ap
->a_size
, UPL_ABORT_FREE_ON_EMPTY
| UPL_ABORT_ERROR
);
3954 #endif /* CONFIG_PROTECT */
3956 if (ap
->a_pl
!= NULL
) {
3958 * this can only happen for swap files now that
3959 * we're asking for V2 paging behavior...
3960 * so don't need to worry about decompression, or
3961 * keeping track of blocks read or taking the truncate lock
3963 error
= cluster_pagein(vp
, ap
->a_pl
, ap
->a_pl_offset
, ap
->a_f_offset
,
3964 ap
->a_size
, (off_t
)fp
->ff_size
, ap
->a_flags
);
3970 * take truncate lock (shared/recursive) to guard against
3971 * zero-fill thru fsync interfering, but only for v2
3973 * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
3974 * lock shared and we are allowed to recurse 1 level if this thread already
3975 * owns the lock exclusively... this can legally occur
3976 * if we are doing a shrinking ftruncate against a file
3977 * that is mapped private, and the pages being truncated
3978 * do not currently exist in the cache... in that case
3979 * we will have to page-in the missing pages in order
3980 * to provide them to the private mapping... we must
3981 * also call hfs_unlock_truncate with a postive been_recursed
3982 * arg to indicate that if we have recursed, there is no need to drop
3983 * the lock. Allowing this simple recursion is necessary
3984 * in order to avoid a certain deadlock... since the ftruncate
3985 * already holds the truncate lock exclusively, if we try
3986 * to acquire it shared to protect the pagein path, we will
3989 * NOTE: The if () block below is a workaround in order to prevent a
3990 * VM deadlock. See rdar://7853471.
3992 * If we are in a forced unmount, then launchd will still have the
3993 * dyld_shared_cache file mapped as it is trying to reboot. If we
3994 * take the truncate lock here to service a page fault, then our
3995 * thread could deadlock with the forced-unmount. The forced unmount
3996 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
3997 * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount
3998 * thread will think it needs to copy all of the data out of the file
3999 * and into a VM copy object. If we hold the cnode lock here, then that
4000 * VM operation will not be able to proceed, because we'll set a busy page
4001 * before attempting to grab the lock. Note that this isn't as simple as "don't
4002 * call ubc_setsize" because doing that would just shift the problem to the
4003 * ubc_msync done before the vnode is reclaimed.
4005 * So, if a forced unmount on this volume is in flight AND the cnode is
4006 * marked C_DELETED, then just go ahead and do the page in without taking
4007 * the lock (thus suspending pagein_v2 semantics temporarily). Since it's on a file
4008 * that is not going to be available on the next mount, this seems like a
4009 * OK solution from a correctness point of view, even though it is hacky.
4011 if (vfs_isforce(vp
->v_mount
)) {
4012 if (cp
->c_flag
& C_DELETED
) {
4013 /* If we don't get it, then just go ahead and operate without the lock */
4014 truncate_lock_held
= hfs_try_trunclock(cp
, HFS_SHARED_LOCK
, HFS_LOCK_SKIP_IF_EXCLUSIVE
);
4018 hfs_lock_truncate(cp
, HFS_SHARED_LOCK
, HFS_LOCK_SKIP_IF_EXCLUSIVE
);
4019 truncate_lock_held
= TRUE
;
4022 kret
= ubc_create_upl(vp
, ap
->a_f_offset
, ap
->a_size
, &upl
, &pl
, UPL_UBC_PAGEIN
| UPL_RET_ONLY_ABSENT
);
4024 if ((kret
!= KERN_SUCCESS
) || (upl
== (upl_t
) NULL
)) {
4028 ubc_upl_range_needed(upl
, ap
->a_pl_offset
/ PAGE_SIZE
, 1);
4033 * Scan from the back to find the last page in the UPL, so that we
4034 * aren't looking at a UPL that may have already been freed by the
4035 * preceding aborts/completions.
4037 for (pg_index
= ((isize
) / PAGE_SIZE
); pg_index
> 0;) {
4038 if (upl_page_present(pl
, --pg_index
))
4040 if (pg_index
== 0) {
4042 * no absent pages were found in the range specified
4043 * just abort the UPL to get rid of it and then we're done
4045 ubc_upl_abort_range(upl
, 0, isize
, UPL_ABORT_FREE_ON_EMPTY
);
4050 * initialize the offset variables before we touch the UPL.
4051 * f_offset is the position into the file, in bytes
4052 * offset is the position into the UPL, in bytes
4053 * pg_index is the pg# of the UPL we're operating on
4054 * isize is the offset into the UPL of the last page that is present.
4056 isize
= ((pg_index
+ 1) * PAGE_SIZE
);
4059 f_offset
= ap
->a_f_offset
;
4065 if ( !upl_page_present(pl
, pg_index
)) {
4067 * we asked for RET_ONLY_ABSENT, so it's possible
4068 * to get back empty slots in the UPL.
4069 * just skip over them
4071 f_offset
+= PAGE_SIZE
;
4072 offset
+= PAGE_SIZE
;
4079 * We know that we have at least one absent page.
4080 * Now checking to see how many in a row we have
4083 xsize
= isize
- PAGE_SIZE
;
4086 if ( !upl_page_present(pl
, pg_index
+ num_of_pages
))
4091 xsize
= num_of_pages
* PAGE_SIZE
;
4094 if (VNODE_IS_RSRC(vp
)) {
4095 /* allow pageins of the resource fork */
4097 int compressed
= hfs_file_is_compressed(VTOC(vp
), 1); /* 1 == don't take the cnode lock */
4100 if (truncate_lock_held
) {
4102 * can't hold the truncate lock when calling into the decmpfs layer
4103 * since it calls back into this layer... even though we're only
4104 * holding the lock in shared mode, and the re-entrant path only
4105 * takes the lock shared, we can deadlock if some other thread
4106 * tries to grab the lock exclusively in between.
4108 hfs_unlock_truncate(cp
, HFS_LOCK_SKIP_IF_EXCLUSIVE
);
4109 truncate_lock_held
= FALSE
;
4112 ap
->a_pl_offset
= offset
;
4113 ap
->a_f_offset
= f_offset
;
4116 error
= decmpfs_pagein_compressed(ap
, &compressed
, VTOCMP(vp
));
4118 * note that decpfs_pagein_compressed can change the state of
4119 * 'compressed'... it will set it to 0 if the file is no longer
4120 * compressed once the compression lock is successfully taken
4121 * i.e. we would block on that lock while the file is being inflated
4125 /* successful page-in, update the access time */
4126 VTOC(vp
)->c_touch_acctime
= TRUE
;
4128 /* compressed files are not hot file candidates */
4129 if (VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
) {
4130 fp
->ff_bytesread
= 0;
4132 } else if (error
== EAGAIN
) {
4134 * EAGAIN indicates someone else already holds the compression lock...
4135 * to avoid deadlocking, we'll abort this range of pages with an
4136 * indication that the pagein needs to be redriven
4138 ubc_upl_abort_range(upl
, (upl_offset_t
) offset
, xsize
, UPL_ABORT_FREE_ON_EMPTY
| UPL_ABORT_RESTART
);
4140 goto pagein_next_range
;
4144 * Set file_converted only if the file became decompressed while we were
4145 * paging in. If it were still compressed, we would re-start the loop using the goto
4146 * in the above block. This avoid us overloading truncate_lock_held as our retry_pagein
4147 * condition below, since we could have avoided taking the truncate lock to prevent
4148 * a deadlock in the force unmount case.
4150 file_converted
= TRUE
;
4153 if (file_converted
== TRUE
) {
4155 * the file was converted back to a regular file after we first saw it as compressed
4156 * we need to abort the upl, retake the truncate lock, recreate the UPL and start over
4157 * reset a_size so that we consider what remains of the original request
4158 * and null out a_upl and a_pl_offset.
4160 * We should only be able to get into this block if the decmpfs_pagein_compressed
4161 * successfully decompressed the range in question for this file.
4163 ubc_upl_abort_range(upl
, (upl_offset_t
) offset
, isize
, UPL_ABORT_FREE_ON_EMPTY
);
4167 ap
->a_pl_offset
= 0;
4169 /* Reset file_converted back to false so that we don't infinite-loop. */
4170 file_converted
= FALSE
;
4175 error
= cluster_pagein(vp
, upl
, offset
, f_offset
, xsize
, (off_t
)fp
->ff_size
, ap
->a_flags
);
4178 * Keep track of blocks read.
4180 if ( !vnode_isswap(vp
) && VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
&& error
== 0) {
4182 int took_cnode_lock
= 0;
4184 if (ap
->a_f_offset
== 0 && fp
->ff_size
< PAGE_SIZE
)
4185 bytesread
= fp
->ff_size
;
4189 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
4190 if ((fp
->ff_bytesread
+ bytesread
) > 0x00000000ffffffff && cp
->c_lockowner
!= current_thread()) {
4191 hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
4192 took_cnode_lock
= 1;
4195 * If this file hasn't been seen since the start of
4196 * the current sampling period then start over.
4198 if (cp
->c_atime
< VTOHFS(vp
)->hfc_timebase
) {
4201 fp
->ff_bytesread
= bytesread
;
4203 cp
->c_atime
= tv
.tv_sec
;
4205 fp
->ff_bytesread
+= bytesread
;
4207 cp
->c_touch_acctime
= TRUE
;
4208 if (took_cnode_lock
)
4215 pg_index
+= num_of_pages
;
4221 if (truncate_lock_held
== TRUE
) {
4222 /* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */
4223 hfs_unlock_truncate(cp
, HFS_LOCK_SKIP_IF_EXCLUSIVE
);
4230 * Pageout for HFS filesystem.
4233 hfs_vnop_pageout(struct vnop_pageout_args
*ap
)
4235 struct vnop_pageout_args {
4238 vm_offset_t a_pl_offset,
4242 vfs_context_t a_context;
4246 vnode_t vp
= ap
->a_vp
;
4248 struct filefork
*fp
;
4252 upl_page_info_t
* pl
;
4253 vm_offset_t a_pl_offset
;
4255 int is_pageoutv2
= 0;
4262 * Figure out where the file ends, for pageout purposes. If
4263 * ff_new_size > ff_size, then we're in the middle of extending the
4264 * file via a write, so it is safe (and necessary) that we be able
4265 * to pageout up to that point.
4267 filesize
= fp
->ff_size
;
4268 if (fp
->ff_new_size
> filesize
)
4269 filesize
= fp
->ff_new_size
;
4271 a_flags
= ap
->a_flags
;
4272 a_pl_offset
= ap
->a_pl_offset
;
4274 if (S_ISREG(cp
->c_attr
.ca_mode
) || S_ISLNK(cp
->c_attr
.ca_mode
)) {
4275 hfs_incr_gencount (cp
);
4279 * we can tell if we're getting the new or old behavior from the UPL
4281 if ((upl
= ap
->a_pl
) == NULL
) {
4286 * we're in control of any UPL we commit
4287 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
4289 a_flags
&= ~UPL_NOCOMMIT
;
4293 * For V2 semantics, we want to take the cnode truncate lock
4294 * shared to guard against the file size changing via zero-filling.
4296 * However, we have to be careful because we may be invoked
4297 * via the ubc_msync path to write out dirty mmap'd pages
4298 * in response to a lock event on a content-protected
4299 * filesystem (e.g. to write out class A files).
4300 * As a result, we want to take the truncate lock 'SHARED' with
4301 * the mini-recursion locktype so that we don't deadlock/panic
4302 * because we may be already holding the truncate lock exclusive to force any other
4303 * IOs to have blocked behind us.
4305 hfs_lock_truncate(cp
, HFS_SHARED_LOCK
, HFS_LOCK_SKIP_IF_EXCLUSIVE
);
4307 if (a_flags
& UPL_MSYNC
) {
4308 request_flags
= UPL_UBC_MSYNC
| UPL_RET_ONLY_DIRTY
;
4311 request_flags
= UPL_UBC_PAGEOUT
| UPL_RET_ONLY_DIRTY
;
4314 kret
= ubc_create_upl(vp
, ap
->a_f_offset
, ap
->a_size
, &upl
, &pl
, request_flags
);
4316 if ((kret
!= KERN_SUCCESS
) || (upl
== (upl_t
) NULL
)) {
4322 * from this point forward upl points at the UPL we're working with
4323 * it was either passed in or we succesfully created it
4327 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
4328 * UPL instead of relying on the UPL passed into us. We go ahead and do that here,
4329 * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for
4330 * N dirty ranges in the UPL. Note that this is almost a direct copy of the
4331 * logic in vnode_pageout except that we need to do it after grabbing the truncate
4332 * lock in HFS so that we don't lock invert ourselves.
4334 * Note that we can still get into this function on behalf of the default pager with
4335 * non-V2 behavior (swapfiles). However in that case, we did not grab locks above
4336 * since fsync and other writing threads will grab the locks, then mark the
4337 * relevant pages as busy. But the pageout codepath marks the pages as busy,
4338 * and THEN would attempt to grab the truncate lock, which would result in deadlock. So
4339 * we do not try to grab anything for the pre-V2 case, which should only be accessed
4340 * by the paging/VM system.
4352 f_offset
= ap
->a_f_offset
;
4355 * Scan from the back to find the last page in the UPL, so that we
4356 * aren't looking at a UPL that may have already been freed by the
4357 * preceding aborts/completions.
4359 for (pg_index
= ((isize
) / PAGE_SIZE
); pg_index
> 0;) {
4360 if (upl_page_present(pl
, --pg_index
))
4362 if (pg_index
== 0) {
4363 ubc_upl_abort_range(upl
, 0, isize
, UPL_ABORT_FREE_ON_EMPTY
);
4369 * initialize the offset variables before we touch the UPL.
4370 * a_f_offset is the position into the file, in bytes
4371 * offset is the position into the UPL, in bytes
4372 * pg_index is the pg# of the UPL we're operating on.
4373 * isize is the offset into the UPL of the last non-clean page.
4375 isize
= ((pg_index
+ 1) * PAGE_SIZE
);
4384 if ( !upl_page_present(pl
, pg_index
)) {
4386 * we asked for RET_ONLY_DIRTY, so it's possible
4387 * to get back empty slots in the UPL.
4388 * just skip over them
4390 f_offset
+= PAGE_SIZE
;
4391 offset
+= PAGE_SIZE
;
4397 if ( !upl_dirty_page(pl
, pg_index
)) {
4398 panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index
, upl
);
4402 * We know that we have at least one dirty page.
4403 * Now checking to see how many in a row we have
4406 xsize
= isize
- PAGE_SIZE
;
4409 if ( !upl_dirty_page(pl
, pg_index
+ num_of_pages
))
4414 xsize
= num_of_pages
* PAGE_SIZE
;
4416 if (!vnode_isswap(vp
)) {
4422 if (cp
->c_lockowner
!= current_thread()) {
4423 if ((retval
= hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
))) {
4425 * we're in the v2 path, so we are the
4426 * owner of the UPL... we may have already
4427 * processed some of the UPL, so abort it
4428 * from the current working offset to the
4431 ubc_upl_abort_range(upl
,
4433 ap
->a_size
- offset
,
4434 UPL_ABORT_FREE_ON_EMPTY
);
4439 end_of_range
= f_offset
+ xsize
- 1;
4441 if (end_of_range
>= filesize
) {
4442 end_of_range
= (off_t
)(filesize
- 1);
4444 if (f_offset
< filesize
) {
4445 rl_remove(f_offset
, end_of_range
, &fp
->ff_invalidranges
);
4446 cp
->c_flag
|= C_MODIFIED
; /* leof is dirty */
4452 if ((error
= cluster_pageout(vp
, upl
, offset
, f_offset
,
4453 xsize
, filesize
, a_flags
))) {
4460 pg_index
+= num_of_pages
;
4462 /* capture errnos bubbled out of cluster_pageout if they occurred */
4463 if (error_ret
!= 0) {
4466 } /* end block for v2 pageout behavior */
4468 if (!vnode_isswap(vp
)) {
4472 if (cp
->c_lockowner
!= current_thread()) {
4473 if ((retval
= hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
))) {
4474 if (!(a_flags
& UPL_NOCOMMIT
)) {
4475 ubc_upl_abort_range(upl
,
4478 UPL_ABORT_FREE_ON_EMPTY
);
4484 end_of_range
= ap
->a_f_offset
+ ap
->a_size
- 1;
4486 if (end_of_range
>= filesize
) {
4487 end_of_range
= (off_t
)(filesize
- 1);
4489 if (ap
->a_f_offset
< filesize
) {
4490 rl_remove(ap
->a_f_offset
, end_of_range
, &fp
->ff_invalidranges
);
4491 cp
->c_flag
|= C_MODIFIED
; /* leof is dirty */
4499 * just call cluster_pageout for old pre-v2 behavior
4501 retval
= cluster_pageout(vp
, upl
, a_pl_offset
, ap
->a_f_offset
,
4502 ap
->a_size
, filesize
, a_flags
);
	/*
	 * If data was written, update the modification time of the file.
	 * If setuid or setgid bits are set and this process is not the
	 * superuser then clear the setuid and setgid bits as a precaution
	 * against tampering.
	 */
	cp->c_touch_modtime = TRUE;
	cp->c_touch_chgtime = TRUE;
	if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
	    (vfs_context_suser(ap->a_context) != 0)) {
		hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
		cp->c_mode &= ~(S_ISUID | S_ISGID);
		hfs_unlock(cp);
	}
	/*
	 * Release the truncate lock.  Note that because
	 * we may have taken the lock recursively by
	 * being invoked via ubc_msync due to lockdown,
	 * we should release it recursively, too.
	 */
	hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
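
/*
 * Illustrative sketch (not part of the driver, never compiled): the pageout
 * loop above batches page-outs by locating runs of consecutive dirty pages in
 * the UPL's page list before handing each run to cluster_pageout().  The
 * standalone helper below shows the same run-detection idea in isolation over
 * a plain array of flags; sketch_find_dirty_run() and the dirty[] array are
 * hypothetical names, not part of HFS or the UPL API.
 */
#if 0	/* illustrative example only */
#include <stddef.h>

/*
 * Find the next run of consecutive dirty pages at or after 'start'.
 * Returns the run length and stores the run's first index in *run_start;
 * returns 0 when no dirty page remains.
 */
static size_t
sketch_find_dirty_run(const unsigned char *dirty, size_t npages,
                      size_t start, size_t *run_start)
{
	size_t i = start;
	size_t len = 0;

	while (i < npages && !dirty[i])		/* skip slots with nothing to write */
		i++;
	if (i == npages)
		return 0;

	*run_start = i;
	while (i + len < npages && dirty[i + len])	/* count the dirty run */
		len++;
	return len;
}
#endif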
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
	int retval = 0;
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;
	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
	    (vp == VTOHFS(vp)->hfc_filevp)) {
		/*
		 * Swap and validate the node if it is in native byte order.
		 * This is always true on big endian, so we always validate
		 * before writing here.  On little endian, the node typically has
		 * been swapped and validated when it was written to the journal,
		 * so we won't do anything here.
		 */
		if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = (char *)buf_dataptr(bp);
			block.blockNum = buf_lblkno(bp);
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (buf_fromcache(bp) == 0);
			block.blockSize = buf_count(bp);
			/* Endian un-swap B-Tree node */
			retval = hfs_swap_BTNode(&block, vp, kSwapBTNodeHostToBig, false);
			if (retval)
				panic("hfs_vnop_bwrite: about to write corrupt node!\n");
		}
	}
	/* This buffer shouldn't be locked anymore, but if it is, clear it */
	if ((buf_flags(bp) & B_LOCKED)) {
		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp %p\n", bp);
		}
		buf_clearflags(bp, B_LOCKED);
	}
	retval = vn_bwrite(ap);

	return (retval);
}
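
/*
 * Illustrative sketch (not part of the driver, never compiled): the 0x000e
 * test above relies on the layout of a B-tree node, whose last two bytes hold
 * the offset of record 0.  That offset is always 14 (the size of the node
 * descriptor), so reading it as a u_int16_t yields 0x000e while the node is
 * still in host byte order, and 0x0e00 once the node has been swapped to
 * big-endian on a little-endian machine.  The helper below shows that check
 * in isolation; sketch_node_needs_swap() is a hypothetical name, not an HFS
 * routine.
 */
#if 0	/* illustrative example only */
#include <stddef.h>
#include <stdint.h>
#include <string.h>

static int
sketch_node_needs_swap(const void *node, size_t node_size)
{
	uint16_t last_offset;

	/* The record-offset table grows backward from the end of the node;
	 * its final entry is the offset of record 0. */
	memcpy(&last_offset, (const char *)node + node_size - sizeof(last_offset),
	       sizeof(last_offset));

	/* 0x000e read in host order means the node has not been swapped yet. */
	return (last_offset == 0x000e);
}
#endif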
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 * -----------------     -----------------
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 * -----------------     -----------------
 *
 *                       -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                       -----------------
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
	struct proc *p)
{
	struct filefork *fp;
	struct hfsmount *hfsmp;
	u_int32_t nextallocsave;
	daddr64_t sector_a, sector_b;
	int took_trunc_lock = 0;
	enum vtype vnodetype;
	vnodetype = vnode_vtype(vp);
	if (vnodetype != VREG) {
		/* Not allowed to move symlinks. */
		return (EPERM);
	}
	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
		return (ENOSPC);
	}
	if (fp->ff_unallocblocks)
		return (EINVAL);
	/*
	 * <rdar://problem/9118426>
	 * Disable HFS file relocation on content-protected filesystems
	 */
	if (cp_fs_protected (hfsmp->hfs_mp)) {
		return EINVAL;
	}

	/* If it's an SSD, also disable HFS relocation */
	if (hfsmp->hfs_flags & HFS_SSD) {
		return EINVAL;
	}
	blksize = hfsmp->blockSize;
	if (blockHint == 0)
		blockHint = hfsmp->nextAllocation;

	if (fp->ff_size > 0x7fffffff) {
		return (EFBIG);
	}
	//
	// We do not believe that this call to hfs_fsync() is
	// necessary and it causes a journal transaction
	// deadlock so we are removing it.
	//
	//if (vnodetype == VREG && !vnode_issystem(vp)) {
	//	retval = hfs_fsync(vp, MNT_WAIT, 0, p);
	//}

	if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		/* Force lock since callers expect lock to be held. */
		if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) {
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
			return (retval);
		}
		/* No need to continue if file was removed. */
		if (cp->c_flag & C_NOEXISTS) {
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
			return (ENOENT);
		}
		took_trunc_lock = 1;
	}
	headblks = fp->ff_blocks;
	datablks = howmany(fp->ff_size, blksize);
	growsize = datablks * blksize;
	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
	if (blockHint >= hfsmp->hfs_metazone_start &&
	    blockHint <= hfsmp->hfs_metazone_end)
		eflags |= kEFMetadataMask;
	if (hfs_start_transaction(hfsmp) != 0) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
		return (EINVAL);
	}
	/*
	 * Protect the extents b-tree and the allocation bitmap
	 * during MapFileBlockC and ExtendFileC operations.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
	retval = MacToVFSError(retval);
	/*
	 * STEP 1 - acquire new allocation blocks.
	 */
	nextallocsave = hfsmp->nextAllocation;
	retval = ExtendFileC(hfsmp, (FCB *)fp, growsize, blockHint, eflags, &newbytes);
	if (eflags & kEFMetadataMask) {
		hfs_lock_mount(hfsmp);
		HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
		MarkVCBDirty(hfsmp);
		hfs_unlock_mount(hfsmp);
	}
	retval = MacToVFSError(retval);

	cp->c_flag |= C_MODIFIED;
	if (newbytes < growsize) {
		retval = ENOSPC;
	} else if (fp->ff_blocks < (headblks + datablks)) {
		printf("hfs_relocate: allocation failed id=%u, vol=%s\n",
		       cp->c_cnid, hfsmp->vcbVN);
		retval = ENOSPC;
	}
	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
	if (retval) {
		retval = MacToVFSError(retval);
	} else if ((sector_a + 1) == sector_b) {
		retval = ENOSPC;
	} else if ((eflags & kEFMetadataMask) &&
	           ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
	            hfsmp->hfs_metazone_end)) {
		const char * filestr;
		char emptystr = '\0';

		if (cp->c_desc.cd_nameptr != NULL) {
			filestr = (const char *)&cp->c_desc.cd_nameptr[0];
		} else if (vnode_name(vp) != NULL) {
			filestr = vnode_name(vp);
		} else {
			filestr = &emptystr;
		}
	/* Done with system locks and journal for now. */
	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;

	hfs_end_transaction(hfsmp);
	/*
	 * Check to see if failure is due to excessive fragmentation.
	 */
	if ((retval == ENOSPC) &&
	    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
		hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
	}
	/*
	 * STEP 2 - clone file data into the new allocation blocks.
	 */
	if (vnodetype == VLNK)
		retval = EPERM;
	else if (vnode_issystem(vp))
		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	else
		retval = hfs_clonefile(vp, headblks, datablks, blksize);
	/* Start transaction for step 3 or for a restore. */
	if (hfs_start_transaction(hfsmp) != 0) {
	/*
	 * STEP 3 - switch to cloned data and remove old blocks.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	retval = HeadTruncateFile(hfsmp, (FCB *)fp, headblks);

	hfs_systemfile_unlock(hfsmp, lockflags);
	if (took_trunc_lock)
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);

	hfs_systemfile_unlock(hfsmp, lockflags);
	/* Push cnode's new extent data to disk. */
	(void) hfs_update(vp, MNT_WAIT);

	if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
		(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	else
		(void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
	hfs_end_transaction(hfsmp);
	if (fp->ff_blocks == headblks) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
	}
	/*
	 * Give back any newly allocated space.
	 */
	if (lockflags == 0) {
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	}
	(void) TruncateFileC(hfsmp, (FCB *)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
	                     FTOC(fp)->c_fileid, false);

	hfs_systemfile_unlock(hfsmp, lockflags);
	if (took_trunc_lock)
		hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
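
/*
 * Illustrative sketch (not part of the driver, never compiled): hfs_relocate()
 * above moves a file by (1) extending the fork so it temporarily owns a second
 * run of blocks, (2) cloning the data into that new run, and (3) head-
 * truncating the original run so only the clone remains.  The toy routine
 * below applies the same three steps to a heap array standing in for a fork's
 * block list; sketch_relocate_blocks() and its parameters are hypothetical.
 */
#if 0	/* illustrative example only */
#include <stdlib.h>
#include <string.h>

/* Returns a buffer holding the "relocated" blocks, or NULL on failure. */
static int *
sketch_relocate_blocks(const int *blocks, size_t nblocks)
{
	/* STEP 1: acquire new space -- the "fork" briefly owns 2 * nblocks. */
	int *fork = malloc(2 * nblocks * sizeof(*fork));
	if (fork == NULL)
		return NULL;
	memcpy(fork, blocks, nblocks * sizeof(*fork));

	/* STEP 2: clone the data from the old run into the new run. */
	memcpy(fork + nblocks, fork, nblocks * sizeof(*fork));

	/* STEP 3: "head truncate" -- drop the old run, keep only the clone. */
	memmove(fork, fork + nblocks, nblocks * sizeof(*fork));
	int *shrunk = realloc(fork, nblocks * sizeof(*fork));
	return (shrunk != NULL) ? shrunk : fork;
}
#endif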
/*
 * Clone a file's data within the file.
 *
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 128 * 1024);
	hfs_unlock(VTOC(vp));
#if CONFIG_PROTECT
	if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
		hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
		return (error);
	}
#endif /* CONFIG_PROTECT */
	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
		return (ENOMEM);
	}
	auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);
		uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufp, iosize);
		error = cluster_read(vp, auio, copysize, IO_NOCACHE);
		if (error) {
			printf("hfs_clonefile: cluster_read failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
			error = EIO;
			break;
		}
		uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
		uio_addiov(auio, (uintptr_t)bufp, iosize);
		error = cluster_write(vp, auio, writebase + offset,
		                      writebase + offset + iosize,
		                      uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
		if (error) {
			printf("hfs_clonefile: cluster_write failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
			error = EIO;
			break;
		}
		offset += iosize;
	}
	if ((blksize & PAGE_MASK)) {
		/*
		 * since the copy may not have started on a PAGE
		 * boundary (or may not have ended on one), we
		 * may have pages left in the cache since NOCACHE
		 * will let partially written pages linger...
		 * lets just flush the entire range to make sure
		 * we don't have any pages left that are beyond
		 * (or intersect) the real LEOF of this file
		 */
		ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
	} else {
		/*
		 * No need to call ubc_sync_range or hfs_invalbuf
		 * since the file was copied using IO_NOCACHE and
		 * the copy was done starting and ending on a page
		 * boundary in the file.
		 */
	}
	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
	hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);

	return (error);
}
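
/*
 * Illustrative sketch (not part of the driver, never compiled): hfs_clonefile()
 * above copies blkcnt * blksize bytes from offset 0 to offset writebase using
 * a single bounce buffer and uncached, synchronous cluster I/O.  The
 * standalone routine below shows the same chunked read-then-write pattern with
 * POSIX pread()/pwrite(); sketch_clone_range() and its parameters are
 * hypothetical.
 */
#if 0	/* illustrative example only */
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

static int
sketch_clone_range(int fd, off_t writebase, off_t copysize)
{
	size_t  bufsize = (copysize < 128 * 1024) ? (size_t)copysize : (128 * 1024);
	char   *bufp = malloc(bufsize);
	off_t   offset = 0;
	int     error = 0;

	if (bufp == NULL)
		return -1;

	while (offset < copysize && error == 0) {
		off_t   remaining = copysize - offset;
		size_t  iosize = (remaining < (off_t)bufsize) ? (size_t)remaining : bufsize;

		/* read the next chunk of the source range... */
		if (pread(fd, bufp, iosize, offset) != (ssize_t)iosize)
			error = -1;
		/* ...and write it at the same relative position in the new range */
		else if (pwrite(fd, bufp, iosize, writebase + offset) != (ssize_t)iosize)
			error = -1;

		offset += iosize;
	}
	free(bufp);
	return error;
}
#endif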
/*
 * Clone a system (metadata) file.
 *
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
	struct buf *bp = NULL;
	daddr64_t start_blk;
	iosize = GetLogicalBlockSize(vp);
	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
	breadcnt = bufsize / iosize;
	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}
	start_blk = ((daddr64_t)blkstart * blksize) / iosize;
	last_blk  = ((daddr64_t)blkcnt * blksize) / iosize;
	while (blkno < last_blk) {
		/*
		 * Read up to a megabyte
		 */
		offset = bufp;
		for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
			error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
			if (error) {
				printf("hfs_clonesysfile: meta_bread error %d\n", error);
				goto out;
			}
			if (buf_count(bp) != iosize) {
				printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
				error = EIO;
				goto out;
			}
			bcopy((char *)buf_dataptr(bp), offset, iosize);

			buf_markinvalid(bp);
			buf_brelse(bp);
			bp = NULL;

			offset += iosize;
		}
		/*
		 * Write up to a megabyte
		 */
		offset = bufp;
		for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
			bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
			if (bp == NULL) {
				printf("hfs_clonesysfile: getblk failed on blk %qd\n",
				       start_blk + blkno);
				error = EIO;
				goto out;
			}
			bcopy(offset, (char *)buf_dataptr(bp), iosize);
			error = (int)buf_bwrite(bp);
			bp = NULL;
			if (error)
				goto out;
			offset += iosize;
		}
	}
out:
	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	error = hfs_fsync(vp, MNT_WAIT, 0, p);