2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* @(#)hfs_readwrite.c 1.0
30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/resourcevar.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/filedesc.h>
44 #include <sys/buf_internal.h>
46 #include <sys/kauth.h>
47 #include <sys/vnode.h>
48 #include <sys/vnode_internal.h>
50 #include <sys/vfs_context.h>
51 #include <sys/fsevents.h>
52 #include <kern/kalloc.h>
54 #include <sys/sysctl.h>
55 #include <sys/fsctl.h>
56 #include <sys/mount_internal.h>
57 #include <sys/file_internal.h>
59 #include <miscfs/specfs/specdev.h>
62 #include <sys/ubc_internal.h>
64 #include <vm/vm_pageout.h>
65 #include <vm/vm_kern.h>
67 #include <sys/kdebug.h>
70 #include "hfs_attrlist.h"
71 #include "hfs_endian.h"
72 #include "hfs_fsctl.h"
73 #include "hfs_quota.h"
74 #include "hfscommon/headers/FileMgrInternal.h"
75 #include "hfscommon/headers/BTreesInternal.h"
76 #include "hfs_cnode.h"
79 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
82 MAXHFSFILESIZE
= 0x7FFFFFFF /* this needs to go in the mount structure */
85 /* from bsd/hfs/hfs_vfsops.c */
86 extern int hfs_vfs_vget (struct mount
*mp
, ino64_t ino
, struct vnode
**vpp
, vfs_context_t context
);
88 static int hfs_clonefile(struct vnode
*, int, int, int);
89 static int hfs_clonesysfile(struct vnode
*, int, int, int, kauth_cred_t
, struct proc
*);
90 static int hfs_minorupdate(struct vnode
*vp
);
91 static int do_hfs_truncate(struct vnode
*vp
, off_t length
, int flags
, int skip
, vfs_context_t context
);
93 /* from bsd/hfs/hfs_vnops.c */
94 extern decmpfs_cnode
* hfs_lazy_init_decmpfs_cnode (struct cnode
*cp
);
98 int flush_cache_on_write
= 0;
99 SYSCTL_INT (_kern
, OID_AUTO
, flush_cache_on_write
, CTLFLAG_RW
| CTLFLAG_LOCKED
, &flush_cache_on_write
, 0, "always flush the drive cache on writes to uncached files");
102 * Read data from a file.
105 hfs_vnop_read(struct vnop_read_args
*ap
)
108 struct vnop_read_args {
109 struct vnodeop_desc *a_desc;
113 vfs_context_t a_context;
117 uio_t uio
= ap
->a_uio
;
118 struct vnode
*vp
= ap
->a_vp
;
121 struct hfsmount
*hfsmp
;
124 off_t start_resid
= uio_resid(uio
);
125 off_t offset
= uio_offset(uio
);
127 int took_truncate_lock
= 0;
130 /* Preflight checks */
131 if (!vnode_isreg(vp
)) {
132 /* can only read regular files */
138 if (start_resid
== 0)
139 return (0); /* Nothing left to do */
141 return (EINVAL
); /* cant read from a negative offset */
146 if (VNODE_IS_RSRC(vp
)) {
147 if (hfs_hides_rsrc(ap
->a_context
, VTOC(vp
), 1)) { /* 1 == don't take the cnode lock */
150 /* otherwise read the resource fork normally */
152 int compressed
= hfs_file_is_compressed(VTOC(vp
), 1); /* 1 == don't take the cnode lock */
154 retval
= decmpfs_read_compressed(ap
, &compressed
, VTOCMP(vp
));
157 /* successful read, update the access time */
158 VTOC(vp
)->c_touch_acctime
= TRUE
;
160 /* compressed files are not hot file candidates */
161 if (VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
) {
162 VTOF(vp
)->ff_bytesread
= 0;
167 /* otherwise the file was converted back to a regular file while we were reading it */
169 } else if ((VTOC(vp
)->c_bsdflags
& UF_COMPRESSED
)) {
172 error
= check_for_dataless_file(vp
, NAMESPACE_HANDLER_READ_OP
);
179 #endif /* HFS_COMPRESSION */
186 if ((retval
= cp_handle_vnop (vp
, CP_READ_ACCESS
, ap
->a_ioflag
)) != 0) {
192 * If this read request originated from a syscall (as opposed to
193 * an in-kernel page fault or something), then set it up for
196 if (ap
->a_ioflag
& IO_SYSCALL_DISPATCH
) {
197 io_throttle
= IO_RETURN_ON_THROTTLE
;
202 /* Protect against a size change. */
203 hfs_lock_truncate(cp
, HFS_SHARED_LOCK
, HFS_LOCK_DEFAULT
);
204 took_truncate_lock
= 1;
206 filesize
= fp
->ff_size
;
207 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
208 if (offset
> filesize
) {
209 if ((hfsmp
->hfs_flags
& HFS_STANDARD
) &&
210 (offset
> (off_t
)MAXHFSFILESIZE
)) {
216 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 12)) | DBG_FUNC_START
,
217 (int)uio_offset(uio
), uio_resid(uio
), (int)filesize
, (int)filebytes
, 0);
219 retval
= cluster_read(vp
, uio
, filesize
, ap
->a_ioflag
|io_throttle
);
221 cp
->c_touch_acctime
= TRUE
;
223 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 12)) | DBG_FUNC_END
,
224 (int)uio_offset(uio
), uio_resid(uio
), (int)filesize
, (int)filebytes
, 0);
227 * Keep track blocks read
229 if (hfsmp
->hfc_stage
== HFC_RECORDING
&& retval
== 0) {
230 int took_cnode_lock
= 0;
233 bytesread
= start_resid
- uio_resid(uio
);
235 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
236 if ((fp
->ff_bytesread
+ bytesread
) > 0x00000000ffffffff) {
237 hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
241 * If this file hasn't been seen since the start of
242 * the current sampling period then start over.
244 if (cp
->c_atime
< hfsmp
->hfc_timebase
) {
247 fp
->ff_bytesread
= bytesread
;
249 cp
->c_atime
= tv
.tv_sec
;
251 fp
->ff_bytesread
+= bytesread
;
257 if (took_truncate_lock
) {
258 hfs_unlock_truncate(cp
, HFS_LOCK_DEFAULT
);
260 if (retval
== EAGAIN
) {
261 throttle_lowpri_io(1);
270 * Write data to a file.
273 hfs_vnop_write(struct vnop_write_args
*ap
)
275 uio_t uio
= ap
->a_uio
;
276 struct vnode
*vp
= ap
->a_vp
;
279 struct hfsmount
*hfsmp
;
280 kauth_cred_t cred
= NULL
;
283 off_t bytesToAdd
= 0;
284 off_t actualBytesAdded
;
289 int ioflag
= ap
->a_ioflag
;
292 int cnode_locked
= 0;
293 int partialwrite
= 0;
295 time_t orig_ctime
=VTOC(vp
)->c_ctime
;
296 int took_truncate_lock
= 0;
297 int io_return_on_throttle
= 0;
298 struct rl_entry
*invalid_range
;
301 if ( hfs_file_is_compressed(VTOC(vp
), 1) ) { /* 1 == don't take the cnode lock */
302 int state
= decmpfs_cnode_get_vnode_state(VTOCMP(vp
));
304 case FILE_IS_COMPRESSED
:
306 case FILE_IS_CONVERTING
:
307 /* if FILE_IS_CONVERTING, we allow writes but do not
308 bother with snapshots or else we will deadlock.
313 printf("invalid state %d for compressed file\n", state
);
316 } else if ((VTOC(vp
)->c_bsdflags
& UF_COMPRESSED
)) {
319 error
= check_for_dataless_file(vp
, NAMESPACE_HANDLER_WRITE_OP
);
326 check_for_tracked_file(vp
, orig_ctime
, NAMESPACE_HANDLER_WRITE_OP
, uio
);
331 resid
= uio_resid(uio
);
332 offset
= uio_offset(uio
);
338 if (!vnode_isreg(vp
))
339 return (EPERM
); /* Can only write regular files */
346 if ((retval
= cp_handle_vnop (vp
, CP_WRITE_ACCESS
, 0)) != 0) {
351 eflags
= kEFDeferMask
; /* defer file block allocations */
354 * When the underlying device is sparse and space
355 * is low (< 8MB), stop doing delayed allocations
356 * and begin doing synchronous I/O.
358 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
359 (hfs_freeblks(hfsmp
, 0) < 2048)) {
360 eflags
&= ~kEFDeferMask
;
363 #endif /* HFS_SPARSE_DEV */
365 if ((ioflag
& (IO_SINGLE_WRITER
| IO_SYSCALL_DISPATCH
)) ==
366 (IO_SINGLE_WRITER
| IO_SYSCALL_DISPATCH
)) {
367 io_return_on_throttle
= IO_RETURN_ON_THROTTLE
;
371 /* Protect against a size change. */
373 * Protect against a size change.
375 * Note: If took_truncate_lock is true, then we previously got the lock shared
376 * but needed to upgrade to exclusive. So try getting it exclusive from the
379 if (ioflag
& IO_APPEND
|| took_truncate_lock
) {
380 hfs_lock_truncate(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
383 hfs_lock_truncate(cp
, HFS_SHARED_LOCK
, HFS_LOCK_DEFAULT
);
385 took_truncate_lock
= 1;
388 if (ioflag
& IO_APPEND
) {
389 uio_setoffset(uio
, fp
->ff_size
);
390 offset
= fp
->ff_size
;
392 if ((cp
->c_bsdflags
& APPEND
) && offset
!= fp
->ff_size
) {
397 origFileSize
= fp
->ff_size
;
398 writelimit
= offset
+ resid
;
399 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
402 * We may need an exclusive truncate lock for several reasons, all
403 * of which are because we may be writing to a (portion of a) block
404 * for the first time, and we need to make sure no readers see the
405 * prior, uninitialized contents of the block. The cases are:
407 * 1. We have unallocated (delayed allocation) blocks. We may be
408 * allocating new blocks to the file and writing to them.
409 * (A more precise check would be whether the range we're writing
410 * to contains delayed allocation blocks.)
411 * 2. We need to extend the file. The bytes between the old EOF
412 * and the new EOF are not yet initialized. This is important
413 * even if we're not allocating new blocks to the file. If the
414 * old EOF and new EOF are in the same block, we still need to
415 * protect that range of bytes until they are written for the
417 * 3. The write overlaps some invalid ranges (delayed zero fill; that
418 * part of the file has been allocated, but not yet written).
420 * If we had a shared lock with the above cases, we need to try to upgrade
421 * to an exclusive lock. If the upgrade fails, we will lose the shared
422 * lock, and will need to take the truncate lock again; the took_truncate_lock
423 * flag will still be set, causing us to try for an exclusive lock next time.
425 * NOTE: Testing for #3 (delayed zero fill) needs to be done while the cnode
426 * lock is held, since it protects the range lists.
428 if ((cp
->c_truncatelockowner
== HFS_SHARED_OWNER
) &&
429 ((fp
->ff_unallocblocks
!= 0) ||
430 (writelimit
> origFileSize
))) {
431 if (lck_rw_lock_shared_to_exclusive(&cp
->c_truncatelock
) == FALSE
) {
433 * Lock upgrade failed and we lost our shared lock, try again.
434 * Note: we do not set took_truncate_lock=0 here. Leaving it
435 * set to 1 will cause us to try to get the lock exclusive.
440 /* Store the owner in the c_truncatelockowner field if we successfully upgrade */
441 cp
->c_truncatelockowner
= current_thread();
445 if ( (retval
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
))) {
450 if (S_ISREG(cp
->c_attr
.ca_mode
) || S_ISLNK(cp
->c_attr
.ca_mode
)) {
451 hfs_incr_gencount (cp
);
455 * Now that we have the cnode lock, see if there are delayed zero fill ranges
456 * overlapping our write. If so, we need the truncate lock exclusive (see above).
458 if ((cp
->c_truncatelockowner
== HFS_SHARED_OWNER
) &&
459 (rl_scan(&fp
->ff_invalidranges
, offset
, writelimit
-1, &invalid_range
) != RL_NOOVERLAP
)) {
461 * When testing, it appeared that calling lck_rw_lock_shared_to_exclusive() causes
462 * a deadlock, rather than simply returning failure. (That is, it apparently does
463 * not behave like a "try_lock"). Since this condition is rare, just drop the
464 * cnode lock and try again. Since took_truncate_lock is set, we will
465 * automatically take the truncate lock exclusive.
469 hfs_unlock_truncate(cp
, HFS_LOCK_DEFAULT
);
473 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_START
,
474 (int)offset
, uio_resid(uio
), (int)fp
->ff_size
,
477 /* Check if we do not need to extend the file */
478 if (writelimit
<= filebytes
) {
482 cred
= vfs_context_ucred(ap
->a_context
);
483 bytesToAdd
= writelimit
- filebytes
;
486 retval
= hfs_chkdq(cp
, (int64_t)(roundup(bytesToAdd
, hfsmp
->blockSize
)),
492 if (hfs_start_transaction(hfsmp
) != 0) {
497 while (writelimit
> filebytes
) {
498 bytesToAdd
= writelimit
- filebytes
;
499 if (cred
&& suser(cred
, NULL
) != 0)
500 eflags
|= kEFReserveMask
;
502 /* Protect extents b-tree and allocation bitmap */
503 lockflags
= SFL_BITMAP
;
504 if (overflow_extents(fp
))
505 lockflags
|= SFL_EXTENTS
;
506 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
508 /* Files that are changing size are not hot file candidates. */
509 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
510 fp
->ff_bytesread
= 0;
512 retval
= MacToVFSError(ExtendFileC (hfsmp
, (FCB
*)fp
, bytesToAdd
,
513 0, eflags
, &actualBytesAdded
));
515 hfs_systemfile_unlock(hfsmp
, lockflags
);
517 if ((actualBytesAdded
== 0) && (retval
== E_NONE
))
519 if (retval
!= E_NONE
)
521 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
522 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_NONE
,
523 (int)offset
, uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
525 (void) hfs_update(vp
, TRUE
);
526 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
527 (void) hfs_end_transaction(hfsmp
);
530 * If we didn't grow the file enough try a partial write.
531 * POSIX expects this behavior.
533 if ((retval
== ENOSPC
) && (filebytes
> offset
)) {
536 uio_setresid(uio
, (uio_resid(uio
) - bytesToAdd
));
538 writelimit
= filebytes
;
541 if (retval
== E_NONE
) {
550 if (writelimit
> fp
->ff_size
)
551 filesize
= writelimit
;
553 filesize
= fp
->ff_size
;
555 lflag
= ioflag
& ~(IO_TAILZEROFILL
| IO_HEADZEROFILL
| IO_NOZEROVALID
| IO_NOZERODIRTY
);
557 if (offset
<= fp
->ff_size
) {
558 zero_off
= offset
& ~PAGE_MASK_64
;
560 /* Check to see whether the area between the zero_offset and the start
561 of the transfer to see whether is invalid and should be zero-filled
562 as part of the transfer:
564 if (offset
> zero_off
) {
565 if (rl_scan(&fp
->ff_invalidranges
, zero_off
, offset
- 1, &invalid_range
) != RL_NOOVERLAP
)
566 lflag
|= IO_HEADZEROFILL
;
569 off_t eof_page_base
= fp
->ff_size
& ~PAGE_MASK_64
;
571 /* The bytes between fp->ff_size and uio->uio_offset must never be
572 read without being zeroed. The current last block is filled with zeroes
573 if it holds valid data but in all cases merely do a little bookkeeping
574 to track the area from the end of the current last page to the start of
575 the area actually written. For the same reason only the bytes up to the
576 start of the page where this write will start is invalidated; any remainder
577 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
579 Note that inval_start, the start of the page after the current EOF,
580 may be past the start of the write, in which case the zeroing
581 will be handled by the cluser_write of the actual data.
583 inval_start
= (fp
->ff_size
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
584 inval_end
= offset
& ~PAGE_MASK_64
;
585 zero_off
= fp
->ff_size
;
587 if ((fp
->ff_size
& PAGE_MASK_64
) &&
588 (rl_scan(&fp
->ff_invalidranges
,
591 &invalid_range
) != RL_NOOVERLAP
)) {
592 /* The page containing the EOF is not valid, so the
593 entire page must be made inaccessible now. If the write
594 starts on a page beyond the page containing the eof
595 (inval_end > eof_page_base), add the
596 whole page to the range to be invalidated. Otherwise
597 (i.e. if the write starts on the same page), zero-fill
598 the entire page explicitly now:
600 if (inval_end
> eof_page_base
) {
601 inval_start
= eof_page_base
;
603 zero_off
= eof_page_base
;
607 if (inval_start
< inval_end
) {
609 /* There's some range of data that's going to be marked invalid */
611 if (zero_off
< inval_start
) {
612 /* The pages between inval_start and inval_end are going to be invalidated,
613 and the actual write will start on a page past inval_end. Now's the last
614 chance to zero-fill the page containing the EOF:
618 retval
= cluster_write(vp
, (uio_t
) 0,
619 fp
->ff_size
, inval_start
,
621 lflag
| IO_HEADZEROFILL
| IO_NOZERODIRTY
);
622 hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
624 if (retval
) goto ioerr_exit
;
625 offset
= uio_offset(uio
);
628 /* Mark the remaining area of the newly allocated space as invalid: */
629 rl_add(inval_start
, inval_end
- 1 , &fp
->ff_invalidranges
);
631 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
632 zero_off
= fp
->ff_size
= inval_end
;
635 if (offset
> zero_off
) lflag
|= IO_HEADZEROFILL
;
638 /* Check to see whether the area between the end of the write and the end of
639 the page it falls in is invalid and should be zero-filled as part of the transfer:
641 tail_off
= (writelimit
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
642 if (tail_off
> filesize
) tail_off
= filesize
;
643 if (tail_off
> writelimit
) {
644 if (rl_scan(&fp
->ff_invalidranges
, writelimit
, tail_off
- 1, &invalid_range
) != RL_NOOVERLAP
) {
645 lflag
|= IO_TAILZEROFILL
;
650 * if the write starts beyond the current EOF (possibly advanced in the
651 * zeroing of the last block, above), then we'll zero fill from the current EOF
652 * to where the write begins:
654 * NOTE: If (and ONLY if) the portion of the file about to be written is
655 * before the current EOF it might be marked as invalid now and must be
656 * made readable (removed from the invalid ranges) before cluster_write
659 io_start
= (lflag
& IO_HEADZEROFILL
) ? zero_off
: offset
;
660 if (io_start
< fp
->ff_size
) {
663 io_end
= (lflag
& IO_TAILZEROFILL
) ? tail_off
: writelimit
;
664 rl_remove(io_start
, io_end
- 1, &fp
->ff_invalidranges
);
671 * We need to tell UBC the fork's new size BEFORE calling
672 * cluster_write, in case any of the new pages need to be
673 * paged out before cluster_write completes (which does happen
674 * in embedded systems due to extreme memory pressure).
675 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
676 * will be, so that it can pass that on to cluster_pageout, and
677 * allow those pageouts.
679 * We don't update ff_size yet since we don't want pageins to
680 * be able to see uninitialized data between the old and new
681 * EOF, until cluster_write has completed and initialized that
684 * The vnode pager relies on the file size last given to UBC via
685 * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
686 * ff_size (whichever is larger). NOTE: ff_new_size is always
687 * zero, unless we are extending the file via write.
689 if (filesize
> fp
->ff_size
) {
690 fp
->ff_new_size
= filesize
;
691 ubc_setsize(vp
, filesize
);
693 retval
= cluster_write(vp
, uio
, fp
->ff_size
, filesize
, zero_off
,
694 tail_off
, lflag
| IO_NOZERODIRTY
| io_return_on_throttle
);
696 fp
->ff_new_size
= 0; /* no longer extending; use ff_size */
698 if (retval
== EAGAIN
) {
700 * EAGAIN indicates that we still have I/O to do, but
701 * that we now need to be throttled
703 if (resid
!= uio_resid(uio
)) {
705 * did manage to do some I/O before returning EAGAIN
707 resid
= uio_resid(uio
);
708 offset
= uio_offset(uio
);
710 cp
->c_touch_chgtime
= TRUE
;
711 cp
->c_touch_modtime
= TRUE
;
713 if (filesize
> fp
->ff_size
) {
715 * we called ubc_setsize before the call to
716 * cluster_write... since we only partially
717 * completed the I/O, we need to
718 * re-adjust our idea of the filesize based
721 ubc_setsize(vp
, offset
);
723 fp
->ff_size
= offset
;
727 if (filesize
> origFileSize
) {
728 ubc_setsize(vp
, origFileSize
);
733 if (filesize
> origFileSize
) {
734 fp
->ff_size
= filesize
;
736 /* Files that are changing size are not hot file candidates. */
737 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
738 fp
->ff_bytesread
= 0;
741 fp
->ff_new_size
= 0; /* ff_size now has the correct size */
743 /* If we wrote some bytes, then touch the change and mod times */
744 if (resid
> uio_resid(uio
)) {
745 cp
->c_touch_chgtime
= TRUE
;
746 cp
->c_touch_modtime
= TRUE
;
750 uio_setresid(uio
, (uio_resid(uio
) + bytesToAdd
));
754 // XXXdbg - see radar 4871353 for more info
756 if (flush_cache_on_write
&& ((ioflag
& IO_NOCACHE
) || vnode_isnocache(vp
))) {
757 VNOP_IOCTL(hfsmp
->hfs_devvp
, DKIOCSYNCHRONIZECACHE
, NULL
, FWRITE
, NULL
);
763 * If we successfully wrote any data, and we are not the superuser
764 * we clear the setuid and setgid bits as a precaution against
767 if (cp
->c_mode
& (S_ISUID
| S_ISGID
)) {
768 cred
= vfs_context_ucred(ap
->a_context
);
769 if (resid
> uio_resid(uio
) && cred
&& suser(cred
, NULL
)) {
771 hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
774 cp
->c_mode
&= ~(S_ISUID
| S_ISGID
);
778 if (ioflag
& IO_UNIT
) {
780 hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
783 (void)hfs_truncate(vp
, origFileSize
, ioflag
& IO_SYNC
,
784 0, 0, ap
->a_context
);
785 uio_setoffset(uio
, (uio_offset(uio
) - (resid
- uio_resid(uio
))));
786 uio_setresid(uio
, resid
);
787 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
789 } else if ((ioflag
& IO_SYNC
) && (resid
> uio_resid(uio
))) {
791 hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
);
794 retval
= hfs_update(vp
, TRUE
);
796 /* Updating vcbWrCnt doesn't need to be atomic. */
799 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_END
,
800 (int)uio_offset(uio
), uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
805 if (took_truncate_lock
) {
806 hfs_unlock_truncate(cp
, HFS_LOCK_DEFAULT
);
808 if (retval
== EAGAIN
) {
809 throttle_lowpri_io(1);
817 /* support for the "bulk-access" fcntl */
819 #define CACHE_LEVELS 16
820 #define NUM_CACHE_ENTRIES (64*16)
821 #define PARENT_IDS_FLAG 0x100
823 struct access_cache
{
825 int cachehits
; /* these two for statistics gathering */
827 unsigned int *acache
;
828 unsigned char *haveaccess
;
832 uid_t uid
; /* IN: effective user id */
833 short flags
; /* IN: access requested (i.e. R_OK) */
834 short num_groups
; /* IN: number of groups user belongs to */
835 int num_files
; /* IN: number of files to process */
836 int *file_ids
; /* IN: array of file ids */
837 gid_t
*groups
; /* IN: array of groups */
838 short *access
; /* OUT: access info for each file (0 for 'has access') */
839 } __attribute__((unavailable
)); // this structure is for reference purposes only
841 struct user32_access_t
{
842 uid_t uid
; /* IN: effective user id */
843 short flags
; /* IN: access requested (i.e. R_OK) */
844 short num_groups
; /* IN: number of groups user belongs to */
845 int num_files
; /* IN: number of files to process */
846 user32_addr_t file_ids
; /* IN: array of file ids */
847 user32_addr_t groups
; /* IN: array of groups */
848 user32_addr_t access
; /* OUT: access info for each file (0 for 'has access') */
851 struct user64_access_t
{
852 uid_t uid
; /* IN: effective user id */
853 short flags
; /* IN: access requested (i.e. R_OK) */
854 short num_groups
; /* IN: number of groups user belongs to */
855 int num_files
; /* IN: number of files to process */
856 user64_addr_t file_ids
; /* IN: array of file ids */
857 user64_addr_t groups
; /* IN: array of groups */
858 user64_addr_t access
; /* OUT: access info for each file (0 for 'has access') */
862 // these are the "extended" versions of the above structures
863 // note that it is crucial that they be different sized than
864 // the regular version
865 struct ext_access_t
{
866 uint32_t flags
; /* IN: access requested (i.e. R_OK) */
867 uint32_t num_files
; /* IN: number of files to process */
868 uint32_t map_size
; /* IN: size of the bit map */
869 uint32_t *file_ids
; /* IN: Array of file ids */
870 char *bitmap
; /* OUT: hash-bitmap of interesting directory ids */
871 short *access
; /* OUT: access info for each file (0 for 'has access') */
872 uint32_t num_parents
; /* future use */
873 cnid_t
*parents
; /* future use */
874 } __attribute__((unavailable
)); // this structure is for reference purposes only
876 struct user32_ext_access_t
{
877 uint32_t flags
; /* IN: access requested (i.e. R_OK) */
878 uint32_t num_files
; /* IN: number of files to process */
879 uint32_t map_size
; /* IN: size of the bit map */
880 user32_addr_t file_ids
; /* IN: Array of file ids */
881 user32_addr_t bitmap
; /* OUT: hash-bitmap of interesting directory ids */
882 user32_addr_t access
; /* OUT: access info for each file (0 for 'has access') */
883 uint32_t num_parents
; /* future use */
884 user32_addr_t parents
; /* future use */
887 struct user64_ext_access_t
{
888 uint32_t flags
; /* IN: access requested (i.e. R_OK) */
889 uint32_t num_files
; /* IN: number of files to process */
890 uint32_t map_size
; /* IN: size of the bit map */
891 user64_addr_t file_ids
; /* IN: array of file ids */
892 user64_addr_t bitmap
; /* IN: array of groups */
893 user64_addr_t access
; /* OUT: access info for each file (0 for 'has access') */
894 uint32_t num_parents
;/* future use */
895 user64_addr_t parents
;/* future use */
900 * Perform a binary search for the given parent_id. Return value is
901 * the index if there is a match. If no_match_indexp is non-NULL it
902 * will be assigned with the index to insert the item (even if it was
905 static int cache_binSearch(cnid_t
*array
, unsigned int hi
, cnid_t parent_id
, int *no_match_indexp
)
911 unsigned int mid
= ((hi
- lo
)/2) + lo
;
912 unsigned int this_id
= array
[mid
];
914 if (parent_id
== this_id
) {
919 if (parent_id
< this_id
) {
924 if (parent_id
> this_id
) {
930 /* check if lo and hi converged on the match */
931 if (parent_id
== array
[hi
]) {
935 if (no_match_indexp
) {
936 *no_match_indexp
= hi
;
944 lookup_bucket(struct access_cache
*cache
, int *indexp
, cnid_t parent_id
)
948 int index
, no_match_index
;
950 if (cache
->numcached
== 0) {
952 return 0; // table is empty, so insert at index=0 and report no match
955 if (cache
->numcached
> NUM_CACHE_ENTRIES
) {
956 cache
->numcached
= NUM_CACHE_ENTRIES
;
959 hi
= cache
->numcached
- 1;
961 index
= cache_binSearch(cache
->acache
, hi
, parent_id
, &no_match_index
);
963 /* if no existing entry found, find index for new one */
965 index
= no_match_index
;
976 * Add a node to the access_cache at the given index (or do a lookup first
977 * to find the index if -1 is passed in). We currently do a replace rather
978 * than an insert if the cache is full.
981 add_node(struct access_cache
*cache
, int index
, cnid_t nodeID
, int access
)
983 int lookup_index
= -1;
985 /* need to do a lookup first if -1 passed for index */
987 if (lookup_bucket(cache
, &lookup_index
, nodeID
)) {
988 if (cache
->haveaccess
[lookup_index
] != access
&& cache
->haveaccess
[lookup_index
] == ESRCH
) {
989 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
990 cache
->haveaccess
[lookup_index
] = access
;
993 /* mission accomplished */
996 index
= lookup_index
;
1001 /* if the cache is full, do a replace rather than an insert */
1002 if (cache
->numcached
>= NUM_CACHE_ENTRIES
) {
1003 cache
->numcached
= NUM_CACHE_ENTRIES
-1;
1005 if (index
> cache
->numcached
) {
1006 index
= cache
->numcached
;
1010 if (index
< cache
->numcached
&& index
< NUM_CACHE_ENTRIES
&& nodeID
> cache
->acache
[index
]) {
1014 if (index
>= 0 && index
< cache
->numcached
) {
1015 /* only do bcopy if we're inserting */
1016 bcopy( cache
->acache
+index
, cache
->acache
+(index
+1), (cache
->numcached
- index
)*sizeof(int) );
1017 bcopy( cache
->haveaccess
+index
, cache
->haveaccess
+(index
+1), (cache
->numcached
- index
)*sizeof(unsigned char) );
1020 cache
->acache
[index
] = nodeID
;
1021 cache
->haveaccess
[index
] = access
;
1035 snoop_callback(const struct cat_desc
*descp
, const struct cat_attr
*attrp
, void * arg
)
1037 struct cinfo
*cip
= (struct cinfo
*)arg
;
1039 cip
->uid
= attrp
->ca_uid
;
1040 cip
->gid
= attrp
->ca_gid
;
1041 cip
->mode
= attrp
->ca_mode
;
1042 cip
->parentcnid
= descp
->cd_parentcnid
;
1043 cip
->recflags
= attrp
->ca_recflags
;
1049 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
1050 * isn't incore, then go to the catalog.
1053 do_attr_lookup(struct hfsmount
*hfsmp
, struct access_cache
*cache
, cnid_t cnid
,
1054 struct cnode
*skip_cp
, CatalogKey
*keyp
, struct cat_attr
*cnattrp
)
1058 /* if this id matches the one the fsctl was called with, skip the lookup */
1059 if (cnid
== skip_cp
->c_cnid
) {
1060 cnattrp
->ca_uid
= skip_cp
->c_uid
;
1061 cnattrp
->ca_gid
= skip_cp
->c_gid
;
1062 cnattrp
->ca_mode
= skip_cp
->c_mode
;
1063 cnattrp
->ca_recflags
= skip_cp
->c_attr
.ca_recflags
;
1064 keyp
->hfsPlus
.parentID
= skip_cp
->c_parentcnid
;
1066 struct cinfo c_info
;
1068 /* otherwise, check the cnode hash incase the file/dir is incore */
1069 if (hfs_chash_snoop(hfsmp
, cnid
, 0, snoop_callback
, &c_info
) == 0) {
1070 cnattrp
->ca_uid
= c_info
.uid
;
1071 cnattrp
->ca_gid
= c_info
.gid
;
1072 cnattrp
->ca_mode
= c_info
.mode
;
1073 cnattrp
->ca_recflags
= c_info
.recflags
;
1074 keyp
->hfsPlus
.parentID
= c_info
.parentcnid
;
1078 if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp
)))
1079 throttle_lowpri_io(1);
1081 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_CATALOG
, HFS_SHARED_LOCK
);
1083 /* lookup this cnid in the catalog */
1084 error
= cat_getkeyplusattr(hfsmp
, cnid
, keyp
, cnattrp
);
1086 hfs_systemfile_unlock(hfsmp
, lockflags
);
1097 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
1098 * up to CACHE_LEVELS as we progress towards the root.
1101 do_access_check(struct hfsmount
*hfsmp
, int *err
, struct access_cache
*cache
, HFSCatalogNodeID nodeID
,
1102 struct cnode
*skip_cp
, struct proc
*theProcPtr
, kauth_cred_t myp_ucred
,
1103 struct vfs_context
*my_context
,
1107 uint32_t num_parents
)
1111 HFSCatalogNodeID thisNodeID
;
1112 unsigned int myPerms
;
1113 struct cat_attr cnattr
;
1114 int cache_index
= -1, scope_index
= -1, scope_idx_start
= -1;
1117 int i
= 0, ids_to_cache
= 0;
1118 int parent_ids
[CACHE_LEVELS
];
1120 thisNodeID
= nodeID
;
1121 while (thisNodeID
>= kRootDirID
) {
1122 myResult
= 0; /* default to "no access" */
1124 /* check the cache before resorting to hitting the catalog */
1126 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
1127 * to look any further after hitting cached dir */
1129 if (lookup_bucket(cache
, &cache_index
, thisNodeID
)) {
1131 myErr
= cache
->haveaccess
[cache_index
];
1132 if (scope_index
!= -1) {
1133 if (myErr
== ESRCH
) {
1137 scope_index
= 0; // so we'll just use the cache result
1138 scope_idx_start
= ids_to_cache
;
1140 myResult
= (myErr
== 0) ? 1 : 0;
1141 goto ExitThisRoutine
;
1147 tmp
= cache_binSearch(parents
, num_parents
-1, thisNodeID
, NULL
);
1148 if (scope_index
== -1)
1150 if (tmp
!= -1 && scope_idx_start
== -1 && ids_to_cache
< CACHE_LEVELS
) {
1151 scope_idx_start
= ids_to_cache
;
1155 /* remember which parents we want to cache */
1156 if (ids_to_cache
< CACHE_LEVELS
) {
1157 parent_ids
[ids_to_cache
] = thisNodeID
;
1160 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
1161 if (bitmap
&& map_size
) {
1162 bitmap
[(thisNodeID
/8)%(map_size
)]|=(1<<(thisNodeID
&7));
1166 /* do the lookup (checks the cnode hash, then the catalog) */
1167 myErr
= do_attr_lookup(hfsmp
, cache
, thisNodeID
, skip_cp
, &catkey
, &cnattr
);
1169 goto ExitThisRoutine
; /* no access */
1172 /* Root always gets access. */
1173 if (suser(myp_ucred
, NULL
) == 0) {
1174 thisNodeID
= catkey
.hfsPlus
.parentID
;
1179 // if the thing has acl's, do the full permission check
1180 if ((cnattr
.ca_recflags
& kHFSHasSecurityMask
) != 0) {
1183 /* get the vnode for this cnid */
1184 myErr
= hfs_vget(hfsmp
, thisNodeID
, &vp
, 0, 0);
1187 goto ExitThisRoutine
;
1190 thisNodeID
= VTOC(vp
)->c_parentcnid
;
1192 hfs_unlock(VTOC(vp
));
1194 if (vnode_vtype(vp
) == VDIR
) {
1195 myErr
= vnode_authorize(vp
, NULL
, (KAUTH_VNODE_SEARCH
| KAUTH_VNODE_LIST_DIRECTORY
), my_context
);
1197 myErr
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
, my_context
);
1203 goto ExitThisRoutine
;
1207 int mode
= cnattr
.ca_mode
& S_IFMT
;
1208 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
, cnattr
.ca_mode
, hfsmp
->hfs_mp
,myp_ucred
, theProcPtr
);
1210 if (mode
== S_IFDIR
) {
1211 flags
= R_OK
| X_OK
;
1215 if ( (myPerms
& flags
) != flags
) {
1218 goto ExitThisRoutine
; /* no access */
1221 /* up the hierarchy we go */
1222 thisNodeID
= catkey
.hfsPlus
.parentID
;
1226 /* if here, we have access to this node */
1230 if (parents
&& myErr
== 0 && scope_index
== -1) {
1239 /* cache the parent directory(ies) */
1240 for (i
= 0; i
< ids_to_cache
; i
++) {
1241 if (myErr
== 0 && parents
&& (scope_idx_start
== -1 || i
> scope_idx_start
)) {
1242 add_node(cache
, -1, parent_ids
[i
], ESRCH
);
1244 add_node(cache
, -1, parent_ids
[i
], myErr
);
1252 do_bulk_access_check(struct hfsmount
*hfsmp
, struct vnode
*vp
,
1253 struct vnop_ioctl_args
*ap
, int arg_size
, vfs_context_t context
)
1258 * NOTE: on entry, the vnode has an io_ref. In case this vnode
1259 * happens to be in our list of file_ids, we'll note it
1260 * avoid calling hfs_chashget_nowait() on that id as that
1261 * will cause a "locking against myself" panic.
1263 Boolean check_leaf
= true;
1265 struct user64_ext_access_t
*user_access_structp
;
1266 struct user64_ext_access_t tmp_user_access
;
1267 struct access_cache cache
;
1269 int error
= 0, prev_parent_check_ok
=1;
1273 unsigned int num_files
= 0;
1275 int num_parents
= 0;
1279 cnid_t
*parents
=NULL
;
1283 cnid_t prevParent_cnid
= 0;
1284 unsigned int myPerms
;
1286 struct cat_attr cnattr
;
1288 struct cnode
*skip_cp
= VTOC(vp
);
1289 kauth_cred_t cred
= vfs_context_ucred(context
);
1290 proc_t p
= vfs_context_proc(context
);
1292 is64bit
= proc_is64bit(p
);
1294 /* initialize the local cache and buffers */
1295 cache
.numcached
= 0;
1296 cache
.cachehits
= 0;
1298 cache
.acache
= NULL
;
1299 cache
.haveaccess
= NULL
;
1301 /* struct copyin done during dispatch... need to copy file_id array separately */
1302 if (ap
->a_data
== NULL
) {
1304 goto err_exit_bulk_access
;
1308 if (arg_size
!= sizeof(struct user64_ext_access_t
)) {
1310 goto err_exit_bulk_access
;
1313 user_access_structp
= (struct user64_ext_access_t
*)ap
->a_data
;
1315 } else if (arg_size
== sizeof(struct user32_access_t
)) {
1316 struct user32_access_t
*accessp
= (struct user32_access_t
*)ap
->a_data
;
1318 // convert an old style bulk-access struct to the new style
1319 tmp_user_access
.flags
= accessp
->flags
;
1320 tmp_user_access
.num_files
= accessp
->num_files
;
1321 tmp_user_access
.map_size
= 0;
1322 tmp_user_access
.file_ids
= CAST_USER_ADDR_T(accessp
->file_ids
);
1323 tmp_user_access
.bitmap
= USER_ADDR_NULL
;
1324 tmp_user_access
.access
= CAST_USER_ADDR_T(accessp
->access
);
1325 tmp_user_access
.num_parents
= 0;
1326 user_access_structp
= &tmp_user_access
;
1328 } else if (arg_size
== sizeof(struct user32_ext_access_t
)) {
1329 struct user32_ext_access_t
*accessp
= (struct user32_ext_access_t
*)ap
->a_data
;
1331 // up-cast from a 32-bit version of the struct
1332 tmp_user_access
.flags
= accessp
->flags
;
1333 tmp_user_access
.num_files
= accessp
->num_files
;
1334 tmp_user_access
.map_size
= accessp
->map_size
;
1335 tmp_user_access
.num_parents
= accessp
->num_parents
;
1337 tmp_user_access
.file_ids
= CAST_USER_ADDR_T(accessp
->file_ids
);
1338 tmp_user_access
.bitmap
= CAST_USER_ADDR_T(accessp
->bitmap
);
1339 tmp_user_access
.access
= CAST_USER_ADDR_T(accessp
->access
);
1340 tmp_user_access
.parents
= CAST_USER_ADDR_T(accessp
->parents
);
1342 user_access_structp
= &tmp_user_access
;
1345 goto err_exit_bulk_access
;
1348 map_size
= user_access_structp
->map_size
;
1350 num_files
= user_access_structp
->num_files
;
1352 num_parents
= user_access_structp
->num_parents
;
1354 if (num_files
< 1) {
1355 goto err_exit_bulk_access
;
1357 if (num_files
> 1024) {
1359 goto err_exit_bulk_access
;
1362 if (num_parents
> 1024) {
1364 goto err_exit_bulk_access
;
1367 file_ids
= (int *) kalloc(sizeof(int) * num_files
);
1368 access
= (short *) kalloc(sizeof(short) * num_files
);
1370 bitmap
= (char *) kalloc(sizeof(char) * map_size
);
1374 parents
= (cnid_t
*) kalloc(sizeof(cnid_t
) * num_parents
);
1377 cache
.acache
= (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES
);
1378 cache
.haveaccess
= (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES
);
1380 if (file_ids
== NULL
|| access
== NULL
|| (map_size
!= 0 && bitmap
== NULL
) || cache
.acache
== NULL
|| cache
.haveaccess
== NULL
) {
1382 kfree(file_ids
, sizeof(int) * num_files
);
1385 kfree(bitmap
, sizeof(char) * map_size
);
1388 kfree(access
, sizeof(short) * num_files
);
1391 kfree(cache
.acache
, sizeof(int) * NUM_CACHE_ENTRIES
);
1393 if (cache
.haveaccess
) {
1394 kfree(cache
.haveaccess
, sizeof(unsigned char) * NUM_CACHE_ENTRIES
);
1397 kfree(parents
, sizeof(cnid_t
) * num_parents
);
1402 // make sure the bitmap is zero'ed out...
1404 bzero(bitmap
, (sizeof(char) * map_size
));
1407 if ((error
= copyin(user_access_structp
->file_ids
, (caddr_t
)file_ids
,
1408 num_files
* sizeof(int)))) {
1409 goto err_exit_bulk_access
;
1413 if ((error
= copyin(user_access_structp
->parents
, (caddr_t
)parents
,
1414 num_parents
* sizeof(cnid_t
)))) {
1415 goto err_exit_bulk_access
;
1419 flags
= user_access_structp
->flags
;
1420 if ((flags
& (F_OK
| R_OK
| W_OK
| X_OK
)) == 0) {
1424 /* check if we've been passed leaf node ids or parent ids */
1425 if (flags
& PARENT_IDS_FLAG
) {
1429 /* Check access to each file_id passed in */
1430 for (i
= 0; i
< num_files
; i
++) {
1432 cnid
= (cnid_t
) file_ids
[i
];
1434 /* root always has access */
1435 if ((!parents
) && (!suser(cred
, NULL
))) {
1441 /* do the lookup (checks the cnode hash, then the catalog) */
1442 error
= do_attr_lookup(hfsmp
, &cache
, cnid
, skip_cp
, &catkey
, &cnattr
);
1444 access
[i
] = (short) error
;
1449 // Check if the leaf matches one of the parent scopes
1450 leaf_index
= cache_binSearch(parents
, num_parents
-1, cnid
, NULL
);
1451 if (leaf_index
>= 0 && parents
[leaf_index
] == cnid
)
1452 prev_parent_check_ok
= 0;
1453 else if (leaf_index
>= 0)
1454 prev_parent_check_ok
= 1;
1457 // if the thing has acl's, do the full permission check
1458 if ((cnattr
.ca_recflags
& kHFSHasSecurityMask
) != 0) {
1461 /* get the vnode for this cnid */
1462 myErr
= hfs_vget(hfsmp
, cnid
, &cvp
, 0, 0);
1468 hfs_unlock(VTOC(cvp
));
1470 if (vnode_vtype(cvp
) == VDIR
) {
1471 myErr
= vnode_authorize(cvp
, NULL
, (KAUTH_VNODE_SEARCH
| KAUTH_VNODE_LIST_DIRECTORY
), context
);
1473 myErr
= vnode_authorize(cvp
, NULL
, KAUTH_VNODE_READ_DATA
, context
);
1482 /* before calling CheckAccess(), check the target file for read access */
1483 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
,
1484 cnattr
.ca_mode
, hfsmp
->hfs_mp
, cred
, p
);
1486 /* fail fast if no access */
1487 if ((myPerms
& flags
) == 0) {
1493 /* we were passed an array of parent ids */
1494 catkey
.hfsPlus
.parentID
= cnid
;
1497 /* if the last guy had the same parent and had access, we're done */
1498 if (i
> 0 && catkey
.hfsPlus
.parentID
== prevParent_cnid
&& access
[i
-1] == 0 && prev_parent_check_ok
) {
1504 myaccess
= do_access_check(hfsmp
, &error
, &cache
, catkey
.hfsPlus
.parentID
,
1505 skip_cp
, p
, cred
, context
,bitmap
, map_size
, parents
, num_parents
);
1507 if (myaccess
|| (error
== ESRCH
&& leaf_index
!= -1)) {
1508 access
[i
] = 0; // have access.. no errors to report
1510 access
[i
] = (error
!= 0 ? (short) error
: EACCES
);
1513 prevParent_cnid
= catkey
.hfsPlus
.parentID
;
1516 /* copyout the access array */
1517 if ((error
= copyout((caddr_t
)access
, user_access_structp
->access
,
1518 num_files
* sizeof (short)))) {
1519 goto err_exit_bulk_access
;
1521 if (map_size
&& bitmap
) {
1522 if ((error
= copyout((caddr_t
)bitmap
, user_access_structp
->bitmap
,
1523 map_size
* sizeof (char)))) {
1524 goto err_exit_bulk_access
;
1529 err_exit_bulk_access
:
1532 kfree(file_ids
, sizeof(int) * num_files
);
1534 kfree(parents
, sizeof(cnid_t
) * num_parents
);
1536 kfree(bitmap
, sizeof(char) * map_size
);
1538 kfree(access
, sizeof(short) * num_files
);
1540 kfree(cache
.acache
, sizeof(int) * NUM_CACHE_ENTRIES
);
1541 if (cache
.haveaccess
)
1542 kfree(cache
.haveaccess
, sizeof(unsigned char) * NUM_CACHE_ENTRIES
);
1548 /* end "bulk-access" support */
1552 * Callback for use with freeze ioctl.
1555 hfs_freezewrite_callback(struct vnode
*vp
, __unused
void *cargs
)
1557 vnode_waitforwrites(vp
, 0, 0, 0, "hfs freeze");
1563 * Control filesystem operating characteristics.
1566 hfs_vnop_ioctl( struct vnop_ioctl_args
/* {
1571 vfs_context_t a_context;
1574 struct vnode
* vp
= ap
->a_vp
;
1575 struct hfsmount
*hfsmp
= VTOHFS(vp
);
1576 vfs_context_t context
= ap
->a_context
;
1577 kauth_cred_t cred
= vfs_context_ucred(context
);
1578 proc_t p
= vfs_context_proc(context
);
1579 struct vfsstatfs
*vfsp
;
1581 off_t jnl_start
, jnl_size
;
1582 struct hfs_journal_info
*jip
;
1585 off_t uncompressed_size
= -1;
1586 int decmpfs_error
= 0;
1588 if (ap
->a_command
== F_RDADVISE
) {
1589 /* we need to inspect the decmpfs state of the file as early as possible */
1590 compressed
= hfs_file_is_compressed(VTOC(vp
), 0);
1592 if (VNODE_IS_RSRC(vp
)) {
1593 /* if this is the resource fork, treat it as if it were empty */
1594 uncompressed_size
= 0;
1596 decmpfs_error
= hfs_uncompressed_size_of_compressed_file(NULL
, vp
, 0, &uncompressed_size
, 0);
1597 if (decmpfs_error
!= 0) {
1598 /* failed to get the uncompressed size, we'll check for this later */
1599 uncompressed_size
= -1;
1604 #endif /* HFS_COMPRESSION */
1606 is64bit
= proc_is64bit(p
);
1611 if ((error
= cp_handle_vnop(vp
, CP_WRITE_ACCESS
, 0)) != 0) {
1615 #endif /* CONFIG_PROTECT */
1617 switch (ap
->a_command
) {
1621 struct vnode
*file_vp
;
1628 /* Caller must be owner of file system. */
1629 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1630 if (suser(cred
, NULL
) &&
1631 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1634 /* Target vnode must be file system's root. */
1635 if (!vnode_isvroot(vp
)) {
1638 bufptr
= (char *)ap
->a_data
;
1639 cnid
= strtoul(bufptr
, NULL
, 10);
1640 if (ap
->a_fflag
& HFS_GETPATH_VOLUME_RELATIVE
) {
1641 flags
|= BUILDPATH_VOLUME_RELATIVE
;
1644 /* We need to call hfs_vfs_vget to leverage the code that will
1645 * fix the origin list for us if needed, as opposed to calling
1646 * hfs_vget, since we will need the parent for build_path call.
1649 if ((error
= hfs_vfs_vget(HFSTOVFS(hfsmp
), cnid
, &file_vp
, context
))) {
1652 error
= build_path(file_vp
, bufptr
, sizeof(pathname_t
), &outlen
, flags
, context
);
1658 case HFS_GET_WRITE_GEN_COUNTER
:
1660 struct cnode
*cp
= NULL
;
1662 u_int32_t
*counter
= (u_int32_t
*)ap
->a_data
;
1666 if (!vnode_isdir(vp
) && !(vnode_isreg(vp
)) &&
1667 !(vnode_islnk(vp
))) {
1673 error
= hfs_lock (cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
1675 struct ubc_info
*uip
;
1676 int is_mapped_writable
= 0;
1678 if (UBCINFOEXISTS(vp
)) {
1679 uip
= vp
->v_ubcinfo
;
1680 if ((uip
->ui_flags
& UI_ISMAPPED
) && (uip
->ui_flags
& UI_MAPPEDWRITE
)) {
1681 is_mapped_writable
= 1;
1686 if (S_ISREG(cp
->c_attr
.ca_mode
) || S_ISLNK(cp
->c_attr
.ca_mode
)) {
1687 uint32_t gcount
= hfs_get_gencount(cp
);
1689 // Even though we return EBUSY for files that are mmap'ed
1690 // we also want to bump the value so that the write-gen
1691 // counter will always be different once the file is unmapped
1692 // (since the file may be unmapped but the pageouts have not
1695 if (is_mapped_writable
) {
1696 hfs_incr_gencount (cp
);
1697 gcount
= hfs_get_gencount(cp
);
1701 } else if (S_ISDIR(cp
->c_attr
.ca_mode
)) {
1702 *counter
= hfs_get_gencount(cp
);
1704 /* not a file or dir? silently return */
1709 if (is_mapped_writable
) {
1717 case HFS_GET_DOCUMENT_ID
:
1719 struct cnode
*cp
= NULL
;
1721 u_int32_t
*document_id
= (u_int32_t
*)ap
->a_data
;
1725 if (cp
->c_desc
.cd_cnid
== kHFSRootFolderID
) {
1726 // the root-dir always has document id '2' (aka kHFSRootFolderID)
1727 *document_id
= kHFSRootFolderID
;
1729 } else if ((S_ISDIR(cp
->c_attr
.ca_mode
) || S_ISREG(cp
->c_attr
.ca_mode
) || S_ISLNK(cp
->c_attr
.ca_mode
))) {
1731 uint32_t tmp_doc_id
;
1734 // we can use the FndrExtendedFileInfo because the doc-id is the first
1735 // thing in both it and the FndrExtendedDirInfo struct which is fixed
1736 // in format and can not change layout
1738 struct FndrExtendedFileInfo
*extinfo
= (struct FndrExtendedFileInfo
*)((u_int8_t
*)cp
->c_finderinfo
+ 16);
1740 hfs_lock(cp
, HFS_SHARED_LOCK
, HFS_LOCK_DEFAULT
);
1743 // if the cnode isn't UF_TRACKED and the doc-id-allocate flag isn't set
1744 // then just return a zero for the doc-id
1746 if (!(cp
->c_bsdflags
& UF_TRACKED
) && !(ap
->a_fflag
& HFS_DOCUMENT_ID_ALLOCATE
)) {
1753 // if the cnode isn't UF_TRACKED and the doc-id-allocate flag IS set,
1754 // then set mark_it so we know to set the UF_TRACKED flag once the
1757 if (!(cp
->c_bsdflags
& UF_TRACKED
) && (ap
->a_fflag
& HFS_DOCUMENT_ID_ALLOCATE
)) {
1761 tmp_doc_id
= extinfo
->document_id
; // get a copy of this
1763 hfs_unlock(cp
); // in case we have to call hfs_generate_document_id()
1766 // If the document_id isn't set, get a new one and then set it.
1767 // Note: we first get the document id, then lock the cnode to
1768 // avoid any deadlock potential between cp and the root vnode.
1771 if (tmp_doc_id
== 0 && (error
= hfs_generate_document_id(hfsmp
, &new_id
)) == 0) {
1773 if ((error
= hfs_lock (cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
)) == 0) {
1774 extinfo
->document_id
= tmp_doc_id
= new_id
;
1775 //printf("ASSIGNING: doc-id %d to ino %d\n", extinfo->document_id, cp->c_fileid);
1778 cp
->c_bsdflags
|= UF_TRACKED
;
1781 // mark the cnode dirty
1782 cp
->c_flag
|= C_MODIFIED
| C_FORCEUPDATE
;
1785 if ((error
= hfs_start_transaction(hfsmp
)) == 0) {
1786 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_CATALOG
, HFS_EXCLUSIVE_LOCK
);
1788 (void) cat_update(hfsmp
, &cp
->c_desc
, &cp
->c_attr
, NULL
, NULL
);
1790 hfs_systemfile_unlock (hfsmp
, lockflags
);
1791 (void) hfs_end_transaction(hfsmp
);
1795 add_fsevent(FSE_DOCID_CHANGED
, context
,
1796 FSE_ARG_DEV
, hfsmp
->hfs_raw_dev
,
1797 FSE_ARG_INO
, (ino64_t
)0, // src inode #
1798 FSE_ARG_INO
, (ino64_t
)cp
->c_fileid
, // dst inode #
1799 FSE_ARG_INT32
, extinfo
->document_id
,
1802 hfs_unlock (cp
); // so we can send the STAT_CHANGED event without deadlocking
1804 if (need_fsevent(FSE_STAT_CHANGED
, vp
)) {
1805 add_fsevent(FSE_STAT_CHANGED
, context
, FSE_ARG_VNODE
, vp
, FSE_ARG_DONE
);
1813 *document_id
= tmp_doc_id
;
1821 case HFS_TRANSFER_DOCUMENT_ID
:
1823 struct cnode
*cp
= NULL
;
1825 u_int32_t to_fd
= *(u_int32_t
*)ap
->a_data
;
1826 struct fileproc
*to_fp
;
1827 struct vnode
*to_vp
;
1828 struct cnode
*to_cp
;
1832 if ((error
= fp_getfvp(p
, to_fd
, &to_fp
, &to_vp
)) != 0) {
1833 //printf("could not get the vnode for fd %d (err %d)\n", to_fd, error);
1836 if ( (error
= vnode_getwithref(to_vp
)) ) {
1841 if (VTOHFS(to_vp
) != hfsmp
) {
1843 goto transfer_cleanup
;
1846 int need_unlock
= 1;
1847 to_cp
= VTOC(to_vp
);
1848 error
= hfs_lockpair(cp
, to_cp
, HFS_EXCLUSIVE_LOCK
);
1850 //printf("could not lock the pair of cnodes (error %d)\n", error);
1851 goto transfer_cleanup
;
1854 if (!(cp
->c_bsdflags
& UF_TRACKED
)) {
1856 } else if (to_cp
->c_bsdflags
& UF_TRACKED
) {
1858 // if the destination is already tracked, return an error
1859 // as otherwise it's a silent deletion of the target's
1863 } else if (S_ISDIR(cp
->c_attr
.ca_mode
) || S_ISREG(cp
->c_attr
.ca_mode
) || S_ISLNK(cp
->c_attr
.ca_mode
)) {
1865 // we can use the FndrExtendedFileInfo because the doc-id is the first
1866 // thing in both it and the ExtendedDirInfo struct which is fixed in
1867 // format and can not change layout
1869 struct FndrExtendedFileInfo
*f_extinfo
= (struct FndrExtendedFileInfo
*)((u_int8_t
*)cp
->c_finderinfo
+ 16);
1870 struct FndrExtendedFileInfo
*to_extinfo
= (struct FndrExtendedFileInfo
*)((u_int8_t
*)to_cp
->c_finderinfo
+ 16);
1872 if (f_extinfo
->document_id
== 0) {
1875 hfs_unlockpair(cp
, to_cp
); // have to unlock to be able to get a new-id
1877 if ((error
= hfs_generate_document_id(hfsmp
, &new_id
)) == 0) {
1879 // re-lock the pair now that we have the document-id
1881 hfs_lockpair(cp
, to_cp
, HFS_EXCLUSIVE_LOCK
);
1882 f_extinfo
->document_id
= new_id
;
1884 goto transfer_cleanup
;
1888 to_extinfo
->document_id
= f_extinfo
->document_id
;
1889 f_extinfo
->document_id
= 0;
1890 //printf("TRANSFERRING: doc-id %d from ino %d to ino %d\n", to_extinfo->document_id, cp->c_fileid, to_cp->c_fileid);
1892 // make sure the destination is also UF_TRACKED
1893 to_cp
->c_bsdflags
|= UF_TRACKED
;
1894 cp
->c_bsdflags
&= ~UF_TRACKED
;
1896 // mark the cnodes dirty
1897 cp
->c_flag
|= C_MODIFIED
| C_FORCEUPDATE
;
1898 to_cp
->c_flag
|= C_MODIFIED
| C_FORCEUPDATE
;
1901 if ((error
= hfs_start_transaction(hfsmp
)) == 0) {
1903 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_CATALOG
, HFS_EXCLUSIVE_LOCK
);
1905 (void) cat_update(hfsmp
, &cp
->c_desc
, &cp
->c_attr
, NULL
, NULL
);
1906 (void) cat_update(hfsmp
, &to_cp
->c_desc
, &to_cp
->c_attr
, NULL
, NULL
);
1908 hfs_systemfile_unlock (hfsmp
, lockflags
);
1909 (void) hfs_end_transaction(hfsmp
);
1913 add_fsevent(FSE_DOCID_CHANGED
, context
,
1914 FSE_ARG_DEV
, hfsmp
->hfs_raw_dev
,
1915 FSE_ARG_INO
, (ino64_t
)cp
->c_fileid
, // src inode #
1916 FSE_ARG_INO
, (ino64_t
)to_cp
->c_fileid
, // dst inode #
1917 FSE_ARG_INT32
, to_extinfo
->document_id
,
1920 hfs_unlockpair(cp
, to_cp
); // unlock this so we can send the fsevents
1923 if (need_fsevent(FSE_STAT_CHANGED
, vp
)) {
1924 add_fsevent(FSE_STAT_CHANGED
, context
, FSE_ARG_VNODE
, vp
, FSE_ARG_DONE
);
1926 if (need_fsevent(FSE_STAT_CHANGED
, to_vp
)) {
1927 add_fsevent(FSE_STAT_CHANGED
, context
, FSE_ARG_VNODE
, to_vp
, FSE_ARG_DONE
);
1930 hfs_unlockpair(cp
, to_cp
); // unlock this so we can send the fsevents
1936 hfs_unlockpair(cp
, to_cp
);
1954 /* Caller must be owner of file system. */
1955 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1956 if (suser(cred
, NULL
) &&
1957 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1960 /* Target vnode must be file system's root. */
1961 if (!vnode_isvroot(vp
)) {
1964 linkfileid
= *(cnid_t
*)ap
->a_data
;
1965 if (linkfileid
< kHFSFirstUserCatalogNodeID
) {
1968 if ((error
= hfs_lookup_siblinglinks(hfsmp
, linkfileid
, &prevlinkid
, &nextlinkid
))) {
1971 if (ap
->a_command
== HFS_NEXT_LINK
) {
1972 *(cnid_t
*)ap
->a_data
= nextlinkid
;
1974 *(cnid_t
*)ap
->a_data
= prevlinkid
;
1979 case HFS_RESIZE_PROGRESS
: {
1981 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1982 if (suser(cred
, NULL
) &&
1983 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
1984 return (EACCES
); /* must be owner of file system */
1986 if (!vnode_isvroot(vp
)) {
1989 /* file system must not be mounted read-only */
1990 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
1994 return hfs_resize_progress(hfsmp
, (u_int32_t
*)ap
->a_data
);
1997 case HFS_RESIZE_VOLUME
: {
2001 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
2002 if (suser(cred
, NULL
) &&
2003 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
2004 return (EACCES
); /* must be owner of file system */
2006 if (!vnode_isvroot(vp
)) {
2010 /* filesystem must not be mounted read only */
2011 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2014 newsize
= *(u_int64_t
*)ap
->a_data
;
2015 cursize
= (u_int64_t
)hfsmp
->totalBlocks
* (u_int64_t
)hfsmp
->blockSize
;
2017 if (newsize
> cursize
) {
2018 return hfs_extendfs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
2019 } else if (newsize
< cursize
) {
2020 return hfs_truncatefs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
2025 case HFS_CHANGE_NEXT_ALLOCATION
: {
2026 int error
= 0; /* Assume success */
2029 if (vnode_vfsisrdonly(vp
)) {
2032 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
2033 if (suser(cred
, NULL
) &&
2034 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
2035 return (EACCES
); /* must be owner of file system */
2037 if (!vnode_isvroot(vp
)) {
2040 hfs_lock_mount(hfsmp
);
2041 location
= *(u_int32_t
*)ap
->a_data
;
2042 if ((location
>= hfsmp
->allocLimit
) &&
2043 (location
!= HFS_NO_UPDATE_NEXT_ALLOCATION
)) {
2045 goto fail_change_next_allocation
;
2047 /* Return previous value. */
2048 *(u_int32_t
*)ap
->a_data
= hfsmp
->nextAllocation
;
2049 if (location
== HFS_NO_UPDATE_NEXT_ALLOCATION
) {
2050 /* On magic value for location, set nextAllocation to next block
2051 * after metadata zone and set flag in mount structure to indicate
2052 * that nextAllocation should not be updated again.
2054 if (hfsmp
->hfs_metazone_end
!= 0) {
2055 HFS_UPDATE_NEXT_ALLOCATION(hfsmp
, hfsmp
->hfs_metazone_end
+ 1);
2057 hfsmp
->hfs_flags
|= HFS_SKIP_UPDATE_NEXT_ALLOCATION
;
2059 hfsmp
->hfs_flags
&= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION
;
2060 HFS_UPDATE_NEXT_ALLOCATION(hfsmp
, location
);
2062 MarkVCBDirty(hfsmp
);
2063 fail_change_next_allocation
:
2064 hfs_unlock_mount(hfsmp
);
2069 case HFS_SETBACKINGSTOREINFO
: {
2070 struct vnode
* bsfs_rootvp
;
2071 struct vnode
* di_vp
;
2072 struct hfs_backingstoreinfo
*bsdata
;
2075 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2078 if (hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) {
2081 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
2082 if (suser(cred
, NULL
) &&
2083 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
2084 return (EACCES
); /* must be owner of file system */
2086 bsdata
= (struct hfs_backingstoreinfo
*)ap
->a_data
;
2087 if (bsdata
== NULL
) {
2090 if ((error
= file_vnode(bsdata
->backingfd
, &di_vp
))) {
2093 if ((error
= vnode_getwithref(di_vp
))) {
2094 file_drop(bsdata
->backingfd
);
2098 if (vnode_mount(vp
) == vnode_mount(di_vp
)) {
2099 (void)vnode_put(di_vp
);
2100 file_drop(bsdata
->backingfd
);
2105 * Obtain the backing fs root vnode and keep a reference
2106 * on it. This reference will be dropped in hfs_unmount.
2108 error
= VFS_ROOT(vnode_mount(di_vp
), &bsfs_rootvp
, NULL
); /* XXX use context! */
2110 (void)vnode_put(di_vp
);
2111 file_drop(bsdata
->backingfd
);
2114 vnode_ref(bsfs_rootvp
);
2115 vnode_put(bsfs_rootvp
);
2117 hfsmp
->hfs_backingfs_rootvp
= bsfs_rootvp
;
2119 hfsmp
->hfs_flags
|= HFS_HAS_SPARSE_DEVICE
;
2120 /* The free extent cache is managed differently for sparse devices.
2121 * There is a window between which the volume is mounted and the
2122 * device is marked as sparse, so the free extent cache for this
2123 * volume is currently initialized as normal volume (sorted by block
2124 * count). Reset the cache so that it will be rebuilt again
2125 * for sparse device (sorted by start block).
2127 ResetVCBFreeExtCache(hfsmp
);
2129 hfsmp
->hfs_sparsebandblks
= bsdata
->bandsize
/ HFSTOVCB(hfsmp
)->blockSize
;
2130 hfsmp
->hfs_sparsebandblks
*= 4;
2132 /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */
2135 * If the sparse image is on a sparse image file (as opposed to a sparse
2136 * bundle), then we may need to limit the free space to the maximum size
2137 * of a file on that volume. So we query (using pathconf), and if we get
2138 * a meaningful result, we cache the number of blocks for later use in
2141 hfsmp
->hfs_backingfs_maxblocks
= 0;
2142 if (vnode_vtype(di_vp
) == VREG
) {
2145 terr
= vn_pathconf(di_vp
, _PC_FILESIZEBITS
, &hostbits
, context
);
2146 if (terr
== 0 && hostbits
!= 0 && hostbits
< 64) {
2147 u_int64_t hostfilesizemax
= ((u_int64_t
)1) << hostbits
;
2149 hfsmp
->hfs_backingfs_maxblocks
= hostfilesizemax
/ hfsmp
->blockSize
;
2153 (void)vnode_put(di_vp
);
2154 file_drop(bsdata
->backingfd
);
2157 case HFS_CLRBACKINGSTOREINFO
: {
2158 struct vnode
* tmpvp
;
2160 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
2161 if (suser(cred
, NULL
) &&
2162 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
2163 return (EACCES
); /* must be owner of file system */
2165 if (hfsmp
->hfs_flags
& HFS_READ_ONLY
) {
2169 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
2170 hfsmp
->hfs_backingfs_rootvp
) {
2172 hfsmp
->hfs_flags
&= ~HFS_HAS_SPARSE_DEVICE
;
2173 tmpvp
= hfsmp
->hfs_backingfs_rootvp
;
2174 hfsmp
->hfs_backingfs_rootvp
= NULLVP
;
2175 hfsmp
->hfs_sparsebandblks
= 0;
2180 #endif /* HFS_SPARSE_DEV */
	/* Change the next CNID stored in the VH */
	case HFS_CHANGE_NEXTCNID: {
		int error = 0;		/* Assume success */

		if (vnode_vfsisrdonly(vp)) {
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
		    kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */

		fileid = *(u_int32_t *)ap->a_data;

		/* Must have catalog lock excl. to advance the CNID pointer */
		lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);

		hfs_lock_mount(hfsmp);

		/* If it is less than the current next CNID, force the wraparound bit to be set */
		if (fileid < hfsmp->vcbNxtCNID) {

		/* Return previous value. */
		*(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID;

		hfsmp->vcbNxtCNID = fileid;

		hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask;

		MarkVCBDirty(hfsmp);
		hfs_unlock_mount(hfsmp);
		hfs_systemfile_unlock (hfsmp, lockflags);
		mp = vnode_mount(vp);
		hfsmp = VFSTOHFS(mp);

		vfsp = vfs_statfs(mp);

		if (kauth_cred_getuid(cred) != vfsp->f_owner &&
		    !kauth_cred_issuser(cred))

		lck_rw_lock_exclusive(&hfsmp->hfs_insync);

		// flush things before we get started to try and prevent
		// dirty data from being paged out while we're frozen.
		// note: can't do this after taking the lock as it will
		// deadlock against ourselves.
		vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
		hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);

		// DO NOT call hfs_journal_flush() because that takes a
		// shared lock on the global exclusive lock!
		journal_flush(hfsmp->jnl, TRUE);

		// don't need to iterate on all vnodes, we just need to
		// wait for writes to the system files and the device vnode
		//
		// Now that journal flush waits for all metadata blocks to
		// be written out, waiting for btree writes is probably no
		if (HFSTOVCB(hfsmp)->extentsRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
		if (HFSTOVCB(hfsmp)->catalogRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
		if (HFSTOVCB(hfsmp)->allocationsRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
		if (hfsmp->hfs_attribute_vp)
			vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
		vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");

		hfsmp->hfs_freezing_proc = current_proc();

		vfsp = vfs_statfs(vnode_mount(vp));
		if (kauth_cred_getuid(cred) != vfsp->f_owner &&
		    !kauth_cred_issuser(cred))

		// if we're not the one who froze the fs then we
		if (hfsmp->hfs_freezing_proc != current_proc()) {

		// NOTE: if you add code here, also go check the
		// code that "thaws" the fs in hfs_vnop_close()
		hfsmp->hfs_freezing_proc = NULL;
		hfs_unlock_global (hfsmp);
		lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
	case HFS_BULKACCESS_FSCTL: {
		if (hfsmp->hfs_flags & HFS_STANDARD) {
			size = sizeof(struct user64_access_t);
			size = sizeof(struct user32_access_t);
		return do_bulk_access_check(hfsmp, vp, ap, size, context);

	case HFS_EXT_BULKACCESS_FSCTL: {
		if (hfsmp->hfs_flags & HFS_STANDARD) {
			size = sizeof(struct user64_ext_access_t);
			size = sizeof(struct user32_ext_access_t);
		return do_bulk_access_check(hfsmp, vp, ap, size, context);
	case HFS_SET_XATTREXTENTS_STATE: {
		if (ap->a_data == NULL) {

		state = *(int *)ap->a_data;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {

		/* Super-user can enable or disable extent-based extended
		 * attribute support on a volume
		 * Note: Starting Mac OS X 10.7, extent-based extended attributes
		 * are enabled by default, so any change will be transient only
		 * till the volume is remounted.
		 */
		if (!kauth_cred_issuser(kauth_cred_get())) {
		if (state == 0 || state == 1)
			return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
	case F_SETSTATICCONTENT: {
		int enable_static = 0;
		struct cnode *cp = NULL;
		/*
		 * lock the cnode, decorate the cnode flag, and bail out.
		 * VFS should have already authenticated the caller for us.
		 */

		/*
		 * Note that even though ap->a_data is of type caddr_t,
		 * the fcntl layer at the syscall handler will pass in NULL
		 * or 1 depending on what the argument supplied to the fcntl
		 * was.  So it is in fact correct to check the ap->a_data
		 * argument for zero or non-zero value when deciding whether or not
		 * to enable the static bit in the cnode.
		 */

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
		error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (enable_static) {
			cp->c_flag |= C_SSD_STATIC;
			cp->c_flag &= ~C_SSD_STATIC;

	case F_SET_GREEDY_MODE: {
		int enable_greedy_mode = 0;
		struct cnode *cp = NULL;
		/*
		 * lock the cnode, decorate the cnode flag, and bail out.
		 * VFS should have already authenticated the caller for us.
		 */

		/*
		 * Note that even though ap->a_data is of type caddr_t,
		 * the fcntl layer at the syscall handler will pass in NULL
		 * or 1 depending on what the argument supplied to the fcntl
		 * was.  So it is in fact correct to check the ap->a_data
		 * argument for zero or non-zero value when deciding whether or not
		 * to enable the greedy mode bit in the cnode.
		 */
		enable_greedy_mode = 1;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
		error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		if (enable_greedy_mode) {
			cp->c_flag |= C_SSD_GREEDY_MODE;
			cp->c_flag &= ~C_SSD_GREEDY_MODE;
	case F_MAKECOMPRESSED: {
		uint32_t gen_counter;
		struct cnode *cp = NULL;
		int reset_decmp = 0;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {

		/*
		 * acquire & lock the cnode.
		 * VFS should have already authenticated the caller for us.
		 */

		/*
		 * Cast the pointer into a uint32_t so we can extract the
		 * supplied generation counter.
		 */
		gen_counter = *((uint32_t*)ap->a_data);

		/* Grab truncate lock first; we may truncate the file */
		hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

		error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);

		/* Are there any other usecounts/FDs? */
		if (vnode_isinuse(vp, 1)) {
			hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);

		/* now we have the cnode locked down; Validate arguments */
		if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) {
			/* EINVAL if you are trying to manipulate an IMMUTABLE file */
			hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);

		if ((hfs_get_gencount (cp)) == gen_counter) {
			/*
			 * OK, the gen_counter matched.  Go for it:
			 * Toggle state bits, truncate file, and suppress mtime update
			 */
			cp->c_bsdflags |= UF_COMPRESSED;

			error = hfs_truncate(vp, 0, IO_NDELAY, 0, (HFS_TRUNCATE_SKIPTIMES), ap->a_context);

		/* Unlock cnode before executing decmpfs ; they may need to get an EA */

		/*
		 * Reset the decmp state while still holding the truncate lock.  We need to
		 * serialize here against a listxattr on this node which may occur at any
		 *
		 * Even if '0/skiplock' is passed in 2nd argument to hfs_file_is_compressed,
		 * that will still potentially require getting the com.apple.decmpfs EA.  If the
		 * EA is required, then we can't hold the cnode lock, because the getxattr call is
		 * generic (through VFS), and can't pass along any info telling it that we're already
		 * holding it (the lock).  If we don't serialize, then we risk listxattr stopping
		 * and trying to fill in the hfs_file_is_compressed info during the callback
		 * operation, which will result in deadlock against the b-tree node.
		 *
		 * So, to serialize against listxattr (which will grab buf_t meta references on
		 * the b-tree blocks), we hold the truncate lock as we're manipulating the
		 */
		if ((reset_decmp) && (error == 0)) {
			decmpfs_cnode *dp = VTOCMP (vp);
				decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);

			/* Initialize the decmpfs node as needed */
			(void) hfs_file_is_compressed (cp, 0); /* ok to take lock */

		hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
	case F_SETBACKINGSTORE: {
		/*
		 * See comment in F_SETSTATICCONTENT re: using
		 * a null check for a_data
		 */
			error = hfs_set_backingstore (vp, 1);
			error = hfs_set_backingstore (vp, 0);

	case F_GETPATH_MTMINFO: {
		int *data = (int*) ap->a_data;

		/* Ask if this is a backingstore vnode */
		error = hfs_is_backingstore (vp, data);
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {

		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
		hfs_unlock(VTOC(vp));

		register struct cnode *cp;

		if (!vnode_isreg(vp))

		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
		/*
		 * used by regression test to determine if
		 * all the dirty pages (via write) have been cleaned
		 * after a call to 'fsync'.
		 */
		error = is_file_clean(vp, VTOF(vp)->ff_size);

		register struct radvisory *ra;
		struct filefork *fp;

		if (!vnode_isreg(vp))

		ra = (struct radvisory *)(ap->a_data);

		/* Protect against a size change. */
		hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

		if (compressed && (uncompressed_size == -1)) {
			/* fetching the uncompressed size failed above, so return the error */
			error = decmpfs_error;
		} else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
			   (!compressed && (ra->ra_offset >= fp->ff_size))) {
#else /* HFS_COMPRESSION */
		if (ra->ra_offset >= fp->ff_size) {
#endif /* HFS_COMPRESSION */

		error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);

		hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
	case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
			*(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
			*(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));

	case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
		*(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;

	case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
		*(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;

	case HFS_FSCTL_GET_VERY_LOW_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;

	case HFS_FSCTL_SET_VERY_LOW_DISK:
		if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
		hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;

	case HFS_FSCTL_GET_LOW_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;

	case HFS_FSCTL_SET_LOW_DISK:
		if (   *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
		    || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
		hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;

	case HFS_FSCTL_GET_DESIRED_DISK:
		*(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;

	case HFS_FSCTL_SET_DESIRED_DISK:
		if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
		hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;

	case HFS_VOLUME_STATUS:
		*(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;

	case HFS_SET_BOOT_INFO:
		if (!vnode_isvroot(vp))
		if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
			return(EACCES);	/* must be superuser or owner of filesystem */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
		hfs_lock_mount (hfsmp);
		bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
		hfs_unlock_mount (hfsmp);
		(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);

	case HFS_GET_BOOT_INFO:
		if (!vnode_isvroot(vp))
		hfs_lock_mount (hfsmp);
		bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
		hfs_unlock_mount(hfsmp);

	case HFS_MARK_BOOT_CORRUPT:
		/* Mark the boot volume corrupt by setting
		 * kHFSVolumeInconsistentBit in the volume header.  This will
		 * force fsck_hfs on next mount.
		 */
		if (!kauth_cred_issuser(kauth_cred_get())) {

		/* Allowed only on the root vnode of the boot volume */
		if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
		    !vnode_isvroot(vp)) {
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
		printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
		hfs_mark_volume_inconsistent(hfsmp);

	case HFS_FSCTL_GET_JOURNAL_INFO:
		jip = (struct hfs_journal_info*)ap->a_data;

		if (hfsmp->jnl == NULL) {
		jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
		jnl_size = (off_t)hfsmp->jnl_size;

		jip->jstart = jnl_start;
		jip->jsize = jnl_size;

	case HFS_SET_ALWAYS_ZEROFILL: {
		struct cnode *cp = VTOC(vp);

		if (*(int *)ap->a_data) {
			cp->c_flag |= C_ALWAYS_ZEROFILL;
			cp->c_flag &= ~C_ALWAYS_ZEROFILL;

	case HFS_DISABLE_METAZONE: {
		/* Only root can disable metadata zone */
		if (!kauth_cred_issuser(kauth_cred_get())) {
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {

		/* Disable metadata zone now */
		(void) hfs_metadatazone_init(hfsmp, true);
		printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
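	/*
	 * Illustrative note (not from the original source): the
	 * HFS_FSCTL_SET_*_DISK cases above keep the three free-space
	 * notification thresholds ordered as
	 *     hfs_freespace_notify_dangerlimit
	 *         < hfs_freespace_notify_warninglimit
	 *         < hfs_freespace_notify_desiredlevel,
	 * so a request that would invert that ordering is rejected.
	 */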
hfs_vnop_select(__unused struct vnop_select_args *ap)
/*
	struct vnop_select_args {
		vfs_context_t a_context;
	};
*/
	/*
	 * We should really check to see if I/O is possible.
	 */
/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run.  The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently it's 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
 */
hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
	struct filefork *fp = VTOF(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	int retval = E_NONE;
	u_int32_t logBlockSize;
	size_t bytesContAvail = 0;
	off_t blockposition;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
		*vpp = hfsmp->hfs_devvp;

	logBlockSize = GetLogicalBlockSize(vp);
	blockposition = (off_t)bn * logBlockSize;

	lockExtBtree = overflow_extents(fp);
		lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);

	retval = MacToVFSError(
			MapFileBlockC (HFSTOVCB(hfsmp),
		hfs_systemfile_unlock(hfsmp, lockflags);

	if (retval == E_NONE) {
		/* Figure out how many read ahead blocks there are */
			if (can_cluster(logBlockSize)) {
				/* Make sure this result never goes negative: */
				*runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
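	/*
	 * Illustrative note (not from the original source): with a logical
	 * block size of 4096 bytes and bytesContAvail == 65536, the run
	 * reported to the caller above would be (65536 / 4096) - 1 = 15
	 * additional contiguous logical blocks beyond the one being mapped.
	 */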
/*
 * Convert logical block number to file offset.
 */
hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
/*
	struct vnop_blktooff_args {
	};
*/
	if (ap->a_vp == NULL)
	*ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);

/*
 * Convert file offset to logical block number.
 */
hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
/*
	struct vnop_offtoblk_args {
		daddr64_t *a_lblkno;
	};
*/
	if (ap->a_vp == NULL)
	*ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
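/*
 * Illustrative note (not from the original source): the two conversions
 * above are inverses with respect to the vnode's logical block size.  For
 * example, with GetLogicalBlockSize(vp) == 4096, logical block 3 maps to
 * byte offset 3 * 4096 = 12288, and offset 12288 maps back to block 3.
 */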
/*
 * Map file offset to physical block number.
 *
 * If this function is called for write operation, and if the file
 * had virtual blocks allocated (delayed allocation), real blocks
 * are allocated by calling ExtendFileC().
 *
 * If this function is called for read operation, and if the file
 * had virtual blocks allocated (delayed allocation), no change
 * to the size of file is done, and if required, rangelist is
 * searched for mapping.
 *
 * System file cnodes are expected to be locked (shared or exclusive).
 */
hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
/*
	struct vnop_blockmap_args {
		vfs_context_t a_context;
	};
*/
	struct vnode *vp = ap->a_vp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	size_t bytesContAvail = 0;
	int retval = E_NONE;
	struct rl_entry *invalid_range;
	enum rl_overlaptype overlaptype;

	if (VNODE_IS_RSRC(vp)) {
		/* allow blockmaps to the resource fork */
	if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
		int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
			case FILE_IS_COMPRESSED:
			case FILE_IS_CONVERTING:
				/* if FILE_IS_CONVERTING, we allow blockmap */
				printf("invalid state %d for compressed file\n", state);
#endif /* HFS_COMPRESSION */

	/* Do not allow blockmap operation on a directory */
	if (vnode_isdir(vp)) {

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (ap->a_bpn == NULL)

	if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
		if (VTOC(vp)->c_lockowner != current_thread()) {
			hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);

	/* Check virtual blocks only when performing write operation */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		if (hfs_start_transaction(hfsmp) != 0) {
		syslocks = SFL_EXTENTS | SFL_BITMAP;
	} else if (overflow_extents(fp)) {
		syslocks = SFL_EXTENTS;

		lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);

	/*
	 * Check for any delayed allocations.
	 */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		u_int32_t loanedBlocks;

		//
		// Make sure we have a transaction.  It's possible
		// that we came in and fp->ff_unallocblocks was zero
		// but during the time we blocked acquiring the extents
		// btree, ff_unallocblocks became non-zero and so we
		// will need to start a transaction.
		//
		if (started_tr == 0) {
				hfs_systemfile_unlock(hfsmp, lockflags);

		/*
		 * Note: ExtendFileC will Release any blocks on loan and
		 * acquire real blocks.  So we ask to extend by zero bytes
		 * since ExtendFileC will account for the virtual blocks.
		 */
		loanedBlocks = fp->ff_unallocblocks;
		retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
				     kEFAllMask | kEFNoClumpMask, &actbytes);

			fp->ff_unallocblocks = loanedBlocks;
			cp->c_blocks += loanedBlocks;
			fp->ff_blocks += loanedBlocks;

			hfs_lock_mount (hfsmp);
			hfsmp->loanedBlocks += loanedBlocks;
			hfs_unlock_mount (hfsmp);

			hfs_systemfile_unlock(hfsmp, lockflags);
			cp->c_flag |= C_MODIFIED;
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

				hfs_end_transaction(hfsmp);

	retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
			       ap->a_bpn, &bytesContAvail);

		hfs_systemfile_unlock(hfsmp, lockflags);

			(void) hfs_update(vp, TRUE);
			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
			hfs_end_transaction(hfsmp);

		/* On write, always return error because virtual blocks, if any,
		 * should have been allocated in ExtendFileC().  We do not
		 * allocate virtual blocks on read, therefore return error
		 * only if no virtual blocks are allocated.  Otherwise we search
		 * rangelist for zero-fills
		 */
		if ((MacToVFSError(retval) != ERANGE) ||
		    (ap->a_flags & VNODE_WRITE) ||
		    ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {

		/* Validate if the start offset is within logical file size */
		if (ap->a_foffset >= fp->ff_size) {

		/*
		 * At this point, we have encountered a failure during
		 * MapFileBlockC that resulted in ERANGE, and we are not servicing
		 * a write, and there are borrowed blocks.
		 *
		 * However, the cluster layer will not call blockmap for
		 * blocks that are borrowed and in-cache.  We have to assume that
		 * because we observed ERANGE being emitted from MapFileBlockC, this
		 * extent range is not valid on-disk.  So we treat this as a
		 * mapping that needs to be zero-filled prior to reading.
		 *
		 * Note that under certain circumstances (such as non-contiguous
		 * userland VM mappings in the calling process), cluster_io
		 * may be forced to split a large I/O driven by hfs_vnop_write
		 * into multiple sub-I/Os that necessitate a RMW cycle.  If this is
		 * the case here, then we have already removed the invalid range list
		 * mapping prior to getting to this blockmap call, so we should not
		 * search the invalid rangelist for this byte range.
		 */

		bytesContAvail = fp->ff_size - ap->a_foffset;
		/*
		 * Clip the contiguous available bytes to, at most, the allowable
		 * maximum or the amount requested.
		 */
		if (bytesContAvail > ap->a_size) {
			bytesContAvail = ap->a_size;

		*ap->a_bpn = (daddr64_t) -1;

	/* MapFileC() found a valid extent in the filefork.  Search the
	 * mapping information further for invalid file ranges
	 */
	overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
			      ap->a_foffset + (off_t)bytesContAvail - 1,
	if (overlaptype != RL_NOOVERLAP) {
		switch(overlaptype) {
		case RL_MATCHINGOVERLAP:
		case RL_OVERLAPCONTAINSRANGE:
		case RL_OVERLAPSTARTSBEFORE:
			/* There's no valid block for this byte offset */
			*ap->a_bpn = (daddr64_t)-1;
			/* There's no point limiting the amount to be returned
			 * if the invalid range that was hit extends all the way
			 * to the EOF (i.e. there's no valid bytes between the
			 * end of this range and the file's EOF):
			 */
			if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
			    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
				bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;

		case RL_OVERLAPISCONTAINED:
		case RL_OVERLAPENDSAFTER:
			/* The range of interest hits an invalid block before the end: */
			if (invalid_range->rl_start == ap->a_foffset) {
				/* There's actually no valid information to be had starting here: */
				*ap->a_bpn = (daddr64_t)-1;
				if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
				    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
					bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
				bytesContAvail = invalid_range->rl_start - ap->a_foffset;

	if (bytesContAvail > ap->a_size)
		bytesContAvail = ap->a_size;

		*ap->a_run = bytesContAvail;
		*(int *)ap->a_poff = 0;

	return (MacToVFSError(retval));
/*
 * prepare and issue the I/O
 * buf_strategy knows how to deal
 * with requests that require
 */
hfs_vnop_strategy(struct vnop_strategy_args *ap)
	buf_t	bp = ap->a_bp;
	vnode_t	vp = buf_vnode(bp);

	/* Mark buffer as containing static data if cnode flag set */
	if (VTOC(vp)->c_flag & C_SSD_STATIC) {

	/* Mark buffer as containing greedy-mode data if cnode flag set */
	if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) {
		bufattr_markgreedymode((bufattr_t)(&bp->b_attr));

	if ((cp = cp_get_protected_cnode(vp)) != NULL) {
		/*
		 * We rely upon the truncate lock to protect the
		 * CP cache key from getting tossed prior to our IO finishing here.
		 * Nearly all cluster io calls to manipulate file payload from HFS
		 * take the truncate lock before calling into the cluster
		 * layer to ensure the file size does not change, or that they
		 * have exclusive right to change the EOF of the file.
		 * That same guarantee protects us here since the code that
		 * deals with CP lock events must now take the truncate lock
		 * before doing anything.
		 *
		 * There is 1 exception here:
		 * 1) One exception should be the VM swapfile IO, because HFS will
		 * funnel the VNOP_PAGEOUT directly into a cluster_pageout call for the
		 * swapfile code only without holding the truncate lock.  This is because
		 * individual swapfiles are maintained at fixed-length sizes by the VM code.
		 * In non-swapfile IO we use PAGEOUT_V2 semantics which allow us to
		 * create our own UPL and thus take the truncate lock before calling
		 * into the cluster layer.  In that case, however, we are not concerned
		 * with the CP blob being wiped out in the middle of the IO
		 * because there isn't anything to toss; the VM swapfile key stays
		 * in-core as long as the file is open.
		 *
		 * For filesystem resize, we may not have access to the underlying
		 * file's cache key for whatever reason (device may be locked).  However,
		 * we do not need it since we are going to use the temporary HFS-wide resize key
		 * which is generated once we start relocating file content.  If this file's I/O
		 * should be done using the resize key, it will have been supplied already, so
		 * do not attach the file's cp blob to the buffer.
		 */
		if ((cp->c_cpentry->cp_flags & CP_RELOCATION_INFLIGHT) == 0) {
			buf_setcpaddr(bp, cp->c_cpentry);
#endif /* CONFIG_PROTECT */

	error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
hfs_minorupdate(struct vnode *vp) {
	struct cnode *cp = VTOC(vp);
	cp->c_flag &= ~C_MODIFIED;
	cp->c_touch_acctime = 0;
	cp->c_touch_chgtime = 0;
	cp->c_touch_modtime = 0;
do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vfs_context_t context)
	register struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	struct proc *p = vfs_context_proc(context);
	kauth_cred_t cred = vfs_context_ucred(context);
	off_t actualBytesAdded;
	u_int32_t fileblocks;
	struct hfsmount *hfsmp;
	int skipupdate = (truncateflags & HFS_TRUNCATE_SKIPUPDATE);
	int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES);

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
		     (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

	/* This should only happen with a corrupt filesystem */
	if ((off_t)fp->ff_size < 0)

	if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))

	/* Files that are changing size are not hot file candidates. */
	if (hfsmp->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
	if ((retval = hfs_getinoquota(cp)))

	/*
	 * Lengthen the size of the file.  We must ensure that the
	 * last byte of the file is allocated.  Since the smallest
	 * value of ff_size is 0, length will be at least 1.
	 */
	if (length > (off_t)fp->ff_size) {
		retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),

		/*
		 * If we don't have enough physical space then
		 * we need to extend the physical size.
		 */
		if (length > filebytes) {
			u_int32_t blockHint = 0;

			/* All or nothing and don't round up to clumpsize. */
			eflags = kEFAllMask | kEFNoClumpMask;

			if (cred && suser(cred, NULL) != 0)
				eflags |= kEFReserveMask;  /* keep a reserve */

			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (filebytes == 0 &&
			    hfsmp->hfs_flags & HFS_METADATA_ZONE &&
			    hfs_virtualmetafile(cp)) {
				eflags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;
			if (hfs_start_transaction(hfsmp) != 0) {

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			while ((length > filebytes) && (retval == E_NONE)) {
				bytesToAdd = length - filebytes;
				retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
								   &actualBytesAdded));

				filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
				if (actualBytesAdded == 0 && retval == E_NONE) {
					if (length > filebytes)

			hfs_systemfile_unlock(hfsmp, lockflags);

				(void) hfs_minorupdate(vp);
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);

		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
			     (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

		if (!(flags & IO_NOZEROFILL)) {
			if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
				struct rl_entry *invalid_range;

				zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
				if (length < zero_limit) zero_limit = length;

				if (length > (off_t)fp->ff_size) {
					/* Extending the file: time to fill out the current last page w. zeroes? */
					if ((fp->ff_size & PAGE_MASK_64) &&
					    (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
						     fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {

						/* There's some valid data at the start of the (current) last page
						   of the file, so zero out the remainder of that page to ensure the
						   entire page contains valid data.  Since there is no invalid range
						   possible past the (current) eof, there's no need to remove anything
						   from the invalid range list before calling cluster_write(): */
						retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
								fp->ff_size, (off_t)0,
								(flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
						hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
						if (retval) goto Err_Exit;

						/* Merely invalidate the remaining area, if necessary: */
						if (length > zero_limit) {
							rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
							cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
					} else {
						/* The page containing the (current) eof is invalid: just add the
						   remainder of the page to the invalid list, along with the area
						   being newly allocated:
						 */
						rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
						cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
			} else {
				panic("hfs_truncate: invoked on non-UBC object?!");

		if (suppress_times == 0) {
			cp->c_touch_modtime = TRUE;
		fp->ff_size = length;

	} else { /* Shorten the size of the file */

		if ((off_t)fp->ff_size > length) {
			/* Any space previously marked as invalid is now irrelevant: */
			rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);

		/*
		 * Account for any unmapped blocks.  Note that the new
		 * file length can still end up with unmapped blocks.
		 */
		if (fp->ff_unallocblocks > 0) {
			u_int32_t finalblks;
			u_int32_t loanedBlocks;

			hfs_lock_mount(hfsmp);
			loanedBlocks = fp->ff_unallocblocks;
			cp->c_blocks -= loanedBlocks;
			fp->ff_blocks -= loanedBlocks;
			fp->ff_unallocblocks = 0;

			hfsmp->loanedBlocks -= loanedBlocks;

			finalblks = (length + blksize - 1) / blksize;
			if (finalblks > fp->ff_blocks) {
				/* calculate required unmapped blocks */
				loanedBlocks = finalblks - fp->ff_blocks;
				hfsmp->loanedBlocks += loanedBlocks;

				fp->ff_unallocblocks = loanedBlocks;
				cp->c_blocks += loanedBlocks;
				fp->ff_blocks += loanedBlocks;
			hfs_unlock_mount (hfsmp);

		/*
		 * For a TBE process the deallocation of the file blocks is
		 * delayed until the file is closed.  And hfs_close calls
		 * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
		 * isn't set, we make sure this isn't a TBE process.
		 */
		if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
			off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);

			if (hfs_start_transaction(hfsmp) != 0) {

			if (fp->ff_unallocblocks == 0) {
				/* Protect extents b-tree and allocation bitmap */
				lockflags = SFL_BITMAP;
				if (overflow_extents(fp))
					lockflags |= SFL_EXTENTS;
				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

				retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0,
								     FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));

				hfs_systemfile_unlock(hfsmp, lockflags);
			fp->ff_size = length;

				(void) hfs_minorupdate(vp);
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);

			filebytes = (off_t)fp->ff_blocks * (off_t)blksize;

			/* These are bytesreleased */
			(void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);

		/*
		 * Only set update flag if the logical length changes & we aren't
		 * suppressing modtime updates.
		 */
		if (((off_t)fp->ff_size != length) && (suppress_times == 0)) {
			cp->c_touch_modtime = TRUE;
		fp->ff_size = length;

	if (cp->c_mode & (S_ISUID | S_ISGID)) {
		if (!vfs_context_issuser(context)) {
			cp->c_mode &= ~(S_ISUID | S_ISGID);

		retval = hfs_minorupdate(vp);

		cp->c_touch_chgtime = TRUE;	/* status changed */
		if (suppress_times == 0) {
			cp->c_touch_modtime = TRUE;	/* file data was modified */

			/*
			 * If we are not suppressing the modtime update, then
			 * update the gen count as well.
			 */
			if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) {
				hfs_incr_gencount(cp);

	retval = hfs_update(vp, MNT_WAIT);
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
			     -1, -1, -1, retval, 0);

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
		     (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
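/*
 * Illustrative note (not from the original source): when shrinking a file
 * that still has loaned (delayed-allocation) blocks, the code above first
 * returns every loaned block to the mount, then re-borrows only what the new
 * length still needs.  For example, with blksize == 4096 and length == 10000,
 *     finalblks = (10000 + 4096 - 1) / 4096 = 3,
 * so if ff_blocks dropped to 2 after the give-back, exactly one block would
 * be re-loaned.  The values are examples only.
 */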
/*
 * Preparation which must be done prior to deleting the catalog record
 * of a file or directory.  In order to make the on-disk as safe as possible,
 * we remove the catalog entry before releasing the bitmap blocks and the
 * overflow extent records.  However, some work must be done prior to deleting
 * the catalog record.
 *
 * When calling this function, the cnode must exist both in memory and on-disk.
 * If there are both resource fork and data fork vnodes, this function should
 * be called on both.
 */
hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {
	struct filefork *fp = VTOF(vp);
	struct cnode *cp = VTOC(vp);

	/* Cannot truncate an HFS directory! */
	if (vnode_isdir(vp)) {

	/*
	 * See the comment below in hfs_truncate for why we need to call
	 * setsize here.  Essentially we want to avoid pending IO if we
	 * already know that the blocks are going to be released here.
	 * This function is only called when totally removing all storage for a file, so
	 * we can take a shortcut and immediately setsize (0);
	 */

	/* This should only happen with a corrupt filesystem */
	if ((off_t)fp->ff_size < 0)

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
	if ((retval = hfs_getinoquota(cp))) {

	/* Wipe out any invalid ranges which have yet to be backed by disk */
	rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);

	/*
	 * Account for any unmapped blocks.  Since we're deleting the
	 * entire file, we don't have to worry about just shrinking
	 * to a smaller number of borrowed blocks.
	 */
	if (fp->ff_unallocblocks > 0) {
		u_int32_t loanedBlocks;

		hfs_lock_mount (hfsmp);
		loanedBlocks = fp->ff_unallocblocks;
		cp->c_blocks -= loanedBlocks;
		fp->ff_blocks -= loanedBlocks;
		fp->ff_unallocblocks = 0;

		hfsmp->loanedBlocks -= loanedBlocks;

		hfs_unlock_mount (hfsmp);
/*
 * Special wrapper around calling TruncateFileC.  This function is useable
 * even when the catalog record does not exist any longer, making it ideal
 * for use when deleting a file.  The simplification here is that we know
 * that we are releasing all blocks.
 *
 * Note that this function may be called when there is no vnode backing
 * the file fork in question.  We may call this from hfs_vnop_inactive
 * to clear out resource fork data (and may not want to clear out the data
 * fork yet).  As a result, we pointer-check both sets of inputs before
 * doing anything with them.
 *
 * The caller is responsible for saving off a copy of the filefork(s)
 * embedded within the cnode prior to calling this function.  The pointers
 * supplied as arguments must be valid even if the cnode is no longer valid.
 */
hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
		     struct filefork *rsrcfork, u_int32_t fileid) {
	u_int32_t fileblocks;

	blksize = hfsmp->blockSize;

	if ((datafork != NULL) && (datafork->ff_blocks > 0)) {
		fileblocks = datafork->ff_blocks;
		filebytes = (off_t)fileblocks * (off_t)blksize;

		/* We killed invalid ranges and loaned blocks before we removed the catalog entry */

		while (filebytes > 0) {
			if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(datafork)) {
				filebytes -= HFS_BIGFILE_SIZE;

			/* Start a transaction, and wipe out as many blocks as we can in this iteration */
			if (hfs_start_transaction(hfsmp) != 0) {

			if (datafork->ff_unallocblocks == 0) {
				/* Protect extents b-tree and allocation bitmap */
				lockflags = SFL_BITMAP;
				if (overflow_extents(datafork))
					lockflags |= SFL_EXTENTS;
				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

				error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false));

				hfs_systemfile_unlock(hfsmp, lockflags);
			datafork->ff_size = filebytes;

			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			/* Finish the transaction and start over if necessary */
			hfs_end_transaction(hfsmp);

	if (error == 0 && (rsrcfork != NULL) && rsrcfork->ff_blocks > 0) {
		fileblocks = rsrcfork->ff_blocks;
		filebytes = (off_t)fileblocks * (off_t)blksize;

		/* We killed invalid ranges and loaned blocks before we removed the catalog entry */

		while (filebytes > 0) {
			if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(rsrcfork)) {
				filebytes -= HFS_BIGFILE_SIZE;

			/* Start a transaction, and wipe out as many blocks as we can in this iteration */
			if (hfs_start_transaction(hfsmp) != 0) {

			if (rsrcfork->ff_unallocblocks == 0) {
				/* Protect extents b-tree and allocation bitmap */
				lockflags = SFL_BITMAP;
				if (overflow_extents(rsrcfork))
					lockflags |= SFL_EXTENTS;
				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

				error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false));

				hfs_systemfile_unlock(hfsmp, lockflags);
			rsrcfork->ff_size = filebytes;

			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			/* Finish the transaction and start over if necessary */
			hfs_end_transaction(hfsmp);
/*
 * Truncate a cnode to at most length size, freeing (or adding) the
 */
hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
	     int truncateflags, vfs_context_t context)
	struct filefork *fp = VTOF(vp);
	u_int32_t fileblocks;
	int blksize, error = 0;
	struct cnode *cp = VTOC(vp);

	/* Cannot truncate an HFS directory! */
	if (vnode_isdir(vp)) {
	/* A swap file cannot change size. */
	if (vnode_isswap(vp) && (length != 0)) {

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	//
	// Have to do this here so that we don't wind up with
	// i/o pending for blocks that are about to be released
	// if we truncate the file.
	//
	// If skipsetsize is set, then the caller is responsible
	// for the ubc_setsize.
	//
	// Even if skipsetsize is set, if the length is zero we
	// want to call ubc_setsize() because as of SnowLeopard
	// it will no longer cause any page-ins and it will drop
	// any dirty pages so that we don't do any i/o that we
	// don't have to.  This also prevents a race where i/o
	// for truncated blocks may overwrite later data if the
	// blocks get reallocated to a different file.
	//
	if (!skipsetsize || length == 0)
		ubc_setsize(vp, length);

	// have to loop truncating or growing files that are
	// really big because otherwise transactions can get
	// enormous and consume too many kernel resources.

	if (length < filebytes) {
		while (filebytes > length) {
			if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes -= HFS_BIGFILE_SIZE;
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
	} else if (length > filebytes) {
		while (filebytes < length) {
			if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes += HFS_BIGFILE_SIZE;
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
	} else /* Same logical size */ {

		error = do_hfs_truncate(vp, length, flags, truncateflags, context);
	/* Files that are changing size are not hot file candidates. */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
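/*
 * Illustrative note (not from the original source): when the fork has
 * overflow extents, the loops above change the physical size in steps of at
 * most HFS_BIGFILE_SIZE so that no single journal transaction covers an
 * arbitrarily large extent change; each intermediate step is a separate
 * do_hfs_truncate() call in its own transaction.  Forks without overflow
 * extents are adjusted to the target size directly.
 */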
/*
 * Preallocate file storage space.
 */
hfs_vnop_allocate(struct vnop_allocate_args /* {
		off_t *a_bytesallocated;
		vfs_context_t a_context;
	} */
	struct vnode *vp = ap->a_vp;
	struct filefork *fp;
	off_t length = ap->a_length;
	off_t moreBytesRequested;
	off_t actualBytesAdded;
	u_int32_t fileblocks;
	int retval, retval2;
	u_int32_t blockHint;
	u_int32_t extendFlags;   /* For call to ExtendFileC */
	struct hfsmount *hfsmp;
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);

	*(ap->a_bytesallocated) = 0;

	if (!vnode_isreg(vp))
	if (length < (off_t)0)

	orig_ctime = VTOC(vp)->c_ctime;

	check_for_tracked_file(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);

	hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

	if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {

	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

	if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {

	/* Fill in the flags word for the call to Extend the file */

	extendFlags = kEFNoClumpMask;
	if (ap->a_flags & ALLOCATECONTIG)
		extendFlags |= kEFContigMask;
	if (ap->a_flags & ALLOCATEALL)
		extendFlags |= kEFAllMask;
	if (cred && suser(cred, NULL) != 0)
		extendFlags |= kEFReserveMask;
	if (hfs_virtualmetafile(cp))
		extendFlags |= kEFMetadataMask;

	startingPEOF = filebytes;

	if (ap->a_flags & ALLOCATEFROMPEOF)
		length += filebytes;
	else if (ap->a_flags & ALLOCATEFROMVOL)
		blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

	/* If no changes are necessary, then we're done */
	if (filebytes == length)

	/*
	 * Lengthen the size of the file.  We must ensure that the
	 * last byte of the file is allocated.  Since the smallest
	 * value of filebytes is 0, length will be at least 1.
	 */
	if (length > filebytes) {
		off_t total_bytes_added = 0, orig_request_size;

		orig_request_size = moreBytesRequested = length - filebytes;

		retval = hfs_chkdq(cp,
				   (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),

		/*
		 * Metadata zone checks.
		 */
		if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (hfs_virtualmetafile(cp)) {
				blockHint = hfsmp->hfs_metazone_start;
			} else if ((blockHint >= hfsmp->hfs_metazone_start) &&
				   (blockHint <= hfsmp->hfs_metazone_end)) {
				/*
				 * Move blockHint outside metadata zone.
				 */
				blockHint = hfsmp->hfs_metazone_end + 1;

		while ((length > filebytes) && (retval == E_NONE)) {
			off_t bytesRequested;

			if (hfs_start_transaction(hfsmp) != 0) {

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
				bytesRequested = HFS_BIGFILE_SIZE;
				bytesRequested = moreBytesRequested;

			if (extendFlags & kEFContigMask) {
				// if we're on a sparse device, this will force it to do a
				// full scan to find the space needed.
				hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;

			retval = MacToVFSError(ExtendFileC(vcb,
							   &actualBytesAdded));

			if (retval == E_NONE) {
				*(ap->a_bytesallocated) += actualBytesAdded;
				total_bytes_added += actualBytesAdded;
				moreBytesRequested -= actualBytesAdded;
				if (blockHint != 0) {
					blockHint += actualBytesAdded / vcb->blockSize;
			filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

			hfs_systemfile_unlock(hfsmp, lockflags);

				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes))

		/*
		 * Adjust actualBytesAdded to be allocation block aligned, not
		 * clump size aligned.
		 * NOTE: So what we are reporting does not affect reality
		 * until the file is closed, when we truncate the file to allocation
		 */
		if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
			*(ap->a_bytesallocated) =
				roundup(orig_request_size, (off_t)vcb->blockSize);

	} else { /* Shorten the size of the file */

		if (fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).
			 */

		retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context);
		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes)) goto Err_Exit;

		/* These are bytesreleased */
		(void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED, 0);

		if (fp->ff_size > filebytes) {
			fp->ff_size = filebytes;

			ubc_setsize(vp, fp->ff_size);
			hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);

	cp->c_touch_chgtime = TRUE;
	cp->c_touch_modtime = TRUE;
	retval2 = hfs_update(vp, MNT_WAIT);

	hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
/*
 * Pagein for HFS filesystem
 */
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
	struct vnop_pagein_args {
		vm_offset_t a_pl_offset,
		vfs_context_t a_context;
	};
*/
	struct filefork *fp;
	upl_page_info_t *pl;
	boolean_t truncate_lock_held = FALSE;
	boolean_t file_converted = FALSE;

	if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) {
		/*
		 * If we errored here, then this means that one of two things occurred:
		 * 1. there was a problem with the decryption of the key.
		 * 2. the device is locked and we are not allowed to access this particular file.
		 *
		 * Either way, this means that we need to shut down this upl now.  As long as
		 * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves)
		 * then we create a upl and immediately abort it.
		 */
		if (ap->a_pl == NULL) {
			/* create the upl */
			ubc_create_upl (vp, ap->a_f_offset, ap->a_size, &upl, &pl,
					UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
			/* mark the range as needed so it doesn't immediately get discarded upon abort */
			ubc_upl_range_needed (upl, ap->a_pl_offset / PAGE_SIZE, 1);

			/* Abort the range */
			ubc_upl_abort_range (upl, 0, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
#endif /* CONFIG_PROTECT */

	if (ap->a_pl != NULL) {
		/*
		 * this can only happen for swap files now that
		 * we're asking for V2 paging behavior...
		 * so don't need to worry about decompression, or
		 * keeping track of blocks read or taking the truncate lock
		 */
		error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
				       ap->a_size, (off_t)fp->ff_size, ap->a_flags);

	/*
	 * take truncate lock (shared/recursive) to guard against
	 * zero-fill thru fsync interfering, but only for v2
	 *
	 * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
	 * lock shared and we are allowed to recurse 1 level if this thread already
	 * owns the lock exclusively... this can legally occur
	 * if we are doing a shrinking ftruncate against a file
	 * that is mapped private, and the pages being truncated
	 * do not currently exist in the cache... in that case
	 * we will have to page-in the missing pages in order
	 * to provide them to the private mapping... we must
	 * also call hfs_unlock_truncate with a positive been_recursed
	 * arg to indicate that if we have recursed, there is no need to drop
	 * the lock.  Allowing this simple recursion is necessary
	 * in order to avoid a certain deadlock... since the ftruncate
	 * already holds the truncate lock exclusively, if we try
	 * to acquire it shared to protect the pagein path, we will
	 *
	 * NOTE: The if () block below is a workaround in order to prevent a
	 * VM deadlock. See rdar://7853471.
	 *
	 * If we are in a forced unmount, then launchd will still have the
	 * dyld_shared_cache file mapped as it is trying to reboot.  If we
	 * take the truncate lock here to service a page fault, then our
	 * thread could deadlock with the forced-unmount.  The forced unmount
	 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
	 * marked C_DELETED, it will call ubc_setsize(0).  As a result, the unmount
	 * thread will think it needs to copy all of the data out of the file
	 * and into a VM copy object.  If we hold the cnode lock here, then that
	 * VM operation will not be able to proceed, because we'll set a busy page
	 * before attempting to grab the lock.  Note that this isn't as simple as "don't
	 * call ubc_setsize" because doing that would just shift the problem to the
	 * ubc_msync done before the vnode is reclaimed.
	 *
	 * So, if a forced unmount on this volume is in flight AND the cnode is
	 * marked C_DELETED, then just go ahead and do the page in without taking
	 * the lock (thus suspending pagein_v2 semantics temporarily).  Since it's on a file
	 * that is not going to be available on the next mount, this seems like an
	 * OK solution from a correctness point of view, even though it is hacky.
	 */
	if (vfs_isforce(vp->v_mount)) {
		if (cp->c_flag & C_DELETED) {
			/* If we don't get it, then just go ahead and operate without the lock */
			truncate_lock_held = hfs_try_trunclock(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);

		hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
		truncate_lock_held = TRUE;

	kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);

	if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {

	ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);

	/*
	 * Scan from the back to find the last page in the UPL, so that we
	 * aren't looking at a UPL that may have already been freed by the
	 * preceding aborts/completions.
	 */
	for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
		if (upl_page_present(pl, --pg_index))
	if (pg_index == 0) {
		/*
		 * no absent pages were found in the range specified
		 * just abort the UPL to get rid of it and then we're done
		 */
		ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);

	/*
	 * initialize the offset variables before we touch the UPL.
	 * f_offset is the position into the file, in bytes
	 * offset is the position into the UPL, in bytes
	 * pg_index is the pg# of the UPL we're operating on
	 * isize is the offset into the UPL of the last page that is present.
	 */
	isize = ((pg_index + 1) * PAGE_SIZE);

	f_offset = ap->a_f_offset;

		if ( !upl_page_present(pl, pg_index)) {
			/*
			 * we asked for RET_ONLY_ABSENT, so it's possible
			 * to get back empty slots in the UPL.
			 * just skip over them
			 */
			f_offset += PAGE_SIZE;
			offset   += PAGE_SIZE;

		/*
		 * We know that we have at least one absent page.
		 * Now checking to see how many in a row we have
		 */
		xsize = isize - PAGE_SIZE;

			if ( !upl_page_present(pl, pg_index + num_of_pages))

		xsize = num_of_pages * PAGE_SIZE;

		if (VNODE_IS_RSRC(vp)) {
			/* allow pageins of the resource fork */
			int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */

				if (truncate_lock_held) {
					/*
					 * can't hold the truncate lock when calling into the decmpfs layer
					 * since it calls back into this layer... even though we're only
					 * holding the lock in shared mode, and the re-entrant path only
					 * takes the lock shared, we can deadlock if some other thread
					 * tries to grab the lock exclusively in between.
					 */
					hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
					truncate_lock_held = FALSE;

				ap->a_pl_offset = offset;
				ap->a_f_offset = f_offset;

				error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
				/*
				 * note that decmpfs_pagein_compressed can change the state of
				 * 'compressed'... it will set it to 0 if the file is no longer
				 * compressed once the compression lock is successfully taken
				 * i.e. we would block on that lock while the file is being inflated
				 */
					/* successful page-in, update the access time */
					VTOC(vp)->c_touch_acctime = TRUE;

					/* compressed files are not hot file candidates */
					if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
						fp->ff_bytesread = 0;
				} else if (error == EAGAIN) {
					/*
					 * EAGAIN indicates someone else already holds the compression lock...
					 * to avoid deadlocking, we'll abort this range of pages with an
					 * indication that the pagein needs to be redriven
					 */
					ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);

					goto pagein_next_range;

				/*
				 * Set file_converted only if the file became decompressed while we were
				 * paging in.  If it were still compressed, we would re-start the loop using the goto
				 * in the above block.  This avoids overloading truncate_lock_held as our retry_pagein
				 * condition below, since we could have avoided taking the truncate lock to prevent
				 * a deadlock in the force unmount case.
				 */
				file_converted = TRUE;

			if (file_converted == TRUE) {
				/*
				 * the file was converted back to a regular file after we first saw it as compressed
				 * we need to abort the upl, retake the truncate lock, recreate the UPL and start over
				 * reset a_size so that we consider what remains of the original request
				 * and null out a_upl and a_pl_offset.
				 *
				 * We should only be able to get into this block if the decmpfs_pagein_compressed
				 * successfully decompressed the range in question for this file.
				 */
				ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);

				ap->a_pl_offset = 0;

				/* Reset file_converted back to false so that we don't infinite-loop. */
				file_converted = FALSE;

		error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);

		/*
		 * Keep track of blocks read.
		 */
		if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
			int took_cnode_lock = 0;

			if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
				bytesread = fp->ff_size;

			/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
			if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
				hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
				took_cnode_lock = 1;
			/*
			 * If this file hasn't been seen since the start of
			 * the current sampling period then start over.
			 */
			if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
				fp->ff_bytesread = bytesread;
				cp->c_atime = tv.tv_sec;
				fp->ff_bytesread += bytesread;
			cp->c_touch_acctime = TRUE;
			if (took_cnode_lock)

		pg_index += num_of_pages;

	if (truncate_lock_held == TRUE) {
		/* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */
		hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
    struct vnop_pageout_args {
       vnode_t       a_vp,
       upl_t         a_pl,
       vm_offset_t   a_pl_offset,
       off_t         a_f_offset,
       size_t        a_size,
       int           a_flags
       vfs_context_t a_context;
    };
*/
{
    vnode_t vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    int retval = 0;
    off_t filesize;
    upl_t upl;
    upl_page_info_t* pl;
    vm_offset_t a_pl_offset;
    int a_flags;
    int is_pageoutv2 = 0;
    kern_return_t kret;
    cp = VTOC(vp);
    fp = VTOF(vp);

    /*
     * Figure out where the file ends, for pageout purposes.  If
     * ff_new_size > ff_size, then we're in the middle of extending the
     * file via a write, so it is safe (and necessary) that we be able
     * to pageout up to that point.
     */
    filesize = fp->ff_size;
    if (fp->ff_new_size > filesize)
        filesize = fp->ff_new_size;

    a_flags = ap->a_flags;
    a_pl_offset = ap->a_pl_offset;

    if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
        hfs_incr_gencount (cp);
    }
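    /*
     * (A pageout here means dirty data is reaching the file through mmap or
     * the unified buffer cache rather than through hfs_vnop_write, which is
     * presumably why the generation count is bumped on this path as well.)
     */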
    /*
     * we can tell if we're getting the new or old behavior from the UPL
     */
    if ((upl = ap->a_pl) == NULL) {
        int request_flags;

        is_pageoutv2 = 1;
        /*
         * we're in control of any UPL we commit
         * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
         */
        a_flags &= ~UPL_NOCOMMIT;

        /*
         * For V2 semantics, we want to take the cnode truncate lock
         * shared to guard against the file size changing via zero-filling.
         *
         * However, we have to be careful because we may be invoked
         * via the ubc_msync path to write out dirty mmap'd pages
         * in response to a lock event on a content-protected
         * filesystem (e.g. to write out class A files).
         * As a result, we want to take the truncate lock 'SHARED' with
         * the mini-recursion locktype so that we don't deadlock/panic
         * because we may already be holding the truncate lock exclusive to force any other
         * IOs to have blocked behind us.
         */
        hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);

        if (a_flags & UPL_MSYNC) {
            request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
        } else {
            request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
        }

        kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);

        if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
            retval = EINVAL;
            goto pageout_done;
        }
    }
    /*
     * from this point forward upl points at the UPL we're working with
     * it was either passed in or we successfully created it
     */

    /*
     * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
     * UPL instead of relying on the UPL passed into us.  We go ahead and do that here,
     * scanning for dirty ranges.  We'll issue our own N cluster_pageout calls, for
     * N dirty ranges in the UPL.  Note that this is almost a direct copy of the
     * logic in vnode_pageout except that we need to do it after grabbing the truncate
     * lock in HFS so that we don't lock invert ourselves.
     *
     * Note that we can still get into this function on behalf of the default pager with
     * non-V2 behavior (swapfiles).  However in that case, we did not grab locks above
     * since fsync and other writing threads will grab the locks, then mark the
     * relevant pages as busy.  But the pageout codepath marks the pages as busy,
     * and THEN would attempt to grab the truncate lock, which would result in deadlock.  So
     * we do not try to grab anything for the pre-V2 case, which should only be accessed
     * by the paging/VM system.
     */
    if (is_pageoutv2) {
        off_t f_offset;
        int   offset;
        int   isize;
        int   pg_index;
        int   error;
        int   error_ret = 0;

        isize = ap->a_size;
        f_offset = ap->a_f_offset;

        /*
         * Scan from the back to find the last page in the UPL, so that we
         * aren't looking at a UPL that may have already been freed by the
         * preceding aborts/completions.
         */
        for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
            if (upl_page_present(pl, --pg_index))
                break;
            if (pg_index == 0) {
                ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
                goto pageout_done;
            }
        }

        /*
         * initialize the offset variables before we touch the UPL.
         * a_f_offset is the position into the file, in bytes
         * offset is the position into the UPL, in bytes
         * pg_index is the pg# of the UPL we're operating on.
         * isize is the offset into the UPL of the last non-clean page.
         */
        isize = ((pg_index + 1) * PAGE_SIZE);
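        /*
         * (For example, if the last present page found above was pg_index 3,
         * isize becomes 4 * PAGE_SIZE, so the walk below only examines
         * pages 0 through 3.)
         */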
        offset = 0;
        pg_index = 0;

        while (isize) {
            int  xsize;
            int  num_of_pages;

            if ( !upl_page_present(pl, pg_index)) {
                /*
                 * we asked for RET_ONLY_DIRTY, so it's possible
                 * to get back empty slots in the UPL.
                 * just skip over them
                 */
                f_offset += PAGE_SIZE;
                offset   += PAGE_SIZE;
                isize    -= PAGE_SIZE;
                pg_index++;

                continue;
            }
            if ( !upl_dirty_page(pl, pg_index)) {
                panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
            }

            /*
             * We know that we have at least one dirty page.
             * Now checking to see how many in a row we have
             */
            num_of_pages = 1;
            xsize = isize - PAGE_SIZE;

            while (xsize) {
                if ( !upl_dirty_page(pl, pg_index + num_of_pages))
                    break;
                num_of_pages++;
                xsize -= PAGE_SIZE;
            }
            xsize = num_of_pages * PAGE_SIZE;
            if (!vnode_isswap(vp)) {
                off_t end_of_range;
                int   tooklock = 0;

                if (cp->c_lockowner != current_thread()) {
                    if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
                        /*
                         * we're in the v2 path, so we are the
                         * owner of the UPL... we may have already
                         * processed some of the UPL, so abort it
                         * from the current working offset to the
                         * end of the UPL
                         */
                        ubc_upl_abort_range(upl,
                                            offset,
                                            ap->a_size - offset,
                                            UPL_ABORT_FREE_ON_EMPTY);
                        goto pageout_done;
                    }
                    tooklock = 1;
                }
                end_of_range = f_offset + xsize - 1;

                if (end_of_range >= filesize) {
                    end_of_range = (off_t)(filesize - 1);
                }
                if (f_offset < filesize) {
                    rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
                    cp->c_flag |= C_MODIFIED;  /* leof is dirty */
                }
                if (tooklock) {
                    hfs_unlock(cp);
                }
            }
            if ((error = cluster_pageout(vp, upl, offset, f_offset,
                                         xsize, filesize, a_flags))) {
                if (error_ret == 0)
                    error_ret = error;
            }
            f_offset += xsize;
            offset   += xsize;
            isize    -= xsize;
            pg_index += num_of_pages;
        }
        /* capture errnos bubbled out of cluster_pageout if they occurred */
        if (error_ret != 0) {
            retval = error_ret;
        }
    } /* end block for v2 pageout behavior */
    else {
        if (!vnode_isswap(vp)) {
            off_t end_of_range;
            int   tooklock = 0;

            if (cp->c_lockowner != current_thread()) {
                if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
                    if (!(a_flags & UPL_NOCOMMIT)) {
                        ubc_upl_abort_range(upl,
                                            a_pl_offset,
                                            ap->a_size,
                                            UPL_ABORT_FREE_ON_EMPTY);
                    }
                    goto pageout_done;
                }
                tooklock = 1;
            }
            end_of_range = ap->a_f_offset + ap->a_size - 1;

            if (end_of_range >= filesize) {
                end_of_range = (off_t)(filesize - 1);
            }
            if (ap->a_f_offset < filesize) {
                rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
                cp->c_flag |= C_MODIFIED;  /* leof is dirty */
            }
            if (tooklock) {
                hfs_unlock(cp);
            }
        }
        /*
         * just call cluster_pageout for old pre-v2 behavior
         */
        retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
                                 ap->a_size, filesize, a_flags);
    }
    /*
     * If data was written, update the modification time of the file.
     * If setuid or setgid bits are set and this process is not the
     * superuser then clear the setuid and setgid bits as a precaution
     * against tampering.
     */
    if (retval == 0) {
        cp->c_touch_modtime = TRUE;
        cp->c_touch_chgtime = TRUE;
        if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
            (vfs_context_suser(ap->a_context) != 0)) {
            hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
            cp->c_mode &= ~(S_ISUID | S_ISGID);
            hfs_unlock(cp);
        }
    }

pageout_done:
    if (is_pageoutv2) {
        /*
         * Release the truncate lock.  Note that because
         * we may have taken the lock recursively by
         * being invoked via ubc_msync due to lockdown,
         * we should release it recursively, too.
         */
        hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
    }
    return (retval);
}
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
    int retval = 0;
    register struct buf *bp = ap->a_bp;
    register struct vnode *vp = buf_vnode(bp);
    BlockDescriptor block;

    /* Trap B-Tree writes */
    if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
        (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
        (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
        (vp == VTOHFS(vp)->hfc_filevp)) {
        /*
         * Swap and validate the node if it is in native byte order.
         * This is always true on big endian, so we always validate
         * before writing here.  On little endian, the node typically has
         * been swapped and validated when it was written to the journal,
         * so we won't do anything here.
         */
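        /*
         * (The check below looks at the last two bytes of the node, which hold
         * the offset of the node's first record.  That offset is always
         * sizeof(BTNodeDescriptor) == 14 == 0x000e, so reading 0x000e with the
         * host's byte order means the node is still in native order and must
         * be swapped to big endian before it goes to disk.)
         */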
        if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
            /* Prepare the block pointer */
            block.blockHeader = bp;
            block.buffer = (char *)buf_dataptr(bp);
            block.blockNum = buf_lblkno(bp);
            /* not found in cache ==> came from disk */
            block.blockReadFromDisk = (buf_fromcache(bp) == 0);
            block.blockSize = buf_count(bp);

            /* Endian un-swap B-Tree node */
            retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
            if (retval)
                panic("hfs_vnop_bwrite: about to write corrupt node!\n");
        }
    }

    /* This buffer shouldn't be locked anymore but if it is clear it */
    if ((buf_flags(bp) & B_LOCKED)) {
        if (VTOHFS(vp)->jnl) {
            panic("hfs: CLEARING the lock bit on bp %p\n", bp);
        }
        buf_clearflags(bp, B_LOCKED);
    }
    retval = vn_bwrite (ap);

    return (retval);
}
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------
 * |///////////////|
 * -----------------
 * 0               N (file offset)
 *
 * -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 * -----------------     -----------------
 * 0               N     N+1              2N
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 * -----------------     -----------------
 * 0               N     N+1              2N
 *
 *                       -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                       -----------------
 *                       0                N
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
             struct proc *p)
{
    struct cnode *cp;
    struct filefork *fp;
    struct hfsmount *hfsmp;
    u_int32_t headblks;
    u_int32_t datablks;
    u_int32_t blksize;
    u_int32_t growsize;
    u_int32_t nextallocsave;
    daddr64_t sector_a, sector_b;
    int eflags;
    off_t newbytes;
    int retval;
    int lockflags = 0;
    int took_trunc_lock = 0;
    enum vtype vnodetype;

    vnodetype = vnode_vtype(vp);
    if (vnodetype != VREG) {
        /* Not allowed to move symlinks. */
        return (EPERM);
    }

    hfsmp = VTOHFS(vp);
    if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
        return (ENOSPC);
    }

    cp = VTOC(vp);
    fp = VTOF(vp);
    if (fp->ff_unallocblocks)
        return (EINVAL);
4893 * Disable HFS file relocation on content-protected filesystems
4895 if (cp_fs_protected (hfsmp
->hfs_mp
)) {
4899 /* If it's an SSD, also disable HFS relocation */
4900 if (hfsmp
->hfs_flags
& HFS_SSD
) {
4905 blksize
= hfsmp
->blockSize
;
4907 blockHint
= hfsmp
->nextAllocation
;
4909 if (fp
->ff_size
> 0x7fffffff) {
4914 // We do not believe that this call to hfs_fsync() is
4915 // necessary and it causes a journal transaction
4916 // deadlock so we are removing it.
4918 //if (vnodetype == VREG && !vnode_issystem(vp)) {
4919 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
4924 if (!vnode_issystem(vp
) && (vnodetype
!= VLNK
)) {
4926 hfs_lock_truncate(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_DEFAULT
);
4927 /* Force lock since callers expects lock to be held. */
4928 if ((retval
= hfs_lock(cp
, HFS_EXCLUSIVE_LOCK
, HFS_LOCK_ALLOW_NOEXISTS
))) {
4929 hfs_unlock_truncate(cp
, HFS_LOCK_DEFAULT
);
4932 /* No need to continue if file was removed. */
4933 if (cp
->c_flag
& C_NOEXISTS
) {
4934 hfs_unlock_truncate(cp
, HFS_LOCK_DEFAULT
);
4937 took_trunc_lock
= 1;
4939 headblks
= fp
->ff_blocks
;
4940 datablks
= howmany(fp
->ff_size
, blksize
);
4941 growsize
= datablks
* blksize
;
4942 eflags
= kEFContigMask
| kEFAllMask
| kEFNoClumpMask
;
4943 if (blockHint
>= hfsmp
->hfs_metazone_start
&&
4944 blockHint
<= hfsmp
->hfs_metazone_end
)
4945 eflags
|= kEFMetadataMask
;
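    /*
     * (If the file currently lives in the metadata zone, the kEFMetadataMask
     * hint asks the allocator to keep the new blocks there as well; the saved
     * nextAllocation pointer is restored after the ExtendFileC call below so
     * that a metadata-zone allocation doesn't drag the volume's next-allocation
     * pointer along with it.)
     */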
    if (hfs_start_transaction(hfsmp) != 0) {
        if (took_trunc_lock)
            hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
        return (EINVAL);
    }

    /*
     * Protect the extents b-tree and the allocation bitmap
     * during MapFileBlockC and ExtendFileC operations.
     */
    lockflags = SFL_BITMAP;
    if (overflow_extents(fp))
        lockflags |= SFL_EXTENTS;
    lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

    retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
    if (retval) {
        retval = MacToVFSError(retval);
        goto out;
    }

    /*
     * STEP 1 - acquire new allocation blocks.
     */
    nextallocsave = hfsmp->nextAllocation;
    retval = ExtendFileC(hfsmp, (FCB *)fp, growsize, blockHint, eflags, &newbytes);
    if (eflags & kEFMetadataMask) {
        hfs_lock_mount(hfsmp);
        HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
        MarkVCBDirty(hfsmp);
        hfs_unlock_mount(hfsmp);
    }

    retval = MacToVFSError(retval);
    if (retval == 0) {
        cp->c_flag |= C_MODIFIED;
        if (newbytes < growsize) {
            retval = ENOSPC;
            goto restore;
        } else if (fp->ff_blocks < (headblks + datablks)) {
            printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN);
            retval = ENOSPC;
            goto restore;
        }

        retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
        if (retval) {
            retval = MacToVFSError(retval);
        } else if ((sector_a + 1) == sector_b) {
            retval = ENOSPC;
            goto restore;
        } else if ((eflags & kEFMetadataMask) &&
                   ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
                      hfsmp->hfs_metazone_end)) {
            const char * filestr;
            char emptystr = '\0';

            if (cp->c_desc.cd_nameptr != NULL) {
                filestr = (const char *)&cp->c_desc.cd_nameptr[0];
            } else if (vnode_name(vp) != NULL) {
                filestr = vnode_name(vp);
            } else {
                filestr = &emptystr;
            }
            retval = ENOSPC;
            goto restore;
        }
    }
    /* Done with system locks and journal for now. */
    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;
    hfs_end_transaction(hfsmp);

    if (retval) {
        /*
         * Check to see if failure is due to excessive fragmentation.
         */
        if ((retval == ENOSPC) &&
            (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
            hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
        }
        goto out;
    }
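    /*
     * (The heuristic above: getting ENOSPC from a contiguous request even
     * though the volume still has more than twice the needed free blocks
     * means the free space is too fragmented to relocate anything, so the
     * flag checked at the top of this function short-circuits further
     * relocation attempts.)
     */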
    /*
     * STEP 2 - clone file data into the new allocation blocks.
     */
    if (vnodetype == VLNK)
        retval = EPERM;
    else if (vnode_issystem(vp))
        retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
    else
        retval = hfs_clonefile(vp, headblks, datablks, blksize);

    /* Start transaction for step 3 or for a restore. */
    if (hfs_start_transaction(hfsmp) != 0) {
        retval = EINVAL;
        goto out;
    }
    if (retval)
        goto restore;

    /*
     * STEP 3 - switch to cloned data and remove old blocks.
     */
    lockflags = SFL_BITMAP;
    if (overflow_extents(fp))
        lockflags |= SFL_EXTENTS;
    lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

    retval = HeadTruncateFile(hfsmp, (FCB *)fp, headblks);

    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;
    if (retval)
        goto restore;
out:
    if (took_trunc_lock)
        hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);

    if (lockflags) {
        hfs_systemfile_unlock(hfsmp, lockflags);
        lockflags = 0;
    }

    /* Push cnode's new extent data to disk. */
    if (retval == 0) {
        (void) hfs_update(vp, MNT_WAIT);
    }
    if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
        (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
    else
        (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
exit:
    hfs_end_transaction(hfsmp);

    return (retval);

restore:
    if (fp->ff_blocks == headblks) {
        if (took_trunc_lock)
            hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
        goto exit;
    }
    /*
     * Give back any newly allocated space.
     */
    if (lockflags == 0) {
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
    }

    (void) TruncateFileC(hfsmp, (FCB *)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
                         FTOC(fp)->c_fileid, false);

    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;

    if (took_trunc_lock)
        hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
    goto exit;
}
/*
 * Clone a file's data within the file.
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
    caddr_t bufp;
    size_t  bufsize;
    size_t  copysize;
    size_t  iosize;
    size_t  offset = 0;
    off_t   writebase;
    uio_t   auio;
    int     error = 0;

    writebase = blkstart * blksize;
    copysize = blkcnt * blksize;
    iosize = bufsize = MIN(copysize, 128 * 1024);
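    /*
     * The clone is done through the regular cluster read/write path: data is
     * copied in chunks of at most 128 KB through a temporary kernel buffer,
     * reading the original blocks with IO_NOCACHE and writing them past the
     * original allocation (at 'writebase') with IO_NOCACHE | IO_SYNC.
     */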
    hfs_unlock(VTOC(vp));

#if CONFIG_PROTECT
    if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
        hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
        return (error);
    }
#endif /* CONFIG_PROTECT */

    if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
        hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
        return (ENOMEM);
    }

    auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);

    while (offset < copysize) {
        iosize = MIN(copysize - offset, iosize);

        uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
        uio_addiov(auio, (uintptr_t)bufp, iosize);

        error = cluster_read(vp, auio, copysize, IO_NOCACHE);
        if (error) {
            printf("hfs_clonefile: cluster_read failed - %d\n", error);
            break;
        }
        if (uio_resid(auio) != 0) {
            printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
            error = EIO;
            break;
        }

        uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
        uio_addiov(auio, (uintptr_t)bufp, iosize);

        error = cluster_write(vp, auio, writebase + offset,
                              writebase + offset + iosize,
                              uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
        if (error) {
            printf("hfs_clonefile: cluster_write failed - %d\n", error);
            break;
        }
        if (uio_resid(auio) != 0) {
            printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
            error = EIO;
            break;
        }
        offset += iosize;
    }
    uio_free(auio);

    if ((blksize & PAGE_MASK)) {
        /*
         * since the copy may not have started on a PAGE
         * boundary (or may not have ended on one), we
         * may have pages left in the cache since NOCACHE
         * will let partially written pages linger...
         * let's just flush the entire range to make sure
         * we don't have any pages left that are beyond
         * (or intersect) the real LEOF of this file
         */
        ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
    } else {
        /*
         * No need to call ubc_sync_range or hfs_invalbuf
         * since the file was copied using IO_NOCACHE and
         * the copy was done starting and ending on a page
         * boundary in the file.
         */
    }
    kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

    hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
    return (error);
}
/*
 * Clone a system (metadata) file.
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
    caddr_t bufp;
    char * offset;
    size_t bufsize;
    size_t iosize;
    struct buf *bp = NULL;
    daddr64_t blkno;
    daddr64_t blk;
    daddr64_t start_blk;
    daddr64_t last_blk;
    int breadcnt;
    int i;
    int error = 0;

    iosize = GetLogicalBlockSize(vp);
    bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
    breadcnt = bufsize / iosize;
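    /*
     * The copy buffer is at most 1 MB, rounded down to a multiple of the
     * logical block size; breadcnt is how many metadata blocks fit into
     * one read/write pass of the loop below.
     */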
    if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
        return (ENOMEM);
    }
    start_blk = ((daddr64_t)blkstart * blksize) / iosize;
    last_blk  = ((daddr64_t)blkcnt * blksize) / iosize;
    blkno = 0;

    while (blkno < last_blk) {
        /*
         * Read up to a megabyte
         */
        offset = bufp;
        for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
            error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
            if (error) {
                printf("hfs_clonesysfile: meta_bread error %d\n", error);
                goto out;
            }
            if (buf_count(bp) != iosize) {
                printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
                goto out;
            }
            bcopy((char *)buf_dataptr(bp), offset, iosize);

            buf_markinvalid(bp);
            buf_brelse(bp);
            bp = NULL;

            offset += iosize;
        }

        /*
         * Write up to a megabyte
         */
        offset = bufp;
        for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
            bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
            if (bp == NULL) {
                printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
                error = EIO;
                goto out;
            }
            bcopy(offset, (char *)buf_dataptr(bp), iosize);
            error = (int)buf_bwrite(bp);
            bp = NULL;
            if (error)
                goto out;
            offset += iosize;
        }
    }
out:
    if (bp) {
        buf_brelse(bp);
    }

    kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

    error = hfs_fsync(vp, MNT_WAIT, 0, p);

    return (error);
}