/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*	@(#)hfs_readwrite.c	1.0
 *
 *	(c) 1998-2001 Apple Computer, Inc.  All Rights Reserved
 *
 *	hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vnode_internal.h>
#include <sys/vfs_context.h>
#include <sys/fsevents.h>
#include <kern/kalloc.h>
#include <sys/sysctl.h>
#include <sys/fsctl.h>
#include <miscfs/specfs/specdev.h>
#include <sys/ubc_internal.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <sys/kdebug.h>

#include "hfs_attrlist.h"
#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
#define can_cluster(size)  ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
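/*
 * can_cluster() reports whether a fork's logical block size is a multiple of
 * 4K and no larger than half of MAXPHYSIO, i.e. whether it is suitable for
 * clustered I/O; hfs_bmap() below uses it before computing a read-ahead run.
 * For example, the common 4096-byte logical block size passes this test.
 */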
enum {
    MAXHFSFILESIZE = 0x7FFFFFFF		/* this needs to go in the mount structure */
};

/* from bsd/hfs/hfs_vfsops.c */
extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
static int hfs_clonefile(struct vnode *, int, int, int);
static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
static int hfs_minorupdate(struct vnode *vp);
static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);

int flush_cache_on_write = 0;
SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
/*
 * Read data from a file.
 */
int
hfs_vnop_read(struct vnop_read_args *ap)
{
    uio_t uio = ap->a_uio;
    struct vnode *vp = ap->a_vp;
    struct hfsmount *hfsmp;
    off_t start_resid = uio_resid(uio);
    off_t offset = uio_offset(uio);

    /* Preflight checks */
    if (!vnode_isreg(vp)) {
        /* can only read regular files */
    }
    if (start_resid == 0)
        return (0);		/* Nothing left to do */
    if (offset < 0)
        return (EINVAL);	/* cant read from a negative offset */

#if HFS_COMPRESSION
    if (VNODE_IS_RSRC(vp)) {
        if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
        }
        /* otherwise read the resource fork normally */
    } else {
        int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
        if (compressed) {
            retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
            /* successful read, update the access time */
            VTOC(vp)->c_touch_acctime = TRUE;

            /* compressed files are not hot file candidates */
            if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
                VTOF(vp)->ff_bytesread = 0;
            }
        }
        /* otherwise the file was converted back to a regular file while we were reading it */
    }
#endif /* HFS_COMPRESSION */

    /* Protect against a size change. */
    hfs_lock_truncate(cp, 0);

    filesize = fp->ff_size;
    filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
    if (offset > filesize) {
        if ((hfsmp->hfs_flags & HFS_STANDARD) &&
            (offset > (off_t)MAXHFSFILESIZE)) {
        }
    }

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
        (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

    retval = cluster_read(vp, uio, filesize, ap->a_ioflag);

    cp->c_touch_acctime = TRUE;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
        (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

    /*
     * Keep track of blocks read.
     */
    if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
        int took_cnode_lock = 0;

        bytesread = start_resid - uio_resid(uio);

        /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
        if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
            hfs_lock(cp, HFS_FORCE_LOCK);
        }
        /*
         * If this file hasn't been seen since the start of
         * the current sampling period then start over.
         */
        if (cp->c_atime < hfsmp->hfc_timebase) {
            fp->ff_bytesread = bytesread;
            cp->c_atime = tv.tv_sec;
        } else {
            fp->ff_bytesread += bytesread;
        }
    }
    hfs_unlock_truncate(cp, 0);
}
/*
 * Write data to a file.
 */
int
hfs_vnop_write(struct vnop_write_args *ap)
{
    uio_t uio = ap->a_uio;
    struct vnode *vp = ap->a_vp;
    struct hfsmount *hfsmp;
    kauth_cred_t cred = NULL;
    off_t bytesToAdd = 0;
    off_t actualBytesAdded;
    int ioflag = ap->a_ioflag;
    int cnode_locked = 0;
    int partialwrite = 0;
    int exclusive_lock = 0;

    if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
        int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
        switch (state) {
            case FILE_IS_COMPRESSED:
            case FILE_IS_CONVERTING:
                /* if FILE_IS_CONVERTING, we allow writes */
                break;
            default:
                printf("invalid state %d for compressed file\n", state);
        }
    }

    // LP64todo - fix this! uio_resid may be 64-bit value
    resid = uio_resid(uio);
    offset = uio_offset(uio);

    if (ioflag & IO_APPEND) {
    }
    if (!vnode_isreg(vp))
        return (EPERM);		/* Can only write regular files */

    eflags = kEFDeferMask;	/* defer file block allocations */
#ifdef HFS_SPARSE_DEV
    /*
     * When the underlying device is sparse and space
     * is low (< 8MB), stop doing delayed allocations
     * and begin doing synchronous I/O.
     */
    if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
        (hfs_freeblks(hfsmp, 0) < 2048)) {
        eflags &= ~kEFDeferMask;
    }
#endif /* HFS_SPARSE_DEV */

    /* Protect against a size change. */
    hfs_lock_truncate(cp, exclusive_lock);

    if (ioflag & IO_APPEND) {
        uio_setoffset(uio, fp->ff_size);
        offset = fp->ff_size;
    }
    if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
    }

    origFileSize = fp->ff_size;
    writelimit = offset + resid;
    filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;

    /* If the truncate lock is shared, and if we either have virtual
     * blocks or will need to extend the file, upgrade the truncate
     * to exclusive lock.  If the upgrade fails, we lose the lock and
     * have to take the exclusive lock again.  Note that we want to
     * grab the truncate lock exclusive even if we're not allocating new blocks
     * because we could still be growing past the LEOF.
     */
    if ((exclusive_lock == 0) &&
        ((fp->ff_unallocblocks != 0) || (writelimit > origFileSize))) {
        /* Lock upgrade failed and we lost our shared lock, try again */
        if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
        }
    }
    if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
    }

    if (!exclusive_lock) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
            (int)offset, uio_resid(uio), (int)fp->ff_size,
            (int)filebytes, 0);
    }

    /* Check if we do not need to extend the file */
    if (writelimit <= filebytes) {
    }

    cred = vfs_context_ucred(ap->a_context);
    bytesToAdd = writelimit - filebytes;

    retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
                       cred, 0);

    if (hfs_start_transaction(hfsmp) != 0) {
    }

    while (writelimit > filebytes) {
        bytesToAdd = writelimit - filebytes;
        if (cred && suser(cred, NULL) != 0)
            eflags |= kEFReserveMask;

        /* Protect extents b-tree and allocation bitmap */
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

        /* Files that are changing size are not hot file candidates. */
        if (hfsmp->hfc_stage == HFC_RECORDING) {
            fp->ff_bytesread = 0;
        }
        retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
                0, eflags, &actualBytesAdded));

        hfs_systemfile_unlock(hfsmp, lockflags);

        if ((actualBytesAdded == 0) && (retval == E_NONE))
            retval = ENOSPC;
        if (retval != E_NONE)
            break;
        filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
            (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
    }
    (void) hfs_update(vp, TRUE);
    (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
    (void) hfs_end_transaction(hfsmp);

    /*
     * If we didn't grow the file enough try a partial write.
     * POSIX expects this behavior.
     */
    if ((retval == ENOSPC) && (filebytes > offset)) {
        uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
        writelimit = filebytes;
    }
    if (retval == E_NONE) {
        struct rl_entry *invalid_range;

        if (writelimit > fp->ff_size)
            filesize = writelimit;
        else
            filesize = fp->ff_size;

        lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);

        if (offset <= fp->ff_size) {
            zero_off = offset & ~PAGE_MASK_64;

            /* Check whether the area between the zero_offset and the start
               of the transfer is invalid and should be zero-filled
               as part of the transfer:
             */
            if (offset > zero_off) {
                if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
                    lflag |= IO_HEADZEROFILL;
            }
        } else {
            off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;

            /* The bytes between fp->ff_size and uio->uio_offset must never be
               read without being zeroed.  The current last block is filled with zeroes
               if it holds valid data but in all cases merely do a little bookkeeping
               to track the area from the end of the current last page to the start of
               the area actually written.  For the same reason only the bytes up to the
               start of the page where this write will start is invalidated; any remainder
               before uio->uio_offset is explicitly zeroed as part of the cluster_write.

               Note that inval_start, the start of the page after the current EOF,
               may be past the start of the write, in which case the zeroing
               will be handled by the cluster_write of the actual data.
             */
            inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
            inval_end = offset & ~PAGE_MASK_64;
            zero_off = fp->ff_size;

            if ((fp->ff_size & PAGE_MASK_64) &&
                (rl_scan(&fp->ff_invalidranges,
                         eof_page_base,
                         fp->ff_size - 1,
                         &invalid_range) != RL_NOOVERLAP)) {
                /* The page containing the EOF is not valid, so the
                   entire page must be made inaccessible now.  If the write
                   starts on a page beyond the page containing the eof
                   (inval_end > eof_page_base), add the
                   whole page to the range to be invalidated.  Otherwise
                   (i.e. if the write starts on the same page), zero-fill
                   the entire page explicitly now:
                 */
                if (inval_end > eof_page_base) {
                    inval_start = eof_page_base;
                } else {
                    zero_off = eof_page_base;
                }
            }

            if (inval_start < inval_end) {
                /* There's some range of data that's going to be marked invalid */

                if (zero_off < inval_start) {
                    /* The pages between inval_start and inval_end are going to be invalidated,
                       and the actual write will start on a page past inval_end.  Now's the last
                       chance to zero-fill the page containing the EOF:
                     */
                    retval = cluster_write(vp, (uio_t) 0,
                            fp->ff_size, inval_start,
                            lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
                    hfs_lock(cp, HFS_FORCE_LOCK);
                    if (retval) goto ioerr_exit;
                    offset = uio_offset(uio);
                }

                /* Mark the remaining area of the newly allocated space as invalid: */
                rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
                cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
                zero_off = fp->ff_size = inval_end;
            }

            if (offset > zero_off) lflag |= IO_HEADZEROFILL;
        }

        /* Check to see whether the area between the end of the write and the end of
           the page it falls in is invalid and should be zero-filled as part of the transfer:
         */
        tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
        if (tail_off > filesize) tail_off = filesize;
        if (tail_off > writelimit) {
            if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
                lflag |= IO_TAILZEROFILL;
            }
        }

        /*
         * if the write starts beyond the current EOF (possibly advanced in the
         * zeroing of the last block, above), then we'll zero fill from the current EOF
         * to where the write begins:
         *
         * NOTE: If (and ONLY if) the portion of the file about to be written is
         * before the current EOF it might be marked as invalid now and must be
         * made readable (removed from the invalid ranges) before cluster_write
         * tries to write it.
         */
        io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
        if (io_start < fp->ff_size) {
            io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
            rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
        }

        /*
         * We need to tell UBC the fork's new size BEFORE calling
         * cluster_write, in case any of the new pages need to be
         * paged out before cluster_write completes (which does happen
         * in embedded systems due to extreme memory pressure).
         * Similarly, we need to tell hfs_vnop_pageout what the new EOF
         * will be, so that it can pass that on to cluster_pageout, and
         * allow those pageouts.
         *
         * We don't update ff_size yet since we don't want pageins to
         * be able to see uninitialized data between the old and new
         * EOF, until cluster_write has completed and initialized that
         * part of the file.
         *
         * The vnode pager relies on the file size last given to UBC via
         * ubc_setsize.  hfs_vnop_pageout relies on fp->ff_new_size or
         * ff_size (whichever is larger).  NOTE: ff_new_size is always
         * zero, unless we are extending the file via write.
         */
        if (filesize > fp->ff_size) {
            fp->ff_new_size = filesize;
            ubc_setsize(vp, filesize);
        }
        retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
                tail_off, lflag | IO_NOZERODIRTY);
        if (retval) {
            fp->ff_new_size = 0;	/* no longer extending; use ff_size */
            if (filesize > origFileSize) {
                ubc_setsize(vp, origFileSize);
            }
        }
        if (filesize > origFileSize) {
            fp->ff_size = filesize;

            /* Files that are changing size are not hot file candidates. */
            if (hfsmp->hfc_stage == HFC_RECORDING) {
                fp->ff_bytesread = 0;
            }
        }
        fp->ff_new_size = 0;	/* ff_size now has the correct size */

        /* If we wrote some bytes, then touch the change and mod times */
        if (resid > uio_resid(uio)) {
            cp->c_touch_chgtime = TRUE;
            cp->c_touch_modtime = TRUE;
        }
    }
    if (partialwrite) {
        uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
    }

    // XXXdbg - see radar 4871353 for more info
    if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
        VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
    }

ioerr_exit:
    /*
     * If we successfully wrote any data, and we are not the superuser
     * we clear the setuid and setgid bits as a precaution against
     * tampering.
     */
    if (cp->c_mode & (S_ISUID | S_ISGID)) {
        cred = vfs_context_ucred(ap->a_context);
        if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
            hfs_lock(cp, HFS_FORCE_LOCK);
            cp->c_mode &= ~(S_ISUID | S_ISGID);
        }
    }
    if (ioflag & IO_UNIT) {
        hfs_lock(cp, HFS_FORCE_LOCK);
        (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
                           0, 0, ap->a_context);
        // LP64todo - fix this!  resid needs to be user_ssize_t
        uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
        uio_setresid(uio, resid);
        filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
    } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
        hfs_lock(cp, HFS_FORCE_LOCK);
        retval = hfs_update(vp, TRUE);
    }
    /* Updating vcbWrCnt doesn't need to be atomic. */

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
        (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);

    hfs_unlock_truncate(cp, exclusive_lock);
}
/* support for the "bulk-access" fcntl */

#define CACHE_LEVELS 16
#define NUM_CACHE_ENTRIES (64*16)
#define PARENT_IDS_FLAG 0x100
struct access_cache {
    int numcached;
    int cachehits;		/* these two for statistics gathering */
    int lookups;
    unsigned int *acache;
    unsigned char *haveaccess;
};

struct access_t {
    uid_t	uid;			/* IN: effective user id */
    short	flags;			/* IN: access requested (i.e. R_OK) */
    short	num_groups;		/* IN: number of groups user belongs to */
    int		num_files;		/* IN: number of files to process */
    int		*file_ids;		/* IN: array of file ids */
    gid_t	*groups;		/* IN: array of groups */
    short	*access;		/* OUT: access info for each file (0 for 'has access') */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_access_t {
    uid_t		uid;		/* IN: effective user id */
    short		flags;		/* IN: access requested (i.e. R_OK) */
    short		num_groups;	/* IN: number of groups user belongs to */
    int			num_files;	/* IN: number of files to process */
    user32_addr_t	file_ids;	/* IN: array of file ids */
    user32_addr_t	groups;		/* IN: array of groups */
    user32_addr_t	access;		/* OUT: access info for each file (0 for 'has access') */
};

struct user64_access_t {
    uid_t		uid;		/* IN: effective user id */
    short		flags;		/* IN: access requested (i.e. R_OK) */
    short		num_groups;	/* IN: number of groups user belongs to */
    int			num_files;	/* IN: number of files to process */
    user64_addr_t	file_ids;	/* IN: array of file ids */
    user64_addr_t	groups;		/* IN: array of groups */
    user64_addr_t	access;		/* OUT: access info for each file (0 for 'has access') */
};

// these are the "extended" versions of the above structures
// note that it is crucial that they be sized differently than
// the regular versions
struct ext_access_t {
    uint32_t	flags;			/* IN: access requested (i.e. R_OK) */
    uint32_t	num_files;		/* IN: number of files to process */
    uint32_t	map_size;		/* IN: size of the bit map */
    uint32_t	*file_ids;		/* IN: Array of file ids */
    char	*bitmap;		/* OUT: hash-bitmap of interesting directory ids */
    short	*access;		/* OUT: access info for each file (0 for 'has access') */
    uint32_t	num_parents;		/* future use */
    cnid_t	*parents;		/* future use */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_ext_access_t {
    uint32_t		flags;		/* IN: access requested (i.e. R_OK) */
    uint32_t		num_files;	/* IN: number of files to process */
    uint32_t		map_size;	/* IN: size of the bit map */
    user32_addr_t	file_ids;	/* IN: Array of file ids */
    user32_addr_t	bitmap;		/* OUT: hash-bitmap of interesting directory ids */
    user32_addr_t	access;		/* OUT: access info for each file (0 for 'has access') */
    uint32_t		num_parents;	/* future use */
    user32_addr_t	parents;	/* future use */
};

struct user64_ext_access_t {
    uint32_t		flags;		/* IN: access requested (i.e. R_OK) */
    uint32_t		num_files;	/* IN: number of files to process */
    uint32_t		map_size;	/* IN: size of the bit map */
    user64_addr_t	file_ids;	/* IN: array of file ids */
    user64_addr_t	bitmap;		/* OUT: hash-bitmap of interesting directory ids */
    user64_addr_t	access;		/* OUT: access info for each file (0 for 'has access') */
    uint32_t		num_parents;	/* future use */
    user64_addr_t	parents;	/* future use */
};
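/*
 * Userspace reaches this interface through fsctl(2) with the
 * HFS_EXT_BULKACCESS_FSCTL (or legacy HFS_BULKACCESS_FSCTL) selector and one
 * of the structures above.  A minimal sketch, assuming a userspace-visible
 * equivalent of ext_access_t and two hypothetical file IDs:
 *
 *	struct ext_access_t args = { 0 };
 *	uint32_t ids[2] = { id_a, id_b };
 *	short results[2];
 *	args.flags     = R_OK;
 *	args.num_files = 2;
 *	args.file_ids  = ids;
 *	args.access    = results;
 *	fsctl("/Volumes/SomeHFSVolume", HFS_EXT_BULKACCESS_FSCTL, &args, 0);
 *	// results[i] == 0 means "has access" for ids[i]
 */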
/*
 * Perform a binary search for the given parent_id. Return value is
 * the index if there is a match.  If no_match_indexp is non-NULL it
 * will be assigned with the index to insert the item (even if it was
 * not found).
 */
static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
{
        unsigned int mid = ((hi - lo)/2) + lo;
        unsigned int this_id = array[mid];

        if (parent_id == this_id) {
        }
        if (parent_id < this_id) {
        }
        if (parent_id > this_id) {
        }

    /* check if lo and hi converged on the match */
    if (parent_id == array[hi]) {
    }
    if (no_match_indexp) {
        *no_match_indexp = hi;
    }
static int
lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
{
    int index, no_match_index;

    if (cache->numcached == 0) {
        return 0; // table is empty, so insert at index=0 and report no match
    }

    if (cache->numcached > NUM_CACHE_ENTRIES) {
        /*printf("hfs: EGAD! numcached is %d... cut our losses and trim to %d\n",
          cache->numcached, NUM_CACHE_ENTRIES);*/
        cache->numcached = NUM_CACHE_ENTRIES;
    }

    hi = cache->numcached - 1;

    index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);

    /* if no existing entry found, find index for new one */
    index = no_match_index;
/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in). We currently do a replace rather
 * than an insert if the cache is full.
 */
static void
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
    int lookup_index = -1;

    /* need to do a lookup first if -1 passed for index */
    if (index == -1) {
        if (lookup_bucket(cache, &lookup_index, nodeID)) {
            if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
                // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
                cache->haveaccess[lookup_index] = access;
            }
            /* mission accomplished */
        } else {
            index = lookup_index;
        }
    }

    /* if the cache is full, do a replace rather than an insert */
    if (cache->numcached >= NUM_CACHE_ENTRIES) {
        //printf("hfs: cache is full (%d). replace at index %d\n", cache->numcached, index);
        cache->numcached = NUM_CACHE_ENTRIES-1;

        if (index > cache->numcached) {
            // printf("hfs: index %d pinned to %d\n", index, cache->numcached);
            index = cache->numcached;
        }
    }

    if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
    }

    if (index >= 0 && index < cache->numcached) {
        /* only do bcopy if we're inserting */
        bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
        bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
    }

    cache->acache[index] = nodeID;
    cache->haveaccess[index] = access;
}
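/*
 * Typical use of the pair above, as a sketch: callers first probe with
 * lookup_bucket() and, on a miss, record the verdict for a directory with
 * add_node(cache, -1, dir_id, err) so later files under the same parent can
 * reuse the cached answer -- see do_access_check() below.
 */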
static int
snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
{
    struct cinfo *cip = (struct cinfo *)arg;

    cip->uid = attrp->ca_uid;
    cip->gid = attrp->ca_gid;
    cip->mode = attrp->ca_mode;
    cip->parentcnid = descp->cd_parentcnid;
    cip->recflags = attrp->ca_recflags;
/*
 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
 * isn't in-core, then go to the catalog.
 */
static int
do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
    struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
{
    /* if this id matches the one the fsctl was called with, skip the lookup */
    if (cnid == skip_cp->c_cnid) {
        cnattrp->ca_uid = skip_cp->c_uid;
        cnattrp->ca_gid = skip_cp->c_gid;
        cnattrp->ca_mode = skip_cp->c_mode;
        cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
        keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
    } else {
        /* otherwise, check the cnode hash in case the file/dir is in-core */
        if (hfs_chash_snoop(hfsmp, cnid, snoop_callback, &c_info) == 0) {
            cnattrp->ca_uid = c_info.uid;
            cnattrp->ca_gid = c_info.gid;
            cnattrp->ca_mode = c_info.mode;
            cnattrp->ca_recflags = c_info.recflags;
            keyp->hfsPlus.parentID = c_info.parentcnid;
        } else {
            lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

            /* lookup this cnid in the catalog */
            error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);

            hfs_systemfile_unlock(hfsmp, lockflags);
        }
    }
/*
 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
 * up to CACHE_LEVELS as we progress towards the root.
 */
static int
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
    struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
    struct vfs_context *my_context,
    char *bitmap,
    uint32_t map_size,
    cnid_t *parents,
    uint32_t num_parents)
{
    HFSCatalogNodeID thisNodeID;
    unsigned int myPerms;
    struct cat_attr cnattr;
    int cache_index = -1, scope_index = -1, scope_idx_start = -1;
    int i = 0, ids_to_cache = 0;
    int parent_ids[CACHE_LEVELS];

    while (thisNodeID >= kRootDirID) {
        myResult = 0;	/* default to "no access" */

        /* check the cache before resorting to hitting the catalog */

        /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
         * to look any further after hitting cached dir */

        if (lookup_bucket(cache, &cache_index, thisNodeID)) {
            myErr = cache->haveaccess[cache_index];
            if (scope_index != -1) {
                if (myErr == ESRCH) {
                }
            } else {
                scope_index = 0;	// so we'll just use the cache result
                scope_idx_start = ids_to_cache;
            }
            myResult = (myErr == 0) ? 1 : 0;
            goto ExitThisRoutine;
        }

        if (parents) {
            tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
            if (scope_index == -1)
                scope_index = tmp;
            if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
                scope_idx_start = ids_to_cache;
            }
        }

        /* remember which parents we want to cache */
        if (ids_to_cache < CACHE_LEVELS) {
            parent_ids[ids_to_cache] = thisNodeID;
        }
        // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
        if (bitmap && map_size) {
            bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
        }

        /* do the lookup (checks the cnode hash, then the catalog) */
        myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
        if (myErr) {
            goto ExitThisRoutine;	/* no access */
        }

        /* Root always gets access. */
        if (suser(myp_ucred, NULL) == 0) {
            thisNodeID = catkey.hfsPlus.parentID;
            myResult = 1;
            continue;
        }

        // if the thing has acl's, do the full permission check
        if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
            /* get the vnode for this cnid */
            myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0);
            if (myErr) {
                goto ExitThisRoutine;
            }

            thisNodeID = VTOC(vp)->c_parentcnid;

            hfs_unlock(VTOC(vp));

            if (vnode_vtype(vp) == VDIR) {
                myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
            } else {
                myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
            }
            if (myErr) {
                goto ExitThisRoutine;
            }
        } else {
            myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
                    cnattr.ca_mode, hfsmp->hfs_mp,
                    myp_ucred, theProcPtr);

            if (cnattr.ca_mode & S_IFDIR) {
                flags = R_OK | X_OK;
            }
            if ( (myPerms & flags) != flags) {
                goto ExitThisRoutine;	/* no access */
            }

            /* up the hierarchy we go */
            thisNodeID = catkey.hfsPlus.parentID;
        }
    }

    /* if here, we have access to this node */
    myResult = 1;

ExitThisRoutine:
    if (parents && myErr == 0 && scope_index == -1) {
    }

    /* cache the parent directory(ies) */
    for (i = 0; i < ids_to_cache; i++) {
        if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
            add_node(cache, -1, parent_ids[i], ESRCH);
        } else {
            add_node(cache, -1, parent_ids[i], myErr);
        }
    }
static int
do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
    struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
{
    /*
     * NOTE: on entry, the vnode is locked. In case this vnode
     * happens to be in our list of file_ids, we'll note it and
     * avoid calling hfs_chashget_nowait() on that id as that
     * will cause a "locking against myself" panic.
     */
    Boolean check_leaf = true;

    struct user64_ext_access_t *user_access_structp;
    struct user64_ext_access_t tmp_user_access;
    struct access_cache cache;

    int error = 0, prev_parent_check_ok=1;
    unsigned int num_files = 0;
    int num_parents = 0;
    cnid_t *parents=NULL;
    cnid_t prevParent_cnid = 0;
    unsigned int myPerms;
    struct cat_attr cnattr;
    struct cnode *skip_cp = VTOC(vp);
    kauth_cred_t cred = vfs_context_ucred(context);
    proc_t p = vfs_context_proc(context);

    is64bit = proc_is64bit(p);

    /* initialize the local cache and buffers */
    cache.numcached = 0;
    cache.cachehits = 0;
    cache.acache = NULL;
    cache.haveaccess = NULL;

    /* struct copyin done during dispatch... need to copy file_id array separately */
    if (ap->a_data == NULL) {
        goto err_exit_bulk_access;
    }

    if (is64bit) {
        if (arg_size != sizeof(struct user64_ext_access_t)) {
            goto err_exit_bulk_access;
        }

        user_access_structp = (struct user64_ext_access_t *)ap->a_data;

    } else if (arg_size == sizeof(struct user32_access_t)) {
        struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;

        // convert an old style bulk-access struct to the new style
        tmp_user_access.flags = accessp->flags;
        tmp_user_access.num_files = accessp->num_files;
        tmp_user_access.map_size = 0;
        tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
        tmp_user_access.bitmap = USER_ADDR_NULL;
        tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
        tmp_user_access.num_parents = 0;
        user_access_structp = &tmp_user_access;

    } else if (arg_size == sizeof(struct user32_ext_access_t)) {
        struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;

        // up-cast from a 32-bit version of the struct
        tmp_user_access.flags = accessp->flags;
        tmp_user_access.num_files = accessp->num_files;
        tmp_user_access.map_size = accessp->map_size;
        tmp_user_access.num_parents = accessp->num_parents;

        tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
        tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
        tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
        tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);

        user_access_structp = &tmp_user_access;
    } else {
        goto err_exit_bulk_access;
    }

    map_size = user_access_structp->map_size;
    num_files = user_access_structp->num_files;
    num_parents= user_access_structp->num_parents;

    if (num_files < 1) {
        goto err_exit_bulk_access;
    }
    if (num_files > 1024) {
        goto err_exit_bulk_access;
    }
    if (num_parents > 1024) {
        goto err_exit_bulk_access;
    }

    file_ids = (int *) kalloc(sizeof(int) * num_files);
    access = (short *) kalloc(sizeof(short) * num_files);
    bitmap = (char *) kalloc(sizeof(char) * map_size);
    parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);

    cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
    cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);

    if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
        kfree(file_ids, sizeof(int) * num_files);
        kfree(bitmap, sizeof(char) * map_size);
        kfree(access, sizeof(short) * num_files);
        kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
        if (cache.haveaccess) {
            kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
        }
        kfree(parents, sizeof(cnid_t) * num_parents);
    }

    // make sure the bitmap is zero'ed out...
    bzero(bitmap, (sizeof(char) * map_size));

    if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
                num_files * sizeof(int)))) {
        goto err_exit_bulk_access;
    }
    if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
                num_parents * sizeof(cnid_t)))) {
        goto err_exit_bulk_access;
    }

    flags = user_access_structp->flags;
    if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
    }

    /* check if we've been passed leaf node ids or parent ids */
    if (flags & PARENT_IDS_FLAG) {
    }

    /* Check access to each file_id passed in */
    for (i = 0; i < num_files; i++) {
        cnid = (cnid_t) file_ids[i];

        /* root always has access */
        if ((!parents) && (!suser(cred, NULL))) {
        }

        /* do the lookup (checks the cnode hash, then the catalog) */
        error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
        if (error) {
            access[i] = (short) error;
        }

        if (parents) {
            // Check if the leaf matches one of the parent scopes
            leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
            if (leaf_index >= 0 && parents[leaf_index] == cnid)
                prev_parent_check_ok = 0;
            else if (leaf_index >= 0)
                prev_parent_check_ok = 1;
        }

        // if the thing has acl's, do the full permission check
        if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
            /* get the vnode for this cnid */
            myErr = hfs_vget(hfsmp, cnid, &cvp, 0);

            hfs_unlock(VTOC(cvp));

            if (vnode_vtype(cvp) == VDIR) {
                myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
            } else {
                myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
            }
        } else {
            /* before calling CheckAccess(), check the target file for read access */
            myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
                    cnattr.ca_mode, hfsmp->hfs_mp, cred, p);

            /* fail fast if no access */
            if ((myPerms & flags) == 0) {
            }
        }

        /* we were passed an array of parent ids */
        catkey.hfsPlus.parentID = cnid;

        /* if the last guy had the same parent and had access, we're done */
        if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
        }

        myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
                skip_cp, p, cred, context,bitmap, map_size, parents, num_parents);

        if (myaccess || (error == ESRCH && leaf_index != -1)) {
            access[i] = 0; // have access.. no errors to report
        } else {
            access[i] = (error != 0 ? (short) error : EACCES);
        }

        prevParent_cnid = catkey.hfsPlus.parentID;
    }

    /* copyout the access array */
    if ((error = copyout((caddr_t)access, user_access_structp->access,
                num_files * sizeof (short)))) {
        goto err_exit_bulk_access;
    }
    if (map_size && bitmap) {
        if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
                    map_size * sizeof (char)))) {
            goto err_exit_bulk_access;
        }
    }

err_exit_bulk_access:

    //printf("hfs:  on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);

    kfree(file_ids, sizeof(int) * num_files);
    kfree(parents, sizeof(cnid_t) * num_parents);
    kfree(bitmap, sizeof(char) * map_size);
    kfree(access, sizeof(short) * num_files);
    kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
    if (cache.haveaccess)
        kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);


/* end "bulk-access" support */
/*
 * Callback for use with freeze ioctl.
 */
static int
hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
{
    vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
}
/*
 * Control filesystem operating characteristics.
 */
int
hfs_vnop_ioctl( struct vnop_ioctl_args /* {
        vfs_context_t a_context;
    } */ *ap)
{
    struct vnode * vp = ap->a_vp;
    struct hfsmount *hfsmp = VTOHFS(vp);
    vfs_context_t context = ap->a_context;
    kauth_cred_t cred = vfs_context_ucred(context);
    proc_t p = vfs_context_proc(context);
    struct vfsstatfs *vfsp;
    off_t jnl_start, jnl_size;
    struct hfs_journal_info *jip;
#if HFS_COMPRESSION
    off_t uncompressed_size = -1;
    int decmpfs_error = 0;

    if (ap->a_command == F_RDADVISE) {
        /* we need to inspect the decmpfs state of the file as early as possible */
        compressed = hfs_file_is_compressed(VTOC(vp), 0);
        if (VNODE_IS_RSRC(vp)) {
            /* if this is the resource fork, treat it as if it were empty */
            uncompressed_size = 0;
        } else {
            decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
            if (decmpfs_error != 0) {
                /* failed to get the uncompressed size, we'll check for this later */
                uncompressed_size = -1;
            }
        }
    }
#endif /* HFS_COMPRESSION */

    is64bit = proc_is64bit(p);

    switch (ap->a_command) {

    case HFS_GETPATH: {
        struct vnode *file_vp;

        /* Caller must be owner of file system. */
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
        }
        /* Target vnode must be file system's root. */
        if (!vnode_isvroot(vp)) {
        }
        bufptr = (char *)ap->a_data;
        cnid = strtoul(bufptr, NULL, 10);

        /* We need to call hfs_vfs_vget to leverage the code that will
         * fix the origin list for us if needed, as opposed to calling
         * hfs_vget, since we will need the parent for build_path call.
         */
        if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
        }
        error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);
    }

    case HFS_PREV_LINK:
    case HFS_NEXT_LINK: {
        /* Caller must be owner of file system. */
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
        }
        /* Target vnode must be file system's root. */
        if (!vnode_isvroot(vp)) {
        }
        linkfileid = *(cnid_t *)ap->a_data;
        if (linkfileid < kHFSFirstUserCatalogNodeID) {
        }
        if ((error = hfs_lookuplink(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
        }
        if (ap->a_command == HFS_NEXT_LINK) {
            *(cnid_t *)ap->a_data = nextlinkid;
        } else {
            *(cnid_t *)ap->a_data = prevlinkid;
        }
    }

    case HFS_RESIZE_PROGRESS: {

        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES); /* must be owner of file system */
        }
        if (!vnode_isvroot(vp)) {
        }
        /* file system must not be mounted read-only */
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }

        return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
    }

    case HFS_RESIZE_VOLUME: {

        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES); /* must be owner of file system */
        }
        if (!vnode_isvroot(vp)) {
        }

        /* filesystem must not be mounted read only */
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }
        newsize = *(u_int64_t *)ap->a_data;
        cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;

        if (newsize > cursize) {
            return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
        } else if (newsize < cursize) {
            return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
        }
    }

    case HFS_CHANGE_NEXT_ALLOCATION: {
        int error = 0;		/* Assume success */

        if (vnode_vfsisrdonly(vp)) {
        }
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES); /* must be owner of file system */
        }
        if (!vnode_isvroot(vp)) {
        }
        HFS_MOUNT_LOCK(hfsmp, TRUE);
        location = *(u_int32_t *)ap->a_data;
        if ((location >= hfsmp->allocLimit) &&
            (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
            goto fail_change_next_allocation;
        }
        /* Return previous value. */
        *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
        if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
            /* On magic value for location, set nextAllocation to next block
             * after metadata zone and set flag in mount structure to indicate
             * that nextAllocation should not be updated again.
             */
            if (hfsmp->hfs_metazone_end != 0) {
                HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
            }
            hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
        } else {
            hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
            HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
        }
        MarkVCBDirty(hfsmp);
fail_change_next_allocation:
        HFS_MOUNT_UNLOCK(hfsmp, TRUE);
    }

#ifdef HFS_SPARSE_DEV
    case HFS_SETBACKINGSTOREINFO: {
        struct vnode * bsfs_rootvp;
        struct vnode * di_vp;
        struct hfs_backingstoreinfo *bsdata;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }
        if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
        }
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES); /* must be owner of file system */
        }
        bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
        if (bsdata == NULL) {
        }
        if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
        }
        if ((error = vnode_getwithref(di_vp))) {
            file_drop(bsdata->backingfd);
        }

        if (vnode_mount(vp) == vnode_mount(di_vp)) {
            (void)vnode_put(di_vp);
            file_drop(bsdata->backingfd);
        }

        /*
         * Obtain the backing fs root vnode and keep a reference
         * on it.  This reference will be dropped in hfs_unmount.
         */
        error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
        if (error) {
            (void)vnode_put(di_vp);
            file_drop(bsdata->backingfd);
        }
        vnode_ref(bsfs_rootvp);
        vnode_put(bsfs_rootvp);

        hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
        hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
        hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
        hfsmp->hfs_sparsebandblks *= 4;

        vfs_markdependency(hfsmp->hfs_mp);

        /*
         * If the sparse image is on a sparse image file (as opposed to a sparse
         * bundle), then we may need to limit the free space to the maximum size
         * of a file on that volume. So we query (using pathconf), and if we get
         * a meaningful result, we cache the number of blocks for later use in
         * hfs_freeblks().
         */
        hfsmp->hfs_backingfs_maxblocks = 0;
        if (vnode_vtype(di_vp) == VREG) {
            terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
            if (terr == 0 && hostbits != 0 && hostbits < 64) {
                u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;

                hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
            }
        }

        (void)vnode_put(di_vp);
        file_drop(bsdata->backingfd);
    }

    case HFS_CLRBACKINGSTOREINFO: {
        struct vnode * tmpvp;

        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES); /* must be owner of file system */
        }
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }

        if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
            hfsmp->hfs_backingfs_rootvp) {

            hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
            tmpvp = hfsmp->hfs_backingfs_rootvp;
            hfsmp->hfs_backingfs_rootvp = NULLVP;
            hfsmp->hfs_sparsebandblks = 0;
        }
    }
#endif /* HFS_SPARSE_DEV */

    case F_FREEZE_FS: {
        mp = vnode_mount(vp);
        hfsmp = VFSTOHFS(mp);

        vfsp = vfs_statfs(mp);

        if (kauth_cred_getuid(cred) != vfsp->f_owner &&
            !kauth_cred_issuser(cred))
            return (EACCES);

        lck_rw_lock_exclusive(&hfsmp->hfs_insync);

        // flush things before we get started to try and prevent
        // dirty data from being paged out while we're frozen.
        // note: can't do this after taking the lock as it will
        // deadlock against ourselves.
        vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
        hfs_global_exclusive_lock_acquire(hfsmp);

        // DO NOT call hfs_journal_flush() because that takes a
        // shared lock on the global exclusive lock!
        journal_flush(hfsmp->jnl);

        // don't need to iterate on all vnodes, we just need to
        // wait for writes to the system files and the device vnode
        if (HFSTOVCB(hfsmp)->extentsRefNum)
            vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
        if (HFSTOVCB(hfsmp)->catalogRefNum)
            vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
        if (HFSTOVCB(hfsmp)->allocationsRefNum)
            vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
        if (hfsmp->hfs_attribute_vp)
            vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
        vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");

        hfsmp->hfs_freezing_proc = current_proc();
    }

    case F_THAW_FS: {
        vfsp = vfs_statfs(vnode_mount(vp));
        if (kauth_cred_getuid(cred) != vfsp->f_owner &&
            !kauth_cred_issuser(cred))
            return (EACCES);

        // if we're not the one who froze the fs then we
        // can't thaw it.
        if (hfsmp->hfs_freezing_proc != current_proc()) {
        }

        // NOTE: if you add code here, also go check the
        // code that "thaws" the fs in hfs_vnop_close()

        hfsmp->hfs_freezing_proc = NULL;
        hfs_global_exclusive_lock_release(hfsmp);
        lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
    }

    case HFS_BULKACCESS_FSCTL: {
        if (hfsmp->hfs_flags & HFS_STANDARD) {
        }
        if (is64bit) {
            size = sizeof(struct user64_access_t);
        } else {
            size = sizeof(struct user32_access_t);
        }

        return do_bulk_access_check(hfsmp, vp, ap, size, context);
    }

    case HFS_EXT_BULKACCESS_FSCTL: {
        if (hfsmp->hfs_flags & HFS_STANDARD) {
        }
        if (is64bit) {
            size = sizeof(struct user64_ext_access_t);
        } else {
            size = sizeof(struct user32_ext_access_t);
        }

        return do_bulk_access_check(hfsmp, vp, ap, size, context);
    }

    case HFS_SETACLSTATE: {
        if (ap->a_data == NULL) {
        }

        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        state = *(int *)ap->a_data;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }
        // super-user can enable or disable acl's on a volume.
        // the volume owner can only enable acl's
        if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
        }
        if (state == 0 || state == 1)
            return hfs_set_volxattr(hfsmp, HFS_SETACLSTATE, state);
    }

    case HFS_SET_XATTREXTENTS_STATE: {
        if (ap->a_data == NULL) {
        }

        state = *(int *)ap->a_data;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }

        /* Super-user can enable or disable extent-based extended
         * attribute support on a volume
         */
        if (state == 0 || state == 1)
            return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
    }

    case F_FULLFSYNC: {
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }
        error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
        error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
        hfs_unlock(VTOC(vp));
    }

    case F_CHKCLEAN: {
        register struct cnode *cp;

        if (!vnode_isreg(vp))
            return EINVAL;

        error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
        /*
         * used by regression test to determine if
         * all the dirty pages (via write) have been cleaned
         * after a call to 'fsync'.
         */
        error = is_file_clean(vp, VTOF(vp)->ff_size);
    }

    case F_RDADVISE: {
        register struct radvisory *ra;
        struct filefork *fp;

        if (!vnode_isreg(vp))
            return EINVAL;

        ra = (struct radvisory *)(ap->a_data);

        /* Protect against a size change. */
        hfs_lock_truncate(VTOC(vp), TRUE);

#if HFS_COMPRESSION
        if (compressed && (uncompressed_size == -1)) {
            /* fetching the uncompressed size failed above, so return the error */
            error = decmpfs_error;
        } else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
                   (!compressed && (ra->ra_offset >= fp->ff_size))) {
        }
#else /* HFS_COMPRESSION */
        if (ra->ra_offset >= fp->ff_size) {
        }
#endif /* HFS_COMPRESSION */

        error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);

        hfs_unlock_truncate(VTOC(vp), TRUE);
    }

    case F_READBOOTSTRAP:
    case F_WRITEBOOTSTRAP:
    {
        struct vnode *devvp = NULL;
        user_fbootstraptransfer_t *user_bootstrapp;
        daddr64_t blockNumber;
        u_int32_t blockOffset;
        user_fbootstraptransfer_t user_bootstrap;

        if (!vnode_isvroot(vp))
            return (EINVAL);
        /* LP64 - when caller is a 64 bit process then we are passed a pointer
         * to a user_fbootstraptransfer_t else we get a pointer to a
         * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
         */
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }
        if (is64bit) {
            user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
        } else {
            user32_fbootstraptransfer_t *bootstrapp = (user32_fbootstraptransfer_t *)ap->a_data;
            user_bootstrapp = &user_bootstrap;
            user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
            user_bootstrap.fbt_length = bootstrapp->fbt_length;
            user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
        }

        if ((user_bootstrapp->fbt_offset < 0) || (user_bootstrapp->fbt_offset > 1024) ||
            (user_bootstrapp->fbt_length > 1024)) {
        }

        if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
            return EINVAL;

        devvp = VTOHFS(vp)->hfs_devvp;
        auio = uio_create(1, user_bootstrapp->fbt_offset,
                          is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
                          (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
        uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);

        devBlockSize = vfs_devblocksize(vnode_mount(vp));

        while (uio_resid(auio) > 0) {
            blockNumber = uio_offset(auio) / devBlockSize;
            error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
            if (error) {
                if (bp) buf_brelse(bp);
            }

            blockOffset = uio_offset(auio) % devBlockSize;
            xfersize = devBlockSize - blockOffset;
            error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);

            if (uio_rw(auio) == UIO_WRITE) {
                error = VNOP_BWRITE(bp);
            }
        }
    }

    case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
    {
        if (is64bit) {
            *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
        } else {
            *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
        }
    }

    case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
        *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
        break;

    case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
        *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
        break;

    case HFS_FSCTL_SET_VERY_LOW_DISK:
        if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
        }
        hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
        break;

    case HFS_FSCTL_SET_LOW_DISK:
        if (   *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
            || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
        }
        hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
        break;

    case HFS_FSCTL_SET_DESIRED_DISK:
        if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
        }
        hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
        break;

    case HFS_VOLUME_STATUS:
        *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
        break;

    case HFS_SET_BOOT_INFO:
        if (!vnode_isvroot(vp))
            return (EINVAL);
        if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
            return(EACCES);	/* must be superuser or owner of filesystem */
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }
        HFS_MOUNT_LOCK(hfsmp, TRUE);
        bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
        HFS_MOUNT_UNLOCK(hfsmp, TRUE);
        (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
        break;

    case HFS_GET_BOOT_INFO:
        if (!vnode_isvroot(vp))
            return (EINVAL);
        HFS_MOUNT_LOCK(hfsmp, TRUE);
        bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
        HFS_MOUNT_UNLOCK(hfsmp, TRUE);
        break;

    case HFS_MARK_BOOT_CORRUPT:
        /* Mark the boot volume corrupt by setting
         * kHFSVolumeInconsistentBit in the volume header.  This will
         * force fsck_hfs on next mount.
         */

        /* Allowed only on the root vnode of the boot volume */
        if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
            !vnode_isvroot(vp)) {
        }
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
        }
        printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
        hfs_mark_volume_inconsistent(hfsmp);
        break;

    case HFS_FSCTL_GET_JOURNAL_INFO:
        jip = (struct hfs_journal_info*)ap->a_data;

        if (hfsmp->jnl == NULL) {
        }
        jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
        jnl_size = (off_t)hfsmp->jnl_size;

        jip->jstart = jnl_start;
        jip->jsize = jnl_size;
        break;

    case HFS_SET_ALWAYS_ZEROFILL: {
        struct cnode *cp = VTOC(vp);

        if (*(int *)ap->a_data) {
            cp->c_flag |= C_ALWAYS_ZEROFILL;
        } else {
            cp->c_flag &= ~C_ALWAYS_ZEROFILL;
        }
        break;
    }

    default:
        return (ENOTTY);
    }
}
int
hfs_vnop_select(__unused struct vnop_select_args *ap)
/*
    struct vnop_select_args {
        vfs_context_t a_context;
    };
*/
{
    /*
     * We should really check to see if I/O is possible.
     */
    return (1);
}
/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently it's 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
 * in the run.
 */
int
hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
{
    struct filefork *fp = VTOF(vp);
    struct hfsmount *hfsmp = VTOHFS(vp);
    int retval = E_NONE;
    u_int32_t logBlockSize;
    size_t bytesContAvail = 0;
    off_t blockposition;

    /*
     * Check for underlying vnode requests and ensure that logical
     * to physical mapping is requested.
     */
    if (vpp != NULL)
        *vpp = hfsmp->hfs_devvp;
    if (bnp == NULL)
        return (0);

    logBlockSize = GetLogicalBlockSize(vp);
    blockposition = (off_t)bn * logBlockSize;

    lockExtBtree = overflow_extents(fp);

    if (lockExtBtree)
        lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);

    retval = MacToVFSError(
                MapFileBlockC (HFSTOVCB(hfsmp),

    if (lockExtBtree)
        hfs_systemfile_unlock(hfsmp, lockflags);

    if (retval == E_NONE) {
        /* Figure out how many read ahead blocks there are */
        if (can_cluster(logBlockSize)) {
            /* Make sure this result never goes negative: */
            *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
        }
    }
/*
 * Convert logical block number to file offset.
 */
int
hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
/*
    struct vnop_blktooff_args {
    };
*/
{
    if (ap->a_vp == NULL)
        return (EINVAL);
    *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
}
/*
 * Convert file offset to logical block number.
 */
int
hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
/*
    struct vnop_offtoblk_args {
        daddr64_t *a_lblkno;
    };
*/
{
    if (ap->a_vp == NULL)
        return (EINVAL);
    *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
}
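/*
 * Both conversions above are simple scaling by the fork's logical block size.
 * For example, with a 4096-byte logical block size, logical block 3 maps to
 * offset 12288, and offset 12288 maps back to logical block 3 (the division
 * truncates toward zero, so offset 12289 is still block 3).
 */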
2248 * Map file offset to physical block number.
2250 * If this function is called for write operation, and if the file
2251 * had virtual blocks allocated (delayed allocation), real blocks
2252 * are allocated by calling ExtendFileC().
2254 * If this function is called for read operation, and if the file
2255 * had virtual blocks allocated (delayed allocation), no change
2256 * to the size of file is done, and if required, rangelist is
2257 * searched for mapping.
2259 * System file cnodes are expected to be locked (shared or exclusive).
hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
/*
	struct vnop_blockmap_args {
		vfs_context_t a_context;
	};
*/
{
	struct vnode *vp = ap->a_vp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	size_t bytesContAvail = 0;
	int retval = E_NONE;
	struct rl_entry *invalid_range;
	enum rl_overlaptype overlaptype;

	if (VNODE_IS_RSRC(vp)) {
		/* allow blockmaps to the resource fork */
	if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
		int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
		case FILE_IS_COMPRESSED:
		case FILE_IS_CONVERTING:
			/* if FILE_IS_CONVERTING, we allow blockmap */
			printf("invalid state %d for compressed file\n", state);
#endif /* HFS_COMPRESSION */

	/* Do not allow blockmap operation on a directory */
	if (vnode_isdir(vp)) {

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (ap->a_bpn == NULL)

	if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
		if (VTOC(vp)->c_lockowner != current_thread()) {
			hfs_lock(VTOC(vp), HFS_FORCE_LOCK);

	/* Check virtual blocks only when performing write operation */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		if (hfs_start_transaction(hfsmp) != 0) {
		syslocks = SFL_EXTENTS | SFL_BITMAP;
	} else if (overflow_extents(fp)) {
		syslocks = SFL_EXTENTS;

	lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
	/*
	 * Check for any delayed allocations.
	 */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		u_int32_t loanedBlocks;

		//
		// Make sure we have a transaction.  It's possible
		// that we came in and fp->ff_unallocblocks was zero
		// but during the time we blocked acquiring the extents
		// btree, ff_unallocblocks became non-zero and so we
		// will need to start a transaction.
		//
		if (started_tr == 0) {
			hfs_systemfile_unlock(hfsmp, lockflags);

		/*
		 * Note: ExtendFileC will release any blocks on loan and
		 * acquire real blocks.  So we ask to extend by zero bytes
		 * since ExtendFileC will account for the virtual blocks.
		 */

		loanedBlocks = fp->ff_unallocblocks;
		retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
				     kEFAllMask | kEFNoClumpMask, &actbytes);

			fp->ff_unallocblocks = loanedBlocks;
			cp->c_blocks += loanedBlocks;
			fp->ff_blocks += loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);
			hfsmp->loanedBlocks += loanedBlocks;
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);

			hfs_systemfile_unlock(hfsmp, lockflags);
			cp->c_flag |= C_MODIFIED;
			(void) hfs_update(vp, TRUE);
			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);

	retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
			       ap->a_bpn, &bytesContAvail);

	hfs_systemfile_unlock(hfsmp, lockflags);

	(void) hfs_update(vp, TRUE);
	(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
	hfs_end_transaction(hfsmp);
	/*
	 * On write, always return the error because virtual blocks, if any,
	 * should have been allocated in ExtendFileC().  We do not
	 * allocate virtual blocks on read, therefore return the error
	 * only if no virtual blocks are allocated.  Otherwise we search
	 * the rangelist for zero-fills.
	 */
	if ((MacToVFSError(retval) != ERANGE) ||
	    (ap->a_flags & VNODE_WRITE) ||
	    ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {

	/* Validate that the start offset is within the logical file size */
	if (ap->a_foffset > fp->ff_size) {

	/*
	 * Searching the file extents failed for a read operation, therefore
	 * search the rangelist for any uncommitted holes in the file.
	 */
	overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
			      ap->a_foffset + (off_t)(ap->a_size - 1),

	switch (overlaptype) {
	case RL_OVERLAPISCONTAINED:
		/* start_offset <= rl_start, end_offset >= rl_end */
		if (ap->a_foffset != invalid_range->rl_start) {
	case RL_MATCHINGOVERLAP:
		/* start_offset = rl_start, end_offset = rl_end */
	case RL_OVERLAPCONTAINSRANGE:
		/* start_offset >= rl_start, end_offset <= rl_end */
	case RL_OVERLAPSTARTSBEFORE:
		/* start_offset > rl_start, end_offset >= rl_start */
		if ((off_t)fp->ff_size > (invalid_range->rl_end + 1)) {
			bytesContAvail = (invalid_range->rl_end + 1) - ap->a_foffset;
		} else {
			bytesContAvail = fp->ff_size - ap->a_foffset;
		}
		if (bytesContAvail > ap->a_size) {
			bytesContAvail = ap->a_size;
		}
		*ap->a_bpn = (daddr64_t)-1;
	case RL_OVERLAPENDSAFTER:
		/* start_offset < rl_start, end_offset < rl_end */
	/*
	 * MapFileBlockC() found a valid extent in the filefork.  Search the
	 * mapping information further for invalid file ranges.
	 */
	overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
			      ap->a_foffset + (off_t)bytesContAvail - 1,
	if (overlaptype != RL_NOOVERLAP) {
		switch (overlaptype) {
		case RL_MATCHINGOVERLAP:
		case RL_OVERLAPCONTAINSRANGE:
		case RL_OVERLAPSTARTSBEFORE:
			/* There's no valid block for this byte offset */
			*ap->a_bpn = (daddr64_t)-1;
			/*
			 * There's no point limiting the amount to be returned
			 * if the invalid range that was hit extends all the way
			 * to the EOF (i.e. there are no valid bytes between the
			 * end of this range and the file's EOF):
			 */
			if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
			    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
				bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
		case RL_OVERLAPISCONTAINED:
		case RL_OVERLAPENDSAFTER:
			/* The range of interest hits an invalid block before the end: */
			if (invalid_range->rl_start == ap->a_foffset) {
				/* There's actually no valid information to be had starting here: */
				*ap->a_bpn = (daddr64_t)-1;
				if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
				    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
					bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
			} else {
				bytesContAvail = invalid_range->rl_start - ap->a_foffset;
			}

	if (bytesContAvail > ap->a_size)
		bytesContAvail = ap->a_size;

	*ap->a_run = bytesContAvail;

	*(int *)ap->a_poff = 0;

	return (MacToVFSError(retval));
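/*
 * Illustrative sketch (not part of the original source): how a blockmap
 * result is typically interpreted by a caller.  On the read path above, a
 * byte range that falls inside an uncommitted (invalid) range gets *a_bpn
 * set to -1, meaning "no physical block yet, treat as zero-fill", and the
 * run length is clamped to the requested size.  All values below are assumed
 * example numbers.
 */
#include <stdint.h>
#include <stdio.h>

static void blockmap_result_example(void)
{
	int64_t bpn = -1;               /* physical block, -1 == hole/zero-fill */
	size_t  bytesContAvail = 65536; /* contiguous bytes reported by the mapping */
	size_t  requested = 16384;      /* size the caller asked to map */

	if (bytesContAvail > requested)
		bytesContAvail = requested;

	if (bpn == -1)
		printf("zero-fill %zu bytes\n", bytesContAvail);
	else
		printf("read %zu bytes starting at device block %lld\n",
		    bytesContAvail, (long long)bpn);
}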
/*
 * Prepare and issue the I/O.
 * buf_strategy knows how to deal
 * with requests that require
 */
hfs_vnop_strategy(struct vnop_strategy_args *ap)
{
	buf_t bp = ap->a_bp;
	vnode_t vp = buf_vnode(bp);

	return (buf_strategy(VTOHFS(vp)->hfs_devvp, ap));
}
hfs_minorupdate(struct vnode *vp) {
	struct cnode *cp = VTOC(vp);

	cp->c_flag &= ~C_MODIFIED;
	cp->c_touch_acctime = 0;
	cp->c_touch_chgtime = 0;
	cp->c_touch_modtime = 0;
do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context)
{
	register struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	struct proc *p = vfs_context_proc(context);
	kauth_cred_t cred = vfs_context_ucred(context);
	off_t actualBytesAdded;
	u_int32_t fileblocks;
	struct hfsmount *hfsmp;

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
		     (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
	/* This should only happen with a corrupt filesystem */
	if ((off_t)fp->ff_size < 0)

	if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))

	/* Files that are changing size are not hot file candidates. */
	if (hfsmp->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
	if ((retval = hfs_getinoquota(cp)))

	/*
	 * Lengthen the size of the file.  We must ensure that the
	 * last byte of the file is allocated.  Since the smallest
	 * value of ff_size is 0, length will be at least 1.
	 */
	if (length > (off_t)fp->ff_size) {
		retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),

		/*
		 * If we don't have enough physical space then
		 * we need to extend the physical size.
		 */
		if (length > filebytes) {
			u_int32_t blockHint = 0;

			/* All or nothing and don't round up to clumpsize. */
			eflags = kEFAllMask | kEFNoClumpMask;

			if (cred && suser(cred, NULL) != 0)
				eflags |= kEFReserveMask;  /* keep a reserve */
			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (filebytes == 0 &&
			    hfsmp->hfs_flags & HFS_METADATA_ZONE &&
			    hfs_virtualmetafile(cp)) {
				eflags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;
			}
			if (hfs_start_transaction(hfsmp) != 0) {

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			while ((length > filebytes) && (retval == E_NONE)) {
				bytesToAdd = length - filebytes;
				retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
								   &actualBytesAdded));
				filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
				if (actualBytesAdded == 0 && retval == E_NONE) {
					if (length > filebytes)

			hfs_systemfile_unlock(hfsmp, lockflags);

				(void) hfs_minorupdate(vp);

				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
		     (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
	if (!(flags & IO_NOZEROFILL)) {
		if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
			struct rl_entry *invalid_range;

			zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
			if (length < zero_limit) zero_limit = length;

			if (length > (off_t)fp->ff_size) {
				/* Extending the file: time to fill out the current last page w. zeroes? */
				if ((fp->ff_size & PAGE_MASK_64) &&
				    (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
				    fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {

					/* There's some valid data at the start of the (current) last page
					   of the file, so zero out the remainder of that page to ensure the
					   entire page contains valid data.  Since there is no invalid range
					   possible past the (current) eof, there's no need to remove anything
					   from the invalid range list before calling cluster_write():	*/
					retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
							fp->ff_size, (off_t)0,
							(flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
					hfs_lock(cp, HFS_FORCE_LOCK);
					if (retval) goto Err_Exit;

					/* Merely invalidate the remaining area, if necessary: */
					if (length > zero_limit) {
						rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
						cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
					}
				} else {
					/* The page containing the (current) eof is invalid: just add the
					   remainder of the page to the invalid list, along with the area
					   being newly allocated:
					 */
					rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
					cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
				}
			}
		} else {
			panic("hfs_truncate: invoked on non-UBC object?!");
		}
	}
	cp->c_touch_modtime = TRUE;
	fp->ff_size = length;
	} else { /* Shorten the size of the file */

		if ((off_t)fp->ff_size > length) {
			/* Any space previously marked as invalid is now irrelevant: */
			rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
		}

		/*
		 * Account for any unmapped blocks. Note that the new
		 * file length can still end up with unmapped blocks.
		 */
		if (fp->ff_unallocblocks > 0) {
			u_int32_t finalblks;
			u_int32_t loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);

			loanedBlocks = fp->ff_unallocblocks;
			cp->c_blocks -= loanedBlocks;
			fp->ff_blocks -= loanedBlocks;
			fp->ff_unallocblocks = 0;

			hfsmp->loanedBlocks -= loanedBlocks;

			finalblks = (length + blksize - 1) / blksize;
			if (finalblks > fp->ff_blocks) {
				/* calculate required unmapped blocks */
				loanedBlocks = finalblks - fp->ff_blocks;
				hfsmp->loanedBlocks += loanedBlocks;

				fp->ff_unallocblocks = loanedBlocks;
				cp->c_blocks += loanedBlocks;
				fp->ff_blocks += loanedBlocks;
			}
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		/*
		 * For a TBE process the deallocation of the file blocks is
		 * delayed until the file is closed.  And hfs_close calls
		 * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
		 * isn't set, we make sure this isn't a TBE process.
		 */
		if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
			off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);

			if (hfs_start_transaction(hfsmp) != 0) {

			if (fp->ff_unallocblocks == 0) {
				/* Protect extents b-tree and allocation bitmap */
				lockflags = SFL_BITMAP;
				if (overflow_extents(fp))
					lockflags |= SFL_EXTENTS;
				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

				retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
						(FCB*)fp, length, false));

				hfs_systemfile_unlock(hfsmp, lockflags);
			}
			fp->ff_size = length;

			(void) hfs_minorupdate(vp);

			(void) hfs_update(vp, TRUE);
			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);

			filebytes = (off_t)fp->ff_blocks * (off_t)blksize;

			/* These are bytesreleased */
			(void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
		}
		/* Only set update flag if the logical length changes */
		if ((off_t)fp->ff_size != length)
			cp->c_touch_modtime = TRUE;
		fp->ff_size = length;
	if (cp->c_mode & (S_ISUID | S_ISGID)) {
		if (!vfs_context_issuser(context)) {
			cp->c_mode &= ~(S_ISUID | S_ISGID);
		}
	}
	retval = hfs_minorupdate(vp);

	cp->c_touch_chgtime = TRUE;	/* status changed */
	cp->c_touch_modtime = TRUE;	/* file data was modified */
	retval = hfs_update(vp, MNT_WAIT);

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
		     -1, -1, -1, retval, 0);

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
		     (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
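/*
 * Illustrative sketch (not part of the original source): the zero-fill limit
 * used above when a file grows is the old EOF rounded up to the next page
 * boundary, clamped to the new length.  The page size and file sizes below
 * are assumed example values.
 */
#include <stdint.h>
#include <assert.h>

static void zero_limit_example(void)
{
	const int64_t page_size = 4096;
	const int64_t page_mask = page_size - 1;

	int64_t old_eof = 10000;   /* fp->ff_size before growing the file */
	int64_t new_len = 50000;   /* requested length */

	/* Round the old EOF up to a page boundary, as the code above does. */
	int64_t zero_limit = (old_eof + page_mask) & ~page_mask;
	if (new_len < zero_limit)
		zero_limit = new_len;

	assert(zero_limit == 12288);
	/* Bytes old_eof..zero_limit-1 get zeroed via cluster_write();
	 * zero_limit..new_len-1 is merely recorded as an invalid range. */
}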
/*
 * Truncate a cnode to at most length size, freeing (or adding) the
 */
hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
	     int skipupdate, vfs_context_t context)
{
	struct filefork *fp = VTOF(vp);
	u_int32_t fileblocks;
	int blksize, error = 0;
	struct cnode *cp = VTOC(vp);

	/* Cannot truncate an HFS directory! */
	if (vnode_isdir(vp)) {

	/* A swap file cannot change size. */
	if (vnode_isswap(vp) && (length != 0)) {

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	//
	// Have to do this here so that we don't wind up with
	// i/o pending for blocks that are about to be released
	// if we truncate the file.
	//
	// If skipsetsize is set, then the caller is responsible
	// for the ubc_setsize.
	//
	// Even if skipsetsize is set, if the length is zero we
	// want to call ubc_setsize() because as of SnowLeopard
	// it will no longer cause any page-ins and it will drop
	// any dirty pages so that we don't do any i/o that we
	// don't have to.  This also prevents a race where i/o
	// for truncated blocks may overwrite later data if the
	// blocks get reallocated to a different file.
	//
	if (!skipsetsize || length == 0)
		ubc_setsize(vp, length);
	//
	// have to loop truncating or growing files that are
	// really big because otherwise transactions can get
	// enormous and consume too many kernel resources.
	//
	if (length < filebytes) {
		while (filebytes > length) {
			if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes -= HFS_BIGFILE_SIZE;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
		}
	} else if (length > filebytes) {
		while (filebytes < length) {
			if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes += HFS_BIGFILE_SIZE;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
		}
	} else /* Same logical size */ {
		error = do_hfs_truncate(vp, length, flags, skipupdate, context);
	}
	/* Files that are changing size are not hot file candidates. */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}
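/*
 * Illustrative sketch (not part of the original source): the loop above walks
 * the on-disk size toward the target in large fixed-size steps so a single
 * journal transaction never covers too much work.  The chunk size and file
 * sizes below are assumed example values, and the final "land on the target"
 * step is spelled out explicitly here.
 */
#include <stdint.h>
#include <stdio.h>

static void chunked_truncate_example(void)
{
	const int64_t chunk = 0x40000000LL;    /* assumed stand-in for HFS_BIGFILE_SIZE */
	int64_t filebytes = 5LL * chunk + 123; /* current physical size */
	int64_t length = 100;                  /* target size */
	int steps = 0;

	while (filebytes > length) {
		if ((filebytes - length) > chunk)
			filebytes -= chunk;        /* one transaction-sized step */
		else
			filebytes = length;        /* final step lands on the target */
		steps++;                           /* do_hfs_truncate() would run here */
	}
	printf("reached target in %d steps\n", steps);
}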
/*
 * Preallocate file storage space.
 */
hfs_vnop_allocate(struct vnop_allocate_args /* {
		off_t *a_bytesallocated;
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode *vp = ap->a_vp;
	struct filefork *fp;
	off_t length = ap->a_length;
	off_t moreBytesRequested;
	off_t actualBytesAdded;
	u_int32_t fileblocks;
	int retval, retval2;
	u_int32_t blockHint;
	u_int32_t extendFlags;   /* For call to ExtendFileC */
	struct hfsmount *hfsmp;
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);

	*(ap->a_bytesallocated) = 0;

	if (!vnode_isreg(vp))

	if (length < (off_t)0)

	hfs_lock_truncate(cp, TRUE);

	if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {

	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
	if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {

	/* Fill in the flags word for the call to Extend the file */

	extendFlags = kEFNoClumpMask;
	if (ap->a_flags & ALLOCATECONTIG)
		extendFlags |= kEFContigMask;
	if (ap->a_flags & ALLOCATEALL)
		extendFlags |= kEFAllMask;
	if (cred && suser(cred, NULL) != 0)
		extendFlags |= kEFReserveMask;
	if (hfs_virtualmetafile(cp))
		extendFlags |= kEFMetadataMask;

	startingPEOF = filebytes;

	if (ap->a_flags & ALLOCATEFROMPEOF)
		length += filebytes;
	else if (ap->a_flags & ALLOCATEFROMVOL)
		blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

	/* If no changes are necessary, then we're done */
	if (filebytes == length)

	/*
	 * Lengthen the size of the file.  We must ensure that the
	 * last byte of the file is allocated.  Since the smallest
	 * value of filebytes is 0, length will be at least 1.
	 */
	if (length > filebytes) {
		off_t total_bytes_added = 0, orig_request_size;

		orig_request_size = moreBytesRequested = length - filebytes;

		retval = hfs_chkdq(cp,
				(int64_t)(roundup(moreBytesRequested, vcb->blockSize)),

		/*
		 * Metadata zone checks.
		 */
		if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (hfs_virtualmetafile(cp)) {
				blockHint = hfsmp->hfs_metazone_start;
			} else if ((blockHint >= hfsmp->hfs_metazone_start) &&
				   (blockHint <= hfsmp->hfs_metazone_end)) {
				/*
				 * Move blockHint outside metadata zone.
				 */
				blockHint = hfsmp->hfs_metazone_end + 1;
		while ((length > filebytes) && (retval == E_NONE)) {
			off_t bytesRequested;

			if (hfs_start_transaction(hfsmp) != 0) {

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
				bytesRequested = HFS_BIGFILE_SIZE;
			} else {
				bytesRequested = moreBytesRequested;
			}

			if (extendFlags & kEFContigMask) {
				// if we're on a sparse device, this will force it to do a
				// full scan to find the space needed.
				hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
			}

			retval = MacToVFSError(ExtendFileC(vcb,
					&actualBytesAdded));

			if (retval == E_NONE) {
				*(ap->a_bytesallocated) += actualBytesAdded;
				total_bytes_added += actualBytesAdded;
				moreBytesRequested -= actualBytesAdded;
				if (blockHint != 0) {
					blockHint += actualBytesAdded / vcb->blockSize;
				}
			}
			filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

			hfs_systemfile_unlock(hfsmp, lockflags);

			(void) hfs_update(vp, TRUE);
			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);
		}

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes))

		/*
		 * Adjust actualBytesAdded to be allocation block aligned, not
		 * clump size aligned.
		 * NOTE: So what we are reporting does not affect reality
		 * until the file is closed, when we truncate the file to allocation
		 */
		if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
			*(ap->a_bytesallocated) =
				roundup(orig_request_size, (off_t)vcb->blockSize);
	} else { /* Shorten the size of the file */

		if (fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).
			 */
		}

		retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context);
		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes)) goto Err_Exit;

		/* These are bytesreleased */
		(void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED, 0);

		if (fp->ff_size > filebytes) {
			fp->ff_size = filebytes;

			ubc_setsize(vp, fp->ff_size);
			hfs_lock(cp, HFS_FORCE_LOCK);
		}
	}

	cp->c_touch_chgtime = TRUE;
	cp->c_touch_modtime = TRUE;
	retval2 = hfs_update(vp, MNT_WAIT);

	hfs_unlock_truncate(cp, TRUE);
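/*
 * Illustrative sketch (not part of the original source): the preallocation
 * path reports bytes rounded up to allocation-block alignment rather than
 * the clump-aligned amount that was physically grabbed.  The block size and
 * request sizes below are assumed example values.
 */
#include <stdint.h>
#include <assert.h>

static int64_t roundup_to_block(int64_t value, int64_t blocksize)
{
	/* Same effect as the roundup(value, blocksize) used above. */
	return ((value + blocksize - 1) / blocksize) * blocksize;
}

static void allocate_report_example(void)
{
	int64_t block = 4096;               /* assumed allocation block size */
	int64_t orig_request_size = 10000;  /* bytes the caller asked for */
	int64_t total_bytes_added = 65536;  /* clump-aligned amount actually allocated */

	int64_t reported = total_bytes_added;
	if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
		reported = roundup_to_block(orig_request_size, block);

	assert(reported == 12288);
}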
/*
 * Pagein for HFS filesystem
 */
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
	struct vnop_pagein_args {
		vm_offset_t a_pl_offset,
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;

	if (VNODE_IS_RSRC(vp)) {
		/* allow pageins of the resource fork */

	int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */

	error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));

		/* successful page-in, update the access time */
		VTOC(vp)->c_touch_acctime = TRUE;

		/* compressed files are not hot file candidates */
		if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
			VTOF(vp)->ff_bytesread = 0;

	/* otherwise the file was converted back to a regular file while we were reading it */

	error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
			       ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
	/*
	 * Keep track of blocks read.
	 */
	if (!vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
		struct filefork *fp;
		int took_cnode_lock = 0;

		if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
			bytesread = fp->ff_size;
		else
			bytesread = ap->a_size;

		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			took_cnode_lock = 1;
		}
		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
			fp->ff_bytesread = bytesread;
			cp->c_atime = tv.tv_sec;
		} else {
			fp->ff_bytesread += bytesread;
		}
		cp->c_touch_acctime = TRUE;
		if (took_cnode_lock)
/*
 * Pageout for HFS filesystem.
 */
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
	struct vnop_pageout_args {
		vm_offset_t a_pl_offset,
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	struct filefork *fp;
	upl_page_info_t *pl;
	vm_offset_t a_pl_offset;
	int is_pageoutv2 = 0;

	/*
	 * Figure out where the file ends, for pageout purposes.  If
	 * ff_new_size > ff_size, then we're in the middle of extending the
	 * file via a write, so it is safe (and necessary) that we be able
	 * to pageout up to that point.
	 */
	filesize = fp->ff_size;
	if (fp->ff_new_size > filesize)
		filesize = fp->ff_new_size;

	a_flags = ap->a_flags;
	a_pl_offset = ap->a_pl_offset;

	/*
	 * we can tell if we're getting the new or old behavior from the UPL
	 */
	if ((upl = ap->a_pl) == NULL) {

		/*
		 * we're in control of any UPL we commit
		 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
		 */
		a_flags &= ~UPL_NOCOMMIT;

		/*
		 * take truncate lock (shared) to guard against
		 * zero-fill thru fsync interfering, but only for v2
		 */
		hfs_lock_truncate(cp, 0);
		if (a_flags & UPL_MSYNC) {
			request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
		} else {
			request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
		}

		kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);

		if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {

	/*
	 * from this point forward upl points at the UPL we're working with
	 * it was either passed in or we successfully created it
	 */

	/*
	 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
	 * UPL instead of relying on the UPL passed into us.  We go ahead and do that here,
	 * scanning for dirty ranges.  We'll issue our own N cluster_pageout calls, for
	 * N dirty ranges in the UPL.  Note that this is almost a direct copy of the
	 * logic in vnode_pageout except that we need to do it after grabbing the truncate
	 * lock in HFS so that we don't lock invert ourselves.
	 *
	 * Note that we can still get into this function on behalf of the default pager with
	 * non-V2 behavior (swapfiles).  However in that case, we did not grab locks above
	 * since fsync and other writing threads will grab the locks, then mark the
	 * relevant pages as busy.  But the pageout codepath marks the pages as busy,
	 * and THEN would attempt to grab the truncate lock, which would result in deadlock.  So
	 * we do not try to grab anything for the pre-V2 case, which should only be accessed
	 * by the paging/VM system.
	 */
		f_offset = ap->a_f_offset;

		/*
		 * Scan from the back to find the last page in the UPL, so that we
		 * aren't looking at a UPL that may have already been freed by the
		 * preceding aborts/completions.
		 */
		for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
			if (upl_page_present(pl, --pg_index))
				break;
		}
		if (pg_index == 0) {
			ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
		}

		/*
		 * initialize the offset variables before we touch the UPL.
		 * a_f_offset is the position into the file, in bytes
		 * offset is the position into the UPL, in bytes
		 * pg_index is the pg# of the UPL we're operating on.
		 * isize is the offset into the UPL of the last non-clean page.
		 */
		isize = ((pg_index + 1) * PAGE_SIZE);

			if ( !upl_page_present(pl, pg_index)) {
				/*
				 * we asked for RET_ONLY_DIRTY, so it's possible
				 * to get back empty slots in the UPL.
				 * just skip over them
				 */
				f_offset += PAGE_SIZE;
				offset   += PAGE_SIZE;
			}
			if ( !upl_dirty_page(pl, pg_index)) {
				panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
			}

			/*
			 * We know that we have at least one dirty page.
			 * Now checking to see how many in a row we have
			 */
			xsize = isize - PAGE_SIZE;

				if ( !upl_dirty_page(pl, pg_index + num_of_pages))

			xsize = num_of_pages * PAGE_SIZE;

			if (!vnode_isswap(vp)) {

				if (cp->c_lockowner != current_thread()) {
					if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
						/*
						 * we're in the v2 path, so we are the
						 * owner of the UPL... we may have already
						 * processed some of the UPL, so abort it
						 * from the current working offset to the
						 */
						ubc_upl_abort_range(upl,
								    ap->a_size - offset,
								    UPL_ABORT_FREE_ON_EMPTY);

				end_of_range = f_offset + xsize - 1;

				if (end_of_range >= filesize) {
					end_of_range = (off_t)(filesize - 1);
				}
				if (f_offset < filesize) {
					rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
					cp->c_flag |= C_MODIFIED;  /* leof is dirty */
				}

			if ((error = cluster_pageout(vp, upl, offset, f_offset,
						     xsize, filesize, a_flags))) {

			pg_index += num_of_pages;

			/* capture errnos bubbled out of cluster_pageout if they occurred */
			if (error_ret != 0) {
	} /* end block for v2 pageout behavior */
		if (!vnode_isswap(vp)) {

			if (cp->c_lockowner != current_thread()) {
				if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
					if (!(a_flags & UPL_NOCOMMIT)) {
						ubc_upl_abort_range(upl,
								    UPL_ABORT_FREE_ON_EMPTY);

			end_of_range = ap->a_f_offset + ap->a_size - 1;

			if (end_of_range >= filesize) {
				end_of_range = (off_t)(filesize - 1);
			}
			if (ap->a_f_offset < filesize) {
				rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
				cp->c_flag |= C_MODIFIED;  /* leof is dirty */
			}

		/*
		 * just call cluster_pageout for old pre-v2 behavior
		 */
		retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
					 ap->a_size, filesize, a_flags);
	/*
	 * If data was written, update the modification time of the file.
	 * If setuid or setgid bits are set and this process is not the
	 * superuser then clear the setuid and setgid bits as a precaution
	 * against tampering.
	 */
	cp->c_touch_modtime = TRUE;
	cp->c_touch_chgtime = TRUE;
	if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
	    (vfs_context_suser(ap->a_context) != 0)) {
		hfs_lock(cp, HFS_FORCE_LOCK);
		cp->c_mode &= ~(S_ISUID | S_ISGID);
	}

	/* release truncate lock (shared) */
	hfs_unlock_truncate(cp, 0);
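/*
 * Illustrative sketch (not part of the original source): the V2 pageout path
 * above walks the UPL's page list, skips absent slots, and groups consecutive
 * dirty pages into one cluster_pageout call per run.  The bitmap below is an
 * assumed stand-in for upl_page_present()/upl_dirty_page().
 */
#include <stddef.h>
#include <stdio.h>

static void dirty_run_scan_example(void)
{
	/* 0 = absent slot, 1 = dirty page (RET_ONLY_DIRTY hands back no clean pages) */
	const int page[] = { 1, 1, 0, 0, 1, 1, 1, 0, 1 };
	const size_t npages = sizeof(page) / sizeof(page[0]);
	const size_t page_size = 4096;

	for (size_t pg_index = 0; pg_index < npages; ) {
		if (!page[pg_index]) {        /* empty slot: just skip over it */
			pg_index++;
			continue;
		}
		size_t num_of_pages = 1;      /* count how many dirty pages in a row */
		while (pg_index + num_of_pages < npages && page[pg_index + num_of_pages])
			num_of_pages++;

		printf("pageout %zu bytes at page %zu\n",
		    num_of_pages * page_size, pg_index);
		pg_index += num_of_pages;     /* one cluster_pageout() per run */
	}
}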
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
	    (vp == VTOHFS(vp)->hfc_filevp)) {

		/*
		 * Swap and validate the node if it is in native byte order.
		 * This is always true on big endian, so we always validate
		 * before writing here.  On little endian, the node typically has
		 * been swapped and validated when it was written to the journal,
		 * so we won't do anything here.
		 */
		if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = (char *)buf_dataptr(bp);
			block.blockNum = buf_lblkno(bp);
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (buf_fromcache(bp) == 0);
			block.blockSize = buf_count(bp);

			/* Endian un-swap B-Tree node */
			retval = hfs_swap_BTNode(&block, vp, kSwapBTNodeHostToBig, false);
				panic("hfs_vnop_bwrite: about to write corrupt node!\n");
		}
	}

	/* This buffer shouldn't be locked anymore but if it is clear it */
	if ((buf_flags(bp) & B_LOCKED)) {
		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp %p\n", bp);
		}
		buf_clearflags(bp, B_LOCKED);
	}
	retval = vn_bwrite(ap);
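/*
 * Illustrative sketch (not part of the original source): the check above
 * reads the last u_int16_t of the node buffer, which holds the offset of the
 * node's first record.  When the buffer is still in host byte order that
 * value reads back as 0x000e, so the node still needs the host-to-big swap;
 * once swapped, the same native read would not match.  The buffer below is a
 * tiny assumed stand-in, not a real B-tree node.
 */
#include <stdint.h>
#include <string.h>
#include <stdio.h>

static void btree_swap_check_example(void)
{
	unsigned char node[512];
	memset(node, 0, sizeof(node));

	uint16_t first_record_offset = 0x000e;     /* host-order value */
	memcpy(node + sizeof(node) - 2, &first_record_offset, 2);

	uint16_t trailer;
	memcpy(&trailer, node + sizeof(node) - 2, 2);

	if (trailer == 0x000e)
		printf("node is in native byte order: swap before writing\n");
	else
		printf("node already swapped for disk\n");
}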
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 *	-----------------     -----------------
 *	|///////////////|     |               |     STEP 1 (acquire new blocks)
 *	-----------------     -----------------
 *
 *	-----------------     -----------------
 *	|///////////////|     |///////////////|     STEP 2 (clone data)
 *	-----------------     -----------------
 *
 *	                      |///////////////|     STEP 3 (head truncate blocks)
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
	struct filefork *fp;
	struct hfsmount *hfsmp;
	u_int32_t nextallocsave;
	daddr64_t sector_a, sector_b;
	int took_trunc_lock = 0;
	enum vtype vnodetype;

	vnodetype = vnode_vtype(vp);
	if (vnodetype != VREG && vnodetype != VLNK) {

	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {

	if (fp->ff_unallocblocks)

	blksize = hfsmp->blockSize;

	blockHint = hfsmp->nextAllocation;

	if ((fp->ff_size > 0x7fffffff) ||
	    ((fp->ff_size > blksize) && vnodetype == VLNK)) {

	//
	// We do not believe that this call to hfs_fsync() is
	// necessary and it causes a journal transaction
	// deadlock so we are removing it.
	//
	//if (vnodetype == VREG && !vnode_issystem(vp)) {
	//	retval = hfs_fsync(vp, MNT_WAIT, 0, p);

	if (!vnode_issystem(vp) && (vnodetype != VLNK)) {

		hfs_lock_truncate(cp, TRUE);
		/* Force lock since callers expect the lock to be held. */
		if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
			hfs_unlock_truncate(cp, TRUE);

		/* No need to continue if file was removed. */
		if (cp->c_flag & C_NOEXISTS) {
			hfs_unlock_truncate(cp, TRUE);

		took_trunc_lock = 1;

	headblks = fp->ff_blocks;
	datablks = howmany(fp->ff_size, blksize);
	growsize = datablks * blksize;
	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
	if (blockHint >= hfsmp->hfs_metazone_start &&
	    blockHint <= hfsmp->hfs_metazone_end)
		eflags |= kEFMetadataMask;
	if (hfs_start_transaction(hfsmp) != 0) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, TRUE);

	/*
	 * Protect the extents b-tree and the allocation bitmap
	 * during MapFileBlockC and ExtendFileC operations.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
		retval = MacToVFSError(retval);

	/*
	 * STEP 1 - acquire new allocation blocks.
	 */
	nextallocsave = hfsmp->nextAllocation;
	retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
	if (eflags & kEFMetadataMask) {
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
		MarkVCBDirty(hfsmp);
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	}

	retval = MacToVFSError(retval);
		cp->c_flag |= C_MODIFIED;
		if (newbytes < growsize) {
		} else if (fp->ff_blocks < (headblks + datablks)) {
			printf("hfs_relocate: allocation failed");
		retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
			retval = MacToVFSError(retval);
	} else if ((sector_a + 1) == sector_b) {
	} else if ((eflags & kEFMetadataMask) &&
	           ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
	              hfsmp->hfs_metazone_end)) {
		const char * filestr;
		char emptystr = '\0';

		if (cp->c_desc.cd_nameptr != NULL) {
			filestr = (const char *)&cp->c_desc.cd_nameptr[0];
		} else if (vnode_name(vp) != NULL) {
			filestr = vnode_name(vp);
		} else {
			filestr = &emptystr;
		}
	}
	/* Done with system locks and journal for now. */
	hfs_systemfile_unlock(hfsmp, lockflags);
	hfs_end_transaction(hfsmp);

		/*
		 * Check to see if failure is due to excessive fragmentation.
		 */
		if ((retval == ENOSPC) &&
		    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
			hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
		}
	/*
	 * STEP 2 - clone file data into the new allocation blocks.
	 */
	if (vnodetype == VLNK)
		retval = hfs_clonelink(vp, blksize, cred, p);
	else if (vnode_issystem(vp))
		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	else
		retval = hfs_clonefile(vp, headblks, datablks, blksize);

	/* Start transaction for step 3 or for a restore. */
	if (hfs_start_transaction(hfsmp) != 0) {

	/*
	 * STEP 3 - switch to cloned data and remove old blocks.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);

	hfs_systemfile_unlock(hfsmp, lockflags);

	if (took_trunc_lock)
		hfs_unlock_truncate(cp, TRUE);

	hfs_systemfile_unlock(hfsmp, lockflags);

	/* Push cnode's new extent data to disk. */
	(void) hfs_update(vp, MNT_WAIT);

	if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
		(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	else
		(void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);

	hfs_end_transaction(hfsmp);

	if (fp->ff_blocks == headblks) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, TRUE);

	/*
	 * Give back any newly allocated space.
	 */
	if (lockflags == 0) {
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	}

	(void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);

	hfs_systemfile_unlock(hfsmp, lockflags);

	if (took_trunc_lock)
		hfs_unlock_truncate(cp, TRUE);
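/*
 * Illustrative sketch (not part of the original source): block accounting
 * across the three relocation steps described above.  headblks is the
 * original block count, datablks the number of blocks the data needs; all
 * numbers are assumed examples.
 */
#include <stdint.h>
#include <assert.h>

static void relocate_accounting_example(void)
{
	uint32_t headblks = 100;              /* blocks owned before relocation */
	uint32_t datablks = 100;              /* blocks needed for the data */
	uint32_t ff_blocks = headblks;

	ff_blocks += datablks;                /* STEP 1: acquire new blocks       */
	assert(ff_blocks == headblks + datablks);

	/* STEP 2: clone data; the file still owns both the old and new blocks. */

	ff_blocks -= headblks;                /* STEP 3: head-truncate old blocks */
	assert(ff_blocks == datablks);
}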
hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
{
	struct buf *head_bp = NULL;
	struct buf *tail_bp = NULL;

	error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);

	tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
	if (tail_bp == NULL) {

	bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
	error = (int)buf_bwrite(tail_bp);

	buf_markinvalid(head_bp);
	buf_brelse(head_bp);

	(void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
/*
 * Clone a file's data within the file.
 *
 */
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 128 * 1024);

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {

	hfs_unlock(VTOC(vp));

	auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);

	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);

		uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_read(vp, auio, copysize, IO_NOCACHE);
			printf("hfs_clonefile: cluster_read failed - %d\n", error);
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
		}

		uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_write(vp, auio, writebase + offset,
				      writebase + offset + iosize,
				      uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
			printf("hfs_clonefile: cluster_write failed - %d\n", error);
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
		}
	}

	if ((blksize & PAGE_MASK)) {
		/*
		 * since the copy may not have started on a PAGE
		 * boundary (or may not have ended on one), we
		 * may have pages left in the cache since NOCACHE
		 * will let partially written pages linger...
		 * lets just flush the entire range to make sure
		 * we don't have any pages left that are beyond
		 * (or intersect) the real LEOF of this file
		 */
		ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
	} else {
		/*
		 * No need to call ubc_sync_range or hfs_invalbuf
		 * since the file was copied using IO_NOCACHE and
		 * the copy was done starting and ending on a page
		 * boundary in the file.
		 */
	}
	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
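/*
 * Illustrative sketch (not part of the original source): the clone loop above
 * copies the fork in fixed-size chunks (128 KB here), clamping the final
 * chunk to whatever remains, and writing each chunk at the same offset within
 * the newly allocated blocks.  The sizes below are assumed example values and
 * the copy is simulated with plain memory buffers instead of cluster I/O.
 */
#include <stdint.h>
#include <string.h>
#include <assert.h>

static void clone_copy_loop_example(void)
{
	enum { CHUNK = 128 * 1024 };
	static char src[300 * 1024];           /* stand-in for the old blocks */
	static char dst[300 * 1024];           /* stand-in for the new blocks */
	static char buf[CHUNK];                /* bounce buffer, like bufp above */

	size_t copysize = sizeof(src);
	size_t offset = 0;

	memset(src, 0xAB, sizeof(src));

	while (offset < copysize) {
		size_t iosize = copysize - offset;
		if (iosize > CHUNK)
			iosize = CHUNK;                  /* clamp the last chunk */

		memcpy(buf, src + offset, iosize);       /* "cluster_read"  */
		memcpy(dst + offset, buf, iosize);       /* "cluster_write" */
		offset += iosize;
	}
	assert(memcmp(src, dst, copysize) == 0);
}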
/*
 * Clone a system (metadata) file.
 *
 */
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
		 kauth_cred_t cred, struct proc *p)
{
	struct buf *bp = NULL;
	daddr64_t start_blk;

	iosize = GetLogicalBlockSize(vp);
	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
	breadcnt = bufsize / iosize;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {

	start_blk = ((daddr64_t)blkstart * blksize) / iosize;
	last_blk  = ((daddr64_t)blkcnt * blksize) / iosize;

	while (blkno < last_blk) {
		/*
		 * Read up to a megabyte
		 */
		for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
			error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
				printf("hfs_clonesysfile: meta_bread error %d\n", error);
			if (buf_count(bp) != iosize) {
				printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
			}
			bcopy((char *)buf_dataptr(bp), offset, iosize);

			buf_markinvalid(bp);
		}

		/*
		 * Write up to a megabyte
		 */
		for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
			bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
				printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
			bcopy(offset, (char *)buf_dataptr(bp), iosize);
			error = (int)buf_bwrite(bp);
		}
	}

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	error = hfs_fsync(vp, MNT_WAIT, 0, p);