/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*	@(#)hfs_readwrite.c	1.0
 *
 *	(c) 1998-2001 Apple Computer, Inc.  All Rights Reserved
 *
 *	hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vnode_internal.h>
#include <sys/vfs_context.h>
#include <sys/fsevents.h>
#include <kern/kalloc.h>
#include <sys/sysctl.h>
#include <miscfs/specfs/specdev.h>
#include <sys/ubc_internal.h>
#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>
#include <sys/kdebug.h>

#include "hfs_attrlist.h"
#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
#define can_cluster(size)  ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
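/*
 * Illustration (not part of the original source): can_cluster() accepts only
 * buffer sizes that are 4 KB multiples and no larger than half of MAXPHYSIO.
 * For example, assuming MAXPHYSIO were 128 KB:
 *
 *	can_cluster(8192)   -> true  (4 KB multiple, <= 64 KB)
 *	can_cluster(6144)   -> false (not a 4 KB multiple)
 *	can_cluster(131072) -> false (larger than MAXPHYSIO/2)
 */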
enum {
    MAXHFSFILESIZE = 0x7FFFFFFF     /* this needs to go in the mount structure */
};

/* from bsd/vfs/vfs_cluster.c */
extern int is_file_clean(vnode_t vp, off_t filesize);

/* from bsd/hfs/hfs_vfsops.c */
extern int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
static int hfs_clonefile(struct vnode *, int, int, int);
static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);

int flush_cache_on_write = 0;
SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
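/*
 * Sketch (not in the original source): because the knob above is published
 * with SYSCTL_INT(_kern, ...), a privileged userspace process could toggle it
 * as "kern.flush_cache_on_write" via sysctlbyname(3).  Illustrative only.
 */
#if 0   /* userspace illustration, not kernel code */
#include <sys/types.h>
#include <sys/sysctl.h>

static int
enable_flush_on_uncached_writes(void)
{
    int one = 1;

    /* sets kern.flush_cache_on_write to 1; returns 0 on success */
    return sysctlbyname("kern.flush_cache_on_write", NULL, NULL, &one, sizeof(one));
}
#endif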
/*
 * Read data from a file.
 */
hfs_vnop_read(struct vnop_read_args *ap)
{
    uio_t uio = ap->a_uio;
    struct vnode *vp = ap->a_vp;
    struct hfsmount *hfsmp;
    off_t start_resid = uio_resid(uio);
    off_t offset = uio_offset(uio);

    /* Preflight checks */
    if (!vnode_isreg(vp)) {
        /* can only read regular files */
    if (start_resid == 0)
        return (0);             /* Nothing left to do */
        return (EINVAL);        /* cant read from a negative offset */

    /* Protect against a size change. */
    hfs_lock_truncate(cp, 0);

    filesize = fp->ff_size;
    filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
    if (offset > filesize) {
        if ((hfsmp->hfs_flags & HFS_STANDARD) &&
            (offset > (off_t)MAXHFSFILESIZE)) {

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
        (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

    retval = cluster_read(vp, uio, filesize, ap->a_ioflag);

    cp->c_touch_acctime = TRUE;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
        (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

    /*
     * Keep track blocks read
     */
    if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
        int took_cnode_lock = 0;

        bytesread = start_resid - uio_resid(uio);

        /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
        if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
            hfs_lock(cp, HFS_FORCE_LOCK);
        /*
         * If this file hasn't been seen since the start of
         * the current sampling period then start over.
         */
        if (cp->c_atime < hfsmp->hfc_timebase) {
            fp->ff_bytesread = bytesread;
            cp->c_atime = tv.tv_sec;
            fp->ff_bytesread += bytesread;

    hfs_unlock_truncate(cp, 0);
/*
 * Write data to a file.
 */
hfs_vnop_write(struct vnop_write_args *ap)
{
    uio_t uio = ap->a_uio;
    struct vnode *vp = ap->a_vp;
    struct hfsmount *hfsmp;
    kauth_cred_t cred = NULL;
    off_t bytesToAdd = 0;
    off_t actualBytesAdded;
    int ioflag = ap->a_ioflag;
    int cnode_locked = 0;
    int partialwrite = 0;
    int exclusive_lock = 0;

    // LP64todo - fix this! uio_resid may be 64-bit value
    resid = uio_resid(uio);
    offset = uio_offset(uio);

    if (ioflag & IO_APPEND) {

    if (!vnode_isreg(vp))
        return (EPERM);         /* Can only write regular files */

    eflags = kEFDeferMask;      /* defer file block allocations */
#ifdef HFS_SPARSE_DEV
    /*
     * When the underlying device is sparse and space
     * is low (< 8MB), stop doing delayed allocations
     * and begin doing synchronous I/O.
     */
    if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
        (hfs_freeblks(hfsmp, 0) < 2048)) {
        eflags &= ~kEFDeferMask;
#endif /* HFS_SPARSE_DEV */

    /* Protect against a size change. */
    hfs_lock_truncate(cp, exclusive_lock);

    if (ioflag & IO_APPEND) {
        uio_setoffset(uio, fp->ff_size);
        offset = fp->ff_size;
    if ((cp->c_flags & APPEND) && offset != fp->ff_size) {

    origFileSize = fp->ff_size;
    writelimit = offset + resid;
    filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;

    /* If the truncate lock is shared, and if we either have virtual
     * blocks or will need to extend the file, upgrade the truncate
     * to exclusive lock.  If upgrade fails, we lose the lock and
     * have to get exclusive lock again
     */
    if ((exclusive_lock == 0) &&
        ((fp->ff_unallocblocks != 0) || (writelimit > filebytes))) {
        /* Lock upgrade failed and we lost our shared lock, try again */
        if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {

    if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {

    if (!exclusive_lock) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
            (int)offset, uio_resid(uio), (int)fp->ff_size,

    /* Check if we do not need to extend the file */
    if (writelimit <= filebytes) {

    cred = vfs_context_ucred(ap->a_context);
    bytesToAdd = writelimit - filebytes;
    retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),

    if (hfs_start_transaction(hfsmp) != 0) {

    while (writelimit > filebytes) {
        bytesToAdd = writelimit - filebytes;
        if (cred && suser(cred, NULL) != 0)
            eflags |= kEFReserveMask;

        /* Protect extents b-tree and allocation bitmap */
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

        /* Files that are changing size are not hot file candidates. */
        if (hfsmp->hfc_stage == HFC_RECORDING) {
            fp->ff_bytesread = 0;
        retval = MacToVFSError(ExtendFileC (hfsmp, (FCB *)fp, bytesToAdd,
                0, eflags, &actualBytesAdded));

        hfs_systemfile_unlock(hfsmp, lockflags);

        if ((actualBytesAdded == 0) && (retval == E_NONE))
        if (retval != E_NONE)
        filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
            (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);

    (void) hfs_update(vp, TRUE);
    (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
    (void) hfs_end_transaction(hfsmp);

    /*
     * If we didn't grow the file enough try a partial write.
     * POSIX expects this behavior.
     */
    if ((retval == ENOSPC) && (filebytes > offset)) {
        uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
        writelimit = filebytes;

    if (retval == E_NONE) {
        struct rl_entry *invalid_range;

        if (writelimit > fp->ff_size)
            filesize = writelimit;
            filesize = fp->ff_size;

        lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);

        if (offset <= fp->ff_size) {
            zero_off = offset & ~PAGE_MASK_64;

            /* Check to see whether the area between the zero_offset and the start
               of the transfer to see whether is invalid and should be zero-filled
               as part of the transfer:
             */
            if (offset > zero_off) {
                if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
                    lflag |= IO_HEADZEROFILL;

            off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
            /* The bytes between fp->ff_size and uio->uio_offset must never be
               read without being zeroed.  The current last block is filled with zeroes
               if it holds valid data but in all cases merely do a little bookkeeping
               to track the area from the end of the current last page to the start of
               the area actually written.  For the same reason only the bytes up to the
               start of the page where this write will start is invalidated; any remainder
               before uio->uio_offset is explicitly zeroed as part of the cluster_write.

               Note that inval_start, the start of the page after the current EOF,
               may be past the start of the write, in which case the zeroing
               will be handled by the cluster_write of the actual data.
             */
            inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
            inval_end = offset & ~PAGE_MASK_64;
            zero_off = fp->ff_size;
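            /*
             * Illustrative arithmetic (not in the original source, assuming
             * 4 KB pages): with fp->ff_size == 10000 and offset == 20000,
             * the three lines above yield inval_start = 12288 (EOF rounded
             * up to a page boundary), inval_end = 16384 (write offset
             * rounded down to a page boundary), and zero_off = 10000.
             */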
            if ((fp->ff_size & PAGE_MASK_64) &&
                (rl_scan(&fp->ff_invalidranges,
                         &invalid_range) != RL_NOOVERLAP)) {
                /* The page containing the EOF is not valid, so the
                   entire page must be made inaccessible now.  If the write
                   starts on a page beyond the page containing the eof
                   (inval_end > eof_page_base), add the
                   whole page to the range to be invalidated.  Otherwise
                   (i.e. if the write starts on the same page), zero-fill
                   the entire page explicitly now:
                 */
                if (inval_end > eof_page_base) {
                    inval_start = eof_page_base;
                    zero_off = eof_page_base;

            if (inval_start < inval_end) {
                /* There's some range of data that's going to be marked invalid */

                if (zero_off < inval_start) {
                    /* The pages between inval_start and inval_end are going to be invalidated,
                       and the actual write will start on a page past inval_end.  Now's the last
                       chance to zero-fill the page containing the EOF:
                     */
                    retval = cluster_write(vp, (uio_t) 0,
                            fp->ff_size, inval_start,
                            lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
                    hfs_lock(cp, HFS_FORCE_LOCK);
                    if (retval) goto ioerr_exit;
                    offset = uio_offset(uio);

                /* Mark the remaining area of the newly allocated space as invalid: */
                rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
                cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
                zero_off = fp->ff_size = inval_end;

            if (offset > zero_off) lflag |= IO_HEADZEROFILL;

        /* Check to see whether the area between the end of the write and the end of
           the page it falls in is invalid and should be zero-filled as part of the transfer:
         */
        tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
        if (tail_off > filesize) tail_off = filesize;
        if (tail_off > writelimit) {
            if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
                lflag |= IO_TAILZEROFILL;

        /*
         * if the write starts beyond the current EOF (possibly advanced in the
         * zeroing of the last block, above), then we'll zero fill from the current EOF
         * to where the write begins:
         *
         * NOTE: If (and ONLY if) the portion of the file about to be written is
         * before the current EOF it might be marked as invalid now and must be
         * made readable (removed from the invalid ranges) before cluster_write
         */
        io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
        if (io_start < fp->ff_size) {
            io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
            rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);

        retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
                tail_off, lflag | IO_NOZERODIRTY);

        offset = uio_offset(uio);
        if (offset > fp->ff_size) {
            fp->ff_size = offset;

            ubc_setsize(vp, fp->ff_size);       /* XXX check errors */
            /* Files that are changing size are not hot file candidates. */
            if (hfsmp->hfc_stage == HFC_RECORDING)
                fp->ff_bytesread = 0;
        if (resid > uio_resid(uio)) {
            cp->c_touch_chgtime = TRUE;
            cp->c_touch_modtime = TRUE;

        uio_setresid(uio, (uio_resid(uio) + bytesToAdd));

    // XXXdbg - see radar 4871353 for more info
    if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
        VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);

    HFS_KNOTE(vp, NOTE_WRITE);

    /*
     * If we successfully wrote any data, and we are not the superuser
     * we clear the setuid and setgid bits as a precaution against
     */
    if (cp->c_mode & (S_ISUID | S_ISGID)) {
        cred = vfs_context_ucred(ap->a_context);
        if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
            hfs_lock(cp, HFS_FORCE_LOCK);
            cp->c_mode &= ~(S_ISUID | S_ISGID);

    if (ioflag & IO_UNIT) {
        hfs_lock(cp, HFS_FORCE_LOCK);
        (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
        // LP64todo - fix this! resid needs to by user_ssize_t
        uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
        uio_setresid(uio, resid);
        filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
    } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
        hfs_lock(cp, HFS_FORCE_LOCK);
        retval = hfs_update(vp, TRUE);

    /* Updating vcbWrCnt doesn't need to be atomic. */

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
        (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);

    hfs_unlock_truncate(cp, exclusive_lock);
/* support for the "bulk-access" fcntl */

#define CACHE_LEVELS 16
#define NUM_CACHE_ENTRIES (64*16)
#define PARENT_IDS_FLAG 0x100

struct access_cache {
    int numcached;
    int cachehits;              /* these two for statistics gathering */
    unsigned int *acache;
    unsigned char *haveaccess;
};

struct access_t {
    uid_t  uid;                 /* IN: effective user id */
    short  flags;               /* IN: access requested (i.e. R_OK) */
    short  num_groups;          /* IN: number of groups user belongs to */
    int    num_files;           /* IN: number of files to process */
    int   *file_ids;            /* IN: array of file ids */
    gid_t *groups;              /* IN: array of groups */
    short *access;              /* OUT: access info for each file (0 for 'has access') */
};

struct user_access_t {
    uid_t       uid;            /* IN: effective user id */
    short       flags;          /* IN: access requested (i.e. R_OK) */
    short       num_groups;     /* IN: number of groups user belongs to */
    int         num_files;      /* IN: number of files to process */
    user_addr_t file_ids;       /* IN: array of file ids */
    user_addr_t groups;         /* IN: array of groups */
    user_addr_t access;         /* OUT: access info for each file (0 for 'has access') */
};
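/*
 * Userspace sketch (not part of the original source): a 32-bit caller would
 * fill in a struct access_t like this and hand it to the volume through the
 * fsctl(2) wrapper.  The request constant is assumed to be the HFS_BULKACCESS
 * value exported by hfs_fsctl.h for this release (the kernel side is the
 * HFS_BULKACCESS_FSCTL case in hfs_vnop_ioctl() below); verify the exact name
 * against the headers before relying on it.
 */
#if 0   /* userspace illustration */
static int
bulk_access_example(const char *any_path_on_volume, int ids[3], short results[3])
{
    struct access_t req;

    req.uid        = geteuid();
    req.flags      = R_OK;
    req.num_groups = 0;
    req.num_files  = 3;
    req.file_ids   = ids;       /* catalog node IDs to test */
    req.groups     = NULL;
    req.access     = results;   /* filled with 0 or an errno per file */

    return fsctl(any_path_on_volume, HFS_BULKACCESS, &req, 0);
}
#endif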
// these are the "extended" versions of the above structures
// note that it is crucial that they be different sized than
// the regular version
struct ext_access_t {
    uint32_t  flags;            /* IN: access requested (i.e. R_OK) */
    uint32_t  num_files;        /* IN: number of files to process */
    uint32_t  map_size;         /* IN: size of the bit map */
    uint32_t *file_ids;         /* IN: Array of file ids */
    char     *bitmap;           /* OUT: hash-bitmap of interesting directory ids */
    short    *access;           /* OUT: access info for each file (0 for 'has access') */
    uint32_t  num_parents;      /* future use */
    cnid_t   *parents;          /* future use */
};

struct ext_user_access_t {
    uint32_t    flags;          /* IN: access requested (i.e. R_OK) */
    uint32_t    num_files;      /* IN: number of files to process */
    uint32_t    map_size;       /* IN: size of the bit map */
    user_addr_t file_ids;       /* IN: array of file ids */
    user_addr_t bitmap;         /* OUT: hash-bitmap of interesting directory ids */
    user_addr_t access;         /* OUT: access info for each file (0 for 'has access') */
    uint32_t    num_parents;    /* future use */
    user_addr_t parents;        /* future use */
};
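/*
 * The note above says the extended structures must not be the same size as
 * the regular ones; do_bulk_access_check() below distinguishes the variants
 * purely by the arg_size it is handed.  A compile-time guard such as the
 * following would document that invariant (illustrative addition, not in the
 * original source).
 */
typedef char assert_ext_access_differs[(sizeof(struct ext_access_t) != sizeof(struct access_t)) ? 1 : -1];
typedef char assert_ext_user_access_differs[(sizeof(struct ext_user_access_t) != sizeof(struct user_access_t)) ? 1 : -1];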
/*
 * Perform a binary search for the given parent_id.  Return value is
 * the index if there is a match.  If no_match_indexp is non-NULL it
 * will be assigned with the index to insert the item (even if it was
 */
static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
{
    unsigned int mid = ((hi - lo)/2) + lo;
    unsigned int this_id = array[mid];

    if (parent_id == this_id) {
    if (parent_id < this_id) {
    if (parent_id > this_id) {

    /* check if lo and hi converged on the match */
    if (parent_id == array[hi]) {
    if (no_match_indexp) {
        *no_match_indexp = hi;
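/*
 * Illustration (not part of the original source) of the search contract:
 * a hit returns the element's index, a miss returns -1 and reports where the
 * new id would have to be inserted to keep the array sorted.
 */
#if 0   /* illustration */
{
    cnid_t ids[] = { 10, 20, 30, 40 };
    int insert_at = -1;

    (void) cache_binSearch(ids, 3, 30, NULL);           /* returns 2 (exact match) */
    (void) cache_binSearch(ids, 3, 25, &insert_at);     /* returns -1, sets insert_at */
}
#endif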
lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
{
    int index, no_match_index;

    if (cache->numcached == 0) {
        return 0; // table is empty, so insert at index=0 and report no match

    if (cache->numcached > NUM_CACHE_ENTRIES) {
        /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
          cache->numcached, NUM_CACHE_ENTRIES);*/
        cache->numcached = NUM_CACHE_ENTRIES;

    hi = cache->numcached - 1;

    index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);

    /* if no existing entry found, find index for new one */
    index = no_match_index;
/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in).  We currently do a replace rather
 * than an insert if the cache is full.
 */
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
    int lookup_index = -1;

    /* need to do a lookup first if -1 passed for index */
    if (lookup_bucket(cache, &lookup_index, nodeID)) {
        if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
            // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
            cache->haveaccess[lookup_index] = access;

        /* mission accomplished */
        index = lookup_index;

    /* if the cache is full, do a replace rather than an insert */
    if (cache->numcached >= NUM_CACHE_ENTRIES) {
        //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
        cache->numcached = NUM_CACHE_ENTRIES-1;

        if (index > cache->numcached) {
            // printf("index %d pinned to %d\n", index, cache->numcached);
            index = cache->numcached;

    if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {

    if (index >= 0 && index < cache->numcached) {
        /* only do bcopy if we're inserting */
        bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
        bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );

    cache->acache[index] = nodeID;
    cache->haveaccess[index] = access;
snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
{
    struct cinfo *cip = (struct cinfo *)arg;

    cip->uid = attrp->ca_uid;
    cip->gid = attrp->ca_gid;
    cip->mode = attrp->ca_mode;
    cip->parentcnid = descp->cd_parentcnid;
    cip->recflags = attrp->ca_recflags;
/*
 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id.  If the item
 * isn't incore, then go to the catalog.
 */
do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
    struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
{
    /* if this id matches the one the fsctl was called with, skip the lookup */
    if (cnid == skip_cp->c_cnid) {
        cnattrp->ca_uid = skip_cp->c_uid;
        cnattrp->ca_gid = skip_cp->c_gid;
        cnattrp->ca_mode = skip_cp->c_mode;
        keyp->hfsPlus.parentID = skip_cp->c_parentcnid;

    /* otherwise, check the cnode hash incase the file/dir is incore */
    if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
        cnattrp->ca_uid = c_info.uid;
        cnattrp->ca_gid = c_info.gid;
        cnattrp->ca_mode = c_info.mode;
        cnattrp->ca_recflags = c_info.recflags;
        keyp->hfsPlus.parentID = c_info.parentcnid;

    lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

    /* lookup this cnid in the catalog */
    error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);

    hfs_systemfile_unlock(hfsmp, lockflags);
/*
 * Compute whether we have access to the given directory (nodeID) and all its parents.  Cache
 * up to CACHE_LEVELS as we progress towards the root.
 */
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
    struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev,
    struct vfs_context *my_context,
    uint32_t num_parents)
{
    HFSCatalogNodeID thisNodeID;
    unsigned int myPerms;
    struct cat_attr cnattr;
    int cache_index = -1, scope_index = -1, scope_idx_start = -1;
    int i = 0, ids_to_cache = 0;
    int parent_ids[CACHE_LEVELS];

    while (thisNodeID >= kRootDirID) {
        myResult = 0;   /* default to "no access" */

        /* check the cache before resorting to hitting the catalog */

        /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
         * to look any further after hitting cached dir */
        if (lookup_bucket(cache, &cache_index, thisNodeID)) {
            myErr = cache->haveaccess[cache_index];
            if (scope_index != -1) {
                if (myErr == ESRCH) {
                scope_index = 0;   // so we'll just use the cache result
                scope_idx_start = ids_to_cache;
            myResult = (myErr == 0) ? 1 : 0;
            goto ExitThisRoutine;

        tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
        if (scope_index == -1)
        if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
            scope_idx_start = ids_to_cache;

        /* remember which parents we want to cache */
        if (ids_to_cache < CACHE_LEVELS) {
            parent_ids[ids_to_cache] = thisNodeID;

        // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
        if (bitmap && map_size) {
            bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
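        /*
         * Illustration (not in the original source): the line above treats
         * the bitmap as (map_size * 8) single-bit hash buckets.  A userspace
         * caller that gets the bitmap back could test a directory id the
         * same way, e.g.
         *
         *	if (bitmap[(dir_id / 8) % map_size] & (1 << (dir_id & 7)))
         *		... dir_id may be one of the interesting parents ...
         *
         * Collisions are possible, so a set bit only means "maybe".
         */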
        /* do the lookup (checks the cnode hash, then the catalog) */
        myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr);
            goto ExitThisRoutine; /* no access */

        /* Root always gets access. */
        if (suser(myp_ucred, NULL) == 0) {
            thisNodeID = catkey.hfsPlus.parentID;

        // if the thing has acl's, do the full permission check
        if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
            /* get the vnode for this cnid */
            myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0);
                goto ExitThisRoutine;

            thisNodeID = VTOC(vp)->c_parentcnid;

            hfs_unlock(VTOC(vp));

            if (vnode_vtype(vp) == VDIR) {
                myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
                myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);

                goto ExitThisRoutine;

            myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
                    cnattr.ca_mode, hfsmp->hfs_mp,
                    myp_ucred, theProcPtr);

            if (cnattr.ca_mode & S_IFDIR) {
            if ( (myPerms & flags) != flags) {
                goto ExitThisRoutine;   /* no access */

            /* up the hierarchy we go */
            thisNodeID = catkey.hfsPlus.parentID;

    /* if here, we have access to this node */

    if (parents && myErr == 0 && scope_index == -1) {

    /* cache the parent directory(ies) */
    for (i = 0; i < ids_to_cache; i++) {
        if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
            add_node(cache, -1, parent_ids[i], ESRCH);
            add_node(cache, -1, parent_ids[i], myErr);
do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
    struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
{
    /*
     * NOTE: on entry, the vnode is locked.  Incase this vnode
     * happens to be in our list of file_ids, we'll note it
     * avoid calling hfs_chashget_nowait() on that id as that
     * will cause a "locking against myself" panic.
     */
    Boolean check_leaf = true;

    struct ext_user_access_t *user_access_structp;
    struct ext_user_access_t tmp_user_access;
    struct access_cache cache;

    dev_t dev = VTOC(vp)->c_dev;

    unsigned int num_files = 0;
    int num_parents = 0;

    cnid_t *parents=NULL;

    cnid_t prevParent_cnid = 0;
    unsigned int myPerms;
    struct cat_attr cnattr;

    struct cnode *skip_cp = VTOC(vp);
    kauth_cred_t cred = vfs_context_ucred(context);
    proc_t p = vfs_context_proc(context);

    is64bit = proc_is64bit(p);

    /* initialize the local cache and buffers */
    cache.numcached = 0;
    cache.cachehits = 0;
    cache.acache = NULL;
    cache.haveaccess = NULL;

    /* struct copyin done during dispatch... need to copy file_id array separately */
    if (ap->a_data == NULL) {
        goto err_exit_bulk_access;

    if (arg_size != sizeof(struct ext_user_access_t)) {
        goto err_exit_bulk_access;

    user_access_structp = (struct ext_user_access_t *)ap->a_data;

    } else if (arg_size == sizeof(struct access_t)) {
        struct access_t *accessp = (struct access_t *)ap->a_data;

        // convert an old style bulk-access struct to the new style
        tmp_user_access.flags = accessp->flags;
        tmp_user_access.num_files = accessp->num_files;
        tmp_user_access.map_size = 0;
        tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
        tmp_user_access.bitmap = USER_ADDR_NULL;
        tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
        tmp_user_access.num_parents = 0;
        user_access_structp = &tmp_user_access;

    } else if (arg_size == sizeof(struct ext_access_t)) {
        struct ext_access_t *accessp = (struct ext_access_t *)ap->a_data;

        // up-cast from a 32-bit version of the struct
        tmp_user_access.flags = accessp->flags;
        tmp_user_access.num_files = accessp->num_files;
        tmp_user_access.map_size = accessp->map_size;
        tmp_user_access.num_parents = accessp->num_parents;

        tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
        tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
        tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
        tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);

        user_access_structp = &tmp_user_access;

        goto err_exit_bulk_access;

    map_size = user_access_structp->map_size;

    num_files = user_access_structp->num_files;

    num_parents = user_access_structp->num_parents;

    if (num_files < 1) {
        goto err_exit_bulk_access;
    if (num_files > 1024) {
        goto err_exit_bulk_access;

    if (num_parents > 1024) {
        goto err_exit_bulk_access;

    file_ids = (int *) kalloc(sizeof(int) * num_files);
    access = (short *) kalloc(sizeof(short) * num_files);
    bitmap = (char *) kalloc(sizeof(char) * map_size);
    parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
    cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
    cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);

    if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
        kfree(file_ids, sizeof(int) * num_files);
        kfree(bitmap, sizeof(char) * map_size);
        kfree(access, sizeof(short) * num_files);
        kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
        if (cache.haveaccess) {
            kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
        kfree(parents, sizeof(cnid_t) * num_parents);

    // make sure the bitmap is zero'ed out...
    bzero(bitmap, (sizeof(char) * map_size));

    if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
        num_files * sizeof(int)))) {
        goto err_exit_bulk_access;

    if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
        num_parents * sizeof(cnid_t)))) {
        goto err_exit_bulk_access;

    flags = user_access_structp->flags;
    if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {

    /* check if we've been passed leaf node ids or parent ids */
    if (flags & PARENT_IDS_FLAG) {

    /* Check access to each file_id passed in */
    for (i = 0; i < num_files; i++) {
        cnid = (cnid_t) file_ids[i];

        /* root always has access */
        if ((!parents) && (!suser(cred, NULL))) {

        /* do the lookup (checks the cnode hash, then the catalog) */
        error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr);
            access[i] = (short) error;

        // Check if the leaf matches one of the parent scopes
        leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);

        // if the thing has acl's, do the full permission check
        if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
            /* get the vnode for this cnid */
            myErr = hfs_vget(hfsmp, cnid, &cvp, 0);

            hfs_unlock(VTOC(cvp));

            if (vnode_vtype(cvp) == VDIR) {
                myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
                myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);

            /* before calling CheckAccess(), check the target file for read access */
            myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
                    cnattr.ca_mode, hfsmp->hfs_mp, cred, p);

            /* fail fast if no access */
            if ((myPerms & flags) == 0) {

            /* we were passed an array of parent ids */
            catkey.hfsPlus.parentID = cnid;

        /* if the last guy had the same parent and had access, we're done */
        if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {

        myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
                skip_cp, p, cred, dev, context,bitmap, map_size, parents, num_parents);

        if (myaccess || (error == ESRCH && leaf_index != -1)) {
            access[i] = 0; // have access.. no errors to report
            access[i] = (error != 0 ? (short) error : EACCES);

        prevParent_cnid = catkey.hfsPlus.parentID;

    /* copyout the access array */
    if ((error = copyout((caddr_t)access, user_access_structp->access,
        num_files * sizeof (short)))) {
        goto err_exit_bulk_access;
    if (map_size && bitmap) {
        if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
            map_size * sizeof (char)))) {
            goto err_exit_bulk_access;

  err_exit_bulk_access:

    //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);

    kfree(file_ids, sizeof(int) * num_files);
    kfree(parents, sizeof(cnid_t) * num_parents);
    kfree(bitmap, sizeof(char) * map_size);
    kfree(access, sizeof(short) * num_files);
    kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
    if (cache.haveaccess)
        kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);

/* end "bulk-access" support */
/*
 * Callback for use with freeze ioctl.
 */
hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
{
    vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");

/*
 * Control filesystem operating characteristics.
 */
hfs_vnop_ioctl( struct vnop_ioctl_args /* {
        vfs_context_t a_context;
    } */ *ap)
{
    struct vnode * vp = ap->a_vp;
    struct hfsmount *hfsmp = VTOHFS(vp);
    vfs_context_t context = ap->a_context;
    kauth_cred_t cred = vfs_context_ucred(context);
    proc_t p = vfs_context_proc(context);
    struct vfsstatfs *vfsp;

    is64bit = proc_is64bit(p);

    switch (ap->a_command) {

        struct vnode *file_vp;

        /* Caller must be owner of file system. */
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
        /* Target vnode must be file system's root. */
        if (!vnode_isvroot(vp)) {
        bufptr = (char *)ap->a_data;
        cnid = strtoul(bufptr, NULL, 10);

        /* We need to call hfs_vfs_vget to leverage the code that will fix the
         * origin list for us if needed, as opposed to calling hfs_vget, since
         * we will need it for the subsequent build_path call.
         */
        if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {

        error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);

        /* Caller must be owner of file system. */
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
        /* Target vnode must be file system's root. */
        if (!vnode_isvroot(vp)) {
        linkfileid = *(cnid_t *)ap->a_data;
        if (linkfileid < kHFSFirstUserCatalogNodeID) {
        if ((error = hfs_lookuplink(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
        if (ap->a_command == HFS_NEXT_LINK) {
            *(cnid_t *)ap->a_data = nextlinkid;
            *(cnid_t *)ap->a_data = prevlinkid;

    case HFS_RESIZE_PROGRESS: {

        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES); /* must be owner of file system */
        if (!vnode_isvroot(vp)) {

        return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);

    case HFS_RESIZE_VOLUME: {

        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES); /* must be owner of file system */
        if (!vnode_isvroot(vp)) {

        newsize = *(u_int64_t *)ap->a_data;
        cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;

        if (newsize > cursize) {
            return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
        } else if (newsize < cursize) {
            return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);

    case HFS_CHANGE_NEXT_ALLOCATION: {
        int error = 0;     /* Assume success */

        if (vnode_vfsisrdonly(vp)) {
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES); /* must be owner of file system */
        if (!vnode_isvroot(vp)) {
        HFS_MOUNT_LOCK(hfsmp, TRUE);
        location = *(u_int32_t *)ap->a_data;
        if ((location >= hfsmp->allocLimit) &&
            (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
            goto fail_change_next_allocation;
        /* Return previous value. */
        *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
        if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
            /* On magic value for location, set nextAllocation to next block
             * after metadata zone and set flag in mount structure to indicate
             * that nextAllocation should not be updated again.
             */
            HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
            hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
            hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
            HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
        MarkVCBDirty(hfsmp);
fail_change_next_allocation:
        HFS_MOUNT_UNLOCK(hfsmp, TRUE);

#ifdef HFS_SPARSE_DEV
    case HFS_SETBACKINGSTOREINFO: {
        struct vnode * bsfs_rootvp;
        struct vnode * di_vp;
        struct hfs_backingstoreinfo *bsdata;

        if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES); /* must be owner of file system */
        bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
        if (bsdata == NULL) {
        if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
        if ((error = vnode_getwithref(di_vp))) {
            file_drop(bsdata->backingfd);

        if (vnode_mount(vp) == vnode_mount(di_vp)) {
            (void)vnode_put(di_vp);
            file_drop(bsdata->backingfd);

        /*
         * Obtain the backing fs root vnode and keep a reference
         * on it.  This reference will be dropped in hfs_unmount.
         */
        error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
            (void)vnode_put(di_vp);
            file_drop(bsdata->backingfd);

        vnode_ref(bsfs_rootvp);
        vnode_put(bsfs_rootvp);

        hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
        hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
        hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
        hfsmp->hfs_sparsebandblks *= 4;

        vfs_markdependency(hfsmp->hfs_mp);

        (void)vnode_put(di_vp);
        file_drop(bsdata->backingfd);

    case HFS_CLRBACKINGSTOREINFO: {
        struct vnode * tmpvp;

        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES); /* must be owner of file system */
        if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
            hfsmp->hfs_backingfs_rootvp) {

            hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
            tmpvp = hfsmp->hfs_backingfs_rootvp;
            hfsmp->hfs_backingfs_rootvp = NULLVP;
            hfsmp->hfs_sparsebandblks = 0;
#endif /* HFS_SPARSE_DEV */

        mp = vnode_mount(vp);
        hfsmp = VFSTOHFS(mp);

        lck_rw_lock_exclusive(&hfsmp->hfs_insync);

        // flush things before we get started to try and prevent
        // dirty data from being paged out while we're frozen.
        // note: can't do this after taking the lock as it will
        // deadlock against ourselves.
        vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
        hfs_global_exclusive_lock_acquire(hfsmp);
        journal_flush(hfsmp->jnl);

        // don't need to iterate on all vnodes, we just need to
        // wait for writes to the system files and the device vnode
        if (HFSTOVCB(hfsmp)->extentsRefNum)
            vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
        if (HFSTOVCB(hfsmp)->catalogRefNum)
            vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
        if (HFSTOVCB(hfsmp)->allocationsRefNum)
            vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
        if (hfsmp->hfs_attribute_vp)
            vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
        vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");

        hfsmp->hfs_freezing_proc = current_proc();

        // if we're not the one who froze the fs then we
        if (hfsmp->hfs_freezing_proc != current_proc()) {

        // NOTE: if you add code here, also go check the
        // code that "thaws" the fs in hfs_vnop_close()
        hfsmp->hfs_freezing_proc = NULL;
        hfs_global_exclusive_lock_release(hfsmp);
        lck_rw_unlock_exclusive(&hfsmp->hfs_insync);

    case HFS_BULKACCESS_FSCTL: {

        if (hfsmp->hfs_flags & HFS_STANDARD) {
            size = sizeof(struct user_access_t);
            size = sizeof(struct access_t);

        return do_bulk_access_check(hfsmp, vp, ap, size, context);

    case HFS_EXT_BULKACCESS_FSCTL: {

        if (hfsmp->hfs_flags & HFS_STANDARD) {
            size = sizeof(struct ext_user_access_t);
            size = sizeof(struct ext_access_t);

        return do_bulk_access_check(hfsmp, vp, ap, size, context);

    case HFS_SETACLSTATE: {

        if (ap->a_data == NULL) {
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        state = *(int *)ap->a_data;

        // super-user can enable or disable acl's on a volume.
        // the volume owner can only enable acl's
        if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
        if (state == 0 || state == 1)
            return hfs_set_volxattr(hfsmp, HFS_SETACLSTATE, state);

    case HFS_SET_XATTREXTENTS_STATE: {

        if (ap->a_data == NULL) {
        state = *(int *)ap->a_data;

        /* Super-user can enable or disable extent-based extended
         * attribute support on a volume
         */
        if (state == 0 || state == 1)
            return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);

        error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
        error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
        hfs_unlock(VTOC(vp));

        register struct cnode *cp;

        if (!vnode_isreg(vp))
        error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
        /*
         * used by regression test to determine if
         * all the dirty pages (via write) have been cleaned
         * after a call to 'fsysnc'.
         */
        error = is_file_clean(vp, VTOF(vp)->ff_size);

        register struct radvisory *ra;
        struct filefork *fp;

        if (!vnode_isreg(vp))
        ra = (struct radvisory *)(ap->a_data);

        /* Protect against a size change. */
        hfs_lock_truncate(VTOC(vp), TRUE);

        if (ra->ra_offset >= fp->ff_size) {
        error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);

        hfs_unlock_truncate(VTOC(vp), TRUE);

    case F_READBOOTSTRAP:
    case F_WRITEBOOTSTRAP:
    {
        struct vnode *devvp = NULL;
        user_fbootstraptransfer_t *user_bootstrapp;
        daddr64_t blockNumber;
        user_fbootstraptransfer_t user_bootstrap;

        if (!vnode_isvroot(vp))
        /* LP64 - when caller is a 64 bit process then we are passed a pointer
         * to a user_fbootstraptransfer_t else we get a pointer to a
         * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
         */
        user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;

        fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
        user_bootstrapp = &user_bootstrap;
        user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
        user_bootstrap.fbt_length = bootstrapp->fbt_length;
        user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);

        if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)

        devvp = VTOHFS(vp)->hfs_devvp;
        auio = uio_create(1, user_bootstrapp->fbt_offset,
                is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
                (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
        uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);

        devBlockSize = vfs_devblocksize(vnode_mount(vp));

        while (uio_resid(auio) > 0) {
            blockNumber = uio_offset(auio) / devBlockSize;
            error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
                if (bp) buf_brelse(bp);

            blockOffset = uio_offset(auio) % devBlockSize;
            xfersize = devBlockSize - blockOffset;
            error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);

            if (uio_rw(auio) == UIO_WRITE) {
                error = VNOP_BWRITE(bp);

    case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
            *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
            *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);

    case HFS_GET_MOUNT_TIME:
        return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));

    case HFS_GET_LAST_MTIME:
        return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));

    case HFS_SET_BOOT_INFO:
        if (!vnode_isvroot(vp))
        if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
            return(EACCES);     /* must be superuser or owner of filesystem */
        HFS_MOUNT_LOCK(hfsmp, TRUE);
        bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
        HFS_MOUNT_UNLOCK(hfsmp, TRUE);
        (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);

    case HFS_GET_BOOT_INFO:
        if (!vnode_isvroot(vp))
        HFS_MOUNT_LOCK(hfsmp, TRUE);
        bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
        HFS_MOUNT_UNLOCK(hfsmp, TRUE);

    case HFS_MARK_BOOT_CORRUPT:
        /* Mark the boot volume corrupt by setting
         * kHFSVolumeInconsistentBit in the volume header.  This will
         * force fsck_hfs on next mount.
         */
        /* Allowed only on the root vnode of the boot volume */
        if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
            !vnode_isvroot(vp)) {
        printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
        hfs_mark_volume_inconsistent(hfsmp);

    /* Should never get here */
hfs_vnop_select(__unused struct vnop_select_args *ap)
/*
    struct vnop_select_args {
        vfs_context_t a_context;
    };
*/
{
    /*
     * We should really check to see if I/O is possible.
     */
/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run.  The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently its 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
 */
hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
{
    struct filefork *fp = VTOF(vp);
    struct hfsmount *hfsmp = VTOHFS(vp);
    int retval = E_NONE;
    u_int32_t logBlockSize;
    size_t bytesContAvail = 0;
    off_t blockposition;

    /*
     * Check for underlying vnode requests and ensure that logical
     * to physical mapping is requested.
     */
    *vpp = hfsmp->hfs_devvp;

    logBlockSize = GetLogicalBlockSize(vp);
    blockposition = (off_t)bn * logBlockSize;

    lockExtBtree = overflow_extents(fp);

    lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);

    retval = MacToVFSError(
                MapFileBlockC (HFSTOVCB(hfsmp),

    hfs_systemfile_unlock(hfsmp, lockflags);

    if (retval == E_NONE) {
        /* Figure out how many read ahead blocks there are */
        if (can_cluster(logBlockSize)) {
            /* Make sure this result never goes negative: */
            *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
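            /*
             * Illustration (not part of the original source): with a logical
             * block size of 4096 and bytesContAvail == 20480 (five contiguous
             * logical blocks), *runp becomes 4, i.e. four more blocks can be
             * read ahead after the one being mapped.
             */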
/*
 * Convert logical block number to file offset.
 */
hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
/*
    struct vnop_blktooff_args {
 */
{
    if (ap->a_vp == NULL)

    *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
/*
 * Convert file offset to logical block number.
 */
hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
/*
    struct vnop_offtoblk_args {
        daddr64_t *a_lblkno;
 */
{
    if (ap->a_vp == NULL)

    *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
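/*
 * Worked example (illustrative, not in the original source): assuming
 * GetLogicalBlockSize() returns 4096 for the vnode, blktooff maps logical
 * block 3 to byte offset 3 * 4096 = 12288, and offtoblk maps any offset in
 * [12288, 16383] back to logical block 3 (the division truncates).
 */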
/*
 * Map file offset to physical block number.
 *
 * If this function is called for write operation, and if the file
 * had virtual blocks allocated (delayed allocation), real blocks
 * are allocated by calling ExtendFileC().
 *
 * If this function is called for read operation, and if the file
 * had virtual blocks allocated (delayed allocation), no change
 * to the size of file is done, and if required, rangelist is
 * searched for mapping.
 *
 * System file cnodes are expected to be locked (shared or exclusive).
 */
hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
/*
    struct vnop_blockmap_args {
        vfs_context_t a_context;
 */
{
    struct vnode *vp = ap->a_vp;
    struct filefork *fp;
    struct hfsmount *hfsmp;
    size_t bytesContAvail = 0;
    int retval = E_NONE;
    struct rl_entry *invalid_range;
    enum rl_overlaptype overlaptype;

    /* Do not allow blockmap operation on a directory */
    if (vnode_isdir(vp)) {

    /*
     * Check for underlying vnode requests and ensure that logical
     * to physical mapping is requested.
     */
    if (ap->a_bpn == NULL)

    if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
        if (VTOC(vp)->c_lockowner != current_thread()) {
            hfs_lock(VTOC(vp), HFS_FORCE_LOCK);

    /* Check virtual blocks only when performing write operation */
    if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
        if (hfs_start_transaction(hfsmp) != 0) {

        syslocks = SFL_EXTENTS | SFL_BITMAP;

    } else if (overflow_extents(fp)) {
        syslocks = SFL_EXTENTS;

    lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);

    /*
     * Check for any delayed allocations.
     */
    if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
        u_int32_t loanedBlocks;

        //
        // Make sure we have a transaction.  It's possible
        // that we came in and fp->ff_unallocblocks was zero
        // but during the time we blocked acquiring the extents
        // btree, ff_unallocblocks became non-zero and so we
        // will need to start a transaction.
        //
        if (started_tr == 0) {
            hfs_systemfile_unlock(hfsmp, lockflags);

        /*
         * Note: ExtendFileC will Release any blocks on loan and
         * aquire real blocks.  So we ask to extend by zero bytes
         * since ExtendFileC will account for the virtual blocks.
         */
        loanedBlocks = fp->ff_unallocblocks;
        retval = ExtendFileC(hfsmp, (FCB *)fp, 0, 0,
                kEFAllMask | kEFNoClumpMask, &actbytes);

            fp->ff_unallocblocks = loanedBlocks;
            cp->c_blocks += loanedBlocks;
            fp->ff_blocks += loanedBlocks;

            HFS_MOUNT_LOCK(hfsmp, TRUE);
            hfsmp->loanedBlocks += loanedBlocks;
            HFS_MOUNT_UNLOCK(hfsmp, TRUE);

            hfs_systemfile_unlock(hfsmp, lockflags);
            cp->c_flag |= C_MODIFIED;

            (void) hfs_update(vp, TRUE);
            (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

            hfs_end_transaction(hfsmp);

    retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
                           ap->a_bpn, &bytesContAvail);

    hfs_systemfile_unlock(hfsmp, lockflags);

        (void) hfs_update(vp, TRUE);
        (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
        hfs_end_transaction(hfsmp);

    /* On write, always return error because virtual blocks, if any,
     * should have been allocated in ExtendFileC().  We do not
     * allocate virtual blocks on read, therefore return error
     * only if no virtual blocks are allocated.  Otherwise we search
     * rangelist for zero-fills
     */
    if ((MacToVFSError(retval) != ERANGE) ||
        (ap->a_flags & VNODE_WRITE) ||
        ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {

    /* Validate if the start offset is within logical file size */
    if (ap->a_foffset > fp->ff_size) {

    /* Searching file extents has failed for read operation, therefore
     * search rangelist for any uncommitted holes in the file.
     */
    overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
            ap->a_foffset + (off_t)(ap->a_size - 1),

    switch(overlaptype) {
    case RL_OVERLAPISCONTAINED:
        /* start_offset <= rl_start, end_offset >= rl_end */
        if (ap->a_foffset != invalid_range->rl_start) {
    case RL_MATCHINGOVERLAP:
        /* start_offset = rl_start, end_offset = rl_end */
    case RL_OVERLAPCONTAINSRANGE:
        /* start_offset >= rl_start, end_offset <= rl_end */
    case RL_OVERLAPSTARTSBEFORE:
        /* start_offset > rl_start, end_offset >= rl_start */
        if ((off_t)fp->ff_size > (invalid_range->rl_end + 1)) {
            bytesContAvail = (invalid_range->rl_end + 1) - ap->a_foffset;
            bytesContAvail = fp->ff_size - ap->a_foffset;
        if (bytesContAvail > ap->a_size) {
            bytesContAvail = ap->a_size;
        *ap->a_bpn = (daddr64_t)-1;
    case RL_OVERLAPENDSAFTER:
        /* start_offset < rl_start, end_offset < rl_end */

    /* MapFileC() found a valid extent in the filefork.  Search the
     * mapping information further for invalid file ranges
     */
    overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
            ap->a_foffset + (off_t)bytesContAvail - 1,
    if (overlaptype != RL_NOOVERLAP) {
        switch(overlaptype) {
        case RL_MATCHINGOVERLAP:
        case RL_OVERLAPCONTAINSRANGE:
        case RL_OVERLAPSTARTSBEFORE:
            /* There's no valid block for this byte offset */
            *ap->a_bpn = (daddr64_t)-1;
            /* There's no point limiting the amount to be returned
             * if the invalid range that was hit extends all the way
             * to the EOF (i.e. there's no valid bytes between the
             * end of this range and the file's EOF):
             */
            if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
                (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
                bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;

        case RL_OVERLAPISCONTAINED:
        case RL_OVERLAPENDSAFTER:
            /* The range of interest hits an invalid block before the end: */
            if (invalid_range->rl_start == ap->a_foffset) {
                /* There's actually no valid information to be had starting here: */
                *ap->a_bpn = (daddr64_t)-1;
                if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
                    (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
                    bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
                bytesContAvail = invalid_range->rl_start - ap->a_foffset;

    if (bytesContAvail > ap->a_size)
        bytesContAvail = ap->a_size;

    *ap->a_run = bytesContAvail;

    *(int *)ap->a_poff = 0;

    return (MacToVFSError(retval));
/*
 * prepare and issue the I/O
 * buf_strategy knows how to deal
 * with requests that require
 */
hfs_vnop_strategy(struct vnop_strategy_args *ap)
{
    buf_t bp = ap->a_bp;
    vnode_t vp = buf_vnode(bp);

    return (buf_strategy(VTOHFS(vp)->hfs_devvp, ap));
static int
do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context)
{
	register struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	struct proc *p = vfs_context_proc(context);
	kauth_cred_t cred = vfs_context_ucred(context);

	off_t actualBytesAdded;

	struct hfsmount *hfsmp;

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
		(int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

	/* This should only happen with a corrupt filesystem */
	if ((off_t)fp->ff_size < 0)

	if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))

	/* Files that are changing size are not hot file candidates. */
	if (hfsmp->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
	if ((retval = hfs_getinoquota(cp)))

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of ff_size is 0, length will be at least 1.
	 */
	if (length > (off_t)fp->ff_size) {
		retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),

		/*
		 * If we don't have enough physical space then
		 * we need to extend the physical size.
		 */
		if (length > filebytes) {
			u_long blockHint = 0;

			/* All or nothing and don't round up to clumpsize. */
			eflags = kEFAllMask | kEFNoClumpMask;

			if (cred && suser(cred, NULL) != 0)
				eflags |= kEFReserveMask;  /* keep a reserve */

			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (filebytes == 0 &&
			    hfsmp->hfs_flags & HFS_METADATA_ZONE &&
			    hfs_virtualmetafile(cp)) {
				eflags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;
			}
			if (hfs_start_transaction(hfsmp) != 0) {

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			while ((length > filebytes) && (retval == E_NONE)) {
				bytesToAdd = length - filebytes;
				retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
						&actualBytesAdded));

				filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
				if (actualBytesAdded == 0 && retval == E_NONE) {
					if (length > filebytes)
			}

			hfs_systemfile_unlock(hfsmp, lockflags);

			(void) hfs_update(vp, TRUE);
			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);
		}

		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
			(int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

		if (!(flags & IO_NOZEROFILL)) {
			if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
				struct rl_entry *invalid_range;

				zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
				if (length < zero_limit) zero_limit = length;
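				/*
				 * Example of the rounding above (illustrative numbers): with a
				 * 4 KB page size and fp->ff_size = 9728 (0x2600), zero_limit
				 * becomes (9728 + 4095) & ~4095 = 12288, the end of the page
				 * that currently holds the EOF; it is then capped at 'length'
				 * so zeroing never runs past the new end of file.
				 */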
				if (length > (off_t)fp->ff_size) {
					/* Extending the file: time to fill out the current last page w. zeroes? */
					if ((fp->ff_size & PAGE_MASK_64) &&
					    (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
					    fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
						/* There's some valid data at the start of the (current) last page
						   of the file, so zero out the remainder of that page to ensure the
						   entire page contains valid data. Since there is no invalid range
						   possible past the (current) eof, there's no need to remove anything
						   from the invalid range list before calling cluster_write(): */
						retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
							fp->ff_size, (off_t)0,
							(flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
						hfs_lock(cp, HFS_FORCE_LOCK);
						if (retval) goto Err_Exit;

						/* Merely invalidate the remaining area, if necessary: */
						if (length > zero_limit) {
							rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
							cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
						}
					} else {
						/* The page containing the (current) eof is invalid: just add the
						   remainder of the page to the invalid list, along with the area
						   being newly allocated: */
						rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
						cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
					}
				}
			} else {
				panic("hfs_truncate: invoked on non-UBC object?!");
			}
		}
		cp->c_touch_modtime = TRUE;
		fp->ff_size = length;

	} else { /* Shorten the size of the file */

		if ((off_t)fp->ff_size > length) {
			/* Any space previously marked as invalid is now irrelevant: */
			rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
		}

		/*
		 * Account for any unmapped blocks. Note that the new
		 * file length can still end up with unmapped blocks.
		 */
		if (fp->ff_unallocblocks > 0) {
			u_int32_t finalblks;
			u_int32_t loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);

			loanedBlocks = fp->ff_unallocblocks;
			cp->c_blocks -= loanedBlocks;
			fp->ff_blocks -= loanedBlocks;
			fp->ff_unallocblocks = 0;

			hfsmp->loanedBlocks -= loanedBlocks;

			finalblks = (length + blksize - 1) / blksize;
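			/*
			 * finalblks is the new length rounded up to whole allocation
			 * blocks, e.g. (illustrative numbers) length = 10000 with a
			 * 4096-byte block size gives (10000 + 4095) / 4096 = 3 blocks.
			 */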
			if (finalblks > fp->ff_blocks) {
				/* calculate required unmapped blocks */
				loanedBlocks = finalblks - fp->ff_blocks;
				hfsmp->loanedBlocks += loanedBlocks;

				fp->ff_unallocblocks = loanedBlocks;
				cp->c_blocks += loanedBlocks;
				fp->ff_blocks += loanedBlocks;
			}
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		}

		/*
		 * For a TBE process the deallocation of the file blocks is
		 * delayed until the file is closed. And hfs_close calls
		 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
		 * isn't set, we make sure this isn't a TBE process.
		 */
		if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {

			off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);

			if (hfs_start_transaction(hfsmp) != 0) {

			if (fp->ff_unallocblocks == 0) {
				/* Protect extents b-tree and allocation bitmap */
				lockflags = SFL_BITMAP;
				if (overflow_extents(fp))
					lockflags |= SFL_EXTENTS;
				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

				retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
						(FCB*)fp, length, false));

				hfs_systemfile_unlock(hfsmp, lockflags);
			}

			fp->ff_size = length;

			(void) hfs_update(vp, TRUE);
			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);

			filebytes = (off_t)fp->ff_blocks * (off_t)blksize;

			/* These are bytes released */
			(void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
		}
		/* Only set update flag if the logical length changes */
		if ((off_t)fp->ff_size != length)
			cp->c_touch_modtime = TRUE;
		fp->ff_size = length;
	}
	cp->c_touch_chgtime = TRUE;	/* status changed */
	cp->c_touch_modtime = TRUE;	/* file data was modified */
	retval = hfs_update(vp, MNT_WAIT);

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
		-1, -1, -1, retval, 0);

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
		(int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
/*
 * Truncate a cnode to at most length size, freeing (or adding) the
 * disk blocks.
 */
int
hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
	vfs_context_t context)
{
	struct filefork *fp = VTOF(vp);
	int blksize, error = 0;
	struct cnode *cp = VTOC(vp);

	/* Cannot truncate an HFS directory! */
	if (vnode_isdir(vp)) {

	/* A swap file cannot change size. */
	if (vnode_isswap(vp) && (length != 0)) {

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	//
	// Have to do this here so that we don't wind up with
	// i/o pending for blocks that are about to be released
	// if we truncate the file.
	//
	// If skipsetsize is set, then the caller is responsible
	// for the ubc_setsize.
	//
	if (!skipsetsize)
		ubc_setsize(vp, length);

	// have to loop truncating or growing files that are
	// really big because otherwise transactions can get
	// enormous and consume too many kernel resources.

	if (length < filebytes) {
		while (filebytes > length) {
			if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes -= HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, context);
			if (error)
				break;
		}
	} else if (length > filebytes) {
		while (filebytes < length) {
			if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes += HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, context);
			if (error)
				break;
		}
	} else /* Same logical size */ {

		error = do_hfs_truncate(vp, length, flags, context);
	}
	/* Files that are changing size are not hot file candidates. */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}
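/*
 * The loops above shrink or grow a very large file in HFS_BIGFILE_SIZE
 * steps so that no single journal transaction has to cover the entire
 * change.  A minimal userspace sketch of the same chunking idea follows;
 * it is purely illustrative, and shrink_in_steps()/CHUNK are hypothetical
 * names that are not part of this file.
 */
#if 0
#include <unistd.h>
#include <sys/types.h>

#define CHUNK	0x40000000LL	/* stand-in for HFS_BIGFILE_SIZE */

static int
shrink_in_steps(int fd, off_t cur, off_t target)
{
	while (cur > target) {
		/* Step down by at most CHUNK bytes per call. */
		cur = (cur - target > CHUNK) ? cur - CHUNK : target;
		if (ftruncate(fd, cur) != 0)
			return (-1);
	}
	return (0);
}
#endif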
/*
 * Preallocate file storage space.
 */
int
hfs_vnop_allocate(struct vnop_allocate_args /* {
		off_t *a_bytesallocated;
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode *vp = ap->a_vp;
	struct filefork *fp;
	off_t length = ap->a_length;
	off_t moreBytesRequested;
	off_t actualBytesAdded;
	int retval, retval2;
	u_int32_t blockHint;
	u_int32_t extendFlags;   /* For call to ExtendFileC */
	struct hfsmount *hfsmp;
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);

	*(ap->a_bytesallocated) = 0;

	if (!vnode_isreg(vp))

	if (length < (off_t)0)

	hfs_lock_truncate(cp, TRUE);

	if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {

	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

	if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {

	/* Fill in the flags word for the call to Extend the file */

	extendFlags = kEFNoClumpMask;
	if (ap->a_flags & ALLOCATECONTIG)
		extendFlags |= kEFContigMask;
	if (ap->a_flags & ALLOCATEALL)
		extendFlags |= kEFAllMask;
	if (cred && suser(cred, NULL) != 0)
		extendFlags |= kEFReserveMask;
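	/*
	 * For example (illustrative): a preallocation request that sets both
	 * ALLOCATECONTIG and ALLOCATEALL maps to kEFContigMask | kEFAllMask,
	 * i.e. ExtendFileC must find a single contiguous run covering the
	 * entire amount or fail, rather than returning a partial, fragmented
	 * allocation.
	 */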
	startingPEOF = filebytes;

	if (ap->a_flags & ALLOCATEFROMPEOF)
		length += filebytes;
	else if (ap->a_flags & ALLOCATEFROMVOL)
		blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

	/* If no changes are necessary, then we're done */
	if (filebytes == length)

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of filebytes is 0, length will be at least 1.
	 */
	if (length > filebytes) {
		off_t total_bytes_added = 0, orig_request_size;

		orig_request_size = moreBytesRequested = length - filebytes;

		retval = hfs_chkdq(cp,
			(int64_t)(roundup(moreBytesRequested, vcb->blockSize)),

		/*
		 * Metadata zone checks.
		 */
		if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (hfs_virtualmetafile(cp)) {
				extendFlags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;
			} else if ((blockHint >= hfsmp->hfs_metazone_start) &&
				   (blockHint <= hfsmp->hfs_metazone_end)) {
				/*
				 * Move blockHint outside metadata zone.
				 */
				blockHint = hfsmp->hfs_metazone_end + 1;
			}
		}

		while ((length > filebytes) && (retval == E_NONE)) {
			off_t bytesRequested;

			if (hfs_start_transaction(hfsmp) != 0) {

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
				bytesRequested = HFS_BIGFILE_SIZE;
			} else {
				bytesRequested = moreBytesRequested;
			}
			retval = MacToVFSError(ExtendFileC(vcb,
					&actualBytesAdded));

			if (retval == E_NONE) {
				*(ap->a_bytesallocated) += actualBytesAdded;
				total_bytes_added += actualBytesAdded;
				moreBytesRequested -= actualBytesAdded;
				if (blockHint != 0) {
					blockHint += actualBytesAdded / vcb->blockSize;
				}
			}
			filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

			hfs_systemfile_unlock(hfsmp, lockflags);

			(void) hfs_update(vp, TRUE);
			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);
		}

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes))
			goto Err_Exit;

		/*
		 * Adjust actualBytesAdded to be allocation block aligned, not
		 * clump size aligned.
		 * NOTE: So what we are reporting does not affect reality
		 * until the file is closed, when we truncate the file to allocation
		 * block size.
		 */
		if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
			*(ap->a_bytesallocated) =
				roundup(orig_request_size, (off_t)vcb->blockSize);

	} else { /* Shorten the size of the file */

		if (fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).
			 */

		retval = hfs_truncate(vp, length, 0, 0, ap->a_context);
		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes)) goto Err_Exit;

		/* These are bytes released */
		(void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED, 0);

		if (fp->ff_size > filebytes) {
			fp->ff_size = filebytes;

			ubc_setsize(vp, fp->ff_size);
			hfs_lock(cp, HFS_FORCE_LOCK);
		}
	}

	cp->c_touch_chgtime = TRUE;
	cp->c_touch_modtime = TRUE;
	retval2 = hfs_update(vp, MNT_WAIT);

	hfs_unlock_truncate(cp, TRUE);
/*
 * Pagein for HFS filesystem
 */
int
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
	struct vnop_pagein_args {
		vm_offset_t a_pl_offset,
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;

	error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
		ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
	/*
	 * Keep track of blocks read.
	 */
	if (!vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
		struct filefork *fp;
		int took_cnode_lock = 0;

		if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
			bytesread = fp->ff_size;
		else
			bytesread = ap->a_size;

		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			took_cnode_lock = 1;
		}
		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
			fp->ff_bytesread = bytesread;
			cp->c_atime = tv.tv_sec;
		} else {
			fp->ff_bytesread += bytesread;
		}
		cp->c_touch_acctime = TRUE;
		if (took_cnode_lock)
			hfs_unlock(cp);
	}
/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
	struct vnop_pageout_args {
		vm_offset_t a_pl_offset,
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	struct filefork *fp;

	if (vnode_isswap(vp)) {
		filesize = fp->ff_size;

	if (cp->c_lockowner != current_thread()) {
		if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
			if (!(ap->a_flags & UPL_NOCOMMIT)) {
				ubc_upl_abort_range(ap->a_pl,
					UPL_ABORT_FREE_ON_EMPTY);
			}

	filesize = fp->ff_size;
	end_of_range = ap->a_f_offset + ap->a_size - 1;

	if (end_of_range >= filesize) {
		end_of_range = (off_t)(filesize - 1);
	}
	if (ap->a_f_offset < filesize) {
		rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
		cp->c_flag |= C_MODIFIED;  /* leof is dirty */
	}

	retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
		ap->a_size, filesize, ap->a_flags);

	/*
	 * If data was written, and setuid or setgid bits are set and
	 * this process is not the superuser then clear the setuid and
	 * setgid bits as a precaution against tampering.
	 */
	if ((retval == 0) &&
	    (cp->c_mode & (S_ISUID | S_ISGID)) &&
	    (vfs_context_suser(ap->a_context) != 0)) {
		hfs_lock(cp, HFS_FORCE_LOCK);
		cp->c_mode &= ~(S_ISUID | S_ISGID);
		cp->c_touch_chgtime = TRUE;
	}
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
	    (vp == VTOHFS(vp)->hfc_filevp)) {

		/*
		 * Swap and validate the node if it is in native byte order.
		 * This is always true on big endian, so we always validate
		 * before writing here.  On little endian, the node typically has
		 * been swapped and validated when it was written to the journal,
		 * so we won't do anything here.
		 */
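		/*
		 * About the 0x000e test below: in an HFS+ B-tree node the record
		 * offset table sits at the end of the node, and its first entry is
		 * the offset of the first record, which always follows the 14-byte
		 * node descriptor (0x000e).  If that trailing u_int16_t reads as
		 * 0x000e in host byte order, the node is still in native order and
		 * hfs_swap_BTNode() below converts (and validates) it for disk.
		 */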
		if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = (char *)buf_dataptr(bp);
			block.blockNum = buf_lblkno(bp);
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (buf_fromcache(bp) == 0);
			block.blockSize = buf_count(bp);

			/* Endian un-swap B-Tree node */
			retval = hfs_swap_BTNode(&block, vp, kSwapBTNodeHostToBig, false);
			if (retval)
				panic("hfs_vnop_bwrite: about to write corrupt node!\n");
		}
	}

	/* This buffer shouldn't be locked anymore but if it is clear it */
	if ((buf_flags(bp) & B_LOCKED)) {
		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp %p\n", bp);
		}
		buf_clearflags(bp, B_LOCKED);
	}
	retval = vn_bwrite(ap);
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 *  -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 *  -----------------     -----------------
 *
 *  -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 *  -----------------     -----------------
 *
 *                        -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                        -----------------
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
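/*
 * Worked example (illustrative numbers): for a file that fully occupies 10
 * allocation blocks, headblks = ff_blocks = 10 and datablks =
 * howmany(ff_size, blksize) = 10, so STEP 1 grows the fork by growsize =
 * 10 * blksize and it temporarily owns 20 blocks; STEP 2 copies the live
 * data into the 10 new blocks; STEP 3's HeadTruncateFile() then releases
 * the original 10 head blocks, leaving only the relocated copy.
 */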
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
	struct proc *p)
{
	struct filefork *fp;
	struct hfsmount *hfsmp;
	u_int32_t nextallocsave;
	daddr64_t sector_a, sector_b;
	int took_trunc_lock = 0;
	enum vtype vnodetype;

	vnodetype = vnode_vtype(vp);
	if (vnodetype != VREG && vnodetype != VLNK) {

	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {

	if (fp->ff_unallocblocks)

	blksize = hfsmp->blockSize;
	if (blockHint == 0)
		blockHint = hfsmp->nextAllocation;

	if ((fp->ff_size > 0x7fffffff) ||
	    ((fp->ff_size > blksize) && vnodetype == VLNK)) {

	//
	// We do not believe that this call to hfs_fsync() is
	// necessary and it causes a journal transaction
	// deadlock so we are removing it.
	//
	//if (vnodetype == VREG && !vnode_issystem(vp)) {
	//	retval = hfs_fsync(vp, MNT_WAIT, 0, p);

	if (!vnode_issystem(vp) && (vnodetype != VLNK)) {

		hfs_lock_truncate(cp, TRUE);
		/* Force lock since callers expect lock to be held. */
		if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
			hfs_unlock_truncate(cp, TRUE);
			return (retval);
		}
		/* No need to continue if file was removed. */
		if (cp->c_flag & C_NOEXISTS) {
			hfs_unlock_truncate(cp, TRUE);
			return (ENOENT);
		}
		took_trunc_lock = 1;
	}
	headblks = fp->ff_blocks;
	datablks = howmany(fp->ff_size, blksize);
	growsize = datablks * blksize;
	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
	if (blockHint >= hfsmp->hfs_metazone_start &&
	    blockHint <= hfsmp->hfs_metazone_end)
		eflags |= kEFMetadataMask;

	if (hfs_start_transaction(hfsmp) != 0) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, TRUE);

	/*
	 * Protect the extents b-tree and the allocation bitmap
	 * during MapFileBlockC and ExtendFileC operations.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);

	retval = MacToVFSError(retval);

	/*
	 * STEP 1 - acquire new allocation blocks.
	 */
	nextallocsave = hfsmp->nextAllocation;
	retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
	if (eflags & kEFMetadataMask) {
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
		MarkVCBDirty(hfsmp);
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	}

	retval = MacToVFSError(retval);

	cp->c_flag |= C_MODIFIED;
	if (newbytes < growsize) {

	} else if (fp->ff_blocks < (headblks + datablks)) {
		printf("hfs_relocate: allocation failed");

	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
	if (retval) {
		retval = MacToVFSError(retval);
	} else if ((sector_a + 1) == sector_b) {

	} else if ((eflags & kEFMetadataMask) &&
	    ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
	      hfsmp->hfs_metazone_end)) {
		const char * filestr;
		char emptystr = '\0';

		if (cp->c_desc.cd_nameptr != NULL) {
			filestr = (const char *)&cp->c_desc.cd_nameptr[0];
		} else if (vnode_name(vp) != NULL) {
			filestr = vnode_name(vp);
		} else {
			filestr = &emptystr;
		}
		printf("hfs_relocate: %s didn't move into MDZ (%d blks)\n", filestr, fp->ff_blocks);
	}

	/* Done with system locks and journal for now. */
	hfs_systemfile_unlock(hfsmp, lockflags);
	hfs_end_transaction(hfsmp);

	/*
	 * Check to see if failure is due to excessive fragmentation.
	 */
	if ((retval == ENOSPC) &&
	    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
		hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
	}

	/*
	 * STEP 2 - clone file data into the new allocation blocks.
	 */

	if (vnodetype == VLNK)
		retval = hfs_clonelink(vp, blksize, cred, p);
	else if (vnode_issystem(vp))
		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	else
		retval = hfs_clonefile(vp, headblks, datablks, blksize);

	/* Start transaction for step 3 or for a restore. */
	if (hfs_start_transaction(hfsmp) != 0) {

	/*
	 * STEP 3 - switch to cloned data and remove old blocks.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = HeadTruncateFile(hfsmp, (FCB *)fp, headblks);

	hfs_systemfile_unlock(hfsmp, lockflags);

	if (took_trunc_lock)
		hfs_unlock_truncate(cp, TRUE);

	hfs_systemfile_unlock(hfsmp, lockflags);

	/* Push cnode's new extent data to disk. */
	(void) hfs_update(vp, MNT_WAIT);

	if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
		(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	else
		(void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);

	hfs_end_transaction(hfsmp);

	if (fp->ff_blocks == headblks) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, TRUE);
	}
	/*
	 * Give back any newly allocated space.
	 */
	if (lockflags == 0) {
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	}

	(void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);

	hfs_systemfile_unlock(hfsmp, lockflags);

	if (took_trunc_lock)
		hfs_unlock_truncate(cp, TRUE);
static int
hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
{
	struct buf *head_bp = NULL;
	struct buf *tail_bp = NULL;

	error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);

	tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
	if (tail_bp == NULL) {

	bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
	error = (int)buf_bwrite(tail_bp);

	buf_markinvalid(head_bp);
	buf_brelse(head_bp);

	(void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
/*
 * Clone a file's data within the file.
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
	filesize = VTOF(vp)->ff_blocks * blksize;  /* virtual file size */
	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 128 * 1024);

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {

	hfs_unlock(VTOC(vp));

	auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);

	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);

		uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_read(vp, auio, copysize, IO_NOCACHE);
		if (error) {
			printf("hfs_clonefile: cluster_read failed - %d\n", error);

		if (uio_resid(auio) != 0) {
			printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));

		uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_write(vp, auio, filesize + offset,
			filesize + offset + iosize,
			uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
		if (error) {
			printf("hfs_clonefile: cluster_write failed - %d\n", error);

		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
	}

	/*
	 * No need to call ubc_sync_range or hfs_invalbuf
	 * since the file was copied using IO_NOCACHE.
	 */

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
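/*
 * A minimal userspace sketch of the same "copy the file onto itself at a
 * new offset, one bounded chunk at a time" idea used by hfs_clonefile()
 * above.  Purely illustrative: clone_within_file() is a hypothetical helper
 * and is not part of this file.
 */
#if 0
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>

static int
clone_within_file(int fd, off_t writebase, off_t copysize, size_t bufsize)
{
	char *buf = malloc(bufsize);
	off_t offset = 0;
	int error = 0;

	if (buf == NULL)
		return (-1);
	while (offset < copysize && error == 0) {
		/* Never read or write more than one buffer's worth per pass. */
		size_t iosize = (copysize - offset < (off_t)bufsize) ?
		    (size_t)(copysize - offset) : bufsize;

		if (pread(fd, buf, iosize, offset) != (ssize_t)iosize ||
		    pwrite(fd, buf, iosize, writebase + offset) != (ssize_t)iosize)
			error = -1;
		else
			offset += (off_t)iosize;
	}
	free(buf);
	return (error);
}
#endif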
/*
 * Clone a system (metadata) file.
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
	kauth_cred_t cred, struct proc *p)
{
	struct buf *bp = NULL;
	daddr64_t start_blk;

	iosize = GetLogicalBlockSize(vp);
	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
	breadcnt = bufsize / iosize;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {

	start_blk = ((daddr64_t)blkstart * blksize) / iosize;
	last_blk  = ((daddr64_t)blkcnt * blksize) / iosize;

	while (blkno < last_blk) {
		/*
		 * Read up to a megabyte
		 */
		for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
			error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
			if (error) {
				printf("hfs_clonesysfile: meta_bread error %d\n", error);

			if (buf_count(bp) != iosize) {
				printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));

			bcopy((char *)buf_dataptr(bp), offset, iosize);

			buf_markinvalid(bp);
		}

		/*
		 * Write up to a megabyte
		 */
		for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
			bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
			if (bp == NULL) {
				printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);

			bcopy(offset, (char *)buf_dataptr(bp), iosize);
			error = (int)buf_bwrite(bp);
		}
	}

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	error = hfs_fsync(vp, MNT_WAIT, 0, p);