/*
 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*	@(#)hfs_readwrite.c	1.0
 *
 *	(c) 1998-2001 Apple Computer, Inc.  All Rights Reserved
 *
 *	hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 *
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vnode_internal.h>
#include <sys/uio.h>
#include <sys/vfs_context.h>
#include <sys/fsevents.h>
#include <kern/kalloc.h>
#include <sys/disk.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

#include <sys/ubc.h>
#include <sys/ubc_internal.h>

#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>

#include <sys/kdebug.h>

#include "hfs.h"
#include "hfs_attrlist.h"
#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
#include "hfs_dbg.h"

#define can_cluster(size)  ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))

enum {
    MAXHFSFILESIZE = 0x7FFFFFFF		/* this needs to go in the mount structure */
};

/* from bsd/vfs/vfs_cluster.c */
extern int is_file_clean(vnode_t vp, off_t filesize);

static int  hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
static int  hfs_clonefile(struct vnode *, int, int, int);
static int  hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);

int flush_cache_on_write = 0;
SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");

/*
 * Read data from a file.
 */
int
hfs_vnop_read(struct vnop_read_args *ap)
{
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	off_t filesize;
	off_t filebytes;
	off_t start_resid = uio_resid(uio);
	off_t offset = uio_offset(uio);
	int retval = 0;

	/* Preflight checks */
	if (!vnode_isreg(vp)) {
		/* can only read regular files */
		if (vnode_isdir(vp))
			return (EISDIR);
		else
			return (EPERM);
	}
	if (start_resid == 0)
		return (0);		/* Nothing left to do */
	if (offset < 0)
		return (EINVAL);	/* cant read from a negative offset */

	cp = VTOC(vp);
	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);

	/* Protect against a size change. */
	hfs_lock_truncate(cp, 0);

	filesize = fp->ff_size;
	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
	if (offset > filesize) {
		if ((hfsmp->hfs_flags & HFS_STANDARD) &&
		    (offset > (off_t)MAXHFSFILESIZE)) {
			retval = EFBIG;
		}
		goto exit;
	}

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

	retval = cluster_read(vp, uio, filesize, ap->a_ioflag);

	cp->c_touch_acctime = TRUE;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

	/*
	 * Keep track blocks read
	 */
	if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
		int took_cnode_lock = 0;
		off_t bytesread;

		bytesread = start_resid - uio_resid(uio);

		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			took_cnode_lock = 1;
		}
		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < hfsmp->hfc_timebase) {
			struct timeval tv;

			fp->ff_bytesread = bytesread;
			microtime(&tv);
			cp->c_atime = tv.tv_sec;
		} else {
			fp->ff_bytesread += bytesread;
		}
		if (took_cnode_lock)
			hfs_unlock(cp);
	}
exit:
	hfs_unlock_truncate(cp, 0);
	return (retval);
}
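
/*
 * Illustrative note (not part of the original sources): cluster_read()
 * advances the uio as it copies data, which is why the hot-file accounting
 * above can compute the bytes actually transferred as
 * start_resid - uio_resid(uio).  For example, a 16384-byte request that
 * stops 4096 bytes short of EOF leaves uio_resid(uio) == 4096 and records
 * 12288 bytes read.
 */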

/*
 * Write data to a file.
 */
int
hfs_vnop_write(struct vnop_write_args *ap)
{
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	kauth_cred_t cred = NULL;
	off_t origFileSize;
	off_t writelimit;
	off_t bytesToAdd = 0;
	off_t actualBytesAdded;
	off_t filebytes;
	off_t offset;
	size_t resid;
	int eflags;
	int ioflag = ap->a_ioflag;
	int retval = 0;
	int lockflags;
	int cnode_locked = 0;
	int partialwrite = 0;
	int exclusive_lock = 0;

	// LP64todo - fix this! uio_resid may be 64-bit value
	resid = uio_resid(uio);
	offset = uio_offset(uio);

	if (ioflag & IO_APPEND) {
		exclusive_lock = 1;
	}
	if (offset < 0)
		return (EINVAL);
	if (resid == 0)
		return (E_NONE);
	if (!vnode_isreg(vp))
		return (EPERM);		/* Can only write regular files */

	cp = VTOC(vp);
	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);

	eflags = kEFDeferMask;	/* defer file block allocations */
#ifdef HFS_SPARSE_DEV
	/*
	 * When the underlying device is sparse and space
	 * is low (< 8MB), stop doing delayed allocations
	 * and begin doing synchronous I/O.
	 */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
	    (hfs_freeblks(hfsmp, 0) < 2048)) {
		eflags &= ~kEFDeferMask;
		ioflag |= IO_SYNC;
	}
#endif /* HFS_SPARSE_DEV */

again:
	/* Protect against a size change. */
	hfs_lock_truncate(cp, exclusive_lock);

	if (ioflag & IO_APPEND) {
		uio_setoffset(uio, fp->ff_size);
		offset = fp->ff_size;
	}
	if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
		retval = EPERM;
		goto exit;
	}

	origFileSize = fp->ff_size;
	writelimit = offset + resid;
	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;

	/* If the truncate lock is shared, and if we either have virtual
	 * blocks or will need to extend the file, upgrade the truncate
	 * to exclusive lock.  If upgrade fails, we lose the lock and
	 * have to get exclusive lock again
	 */
	if ((exclusive_lock == 0) &&
	    ((fp->ff_unallocblocks != 0) || (writelimit > filebytes))) {
		exclusive_lock = 1;
		/* Lock upgrade failed and we lost our shared lock, try again */
		if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
			goto again;
		}
	}

	if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
		goto exit;
	}
	cnode_locked = 1;

	if (!exclusive_lock) {
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
			(int)offset, uio_resid(uio), (int)fp->ff_size,
			(int)filebytes, 0);
	}

	/* Check if we do not need to extend the file */
	if (writelimit <= filebytes) {
		goto sizeok;
	}

	cred = vfs_context_ucred(ap->a_context);
	bytesToAdd = writelimit - filebytes;

#if QUOTA
	retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
			   cred, 0);
	if (retval)
		goto exit;
#endif /* QUOTA */

	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto exit;
	}

	while (writelimit > filebytes) {
		bytesToAdd = writelimit - filebytes;
		if (cred && suser(cred, NULL) != 0)
			eflags |= kEFReserveMask;

		/* Protect extents b-tree and allocation bitmap */
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

		/* Files that are changing size are not hot file candidates. */
		if (hfsmp->hfc_stage == HFC_RECORDING) {
			fp->ff_bytesread = 0;
		}
		retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
				0, eflags, &actualBytesAdded));

		hfs_systemfile_unlock(hfsmp, lockflags);

		if ((actualBytesAdded == 0) && (retval == E_NONE))
			retval = ENOSPC;
		if (retval != E_NONE)
			break;
		filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
			(int)offset, uio_resid(uio), (int)fp->ff_size,  (int)filebytes, 0);
	}
	(void) hfs_update(vp, TRUE);
	(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
	(void) hfs_end_transaction(hfsmp);

	/*
	 * If we didn't grow the file enough try a partial write.
	 * POSIX expects this behavior.
	 */
	if ((retval == ENOSPC) && (filebytes > offset)) {
		retval = 0;
		partialwrite = 1;
		uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
		resid -= bytesToAdd;
		writelimit = filebytes;
	}
sizeok:
	if (retval == E_NONE) {
		off_t filesize;
		off_t zero_off;
		off_t tail_off;
		off_t inval_start;
		off_t inval_end;
		off_t io_start;
		int lflag;
		struct rl_entry *invalid_range;

		if (writelimit > fp->ff_size)
			filesize = writelimit;
		else
			filesize = fp->ff_size;

		lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);

		if (offset <= fp->ff_size) {
			zero_off = offset & ~PAGE_MASK_64;

			/* Check to see whether the area between the zero_offset and the start
			   of the transfer to see whether is invalid and should be zero-filled
			   as part of the transfer:
			 */
			if (offset > zero_off) {
				if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
					lflag |= IO_HEADZEROFILL;
			}
		} else {
			off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;

			/* The bytes between fp->ff_size and uio->uio_offset must never be
			   read without being zeroed.  The current last block is filled with zeroes
			   if it holds valid data but in all cases merely do a little bookkeeping
			   to track the area from the end of the current last page to the start of
			   the area actually written.  For the same reason only the bytes up to the
			   start of the page where this write will start is invalidated; any remainder
			   before uio->uio_offset is explicitly zeroed as part of the cluster_write.

			   Note that inval_start, the start of the page after the current EOF,
			   may be past the start of the write, in which case the zeroing
			   will be handled by the cluster_write of the actual data.
			 */
			inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
			inval_end = offset & ~PAGE_MASK_64;
			zero_off = fp->ff_size;

			if ((fp->ff_size & PAGE_MASK_64) &&
			    (rl_scan(&fp->ff_invalidranges,
					eof_page_base,
					fp->ff_size - 1,
					&invalid_range) != RL_NOOVERLAP)) {
				/* The page containing the EOF is not valid, so the
				   entire page must be made inaccessible now.  If the write
				   starts on a page beyond the page containing the eof
				   (inval_end > eof_page_base), add the
				   whole page to the range to be invalidated.  Otherwise
				   (i.e. if the write starts on the same page), zero-fill
				   the entire page explicitly now:
				 */
				if (inval_end > eof_page_base) {
					inval_start = eof_page_base;
				} else {
					zero_off = eof_page_base;
				}
			}

			if (inval_start < inval_end) {
				struct timeval tv;
				/* There's some range of data that's going to be marked invalid */

				if (zero_off < inval_start) {
					/* The pages between inval_start and inval_end are going to be invalidated,
					   and the actual write will start on a page past inval_end.  Now's the last
					   chance to zero-fill the page containing the EOF:
					 */
					hfs_unlock(cp);
					cnode_locked = 0;
					retval = cluster_write(vp, (uio_t) 0,
							fp->ff_size, inval_start,
							zero_off, (off_t)0,
							lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
					hfs_lock(cp, HFS_FORCE_LOCK);
					cnode_locked = 1;
					if (retval) goto ioerr_exit;
					offset = uio_offset(uio);
				}

				/* Mark the remaining area of the newly allocated space as invalid: */
				rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
				microuptime(&tv);
				cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
				zero_off = fp->ff_size = inval_end;
			}

			if (offset > zero_off) lflag |= IO_HEADZEROFILL;
		}

		/* Check to see whether the area between the end of the write and the end of
		   the page it falls in is invalid and should be zero-filled as part of the transfer:
		 */
		tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
		if (tail_off > filesize) tail_off = filesize;
		if (tail_off > writelimit) {
			if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
				lflag |= IO_TAILZEROFILL;
			}
		}

		/*
		 * if the write starts beyond the current EOF (possibly advanced in the
		 * zeroing of the last block, above), then we'll zero fill from the current EOF
		 * to where the write begins:
		 *
		 * NOTE: If (and ONLY if) the portion of the file about to be written is
		 *	 before the current EOF it might be marked as invalid now and must be
		 *	 made readable (removed from the invalid ranges) before cluster_write
		 *	 tries to write it:
		 */
		io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
		if (io_start < fp->ff_size) {
			off_t io_end;

			io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
			rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
		}

		hfs_unlock(cp);
		cnode_locked = 0;
		retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
				tail_off, lflag | IO_NOZERODIRTY);
		if (retval) {
			goto ioerr_exit;
		}
		offset = uio_offset(uio);
		if (offset > fp->ff_size) {
			fp->ff_size = offset;

			ubc_setsize(vp, fp->ff_size);       /* XXX check errors */
			/* Files that are changing size are not hot file candidates. */
			if (hfsmp->hfc_stage == HFC_RECORDING)
				fp->ff_bytesread = 0;
		}
		if (resid > uio_resid(uio)) {
			cp->c_touch_chgtime = TRUE;
			cp->c_touch_modtime = TRUE;
		}
	}
	if (partialwrite) {
		uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
		resid += bytesToAdd;
	}

	// XXXdbg - see radar 4871353 for more info
	{
		if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
			VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
		}
	}
	HFS_KNOTE(vp, NOTE_WRITE);

ioerr_exit:
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (cp->c_mode & (S_ISUID | S_ISGID)) {
		cred = vfs_context_ucred(ap->a_context);
		if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
			if (!cnode_locked) {
				hfs_lock(cp, HFS_FORCE_LOCK);
				cnode_locked = 1;
			}
			cp->c_mode &= ~(S_ISUID | S_ISGID);
		}
	}
	if (retval) {
		if (ioflag & IO_UNIT) {
			if (!cnode_locked) {
				hfs_lock(cp, HFS_FORCE_LOCK);
				cnode_locked = 1;
			}
			(void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
					   0, ap->a_context);
			// LP64todo - fix this!  resid needs to by user_ssize_t
			uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
			uio_setresid(uio, resid);
			filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
		}
	} else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
		if (!cnode_locked) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			cnode_locked = 1;
		}
		retval = hfs_update(vp, TRUE);
	}
	/* Updating vcbWrCnt doesn't need to be atomic. */
	hfsmp->vcbWrCnt++;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
exit:
	if (cnode_locked)
		hfs_unlock(cp);
	hfs_unlock_truncate(cp, exclusive_lock);
	return (retval);
}
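
/*
 * Illustrative sketch (not from the original sources): the zero-fill logic
 * in hfs_vnop_write() above is all page arithmetic.  With 4K pages, a write
 * at offset 0x1234 into a file whose EOF is 0x1000 yields:
 *
 *	inval_start = (0x1000 + 0xfff) & ~0xfff = 0x1000
 *	inval_end   =  0x1234          & ~0xfff = 0x1000
 *
 * so no pages are marked invalid (inval_start is not < inval_end), and
 * cluster_write() zeroes bytes 0x1000..0x1233 via IO_HEADZEROFILL instead.
 */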

/* support for the "bulk-access" fcntl */

#define CACHE_LEVELS 16
#define NUM_CACHE_ENTRIES (64*16)
#define PARENT_IDS_FLAG 0x100

struct access_cache {
	int numcached;
	int cachehits; /* these two for statistics gathering */
	int lookups;
	unsigned int *acache;
	unsigned char *haveaccess;
};

struct access_t {
	uid_t     uid;              /* IN: effective user id */
	short     flags;            /* IN: access requested (i.e. R_OK) */
	short     num_groups;       /* IN: number of groups user belongs to */
	int       num_files;        /* IN: number of files to process */
	int       *file_ids;        /* IN: array of file ids */
	gid_t     *groups;          /* IN: array of groups */
	short     *access;          /* OUT: access info for each file (0 for 'has access') */
};

struct user_access_t {
	uid_t        uid;           /* IN: effective user id */
	short        flags;         /* IN: access requested (i.e. R_OK) */
	short        num_groups;    /* IN: number of groups user belongs to */
	int          num_files;     /* IN: number of files to process */
	user_addr_t  file_ids;      /* IN: array of file ids */
	user_addr_t  groups;        /* IN: array of groups */
	user_addr_t  access;        /* OUT: access info for each file (0 for 'has access') */
};


// these are the "extended" versions of the above structures
// note that it is crucial that they be different sized than
// the regular version
struct ext_access_t {
	uint32_t   flags;           /* IN: access requested (i.e. R_OK) */
	uint32_t   num_files;       /* IN: number of files to process */
	uint32_t   map_size;        /* IN: size of the bit map */
	uint32_t  *file_ids;        /* IN: Array of file ids */
	char      *bitmap;          /* OUT: hash-bitmap of interesting directory ids */
	short     *access;          /* OUT: access info for each file (0 for 'has access') */
	uint32_t   num_parents;     /* future use */
	cnid_t    *parents;         /* future use */
};

struct ext_user_access_t {
	uint32_t     flags;         /* IN: access requested (i.e. R_OK) */
	uint32_t     num_files;     /* IN: number of files to process */
	uint32_t     map_size;      /* IN: size of the bit map */
	user_addr_t  file_ids;      /* IN: array of file ids */
	user_addr_t  bitmap;        /* IN: array of groups */
	user_addr_t  access;        /* OUT: access info for each file (0 for 'has access') */
	uint32_t     num_parents;   /* future use */
	user_addr_t  parents;       /* future use */
};
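
/*
 * Illustrative userland sketch (not part of this file; variable names are
 * assumptions for the example): the bulk-access call arrives through
 * fsctl(2) against the mounted volume, roughly:
 *
 *	struct access_t args = { 0 };
 *	args.uid = geteuid();
 *	args.flags = R_OK;
 *	args.num_files = n;
 *	args.file_ids = ids;		// array of n catalog node ids
 *	args.access = results;		// array of n shorts; 0 == has access
 *	fsctl("/Volumes/Example", HFS_BULKACCESS_FSCTL, &args, 0);
 */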

/*
 * Perform a binary search for the given parent_id. Return value is
 * the index if there is a match.  If no_match_indexp is non-NULL it
 * will be assigned with the index to insert the item (even if it was
 * not found), i.e. the index at which it should be inserted.
 */
static int
cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
{
    int index = -1;
    unsigned int lo = 0;

    do {
	unsigned int mid = ((hi - lo)/2) + lo;
	unsigned int this_id = array[mid];

	if (parent_id == this_id) {
	    hi = mid;
	    break;
	}

	if (parent_id < this_id) {
	    hi = mid;
	    continue;
	}

	if (parent_id > this_id) {
	    lo = mid + 1;
	    continue;
	}
    } while (lo < hi);

    /* check if lo and hi converged on the match */
    if (parent_id == array[hi]) {
	index = hi;
    }

    if (no_match_indexp) {
	*no_match_indexp = hi;
    }

    return index;
}
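
/*
 * Worked example (illustrative only): searching {5, 9, 17, 23} with hi == 3
 * for parent_id 17 converges with hi == 2 and returns 2; searching for 10
 * returns -1 and leaves *no_match_indexp == 2, the slot where 10 would be
 * inserted to keep the array sorted.
 */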

static int
lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
{
    unsigned int hi;
    int matches = 0;
    int index, no_match_index;

    if (cache->numcached == 0) {
	*indexp = 0;
	return 0; // table is empty, so insert at index=0 and report no match
    }

    if (cache->numcached > NUM_CACHE_ENTRIES) {
	/*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
	  cache->numcached, NUM_CACHE_ENTRIES);*/
	cache->numcached = NUM_CACHE_ENTRIES;
    }

    hi = cache->numcached - 1;

    index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);

    /* if no existing entry found, find index for new one */
    if (index == -1) {
	index = no_match_index;
	matches = 0;
    } else {
	matches = 1;
    }

    *indexp = index;
    return matches;
}

/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in). We currently do a replace rather
 * than an insert if the cache is full.
 */
static void
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
    int lookup_index = -1;

    /* need to do a lookup first if -1 passed for index */
    if (index == -1) {
	if (lookup_bucket(cache, &lookup_index, nodeID)) {
	    if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
		// only update an entry if the previous access was ESRCH (i.e. a scope checking error)
		cache->haveaccess[lookup_index] = access;
	    }

	    /* mission accomplished */
	    return;
	} else {
	    index = lookup_index;
	}
    }

    /* if the cache is full, do a replace rather than an insert */
    if (cache->numcached >= NUM_CACHE_ENTRIES) {
	//printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
	cache->numcached = NUM_CACHE_ENTRIES-1;

	if (index > cache->numcached) {
	    // printf("index %d pinned to %d\n", index, cache->numcached);
	    index = cache->numcached;
	}
    }

    if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
	index++;
    }

    if (index >= 0 && index < cache->numcached) {
	/* only do bcopy if we're inserting */
	bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
	bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
    }

    cache->numcached++;
    cache->acache[index] = nodeID;
    cache->haveaccess[index] = access;
}
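
/*
 * Worked example (illustrative only): with acache == {10, 20, 40} and
 * numcached == 3, add_node(cache, -1, 30, 0) finds no match, gets back
 * insertion index 2, shifts {40} up one slot with bcopy, and stores 30 at
 * index 2, leaving {10, 20, 30, 40} with numcached == 4.
 */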

struct cinfo {
    uid_t     uid;
    gid_t     gid;
    mode_t    mode;
    cnid_t    parentcnid;
    u_int16_t recflags;
};

static int
snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
{
    struct cinfo *cip = (struct cinfo *)arg;

    cip->uid = attrp->ca_uid;
    cip->gid = attrp->ca_gid;
    cip->mode = attrp->ca_mode;
    cip->parentcnid = descp->cd_parentcnid;
    cip->recflags = attrp->ca_recflags;

    return (0);
}

/*
 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
 * isn't incore, then go to the catalog.
 */
static int
do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
    struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
{
    int error = 0;

    /* if this id matches the one the fsctl was called with, skip the lookup */
    if (cnid == skip_cp->c_cnid) {
	cnattrp->ca_uid = skip_cp->c_uid;
	cnattrp->ca_gid = skip_cp->c_gid;
	cnattrp->ca_mode = skip_cp->c_mode;
	keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
    } else {
	struct cinfo c_info;

	/* otherwise, check the cnode hash incase the file/dir is incore */
	if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
	    cnattrp->ca_uid = c_info.uid;
	    cnattrp->ca_gid = c_info.gid;
	    cnattrp->ca_mode = c_info.mode;
	    cnattrp->ca_recflags = c_info.recflags;
	    keyp->hfsPlus.parentID = c_info.parentcnid;
	} else {
	    int lockflags;

	    lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

	    /* lookup this cnid in the catalog */
	    error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);

	    hfs_systemfile_unlock(hfsmp, lockflags);

	    cache->lookups++;
	}
    }

    return (error);
}

/*
 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
 * up to CACHE_LEVELS as we progress towards the root.
 */
static int
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
    struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev,
    struct vfs_context *my_context,
    char *bitmap,
    uint32_t map_size,
    cnid_t *parents,
    uint32_t num_parents)
{
    int myErr = 0;
    int myResult;
    HFSCatalogNodeID thisNodeID;
    unsigned int myPerms;
    struct cat_attr cnattr;
    int cache_index = -1, scope_index = -1, scope_idx_start = -1;
    CatalogKey catkey;

    int i = 0, ids_to_cache = 0;
    int parent_ids[CACHE_LEVELS];

    thisNodeID = nodeID;
    while (thisNodeID >= kRootDirID) {
	myResult = 0;   /* default to "no access" */

	/* check the cache before resorting to hitting the catalog */

	/* ASSUMPTION: access info of cached entries is "final"... i.e. no need
	 * to look any further after hitting cached dir */

	if (lookup_bucket(cache, &cache_index, thisNodeID)) {
	    cache->cachehits++;
	    myErr = cache->haveaccess[cache_index];
	    if (scope_index != -1) {
		if (myErr == ESRCH) {
		    myErr = 0;
		}
	    } else {
		scope_index = 0;   // so we'll just use the cache result
		scope_idx_start = ids_to_cache;
	    }
	    myResult = (myErr == 0) ? 1 : 0;
	    goto ExitThisRoutine;
	}

	if (parents) {
	    int tmp;
	    tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
	    if (scope_index == -1)
		scope_index = tmp;
	    if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
		scope_idx_start = ids_to_cache;
	    }
	}

	/* remember which parents we want to cache */
	if (ids_to_cache < CACHE_LEVELS) {
	    parent_ids[ids_to_cache] = thisNodeID;
	    ids_to_cache++;
	}
	// Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
	if (bitmap && map_size) {
	    bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
	}

	/* do the lookup (checks the cnode hash, then the catalog) */
	myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr);
	if (myErr) {
	    goto ExitThisRoutine; /* no access */
	}

	/* Root always gets access. */
	if (suser(myp_ucred, NULL) == 0) {
	    thisNodeID = catkey.hfsPlus.parentID;
	    myResult = 1;
	    continue;
	}

	// if the thing has acl's, do the full permission check
	if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
	    struct vnode *vp;

	    /* get the vnode for this cnid */
	    myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0);
	    if ( myErr ) {
		myResult = 0;
		goto ExitThisRoutine;
	    }

	    thisNodeID = VTOC(vp)->c_parentcnid;

	    hfs_unlock(VTOC(vp));

	    if (vnode_vtype(vp) == VDIR) {
		myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
	    } else {
		myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
	    }

	    vnode_put(vp);
	    if (myErr) {
		myResult = 0;
		goto ExitThisRoutine;
	    }
	} else {
	    unsigned int flags;

	    myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
		cnattr.ca_mode, hfsmp->hfs_mp,
		myp_ucred, theProcPtr);

	    if (cnattr.ca_mode & S_IFDIR) {
		flags = R_OK | X_OK;
	    } else {
		flags = R_OK;
	    }
	    if ( (myPerms & flags) != flags) {
		myResult = 0;
		myErr = EACCES;
		goto ExitThisRoutine;   /* no access */
	    }

	    /* up the hierarchy we go */
	    thisNodeID = catkey.hfsPlus.parentID;
	}
    }

    /* if here, we have access to this node */
    myResult = 1;

  ExitThisRoutine:
    if (parents && myErr == 0 && scope_index == -1) {
	myErr = ESRCH;
    }

    if (myErr) {
	myResult = 0;
    }
    *err = myErr;

    /* cache the parent directory(ies) */
    for (i = 0; i < ids_to_cache; i++) {
	if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
	    add_node(cache, -1, parent_ids[i], ESRCH);
	} else {
	    add_node(cache, -1, parent_ids[i], myErr);
	}
    }

    return (myResult);
}
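
/*
 * Illustrative note (not in the original sources): the bitmap above is a
 * coarse Bloom-style filter keyed directly on the node id.  For example,
 * with map_size == 16, directory id 0x4A sets bit (0x4A & 7) == 2 in byte
 * (0x4A / 8) % 16 == 9; userland can test that same bit to decide whether
 * a directory might be "interesting" before asking the kernel again.
 */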

static int
do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
    struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
{
    boolean_t is64bit;

    /*
     * NOTE: on entry, the vnode is locked. Incase this vnode
     * happens to be in our list of file_ids, we'll note it
     * avoid calling hfs_chashget_nowait() on that id as that
     * will cause a "locking against myself" panic.
     */
    Boolean check_leaf = true;

    struct ext_user_access_t *user_access_structp;
    struct ext_user_access_t tmp_user_access;
    struct access_cache cache;

    int error = 0;
    unsigned int i;

    dev_t dev = VTOC(vp)->c_dev;

    short flags;
    unsigned int num_files = 0;
    int map_size = 0;
    int num_parents = 0;
    int *file_ids = NULL;
    short *access = NULL;
    char *bitmap = NULL;
    cnid_t *parents = NULL;
    int leaf_index;

    cnid_t cnid;
    cnid_t prevParent_cnid = 0;
    unsigned int myPerms;
    short myaccess = 0;
    struct cat_attr cnattr;
    CatalogKey catkey;
    struct cnode *skip_cp = VTOC(vp);
    kauth_cred_t cred = vfs_context_ucred(context);
    proc_t p = vfs_context_proc(context);

    is64bit = proc_is64bit(p);

    /* initialize the local cache and buffers */
    cache.numcached = 0;
    cache.cachehits = 0;
    cache.lookups = 0;
    cache.acache = NULL;
    cache.haveaccess = NULL;

    /* struct copyin done during dispatch... need to copy file_id array separately */
    if (ap->a_data == NULL) {
	error = EINVAL;
	goto err_exit_bulk_access;
    }

    if (is64bit) {
	if (arg_size != sizeof(struct ext_user_access_t)) {
	    error = EINVAL;
	    goto err_exit_bulk_access;
	}

	user_access_structp = (struct ext_user_access_t *)ap->a_data;

    } else if (arg_size == sizeof(struct access_t)) {
	struct access_t *accessp = (struct access_t *)ap->a_data;

	// convert an old style bulk-access struct to the new style
	tmp_user_access.flags       = accessp->flags;
	tmp_user_access.num_files   = accessp->num_files;
	tmp_user_access.map_size    = 0;
	tmp_user_access.file_ids    = CAST_USER_ADDR_T(accessp->file_ids);
	tmp_user_access.bitmap      = (user_addr_t)NULL;
	tmp_user_access.access      = CAST_USER_ADDR_T(accessp->access);
	tmp_user_access.num_parents = 0;
	user_access_structp = &tmp_user_access;

    } else if (arg_size == sizeof(struct ext_access_t)) {
	struct ext_access_t *accessp = (struct ext_access_t *)ap->a_data;

	// up-cast from a 32-bit version of the struct
	tmp_user_access.flags       = accessp->flags;
	tmp_user_access.num_files   = accessp->num_files;
	tmp_user_access.map_size    = accessp->map_size;
	tmp_user_access.num_parents = accessp->num_parents;

	tmp_user_access.file_ids    = CAST_USER_ADDR_T(accessp->file_ids);
	tmp_user_access.bitmap      = CAST_USER_ADDR_T(accessp->bitmap);
	tmp_user_access.access      = CAST_USER_ADDR_T(accessp->access);
	tmp_user_access.parents     = CAST_USER_ADDR_T(accessp->parents);

	user_access_structp = &tmp_user_access;
    } else {
	error = EINVAL;
	goto err_exit_bulk_access;
    }

    map_size = user_access_structp->map_size;

    num_files = user_access_structp->num_files;

    num_parents = user_access_structp->num_parents;

    if (num_files < 1) {
	goto err_exit_bulk_access;
    }
    if (num_files > 1024) {
	error = EINVAL;
	goto err_exit_bulk_access;
    }

    if (num_parents > 1024) {
	error = EINVAL;
	goto err_exit_bulk_access;
    }

    file_ids = (int *) kalloc(sizeof(int) * num_files);
    access = (short *) kalloc(sizeof(short) * num_files);
    if (map_size) {
	bitmap = (char *) kalloc(sizeof(char) * map_size);
    }
    if (num_parents) {
	parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
    }

    cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
    cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);

    if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
	if (file_ids) {
	    kfree(file_ids, sizeof(int) * num_files);
	}
	if (bitmap) {
	    kfree(bitmap, sizeof(char) * map_size);
	}
	if (access) {
	    kfree(access, sizeof(short) * num_files);
	}
	if (cache.acache) {
	    kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
	}
	if (cache.haveaccess) {
	    kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
	}
	if (parents) {
	    kfree(parents, sizeof(cnid_t) * num_parents);
	}
	return ENOMEM;
    }

    // make sure the bitmap is zero'ed out...
    if (bitmap) {
	bzero(bitmap, (sizeof(char) * map_size));
    }

    if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
		num_files * sizeof(int)))) {
	goto err_exit_bulk_access;
    }

    if (num_parents) {
	if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
		    num_parents * sizeof(cnid_t)))) {
	    goto err_exit_bulk_access;
	}
    }

    flags = user_access_structp->flags;
    if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
	flags = R_OK;
    }

    /* check if we've been passed leaf node ids or parent ids */
    if (flags & PARENT_IDS_FLAG) {
	check_leaf = false;
    }

    /* Check access to each file_id passed in */
    for (i = 0; i < num_files; i++) {
	leaf_index = -1;
	cnid = (cnid_t) file_ids[i];

	/* root always has access */
	if ((!parents) && (!suser(cred, NULL))) {
	    access[i] = 0;
	    continue;
	}

	if (check_leaf) {
	    /* do the lookup (checks the cnode hash, then the catalog) */
	    error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr);
	    if (error) {
		access[i] = (short) error;
		continue;
	    }

	    if (parents) {
		// Check if the leaf matches one of the parent scopes
		leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
	    }

	    // if the thing has acl's, do the full permission check
	    if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
		struct vnode *cvp;
		int myErr = 0;
		/* get the vnode for this cnid */
		myErr = hfs_vget(hfsmp, cnid, &cvp, 0);
		if ( myErr ) {
		    access[i] = myErr;
		    continue;
		}

		hfs_unlock(VTOC(cvp));

		if (vnode_vtype(cvp) == VDIR) {
		    myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
		} else {
		    myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
		}

		vnode_put(cvp);
		if (myErr) {
		    access[i] = myErr;
		    continue;
		}
	    } else {
		/* before calling CheckAccess(), check the target file for read access */
		myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
		    cnattr.ca_mode, hfsmp->hfs_mp, cred, p);

		/* fail fast if no access */
		if ((myPerms & flags) == 0) {
		    access[i] = EACCES;
		    continue;
		}
	    }
	} else {
	    /* we were passed an array of parent ids */
	    catkey.hfsPlus.parentID = cnid;
	}

	/* if the last guy had the same parent and had access, we're done */
	if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
	    cache.cachehits++;
	    access[i] = 0;
	    continue;
	}

	myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
	    skip_cp, p, cred, dev, context, bitmap, map_size, parents, num_parents);

	if (myaccess || (error == ESRCH && leaf_index != -1)) {
	    access[i] = 0; // have access.. no errors to report
	} else {
	    access[i] = (error != 0 ? (short) error : EACCES);
	}

	prevParent_cnid = catkey.hfsPlus.parentID;
    }

    /* copyout the access array */
    if ((error = copyout((caddr_t)access, user_access_structp->access,
		num_files * sizeof (short)))) {
	goto err_exit_bulk_access;
    }
    if (map_size && bitmap) {
	if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
		    map_size * sizeof (char)))) {
	    goto err_exit_bulk_access;
	}
    }

  err_exit_bulk_access:

    //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);

    if (file_ids)
	kfree(file_ids, sizeof(int) * num_files);
    if (parents)
	kfree(parents, sizeof(cnid_t) * num_parents);
    if (bitmap)
	kfree(bitmap, sizeof(char) * map_size);
    if (access)
	kfree(access, sizeof(short) * num_files);
    if (cache.acache)
	kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
    if (cache.haveaccess)
	kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);

    return (error);
}
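
/*
 * Illustrative note (not in the original sources): on return, access[i] is
 * 0 when file_ids[i] is reachable with the requested rights, and an errno
 * (typically EACCES, or a propagated lookup error) otherwise, so a caller
 * can test each entry of its result array independently.
 */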

/* end "bulk-access" support */


/*
 * Callback for use with freeze ioctl.
 */
static int
hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
{
	vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");

	return 0;
}

/*
 * Control filesystem operating characteristics.
 */
int
hfs_vnop_ioctl( struct vnop_ioctl_args /* {
		vnode_t a_vp;
		int  a_command;
		caddr_t  a_data;
		int  a_fflag;
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode * vp = ap->a_vp;
	struct hfsmount *hfsmp = VTOHFS(vp);
	vfs_context_t context = ap->a_context;
	kauth_cred_t cred = vfs_context_ucred(context);
	proc_t p = vfs_context_proc(context);
	struct vfsstatfs *vfsp;
	boolean_t is64bit;

	is64bit = proc_is64bit(p);

	switch (ap->a_command) {

	case HFS_GETPATH:
	{
		struct vnode *file_vp;
		cnid_t  cnid;
		int  outlen;
		char *bufptr;
		int error;

		/* Caller must be owner of file system. */
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES);
		}
		/* Target vnode must be file system's root. */
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}
		bufptr = (char *)ap->a_data;
		cnid = strtoul(bufptr, NULL, 10);

		if ((error = hfs_vget(hfsmp, cnid, &file_vp, 1))) {
			return (error);
		}
		error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);
		vnode_put(file_vp);

		return (error);
	}

	case HFS_PREV_LINK:
	case HFS_NEXT_LINK:
	{
		cnid_t linkfileid;
		cnid_t nextlinkid;
		cnid_t prevlinkid;
		int error;

		/* Caller must be owner of file system. */
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES);
		}
		/* Target vnode must be file system's root. */
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}
		linkfileid = *(cnid_t *)ap->a_data;
		if (linkfileid < kHFSFirstUserCatalogNodeID) {
			return (EINVAL);
		}
		if ((error = hfs_lookuplink(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
			return (error);
		}
		if (ap->a_command == HFS_NEXT_LINK) {
			*(cnid_t *)ap->a_data = nextlinkid;
		} else {
			*(cnid_t *)ap->a_data = prevlinkid;
		}
		return (0);
	}

	case HFS_RESIZE_PROGRESS: {

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}
		return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
	}

	case HFS_RESIZE_VOLUME: {
		u_int64_t newsize;
		u_int64_t cursize;

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}
		newsize = *(u_int64_t *)ap->a_data;
		cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;

		if (newsize > cursize) {
			return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
		} else if (newsize < cursize) {
			return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
		} else {
			return (0);
		}
	}

	case HFS_CHANGE_NEXT_ALLOCATION: {
		int error = 0;		/* Assume success */
		u_int32_t location;

		if (vnode_vfsisrdonly(vp)) {
			return (EROFS);
		}
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		location = *(u_int32_t *)ap->a_data;
		if ((location >= hfsmp->allocLimit) &&
			(location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
			error = EINVAL;
			goto fail_change_next_allocation;
		}
		/* Return previous value. */
		*(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
		if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
			/* On magic value for location, set nextAllocation to next block
			 * after metadata zone and set flag in mount structure to indicate
			 * that nextAllocation should not be updated again.
			 */
			HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
			hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
		} else {
			hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
			HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
		}
		MarkVCBDirty(hfsmp);
fail_change_next_allocation:
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		return (error);
	}

#ifdef HFS_SPARSE_DEV
	case HFS_SETBACKINGSTOREINFO: {
		struct vnode * bsfs_rootvp;
		struct vnode * di_vp;
		struct hfs_backingstoreinfo *bsdata;
		int error = 0;

		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			return (EALREADY);
		}
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
		if (bsdata == NULL) {
			return (EINVAL);
		}
		if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
			return (error);
		}
		if ((error = vnode_getwithref(di_vp))) {
			file_drop(bsdata->backingfd);
			return (error);
		}

		if (vnode_mount(vp) == vnode_mount(di_vp)) {
			(void)vnode_put(di_vp);
			file_drop(bsdata->backingfd);
			return (EINVAL);
		}

		/*
		 * Obtain the backing fs root vnode and keep a reference
		 * on it.  This reference will be dropped in hfs_unmount.
		 */
		error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
		if (error) {
			(void)vnode_put(di_vp);
			file_drop(bsdata->backingfd);
			return (error);
		}
		vnode_ref(bsfs_rootvp);
		vnode_put(bsfs_rootvp);

		hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
		hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
		hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
		hfsmp->hfs_sparsebandblks *= 4;

		vfs_markdependency(hfsmp->hfs_mp);

		(void)vnode_put(di_vp);
		file_drop(bsdata->backingfd);
		return (0);
	}

	case HFS_CLRBACKINGSTOREINFO: {
		struct vnode * tmpvp;

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
		    hfsmp->hfs_backingfs_rootvp) {

			hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
			tmpvp = hfsmp->hfs_backingfs_rootvp;
			hfsmp->hfs_backingfs_rootvp = NULLVP;
			hfsmp->hfs_sparsebandblks = 0;
			vnode_rele(tmpvp);
		}
		return (0);
	}
#endif /* HFS_SPARSE_DEV */

	case F_FREEZE_FS: {
		struct mount *mp;

		if (!is_suser())
			return (EACCES);

		mp = vnode_mount(vp);
		hfsmp = VFSTOHFS(mp);

		if (!(hfsmp->jnl))
			return (ENOTSUP);

		lck_rw_lock_exclusive(&hfsmp->hfs_insync);

		// flush things before we get started to try and prevent
		// dirty data from being paged out while we're frozen.
		// note: can't do this after taking the lock as it will
		// deadlock against ourselves.
		vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
		hfs_global_exclusive_lock_acquire(hfsmp);
		journal_flush(hfsmp->jnl);

		// don't need to iterate on all vnodes, we just need to
		// wait for writes to the system files and the device vnode
		if (HFSTOVCB(hfsmp)->extentsRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
		if (HFSTOVCB(hfsmp)->catalogRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
		if (HFSTOVCB(hfsmp)->allocationsRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
		if (hfsmp->hfs_attribute_vp)
			vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
		vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");

		hfsmp->hfs_freezing_proc = current_proc();

		return (0);
	}

	case F_THAW_FS: {
		if (!is_suser())
			return (EACCES);

		// if we're not the one who froze the fs then we
		// can't thaw it.
		if (hfsmp->hfs_freezing_proc != current_proc()) {
			return EPERM;
		}

		// NOTE: if you add code here, also go check the
		//       code that "thaws" the fs in hfs_vnop_close()
		//
		hfsmp->hfs_freezing_proc = NULL;
		hfs_global_exclusive_lock_release(hfsmp);
		lck_rw_unlock_exclusive(&hfsmp->hfs_insync);

		return (0);
	}

	case HFS_BULKACCESS_FSCTL: {
		int size;

		if (hfsmp->hfs_flags & HFS_STANDARD) {
			return EINVAL;
		}

		if (is64bit) {
			size = sizeof(struct user_access_t);
		} else {
			size = sizeof(struct access_t);
		}

		return do_bulk_access_check(hfsmp, vp, ap, size, context);
	}

	case HFS_EXT_BULKACCESS_FSCTL: {
		int size;

		if (hfsmp->hfs_flags & HFS_STANDARD) {
			return EINVAL;
		}

		if (is64bit) {
			size = sizeof(struct ext_user_access_t);
		} else {
			size = sizeof(struct ext_access_t);
		}

		return do_bulk_access_check(hfsmp, vp, ap, size, context);
	}

	case HFS_SETACLSTATE: {
		int state;

		if (ap->a_data == NULL) {
			return (EINVAL);
		}

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		state = *(int *)ap->a_data;

		// super-user can enable or disable acl's on a volume.
		// the volume owner can only enable acl's
		if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
			return (EPERM);
		}
		if (state == 0 || state == 1)
			return hfs_set_volxattr(hfsmp, HFS_SETACLSTATE, state);
		else
			return (EINVAL);
	}

	case HFS_SET_XATTREXTENTS_STATE: {
		int state;

		if (ap->a_data == NULL) {
			return (EINVAL);
		}

		state = *(int *)ap->a_data;

		/* Super-user can enable or disable extent-based extended
		 * attribute support on a volume
		 */
		if (!is_suser()) {
			return (EPERM);
		}
		if (state == 0 || state == 1)
			return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
		else
			return (EINVAL);
	}

	case F_FULLFSYNC: {
		int error;

		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
		if (error == 0) {
			error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
			hfs_unlock(VTOC(vp));
		}

		return error;
	}

	case F_CHKCLEAN: {
		register struct cnode *cp;
		int error;

		if (!vnode_isreg(vp))
			return EINVAL;

		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
		if (error == 0) {
			cp = VTOC(vp);
			/*
			 * used by regression test to determine if
			 * all the dirty pages (via write) have been cleaned
			 * after a call to 'fsysnc'.
			 */
			error = is_file_clean(vp, VTOF(vp)->ff_size);
			hfs_unlock(cp);
		}
		return (error);
	}

	case F_RDADVISE: {
		register struct radvisory *ra;
		struct filefork *fp;
		int error;

		if (!vnode_isreg(vp))
			return EINVAL;

		ra = (struct radvisory *)(ap->a_data);
		fp = VTOF(vp);

		/* Protect against a size change. */
		hfs_lock_truncate(VTOC(vp), TRUE);

		if (ra->ra_offset >= fp->ff_size) {
			error = EFBIG;
		} else {
			error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
		}

		hfs_unlock_truncate(VTOC(vp), TRUE);
		return (error);
	}

	case F_READBOOTSTRAP:
	case F_WRITEBOOTSTRAP:
	{
		struct vnode *devvp = NULL;
		user_fbootstraptransfer_t *user_bootstrapp;
		int devBlockSize;
		int error;
		uio_t auio;
		daddr64_t blockNumber;
		u_long blockOffset;
		u_long xfersize;
		struct buf *bp;
		user_fbootstraptransfer_t user_bootstrap;

		if (!vnode_isvroot(vp))
			return (EINVAL);
		/* LP64 - when caller is a 64 bit process then we are passed a pointer
		 * to a user_fbootstraptransfer_t else we get a pointer to a
		 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
		 */
		if (is64bit) {
			user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
		} else {
			fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
			user_bootstrapp = &user_bootstrap;
			user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
			user_bootstrap.fbt_length = bootstrapp->fbt_length;
			user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
		}
		if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
			return EINVAL;

		devvp = VTOHFS(vp)->hfs_devvp;
		auio = uio_create(1, user_bootstrapp->fbt_offset,
				  is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
				  (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
		uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);

		devBlockSize = vfs_devblocksize(vnode_mount(vp));

		while (uio_resid(auio) > 0) {
			blockNumber = uio_offset(auio) / devBlockSize;
			error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
			if (error) {
				if (bp) buf_brelse(bp);
				uio_free(auio);
				return error;
			}

			blockOffset = uio_offset(auio) % devBlockSize;
			xfersize = devBlockSize - blockOffset;
			error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
			if (error) {
				buf_brelse(bp);
				uio_free(auio);
				return error;
			}
			if (uio_rw(auio) == UIO_WRITE) {
				error = VNOP_BWRITE(bp);
				if (error) {
					uio_free(auio);
					return error;
				}
			} else {
				buf_brelse(bp);
			}
		}
		uio_free(auio);
	}
	return 0;

	case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
	{
		if (is64bit) {
			*(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
		} else {
			*(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
		}
		return 0;
	}

	case HFS_GET_MOUNT_TIME:
		return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
		break;

	case HFS_GET_LAST_MTIME:
		return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
		break;

	case HFS_SET_BOOT_INFO:
		if (!vnode_isvroot(vp))
			return (EINVAL);
		if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
			return (EACCES);	/* must be superuser or owner of filesystem */
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
		break;

	case HFS_GET_BOOT_INFO:
		if (!vnode_isvroot(vp))
			return (EINVAL);
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		break;

	case HFS_MARK_BOOT_CORRUPT:
		/* Mark the boot volume corrupt by setting
		 * kHFSVolumeInconsistentBit in the volume header.  This will
		 * force fsck_hfs on next mount.
		 */
		if (!is_suser()) {
			return EACCES;
		}

		/* Allowed only on the root vnode of the boot volume */
		if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
		    !vnode_isvroot(vp)) {
			return EINVAL;
		}

		printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
		hfs_mark_volume_inconsistent(hfsmp);
		break;

	default:
		return (ENOTTY);
	}

	/* Should never get here */
	return 0;
}
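
/*
 * Illustrative userland sketch (not part of this file): several of the
 * selectors handled above arrive via fcntl(2) rather than ioctl(2), e.g.
 *
 *	int fd = open("/Volumes/Example/file", O_RDWR);
 *	if (fcntl(fd, F_FULLFSYNC) == -1)
 *		warn("F_FULLFSYNC");	// a caller might fall back to fsync(fd)
 *
 * The path and the fallback policy are assumptions for the example.
 */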

/* ARGSUSED */
int
hfs_vnop_select(__unused struct vnop_select_args *ap)
/*
	struct vnop_select_args {
		vnode_t a_vp;
		int  a_which;
		int  a_fflags;
		void *a_wql;
		vfs_context_t a_context;
	};
*/
{
	/*
	 * We should really check to see if I/O is possible.
	 */
	return (1);
}

/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently its 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
 */
int
hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
{
	struct filefork *fp = VTOF(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	int  retval = E_NONE;
	u_int32_t  logBlockSize;
	size_t  bytesContAvail = 0;
	off_t  blockposition;
	int lockExtBtree;
	int lockflags;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (vpp != NULL)
		*vpp = hfsmp->hfs_devvp;
	if (bnp == NULL)
		return (0);

	logBlockSize = GetLogicalBlockSize(vp);
	blockposition = (off_t)bn * logBlockSize;

	lockExtBtree = overflow_extents(fp);

	if (lockExtBtree)
		lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);

	retval = MacToVFSError(
			MapFileBlockC (HFSTOVCB(hfsmp),
					(FCB*)fp,
					MAXPHYSIO,
					blockposition,
					bnp,
					&bytesContAvail));

	if (lockExtBtree)
		hfs_systemfile_unlock(hfsmp, lockflags);

	if (retval == E_NONE) {
		/* Figure out how many read ahead blocks there are */
		if (runp != NULL) {
			if (can_cluster(logBlockSize)) {
				/* Make sure this result never goes negative: */
				*runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
			} else {
				*runp = 0;
			}
		}
	}
	return (retval);
}
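
/*
 * Worked example (illustrative only): with a 4096-byte logical block size
 * and MapFileBlockC() reporting bytesContAvail == 32768, *runp becomes
 * (32768 / 4096) - 1 == 7, i.e. seven more logically contiguous blocks can
 * be read after block bn without another mapping call.
 */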

/*
 * Convert logical block number to file offset.
 */
int
hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
/*
	struct vnop_blktooff_args {
		vnode_t a_vp;
		daddr64_t a_lblkno;
		off_t *a_offset;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);
	*ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);

	return(0);
}

/*
 * Convert file offset to logical block number.
 */
int
hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
/*
	struct vnop_offtoblk_args {
		vnode_t a_vp;
		off_t a_offset;
		daddr64_t *a_lblkno;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);
	*ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));

	return(0);
}
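
/*
 * Worked example (illustrative only): with a logical block size of 4096,
 * blktooff maps lblkno 3 to offset 3 * 4096 == 12288, and offtoblk maps
 * offset 12345 back to block 12345 / 4096 == 3.  The two operations are
 * exact inverses only on block-aligned offsets, since offtoblk truncates.
 */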

/*
 * Map file offset to physical block number.
 *
 * If this function is called for write operation, and if the file
 * had virtual blocks allocated (delayed allocation), real blocks
 * are allocated by calling ExtendFileC().
 *
 * If this function is called for read operation, and if the file
 * had virtual blocks allocated (delayed allocation), no change
 * to the size of file is done, and if required, rangelist is
 * searched for mapping.
 *
 * System file cnodes are expected to be locked (shared or exclusive).
 */
int
hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
/*
	struct vnop_blockmap_args {
		vnode_t a_vp;
		off_t a_foffset;
		size_t a_size;
		daddr64_t *a_bpn;
		size_t *a_run;
		void *a_poff;
		int a_flags;
		vfs_context_t a_context;
	};
*/
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	size_t bytesContAvail = 0;
	int retval = E_NONE;
	int syslocks = 0;
	int lockflags = 0;
	struct rl_entry *invalid_range;
	enum rl_overlaptype overlaptype;
	int started_tr = 0;
	int tooklock = 0;

	/* Do not allow blockmap operation on a directory */
	if (vnode_isdir(vp)) {
		return (ENOTSUP);
	}

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (ap->a_bpn == NULL)
		return (0);

	if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
		if (VTOC(vp)->c_lockowner != current_thread()) {
			hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
			tooklock = 1;
		}
	}
	hfsmp = VTOHFS(vp);
	cp = VTOC(vp);
	fp = VTOF(vp);

retry:
	/* Check virtual blocks only when performing write operation */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		if (hfs_start_transaction(hfsmp) != 0) {
			retval = EINVAL;
			goto exit;
		} else {
			started_tr = 1;
		}
		syslocks = SFL_EXTENTS | SFL_BITMAP;

	} else if (overflow_extents(fp)) {
		syslocks = SFL_EXTENTS;
	}

	if (syslocks)
		lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);

	/*
	 * Check for any delayed allocations.
	 */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		int64_t actbytes;
		u_int32_t loanedBlocks;

		//
		// Make sure we have a transaction.  It's possible
		// that we came in and fp->ff_unallocblocks was zero
		// but during the time we blocked acquiring the extents
		// btree, ff_unallocblocks became non-zero and so we
		// will need to start a transaction.
		//
		if (started_tr == 0) {
			if (syslocks) {
				hfs_systemfile_unlock(hfsmp, lockflags);
				syslocks = 0;
			}
			goto retry;
		}

		/*
		 * Note: ExtendFileC will Release any blocks on loan and
		 * aquire real blocks.  So we ask to extend by zero bytes
		 * since ExtendFileC will account for the virtual blocks.
		 */

		loanedBlocks = fp->ff_unallocblocks;
		retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
				     kEFAllMask | kEFNoClumpMask, &actbytes);

		if (retval) {
			fp->ff_unallocblocks = loanedBlocks;
			cp->c_blocks += loanedBlocks;
			fp->ff_blocks += loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);
			hfsmp->loanedBlocks += loanedBlocks;
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);

			hfs_systemfile_unlock(hfsmp, lockflags);
			cp->c_flag |= C_MODIFIED;
			if (started_tr) {
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

				hfs_end_transaction(hfsmp);
				started_tr = 0;
			}
			goto exit;
		}
	}

	retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
	                       ap->a_bpn, &bytesContAvail);
	if (syslocks) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		syslocks = 0;
	}

	if (started_tr) {
		(void) hfs_update(vp, TRUE);
		(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
		hfs_end_transaction(hfsmp);
		started_tr = 0;
	}
	if (retval) {
		/* On write, always return error because virtual blocks, if any,
		 * should have been allocated in ExtendFileC().  We do not
		 * allocate virtual blocks on read, therefore return error
		 * only if no virtual blocks are allocated.  Otherwise we search
		 * rangelist for zero-fills
		 */
		if ((MacToVFSError(retval) != ERANGE) ||
		    (ap->a_flags & VNODE_WRITE) ||
		    ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
			goto exit;
		}

		/* Validate if the start offset is within logical file size */
		if (ap->a_foffset > fp->ff_size) {
			goto exit;
		}

		/* Searching file extents has failed for read operation, therefore
		 * search rangelist for any uncommitted holes in the file.
		 */
		overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
		                      ap->a_foffset + (off_t)(ap->a_size - 1),
		                      &invalid_range);
		switch(overlaptype) {
		case RL_OVERLAPISCONTAINED:
			/* start_offset <= rl_start, end_offset >= rl_end */
			if (ap->a_foffset != invalid_range->rl_start) {
				break;
			}
		case RL_MATCHINGOVERLAP:
			/* start_offset = rl_start, end_offset = rl_end */
		case RL_OVERLAPCONTAINSRANGE:
			/* start_offset >= rl_start, end_offset <= rl_end */
		case RL_OVERLAPSTARTSBEFORE:
			/* start_offset > rl_start, end_offset >= rl_start */
			if ((off_t)fp->ff_size > (invalid_range->rl_end + 1)) {
				bytesContAvail = (invalid_range->rl_end + 1) - ap->a_foffset;
			} else {
				bytesContAvail = fp->ff_size - ap->a_foffset;
			}
			if (bytesContAvail > ap->a_size) {
				bytesContAvail = ap->a_size;
			}
			*ap->a_bpn = (daddr64_t)-1;
			retval = 0;
			break;
		case RL_OVERLAPENDSAFTER:
			/* start_offset < rl_start, end_offset < rl_end */
		case RL_NOOVERLAP:
			break;
		}
		goto exit;
	}

	/* MapFileC() found a valid extent in the filefork.  Search the
	 * mapping information further for invalid file ranges
	 */
	overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
	                      ap->a_foffset + (off_t)bytesContAvail - 1,
	                      &invalid_range);
	if (overlaptype != RL_NOOVERLAP) {
		switch(overlaptype) {
		case RL_MATCHINGOVERLAP:
		case RL_OVERLAPCONTAINSRANGE:
		case RL_OVERLAPSTARTSBEFORE:
			/* There's no valid block for this byte offset */
			*ap->a_bpn = (daddr64_t)-1;
			/* There's no point limiting the amount to be returned
			 * if the invalid range that was hit extends all the way
			 * to the EOF (i.e. there's no valid bytes between the
			 * end of this range and the file's EOF):
			 */
			if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
			    (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
				bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
			}
			break;

		case RL_OVERLAPISCONTAINED:
		case RL_OVERLAPENDSAFTER:
			/* The range of interest hits an invalid block before the end: */
			if (invalid_range->rl_start == ap->a_foffset) {
				/* There's actually no valid information to be had starting here: */
				*ap->a_bpn = (daddr64_t)-1;
				if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
				    (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
					bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
				}
			} else {
				bytesContAvail = invalid_range->rl_start - ap->a_foffset;
			}
			break;

		default:
			break;
		}
		if (bytesContAvail > ap->a_size)
			bytesContAvail = ap->a_size;
	}

exit:
	if (retval == 0) {
		if (ap->a_run)
			*ap->a_run = bytesContAvail;

		if (ap->a_poff)
			*(int *)ap->a_poff = 0;
	}

	if (tooklock)
		hfs_unlock(cp);

	return (MacToVFSError(retval));
}
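
/*
 * Illustrative note (not in the original sources): returning a block number
 * of (daddr64_t)-1 with retval == 0 is how blockmap tells the cluster layer
 * "this range is a hole; zero-fill it" rather than reporting an error, so
 * reads of uncommitted (invalid) ranges come back as zeroes.
 */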

/*
 * prepare and issue the I/O
 * buf_strategy knows how to deal
 * with requests that require
 * fragmented I/Os
 */
int
hfs_vnop_strategy(struct vnop_strategy_args *ap)
{
	buf_t	bp = ap->a_bp;
	vnode_t	vp = buf_vnode(bp);

	return (buf_strategy(VTOHFS(vp)->hfs_devvp, ap));
}
2259 do_hfs_truncate(struct vnode
*vp
, off_t length
, int flags
, vfs_context_t context
)
2261 register struct cnode
*cp
= VTOC(vp
);
2262 struct filefork
*fp
= VTOF(vp
);
2263 struct proc
*p
= vfs_context_proc(context
);;
2264 kauth_cred_t cred
= vfs_context_ucred(context
);
2267 off_t actualBytesAdded
;
2271 struct hfsmount
*hfsmp
;
2274 blksize
= VTOVCB(vp
)->blockSize
;
2275 fileblocks
= fp
->ff_blocks
;
2276 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
2278 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_START
,
2279 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
2284 /* This should only happen with a corrupt filesystem */
2285 if ((off_t
)fp
->ff_size
< 0)
2288 if ((!ISHFSPLUS(VTOVCB(vp
))) && (length
> (off_t
)MAXHFSFILESIZE
))
2295 /* Files that are changing size are not hot file candidates. */
2296 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
2297 fp
->ff_bytesread
= 0;
2301 * We cannot just check if fp->ff_size == length (as an optimization)
2302 * since there may be extra physical blocks that also need truncation.
2305 if ((retval
= hfs_getinoquota(cp
)))
2310 * Lengthen the size of the file. We must ensure that the
2311 * last byte of the file is allocated. Since the smallest
2312 * value of ff_size is 0, length will be at least 1.
2314 if (length
> (off_t
)fp
->ff_size
) {
2316 retval
= hfs_chkdq(cp
, (int64_t)(roundup(length
- filebytes
, blksize
)),
2322 * If we don't have enough physical space then
2323 * we need to extend the physical size.
2325 if (length
> filebytes
) {
2327 u_long blockHint
= 0;
2329 /* All or nothing and don't round up to clumpsize. */
2330 eflags
= kEFAllMask
| kEFNoClumpMask
;
2332 if (cred
&& suser(cred
, NULL
) != 0)
2333 eflags
|= kEFReserveMask
; /* keep a reserve */
2336 * Allocate Journal and Quota files in metadata zone.
2338 if (filebytes
== 0 &&
2339 hfsmp
->hfs_flags
& HFS_METADATA_ZONE
&&
2340 hfs_virtualmetafile(cp
)) {
2341 eflags
|= kEFMetadataMask
;
2342 blockHint
= hfsmp
->hfs_metazone_start
;
2344 if (hfs_start_transaction(hfsmp
) != 0) {
2349 /* Protect extents b-tree and allocation bitmap */
2350 lockflags
= SFL_BITMAP
;
2351 if (overflow_extents(fp
))
2352 lockflags
|= SFL_EXTENTS
;
2353 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
2355 while ((length
> filebytes
) && (retval
== E_NONE
)) {
2356 bytesToAdd
= length
- filebytes
;
2357 retval
= MacToVFSError(ExtendFileC(VTOVCB(vp
),
2362 &actualBytesAdded
));
2364 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
2365 if (actualBytesAdded
== 0 && retval
== E_NONE
) {
2366 if (length
> filebytes
)
2372 hfs_systemfile_unlock(hfsmp
, lockflags
);
2375 (void) hfs_update(vp
, TRUE
);
2376 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
2379 hfs_end_transaction(hfsmp
);
2384 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_NONE
,
2385 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
		if (!(flags & IO_NOZEROFILL)) {
			if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
				struct rl_entry *invalid_range;
				off_t zero_limit;

				zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
				if (length < zero_limit)
					zero_limit = length;

				if (length > (off_t)fp->ff_size) {
					struct timeval tv;

					/* Extending the file: time to fill out the current last page w. zeroes? */
					if ((fp->ff_size & PAGE_MASK_64) &&
					    (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
					    fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {

						/* There's some valid data at the start of the (current) last page
						   of the file, so zero out the remainder of that page to ensure the
						   entire page contains valid data. Since there is no invalid range
						   possible past the (current) eof, there's no need to remove anything
						   from the invalid range list before calling cluster_write(): */
						retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
						                       fp->ff_size, (off_t)0,
						                       (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
						hfs_lock(cp, HFS_FORCE_LOCK);
						if (retval)
							goto Err_Exit;

						/* Merely invalidate the remaining area, if necessary: */
						if (length > zero_limit) {
							microuptime(&tv);
							rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
							cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
						}
					} else {
						/* The page containing the (current) eof is invalid: just add the
						   remainder of the page to the invalid list, along with the area
						   being newly allocated: */
						microuptime(&tv);
						rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
						cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
					}
				}
			} else {
				panic("hfs_truncate: invoked on non-UBC object?!");
			}
		}
		cp->c_touch_modtime = TRUE;
		fp->ff_size = length;
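		/*
		 * Page-rounding in the zero-fill path, with assumed numbers:
		 * if the old ff_size is 6000 and PAGE_SIZE_64 is 4096, then
		 * zero_limit = (6000 + 4095) & ~4095 = 8192, the end of the
		 * page containing the old EOF.  Growing to length = 7000
		 * clamps zero_limit back to 7000; growing to length = 20000
		 * zeroes [6000, 8191] via cluster_write and merely marks
		 * [8192, 19999] invalid instead of writing zeroes to disk
		 * right away.
		 */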
	} else { /* Shorten the size of the file */

		if ((off_t)fp->ff_size > length) {
			/* Any space previously marked as invalid is now irrelevant: */
			rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
		}

		/*
		 * Account for any unmapped blocks. Note that the new
		 * file length can still end up with unmapped blocks.
		 */
		if (fp->ff_unallocblocks > 0) {
			u_int32_t finalblks;
			u_int32_t loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);

			loanedBlocks = fp->ff_unallocblocks;
			cp->c_blocks -= loanedBlocks;
			fp->ff_blocks -= loanedBlocks;
			fp->ff_unallocblocks = 0;

			hfsmp->loanedBlocks -= loanedBlocks;

			finalblks = (length + blksize - 1) / blksize;
			if (finalblks > fp->ff_blocks) {
				/* calculate required unmapped blocks */
				loanedBlocks = finalblks - fp->ff_blocks;
				hfsmp->loanedBlocks += loanedBlocks;

				fp->ff_unallocblocks = loanedBlocks;
				cp->c_blocks += loanedBlocks;
				fp->ff_blocks += loanedBlocks;
			}
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		}
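		/*
		 * Loaned-block arithmetic, with assumed numbers: truncating
		 * to length = 10000 with blksize = 4096 gives
		 * finalblks = (10000 + 4095) / 4096 = 3.  If the fork held
		 * 7 blocks, 2 real and 5 loaned (unmapped), all 5 loans are
		 * returned first (ff_blocks drops to 2) and then one loan is
		 * re-taken so the fork still covers 3 blocks: 2 real plus
		 * 1 unmapped.
		 */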
		/*
		 * For a TBE process the deallocation of the file blocks is
		 * delayed until the file is closed. And hfs_close calls
		 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
		 * isn't set, we make sure this isn't a TBE process.
		 */
		if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
			off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);

			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}
			if (fp->ff_unallocblocks == 0) {
				/* Protect extents b-tree and allocation bitmap */
				lockflags = SFL_BITMAP;
				if (overflow_extents(fp))
					lockflags |= SFL_EXTENTS;
				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

				retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
				                                     (FCB *)fp, length, false));

				hfs_systemfile_unlock(hfsmp, lockflags);
			}
			if (retval == 0)
				fp->ff_size = length;
			(void) hfs_update(vp, TRUE);
			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);

			filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
			if (retval)
				goto Err_Exit;

			/* These are bytesreleased */
			(void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
		}
		/* Only set update flag if the logical length changes */
		if ((off_t)fp->ff_size != length)
			cp->c_touch_modtime = TRUE;
		fp->ff_size = length;
	}
	cp->c_touch_chgtime = TRUE;	/* status changed */
	cp->c_touch_modtime = TRUE;	/* file data was modified */
	retval = hfs_update(vp, MNT_WAIT);
	if (retval) {
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
		             -1, -1, -1, retval, 0);
	}

Err_Exit:

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
	             (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);

	return (retval);
}
/*
 * Truncate a cnode to at most length size, freeing (or adding) the
 * disk blocks.
 */
__private_extern__
int
hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
             vfs_context_t context)
{
	struct filefork *fp = VTOF(vp);
	off_t filebytes;
	u_long fileblocks;
	int blksize, error = 0;
	struct cnode *cp = VTOC(vp);

	/* Cannot truncate an HFS directory! */
	if (vnode_isdir(vp)) {
		return (EISDIR);
	}
	/* A swap file cannot change size. */
	if (vnode_isswap(vp) && (length != 0)) {
		return (EPERM);
	}

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;
	//
	// Have to do this here so that we don't wind up with
	// i/o pending for blocks that are about to be released
	// if we truncate the file.
	//
	// If skipsetsize is set, then the caller is responsible
	// for the ubc_setsize.
	//
	if (!skipsetsize)
		ubc_setsize(vp, length);

	// have to loop truncating or growing files that are
	// really big because otherwise transactions can get
	// enormous and consume too many kernel resources.

	if (length < filebytes) {
		while (filebytes > length) {
			if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes -= HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, context);
			if (error)
				break;
		}
	} else if (length > filebytes) {
		while (filebytes < length) {
			if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes += HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, context);
			if (error)
				break;
		}
	} else /* Same logical size */ {

		error = do_hfs_truncate(vp, length, flags, context);
	}
	/* Files that are changing size are not hot file candidates. */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}

	return (error);
}
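/*
 * Chunked truncation by example (assumed numbers): shrinking a
 * fragmented 100 GB file to zero with HFS_BIGFILE_SIZE at, say, 1 GB
 * proceeds as roughly one hundred do_hfs_truncate() calls
 * (100 GB -> 99 GB -> ... -> 0), so each journal transaction stays
 * bounded instead of freeing every extent in one enormous
 * transaction.  Files whose extents all fit in the catalog record
 * skip the loop and truncate in a single call.
 */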
/*
 * Preallocate file storage space.
 */
int
hfs_vnop_allocate(struct vnop_allocate_args /* {
		vnode_t a_vp;
		off_t a_length;
		u_int32_t a_flags;
		off_t *a_bytesallocated;
		off_t a_offset;
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	ExtendedVCB *vcb;
	off_t length = ap->a_length;
	off_t startingPEOF;
	off_t moreBytesRequested;
	off_t actualBytesAdded;
	off_t filebytes;
	u_long fileblocks;
	int retval, retval2;
	u_int32_t blockHint;
	u_int32_t extendFlags;	/* For call to ExtendFileC */
	struct hfsmount *hfsmp;
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);
	int lockflags;

	*(ap->a_bytesallocated) = 0;

	if (!vnode_isreg(vp))
		return (EISDIR);
	if (length < (off_t)0)
		return (EINVAL);

	cp = VTOC(vp);

	hfs_lock_truncate(cp, TRUE);

	if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
		hfs_unlock_truncate(cp, TRUE);
		return (retval);
	}

	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);
	vcb = VTOVCB(vp);

	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

	if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
		retval = EINVAL;
		goto Err_Exit;
	}

	/* Fill in the flags word for the call to Extend the file */

	extendFlags = kEFNoClumpMask;
	if (ap->a_flags & ALLOCATECONTIG)
		extendFlags |= kEFContigMask;
	if (ap->a_flags & ALLOCATEALL)
		extendFlags |= kEFAllMask;
	if (cred && suser(cred, NULL) != 0)
		extendFlags |= kEFReserveMask;

	retval = E_NONE;
	blockHint = 0;
	startingPEOF = filebytes;

	if (ap->a_flags & ALLOCATEFROMPEOF)
		length += filebytes;
	else if (ap->a_flags & ALLOCATEFROMVOL)
		blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

	/* If no changes are necessary, then we're done */
	if (filebytes == length)
		goto Std_Exit;

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of filebytes is 0, length will be at least 1.
	 */
	if (length > filebytes) {
		off_t total_bytes_added = 0, orig_request_size;

		orig_request_size = moreBytesRequested = length - filebytes;

		retval = hfs_chkdq(cp,
		                   (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
		                   cred, 0);
		if (retval)
			goto Err_Exit;

		/*
		 * Metadata zone checks.
		 */
		if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (hfs_virtualmetafile(cp)) {
				extendFlags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;
			} else if ((blockHint >= hfsmp->hfs_metazone_start) &&
			           (blockHint <= hfsmp->hfs_metazone_end)) {
				/*
				 * Move blockHint outside metadata zone.
				 */
				blockHint = hfsmp->hfs_metazone_end + 1;
			}
		}

		while ((length > filebytes) && (retval == E_NONE)) {
			off_t bytesRequested;

			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
				bytesRequested = HFS_BIGFILE_SIZE;
			} else {
				bytesRequested = moreBytesRequested;
			}
			retval = MacToVFSError(ExtendFileC(vcb,
			                                   (FCB *)fp,
			                                   bytesRequested,
			                                   blockHint,
			                                   extendFlags,
			                                   &actualBytesAdded));

			if (retval == E_NONE) {
				*(ap->a_bytesallocated) += actualBytesAdded;
				total_bytes_added += actualBytesAdded;
				moreBytesRequested -= actualBytesAdded;
				if (blockHint != 0) {
					blockHint += actualBytesAdded / vcb->blockSize;
				}
			}
			filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

			hfs_systemfile_unlock(hfsmp, lockflags);

			(void) hfs_update(vp, TRUE);
			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);
		}

		/*
		 * If we get an error and no changes were made then exit;
		 * otherwise we must do the hfs_update to reflect the changes.
		 */
		if (retval && (startingPEOF == filebytes))
			goto Err_Exit;

		/*
		 * Adjust actualBytesAdded to be allocation block aligned, not
		 * clump size aligned.
		 * NOTE: So what we are reporting does not affect reality
		 * until the file is closed, when we truncate the file to allocation
		 * block size.
		 */
		if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
			*(ap->a_bytesallocated) =
				roundup(orig_request_size, (off_t)vcb->blockSize);
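		/*
		 * For example (assumed numbers): a request to grow by
		 * orig_request_size = 10000 bytes on a volume with
		 * 4096-byte allocation blocks and a larger clump size might
		 * really add 65536 bytes; the caller is told
		 * roundup(10000, 4096) = 12288, since the excess clump
		 * padding is trimmed away when the file is closed.
		 */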
	} else { /* Shorten the size of the file */

		if (fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).
			 */
		}

		retval = hfs_truncate(vp, length, 0, 0, ap->a_context);
		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

		/*
		 * If we get an error and no changes were made then exit;
		 * otherwise we must do the hfs_update to reflect the changes.
		 */
		if (retval && (startingPEOF == filebytes))
			goto Err_Exit;

		/* These are bytesreleased */
		(void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED, 0);

		if (fp->ff_size > filebytes) {
			fp->ff_size = filebytes;

			hfs_unlock(cp);
			ubc_setsize(vp, fp->ff_size);
			hfs_lock(cp, HFS_FORCE_LOCK);
		}
	}

Std_Exit:
	cp->c_touch_chgtime = TRUE;
	cp->c_touch_modtime = TRUE;
	retval2 = hfs_update(vp, MNT_WAIT);

	if (retval == 0)
		retval = retval2;
Err_Exit:
	hfs_unlock_truncate(cp, TRUE);
	hfs_unlock(cp);
	return (retval);
}
/*
 * Pagein for HFS filesystem
 */
int
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
	struct vnop_pagein_args {
		vnode_t       a_vp,
		upl_t         a_pl,
		vm_offset_t   a_pl_offset,
		off_t         a_f_offset,
		size_t        a_size,
		int           a_flags
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	int error;

	error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
	                       ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
	/*
	 * Keep track of blocks read.
	 */
	if (!vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
		struct cnode *cp;
		struct filefork *fp;
		int bytesread;
		int took_cnode_lock = 0;

		cp = VTOC(vp);
		fp = VTOF(vp);

		if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
			bytesread = fp->ff_size;
		else
			bytesread = ap->a_size;

		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff &&
		    cp->c_lockowner != current_thread()) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			took_cnode_lock = 1;
		}
		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
			struct timeval tv;

			fp->ff_bytesread = bytesread;
			microtime(&tv);
			cp->c_atime = tv.tv_sec;
		} else {
			fp->ff_bytesread += bytesread;
		}
		cp->c_touch_acctime = TRUE;
		if (took_cnode_lock)
			hfs_unlock(cp);
	}
	return (error);
}
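/*
 * Hot-file accounting in hfs_vnop_pagein, by example (assumed sizes):
 * paging in the first page of a 100-byte file counts 100 bytes read,
 * not PAGE_SIZE, so tiny files are not over-weighted, while a 64 KB
 * pagein of a large file counts the full a_size.  Once the running
 * total would cross 32 bits, the update is done under the cnode lock
 * so the 64-bit ff_bytesread stays consistent.
 */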
/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
	struct vnop_pageout_args {
		vnode_t       a_vp,
		upl_t         a_pl,
		vm_offset_t   a_pl_offset,
		off_t         a_f_offset,
		size_t        a_size,
		int           a_flags
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	int retval;
	off_t filesize;

	cp = VTOC(vp);
	fp = VTOF(vp);

	if (vnode_isswap(vp)) {
		filesize = fp->ff_size;
	} else {
		off_t end_of_range;
		int tooklock = 0;

		if (cp->c_lockowner != current_thread()) {
			if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
				if (!(ap->a_flags & UPL_NOCOMMIT)) {
					ubc_upl_abort_range(ap->a_pl,
					                    ap->a_pl_offset,
					                    ap->a_size,
					                    UPL_ABORT_FREE_ON_EMPTY);
				}
				return (retval);
			}
			tooklock = 1;
		}

		filesize = fp->ff_size;
		end_of_range = ap->a_f_offset + ap->a_size - 1;

		if (end_of_range >= filesize) {
			end_of_range = (off_t)(filesize - 1);
		}
		if (ap->a_f_offset < filesize) {
			rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
			cp->c_flag |= C_MODIFIED;  /* leof is dirty */
		}
		if (tooklock)
			hfs_unlock(cp);
	}

	retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
	                         ap->a_size, filesize, ap->a_flags);

	/*
	 * If data was written, and setuid or setgid bits are set and
	 * this process is not the superuser then clear the setuid and
	 * setgid bits as a precaution against tampering.
	 */
	if ((retval == 0) &&
	    (cp->c_mode & (S_ISUID | S_ISGID)) &&
	    (vfs_context_suser(ap->a_context) != 0)) {
		hfs_lock(cp, HFS_FORCE_LOCK);
		cp->c_mode &= ~(S_ISUID | S_ISGID);
		cp->c_touch_chgtime = TRUE;
		hfs_unlock(cp);
	}
	return (retval);
}
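/*
 * Range math in hfs_vnop_pageout, with assumed numbers: for
 * a_f_offset = 12288, a_size = 8192, and filesize = 15000,
 * end_of_range starts at 12288 + 8192 - 1 = 20479 and is clamped to
 * 14999, so only invalid ranges that actually lie within the file are
 * removed before the pageout makes those bytes valid on disk.
 */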
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
	int retval = 0;
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
	    (vp == VTOHFS(vp)->hfc_filevp)) {

		/*
		 * Swap and validate the node if it is in native byte order.
		 * This is always true on big endian, so we always validate
		 * before writing here. On little endian, the node typically has
		 * been swapped and validated when it was written to the journal,
		 * so we won't do anything here.
		 */
		if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = (char *)buf_dataptr(bp);
			block.blockNum = buf_lblkno(bp);
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (buf_fromcache(bp) == 0);
			block.blockSize = buf_count(bp);

			/* Endian un-swap B-Tree node */
			retval = hfs_swap_BTNode(&block, vp, kSwapBTNodeHostToBig);
			if (retval)
				panic("hfs_vnop_bwrite: about to write corrupt node!\n");
		}
	}

	/* This buffer shouldn't be locked anymore but if it is clear it */
	if ((buf_flags(bp) & B_LOCKED)) {
		// XXXdbg
		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp %p\n", bp);
		}
		buf_clearflags(bp, B_LOCKED);
	}
	retval = vn_bwrite(ap);

	return (retval);
}
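/*
 * The 0x000e test above relies on B-tree node layout: the last two
 * bytes of every node hold the offset of the first record, which for
 * a node that begins with a BTNodeDescriptor (14 bytes) is always 14,
 * i.e. 0x000e when read in the host's native byte order.  On a
 * little-endian host, for example, a node already swapped to
 * big-endian order reads back as 0x0e00 there and is left alone.
 */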
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------
 * |///////////////|
 * -----------------
 * 0               N (file offset)
 *
 * -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 * -----------------     -----------------
 * 0               N     N+1             2N
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 * -----------------     -----------------
 * 0               N     N+1             2N
 *
 *                       -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                       -----------------
 *                       0               N
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
__private_extern__
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
             struct proc *p)
{
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	u_int32_t headblks;
	u_int32_t datablks;
	u_int32_t blksize;
	u_int32_t growsize;
	u_int32_t nextallocsave;
	daddr64_t sector_a, sector_b;
	int eflags;
	off_t newbytes;
	int retval;
	int lockflags = 0;
	int took_trunc_lock = 0;
	int started_tr = 0;
	enum vtype vnodetype;

	vnodetype = vnode_vtype(vp);
	if (vnodetype != VREG && vnodetype != VLNK) {
		return (EPERM);
	}

	hfsmp = VTOHFS(vp);
	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
		return (ENOSPC);
	}

	cp = VTOC(vp);
	fp = VTOF(vp);
	if (fp->ff_unallocblocks)
		return (EINVAL);
	blksize = hfsmp->blockSize;
	if (blockHint == 0)
		blockHint = hfsmp->nextAllocation;

	if ((fp->ff_size > 0x7fffffff) ||
	    ((fp->ff_size > blksize) && vnodetype == VLNK)) {
		return (EFBIG);
	}

	//
	// We do not believe that this call to hfs_fsync() is
	// necessary and it causes a journal transaction
	// deadlock so we are removing it.
	//
	//if (vnodetype == VREG && !vnode_issystem(vp)) {
	//	retval = hfs_fsync(vp, MNT_WAIT, 0, p);
	//	if (retval)
	//		return (retval);
	//}

	if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
		hfs_unlock(cp);
		hfs_lock_truncate(cp, TRUE);
		/* Force lock since callers expect the lock to be held. */
		if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
			hfs_unlock_truncate(cp, TRUE);
			return (retval);
		}
		/* No need to continue if file was removed. */
		if (cp->c_flag & C_NOEXISTS) {
			hfs_unlock_truncate(cp, TRUE);
			return (ENOENT);
		}
		took_trunc_lock = 1;
	}
	headblks = fp->ff_blocks;
	datablks = howmany(fp->ff_size, blksize);
	growsize = datablks * blksize;
	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
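	/*
	 * Sizing example (assumed values): a 1,000,000-byte file on a
	 * volume with 4096-byte allocation blocks gives
	 * datablks = howmany(1000000, 4096) = 245 and
	 * growsize = 245 * 4096 = 1,003,520 bytes; the clone request
	 * covers the data rounded up to whole allocation blocks,
	 * allocated contiguously (kEFContigMask | kEFAllMask).
	 */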
	if (blockHint >= hfsmp->hfs_metazone_start &&
	    blockHint <= hfsmp->hfs_metazone_end)
		eflags |= kEFMetadataMask;

	if (hfs_start_transaction(hfsmp) != 0) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, TRUE);
		return (EINVAL);
	}
	started_tr = 1;
	/*
	 * Protect the extents b-tree and the allocation bitmap
	 * during MapFileBlockC and ExtendFileC operations.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
	if (retval) {
		retval = MacToVFSError(retval);
		goto out;
	}

	/*
	 * STEP 1 - acquire new allocation blocks.
	 */
	nextallocsave = hfsmp->nextAllocation;
	retval = ExtendFileC(hfsmp, (FCB *)fp, growsize, blockHint, eflags, &newbytes);
	if (eflags & kEFMetadataMask) {
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
		MarkVCBDirty(hfsmp);
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	}

	retval = MacToVFSError(retval);
	if (retval == 0) {
		cp->c_flag |= C_MODIFIED;
		if (newbytes < growsize) {
			retval = ENOSPC;
			goto restore;
		} else if (fp->ff_blocks < (headblks + datablks)) {
			printf("hfs_relocate: allocation failed");
			retval = ENOSPC;
			goto restore;
		}

		retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
		if (retval) {
			retval = MacToVFSError(retval);
		} else if ((sector_a + 1) == sector_b) {
			retval = ENOSPC;
			goto restore;
		} else if ((eflags & kEFMetadataMask) &&
		           ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
		            hfsmp->hfs_metazone_end)) {
			const char * filestr;
			char emptystr = '\0';

			if (cp->c_desc.cd_nameptr != NULL) {
				filestr = (const char *)&cp->c_desc.cd_nameptr[0];
			} else if (vnode_name(vp) != NULL) {
				filestr = vnode_name(vp);
			} else {
				filestr = &emptystr;
			}
			printf("hfs_relocate: %s didn't move into MDZ (%d blks)\n", filestr, fp->ff_blocks);
			retval = ENOSPC;
			goto restore;
		}
	}
	/* Done with system locks and journal for now. */
	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	hfs_end_transaction(hfsmp);
	started_tr = 0;

	if (retval) {
		/*
		 * Check to see if failure is due to excessive fragmentation.
		 */
		if ((retval == ENOSPC) &&
		    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
			hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
		}
		goto out;
	}
	/*
	 * STEP 2 - clone file data into the new allocation blocks.
	 */

	if (vnodetype == VLNK)
		retval = hfs_clonelink(vp, blksize, cred, p);
	else if (vnode_issystem(vp))
		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	else
		retval = hfs_clonefile(vp, headblks, datablks, blksize);

	/* Start transaction for step 3 or for a restore. */
	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto out;
	}
	started_tr = 1;
	if (retval)
		goto restore;

	/*
	 * STEP 3 - switch to cloned data and remove old blocks.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = HeadTruncateFile(hfsmp, (FCB *)fp, headblks);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	if (retval)
		goto restore;
out:
	if (took_trunc_lock)
		hfs_unlock_truncate(cp, TRUE);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/* Push cnode's new extent data to disk. */
	if (retval == 0) {
		(void) hfs_update(vp, MNT_WAIT);
	}
	if (hfsmp->jnl) {
		if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
			(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
		else
			(void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
	}
exit:
	if (started_tr)
		hfs_end_transaction(hfsmp);

	return (retval);

restore:
	if (fp->ff_blocks == headblks) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, TRUE);
		goto exit;
	}
	/*
	 * Give back any newly allocated space.
	 */
	if (lockflags == 0) {
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	}

	(void) TruncateFileC(hfsmp, (FCB *)fp, fp->ff_size, false);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;

	if (took_trunc_lock)
		hfs_unlock_truncate(cp, TRUE);
	goto exit;
}
/*
 * Clone a symlink.
 */
static int
hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
{
	struct buf *head_bp = NULL;
	struct buf *tail_bp = NULL;
	int error;

	error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
	if (error)
		goto out;

	tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
	if (tail_bp == NULL) {
		error = EIO;
		goto out;
	}
	bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
	error = (int)buf_bwrite(tail_bp);
out:
	if (head_bp) {
		buf_markinvalid(head_bp);
		buf_brelse(head_bp);
	}
	(void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);

	return (error);
}
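/*
 * A symlink's target occupies a single allocation block, so the clone
 * is one block copy: logical block 0 holds the old copy, and once
 * hfs_relocate() has doubled the fork with ExtendFileC, logical
 * block 1 is the new location the target is written to before the old
 * head blocks are truncated away.
 */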
/*
 * Clone a file's data within the file.
 *
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
	caddr_t bufp;
	size_t bufsize;
	size_t copysize;
	size_t iosize;
	off_t filesize;
	off_t writebase;
	size_t offset = 0;
	uio_t auio;
	int error = 0;

	filesize = VTOF(vp)->ff_blocks * blksize;	/* virtual file size */
	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 128 * 1024);
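	/*
	 * Copy sizing, with assumed numbers: cloning blkcnt = 256 blocks
	 * of blksize = 4096 bytes gives copysize = 1 MB, copied through
	 * a single 128 KB bounce buffer in eight read/write passes; a
	 * file smaller than 128 KB is copied in one pass with
	 * bufsize = copysize.
	 */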
	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}
	hfs_unlock(VTOC(vp));

	auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);

	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);

		uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_read(vp, auio, copysize, IO_NOCACHE);
		if (error) {
			printf("hfs_clonefile: cluster_read failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
			error = EIO;
			break;
		}

		uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_write(vp, auio, filesize + offset,
		                      filesize + offset + iosize,
		                      uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
		if (error) {
			printf("hfs_clonefile: cluster_write failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
			error = EIO;
			break;
		}
		offset += iosize;
	}
	uio_free(auio);

	/*
	 * No need to call ubc_sync_range or hfs_invalbuf
	 * since the file was copied using IO_NOCACHE.
	 */

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
	return (error);
}
/*
 * Clone a system (metadata) file.
 *
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
	caddr_t bufp;
	char *offset;
	size_t bufsize;
	size_t iosize;
	struct buf *bp = NULL;
	daddr64_t blkno;
	daddr64_t blk;
	daddr64_t start_blk;
	daddr64_t last_blk;
	int breadcnt;
	int i;
	int error = 0;

	iosize = GetLogicalBlockSize(vp);
	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
	breadcnt = bufsize / iosize;
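	/*
	 * Buffer sizing, with assumed numbers: for a 3 MB system file
	 * (blkcnt * blksize) and a 512-byte logical block size, bufsize
	 * is capped at 1 MB (already 512-byte aligned by the mask) and
	 * breadcnt = 1 MB / 512 = 2048 logical blocks per read or write
	 * sweep.
	 */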
	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}
	start_blk = ((daddr64_t)blkstart * blksize) / iosize;
	last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
	blkno = 0;

	while (blkno < last_blk) {
		/*
		 * Read up to a megabyte
		 */
		offset = bufp;
		for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
			error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
			if (error) {
				printf("hfs_clonesysfile: meta_bread error %d\n", error);
				goto out;
			}
			if (buf_count(bp) != iosize) {
				printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
				goto out;
			}
			bcopy((char *)buf_dataptr(bp), offset, iosize);

			buf_markinvalid(bp);
			buf_brelse(bp);
			bp = NULL;

			offset += iosize;
		}

		/*
		 * Write up to a megabyte
		 */
		offset = bufp;
		for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
			bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
			if (bp == NULL) {
				printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
				error = EIO;
				goto out;
			}
			bcopy(offset, (char *)buf_dataptr(bp), iosize);
			error = (int)buf_bwrite(bp);
			bp = NULL;
			if (error)
				goto out;
			offset += iosize;
		}
	}
out:
	if (bp) {
		buf_brelse(bp);
	}

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	error = hfs_fsync(vp, MNT_WAIT, 0, p);

	return (error);
}