/*
 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/*	@(#)hfs_readwrite.c	1.0
 *
 *	(c) 1998-2001 Apple Computer, Inc.  All Rights Reserved
 *
 *	hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 *
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vfs_context.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>

#include <sys/kdebug.h>

#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
extern int overflow_extents(struct filefork *fp);
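/*
 * Note: can_cluster() simply tests whether a transfer size is eligible for
 * clustered I/O -- the size must be an exact multiple of 4KB and no larger
 * than half of MAXPHYSIO.
 */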
#define can_cluster(size)  ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
enum {
	MAXHFSFILESIZE = 0x7FFFFFFF		/* this needs to go in the mount structure */
};
extern u_int32_t GetLogicalBlockSize(struct vnode *vp);

extern int  hfs_setextendedsecurity(struct hfsmount *, int);


static int  hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
static int  hfs_clonefile(struct vnode *, int, int, int);
static int  hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
int flush_cache_on_write = 0;
SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
/*****************************************************************************
*
*	I/O Operations on vnodes
*
*****************************************************************************/
int hfs_vnop_read(struct vnop_read_args *);
int hfs_vnop_write(struct vnop_write_args *);
int hfs_vnop_ioctl(struct vnop_ioctl_args *);
int hfs_vnop_select(struct vnop_select_args *);
int hfs_vnop_blktooff(struct vnop_blktooff_args *);
int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
int hfs_vnop_blockmap(struct vnop_blockmap_args *);
int hfs_vnop_strategy(struct vnop_strategy_args *);
int hfs_vnop_allocate(struct vnop_allocate_args *);
int hfs_vnop_pagein(struct vnop_pagein_args *);
int hfs_vnop_pageout(struct vnop_pageout_args *);
int hfs_vnop_bwrite(struct vnop_bwrite_args *);
/*
 * Read data from a file.
 */
int
hfs_vnop_read(struct vnop_read_args *ap)
{
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	off_t filesize;
	off_t filebytes;
	off_t start_resid = uio_resid(uio);
	off_t offset = uio_offset(uio);
	int retval = 0;

	/* Preflight checks */
	if (!vnode_isreg(vp)) {
		/* can only read regular files */
		if (vnode_isdir(vp))
			return (EISDIR);
		else
			return (EPERM);
	}
	if (start_resid == 0)
		return (0);		/* Nothing left to do */
	if (offset < 0)
		return (EINVAL);	/* can't read from a negative offset */

	cp = VTOC(vp);
	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);

	/* Protect against a size change. */
	hfs_lock_truncate(cp, 0);

	filesize = fp->ff_size;
	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
	if (offset > filesize) {
		if ((hfsmp->hfs_flags & HFS_STANDARD) &&
		    (offset > (off_t)MAXHFSFILESIZE)) {
			retval = EFBIG;
		}
		goto exit;
	}

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

	retval = cluster_read(vp, uio, filesize, 0);

	cp->c_touch_acctime = TRUE;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

	/*
	 * Keep track of blocks read.
	 */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
		int took_cnode_lock = 0;
		off_t bytesread;

		bytesread = start_resid - uio_resid(uio);

		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			took_cnode_lock = 1;
		}
		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
			struct timeval tv;

			fp->ff_bytesread = bytesread;
			microtime(&tv);
			cp->c_atime = tv.tv_sec;
		} else {
			fp->ff_bytesread += bytesread;
		}
		if (took_cnode_lock)
			hfs_unlock(cp);
	}
exit:
	hfs_unlock_truncate(cp);
	return (retval);
}
/*
 * Write data to a file.
 */
int
hfs_vnop_write(struct vnop_write_args *ap)
{
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	kauth_cred_t cred = NULL;
	off_t origFileSize;
	off_t writelimit;
	off_t bytesToAdd;
	off_t actualBytesAdded;
	off_t filebytes;
	off_t offset;
	off_t resid;
	int eflags;
	int ioflag = ap->a_ioflag;
	int retval = 0;
	int lockflags;
	int cnode_locked = 0;

	// LP64todo - fix this! uio_resid may be 64-bit value
	resid = uio_resid(uio);
	offset = uio_offset(uio);

	if (!vnode_isreg(vp))
		return (EPERM);	/* Can only write regular files */

	/* Protect against a size change. */
	hfs_lock_truncate(VTOC(vp), TRUE);

	if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
		hfs_unlock_truncate(VTOC(vp));
		return (retval);
	}
	cnode_locked = 1;
	cp = VTOC(vp);
	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);
	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;

	if (ioflag & IO_APPEND) {
		uio_setoffset(uio, fp->ff_size);
		offset = fp->ff_size;
	}
	if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
		retval = EPERM;
		goto exit;
	}

	origFileSize = fp->ff_size;
	eflags = kEFDeferMask;	/* defer file block allocations */

#ifdef HFS_SPARSE_DEV
	/*
	 * When the underlying device is sparse and space
	 * is low (< 8MB), stop doing delayed allocations
	 * and begin doing synchronous I/O.
	 */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
	    (hfs_freeblks(hfsmp, 0) < 2048)) {
		eflags &= ~kEFDeferMask;
		ioflag |= IO_SYNC;
	}
#endif /* HFS_SPARSE_DEV */

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
		     (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);

	/* Now test if we need to extend the file */
	/* Doing so will adjust the filebytes for us */

	writelimit = offset + resid;
	if (writelimit <= filebytes)
		goto sizeok;

	cred = vfs_context_ucred(ap->a_context);
	bytesToAdd = writelimit - filebytes;
	retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
			   cred, 0);
	if (retval)
		goto exit;

	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto exit;
	}

	while (writelimit > filebytes) {
		bytesToAdd = writelimit - filebytes;
		if (cred && suser(cred, NULL) != 0)
			eflags |= kEFReserveMask;

		/* Protect extents b-tree and allocation bitmap */
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

		/* Files that are changing size are not hot file candidates. */
		if (hfsmp->hfc_stage == HFC_RECORDING) {
			fp->ff_bytesread = 0;
		}
		retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
				0, eflags, &actualBytesAdded));

		hfs_systemfile_unlock(hfsmp, lockflags);

		if ((actualBytesAdded == 0) && (retval == E_NONE))
			retval = ENOSPC;
		if (retval != E_NONE)
			break;
		filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
			(int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
	}
	(void) hfs_update(vp, TRUE);
	(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
	(void) hfs_end_transaction(hfsmp);

sizeok:
	if (retval == E_NONE) {
		off_t filesize;
		off_t zero_off;
		off_t tail_off;
		off_t inval_start;
		off_t inval_end;
		off_t io_start;
		int lflag;
		struct rl_entry *invalid_range;

		if (writelimit > fp->ff_size)
			filesize = writelimit;
		else
			filesize = fp->ff_size;

		lflag = (ioflag & IO_SYNC);

		if (offset <= fp->ff_size) {
			zero_off = offset & ~PAGE_MASK_64;

			/* Check to see whether the area between the zero_offset and the start
			   of the transfer is invalid and should be zero-filled
			   as part of the transfer:
			 */
			if (offset > zero_off) {
				if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
					lflag |= IO_HEADZEROFILL;
			}
		} else {
			off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;

			/* The bytes between fp->ff_size and uio->uio_offset must never be
			   read without being zeroed.  The current last block is filled with zeroes
			   if it holds valid data but in all cases merely do a little bookkeeping
			   to track the area from the end of the current last page to the start of
			   the area actually written.  For the same reason only the bytes up to the
			   start of the page where this write will start is invalidated; any remainder
			   before uio->uio_offset is explicitly zeroed as part of the cluster_write.

			   Note that inval_start, the start of the page after the current EOF,
			   may be past the start of the write, in which case the zeroing
			   will be handled by the cluster_write of the actual data.
			 */
			inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
			inval_end = offset & ~PAGE_MASK_64;
			zero_off = fp->ff_size;

			if ((fp->ff_size & PAGE_MASK_64) &&
			    (rl_scan(&fp->ff_invalidranges,
			             eof_page_base,
			             fp->ff_size - 1,
			             &invalid_range) != RL_NOOVERLAP)) {
				/* The page containing the EOF is not valid, so the
				   entire page must be made inaccessible now.  If the write
				   starts on a page beyond the page containing the eof
				   (inval_end > eof_page_base), add the
				   whole page to the range to be invalidated.  Otherwise
				   (i.e. if the write starts on the same page), zero-fill
				   the entire page explicitly now:
				 */
				if (inval_end > eof_page_base) {
					inval_start = eof_page_base;
				} else {
					zero_off = eof_page_base;
				}
			}

			if (inval_start < inval_end) {
				struct timeval tv;
				/* There's some range of data that's going to be marked invalid */

				if (zero_off < inval_start) {
					/* The pages between inval_start and inval_end are going to be invalidated,
					   and the actual write will start on a page past inval_end.  Now's the last
					   chance to zero-fill the page containing the EOF:
					 */
					hfs_unlock(cp);
					cnode_locked = 0;
					retval = cluster_write(vp, (uio_t) 0,
							fp->ff_size, inval_start,
							zero_off, (off_t)0,
							lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
					hfs_lock(cp, HFS_FORCE_LOCK);
					cnode_locked = 1;
					if (retval) goto ioerr_exit;
					offset = uio_offset(uio);
				}

				/* Mark the remaining area of the newly allocated space as invalid: */
				rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
				microuptime(&tv);
				cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
				zero_off = fp->ff_size = inval_end;
			}

			if (offset > zero_off) lflag |= IO_HEADZEROFILL;
		}

		/* Check to see whether the area between the end of the write and the end of
		   the page it falls in is invalid and should be zero-filled as part of the transfer:
		 */
		tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
		if (tail_off > filesize) tail_off = filesize;
		if (tail_off > writelimit) {
			if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
				lflag |= IO_TAILZEROFILL;
			}
		}

		/*
		 * if the write starts beyond the current EOF (possibly advanced in the
		 * zeroing of the last block, above), then we'll zero fill from the current EOF
		 * to where the write begins:
		 *
		 * NOTE: If (and ONLY if) the portion of the file about to be written is
		 *       before the current EOF it might be marked as invalid now and must be
		 *       made readable (removed from the invalid ranges) before cluster_write
		 *       tries to write it:
		 */
		io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
		if (io_start < fp->ff_size) {
			off_t io_end;

			io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
			rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
		}

		hfs_unlock(cp);
		cnode_locked = 0;
		retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
				tail_off, lflag | IO_NOZERODIRTY);
		offset = uio_offset(uio);
		if (offset > fp->ff_size) {
			fp->ff_size = offset;

			ubc_setsize(vp, fp->ff_size);       /* XXX check errors */
			/* Files that are changing size are not hot file candidates. */
			if (hfsmp->hfc_stage == HFC_RECORDING)
				fp->ff_bytesread = 0;
		}
		if (resid > uio_resid(uio)) {
			cp->c_touch_chgtime = TRUE;
			cp->c_touch_modtime = TRUE;
		}
	}

	// XXXdbg - testing for vivek and paul lambert
	if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
		VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
	}
	HFS_KNOTE(vp, NOTE_WRITE);

ioerr_exit:
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (cp->c_mode & (S_ISUID | S_ISGID)) {
		cred = vfs_context_ucred(ap->a_context);
		if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
			if (!cnode_locked) {
				hfs_lock(cp, HFS_FORCE_LOCK);
				cnode_locked = 1;
			}
			cp->c_mode &= ~(S_ISUID | S_ISGID);
		}
	}
	if (retval) {
		if (ioflag & IO_UNIT) {
			if (!cnode_locked) {
				hfs_lock(cp, HFS_FORCE_LOCK);
				cnode_locked = 1;
			}
			(void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
			                   0, ap->a_context);
			// LP64todo - fix this! resid needs to be user_ssize_t
			uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
			uio_setresid(uio, resid);
			filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
		}
	} else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
		if (!cnode_locked) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			cnode_locked = 1;
		}
		retval = hfs_update(vp, TRUE);
	}
	/* Updating vcbWrCnt doesn't need to be atomic. */
	hfsmp->vcbWrCnt++;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
exit:
	if (cnode_locked)
		hfs_unlock(cp);
	hfs_unlock_truncate(cp);
	return (retval);
}
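/*
 * Note on the error path above: when IO_UNIT is set the write is treated as
 * all-or-nothing, so on failure the fork is truncated back to origFileSize and
 * the uio offset and resid are rewound to their pre-write values before
 * returning.
 */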
/* support for the "bulk-access" fcntl */

#define CACHE_ELEMS	64
#define CACHE_LEVELS	16
#define PARENT_IDS_FLAG	0x100
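/*
 * The bulk-access cache remembers access results for up to CACHE_ELEMS parent
 * directories, and a single walk toward the root caches at most CACHE_LEVELS
 * ancestors.  PARENT_IDS_FLAG in the request flags indicates that the caller's
 * file_ids[] array already contains parent directory IDs rather than leaf
 * file IDs.
 */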
/* from hfs_attrlist.c */
extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
			mode_t obj_mode, struct mount *mp,
			kauth_cred_t cred, struct proc *p);

/* from vfs/vfs_fsevents.c */
extern char *get_pathbuff(void);
extern void release_pathbuff(char *buff);
struct access_cache {
	int		numcached;
	int		cachehits;	/* these two for statistics gathering */
	int		lookups;
	unsigned int	*acache;
	Boolean		*haveaccess;
};

struct access_t {
	uid_t		uid;		/* IN: effective user id */
	short		flags;		/* IN: access requested (i.e. R_OK) */
	short		num_groups;	/* IN: number of groups user belongs to */
	int		num_files;	/* IN: number of files to process */
	int		*file_ids;	/* IN: array of file ids */
	gid_t		*groups;	/* IN: array of groups */
	short		*access;	/* OUT: access info for each file (0 for 'has access') */
};

struct user_access_t {
	uid_t		uid;		/* IN: effective user id */
	short		flags;		/* IN: access requested (i.e. R_OK) */
	short		num_groups;	/* IN: number of groups user belongs to */
	int		num_files;	/* IN: number of files to process */
	user_addr_t	file_ids;	/* IN: array of file ids */
	user_addr_t	groups;		/* IN: array of groups */
	user_addr_t	access;		/* OUT: access info for each file (0 for 'has access') */
};
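/*
 * struct access_t is the layout handed in by 32-bit callers of the
 * bulk-access fsctl; struct user_access_t is the LP64-safe form using
 * user_addr_t pointers.  32-bit requests are converted field by field into a
 * temporary user_access_t (see the CAST_USER_ADDR_T copies in the
 * HFS_BULKACCESS handler below) so the rest of the code only deals with one
 * layout.
 */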
/*
 * Perform a binary search for the given parent_id. Return value is
 * found/not found boolean, and indexp will be the index of the item
 * or the index at which to insert the item if it's not found.
 */
static int
lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
{
	int index, matches = 0;
	unsigned int lo, hi;

	if (cache->numcached == 0) {
		*indexp = 0;
		return 0; // table is empty, so insert at index=0 and report no match
	}

	if (cache->numcached > CACHE_ELEMS) {
		/*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
		  cache->numcached, CACHE_ELEMS);*/
		cache->numcached = CACHE_ELEMS;
	}

	lo = 0;
	hi = cache->numcached - 1;
	index = -1;

	/* perform binary search for parent_id */
	do {
		unsigned int mid = (hi - lo)/2 + lo;
		unsigned int this_id = cache->acache[mid];

		if (parent_id == this_id) {
			hi = mid;
			break;
		}

		if (parent_id < this_id) {
			hi = mid;
			continue;
		}

		if (parent_id > this_id) {
			lo = mid + 1;
			continue;
		}
	} while (lo < hi);

	/* check if lo and hi converged on the match */
	if (parent_id == cache->acache[hi]) {
		index = hi;
		matches = 1;
	}

	/* if no existing entry found, find index for new one */
	if (!matches) {
		index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
	}
	*indexp = index;
	return (matches);
}
/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in). We currently do a replace rather
 * than an insert if the cache is full.
 */
static void
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
	int lookup_index = -1;

	/* need to do a lookup first if -1 passed for index */
	if (index == -1) {
		if (lookup_bucket(cache, &lookup_index, nodeID)) {
			if (cache->haveaccess[lookup_index] != access) {
				/* change access info for existing entry... should never happen */
				cache->haveaccess[lookup_index] = access;
			}

			/* mission accomplished */
			return;
		} else {
			index = lookup_index;
		}
	}

	/* if the cache is full, do a replace rather than an insert */
	if (cache->numcached >= CACHE_ELEMS) {
		//printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
		cache->numcached = CACHE_ELEMS-1;

		if (index > cache->numcached) {
			// printf("index %d pinned to %d\n", index, cache->numcached);
			index = cache->numcached;
		}
	} else if (index >= 0 && index < cache->numcached) {
		/* only do bcopy if we're inserting */
		bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
		bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
	}

	cache->numcached++;
	cache->acache[index] = nodeID;
	cache->haveaccess[index] = access;
}
static int
snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
{
	struct cinfo *cip = (struct cinfo *)arg;

	cip->uid = attrp->ca_uid;
	cip->gid = attrp->ca_gid;
	cip->mode = attrp->ca_mode;
	cip->parentcnid = descp->cd_parentcnid;

	return (0);
}
712 * isn't incore, then go to the catalog.
715 do_attr_lookup(struct hfsmount
*hfsmp
, struct access_cache
*cache
, dev_t dev
, cnid_t cnid
,
716 struct cnode
*skip_cp
, CatalogKey
*keyp
, struct cat_attr
*cnattrp
, struct proc
*p
)
720 /* if this id matches the one the fsctl was called with, skip the lookup */
721 if (cnid
== skip_cp
->c_cnid
) {
722 cnattrp
->ca_uid
= skip_cp
->c_uid
;
723 cnattrp
->ca_gid
= skip_cp
->c_gid
;
724 cnattrp
->ca_mode
= skip_cp
->c_mode
;
725 keyp
->hfsPlus
.parentID
= skip_cp
->c_parentcnid
;
729 /* otherwise, check the cnode hash incase the file/dir is incore */
730 if (hfs_chash_snoop(dev
, cnid
, snoop_callback
, &c_info
) == 0) {
731 cnattrp
->ca_uid
= c_info
.uid
;
732 cnattrp
->ca_gid
= c_info
.gid
;
733 cnattrp
->ca_mode
= c_info
.mode
;
734 keyp
->hfsPlus
.parentID
= c_info
.parentcnid
;
738 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_CATALOG
, HFS_SHARED_LOCK
);
740 /* lookup this cnid in the catalog */
741 error
= cat_getkeyplusattr(hfsmp
, cnid
, keyp
, cnattrp
);
743 hfs_systemfile_unlock(hfsmp
, lockflags
);
753 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
754 * up to CACHE_LEVELS as we progress towards the root.
757 do_access_check(struct hfsmount
*hfsmp
, int *err
, struct access_cache
*cache
, HFSCatalogNodeID nodeID
,
758 struct cnode
*skip_cp
, struct proc
*theProcPtr
, kauth_cred_t myp_ucred
, dev_t dev
)
762 HFSCatalogNodeID thisNodeID
;
763 unsigned long myPerms
;
764 struct cat_attr cnattr
;
765 int cache_index
= -1;
768 int i
= 0, ids_to_cache
= 0;
769 int parent_ids
[CACHE_LEVELS
];
771 /* root always has access */
772 if (!suser(myp_ucred
, NULL
)) {
777 while (thisNodeID
>= kRootDirID
) {
778 myResult
= 0; /* default to "no access" */
780 /* check the cache before resorting to hitting the catalog */
782 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
783 * to look any further after hitting cached dir */
785 if (lookup_bucket(cache
, &cache_index
, thisNodeID
)) {
787 myResult
= cache
->haveaccess
[cache_index
];
788 goto ExitThisRoutine
;
791 /* remember which parents we want to cache */
792 if (ids_to_cache
< CACHE_LEVELS
) {
793 parent_ids
[ids_to_cache
] = thisNodeID
;
797 /* do the lookup (checks the cnode hash, then the catalog) */
798 myErr
= do_attr_lookup(hfsmp
, cache
, dev
, thisNodeID
, skip_cp
, &catkey
, &cnattr
, theProcPtr
);
800 goto ExitThisRoutine
; /* no access */
803 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
,
804 cnattr
.ca_mode
, hfsmp
->hfs_mp
,
805 myp_ucred
, theProcPtr
);
807 if ( (myPerms
& X_OK
) == 0 ) {
809 goto ExitThisRoutine
; /* no access */
812 /* up the hierarchy we go */
813 thisNodeID
= catkey
.hfsPlus
.parentID
;
816 /* if here, we have access to this node */
821 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
826 /* cache the parent directory(ies) */
827 for (i
= 0; i
< ids_to_cache
; i
++) {
828 /* small optimization: get rid of double-lookup for all these */
829 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
830 add_node(cache
, -1, parent_ids
[i
], myResult
);
/* end "bulk-access" support */


/*
 * Callback for use with freeze ioctl.
 */
static int
hfs_freezewrite_callback(struct vnode *vp, void *cargs)
{
	vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");

	return 0;
}
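/*
 * hfs_freezewrite_callback() is handed to vnode_iterate() by the HFS_FREEZE
 * ioctl path below: every vnode on the mount is drained of pending writes
 * before the global exclusive lock is taken and the journal is flushed.
 */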
851 * Control filesystem operating characteristics.
854 hfs_vnop_ioctl( struct vnop_ioctl_args
/* {
859 vfs_context_t a_context;
862 struct vnode
* vp
= ap
->a_vp
;
863 struct hfsmount
*hfsmp
= VTOHFS(vp
);
864 vfs_context_t context
= ap
->a_context
;
865 kauth_cred_t cred
= vfs_context_ucred(context
);
866 proc_t p
= vfs_context_proc(context
);
867 struct vfsstatfs
*vfsp
;
870 is64bit
= proc_is64bit(p
);
872 switch (ap
->a_command
) {
874 case HFS_RESIZE_PROGRESS
: {
876 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
877 if (suser(cred
, NULL
) &&
878 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
879 return (EACCES
); /* must be owner of file system */
881 if (!vnode_isvroot(vp
)) {
884 return hfs_resize_progress(hfsmp
, (u_int32_t
*)ap
->a_data
);
886 case HFS_RESIZE_VOLUME
: {
890 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
891 if (suser(cred
, NULL
) &&
892 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
893 return (EACCES
); /* must be owner of file system */
895 if (!vnode_isvroot(vp
)) {
898 newsize
= *(u_int64_t
*)ap
->a_data
;
899 cursize
= (u_int64_t
)hfsmp
->totalBlocks
* (u_int64_t
)hfsmp
->blockSize
;
901 if (newsize
> cursize
) {
902 return hfs_extendfs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
903 } else if (newsize
< cursize
) {
904 return hfs_truncatefs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
909 case HFS_CHANGE_NEXT_ALLOCATION
: {
912 if (vnode_vfsisrdonly(vp
)) {
915 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
916 if (suser(cred
, NULL
) &&
917 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
918 return (EACCES
); /* must be owner of file system */
920 if (!vnode_isvroot(vp
)) {
923 location
= *(u_int32_t
*)ap
->a_data
;
924 if (location
> hfsmp
->totalBlocks
- 1) {
927 /* Return previous value. */
928 *(u_int32_t
*)ap
->a_data
= hfsmp
->nextAllocation
;
929 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
930 hfsmp
->nextAllocation
= location
;
931 hfsmp
->vcbFlags
|= 0xFF00;
932 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
936 #ifdef HFS_SPARSE_DEV
937 case HFS_SETBACKINGSTOREINFO
: {
938 struct vnode
* bsfs_rootvp
;
939 struct vnode
* di_vp
;
940 struct hfs_backingstoreinfo
*bsdata
;
943 if (hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) {
946 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
947 if (suser(cred
, NULL
) &&
948 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
949 return (EACCES
); /* must be owner of file system */
951 bsdata
= (struct hfs_backingstoreinfo
*)ap
->a_data
;
952 if (bsdata
== NULL
) {
955 if ((error
= file_vnode(bsdata
->backingfd
, &di_vp
))) {
958 if ((error
= vnode_getwithref(di_vp
))) {
959 file_drop(bsdata
->backingfd
);
963 if (vnode_mount(vp
) == vnode_mount(di_vp
)) {
964 (void)vnode_put(di_vp
);
965 file_drop(bsdata
->backingfd
);
970 * Obtain the backing fs root vnode and keep a reference
971 * on it. This reference will be dropped in hfs_unmount.
973 error
= VFS_ROOT(vnode_mount(di_vp
), &bsfs_rootvp
, NULL
); /* XXX use context! */
975 (void)vnode_put(di_vp
);
976 file_drop(bsdata
->backingfd
);
979 vnode_ref(bsfs_rootvp
);
980 vnode_put(bsfs_rootvp
);
982 hfsmp
->hfs_backingfs_rootvp
= bsfs_rootvp
;
983 hfsmp
->hfs_flags
|= HFS_HAS_SPARSE_DEVICE
;
984 hfsmp
->hfs_sparsebandblks
= bsdata
->bandsize
/ HFSTOVCB(hfsmp
)->blockSize
;
985 hfsmp
->hfs_sparsebandblks
*= 4;
987 (void)vnode_put(di_vp
);
988 file_drop(bsdata
->backingfd
);
991 case HFS_CLRBACKINGSTOREINFO
: {
992 struct vnode
* tmpvp
;
994 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
995 if (suser(cred
, NULL
) &&
996 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
997 return (EACCES
); /* must be owner of file system */
999 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
1000 hfsmp
->hfs_backingfs_rootvp
) {
1002 hfsmp
->hfs_flags
&= ~HFS_HAS_SPARSE_DEVICE
;
1003 tmpvp
= hfsmp
->hfs_backingfs_rootvp
;
1004 hfsmp
->hfs_backingfs_rootvp
= NULLVP
;
1005 hfsmp
->hfs_sparsebandblks
= 0;
1010 #endif /* HFS_SPARSE_DEV */
1019 mp
= vnode_mount(vp
);
1020 hfsmp
= VFSTOHFS(mp
);
1025 lck_rw_lock_exclusive(&hfsmp
->hfs_insync
);
1027 task
= current_task();
1028 task_working_set_disable(task
);
1030 // flush things before we get started to try and prevent
1031 // dirty data from being paged out while we're frozen.
1032 // note: can't do this after taking the lock as it will
1033 // deadlock against ourselves.
1034 vnode_iterate(mp
, 0, hfs_freezewrite_callback
, NULL
);
1035 hfs_global_exclusive_lock_acquire(hfsmp
);
1036 journal_flush(hfsmp
->jnl
);
1038 // don't need to iterate on all vnodes, we just need to
1039 // wait for writes to the system files and the device vnode
1040 if (HFSTOVCB(hfsmp
)->extentsRefNum
)
1041 vnode_waitforwrites(HFSTOVCB(hfsmp
)->extentsRefNum
, 0, 0, 0, "hfs freeze");
1042 if (HFSTOVCB(hfsmp
)->catalogRefNum
)
1043 vnode_waitforwrites(HFSTOVCB(hfsmp
)->catalogRefNum
, 0, 0, 0, "hfs freeze");
1044 if (HFSTOVCB(hfsmp
)->allocationsRefNum
)
1045 vnode_waitforwrites(HFSTOVCB(hfsmp
)->allocationsRefNum
, 0, 0, 0, "hfs freeze");
1046 if (hfsmp
->hfs_attribute_vp
)
1047 vnode_waitforwrites(hfsmp
->hfs_attribute_vp
, 0, 0, 0, "hfs freeze");
1048 vnode_waitforwrites(hfsmp
->hfs_devvp
, 0, 0, 0, "hfs freeze");
1050 hfsmp
->hfs_freezing_proc
= current_proc();
1059 // if we're not the one who froze the fs then we
1061 if (hfsmp
->hfs_freezing_proc
!= current_proc()) {
1065 // NOTE: if you add code here, also go check the
1066 // code that "thaws" the fs in hfs_vnop_close()
1068 hfsmp
->hfs_freezing_proc
= NULL
;
1069 hfs_global_exclusive_lock_release(hfsmp
);
1070 lck_rw_unlock_exclusive(&hfsmp
->hfs_insync
);
1075 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1076 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1078 case HFS_BULKACCESS_FSCTL
:
1079 case HFS_BULKACCESS
: {
1081 * NOTE: on entry, the vnode is locked. Incase this vnode
1082 * happens to be in our list of file_ids, we'll note it
1083 * avoid calling hfs_chashget_nowait() on that id as that
1084 * will cause a "locking against myself" panic.
1086 Boolean check_leaf
= true;
1088 struct user_access_t
*user_access_structp
;
1089 struct user_access_t tmp_user_access_t
;
1090 struct access_cache cache
;
1094 dev_t dev
= VTOC(vp
)->c_dev
;
1097 struct ucred myucred
;
1099 int *file_ids
= NULL
;
1100 short *access
= NULL
;
1103 cnid_t prevParent_cnid
= 0;
1104 unsigned long myPerms
;
1106 struct cat_attr cnattr
;
1108 struct cnode
*skip_cp
= VTOC(vp
);
1109 struct vfs_context my_context
;
1111 /* set up front for common exit code */
1112 my_context
.vc_ucred
= NOCRED
;
1114 /* first, return error if not run as root */
1115 if (cred
->cr_ruid
!= 0) {
1119 /* initialize the local cache and buffers */
1120 cache
.numcached
= 0;
1121 cache
.cachehits
= 0;
1124 file_ids
= (int *) get_pathbuff();
1125 access
= (short *) get_pathbuff();
1126 cache
.acache
= (int *) get_pathbuff();
1127 cache
.haveaccess
= (Boolean
*) get_pathbuff();
1129 if (file_ids
== NULL
|| access
== NULL
|| cache
.acache
== NULL
|| cache
.haveaccess
== NULL
) {
1130 release_pathbuff((char *) file_ids
);
1131 release_pathbuff((char *) access
);
1132 release_pathbuff((char *) cache
.acache
);
1133 release_pathbuff((char *) cache
.haveaccess
);
1138 /* struct copyin done during dispatch... need to copy file_id array separately */
1139 if (ap
->a_data
== NULL
) {
1141 goto err_exit_bulk_access
;
1145 user_access_structp
= (struct user_access_t
*)ap
->a_data
;
1148 struct access_t
* accessp
= (struct access_t
*)ap
->a_data
;
1149 tmp_user_access_t
.uid
= accessp
->uid
;
1150 tmp_user_access_t
.flags
= accessp
->flags
;
1151 tmp_user_access_t
.num_groups
= accessp
->num_groups
;
1152 tmp_user_access_t
.num_files
= accessp
->num_files
;
1153 tmp_user_access_t
.file_ids
= CAST_USER_ADDR_T(accessp
->file_ids
);
1154 tmp_user_access_t
.groups
= CAST_USER_ADDR_T(accessp
->groups
);
1155 tmp_user_access_t
.access
= CAST_USER_ADDR_T(accessp
->access
);
1156 user_access_structp
= &tmp_user_access_t
;
1159 num_files
= user_access_structp
->num_files
;
1160 if (num_files
< 1) {
1161 goto err_exit_bulk_access
;
1163 if (num_files
> 256) {
1165 goto err_exit_bulk_access
;
1168 if ((error
= copyin(user_access_structp
->file_ids
, (caddr_t
)file_ids
,
1169 num_files
* sizeof(int)))) {
1170 goto err_exit_bulk_access
;
1173 /* fill in the ucred structure */
1174 flags
= user_access_structp
->flags
;
1175 if ((flags
& (F_OK
| R_OK
| W_OK
| X_OK
)) == 0) {
1179 /* check if we've been passed leaf node ids or parent ids */
1180 if (flags
& PARENT_IDS_FLAG
) {
1185 * Create a templated credential; this credential may *NOT*
1186 * be used unless instantiated with a kauth_cred_create();
1187 * there must be a correcponding kauth_cred_unref() when it
1188 * is no longer in use (i.e. before it goes out of scope).
1190 memset(&myucred
, 0, sizeof(myucred
));
1192 myucred
.cr_uid
= myucred
.cr_ruid
= myucred
.cr_svuid
= user_access_structp
->uid
;
1193 myucred
.cr_ngroups
= user_access_structp
->num_groups
;
1194 if (myucred
.cr_ngroups
< 1 || myucred
.cr_ngroups
> 16) {
1195 myucred
.cr_ngroups
= 0;
1196 } else if ((error
= copyin(user_access_structp
->groups
, (caddr_t
)myucred
.cr_groups
,
1197 myucred
.cr_ngroups
* sizeof(gid_t
)))) {
1198 goto err_exit_bulk_access
;
1200 myucred
.cr_rgid
= myucred
.cr_svgid
= myucred
.cr_groups
[0];
1201 myucred
.cr_gmuid
= myucred
.cr_uid
;
1203 my_context
.vc_proc
= p
;
1204 my_context
.vc_ucred
= kauth_cred_create(&myucred
);
1206 /* Check access to each file_id passed in */
1207 for (i
= 0; i
< num_files
; i
++) {
1209 cnid
= (cnid_t
) file_ids
[i
];
1211 /* root always has access */
1212 if (!suser(my_context
.vc_ucred
, NULL
)) {
1219 /* do the lookup (checks the cnode hash, then the catalog) */
1220 error
= do_attr_lookup(hfsmp
, &cache
, dev
, cnid
, skip_cp
, &catkey
, &cnattr
, p
);
1222 access
[i
] = (short) error
;
1226 /* before calling CheckAccess(), check the target file for read access */
1227 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
,
1228 cnattr
.ca_mode
, hfsmp
->hfs_mp
, my_context
.vc_ucred
, p
);
1231 /* fail fast if no access */
1232 if ((myPerms
& flags
) == 0) {
1237 /* we were passed an array of parent ids */
1238 catkey
.hfsPlus
.parentID
= cnid
;
1241 /* if the last guy had the same parent and had access, we're done */
1242 if (i
> 0 && catkey
.hfsPlus
.parentID
== prevParent_cnid
&& access
[i
-1] == 0) {
1248 myaccess
= do_access_check(hfsmp
, &error
, &cache
, catkey
.hfsPlus
.parentID
,
1249 skip_cp
, p
, my_context
.vc_ucred
, dev
);
1252 access
[i
] = 0; // have access.. no errors to report
1254 access
[i
] = (error
!= 0 ? (short) error
: EACCES
);
1257 prevParent_cnid
= catkey
.hfsPlus
.parentID
;
1261 cnid
= (cnid_t
)file_ids
[i
];
1263 while (cnid
>= kRootDirID
) {
1264 /* get the vnode for this cnid */
1265 myErr
= hfs_vget(hfsmp
, cnid
, &vp
, 0);
1271 cnid
= VTOC(vp
)->c_parentcnid
;
1273 hfs_unlock(VTOC(vp
));
1274 if (vnode_vtype(vp
) == VDIR
) {
1276 * XXX This code assumes that none of the
1277 * XXX callbacks from vnode_authorize() will
1278 * XXX take a persistent ref on the context
1279 * XXX credential, which is a bad assumption.
1281 myErr
= vnode_authorize(vp
, NULL
, (KAUTH_VNODE_SEARCH
| KAUTH_VNODE_LIST_DIRECTORY
), &my_context
);
1283 myErr
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
, &my_context
);
1294 /* copyout the access array */
1295 if ((error
= copyout((caddr_t
)access
, user_access_structp
->access
,
1296 num_files
* sizeof (short)))) {
1297 goto err_exit_bulk_access
;
1300 err_exit_bulk_access
:
1302 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1304 release_pathbuff((char *) cache
.acache
);
1305 release_pathbuff((char *) cache
.haveaccess
);
1306 release_pathbuff((char *) file_ids
);
1307 release_pathbuff((char *) access
);
1308 /* clean up local context, if needed */
1309 if (IS_VALID_CRED(my_context
.vc_ucred
))
1310 kauth_cred_unref(&my_context
.vc_ucred
);
1313 } /* HFS_BULKACCESS */
1315 case HFS_SETACLSTATE
: {
1318 if (ap
->a_data
== NULL
) {
1322 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1323 state
= *(int *)ap
->a_data
;
1325 // super-user can enable or disable acl's on a volume.
1326 // the volume owner can only enable acl's
1327 if (!is_suser() && (state
== 0 || kauth_cred_getuid(cred
) != vfsp
->f_owner
)) {
1330 if (state
== 0 || state
== 1)
1331 return hfs_setextendedsecurity(hfsmp
, state
);
1339 error
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
);
1341 error
= hfs_fsync(vp
, MNT_NOWAIT
, TRUE
, p
);
1342 hfs_unlock(VTOC(vp
));
1349 register struct cnode
*cp
;
1352 if (!vnode_isreg(vp
))
1355 error
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
);
1359 * used by regression test to determine if
1360 * all the dirty pages (via write) have been cleaned
1361 * after a call to 'fsysnc'.
1363 error
= is_file_clean(vp
, VTOF(vp
)->ff_size
);
1370 register struct radvisory
*ra
;
1371 struct filefork
*fp
;
1374 if (!vnode_isreg(vp
))
1377 ra
= (struct radvisory
*)(ap
->a_data
);
1380 /* Protect against a size change. */
1381 hfs_lock_truncate(VTOC(vp
), TRUE
);
1383 if (ra
->ra_offset
>= fp
->ff_size
) {
1386 error
= advisory_read(vp
, fp
->ff_size
, ra
->ra_offset
, ra
->ra_count
);
1389 hfs_unlock_truncate(VTOC(vp
));
1393 case F_READBOOTSTRAP
:
1394 case F_WRITEBOOTSTRAP
:
1396 struct vnode
*devvp
= NULL
;
1397 user_fbootstraptransfer_t
*user_bootstrapp
;
1401 daddr64_t blockNumber
;
1405 user_fbootstraptransfer_t user_bootstrap
;
1407 if (!vnode_isvroot(vp
))
1409 /* LP64 - when caller is a 64 bit process then we are passed a pointer
1410 * to a user_fbootstraptransfer_t else we get a pointer to a
1411 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1414 user_bootstrapp
= (user_fbootstraptransfer_t
*)ap
->a_data
;
1417 fbootstraptransfer_t
*bootstrapp
= (fbootstraptransfer_t
*)ap
->a_data
;
1418 user_bootstrapp
= &user_bootstrap
;
1419 user_bootstrap
.fbt_offset
= bootstrapp
->fbt_offset
;
1420 user_bootstrap
.fbt_length
= bootstrapp
->fbt_length
;
1421 user_bootstrap
.fbt_buffer
= CAST_USER_ADDR_T(bootstrapp
->fbt_buffer
);
1423 if (user_bootstrapp
->fbt_offset
+ user_bootstrapp
->fbt_length
> 1024)
1426 devvp
= VTOHFS(vp
)->hfs_devvp
;
1427 auio
= uio_create(1, user_bootstrapp
->fbt_offset
,
1428 is64bit
? UIO_USERSPACE64
: UIO_USERSPACE32
,
1429 (ap
->a_command
== F_WRITEBOOTSTRAP
) ? UIO_WRITE
: UIO_READ
);
1430 uio_addiov(auio
, user_bootstrapp
->fbt_buffer
, user_bootstrapp
->fbt_length
);
1432 devBlockSize
= vfs_devblocksize(vnode_mount(vp
));
1434 while (uio_resid(auio
) > 0) {
1435 blockNumber
= uio_offset(auio
) / devBlockSize
;
1436 error
= (int)buf_bread(devvp
, blockNumber
, devBlockSize
, cred
, &bp
);
1438 if (bp
) buf_brelse(bp
);
1443 blockOffset
= uio_offset(auio
) % devBlockSize
;
1444 xfersize
= devBlockSize
- blockOffset
;
1445 error
= uiomove((caddr_t
)buf_dataptr(bp
) + blockOffset
, (int)xfersize
, auio
);
1451 if (uio_rw(auio
) == UIO_WRITE
) {
1452 error
= VNOP_BWRITE(bp
);
1465 case _IOC(IOC_OUT
,'h', 4, 0): /* Create date in local time */
1468 *(user_time_t
*)(ap
->a_data
) = (user_time_t
) (to_bsd_time(VTOVCB(vp
)->localCreateDate
));
1471 *(time_t *)(ap
->a_data
) = to_bsd_time(VTOVCB(vp
)->localCreateDate
);
1476 case HFS_GET_MOUNT_TIME
:
1477 return copyout(&hfsmp
->hfs_mount_time
, CAST_USER_ADDR_T(ap
->a_data
), sizeof(hfsmp
->hfs_mount_time
));
1480 case HFS_GET_LAST_MTIME
:
1481 return copyout(&hfsmp
->hfs_last_mounted_mtime
, CAST_USER_ADDR_T(ap
->a_data
), sizeof(hfsmp
->hfs_last_mounted_mtime
));
1484 case HFS_SET_BOOT_INFO
:
1485 if (!vnode_isvroot(vp
))
1487 if (!kauth_cred_issuser(cred
) && (kauth_cred_getuid(cred
) != vfs_statfs(HFSTOVFS(hfsmp
))->f_owner
))
1488 return(EACCES
); /* must be superuser or owner of filesystem */
1489 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1490 bcopy(ap
->a_data
, &hfsmp
->vcbFndrInfo
, sizeof(hfsmp
->vcbFndrInfo
));
1491 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1492 (void) hfs_flushvolumeheader(hfsmp
, MNT_WAIT
, 0);
1495 case HFS_GET_BOOT_INFO
:
1496 if (!vnode_isvroot(vp
))
1498 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1499 bcopy(&hfsmp
->vcbFndrInfo
, ap
->a_data
, sizeof(hfsmp
->vcbFndrInfo
));
1500 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1507 /* Should never get here */
1515 hfs_vnop_select(__unused
struct vnop_select_args
*ap
)
1517 struct vnop_select_args {
1522 vfs_context_t a_context;
1527 * We should really check to see if I/O is possible.
1533 * Converts a logical block number to a physical block, and optionally returns
1534 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1535 * The physical block number is based on the device block size, currently its 512.
1536 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
1539 hfs_bmap(struct vnode
*vp
, daddr_t bn
, struct vnode
**vpp
, daddr64_t
*bnp
, int *runp
)
1541 struct cnode
*cp
= VTOC(vp
);
1542 struct filefork
*fp
= VTOF(vp
);
1543 struct hfsmount
*hfsmp
= VTOHFS(vp
);
1544 int retval
= E_NONE
;
1545 daddr_t logBlockSize
;
1546 size_t bytesContAvail
= 0;
1547 off_t blockposition
;
1552 * Check for underlying vnode requests and ensure that logical
1553 * to physical mapping is requested.
1560 logBlockSize
= GetLogicalBlockSize(vp
);
1561 blockposition
= (off_t
)bn
* (off_t
)logBlockSize
;
1563 lockExtBtree
= overflow_extents(fp
);
1566 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_EXTENTS
, HFS_SHARED_LOCK
);
1568 retval
= MacToVFSError(
1569 MapFileBlockC (HFSTOVCB(hfsmp
),
1577 hfs_systemfile_unlock(hfsmp
, lockflags
);
1579 if (retval
== E_NONE
) {
1580 /* Figure out how many read ahead blocks there are */
1582 if (can_cluster(logBlockSize
)) {
1583 /* Make sure this result never goes negative: */
1584 *runp
= (bytesContAvail
< logBlockSize
) ? 0 : (bytesContAvail
/ logBlockSize
) - 1;
1594 * Convert logical block number to file offset.
1597 hfs_vnop_blktooff(struct vnop_blktooff_args
*ap
)
1599 struct vnop_blktooff_args {
1606 if (ap
->a_vp
== NULL
)
1608 *ap
->a_offset
= (off_t
)ap
->a_lblkno
* (off_t
)GetLogicalBlockSize(ap
->a_vp
);
1614 * Convert file offset to logical block number.
1617 hfs_vnop_offtoblk(struct vnop_offtoblk_args
*ap
)
1619 struct vnop_offtoblk_args {
1622 daddr64_t *a_lblkno;
1626 if (ap
->a_vp
== NULL
)
1628 *ap
->a_lblkno
= (daddr64_t
)(ap
->a_offset
/ (off_t
)GetLogicalBlockSize(ap
->a_vp
));
1634 * Map file offset to physical block number.
1636 * System file cnodes are expected to be locked (shared or exclusive).
1639 hfs_vnop_blockmap(struct vnop_blockmap_args
*ap
)
1641 struct vnop_blockmap_args {
1649 vfs_context_t a_context;
1653 struct vnode
*vp
= ap
->a_vp
;
1655 struct filefork
*fp
;
1656 struct hfsmount
*hfsmp
;
1657 size_t bytesContAvail
= 0;
1658 int retval
= E_NONE
;
1661 struct rl_entry
*invalid_range
;
1662 enum rl_overlaptype overlaptype
;
1666 /* Do not allow blockmap operation on a directory */
1667 if (vnode_isdir(vp
)) {
1672 * Check for underlying vnode requests and ensure that logical
1673 * to physical mapping is requested.
1675 if (ap
->a_bpn
== NULL
)
1678 if ( !vnode_issystem(vp
) && !vnode_islnk(vp
)) {
1679 if (VTOC(vp
)->c_lockowner
!= current_thread()) {
1680 hfs_lock(VTOC(vp
), HFS_FORCE_LOCK
);
1684 panic("blockmap: %s cnode lock already held!\n",
1685 cp
->c_desc
.cd_nameptr
? cp
->c_desc
.cd_nameptr
: "");
1693 if (fp
->ff_unallocblocks
) {
1694 if (hfs_start_transaction(hfsmp
) != 0) {
1700 syslocks
= SFL_EXTENTS
| SFL_BITMAP
;
1702 } else if (overflow_extents(fp
)) {
1703 syslocks
= SFL_EXTENTS
;
1707 lockflags
= hfs_systemfile_lock(hfsmp
, syslocks
, HFS_EXCLUSIVE_LOCK
);
1710 * Check for any delayed allocations.
1712 if (fp
->ff_unallocblocks
) {
1714 u_int32_t loanedBlocks
;
1717 // Make sure we have a transaction. It's possible
1718 // that we came in and fp->ff_unallocblocks was zero
1719 // but during the time we blocked acquiring the extents
1720 // btree, ff_unallocblocks became non-zero and so we
1721 // will need to start a transaction.
1723 if (started_tr
== 0) {
1725 hfs_systemfile_unlock(hfsmp
, lockflags
);
1732 * Note: ExtendFileC will Release any blocks on loan and
1733 * aquire real blocks. So we ask to extend by zero bytes
1734 * since ExtendFileC will account for the virtual blocks.
1737 loanedBlocks
= fp
->ff_unallocblocks
;
1738 retval
= ExtendFileC(hfsmp
, (FCB
*)fp
, 0, 0,
1739 kEFAllMask
| kEFNoClumpMask
, &actbytes
);
1742 fp
->ff_unallocblocks
= loanedBlocks
;
1743 cp
->c_blocks
+= loanedBlocks
;
1744 fp
->ff_blocks
+= loanedBlocks
;
1746 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1747 hfsmp
->loanedBlocks
+= loanedBlocks
;
1748 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1752 hfs_systemfile_unlock(hfsmp
, lockflags
);
1753 cp
->c_flag
|= C_MODIFIED
;
1755 (void) hfs_update(vp
, TRUE
);
1756 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
1758 hfs_end_transaction(hfsmp
);
1764 retval
= MapFileBlockC(hfsmp
, (FCB
*)fp
, ap
->a_size
, ap
->a_foffset
,
1765 ap
->a_bpn
, &bytesContAvail
);
1767 hfs_systemfile_unlock(hfsmp
, lockflags
);
1772 (void) hfs_update(vp
, TRUE
);
1773 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
1774 hfs_end_transaction(hfsmp
);
1781 /* Adjust the mapping information for invalid file ranges: */
1782 overlaptype
= rl_scan(&fp
->ff_invalidranges
, ap
->a_foffset
,
1783 ap
->a_foffset
+ (off_t
)bytesContAvail
- 1,
1785 if (overlaptype
!= RL_NOOVERLAP
) {
1786 switch(overlaptype
) {
1787 case RL_MATCHINGOVERLAP
:
1788 case RL_OVERLAPCONTAINSRANGE
:
1789 case RL_OVERLAPSTARTSBEFORE
:
1790 /* There's no valid block for this byte offset: */
1791 *ap
->a_bpn
= (daddr64_t
)-1;
1792 /* There's no point limiting the amount to be returned
1793 * if the invalid range that was hit extends all the way
1794 * to the EOF (i.e. there's no valid bytes between the
1795 * end of this range and the file's EOF):
1797 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
1798 (invalid_range
->rl_end
+ 1 - ap
->a_foffset
< bytesContAvail
)) {
1799 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
1803 case RL_OVERLAPISCONTAINED
:
1804 case RL_OVERLAPENDSAFTER
:
1805 /* The range of interest hits an invalid block before the end: */
1806 if (invalid_range
->rl_start
== ap
->a_foffset
) {
1807 /* There's actually no valid information to be had starting here: */
1808 *ap
->a_bpn
= (daddr64_t
)-1;
1809 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
1810 (invalid_range
->rl_end
+ 1 - ap
->a_foffset
< bytesContAvail
)) {
1811 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
1814 bytesContAvail
= invalid_range
->rl_start
- ap
->a_foffset
;
1821 if (bytesContAvail
> ap
->a_size
)
1822 bytesContAvail
= ap
->a_size
;
1825 *ap
->a_run
= bytesContAvail
;
1828 *(int *)ap
->a_poff
= 0;
1833 return (MacToVFSError(retval
));
/*
 * prepare and issue the I/O
 * buf_strategy knows how to deal
 * with requests that require
 * fragmented I/Os
 */
int
hfs_vnop_strategy(struct vnop_strategy_args *ap)
{
	buf_t	bp = ap->a_bp;
	vnode_t	vp = buf_vnode(bp);
	struct cnode *cp = VTOC(vp);

	return (buf_strategy(cp->c_devvp, ap));
}
1855 do_hfs_truncate(struct vnode
*vp
, off_t length
, int flags
, int skipsetsize
, vfs_context_t context
)
1857 register struct cnode
*cp
= VTOC(vp
);
1858 struct filefork
*fp
= VTOF(vp
);
1859 struct proc
*p
= vfs_context_proc(context
);;
1860 kauth_cred_t cred
= vfs_context_ucred(context
);
1863 off_t actualBytesAdded
;
1865 u_int64_t old_filesize
;
1868 struct hfsmount
*hfsmp
;
1871 blksize
= VTOVCB(vp
)->blockSize
;
1872 fileblocks
= fp
->ff_blocks
;
1873 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
1874 old_filesize
= fp
->ff_size
;
1876 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_START
,
1877 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
1882 /* This should only happen with a corrupt filesystem */
1883 if ((off_t
)fp
->ff_size
< 0)
1886 if ((!ISHFSPLUS(VTOVCB(vp
))) && (length
> (off_t
)MAXHFSFILESIZE
))
1893 /* Files that are changing size are not hot file candidates. */
1894 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
1895 fp
->ff_bytesread
= 0;
1899 * We cannot just check if fp->ff_size == length (as an optimization)
1900 * since there may be extra physical blocks that also need truncation.
1903 if ((retval
= hfs_getinoquota(cp
)))
1908 * Lengthen the size of the file. We must ensure that the
1909 * last byte of the file is allocated. Since the smallest
1910 * value of ff_size is 0, length will be at least 1.
1912 if (length
> (off_t
)fp
->ff_size
) {
1914 retval
= hfs_chkdq(cp
, (int64_t)(roundup(length
- filebytes
, blksize
)),
1920 * If we don't have enough physical space then
1921 * we need to extend the physical size.
1923 if (length
> filebytes
) {
1925 u_long blockHint
= 0;
1927 /* All or nothing and don't round up to clumpsize. */
1928 eflags
= kEFAllMask
| kEFNoClumpMask
;
1930 if (cred
&& suser(cred
, NULL
) != 0)
1931 eflags
|= kEFReserveMask
; /* keep a reserve */
1934 * Allocate Journal and Quota files in metadata zone.
1936 if (filebytes
== 0 &&
1937 hfsmp
->hfs_flags
& HFS_METADATA_ZONE
&&
1938 hfs_virtualmetafile(cp
)) {
1939 eflags
|= kEFMetadataMask
;
1940 blockHint
= hfsmp
->hfs_metazone_start
;
1942 if (hfs_start_transaction(hfsmp
) != 0) {
1947 /* Protect extents b-tree and allocation bitmap */
1948 lockflags
= SFL_BITMAP
;
1949 if (overflow_extents(fp
))
1950 lockflags
|= SFL_EXTENTS
;
1951 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
1953 while ((length
> filebytes
) && (retval
== E_NONE
)) {
1954 bytesToAdd
= length
- filebytes
;
1955 retval
= MacToVFSError(ExtendFileC(VTOVCB(vp
),
1960 &actualBytesAdded
));
1962 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
1963 if (actualBytesAdded
== 0 && retval
== E_NONE
) {
1964 if (length
> filebytes
)
1970 hfs_systemfile_unlock(hfsmp
, lockflags
);
1973 (void) hfs_update(vp
, TRUE
);
1974 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
1977 hfs_end_transaction(hfsmp
);
1982 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_NONE
,
1983 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
1986 if (!(flags
& IO_NOZEROFILL
)) {
1987 if (UBCINFOEXISTS(vp
) && retval
== E_NONE
) {
1988 struct rl_entry
*invalid_range
;
1991 zero_limit
= (fp
->ff_size
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
1992 if (length
< zero_limit
) zero_limit
= length
;
1994 if (length
> (off_t
)fp
->ff_size
) {
1997 /* Extending the file: time to fill out the current last page w. zeroes? */
1998 if ((fp
->ff_size
& PAGE_MASK_64
) &&
1999 (rl_scan(&fp
->ff_invalidranges
, fp
->ff_size
& ~PAGE_MASK_64
,
2000 fp
->ff_size
- 1, &invalid_range
) == RL_NOOVERLAP
)) {
2002 /* There's some valid data at the start of the (current) last page
2003 of the file, so zero out the remainder of that page to ensure the
2004 entire page contains valid data. Since there is no invalid range
2005 possible past the (current) eof, there's no need to remove anything
2006 from the invalid range list before calling cluster_write(): */
2008 retval
= cluster_write(vp
, (struct uio
*) 0, fp
->ff_size
, zero_limit
,
2009 fp
->ff_size
, (off_t
)0,
2010 (flags
& IO_SYNC
) | IO_HEADZEROFILL
| IO_NOZERODIRTY
);
2011 hfs_lock(cp
, HFS_FORCE_LOCK
);
2012 if (retval
) goto Err_Exit
;
2014 /* Merely invalidate the remaining area, if necessary: */
2015 if (length
> zero_limit
) {
2017 rl_add(zero_limit
, length
- 1, &fp
->ff_invalidranges
);
2018 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
2021 /* The page containing the (current) eof is invalid: just add the
2022 remainder of the page to the invalid list, along with the area
2023 being newly allocated:
2026 rl_add(fp
->ff_size
, length
- 1, &fp
->ff_invalidranges
);
2027 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
2031 panic("hfs_truncate: invoked on non-UBC object?!");
2034 cp
->c_touch_modtime
= TRUE
;
2035 fp
->ff_size
= length
;
2037 /* Nested transactions will do their own ubc_setsize. */
2040 * ubc_setsize can cause a pagein here
2041 * so we need to drop cnode lock.
2044 ubc_setsize(vp
, length
);
2045 hfs_lock(cp
, HFS_FORCE_LOCK
);
2048 } else { /* Shorten the size of the file */
2050 if ((off_t
)fp
->ff_size
> length
) {
2052 * Any buffers that are past the truncation point need to be
2053 * invalidated (to maintain buffer cache consistency).
2056 /* Nested transactions will do their own ubc_setsize. */
2059 * ubc_setsize can cause a pageout here
2060 * so we need to drop cnode lock.
2063 ubc_setsize(vp
, length
);
2064 hfs_lock(cp
, HFS_FORCE_LOCK
);
2067 /* Any space previously marked as invalid is now irrelevant: */
2068 rl_remove(length
, fp
->ff_size
- 1, &fp
->ff_invalidranges
);
2072 * Account for any unmapped blocks. Note that the new
2073 * file length can still end up with unmapped blocks.
2075 if (fp
->ff_unallocblocks
> 0) {
2076 u_int32_t finalblks
;
2077 u_int32_t loanedBlocks
;
2079 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
2081 loanedBlocks
= fp
->ff_unallocblocks
;
2082 cp
->c_blocks
-= loanedBlocks
;
2083 fp
->ff_blocks
-= loanedBlocks
;
2084 fp
->ff_unallocblocks
= 0;
2086 hfsmp
->loanedBlocks
-= loanedBlocks
;
2088 finalblks
= (length
+ blksize
- 1) / blksize
;
2089 if (finalblks
> fp
->ff_blocks
) {
2090 /* calculate required unmapped blocks */
2091 loanedBlocks
= finalblks
- fp
->ff_blocks
;
2092 hfsmp
->loanedBlocks
+= loanedBlocks
;
2094 fp
->ff_unallocblocks
= loanedBlocks
;
2095 cp
->c_blocks
+= loanedBlocks
;
2096 fp
->ff_blocks
+= loanedBlocks
;
2098 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
2102 * For a TBE process the deallocation of the file blocks is
2103 * delayed until the file is closed. And hfs_close calls
2104 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2105 * isn't set, we make sure this isn't a TBE process.
2107 if ((flags
& IO_NDELAY
) || (proc_tbe(p
) == 0)) {
2109 off_t savedbytes
= ((off_t
)fp
->ff_blocks
* (off_t
)blksize
);
2111 if (hfs_start_transaction(hfsmp
) != 0) {
2116 if (fp
->ff_unallocblocks
== 0) {
2117 /* Protect extents b-tree and allocation bitmap */
2118 lockflags
= SFL_BITMAP
;
2119 if (overflow_extents(fp
))
2120 lockflags
|= SFL_EXTENTS
;
2121 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
2123 retval
= MacToVFSError(TruncateFileC(VTOVCB(vp
),
2124 (FCB
*)fp
, length
, false));
2126 hfs_systemfile_unlock(hfsmp
, lockflags
);
2130 fp
->ff_size
= length
;
2132 (void) hfs_update(vp
, TRUE
);
2133 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
2136 hfs_end_transaction(hfsmp
);
2138 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
2142 /* These are bytesreleased */
2143 (void) hfs_chkdq(cp
, (int64_t)-(savedbytes
- filebytes
), NOCRED
, 0);
2146 /* Only set update flag if the logical length changes */
2147 if (old_filesize
!= length
)
2148 cp
->c_touch_modtime
= TRUE
;
2149 fp
->ff_size
= length
;
2151 cp
->c_touch_chgtime
= TRUE
;
2152 retval
= hfs_update(vp
, MNT_WAIT
);
2154 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_NONE
,
2155 -1, -1, -1, retval
, 0);
2160 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_END
,
2161 (int)length
, (int)fp
->ff_size
, (int)filebytes
, retval
, 0);
2169 * Truncate a cnode to at most length size, freeing (or adding) the
2174 hfs_truncate(struct vnode
*vp
, off_t length
, int flags
, int skipsetsize
,
2175 vfs_context_t context
)
2177 struct filefork
*fp
= VTOF(vp
);
2180 int blksize
, error
= 0;
2181 struct cnode
*cp
= VTOC(vp
);
2183 if (vnode_isdir(vp
))
2184 return (EISDIR
); /* cannot truncate an HFS directory! */
2186 blksize
= VTOVCB(vp
)->blockSize
;
2187 fileblocks
= fp
->ff_blocks
;
2188 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
2190 // have to loop truncating or growing files that are
2191 // really big because otherwise transactions can get
2192 // enormous and consume too many kernel resources.
2194 if (length
< filebytes
) {
2195 while (filebytes
> length
) {
2196 if ((filebytes
- length
) > HFS_BIGFILE_SIZE
&& overflow_extents(fp
)) {
2197 filebytes
-= HFS_BIGFILE_SIZE
;
2201 cp
->c_flag
|= C_FORCEUPDATE
;
2202 error
= do_hfs_truncate(vp
, filebytes
, flags
, skipsetsize
, context
);
2206 } else if (length
> filebytes
) {
2207 while (filebytes
< length
) {
2208 if ((length
- filebytes
) > HFS_BIGFILE_SIZE
&& overflow_extents(fp
)) {
2209 filebytes
+= HFS_BIGFILE_SIZE
;
2213 cp
->c_flag
|= C_FORCEUPDATE
;
2214 error
= do_hfs_truncate(vp
, filebytes
, flags
, skipsetsize
, context
);
2218 } else /* Same logical size */ {
2220 error
= do_hfs_truncate(vp
, length
, flags
, skipsetsize
, context
);
2222 /* Files that are changing size are not hot file candidates. */
2223 if (VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
) {
2224 fp
->ff_bytesread
= 0;
2233 * Preallocate file storage space.
2236 hfs_vnop_allocate(struct vnop_allocate_args
/* {
2240 off_t *a_bytesallocated;
2242 vfs_context_t a_context;
2245 struct vnode
*vp
= ap
->a_vp
;
2247 struct filefork
*fp
;
2249 off_t length
= ap
->a_length
;
2251 off_t moreBytesRequested
;
2252 off_t actualBytesAdded
;
2255 int retval
, retval2
;
2257 UInt32 extendFlags
; /* For call to ExtendFileC */
2258 struct hfsmount
*hfsmp
;
2259 kauth_cred_t cred
= vfs_context_ucred(ap
->a_context
);
2262 *(ap
->a_bytesallocated
) = 0;
2264 if (!vnode_isreg(vp
))
2266 if (length
< (off_t
)0)
2269 if ((retval
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
)))
2276 fileblocks
= fp
->ff_blocks
;
2277 filebytes
= (off_t
)fileblocks
* (off_t
)vcb
->blockSize
;
2279 if ((ap
->a_flags
& ALLOCATEFROMVOL
) && (length
< filebytes
)) {
2284 /* Fill in the flags word for the call to Extend the file */
2286 extendFlags
= kEFNoClumpMask
;
2287 if (ap
->a_flags
& ALLOCATECONTIG
)
2288 extendFlags
|= kEFContigMask
;
2289 if (ap
->a_flags
& ALLOCATEALL
)
2290 extendFlags
|= kEFAllMask
;
2291 if (cred
&& suser(cred
, NULL
) != 0)
2292 extendFlags
|= kEFReserveMask
;
2296 startingPEOF
= filebytes
;
2298 if (ap
->a_flags
& ALLOCATEFROMPEOF
)
2299 length
+= filebytes
;
2300 else if (ap
->a_flags
& ALLOCATEFROMVOL
)
2301 blockHint
= ap
->a_offset
/ VTOVCB(vp
)->blockSize
;
2303 /* If no changes are necesary, then we're done */
2304 if (filebytes
== length
)
    /*
     * Lengthen the size of the file. We must ensure that the
     * last byte of the file is allocated. Since the smallest
     * value of filebytes is 0, length will be at least 1.
     */
    if (length > filebytes) {
        moreBytesRequested = length - filebytes;

#if QUOTA
        retval = hfs_chkdq(cp,
                (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
                cred, 0);
        if (retval)
            goto Err_Exit;
#endif /* QUOTA */
        /*
         * Metadata zone checks.
         */
        if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
            /*
             * Allocate Journal and Quota files in metadata zone.
             */
            if (hfs_virtualmetafile(cp)) {
                extendFlags |= kEFMetadataMask;
                blockHint = hfsmp->hfs_metazone_start;
            } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
                       (blockHint <= hfsmp->hfs_metazone_end)) {
                /*
                 * Move blockHint outside metadata zone.
                 */
                blockHint = hfsmp->hfs_metazone_end + 1;
            }
        }

        if (hfs_start_transaction(hfsmp) != 0) {
            retval = EINVAL;
            goto Err_Exit;
        }

        /* Protect extents b-tree and allocation bitmap */
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

        retval = MacToVFSError(ExtendFileC(vcb,
                        (FCB *)fp,
                        moreBytesRequested,
                        blockHint,
                        extendFlags,
                        &actualBytesAdded));

        *(ap->a_bytesallocated) = actualBytesAdded;
        filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

        hfs_systemfile_unlock(hfsmp, lockflags);

        (void) hfs_update(vp, TRUE);
        (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

        hfs_end_transaction(hfsmp);

        /*
         * If we get an error and no changes were made then exit;
         * otherwise we must do the hfs_update to reflect the changes.
         */
        if (retval && (startingPEOF == filebytes))
            goto Err_Exit;

        /*
         * Adjust actualBytesAdded to be allocation block aligned, not
         * clump size aligned.
         * NOTE: So what we are reporting does not affect reality
         * until the file is closed, when we truncate the file to allocation
         * block size.
         */
        if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
            *(ap->a_bytesallocated) =
                roundup(moreBytesRequested, (off_t)vcb->blockSize);
    } else { /* Shorten the size of the file */

        if (fp->ff_size > length) {
            /*
             * Any buffers that are past the truncation point need to be
             * invalidated (to maintain buffer cache consistency).
             */
        }

        if (hfs_start_transaction(hfsmp) != 0) {
            retval = EINVAL;
            goto Err_Exit;
        }

        /* Protect extents b-tree and allocation bitmap */
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

        retval = MacToVFSError(TruncateFileC(vcb, (FCB *)fp, length, false));

        hfs_systemfile_unlock(hfsmp, lockflags);

        filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

        (void) hfs_update(vp, TRUE);
        (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

        hfs_end_transaction(hfsmp);

        /*
         * If we get an error and no changes were made then exit;
         * otherwise we must do the hfs_update to reflect the changes.
         */
        if (retval && (startingPEOF == filebytes)) goto Err_Exit;
#if QUOTA
        /* These are bytesreleased */
        (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED, 0);
#endif /* QUOTA */

        if (fp->ff_size > filebytes) {
            fp->ff_size = filebytes;

            hfs_unlock(cp);
            ubc_setsize(vp, fp->ff_size);
            hfs_lock(cp, HFS_FORCE_LOCK);
        }
    }

Std_Exit:
    cp->c_touch_chgtime = TRUE;
    cp->c_touch_modtime = TRUE;
    retval2 = hfs_update(vp, MNT_WAIT);

    if (retval == 0)
        retval = retval2;
Err_Exit:
    hfs_unlock(cp);
    return (retval);
}
/*
 * Pagein for HFS filesystem
 */
int
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
    struct vnop_pagein_args {
        vnode_t       a_vp,
        upl_t         a_pl,
        vm_offset_t   a_pl_offset,
        off_t         a_f_offset,
        size_t        a_size,
        int           a_flags,
        vfs_context_t a_context;
    };
*/
{
    vnode_t vp = ap->a_vp;
    int error;

    error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
                           ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
    /*
     * Keep track of blocks read.
     */
    if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
        struct cnode *cp;
        struct filefork *fp;
        int bytesread;
        int took_cnode_lock = 0;

        cp = VTOC(vp);
        fp = VTOF(vp);
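        /*
         * For a small file (shorter than one page) read at offset 0,
         * count only the file's valid bytes toward the hot-file
         * statistics; otherwise count the full size of this pagein.
         */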
        if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
            bytesread = fp->ff_size;
        else
            bytesread = ap->a_size;

        /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
        if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
            hfs_lock(cp, HFS_FORCE_LOCK);
            took_cnode_lock = 1;
        }
        /*
         * If this file hasn't been seen since the start of
         * the current sampling period then start over.
         */
        if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
            struct timeval tv;

            fp->ff_bytesread = bytesread;
            microtime(&tv);
            cp->c_atime = tv.tv_sec;
        } else {
            fp->ff_bytesread += bytesread;
        }
        cp->c_touch_acctime = TRUE;
        if (took_cnode_lock)
            hfs_unlock(cp);
    }
    return (error);
}
/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
    struct vnop_pageout_args {
        vnode_t       a_vp,
        upl_t         a_pl,
        vm_offset_t   a_pl_offset,
        off_t         a_f_offset,
        size_t        a_size,
        int           a_flags,
        vfs_context_t a_context;
    };
*/
{
    vnode_t vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    int retval;
    off_t filesize;
    off_t end_of_range;

    cp = VTOC(vp);
    if (cp->c_lockowner == current_thread()) {
        panic("pageout: %s cnode lock already held!\n",
              cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
    }
    if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
        if (!(ap->a_flags & UPL_NOCOMMIT)) {
            ubc_upl_abort_range(ap->a_pl,
                                ap->a_pl_offset,
                                ap->a_size,
                                UPL_ABORT_FREE_ON_EMPTY);
        }
        return (retval);
    }
    fp = VTOF(vp);

    filesize = fp->ff_size;
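    /*
     * The byte range being pushed out below becomes valid on disk once the
     * pageout completes, so clamp it to the file's EOF and remove it from
     * the fork's invalid-range list before calling cluster_pageout().
     */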
    end_of_range = ap->a_f_offset + ap->a_size - 1;

    if (end_of_range >= filesize) {
        end_of_range = (off_t)(filesize - 1);
    }
    if (ap->a_f_offset < filesize) {
        rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
        cp->c_flag |= C_MODIFIED;  /* leof is dirty */
    }
    hfs_unlock(cp);

    retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
                             ap->a_size, filesize, ap->a_flags);

    /*
     * If data was written, and setuid or setgid bits are set and
     * this process is not the superuser then clear the setuid and
     * setgid bits as a precaution against tampering.
     */
    if ((retval == 0) &&
        (cp->c_mode & (S_ISUID | S_ISGID)) &&
        (vfs_context_suser(ap->a_context) != 0)) {
        hfs_lock(cp, HFS_FORCE_LOCK);
        cp->c_mode &= ~(S_ISUID | S_ISGID);
        cp->c_touch_chgtime = TRUE;
        hfs_unlock(cp);
    }
    return (retval);
}
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
    int retval = 0;
    register struct buf *bp = ap->a_bp;
    register struct vnode *vp = buf_vnode(bp);
    BlockDescriptor block;

    /* Trap B-Tree writes */
    if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
        (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
        (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
        (vp == VTOHFS(vp)->hfc_filevp)) {
        /*
         * Swap and validate the node if it is in native byte order.
         * This is always true on big endian, so we always validate
         * before writing here. On little endian, the node typically has
         * been swapped and validated when it was written to the journal,
         * so we won't do anything here.
         */
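        /*
         * The test below reads the last two bytes of the node in host byte
         * order; in a node that is still in native (host) order this is the
         * offset of the first record, 0x000e (14, the size of the node
         * descriptor), so it serves as a cheap "needs swapping to big
         * endian" check.
         */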
        if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
            /* Prepare the block pointer */
            block.blockHeader = bp;
            block.buffer = (char *)buf_dataptr(bp);
            block.blockNum = buf_lblkno(bp);
            /* not found in cache ==> came from disk */
            block.blockReadFromDisk = (buf_fromcache(bp) == 0);
            block.blockSize = buf_count(bp);

            /* Endian un-swap B-Tree node */
            retval = hfs_swap_BTNode(&block, vp, kSwapBTNodeHostToBig);
            if (retval)
                panic("hfs_vnop_bwrite: about to write corrupt node!\n");
        }
    }

    /* This buffer shouldn't be locked anymore but if it is clear it */
    if ((buf_flags(bp) & B_LOCKED)) {
        if (VTOHFS(vp)->jnl) {
            panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
        }
        buf_clearflags(bp, B_LOCKED);
    }
    retval = vn_bwrite(ap);

    return (retval);
}
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------     -----------------
 * |///////////////|     |               |   STEP 1 (acquire new blocks)
 * -----------------     -----------------
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|   STEP 2 (clone data)
 * -----------------     -----------------
 *
 *                       -----------------
 *                       |///////////////|   STEP 3 (head truncate blocks)
 *                       -----------------
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N (the original end of file) are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
    struct proc *p)
{
    struct cnode *cp;
    struct filefork *fp;
    struct hfsmount *hfsmp;
    u_int32_t headblks;
    u_int32_t datablks;
    u_int32_t blksize;
    u_int32_t growsize;
    u_int32_t nextallocsave;
    daddr64_t sector_a, sector_b;
    int disabled_caching = 0;
    int eflags;
    off_t newbytes;
    int retval;
    int lockflags = 0;
    int took_trunc_lock = 0;
    int started_tr = 0;
    enum vtype vnodetype;

    vnodetype = vnode_vtype(vp);
    if (vnodetype != VREG && vnodetype != VLNK) {
        return (EPERM);
    }

    hfsmp = VTOHFS(vp);
    if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
        return (ENOSPC);
    }

    cp = VTOC(vp);
    fp = VTOF(vp);
    if (fp->ff_unallocblocks)
        return (EINVAL);
    blksize = hfsmp->blockSize;
    if (blockHint == 0)
        blockHint = hfsmp->nextAllocation;

    if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
        ((fp->ff_size > blksize) && vnodetype == VLNK)) {
        return (EFBIG);
    }

    //
    // We do not believe that this call to hfs_fsync() is
    // necessary and it causes a journal transaction
    // deadlock so we are removing it.
    //
    //if (vnodetype == VREG && !vnode_issystem(vp)) {
    //    retval = hfs_fsync(vp, MNT_WAIT, 0, p);
    //}

    if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
        hfs_unlock(cp);
        hfs_lock_truncate(cp, TRUE);
        if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
            hfs_unlock_truncate(cp);
            return (retval);
        }
        took_trunc_lock = 1;
    }

    headblks = fp->ff_blocks;
    datablks = howmany(fp->ff_size, blksize);
    growsize = datablks * blksize;
    eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
    if (blockHint >= hfsmp->hfs_metazone_start &&
        blockHint <= hfsmp->hfs_metazone_end)
        eflags |= kEFMetadataMask;

    if (hfs_start_transaction(hfsmp) != 0) {
        if (took_trunc_lock)
            hfs_unlock_truncate(cp);
        return (EINVAL);
    }
    started_tr = 1;

    /*
     * Protect the extents b-tree and the allocation bitmap
     * during MapFileBlockC and ExtendFileC operations.
     */
    lockflags = SFL_BITMAP;
    if (overflow_extents(fp))
        lockflags |= SFL_EXTENTS;
    lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

    retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
    if (retval) {
        retval = MacToVFSError(retval);
        goto out;
    }

    /*
     * STEP 1 - acquire new allocation blocks.
     */
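    /*
     * Note: caching on the vnode is disabled here (and re-enabled near the
     * end if this function disabled it), presumably so the clone pass below
     * moves data directly to and from disk instead of filling the unified
     * buffer cache with blocks that are about to be remapped.
     */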
    if (!vnode_isnocache(vp)) {
        vnode_setnocache(vp);
        disabled_caching = 1;
    }
    nextallocsave = hfsmp->nextAllocation;
    retval = ExtendFileC(hfsmp, (FCB *)fp, growsize, blockHint, eflags, &newbytes);
    if (eflags & kEFMetadataMask) {
        HFS_MOUNT_LOCK(hfsmp, TRUE);
        hfsmp->nextAllocation = nextallocsave;
        hfsmp->vcbFlags |= 0xFF00;
        HFS_MOUNT_UNLOCK(hfsmp, TRUE);
    }

    retval = MacToVFSError(retval);
    if (retval == 0) {
        cp->c_flag |= C_MODIFIED;
        if (newbytes < growsize) {
            retval = ENOSPC;
            goto restore;
        } else if (fp->ff_blocks < (headblks + datablks)) {
            printf("hfs_relocate: allocation failed");
            retval = ENOSPC;
            goto restore;
        }

        retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
        if (retval) {
            retval = MacToVFSError(retval);
        } else if ((sector_a + 1) == sector_b) {
            retval = ENOSPC;
            goto restore;
        } else if ((eflags & kEFMetadataMask) &&
                   ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
                      hfsmp->hfs_metazone_end)) {
            printf("hfs_relocate: didn't move into metadata zone\n");
            retval = ENOSPC;
            goto restore;
        }
    }
    /* Done with system locks and journal for now. */
    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;
    hfs_end_transaction(hfsmp);
    started_tr = 0;

    if (retval) {
        /*
         * Check to see if failure is due to excessive fragmentation.
         */
        if ((retval == ENOSPC) &&
            (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
            hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
        }
        goto out;
    }
    /*
     * STEP 2 - clone file data into the new allocation blocks.
     */
    if (vnodetype == VLNK)
        retval = hfs_clonelink(vp, blksize, cred, p);
    else if (vnode_issystem(vp))
        retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
    else
        retval = hfs_clonefile(vp, headblks, datablks, blksize);

    /* Start transaction for step 3 or for a restore. */
    if (hfs_start_transaction(hfsmp) != 0) {
        retval = EINVAL;
        goto out;
    }
    started_tr = 1;

    if (retval) {
        /* The clone in step 2 failed; give back the new blocks. */
        goto restore;
    }

    /*
     * STEP 3 - switch to cloned data and remove old blocks.
     */
    lockflags = SFL_BITMAP;
    if (overflow_extents(fp))
        lockflags |= SFL_EXTENTS;
    lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

    retval = HeadTruncateFile(hfsmp, (FCB *)fp, headblks);

    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;
    if (retval)
        goto restore;

out:
    if (took_trunc_lock)
        hfs_unlock_truncate(cp);

    if (lockflags) {
        hfs_systemfile_unlock(hfsmp, lockflags);
        lockflags = 0;
    }

    /* Push cnode's new extent data to disk. */
    if (retval == 0) {
        (void) hfs_update(vp, MNT_WAIT);
    }

    if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
        (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
    else
        (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);

    if (disabled_caching) {
        vnode_clearnocache(vp);
    }
exit:
    if (started_tr)
        hfs_end_transaction(hfsmp);

    return (retval);
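    /*
     * Error path: the relocation could not be completed, so undo step 1 by
     * releasing any blocks that were newly allocated, leaving the fork with
     * its original allocation.
     */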
restore:
    if (fp->ff_blocks == headblks)
        goto exit;
    /*
     * Give back any newly allocated space.
     */
    if (lockflags == 0) {
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
    }

    (void) TruncateFileC(hfsmp, (FCB *)fp, fp->ff_size, false);

    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;

    if (took_trunc_lock)
        hfs_unlock_truncate(cp);
    goto exit;
}
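/*
 * Clone a symlink's data: copy the link's single block of data from the
 * original allocation (logical block 0) into the newly allocated block
 * that immediately follows it, then invalidate the stale buffers.
 */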
static int
hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
{
    struct buf *head_bp = NULL;
    struct buf *tail_bp = NULL;
    int error = 0;

    error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
    if (error)
        goto out;

    tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
    if (tail_bp == NULL) {
        error = EIO;
        goto out;
    }
    bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
    error = (int)buf_bwrite(tail_bp);
out:
    if (head_bp) {
        buf_markinvalid(head_bp);
        buf_brelse(head_bp);
    }
    (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);

    return (error);
}
/*
 * Clone a file's data within the file.
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
    caddr_t bufp;
    size_t bufsize;
    size_t copysize;
    size_t iosize;
    size_t offset;
    off_t filesize;
    off_t writebase;
    uio_t auio;
    int error = 0;

    filesize = VTOF(vp)->ff_blocks * blksize;  /* virtual file size */
    writebase = blkstart * blksize;
    copysize = blkcnt * blksize;
    iosize = bufsize = MIN(copysize, 128 * 1024);
    offset = 0;

    if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
        return (ENOMEM);
    }
    hfs_unlock(VTOC(vp));

    auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
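    /*
     * Copy the fork in bufsize chunks: each pass reads iosize bytes from
     * logical offset `offset` (the original blocks) and writes the same
     * bytes at `writebase + offset`, which falls in the newly allocated
     * blocks beyond the original end of file.
     */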
    while (offset < copysize) {
        iosize = MIN(copysize - offset, iosize);

        uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
        uio_addiov(auio, (uintptr_t)bufp, iosize);

        error = cluster_read(vp, auio, copysize, 0);
        if (error) {
            printf("hfs_clonefile: cluster_read failed - %d\n", error);
            break;
        }
        if (uio_resid(auio) != 0) {
            printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
            error = EIO;
            break;
        }

        uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
        uio_addiov(auio, (uintptr_t)bufp, iosize);

        error = cluster_write(vp, auio, filesize + offset,
                              filesize + offset + iosize,
                              uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
        if (error) {
            printf("hfs_clonefile: cluster_write failed - %d\n", error);
            break;
        }
        if (uio_resid(auio) != 0) {
            printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
            error = EIO;
            break;
        }
        offset += iosize;
    }
    uio_free(auio);

    /*
     * No need to call ubc_sync_range or hfs_invalbuf
     * since the file was copied using IO_NOCACHE.
     */

    kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

    hfs_lock(VTOC(vp), HFS_FORCE_LOCK);

    return (error);
}
/*
 * Clone a system (metadata) file.
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
    caddr_t bufp;
    char *offset;
    size_t bufsize;
    size_t iosize;
    struct buf *bp = NULL;
    daddr64_t blkno;
    daddr64_t blk;
    daddr64_t start_blk;
    daddr64_t last_blk;
    int breadcnt;
    int i;
    int error = 0;

    iosize = GetLogicalBlockSize(vp);
    bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
    breadcnt = bufsize / iosize;
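    /*
     * The staging buffer is at most 1 MB and is rounded down to a multiple
     * of the device's logical block size, so each pass below transfers a
     * whole number of logical blocks (breadcnt of them).
     */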
    if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
        return (ENOMEM);
    }
    start_blk = ((daddr64_t)blkstart * blksize) / iosize;
    last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
    blkno = 0;

    while (blkno < last_blk) {
        /*
         * Read up to a megabyte
         */
        offset = bufp;
        for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
            error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
            if (error) {
                printf("hfs_clonesysfile: meta_bread error %d\n", error);
                goto out;
            }
            if (buf_count(bp) != iosize) {
                printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
                goto out;
            }
            bcopy((char *)buf_dataptr(bp), offset, iosize);

            buf_markinvalid(bp);
            buf_brelse(bp);
            bp = NULL;

            offset += iosize;
        }

        /*
         * Write up to a megabyte
         */
        offset = bufp;
        for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
            bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
            if (bp == NULL) {
                printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
                error = EIO;
                goto out;
            }
            bcopy(offset, (char *)buf_dataptr(bp), iosize);
            error = (int)buf_bwrite(bp);
            bp = NULL;
            if (error)
                goto out;
            offset += iosize;
        }
    }
out:
    if (bp) {
        buf_brelse(bp);
    }

    kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

    error = hfs_fsync(vp, MNT_WAIT, 0, p);

    return (error);
}