/*
 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*	@(#)hfs_readwrite.c	1.0
 *
 *	(c) 1998-2001 Apple Computer, Inc.  All Rights Reserved
 *
 *	hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vfs_context.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>

#include <sys/kdebug.h>

#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
extern int overflow_extents(struct filefork *fp);

#define can_cluster(size)  ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
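/*
 * Editorial note (not part of the original source): can_cluster() only admits
 * transfer sizes that are a multiple of 4 KB and no larger than half of
 * MAXPHYSIO, so for example can_cluster(8192) is true while can_cluster(4100)
 * is false.
 */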
enum {
	MAXHFSFILESIZE = 0x7FFFFFFF		/* this needs to go in the mount structure */
};

extern u_int32_t GetLogicalBlockSize(struct vnode *vp);

extern int hfs_setextendedsecurity(struct hfsmount *, int);


static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
static int hfs_clonefile(struct vnode *, int, int, int);
static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);


int flush_cache_on_write = 0;
SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
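/*
 * Usage sketch (editorial addition, not part of the original source): because
 * this knob is registered under _kern with OID_AUTO and CTLFLAG_RW, it should
 * be tunable from user space with something like:
 *
 *     sysctl -w kern.flush_cache_on_write=1
 */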
/*****************************************************************************
*
*	I/O Operations on vnodes
*
*****************************************************************************/
int hfs_vnop_read(struct vnop_read_args *);
int hfs_vnop_write(struct vnop_write_args *);
int hfs_vnop_ioctl(struct vnop_ioctl_args *);
int hfs_vnop_select(struct vnop_select_args *);
int hfs_vnop_blktooff(struct vnop_blktooff_args *);
int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
int hfs_vnop_blockmap(struct vnop_blockmap_args *);
int hfs_vnop_strategy(struct vnop_strategy_args *);
int hfs_vnop_allocate(struct vnop_allocate_args *);
int hfs_vnop_pagein(struct vnop_pagein_args *);
int hfs_vnop_pageout(struct vnop_pageout_args *);
int hfs_vnop_bwrite(struct vnop_bwrite_args *);
/*
 * Read data from a file.
 */
int
hfs_vnop_read(struct vnop_read_args *ap)
{
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	off_t filesize;
	off_t filebytes;
	off_t start_resid = uio_resid(uio);
	off_t offset = uio_offset(uio);
	int retval = 0;


	/* Preflight checks */
	if (!vnode_isreg(vp)) {
		/* can only read regular files */
		if (vnode_isdir(vp))
			return (EISDIR);
		else
			return (EPERM);
	}
	if (start_resid == 0)
		return (0);		/* Nothing left to do */
	if (offset < 0)
		return (EINVAL);	/* can't read from a negative offset */

	cp = VTOC(vp);
	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);

	/* Protect against a size change. */
	hfs_lock_truncate(cp, 0);

	filesize = fp->ff_size;
	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
	if (offset > filesize) {
		if ((hfsmp->hfs_flags & HFS_STANDARD) &&
		    (offset > (off_t)MAXHFSFILESIZE)) {
			retval = EFBIG;
		}
		goto exit;
	}

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

	retval = cluster_read(vp, uio, filesize, 0);

	cp->c_touch_acctime = TRUE;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

	/*
	 * Keep track of blocks read.
	 */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
		int took_cnode_lock = 0;
		off_t bytesread;

		bytesread = start_resid - uio_resid(uio);

		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			took_cnode_lock = 1;
		}
		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
			struct timeval tv;

			fp->ff_bytesread = bytesread;
			microtime(&tv);
			cp->c_atime = tv.tv_sec;
		} else {
			fp->ff_bytesread += bytesread;
		}
		if (took_cnode_lock)
			hfs_unlock(cp);
	}
exit:
	hfs_unlock_truncate(cp);
	return (retval);
}
198 * Write data to a file.
201 hfs_vnop_write(struct vnop_write_args
*ap
)
203 uio_t uio
= ap
->a_uio
;
204 struct vnode
*vp
= ap
->a_vp
;
207 struct hfsmount
*hfsmp
;
208 kauth_cred_t cred
= NULL
;
212 off_t actualBytesAdded
;
217 int ioflag
= ap
->a_ioflag
;
220 int cnode_locked
= 0;
222 // LP64todo - fix this! uio_resid may be 64-bit value
223 resid
= uio_resid(uio
);
224 offset
= uio_offset(uio
);
230 if (!vnode_isreg(vp
))
231 return (EPERM
); /* Can only write regular files */
233 /* Protect against a size change. */
234 hfs_lock_truncate(VTOC(vp
), TRUE
);
236 if ( (retval
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
))) {
237 hfs_unlock_truncate(VTOC(vp
));
244 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
246 if (ioflag
& IO_APPEND
) {
247 uio_setoffset(uio
, fp
->ff_size
);
248 offset
= fp
->ff_size
;
250 if ((cp
->c_flags
& APPEND
) && offset
!= fp
->ff_size
) {
255 origFileSize
= fp
->ff_size
;
256 eflags
= kEFDeferMask
; /* defer file block allocations */
258 #ifdef HFS_SPARSE_DEV
260 * When the underlying device is sparse and space
261 * is low (< 8MB), stop doing delayed allocations
262 * and begin doing synchronous I/O.
264 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
265 (hfs_freeblks(hfsmp
, 0) < 2048)) {
266 eflags
&= ~kEFDeferMask
;
269 #endif /* HFS_SPARSE_DEV */
271 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_START
,
272 (int)offset
, uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
274 /* Now test if we need to extend the file */
275 /* Doing so will adjust the filebytes for us */
277 writelimit
= offset
+ resid
;
278 if (writelimit
<= filebytes
)
281 cred
= vfs_context_ucred(ap
->a_context
);
283 bytesToAdd
= writelimit
- filebytes
;
284 retval
= hfs_chkdq(cp
, (int64_t)(roundup(bytesToAdd
, hfsmp
->blockSize
)),
290 if (hfs_start_transaction(hfsmp
) != 0) {
295 while (writelimit
> filebytes
) {
296 bytesToAdd
= writelimit
- filebytes
;
297 if (cred
&& suser(cred
, NULL
) != 0)
298 eflags
|= kEFReserveMask
;
300 /* Protect extents b-tree and allocation bitmap */
301 lockflags
= SFL_BITMAP
;
302 if (overflow_extents(fp
))
303 lockflags
|= SFL_EXTENTS
;
304 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
306 /* Files that are changing size are not hot file candidates. */
307 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
308 fp
->ff_bytesread
= 0;
310 retval
= MacToVFSError(ExtendFileC (hfsmp
, (FCB
*)fp
, bytesToAdd
,
311 0, eflags
, &actualBytesAdded
));
313 hfs_systemfile_unlock(hfsmp
, lockflags
);
315 if ((actualBytesAdded
== 0) && (retval
== E_NONE
))
317 if (retval
!= E_NONE
)
319 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
320 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_NONE
,
321 (int)offset
, uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
323 (void) hfs_update(vp
, TRUE
);
324 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
325 (void) hfs_end_transaction(hfsmp
);
328 if (retval
== E_NONE
) {
336 struct rl_entry
*invalid_range
;
338 if (writelimit
> fp
->ff_size
)
339 filesize
= writelimit
;
341 filesize
= fp
->ff_size
;
343 lflag
= (ioflag
& IO_SYNC
);
345 if (offset
<= fp
->ff_size
) {
346 zero_off
= offset
& ~PAGE_MASK_64
;
			/* Check whether the area between the zero_offset and the start
			   of the transfer is invalid and should be zero-filled
			   as part of the transfer:
			 */
352 if (offset
> zero_off
) {
353 if (rl_scan(&fp
->ff_invalidranges
, zero_off
, offset
- 1, &invalid_range
) != RL_NOOVERLAP
)
354 lflag
|= IO_HEADZEROFILL
;
357 off_t eof_page_base
= fp
->ff_size
& ~PAGE_MASK_64
;
359 /* The bytes between fp->ff_size and uio->uio_offset must never be
360 read without being zeroed. The current last block is filled with zeroes
361 if it holds valid data but in all cases merely do a little bookkeeping
362 to track the area from the end of the current last page to the start of
363 the area actually written. For the same reason only the bytes up to the
364 start of the page where this write will start is invalidated; any remainder
365 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
367 Note that inval_start, the start of the page after the current EOF,
368 may be past the start of the write, in which case the zeroing
			   will be handled by the cluster_write of the actual data.
371 inval_start
= (fp
->ff_size
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
372 inval_end
= offset
& ~PAGE_MASK_64
;
373 zero_off
= fp
->ff_size
;
375 if ((fp
->ff_size
& PAGE_MASK_64
) &&
376 (rl_scan(&fp
->ff_invalidranges
,
379 &invalid_range
) != RL_NOOVERLAP
)) {
380 /* The page containing the EOF is not valid, so the
381 entire page must be made inaccessible now. If the write
382 starts on a page beyond the page containing the eof
383 (inval_end > eof_page_base), add the
384 whole page to the range to be invalidated. Otherwise
385 (i.e. if the write starts on the same page), zero-fill
386 the entire page explicitly now:
388 if (inval_end
> eof_page_base
) {
389 inval_start
= eof_page_base
;
391 zero_off
= eof_page_base
;
395 if (inval_start
< inval_end
) {
397 /* There's some range of data that's going to be marked invalid */
399 if (zero_off
< inval_start
) {
400 /* The pages between inval_start and inval_end are going to be invalidated,
401 and the actual write will start on a page past inval_end. Now's the last
402 chance to zero-fill the page containing the EOF:
406 retval
= cluster_write(vp
, (uio_t
) 0,
407 fp
->ff_size
, inval_start
,
409 lflag
| IO_HEADZEROFILL
| IO_NOZERODIRTY
);
410 hfs_lock(cp
, HFS_FORCE_LOCK
);
412 if (retval
) goto ioerr_exit
;
413 offset
= uio_offset(uio
);
416 /* Mark the remaining area of the newly allocated space as invalid: */
417 rl_add(inval_start
, inval_end
- 1 , &fp
->ff_invalidranges
);
419 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
420 zero_off
= fp
->ff_size
= inval_end
;
423 if (offset
> zero_off
) lflag
|= IO_HEADZEROFILL
;
426 /* Check to see whether the area between the end of the write and the end of
427 the page it falls in is invalid and should be zero-filled as part of the transfer:
429 tail_off
= (writelimit
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
430 if (tail_off
> filesize
) tail_off
= filesize
;
431 if (tail_off
> writelimit
) {
432 if (rl_scan(&fp
->ff_invalidranges
, writelimit
, tail_off
- 1, &invalid_range
) != RL_NOOVERLAP
) {
433 lflag
|= IO_TAILZEROFILL
;
438 * if the write starts beyond the current EOF (possibly advanced in the
439 * zeroing of the last block, above), then we'll zero fill from the current EOF
440 * to where the write begins:
442 * NOTE: If (and ONLY if) the portion of the file about to be written is
443 * before the current EOF it might be marked as invalid now and must be
444 * made readable (removed from the invalid ranges) before cluster_write
447 io_start
= (lflag
& IO_HEADZEROFILL
) ? zero_off
: offset
;
448 if (io_start
< fp
->ff_size
) {
451 io_end
= (lflag
& IO_TAILZEROFILL
) ? tail_off
: writelimit
;
452 rl_remove(io_start
, io_end
- 1, &fp
->ff_invalidranges
);
457 retval
= cluster_write(vp
, uio
, fp
->ff_size
, filesize
, zero_off
,
458 tail_off
, lflag
| IO_NOZERODIRTY
);
459 offset
= uio_offset(uio
);
460 if (offset
> fp
->ff_size
) {
461 fp
->ff_size
= offset
;
463 ubc_setsize(vp
, fp
->ff_size
); /* XXX check errors */
464 /* Files that are changing size are not hot file candidates. */
465 if (hfsmp
->hfc_stage
== HFC_RECORDING
)
466 fp
->ff_bytesread
= 0;
468 if (resid
> uio_resid(uio
)) {
469 cp
->c_touch_chgtime
= TRUE
;
470 cp
->c_touch_modtime
= TRUE
;
474 // XXXdbg - testing for vivek and paul lambert
476 if (flush_cache_on_write
&& ((ioflag
& IO_NOCACHE
) || vnode_isnocache(vp
))) {
477 VNOP_IOCTL(hfsmp
->hfs_devvp
, DKIOCSYNCHRONIZECACHE
, NULL
, FWRITE
, NULL
);
480 HFS_KNOTE(vp
, NOTE_WRITE
);
484 * If we successfully wrote any data, and we are not the superuser
485 * we clear the setuid and setgid bits as a precaution against
488 if (cp
->c_mode
& (S_ISUID
| S_ISGID
)) {
489 cred
= vfs_context_ucred(ap
->a_context
);
490 if (resid
> uio_resid(uio
) && cred
&& suser(cred
, NULL
)) {
492 hfs_lock(cp
, HFS_FORCE_LOCK
);
495 cp
->c_mode
&= ~(S_ISUID
| S_ISGID
);
499 if (ioflag
& IO_UNIT
) {
501 hfs_lock(cp
, HFS_FORCE_LOCK
);
504 (void)hfs_truncate(vp
, origFileSize
, ioflag
& IO_SYNC
,
506 // LP64todo - fix this! resid needs to by user_ssize_t
507 uio_setoffset(uio
, (uio_offset(uio
) - (resid
- uio_resid(uio
))));
508 uio_setresid(uio
, resid
);
509 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
511 } else if ((ioflag
& IO_SYNC
) && (resid
> uio_resid(uio
))) {
513 hfs_lock(cp
, HFS_FORCE_LOCK
);
516 retval
= hfs_update(vp
, TRUE
);
518 /* Updating vcbWrCnt doesn't need to be atomic. */
521 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_END
,
522 (int)uio_offset(uio
), uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
526 hfs_unlock_truncate(cp
);
/* support for the "bulk-access" fcntl */

#define CACHE_ELEMS 64
#define CACHE_LEVELS 16
#define PARENT_IDS_FLAG 0x100

/* from hfs_attrlist.c */
extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
		mode_t obj_mode, struct mount *mp,
		kauth_cred_t cred, struct proc *p);

/* from vfs/vfs_fsevents.c */
extern char *get_pathbuff(void);
extern void release_pathbuff(char *buff);
struct access_cache {
	int numcached;
	int cachehits;			/* these two for statistics gathering */
	int lookups;
	unsigned int *acache;
	Boolean *haveaccess;
};

struct access_t {
	uid_t		uid;		/* IN: effective user id */
	short		flags;		/* IN: access requested (i.e. R_OK) */
	short		num_groups;	/* IN: number of groups user belongs to */
	int		num_files;	/* IN: number of files to process */
	int		*file_ids;	/* IN: array of file ids */
	gid_t		*groups;	/* IN: array of groups */
	short		*access;	/* OUT: access info for each file (0 for 'has access') */
};

struct user_access_t {
	uid_t		uid;		/* IN: effective user id */
	short		flags;		/* IN: access requested (i.e. R_OK) */
	short		num_groups;	/* IN: number of groups user belongs to */
	int		num_files;	/* IN: number of files to process */
	user_addr_t	file_ids;	/* IN: array of file ids */
	user_addr_t	groups;		/* IN: array of groups */
	user_addr_t	access;		/* OUT: access info for each file (0 for 'has access') */
};
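/*
 * Editorial note (not part of the original source): struct access_t is the
 * layout handed in by 32-bit callers, while struct user_access_t carries the
 * same fields with user_addr_t pointers; the HFS_BULKACCESS handler below
 * copies the former into the latter when the caller is not a 64-bit process.
 */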
/*
 * Perform a binary search for the given parent_id. Return value is
 * found/not found boolean, and indexp will be the index of the item
 * or the index at which to insert the item if it's not found.
 */
static int
lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
{
    int hi, lo, index, matches = 0;

    if (cache->numcached == 0) {
	*indexp = 0;
	return 0; // table is empty, so insert at index=0 and report no match
    }

    if (cache->numcached > CACHE_ELEMS) {
	/*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
	  cache->numcached, CACHE_ELEMS);*/
	cache->numcached = CACHE_ELEMS;
    }

    lo = 0;
    hi = cache->numcached - 1;
    index = -1;

    /* perform binary search for parent_id */
    do {
	unsigned int mid = (hi - lo)/2 + lo;
	unsigned int this_id = cache->acache[mid];

	if (parent_id == this_id) {
	    hi = mid;
	    break;
	}

	if (parent_id < this_id) {
	    hi = mid;
	    continue;
	}

	if (parent_id > this_id) {
	    lo = mid + 1;
	    continue;
	}
    } while (lo < hi);

    /* check if lo and hi converged on the match */
    if (parent_id == cache->acache[hi]) {
	*indexp = hi;
	return 1;
    }

    /* if no existing entry found, find index for new one */
    index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
    *indexp = index;
    return 0;
}
/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in). We currently do a replace rather
 * than an insert if the cache is full.
 */
static void
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
645 int lookup_index
= -1;
647 /* need to do a lookup first if -1 passed for index */
649 if (lookup_bucket(cache
, &lookup_index
, nodeID
)) {
650 if (cache
->haveaccess
[lookup_index
] != access
) {
651 /* change access info for existing entry... should never happen */
652 cache
->haveaccess
[lookup_index
] = access
;
655 /* mission accomplished */
658 index
= lookup_index
;
663 /* if the cache is full, do a replace rather than an insert */
664 if (cache
->numcached
>= CACHE_ELEMS
) {
665 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
666 cache
->numcached
= CACHE_ELEMS
-1;
668 if (index
> cache
->numcached
) {
669 // printf("index %d pinned to %d\n", index, cache->numcached);
670 index
= cache
->numcached
;
672 } else if (index
>= 0 && index
< cache
->numcached
) {
673 /* only do bcopy if we're inserting */
674 bcopy( cache
->acache
+index
, cache
->acache
+(index
+1), (cache
->numcached
- index
)*sizeof(int) );
675 bcopy( cache
->haveaccess
+index
, cache
->haveaccess
+(index
+1), (cache
->numcached
- index
)*sizeof(Boolean
) );
678 cache
->acache
[index
] = nodeID
;
679 cache
->haveaccess
[index
] = access
;
static int
snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
{
	struct cinfo *cip = (struct cinfo *)arg;

	cip->uid = attrp->ca_uid;
	cip->gid = attrp->ca_gid;
	cip->mode = attrp->ca_mode;
	cip->parentcnid = descp->cd_parentcnid;

	return (0);
}
/*
 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
 * isn't incore, then go to the catalog.
 */
static int
do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
		struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
714 /* if this id matches the one the fsctl was called with, skip the lookup */
715 if (cnid
== skip_cp
->c_cnid
) {
716 cnattrp
->ca_uid
= skip_cp
->c_uid
;
717 cnattrp
->ca_gid
= skip_cp
->c_gid
;
718 cnattrp
->ca_mode
= skip_cp
->c_mode
;
719 keyp
->hfsPlus
.parentID
= skip_cp
->c_parentcnid
;
723 /* otherwise, check the cnode hash incase the file/dir is incore */
724 if (hfs_chash_snoop(dev
, cnid
, snoop_callback
, &c_info
) == 0) {
725 cnattrp
->ca_uid
= c_info
.uid
;
726 cnattrp
->ca_gid
= c_info
.gid
;
727 cnattrp
->ca_mode
= c_info
.mode
;
728 keyp
->hfsPlus
.parentID
= c_info
.parentcnid
;
732 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_CATALOG
, HFS_SHARED_LOCK
);
734 /* lookup this cnid in the catalog */
735 error
= cat_getkeyplusattr(hfsmp
, cnid
, keyp
, cnattrp
);
737 hfs_systemfile_unlock(hfsmp
, lockflags
);
/*
 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
 * up to CACHE_LEVELS as we progress towards the root.
 */
static int
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
		struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev)
756 HFSCatalogNodeID thisNodeID
;
757 unsigned long myPerms
;
758 struct cat_attr cnattr
;
759 int cache_index
= -1;
762 int i
= 0, ids_to_cache
= 0;
763 int parent_ids
[CACHE_LEVELS
];
765 /* root always has access */
766 if (!suser(myp_ucred
, NULL
)) {
771 while (thisNodeID
>= kRootDirID
) {
772 myResult
= 0; /* default to "no access" */
774 /* check the cache before resorting to hitting the catalog */
776 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
777 * to look any further after hitting cached dir */
779 if (lookup_bucket(cache
, &cache_index
, thisNodeID
)) {
781 myResult
= cache
->haveaccess
[cache_index
];
782 goto ExitThisRoutine
;
785 /* remember which parents we want to cache */
786 if (ids_to_cache
< CACHE_LEVELS
) {
787 parent_ids
[ids_to_cache
] = thisNodeID
;
791 /* do the lookup (checks the cnode hash, then the catalog) */
792 myErr
= do_attr_lookup(hfsmp
, cache
, dev
, thisNodeID
, skip_cp
, &catkey
, &cnattr
, theProcPtr
);
794 goto ExitThisRoutine
; /* no access */
797 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
,
798 cnattr
.ca_mode
, hfsmp
->hfs_mp
,
799 myp_ucred
, theProcPtr
);
801 if ( (myPerms
& X_OK
) == 0 ) {
803 goto ExitThisRoutine
; /* no access */
806 /* up the hierarchy we go */
807 thisNodeID
= catkey
.hfsPlus
.parentID
;
810 /* if here, we have access to this node */
815 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
820 /* cache the parent directory(ies) */
821 for (i
= 0; i
< ids_to_cache
; i
++) {
822 /* small optimization: get rid of double-lookup for all these */
823 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
824 add_node(cache
, -1, parent_ids
[i
], myResult
);
829 /* end "bulk-access" support */
/*
 * Callback for use with freeze ioctl.
 */
static int
hfs_freezewrite_callback(struct vnode *vp, void *cargs)
{
	vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");

	return 0;
}
845 * Control filesystem operating characteristics.
848 hfs_vnop_ioctl( struct vnop_ioctl_args
/* {
853 vfs_context_t a_context;
856 struct vnode
* vp
= ap
->a_vp
;
857 struct hfsmount
*hfsmp
= VTOHFS(vp
);
858 vfs_context_t context
= ap
->a_context
;
859 kauth_cred_t cred
= vfs_context_ucred(context
);
860 proc_t p
= vfs_context_proc(context
);
861 struct vfsstatfs
*vfsp
;
864 is64bit
= proc_is64bit(p
);
866 switch (ap
->a_command
) {
868 case HFS_RESIZE_VOLUME
: {
872 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
873 if (suser(cred
, NULL
) &&
874 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
875 return (EACCES
); /* must be owner of file system */
877 if (!vnode_isvroot(vp
)) {
880 newsize
= *(u_int64_t
*)ap
->a_data
;
881 cursize
= (u_int64_t
)hfsmp
->totalBlocks
* (u_int64_t
)hfsmp
->blockSize
;
883 if (newsize
> cursize
) {
884 return hfs_extendfs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
885 } else if (newsize
< cursize
) {
886 return hfs_truncatefs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
891 case HFS_CHANGE_NEXT_ALLOCATION
: {
894 if (vnode_vfsisrdonly(vp
)) {
897 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
898 if (suser(cred
, NULL
) &&
899 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
900 return (EACCES
); /* must be owner of file system */
902 if (!vnode_isvroot(vp
)) {
905 location
= *(u_int32_t
*)ap
->a_data
;
906 if (location
> hfsmp
->totalBlocks
- 1) {
909 /* Return previous value. */
910 *(u_int32_t
*)ap
->a_data
= hfsmp
->nextAllocation
;
911 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
912 hfsmp
->nextAllocation
= location
;
913 hfsmp
->vcbFlags
|= 0xFF00;
914 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
918 #ifdef HFS_SPARSE_DEV
919 case HFS_SETBACKINGSTOREINFO
: {
920 struct vnode
* bsfs_rootvp
;
921 struct vnode
* di_vp
;
922 struct hfs_backingstoreinfo
*bsdata
;
925 if (hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) {
928 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
929 if (suser(cred
, NULL
) &&
930 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
931 return (EACCES
); /* must be owner of file system */
933 bsdata
= (struct hfs_backingstoreinfo
*)ap
->a_data
;
934 if (bsdata
== NULL
) {
937 if ((error
= file_vnode(bsdata
->backingfd
, &di_vp
))) {
940 if ((error
= vnode_getwithref(di_vp
))) {
941 file_drop(bsdata
->backingfd
);
945 if (vnode_mount(vp
) == vnode_mount(di_vp
)) {
946 (void)vnode_put(di_vp
);
947 file_drop(bsdata
->backingfd
);
952 * Obtain the backing fs root vnode and keep a reference
953 * on it. This reference will be dropped in hfs_unmount.
955 error
= VFS_ROOT(vnode_mount(di_vp
), &bsfs_rootvp
, NULL
); /* XXX use context! */
957 (void)vnode_put(di_vp
);
958 file_drop(bsdata
->backingfd
);
961 vnode_ref(bsfs_rootvp
);
962 vnode_put(bsfs_rootvp
);
964 hfsmp
->hfs_backingfs_rootvp
= bsfs_rootvp
;
965 hfsmp
->hfs_flags
|= HFS_HAS_SPARSE_DEVICE
;
966 hfsmp
->hfs_sparsebandblks
= bsdata
->bandsize
/ HFSTOVCB(hfsmp
)->blockSize
;
967 hfsmp
->hfs_sparsebandblks
*= 4;
969 (void)vnode_put(di_vp
);
970 file_drop(bsdata
->backingfd
);
973 case HFS_CLRBACKINGSTOREINFO
: {
974 struct vnode
* tmpvp
;
976 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
977 if (suser(cred
, NULL
) &&
978 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
979 return (EACCES
); /* must be owner of file system */
981 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
982 hfsmp
->hfs_backingfs_rootvp
) {
984 hfsmp
->hfs_flags
&= ~HFS_HAS_SPARSE_DEVICE
;
985 tmpvp
= hfsmp
->hfs_backingfs_rootvp
;
986 hfsmp
->hfs_backingfs_rootvp
= NULLVP
;
987 hfsmp
->hfs_sparsebandblks
= 0;
992 #endif /* HFS_SPARSE_DEV */
1001 mp
= vnode_mount(vp
);
1002 hfsmp
= VFSTOHFS(mp
);
1007 lck_rw_lock_exclusive(&hfsmp
->hfs_insync
);
1009 task
= current_task();
1010 task_working_set_disable(task
);
1012 // flush things before we get started to try and prevent
1013 // dirty data from being paged out while we're frozen.
1014 // note: can't do this after taking the lock as it will
1015 // deadlock against ourselves.
1016 vnode_iterate(mp
, 0, hfs_freezewrite_callback
, NULL
);
1017 hfs_global_exclusive_lock_acquire(hfsmp
);
1018 journal_flush(hfsmp
->jnl
);
1020 // don't need to iterate on all vnodes, we just need to
1021 // wait for writes to the system files and the device vnode
1022 if (HFSTOVCB(hfsmp
)->extentsRefNum
)
1023 vnode_waitforwrites(HFSTOVCB(hfsmp
)->extentsRefNum
, 0, 0, 0, "hfs freeze");
1024 if (HFSTOVCB(hfsmp
)->catalogRefNum
)
1025 vnode_waitforwrites(HFSTOVCB(hfsmp
)->catalogRefNum
, 0, 0, 0, "hfs freeze");
1026 if (HFSTOVCB(hfsmp
)->allocationsRefNum
)
1027 vnode_waitforwrites(HFSTOVCB(hfsmp
)->allocationsRefNum
, 0, 0, 0, "hfs freeze");
1028 if (hfsmp
->hfs_attribute_vp
)
1029 vnode_waitforwrites(hfsmp
->hfs_attribute_vp
, 0, 0, 0, "hfs freeze");
1030 vnode_waitforwrites(hfsmp
->hfs_devvp
, 0, 0, 0, "hfs freeze");
1032 hfsmp
->hfs_freezing_proc
= current_proc();
1041 // if we're not the one who froze the fs then we
1043 if (hfsmp
->hfs_freezing_proc
!= current_proc()) {
1047 // NOTE: if you add code here, also go check the
1048 // code that "thaws" the fs in hfs_vnop_close()
1050 hfsmp
->hfs_freezing_proc
= NULL
;
1051 hfs_global_exclusive_lock_release(hfsmp
);
1052 lck_rw_unlock_exclusive(&hfsmp
->hfs_insync
);
#define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
#define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
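/*
 * Usage sketch (editorial addition, not part of the original source): a
 * user-space caller would fill in a struct access_t and issue the request
 * through fsctl(2) on a path that lives on the HFS volume, e.g.
 *
 *     struct access_t ainfo = { .uid = uid, .flags = R_OK,
 *                               .num_files = n, .file_ids = ids,
 *                               .access = results };
 *     fsctl("/Volumes/SomeHFSVolume", HFSIOC_BULKACCESS, &ainfo, 0);
 *
 * The volume path and variable names here are hypothetical; num_files must be
 * between 1 and 256 per the checks below.
 */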
1060 case HFS_BULKACCESS_FSCTL
:
1061 case HFS_BULKACCESS
: {
1063 * NOTE: on entry, the vnode is locked. Incase this vnode
1064 * happens to be in our list of file_ids, we'll note it
1065 * avoid calling hfs_chashget_nowait() on that id as that
1066 * will cause a "locking against myself" panic.
1068 Boolean check_leaf
= true;
1070 struct user_access_t
*user_access_structp
;
1071 struct user_access_t tmp_user_access_t
;
1072 struct access_cache cache
;
1076 dev_t dev
= VTOC(vp
)->c_dev
;
1079 struct ucred myucred
; /* XXX ILLEGAL */
1081 int *file_ids
= NULL
;
1082 short *access
= NULL
;
1085 cnid_t prevParent_cnid
= 0;
1086 unsigned long myPerms
;
1088 struct cat_attr cnattr
;
1090 struct cnode
*skip_cp
= VTOC(vp
);
1091 struct vfs_context my_context
;
1093 /* first, return error if not run as root */
1094 if (cred
->cr_ruid
!= 0) {
1098 /* initialize the local cache and buffers */
1099 cache
.numcached
= 0;
1100 cache
.cachehits
= 0;
1103 file_ids
= (int *) get_pathbuff();
1104 access
= (short *) get_pathbuff();
1105 cache
.acache
= (int *) get_pathbuff();
1106 cache
.haveaccess
= (Boolean
*) get_pathbuff();
1108 if (file_ids
== NULL
|| access
== NULL
|| cache
.acache
== NULL
|| cache
.haveaccess
== NULL
) {
1109 release_pathbuff((char *) file_ids
);
1110 release_pathbuff((char *) access
);
1111 release_pathbuff((char *) cache
.acache
);
1112 release_pathbuff((char *) cache
.haveaccess
);
1117 /* struct copyin done during dispatch... need to copy file_id array separately */
1118 if (ap
->a_data
== NULL
) {
1120 goto err_exit_bulk_access
;
1124 user_access_structp
= (struct user_access_t
*)ap
->a_data
;
1127 struct access_t
* accessp
= (struct access_t
*)ap
->a_data
;
1128 tmp_user_access_t
.uid
= accessp
->uid
;
1129 tmp_user_access_t
.flags
= accessp
->flags
;
1130 tmp_user_access_t
.num_groups
= accessp
->num_groups
;
1131 tmp_user_access_t
.num_files
= accessp
->num_files
;
1132 tmp_user_access_t
.file_ids
= CAST_USER_ADDR_T(accessp
->file_ids
);
1133 tmp_user_access_t
.groups
= CAST_USER_ADDR_T(accessp
->groups
);
1134 tmp_user_access_t
.access
= CAST_USER_ADDR_T(accessp
->access
);
1135 user_access_structp
= &tmp_user_access_t
;
1138 num_files
= user_access_structp
->num_files
;
1139 if (num_files
< 1) {
1140 goto err_exit_bulk_access
;
1142 if (num_files
> 256) {
1144 goto err_exit_bulk_access
;
1147 if ((error
= copyin(user_access_structp
->file_ids
, (caddr_t
)file_ids
,
1148 num_files
* sizeof(int)))) {
1149 goto err_exit_bulk_access
;
1152 /* fill in the ucred structure */
1153 flags
= user_access_structp
->flags
;
1154 if ((flags
& (F_OK
| R_OK
| W_OK
| X_OK
)) == 0) {
1158 /* check if we've been passed leaf node ids or parent ids */
1159 if (flags
& PARENT_IDS_FLAG
) {
1163 memset(&myucred
, 0, sizeof(myucred
));
1165 myucred
.cr_uid
= myucred
.cr_ruid
= myucred
.cr_svuid
= user_access_structp
->uid
;
1166 myucred
.cr_ngroups
= user_access_structp
->num_groups
;
1167 if (myucred
.cr_ngroups
< 1 || myucred
.cr_ngroups
> 16) {
1168 myucred
.cr_ngroups
= 0;
1169 } else if ((error
= copyin(user_access_structp
->groups
, (caddr_t
)myucred
.cr_groups
,
1170 myucred
.cr_ngroups
* sizeof(gid_t
)))) {
1171 goto err_exit_bulk_access
;
1173 myucred
.cr_rgid
= myucred
.cr_svgid
= myucred
.cr_groups
[0];
1174 myucred
.cr_gmuid
= myucred
.cr_uid
;
1176 my_context
.vc_proc
= p
;
1177 my_context
.vc_ucred
= &myucred
;
1179 /* Check access to each file_id passed in */
1180 for (i
= 0; i
< num_files
; i
++) {
1182 cnid
= (cnid_t
) file_ids
[i
];
1184 /* root always has access */
1185 if (!suser(&myucred
, NULL
)) {
1192 /* do the lookup (checks the cnode hash, then the catalog) */
1193 error
= do_attr_lookup(hfsmp
, &cache
, dev
, cnid
, skip_cp
, &catkey
, &cnattr
, p
);
1195 access
[i
] = (short) error
;
1199 /* before calling CheckAccess(), check the target file for read access */
1200 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
,
1201 cnattr
.ca_mode
, hfsmp
->hfs_mp
, &myucred
, p
);
1204 /* fail fast if no access */
1205 if ((myPerms
& flags
) == 0) {
1210 /* we were passed an array of parent ids */
1211 catkey
.hfsPlus
.parentID
= cnid
;
1214 /* if the last guy had the same parent and had access, we're done */
1215 if (i
> 0 && catkey
.hfsPlus
.parentID
== prevParent_cnid
&& access
[i
-1] == 0) {
1221 myaccess
= do_access_check(hfsmp
, &error
, &cache
, catkey
.hfsPlus
.parentID
,
1222 skip_cp
, p
, &myucred
, dev
);
1225 access
[i
] = 0; // have access.. no errors to report
1227 access
[i
] = (error
!= 0 ? (short) error
: EACCES
);
1230 prevParent_cnid
= catkey
.hfsPlus
.parentID
;
1234 cnid
= (cnid_t
)file_ids
[i
];
1236 while (cnid
>= kRootDirID
) {
1237 /* get the vnode for this cnid */
1238 myErr
= hfs_vget(hfsmp
, cnid
, &vp
, 0);
1244 cnid
= VTOC(vp
)->c_parentcnid
;
1246 hfs_unlock(VTOC(vp
));
1247 if (vnode_vtype(vp
) == VDIR
) {
1248 myErr
= vnode_authorize(vp
, NULL
, (KAUTH_VNODE_SEARCH
| KAUTH_VNODE_LIST_DIRECTORY
), &my_context
);
1250 myErr
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
, &my_context
);
1261 /* copyout the access array */
1262 if ((error
= copyout((caddr_t
)access
, user_access_structp
->access
,
1263 num_files
* sizeof (short)))) {
1264 goto err_exit_bulk_access
;
1267 err_exit_bulk_access
:
1269 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1271 release_pathbuff((char *) cache
.acache
);
1272 release_pathbuff((char *) cache
.haveaccess
);
1273 release_pathbuff((char *) file_ids
);
1274 release_pathbuff((char *) access
);
1277 } /* HFS_BULKACCESS */
1279 case HFS_SETACLSTATE
: {
1282 if (ap
->a_data
== NULL
) {
1286 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1287 state
= *(int *)ap
->a_data
;
1289 // super-user can enable or disable acl's on a volume.
1290 // the volume owner can only enable acl's
1291 if (!is_suser() && (state
== 0 || kauth_cred_getuid(cred
) != vfsp
->f_owner
)) {
1294 if (state
== 0 || state
== 1)
1295 return hfs_setextendedsecurity(hfsmp
, state
);
1303 error
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
);
1305 error
= hfs_fsync(vp
, MNT_NOWAIT
, TRUE
, p
);
1306 hfs_unlock(VTOC(vp
));
1313 register struct cnode
*cp
;
1316 if (!vnode_isreg(vp
))
1319 error
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
);
1323 * used by regression test to determine if
1324 * all the dirty pages (via write) have been cleaned
	 * after a call to 'fsync'.
1327 error
= is_file_clean(vp
, VTOF(vp
)->ff_size
);
1334 register struct radvisory
*ra
;
1335 struct filefork
*fp
;
1338 if (!vnode_isreg(vp
))
1341 ra
= (struct radvisory
*)(ap
->a_data
);
1344 /* Protect against a size change. */
1345 hfs_lock_truncate(VTOC(vp
), TRUE
);
1347 if (ra
->ra_offset
>= fp
->ff_size
) {
1350 error
= advisory_read(vp
, fp
->ff_size
, ra
->ra_offset
, ra
->ra_count
);
1353 hfs_unlock_truncate(VTOC(vp
));
1357 case F_READBOOTSTRAP
:
1358 case F_WRITEBOOTSTRAP
:
1360 struct vnode
*devvp
= NULL
;
1361 user_fbootstraptransfer_t
*user_bootstrapp
;
1365 daddr64_t blockNumber
;
1369 user_fbootstraptransfer_t user_bootstrap
;
1371 if (!vnode_isvroot(vp
))
1373 /* LP64 - when caller is a 64 bit process then we are passed a pointer
1374 * to a user_fbootstraptransfer_t else we get a pointer to a
1375 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1378 user_bootstrapp
= (user_fbootstraptransfer_t
*)ap
->a_data
;
1381 fbootstraptransfer_t
*bootstrapp
= (fbootstraptransfer_t
*)ap
->a_data
;
1382 user_bootstrapp
= &user_bootstrap
;
1383 user_bootstrap
.fbt_offset
= bootstrapp
->fbt_offset
;
1384 user_bootstrap
.fbt_length
= bootstrapp
->fbt_length
;
1385 user_bootstrap
.fbt_buffer
= CAST_USER_ADDR_T(bootstrapp
->fbt_buffer
);
1387 if (user_bootstrapp
->fbt_offset
+ user_bootstrapp
->fbt_length
> 1024)
1390 devvp
= VTOHFS(vp
)->hfs_devvp
;
1391 auio
= uio_create(1, user_bootstrapp
->fbt_offset
,
1392 is64bit
? UIO_USERSPACE64
: UIO_USERSPACE32
,
1393 (ap
->a_command
== F_WRITEBOOTSTRAP
) ? UIO_WRITE
: UIO_READ
);
1394 uio_addiov(auio
, user_bootstrapp
->fbt_buffer
, user_bootstrapp
->fbt_length
);
1396 devBlockSize
= vfs_devblocksize(vnode_mount(vp
));
1398 while (uio_resid(auio
) > 0) {
1399 blockNumber
= uio_offset(auio
) / devBlockSize
;
1400 error
= (int)buf_bread(devvp
, blockNumber
, devBlockSize
, cred
, &bp
);
1402 if (bp
) buf_brelse(bp
);
1407 blockOffset
= uio_offset(auio
) % devBlockSize
;
1408 xfersize
= devBlockSize
- blockOffset
;
1409 error
= uiomove((caddr_t
)buf_dataptr(bp
) + blockOffset
, (int)xfersize
, auio
);
1415 if (uio_rw(auio
) == UIO_WRITE
) {
1416 error
= VNOP_BWRITE(bp
);
1429 case _IOC(IOC_OUT
,'h', 4, 0): /* Create date in local time */
1432 *(user_time_t
*)(ap
->a_data
) = (user_time_t
) (to_bsd_time(VTOVCB(vp
)->localCreateDate
));
1435 *(time_t *)(ap
->a_data
) = to_bsd_time(VTOVCB(vp
)->localCreateDate
);
1440 case HFS_GET_MOUNT_TIME
:
1441 return copyout(&hfsmp
->hfs_mount_time
, CAST_USER_ADDR_T(ap
->a_data
), sizeof(hfsmp
->hfs_mount_time
));
1444 case HFS_GET_LAST_MTIME
:
1445 return copyout(&hfsmp
->hfs_last_mounted_mtime
, CAST_USER_ADDR_T(ap
->a_data
), sizeof(hfsmp
->hfs_last_mounted_mtime
));
1448 case HFS_SET_BOOT_INFO
:
1449 if (!vnode_isvroot(vp
))
1451 if (!kauth_cred_issuser(cred
) && (kauth_cred_getuid(cred
) != vfs_statfs(HFSTOVFS(hfsmp
))->f_owner
))
1452 return(EACCES
); /* must be superuser or owner of filesystem */
1453 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1454 bcopy(ap
->a_data
, &hfsmp
->vcbFndrInfo
, sizeof(hfsmp
->vcbFndrInfo
));
1455 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1456 (void) hfs_flushvolumeheader(hfsmp
, MNT_WAIT
, 0);
1459 case HFS_GET_BOOT_INFO
:
1460 if (!vnode_isvroot(vp
))
1462 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1463 bcopy(&hfsmp
->vcbFndrInfo
, ap
->a_data
, sizeof(hfsmp
->vcbFndrInfo
));
1464 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1471 /* Should never get here */
1479 hfs_vnop_select(__unused
struct vnop_select_args
*ap
)
1481 struct vnop_select_args {
1486 vfs_context_t a_context;
1491 * We should really check to see if I/O is possible.
1497 * Converts a logical block number to a physical block, and optionally returns
1498 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently it's 512.
1500 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
1503 hfs_bmap(struct vnode
*vp
, daddr_t bn
, struct vnode
**vpp
, daddr64_t
*bnp
, int *runp
)
1505 struct cnode
*cp
= VTOC(vp
);
1506 struct filefork
*fp
= VTOF(vp
);
1507 struct hfsmount
*hfsmp
= VTOHFS(vp
);
1508 int retval
= E_NONE
;
1509 daddr_t logBlockSize
;
1510 size_t bytesContAvail
= 0;
1511 off_t blockposition
;
1516 * Check for underlying vnode requests and ensure that logical
1517 * to physical mapping is requested.
1524 logBlockSize
= GetLogicalBlockSize(vp
);
1525 blockposition
= (off_t
)bn
* (off_t
)logBlockSize
;
1527 lockExtBtree
= overflow_extents(fp
);
1530 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_EXTENTS
, HFS_SHARED_LOCK
);
1532 retval
= MacToVFSError(
1533 MapFileBlockC (HFSTOVCB(hfsmp
),
1541 hfs_systemfile_unlock(hfsmp
, lockflags
);
1543 if (retval
== E_NONE
) {
1544 /* Figure out how many read ahead blocks there are */
1546 if (can_cluster(logBlockSize
)) {
1547 /* Make sure this result never goes negative: */
1548 *runp
= (bytesContAvail
< logBlockSize
) ? 0 : (bytesContAvail
/ logBlockSize
) - 1;
/*
 * Convert logical block number to file offset.
 */
int
hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
/*
	struct vnop_blktooff_args {
		vnode_t a_vp;
		daddr64_t a_lblkno;
		off_t *a_offset;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);
	*ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);

	return(0);
}

/*
 * Convert file offset to logical block number.
 */
int
hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
/*
	struct vnop_offtoblk_args {
		vnode_t a_vp;
		off_t a_offset;
		daddr64_t *a_lblkno;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);
	*ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));

	return(0);
}
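/*
 * Worked example (editorial addition, not part of the original source): with
 * a logical block size of 4096 bytes, hfs_vnop_blktooff maps block 3 to byte
 * offset 3 * 4096 = 12288, and hfs_vnop_offtoblk maps any offset in the range
 * [12288, 16383] back to logical block 3.
 */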
1598 * Map file offset to physical block number.
1600 * System file cnodes are expected to be locked (shared or exclusive).
1603 hfs_vnop_blockmap(struct vnop_blockmap_args
*ap
)
1605 struct vnop_blockmap_args {
1613 vfs_context_t a_context;
1617 struct vnode
*vp
= ap
->a_vp
;
1619 struct filefork
*fp
;
1620 struct hfsmount
*hfsmp
;
1621 size_t bytesContAvail
= 0;
1622 int retval
= E_NONE
;
1625 struct rl_entry
*invalid_range
;
1626 enum rl_overlaptype overlaptype
;
1630 /* Do not allow blockmap operation on a directory */
1631 if (vnode_isdir(vp
)) {
1636 * Check for underlying vnode requests and ensure that logical
1637 * to physical mapping is requested.
1639 if (ap
->a_bpn
== NULL
)
1642 if ( !vnode_issystem(vp
) && !vnode_islnk(vp
)) {
1643 if (VTOC(vp
)->c_lockowner
!= current_thread()) {
1644 hfs_lock(VTOC(vp
), HFS_FORCE_LOCK
);
1648 panic("blockmap: %s cnode lock already held!\n",
1649 cp
->c_desc
.cd_nameptr
? cp
->c_desc
.cd_nameptr
: "");
1657 if (fp
->ff_unallocblocks
) {
1658 if (hfs_start_transaction(hfsmp
) != 0) {
1664 syslocks
= SFL_EXTENTS
| SFL_BITMAP
;
1666 } else if (overflow_extents(fp
)) {
1667 syslocks
= SFL_EXTENTS
;
1671 lockflags
= hfs_systemfile_lock(hfsmp
, syslocks
, HFS_EXCLUSIVE_LOCK
);
1674 * Check for any delayed allocations.
1676 if (fp
->ff_unallocblocks
) {
1678 u_int32_t loanedBlocks
;
1681 // Make sure we have a transaction. It's possible
1682 // that we came in and fp->ff_unallocblocks was zero
1683 // but during the time we blocked acquiring the extents
1684 // btree, ff_unallocblocks became non-zero and so we
1685 // will need to start a transaction.
1687 if (started_tr
== 0) {
1689 hfs_systemfile_unlock(hfsmp
, lockflags
);
1696 * Note: ExtendFileC will Release any blocks on loan and
		 * acquire real blocks. So we ask to extend by zero bytes
1698 * since ExtendFileC will account for the virtual blocks.
1701 loanedBlocks
= fp
->ff_unallocblocks
;
1702 retval
= ExtendFileC(hfsmp
, (FCB
*)fp
, 0, 0,
1703 kEFAllMask
| kEFNoClumpMask
, &actbytes
);
1706 fp
->ff_unallocblocks
= loanedBlocks
;
1707 cp
->c_blocks
+= loanedBlocks
;
1708 fp
->ff_blocks
+= loanedBlocks
;
1710 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1711 hfsmp
->loanedBlocks
+= loanedBlocks
;
1712 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1716 hfs_systemfile_unlock(hfsmp
, lockflags
);
1717 cp
->c_flag
|= C_MODIFIED
;
1719 (void) hfs_update(vp
, TRUE
);
1720 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
1722 hfs_end_transaction(hfsmp
);
1728 retval
= MapFileBlockC(hfsmp
, (FCB
*)fp
, ap
->a_size
, ap
->a_foffset
,
1729 ap
->a_bpn
, &bytesContAvail
);
1731 hfs_systemfile_unlock(hfsmp
, lockflags
);
1736 (void) hfs_update(vp
, TRUE
);
1737 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
1738 hfs_end_transaction(hfsmp
);
1745 /* Adjust the mapping information for invalid file ranges: */
1746 overlaptype
= rl_scan(&fp
->ff_invalidranges
, ap
->a_foffset
,
1747 ap
->a_foffset
+ (off_t
)bytesContAvail
- 1,
1749 if (overlaptype
!= RL_NOOVERLAP
) {
1750 switch(overlaptype
) {
1751 case RL_MATCHINGOVERLAP
:
1752 case RL_OVERLAPCONTAINSRANGE
:
1753 case RL_OVERLAPSTARTSBEFORE
:
1754 /* There's no valid block for this byte offset: */
1755 *ap
->a_bpn
= (daddr64_t
)-1;
1756 /* There's no point limiting the amount to be returned
1757 * if the invalid range that was hit extends all the way
1758 * to the EOF (i.e. there's no valid bytes between the
1759 * end of this range and the file's EOF):
1761 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
1762 (invalid_range
->rl_end
+ 1 - ap
->a_foffset
< bytesContAvail
)) {
1763 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
1767 case RL_OVERLAPISCONTAINED
:
1768 case RL_OVERLAPENDSAFTER
:
1769 /* The range of interest hits an invalid block before the end: */
1770 if (invalid_range
->rl_start
== ap
->a_foffset
) {
1771 /* There's actually no valid information to be had starting here: */
1772 *ap
->a_bpn
= (daddr64_t
)-1;
1773 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
1774 (invalid_range
->rl_end
+ 1 - ap
->a_foffset
< bytesContAvail
)) {
1775 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
1778 bytesContAvail
= invalid_range
->rl_start
- ap
->a_foffset
;
1785 if (bytesContAvail
> ap
->a_size
)
1786 bytesContAvail
= ap
->a_size
;
1789 *ap
->a_run
= bytesContAvail
;
1792 *(int *)ap
->a_poff
= 0;
1797 return (MacToVFSError(retval
));
/*
 * prepare and issue the I/O;
 * buf_strategy knows how to deal
 * with requests that require
 * fragmented I/O
 */
int
hfs_vnop_strategy(struct vnop_strategy_args *ap)
{
	buf_t	bp = ap->a_bp;
	vnode_t	vp = buf_vnode(bp);
	struct cnode *cp = VTOC(vp);

	return (buf_strategy(cp->c_devvp, ap));
}
1819 do_hfs_truncate(struct vnode
*vp
, off_t length
, int flags
, int skipsetsize
, vfs_context_t context
)
1821 register struct cnode
*cp
= VTOC(vp
);
1822 struct filefork
*fp
= VTOF(vp
);
1823 struct proc
*p
= vfs_context_proc(context
);;
1824 kauth_cred_t cred
= vfs_context_ucred(context
);
1827 off_t actualBytesAdded
;
1829 u_int64_t old_filesize
;
1832 struct hfsmount
*hfsmp
;
1835 blksize
= VTOVCB(vp
)->blockSize
;
1836 fileblocks
= fp
->ff_blocks
;
1837 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
1838 old_filesize
= fp
->ff_size
;
1840 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_START
,
1841 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
1846 /* This should only happen with a corrupt filesystem */
1847 if ((off_t
)fp
->ff_size
< 0)
1850 if ((!ISHFSPLUS(VTOVCB(vp
))) && (length
> (off_t
)MAXHFSFILESIZE
))
1857 /* Files that are changing size are not hot file candidates. */
1858 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
1859 fp
->ff_bytesread
= 0;
1863 * We cannot just check if fp->ff_size == length (as an optimization)
1864 * since there may be extra physical blocks that also need truncation.
1867 if ((retval
= hfs_getinoquota(cp
)))
1872 * Lengthen the size of the file. We must ensure that the
1873 * last byte of the file is allocated. Since the smallest
1874 * value of ff_size is 0, length will be at least 1.
1876 if (length
> (off_t
)fp
->ff_size
) {
1878 retval
= hfs_chkdq(cp
, (int64_t)(roundup(length
- filebytes
, blksize
)),
1884 * If we don't have enough physical space then
1885 * we need to extend the physical size.
1887 if (length
> filebytes
) {
1889 u_long blockHint
= 0;
1891 /* All or nothing and don't round up to clumpsize. */
1892 eflags
= kEFAllMask
| kEFNoClumpMask
;
1894 if (cred
&& suser(cred
, NULL
) != 0)
1895 eflags
|= kEFReserveMask
; /* keep a reserve */
1898 * Allocate Journal and Quota files in metadata zone.
1900 if (filebytes
== 0 &&
1901 hfsmp
->hfs_flags
& HFS_METADATA_ZONE
&&
1902 hfs_virtualmetafile(cp
)) {
1903 eflags
|= kEFMetadataMask
;
1904 blockHint
= hfsmp
->hfs_metazone_start
;
1906 if (hfs_start_transaction(hfsmp
) != 0) {
1911 /* Protect extents b-tree and allocation bitmap */
1912 lockflags
= SFL_BITMAP
;
1913 if (overflow_extents(fp
))
1914 lockflags
|= SFL_EXTENTS
;
1915 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
1917 while ((length
> filebytes
) && (retval
== E_NONE
)) {
1918 bytesToAdd
= length
- filebytes
;
1919 retval
= MacToVFSError(ExtendFileC(VTOVCB(vp
),
1924 &actualBytesAdded
));
1926 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
1927 if (actualBytesAdded
== 0 && retval
== E_NONE
) {
1928 if (length
> filebytes
)
1934 hfs_systemfile_unlock(hfsmp
, lockflags
);
1937 (void) hfs_update(vp
, TRUE
);
1938 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
1941 hfs_end_transaction(hfsmp
);
1946 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_NONE
,
1947 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
1950 if (!(flags
& IO_NOZEROFILL
)) {
1951 if (UBCINFOEXISTS(vp
) && retval
== E_NONE
) {
1952 struct rl_entry
*invalid_range
;
1955 zero_limit
= (fp
->ff_size
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
1956 if (length
< zero_limit
) zero_limit
= length
;
1958 if (length
> (off_t
)fp
->ff_size
) {
1961 /* Extending the file: time to fill out the current last page w. zeroes? */
1962 if ((fp
->ff_size
& PAGE_MASK_64
) &&
1963 (rl_scan(&fp
->ff_invalidranges
, fp
->ff_size
& ~PAGE_MASK_64
,
1964 fp
->ff_size
- 1, &invalid_range
) == RL_NOOVERLAP
)) {
1966 /* There's some valid data at the start of the (current) last page
1967 of the file, so zero out the remainder of that page to ensure the
1968 entire page contains valid data. Since there is no invalid range
1969 possible past the (current) eof, there's no need to remove anything
1970 from the invalid range list before calling cluster_write(): */
1972 retval
= cluster_write(vp
, (struct uio
*) 0, fp
->ff_size
, zero_limit
,
1973 fp
->ff_size
, (off_t
)0,
1974 (flags
& IO_SYNC
) | IO_HEADZEROFILL
| IO_NOZERODIRTY
);
1975 hfs_lock(cp
, HFS_FORCE_LOCK
);
1976 if (retval
) goto Err_Exit
;
1978 /* Merely invalidate the remaining area, if necessary: */
1979 if (length
> zero_limit
) {
1981 rl_add(zero_limit
, length
- 1, &fp
->ff_invalidranges
);
1982 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
1985 /* The page containing the (current) eof is invalid: just add the
1986 remainder of the page to the invalid list, along with the area
1987 being newly allocated:
1990 rl_add(fp
->ff_size
, length
- 1, &fp
->ff_invalidranges
);
1991 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
1995 panic("hfs_truncate: invoked on non-UBC object?!");
1998 cp
->c_touch_modtime
= TRUE
;
1999 fp
->ff_size
= length
;
2001 /* Nested transactions will do their own ubc_setsize. */
2004 * ubc_setsize can cause a pagein here
2005 * so we need to drop cnode lock.
2008 ubc_setsize(vp
, length
);
2009 hfs_lock(cp
, HFS_FORCE_LOCK
);
2012 } else { /* Shorten the size of the file */
2014 if ((off_t
)fp
->ff_size
> length
) {
2016 * Any buffers that are past the truncation point need to be
2017 * invalidated (to maintain buffer cache consistency).
2020 /* Nested transactions will do their own ubc_setsize. */
2023 * ubc_setsize can cause a pageout here
2024 * so we need to drop cnode lock.
2027 ubc_setsize(vp
, length
);
2028 hfs_lock(cp
, HFS_FORCE_LOCK
);
2031 /* Any space previously marked as invalid is now irrelevant: */
2032 rl_remove(length
, fp
->ff_size
- 1, &fp
->ff_invalidranges
);
2036 * Account for any unmapped blocks. Note that the new
2037 * file length can still end up with unmapped blocks.
2039 if (fp
->ff_unallocblocks
> 0) {
2040 u_int32_t finalblks
;
2041 u_int32_t loanedBlocks
;
2043 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
2045 loanedBlocks
= fp
->ff_unallocblocks
;
2046 cp
->c_blocks
-= loanedBlocks
;
2047 fp
->ff_blocks
-= loanedBlocks
;
2048 fp
->ff_unallocblocks
= 0;
2050 hfsmp
->loanedBlocks
-= loanedBlocks
;
2052 finalblks
= (length
+ blksize
- 1) / blksize
;
2053 if (finalblks
> fp
->ff_blocks
) {
2054 /* calculate required unmapped blocks */
2055 loanedBlocks
= finalblks
- fp
->ff_blocks
;
2056 hfsmp
->loanedBlocks
+= loanedBlocks
;
2058 fp
->ff_unallocblocks
= loanedBlocks
;
2059 cp
->c_blocks
+= loanedBlocks
;
2060 fp
->ff_blocks
+= loanedBlocks
;
2062 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
2066 * For a TBE process the deallocation of the file blocks is
2067 * delayed until the file is closed. And hfs_close calls
2068 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2069 * isn't set, we make sure this isn't a TBE process.
2071 if ((flags
& IO_NDELAY
) || (proc_tbe(p
) == 0)) {
2073 off_t savedbytes
= ((off_t
)fp
->ff_blocks
* (off_t
)blksize
);
2075 if (hfs_start_transaction(hfsmp
) != 0) {
2080 if (fp
->ff_unallocblocks
== 0) {
2081 /* Protect extents b-tree and allocation bitmap */
2082 lockflags
= SFL_BITMAP
;
2083 if (overflow_extents(fp
))
2084 lockflags
|= SFL_EXTENTS
;
2085 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
2087 retval
= MacToVFSError(TruncateFileC(VTOVCB(vp
),
2088 (FCB
*)fp
, length
, false));
2090 hfs_systemfile_unlock(hfsmp
, lockflags
);
2094 fp
->ff_size
= length
;
2096 (void) hfs_update(vp
, TRUE
);
2097 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
2100 hfs_end_transaction(hfsmp
);
2102 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
2106 /* These are bytesreleased */
2107 (void) hfs_chkdq(cp
, (int64_t)-(savedbytes
- filebytes
), NOCRED
, 0);
2110 /* Only set update flag if the logical length changes */
2111 if (old_filesize
!= length
)
2112 cp
->c_touch_modtime
= TRUE
;
2113 fp
->ff_size
= length
;
2115 cp
->c_touch_chgtime
= TRUE
;
2116 retval
= hfs_update(vp
, MNT_WAIT
);
2118 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_NONE
,
2119 -1, -1, -1, retval
, 0);
2124 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_END
,
2125 (int)length
, (int)fp
->ff_size
, (int)filebytes
, retval
, 0);
2133 * Truncate a cnode to at most length size, freeing (or adding) the
2138 hfs_truncate(struct vnode
*vp
, off_t length
, int flags
, int skipsetsize
,
2139 vfs_context_t context
)
2141 struct filefork
*fp
= VTOF(vp
);
2144 int blksize
, error
= 0;
2145 struct cnode
*cp
= VTOC(vp
);
2147 if (vnode_isdir(vp
))
2148 return (EISDIR
); /* cannot truncate an HFS directory! */
2150 blksize
= VTOVCB(vp
)->blockSize
;
2151 fileblocks
= fp
->ff_blocks
;
2152 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
2154 // have to loop truncating or growing files that are
2155 // really big because otherwise transactions can get
2156 // enormous and consume too many kernel resources.
2158 if (length
< filebytes
) {
2159 while (filebytes
> length
) {
2160 if ((filebytes
- length
) > HFS_BIGFILE_SIZE
) {
2161 filebytes
-= HFS_BIGFILE_SIZE
;
2165 cp
->c_flag
|= C_FORCEUPDATE
;
2166 error
= do_hfs_truncate(vp
, filebytes
, flags
, skipsetsize
, context
);
2170 } else if (length
> filebytes
) {
2171 while (filebytes
< length
) {
2172 if ((length
- filebytes
) > HFS_BIGFILE_SIZE
) {
2173 filebytes
+= HFS_BIGFILE_SIZE
;
2177 cp
->c_flag
|= C_FORCEUPDATE
;
2178 error
= do_hfs_truncate(vp
, filebytes
, flags
, skipsetsize
, context
);
2182 } else /* Same logical size */ {
2184 error
= do_hfs_truncate(vp
, length
, flags
, skipsetsize
, context
);
2186 /* Files that are changing size are not hot file candidates. */
2187 if (VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
) {
2188 fp
->ff_bytesread
= 0;
2197 * Preallocate file storage space.
2200 hfs_vnop_allocate(struct vnop_allocate_args
/* {
2204 off_t *a_bytesallocated;
2206 vfs_context_t a_context;
2209 struct vnode
*vp
= ap
->a_vp
;
2211 struct filefork
*fp
;
2213 off_t length
= ap
->a_length
;
2215 off_t moreBytesRequested
;
2216 off_t actualBytesAdded
;
2219 int retval
, retval2
;
2221 UInt32 extendFlags
; /* For call to ExtendFileC */
2222 struct hfsmount
*hfsmp
;
2223 kauth_cred_t cred
= vfs_context_ucred(ap
->a_context
);
2226 *(ap
->a_bytesallocated
) = 0;
2228 if (!vnode_isreg(vp
))
2230 if (length
< (off_t
)0)
2233 if ((retval
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
)))
2240 fileblocks
= fp
->ff_blocks
;
2241 filebytes
= (off_t
)fileblocks
* (off_t
)vcb
->blockSize
;
2243 if ((ap
->a_flags
& ALLOCATEFROMVOL
) && (length
< filebytes
)) {
2248 /* Fill in the flags word for the call to Extend the file */
2250 extendFlags
= kEFNoClumpMask
;
2251 if (ap
->a_flags
& ALLOCATECONTIG
)
2252 extendFlags
|= kEFContigMask
;
2253 if (ap
->a_flags
& ALLOCATEALL
)
2254 extendFlags
|= kEFAllMask
;
2255 if (cred
&& suser(cred
, NULL
) != 0)
2256 extendFlags
|= kEFReserveMask
;
2260 startingPEOF
= filebytes
;
2262 if (ap
->a_flags
& ALLOCATEFROMPEOF
)
2263 length
+= filebytes
;
2264 else if (ap
->a_flags
& ALLOCATEFROMVOL
)
2265 blockHint
= ap
->a_offset
/ VTOVCB(vp
)->blockSize
;
	/* If no changes are necessary, then we're done */
2268 if (filebytes
== length
)
2272 * Lengthen the size of the file. We must ensure that the
2273 * last byte of the file is allocated. Since the smallest
2274 * value of filebytes is 0, length will be at least 1.
2276 if (length
> filebytes
) {
2277 moreBytesRequested
= length
- filebytes
;
2280 retval
= hfs_chkdq(cp
,
2281 (int64_t)(roundup(moreBytesRequested
, vcb
->blockSize
)),
2288 * Metadata zone checks.
2290 if (hfsmp
->hfs_flags
& HFS_METADATA_ZONE
) {
2292 * Allocate Journal and Quota files in metadata zone.
2294 if (hfs_virtualmetafile(cp
)) {
2295 extendFlags
|= kEFMetadataMask
;
2296 blockHint
= hfsmp
->hfs_metazone_start
;
2297 } else if ((blockHint
>= hfsmp
->hfs_metazone_start
) &&
2298 (blockHint
<= hfsmp
->hfs_metazone_end
)) {
2300 * Move blockHint outside metadata zone.
2302 blockHint
= hfsmp
->hfs_metazone_end
+ 1;
2306 if (hfs_start_transaction(hfsmp
) != 0) {
2311 /* Protect extents b-tree and allocation bitmap */
2312 lockflags
= SFL_BITMAP
;
2313 if (overflow_extents(fp
))
2314 lockflags
|= SFL_EXTENTS
;
2315 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
2317 retval
= MacToVFSError(ExtendFileC(vcb
,
2322 &actualBytesAdded
));
2324 *(ap
->a_bytesallocated
) = actualBytesAdded
;
2325 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)vcb
->blockSize
;
2327 hfs_systemfile_unlock(hfsmp
, lockflags
);
2330 (void) hfs_update(vp
, TRUE
);
2331 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
2334 hfs_end_transaction(hfsmp
);
2337 * if we get an error and no changes were made then exit
2338 * otherwise we must do the hfs_update to reflect the changes
2340 if (retval
&& (startingPEOF
== filebytes
))
2344 * Adjust actualBytesAdded to be allocation block aligned, not
2345 * clump size aligned.
2346 * NOTE: So what we are reporting does not affect reality
2347 * until the file is closed, when we truncate the file to allocation
2350 if ((actualBytesAdded
!= 0) && (moreBytesRequested
< actualBytesAdded
))
2351 *(ap
->a_bytesallocated
) =
2352 roundup(moreBytesRequested
, (off_t
)vcb
->blockSize
);
2354 } else { /* Shorten the size of the file */
2356 if (fp
->ff_size
> length
) {
2358 * Any buffers that are past the truncation point need to be
2359 * invalidated (to maintain buffer cache consistency).
2363 if (hfs_start_transaction(hfsmp
) != 0) {
2368 /* Protect extents b-tree and allocation bitmap */
2369 lockflags
= SFL_BITMAP
;
2370 if (overflow_extents(fp
))
2371 lockflags
|= SFL_EXTENTS
;
2372 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
2374 retval
= MacToVFSError(TruncateFileC(vcb
, (FCB
*)fp
, length
, false));
2376 hfs_systemfile_unlock(hfsmp
, lockflags
);
2378 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)vcb
->blockSize
;
2381 (void) hfs_update(vp
, TRUE
);
2382 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
2385 hfs_end_transaction(hfsmp
);
2389 * if we get an error and no changes were made then exit
2390 * otherwise we must do the hfs_update to reflect the changes
2392 if (retval
&& (startingPEOF
== filebytes
)) goto Err_Exit
;
2394 /* These are bytesreleased */
2395 (void) hfs_chkdq(cp
, (int64_t)-((startingPEOF
- filebytes
)), NOCRED
,0);
2398 if (fp
->ff_size
> filebytes
) {
2399 fp
->ff_size
= filebytes
;
2402 ubc_setsize(vp
, fp
->ff_size
);
2403 hfs_lock(cp
, HFS_FORCE_LOCK
);
2408 cp
->c_touch_chgtime
= TRUE
;
2409 cp
->c_touch_modtime
= TRUE
;
2410 retval2
= hfs_update(vp
, MNT_WAIT
);
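
/*
 * Illustrative example for the rounding above (assuming a 4 KB allocation
 * block size): a request that needs 5,000 more bytes may cause ExtendFileC
 * to hand back a full clump (for instance 64 KB).  In that case the value
 * reported through a_bytesallocated is trimmed to roundup(5000, 4096), or
 * 8,192 bytes, i.e. allocation-block aligned rather than clump aligned.
 */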
/*
 * Pagein for HFS filesystem
 */
int
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
	struct vnop_pagein_args {
	   vnode_t       a_vp,
	   upl_t         a_pl,
	   vm_offset_t   a_pl_offset,
	   off_t         a_f_offset,
	   size_t        a_size,
	   int           a_flags,
	   vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;

	error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
	                       ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
	/*
	 * Keep track of blocks read.
	 */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
		struct filefork *fp;
		int took_cnode_lock = 0;

		if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
			bytesread = fp->ff_size;
		else
			bytesread = ap->a_size;

		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			took_cnode_lock = 1;

		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
			fp->ff_bytesread = bytesread;
			cp->c_atime = tv.tv_sec;
		} else {
			fp->ff_bytesread += bytesread;
		}
		cp->c_touch_acctime = TRUE;
		if (took_cnode_lock)
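
/*
 * Note on the accounting above: while hot-file recording is active
 * (hfc_stage == HFC_RECORDING), ff_bytesread accumulates the bytes paged in.
 * The cnode lock is only taken once the running total would no longer fit in
 * 32 bits, presumably so the wide update cannot be torn; smaller updates are
 * done without the lock.  If the file has not been touched since the start of
 * the current sampling period (c_atime < hfc_timebase), the counter is
 * restarted rather than accumulated.
 */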
/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
	struct vnop_pageout_args {
	   vnode_t       a_vp,
	   upl_t         a_pl,
	   vm_offset_t   a_pl_offset,
	   off_t         a_f_offset,
	   size_t        a_size,
	   int           a_flags,
	   vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	struct filefork *fp;

	if (cp->c_lockowner == current_thread()) {
		panic("pageout: %s cnode lock already held!\n",
		      cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
	}
	if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
		if (!(ap->a_flags & UPL_NOCOMMIT)) {
			ubc_upl_abort_range(ap->a_pl,
			                    ap->a_pl_offset,
			                    ap->a_size,
			                    UPL_ABORT_FREE_ON_EMPTY);

	filesize = fp->ff_size;
	end_of_range = ap->a_f_offset + ap->a_size - 1;

	if (end_of_range >= filesize) {
		end_of_range = (off_t)(filesize - 1);
	}
	if (ap->a_f_offset < filesize) {
		rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
		cp->c_flag |= C_MODIFIED;	/* leof is dirty */
	}

	retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
	                         ap->a_size, filesize, ap->a_flags);

	/*
	 * If data was written, and setuid or setgid bits are set and
	 * this process is not the superuser then clear the setuid and
	 * setgid bits as a precaution against tampering.
	 */
	if ((retval == 0) &&
	    (cp->c_mode & (S_ISUID | S_ISGID)) &&
	    (vfs_context_suser(ap->a_context) != 0)) {
		hfs_lock(cp, HFS_FORCE_LOCK);
		cp->c_mode &= ~(S_ISUID | S_ISGID);
		cp->c_touch_chgtime = TRUE;
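
/*
 * As far as one can tell from the code above: once a pageout covers a range
 * below the current EOF, that range is dropped from ff_invalidranges via
 * rl_remove, since the pages being pushed now hold authoritative data and no
 * longer need to be zero-filled, and C_MODIFIED is set so the dirty logical
 * EOF gets written back on the next update.
 */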
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
		/*
		 * Swap and validate the node if it is in native byte order.
		 * This is always true on big endian, so we always validate
		 * before writing here.  On little endian, the node typically has
		 * been swapped and validated when it was written to the journal,
		 * so we won't do anything here.
		 */
		if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = (char *)buf_dataptr(bp);
			block.blockNum = buf_lblkno(bp);
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (buf_fromcache(bp) == 0);
			block.blockSize = buf_count(bp);

			/* Endian un-swap B-Tree node */
			retval = hfs_swap_BTNode(&block, vp, kSwapBTNodeHostToBig);
			if (retval)
				panic("hfs_vnop_bwrite: about to write corrupt node!\n");

	/* This buffer shouldn't be locked anymore but if it is clear it */
	if ((buf_flags(bp) & B_LOCKED)) {
		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp %p\n", bp);
		}
		buf_clearflags(bp, B_LOCKED);
	}
	retval = vn_bwrite(ap);
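
/*
 * About the 0x000e test above (the interpretation of the constant is inferred
 * from the on-disk node layout): the last UInt16 of a B-tree node buffer is
 * the offset of record 0, which is always sizeof(BTNodeDescriptor), or 14
 * (0x000E), while the node is still in host byte order.  A node that has
 * already been swapped to big-endian on a little-endian host would read back
 * as the byte-reversed value, so this check decides whether the node still
 * needs the HostToBig swap before it goes to disk.
 */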
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 * -----------------     -----------------
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 * -----------------     -----------------
 *
 *                       -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                       -----------------
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
             struct proc *p)
{
	struct filefork *fp;
	struct hfsmount *hfsmp;
	u_int32_t nextallocsave;
	daddr64_t sector_a, sector_b;
	int disabled_caching = 0;
	int took_trunc_lock = 0;
	enum vtype vnodetype;

	vnodetype = vnode_vtype(vp);
	if (vnodetype != VREG && vnodetype != VLNK) {

	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {

	if (fp->ff_unallocblocks)

	blksize = hfsmp->blockSize;
	if (blockHint == 0)
		blockHint = hfsmp->nextAllocation;

	if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
	    ((fp->ff_size > blksize) && vnodetype == VLNK)) {

	// We do not believe that this call to hfs_fsync() is
	// necessary and it causes a journal transaction
	// deadlock so we are removing it.
	//
	//if (vnodetype == VREG && !vnode_issystem(vp)) {
	//	retval = hfs_fsync(vp, MNT_WAIT, 0, p);

	if (!vnode_issystem(vp) && (vnodetype != VLNK)) {

		hfs_lock_truncate(cp, TRUE);
		if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
			hfs_unlock_truncate(cp);

		took_trunc_lock = 1;

	headblks = fp->ff_blocks;
	datablks = howmany(fp->ff_size, blksize);
	growsize = datablks * blksize;
	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
	if (blockHint >= hfsmp->hfs_metazone_start &&
	    blockHint <= hfsmp->hfs_metazone_end)
		eflags |= kEFMetadataMask;

	if (hfs_start_transaction(hfsmp) != 0) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp);

	/*
	 * Protect the extents b-tree and the allocation bitmap
	 * during MapFileBlockC and ExtendFileC operations.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
	retval = MacToVFSError(retval);

	/*
	 * STEP 1 - acquire new allocation blocks.
	 */
	if (!vnode_isnocache(vp)) {
		vnode_setnocache(vp);
		disabled_caching = 1;

	nextallocsave = hfsmp->nextAllocation;
	retval = ExtendFileC(hfsmp, (FCB *)fp, growsize, blockHint, eflags, &newbytes);
	if (eflags & kEFMetadataMask) {
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		hfsmp->nextAllocation = nextallocsave;
		hfsmp->vcbFlags |= 0xFF00;
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	}
	retval = MacToVFSError(retval);

		cp->c_flag |= C_MODIFIED;
		if (newbytes < growsize) {

		} else if (fp->ff_blocks < (headblks + datablks)) {
			printf("hfs_relocate: allocation failed");

		retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
		retval = MacToVFSError(retval);
		} else if ((sector_a + 1) == sector_b) {

		} else if ((eflags & kEFMetadataMask) &&
		    ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
		       hfsmp->hfs_metazone_end)) {
			printf("hfs_relocate: didn't move into metadata zone\n");

	/* Done with system locks and journal for now. */
	hfs_systemfile_unlock(hfsmp, lockflags);
	hfs_end_transaction(hfsmp);

	/*
	 * Check to see if failure is due to excessive fragmentation.
	 */
	if ((retval == ENOSPC) &&
	    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
		hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;

	/*
	 * STEP 2 - clone file data into the new allocation blocks.
	 */
	if (vnodetype == VLNK)
		retval = hfs_clonelink(vp, blksize, cred, p);
	else if (vnode_issystem(vp))
		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	else
		retval = hfs_clonefile(vp, headblks, datablks, blksize);

	/* Start transaction for step 3 or for a restore. */
	if (hfs_start_transaction(hfsmp) != 0) {

	/*
	 * STEP 3 - switch to cloned data and remove old blocks.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = HeadTruncateFile(hfsmp, (FCB *)fp, headblks);

	hfs_systemfile_unlock(hfsmp, lockflags);

	if (took_trunc_lock)
		hfs_unlock_truncate(cp);

	hfs_systemfile_unlock(hfsmp, lockflags);

	// See comment up above about calls to hfs_fsync()
	//	retval = hfs_fsync(vp, MNT_WAIT, 0, p);

	if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
		(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
	else
		(void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);

	if (disabled_caching) {
		vnode_clearnocache(vp);
	}
	hfs_end_transaction(hfsmp);

	if (fp->ff_blocks == headblks)

	/*
	 * Give back any newly allocated space.
	 */
	if (lockflags == 0) {
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	}

	(void) TruncateFileC(hfsmp, (FCB *)fp, fp->ff_size, false);

	hfs_systemfile_unlock(hfsmp, lockflags);

	if (took_trunc_lock)
		hfs_unlock_truncate(cp);
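
/*
 * Reading the checks above: sector_a is the device sector backing the last
 * byte of the existing data and sector_b the first sector of the space added
 * in STEP 1.  If sector_b == sector_a + 1 the new space merely extends the
 * old extent, so relocating would gain nothing; and when a kEFMetadataMask
 * request did not actually land inside the metadata zone, the relocation
 * likewise appears to be abandoned and the new space given back.
 */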
static int
hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
{
	struct buf *head_bp = NULL;
	struct buf *tail_bp = NULL;

	error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);

	tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
	if (tail_bp == NULL) {

	bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
	error = (int)buf_bwrite(tail_bp);

	buf_markinvalid(head_bp);
	buf_brelse(head_bp);

	(void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
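
/*
 * For a symlink the whole link target lives in a single allocation block
 * (hfs_relocate rejects VLNK vnodes larger than one block), so the clone is
 * simply: read block 0 through the buffer cache, copy it into block 1, the
 * first block acquired in STEP 1, write it out synchronously, and invalidate
 * the stale buffers.
 */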
/*
 * Clone a file's data within the file.
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
	filesize = VTOF(vp)->ff_blocks * blksize;	/* virtual file size */
	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 4096 * 16);

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {

	hfs_unlock(VTOC(vp));

	auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);

	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);

		uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_read(vp, auio, copysize, 0);
			printf("hfs_clonefile: cluster_read failed - %d\n", error);

		if (uio_resid(auio) != 0) {
			printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));

		uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_write(vp, auio, filesize + offset,
		                      filesize + offset + iosize,
		                      uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
			printf("hfs_clonefile: cluster_write failed - %d\n", error);

		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");

	/*
	 * No need to call ubc_sync_range or hfs_invalbuf
	 * since the file was copied using IO_NOCACHE.
	 */

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
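
/*
 * Design note on the loop above: the copy runs through the cluster layer in
 * chunks of at most 64 KB (4096 * 16) using IO_NOCACHE | IO_SYNC, so the
 * cloned data bypasses the UBC and is on disk before STEP 3 switches the
 * file over to it; that is also why no ubc_sync_range or hfs_invalbuf pass
 * is needed afterwards.  The cnode lock is dropped for the duration of the
 * copy and retaken before returning.
 */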
/*
 * Clone a system (metadata) file.
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
	struct buf *bp = NULL;
	daddr64_t start_blk;

	iosize = GetLogicalBlockSize(vp);
	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
	breadcnt = bufsize / iosize;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {

	start_blk = ((daddr64_t)blkstart * blksize) / iosize;
	last_blk = ((daddr64_t)blkcnt * blksize) / iosize;

	while (blkno < last_blk) {
		/*
		 * Read up to a megabyte
		 */
		for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
			error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
				printf("hfs_clonesysfile: meta_bread error %d\n", error);

			if (buf_count(bp) != iosize) {
				printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));

			bcopy((char *)buf_dataptr(bp), offset, iosize);

			buf_markinvalid(bp);

		/*
		 * Write up to a megabyte
		 */
		for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
			bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
				printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);

			bcopy(offset, (char *)buf_dataptr(bp), iosize);
			error = (int)buf_bwrite(bp);

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	error = hfs_fsync(vp, MNT_WAIT, 0, p);
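
/*
 * Unlike hfs_clonefile, system (metadata) files are cloned through the
 * buffer cache: up to a megabyte of logical blocks is read with
 * buf_meta_bread, staged in a kernel buffer, and written back with
 * buf_bwrite at the block offsets of the space acquired in STEP 1, ending
 * with a synchronous hfs_fsync so the copy is durable before the old blocks
 * are head-truncated away.
 */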