/*
 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*	@(#)hfs_readwrite.c	1.0
 *
 *	(c) 1998-2001 Apple Computer, Inc.  All Rights Reserved
 *
 *	hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vfs_context.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>

#include <sys/kdebug.h>

#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
extern int overflow_extents(struct filefork *fp);

#define can_cluster(size)  ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))

enum {
	MAXHFSFILESIZE = 0x7FFFFFFF	/* this needs to go in the mount structure */
};
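/*
 * Note: can_cluster() admits an I/O only when its size is an exact multiple of
 * 4 KB and no larger than half of MAXPHYSIO; MAXHFSFILESIZE (2 GB minus one
 * byte) is the per-file limit this file enforces for plain HFS (non-HFS Plus)
 * volumes.
 */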
extern u_int32_t GetLogicalBlockSize(struct vnode *vp);

extern int hfs_setextendedsecurity(struct hfsmount *, int);


static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
static int hfs_clonefile(struct vnode *, int, int, int);
static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
/*****************************************************************************
*
*	I/O Operations on vnodes
*
*****************************************************************************/
int hfs_vnop_read(struct vnop_read_args *);
int hfs_vnop_write(struct vnop_write_args *);
int hfs_vnop_ioctl(struct vnop_ioctl_args *);
int hfs_vnop_select(struct vnop_select_args *);
int hfs_vnop_blktooff(struct vnop_blktooff_args *);
int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
int hfs_vnop_blockmap(struct vnop_blockmap_args *);
int hfs_vnop_strategy(struct vnop_strategy_args *);
int hfs_vnop_allocate(struct vnop_allocate_args *);
int hfs_vnop_pagein(struct vnop_pagein_args *);
int hfs_vnop_pageout(struct vnop_pageout_args *);
int hfs_vnop_bwrite(struct vnop_bwrite_args *);
/*
 * Read data from a file.
 */
int
hfs_vnop_read(struct vnop_read_args *ap)
{
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct hfsmount *hfsmp;
	off_t start_resid = uio_resid(uio);
	off_t offset = uio_offset(uio);

	/* Preflight checks */
	if (!vnode_isreg(vp)) {
		/* can only read regular files */

	if (start_resid == 0)
		return (0);		/* Nothing left to do */

		return (EINVAL);	/* can't read from a negative offset */

	/* Protect against a size change. */
	hfs_lock_truncate(cp, 0);

	filesize = fp->ff_size;
	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
	if (offset > filesize) {
		if ((hfsmp->hfs_flags & HFS_STANDARD) &&
		    (offset > (off_t)MAXHFSFILESIZE)) {

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

	retval = cluster_read(vp, uio, filesize, 0);

	cp->c_touch_acctime = TRUE;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
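	/*
	 * ff_bytesread feeds the hot-file clustering (HFC) heuristics below:
	 * while the mount is in its HFC_RECORDING stage, each successful read
	 * is added to the fork's running byte count, and the count is restarted
	 * whenever the file was last touched before the current sampling period
	 * (c_atime older than hfc_timebase).
	 */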
	/*
	 * Keep track of blocks read.
	 */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
		int took_cnode_lock = 0;

		bytesread = start_resid - uio_resid(uio);

		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
			hfs_lock(cp, HFS_FORCE_LOCK);
		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
			fp->ff_bytesread = bytesread;
			cp->c_atime = tv.tv_sec;
		} else {
			fp->ff_bytesread += bytesread;
		}
	}
	hfs_unlock_truncate(cp);
/*
 * Write data to a file.
 */
int
hfs_vnop_write(struct vnop_write_args *ap)
{
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct hfsmount *hfsmp;
	kauth_cred_t cred = NULL;
	off_t actualBytesAdded;
	int ioflag = ap->a_ioflag;
	int cnode_locked = 0;

	// LP64todo - fix this! uio_resid may be 64-bit value
	resid = uio_resid(uio);
	offset = uio_offset(uio);

	if (!vnode_isreg(vp))
		return (EPERM);		/* Can only write regular files */

	/* Protect against a size change. */
	hfs_lock_truncate(VTOC(vp), TRUE);

	if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
		hfs_unlock_truncate(VTOC(vp));

	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;

	if (ioflag & IO_APPEND) {
		uio_setoffset(uio, fp->ff_size);
		offset = fp->ff_size;
	}
	if ((cp->c_flags & APPEND) && offset != fp->ff_size) {

	origFileSize = fp->ff_size;
	eflags = kEFDeferMask;	/* defer file block allocations */

#ifdef HFS_SPARSE_DEV
	/*
	 * When the underlying device is sparse and space
	 * is low (< 8MB), stop doing delayed allocations
	 * and begin doing synchronous I/O.
	 */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
	    (hfs_freeblks(hfsmp, 0) < 2048)) {
		eflags &= ~kEFDeferMask;
	}
#endif /* HFS_SPARSE_DEV */
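	/*
	 * (2048 allocation blocks at the common 4 KB block size is the 8 MB
	 * low-space threshold the comment above refers to; with other block
	 * sizes the byte threshold scales accordingly.)
	 */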
	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
		(int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);

	/* Now test if we need to extend the file */
	/* Doing so will adjust the filebytes for us */

	writelimit = offset + resid;
	if (writelimit <= filebytes)

	cred = vfs_context_ucred(ap->a_context);

	bytesToAdd = writelimit - filebytes;
	retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),

	if (hfs_start_transaction(hfsmp) != 0) {

	while (writelimit > filebytes) {
		bytesToAdd = writelimit - filebytes;
		if (cred && suser(cred, NULL) != 0)
			eflags |= kEFReserveMask;

		/* Protect extents b-tree and allocation bitmap */
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

		/* Files that are changing size are not hot file candidates. */
		if (hfsmp->hfc_stage == HFC_RECORDING) {
			fp->ff_bytesread = 0;
		}
		retval = MacToVFSError(ExtendFileC (hfsmp, (FCB *)fp, bytesToAdd,
				0, eflags, &actualBytesAdded));

		hfs_systemfile_unlock(hfsmp, lockflags);

		if ((actualBytesAdded == 0) && (retval == E_NONE))
		if (retval != E_NONE)
		filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
	}
	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
		(int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);

	(void) hfs_update(vp, TRUE);
	(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
	(void) hfs_end_transaction(hfsmp);
	if (retval == E_NONE) {
		struct rl_entry *invalid_range;

		if (writelimit > fp->ff_size)
			filesize = writelimit;
		else
			filesize = fp->ff_size;

		lflag = (ioflag & IO_SYNC);

		if (offset <= fp->ff_size) {
			zero_off = offset & ~PAGE_MASK_64;

			/* Check to see whether the area between zero_off and the start
			   of the transfer is invalid and should be zero-filled
			   as part of the transfer:
			 */
			if (offset > zero_off) {
				if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
					lflag |= IO_HEADZEROFILL;
			}
		} else {
			off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;

			/* The bytes between fp->ff_size and uio->uio_offset must never be
			   read without being zeroed.  The current last block is filled with zeroes
			   if it holds valid data but in all cases merely do a little bookkeeping
			   to track the area from the end of the current last page to the start of
			   the area actually written.  For the same reason only the bytes up to the
			   start of the page where this write will start is invalidated; any remainder
			   before uio->uio_offset is explicitly zeroed as part of the cluster_write.

			   Note that inval_start, the start of the page after the current EOF,
			   may be past the start of the write, in which case the zeroing
			   will be handled by the cluster_write of the actual data.
			 */
			inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
			inval_end = offset & ~PAGE_MASK_64;
			zero_off = fp->ff_size;

			if ((fp->ff_size & PAGE_MASK_64) &&
			    (rl_scan(&fp->ff_invalidranges,
			             &invalid_range) != RL_NOOVERLAP)) {
				/* The page containing the EOF is not valid, so the
				   entire page must be made inaccessible now.  If the write
				   starts on a page beyond the page containing the eof
				   (inval_end > eof_page_base), add the
				   whole page to the range to be invalidated.  Otherwise
				   (i.e. if the write starts on the same page), zero-fill
				   the entire page explicitly now:
				 */
				if (inval_end > eof_page_base) {
					inval_start = eof_page_base;
				} else {
					zero_off = eof_page_base;
				}
			}

			if (inval_start < inval_end) {
				/* There's some range of data that's going to be marked invalid */

				if (zero_off < inval_start) {
					/* The pages between inval_start and inval_end are going to be invalidated,
					   and the actual write will start on a page past inval_end.  Now's the last
					   chance to zero-fill the page containing the EOF:
					 */
					retval = cluster_write(vp, (uio_t) 0,
							fp->ff_size, inval_start,
							lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
					hfs_lock(cp, HFS_FORCE_LOCK);
					if (retval) goto ioerr_exit;
					offset = uio_offset(uio);
				}

				/* Mark the remaining area of the newly allocated space as invalid: */
				rl_add(inval_start, inval_end - 1, &fp->ff_invalidranges);
				cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
				zero_off = fp->ff_size = inval_end;
			}

			if (offset > zero_off) lflag |= IO_HEADZEROFILL;
		}
		/* Check to see whether the area between the end of the write and the end of
		   the page it falls in is invalid and should be zero-filled as part of the transfer:
		 */
		tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
		if (tail_off > filesize) tail_off = filesize;
		if (tail_off > writelimit) {
			if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
				lflag |= IO_TAILZEROFILL;
			}
		}

		/*
		 * if the write starts beyond the current EOF (possibly advanced in the
		 * zeroing of the last block, above), then we'll zero fill from the current EOF
		 * to where the write begins:
		 *
		 * NOTE: If (and ONLY if) the portion of the file about to be written is
		 * before the current EOF it might be marked as invalid now and must be
		 * made readable (removed from the invalid ranges) before cluster_write
		 * is called.
		 */
		io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
		if (io_start < fp->ff_size) {
			io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
			rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
		}

		retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
				tail_off, lflag | IO_NOZERODIRTY);
		offset = uio_offset(uio);
		if (offset > fp->ff_size) {
			fp->ff_size = offset;

			ubc_setsize(vp, fp->ff_size);	/* XXX check errors */
			/* Files that are changing size are not hot file candidates. */
			if (hfsmp->hfc_stage == HFC_RECORDING)
				fp->ff_bytesread = 0;
		}
	if (resid > uio_resid(uio)) {
		cp->c_touch_chgtime = TRUE;
		cp->c_touch_modtime = TRUE;
	}
	HFS_KNOTE(vp, NOTE_WRITE);

	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (cp->c_mode & (S_ISUID | S_ISGID)) {
		cred = vfs_context_ucred(ap->a_context);
		if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			cp->c_mode &= ~(S_ISUID | S_ISGID);
		}
	}
	if (ioflag & IO_UNIT) {
		hfs_lock(cp, HFS_FORCE_LOCK);
		(void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,

		// LP64todo - fix this! resid needs to be user_ssize_t
		uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
		uio_setresid(uio, resid);
		filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
	} else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
		hfs_lock(cp, HFS_FORCE_LOCK);
		retval = hfs_update(vp, TRUE);
	}
	/* Updating vcbWrCnt doesn't need to be atomic. */

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);

	hfs_unlock_truncate(cp);
517 /* support for the "bulk-access" fcntl */
519 #define CACHE_ELEMS 64
520 #define CACHE_LEVELS 16
521 #define PARENT_IDS_FLAG 0x100
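/*
 * CACHE_ELEMS bounds the per-call directory-access cache used below (at most
 * 64 distinct parent directories are remembered), CACHE_LEVELS bounds how many
 * ancestors of a single item are recorded on one walk toward the root, and
 * PARENT_IDS_FLAG in the caller's flags word means the file_ids array already
 * contains parent directory ids rather than leaf file ids.
 */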
/* from hfs_attrlist.c */
extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
		mode_t obj_mode, struct mount *mp,
		kauth_cred_t cred, struct proc *p);

/* from vfs/vfs_fsevents.c */
extern char *get_pathbuff(void);
extern void release_pathbuff(char *buff);
struct access_cache {
	int numcached;
	int cachehits;		/* these two for statistics gathering */
	int lookups;
	unsigned int *acache;
	Boolean *haveaccess;
};

struct access_t {
	uid_t		uid;		/* IN: effective user id */
	short		flags;		/* IN: access requested (i.e. R_OK) */
	short		num_groups;	/* IN: number of groups user belongs to */
	int		num_files;	/* IN: number of files to process */
	int		*file_ids;	/* IN: array of file ids */
	gid_t		*groups;	/* IN: array of groups */
	short		*access;	/* OUT: access info for each file (0 for 'has access') */
};

struct user_access_t {
	uid_t		uid;		/* IN: effective user id */
	short		flags;		/* IN: access requested (i.e. R_OK) */
	short		num_groups;	/* IN: number of groups user belongs to */
	int		num_files;	/* IN: number of files to process */
	user_addr_t	file_ids;	/* IN: array of file ids */
	user_addr_t	groups;		/* IN: array of groups */
	user_addr_t	access;		/* OUT: access info for each file (0 for 'has access') */
};
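/*
 * Illustrative sketch only (not part of this file): a userspace caller would
 * fill in an access_t and hand it to fsctl(2) using the bulk-access selector
 * defined further below.  Field names match the structures above; the volume
 * path, uid/gid values, and catalog node ids are hypothetical, and the exact
 * selector spelling depends on how fsctl dispatches the command.
 *
 *	struct access_t args;
 *	int ids[2] = { 1234, 5678 };	// hypothetical catalog node ids
 *	short results[2];
 *	gid_t gids[1] = { 20 };
 *
 *	args.uid = 501;
 *	args.flags = R_OK;
 *	args.num_groups = 1;
 *	args.num_files = 2;
 *	args.file_ids = ids;
 *	args.groups = gids;
 *	args.access = results;
 *	if (fsctl("/Volumes/HFSVolume", HFSIOC_BULKACCESS, &args, 0) == 0) {
 *		// results[i] == 0 means the caller can access file_ids[i]
 *	}
 */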
/*
 * Perform a binary search for the given parent_id. Return value is
 * found/not found boolean, and indexp will be the index of the item
 * or the index at which to insert the item if it's not found.
 */
static int
lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
{
	int index, matches = 0;

	if (cache->numcached == 0) {
		return 0; // table is empty, so insert at index=0 and report no match
	}

	if (cache->numcached > CACHE_ELEMS) {
		/*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
		  cache->numcached, CACHE_ELEMS);*/
		cache->numcached = CACHE_ELEMS;
	}

	hi = cache->numcached - 1;

	/* perform binary search for parent_id */
		unsigned int mid = (hi - lo)/2 + lo;
		unsigned int this_id = cache->acache[mid];

		if (parent_id == this_id) {
		if (parent_id < this_id) {
		if (parent_id > this_id) {

	/* check if lo and hi converged on the match */
	if (parent_id == cache->acache[hi]) {

	/* if no existing entry found, find index for new one */
	index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
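	/*
	 * (lookup_bucket() reports hit/miss and leaves *indexp pointing either
	 * at the matching slot or at the slot where a new entry belongs, so
	 * add_node() below can pass -1 to repeat the search, or reuse an index
	 * obtained from an earlier lookup.)
	 */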
/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in). We currently do a replace rather
 * than an insert if the cache is full.
 */
static void
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
	int lookup_index = -1;

	/* need to do a lookup first if -1 passed for index */
	if (lookup_bucket(cache, &lookup_index, nodeID)) {
		if (cache->haveaccess[lookup_index] != access) {
			/* change access info for existing entry... should never happen */
			cache->haveaccess[lookup_index] = access;
		}

		/* mission accomplished */

		index = lookup_index;

	/* if the cache is full, do a replace rather than an insert */
	if (cache->numcached >= CACHE_ELEMS) {
		//printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
		cache->numcached = CACHE_ELEMS-1;

		if (index > cache->numcached) {
			// printf("index %d pinned to %d\n", index, cache->numcached);
			index = cache->numcached;
		}
	} else if (index >= 0 && index < cache->numcached) {
		/* only do bcopy if we're inserting */
		bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
		bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
	}

	cache->acache[index] = nodeID;
	cache->haveaccess[index] = access;
static int
snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
{
	struct cinfo *cip = (struct cinfo *)arg;

	cip->uid = attrp->ca_uid;
	cip->gid = attrp->ca_gid;
	cip->mode = attrp->ca_mode;
	cip->parentcnid = descp->cd_parentcnid;

/*
 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
 * isn't incore, then go to the catalog.
 */
static int
do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
	struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
{
	/* if this id matches the one the fsctl was called with, skip the lookup */
	if (cnid == skip_cp->c_cnid) {
		cnattrp->ca_uid = skip_cp->c_uid;
		cnattrp->ca_gid = skip_cp->c_gid;
		cnattrp->ca_mode = skip_cp->c_mode;
		keyp->hfsPlus.parentID = skip_cp->c_parentcnid;

	/* otherwise, check the cnode hash in case the file/dir is incore */
	if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
		cnattrp->ca_uid = c_info.uid;
		cnattrp->ca_gid = c_info.gid;
		cnattrp->ca_mode = c_info.mode;
		keyp->hfsPlus.parentID = c_info.parentcnid;

	lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

	/* lookup this cnid in the catalog */
	error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);

	hfs_systemfile_unlock(hfsmp, lockflags);
/*
 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
 * up to CACHE_LEVELS as we progress towards the root.
 */
static int
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
	struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev)
{
	HFSCatalogNodeID thisNodeID;
	unsigned long myPerms;
	struct cat_attr cnattr;
	int cache_index = -1;
	int i = 0, ids_to_cache = 0;
	int parent_ids[CACHE_LEVELS];

	/* root always has access */
	if (!suser(myp_ucred, NULL)) {

	while (thisNodeID >= kRootDirID) {
		myResult = 0;	/* default to "no access" */

		/* check the cache before resorting to hitting the catalog */

		/* ASSUMPTION: access info of cached entries is "final"... i.e. no need
		 * to look any further after hitting cached dir */
		if (lookup_bucket(cache, &cache_index, thisNodeID)) {
			myResult = cache->haveaccess[cache_index];
			goto ExitThisRoutine;
		}

		/* remember which parents we want to cache */
		if (ids_to_cache < CACHE_LEVELS) {
			parent_ids[ids_to_cache] = thisNodeID;
		}

		/* do the lookup (checks the cnode hash, then the catalog) */
		myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
			goto ExitThisRoutine;	/* no access */

		myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
				cnattr.ca_mode, hfsmp->hfs_mp,
				myp_ucred, theProcPtr);

		if ( (myPerms & X_OK) == 0 ) {
			goto ExitThisRoutine;	/* no access */
		}

		/* up the hierarchy we go */
		thisNodeID = catkey.hfsPlus.parentID;
	}

	/* if here, we have access to this node */

	//printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);

	/* cache the parent directory(ies) */
	for (i = 0; i < ids_to_cache; i++) {
		/* small optimization: get rid of double-lookup for all these */
		// printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
		add_node(cache, -1, parent_ids[i], myResult);
	}

/* end "bulk-access" support */
/*
 * Callback for use with freeze ioctl.
 */
static int
hfs_freezewrite_callback(struct vnode *vp, void *cargs)
{
	vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
/*
 * Control filesystem operating characteristics.
 */
int
hfs_vnop_ioctl( struct vnop_ioctl_args /* {
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode * vp = ap->a_vp;
	struct hfsmount *hfsmp = VTOHFS(vp);
	vfs_context_t context = ap->a_context;
	kauth_cred_t cred = vfs_context_ucred(context);
	proc_t p = vfs_context_proc(context);
	struct vfsstatfs *vfsp;

	is64bit = proc_is64bit(p);

	switch (ap->a_command) {

	case HFS_RESIZE_VOLUME: {
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
		    kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES);	/* must be owner of file system */
		}
		if (!vnode_isvroot(vp)) {

		newsize = *(u_int64_t *)ap->a_data;
		cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;

		if (newsize > cursize) {
			return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
		} else if (newsize < cursize) {
			return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
		}
	}
	case HFS_CHANGE_NEXT_ALLOCATION: {
		if (vnode_vfsisrdonly(vp)) {

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
		    kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES);	/* must be owner of file system */
		}
		if (!vnode_isvroot(vp)) {

		location = *(u_int32_t *)ap->a_data;
		if (location > hfsmp->totalBlocks - 1) {

		/* Return previous value. */
		*(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		hfsmp->nextAllocation = location;
		hfsmp->vcbFlags |= 0xFF00;
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	}

#ifdef HFS_SPARSE_DEV
	case HFS_SETBACKINGSTOREINFO: {
		struct vnode * bsfs_rootvp;
		struct vnode * di_vp;
		struct hfs_backingstoreinfo *bsdata;

		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
		    kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES);	/* must be owner of file system */
		}
		bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
		if (bsdata == NULL) {

		if ((error = file_vnode(bsdata->backingfd, &di_vp))) {

		if ((error = vnode_getwithref(di_vp))) {
			file_drop(bsdata->backingfd);

		if (vnode_mount(vp) == vnode_mount(di_vp)) {
			(void)vnode_put(di_vp);
			file_drop(bsdata->backingfd);

		/*
		 * Obtain the backing fs root vnode and keep a reference
		 * on it.  This reference will be dropped in hfs_unmount.
		 */
		error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL);	/* XXX use context! */
			(void)vnode_put(di_vp);
			file_drop(bsdata->backingfd);

		vnode_ref(bsfs_rootvp);
		vnode_put(bsfs_rootvp);

		hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
		hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
		hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
		hfsmp->hfs_sparsebandblks *= 4;

		(void)vnode_put(di_vp);
		file_drop(bsdata->backingfd);
	}
	case HFS_CLRBACKINGSTOREINFO: {
		struct vnode * tmpvp;

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
		    kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES);	/* must be owner of file system */
		}
		if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
		    hfsmp->hfs_backingfs_rootvp) {

			hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
			tmpvp = hfsmp->hfs_backingfs_rootvp;
			hfsmp->hfs_backingfs_rootvp = NULLVP;
			hfsmp->hfs_sparsebandblks = 0;
		}
	}
#endif /* HFS_SPARSE_DEV */
		mp = vnode_mount(vp);
		hfsmp = VFSTOHFS(mp);

		lck_rw_lock_exclusive(&hfsmp->hfs_insync);

		task = current_task();
		task_working_set_disable(task);

		// flush things before we get started to try and prevent
		// dirty data from being paged out while we're frozen.
		// note: can't do this after taking the lock as it will
		// deadlock against ourselves.
		vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
		hfs_global_exclusive_lock_acquire(hfsmp);
		journal_flush(hfsmp->jnl);

		// don't need to iterate on all vnodes, we just need to
		// wait for writes to the system files and the device vnode
		if (HFSTOVCB(hfsmp)->extentsRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
		if (HFSTOVCB(hfsmp)->catalogRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
		if (HFSTOVCB(hfsmp)->allocationsRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
		if (hfsmp->hfs_attribute_vp)
			vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
		vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");

		hfsmp->hfs_freezing_proc = current_proc();

		// if we're not the one who froze the fs then we
		// can't thaw it.
		if (hfsmp->hfs_freezing_proc != current_proc()) {

		// NOTE: if you add code here, also go check the
		// code that "thaws" the fs in hfs_vnop_close()
		hfsmp->hfs_freezing_proc = NULL;
		hfs_global_exclusive_lock_release(hfsmp);
		lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
#define HFSIOC_BULKACCESS	_IOW('h', 9, struct access_t)
#define HFS_BULKACCESS_FSCTL	IOCBASECMD(HFSIOC_BULKACCESS)
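	/*
	 * HFSIOC_BULKACCESS is the full ioctl encoding (_IOW: the argument is
	 * copied in to the kernel; group 'h', number 9, sized for struct
	 * access_t), while HFS_BULKACCESS_FSCTL is the same command with the
	 * parameter-length bits cleared by IOCBASECMD; both spellings are
	 * accepted by the switch below.
	 */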
	case HFS_BULKACCESS_FSCTL:
	case HFS_BULKACCESS: {
		/*
		 * NOTE: on entry, the vnode is locked.  In case this vnode
		 * happens to be in our list of file_ids, we'll note it so that
		 * we can avoid calling hfs_chashget_nowait() on that id as that
		 * will cause a "locking against myself" panic.
		 */
		Boolean check_leaf = true;

		struct user_access_t *user_access_structp;
		struct user_access_t tmp_user_access_t;
		struct access_cache cache;

		dev_t dev = VTOC(vp)->c_dev;

		struct ucred myucred;	/* XXX ILLEGAL */
		int *file_ids = NULL;
		short *access = NULL;

		cnid_t prevParent_cnid = 0;
		unsigned long myPerms;
		struct cat_attr cnattr;

		struct cnode *skip_cp = VTOC(vp);
		struct vfs_context my_context;

		/* first, return error if not run as root */
		if (cred->cr_ruid != 0) {

		/* initialize the local cache and buffers */
		cache.numcached = 0;
		cache.cachehits = 0;

		file_ids = (int *) get_pathbuff();
		access = (short *) get_pathbuff();
		cache.acache = (int *) get_pathbuff();
		cache.haveaccess = (Boolean *) get_pathbuff();

		if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
			release_pathbuff((char *) file_ids);
			release_pathbuff((char *) access);
			release_pathbuff((char *) cache.acache);
			release_pathbuff((char *) cache.haveaccess);
		/* struct copyin done during dispatch... need to copy file_id array separately */
		if (ap->a_data == NULL) {
			goto err_exit_bulk_access;
		}

		user_access_structp = (struct user_access_t *)ap->a_data;

			struct access_t * accessp = (struct access_t *)ap->a_data;
			tmp_user_access_t.uid = accessp->uid;
			tmp_user_access_t.flags = accessp->flags;
			tmp_user_access_t.num_groups = accessp->num_groups;
			tmp_user_access_t.num_files = accessp->num_files;
			tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
			tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
			tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
			user_access_structp = &tmp_user_access_t;

		num_files = user_access_structp->num_files;
		if (num_files < 1) {
			goto err_exit_bulk_access;
		}
		if (num_files > 256) {
			goto err_exit_bulk_access;
		}

		if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
				num_files * sizeof(int)))) {
			goto err_exit_bulk_access;
		}

		/* fill in the ucred structure */
		flags = user_access_structp->flags;
		if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {

		/* check if we've been passed leaf node ids or parent ids */
		if (flags & PARENT_IDS_FLAG) {

		memset(&myucred, 0, sizeof(myucred));

		myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
		myucred.cr_ngroups = user_access_structp->num_groups;
		if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
			myucred.cr_ngroups = 0;
		} else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
				myucred.cr_ngroups * sizeof(gid_t)))) {
			goto err_exit_bulk_access;
		}
		myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];

		my_context.vc_proc = p;
		my_context.vc_ucred = &myucred;
		/* Check access to each file_id passed in */
		for (i = 0; i < num_files; i++) {
			cnid = (cnid_t) file_ids[i];

			/* root always has access */
			if (!suser(&myucred, NULL)) {

			/* do the lookup (checks the cnode hash, then the catalog) */
			error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
				access[i] = (short) error;

			/* before calling CheckAccess(), check the target file for read access */
			myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
					cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p);

			/* fail fast if no access */
			if ((myPerms & flags) == 0) {

				/* we were passed an array of parent ids */
				catkey.hfsPlus.parentID = cnid;

			/* if the last guy had the same parent and had access, we're done */
			if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {

			myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
					skip_cp, p, &myucred, dev);

				access[i] = 0;	// have access.. no errors to report
				access[i] = (error != 0 ? (short) error : EACCES);

			prevParent_cnid = catkey.hfsPlus.parentID;

				cnid = (cnid_t)file_ids[i];

				while (cnid >= kRootDirID) {
					/* get the vnode for this cnid */
					myErr = hfs_vget(hfsmp, cnid, &vp, 0);

					cnid = VTOC(vp)->c_parentcnid;

					hfs_unlock(VTOC(vp));
					if (vnode_vtype(vp) == VDIR) {
						myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
						myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
		}

		/* copyout the access array */
		if ((error = copyout((caddr_t)access, user_access_structp->access,
				num_files * sizeof (short)))) {
			goto err_exit_bulk_access;
		}

	err_exit_bulk_access:

		//printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);

		release_pathbuff((char *) cache.acache);
		release_pathbuff((char *) cache.haveaccess);
		release_pathbuff((char *) file_ids);
		release_pathbuff((char *) access);

	} /* HFS_BULKACCESS */
	case HFS_SETACLSTATE: {
		if (ap->a_data == NULL) {

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		state = *(int *)ap->a_data;

		// super-user can enable or disable acl's on a volume.
		// the volume owner can only enable acl's
		if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {

		if (state == 0 || state == 1)
			return hfs_setextendedsecurity(hfsmp, state);
	}

		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
		error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
		hfs_unlock(VTOC(vp));

		register struct cnode *cp;

		if (!vnode_isreg(vp))

		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
		/*
		 * used by regression test to determine if
		 * all the dirty pages (via write) have been cleaned
		 * after a call to 'fsync'.
		 */
		error = is_file_clean(vp, VTOF(vp)->ff_size);

		register struct radvisory *ra;
		struct filefork *fp;

		if (!vnode_isreg(vp))

		ra = (struct radvisory *)(ap->a_data);

		/* Protect against a size change. */
		hfs_lock_truncate(VTOC(vp), TRUE);

		if (ra->ra_offset >= fp->ff_size) {

		error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);

		hfs_unlock_truncate(VTOC(vp));
:
1344 case F_WRITEBOOTSTRAP
:
1346 struct vnode
*devvp
= NULL
;
1347 user_fbootstraptransfer_t
*user_bootstrapp
;
1351 daddr64_t blockNumber
;
1355 user_fbootstraptransfer_t user_bootstrap
;
1357 if (!vnode_isvroot(vp
))
1359 /* LP64 - when caller is a 64 bit process then we are passed a pointer
1360 * to a user_fbootstraptransfer_t else we get a pointer to a
1361 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1364 user_bootstrapp
= (user_fbootstraptransfer_t
*)ap
->a_data
;
1367 fbootstraptransfer_t
*bootstrapp
= (fbootstraptransfer_t
*)ap
->a_data
;
1368 user_bootstrapp
= &user_bootstrap
;
1369 user_bootstrap
.fbt_offset
= bootstrapp
->fbt_offset
;
1370 user_bootstrap
.fbt_length
= bootstrapp
->fbt_length
;
1371 user_bootstrap
.fbt_buffer
= CAST_USER_ADDR_T(bootstrapp
->fbt_buffer
);
1373 if (user_bootstrapp
->fbt_offset
+ user_bootstrapp
->fbt_length
> 1024)
1376 devvp
= VTOHFS(vp
)->hfs_devvp
;
1377 auio
= uio_create(1, user_bootstrapp
->fbt_offset
,
1378 is64bit
? UIO_USERSPACE64
: UIO_USERSPACE32
,
1379 (ap
->a_command
== F_WRITEBOOTSTRAP
) ? UIO_WRITE
: UIO_READ
);
1380 uio_addiov(auio
, user_bootstrapp
->fbt_buffer
, user_bootstrapp
->fbt_length
);
1382 devBlockSize
= vfs_devblocksize(vnode_mount(vp
));
1384 while (uio_resid(auio
) > 0) {
1385 blockNumber
= uio_offset(auio
) / devBlockSize
;
1386 error
= (int)buf_bread(devvp
, blockNumber
, devBlockSize
, cred
, &bp
);
1388 if (bp
) buf_brelse(bp
);
1393 blockOffset
= uio_offset(auio
) % devBlockSize
;
1394 xfersize
= devBlockSize
- blockOffset
;
1395 error
= uiomove((caddr_t
)buf_dataptr(bp
) + blockOffset
, (int)xfersize
, auio
);
1401 if (uio_rw(auio
) == UIO_WRITE
) {
1402 error
= VNOP_BWRITE(bp
);
	case _IOC(IOC_OUT,'h', 4, 0):	/* Create date in local time */
	{
			*(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
			*(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
	}

	case HFS_GET_MOUNT_TIME:
		return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));

	case HFS_GET_LAST_MTIME:
		return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));

	case HFS_SET_BOOT_INFO:
		if (!vnode_isvroot(vp))
		if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
			return(EACCES);	/* must be superuser or owner of filesystem */
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);

	case HFS_GET_BOOT_INFO:
		if (!vnode_isvroot(vp))
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);

	/* Should never get here */
int
hfs_vnop_select(__unused struct vnop_select_args *ap)
/*
	struct vnop_select_args {
		vfs_context_t a_context;
	};
*/
{
	/*
	 * We should really check to see if I/O is possible.
	 */
/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently it's 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
 */
int
hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
{
	struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	int retval = E_NONE;
	daddr_t logBlockSize;
	size_t bytesContAvail = 0;
	off_t blockposition;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */

	logBlockSize = GetLogicalBlockSize(vp);
	blockposition = (off_t)bn * (off_t)logBlockSize;

	lockExtBtree = overflow_extents(fp);

		lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);

	retval = MacToVFSError(
			MapFileBlockC (HFSTOVCB(hfsmp),

		hfs_systemfile_unlock(hfsmp, lockflags);

	if (retval == E_NONE) {
		/* Figure out how many read ahead blocks there are */
		if (can_cluster(logBlockSize)) {
			/* Make sure this result never goes negative: */
			*runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
		}
	}
/*
 * Convert logical block number to file offset.
 */
int
hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
/*
	struct vnop_blktooff_args {
	};
*/
{
	if (ap->a_vp == NULL)

	*ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);

/*
 * Convert file offset to logical block number.
 */
int
hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
/*
	struct vnop_offtoblk_args {
		daddr64_t *a_lblkno;
	};
*/
{
	if (ap->a_vp == NULL)

	*ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
/*
 * Map file offset to physical block number.
 *
 * System file cnodes are expected to be locked (shared or exclusive).
 */
int
hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
/*
	struct vnop_blockmap_args {
		vfs_context_t a_context;
	};
*/
{
	struct vnode *vp = ap->a_vp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	size_t bytesContAvail = 0;
	int retval = E_NONE;
	struct rl_entry *invalid_range;
	enum rl_overlaptype overlaptype;

	/* Do not allow blockmap operation on a directory */
	if (vnode_isdir(vp)) {

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (ap->a_bpn == NULL)

	if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
		if (VTOC(vp)->c_lockowner != current_thread()) {
			hfs_lock(VTOC(vp), HFS_FORCE_LOCK);

			panic("blockmap: %s cnode lock already held!\n",
				cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");

	if (fp->ff_unallocblocks) {
		if (hfs_start_transaction(hfsmp) != 0) {

		syslocks = SFL_EXTENTS | SFL_BITMAP;

	} else if (overflow_extents(fp)) {
		syslocks = SFL_EXTENTS;
	}

		lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);

	/*
	 * Check for any delayed allocations.
	 */
	if (fp->ff_unallocblocks) {
		u_int32_t loanedBlocks;

		//
		// Make sure we have a transaction.  It's possible
		// that we came in and fp->ff_unallocblocks was zero
		// but during the time we blocked acquiring the extents
		// btree, ff_unallocblocks became non-zero and so we
		// will need to start a transaction.
		//
		if (started_tr == 0) {
				hfs_systemfile_unlock(hfsmp, lockflags);

		/*
		 * Note: ExtendFileC will Release any blocks on loan and
		 * acquire real blocks.  So we ask to extend by zero bytes
		 * since ExtendFileC will account for the virtual blocks.
		 */
		loanedBlocks = fp->ff_unallocblocks;
		retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
				kEFAllMask | kEFNoClumpMask, &actbytes);

			fp->ff_unallocblocks = loanedBlocks;
			cp->c_blocks += loanedBlocks;
			fp->ff_blocks += loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);
			hfsmp->loanedBlocks += loanedBlocks;
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);

			hfs_systemfile_unlock(hfsmp, lockflags);
			cp->c_flag |= C_MODIFIED;

				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

				hfs_end_transaction(hfsmp);
	}

	retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
			       ap->a_bpn, &bytesContAvail);

		hfs_systemfile_unlock(hfsmp, lockflags);

			(void) hfs_update(vp, TRUE);
			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
			hfs_end_transaction(hfsmp);
	/* Adjust the mapping information for invalid file ranges: */
	overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
			      ap->a_foffset + (off_t)bytesContAvail - 1,

	if (overlaptype != RL_NOOVERLAP) {
		switch(overlaptype) {
		case RL_MATCHINGOVERLAP:
		case RL_OVERLAPCONTAINSRANGE:
		case RL_OVERLAPSTARTSBEFORE:
			/* There's no valid block for this byte offset: */
			*ap->a_bpn = (daddr64_t)-1;
			/* There's no point limiting the amount to be returned
			 * if the invalid range that was hit extends all the way
			 * to the EOF (i.e. there's no valid bytes between the
			 * end of this range and the file's EOF):
			 */
			if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
			    (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
				bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
			}

		case RL_OVERLAPISCONTAINED:
		case RL_OVERLAPENDSAFTER:
			/* The range of interest hits an invalid block before the end: */
			if (invalid_range->rl_start == ap->a_foffset) {
				/* There's actually no valid information to be had starting here: */
				*ap->a_bpn = (daddr64_t)-1;
				if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
				    (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
					bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
				}
			} else {
				bytesContAvail = invalid_range->rl_start - ap->a_foffset;
			}
		}
	}
	if (bytesContAvail > ap->a_size)
		bytesContAvail = ap->a_size;

		*ap->a_run = bytesContAvail;

		*(int *)ap->a_poff = 0;

	return (MacToVFSError(retval));
/*
 * prepare and issue the I/O
 * buf_strategy knows how to deal
 * with requests that require
 * fragmented I/Os
 */
int
hfs_vnop_strategy(struct vnop_strategy_args *ap)
{
	buf_t	bp = ap->a_bp;
	vnode_t	vp = buf_vnode(bp);
	struct cnode *cp = VTOC(vp);

	return (buf_strategy(cp->c_devvp, ap));
}
static int
do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
{
	register struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	struct proc *p = vfs_context_proc(context);
	kauth_cred_t cred = vfs_context_ucred(context);
	off_t actualBytesAdded;
	u_int64_t old_filesize;
	struct hfsmount *hfsmp;

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;
	old_filesize = fp->ff_size;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
		 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

	if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))

	/* Files that are changing size are not hot file candidates. */
	if (hfsmp->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
	if ((retval = hfs_getinoquota(cp)))

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of ff_size is 0, length will be at least 1.
	 */
	if (length > (off_t)fp->ff_size) {
		retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),

		/*
		 * If we don't have enough physical space then
		 * we need to extend the physical size.
		 */
		if (length > filebytes) {
			u_long blockHint = 0;

			/* All or nothing and don't round up to clumpsize. */
			eflags = kEFAllMask | kEFNoClumpMask;

			if (cred && suser(cred, NULL) != 0)
				eflags |= kEFReserveMask;  /* keep a reserve */

			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (filebytes == 0 &&
			    hfsmp->hfs_flags & HFS_METADATA_ZONE &&
			    hfs_virtualmetafile(cp)) {
				eflags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;
			}
			if (hfs_start_transaction(hfsmp) != 0) {

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			while ((length > filebytes) && (retval == E_NONE)) {
				bytesToAdd = length - filebytes;
				retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
						&actualBytesAdded));

				filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
				if (actualBytesAdded == 0 && retval == E_NONE) {
					if (length > filebytes)
				}
			}

			hfs_systemfile_unlock(hfsmp, lockflags);

				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);

		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
			(int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
		if (!(flags & IO_NOZEROFILL)) {
			if (UBCINFOEXISTS(vp) && retval == E_NONE) {
				struct rl_entry *invalid_range;

				zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
				if (length < zero_limit) zero_limit = length;

				if (length > (off_t)fp->ff_size) {

					/* Extending the file: time to fill out the current last page w. zeroes? */
					if ((fp->ff_size & PAGE_MASK_64) &&
					    (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
					    fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {

						/* There's some valid data at the start of the (current) last page
						   of the file, so zero out the remainder of that page to ensure the
						   entire page contains valid data.  Since there is no invalid range
						   possible past the (current) eof, there's no need to remove anything
						   from the invalid range list before calling cluster_write():	*/
						retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
								fp->ff_size, (off_t)0,
								(flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
						hfs_lock(cp, HFS_FORCE_LOCK);
						if (retval) goto Err_Exit;

						/* Merely invalidate the remaining area, if necessary: */
						if (length > zero_limit) {
							rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
							cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
						}
					} else {
					/* The page containing the (current) eof is invalid: just add the
					   remainder of the page to the invalid list, along with the area
					   being newly allocated:
					 */
					rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
					cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
					}
				}
			} else {
					panic("hfs_truncate: invoked on non-UBC object?!");
			}
		}
		cp->c_touch_modtime = TRUE;
		fp->ff_size = length;

		/* Nested transactions will do their own ubc_setsize. */
			/*
			 * ubc_setsize can cause a pagein here
			 * so we need to drop cnode lock.
			 */
			ubc_setsize(vp, length);
			hfs_lock(cp, HFS_FORCE_LOCK);
	} else { /* Shorten the size of the file */

		if ((off_t)fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).
			 */

			/* Nested transactions will do their own ubc_setsize. */
				/*
				 * ubc_setsize can cause a pageout here
				 * so we need to drop cnode lock.
				 */
				ubc_setsize(vp, length);
				hfs_lock(cp, HFS_FORCE_LOCK);

			/* Any space previously marked as invalid is now irrelevant: */
			rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
		}

		/*
		 * Account for any unmapped blocks. Note that the new
		 * file length can still end up with unmapped blocks.
		 */
		if (fp->ff_unallocblocks > 0) {
			u_int32_t finalblks;
			u_int32_t loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);

			loanedBlocks = fp->ff_unallocblocks;
			cp->c_blocks -= loanedBlocks;
			fp->ff_blocks -= loanedBlocks;
			fp->ff_unallocblocks = 0;

			hfsmp->loanedBlocks -= loanedBlocks;

			finalblks = (length + blksize - 1) / blksize;
			if (finalblks > fp->ff_blocks) {
				/* calculate required unmapped blocks */
				loanedBlocks = finalblks - fp->ff_blocks;
				hfsmp->loanedBlocks += loanedBlocks;

				fp->ff_unallocblocks = loanedBlocks;
				cp->c_blocks += loanedBlocks;
				fp->ff_blocks += loanedBlocks;
			}
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		}

		/*
		 * For a TBE process the deallocation of the file blocks is
		 * delayed until the file is closed.  And hfs_close calls
		 * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
		 * isn't set, we make sure this isn't a TBE process.
		 */
		if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {

			off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);

			if (hfs_start_transaction(hfsmp) != 0) {

			if (fp->ff_unallocblocks == 0) {
				/* Protect extents b-tree and allocation bitmap */
				lockflags = SFL_BITMAP;
				if (overflow_extents(fp))
					lockflags |= SFL_EXTENTS;
				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

				retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
						(FCB*)fp, length, false));

				hfs_systemfile_unlock(hfsmp, lockflags);
			}
				fp->ff_size = length;
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

			hfs_end_transaction(hfsmp);

			filebytes = (off_t)fp->ff_blocks * (off_t)blksize;

			/* These are bytesreleased */
			(void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
		}

		/* Only set update flag if the logical length changes */
		if (old_filesize != length)
			cp->c_touch_modtime = TRUE;
		fp->ff_size = length;
	}
	cp->c_touch_chgtime = TRUE;
	retval = hfs_update(vp, MNT_WAIT);

		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
			-1, -1, -1, retval, 0);

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
		 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
/*
 * Truncate a cnode to at most length size, freeing (or adding) the
 * disk blocks.
 */
int
hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
	     vfs_context_t context)
{
	struct filefork *fp = VTOF(vp);
	int blksize, error = 0;
	struct cnode *cp = VTOC(vp);

	if (vnode_isdir(vp))
		return (EISDIR);	/* cannot truncate an HFS directory! */

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	// have to loop truncating or growing files that are
	// really big because otherwise transactions can get
	// enormous and consume too many kernel resources.

	if (length < filebytes) {
		while (filebytes > length) {
			if ((filebytes - length) > HFS_BIGFILE_SIZE) {
				filebytes -= HFS_BIGFILE_SIZE;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
		}
	} else if (length > filebytes) {
		while (filebytes < length) {
			if ((length - filebytes) > HFS_BIGFILE_SIZE) {
				filebytes += HFS_BIGFILE_SIZE;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
		}
	} else /* Same logical size */ {

		error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
	}
	/* Files that are changing size are not hot file candidates. */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}
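	/*
	 * (The chunking above keeps each journaled transaction bounded: the
	 * fork is grown or shrunk by at most HFS_BIGFILE_SIZE bytes per
	 * do_hfs_truncate() call, with the final partial chunk handled by the
	 * last iteration of the loop.)
	 */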
/*
 * Preallocate file storage space.
 */
int
hfs_vnop_allocate(struct vnop_allocate_args /* {
		off_t *a_bytesallocated;
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode *vp = ap->a_vp;
	struct filefork *fp;
	off_t length = ap->a_length;
	off_t moreBytesRequested;
	off_t actualBytesAdded;
	int retval, retval2;
	UInt32 extendFlags;   /* For call to ExtendFileC */
	struct hfsmount *hfsmp;
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);

	*(ap->a_bytesallocated) = 0;

	if (!vnode_isreg(vp))
	if (length < (off_t)0)

	if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))

	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

	if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {

	/* Fill in the flags word for the call to Extend the file */

	extendFlags = kEFNoClumpMask;
	if (ap->a_flags & ALLOCATECONTIG)
		extendFlags |= kEFContigMask;
	if (ap->a_flags & ALLOCATEALL)
		extendFlags |= kEFAllMask;
	if (cred && suser(cred, NULL) != 0)
		extendFlags |= kEFReserveMask;

	startingPEOF = filebytes;

	if (ap->a_flags & ALLOCATEFROMPEOF)
		length += filebytes;
	else if (ap->a_flags & ALLOCATEFROMVOL)
		blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

	/* If no changes are necessary, then we're done */
	if (filebytes == length)

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of filebytes is 0, length will be at least 1.
	 */
	if (length > filebytes) {
		moreBytesRequested = length - filebytes;

		retval = hfs_chkdq(cp,
				(int64_t)(roundup(moreBytesRequested, vcb->blockSize)),

		/*
		 * Metadata zone checks.
		 */
		if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (hfs_virtualmetafile(cp)) {
				extendFlags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;
			} else if ((blockHint >= hfsmp->hfs_metazone_start) &&
				   (blockHint <= hfsmp->hfs_metazone_end)) {
				/*
				 * Move blockHint outside metadata zone.
				 */
				blockHint = hfsmp->hfs_metazone_end + 1;
			}
		}

		if (hfs_start_transaction(hfsmp) != 0) {

		/* Protect extents b-tree and allocation bitmap */
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

		retval = MacToVFSError(ExtendFileC(vcb,
						&actualBytesAdded));

		*(ap->a_bytesallocated) = actualBytesAdded;
		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

		hfs_systemfile_unlock(hfsmp, lockflags);

			(void) hfs_update(vp, TRUE);
			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

		hfs_end_transaction(hfsmp);

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes))

		/*
		 * Adjust actualBytesAdded to be allocation block aligned, not
		 * clump size aligned.
		 * NOTE: So what we are reporting does not affect reality
		 * until the file is closed, when we truncate the file to allocation
		 * block size.
		 */
		if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
			*(ap->a_bytesallocated) =
				roundup(moreBytesRequested, (off_t)vcb->blockSize);

	} else { /* Shorten the size of the file */

		if (fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).
			 */

		if (hfs_start_transaction(hfsmp) != 0) {

		/* Protect extents b-tree and allocation bitmap */
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

		retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));

		hfs_systemfile_unlock(hfsmp, lockflags);

		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

			(void) hfs_update(vp, TRUE);
			(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

		hfs_end_transaction(hfsmp);

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes)) goto Err_Exit;

		/* These are bytesreleased */
		(void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED, 0);

		if (fp->ff_size > filebytes) {
			fp->ff_size = filebytes;

			ubc_setsize(vp, fp->ff_size);
			hfs_lock(cp, HFS_FORCE_LOCK);
		}
	}

	cp->c_touch_chgtime = TRUE;
	cp->c_touch_modtime = TRUE;
	retval2 = hfs_update(vp, MNT_WAIT);

/*
 * Pagein for HFS filesystem
 */
int
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
    struct vnop_pagein_args {
        vnode_t       a_vp,
        upl_t         a_pl,
        vm_offset_t   a_pl_offset,
        off_t         a_f_offset,
        size_t        a_size,
        int           a_flags,
        vfs_context_t a_context;
    };
*/
{
    vnode_t vp = ap->a_vp;
    int error;

    error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
                           ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
    /*
     * Keep track of blocks read.
     */
    if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
        struct cnode *cp;
        struct filefork *fp;
        int bytesread;
        int took_cnode_lock = 0;

        cp = VTOC(vp);
        fp = VTOF(vp);

        if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
            bytesread = fp->ff_size;
        else
            bytesread = ap->a_size;

        /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
        if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
            hfs_lock(cp, HFS_FORCE_LOCK);
            took_cnode_lock = 1;
        }
        /*
         * If this file hasn't been seen since the start of
         * the current sampling period then start over.
         */
        if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
            struct timeval tv;

            fp->ff_bytesread = bytesread;
            microtime(&tv);
            cp->c_atime = tv.tv_sec;
        } else {
            fp->ff_bytesread += bytesread;
        }
        cp->c_touch_acctime = TRUE;
        if (took_cnode_lock)
            hfs_unlock(cp);
    }
    return (error);
}
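
/*
 * Note on the accounting above: ff_bytesread feeds the hot-file
 * clustering (HFC) recorder.  The cnode lock is presumably skipped in
 * the common case because a sampling counter that still fits in 32 bits
 * can tolerate an occasional lost update; once the sum would exceed
 * 32 bits the update is taken under the lock so the 64-bit value stays
 * consistent.
 */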

/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
    struct vnop_pageout_args {
        vnode_t       a_vp,
        upl_t         a_pl,
        vm_offset_t   a_pl_offset,
        off_t         a_f_offset,
        size_t        a_size,
        int           a_flags,
        vfs_context_t a_context;
    };
*/
{
    vnode_t vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    int retval;
    off_t end_of_range;
    off_t filesize;

    cp = VTOC(vp);
    if (cp->c_lockowner == current_thread()) {
        panic("pageout: %s cnode lock already held!\n",
              cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
    }
    if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
        if (!(ap->a_flags & UPL_NOCOMMIT)) {
            ubc_upl_abort_range(ap->a_pl,
                                ap->a_pl_offset,
                                ap->a_size,
                                UPL_ABORT_FREE_ON_EMPTY);
        }
        return (retval);
    }
    fp = VTOF(vp);

    filesize = fp->ff_size;
    end_of_range = ap->a_f_offset + ap->a_size - 1;

    if (end_of_range >= filesize) {
        end_of_range = (off_t)(filesize - 1);
    }
    if (ap->a_f_offset < filesize) {
        rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
        cp->c_flag |= C_MODIFIED;  /* leof is dirty */
    }
    hfs_unlock(cp);

    retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
                             ap->a_size, filesize, ap->a_flags);

    /*
     * If data was written, and setuid or setgid bits are set and
     * this process is not the superuser then clear the setuid and
     * setgid bits as a precaution against tampering.
     */
    if ((retval == 0) &&
        (cp->c_mode & (S_ISUID | S_ISGID)) &&
        (vfs_context_suser(ap->a_context) != 0)) {
        hfs_lock(cp, HFS_FORCE_LOCK);
        cp->c_mode &= ~(S_ISUID | S_ISGID);
        cp->c_touch_chgtime = TRUE;
        hfs_unlock(cp);
    }
    return (retval);
}
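
/*
 * The setuid/setgid precaution is repeated here (and not only in
 * hfs_vnop_write) presumably because pageouts cover writes made
 * through mmap(2), which never pass through the write vnop.
 */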

/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
    int retval = 0;
    register struct buf *bp = ap->a_bp;
    register struct vnode *vp = buf_vnode(bp);
    BlockDescriptor block;

    /* Trap B-Tree writes */
    if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
        (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
        (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {

        /*
         * Swap and validate the node if it is in native byte order.
         * This is always true on big endian, so we always validate
         * before writing here.  On little endian, the node typically has
         * been swapped and validated when it was written to the journal,
         * so we won't do anything here.
         */
        if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
            /* Prepare the block pointer */
            block.blockHeader = bp;
            block.buffer = (char *)buf_dataptr(bp);
            block.blockNum = buf_lblkno(bp);
            /* not found in cache ==> came from disk */
            block.blockReadFromDisk = (buf_fromcache(bp) == 0);
            block.blockSize = buf_count(bp);

            /* Endian un-swap B-Tree node */
            retval = hfs_swap_BTNode(&block, vp, kSwapBTNodeHostToBig);
            if (retval)
                panic("hfs_vnop_bwrite: about to write corrupt node!\n");
        }
    }

    /* This buffer shouldn't be locked anymore but if it is clear it */
    if ((buf_flags(bp) & B_LOCKED)) {
        if (VTOHFS(vp)->jnl) {
            panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
        }
        buf_clearflags(bp, B_LOCKED);
    }
    retval = vn_bwrite (ap);

    return (retval);
}
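
/*
 * About the 0x000e test above: the last two bytes of a B-tree node hold
 * the offset of the node's first record, which always immediately
 * follows the 14-byte node descriptor.  Reading 0x000e (14) there in
 * host byte order therefore indicates the node is still in host order
 * and must be swapped to big endian before it reaches the disk.
 */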

/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------
 * |///////////////|
 * -----------------
 * 0               N (file offset)
 *
 * -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 * -----------------     -----------------
 * 0               N     N+1             2N
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 * -----------------     -----------------
 * 0               N     N+1             2N
 *
 *                       -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                       -----------------
 *                       0               N (file offset)
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
__private_extern__
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
             struct proc *p)
{
    struct cnode *cp;
    struct filefork *fp;
    struct hfsmount *hfsmp;
    u_int32_t headblks;
    u_int32_t datablks;
    u_int32_t blksize;
    u_int32_t growsize;
    u_int32_t nextallocsave;
    daddr64_t sector_a, sector_b;
    int disabled_caching = 0;
    int eflags;
    off_t newbytes;
    int retval;
    int lockflags = 0;
    int took_trunc_lock = 0;
    int started_tr = 0;
    enum vtype vnodetype;

    vnodetype = vnode_vtype(vp);
    if (vnodetype != VREG && vnodetype != VLNK) {
        return (EPERM);
    }

    hfsmp = VTOHFS(vp);
    if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
        return (ENOSPC);
    }

    cp = VTOC(vp);
    fp = VTOF(vp);
    if (fp->ff_unallocblocks)
        return (EINVAL);
    blksize = hfsmp->blockSize;
    if (blockHint == 0)
        blockHint = hfsmp->nextAllocation;

    if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
        ((fp->ff_size > blksize) && vnodetype == VLNK)) {
        return (EFBIG);
    }

    //
    // We do not believe that this call to hfs_fsync() is
    // necessary and it causes a journal transaction
    // deadlock so we are removing it.
    //
    //if (vnodetype == VREG && !vnode_issystem(vp)) {
    //    retval = hfs_fsync(vp, MNT_WAIT, 0, p);
    //    if (retval)
    //        return (retval);
    //}

    if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
        hfs_unlock(cp);
        hfs_lock_truncate(cp, TRUE);
        if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
            hfs_unlock_truncate(cp);
            return (retval);
        }
        took_trunc_lock = 1;
    }
    headblks = fp->ff_blocks;
    datablks = howmany(fp->ff_size, blksize);
    growsize = datablks * blksize;
    eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
    if (blockHint >= hfsmp->hfs_metazone_start &&
        blockHint <= hfsmp->hfs_metazone_end)
        eflags |= kEFMetadataMask;
    if (hfs_start_transaction(hfsmp) != 0) {
        if (took_trunc_lock)
            hfs_unlock_truncate(cp);
        return (EINVAL);
    }
    started_tr = 1;
    /*
     * Protect the extents b-tree and the allocation bitmap
     * during MapFileBlockC and ExtendFileC operations.
     */
    lockflags = SFL_BITMAP;
    if (overflow_extents(fp))
        lockflags |= SFL_EXTENTS;
    lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

    retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
    if (retval) {
        retval = MacToVFSError(retval);
        goto out;
    }

    /*
     * STEP 1 - acquire new allocation blocks.
     */
    if (!vnode_isnocache(vp)) {
        vnode_setnocache(vp);
        disabled_caching = 1;
    }
    nextallocsave = hfsmp->nextAllocation;
    retval = ExtendFileC(hfsmp, (FCB *)fp, growsize, blockHint, eflags, &newbytes);
    if (eflags & kEFMetadataMask) {
        HFS_MOUNT_LOCK(hfsmp, TRUE);
        hfsmp->nextAllocation = nextallocsave;
        hfsmp->vcbFlags |= 0xFF00;
        HFS_MOUNT_UNLOCK(hfsmp, TRUE);
    }

    retval = MacToVFSError(retval);
    if (retval == 0) {
        cp->c_flag |= C_MODIFIED;
        if (newbytes < growsize) {
            retval = ENOSPC;
            goto restore;
        } else if (fp->ff_blocks < (headblks + datablks)) {
            printf("hfs_relocate: allocation failed");
            retval = ENOSPC;
            goto restore;
        }

        retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
        if (retval) {
            retval = MacToVFSError(retval);
        } else if ((sector_a + 1) == sector_b) {
            retval = ENOSPC;
            goto restore;
        } else if ((eflags & kEFMetadataMask) &&
                   ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
                      hfsmp->hfs_metazone_end)) {
            printf("hfs_relocate: didn't move into metadata zone\n");
            retval = ENOSPC;
            goto restore;
        }
    }
    /* Done with system locks and journal for now. */
    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;
    hfs_end_transaction(hfsmp);
    started_tr = 0;

    if (retval) {
        /*
         * Check to see if failure is due to excessive fragmentation.
         */
        if ((retval == ENOSPC) &&
            (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
            hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
        }
        goto out;
    }
    /*
     * STEP 2 - clone file data into the new allocation blocks.
     */

    if (vnodetype == VLNK)
        retval = hfs_clonelink(vp, blksize, cred, p);
    else if (vnode_issystem(vp))
        retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
    else
        retval = hfs_clonefile(vp, headblks, datablks, blksize);

    /* Start transaction for step 3 or for a restore. */
    if (hfs_start_transaction(hfsmp) != 0) {
        retval = EINVAL;
        goto out;
    }
    started_tr = 1;
    if (retval)
        goto restore;

    /*
     * STEP 3 - switch to cloned data and remove old blocks.
     */
    lockflags = SFL_BITMAP;
    if (overflow_extents(fp))
        lockflags |= SFL_EXTENTS;
    lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

    retval = HeadTruncateFile(hfsmp, (FCB *)fp, headblks);

    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;
    if (retval)
        goto restore;
out:
    if (took_trunc_lock)
        hfs_unlock_truncate(cp);

    if (lockflags) {
        hfs_systemfile_unlock(hfsmp, lockflags);
        lockflags = 0;
    }

    // See comment up above about calls to hfs_fsync()
    //
    //if (retval == 0 && vnodetype == VREG && !vnode_issystem(vp))
    //    retval = hfs_fsync(vp, MNT_WAIT, 0, p);

    if (hfsmp->jnl) {
        if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
            (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
        else
            (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
    }
exit:
    if (disabled_caching) {
        vnode_clearnocache(vp);
    }
    if (started_tr)
        hfs_end_transaction(hfsmp);

    return (retval);

restore:
    if (fp->ff_blocks == headblks)
        goto exit;
    /*
     * Give back any newly allocated space.
     */
    if (lockflags == 0) {
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
    }

    (void) TruncateFileC(hfsmp, (FCB *)fp, fp->ff_size, false);

    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;

    if (took_trunc_lock)
        hfs_unlock_truncate(cp);
    goto exit;
}
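
/*
 * Restore path note: truncating back to fp->ff_size cuts the fork at its
 * logical size, which discards only the newly appended clone blocks; the
 * original data blocks (offsets 0..N) are untouched, so a failed
 * relocation leaves the file exactly where it started.
 */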

/*
 * Clone a symlink.
 *
 */
static int
hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
{
    struct buf *head_bp = NULL;
    struct buf *tail_bp = NULL;
    int error;

    error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
    if (error)
        goto out;

    tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
    if (tail_bp == NULL) {
        error = EIO;
        goto out;
    }
    bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
    error = (int)buf_bwrite(tail_bp);
out:
    if (head_bp) {
        buf_markinvalid(head_bp);
        buf_brelse(head_bp);
    }
    (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);

    return (error);
}
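
/*
 * hfs_clonelink works a block at a time because hfs_relocate refuses
 * symlinks larger than one allocation block, so the entire link target
 * lives in logical block 0; copying it into logical block 1 (the first
 * of the freshly allocated blocks) is all STEP 2 requires before the
 * head truncate in STEP 3.
 */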

/*
 * Clone a file's data within the file.
 *
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
    caddr_t bufp;
    size_t  writebase;
    size_t  bufsize;
    size_t  copysize;
    size_t  iosize;
    off_t   filesize;
    size_t  offset;
    uio_t   auio;
    int     error = 0;

    filesize = VTOF(vp)->ff_blocks * blksize;  /* virtual file size */
    writebase = blkstart * blksize;
    copysize = blkcnt * blksize;
    iosize = bufsize = MIN(copysize, 4096 * 16);
    offset = 0;

    if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
        return (ENOMEM);
    }
    hfs_unlock(VTOC(vp));

    auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);

    while (offset < copysize) {
        iosize = MIN(copysize - offset, iosize);

        uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
        uio_addiov(auio, (uintptr_t)bufp, iosize);

        error = cluster_read(vp, auio, copysize, 0);
        if (error) {
            printf("hfs_clonefile: cluster_read failed - %d\n", error);
            break;
        }
        if (uio_resid(auio) != 0) {
            printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
            error = EIO;
            break;
        }

        uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
        uio_addiov(auio, (uintptr_t)bufp, iosize);

        error = cluster_write(vp, auio, filesize + offset,
                              filesize + offset + iosize,
                              uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
        if (error) {
            printf("hfs_clonefile: cluster_write failed - %d\n", error);
            break;
        }
        if (uio_resid(auio) != 0) {
            printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
            error = EIO;
            break;
        }
        offset += iosize;
    }
    uio_free(auio);

    /*
     * No need to call ubc_sync_range or hfs_invalbuf
     * since the file was copied using IO_NOCACHE.
     */

    kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

    hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
    return (error);
}
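
/*
 * The copy loop above moves at most 64 KB (4096 * 16 bytes) per pass and
 * writes with IO_NOCACHE | IO_SYNC, so the cloned data bypasses the UBC
 * and is on disk before hfs_relocate proceeds to the head truncate; that
 * is presumably why no ubc_sync_range/hfs_invalbuf pass is needed here.
 */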

/*
 * Clone a system (metadata) file.
 *
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
    caddr_t  bufp;
    char *   offset;
    size_t   bufsize;
    size_t   iosize;
    struct buf *bp = NULL;
    daddr64_t blkno;
    daddr64_t blk;
    daddr64_t start_blk;
    daddr64_t last_blk;
    int breadcnt;
    int i;
    int error = 0;

    iosize = GetLogicalBlockSize(vp);
    bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
    breadcnt = bufsize / iosize;

    if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
        return (ENOMEM);
    }
    start_blk = ((daddr64_t)blkstart * blksize) / iosize;
    last_blk  = ((daddr64_t)blkcnt * blksize) / iosize;
    blkno = 0;

    while (blkno < last_blk) {
        /*
         * Read up to a megabyte
         */
        offset = bufp;
        for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
            error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
            if (error) {
                printf("hfs_clonesysfile: meta_bread error %d\n", error);
                goto out;
            }
            if (buf_count(bp) != iosize) {
                printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
                goto out;
            }
            bcopy((char *)buf_dataptr(bp), offset, iosize);

            buf_markinvalid(bp);
            buf_brelse(bp);
            bp = NULL;

            offset += iosize;
        }

        /*
         * Write up to a megabyte
         */
        offset = bufp;
        for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
            bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
            if (bp == NULL) {
                printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
                error = EIO;
                goto out;
            }
            bcopy(offset, (char *)buf_dataptr(bp), iosize);
            error = (int)buf_bwrite(bp);
            bp = NULL;
            if (error)
                goto out;
            offset += iosize;
        }
    }
out:
    if (bp) {
        buf_brelse(bp);
    }

    kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

    error = hfs_fsync(vp, MNT_WAIT, 0, p);

    return (error);
}
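
/*
 * Sizing example for the staging buffer above (hypothetical numbers):
 * with blkcnt = 300, blksize = 4096 and a logical block size (iosize)
 * of 16384, blkcnt * blksize = 1228800 bytes, so bufsize is capped at
 * 1048576 and then rounded down to a multiple of iosize by the
 * "& ~(iosize - 1)" mask (1048576 is already a multiple of 16384),
 * giving breadcnt = 1048576 / 16384 = 64 buffers per pass.
 */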