/*
 * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* @(#)hfs_readwrite.c 1.0
 *
 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
 *
 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vnode_internal.h>
#include <sys/uio.h>
#include <sys/vfs_context.h>
#include <sys/fsevents.h>
#include <kern/kalloc.h>
#include <sys/disk.h>
#include <sys/sysctl.h>
#include <sys/fsctl.h>

#include <miscfs/specfs/specdev.h>

#include <sys/ubc.h>
#include <sys/ubc_internal.h>

#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>

#include <sys/kdebug.h>

#include "hfs.h"
#include "hfs_attrlist.h"
#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
#include "hfs_dbg.h"

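/*
 * can_cluster() gates cluster I/O: the transfer size must be an exact
 * multiple of 4096 bytes and no larger than MAXPHYSIO/2.
 */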
#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))

enum {
	MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
};

/* from bsd/hfs/hfs_vfsops.c */
extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
static int hfs_clonefile(struct vnode *, int, int, int);
static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
static int hfs_minorupdate(struct vnode *vp);
static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);


int flush_cache_on_write = 0;
SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW | CTLFLAG_LOCKED, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");

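/*
 * The SYSCTL_INT above makes the flag tunable at run time, e.g.:
 *	sysctl kern.flush_cache_on_write=1
 */
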
/*
 * Read data from a file.
 */
int
hfs_vnop_read(struct vnop_read_args *ap)
{
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	off_t filesize;
	off_t filebytes;
	off_t start_resid = uio_resid(uio);
	off_t offset = uio_offset(uio);
	int retval = 0;
	int took_truncate_lock = 0;

	/* Preflight checks */
	if (!vnode_isreg(vp)) {
		/* can only read regular files */
		if (vnode_isdir(vp))
			return (EISDIR);
		else
			return (EPERM);
	}
	if (start_resid == 0)
		return (0);		/* Nothing left to do */
	if (offset < 0)
		return (EINVAL);	/* can't read from a negative offset */

#if HFS_COMPRESSION
	if (VNODE_IS_RSRC(vp)) {
		if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
			return 0;
		}
		/* otherwise read the resource fork normally */
	} else {
		int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
		if (compressed) {
			retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
			if (compressed) {
				if (retval == 0) {
					/* successful read, update the access time */
					VTOC(vp)->c_touch_acctime = TRUE;

					/* compressed files are not hot file candidates */
					if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
						VTOF(vp)->ff_bytesread = 0;
					}
				}
				return retval;
			}
			/* otherwise the file was converted back to a regular file while we were reading it */
			retval = 0;
		} else if ((VTOC(vp)->c_flags & UF_COMPRESSED)) {
			int error;

			error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
			if (error) {
				return error;
			}

		}
	}
#endif /* HFS_COMPRESSION */

	cp = VTOC(vp);
	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);

#if CONFIG_PROTECT
	if ((retval = cp_handle_vnop (cp, CP_READ_ACCESS)) != 0) {
		goto exit;
	}
#endif

	/* Protect against a size change. */
	hfs_lock_truncate(cp, HFS_SHARED_LOCK);
	took_truncate_lock = 1;

	filesize = fp->ff_size;
	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
	if (offset > filesize) {
		if ((hfsmp->hfs_flags & HFS_STANDARD) &&
		    (offset > (off_t)MAXHFSFILESIZE)) {
			retval = EFBIG;
		}
		goto exit;
	}

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

	retval = cluster_read(vp, uio, filesize, ap->a_ioflag);

	cp->c_touch_acctime = TRUE;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

	/*
	 * Keep track of blocks read.
	 */
	if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
		int took_cnode_lock = 0;
		off_t bytesread;

		bytesread = start_resid - uio_resid(uio);

		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			took_cnode_lock = 1;
		}
		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < hfsmp->hfc_timebase) {
			struct timeval tv;

			fp->ff_bytesread = bytesread;
			microtime(&tv);
			cp->c_atime = tv.tv_sec;
		} else {
			fp->ff_bytesread += bytesread;
		}
		if (took_cnode_lock)
			hfs_unlock(cp);
	}
exit:
	if (took_truncate_lock) {
		hfs_unlock_truncate(cp, 0);
	}

	return (retval);
}
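
/*
 * Note on the hot-file accounting above: while hfc_stage == HFC_RECORDING,
 * ff_bytesread accumulates the bytes read during the current sampling
 * period and feeds hot-file adoption decisions.  Paths that disqualify a
 * file (compressed files above, and files that change size in
 * hfs_vnop_write below) simply reset it to zero.
 */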

/*
 * Write data to a file.
 */
int
hfs_vnop_write(struct vnop_write_args *ap)
{
	uio_t uio = ap->a_uio;
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	kauth_cred_t cred = NULL;
	off_t origFileSize;
	off_t writelimit;
	off_t bytesToAdd = 0;
	off_t actualBytesAdded;
	off_t filebytes;
	off_t offset;
	ssize_t resid;
	int eflags;
	int ioflag = ap->a_ioflag;
	int retval = 0;
	int lockflags;
	int cnode_locked = 0;
	int partialwrite = 0;
	int do_snapshot = 1;
	time_t orig_ctime=VTOC(vp)->c_ctime;
	int took_truncate_lock = 0;

#if HFS_COMPRESSION
	if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
		int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
		switch(state) {
			case FILE_IS_COMPRESSED:
				return EACCES;
			case FILE_IS_CONVERTING:
				/* if FILE_IS_CONVERTING, we allow writes but do not
				   bother with snapshots or else we will deadlock.
				 */
				do_snapshot = 0;
				break;
			default:
				printf("invalid state %d for compressed file\n", state);
				/* fall through */
		}
	} else if ((VTOC(vp)->c_flags & UF_COMPRESSED)) {
		int error;

		error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
		if (error != 0) {
			return error;
		}
	}

	if (do_snapshot) {
		check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);
	}

#endif

	// LP64todo - fix this! uio_resid may be 64-bit value
	resid = uio_resid(uio);
	offset = uio_offset(uio);

	if (offset < 0)
		return (EINVAL);
	if (resid == 0)
		return (E_NONE);
	if (!vnode_isreg(vp))
		return (EPERM);  /* Can only write regular files */

	cp = VTOC(vp);
	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);

#if CONFIG_PROTECT
	if ((retval = cp_handle_vnop (cp, CP_WRITE_ACCESS)) != 0) {
		goto exit;
	}
#endif

	eflags = kEFDeferMask;	/* defer file block allocations */
#if HFS_SPARSE_DEV
	/*
	 * When the underlying device is sparse and space
	 * is low (< 8MB), stop doing delayed allocations
	 * and begin doing synchronous I/O.
	 */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
	    (hfs_freeblks(hfsmp, 0) < 2048)) {
		eflags &= ~kEFDeferMask;
		ioflag |= IO_SYNC;
	}
#endif /* HFS_SPARSE_DEV */

again:
	/* Protect against a size change. */
	if (ioflag & IO_APPEND) {
		hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
	}
	else {
		hfs_lock_truncate(cp, HFS_SHARED_LOCK);
	}
	took_truncate_lock = 1;

	/* Update UIO */
	if (ioflag & IO_APPEND) {
		uio_setoffset(uio, fp->ff_size);
		offset = fp->ff_size;
	}
	if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
		retval = EPERM;
		goto exit;
	}

	origFileSize = fp->ff_size;
	writelimit = offset + resid;
	filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;

	/* If the truncate lock is shared, and if we either have virtual
	 * blocks or will need to extend the file, upgrade the truncate
	 * to exclusive lock.  If upgrade fails, we lose the lock and
	 * have to get exclusive lock again.  Note that we want to
	 * grab the truncate lock exclusive even if we're not allocating new blocks
	 * because we could still be growing past the LEOF.
	 */
	if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
	    ((fp->ff_unallocblocks != 0) || (writelimit > origFileSize))) {
		/* Lock upgrade failed and we lost our shared lock, try again */
		if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
			goto again;
		}
		else {
			/* Store the owner in the c_truncatelockowner field if we successfully upgrade */
			cp->c_truncatelockowner = current_thread();
		}
	}

	if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
		goto exit;
	}
	cnode_locked = 1;

	if (cp->c_truncatelockowner == HFS_SHARED_OWNER) {
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
			(int)offset, uio_resid(uio), (int)fp->ff_size,
			(int)filebytes, 0);
	}

	/* Check if we do not need to extend the file */
	if (writelimit <= filebytes) {
		goto sizeok;
	}

	cred = vfs_context_ucred(ap->a_context);
	bytesToAdd = writelimit - filebytes;

#if QUOTA
	retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
			   cred, 0);
	if (retval)
		goto exit;
#endif /* QUOTA */

	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto exit;
	}

	while (writelimit > filebytes) {
		bytesToAdd = writelimit - filebytes;
		if (cred && suser(cred, NULL) != 0)
			eflags |= kEFReserveMask;

		/* Protect extents b-tree and allocation bitmap */
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

		/* Files that are changing size are not hot file candidates. */
		if (hfsmp->hfc_stage == HFC_RECORDING) {
			fp->ff_bytesread = 0;
		}
		retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
				0, eflags, &actualBytesAdded));

		hfs_systemfile_unlock(hfsmp, lockflags);

		if ((actualBytesAdded == 0) && (retval == E_NONE))
			retval = ENOSPC;
		if (retval != E_NONE)
			break;
		filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
			(int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
	}
	(void) hfs_update(vp, TRUE);
	(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
	(void) hfs_end_transaction(hfsmp);

	/*
	 * If we didn't grow the file enough try a partial write.
	 * POSIX expects this behavior.
	 */
	if ((retval == ENOSPC) && (filebytes > offset)) {
		retval = 0;
		partialwrite = 1;
		uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
		resid -= bytesToAdd;
		writelimit = filebytes;
	}
sizeok:
	if (retval == E_NONE) {
		off_t filesize;
		off_t zero_off;
		off_t tail_off;
		off_t inval_start;
		off_t inval_end;
		off_t io_start;
		int lflag;
		struct rl_entry *invalid_range;

		if (writelimit > fp->ff_size)
			filesize = writelimit;
		else
			filesize = fp->ff_size;

		lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);

		if (offset <= fp->ff_size) {
			zero_off = offset & ~PAGE_MASK_64;

			/* Check the area between zero_off and the start of the
			   transfer to see whether it is invalid and should be
			   zero-filled as part of the transfer:
			 */
			if (offset > zero_off) {
				if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
					lflag |= IO_HEADZEROFILL;
			}
		} else {
			off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;

			/* The bytes between fp->ff_size and uio->uio_offset must never be
			   read without being zeroed.  The current last block is filled with zeroes
			   if it holds valid data but in all cases merely do a little bookkeeping
			   to track the area from the end of the current last page to the start of
			   the area actually written.  For the same reason only the bytes up to the
			   start of the page where this write will start is invalidated; any remainder
			   before uio->uio_offset is explicitly zeroed as part of the cluster_write.

			   Note that inval_start, the start of the page after the current EOF,
			   may be past the start of the write, in which case the zeroing
			   will be handled by the cluster_write of the actual data.
			 */
			inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
			inval_end = offset & ~PAGE_MASK_64;
			zero_off = fp->ff_size;

			if ((fp->ff_size & PAGE_MASK_64) &&
			    (rl_scan(&fp->ff_invalidranges,
					eof_page_base,
					fp->ff_size - 1,
					&invalid_range) != RL_NOOVERLAP)) {
				/* The page containing the EOF is not valid, so the
				   entire page must be made inaccessible now.  If the write
				   starts on a page beyond the page containing the eof
				   (inval_end > eof_page_base), add the
				   whole page to the range to be invalidated.  Otherwise
				   (i.e. if the write starts on the same page), zero-fill
				   the entire page explicitly now:
				 */
				if (inval_end > eof_page_base) {
					inval_start = eof_page_base;
				} else {
					zero_off = eof_page_base;
				};
			};

			if (inval_start < inval_end) {
				struct timeval tv;
				/* There's some range of data that's going to be marked invalid */

				if (zero_off < inval_start) {
					/* The pages between inval_start and inval_end are going to be invalidated,
					   and the actual write will start on a page past inval_end.  Now's the last
					   chance to zero-fill the page containing the EOF:
					 */
					hfs_unlock(cp);
					cnode_locked = 0;
					retval = cluster_write(vp, (uio_t) 0,
							fp->ff_size, inval_start,
							zero_off, (off_t)0,
							lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
					hfs_lock(cp, HFS_FORCE_LOCK);
					cnode_locked = 1;
					if (retval) goto ioerr_exit;
					offset = uio_offset(uio);
				};

				/* Mark the remaining area of the newly allocated space as invalid: */
				rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
				microuptime(&tv);
				cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
				zero_off = fp->ff_size = inval_end;
			};

			if (offset > zero_off) lflag |= IO_HEADZEROFILL;
		};

		/* Check to see whether the area between the end of the write and the end of
		   the page it falls in is invalid and should be zero-filled as part of the transfer:
		 */
		tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
		if (tail_off > filesize) tail_off = filesize;
		if (tail_off > writelimit) {
			if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
				lflag |= IO_TAILZEROFILL;
			};
		};

		/*
		 * if the write starts beyond the current EOF (possibly advanced in the
		 * zeroing of the last block, above), then we'll zero fill from the current EOF
		 * to where the write begins:
		 *
		 * NOTE: If (and ONLY if) the portion of the file about to be written is
		 * before the current EOF it might be marked as invalid now and must be
		 * made readable (removed from the invalid ranges) before cluster_write
		 * tries to write it:
		 */
		io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
		if (io_start < fp->ff_size) {
			off_t io_end;

			io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
			rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
		};

		hfs_unlock(cp);
		cnode_locked = 0;

		/*
		 * We need to tell UBC the fork's new size BEFORE calling
		 * cluster_write, in case any of the new pages need to be
		 * paged out before cluster_write completes (which does happen
		 * in embedded systems due to extreme memory pressure).
		 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
		 * will be, so that it can pass that on to cluster_pageout, and
		 * allow those pageouts.
		 *
		 * We don't update ff_size yet since we don't want pageins to
		 * be able to see uninitialized data between the old and new
		 * EOF, until cluster_write has completed and initialized that
		 * part of the file.
		 *
		 * The vnode pager relies on the file size last given to UBC via
		 * ubc_setsize.  hfs_vnop_pageout relies on fp->ff_new_size or
		 * ff_size (whichever is larger).  NOTE: ff_new_size is always
		 * zero, unless we are extending the file via write.
		 */
		if (filesize > fp->ff_size) {
			fp->ff_new_size = filesize;
			ubc_setsize(vp, filesize);
		}
		retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
				tail_off, lflag | IO_NOZERODIRTY);
		if (retval) {
			fp->ff_new_size = 0;	/* no longer extending; use ff_size */
			if (filesize > origFileSize) {
				ubc_setsize(vp, origFileSize);
			}
			goto ioerr_exit;
		}

		if (filesize > origFileSize) {
			fp->ff_size = filesize;

			/* Files that are changing size are not hot file candidates. */
			if (hfsmp->hfc_stage == HFC_RECORDING) {
				fp->ff_bytesread = 0;
			}
		}
		fp->ff_new_size = 0;	/* ff_size now has the correct size */

		/* If we wrote some bytes, then touch the change and mod times */
		if (resid > uio_resid(uio)) {
			cp->c_touch_chgtime = TRUE;
			cp->c_touch_modtime = TRUE;
		}
	}
	if (partialwrite) {
		uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
		resid += bytesToAdd;
	}

	// XXXdbg - see radar 4871353 for more info
	{
		if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
			VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
		}
	}

ioerr_exit:
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (cp->c_mode & (S_ISUID | S_ISGID)) {
		cred = vfs_context_ucred(ap->a_context);
		if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
			if (!cnode_locked) {
				hfs_lock(cp, HFS_FORCE_LOCK);
				cnode_locked = 1;
			}
			cp->c_mode &= ~(S_ISUID | S_ISGID);
		}
	}
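
	/*
	 * On error with IO_UNIT set, the write is rolled back so the caller
	 * sees all-or-nothing behavior: the fork is truncated back to its
	 * original size and the uio offset/resid are rewound.
	 */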
	if (retval) {
		if (ioflag & IO_UNIT) {
			if (!cnode_locked) {
				hfs_lock(cp, HFS_FORCE_LOCK);
				cnode_locked = 1;
			}
			(void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
					   0, 0, ap->a_context);
			// LP64todo - fix this!  resid needs to be user_ssize_t
			uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
			uio_setresid(uio, resid);
			filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
		}
	} else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
		if (!cnode_locked) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			cnode_locked = 1;
		}
		retval = hfs_update(vp, TRUE);
	}
	/* Updating vcbWrCnt doesn't need to be atomic. */
	hfsmp->vcbWrCnt++;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
		(int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
exit:
	if (cnode_locked)
		hfs_unlock(cp);

	if (took_truncate_lock) {
		hfs_unlock_truncate(cp, 0);
	}
	return (retval);
}

/* support for the "bulk-access" fcntl */

#define CACHE_LEVELS 16
#define NUM_CACHE_ENTRIES (64*16)
#define PARENT_IDS_FLAG 0x100

struct access_cache {
	int numcached;
	int cachehits; /* these two for statistics gathering */
	int lookups;
	unsigned int *acache;
	unsigned char *haveaccess;
};

struct access_t {
	uid_t uid;		/* IN: effective user id */
	short flags;		/* IN: access requested (i.e. R_OK) */
	short num_groups;	/* IN: number of groups user belongs to */
	int num_files;		/* IN: number of files to process */
	int *file_ids;		/* IN: array of file ids */
	gid_t *groups;		/* IN: array of groups */
	short *access;		/* OUT: access info for each file (0 for 'has access') */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_access_t {
	uid_t uid;		/* IN: effective user id */
	short flags;		/* IN: access requested (i.e. R_OK) */
	short num_groups;	/* IN: number of groups user belongs to */
	int num_files;		/* IN: number of files to process */
	user32_addr_t file_ids;	/* IN: array of file ids */
	user32_addr_t groups;	/* IN: array of groups */
	user32_addr_t access;	/* OUT: access info for each file (0 for 'has access') */
};

struct user64_access_t {
	uid_t uid;		/* IN: effective user id */
	short flags;		/* IN: access requested (i.e. R_OK) */
	short num_groups;	/* IN: number of groups user belongs to */
	int num_files;		/* IN: number of files to process */
	user64_addr_t file_ids;	/* IN: array of file ids */
	user64_addr_t groups;	/* IN: array of groups */
	user64_addr_t access;	/* OUT: access info for each file (0 for 'has access') */
};


// these are the "extended" versions of the above structures
// note that it is crucial that they be sized differently than
// the regular versions
struct ext_access_t {
	uint32_t flags;		/* IN: access requested (i.e. R_OK) */
	uint32_t num_files;	/* IN: number of files to process */
	uint32_t map_size;	/* IN: size of the bit map */
	uint32_t *file_ids;	/* IN: array of file ids */
	char *bitmap;		/* OUT: hash-bitmap of interesting directory ids */
	short *access;		/* OUT: access info for each file (0 for 'has access') */
	uint32_t num_parents;	/* future use */
	cnid_t *parents;	/* future use */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_ext_access_t {
	uint32_t flags;		/* IN: access requested (i.e. R_OK) */
	uint32_t num_files;	/* IN: number of files to process */
	uint32_t map_size;	/* IN: size of the bit map */
	user32_addr_t file_ids;	/* IN: array of file ids */
	user32_addr_t bitmap;	/* OUT: hash-bitmap of interesting directory ids */
	user32_addr_t access;	/* OUT: access info for each file (0 for 'has access') */
	uint32_t num_parents;	/* future use */
	user32_addr_t parents;	/* future use */
};

struct user64_ext_access_t {
	uint32_t flags;		/* IN: access requested (i.e. R_OK) */
	uint32_t num_files;	/* IN: number of files to process */
	uint32_t map_size;	/* IN: size of the bit map */
	user64_addr_t file_ids;	/* IN: array of file ids */
	user64_addr_t bitmap;	/* OUT: hash-bitmap of interesting directory ids */
	user64_addr_t access;	/* OUT: access info for each file (0 for 'has access') */
	uint32_t num_parents;	/* future use */
	user64_addr_t parents;	/* future use */
};

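/*
 * A hypothetical userland invocation of the extended bulk-access call
 * might look like the following sketch; it assumes the
 * HFS_EXT_BULKACCESS_FSCTL selector from hfs_fsctl.h, the fsctl(2)
 * wrapper, and a userland ext_access_t with plain pointers (the
 * reference-only layout above):
 *
 *	struct ext_access_t args = { 0 };
 *	uint32_t ids[2] = { id1, id2 };		// catalog node ids to test
 *	short results[2];
 *
 *	args.flags = R_OK;			// access being requested
 *	args.num_files = 2;
 *	args.file_ids = ids;
 *	args.access = results;
 *	fsctl("/Volumes/MyHFS", HFS_EXT_BULKACCESS_FSCTL, &args, 0);
 *
 * On return, results[i] is 0 if access would be granted and an errno
 * (e.g. EACCES) otherwise; see do_bulk_access_check() below.
 */
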
/*
 * Perform a binary search for the given parent_id. Return value is
 * the index if there is a match.  If no_match_indexp is non-NULL it
 * will be assigned with the index to insert the item (even if it was
 * not found).
 */
static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
{
	int index=-1;
	unsigned int lo=0;

	do {
		unsigned int mid = ((hi - lo)/2) + lo;
		unsigned int this_id = array[mid];

		if (parent_id == this_id) {
			hi = mid;
			break;
		}

		if (parent_id < this_id) {
			hi = mid;
			continue;
		}

		if (parent_id > this_id) {
			lo = mid + 1;
			continue;
		}
	} while(lo < hi);

	/* check if lo and hi converged on the match */
	if (parent_id == array[hi]) {
		index = hi;
	}

	if (no_match_indexp) {
		*no_match_indexp = hi;
	}

	return index;
}
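
/*
 * For example, searching { 2, 5, 9 } (hi == 2) for parent_id 7 returns -1
 * and sets *no_match_indexp to 2, the slot where 7 would be inserted;
 * searching for 5 returns index 1.
 */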


static int
lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
{
	unsigned int hi;
	int matches = 0;
	int index, no_match_index;

	if (cache->numcached == 0) {
		*indexp = 0;
		return 0;	// table is empty, so insert at index=0 and report no match
	}

	if (cache->numcached > NUM_CACHE_ENTRIES) {
		/*printf("hfs: EGAD! numcached is %d... cut our losses and trim to %d\n",
		  cache->numcached, NUM_CACHE_ENTRIES);*/
		cache->numcached = NUM_CACHE_ENTRIES;
	}

	hi = cache->numcached - 1;

	index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);

	/* if no existing entry found, find index for new one */
	if (index == -1) {
		index = no_match_index;
		matches = 0;
	} else {
		matches = 1;
	}

	*indexp = index;
	return matches;
}

/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in).  We currently do a replace rather
 * than an insert if the cache is full.
 */
static void
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
	int lookup_index = -1;

	/* need to do a lookup first if -1 passed for index */
	if (index == -1) {
		if (lookup_bucket(cache, &lookup_index, nodeID)) {
			if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
				// only update an entry if the previous access was ESRCH (i.e. a scope checking error)
				cache->haveaccess[lookup_index] = access;
			}

			/* mission accomplished */
			return;
		} else {
			index = lookup_index;
		}

	}

	/* if the cache is full, do a replace rather than an insert */
	if (cache->numcached >= NUM_CACHE_ENTRIES) {
		//printf("hfs: cache is full (%d). replace at index %d\n", cache->numcached, index);
		cache->numcached = NUM_CACHE_ENTRIES-1;

		if (index > cache->numcached) {
			// printf("hfs: index %d pinned to %d\n", index, cache->numcached);
			index = cache->numcached;
		}
	}

	if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
		index++;
	}

	if (index >= 0 && index < cache->numcached) {
		/* only do bcopy if we're inserting */
		bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
		bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
	}

	cache->acache[index] = nodeID;
	cache->haveaccess[index] = access;
	cache->numcached++;
}


struct cinfo {
	uid_t uid;
	gid_t gid;
	mode_t mode;
	cnid_t parentcnid;
	u_int16_t recflags;
};

static int
snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
{
	struct cinfo *cip = (struct cinfo *)arg;

	cip->uid = attrp->ca_uid;
	cip->gid = attrp->ca_gid;
	cip->mode = attrp->ca_mode;
	cip->parentcnid = descp->cd_parentcnid;
	cip->recflags = attrp->ca_recflags;

	return (0);
}

/*
 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
 * isn't incore, then go to the catalog.
 */
static int
do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
    struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
{
	int error = 0;

	/* if this id matches the one the fsctl was called with, skip the lookup */
	if (cnid == skip_cp->c_cnid) {
		cnattrp->ca_uid = skip_cp->c_uid;
		cnattrp->ca_gid = skip_cp->c_gid;
		cnattrp->ca_mode = skip_cp->c_mode;
		cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
		keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
	} else {
		struct cinfo c_info;

		/* otherwise, check the cnode hash in case the file/dir is incore */
		if (hfs_chash_snoop(hfsmp, cnid, snoop_callback, &c_info) == 0) {
			cnattrp->ca_uid = c_info.uid;
			cnattrp->ca_gid = c_info.gid;
			cnattrp->ca_mode = c_info.mode;
			cnattrp->ca_recflags = c_info.recflags;
			keyp->hfsPlus.parentID = c_info.parentcnid;
		} else {
			int lockflags;

			lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

			/* lookup this cnid in the catalog */
			error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);

			hfs_systemfile_unlock(hfsmp, lockflags);

			cache->lookups++;
		}
	}

	return (error);
}


/*
 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
 * up to CACHE_LEVELS as we progress towards the root.
 */
static int
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
    struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
    struct vfs_context *my_context,
    char *bitmap,
    uint32_t map_size,
    cnid_t* parents,
    uint32_t num_parents)
{
	int myErr = 0;
	int myResult;
	HFSCatalogNodeID thisNodeID;
	unsigned int myPerms;
	struct cat_attr cnattr;
	int cache_index = -1, scope_index = -1, scope_idx_start = -1;
	CatalogKey catkey;

	int i = 0, ids_to_cache = 0;
	int parent_ids[CACHE_LEVELS];

	thisNodeID = nodeID;
	while (thisNodeID >= kRootDirID) {
		myResult = 0;	/* default to "no access" */

		/* check the cache before resorting to hitting the catalog */

		/* ASSUMPTION: access info of cached entries is "final"... i.e. no need
		 * to look any further after hitting cached dir */

		if (lookup_bucket(cache, &cache_index, thisNodeID)) {
			cache->cachehits++;
			myErr = cache->haveaccess[cache_index];
			if (scope_index != -1) {
				if (myErr == ESRCH) {
					myErr = 0;
				}
			} else {
				scope_index = 0;	// so we'll just use the cache result
				scope_idx_start = ids_to_cache;
			}
			myResult = (myErr == 0) ? 1 : 0;
			goto ExitThisRoutine;
		}


		if (parents) {
			int tmp;
			tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
			if (scope_index == -1)
				scope_index = tmp;
			if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
				scope_idx_start = ids_to_cache;
			}
		}

		/* remember which parents we want to cache */
		if (ids_to_cache < CACHE_LEVELS) {
			parent_ids[ids_to_cache] = thisNodeID;
			ids_to_cache++;
		}
		// Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
		if (bitmap && map_size) {
			bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
		}


		/* do the lookup (checks the cnode hash, then the catalog) */
		myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
		if (myErr) {
			goto ExitThisRoutine;	/* no access */
		}

		/* Root always gets access. */
		if (suser(myp_ucred, NULL) == 0) {
			thisNodeID = catkey.hfsPlus.parentID;
			myResult = 1;
			continue;
		}

		// if the thing has acl's, do the full permission check
		if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
			struct vnode *vp;

			/* get the vnode for this cnid */
			myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
			if ( myErr ) {
				myResult = 0;
				goto ExitThisRoutine;
			}

			thisNodeID = VTOC(vp)->c_parentcnid;

			hfs_unlock(VTOC(vp));

			if (vnode_vtype(vp) == VDIR) {
				myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
			} else {
				myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
			}

			vnode_put(vp);
			if (myErr) {
				myResult = 0;
				goto ExitThisRoutine;
			}
		} else {
			unsigned int flags;
			int mode = cnattr.ca_mode & S_IFMT;
			myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp, myp_ucred, theProcPtr);

			if (mode == S_IFDIR) {
				flags = R_OK | X_OK;
			} else {
				flags = R_OK;
			}
			if ( (myPerms & flags) != flags) {
				myResult = 0;
				myErr = EACCES;
				goto ExitThisRoutine;	/* no access */
			}

			/* up the hierarchy we go */
			thisNodeID = catkey.hfsPlus.parentID;
		}
	}

	/* if here, we have access to this node */
	myResult = 1;

 ExitThisRoutine:
	if (parents && myErr == 0 && scope_index == -1) {
		myErr = ESRCH;
	}

	if (myErr) {
		myResult = 0;
	}
	*err = myErr;

	/* cache the parent directory(ies) */
	for (i = 0; i < ids_to_cache; i++) {
		if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
			add_node(cache, -1, parent_ids[i], ESRCH);
		} else {
			add_node(cache, -1, parent_ids[i], myErr);
		}
	}

	return (myResult);
}
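
/*
 * Note on the ESRCH convention used above and in add_node(): when the
 * caller supplies a list of parent scopes, an entry cached with ESRCH
 * means "accessible, but not within any of the supplied parent scopes";
 * add_node() will upgrade such an entry once a real access result is
 * seen for that directory.
 */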

static int
do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
    struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
{
	boolean_t is64bit;

	/*
	 * NOTE: on entry, the vnode is locked. In case this vnode
	 * happens to be in our list of file_ids, we'll note it so we
	 * avoid calling hfs_chashget_nowait() on that id, as that
	 * will cause a "locking against myself" panic.
	 */
	Boolean check_leaf = true;

	struct user64_ext_access_t *user_access_structp;
	struct user64_ext_access_t tmp_user_access;
	struct access_cache cache;

	int error = 0, prev_parent_check_ok=1;
	unsigned int i;

	short flags;
	unsigned int num_files = 0;
	int map_size = 0;
	int num_parents = 0;
	int *file_ids=NULL;
	short *access=NULL;
	char *bitmap=NULL;
	cnid_t *parents=NULL;
	int leaf_index;

	cnid_t cnid;
	cnid_t prevParent_cnid = 0;
	unsigned int myPerms;
	short myaccess = 0;
	struct cat_attr cnattr;
	CatalogKey catkey;
	struct cnode *skip_cp = VTOC(vp);
	kauth_cred_t cred = vfs_context_ucred(context);
	proc_t p = vfs_context_proc(context);

	is64bit = proc_is64bit(p);

	/* initialize the local cache and buffers */
	cache.numcached = 0;
	cache.cachehits = 0;
	cache.lookups = 0;
	cache.acache = NULL;
	cache.haveaccess = NULL;

	/* struct copyin done during dispatch... need to copy file_id array separately */
	if (ap->a_data == NULL) {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	if (is64bit) {
		if (arg_size != sizeof(struct user64_ext_access_t)) {
			error = EINVAL;
			goto err_exit_bulk_access;
		}

		user_access_structp = (struct user64_ext_access_t *)ap->a_data;

	} else if (arg_size == sizeof(struct user32_access_t)) {
		struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;

		// convert an old style bulk-access struct to the new style
		tmp_user_access.flags = accessp->flags;
		tmp_user_access.num_files = accessp->num_files;
		tmp_user_access.map_size = 0;
		tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
		tmp_user_access.bitmap = USER_ADDR_NULL;
		tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
		tmp_user_access.num_parents = 0;
		user_access_structp = &tmp_user_access;

	} else if (arg_size == sizeof(struct user32_ext_access_t)) {
		struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;

		// up-cast from a 32-bit version of the struct
		tmp_user_access.flags = accessp->flags;
		tmp_user_access.num_files = accessp->num_files;
		tmp_user_access.map_size = accessp->map_size;
		tmp_user_access.num_parents = accessp->num_parents;

		tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
		tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
		tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
		tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);

		user_access_structp = &tmp_user_access;
	} else {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	map_size = user_access_structp->map_size;

	num_files = user_access_structp->num_files;

	num_parents= user_access_structp->num_parents;

	if (num_files < 1) {
		goto err_exit_bulk_access;
	}
	if (num_files > 1024) {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	if (num_parents > 1024) {
		error = EINVAL;
		goto err_exit_bulk_access;
	}

	file_ids = (int *) kalloc(sizeof(int) * num_files);
	access = (short *) kalloc(sizeof(short) * num_files);
	if (map_size) {
		bitmap = (char *) kalloc(sizeof(char) * map_size);
	}

	if (num_parents) {
		parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
	}

	cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
	cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);

	if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
		if (file_ids) {
			kfree(file_ids, sizeof(int) * num_files);
		}
		if (bitmap) {
			kfree(bitmap, sizeof(char) * map_size);
		}
		if (access) {
			kfree(access, sizeof(short) * num_files);
		}
		if (cache.acache) {
			kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
		}
		if (cache.haveaccess) {
			kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
		}
		if (parents) {
			kfree(parents, sizeof(cnid_t) * num_parents);
		}
		return ENOMEM;
	}

	// make sure the bitmap is zero'ed out...
	if (bitmap) {
		bzero(bitmap, (sizeof(char) * map_size));
	}

	if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
				num_files * sizeof(int)))) {
		goto err_exit_bulk_access;
	}

	if (num_parents) {
		if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
					num_parents * sizeof(cnid_t)))) {
			goto err_exit_bulk_access;
		}
	}

	flags = user_access_structp->flags;
	if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
		flags = R_OK;
	}

	/* check if we've been passed leaf node ids or parent ids */
	if (flags & PARENT_IDS_FLAG) {
		check_leaf = false;
	}

	/* Check access to each file_id passed in */
	for (i = 0; i < num_files; i++) {
		leaf_index=-1;
		cnid = (cnid_t) file_ids[i];

		/* root always has access */
		if ((!parents) && (!suser(cred, NULL))) {
			access[i] = 0;
			continue;
		}

		if (check_leaf) {
			/* do the lookup (checks the cnode hash, then the catalog) */
			error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
			if (error) {
				access[i] = (short) error;
				continue;
			}

			if (parents) {
				// Check if the leaf matches one of the parent scopes
				leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
				if (leaf_index >= 0 && parents[leaf_index] == cnid)
					prev_parent_check_ok = 0;
				else if (leaf_index >= 0)
					prev_parent_check_ok = 1;
			}

			// if the thing has acl's, do the full permission check
			if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
				struct vnode *cvp;
				int myErr = 0;
				/* get the vnode for this cnid */
				myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);
				if ( myErr ) {
					access[i] = myErr;
					continue;
				}

				hfs_unlock(VTOC(cvp));

				if (vnode_vtype(cvp) == VDIR) {
					myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
				} else {
					myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
				}

				vnode_put(cvp);
				if (myErr) {
					access[i] = myErr;
					continue;
				}
			} else {
				/* before calling CheckAccess(), check the target file for read access */
				myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
				    cnattr.ca_mode, hfsmp->hfs_mp, cred, p);

				/* fail fast if no access */
				if ((myPerms & flags) == 0) {
					access[i] = EACCES;
					continue;
				}
			}
		} else {
			/* we were passed an array of parent ids */
			catkey.hfsPlus.parentID = cnid;
		}

		/* if the last guy had the same parent and had access, we're done */
		if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
			cache.cachehits++;
			access[i] = 0;
			continue;
		}

		myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
		    skip_cp, p, cred, context, bitmap, map_size, parents, num_parents);

		if (myaccess || (error == ESRCH && leaf_index != -1)) {
			access[i] = 0;	// have access.. no errors to report
		} else {
			access[i] = (error != 0 ? (short) error : EACCES);
		}

		prevParent_cnid = catkey.hfsPlus.parentID;
	}

	/* copyout the access array */
	if ((error = copyout((caddr_t)access, user_access_structp->access,
				num_files * sizeof (short)))) {
		goto err_exit_bulk_access;
	}
	if (map_size && bitmap) {
		if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
					map_size * sizeof (char)))) {
			goto err_exit_bulk_access;
		}
	}


 err_exit_bulk_access:

	//printf("hfs: on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);

	if (file_ids)
		kfree(file_ids, sizeof(int) * num_files);
	if (parents)
		kfree(parents, sizeof(cnid_t) * num_parents);
	if (bitmap)
		kfree(bitmap, sizeof(char) * map_size);
	if (access)
		kfree(access, sizeof(short) * num_files);
	if (cache.acache)
		kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
	if (cache.haveaccess)
		kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);

	return (error);
}


/* end "bulk-access" support */


/*
 * Callback for use with freeze ioctl.
 */
static int
hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
{
	vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");

	return 0;
}

/*
 * Control filesystem operating characteristics.
 */
int
hfs_vnop_ioctl( struct vnop_ioctl_args /* {
		vnode_t a_vp;
		int a_command;
		caddr_t a_data;
		int a_fflag;
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode * vp = ap->a_vp;
	struct hfsmount *hfsmp = VTOHFS(vp);
	vfs_context_t context = ap->a_context;
	kauth_cred_t cred = vfs_context_ucred(context);
	proc_t p = vfs_context_proc(context);
	struct vfsstatfs *vfsp;
	boolean_t is64bit;
	off_t jnl_start, jnl_size;
	struct hfs_journal_info *jip;
#if HFS_COMPRESSION
	int compressed = 0;
	off_t uncompressed_size = -1;
	int decmpfs_error = 0;

	if (ap->a_command == F_RDADVISE) {
		/* we need to inspect the decmpfs state of the file as early as possible */
		compressed = hfs_file_is_compressed(VTOC(vp), 0);
		if (compressed) {
			if (VNODE_IS_RSRC(vp)) {
				/* if this is the resource fork, treat it as if it were empty */
				uncompressed_size = 0;
			} else {
				decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
				if (decmpfs_error != 0) {
					/* failed to get the uncompressed size, we'll check for this later */
					uncompressed_size = -1;
				}
			}
		}
	}
#endif /* HFS_COMPRESSION */

	is64bit = proc_is64bit(p);

#if CONFIG_PROTECT
	{
		int error = 0;
		if ((error = cp_handle_vnop(VTOC(vp), CP_WRITE_ACCESS)) != 0) {
			return error;
		}
	}
#endif /* CONFIG_PROTECT */

	switch (ap->a_command) {

	case HFS_GETPATH:
	{
		struct vnode *file_vp;
		cnid_t cnid;
		int outlen;
		char *bufptr;
		int error;

		/* Caller must be owner of file system. */
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES);
		}
		/* Target vnode must be file system's root. */
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}
		bufptr = (char *)ap->a_data;
		cnid = strtoul(bufptr, NULL, 10);

		/* We need to call hfs_vfs_vget to leverage the code that will
		 * fix the origin list for us if needed, as opposed to calling
		 * hfs_vget, since we will need the parent for the build_path call.
		 */
		if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
			return (error);
		}
		error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);
		vnode_put(file_vp);

		return (error);
	}

	case HFS_PREV_LINK:
	case HFS_NEXT_LINK:
	{
		cnid_t linkfileid;
		cnid_t nextlinkid;
		cnid_t prevlinkid;
		int error;

		/* Caller must be owner of file system. */
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES);
		}
		/* Target vnode must be file system's root. */
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}
		linkfileid = *(cnid_t *)ap->a_data;
		if (linkfileid < kHFSFirstUserCatalogNodeID) {
			return (EINVAL);
		}
		if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
			return (error);
		}
		if (ap->a_command == HFS_NEXT_LINK) {
			*(cnid_t *)ap->a_data = nextlinkid;
		} else {
			*(cnid_t *)ap->a_data = prevlinkid;
		}
		return (0);
	}

	case HFS_RESIZE_PROGRESS: {

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}
		/* file system must not be mounted read-only */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}

		return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
	}

	case HFS_RESIZE_VOLUME: {
		u_int64_t newsize;
		u_int64_t cursize;

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}

		/* filesystem must not be mounted read only */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		newsize = *(u_int64_t *)ap->a_data;
		cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;

		if (newsize > cursize) {
			return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
		} else if (newsize < cursize) {
			return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
		} else {
			return (0);
		}
	}
	case HFS_CHANGE_NEXT_ALLOCATION: {
		int error = 0;		/* Assume success */
		u_int32_t location;

		if (vnode_vfsisrdonly(vp)) {
			return (EROFS);
		}
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		if (!vnode_isvroot(vp)) {
			return (EINVAL);
		}
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		location = *(u_int32_t *)ap->a_data;
		if ((location >= hfsmp->allocLimit) &&
			(location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
			error = EINVAL;
			goto fail_change_next_allocation;
		}
		/* Return previous value. */
		*(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
		if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
			/* On magic value for location, set nextAllocation to next block
			 * after metadata zone and set flag in mount structure to indicate
			 * that nextAllocation should not be updated again.
			 */
			if (hfsmp->hfs_metazone_end != 0) {
				HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
			}
			hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
		} else {
			hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
			HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
		}
		MarkVCBDirty(hfsmp);
fail_change_next_allocation:
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		return (error);
	}

#if HFS_SPARSE_DEV
	case HFS_SETBACKINGSTOREINFO: {
		struct vnode * bsfs_rootvp;
		struct vnode * di_vp;
		struct hfs_backingstoreinfo *bsdata;
		int error = 0;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			return (EALREADY);
		}
		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		if (suser(cred, NULL) &&
			kauth_cred_getuid(cred) != vfsp->f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
		if (bsdata == NULL) {
			return (EINVAL);
		}
		if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
			return (error);
		}
		if ((error = vnode_getwithref(di_vp))) {
			file_drop(bsdata->backingfd);
			return(error);
		}

		if (vnode_mount(vp) == vnode_mount(di_vp)) {
			(void)vnode_put(di_vp);
			file_drop(bsdata->backingfd);
			return (EINVAL);
		}

		/*
		 * Obtain the backing fs root vnode and keep a reference
		 * on it. This reference will be dropped in hfs_unmount.
		 */
		error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
		if (error) {
			(void)vnode_put(di_vp);
			file_drop(bsdata->backingfd);
			return (error);
		}
		vnode_ref(bsfs_rootvp);
		vnode_put(bsfs_rootvp);

		hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;

		hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
		/* The free extent cache is managed differently for sparse devices.
		 * There is a window between when the volume is mounted and when the
		 * device is marked as sparse, so the free extent cache for this
		 * volume is currently initialized as a normal volume (sorted by block
		 * count).  Reset the cache so that it will be rebuilt again
		 * for the sparse device (sorted by start block).
		 */
		ResetVCBFreeExtCache(hfsmp);

55e303ae
A
1712 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
1713 hfsmp->hfs_sparsebandblks *= 4;
1714
2d21ac55
A
1715 vfs_markdependency(hfsmp->hfs_mp);
1716
b0d623f7
A
1717 /*
1718 * If the sparse image is on a sparse image file (as opposed to a sparse
1719 * bundle), then we may need to limit the free space to the maximum size
1720 * of a file on that volume. So we query (using pathconf), and if we get
1721 * a meaningful result, we cache the number of blocks for later use in
1722 * hfs_freeblks().
1723 */
1724 hfsmp->hfs_backingfs_maxblocks = 0;
1725 if (vnode_vtype(di_vp) == VREG) {
1726 int terr;
1727 int hostbits;
1728 terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
1729 if (terr == 0 && hostbits != 0 && hostbits < 64) {
1730 u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;
1731
1732 hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
1733 }
1734 }
1735
91447636
A
1736 (void)vnode_put(di_vp);
1737 file_drop(bsdata->backingfd);
55e303ae
A
1738 return (0);
1739 }
1740 case HFS_CLRBACKINGSTOREINFO: {
55e303ae
A
1741 struct vnode * tmpvp;
1742
91447636
A
1743 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1744 if (suser(cred, NULL) &&
1745 kauth_cred_getuid(cred) != vfsp->f_owner) {
55e303ae
A
1746 return (EACCES); /* must be owner of file system */
1747 }
b0d623f7
A
1748 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1749 return (EROFS);
1750 }
1751
55e303ae
A
1752 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
1753 hfsmp->hfs_backingfs_rootvp) {
1754
1755 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
1756 tmpvp = hfsmp->hfs_backingfs_rootvp;
1757 hfsmp->hfs_backingfs_rootvp = NULLVP;
1758 hfsmp->hfs_sparsebandblks = 0;
91447636 1759 vnode_rele(tmpvp);
55e303ae
A
1760 }
1761 return (0);
1762 }
1763#endif /* HFS_SPARSE_DEV */
1764
91447636
A
1765 case F_FREEZE_FS: {
1766 struct mount *mp;
91447636 1767
91447636
A
1768 mp = vnode_mount(vp);
1769 hfsmp = VFSTOHFS(mp);
1770
1771 if (!(hfsmp->jnl))
1772 return (ENOTSUP);
3a60a9f5 1773
b0d623f7
A
1774 vfsp = vfs_statfs(mp);
1775
1776 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
1777 !kauth_cred_issuser(cred))
1778 return (EACCES);
1779
3a60a9f5 1780 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
91447636 1781
91447636
A
1782 // flush things before we get started to try and prevent
1783 // dirty data from being paged out while we're frozen.
1784 // note: can't do this after taking the lock as it will
1785 // deadlock against ourselves.
1786 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
6d2010ae 1787 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
b0d623f7
A
1788
1789 // DO NOT call hfs_journal_flush() because that takes a
1790 // shared lock on the global exclusive lock!
6d2010ae 1791 journal_flush(hfsmp->jnl, TRUE);
3a60a9f5 1792
91447636
A
1793 // don't need to iterate on all vnodes, we just need to
1794 // wait for writes to the system files and the device vnode
6d2010ae
A
1795 //
1796 // Now that journal flush waits for all metadata blocks to
1797 // be written out, waiting for btree writes is probably no
1798 // longer required.
91447636
A
1799 if (HFSTOVCB(hfsmp)->extentsRefNum)
1800 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1801 if (HFSTOVCB(hfsmp)->catalogRefNum)
1802 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1803 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1804 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1805 if (hfsmp->hfs_attribute_vp)
1806 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1807 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1808
1809 hfsmp->hfs_freezing_proc = current_proc();
1810
1811 return (0);
1812 }
1813
1814 case F_THAW_FS: {
b0d623f7
A
1815 vfsp = vfs_statfs(vnode_mount(vp));
1816 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
1817 !kauth_cred_issuser(cred))
91447636
A
1818 return (EACCES);
1819
1820 // if we're not the one who froze the fs then we
1821 // can't thaw it.
1822 if (hfsmp->hfs_freezing_proc != current_proc()) {
3a60a9f5 1823 return EPERM;
91447636
A
1824 }
1825
1826 // NOTE: if you add code here, also go check the
1827 // code that "thaws" the fs in hfs_vnop_close()
1828 //
1829 hfsmp->hfs_freezing_proc = NULL;
6d2010ae 1830 hfs_unlock_global (hfsmp);
3a60a9f5 1831 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
91447636
A
1832
1833 return (0);
1834 }
1835
2d21ac55
A
1836 case HFS_BULKACCESS_FSCTL: {
1837 int size;
1838
1839 if (hfsmp->hfs_flags & HFS_STANDARD) {
1840 return EINVAL;
1841 }
91447636 1842
2d21ac55 1843 if (is64bit) {
b0d623f7 1844 size = sizeof(struct user64_access_t);
2d21ac55 1845 } else {
b0d623f7 1846 size = sizeof(struct user32_access_t);
2d21ac55
A
1847 }
1848
1849 return do_bulk_access_check(hfsmp, vp, ap, size, context);
1850 }
91447636 1851
2d21ac55
A
1852 case HFS_EXT_BULKACCESS_FSCTL: {
1853 int size;
1854
1855 if (hfsmp->hfs_flags & HFS_STANDARD) {
1856 return EINVAL;
1857 }
91447636 1858
2d21ac55 1859 if (is64bit) {
b0d623f7 1860 size = sizeof(struct user64_ext_access_t);
2d21ac55 1861 } else {
b0d623f7 1862 size = sizeof(struct user32_ext_access_t);
2d21ac55
A
1863 }
1864
1865 return do_bulk_access_check(hfsmp, vp, ap, size, context);
1866 }
91447636 1867
2d21ac55
A
1868 case HFS_SET_XATTREXTENTS_STATE: {
1869 int state;
1870
1871 if (ap->a_data == NULL) {
1872 return (EINVAL);
1873 }
1874
1875 state = *(int *)ap->a_data;
b0d623f7
A
1876
1877 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1878 return (EROFS);
1879 }
2d21ac55
A
1880
1881 /* Super-user can enable or disable extent-based extended
1882 * attribute support on a volume
6d2010ae
A
1883 * Note: Starting Mac OS X 10.7, extent-based extended attributes
1884 * are enabled by default, so any change will be transient only
1885 * till the volume is remounted.
2d21ac55
A
1886 */
1887 if (!is_suser()) {
1888 return (EPERM);
1889 }
1890 if (state == 0 || state == 1)
1891 return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
91447636
A
1892 else
1893 return (EINVAL);
1894 }
1895
1896 case F_FULLFSYNC: {
55e303ae 1897 int error;
b0d623f7
A
1898
1899 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1900 return (EROFS);
1901 }
91447636
A
1902 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1903 if (error == 0) {
2d21ac55 1904 error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
91447636
A
1905 hfs_unlock(VTOC(vp));
1906 }
55e303ae
A
1907
1908 return error;
1909 }
91447636
A
1910
1911 case F_CHKCLEAN: {
9bccf70c 1912 register struct cnode *cp;
55e303ae
A
1913 int error;
1914
91447636 1915 if (!vnode_isreg(vp))
55e303ae
A
1916 return EINVAL;
1917
91447636
A
1918 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1919 if (error == 0) {
1920 cp = VTOC(vp);
1921 /*
1922 * used by regression test to determine if
1923 * all the dirty pages (via write) have been cleaned
1924 * after a call to 'fsysnc'.
1925 */
1926 error = is_file_clean(vp, VTOF(vp)->ff_size);
1927 hfs_unlock(cp);
1928 }
55e303ae
A
1929 return (error);
1930 }
1931
91447636 1932 case F_RDADVISE: {
9bccf70c
A
1933 register struct radvisory *ra;
1934 struct filefork *fp;
9bccf70c
A
1935 int error;
1936
91447636 1937 if (!vnode_isreg(vp))
9bccf70c
A
1938 return EINVAL;
1939
9bccf70c 1940 ra = (struct radvisory *)(ap->a_data);
9bccf70c
A
1941 fp = VTOF(vp);
1942
91447636 1943 /* Protect against a size change. */
6d2010ae 1944 hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK);
91447636 1945
b0d623f7
A
1946#if HFS_COMPRESSION
1947 if (compressed && (uncompressed_size == -1)) {
1948 /* fetching the uncompressed size failed above, so return the error */
1949 error = decmpfs_error;
1950 } else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
1951 (!compressed && (ra->ra_offset >= fp->ff_size))) {
1952 error = EFBIG;
1953 }
1954#else /* HFS_COMPRESSION */
9bccf70c 1955 if (ra->ra_offset >= fp->ff_size) {
91447636 1956 error = EFBIG;
b0d623f7
A
1957 }
1958#endif /* HFS_COMPRESSION */
1959 else {
91447636 1960 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
9bccf70c 1961 }
1c79356b 1962
6d2010ae 1963 hfs_unlock_truncate(VTOC(vp), 0);
9bccf70c 1964 return (error);
1c79356b 1965 }
1c79356b 1966
91447636
A
1967 case F_READBOOTSTRAP:
1968 case F_WRITEBOOTSTRAP:
1969 {
9bccf70c 1970 struct vnode *devvp = NULL;
91447636 1971 user_fbootstraptransfer_t *user_bootstrapp;
0b4e3aa0 1972 int devBlockSize;
1c79356b 1973 int error;
91447636
A
1974 uio_t auio;
1975 daddr64_t blockNumber;
b0d623f7
A
1976 u_int32_t blockOffset;
1977 u_int32_t xfersize;
1c79356b 1978 struct buf *bp;
91447636 1979 user_fbootstraptransfer_t user_bootstrap;
1c79356b 1980
91447636
A
1981 if (!vnode_isvroot(vp))
1982 return (EINVAL);
1983 /* LP64 - when caller is a 64 bit process then we are passed a pointer
1984 * to a user_fbootstraptransfer_t else we get a pointer to a
1985 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1986 */
6d2010ae
A
1987 if ((hfsmp->hfs_flags & HFS_READ_ONLY)
1988 && (ap->a_command == F_WRITEBOOTSTRAP)) {
b0d623f7
A
1989 return (EROFS);
1990 }
91447636
A
1991 if (is64bit) {
1992 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1993 }
1994 else {
6d2010ae 1995 user32_fbootstraptransfer_t *bootstrapp = (user32_fbootstraptransfer_t *)ap->a_data;
91447636
A
1996 user_bootstrapp = &user_bootstrap;
1997 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1998 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1999 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
2000 }
d41d1dae 2001
6d2010ae 2002 if ((user_bootstrapp->fbt_offset < 0) || (user_bootstrapp->fbt_offset > 1024) ||
d41d1dae
A
2003 (user_bootstrapp->fbt_length > 1024)) {
2004 return EINVAL;
2005 }
2006
91447636
A
2007 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
2008 return EINVAL;
1c79356b 2009
6d2010ae 2010 devvp = VTOHFS(vp)->hfs_devvp;
91447636
A
2011 auio = uio_create(1, user_bootstrapp->fbt_offset,
2012 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
2013 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
2014 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
2015
2016 devBlockSize = vfs_devblocksize(vnode_mount(vp));
2017
2018 while (uio_resid(auio) > 0) {
2019 blockNumber = uio_offset(auio) / devBlockSize;
2020 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
2021 if (error) {
2022 if (bp) buf_brelse(bp);
2023 uio_free(auio);
2024 return error;
2025 };
2026
2027 blockOffset = uio_offset(auio) % devBlockSize;
2028 xfersize = devBlockSize - blockOffset;
2029 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
2030 if (error) {
2031 buf_brelse(bp);
2032 uio_free(auio);
2033 return error;
2034 };
2035 if (uio_rw(auio) == UIO_WRITE) {
2036 error = VNOP_BWRITE(bp);
2037 if (error) {
2038 uio_free(auio);
2039 return error;
2040 }
2041 } else {
2042 buf_brelse(bp);
2043 };
2044 };
2045 uio_free(auio);
2046 };
2047 return 0;
2048
2049 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
2050 {
2051 if (is64bit) {
2052 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
2053 }
2054 else {
b0d623f7 2055 *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
91447636
A
2056 }
2057 return 0;
2058 }
2059
b0d623f7
A
2060 case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
2061 *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
2062 break;
2063
2064 case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
2065 *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
2066 break;
2067
2068 case HFS_FSCTL_SET_VERY_LOW_DISK:
2069 if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
2070 return EINVAL;
e2fac8b1 2071 }
91447636 2072
b0d623f7
A
2073 hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
2074 break;
2075
2076 case HFS_FSCTL_SET_LOW_DISK:
2077 if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
2078 || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
2079
2080 return EINVAL;
e2fac8b1 2081 }
b0d623f7
A
2082
2083 hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
2084 break;
2085
2086 case HFS_FSCTL_SET_DESIRED_DISK:
2087 if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
2088 return EINVAL;
2089 }
2090
2091 hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
2092 break;
2093
2094 case HFS_VOLUME_STATUS:
2095 *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
2096 break;
91447636
A
2097
2098 case HFS_SET_BOOT_INFO:
2099 if (!vnode_isvroot(vp))
2100 return(EINVAL);
2101 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
2102 return(EACCES); /* must be superuser or owner of filesystem */
b0d623f7
A
2103 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2104 return (EROFS);
2105 }
91447636
A
2106 HFS_MOUNT_LOCK(hfsmp, TRUE);
2107 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
2108 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2109 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
2110 break;
2111
2112 case HFS_GET_BOOT_INFO:
2113 if (!vnode_isvroot(vp))
2114 return(EINVAL);
2115 HFS_MOUNT_LOCK(hfsmp, TRUE);
2116 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
2117 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2118 break;
2119
2d21ac55
A
2120 case HFS_MARK_BOOT_CORRUPT:
2121 /* Mark the boot volume corrupt by setting
2122 * kHFSVolumeInconsistentBit in the volume header. This will
2123 * force fsck_hfs on next mount.
2124 */
2125 if (!is_suser()) {
2126 return EACCES;
2127 }
b0d623f7 2128
2d21ac55
A
2129 /* Allowed only on the root vnode of the boot volume */
2130 if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
2131 !vnode_isvroot(vp)) {
2132 return EINVAL;
2133 }
b0d623f7
A
2134 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2135 return (EROFS);
2136 }
2d21ac55
A
2137 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
2138 hfs_mark_volume_inconsistent(hfsmp);
2139 break;
2140
b0d623f7
A
2141 case HFS_FSCTL_GET_JOURNAL_INFO:
2142 jip = (struct hfs_journal_info*)ap->a_data;
2143
2144 if (vp == NULLVP)
2145 return EINVAL;
2146
2147 if (hfsmp->jnl == NULL) {
2148 jnl_start = 0;
2149 jnl_size = 0;
2150 } else {
2151 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
2152 jnl_size = (off_t)hfsmp->jnl_size;
2153 }
2154
2155 jip->jstart = jnl_start;
2156 jip->jsize = jnl_size;
2157 break;
2158
2159 case HFS_SET_ALWAYS_ZEROFILL: {
2160 struct cnode *cp = VTOC(vp);
2161
2162 if (*(int *)ap->a_data) {
2163 cp->c_flag |= C_ALWAYS_ZEROFILL;
2164 } else {
2165 cp->c_flag &= ~C_ALWAYS_ZEROFILL;
2166 }
2167 break;
2168 }
2169
6d2010ae
A
2170 case HFS_DISABLE_METAZONE: {
2171 /* Only root can disable metadata zone */
2172 if (!is_suser()) {
2173 return EACCES;
2174 }
2175 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2176 return (EROFS);
2177 }
2178
2179 /* Disable metadata zone now */
2180 (void) hfs_metadatazone_init(hfsmp, true);
2181 printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
2182 break;
2183 }
2184
91447636
A
2185 default:
2186 return (ENOTTY);
2187 }
1c79356b 2188
0b4e3aa0 2189 return 0;
1c79356b
A
2190}
2191
91447636
A
2192/*
2193 * select
2194 */
1c79356b 2195int
91447636
A
2196hfs_vnop_select(__unused struct vnop_select_args *ap)
2197/*
2198 struct vnop_select_args {
2199 vnode_t a_vp;
9bccf70c
A
2200 int a_which;
2201 int a_fflags;
9bccf70c 2202 void *a_wql;
91447636
A
2203 vfs_context_t a_context;
2204 };
2205*/
1c79356b 2206{
9bccf70c
A
2207 /*
2208 * We should really check to see if I/O is possible.
2209 */
2210 return (1);
1c79356b
A
2211}
2212
1c79356b
A
2213/*
2214 * Converts a logical block number to a physical block, and optionally returns
2215 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
2216 * The physical block number is based on the device block size, currently its 512.
2217 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
2218 */
1c79356b 2219int
2d21ac55 2220hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
1c79356b 2221{
9bccf70c
A
2222 struct filefork *fp = VTOF(vp);
2223 struct hfsmount *hfsmp = VTOHFS(vp);
91447636 2224 int retval = E_NONE;
2d21ac55 2225 u_int32_t logBlockSize;
91447636
A
2226 size_t bytesContAvail = 0;
2227 off_t blockposition;
2228 int lockExtBtree;
2229 int lockflags = 0;
1c79356b 2230
9bccf70c
A
2231 /*
2232 * Check for underlying vnode requests and ensure that logical
2233 * to physical mapping is requested.
2234 */
91447636 2235 if (vpp != NULL)
2d21ac55 2236 *vpp = hfsmp->hfs_devvp;
91447636 2237 if (bnp == NULL)
9bccf70c
A
2238 return (0);
2239
9bccf70c 2240 logBlockSize = GetLogicalBlockSize(vp);
2d21ac55 2241 blockposition = (off_t)bn * logBlockSize;
9bccf70c
A
2242
2243 lockExtBtree = overflow_extents(fp);
91447636
A
2244
2245 if (lockExtBtree)
2d21ac55 2246 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
1c79356b 2247
9bccf70c 2248 retval = MacToVFSError(
0b4e3aa0 2249 MapFileBlockC (HFSTOVCB(hfsmp),
9bccf70c 2250 (FCB*)fp,
0b4e3aa0
A
2251 MAXPHYSIO,
2252 blockposition,
91447636 2253 bnp,
0b4e3aa0 2254 &bytesContAvail));
1c79356b 2255
91447636
A
2256 if (lockExtBtree)
2257 hfs_systemfile_unlock(hfsmp, lockflags);
1c79356b 2258
91447636
A
2259 if (retval == E_NONE) {
2260 /* Figure out how many read ahead blocks there are */
2261 if (runp != NULL) {
2262 if (can_cluster(logBlockSize)) {
2263 /* Make sure this result never goes negative: */
2264 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
2265 } else {
2266 *runp = 0;
2267 }
2268 }
2269 }
2270 return (retval);
2271}
1c79356b 2272
91447636
A
2273/*
2274 * Convert logical block number to file offset.
2275 */
1c79356b 2276int
91447636
A
2277hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
2278/*
2279 struct vnop_blktooff_args {
2280 vnode_t a_vp;
2281 daddr64_t a_lblkno;
9bccf70c 2282 off_t *a_offset;
91447636
A
2283 };
2284*/
1c79356b
A
2285{
2286 if (ap->a_vp == NULL)
2287 return (EINVAL);
91447636 2288 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1c79356b
A
2289
2290 return(0);
2291}
2292
91447636
A
2293/*
2294 * Convert file offset to logical block number.
2295 */
1c79356b 2296int
91447636
A
2297hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
2298/*
2299 struct vnop_offtoblk_args {
2300 vnode_t a_vp;
9bccf70c 2301 off_t a_offset;
91447636
A
2302 daddr64_t *a_lblkno;
2303 };
2304*/
1c79356b 2305{
1c79356b
A
2306 if (ap->a_vp == NULL)
2307 return (EINVAL);
91447636 2308 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1c79356b
A
2309
2310 return(0);
2311}
2312
91447636
A
2313/*
2314 * Map file offset to physical block number.
2315 *
2d21ac55
A
2316 * If this function is called for write operation, and if the file
2317 * had virtual blocks allocated (delayed allocation), real blocks
2318 * are allocated by calling ExtendFileC().
2319 *
2320 * If this function is called for read operation, and if the file
2321 * had virtual blocks allocated (delayed allocation), no change
2322 * to the size of file is done, and if required, rangelist is
2323 * searched for mapping.
2324 *
91447636
A
2325 * System file cnodes are expected to be locked (shared or exclusive).
2326 */
1c79356b 2327int
91447636
A
2328hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
2329/*
2330 struct vnop_blockmap_args {
2331 vnode_t a_vp;
9bccf70c
A
2332 off_t a_foffset;
2333 size_t a_size;
91447636 2334 daddr64_t *a_bpn;
9bccf70c
A
2335 size_t *a_run;
2336 void *a_poff;
91447636
A
2337 int a_flags;
2338 vfs_context_t a_context;
2339 };
2340*/
1c79356b 2341{
91447636
A
2342 struct vnode *vp = ap->a_vp;
2343 struct cnode *cp;
2344 struct filefork *fp;
2345 struct hfsmount *hfsmp;
2346 size_t bytesContAvail = 0;
2347 int retval = E_NONE;
2348 int syslocks = 0;
2349 int lockflags = 0;
2350 struct rl_entry *invalid_range;
2351 enum rl_overlaptype overlaptype;
2352 int started_tr = 0;
2353 int tooklock = 0;
1c79356b 2354
b0d623f7
A
2355#if HFS_COMPRESSION
2356 if (VNODE_IS_RSRC(vp)) {
2357 /* allow blockmaps to the resource fork */
2358 } else {
2359 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
2360 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
2361 switch(state) {
2362 case FILE_IS_COMPRESSED:
2363 return ENOTSUP;
2364 case FILE_IS_CONVERTING:
2365 /* if FILE_IS_CONVERTING, we allow blockmap */
2366 break;
2367 default:
2368 printf("invalid state %d for compressed file\n", state);
2369 /* fall through */
2370 }
2371 }
2372 }
2373#endif /* HFS_COMPRESSION */
2374
3a60a9f5
A
2375 /* Do not allow blockmap operation on a directory */
2376 if (vnode_isdir(vp)) {
2377 return (ENOTSUP);
2378 }
2379
9bccf70c
A
2380 /*
2381 * Check for underlying vnode requests and ensure that logical
2382 * to physical mapping is requested.
2383 */
2384 if (ap->a_bpn == NULL)
2385 return (0);
2386
2d21ac55 2387 if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
91447636
A
2388 if (VTOC(vp)->c_lockowner != current_thread()) {
2389 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2390 tooklock = 1;
91447636
A
2391 }
2392 }
2393 hfsmp = VTOHFS(vp);
2394 cp = VTOC(vp);
2395 fp = VTOF(vp);
55e303ae 2396
91447636 2397retry:
2d21ac55
A
2398 /* Check virtual blocks only when performing write operation */
2399 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
91447636
A
2400 if (hfs_start_transaction(hfsmp) != 0) {
2401 retval = EINVAL;
2402 goto exit;
2403 } else {
2404 started_tr = 1;
b4c24cb9 2405 }
91447636
A
2406 syslocks = SFL_EXTENTS | SFL_BITMAP;
2407
b4c24cb9 2408 } else if (overflow_extents(fp)) {
91447636 2409 syslocks = SFL_EXTENTS;
9bccf70c 2410 }
91447636
A
2411
2412 if (syslocks)
2413 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1c79356b 2414
9bccf70c
A
2415 /*
2416 * Check for any delayed allocations.
2417 */
2d21ac55
A
2418 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
2419 int64_t actbytes;
91447636 2420 u_int32_t loanedBlocks;
1c79356b 2421
55e303ae 2422 //
d12e1678
A
2423 // Make sure we have a transaction. It's possible
2424 // that we came in and fp->ff_unallocblocks was zero
2425 // but during the time we blocked acquiring the extents
2426 // btree, ff_unallocblocks became non-zero and so we
2427 // will need to start a transaction.
2428 //
91447636
A
2429 if (started_tr == 0) {
2430 if (syslocks) {
2431 hfs_systemfile_unlock(hfsmp, lockflags);
2432 syslocks = 0;
2433 }
2434 goto retry;
d12e1678
A
2435 }
2436
9bccf70c 2437 /*
91447636
A
2438 * Note: ExtendFileC will Release any blocks on loan and
2439 * aquire real blocks. So we ask to extend by zero bytes
2440 * since ExtendFileC will account for the virtual blocks.
9bccf70c 2441 */
9bccf70c 2442
91447636
A
2443 loanedBlocks = fp->ff_unallocblocks;
2444 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
2445 kEFAllMask | kEFNoClumpMask, &actbytes);
2446
2447 if (retval) {
2448 fp->ff_unallocblocks = loanedBlocks;
2449 cp->c_blocks += loanedBlocks;
2450 fp->ff_blocks += loanedBlocks;
2451
2452 HFS_MOUNT_LOCK(hfsmp, TRUE);
2453 hfsmp->loanedBlocks += loanedBlocks;
2454 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1c79356b 2455
91447636
A
2456 hfs_systemfile_unlock(hfsmp, lockflags);
2457 cp->c_flag |= C_MODIFIED;
b4c24cb9 2458 if (started_tr) {
91447636
A
2459 (void) hfs_update(vp, TRUE);
2460 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
55e303ae 2461
91447636 2462 hfs_end_transaction(hfsmp);
2d21ac55 2463 started_tr = 0;
b4c24cb9 2464 }
91447636 2465 goto exit;
b4c24cb9 2466 }
9bccf70c
A
2467 }
2468
91447636
A
2469 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
2470 ap->a_bpn, &bytesContAvail);
2471 if (syslocks) {
2472 hfs_systemfile_unlock(hfsmp, lockflags);
2473 syslocks = 0;
2474 }
1c79356b 2475
b4c24cb9 2476 if (started_tr) {
91447636
A
2477 (void) hfs_update(vp, TRUE);
2478 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2479 hfs_end_transaction(hfsmp);
b4c24cb9 2480 started_tr = 0;
91447636
A
2481 }
2482 if (retval) {
2d21ac55
A
2483 /* On write, always return error because virtual blocks, if any,
2484 * should have been allocated in ExtendFileC(). We do not
2485 * allocate virtual blocks on read, therefore return error
2486 * only if no virtual blocks are allocated. Otherwise we search
2487 * rangelist for zero-fills
2488 */
2489 if ((MacToVFSError(retval) != ERANGE) ||
2490 (ap->a_flags & VNODE_WRITE) ||
2491 ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
2492 goto exit;
2493 }
2494
2495 /* Validate if the start offset is within logical file size */
2496 if (ap->a_foffset > fp->ff_size) {
2497 goto exit;
2498 }
2499
2500 /* Searching file extents has failed for read operation, therefore
2501 * search rangelist for any uncommitted holes in the file.
2502 */
2503 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
2504 ap->a_foffset + (off_t)(ap->a_size - 1),
2505 &invalid_range);
2506 switch(overlaptype) {
2507 case RL_OVERLAPISCONTAINED:
2508 /* start_offset <= rl_start, end_offset >= rl_end */
2509 if (ap->a_foffset != invalid_range->rl_start) {
2510 break;
2511 }
2512 case RL_MATCHINGOVERLAP:
2513 /* start_offset = rl_start, end_offset = rl_end */
2514 case RL_OVERLAPCONTAINSRANGE:
2515 /* start_offset >= rl_start, end_offset <= rl_end */
2516 case RL_OVERLAPSTARTSBEFORE:
2517 /* start_offset > rl_start, end_offset >= rl_start */
2518 if ((off_t)fp->ff_size > (invalid_range->rl_end + 1)) {
2519 bytesContAvail = (invalid_range->rl_end + 1) - ap->a_foffset;
2520 } else {
2521 bytesContAvail = fp->ff_size - ap->a_foffset;
2522 }
2523 if (bytesContAvail > ap->a_size) {
2524 bytesContAvail = ap->a_size;
2525 }
2526 *ap->a_bpn = (daddr64_t)-1;
2527 retval = 0;
2528 break;
2529 case RL_OVERLAPENDSAFTER:
2530 /* start_offset < rl_start, end_offset < rl_end */
2531 case RL_NOOVERLAP:
2532 break;
2533 }
91447636
A
2534 goto exit;
2535 }
1c79356b 2536
2d21ac55
A
2537 /* MapFileC() found a valid extent in the filefork. Search the
2538 * mapping information further for invalid file ranges
2539 */
91447636
A
2540 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
2541 ap->a_foffset + (off_t)bytesContAvail - 1,
2542 &invalid_range);
2543 if (overlaptype != RL_NOOVERLAP) {
2544 switch(overlaptype) {
2545 case RL_MATCHINGOVERLAP:
2546 case RL_OVERLAPCONTAINSRANGE:
2547 case RL_OVERLAPSTARTSBEFORE:
2d21ac55 2548 /* There's no valid block for this byte offset */
91447636
A
2549 *ap->a_bpn = (daddr64_t)-1;
2550 /* There's no point limiting the amount to be returned
2551 * if the invalid range that was hit extends all the way
2552 * to the EOF (i.e. there's no valid bytes between the
2553 * end of this range and the file's EOF):
2554 */
2555 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
b0d623f7 2556 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
91447636
A
2557 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
2558 }
2559 break;
9bccf70c 2560
91447636
A
2561 case RL_OVERLAPISCONTAINED:
2562 case RL_OVERLAPENDSAFTER:
2563 /* The range of interest hits an invalid block before the end: */
2564 if (invalid_range->rl_start == ap->a_foffset) {
2565 /* There's actually no valid information to be had starting here: */
2566 *ap->a_bpn = (daddr64_t)-1;
2567 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
b0d623f7 2568 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
91447636
A
2569 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
2570 }
2571 } else {
2572 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
2573 }
9bccf70c 2574 break;
1c79356b 2575
91447636 2576 case RL_NOOVERLAP:
9bccf70c 2577 break;
91447636
A
2578 } /* end switch */
2579 if (bytesContAvail > ap->a_size)
2580 bytesContAvail = ap->a_size;
2d21ac55
A
2581 }
2582
2583exit:
2584 if (retval == 0) {
2585 if (ap->a_run)
2586 *ap->a_run = bytesContAvail;
2587
2588 if (ap->a_poff)
2589 *(int *)ap->a_poff = 0;
9bccf70c 2590 }
91447636 2591
91447636
A
2592 if (tooklock)
2593 hfs_unlock(cp);
2594
2595 return (MacToVFSError(retval));
1c79356b
A
2596}
2597
9bccf70c 2598
1c79356b 2599/*
91447636
A
2600 * prepare and issue the I/O
2601 * buf_strategy knows how to deal
2602 * with requests that require
2603 * fragmented I/Os
2604 */
1c79356b 2605int
91447636 2606hfs_vnop_strategy(struct vnop_strategy_args *ap)
1c79356b 2607{
91447636
A
2608 buf_t bp = ap->a_bp;
2609 vnode_t vp = buf_vnode(bp);
6d2010ae
A
2610 int error = 0;
2611
2612#if CONFIG_PROTECT
2613 cnode_t *cp = NULL;
2614
2615 if ((cp = cp_get_protected_cnode(vp)) != NULL) {
2616 /*
2617 * Some paths to hfs_vnop_strategy will take the cnode lock,
2618 * and some won't. But since content protection is only enabled
2619 * for files that (a) aren't system files and (b) are regular
2620 * files, any valid cnode here will be unlocked.
2621 */
2622 hfs_lock(cp, HFS_SHARED_LOCK);
2623 buf_setcpaddr(bp, cp->c_cpentry);
2624 }
2625#endif /* CONFIG_PROTECT */
2626
2627 error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
1c79356b 2628
6d2010ae
A
2629#if CONFIG_PROTECT
2630 if (cp) {
2631 hfs_unlock(cp);
2632 }
2633#endif
2634
2635 return error;
1c79356b
A
2636}
2637
b0d623f7
A
2638static int
2639hfs_minorupdate(struct vnode *vp) {
2640 struct cnode *cp = VTOC(vp);
2641 cp->c_flag &= ~C_MODIFIED;
2642 cp->c_touch_acctime = 0;
2643 cp->c_touch_chgtime = 0;
2644 cp->c_touch_modtime = 0;
2645
2646 return 0;
2647}
1c79356b 2648
6d2010ae 2649int
b0d623f7 2650do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context)
1c79356b 2651{
9bccf70c
A
2652 register struct cnode *cp = VTOC(vp);
2653 struct filefork *fp = VTOF(vp);
91447636
A
2654 struct proc *p = vfs_context_proc(context);;
2655 kauth_cred_t cred = vfs_context_ucred(context);
9bccf70c
A
2656 int retval;
2657 off_t bytesToAdd;
2658 off_t actualBytesAdded;
2659 off_t filebytes;
b0d623f7 2660 u_int32_t fileblocks;
9bccf70c 2661 int blksize;
b4c24cb9 2662 struct hfsmount *hfsmp;
91447636 2663 int lockflags;
9bccf70c 2664
9bccf70c
A
2665 blksize = VTOVCB(vp)->blockSize;
2666 fileblocks = fp->ff_blocks;
2667 filebytes = (off_t)fileblocks * (off_t)blksize;
2668
2669 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
2670 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
2671
2672 if (length < 0)
2673 return (EINVAL);
1c79356b 2674
8f6c56a5
A
2675 /* This should only happen with a corrupt filesystem */
2676 if ((off_t)fp->ff_size < 0)
2677 return (EINVAL);
2678
9bccf70c
A
2679 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
2680 return (EFBIG);
1c79356b 2681
b4c24cb9 2682 hfsmp = VTOHFS(vp);
1c79356b 2683
9bccf70c 2684 retval = E_NONE;
1c79356b 2685
55e303ae
A
2686 /* Files that are changing size are not hot file candidates. */
2687 if (hfsmp->hfc_stage == HFC_RECORDING) {
2688 fp->ff_bytesread = 0;
2689 }
2690
9bccf70c
A
2691 /*
2692 * We cannot just check if fp->ff_size == length (as an optimization)
2693 * since there may be extra physical blocks that also need truncation.
2694 */
2695#if QUOTA
91447636 2696 if ((retval = hfs_getinoquota(cp)))
9bccf70c
A
2697 return(retval);
2698#endif /* QUOTA */
1c79356b 2699
9bccf70c
A
2700 /*
2701 * Lengthen the size of the file. We must ensure that the
2702 * last byte of the file is allocated. Since the smallest
2703 * value of ff_size is 0, length will be at least 1.
2704 */
91447636 2705 if (length > (off_t)fp->ff_size) {
9bccf70c 2706#if QUOTA
b4c24cb9 2707 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
91447636 2708 cred, 0);
9bccf70c
A
2709 if (retval)
2710 goto Err_Exit;
2711#endif /* QUOTA */
2712 /*
2713 * If we don't have enough physical space then
2714 * we need to extend the physical size.
2715 */
2716 if (length > filebytes) {
2717 int eflags;
b0d623f7 2718 u_int32_t blockHint = 0;
1c79356b 2719
9bccf70c
A
2720 /* All or nothing and don't round up to clumpsize. */
2721 eflags = kEFAllMask | kEFNoClumpMask;
1c79356b 2722
91447636 2723 if (cred && suser(cred, NULL) != 0)
9bccf70c 2724 eflags |= kEFReserveMask; /* keep a reserve */
1c79356b 2725
55e303ae
A
2726 /*
2727 * Allocate Journal and Quota files in metadata zone.
2728 */
2729 if (filebytes == 0 &&
2730 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
2731 hfs_virtualmetafile(cp)) {
2732 eflags |= kEFMetadataMask;
2733 blockHint = hfsmp->hfs_metazone_start;
2734 }
91447636
A
2735 if (hfs_start_transaction(hfsmp) != 0) {
2736 retval = EINVAL;
2737 goto Err_Exit;
b4c24cb9
A
2738 }
2739
91447636
A
2740 /* Protect extents b-tree and allocation bitmap */
2741 lockflags = SFL_BITMAP;
2742 if (overflow_extents(fp))
2743 lockflags |= SFL_EXTENTS;
2744 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1c79356b 2745
9bccf70c
A
2746 while ((length > filebytes) && (retval == E_NONE)) {
2747 bytesToAdd = length - filebytes;
2748 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
2749 (FCB*)fp,
1c79356b 2750 bytesToAdd,
55e303ae 2751 blockHint,
9bccf70c 2752 eflags,
1c79356b
A
2753 &actualBytesAdded));
2754
9bccf70c
A
2755 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2756 if (actualBytesAdded == 0 && retval == E_NONE) {
2757 if (length > filebytes)
2758 length = filebytes;
2759 break;
2760 }
2761 } /* endwhile */
b4c24cb9 2762
91447636 2763 hfs_systemfile_unlock(hfsmp, lockflags);
b4c24cb9 2764
b4c24cb9 2765 if (hfsmp->jnl) {
b0d623f7
A
2766 if (skipupdate) {
2767 (void) hfs_minorupdate(vp);
2768 }
2769 else {
2770 (void) hfs_update(vp, TRUE);
2771 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2772 }
91447636 2773 }
55e303ae 2774
91447636 2775 hfs_end_transaction(hfsmp);
b4c24cb9 2776
9bccf70c
A
2777 if (retval)
2778 goto Err_Exit;
2779
2780 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2781 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1c79356b 2782 }
1c79356b 2783
91447636 2784 if (!(flags & IO_NOZEROFILL)) {
2d21ac55 2785 if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
9bccf70c 2786 struct rl_entry *invalid_range;
9bccf70c 2787 off_t zero_limit;
0b4e3aa0 2788
9bccf70c
A
2789 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
2790 if (length < zero_limit) zero_limit = length;
2791
91447636
A
2792 if (length > (off_t)fp->ff_size) {
2793 struct timeval tv;
2794
9bccf70c
A
2795 /* Extending the file: time to fill out the current last page w. zeroes? */
2796 if ((fp->ff_size & PAGE_MASK_64) &&
2797 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
2798 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
0b4e3aa0
A
2799
2800 /* There's some valid data at the start of the (current) last page
2801 of the file, so zero out the remainder of that page to ensure the
2802 entire page contains valid data. Since there is no invalid range
2803 possible past the (current) eof, there's no need to remove anything
91447636
A
2804 from the invalid range list before calling cluster_write(): */
2805 hfs_unlock(cp);
9bccf70c 2806 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
91447636
A
2807 fp->ff_size, (off_t)0,
2808 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
2809 hfs_lock(cp, HFS_FORCE_LOCK);
0b4e3aa0
A
2810 if (retval) goto Err_Exit;
2811
2812 /* Merely invalidate the remaining area, if necessary: */
9bccf70c 2813 if (length > zero_limit) {
91447636 2814 microuptime(&tv);
9bccf70c 2815 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
91447636 2816 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
9bccf70c
A
2817 }
2818 } else {
0b4e3aa0
A
2819 /* The page containing the (current) eof is invalid: just add the
2820 remainder of the page to the invalid list, along with the area
2821 being newly allocated:
2822 */
91447636 2823 microuptime(&tv);
9bccf70c 2824 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
91447636 2825 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
9bccf70c
A
2826 };
2827 }
2828 } else {
2829 panic("hfs_truncate: invoked on non-UBC object?!");
2830 };
2831 }
91447636 2832 cp->c_touch_modtime = TRUE;
9bccf70c 2833 fp->ff_size = length;
0b4e3aa0 2834
9bccf70c 2835 } else { /* Shorten the size of the file */
0b4e3aa0 2836
91447636 2837 if ((off_t)fp->ff_size > length) {
9bccf70c
A
2838 /* Any space previously marked as invalid is now irrelevant: */
2839 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2840 }
1c79356b 2841
9bccf70c
A
2842 /*
2843 * Account for any unmapped blocks. Note that the new
2844 * file length can still end up with unmapped blocks.
2845 */
2846 if (fp->ff_unallocblocks > 0) {
2847 u_int32_t finalblks;
91447636 2848 u_int32_t loanedBlocks;
1c79356b 2849
91447636
A
2850 HFS_MOUNT_LOCK(hfsmp, TRUE);
2851
2852 loanedBlocks = fp->ff_unallocblocks;
2853 cp->c_blocks -= loanedBlocks;
2854 fp->ff_blocks -= loanedBlocks;
2855 fp->ff_unallocblocks = 0;
1c79356b 2856
91447636 2857 hfsmp->loanedBlocks -= loanedBlocks;
9bccf70c
A
2858
2859 finalblks = (length + blksize - 1) / blksize;
2860 if (finalblks > fp->ff_blocks) {
2861 /* calculate required unmapped blocks */
91447636
A
2862 loanedBlocks = finalblks - fp->ff_blocks;
2863 hfsmp->loanedBlocks += loanedBlocks;
2864
2865 fp->ff_unallocblocks = loanedBlocks;
2866 cp->c_blocks += loanedBlocks;
2867 fp->ff_blocks += loanedBlocks;
9bccf70c 2868 }
91447636 2869 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
9bccf70c 2870 }
1c79356b 2871
9bccf70c
A
2872 /*
2873 * For a TBE process the deallocation of the file blocks is
2874 * delayed until the file is closed. And hfs_close calls
2875 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2876 * isn't set, we make sure this isn't a TBE process.
2877 */
91447636 2878 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
9bccf70c
A
2879#if QUOTA
2880 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2881#endif /* QUOTA */
91447636
A
2882 if (hfs_start_transaction(hfsmp) != 0) {
2883 retval = EINVAL;
2884 goto Err_Exit;
2885 }
2886
2887 if (fp->ff_unallocblocks == 0) {
2888 /* Protect extents b-tree and allocation bitmap */
2889 lockflags = SFL_BITMAP;
2890 if (overflow_extents(fp))
2891 lockflags |= SFL_EXTENTS;
2892 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
b4c24cb9 2893
6d2010ae
A
2894 retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0,
2895 FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));
1c79356b 2896
91447636
A
2897 hfs_systemfile_unlock(hfsmp, lockflags);
2898 }
b4c24cb9 2899 if (hfsmp->jnl) {
ff6e181a
A
2900 if (retval == 0) {
2901 fp->ff_size = length;
2902 }
b0d623f7
A
2903 if (skipupdate) {
2904 (void) hfs_minorupdate(vp);
2905 }
2906 else {
2907 (void) hfs_update(vp, TRUE);
2908 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2909 }
b4c24cb9 2910 }
91447636 2911 hfs_end_transaction(hfsmp);
b4c24cb9 2912
9bccf70c
A
2913 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2914 if (retval)
2915 goto Err_Exit;
2916#if QUOTA
2917 /* These are bytesreleased */
2918 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2919#endif /* QUOTA */
2920 }
2921 /* Only set update flag if the logical length changes */
2d21ac55 2922 if ((off_t)fp->ff_size != length)
91447636 2923 cp->c_touch_modtime = TRUE;
9bccf70c 2924 fp->ff_size = length;
1c79356b 2925 }
b0d623f7
A
2926 if (cp->c_mode & (S_ISUID | S_ISGID)) {
2927 if (!vfs_context_issuser(context)) {
2928 cp->c_mode &= ~(S_ISUID | S_ISGID);
2929 skipupdate = 0;
2930 }
2931 }
2932 if (skipupdate) {
2933 retval = hfs_minorupdate(vp);
2934 }
2935 else {
2936 cp->c_touch_chgtime = TRUE; /* status changed */
2937 cp->c_touch_modtime = TRUE; /* file data was modified */
2938 retval = hfs_update(vp, MNT_WAIT);
2939 }
9bccf70c 2940 if (retval) {
0b4e3aa0 2941 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1c79356b 2942 -1, -1, -1, retval, 0);
9bccf70c 2943 }
1c79356b 2944
9bccf70c 2945Err_Exit:
1c79356b 2946
9bccf70c
A
2947 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2948 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
1c79356b 2949
9bccf70c 2950 return (retval);
1c79356b
A
2951}
2952
6d2010ae
A
2953/*
2954 * Preparation which must be done prior to deleting the catalog record
2955 * of a file or directory. In order to make the on-disk as safe as possible,
2956 * we remove the catalog entry before releasing the bitmap blocks and the
2957 * overflow extent records. However, some work must be done prior to deleting
2958 * the catalog record.
2959 *
2960 * When calling this function, the cnode must exist both in memory and on-disk.
2961 * If there are both resource fork and data fork vnodes, this function should
2962 * be called on both.
2963 */
2964
2965int
2966hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {
2967
2968 struct filefork *fp = VTOF(vp);
2969 struct cnode *cp = VTOC(vp);
2970 int retval = 0;
2971
2972 /* Cannot truncate an HFS directory! */
2973 if (vnode_isdir(vp)) {
2974 return (EISDIR);
2975 }
2976
2977 /*
2978 * See the comment below in hfs_truncate for why we need to call
2979 * setsize here. Essentially we want to avoid pending IO if we
2980 * already know that the blocks are going to be released here.
2981 * This function is only called when totally removing all storage for a file, so
2982 * we can take a shortcut and immediately setsize (0);
2983 */
2984 ubc_setsize(vp, 0);
2985
2986 /* This should only happen with a corrupt filesystem */
2987 if ((off_t)fp->ff_size < 0)
2988 return (EINVAL);
2989
2990 /*
2991 * We cannot just check if fp->ff_size == length (as an optimization)
2992 * since there may be extra physical blocks that also need truncation.
2993 */
2994#if QUOTA
2995 if ((retval = hfs_getinoquota(cp))) {
2996 return(retval);
2997 }
2998#endif /* QUOTA */
2999
3000 /* Wipe out any invalid ranges which have yet to be backed by disk */
3001 rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);
3002
3003 /*
3004 * Account for any unmapped blocks. Since we're deleting the
3005 * entire file, we don't have to worry about just shrinking
3006 * to a smaller number of borrowed blocks.
3007 */
3008 if (fp->ff_unallocblocks > 0) {
3009 u_int32_t loanedBlocks;
3010
3011 HFS_MOUNT_LOCK(hfsmp, TRUE);
3012
3013 loanedBlocks = fp->ff_unallocblocks;
3014 cp->c_blocks -= loanedBlocks;
3015 fp->ff_blocks -= loanedBlocks;
3016 fp->ff_unallocblocks = 0;
3017
3018 hfsmp->loanedBlocks -= loanedBlocks;
3019
3020 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3021 }
3022
3023 return 0;
3024}
3025
3026
3027/*
3028 * Special wrapper around calling TruncateFileC. This function is useable
3029 * even when the catalog record does not exist any longer, making it ideal
3030 * for use when deleting a file. The simplification here is that we know
3031 * that we are releasing all blocks.
3032 *
3033 * The caller is responsible for saving off a copy of the filefork(s)
3034 * embedded within the cnode prior to calling this function. The pointers
3035 * supplied as arguments must be valid even if the cnode is no longer valid.
3036 */
3037
3038int
3039hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
3040 struct filefork *rsrcfork, u_int32_t fileid) {
3041
3042 off_t filebytes;
3043 u_int32_t fileblocks;
3044 int blksize = 0;
3045 int error = 0;
3046 int lockflags;
3047
3048 blksize = hfsmp->blockSize;
3049
3050 /* Data Fork */
3051 if (datafork->ff_blocks > 0) {
3052 fileblocks = datafork->ff_blocks;
3053 filebytes = (off_t)fileblocks * (off_t)blksize;
3054
3055 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3056
3057 while (filebytes > 0) {
3058 if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(datafork)) {
3059 filebytes -= HFS_BIGFILE_SIZE;
3060 } else {
3061 filebytes = 0;
3062 }
3063
3064 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3065 if (hfs_start_transaction(hfsmp) != 0) {
3066 error = EINVAL;
3067 break;
3068 }
3069
3070 if (datafork->ff_unallocblocks == 0) {
3071 /* Protect extents b-tree and allocation bitmap */
3072 lockflags = SFL_BITMAP;
3073 if (overflow_extents(datafork))
3074 lockflags |= SFL_EXTENTS;
3075 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3076
3077 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false));
3078
3079 hfs_systemfile_unlock(hfsmp, lockflags);
3080 }
3081 if (error == 0) {
3082 datafork->ff_size = filebytes;
3083 }
3084 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3085
3086 /* Finish the transaction and start over if necessary */
3087 hfs_end_transaction(hfsmp);
3088
3089 if (error) {
3090 break;
3091 }
3092 }
3093 }
3094
3095 /* Resource fork */
3096 if (error == 0 && (rsrcfork != NULL) && rsrcfork->ff_blocks > 0) {
3097 fileblocks = rsrcfork->ff_blocks;
3098 filebytes = (off_t)fileblocks * (off_t)blksize;
3099
3100 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3101
3102 while (filebytes > 0) {
3103 if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(rsrcfork)) {
3104 filebytes -= HFS_BIGFILE_SIZE;
3105 } else {
3106 filebytes = 0;
3107 }
3108
3109 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3110 if (hfs_start_transaction(hfsmp) != 0) {
3111 error = EINVAL;
3112 break;
3113 }
3114
3115 if (rsrcfork->ff_unallocblocks == 0) {
3116 /* Protect extents b-tree and allocation bitmap */
3117 lockflags = SFL_BITMAP;
3118 if (overflow_extents(rsrcfork))
3119 lockflags |= SFL_EXTENTS;
3120 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3121
3122 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false));
3123
3124 hfs_systemfile_unlock(hfsmp, lockflags);
3125 }
3126 if (error == 0) {
3127 rsrcfork->ff_size = filebytes;
3128 }
3129 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3130
3131 /* Finish the transaction and start over if necessary */
3132 hfs_end_transaction(hfsmp);
3133
3134 if (error) {
3135 break;
3136 }
3137 }
3138 }
3139
3140 return error;
3141}
1c79356b 3142
91447636 3143
55e303ae 3144/*
55e303ae
A
3145 * Truncate a cnode to at most length size, freeing (or adding) the
3146 * disk blocks.
3147 */
91447636
A
3148int
3149hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
b0d623f7 3150 int skipupdate, vfs_context_t context)
55e303ae 3151{
55e303ae 3152 struct filefork *fp = VTOF(vp);
55e303ae 3153 off_t filebytes;
b0d623f7 3154 u_int32_t fileblocks;
91447636 3155 int blksize, error = 0;
3a60a9f5 3156 struct cnode *cp = VTOC(vp);
55e303ae 3157
2d21ac55
A
3158 /* Cannot truncate an HFS directory! */
3159 if (vnode_isdir(vp)) {
3160 return (EISDIR);
3161 }
3162 /* A swap file cannot change size. */
3163 if (vnode_isswap(vp) && (length != 0)) {
3164 return (EPERM);
3165 }
55e303ae 3166
55e303ae
A
3167 blksize = VTOVCB(vp)->blockSize;
3168 fileblocks = fp->ff_blocks;
3169 filebytes = (off_t)fileblocks * (off_t)blksize;
3170
2d21ac55
A
3171 //
3172 // Have to do this here so that we don't wind up with
3173 // i/o pending for blocks that are about to be released
3174 // if we truncate the file.
3175 //
3176 // If skipsetsize is set, then the caller is responsible
3177 // for the ubc_setsize.
3178 //
b0d623f7
A
3179 // Even if skipsetsize is set, if the length is zero we
3180 // want to call ubc_setsize() because as of SnowLeopard
3181 // it will no longer cause any page-ins and it will drop
3182 // any dirty pages so that we don't do any i/o that we
3183 // don't have to. This also prevents a race where i/o
3184 // for truncated blocks may overwrite later data if the
3185 // blocks get reallocated to a different file.
3186 //
3187 if (!skipsetsize || length == 0)
2d21ac55
A
3188 ubc_setsize(vp, length);
3189
55e303ae
A
3190 // have to loop truncating or growing files that are
3191 // really big because otherwise transactions can get
3192 // enormous and consume too many kernel resources.
91447636
A
3193
3194 if (length < filebytes) {
3195 while (filebytes > length) {
0c530ab8 3196 if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
91447636
A
3197 filebytes -= HFS_BIGFILE_SIZE;
3198 } else {
3199 filebytes = length;
3200 }
3a60a9f5 3201 cp->c_flag |= C_FORCEUPDATE;
b0d623f7 3202 error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
91447636
A
3203 if (error)
3204 break;
3205 }
3206 } else if (length > filebytes) {
3207 while (filebytes < length) {
0c530ab8 3208 if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
91447636
A
3209 filebytes += HFS_BIGFILE_SIZE;
3210 } else {
3211 filebytes = length;
3212 }
3a60a9f5 3213 cp->c_flag |= C_FORCEUPDATE;
b0d623f7 3214 error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
91447636
A
3215 if (error)
3216 break;
55e303ae 3217 }
91447636 3218 } else /* Same logical size */ {
55e303ae 3219
b0d623f7 3220 error = do_hfs_truncate(vp, length, flags, skipupdate, context);
91447636
A
3221 }
3222 /* Files that are changing size are not hot file candidates. */
3223 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
3224 fp->ff_bytesread = 0;
55e303ae
A
3225 }
3226
91447636 3227 return (error);
55e303ae
A
3228}
3229
3230
1c79356b
A
3231
3232/*
91447636 3233 * Preallocate file storage space.
1c79356b 3234 */
91447636
A
3235int
3236hfs_vnop_allocate(struct vnop_allocate_args /* {
3237 vnode_t a_vp;
9bccf70c
A
3238 off_t a_length;
3239 u_int32_t a_flags;
3240 off_t *a_bytesallocated;
3241 off_t a_offset;
91447636
A
3242 vfs_context_t a_context;
3243 } */ *ap)
1c79356b 3244{
9bccf70c 3245 struct vnode *vp = ap->a_vp;
91447636
A
3246 struct cnode *cp;
3247 struct filefork *fp;
3248 ExtendedVCB *vcb;
9bccf70c
A
3249 off_t length = ap->a_length;
3250 off_t startingPEOF;
3251 off_t moreBytesRequested;
3252 off_t actualBytesAdded;
3253 off_t filebytes;
b0d623f7 3254 u_int32_t fileblocks;
9bccf70c 3255 int retval, retval2;
2d21ac55
A
3256 u_int32_t blockHint;
3257 u_int32_t extendFlags; /* For call to ExtendFileC */
b4c24cb9 3258 struct hfsmount *hfsmp;
91447636
A
3259 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
3260 int lockflags;
6d2010ae 3261 time_t orig_ctime;
91447636
A
3262
3263 *(ap->a_bytesallocated) = 0;
3264
3265 if (!vnode_isreg(vp))
3266 return (EISDIR);
3267 if (length < (off_t)0)
3268 return (EINVAL);
2d21ac55 3269
91447636 3270 cp = VTOC(vp);
2d21ac55 3271
6d2010ae
A
3272 orig_ctime = VTOC(vp)->c_ctime;
3273
3274 check_for_tracked_file(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);
3275
3276 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
2d21ac55
A
3277
3278 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
3279 goto Err_Exit;
3280 }
3281
91447636 3282 fp = VTOF(vp);
b4c24cb9 3283 hfsmp = VTOHFS(vp);
91447636 3284 vcb = VTOVCB(vp);
9bccf70c 3285
9bccf70c 3286 fileblocks = fp->ff_blocks;
55e303ae 3287 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
9bccf70c 3288
91447636
A
3289 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
3290 retval = EINVAL;
3291 goto Err_Exit;
3292 }
0b4e3aa0 3293
9bccf70c 3294 /* Fill in the flags word for the call to Extend the file */
1c79356b 3295
55e303ae 3296 extendFlags = kEFNoClumpMask;
9bccf70c 3297 if (ap->a_flags & ALLOCATECONTIG)
1c79356b 3298 extendFlags |= kEFContigMask;
9bccf70c 3299 if (ap->a_flags & ALLOCATEALL)
1c79356b 3300 extendFlags |= kEFAllMask;
91447636 3301 if (cred && suser(cred, NULL) != 0)
9bccf70c 3302 extendFlags |= kEFReserveMask;
b0d623f7
A
3303 if (hfs_virtualmetafile(cp))
3304 extendFlags |= kEFMetadataMask;
1c79356b 3305
9bccf70c
A
3306 retval = E_NONE;
3307 blockHint = 0;
3308 startingPEOF = filebytes;
1c79356b 3309
9bccf70c
A
3310 if (ap->a_flags & ALLOCATEFROMPEOF)
3311 length += filebytes;
3312 else if (ap->a_flags & ALLOCATEFROMVOL)
3313 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
1c79356b 3314
9bccf70c
A
3315 /* If no changes are necesary, then we're done */
3316 if (filebytes == length)
3317 goto Std_Exit;
1c79356b 3318
9bccf70c
A
3319 /*
3320 * Lengthen the size of the file. We must ensure that the
3321 * last byte of the file is allocated. Since the smallest
3322 * value of filebytes is 0, length will be at least 1.
3323 */
3324 if (length > filebytes) {
2d21ac55
A
3325 off_t total_bytes_added = 0, orig_request_size;
3326
3327 orig_request_size = moreBytesRequested = length - filebytes;
1c79356b 3328
9bccf70c 3329#if QUOTA
b4c24cb9 3330 retval = hfs_chkdq(cp,
55e303ae 3331 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
91447636 3332 cred, 0);
9bccf70c 3333 if (retval)
91447636 3334 goto Err_Exit;
9bccf70c
A
3335
3336#endif /* QUOTA */
55e303ae
A
3337 /*
3338 * Metadata zone checks.
3339 */
3340 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
3341 /*
3342 * Allocate Journal and Quota files in metadata zone.
3343 */
3344 if (hfs_virtualmetafile(cp)) {
55e303ae
A
3345 blockHint = hfsmp->hfs_metazone_start;
3346 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
3347 (blockHint <= hfsmp->hfs_metazone_end)) {
3348 /*
3349 * Move blockHint outside metadata zone.
3350 */
3351 blockHint = hfsmp->hfs_metazone_end + 1;
3352 }
3353 }
3354
b4c24cb9 3355
2d21ac55
A
3356 while ((length > filebytes) && (retval == E_NONE)) {
3357 off_t bytesRequested;
3358
3359 if (hfs_start_transaction(hfsmp) != 0) {
3360 retval = EINVAL;
3361 goto Err_Exit;
3362 }
3363
3364 /* Protect extents b-tree and allocation bitmap */
3365 lockflags = SFL_BITMAP;
3366 if (overflow_extents(fp))
91447636 3367 lockflags |= SFL_EXTENTS;
2d21ac55
A
3368 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3369
3370 if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
3371 bytesRequested = HFS_BIGFILE_SIZE;
3372 } else {
3373 bytesRequested = moreBytesRequested;
3374 }
1c79356b 3375
b0d623f7
A
3376 if (extendFlags & kEFContigMask) {
3377 // if we're on a sparse device, this will force it to do a
3378 // full scan to find the space needed.
3379 hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
3380 }
3381
2d21ac55 3382 retval = MacToVFSError(ExtendFileC(vcb,
9bccf70c 3383 (FCB*)fp,
2d21ac55 3384 bytesRequested,
9bccf70c
A
3385 blockHint,
3386 extendFlags,
3387 &actualBytesAdded));
1c79356b 3388
2d21ac55
A
3389 if (retval == E_NONE) {
3390 *(ap->a_bytesallocated) += actualBytesAdded;
3391 total_bytes_added += actualBytesAdded;
3392 moreBytesRequested -= actualBytesAdded;
3393 if (blockHint != 0) {
3394 blockHint += actualBytesAdded / vcb->blockSize;
3395 }
3396 }
3397 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
3398
3399 hfs_systemfile_unlock(hfsmp, lockflags);
1c79356b 3400
2d21ac55 3401 if (hfsmp->jnl) {
91447636
A
3402 (void) hfs_update(vp, TRUE);
3403 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2d21ac55
A
3404 }
3405
3406 hfs_end_transaction(hfsmp);
b4c24cb9 3407 }
91447636 3408
b4c24cb9 3409
1c79356b
A
3410 /*
3411 * if we get an error and no changes were made then exit
91447636 3412 * otherwise we must do the hfs_update to reflect the changes
1c79356b 3413 */
9bccf70c
A
3414 if (retval && (startingPEOF == filebytes))
3415 goto Err_Exit;
1c79356b 3416
9bccf70c
A
3417 /*
3418 * Adjust actualBytesAdded to be allocation block aligned, not
3419 * clump size aligned.
3420 * NOTE: So what we are reporting does not affect reality
3421 * until the file is closed, when we truncate the file to allocation
3422 * block size.
3423 */
2d21ac55 3424 if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
0b4e3aa0 3425 *(ap->a_bytesallocated) =
2d21ac55 3426 roundup(orig_request_size, (off_t)vcb->blockSize);
1c79356b 3427
9bccf70c 3428 } else { /* Shorten the size of the file */
1c79356b 3429
9bccf70c 3430 if (fp->ff_size > length) {
1c79356b
A
3431 /*
3432 * Any buffers that are past the truncation point need to be
91447636 3433 * invalidated (to maintain buffer cache consistency).
1c79356b 3434 */
1c79356b
A
3435 }
3436
b0d623f7 3437 retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context);
55e303ae 3438 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
b4c24cb9 3439
1c79356b
A
3440 /*
3441 * if we get an error and no changes were made then exit
91447636 3442 * otherwise we must do the hfs_update to reflect the changes
1c79356b 3443 */
9bccf70c
A
3444 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
3445#if QUOTA
3446 /* These are bytesreleased */
3447 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
3448#endif /* QUOTA */
1c79356b 3449
9bccf70c
A
3450 if (fp->ff_size > filebytes) {
3451 fp->ff_size = filebytes;
1c79356b 3452
91447636
A
3453 hfs_unlock(cp);
3454 ubc_setsize(vp, fp->ff_size);
3455 hfs_lock(cp, HFS_FORCE_LOCK);
9bccf70c
A
3456 }
3457 }
1c79356b
A
3458
3459Std_Exit:
91447636
A
3460 cp->c_touch_chgtime = TRUE;
3461 cp->c_touch_modtime = TRUE;
3462 retval2 = hfs_update(vp, MNT_WAIT);
1c79356b 3463
9bccf70c
A
3464 if (retval == 0)
3465 retval = retval2;
1c79356b 3466Err_Exit:
6d2010ae 3467 hfs_unlock_truncate(cp, 0);
91447636 3468 hfs_unlock(cp);
9bccf70c 3469 return (retval);
1c79356b
A
3470}
3471
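/*
 * Illustrative sketch (hypothetical sizes): how a byte count is rounded
 * up to an allocation-block boundary for a_bytesallocated above.  This is
 * a standalone userspace analog; the 4 KB block size stands in for
 * vcb->blockSize and is an assumption of the example only.
 */
#if 0
#include <stdio.h>

/* round x up to the next multiple of y (y need not be a power of two) */
#define ROUNDUP(x, y)	((((x) + (y) - 1) / (y)) * (y))

int
main(void)
{
	long long blockSize = 4096;	/* stand-in for vcb->blockSize  */
	long long request = 10000;	/* caller asked for 10000 bytes */

	/* 10000 bytes round up to 12288, i.e. three 4 KB allocation blocks */
	printf("%lld -> %lld\n", request, ROUNDUP(request, blockSize));
	return (0);
}
#endif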
3472
9bccf70c 3473/*
91447636 3474 * Pagein for HFS filesystem.
9bccf70c 3475 */
1c79356b 3476int
91447636
A
3477hfs_vnop_pagein(struct vnop_pagein_args *ap)
3478/*
3479 struct vnop_pagein_args {
3480 vnode_t a_vp,
1c79356b
A
3481 upl_t a_pl,
3482 vm_offset_t a_pl_offset,
3483 off_t a_f_offset,
3484 size_t a_size,
1c79356b 3485 int a_flags
91447636
A
3486 vfs_context_t a_context;
3487 };
3488*/
1c79356b 3489{
6d2010ae
A
3490 vnode_t vp;
3491 struct cnode *cp;
3492 struct filefork *fp;
3493 int error = 0;
3494 upl_t upl;
3495 upl_page_info_t *pl;
3496 off_t f_offset;
3497 int offset;
3498 int isize;
3499 int pg_index;
3500 boolean_t truncate_lock_held = FALSE;
3501 boolean_t file_converted = FALSE;
3502 kern_return_t kret;
3503
3504 vp = ap->a_vp;
3505 cp = VTOC(vp);
3506 fp = VTOF(vp);
3507
3508#if CONFIG_PROTECT
3509 if ((error = cp_handle_vnop(cp, CP_READ_ACCESS | CP_WRITE_ACCESS)) != 0) {
3510 return error;
3511 }
3512#endif /* CONFIG_PROTECT */
3513
3514 if (ap->a_pl != NULL) {
3515 /*
3516 * this can only happen for swap files now that
3517 * we're asking for V2 paging behavior...
3518 * so we don't need to worry about decompression, or
3519 * keeping track of blocks read or taking the truncate lock
3520 */
3521 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
3522 ap->a_size, (off_t)fp->ff_size, ap->a_flags);
3523 goto pagein_done;
3524 }
3525
3526retry_pagein:
3527 /*
3528 * take truncate lock (shared/recursive) to guard against
3529 * zero-fill thru fsync interfering, but only for v2
3530 *
3531 * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
3532 * lock shared and we are allowed to recurse 1 level if this thread already
3533 * owns the lock exclusively... this can legally occur
3534 * if we are doing a shrinking ftruncate against a file
3535 * that is mapped private, and the pages being truncated
3536 * do not currently exist in the cache... in that case
3537 * we will have to page-in the missing pages in order
3538 * to provide them to the private mapping... we must
3539 * also call hfs_unlock_truncate with a positive been_recursed
3540 * arg to indicate that if we have recursed, there is no need to drop
3541 * the lock. Allowing this simple recursion is necessary
3542 * in order to avoid a certain deadlock... since the ftruncate
3543 * already holds the truncate lock exclusively, if we try
3544 * to acquire it shared to protect the pagein path, we will
3545 * hang this thread
3546 *
3547 * NOTE: The if () block below is a workaround in order to prevent a
3548 * VM deadlock. See rdar://7853471.
3549 *
3550 * If we are in a forced unmount, then launchd will still have the
3551 * dyld_shared_cache file mapped as it is trying to reboot. If we
3552 * take the truncate lock here to service a page fault, then our
3553 * thread could deadlock with the forced-unmount. The forced unmount
3554 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
3555 * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount
3556 * thread will think it needs to copy all of the data out of the file
3557 * and into a VM copy object. If we hold the cnode lock here, then that
3558 * VM operation will not be able to proceed, because we'll set a busy page
3559 * before attempting to grab the lock. Note that this isn't as simple as "don't
3560 * call ubc_setsize" because doing that would just shift the problem to the
3561 * ubc_msync done before the vnode is reclaimed.
3562 *
3563 * So, if a forced unmount on this volume is in flight AND the cnode is
3564 * marked C_DELETED, then just go ahead and do the page in without taking
3565 * the lock (thus suspending pagein_v2 semantics temporarily). Since it's on a file
3566 * that is not going to be available on the next mount, this seems like an
3567 * OK solution from a correctness point of view, even though it is hacky.
3568 */
3569 if (vfs_isforce(vp->v_mount)) {
3570 if (cp->c_flag & C_DELETED) {
3571 /* If we don't get it, then just go ahead and operate without the lock */
3572 truncate_lock_held = hfs_try_trunclock(cp, HFS_RECURSE_TRUNCLOCK);
3573 }
3574 }
3575 else {
3576 hfs_lock_truncate(cp, HFS_RECURSE_TRUNCLOCK);
3577 truncate_lock_held = TRUE;
3578 }
3579
3580 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
3581
3582 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
3583 error = EINVAL;
3584 goto pagein_done;
3585 }
3586 isize = ap->a_size;
3587
3588 /*
3589 * Scan from the back to find the last page in the UPL, so that we
3590 * aren't looking at a UPL that may have already been freed by the
3591 * preceding aborts/completions.
3592 */
3593 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
3594 if (upl_page_present(pl, --pg_index))
3595 break;
3596 if (pg_index == 0) {
3597 /*
3598 * no absent pages were found in the range specified
3599 * just abort the UPL to get rid of it and then we're done
3600 */
3601 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
3602 goto pagein_done;
3603 }
3604 }
3605 /*
3606 * initialize the offset variables before we touch the UPL.
3607 * f_offset is the position into the file, in bytes
3608 * offset is the position into the UPL, in bytes
3609 * pg_index is the pg# of the UPL we're operating on
3610 * isize is the offset into the UPL of the last page that is present.
3611 */
3612 isize = ((pg_index + 1) * PAGE_SIZE);
3613 pg_index = 0;
3614 offset = 0;
3615 f_offset = ap->a_f_offset;
3616
3617 while (isize) {
3618 int xsize;
3619 int num_of_pages;
3620
3621 if ( !upl_page_present(pl, pg_index)) {
3622 /*
3623 * we asked for RET_ONLY_ABSENT, so it's possible
3624 * to get back empty slots in the UPL.
3625 * just skip over them
3626 */
3627 f_offset += PAGE_SIZE;
3628 offset += PAGE_SIZE;
3629 isize -= PAGE_SIZE;
3630 pg_index++;
3631
3632 continue;
3633 }
3634 /*
3635 * We know that we have at least one absent page.
3636 * Now check to see how many we have in a row
3637 */
3638 num_of_pages = 1;
3639 xsize = isize - PAGE_SIZE;
3640
3641 while (xsize) {
3642 if ( !upl_page_present(pl, pg_index + num_of_pages))
3643 break;
3644 num_of_pages++;
3645 xsize -= PAGE_SIZE;
3646 }
3647 xsize = num_of_pages * PAGE_SIZE;
1c79356b 3648
b0d623f7 3649#if HFS_COMPRESSION
6d2010ae
A
3650 if (VNODE_IS_RSRC(vp)) {
3651 /* allow pageins of the resource fork */
3652 } else {
3653 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
3654
b0d623f7 3655 if (compressed) {
6d2010ae
A
3656 if (truncate_lock_held) {
3657 /*
3658 * can't hold the truncate lock when calling into the decmpfs layer
3659 * since it calls back into this layer... even though we're only
3660 * holding the lock in shared mode, and the re-entrant path only
3661 * takes the lock shared, we can deadlock if some other thread
3662 * tries to grab the lock exclusively in between.
3663 */
3664 hfs_unlock_truncate(cp, 1);
3665 truncate_lock_held = FALSE;
3666 }
3667 ap->a_pl = upl;
3668 ap->a_pl_offset = offset;
3669 ap->a_f_offset = f_offset;
3670 ap->a_size = xsize;
3671
3672 error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
3673 /*
3674 * note that decmpfs_pagein_compressed can change the state of
3675 * 'compressed'... it will set it to 0 if the file is no longer
3676 * compressed once the compression lock is successfully taken
3677 * i.e. we would block on that lock while the file is being inflated
3678 */
3679 if (compressed) {
3680 if (error == 0) {
3681 /* successful page-in, update the access time */
3682 VTOC(vp)->c_touch_acctime = TRUE;
b0d623f7 3683
6d2010ae
A
3684 /* compressed files are not hot file candidates */
3685 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
3686 fp->ff_bytesread = 0;
3687 }
3688 } else if (error == EAGAIN) {
3689 /*
3690 * EAGAIN indicates someone else already holds the compression lock...
3691 * to avoid deadlocking, we'll abort this range of pages with an
3692 * indication that the pagein needs to be redriven
3693 */
3694 ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
b0d623f7 3695 }
6d2010ae
A
3696 goto pagein_next_range;
3697 }
3698 else {
3699 /*
3700 * Set file_converted only if the file became decompressed while we were
3701 * paging in. If it were still compressed, we would re-start the loop using the goto
3702 * in the above block. This avoids overloading truncate_lock_held as our retry_pagein
3703 * condition below, since we could have avoided taking the truncate lock to prevent
3704 * a deadlock in the force unmount case.
3705 */
3706 file_converted = TRUE;
b0d623f7 3707 }
b0d623f7 3708 }
6d2010ae
A
3709 if (file_converted == TRUE) {
3710 /*
3711 * the file was converted back to a regular file after we first saw it as compressed;
3712 * we need to abort the UPL, retake the truncate lock, recreate the UPL, and start over,
3713 * resetting a_size so that we consider what remains of the original request
3714 * and nulling out a_pl and a_pl_offset.
3715 *
3716 * We should only be able to get into this block if decmpfs_pagein_compressed
3717 * successfully decompressed the range in question for this file.
3718 */
3719 ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
3720
3721 ap->a_size = isize;
3722 ap->a_pl = NULL;
3723 ap->a_pl_offset = 0;
3724
3725 /* Reset file_converted back to false so that we don't infinite-loop. */
3726 file_converted = FALSE;
3727 goto retry_pagein;
3728 }
b0d623f7 3729 }
b0d623f7 3730#endif
6d2010ae 3731 error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);
b0d623f7 3732
6d2010ae
A
3733 /*
3734 * Keep track of blocks read.
3735 */
3736 if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
3737 int bytesread;
3738 int took_cnode_lock = 0;
55e303ae 3739
6d2010ae
A
3740 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
3741 bytesread = fp->ff_size;
3742 else
3743 bytesread = xsize;
91447636 3744
6d2010ae
A
3745 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
3746 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
3747 hfs_lock(cp, HFS_FORCE_LOCK);
3748 took_cnode_lock = 1;
3749 }
3750 /*
3751 * If this file hasn't been seen since the start of
3752 * the current sampling period then start over.
3753 */
3754 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
3755 struct timeval tv;
91447636 3756
6d2010ae
A
3757 fp->ff_bytesread = bytesread;
3758 microtime(&tv);
3759 cp->c_atime = tv.tv_sec;
3760 } else {
3761 fp->ff_bytesread += bytesread;
3762 }
3763 cp->c_touch_acctime = TRUE;
3764 if (took_cnode_lock)
3765 hfs_unlock(cp);
91447636 3766 }
6d2010ae
A
3767pagein_next_range:
3768 f_offset += xsize;
3769 offset += xsize;
3770 isize -= xsize;
3771 pg_index += num_of_pages;
55e303ae 3772
6d2010ae 3773 error = 0;
55e303ae 3774 }
6d2010ae
A
3775
3776pagein_done:
3777 if (truncate_lock_held == TRUE) {
3778 /* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */
3779 hfs_unlock_truncate(cp, 1);
3780 }
3781
9bccf70c 3782 return (error);
1c79356b
A
3783}
3784
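/*
 * Illustrative userspace sketch of the two UPL walks in hfs_vnop_pagein
 * above: scan backward for the last present page (which trims isize),
 * then walk forward, skipping absent slots and coalescing each run of
 * present pages into a single pagein.  The present[] bitmap is made-up
 * test data standing in for upl_page_present(), and SKETCH_PAGE_SIZE is
 * a stand-in for PAGE_SIZE.
 */
#if 0
#include <stdio.h>

#define SKETCH_PAGE_SIZE	4096

int
main(void)
{
	int present[] = { 1, 1, 0, 1, 1, 1, 0, 0 };	/* hypothetical UPL */
	int isize = 8 * SKETCH_PAGE_SIZE;
	int pg_index;

	/* scan from the back so we never look at already-freed pages */
	for (pg_index = isize / SKETCH_PAGE_SIZE; pg_index > 0;) {
		if (present[--pg_index])
			break;
		if (pg_index == 0)
			return (0);	/* nothing present: abort the UPL */
	}
	isize = (pg_index + 1) * SKETCH_PAGE_SIZE;	/* drops the trailing absent pages */

	for (pg_index = 0; isize; ) {
		int num_of_pages;

		if (!present[pg_index]) {	/* RET_ONLY_ABSENT leaves holes */
			isize -= SKETCH_PAGE_SIZE;
			pg_index++;
			continue;
		}
		num_of_pages = 1;		/* coalesce the contiguous run */
		while (num_of_pages * SKETCH_PAGE_SIZE < isize &&
		    present[pg_index + num_of_pages])
			num_of_pages++;
		/* one cluster_pagein call would cover this whole run */
		printf("page in pages [%d..%d]\n", pg_index, pg_index + num_of_pages - 1);
		isize -= num_of_pages * SKETCH_PAGE_SIZE;
		pg_index += num_of_pages;
	}
	return (0);
}
#endif
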
3785/*
91447636 3786 * Pageout for HFS filesystem.
1c79356b
A
3787 */
3788int
91447636
A
3789hfs_vnop_pageout(struct vnop_pageout_args *ap)
3790/*
3791 struct vnop_pageout_args {
3792 vnode_t a_vp,
1c79356b
A
3793 upl_t a_pl,
3794 vm_offset_t a_pl_offset,
3795 off_t a_f_offset,
3796 size_t a_size,
1c79356b 3797 int a_flags
91447636
A
3798 vfs_context_t a_context;
3799 };
3800*/
1c79356b 3801{
91447636
A
3802 vnode_t vp = ap->a_vp;
3803 struct cnode *cp;
3804 struct filefork *fp;
b0d623f7 3805 int retval = 0;
9bccf70c 3806 off_t filesize;
b0d623f7
A
3807 upl_t upl;
3808 upl_page_info_t* pl;
3809 vm_offset_t a_pl_offset;
3810 int a_flags;
3811 int is_pageoutv2 = 0;
b7266188 3812 kern_return_t kret;
1c79356b 3813
91447636 3814 cp = VTOC(vp);
91447636 3815 fp = VTOF(vp);
2d21ac55 3816
593a1d5f
A
3817 /*
3818 * Figure out where the file ends, for pageout purposes. If
3819 * ff_new_size > ff_size, then we're in the middle of extending the
3820 * file via a write, so it is safe (and necessary) that we be able
3821 * to pageout up to that point.
3822 */
3823 filesize = fp->ff_size;
3824 if (fp->ff_new_size > filesize)
3825 filesize = fp->ff_new_size;
b0d623f7
A
3826
3827 a_flags = ap->a_flags;
3828 a_pl_offset = ap->a_pl_offset;
3829
3830 /*
3831 * we can tell if we're getting the new or old behavior from the UPL
3832 */
3833 if ((upl = ap->a_pl) == NULL) {
3834 int request_flags;
3835
3836 is_pageoutv2 = 1;
3837 /*
3838 * we're in control of any UPL we commit
3839 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
3840 */
3841 a_flags &= ~UPL_NOCOMMIT;
3842 a_pl_offset = 0;
3843
3844 /*
3845 * take truncate lock (shared) to guard against
3846 * zero-fill thru fsync interfering, but only for v2
3847 */
6d2010ae 3848 hfs_lock_truncate(cp, HFS_SHARED_LOCK);
b0d623f7
A
3849
3850 if (a_flags & UPL_MSYNC) {
3851 request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
3852 }
3853 else {
3854 request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
3855 }
6d2010ae 3856
b7266188 3857 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);
b0d623f7 3858
b7266188 3859 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
b0d623f7
A
3860 retval = EINVAL;
3861 goto pageout_done;
3862 }
3863 }
3864 /*
3865 * from this point forward upl points at the UPL we're working with
3866 * it was either passed in or we successfully created it
3867 */
3868
3869 /*
3870 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
3871 * UPL instead of relying on the UPL passed into us. We go ahead and do that here,
3872 * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for
3873 * N dirty ranges in the UPL. Note that this is almost a direct copy of the
3874 * logic in vnode_pageout except that we need to do it after grabbing the truncate
3875 * lock in HFS so that we don't lock invert ourselves.
3876 *
3877 * Note that we can still get into this function on behalf of the default pager with
3878 * non-V2 behavior (swapfiles). However in that case, we did not grab locks above
3879 * since fsync and other writing threads will grab the locks, then mark the
3880 * relevant pages as busy. But the pageout codepath marks the pages as busy,
3881 * and THEN would attempt to grab the truncate lock, which would result in deadlock. So
3882 * we do not try to grab anything for the pre-V2 case, which should only be accessed
3883 * by the paging/VM system.
3884 */
3885
3886 if (is_pageoutv2) {
3887 off_t f_offset;
3888 int offset;
3889 int isize;
3890 int pg_index;
3891 int error;
3892 int error_ret = 0;
3893
3894 isize = ap->a_size;
3895 f_offset = ap->a_f_offset;
3896
3897 /*
3898 * Scan from the back to find the last page in the UPL, so that we
3899 * aren't looking at a UPL that may have already been freed by the
3900 * preceding aborts/completions.
3901 */
3902 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
3903 if (upl_page_present(pl, --pg_index))
3904 break;
3905 if (pg_index == 0) {
3906 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
3907 goto pageout_done;
2d21ac55 3908 }
2d21ac55 3909 }
b0d623f7
A
3910
3911 /*
3912 * initialize the offset variables before we touch the UPL.
3913 * a_f_offset is the position into the file, in bytes
3914 * offset is the position into the UPL, in bytes
3915 * pg_index is the pg# of the UPL we're operating on.
3916 * isize is the offset into the UPL of the last non-clean page.
3917 */
3918 isize = ((pg_index + 1) * PAGE_SIZE);
3919
3920 offset = 0;
3921 pg_index = 0;
3922
3923 while (isize) {
3924 int xsize;
3925 int num_of_pages;
3926
3927 if ( !upl_page_present(pl, pg_index)) {
3928 /*
3929 * we asked for RET_ONLY_DIRTY, so it's possible
3930 * to get back empty slots in the UPL.
3931 * just skip over them
3932 */
3933 f_offset += PAGE_SIZE;
3934 offset += PAGE_SIZE;
3935 isize -= PAGE_SIZE;
3936 pg_index++;
3937
3938 continue;
3939 }
3940 if ( !upl_dirty_page(pl, pg_index)) {
3941 panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
3942 }
3943
3944 /*
3945 * We know that we have at least one dirty page.
3946 * Now check to see how many we have in a row
3947 */
3948 num_of_pages = 1;
3949 xsize = isize - PAGE_SIZE;
3950
3951 while (xsize) {
3952 if ( !upl_dirty_page(pl, pg_index + num_of_pages))
3953 break;
3954 num_of_pages++;
3955 xsize -= PAGE_SIZE;
3956 }
3957 xsize = num_of_pages * PAGE_SIZE;
3958
3959 if (!vnode_isswap(vp)) {
3960 off_t end_of_range;
3961 int tooklock;
3962
3963 tooklock = 0;
3964
3965 if (cp->c_lockowner != current_thread()) {
3966 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
3967 /*
3968 * we're in the v2 path, so we are the
3969 * owner of the UPL... we may have already
3970 * processed some of the UPL, so abort it
3971 * from the current working offset to the
3972 * end of the UPL
3973 */
3974 ubc_upl_abort_range(upl,
3975 offset,
3976 ap->a_size - offset,
3977 UPL_ABORT_FREE_ON_EMPTY);
3978 goto pageout_done;
3979 }
3980 tooklock = 1;
3981 }
3982 end_of_range = f_offset + xsize - 1;
2d21ac55 3983
b0d623f7
A
3984 if (end_of_range >= filesize) {
3985 end_of_range = (off_t)(filesize - 1);
3986 }
3987 if (f_offset < filesize) {
3988 rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
3989 cp->c_flag |= C_MODIFIED; /* leof is dirty */
3990 }
3991 if (tooklock) {
3992 hfs_unlock(cp);
3993 }
3994 }
3995 if ((error = cluster_pageout(vp, upl, offset, f_offset,
3996 xsize, filesize, a_flags))) {
3997 if (error_ret == 0)
3998 error_ret = error;
3999 }
4000 f_offset += xsize;
4001 offset += xsize;
4002 isize -= xsize;
4003 pg_index += num_of_pages;
4004 }
4005 /* capture errnos bubbled out of cluster_pageout if they occurred */
4006 if (error_ret != 0) {
4007 retval = error_ret;
4008 }
4009 } /* end block for v2 pageout behavior */
4010 else {
4011 if (!vnode_isswap(vp)) {
4012 off_t end_of_range;
4013 int tooklock = 0;
4014
4015 if (cp->c_lockowner != current_thread()) {
4016 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
4017 if (!(a_flags & UPL_NOCOMMIT)) {
4018 ubc_upl_abort_range(upl,
4019 a_pl_offset,
4020 ap->a_size,
4021 UPL_ABORT_FREE_ON_EMPTY);
4022 }
4023 goto pageout_done;
4024 }
4025 tooklock = 1;
4026 }
4027 end_of_range = ap->a_f_offset + ap->a_size - 1;
2d21ac55 4028
b0d623f7
A
4029 if (end_of_range >= filesize) {
4030 end_of_range = (off_t)(filesize - 1);
4031 }
4032 if (ap->a_f_offset < filesize) {
4033 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
4034 cp->c_flag |= C_MODIFIED; /* leof is dirty */
4035 }
1c79356b 4036
b0d623f7
A
4037 if (tooklock) {
4038 hfs_unlock(cp);
4039 }
2d21ac55 4040 }
b0d623f7
A
4041 /*
4042 * just call cluster_pageout for old pre-v2 behavior
4043 */
4044 retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
4045 ap->a_size, filesize, a_flags);
55e303ae 4046 }
0b4e3aa0 4047
1c79356b 4048 /*
b0d623f7
A
4049 * If data was written, update the modification time of the file.
4050 * If setuid or setgid bits are set and this process is not the
4051 * superuser then clear the setuid and setgid bits as a precaution
4052 * against tampering.
1c79356b 4053 */
b0d623f7
A
4054 if (retval == 0) {
4055 cp->c_touch_modtime = TRUE;
91447636 4056 cp->c_touch_chgtime = TRUE;
b0d623f7
A
4057 if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
4058 (vfs_context_suser(ap->a_context) != 0)) {
4059 hfs_lock(cp, HFS_FORCE_LOCK);
4060 cp->c_mode &= ~(S_ISUID | S_ISGID);
4061 hfs_unlock(cp);
4062 }
4063 }
4064
4065pageout_done:
4066 if (is_pageoutv2) {
4067 /* release truncate lock (shared) */
4068 hfs_unlock_truncate(cp, 0);
91447636 4069 }
1c79356b
A
4070 return (retval);
4071}
4072
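/*
 * Illustrative sketch of the clamp hfs_vnop_pageout applies above before
 * rl_remove(): the dirty run [f_offset, f_offset + xsize) is clipped
 * against the file size so that only invalid ranges inside the file are
 * removed.  All values below are hypothetical.
 */
#if 0
#include <stdio.h>

int
main(void)
{
	long long filesize = 10000;		/* stand-in for fp->ff_size */
	long long f_offset = 8192;		/* start of the dirty run   */
	long long xsize = 2 * 4096;		/* two 4 KB pages           */
	long long end_of_range = f_offset + xsize - 1;

	if (end_of_range >= filesize)
		end_of_range = filesize - 1;	/* clip to the last valid byte */
	if (f_offset < filesize)
		printf("rl_remove(%lld, %lld)\n", f_offset, end_of_range);
	else
		printf("run lies entirely past EOF, nothing to remove\n");
	return (0);
}
#endif
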
4073/*
4074 * Intercept B-Tree node writes to unswap them if necessary.
1c79356b
A
4075 */
4076int
91447636 4077hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
1c79356b 4078{
9bccf70c 4079 int retval = 0;
9bccf70c 4080 register struct buf *bp = ap->a_bp;
91447636 4081 register struct vnode *vp = buf_vnode(bp);
9bccf70c
A
4082 BlockDescriptor block;
4083
4084 /* Trap B-Tree writes */
4085 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
91447636 4086 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
0c530ab8
A
4087 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
4088 (vp == VTOHFS(vp)->hfc_filevp)) {
9bccf70c 4089
3a60a9f5
A
4090 /*
4091 * Swap and validate the node if it is in native byte order.
4092 * This is always true on big endian, so we always validate
4093 * before writing here. On little endian, the node typically has
2d21ac55 4094 * been swapped and validated when it was written to the journal,
3a60a9f5
A
4095 * so we won't do anything here.
4096 */
2d21ac55 4097 if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
9bccf70c
A
4098 /* Prepare the block pointer */
4099 block.blockHeader = bp;
91447636 4100 block.buffer = (char *)buf_dataptr(bp);
3a60a9f5 4101 block.blockNum = buf_lblkno(bp);
9bccf70c 4102 /* not found in cache ==> came from disk */
91447636
A
4103 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
4104 block.blockSize = buf_count(bp);
1c79356b 4105
9bccf70c 4106 /* Endian un-swap B-Tree node */
935ed37a 4107 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
3a60a9f5
A
4108 if (retval)
4109 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
9bccf70c 4110 }
9bccf70c 4111 }
3a60a9f5 4112
9bccf70c 4113 /* This buffer shouldn't be locked anymore but if it is, clear it */
91447636
A
4114 if ((buf_flags(bp) & B_LOCKED)) {
4115 // XXXdbg
4116 if (VTOHFS(vp)->jnl) {
2d21ac55 4117 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
91447636
A
4118 }
4119 buf_clearflags(bp, B_LOCKED);
9bccf70c
A
4120 }
4121 retval = vn_bwrite (ap);
1c79356b 4122
9bccf70c 4123 return (retval);
1c79356b 4124}
55e303ae
A
4125
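/*
 * Illustrative sketch of the byte-order test in hfs_vnop_bwrite above.
 * The last two bytes of a B-tree node hold the offset of record 0, which
 * is always 14 (0x000e), the size of the node descriptor.  If those bytes
 * read back as 0x000e in host order, the node is still in host byte order
 * and must be swapped to big-endian before it goes to disk.  The 512-byte
 * node below is hypothetical and the demonstration assumes a
 * little-endian host.
 */
#if 0
#include <stdio.h>
#include <string.h>

int
main(void)
{
	unsigned char node[512] = { 0 };
	unsigned short rec0_offset;

	node[510] = 0x0e;	/* 0x000e as a little-endian host stores it */
	node[511] = 0x00;

	memcpy(&rec0_offset, &node[sizeof (node) - 2], sizeof (rec0_offset));
	if (rec0_offset == 0x000e)
		printf("node is in host order -> swap to big-endian first\n");
	else
		printf("node already big-endian -> write as-is\n");
	return (0);
}
#endif
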
4126/*
4127 * Relocate a file to a new location on disk
4128 * cnode must be locked on entry
4129 *
4130 * Relocation occurs by cloning the file's data from its
4131 * current set of blocks to a new set of blocks. During
4132 * the relocation all of the blocks (old and new) are
4133 * owned by the file.
4134 *
4135 * -----------------
4136 * |///////////////|
4137 * -----------------
4138 * 0 N (file offset)
4139 *
4140 * ----------------- -----------------
2d21ac55 4141 * |///////////////| | | STEP 1 (acquire new blocks)
55e303ae
A
4142 * ----------------- -----------------
4143 * 0 N N+1 2N
4144 *
4145 * ----------------- -----------------
4146 * |///////////////| |///////////////| STEP 2 (clone data)
4147 * ----------------- -----------------
4148 * 0 N N+1 2N
4149 *
4150 * -----------------
4151 * |///////////////| STEP 3 (head truncate blocks)
4152 * -----------------
4153 * 0 N
4154 *
4155 * During steps 2 and 3 page-outs to file offsets less
4156 * than or equal to N are suspended.
4157 *
2d21ac55 4158 * During step 3 page-ins to the file get suspended.
55e303ae 4159 */
55e303ae 4160int
91447636
A
4161hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
4162 struct proc *p)
55e303ae 4163{
91447636 4164 struct cnode *cp;
55e303ae
A
4165 struct filefork *fp;
4166 struct hfsmount *hfsmp;
55e303ae
A
4167 u_int32_t headblks;
4168 u_int32_t datablks;
4169 u_int32_t blksize;
55e303ae
A
4170 u_int32_t growsize;
4171 u_int32_t nextallocsave;
91447636 4172 daddr64_t sector_a, sector_b;
55e303ae 4173 int eflags;
55e303ae 4174 off_t newbytes;
91447636
A
4175 int retval;
4176 int lockflags = 0;
4177 int took_trunc_lock = 0;
4178 int started_tr = 0;
4179 enum vtype vnodetype;
4180
4181 vnodetype = vnode_vtype(vp);
4182 if (vnodetype != VREG && vnodetype != VLNK) {
55e303ae
A
4183 return (EPERM);
4184 }
4185
4186 hfsmp = VTOHFS(vp);
4187 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
4188 return (ENOSPC);
4189 }
4190
91447636 4191 cp = VTOC(vp);
55e303ae
A
4192 fp = VTOF(vp);
4193 if (fp->ff_unallocblocks)
4194 return (EINVAL);
6d2010ae
A
4195
4196#if CONFIG_PROTECT
4197 /*
4198 * <rdar://problem/9118426>
4199 * Disable HFS file relocation on content-protected filesystems
4200 */
4201 if (cp_fs_protected (hfsmp->hfs_mp)) {
4202 return EINVAL;
4203 }
4204#endif
4205
4206 /* If it's an SSD, also disable HFS relocation */
4207 if (hfsmp->hfs_flags & HFS_SSD) {
4208 return EINVAL;
4209 }
4210
91447636 4211 blksize = hfsmp->blockSize;
55e303ae 4212 if (blockHint == 0)
91447636 4213 blockHint = hfsmp->nextAllocation;
55e303ae 4214
2d21ac55 4215 if ((fp->ff_size > 0x7fffffff) ||
91447636 4216 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
55e303ae
A
4217 return (EFBIG);
4218 }
4219
91447636
A
4220 //
4221 // We do not believe that this call to hfs_fsync() is
4222 // necessary and it causes a journal transaction
4223 // deadlock so we are removing it.
4224 //
4225 //if (vnodetype == VREG && !vnode_issystem(vp)) {
4226 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
4227 // if (retval)
4228 // return (retval);
4229 //}
4230
4231 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
4232 hfs_unlock(cp);
6d2010ae 4233 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
2d21ac55
A
4234 /* Force lock since callers expect lock to be held. */
4235 if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
6d2010ae 4236 hfs_unlock_truncate(cp, 0);
91447636
A
4237 return (retval);
4238 }
2d21ac55
A
4239 /* No need to continue if file was removed. */
4240 if (cp->c_flag & C_NOEXISTS) {
6d2010ae 4241 hfs_unlock_truncate(cp, 0);
2d21ac55
A
4242 return (ENOENT);
4243 }
91447636
A
4244 took_trunc_lock = 1;
4245 }
55e303ae
A
4246 headblks = fp->ff_blocks;
4247 datablks = howmany(fp->ff_size, blksize);
4248 growsize = datablks * blksize;
55e303ae
A
4249 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
4250 if (blockHint >= hfsmp->hfs_metazone_start &&
4251 blockHint <= hfsmp->hfs_metazone_end)
4252 eflags |= kEFMetadataMask;
4253
91447636
A
4254 if (hfs_start_transaction(hfsmp) != 0) {
4255 if (took_trunc_lock)
6d2010ae 4256 hfs_unlock_truncate(cp, 0);
91447636 4257 return (EINVAL);
55e303ae 4258 }
91447636
A
4259 started_tr = 1;
4260 /*
4261 * Protect the extents b-tree and the allocation bitmap
4262 * during MapFileBlockC and ExtendFileC operations.
4263 */
4264 lockflags = SFL_BITMAP;
4265 if (overflow_extents(fp))
4266 lockflags |= SFL_EXTENTS;
4267 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
55e303ae 4268
91447636 4269 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
55e303ae
A
4270 if (retval) {
4271 retval = MacToVFSError(retval);
4272 goto out;
4273 }
4274
4275 /*
2d21ac55 4276 * STEP 1 - acquire new allocation blocks.
55e303ae 4277 */
91447636
A
4278 nextallocsave = hfsmp->nextAllocation;
4279 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
4280 if (eflags & kEFMetadataMask) {
4281 HFS_MOUNT_LOCK(hfsmp, TRUE);
2d21ac55
A
4282 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
4283 MarkVCBDirty(hfsmp);
91447636
A
4284 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
4285 }
55e303ae
A
4286
4287 retval = MacToVFSError(retval);
4288 if (retval == 0) {
91447636 4289 cp->c_flag |= C_MODIFIED;
55e303ae
A
4290 if (newbytes < growsize) {
4291 retval = ENOSPC;
4292 goto restore;
4293 } else if (fp->ff_blocks < (headblks + datablks)) {
4294 printf("hfs_relocate: allocation failed\n");
4295 retval = ENOSPC;
4296 goto restore;
4297 }
4298
91447636 4299 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
55e303ae
A
4300 if (retval) {
4301 retval = MacToVFSError(retval);
4302 } else if ((sector_a + 1) == sector_b) {
4303 retval = ENOSPC;
4304 goto restore;
4305 } else if ((eflags & kEFMetadataMask) &&
593a1d5f 4306 ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
55e303ae 4307 hfsmp->hfs_metazone_end)) {
b0d623f7 4308#if 0
2d21ac55
A
4309 const char * filestr;
4310 char emptystr = '\0';
4311
4312 if (cp->c_desc.cd_nameptr != NULL) {
4313 filestr = (const char *)&cp->c_desc.cd_nameptr[0];
4314 } else if (vnode_name(vp) != NULL) {
4315 filestr = vnode_name(vp);
4316 } else {
4317 filestr = &emptystr;
4318 }
b0d623f7 4319#endif
55e303ae
A
4320 retval = ENOSPC;
4321 goto restore;
4322 }
4323 }
91447636
A
4324 /* Done with system locks and journal for now. */
4325 hfs_systemfile_unlock(hfsmp, lockflags);
4326 lockflags = 0;
4327 hfs_end_transaction(hfsmp);
4328 started_tr = 0;
4329
55e303ae
A
4330 if (retval) {
4331 /*
4332 * Check to see if failure is due to excessive fragmentation.
4333 */
91447636
A
4334 if ((retval == ENOSPC) &&
4335 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
55e303ae
A
4336 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
4337 }
4338 goto out;
4339 }
55e303ae 4340 /*
91447636 4341 * STEP 2 - clone file data into the new allocation blocks.
55e303ae
A
4342 */
4343
91447636 4344 if (vnodetype == VLNK)
55e303ae 4345 retval = hfs_clonelink(vp, blksize, cred, p);
91447636 4346 else if (vnode_issystem(vp))
55e303ae
A
4347 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
4348 else
91447636 4349 retval = hfs_clonefile(vp, headblks, datablks, blksize);
ccc36f2f 4350
91447636
A
4351 /* Start transaction for step 3 or for a restore. */
4352 if (hfs_start_transaction(hfsmp) != 0) {
4353 retval = EINVAL;
4354 goto out;
4355 }
4356 started_tr = 1;
55e303ae
A
4357 if (retval)
4358 goto restore;
55e303ae
A
4359
4360 /*
91447636 4361 * STEP 3 - switch to cloned data and remove old blocks.
55e303ae 4362 */
91447636
A
4363 lockflags = SFL_BITMAP;
4364 if (overflow_extents(fp))
4365 lockflags |= SFL_EXTENTS;
4366 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
55e303ae 4367
91447636 4368 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
55e303ae 4369
91447636
A
4370 hfs_systemfile_unlock(hfsmp, lockflags);
4371 lockflags = 0;
55e303ae
A
4372 if (retval)
4373 goto restore;
55e303ae 4374out:
91447636 4375 if (took_trunc_lock)
6d2010ae 4376 hfs_unlock_truncate(cp, 0);
55e303ae 4377
91447636
A
4378 if (lockflags) {
4379 hfs_systemfile_unlock(hfsmp, lockflags);
4380 lockflags = 0;
ccc36f2f
A
4381 }
4382
0c530ab8
A
4383 /* Push cnode's new extent data to disk. */
4384 if (retval == 0) {
4385 (void) hfs_update(vp, MNT_WAIT);
4386 }
55e303ae 4387 if (hfsmp->jnl) {
91447636 4388 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
55e303ae
A
4389 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
4390 else
4391 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
55e303ae 4392 }
91447636 4393exit:
91447636
A
4394 if (started_tr)
4395 hfs_end_transaction(hfsmp);
55e303ae
A
4396
4397 return (retval);
4398
4399restore:
2d21ac55
A
4400 if (fp->ff_blocks == headblks) {
4401 if (took_trunc_lock)
6d2010ae 4402 hfs_unlock_truncate(cp, 0);
91447636 4403 goto exit;
2d21ac55 4404 }
55e303ae
A
4405 /*
4406 * Give back any newly allocated space.
4407 */
91447636
A
4408 if (lockflags == 0) {
4409 lockflags = SFL_BITMAP;
4410 if (overflow_extents(fp))
4411 lockflags |= SFL_EXTENTS;
4412 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4413 }
4414
6d2010ae
A
4415 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
4416 FTOC(fp)->c_fileid, false);
91447636
A
4417
4418 hfs_systemfile_unlock(hfsmp, lockflags);
4419 lockflags = 0;
4420
4421 if (took_trunc_lock)
6d2010ae 4422 hfs_unlock_truncate(cp, 0);
91447636 4423 goto exit;
55e303ae
A
4424}
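
/*
 * Userspace analogy of the three relocation steps pictured above: grow
 * the block range to twice its size (step 1), clone the live data into
 * the new half (step 2), then head-truncate so the file begins at the
 * clone (step 3).  Blocks are modeled as chars and N = 4 is arbitrary.
 */
#if 0
#include <stdio.h>
#include <string.h>

#define N	4

int
main(void)
{
	/* step 1: blocks 0..N-1 hold data, blocks N..2N-1 just acquired */
	char blocks[2 * N + 1] = "ABCD....";

	/* step 2: clone the data into the new blocks */
	memcpy(&blocks[N], &blocks[0], N);

	/* step 3: drop the old head; only the cloned blocks remain */
	memmove(&blocks[0], &blocks[N], N);
	blocks[N] = '\0';

	printf("%s\n", blocks);		/* prints ABCD */
	return (0);
}
#endif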
4425
4426
4427/*
4428 * Clone a symlink.
4429 *
4430 */
4431static int
2d21ac55 4432hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
55e303ae
A
4433{
4434 struct buf *head_bp = NULL;
4435 struct buf *tail_bp = NULL;
4436 int error;
4437
4438
91447636 4439 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
55e303ae
A
4440 if (error)
4441 goto out;
4442
91447636 4443 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
55e303ae
A
4444 if (tail_bp == NULL) {
4445 error = EIO;
4446 goto out;
4447 }
91447636
A
4448 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
4449 error = (int)buf_bwrite(tail_bp);
55e303ae
A
4450out:
4451 if (head_bp) {
91447636
A
4452 buf_markinvalid(head_bp);
4453 buf_brelse(head_bp);
55e303ae 4454 }
91447636 4455 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
55e303ae
A
4456
4457 return (error);
4458}
4459
4460/*
4461 * Clone a file's data within the file.
4462 *
4463 */
4464static int
91447636 4465hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
55e303ae
A
4466{
4467 caddr_t bufp;
55e303ae
A
4468 size_t bufsize;
4469 size_t copysize;
4470 size_t iosize;
55e303ae 4471 size_t offset;
b0d623f7 4472 off_t writebase;
91447636
A
4473 uio_t auio;
4474 int error = 0;
55e303ae 4475
55e303ae
A
4476 writebase = blkstart * blksize;
4477 copysize = blkcnt * blksize;
0c530ab8 4478 iosize = bufsize = MIN(copysize, 128 * 1024);
55e303ae
A
4479 offset = 0;
4480
6d2010ae
A
4481 hfs_unlock(VTOC(vp));
4482
4483#if CONFIG_PROTECT
4484 if ((error = cp_handle_vnop(VTOC(vp), CP_WRITE_ACCESS)) != 0) {
4485 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
4486 return (error);
4487 }
4488#endif /* CONFIG_PROTECT */
4489
55e303ae 4490 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
6d2010ae 4491 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
55e303ae 4492 return (ENOMEM);
6d2010ae 4493 }
55e303ae 4494
b0d623f7 4495 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
55e303ae
A
4496
4497 while (offset < copysize) {
4498 iosize = MIN(copysize - offset, iosize);
4499
b0d623f7 4500 uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
91447636 4501 uio_addiov(auio, (uintptr_t)bufp, iosize);
55e303ae 4502
2d21ac55 4503 error = cluster_read(vp, auio, copysize, IO_NOCACHE);
55e303ae
A
4504 if (error) {
4505 printf("hfs_clonefile: cluster_read failed - %d\n", error);
4506 break;
4507 }
91447636 4508 if (uio_resid(auio) != 0) {
b0d623f7 4509 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
55e303ae
A
4510 error = EIO;
4511 break;
4512 }
4513
b0d623f7 4514 uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
91447636 4515 uio_addiov(auio, (uintptr_t)bufp, iosize);
55e303ae 4516
b0d623f7
A
4517 error = cluster_write(vp, auio, writebase + offset,
4518 writebase + offset + iosize,
91447636 4519 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
55e303ae
A
4520 if (error) {
4521 printf("hfs_clonefile: cluster_write failed - %d\n", error);
4522 break;
4523 }
91447636 4524 if (uio_resid(auio) != 0) {
55e303ae
A
4525 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
4526 error = EIO;
4527 break;
4528 }
4529 offset += iosize;
4530 }
91447636
A
4531 uio_free(auio);
4532
b0d623f7
A
4533 if ((blksize & PAGE_MASK)) {
4534 /*
4535 * since the copy may not have started on a PAGE
4536 * boundary (or may not have ended on one), we
4537 * may have pages left in the cache since NOCACHE
4538 * will let partially written pages linger...
4539 * let's just flush the entire range to make sure
4540 * we don't have any pages left that are beyond
4541 * (or intersect) the real LEOF of this file
4542 */
4543 ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
4544 } else {
4545 /*
4546 * No need to call ubc_sync_range or hfs_invalbuf
4547 * since the file was copied using IO_NOCACHE and
4548 * the copy was done starting and ending on a page
4549 * boundary in the file.
4550 */
4551 }
55e303ae 4552 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
91447636
A
4553
4554 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
55e303ae
A
4555 return (error);
4556}
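
/*
 * Userspace analog of the clone loop in hfs_clonefile above: copy
 * copysize bytes from the start of a file to offset writebase within the
 * same file, at most 128 KB per pass, bailing out on a short read or
 * write the way the loop above treats a nonzero uio_resid as EIO.  The
 * path and sizes below are hypothetical.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

int
main(void)
{
	const char *path = "clonefile.dat";	/* hypothetical test file */
	off_t writebase = 4 * 1024 * 1024;	/* blkstart * blksize     */
	size_t copysize = 1 * 1024 * 1024;	/* blkcnt * blksize       */
	size_t iosize = 128 * 1024;
	size_t offset = 0;
	char *bufp = malloc(iosize);
	int fd = open(path, O_RDWR);
	int error = 0;

	if (fd < 0 || bufp == NULL)
		return (1);
	while (offset < copysize) {
		size_t n = (copysize - offset < iosize) ? copysize - offset : iosize;

		if (pread(fd, bufp, n, offset) != (ssize_t)n) {
			error = 1;	/* short read: treat like the EIO case */
			break;
		}
		if (pwrite(fd, bufp, n, writebase + offset) != (ssize_t)n) {
			error = 1;
			break;
		}
		offset += n;
	}
	free(bufp);
	close(fd);
	return (error);
}
#endif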
4557
4558/*
4559 * Clone a system (metadata) file.
4560 *
4561 */
4562static int
4563hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
91447636 4564 kauth_cred_t cred, struct proc *p)
55e303ae
A
4565{
4566 caddr_t bufp;
4567 char * offset;
4568 size_t bufsize;
4569 size_t iosize;
4570 struct buf *bp = NULL;
91447636
A
4571 daddr64_t blkno;
4572 daddr64_t blk;
4573 daddr64_t start_blk;
4574 daddr64_t last_blk;
55e303ae
A
4575 int breadcnt;
4576 int i;
4577 int error = 0;
4578
4579
4580 iosize = GetLogicalBlockSize(vp);
4581 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
4582 breadcnt = bufsize / iosize;
4583
4584 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
4585 return (ENOMEM);
4586 }
91447636
A
4587 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
4588 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
55e303ae
A
4589 blkno = 0;
4590
91447636 4591 while (blkno < last_blk) {
55e303ae
A
4592 /*
4593 * Read up to a megabyte
4594 */
4595 offset = bufp;
91447636
A
4596 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
4597 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
55e303ae
A
4598 if (error) {
4599 printf("hfs_clonesysfile: meta_bread error %d\n", error);
4600 goto out;
4601 }
91447636
A
4602 if (buf_count(bp) != iosize) {
4603 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
55e303ae
A
4604 goto out;
4605 }
91447636
A
4606 bcopy((char *)buf_dataptr(bp), offset, iosize);
4607
4608 buf_markinvalid(bp);
4609 buf_brelse(bp);
55e303ae 4610 bp = NULL;
91447636 4611
55e303ae
A
4612 offset += iosize;
4613 }
4614
4615 /*
4616 * Write up to a megabyte
4617 */
4618 offset = bufp;
91447636
A
4619 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
4620 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
55e303ae 4621 if (bp == NULL) {
91447636 4622 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
55e303ae
A
4623 error = EIO;
4624 goto out;
4625 }
91447636
A
4626 bcopy(offset, (char *)buf_dataptr(bp), iosize);
4627 error = (int)buf_bwrite(bp);
55e303ae
A
4628 bp = NULL;
4629 if (error)
4630 goto out;
4631 offset += iosize;
4632 }
4633 }
4634out:
4635 if (bp) {
91447636 4636 buf_brelse(bp);
55e303ae
A
4637 }
4638
4639 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
4640
91447636 4641 error = hfs_fsync(vp, MNT_WAIT, 0, p);
55e303ae
A
4642
4643 return (error);
4644}
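
/*
 * Illustrative sketch of the size computation at the top of
 * hfs_clonesysfile: cap the buffer at 1 MB, then round it down to a
 * multiple of the logical block size with `& ~(iosize - 1)`, which is
 * valid because iosize is a power of two.  The sizes below are contrived
 * so the round-down is visible.
 */
#if 0
#include <stdio.h>

int
main(void)
{
	unsigned long iosize = 512;		/* logical block size (power of two) */
	unsigned long bufsize = 999000;		/* candidate buffer size             */

	if (bufsize > 1024 * 1024)
		bufsize = 1024 * 1024;		/* MIN(..., 1 MB)                    */
	bufsize &= ~(iosize - 1);		/* -> 998912, a multiple of 512      */

	printf("%lu\n", bufsize);
	return (0);
}
#endif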