/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* @(#)hfs_readwrite.c 1.0
 *
 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
 *
 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vnode_internal.h>
#include <sys/uio.h>
#include <sys/vfs_context.h>
#include <sys/fsevents.h>
#include <kern/kalloc.h>
#include <sys/disk.h>
#include <sys/sysctl.h>
#include <sys/fsctl.h>

#include <miscfs/specfs/specdev.h>

#include <sys/ubc.h>
#include <sys/ubc_internal.h>

#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>

#include <sys/kdebug.h>

#include "hfs.h"
#include "hfs_attrlist.h"
#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
#include "hfs_dbg.h"

/* Cluster I/O is used only for sizes that are a multiple of 4K and no larger than MAXPHYSIO/2. */
#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))

enum {
    MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
};

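/*
 * MAXHFSFILESIZE above is 2^31 - 1 bytes; hfs_vnop_read() below uses it,
 * on HFS Standard volumes only, to fail reads at offsets past 2 GB with
 * EFBIG.
 */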
/* from bsd/hfs/hfs_vfsops.c */
extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
static int hfs_clonefile(struct vnode *, int, int, int);
static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
static int hfs_minorupdate(struct vnode *vp);
static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);


int flush_cache_on_write = 0;
SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");

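/*
 * Since this is declared via SYSCTL_INT with CTLFLAG_RW, the knob above is
 * tunable at runtime as kern.flush_cache_on_write (it defaults to off);
 * when set, hfs_vnop_write() issues DKIOCSYNCHRONIZECACHE after writes to
 * uncached files.
 */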
/*
 * Read data from a file.
 */
int
hfs_vnop_read(struct vnop_read_args *ap)
{
    uio_t uio = ap->a_uio;
    struct vnode *vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    struct hfsmount *hfsmp;
    off_t filesize;
    off_t filebytes;
    off_t start_resid = uio_resid(uio);
    off_t offset = uio_offset(uio);
    int retval = 0;

    /* Preflight checks */
    if (!vnode_isreg(vp)) {
        /* can only read regular files */
        if (vnode_isdir(vp))
            return (EISDIR);
        else
            return (EPERM);
    }
    if (start_resid == 0)
        return (0);        /* Nothing left to do */
    if (offset < 0)
        return (EINVAL);   /* can't read from a negative offset */

#if HFS_COMPRESSION
    if (VNODE_IS_RSRC(vp)) {
        if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
            return 0;
        }
        /* otherwise read the resource fork normally */
    } else {
        int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
        if (compressed) {
            retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
            if (compressed) {
                if (retval == 0) {
                    /* successful read, update the access time */
                    VTOC(vp)->c_touch_acctime = TRUE;

                    /* compressed files are not hot file candidates */
                    if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
                        VTOF(vp)->ff_bytesread = 0;
                    }
                }
                return retval;
            }
            /* otherwise the file was converted back to a regular file while we were reading it */
            retval = 0;
        }
    }
#endif /* HFS_COMPRESSION */

    cp = VTOC(vp);
    fp = VTOF(vp);
    hfsmp = VTOHFS(vp);

    /* Protect against a size change. */
    hfs_lock_truncate(cp, 0);

    filesize = fp->ff_size;
    filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
    if (offset > filesize) {
        if ((hfsmp->hfs_flags & HFS_STANDARD) &&
            (offset > (off_t)MAXHFSFILESIZE)) {
            retval = EFBIG;
        }
        goto exit;
    }

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
        (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

    retval = cluster_read(vp, uio, filesize, ap->a_ioflag);

    cp->c_touch_acctime = TRUE;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
        (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

    /*
     * Keep track of blocks read.
     */
    if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
        int took_cnode_lock = 0;
        off_t bytesread;

        bytesread = start_resid - uio_resid(uio);

        /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
        if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
            hfs_lock(cp, HFS_FORCE_LOCK);
            took_cnode_lock = 1;
        }
        /*
         * If this file hasn't been seen since the start of
         * the current sampling period then start over.
         */
        if (cp->c_atime < hfsmp->hfc_timebase) {
            struct timeval tv;

            fp->ff_bytesread = bytesread;
            microtime(&tv);
            cp->c_atime = tv.tv_sec;
        } else {
            fp->ff_bytesread += bytesread;
        }
        if (took_cnode_lock)
            hfs_unlock(cp);
    }
exit:
    hfs_unlock_truncate(cp, 0);
    return (retval);
}

/*
 * Write data to a file.
 */
int
hfs_vnop_write(struct vnop_write_args *ap)
{
    uio_t uio = ap->a_uio;
    struct vnode *vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    struct hfsmount *hfsmp;
    kauth_cred_t cred = NULL;
    off_t origFileSize;
    off_t writelimit;
    off_t bytesToAdd = 0;
    off_t actualBytesAdded;
    off_t filebytes;
    off_t offset;
    ssize_t resid;
    int eflags;
    int ioflag = ap->a_ioflag;
    int retval = 0;
    int lockflags;
    int cnode_locked = 0;
    int partialwrite = 0;
    int exclusive_lock = 0;

#if HFS_COMPRESSION
    if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
        int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
        switch(state) {
            case FILE_IS_COMPRESSED:
                return EACCES;
            case FILE_IS_CONVERTING:
                /* if FILE_IS_CONVERTING, we allow writes */
                break;
            default:
                printf("invalid state %d for compressed file\n", state);
                /* fall through */
        }
    }
#endif

    // LP64todo - fix this! uio_resid may be 64-bit value
    resid = uio_resid(uio);
    offset = uio_offset(uio);

    if (ioflag & IO_APPEND) {
        exclusive_lock = 1;
    }

    if (offset < 0)
        return (EINVAL);
    if (resid == 0)
        return (E_NONE);
    if (!vnode_isreg(vp))
        return (EPERM);    /* Can only write regular files */

    cp = VTOC(vp);
    fp = VTOF(vp);
    hfsmp = VTOHFS(vp);

    eflags = kEFDeferMask;    /* defer file block allocations */
#ifdef HFS_SPARSE_DEV
    /*
     * When the underlying device is sparse and space
     * is low (< 8MB), stop doing delayed allocations
     * and begin doing synchronous I/O.
     */
    if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
        (hfs_freeblks(hfsmp, 0) < 2048)) {
        eflags &= ~kEFDeferMask;
        ioflag |= IO_SYNC;
    }
#endif /* HFS_SPARSE_DEV */

again:
    /* Protect against a size change. */
    hfs_lock_truncate(cp, exclusive_lock);

    if (ioflag & IO_APPEND) {
        uio_setoffset(uio, fp->ff_size);
        offset = fp->ff_size;
    }
    if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
        retval = EPERM;
        goto exit;
    }

    origFileSize = fp->ff_size;
    writelimit = offset + resid;
    filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;

    /* If the truncate lock is shared, and if we either have virtual
     * blocks or will need to extend the file, upgrade the truncate
     * to exclusive lock.  If upgrade fails, we lose the lock and
     * have to get exclusive lock again.  Note that we want to
     * grab the truncate lock exclusive even if we're not allocating new blocks
     * because we could still be growing past the LEOF.
     */
    if ((exclusive_lock == 0) &&
        ((fp->ff_unallocblocks != 0) || (writelimit > origFileSize))) {
        exclusive_lock = 1;
        /* Lock upgrade failed and we lost our shared lock, try again */
        if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
            goto again;
        }
    }

    if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
        goto exit;
    }
    cnode_locked = 1;

    if (!exclusive_lock) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
            (int)offset, uio_resid(uio), (int)fp->ff_size,
            (int)filebytes, 0);
    }

    /* Check if we do not need to extend the file */
    if (writelimit <= filebytes) {
        goto sizeok;
    }

    cred = vfs_context_ucred(ap->a_context);
    bytesToAdd = writelimit - filebytes;

#if QUOTA
    retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
                       cred, 0);
    if (retval)
        goto exit;
#endif /* QUOTA */

    if (hfs_start_transaction(hfsmp) != 0) {
        retval = EINVAL;
        goto exit;
    }

    while (writelimit > filebytes) {
        bytesToAdd = writelimit - filebytes;
        if (cred && suser(cred, NULL) != 0)
            eflags |= kEFReserveMask;

        /* Protect extents b-tree and allocation bitmap */
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

        /* Files that are changing size are not hot file candidates. */
        if (hfsmp->hfc_stage == HFC_RECORDING) {
            fp->ff_bytesread = 0;
        }
        retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
                0, eflags, &actualBytesAdded));

        hfs_systemfile_unlock(hfsmp, lockflags);

        if ((actualBytesAdded == 0) && (retval == E_NONE))
            retval = ENOSPC;
        if (retval != E_NONE)
            break;
        filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
            (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
    }
    (void) hfs_update(vp, TRUE);
    (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
    (void) hfs_end_transaction(hfsmp);

    /*
     * If we didn't grow the file enough try a partial write.
     * POSIX expects this behavior.
     */
    if ((retval == ENOSPC) && (filebytes > offset)) {
        retval = 0;
        partialwrite = 1;
        uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
        resid -= bytesToAdd;
        writelimit = filebytes;
    }
sizeok:
    if (retval == E_NONE) {
        off_t filesize;
        off_t zero_off;
        off_t tail_off;
        off_t inval_start;
        off_t inval_end;
        off_t io_start;
        int lflag;
        struct rl_entry *invalid_range;

        if (writelimit > fp->ff_size)
            filesize = writelimit;
        else
            filesize = fp->ff_size;

        lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);

        if (offset <= fp->ff_size) {
            zero_off = offset & ~PAGE_MASK_64;

            /* Check to see whether the area between zero_off and the start
               of the transfer is invalid and should be zero-filled
               as part of the transfer:
             */
            if (offset > zero_off) {
                if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
                    lflag |= IO_HEADZEROFILL;
            }
        } else {
            off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;

            /* The bytes between fp->ff_size and uio->uio_offset must never be
               read without being zeroed.  The current last block is filled with zeroes
               if it holds valid data but in all cases merely do a little bookkeeping
               to track the area from the end of the current last page to the start of
               the area actually written.  For the same reason only the bytes up to the
               start of the page where this write will start are invalidated; any remainder
               before uio->uio_offset is explicitly zeroed as part of the cluster_write.

               Note that inval_start, the start of the page after the current EOF,
               may be past the start of the write, in which case the zeroing
               will be handled by the cluster_write of the actual data.
             */
            inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
            inval_end = offset & ~PAGE_MASK_64;
            zero_off = fp->ff_size;

            if ((fp->ff_size & PAGE_MASK_64) &&
                (rl_scan(&fp->ff_invalidranges,
                         eof_page_base,
                         fp->ff_size - 1,
                         &invalid_range) != RL_NOOVERLAP)) {
                /* The page containing the EOF is not valid, so the
                   entire page must be made inaccessible now.  If the write
                   starts on a page beyond the page containing the eof
                   (inval_end > eof_page_base), add the
                   whole page to the range to be invalidated.  Otherwise
                   (i.e. if the write starts on the same page), zero-fill
                   the entire page explicitly now:
                 */
                if (inval_end > eof_page_base) {
                    inval_start = eof_page_base;
                } else {
                    zero_off = eof_page_base;
                };
            };

            if (inval_start < inval_end) {
                struct timeval tv;
                /* There's some range of data that's going to be marked invalid */

                if (zero_off < inval_start) {
                    /* The pages between inval_start and inval_end are going to be invalidated,
                       and the actual write will start on a page past inval_end.  Now's the last
                       chance to zero-fill the page containing the EOF:
                     */
                    hfs_unlock(cp);
                    cnode_locked = 0;
                    retval = cluster_write(vp, (uio_t) 0,
                            fp->ff_size, inval_start,
                            zero_off, (off_t)0,
                            lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
                    hfs_lock(cp, HFS_FORCE_LOCK);
                    cnode_locked = 1;
                    if (retval) goto ioerr_exit;
                    offset = uio_offset(uio);
                };

                /* Mark the remaining area of the newly allocated space as invalid: */
                rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
                microuptime(&tv);
                cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
                zero_off = fp->ff_size = inval_end;
            };

            if (offset > zero_off) lflag |= IO_HEADZEROFILL;
        };

        /* Check to see whether the area between the end of the write and the end of
           the page it falls in is invalid and should be zero-filled as part of the transfer:
         */
        tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
        if (tail_off > filesize) tail_off = filesize;
        if (tail_off > writelimit) {
            if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
                lflag |= IO_TAILZEROFILL;
            };
        };

        /*
         * if the write starts beyond the current EOF (possibly advanced in the
         * zeroing of the last block, above), then we'll zero fill from the current EOF
         * to where the write begins:
         *
         * NOTE: If (and ONLY if) the portion of the file about to be written is
         * before the current EOF it might be marked as invalid now and must be
         * made readable (removed from the invalid ranges) before cluster_write
         * tries to write it:
         */
        io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
        if (io_start < fp->ff_size) {
            off_t io_end;

            io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
            rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
        };

        hfs_unlock(cp);
        cnode_locked = 0;

        /*
         * We need to tell UBC the fork's new size BEFORE calling
         * cluster_write, in case any of the new pages need to be
         * paged out before cluster_write completes (which does happen
         * in embedded systems due to extreme memory pressure).
         * Similarly, we need to tell hfs_vnop_pageout what the new EOF
         * will be, so that it can pass that on to cluster_pageout, and
         * allow those pageouts.
         *
         * We don't update ff_size yet since we don't want pageins to
         * be able to see uninitialized data between the old and new
         * EOF, until cluster_write has completed and initialized that
         * part of the file.
         *
         * The vnode pager relies on the file size last given to UBC via
         * ubc_setsize.  hfs_vnop_pageout relies on fp->ff_new_size or
         * ff_size (whichever is larger).  NOTE: ff_new_size is always
         * zero, unless we are extending the file via write.
         */
        if (filesize > fp->ff_size) {
            fp->ff_new_size = filesize;
            ubc_setsize(vp, filesize);
        }
        retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
                tail_off, lflag | IO_NOZERODIRTY);
        if (retval) {
            fp->ff_new_size = 0;    /* no longer extending; use ff_size */
            if (filesize > origFileSize) {
                ubc_setsize(vp, origFileSize);
            }
            goto ioerr_exit;
        }

        if (filesize > origFileSize) {
            fp->ff_size = filesize;

            /* Files that are changing size are not hot file candidates. */
            if (hfsmp->hfc_stage == HFC_RECORDING) {
                fp->ff_bytesread = 0;
            }
        }
        fp->ff_new_size = 0;    /* ff_size now has the correct size */

        /* If we wrote some bytes, then touch the change and mod times */
        if (resid > uio_resid(uio)) {
            cp->c_touch_chgtime = TRUE;
            cp->c_touch_modtime = TRUE;
        }
    }
    if (partialwrite) {
        uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
        resid += bytesToAdd;
    }

    // XXXdbg - see radar 4871353 for more info
    {
        if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
            VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
        }
    }

ioerr_exit:
    /*
     * If we successfully wrote any data, and we are not the superuser,
     * we clear the setuid and setgid bits as a precaution against
     * tampering.
     */
    if (cp->c_mode & (S_ISUID | S_ISGID)) {
        cred = vfs_context_ucred(ap->a_context);
        if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
            if (!cnode_locked) {
                hfs_lock(cp, HFS_FORCE_LOCK);
                cnode_locked = 1;
            }
            cp->c_mode &= ~(S_ISUID | S_ISGID);
        }
    }
    if (retval) {
        if (ioflag & IO_UNIT) {
            if (!cnode_locked) {
                hfs_lock(cp, HFS_FORCE_LOCK);
                cnode_locked = 1;
            }
            (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
                               0, 0, ap->a_context);
            // LP64todo - fix this! resid needs to be user_ssize_t
            uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
            uio_setresid(uio, resid);
            filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
        }
    } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
        if (!cnode_locked) {
            hfs_lock(cp, HFS_FORCE_LOCK);
            cnode_locked = 1;
        }
        retval = hfs_update(vp, TRUE);
    }
    /* Updating vcbWrCnt doesn't need to be atomic. */
    hfsmp->vcbWrCnt++;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
        (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
exit:
    if (cnode_locked)
        hfs_unlock(cp);
    hfs_unlock_truncate(cp, exclusive_lock);
    return (retval);
}

/* support for the "bulk-access" fcntl */

#define CACHE_LEVELS 16
#define NUM_CACHE_ENTRIES (64*16)
#define PARENT_IDS_FLAG 0x100

struct access_cache {
    int numcached;
    int cachehits;    /* these two for statistics gathering */
    int lookups;
    unsigned int *acache;
    unsigned char *haveaccess;
};

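/*
 * acache holds directory cnids sorted in ascending order, with the result
 * of the last access check for each id kept in the parallel haveaccess
 * array: 0 means access was granted, ESRCH records a scope-check miss
 * rather than a permission failure (see add_node() and do_access_check()),
 * and any other errno is a hard denial.
 */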
struct access_t {
    uid_t         uid;          /* IN: effective user id */
    short         flags;        /* IN: access requested (i.e. R_OK) */
    short         num_groups;   /* IN: number of groups user belongs to */
    int           num_files;    /* IN: number of files to process */
    int           *file_ids;    /* IN: array of file ids */
    gid_t         *groups;      /* IN: array of groups */
    short         *access;      /* OUT: access info for each file (0 for 'has access') */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_access_t {
    uid_t          uid;         /* IN: effective user id */
    short          flags;       /* IN: access requested (i.e. R_OK) */
    short          num_groups;  /* IN: number of groups user belongs to */
    int            num_files;   /* IN: number of files to process */
    user32_addr_t  file_ids;    /* IN: array of file ids */
    user32_addr_t  groups;      /* IN: array of groups */
    user32_addr_t  access;      /* OUT: access info for each file (0 for 'has access') */
};

struct user64_access_t {
    uid_t          uid;         /* IN: effective user id */
    short          flags;       /* IN: access requested (i.e. R_OK) */
    short          num_groups;  /* IN: number of groups user belongs to */
    int            num_files;   /* IN: number of files to process */
    user64_addr_t  file_ids;    /* IN: array of file ids */
    user64_addr_t  groups;      /* IN: array of groups */
    user64_addr_t  access;      /* OUT: access info for each file (0 for 'has access') */
};


// these are the "extended" versions of the above structures
// note that it is crucial that they be differently sized from
// the regular versions
struct ext_access_t {
    uint32_t   flags;           /* IN: access requested (i.e. R_OK) */
    uint32_t   num_files;       /* IN: number of files to process */
    uint32_t   map_size;        /* IN: size of the bit map */
    uint32_t   *file_ids;       /* IN: array of file ids */
    char       *bitmap;         /* OUT: hash-bitmap of interesting directory ids */
    short      *access;         /* OUT: access info for each file (0 for 'has access') */
    uint32_t   num_parents;     /* future use */
    cnid_t     *parents;        /* future use */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_ext_access_t {
    uint32_t       flags;       /* IN: access requested (i.e. R_OK) */
    uint32_t       num_files;   /* IN: number of files to process */
    uint32_t       map_size;    /* IN: size of the bit map */
    user32_addr_t  file_ids;    /* IN: array of file ids */
    user32_addr_t  bitmap;      /* OUT: hash-bitmap of interesting directory ids */
    user32_addr_t  access;      /* OUT: access info for each file (0 for 'has access') */
    uint32_t       num_parents; /* future use */
    user32_addr_t  parents;     /* future use */
};

struct user64_ext_access_t {
    uint32_t       flags;       /* IN: access requested (i.e. R_OK) */
    uint32_t       num_files;   /* IN: number of files to process */
    uint32_t       map_size;    /* IN: size of the bit map */
    user64_addr_t  file_ids;    /* IN: array of file ids */
    user64_addr_t  bitmap;      /* OUT: hash-bitmap of interesting directory ids */
    user64_addr_t  access;      /* OUT: access info for each file (0 for 'has access') */
    uint32_t       num_parents; /* future use */
    user64_addr_t  parents;     /* future use */
};

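/*
 * These structures arrive through hfs_vnop_ioctl() below: the
 * HFS_BULKACCESS_FSCTL command carries a (user32/user64) access_t and
 * HFS_EXT_BULKACCESS_FSCTL carries the extended variant.  The expected
 * size is chosen by command and by whether the calling process is 64-bit,
 * and do_bulk_access_check() up-casts the 32-bit layouts into a
 * user64_ext_access_t before doing any work.
 */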
/*
 * Perform a binary search for the given parent_id.  The array must be
 * sorted in ascending order and "hi" is the index of its last element.
 * Return value is the index if there is a match.  If no_match_indexp
 * is non-NULL it will be assigned the index at which to insert the item
 * (even if it was not found).
 */
static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
{
    int index=-1;
    unsigned int lo=0;

    do {
        unsigned int mid = ((hi - lo)/2) + lo;
        unsigned int this_id = array[mid];

        if (parent_id == this_id) {
            hi = mid;
            break;
        }

        if (parent_id < this_id) {
            hi = mid;
            continue;
        }

        if (parent_id > this_id) {
            lo = mid + 1;
            continue;
        }
    } while(lo < hi);

    /* check if lo and hi converged on the match */
    if (parent_id == array[hi]) {
        index = hi;
    }

    if (no_match_indexp) {
        *no_match_indexp = hi;
    }

    return index;
}

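/*
 * Look up parent_id in the cache.  Returns 1 on a hit, with *indexp set
 * to the matching slot, or 0 on a miss, with *indexp set to the index at
 * which a new entry should be inserted.
 */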
static int
lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
{
    unsigned int hi;
    int matches = 0;
    int index, no_match_index;

    if (cache->numcached == 0) {
        *indexp = 0;
        return 0;    // table is empty, so insert at index=0 and report no match
    }

    if (cache->numcached > NUM_CACHE_ENTRIES) {
        /*printf("hfs: EGAD! numcached is %d... cut our losses and trim to %d\n",
          cache->numcached, NUM_CACHE_ENTRIES);*/
        cache->numcached = NUM_CACHE_ENTRIES;
    }

    hi = cache->numcached - 1;

    index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);

    /* if no existing entry found, find index for new one */
    if (index == -1) {
        index = no_match_index;
        matches = 0;
    } else {
        matches = 1;
    }

    *indexp = index;
    return matches;
}

/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in).  We currently do a replace rather
 * than an insert if the cache is full.
 */
static void
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
    int lookup_index = -1;

    /* need to do a lookup first if -1 passed for index */
    if (index == -1) {
        if (lookup_bucket(cache, &lookup_index, nodeID)) {
            if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
                // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
                cache->haveaccess[lookup_index] = access;
            }

            /* mission accomplished */
            return;
        } else {
            index = lookup_index;
        }

    }

    /* if the cache is full, do a replace rather than an insert */
    if (cache->numcached >= NUM_CACHE_ENTRIES) {
        //printf("hfs: cache is full (%d). replace at index %d\n", cache->numcached, index);
        cache->numcached = NUM_CACHE_ENTRIES-1;

        if (index > cache->numcached) {
            // printf("hfs: index %d pinned to %d\n", index, cache->numcached);
            index = cache->numcached;
        }
    }

    if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
        index++;
    }

    if (index >= 0 && index < cache->numcached) {
        /* only do bcopy if we're inserting */
        bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
        bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
    }

    cache->acache[index] = nodeID;
    cache->haveaccess[index] = access;
    cache->numcached++;
}


struct cinfo {
    uid_t      uid;
    gid_t      gid;
    mode_t     mode;
    cnid_t     parentcnid;
    u_int16_t  recflags;
};

static int
snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
{
    struct cinfo *cip = (struct cinfo *)arg;

    cip->uid = attrp->ca_uid;
    cip->gid = attrp->ca_gid;
    cip->mode = attrp->ca_mode;
    cip->parentcnid = descp->cd_parentcnid;
    cip->recflags = attrp->ca_recflags;

    return (0);
}

/*
 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id.  If the item
 * isn't incore, then go to the catalog.
 */
static int
do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
    struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
{
    int error = 0;

    /* if this id matches the one the fsctl was called with, skip the lookup */
    if (cnid == skip_cp->c_cnid) {
        cnattrp->ca_uid = skip_cp->c_uid;
        cnattrp->ca_gid = skip_cp->c_gid;
        cnattrp->ca_mode = skip_cp->c_mode;
        cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
        keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
    } else {
        struct cinfo c_info;

        /* otherwise, check the cnode hash in case the file/dir is incore */
        if (hfs_chash_snoop(hfsmp, cnid, snoop_callback, &c_info) == 0) {
            cnattrp->ca_uid = c_info.uid;
            cnattrp->ca_gid = c_info.gid;
            cnattrp->ca_mode = c_info.mode;
            cnattrp->ca_recflags = c_info.recflags;
            keyp->hfsPlus.parentID = c_info.parentcnid;
        } else {
            int lockflags;

            lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

            /* lookup this cnid in the catalog */
            error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);

            hfs_systemfile_unlock(hfsmp, lockflags);

            cache->lookups++;
        }
    }

    return (error);
}

/*
 * Compute whether we have access to the given directory (nodeID) and all its parents.  Cache
 * up to CACHE_LEVELS as we progress towards the root.
 */
static int
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
    struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
    struct vfs_context *my_context,
    char *bitmap,
    uint32_t map_size,
    cnid_t* parents,
    uint32_t num_parents)
{
    int myErr = 0;
    int myResult;
    HFSCatalogNodeID thisNodeID;
    unsigned int myPerms;
    struct cat_attr cnattr;
    int cache_index = -1, scope_index = -1, scope_idx_start = -1;
    CatalogKey catkey;

    int i = 0, ids_to_cache = 0;
    int parent_ids[CACHE_LEVELS];

    thisNodeID = nodeID;
    while (thisNodeID >= kRootDirID) {
        myResult = 0;    /* default to "no access" */

        /* check the cache before resorting to hitting the catalog */

        /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
         * to look any further after hitting cached dir */

        if (lookup_bucket(cache, &cache_index, thisNodeID)) {
            cache->cachehits++;
            myErr = cache->haveaccess[cache_index];
            if (scope_index != -1) {
                if (myErr == ESRCH) {
                    myErr = 0;
                }
            } else {
                scope_index = 0;    // so we'll just use the cache result
                scope_idx_start = ids_to_cache;
            }
            myResult = (myErr == 0) ? 1 : 0;
            goto ExitThisRoutine;
        }


        if (parents) {
            int tmp;
            tmp = cache_binSearch(parents, num_parents - 1, thisNodeID, NULL);
            if (scope_index == -1)
                scope_index = tmp;
            if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
                scope_idx_start = ids_to_cache;
            }
        }

        /* remember which parents we want to cache */
        if (ids_to_cache < CACHE_LEVELS) {
            parent_ids[ids_to_cache] = thisNodeID;
            ids_to_cache++;
        }
        // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
        if (bitmap && map_size) {
            bitmap[(thisNodeID/8)%(map_size)] |= (1<<(thisNodeID&7));
        }


        /* do the lookup (checks the cnode hash, then the catalog) */
        myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
        if (myErr) {
            goto ExitThisRoutine;    /* no access */
        }

        /* Root always gets access. */
        if (suser(myp_ucred, NULL) == 0) {
            thisNodeID = catkey.hfsPlus.parentID;
            myResult = 1;
            continue;
        }

        // if the thing has acl's, do the full permission check
        if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
            struct vnode *vp;

            /* get the vnode for this cnid */
            myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0);
            if ( myErr ) {
                myResult = 0;
                goto ExitThisRoutine;
            }

            thisNodeID = VTOC(vp)->c_parentcnid;

            hfs_unlock(VTOC(vp));

            if (vnode_vtype(vp) == VDIR) {
                myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
            } else {
                myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
            }

            vnode_put(vp);
            if (myErr) {
                myResult = 0;
                goto ExitThisRoutine;
            }
        } else {
            unsigned int flags;

            myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
                cnattr.ca_mode, hfsmp->hfs_mp,
                myp_ucred, theProcPtr);

            if (cnattr.ca_mode & S_IFDIR) {
                flags = R_OK | X_OK;
            } else {
                flags = R_OK;
            }
            if ( (myPerms & flags) != flags) {
                myResult = 0;
                myErr = EACCES;
                goto ExitThisRoutine;    /* no access */
            }

            /* up the hierarchy we go */
            thisNodeID = catkey.hfsPlus.parentID;
        }
    }

    /* if here, we have access to this node */
    myResult = 1;

ExitThisRoutine:
    if (parents && myErr == 0 && scope_index == -1) {
        myErr = ESRCH;
    }

    if (myErr) {
        myResult = 0;
    }
    *err = myErr;

    /* cache the parent directory(ies) */
    for (i = 0; i < ids_to_cache; i++) {
        if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
            add_node(cache, -1, parent_ids[i], ESRCH);
        } else {
            add_node(cache, -1, parent_ids[i], myErr);
        }
    }

    return (myResult);
}

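/*
 * Back end for the HFS_BULKACCESS_FSCTL and HFS_EXT_BULKACCESS_FSCTL
 * commands: copy in the caller's array of file ids (and optional parent
 * ids), evaluate each one with do_access_check(), and copy the per-file
 * results back out.  A 0 in the access array means access is granted;
 * any other value is an errno describing why it was denied.
 */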
static int
do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
    struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
{
    boolean_t is64bit;

    /*
     * NOTE: on entry, the vnode is locked.  In case this vnode
     * happens to be in our list of file_ids, we'll note it so we
     * avoid calling hfs_chashget_nowait() on that id, as that
     * will cause a "locking against myself" panic.
     */
    Boolean check_leaf = true;

    struct user64_ext_access_t *user_access_structp;
    struct user64_ext_access_t tmp_user_access;
    struct access_cache cache;

    int error = 0, prev_parent_check_ok = 1;
    unsigned int i;

    short flags;
    unsigned int num_files = 0;
    int map_size = 0;
    int num_parents = 0;
    int *file_ids = NULL;
    short *access = NULL;
    char *bitmap = NULL;
    cnid_t *parents = NULL;
    int leaf_index;

    cnid_t cnid;
    cnid_t prevParent_cnid = 0;
    unsigned int myPerms;
    short myaccess = 0;
    struct cat_attr cnattr;
    CatalogKey catkey;
    struct cnode *skip_cp = VTOC(vp);
    kauth_cred_t cred = vfs_context_ucred(context);
    proc_t p = vfs_context_proc(context);

    is64bit = proc_is64bit(p);

    /* initialize the local cache and buffers */
    cache.numcached = 0;
    cache.cachehits = 0;
    cache.lookups = 0;
    cache.acache = NULL;
    cache.haveaccess = NULL;

    /* struct copyin done during dispatch... need to copy file_id array separately */
    if (ap->a_data == NULL) {
        error = EINVAL;
        goto err_exit_bulk_access;
    }

    if (is64bit) {
        if (arg_size != sizeof(struct user64_ext_access_t)) {
            error = EINVAL;
            goto err_exit_bulk_access;
        }

        user_access_structp = (struct user64_ext_access_t *)ap->a_data;

    } else if (arg_size == sizeof(struct user32_access_t)) {
        struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;

        // convert an old style bulk-access struct to the new style
        tmp_user_access.flags = accessp->flags;
        tmp_user_access.num_files = accessp->num_files;
        tmp_user_access.map_size = 0;
        tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
        tmp_user_access.bitmap = USER_ADDR_NULL;
        tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
        tmp_user_access.num_parents = 0;
        user_access_structp = &tmp_user_access;

    } else if (arg_size == sizeof(struct user32_ext_access_t)) {
        struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;

        // up-cast from a 32-bit version of the struct
        tmp_user_access.flags = accessp->flags;
        tmp_user_access.num_files = accessp->num_files;
        tmp_user_access.map_size = accessp->map_size;
        tmp_user_access.num_parents = accessp->num_parents;

        tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
        tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
        tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
        tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);

        user_access_structp = &tmp_user_access;
    } else {
        error = EINVAL;
        goto err_exit_bulk_access;
    }

    map_size = user_access_structp->map_size;

    num_files = user_access_structp->num_files;

    num_parents = user_access_structp->num_parents;

    if (num_files < 1) {
        goto err_exit_bulk_access;
    }
    if (num_files > 1024) {
        error = EINVAL;
        goto err_exit_bulk_access;
    }

    if (num_parents > 1024) {
        error = EINVAL;
        goto err_exit_bulk_access;
    }

    file_ids = (int *) kalloc(sizeof(int) * num_files);
    access = (short *) kalloc(sizeof(short) * num_files);
    if (map_size) {
        bitmap = (char *) kalloc(sizeof(char) * map_size);
    }

    if (num_parents) {
        parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
    }

    cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
    cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);

    if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
        if (file_ids) {
            kfree(file_ids, sizeof(int) * num_files);
        }
        if (bitmap) {
            kfree(bitmap, sizeof(char) * map_size);
        }
        if (access) {
            kfree(access, sizeof(short) * num_files);
        }
        if (cache.acache) {
            kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
        }
        if (cache.haveaccess) {
            kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
        }
        if (parents) {
            kfree(parents, sizeof(cnid_t) * num_parents);
        }
        return ENOMEM;
    }

    // make sure the bitmap is zero'ed out...
    if (bitmap) {
        bzero(bitmap, (sizeof(char) * map_size));
    }

    if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
                        num_files * sizeof(int)))) {
        goto err_exit_bulk_access;
    }

    if (num_parents) {
        if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
                            num_parents * sizeof(cnid_t)))) {
            goto err_exit_bulk_access;
        }
    }

    flags = user_access_structp->flags;
    if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
        flags = R_OK;
    }

    /* check if we've been passed leaf node ids or parent ids */
    if (flags & PARENT_IDS_FLAG) {
        check_leaf = false;
    }

    /* Check access to each file_id passed in */
    for (i = 0; i < num_files; i++) {
        leaf_index = -1;
        cnid = (cnid_t) file_ids[i];

        /* root always has access */
        if ((!parents) && (!suser(cred, NULL))) {
            access[i] = 0;
            continue;
        }

        if (check_leaf) {
            /* do the lookup (checks the cnode hash, then the catalog) */
            error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
            if (error) {
                access[i] = (short) error;
                continue;
            }

            if (parents) {
                // Check if the leaf matches one of the parent scopes
                leaf_index = cache_binSearch(parents, num_parents - 1, cnid, NULL);
                if (leaf_index >= 0 && parents[leaf_index] == cnid)
                    prev_parent_check_ok = 0;
                else if (leaf_index >= 0)
                    prev_parent_check_ok = 1;
            }

            // if the thing has acl's, do the full permission check
            if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
                struct vnode *cvp;
                int myErr = 0;
                /* get the vnode for this cnid */
                myErr = hfs_vget(hfsmp, cnid, &cvp, 0);
                if ( myErr ) {
                    access[i] = myErr;
                    continue;
                }

                hfs_unlock(VTOC(cvp));

                if (vnode_vtype(cvp) == VDIR) {
                    myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
                } else {
                    myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
                }

                vnode_put(cvp);
                if (myErr) {
                    access[i] = myErr;
                    continue;
                }
            } else {
                /* before calling CheckAccess(), check the target file for read access */
                myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
                    cnattr.ca_mode, hfsmp->hfs_mp, cred, p);

                /* fail fast if no access */
                if ((myPerms & flags) == 0) {
                    access[i] = EACCES;
                    continue;
                }
            }
        } else {
            /* we were passed an array of parent ids */
            catkey.hfsPlus.parentID = cnid;
        }

        /* if the last guy had the same parent and had access, we're done */
        if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
            cache.cachehits++;
            access[i] = 0;
            continue;
        }

        myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
            skip_cp, p, cred, context, bitmap, map_size, parents, num_parents);

        if (myaccess || (error == ESRCH && leaf_index != -1)) {
            access[i] = 0;    // have access.. no errors to report
        } else {
            access[i] = (error != 0 ? (short) error : EACCES);
        }

        prevParent_cnid = catkey.hfsPlus.parentID;
    }

    /* copyout the access array */
    if ((error = copyout((caddr_t)access, user_access_structp->access,
                         num_files * sizeof (short)))) {
        goto err_exit_bulk_access;
    }
    if (map_size && bitmap) {
        if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
                             map_size * sizeof (char)))) {
            goto err_exit_bulk_access;
        }
    }


err_exit_bulk_access:

    //printf("hfs: on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);

    if (file_ids)
        kfree(file_ids, sizeof(int) * num_files);
    if (parents)
        kfree(parents, sizeof(cnid_t) * num_parents);
    if (bitmap)
        kfree(bitmap, sizeof(char) * map_size);
    if (access)
        kfree(access, sizeof(short) * num_files);
    if (cache.acache)
        kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
    if (cache.haveaccess)
        kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);

    return (error);
}


/* end "bulk-access" support */

/*
 * Callback for use with freeze ioctl.
 */
static int
hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
{
    vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");

    return 0;
}

/*
 * Control filesystem operating characteristics.
 */
int
hfs_vnop_ioctl( struct vnop_ioctl_args /* {
        vnode_t a_vp;
        int a_command;
        caddr_t a_data;
        int a_fflag;
        vfs_context_t a_context;
    } */ *ap)
{
    struct vnode * vp = ap->a_vp;
    struct hfsmount *hfsmp = VTOHFS(vp);
    vfs_context_t context = ap->a_context;
    kauth_cred_t cred = vfs_context_ucred(context);
    proc_t p = vfs_context_proc(context);
    struct vfsstatfs *vfsp;
    boolean_t is64bit;
    off_t jnl_start, jnl_size;
    struct hfs_journal_info *jip;
#if HFS_COMPRESSION
    int compressed = 0;
    off_t uncompressed_size = -1;
    int decmpfs_error = 0;

    if (ap->a_command == F_RDADVISE) {
        /* we need to inspect the decmpfs state of the file as early as possible */
        compressed = hfs_file_is_compressed(VTOC(vp), 0);
        if (compressed) {
            if (VNODE_IS_RSRC(vp)) {
                /* if this is the resource fork, treat it as if it were empty */
                uncompressed_size = 0;
            } else {
                decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
                if (decmpfs_error != 0) {
                    /* failed to get the uncompressed size, we'll check for this later */
                    uncompressed_size = -1;
                }
            }
        }
    }
#endif /* HFS_COMPRESSION */

    is64bit = proc_is64bit(p);

    switch (ap->a_command) {

    case HFS_GETPATH:
    {
        struct vnode *file_vp;
        cnid_t cnid;
        int outlen;
        char *bufptr;
        int error;

        /* Caller must be owner of file system. */
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);
        }
        /* Target vnode must be file system's root. */
        if (!vnode_isvroot(vp)) {
            return (EINVAL);
        }
        bufptr = (char *)ap->a_data;
        cnid = strtoul(bufptr, NULL, 10);

        /* We need to call hfs_vfs_vget to leverage the code that will
         * fix the origin list for us if needed, as opposed to calling
         * hfs_vget, since we will need the parent for the build_path call.
         */
        if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
            return (error);
        }
        error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);
        vnode_put(file_vp);

        return (error);
    }

    case HFS_PREV_LINK:
    case HFS_NEXT_LINK:
    {
        cnid_t linkfileid;
        cnid_t nextlinkid;
        cnid_t prevlinkid;
        int error;

        /* Caller must be owner of file system. */
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);
        }
        /* Target vnode must be file system's root. */
        if (!vnode_isvroot(vp)) {
            return (EINVAL);
        }
        linkfileid = *(cnid_t *)ap->a_data;
        if (linkfileid < kHFSFirstUserCatalogNodeID) {
            return (EINVAL);
        }
        if ((error = hfs_lookuplink(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
            return (error);
        }
        if (ap->a_command == HFS_NEXT_LINK) {
            *(cnid_t *)ap->a_data = nextlinkid;
        } else {
            *(cnid_t *)ap->a_data = prevlinkid;
        }
        return (0);
    }

    case HFS_RESIZE_PROGRESS: {

        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);    /* must be owner of file system */
        }
        if (!vnode_isvroot(vp)) {
            return (EINVAL);
        }
        /* file system must not be mounted read-only */
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return (EROFS);
        }

        return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
    }

    case HFS_RESIZE_VOLUME: {
        u_int64_t newsize;
        u_int64_t cursize;

        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);    /* must be owner of file system */
        }
        if (!vnode_isvroot(vp)) {
            return (EINVAL);
        }

        /* file system must not be mounted read-only */
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return (EROFS);
        }
        newsize = *(u_int64_t *)ap->a_data;
        cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;

        if (newsize > cursize) {
            return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
        } else if (newsize < cursize) {
            return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
        } else {
            return (0);
        }
    }
    case HFS_CHANGE_NEXT_ALLOCATION: {
        int error = 0;    /* Assume success */
        u_int32_t location;

        if (vnode_vfsisrdonly(vp)) {
            return (EROFS);
        }
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);    /* must be owner of file system */
        }
        if (!vnode_isvroot(vp)) {
            return (EINVAL);
        }
        HFS_MOUNT_LOCK(hfsmp, TRUE);
        location = *(u_int32_t *)ap->a_data;
        if ((location >= hfsmp->allocLimit) &&
            (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
            error = EINVAL;
            goto fail_change_next_allocation;
        }
        /* Return previous value. */
        *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
        if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
            /* On magic value for location, set nextAllocation to next block
             * after metadata zone and set flag in mount structure to indicate
             * that nextAllocation should not be updated again.
             */
            if (hfsmp->hfs_metazone_end != 0) {
                HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
            }
            hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
        } else {
            hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
            HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
        }
        MarkVCBDirty(hfsmp);
fail_change_next_allocation:
        HFS_MOUNT_UNLOCK(hfsmp, TRUE);
        return (error);
    }

#ifdef HFS_SPARSE_DEV
    case HFS_SETBACKINGSTOREINFO: {
        struct vnode * bsfs_rootvp;
        struct vnode * di_vp;
        struct hfs_backingstoreinfo *bsdata;
        int error = 0;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return (EROFS);
        }
        if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
            return (EALREADY);
        }
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);    /* must be owner of file system */
        }
        bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
        if (bsdata == NULL) {
            return (EINVAL);
        }
        if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
            return (error);
        }
        if ((error = vnode_getwithref(di_vp))) {
            file_drop(bsdata->backingfd);
            return (error);
        }

        if (vnode_mount(vp) == vnode_mount(di_vp)) {
            (void)vnode_put(di_vp);
            file_drop(bsdata->backingfd);
            return (EINVAL);
        }

        /*
         * Obtain the backing fs root vnode and keep a reference
         * on it.  This reference will be dropped in hfs_unmount.
         */
        error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL);    /* XXX use context! */
        if (error) {
            (void)vnode_put(di_vp);
            file_drop(bsdata->backingfd);
            return (error);
        }
        vnode_ref(bsfs_rootvp);
        vnode_put(bsfs_rootvp);

        hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
        hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
        hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
        hfsmp->hfs_sparsebandblks *= 4;

        vfs_markdependency(hfsmp->hfs_mp);

        /*
         * If the sparse image is on a sparse image file (as opposed to a sparse
         * bundle), then we may need to limit the free space to the maximum size
         * of a file on that volume.  So we query (using pathconf), and if we get
         * a meaningful result, we cache the number of blocks for later use in
         * hfs_freeblks().
         */
        hfsmp->hfs_backingfs_maxblocks = 0;
        if (vnode_vtype(di_vp) == VREG) {
            int terr;
            int hostbits;
            terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
            if (terr == 0 && hostbits != 0 && hostbits < 64) {
                u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;

                hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
            }
        }

        (void)vnode_put(di_vp);
        file_drop(bsdata->backingfd);
        return (0);
    }
    case HFS_CLRBACKINGSTOREINFO: {
        struct vnode * tmpvp;

        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);    /* must be owner of file system */
        }
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return (EROFS);
        }

        if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
            hfsmp->hfs_backingfs_rootvp) {

            hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
            tmpvp = hfsmp->hfs_backingfs_rootvp;
            hfsmp->hfs_backingfs_rootvp = NULLVP;
            hfsmp->hfs_sparsebandblks = 0;
            vnode_rele(tmpvp);
        }
        return (0);
    }
#endif /* HFS_SPARSE_DEV */

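    /*
     * F_FREEZE_FS and F_THAW_FS below implement volume freezing: freeze
     * waits out in-flight writes, flushes the journal, and takes
     * hfs_insync plus the global exclusive lock before waiting for the
     * system files and the device vnode to drain.  Only the process that
     * froze the volume (or the thaw path in hfs_vnop_close()) may thaw
     * it again.
     */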
	case F_FREEZE_FS: {
		struct mount *mp;

		mp = vnode_mount(vp);
		hfsmp = VFSTOHFS(mp);

		if (!(hfsmp->jnl))
			return (ENOTSUP);

		vfsp = vfs_statfs(mp);

		if (kauth_cred_getuid(cred) != vfsp->f_owner &&
		    !kauth_cred_issuser(cred))
			return (EACCES);

		lck_rw_lock_exclusive(&hfsmp->hfs_insync);

		// flush things before we get started to try and prevent
		// dirty data from being paged out while we're frozen.
		// note: can't do this after taking the lock as it will
		// deadlock against ourselves.
		vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
		hfs_global_exclusive_lock_acquire(hfsmp);

		// DO NOT call hfs_journal_flush() because that takes a
		// shared lock on the global exclusive lock!
		journal_flush(hfsmp->jnl);

		// don't need to iterate on all vnodes, we just need to
		// wait for writes to the system files and the device vnode
		if (HFSTOVCB(hfsmp)->extentsRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
		if (HFSTOVCB(hfsmp)->catalogRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
		if (HFSTOVCB(hfsmp)->allocationsRefNum)
			vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
		if (hfsmp->hfs_attribute_vp)
			vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
		vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");

		hfsmp->hfs_freezing_proc = current_proc();

		return (0);
	}

	case F_THAW_FS: {
		vfsp = vfs_statfs(vnode_mount(vp));
		if (kauth_cred_getuid(cred) != vfsp->f_owner &&
		    !kauth_cred_issuser(cred))
			return (EACCES);

		// if we're not the one who froze the fs then we
		// can't thaw it.
		if (hfsmp->hfs_freezing_proc != current_proc()) {
			return EPERM;
		}

		// NOTE: if you add code here, also go check the
		// code that "thaws" the fs in hfs_vnop_close()
		//
		hfsmp->hfs_freezing_proc = NULL;
		hfs_global_exclusive_lock_release(hfsmp);
		lck_rw_unlock_exclusive(&hfsmp->hfs_insync);

		return (0);
	}

	case HFS_BULKACCESS_FSCTL: {
		int size;

		if (hfsmp->hfs_flags & HFS_STANDARD) {
			return EINVAL;
		}

		if (is64bit) {
			size = sizeof(struct user64_access_t);
		} else {
			size = sizeof(struct user32_access_t);
		}

		return do_bulk_access_check(hfsmp, vp, ap, size, context);
	}

	case HFS_EXT_BULKACCESS_FSCTL: {
		int size;

		if (hfsmp->hfs_flags & HFS_STANDARD) {
			return EINVAL;
		}

		if (is64bit) {
			size = sizeof(struct user64_ext_access_t);
		} else {
			size = sizeof(struct user32_ext_access_t);
		}

		return do_bulk_access_check(hfsmp, vp, ap, size, context);
	}
	case HFS_SETACLSTATE: {
		int state;

		if (ap->a_data == NULL) {
			return (EINVAL);
		}

		vfsp = vfs_statfs(HFSTOVFS(hfsmp));
		state = *(int *)ap->a_data;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		// the super-user can enable or disable ACLs on a volume;
		// the volume owner can only enable them.
		if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
			return (EPERM);
		}
		if (state == 0 || state == 1)
			return hfs_set_volxattr(hfsmp, HFS_SETACLSTATE, state);
		else
			return (EINVAL);
	}

	case HFS_SET_XATTREXTENTS_STATE: {
		int state;

		if (ap->a_data == NULL) {
			return (EINVAL);
		}

		state = *(int *)ap->a_data;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}

		/* Only the super-user can enable or disable extent-based
		 * extended attribute support on a volume.
		 */
		if (!is_suser()) {
			return (EPERM);
		}
		if (state == 0 || state == 1)
			return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
		else
			return (EINVAL);
	}

	case F_FULLFSYNC: {
		int error;

		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
		if (error == 0) {
			error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
			hfs_unlock(VTOC(vp));
		}

		return error;
	}

	case F_CHKCLEAN: {
		register struct cnode *cp;
		int error;

		if (!vnode_isreg(vp))
			return EINVAL;

		error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
		if (error == 0) {
			cp = VTOC(vp);
			/*
			 * Used by the regression tests to determine whether
			 * all the dirty pages (via write) have been cleaned
			 * after a call to 'fsync'.
			 */
			error = is_file_clean(vp, VTOF(vp)->ff_size);
			hfs_unlock(cp);
		}
		return (error);
	}

	case F_RDADVISE: {
		register struct radvisory *ra;
		struct filefork *fp;
		int error;

		if (!vnode_isreg(vp))
			return EINVAL;

		ra = (struct radvisory *)(ap->a_data);
		fp = VTOF(vp);

		/* Protect against a size change. */
		hfs_lock_truncate(VTOC(vp), TRUE);

#if HFS_COMPRESSION
		if (compressed && (uncompressed_size == -1)) {
			/* fetching the uncompressed size failed above, so return the error */
			error = decmpfs_error;
		} else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
			   (!compressed && (ra->ra_offset >= fp->ff_size))) {
			error = EFBIG;
		}
#else /* HFS_COMPRESSION */
		if (ra->ra_offset >= fp->ff_size) {
			error = EFBIG;
		}
#endif /* HFS_COMPRESSION */
		else {
			error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
		}

		hfs_unlock_truncate(VTOC(vp), TRUE);
		return (error);
	}
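	/*
	 * Sketch of the fcntl(2) interface this case implements: struct
	 * radvisory carries an offset/length pair, and the offset must lie
	 * below the (uncompressed) end of file or the call fails with EFBIG.
	 *
	 *	#include <fcntl.h>
	 *
	 *	struct radvisory ra;
	 *	ra.ra_offset = 0;		// must be < EOF
	 *	ra.ra_count = 64 * 1024;	// prefetch the first 64 KiB
	 *	(void) fcntl(fd, F_RDADVISE, &ra);
	 */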

	case F_READBOOTSTRAP:
	case F_WRITEBOOTSTRAP:
	{
		struct vnode *devvp = NULL;
		user_fbootstraptransfer_t *user_bootstrapp;
		int devBlockSize;
		int error;
		uio_t auio;
		daddr64_t blockNumber;
		u_int32_t blockOffset;
		u_int32_t xfersize;
		struct buf *bp;
		user_fbootstraptransfer_t user_bootstrap;

		if (!vnode_isvroot(vp))
			return (EINVAL);
		/* LP64 - when the caller is a 64 bit process then we are passed a pointer
		 * to a user_fbootstraptransfer_t else we get a pointer to a
		 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
		 */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		if (is64bit) {
			user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
		}
		else {
			user32_fbootstraptransfer_t *bootstrapp = (user32_fbootstraptransfer_t *)ap->a_data;
			user_bootstrapp = &user_bootstrap;
			user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
			user_bootstrap.fbt_length = bootstrapp->fbt_length;
			user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
		}
		if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
			return EINVAL;

		devvp = VTOHFS(vp)->hfs_devvp;
		auio = uio_create(1, user_bootstrapp->fbt_offset,
				  is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
				  (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
		uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);

		devBlockSize = vfs_devblocksize(vnode_mount(vp));

		while (uio_resid(auio) > 0) {
			blockNumber = uio_offset(auio) / devBlockSize;
			error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
			if (error) {
				if (bp) buf_brelse(bp);
				uio_free(auio);
				return error;
			}

			blockOffset = uio_offset(auio) % devBlockSize;
			xfersize = devBlockSize - blockOffset;
			error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
			if (error) {
				buf_brelse(bp);
				uio_free(auio);
				return error;
			}
			if (uio_rw(auio) == UIO_WRITE) {
				error = VNOP_BWRITE(bp);
				if (error) {
					uio_free(auio);
					return error;
				}
			} else {
				buf_brelse(bp);
			}
		}
		uio_free(auio);
	}
	return 0;

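	/*
	 * User-space sketch of a bootstrap transfer (fbootstraptransfer_t
	 * is the fcntl(2) structure munged above; offset + length may not
	 * exceed the 1024-byte boot-block area, and rootfd is assumed to
	 * reference the volume root):
	 *
	 *	#include <fcntl.h>
	 *
	 *	char buf[1024];
	 *	struct fbootstraptransfer fbt = { 0, sizeof(buf), buf };
	 *	(void) fcntl(rootfd, F_READBOOTSTRAP, &fbt);
	 */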
	case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
	{
		if (is64bit) {
			*(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
		}
		else {
			*(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
		}
		return 0;
	}

	case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
		*(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
		break;

	case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
		*(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
		break;

	case HFS_FSCTL_SET_VERY_LOW_DISK:
		if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
			return EINVAL;
		}

		hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
		break;

	case HFS_FSCTL_SET_LOW_DISK:
		if (   *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
		    || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {

			return EINVAL;
		}

		hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
		break;

	case HFS_FSCTL_SET_DESIRED_DISK:
		if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
			return EINVAL;
		}

		hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
		break;

	case HFS_VOLUME_STATUS:
		*(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
		break;
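	/*
	 * The three setters above keep the notification thresholds strictly
	 * ordered: danger limit < warning limit < desired level, all counted
	 * in allocation blocks.  A sketch with illustrative values (each
	 * call must also respect whatever limits are currently in effect):
	 *
	 *	uint32_t danger = 100, warning = 1000, desired = 10000;
	 *	fsctl(mntpoint, HFS_FSCTL_SET_DESIRED_DISK, &desired, 0);
	 *	fsctl(mntpoint, HFS_FSCTL_SET_LOW_DISK, &warning, 0);
	 *	fsctl(mntpoint, HFS_FSCTL_SET_VERY_LOW_DISK, &danger, 0);
	 */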

	case HFS_SET_BOOT_INFO:
		if (!vnode_isvroot(vp))
			return (EINVAL);
		if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
			return (EACCES);	/* must be superuser or owner of filesystem */
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
		break;

	case HFS_GET_BOOT_INFO:
		if (!vnode_isvroot(vp))
			return (EINVAL);
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		break;

	case HFS_MARK_BOOT_CORRUPT:
		/* Mark the boot volume corrupt by setting
		 * kHFSVolumeInconsistentBit in the volume header.  This will
		 * force fsck_hfs on next mount.
		 */
		if (!is_suser()) {
			return EACCES;
		}

		/* Allowed only on the root vnode of the boot volume */
		if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
		    !vnode_isvroot(vp)) {
			return EINVAL;
		}
		if (hfsmp->hfs_flags & HFS_READ_ONLY) {
			return (EROFS);
		}
		printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
		hfs_mark_volume_inconsistent(hfsmp);
		break;

	case HFS_FSCTL_GET_JOURNAL_INFO:
		jip = (struct hfs_journal_info*)ap->a_data;

		if (vp == NULLVP)
			return EINVAL;

		if (hfsmp->jnl == NULL) {
			jnl_start = 0;
			jnl_size = 0;
		} else {
			jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
			jnl_size = (off_t)hfsmp->jnl_size;
		}

		jip->jstart = jnl_start;
		jip->jsize = jnl_size;
		break;

	case HFS_SET_ALWAYS_ZEROFILL: {
		struct cnode *cp = VTOC(vp);

		if (*(int *)ap->a_data) {
			cp->c_flag |= C_ALWAYS_ZEROFILL;
		} else {
			cp->c_flag &= ~C_ALWAYS_ZEROFILL;
		}
		break;
	}

	default:
		return (ENOTTY);
	}

	return 0;
}

/*
 * select
 */
int
hfs_vnop_select(__unused struct vnop_select_args *ap)
/*
	struct vnop_select_args {
		vnode_t a_vp;
		int a_which;
		int a_fflags;
		void *a_wql;
		vfs_context_t a_context;
	};
*/
{
	/*
	 * We should really check to see if I/O is possible.
	 */
	return (1);
}

/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run.  The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size; currently it's 512.
 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
 */
int
hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
{
	struct filefork *fp = VTOF(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	int retval = E_NONE;
	u_int32_t logBlockSize;
	size_t bytesContAvail = 0;
	off_t blockposition;
	int lockExtBtree;
	int lockflags = 0;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (vpp != NULL)
		*vpp = hfsmp->hfs_devvp;
	if (bnp == NULL)
		return (0);

	logBlockSize = GetLogicalBlockSize(vp);
	blockposition = (off_t)bn * logBlockSize;

	lockExtBtree = overflow_extents(fp);

	if (lockExtBtree)
		lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);

	retval = MacToVFSError(
			MapFileBlockC(HFSTOVCB(hfsmp),
				      (FCB*)fp,
				      MAXPHYSIO,
				      blockposition,
				      bnp,
				      &bytesContAvail));

	if (lockExtBtree)
		hfs_systemfile_unlock(hfsmp, lockflags);

	if (retval == E_NONE) {
		/* Figure out how many read ahead blocks there are */
		if (runp != NULL) {
			if (can_cluster(logBlockSize)) {
				/* Make sure this result never goes negative: */
				*runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
			} else {
				*runp = 0;
			}
		}
	}
	return (retval);
}
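
/*
 * Worked example of the readahead-run math above, with illustrative
 * numbers: for logBlockSize = 4096 and MapFileBlockC() reporting
 * bytesContAvail = 65536 contiguous bytes, the run remaining after the
 * mapped block is (65536 / 4096) - 1 = 15 logical blocks; anything
 * under one block clamps to 0 instead of wrapping negative.
 */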

/*
 * Convert logical block number to file offset.
 */
int
hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
/*
	struct vnop_blktooff_args {
		vnode_t a_vp;
		daddr64_t a_lblkno;
		off_t *a_offset;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);
	*ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);

	return (0);
}

/*
 * Convert file offset to logical block number.
 */
int
hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
/*
	struct vnop_offtoblk_args {
		vnode_t a_vp;
		off_t a_offset;
		daddr64_t *a_lblkno;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);
	*ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));

	return (0);
}
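
/*
 * The two conversions above are exact inverses on block boundaries.
 * With an illustrative 4 KiB logical block size: block 10 maps to
 * offset 10 * 4096 = 40960, and any offset in 40960..45055 truncates
 * back to block 10.
 */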

/*
 * Map file offset to physical block number.
 *
 * If this function is called for a write operation, and if the file
 * had virtual blocks allocated (delayed allocation), real blocks
 * are allocated by calling ExtendFileC().
 *
 * If this function is called for a read operation, and if the file
 * had virtual blocks allocated (delayed allocation), no change
 * to the size of the file is made, and if required, the rangelist
 * is searched for the mapping.
 *
 * System file cnodes are expected to be locked (shared or exclusive).
 */
int
hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
/*
	struct vnop_blockmap_args {
		vnode_t a_vp;
		off_t a_foffset;
		size_t a_size;
		daddr64_t *a_bpn;
		size_t *a_run;
		void *a_poff;
		int a_flags;
		vfs_context_t a_context;
	};
*/
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	size_t bytesContAvail = 0;
	int retval = E_NONE;
	int syslocks = 0;
	int lockflags = 0;
	struct rl_entry *invalid_range;
	enum rl_overlaptype overlaptype;
	int started_tr = 0;
	int tooklock = 0;

#if HFS_COMPRESSION
	if (VNODE_IS_RSRC(vp)) {
		/* allow blockmaps to the resource fork */
	} else {
		if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
			int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
			switch(state) {
				case FILE_IS_COMPRESSED:
					return ENOTSUP;
				case FILE_IS_CONVERTING:
					/* if FILE_IS_CONVERTING, we allow blockmap */
					break;
				default:
					printf("invalid state %d for compressed file\n", state);
					/* fall through */
			}
		}
	}
#endif /* HFS_COMPRESSION */

	/* Do not allow blockmap operation on a directory */
	if (vnode_isdir(vp)) {
		return (ENOTSUP);
	}

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (ap->a_bpn == NULL)
		return (0);

	if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
		if (VTOC(vp)->c_lockowner != current_thread()) {
			hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
			tooklock = 1;
		}
	}
	hfsmp = VTOHFS(vp);
	cp = VTOC(vp);
	fp = VTOF(vp);

retry:
	/* Check virtual blocks only when performing write operation */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		if (hfs_start_transaction(hfsmp) != 0) {
			retval = EINVAL;
			goto exit;
		} else {
			started_tr = 1;
		}
		syslocks = SFL_EXTENTS | SFL_BITMAP;

	} else if (overflow_extents(fp)) {
		syslocks = SFL_EXTENTS;
	}

	if (syslocks)
		lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);

	/*
	 * Check for any delayed allocations.
	 */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		int64_t actbytes;
		u_int32_t loanedBlocks;

		//
		// Make sure we have a transaction.  It's possible
		// that we came in and fp->ff_unallocblocks was zero
		// but during the time we blocked acquiring the extents
		// btree, ff_unallocblocks became non-zero and so we
		// will need to start a transaction.
		//
		if (started_tr == 0) {
			if (syslocks) {
				hfs_systemfile_unlock(hfsmp, lockflags);
				syslocks = 0;
			}
			goto retry;
		}

		/*
		 * Note: ExtendFileC will release any blocks on loan and
		 * acquire real blocks.  So we ask to extend by zero bytes
		 * since ExtendFileC will account for the virtual blocks.
		 */

		loanedBlocks = fp->ff_unallocblocks;
		retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
				     kEFAllMask | kEFNoClumpMask, &actbytes);

		if (retval) {
			fp->ff_unallocblocks = loanedBlocks;
			cp->c_blocks += loanedBlocks;
			fp->ff_blocks += loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);
			hfsmp->loanedBlocks += loanedBlocks;
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);

			hfs_systemfile_unlock(hfsmp, lockflags);
			cp->c_flag |= C_MODIFIED;
			if (started_tr) {
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

				hfs_end_transaction(hfsmp);
				started_tr = 0;
			}
			goto exit;
		}
	}

	retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
			       ap->a_bpn, &bytesContAvail);
	if (syslocks) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		syslocks = 0;
	}

	if (started_tr) {
		(void) hfs_update(vp, TRUE);
		(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
		hfs_end_transaction(hfsmp);
		started_tr = 0;
	}
	if (retval) {
		/* On write, always return error because virtual blocks, if any,
		 * should have been allocated in ExtendFileC().  We do not
		 * allocate virtual blocks on read, therefore return error
		 * only if no virtual blocks are allocated.  Otherwise we search
		 * rangelist for zero-fills
		 */
		if ((MacToVFSError(retval) != ERANGE) ||
		    (ap->a_flags & VNODE_WRITE) ||
		    ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
			goto exit;
		}

		/* Validate if the start offset is within logical file size */
		if (ap->a_foffset > fp->ff_size) {
			goto exit;
		}

		/* Searching file extents has failed for read operation, therefore
		 * search rangelist for any uncommitted holes in the file.
		 */
		overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
				      ap->a_foffset + (off_t)(ap->a_size - 1),
				      &invalid_range);
		switch(overlaptype) {
		case RL_OVERLAPISCONTAINED:
			/* start_offset <= rl_start, end_offset >= rl_end */
			if (ap->a_foffset != invalid_range->rl_start) {
				break;
			}
		case RL_MATCHINGOVERLAP:
			/* start_offset = rl_start, end_offset = rl_end */
		case RL_OVERLAPCONTAINSRANGE:
			/* start_offset >= rl_start, end_offset <= rl_end */
		case RL_OVERLAPSTARTSBEFORE:
			/* start_offset > rl_start, end_offset >= rl_start */
			if ((off_t)fp->ff_size > (invalid_range->rl_end + 1)) {
				bytesContAvail = (invalid_range->rl_end + 1) - ap->a_foffset;
			} else {
				bytesContAvail = fp->ff_size - ap->a_foffset;
			}
			if (bytesContAvail > ap->a_size) {
				bytesContAvail = ap->a_size;
			}
			*ap->a_bpn = (daddr64_t)-1;
			retval = 0;
			break;
		case RL_OVERLAPENDSAFTER:
			/* start_offset < rl_start, end_offset < rl_end */
		case RL_NOOVERLAP:
			break;
		}
		goto exit;
	}

	/* MapFileBlockC() found a valid extent in the filefork.  Search the
	 * mapping information further for invalid file ranges
	 */
	overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
			      ap->a_foffset + (off_t)bytesContAvail - 1,
			      &invalid_range);
	if (overlaptype != RL_NOOVERLAP) {
		switch(overlaptype) {
		case RL_MATCHINGOVERLAP:
		case RL_OVERLAPCONTAINSRANGE:
		case RL_OVERLAPSTARTSBEFORE:
			/* There's no valid block for this byte offset */
			*ap->a_bpn = (daddr64_t)-1;
			/* There's no point limiting the amount to be returned
			 * if the invalid range that was hit extends all the way
			 * to the EOF (i.e. there's no valid bytes between the
			 * end of this range and the file's EOF):
			 */
			if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
			    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
				bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
			}
			break;

		case RL_OVERLAPISCONTAINED:
		case RL_OVERLAPENDSAFTER:
			/* The range of interest hits an invalid block before the end: */
			if (invalid_range->rl_start == ap->a_foffset) {
				/* There's actually no valid information to be had starting here: */
				*ap->a_bpn = (daddr64_t)-1;
				if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
				    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
					bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
				}
			} else {
				bytesContAvail = invalid_range->rl_start - ap->a_foffset;
			}
			break;

		case RL_NOOVERLAP:
			break;
		} /* end switch */
		if (bytesContAvail > ap->a_size)
			bytesContAvail = ap->a_size;
	}

exit:
	if (retval == 0) {
		if (ap->a_run)
			*ap->a_run = bytesContAvail;

		if (ap->a_poff)
			*(int *)ap->a_poff = 0;
	}

	if (tooklock)
		hfs_unlock(cp);

	return (MacToVFSError(retval));
}
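
/*
 * Reading the results above: a physical block of (daddr64_t)-1 tells
 * the caller that the range is an uncommitted zero-fill hole with no
 * backing block yet, and *a_run bounds how many contiguous bytes that
 * answer (hole or real extent) covers before the mapping must be asked
 * for again.
 */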

/*
 * Prepare and issue the I/O.
 * buf_strategy knows how to deal
 * with requests that require
 * fragmented I/Os
 */
int
hfs_vnop_strategy(struct vnop_strategy_args *ap)
{
	buf_t bp = ap->a_bp;
	vnode_t vp = buf_vnode(bp);

	return (buf_strategy(VTOHFS(vp)->hfs_devvp, ap));
}

static int
hfs_minorupdate(struct vnode *vp) {
	struct cnode *cp = VTOC(vp);
	cp->c_flag &= ~C_MODIFIED;
	cp->c_touch_acctime = 0;
	cp->c_touch_chgtime = 0;
	cp->c_touch_modtime = 0;

	return 0;
}

static int
do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context)
{
	register struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	struct proc *p = vfs_context_proc(context);
	kauth_cred_t cred = vfs_context_ucred(context);
	int retval;
	off_t bytesToAdd;
	off_t actualBytesAdded;
	off_t filebytes;
	u_int32_t fileblocks;
	int blksize;
	struct hfsmount *hfsmp;
	int lockflags;

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
		 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

	if (length < 0)
		return (EINVAL);

	/* This should only happen with a corrupt filesystem */
	if ((off_t)fp->ff_size < 0)
		return (EINVAL);

	if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
		return (EFBIG);

	hfsmp = VTOHFS(vp);

	retval = E_NONE;

	/* Files that are changing size are not hot file candidates. */
	if (hfsmp->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
#if QUOTA
	if ((retval = hfs_getinoquota(cp)))
		return (retval);
#endif /* QUOTA */

	/*
	 * Lengthen the size of the file.  We must ensure that the
	 * last byte of the file is allocated.  Since the smallest
	 * value of ff_size is 0, length will be at least 1.
	 */
	if (length > (off_t)fp->ff_size) {
#if QUOTA
		retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
				   cred, 0);
		if (retval)
			goto Err_Exit;
#endif /* QUOTA */
		/*
		 * If we don't have enough physical space then
		 * we need to extend the physical size.
		 */
		if (length > filebytes) {
			int eflags;
			u_int32_t blockHint = 0;

			/* All or nothing and don't round up to clumpsize. */
			eflags = kEFAllMask | kEFNoClumpMask;

			if (cred && suser(cred, NULL) != 0)
				eflags |= kEFReserveMask;  /* keep a reserve */

			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (filebytes == 0 &&
			    hfsmp->hfs_flags & HFS_METADATA_ZONE &&
			    hfs_virtualmetafile(cp)) {
				eflags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;
			}
			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			while ((length > filebytes) && (retval == E_NONE)) {
				bytesToAdd = length - filebytes;
				retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
								   (FCB*)fp,
								   bytesToAdd,
								   blockHint,
								   eflags,
								   &actualBytesAdded));

				filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
				if (actualBytesAdded == 0 && retval == E_NONE) {
					if (length > filebytes)
						length = filebytes;
					break;
				}
			} /* endwhile */

			hfs_systemfile_unlock(hfsmp, lockflags);

			if (hfsmp->jnl) {
				if (skipupdate) {
					(void) hfs_minorupdate(vp);
				}
				else {
					(void) hfs_update(vp, TRUE);
					(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
				}
			}

			hfs_end_transaction(hfsmp);

			if (retval)
				goto Err_Exit;

			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
				(int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
		}

		if (!(flags & IO_NOZEROFILL)) {
			if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
				struct rl_entry *invalid_range;
				off_t zero_limit;

				zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
				if (length < zero_limit) zero_limit = length;

				if (length > (off_t)fp->ff_size) {
					struct timeval tv;

					/* Extending the file: time to fill out the current last page w. zeroes? */
					if ((fp->ff_size & PAGE_MASK_64) &&
					    (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
					    fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {

						/* There's some valid data at the start of the (current) last page
						   of the file, so zero out the remainder of that page to ensure the
						   entire page contains valid data.  Since there is no invalid range
						   possible past the (current) eof, there's no need to remove anything
						   from the invalid range list before calling cluster_write(): */
						hfs_unlock(cp);
						retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
								fp->ff_size, (off_t)0,
								(flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
						hfs_lock(cp, HFS_FORCE_LOCK);
						if (retval) goto Err_Exit;

						/* Merely invalidate the remaining area, if necessary: */
						if (length > zero_limit) {
							microuptime(&tv);
							rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
							cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
						}
					} else {
						/* The page containing the (current) eof is invalid: just add the
						   remainder of the page to the invalid list, along with the area
						   being newly allocated:
						 */
						microuptime(&tv);
						rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
						cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
					}
				}
			} else {
				panic("hfs_truncate: invoked on non-UBC object?!");
			}
		}
		cp->c_touch_modtime = TRUE;
		fp->ff_size = length;

	} else { /* Shorten the size of the file */

		if ((off_t)fp->ff_size > length) {
			/* Any space previously marked as invalid is now irrelevant: */
			rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
		}

		/*
		 * Account for any unmapped blocks.  Note that the new
		 * file length can still end up with unmapped blocks.
		 */
		if (fp->ff_unallocblocks > 0) {
			u_int32_t finalblks;
			u_int32_t loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);

			loanedBlocks = fp->ff_unallocblocks;
			cp->c_blocks -= loanedBlocks;
			fp->ff_blocks -= loanedBlocks;
			fp->ff_unallocblocks = 0;

			hfsmp->loanedBlocks -= loanedBlocks;

			finalblks = (length + blksize - 1) / blksize;
			if (finalblks > fp->ff_blocks) {
				/* calculate required unmapped blocks */
				loanedBlocks = finalblks - fp->ff_blocks;
				hfsmp->loanedBlocks += loanedBlocks;

				fp->ff_unallocblocks = loanedBlocks;
				cp->c_blocks += loanedBlocks;
				fp->ff_blocks += loanedBlocks;
			}
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		}

		/*
		 * For a TBE process the deallocation of the file blocks is
		 * delayed until the file is closed.  And hfs_close calls
		 * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
		 * isn't set, we make sure this isn't a TBE process.
		 */
		if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
#if QUOTA
			off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
#endif /* QUOTA */
			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}

			if (fp->ff_unallocblocks == 0) {
				/* Protect extents b-tree and allocation bitmap */
				lockflags = SFL_BITMAP;
				if (overflow_extents(fp))
					lockflags |= SFL_EXTENTS;
				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

				retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
						(FCB*)fp, length, false));

				hfs_systemfile_unlock(hfsmp, lockflags);
			}
			if (hfsmp->jnl) {
				if (retval == 0) {
					fp->ff_size = length;
				}
				if (skipupdate) {
					(void) hfs_minorupdate(vp);
				}
				else {
					(void) hfs_update(vp, TRUE);
					(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
				}
			}
			hfs_end_transaction(hfsmp);

			filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
			if (retval)
				goto Err_Exit;
#if QUOTA
			/* These are bytesreleased */
			(void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
#endif /* QUOTA */
		}
		/* Only set update flag if the logical length changes */
		if ((off_t)fp->ff_size != length)
			cp->c_touch_modtime = TRUE;
		fp->ff_size = length;
	}
	if (cp->c_mode & (S_ISUID | S_ISGID)) {
		if (!vfs_context_issuser(context)) {
			cp->c_mode &= ~(S_ISUID | S_ISGID);
			skipupdate = 0;
		}
	}
	if (skipupdate) {
		retval = hfs_minorupdate(vp);
	}
	else {
		cp->c_touch_chgtime = TRUE;	/* status changed */
		cp->c_touch_modtime = TRUE;	/* file data was modified */
		retval = hfs_update(vp, MNT_WAIT);
	}
	if (retval) {
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
		     -1, -1, -1, retval, 0);
	}

Err_Exit:

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
		 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);

	return (retval);
}


/*
 * Truncate a cnode to at most length size, freeing (or adding) the
 * disk blocks.
 */
__private_extern__
int
hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
             int skipupdate, vfs_context_t context)
{
	struct filefork *fp = VTOF(vp);
	off_t filebytes;
	u_int32_t fileblocks;
	int blksize, error = 0;
	struct cnode *cp = VTOC(vp);

	/* Cannot truncate an HFS directory! */
	if (vnode_isdir(vp)) {
		return (EISDIR);
	}
	/* A swap file cannot change size. */
	if (vnode_isswap(vp) && (length != 0)) {
		return (EPERM);
	}

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	//
	// Have to do this here so that we don't wind up with
	// i/o pending for blocks that are about to be released
	// if we truncate the file.
	//
	// If skipsetsize is set, then the caller is responsible
	// for the ubc_setsize.
	//
	// Even if skipsetsize is set, if the length is zero we
	// want to call ubc_setsize() because as of SnowLeopard
	// it will no longer cause any page-ins and it will drop
	// any dirty pages so that we don't do any i/o that we
	// don't have to.  This also prevents a race where i/o
	// for truncated blocks may overwrite later data if the
	// blocks get reallocated to a different file.
	//
	if (!skipsetsize || length == 0)
		ubc_setsize(vp, length);

	// have to loop truncating or growing files that are
	// really big because otherwise transactions can get
	// enormous and consume too many kernel resources.

	if (length < filebytes) {
		while (filebytes > length) {
			if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes -= HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
			if (error)
				break;
		}
	} else if (length > filebytes) {
		while (filebytes < length) {
			if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes += HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
			if (error)
				break;
		}
	} else /* Same logical size */ {

		error = do_hfs_truncate(vp, length, flags, skipupdate, context);
	}
	/* Files that are changing size are not hot file candidates. */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}

	return (error);
}
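
/*
 * Illustration of the chunking above, with hypothetical sizes: shrinking
 * a fragmented file by 5 * HFS_BIGFILE_SIZE makes five do_hfs_truncate()
 * calls, one journal transaction per HFS_BIGFILE_SIZE step, so no single
 * transaction has to free an unbounded number of extents.
 */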


/*
 * Preallocate file storage space.
 */
int
hfs_vnop_allocate(struct vnop_allocate_args /* {
		vnode_t a_vp;
		off_t a_length;
		u_int32_t a_flags;
		off_t *a_bytesallocated;
		off_t a_offset;
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	ExtendedVCB *vcb;
	off_t length = ap->a_length;
	off_t startingPEOF;
	off_t moreBytesRequested;
	off_t actualBytesAdded;
	off_t filebytes;
	u_int32_t fileblocks;
	int retval, retval2;
	u_int32_t blockHint;
	u_int32_t extendFlags;   /* For call to ExtendFileC */
	struct hfsmount *hfsmp;
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);
	int lockflags;

	*(ap->a_bytesallocated) = 0;

	if (!vnode_isreg(vp))
		return (EISDIR);
	if (length < (off_t)0)
		return (EINVAL);

	cp = VTOC(vp);

	hfs_lock_truncate(cp, TRUE);

	if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
		goto Err_Exit;
	}

	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);
	vcb = VTOVCB(vp);

	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

	if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
		retval = EINVAL;
		goto Err_Exit;
	}

	/* Fill in the flags word for the call to Extend the file */

	extendFlags = kEFNoClumpMask;
	if (ap->a_flags & ALLOCATECONTIG)
		extendFlags |= kEFContigMask;
	if (ap->a_flags & ALLOCATEALL)
		extendFlags |= kEFAllMask;
	if (cred && suser(cred, NULL) != 0)
		extendFlags |= kEFReserveMask;
	if (hfs_virtualmetafile(cp))
		extendFlags |= kEFMetadataMask;

	retval = E_NONE;
	blockHint = 0;
	startingPEOF = filebytes;

	if (ap->a_flags & ALLOCATEFROMPEOF)
		length += filebytes;
	else if (ap->a_flags & ALLOCATEFROMVOL)
		blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

	/* If no changes are necessary, then we're done */
	if (filebytes == length)
		goto Std_Exit;

	/*
	 * Lengthen the size of the file.  We must ensure that the
	 * last byte of the file is allocated.  Since the smallest
	 * value of filebytes is 0, length will be at least 1.
	 */
	if (length > filebytes) {
		off_t total_bytes_added = 0, orig_request_size;

		orig_request_size = moreBytesRequested = length - filebytes;

#if QUOTA
		retval = hfs_chkdq(cp,
				(int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
				cred, 0);
		if (retval)
			goto Err_Exit;

#endif /* QUOTA */
		/*
		 * Metadata zone checks.
		 */
		if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (hfs_virtualmetafile(cp)) {
				blockHint = hfsmp->hfs_metazone_start;
			} else if ((blockHint >= hfsmp->hfs_metazone_start) &&
				   (blockHint <= hfsmp->hfs_metazone_end)) {
				/*
				 * Move blockHint outside metadata zone.
				 */
				blockHint = hfsmp->hfs_metazone_end + 1;
			}
		}


		while ((length > filebytes) && (retval == E_NONE)) {
			off_t bytesRequested;

			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
				bytesRequested = HFS_BIGFILE_SIZE;
			} else {
				bytesRequested = moreBytesRequested;
			}

			if (extendFlags & kEFContigMask) {
				// if we're on a sparse device, this will force it to do a
				// full scan to find the space needed.
				hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
			}

			retval = MacToVFSError(ExtendFileC(vcb,
						(FCB*)fp,
						bytesRequested,
						blockHint,
						extendFlags,
						&actualBytesAdded));

			if (retval == E_NONE) {
				*(ap->a_bytesallocated) += actualBytesAdded;
				total_bytes_added += actualBytesAdded;
				moreBytesRequested -= actualBytesAdded;
				if (blockHint != 0) {
					blockHint += actualBytesAdded / vcb->blockSize;
				}
			}
			filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

			hfs_systemfile_unlock(hfsmp, lockflags);

			if (hfsmp->jnl) {
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
			}

			hfs_end_transaction(hfsmp);
		}


		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes))
			goto Err_Exit;

		/*
		 * Adjust actualBytesAdded to be allocation block aligned, not
		 * clump size aligned.
		 * NOTE: So what we are reporting does not affect reality
		 * until the file is closed, when we truncate the file to allocation
		 * block size.
		 */
		if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
			*(ap->a_bytesallocated) =
				roundup(orig_request_size, (off_t)vcb->blockSize);

	} else { /* Shorten the size of the file */

		if (fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).
			 */
		}

		retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context);
		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes)) goto Err_Exit;
#if QUOTA
		/* These are bytesreleased */
		(void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED, 0);
#endif /* QUOTA */

		if (fp->ff_size > filebytes) {
			fp->ff_size = filebytes;

			hfs_unlock(cp);
			ubc_setsize(vp, fp->ff_size);
			hfs_lock(cp, HFS_FORCE_LOCK);
		}
	}

Std_Exit:
	cp->c_touch_chgtime = TRUE;
	cp->c_touch_modtime = TRUE;
	retval2 = hfs_update(vp, MNT_WAIT);

	if (retval == 0)
		retval = retval2;
Err_Exit:
	hfs_unlock_truncate(cp, TRUE);
	hfs_unlock(cp);
	return (retval);
}


/*
 * Pagein for HFS filesystem
 */
int
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
	struct vnop_pagein_args {
		vnode_t a_vp,
		upl_t a_pl,
		vm_offset_t a_pl_offset,
		off_t a_f_offset,
		size_t a_size,
		int a_flags
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	int error;

#if HFS_COMPRESSION
	if (VNODE_IS_RSRC(vp)) {
		/* allow pageins of the resource fork */
	} else {
		int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
		if (compressed) {
			error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
			if (compressed) {
				if (error == 0) {
					/* successful page-in, update the access time */
					VTOC(vp)->c_touch_acctime = TRUE;

					/* compressed files are not hot file candidates */
					if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
						VTOF(vp)->ff_bytesread = 0;
					}
				}
				return error;
			}
			/* otherwise the file was converted back to a regular file while we were reading it */
		}
	}
#endif

	error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
			       ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
	/*
	 * Keep track of blocks read.
	 */
	if (!vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
		struct cnode *cp;
		struct filefork *fp;
		int bytesread;
		int took_cnode_lock = 0;

		cp = VTOC(vp);
		fp = VTOF(vp);

		if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
			bytesread = fp->ff_size;
		else
			bytesread = ap->a_size;

		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			took_cnode_lock = 1;
		}
		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
			struct timeval tv;

			fp->ff_bytesread = bytesread;
			microtime(&tv);
			cp->c_atime = tv.tv_sec;
		} else {
			fp->ff_bytesread += bytesread;
		}
		cp->c_touch_acctime = TRUE;
		if (took_cnode_lock)
			hfs_unlock(cp);
	}
	return (error);
}

/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
	struct vnop_pageout_args {
		vnode_t a_vp,
		upl_t a_pl,
		vm_offset_t a_pl_offset,
		off_t a_f_offset,
		size_t a_size,
		int a_flags
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	int retval = 0;
	off_t filesize;
	upl_t upl;
	upl_page_info_t* pl;
	vm_offset_t a_pl_offset;
	int a_flags;
	int is_pageoutv2 = 0;

	cp = VTOC(vp);
	fp = VTOF(vp);

	/*
	 * Figure out where the file ends, for pageout purposes.  If
	 * ff_new_size > ff_size, then we're in the middle of extending the
	 * file via a write, so it is safe (and necessary) that we be able
	 * to pageout up to that point.
	 */
	filesize = fp->ff_size;
	if (fp->ff_new_size > filesize)
		filesize = fp->ff_new_size;

	a_flags = ap->a_flags;
	a_pl_offset = ap->a_pl_offset;

	/*
	 * we can tell if we're getting the new or old behavior from the UPL
	 */
	if ((upl = ap->a_pl) == NULL) {
		int request_flags;

		is_pageoutv2 = 1;
		/*
		 * we're in control of any UPL we commit
		 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
		 */
		a_flags &= ~UPL_NOCOMMIT;
		a_pl_offset = 0;

		/*
		 * take truncate lock (shared) to guard against
		 * zero-fill thru fsync interfering, but only for v2
		 */
		hfs_lock_truncate(cp, 0);

		if (a_flags & UPL_MSYNC) {
			request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
		}
		else {
			request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
		}
		ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);

		if (upl == (upl_t) NULL) {
			retval = EINVAL;
			goto pageout_done;
		}
	}
	/*
	 * from this point forward upl points at the UPL we're working with
	 * it was either passed in or we successfully created it
	 */

	/*
	 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
	 * UPL instead of relying on the UPL passed into us.  We go ahead and do that here,
	 * scanning for dirty ranges.  We'll issue our own N cluster_pageout calls, for
	 * N dirty ranges in the UPL.  Note that this is almost a direct copy of the
	 * logic in vnode_pageout except that we need to do it after grabbing the truncate
	 * lock in HFS so that we don't lock invert ourselves.
	 *
	 * Note that we can still get into this function on behalf of the default pager with
	 * non-V2 behavior (swapfiles).  However in that case, we did not grab locks above
	 * since fsync and other writing threads will grab the locks, then mark the
	 * relevant pages as busy.  But the pageout codepath marks the pages as busy,
	 * and THEN would attempt to grab the truncate lock, which would result in deadlock.  So
	 * we do not try to grab anything for the pre-V2 case, which should only be accessed
	 * by the paging/VM system.
	 */

	if (is_pageoutv2) {
		off_t f_offset;
		int offset;
		int isize;
		int pg_index;
		int error;
		int error_ret = 0;

		isize = ap->a_size;
		f_offset = ap->a_f_offset;

		/*
		 * Scan from the back to find the last page in the UPL, so that we
		 * aren't looking at a UPL that may have already been freed by the
		 * preceding aborts/completions.
		 */
		for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
			if (upl_page_present(pl, --pg_index))
				break;
			if (pg_index == 0) {
				ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
				goto pageout_done;
			}
		}

		/*
		 * initialize the offset variables before we touch the UPL.
		 * a_f_offset is the position into the file, in bytes
		 * offset is the position into the UPL, in bytes
		 * pg_index is the pg# of the UPL we're operating on.
		 * isize is the offset into the UPL of the last non-clean page.
		 */
		isize = ((pg_index + 1) * PAGE_SIZE);

		offset = 0;
		pg_index = 0;

		while (isize) {
			int xsize;
			int num_of_pages;

			if ( !upl_page_present(pl, pg_index)) {
				/*
				 * we asked for RET_ONLY_DIRTY, so it's possible
				 * to get back empty slots in the UPL.
				 * just skip over them
				 */
				f_offset += PAGE_SIZE;
				offset += PAGE_SIZE;
				isize -= PAGE_SIZE;
				pg_index++;

				continue;
			}
			if ( !upl_dirty_page(pl, pg_index)) {
				panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
			}

			/*
			 * We know that we have at least one dirty page.
			 * Now checking to see how many in a row we have
			 */
			num_of_pages = 1;
			xsize = isize - PAGE_SIZE;

			while (xsize) {
				if ( !upl_dirty_page(pl, pg_index + num_of_pages))
					break;
				num_of_pages++;
				xsize -= PAGE_SIZE;
			}
			xsize = num_of_pages * PAGE_SIZE;

			if (!vnode_isswap(vp)) {
				off_t end_of_range;
				int tooklock;

				tooklock = 0;

				if (cp->c_lockowner != current_thread()) {
					if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
						/*
						 * we're in the v2 path, so we are the
						 * owner of the UPL... we may have already
						 * processed some of the UPL, so abort it
						 * from the current working offset to the
						 * end of the UPL
						 */
						ubc_upl_abort_range(upl,
								    offset,
								    ap->a_size - offset,
								    UPL_ABORT_FREE_ON_EMPTY);
						goto pageout_done;
					}
					tooklock = 1;
				}
				end_of_range = f_offset + xsize - 1;

				if (end_of_range >= filesize) {
					end_of_range = (off_t)(filesize - 1);
				}
				if (f_offset < filesize) {
					rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
					cp->c_flag |= C_MODIFIED;  /* leof is dirty */
				}
				if (tooklock) {
					hfs_unlock(cp);
				}
			}
			if ((error = cluster_pageout(vp, upl, offset, f_offset,
						     xsize, filesize, a_flags))) {
				if (error_ret == 0)
					error_ret = error;
			}
			f_offset += xsize;
			offset += xsize;
			isize -= xsize;
			pg_index += num_of_pages;
		}
		/* capture errnos bubbled out of cluster_pageout if they occurred */
		if (error_ret != 0) {
			retval = error_ret;
		}
	} /* end block for v2 pageout behavior */
	else {
		if (!vnode_isswap(vp)) {
			off_t end_of_range;
			int tooklock = 0;

			if (cp->c_lockowner != current_thread()) {
				if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
					if (!(a_flags & UPL_NOCOMMIT)) {
						ubc_upl_abort_range(upl,
						                    a_pl_offset,
						                    ap->a_size,
						                    UPL_ABORT_FREE_ON_EMPTY);
					}
					goto pageout_done;
				}
				tooklock = 1;
			}
			end_of_range = ap->a_f_offset + ap->a_size - 1;

			if (end_of_range >= filesize) {
				end_of_range = (off_t)(filesize - 1);
			}
			if (ap->a_f_offset < filesize) {
				rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
				cp->c_flag |= C_MODIFIED;  /* leof is dirty */
			}

			if (tooklock) {
				hfs_unlock(cp);
			}
		}
		/*
		 * just call cluster_pageout for old pre-v2 behavior
		 */
		retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
		                         ap->a_size, filesize, a_flags);
	}

	/*
	 * If data was written, update the modification time of the file.
	 * If setuid or setgid bits are set and this process is not the
	 * superuser then clear the setuid and setgid bits as a precaution
	 * against tampering.
	 */
	if (retval == 0) {
		cp->c_touch_modtime = TRUE;
		cp->c_touch_chgtime = TRUE;
		if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
		    (vfs_context_suser(ap->a_context) != 0)) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			cp->c_mode &= ~(S_ISUID | S_ISGID);
			hfs_unlock(cp);
		}
	}

pageout_done:
	if (is_pageoutv2) {
		/* release truncate lock (shared) */
		hfs_unlock_truncate(cp, 0);
	}
	return (retval);
}

/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
	int retval = 0;
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
	    (vp == VTOHFS(vp)->hfc_filevp)) {

		/*
		 * Swap and validate the node if it is in native byte order.
		 * This is always true on big endian, so we always validate
		 * before writing here. On little endian, the node typically has
		 * been swapped and validated when it was written to the journal,
		 * so we won't do anything here.
		 */
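		/*
		 * The last two bytes of a B-tree node hold the offset of the
		 * node's first record, which immediately follows the 14-byte
		 * node descriptor.  Reading 0x000e here in host byte order
		 * means the node has not been swapped yet.
		 */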
		if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = (char *)buf_dataptr(bp);
			block.blockNum = buf_lblkno(bp);
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (buf_fromcache(bp) == 0);
			block.blockSize = buf_count(bp);

			/* Endian un-swap B-Tree node */
			retval = hfs_swap_BTNode(&block, vp, kSwapBTNodeHostToBig, false);
			if (retval)
				panic("hfs_vnop_bwrite: about to write corrupt node!\n");
		}
	}

	/* This buffer shouldn't be locked anymore, but if it is, clear it */
	if ((buf_flags(bp) & B_LOCKED)) {
		// XXXdbg
		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp %p\n", bp);
		}
		buf_clearflags(bp, B_LOCKED);
	}
	retval = vn_bwrite(ap);

	return (retval);
}

/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------
 * |///////////////|
 * -----------------
 * 0               N (file offset)
 *
 * -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 * -----------------     -----------------
 * 0               N     N+1              2N
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 * -----------------     -----------------
 * 0               N     N+1              2N
 *
 *                       -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                       -----------------
 *                       0               N
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
__private_extern__
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
             struct proc *p)
55e303ae 3649{
91447636 3650 struct cnode *cp;
55e303ae
A
3651 struct filefork *fp;
3652 struct hfsmount *hfsmp;
55e303ae
A
3653 u_int32_t headblks;
3654 u_int32_t datablks;
3655 u_int32_t blksize;
55e303ae
A
3656 u_int32_t growsize;
3657 u_int32_t nextallocsave;
91447636 3658 daddr64_t sector_a, sector_b;
55e303ae 3659 int eflags;
55e303ae 3660 off_t newbytes;
91447636
A
3661 int retval;
3662 int lockflags = 0;
3663 int took_trunc_lock = 0;
3664 int started_tr = 0;
3665 enum vtype vnodetype;
3666
3667 vnodetype = vnode_vtype(vp);
3668 if (vnodetype != VREG && vnodetype != VLNK) {
55e303ae
A
3669 return (EPERM);
3670 }
3671
3672 hfsmp = VTOHFS(vp);
3673 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
3674 return (ENOSPC);
3675 }

	cp = VTOC(vp);
	fp = VTOF(vp);
	if (fp->ff_unallocblocks)
		return (EINVAL);
	blksize = hfsmp->blockSize;
	if (blockHint == 0)
		blockHint = hfsmp->nextAllocation;

	if ((fp->ff_size > 0x7fffffff) ||
	    ((fp->ff_size > blksize) && vnodetype == VLNK)) {
		return (EFBIG);
	}

	//
	// We do not believe that this call to hfs_fsync() is
	// necessary and it causes a journal transaction
	// deadlock so we are removing it.
	//
	//if (vnodetype == VREG && !vnode_issystem(vp)) {
	//	retval = hfs_fsync(vp, MNT_WAIT, 0, p);
	//	if (retval)
	//		return (retval);
	//}

	if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
		hfs_unlock(cp);
		hfs_lock_truncate(cp, TRUE);
		/* Force lock since callers expect the lock to be held. */
		if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
			hfs_unlock_truncate(cp, TRUE);
			return (retval);
		}
		/* No need to continue if file was removed. */
		if (cp->c_flag & C_NOEXISTS) {
			hfs_unlock_truncate(cp, TRUE);
			return (ENOENT);
		}
		took_trunc_lock = 1;
	}
	headblks = fp->ff_blocks;
	datablks = howmany(fp->ff_size, blksize);
	growsize = datablks * blksize;
	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
	if (blockHint >= hfsmp->hfs_metazone_start &&
	    blockHint <= hfsmp->hfs_metazone_end)
		eflags |= kEFMetadataMask;

	if (hfs_start_transaction(hfsmp) != 0) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, TRUE);
		return (EINVAL);
	}
	started_tr = 1;
	/*
	 * Protect the extents b-tree and the allocation bitmap
	 * during MapFileBlockC and ExtendFileC operations.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

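	/*
	 * Remember the sector of the last byte of the current allocation
	 * so that, after extending, we can tell whether the new blocks
	 * were simply appended contiguously (sector_a + 1 == sector_b),
	 * in which case nothing would actually be relocated.
	 */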
	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
	if (retval) {
		retval = MacToVFSError(retval);
		goto out;
	}

	/*
	 * STEP 1 - acquire new allocation blocks.
	 */
	nextallocsave = hfsmp->nextAllocation;
	retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
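	/*
	 * When allocating in the metadata zone, put the allocation
	 * pointer back where it was, so that normal file allocations
	 * do not resume from inside the zone.
	 */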
	if (eflags & kEFMetadataMask) {
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
		MarkVCBDirty(hfsmp);
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	}

	retval = MacToVFSError(retval);
	if (retval == 0) {
		cp->c_flag |= C_MODIFIED;
		if (newbytes < growsize) {
			retval = ENOSPC;
			goto restore;
		} else if (fp->ff_blocks < (headblks + datablks)) {
			printf("hfs_relocate: allocation failed\n");
			retval = ENOSPC;
			goto restore;
		}

		retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
		if (retval) {
			retval = MacToVFSError(retval);
		} else if ((sector_a + 1) == sector_b) {
			retval = ENOSPC;
			goto restore;
		} else if ((eflags & kEFMetadataMask) &&
		    ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
		      hfsmp->hfs_metazone_end)) {
#if 0
			const char * filestr;
			char emptystr = '\0';

			if (cp->c_desc.cd_nameptr != NULL) {
				filestr = (const char *)&cp->c_desc.cd_nameptr[0];
			} else if (vnode_name(vp) != NULL) {
				filestr = vnode_name(vp);
			} else {
				filestr = &emptystr;
			}
#endif
			retval = ENOSPC;
			goto restore;
		}
	}
	/* Done with system locks and journal for now. */
	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	hfs_end_transaction(hfsmp);
	started_tr = 0;

	if (retval) {
		/*
		 * Check to see if failure is due to excessive fragmentation.
		 */
		if ((retval == ENOSPC) &&
		    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
			hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
		}
		goto out;
	}
	/*
	 * STEP 2 - clone file data into the new allocation blocks.
	 */

	if (vnodetype == VLNK)
		retval = hfs_clonelink(vp, blksize, cred, p);
	else if (vnode_issystem(vp))
		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	else
		retval = hfs_clonefile(vp, headblks, datablks, blksize);

	/* Start transaction for step 3 or for a restore. */
	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto out;
	}
	started_tr = 1;
	if (retval)
		goto restore;

	/*
	 * STEP 3 - switch to cloned data and remove old blocks.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	if (retval)
		goto restore;
55e303ae 3844out:
91447636 3845 if (took_trunc_lock)
2d21ac55 3846 hfs_unlock_truncate(cp, TRUE);
55e303ae 3847
91447636
A
3848 if (lockflags) {
3849 hfs_systemfile_unlock(hfsmp, lockflags);
3850 lockflags = 0;
ccc36f2f
A
3851 }
3852
0c530ab8
A
3853 /* Push cnode's new extent data to disk. */
3854 if (retval == 0) {
3855 (void) hfs_update(vp, MNT_WAIT);
3856 }
55e303ae 3857 if (hfsmp->jnl) {
91447636 3858 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
55e303ae
A
3859 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
3860 else
3861 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
55e303ae 3862 }
91447636 3863exit:
91447636
A
3864 if (started_tr)
3865 hfs_end_transaction(hfsmp);
55e303ae
A
3866
3867 return (retval);

restore:
	if (fp->ff_blocks == headblks) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, TRUE);
		goto exit;
	}
	/*
	 * Give back any newly allocated space.
	 */
	if (lockflags == 0) {
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	}

	(void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;

	if (took_trunc_lock)
		hfs_unlock_truncate(cp, TRUE);
	goto exit;
}


/*
 * Clone a symlink.
 *
 */
static int
hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
{
	struct buf *head_bp = NULL;
	struct buf *tail_bp = NULL;
	int error;

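	/*
	 * A symlink's data fits in a single block (larger ones were
	 * rejected with EFBIG above): logical block 0 holds the existing
	 * copy and logical block 1 maps to the newly allocated block, so
	 * the clone is a one-block read/copy/write.
	 */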
	error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
	if (error)
		goto out;

	tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
	if (tail_bp == NULL) {
		error = EIO;
		goto out;
	}
	bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
	error = (int)buf_bwrite(tail_bp);
out:
	if (head_bp) {
		buf_markinvalid(head_bp);
		buf_brelse(head_bp);
	}
	(void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);

	return (error);
}

/*
 * Clone a file's data within the file.
 *
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
	caddr_t bufp;
	size_t bufsize;
	size_t copysize;
	size_t iosize;
	size_t offset;
	off_t writebase;
	uio_t auio;
	int error = 0;

	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 128 * 1024);
	offset = 0;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}
	hfs_unlock(VTOC(vp));

	auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);

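	/*
	 * Copy the fork's data in bufsize chunks: read from the start of
	 * the old blocks and write the same bytes at writebase (the file
	 * offset of the first new block), bypassing the cache in both
	 * directions.
	 */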
	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);

		uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_read(vp, auio, copysize, IO_NOCACHE);
		if (error) {
			printf("hfs_clonefile: cluster_read failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
			error = EIO;
			break;
		}

		uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_write(vp, auio, writebase + offset,
		                      writebase + offset + iosize,
		                      uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
		if (error) {
			printf("hfs_clonefile: cluster_write failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
			error = EIO;
			break;
		}
		offset += iosize;
	}
	uio_free(auio);

	if ((blksize & PAGE_MASK)) {
		/*
		 * since the copy may not have started on a PAGE
		 * boundary (or may not have ended on one), we
		 * may have pages left in the cache since NOCACHE
		 * will let partially written pages linger...
		 * let's just flush the entire range to make sure
		 * we don't have any pages left that are beyond
		 * (or intersect) the real LEOF of this file
		 */
		ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
	} else {
		/*
		 * No need to call ubc_sync_range or hfs_invalbuf
		 * since the file was copied using IO_NOCACHE and
		 * the copy was done starting and ending on a page
		 * boundary in the file.
		 */
	}
	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
	return (error);
}

/*
 * Clone a system (metadata) file.
 *
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
	caddr_t bufp;
	char * offset;
	size_t bufsize;
	size_t iosize;
	struct buf *bp = NULL;
	daddr64_t blkno;
	daddr64_t blk;
	daddr64_t start_blk;
	daddr64_t last_blk;
	int breadcnt;
	int i;
	int error = 0;


	iosize = GetLogicalBlockSize(vp);
	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
	breadcnt = bufsize / iosize;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}
	start_blk = ((daddr64_t)blkstart * blksize) / iosize;
	last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
	blkno = 0;

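	/*
	 * Stage the copy through the buffer: each pass reads up to
	 * bufsize worth of logical blocks from the old area (starting at
	 * block 0) and writes them at start_blk, the first block of the
	 * new allocation.
	 */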
	while (blkno < last_blk) {
		/*
		 * Read up to a megabyte
		 */
		offset = bufp;
		for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
			error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
			if (error) {
				printf("hfs_clonesysfile: meta_bread error %d\n", error);
				goto out;
			}
			if (buf_count(bp) != iosize) {
				printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
				error = EIO;
				goto out;
			}
			bcopy((char *)buf_dataptr(bp), offset, iosize);

			buf_markinvalid(bp);
			buf_brelse(bp);
			bp = NULL;

			offset += iosize;
		}

		/*
		 * Write up to a megabyte
		 */
		offset = bufp;
		for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
			bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
			if (bp == NULL) {
				printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
				error = EIO;
				goto out;
			}
			bcopy(offset, (char *)buf_dataptr(bp), iosize);
			error = (int)buf_bwrite(bp);
			bp = NULL;
			if (error)
				goto out;
			offset += iosize;
		}
	}
out:
	if (bp) {
		buf_brelse(bp);
	}

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	error = hfs_fsync(vp, MNT_WAIT, 0, p);

	return (error);
}