/*
 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* @(#)hfs_readwrite.c 1.0
 *
 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
 *
 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vnode_internal.h>
#include <sys/uio.h>
#include <sys/vfs_context.h>
#include <sys/fsevents.h>
#include <kern/kalloc.h>
#include <sys/disk.h>
#include <sys/sysctl.h>
#include <sys/fsctl.h>

#include <miscfs/specfs/specdev.h>

#include <sys/ubc.h>
#include <sys/ubc_internal.h>

#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>

#include <sys/kdebug.h>

#include "hfs.h"
#include "hfs_attrlist.h"
#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
#include "hfs_dbg.h"

#define can_cluster(size)  ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
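/* i.e. cluster I/O is considered worthwhile only for sizes that are a
   multiple of 4 KB and no larger than half of MAXPHYSIO */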

enum {
    MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
};
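/* 0x7FFFFFFF is 2 GiB - 1, the file-size ceiling on plain (non-Plus) HFS
   volumes; see the HFS_STANDARD check in hfs_vnop_read() below. */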

/* from bsd/hfs/hfs_vfsops.c */
extern int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
static int hfs_clonefile(struct vnode *, int, int, int);
static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
static int hfs_minorupdate(struct vnode *vp);
static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);


int flush_cache_on_write = 0;
SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
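/* e.g. this can be toggled at run time with `sysctl -w kern.flush_cache_on_write=1` */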


/*
 * Read data from a file.
 */
int
hfs_vnop_read(struct vnop_read_args *ap)
{
    uio_t uio = ap->a_uio;
    struct vnode *vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    struct hfsmount *hfsmp;
    off_t filesize;
    off_t filebytes;
    off_t start_resid = uio_resid(uio);
    off_t offset = uio_offset(uio);
    int retval = 0;

    /* Preflight checks */
    if (!vnode_isreg(vp)) {
        /* can only read regular files */
        if (vnode_isdir(vp))
            return (EISDIR);
        else
            return (EPERM);
    }
    if (start_resid == 0)
        return (0);        /* Nothing left to do */
    if (offset < 0)
        return (EINVAL);    /* can't read from a negative offset */

#if HFS_COMPRESSION
    if (VNODE_IS_RSRC(vp)) {
        if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
            return 0;
        }
        /* otherwise read the resource fork normally */
    } else {
        int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
        if (compressed) {
            retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
            if (compressed) {
                if (retval == 0) {
                    /* successful read, update the access time */
                    VTOC(vp)->c_touch_acctime = TRUE;

                    /* compressed files are not hot file candidates */
                    if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
                        VTOF(vp)->ff_bytesread = 0;
                    }
                }
                return retval;
            }
            /* otherwise the file was converted back to a regular file while we were reading it */
            retval = 0;
        }
    }
#endif /* HFS_COMPRESSION */

    cp = VTOC(vp);
    fp = VTOF(vp);
    hfsmp = VTOHFS(vp);

    /* Protect against a size change. */
    hfs_lock_truncate(cp, 0);

    filesize = fp->ff_size;
    filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
    if (offset > filesize) {
        if ((hfsmp->hfs_flags & HFS_STANDARD) &&
            (offset > (off_t)MAXHFSFILESIZE)) {
            retval = EFBIG;
        }
        goto exit;
    }

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
        (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

    retval = cluster_read(vp, uio, filesize, ap->a_ioflag);

    cp->c_touch_acctime = TRUE;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
        (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

    /*
     * Keep track of blocks read.
     */
    if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
        int took_cnode_lock = 0;
        off_t bytesread;

        bytesread = start_resid - uio_resid(uio);

        /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
        if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
            hfs_lock(cp, HFS_FORCE_LOCK);
            took_cnode_lock = 1;
        }
        /*
         * If this file hasn't been seen since the start of
         * the current sampling period then start over.
         */
        if (cp->c_atime < hfsmp->hfc_timebase) {
            struct timeval tv;

            fp->ff_bytesread = bytesread;
            microtime(&tv);
            cp->c_atime = tv.tv_sec;
        } else {
            fp->ff_bytesread += bytesread;
        }
        if (took_cnode_lock)
            hfs_unlock(cp);
    }
exit:
    hfs_unlock_truncate(cp, 0);
    return (retval);
}

/*
 * Write data to a file.
 */
int
hfs_vnop_write(struct vnop_write_args *ap)
{
    uio_t uio = ap->a_uio;
    struct vnode *vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    struct hfsmount *hfsmp;
    kauth_cred_t cred = NULL;
    off_t origFileSize;
    off_t writelimit;
    off_t bytesToAdd = 0;
    off_t actualBytesAdded;
    off_t filebytes;
    off_t offset;
    ssize_t resid;
    int eflags;
    int ioflag = ap->a_ioflag;
    int retval = 0;
    int lockflags;
    int cnode_locked = 0;
    int partialwrite = 0;
    int exclusive_lock = 0;

#if HFS_COMPRESSION
    if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
        int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
        switch(state) {
            case FILE_IS_COMPRESSED:
                return EACCES;
            case FILE_IS_CONVERTING:
                /* if FILE_IS_CONVERTING, we allow writes */
                break;
            default:
                printf("invalid state %d for compressed file\n", state);
                /* fall through */
        }
    }
#endif

    // LP64todo - fix this! uio_resid may be 64-bit value
    resid = uio_resid(uio);
    offset = uio_offset(uio);

    if (ioflag & IO_APPEND) {
        exclusive_lock = 1;
    }

    if (offset < 0)
        return (EINVAL);
    if (resid == 0)
        return (E_NONE);
    if (!vnode_isreg(vp))
        return (EPERM);  /* Can only write regular files */

    cp = VTOC(vp);
    fp = VTOF(vp);
    hfsmp = VTOHFS(vp);

    eflags = kEFDeferMask;    /* defer file block allocations */
#ifdef HFS_SPARSE_DEV
    /*
     * When the underlying device is sparse and space
     * is low (< 8MB), stop doing delayed allocations
     * and begin doing synchronous I/O.
     */
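    /* (2048 allocation blocks is roughly 8 MB at the common 4 KB block size) */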
    if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
        (hfs_freeblks(hfsmp, 0) < 2048)) {
        eflags &= ~kEFDeferMask;
        ioflag |= IO_SYNC;
    }
#endif /* HFS_SPARSE_DEV */

again:
    /* Protect against a size change. */
    hfs_lock_truncate(cp, exclusive_lock);

    if (ioflag & IO_APPEND) {
        uio_setoffset(uio, fp->ff_size);
        offset = fp->ff_size;
    }
    if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
        retval = EPERM;
        goto exit;
    }

    origFileSize = fp->ff_size;
    writelimit = offset + resid;
    filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;

    /* If the truncate lock is shared, and if we either have virtual
     * blocks or will need to extend the file, upgrade the truncate
     * to exclusive lock.  If upgrade fails, we lose the lock and
     * have to get exclusive lock again.  Note that we want to
     * grab the truncate lock exclusive even if we're not allocating new blocks
     * because we could still be growing past the LEOF.
     */
    if ((exclusive_lock == 0) &&
        ((fp->ff_unallocblocks != 0) || (writelimit > origFileSize))) {
        exclusive_lock = 1;
        /* Lock upgrade failed and we lost our shared lock, try again */
        if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
            goto again;
        }
    }

    if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
        goto exit;
    }
    cnode_locked = 1;

    if (!exclusive_lock) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
            (int)offset, uio_resid(uio), (int)fp->ff_size,
            (int)filebytes, 0);
    }

    /* Check if we do not need to extend the file */
    if (writelimit <= filebytes) {
        goto sizeok;
    }

    cred = vfs_context_ucred(ap->a_context);
    bytesToAdd = writelimit - filebytes;

#if QUOTA
    retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
               cred, 0);
    if (retval)
        goto exit;
#endif /* QUOTA */

    if (hfs_start_transaction(hfsmp) != 0) {
        retval = EINVAL;
        goto exit;
    }

    while (writelimit > filebytes) {
        bytesToAdd = writelimit - filebytes;
        if (cred && suser(cred, NULL) != 0)
            eflags |= kEFReserveMask;
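            /* (kEFReserveMask: ordinary users may not dip into the volume's
               free-block reserve) */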

        /* Protect extents b-tree and allocation bitmap */
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

        /* Files that are changing size are not hot file candidates. */
        if (hfsmp->hfc_stage == HFC_RECORDING) {
            fp->ff_bytesread = 0;
        }
        retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
                0, eflags, &actualBytesAdded));

        hfs_systemfile_unlock(hfsmp, lockflags);

        if ((actualBytesAdded == 0) && (retval == E_NONE))
            retval = ENOSPC;
        if (retval != E_NONE)
            break;
        filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
            (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
    }
    (void) hfs_update(vp, TRUE);
    (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
    (void) hfs_end_transaction(hfsmp);

    /*
     * If we didn't grow the file enough try a partial write.
     * POSIX expects this behavior.
     */
    if ((retval == ENOSPC) && (filebytes > offset)) {
        retval = 0;
        partialwrite = 1;
        uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
        resid -= bytesToAdd;
        writelimit = filebytes;
    }
sizeok:
    if (retval == E_NONE) {
        off_t filesize;
        off_t zero_off;
        off_t tail_off;
        off_t inval_start;
        off_t inval_end;
        off_t io_start;
        int lflag;
        struct rl_entry *invalid_range;

        if (writelimit > fp->ff_size)
            filesize = writelimit;
        else
            filesize = fp->ff_size;

        lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);

        if (offset <= fp->ff_size) {
            zero_off = offset & ~PAGE_MASK_64;

            /* Check whether the area between zero_off and the start
               of the transfer is invalid and should be zero-filled
               as part of the transfer:
             */
            if (offset > zero_off) {
                if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
                    lflag |= IO_HEADZEROFILL;
            }
        } else {
            off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;

            /* The bytes between fp->ff_size and uio->uio_offset must never be
               read without being zeroed.  The current last block is filled with zeroes
               if it holds valid data but in all cases merely do a little bookkeeping
               to track the area from the end of the current last page to the start of
               the area actually written.  For the same reason only the bytes up to the
               start of the page where this write will start are invalidated; any remainder
               before uio->uio_offset is explicitly zeroed as part of the cluster_write.

               Note that inval_start, the start of the page after the current EOF,
               may be past the start of the write, in which case the zeroing
               will be handled by the cluster_write of the actual data.
             */
            inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
            inval_end = offset & ~PAGE_MASK_64;
            zero_off = fp->ff_size;

            if ((fp->ff_size & PAGE_MASK_64) &&
                (rl_scan(&fp->ff_invalidranges,
                        eof_page_base,
                        fp->ff_size - 1,
                        &invalid_range) != RL_NOOVERLAP)) {
                /* The page containing the EOF is not valid, so the
                   entire page must be made inaccessible now.  If the write
                   starts on a page beyond the page containing the eof
                   (inval_end > eof_page_base), add the
                   whole page to the range to be invalidated.  Otherwise
                   (i.e. if the write starts on the same page), zero-fill
                   the entire page explicitly now:
                 */
                if (inval_end > eof_page_base) {
                    inval_start = eof_page_base;
                } else {
                    zero_off = eof_page_base;
                };
            };

            if (inval_start < inval_end) {
                struct timeval tv;
                /* There's some range of data that's going to be marked invalid */

                if (zero_off < inval_start) {
                    /* The pages between inval_start and inval_end are going to be invalidated,
                       and the actual write will start on a page past inval_end.  Now's the last
                       chance to zero-fill the page containing the EOF:
                     */
                    hfs_unlock(cp);
                    cnode_locked = 0;
                    retval = cluster_write(vp, (uio_t) 0,
                            fp->ff_size, inval_start,
                            zero_off, (off_t)0,
                            lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
                    hfs_lock(cp, HFS_FORCE_LOCK);
                    cnode_locked = 1;
                    if (retval) goto ioerr_exit;
                    offset = uio_offset(uio);
                };

                /* Mark the remaining area of the newly allocated space as invalid: */
                rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
                microuptime(&tv);
                cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
                zero_off = fp->ff_size = inval_end;
            };

            if (offset > zero_off) lflag |= IO_HEADZEROFILL;
        };

        /* Check to see whether the area between the end of the write and the end of
           the page it falls in is invalid and should be zero-filled as part of the transfer:
         */
        tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
        if (tail_off > filesize) tail_off = filesize;
        if (tail_off > writelimit) {
            if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
                lflag |= IO_TAILZEROFILL;
            };
        };
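
        /* Worked example (illustrative, assuming 4 KB pages and no pre-existing
           invalid ranges): with ff_size = 5000 and a write covering [6000, 6100),
           we get zero_off = 5000 and IO_HEADZEROFILL, so cluster_write zeroes
           [5000, 6000) before the data lands; tail_off is clamped from 8192 down
           to filesize (6100), which equals writelimit, so no tail fill is needed. */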

        /*
         * if the write starts beyond the current EOF (possibly advanced in the
         * zeroing of the last block, above), then we'll zero fill from the current EOF
         * to where the write begins:
         *
         * NOTE: If (and ONLY if) the portion of the file about to be written is
         *   before the current EOF it might be marked as invalid now and must be
         *   made readable (removed from the invalid ranges) before cluster_write
         *   tries to write it:
         */
        io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
        if (io_start < fp->ff_size) {
            off_t io_end;

            io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
            rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
        };

        hfs_unlock(cp);
        cnode_locked = 0;

        /*
         * We need to tell UBC the fork's new size BEFORE calling
         * cluster_write, in case any of the new pages need to be
         * paged out before cluster_write completes (which does happen
         * in embedded systems due to extreme memory pressure).
         * Similarly, we need to tell hfs_vnop_pageout what the new EOF
         * will be, so that it can pass that on to cluster_pageout, and
         * allow those pageouts.
         *
         * We don't update ff_size yet since we don't want pageins to
         * be able to see uninitialized data between the old and new
         * EOF, until cluster_write has completed and initialized that
         * part of the file.
         *
         * The vnode pager relies on the file size last given to UBC via
         * ubc_setsize.  hfs_vnop_pageout relies on fp->ff_new_size or
         * ff_size (whichever is larger).  NOTE: ff_new_size is always
         * zero, unless we are extending the file via write.
         */
        if (filesize > fp->ff_size) {
            fp->ff_new_size = filesize;
            ubc_setsize(vp, filesize);
        }
        retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
                tail_off, lflag | IO_NOZERODIRTY);
        if (retval) {
            fp->ff_new_size = 0;    /* no longer extending; use ff_size */
            if (filesize > origFileSize) {
                ubc_setsize(vp, origFileSize);
            }
            goto ioerr_exit;
        }

        if (filesize > origFileSize) {
            fp->ff_size = filesize;

            /* Files that are changing size are not hot file candidates. */
            if (hfsmp->hfc_stage == HFC_RECORDING) {
                fp->ff_bytesread = 0;
            }
        }
        fp->ff_new_size = 0;    /* ff_size now has the correct size */

        /* If we wrote some bytes, then touch the change and mod times */
        if (resid > uio_resid(uio)) {
            cp->c_touch_chgtime = TRUE;
            cp->c_touch_modtime = TRUE;
        }
    }
    if (partialwrite) {
        uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
        resid += bytesToAdd;
    }

    // XXXdbg - see radar 4871353 for more info
    {
        if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
            VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
        }
    }

ioerr_exit:
    /*
     * If we successfully wrote any data, and we are not the superuser
     * we clear the setuid and setgid bits as a precaution against
     * tampering.
     */
    if (cp->c_mode & (S_ISUID | S_ISGID)) {
        cred = vfs_context_ucred(ap->a_context);
        if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
            if (!cnode_locked) {
                hfs_lock(cp, HFS_FORCE_LOCK);
                cnode_locked = 1;
            }
            cp->c_mode &= ~(S_ISUID | S_ISGID);
        }
    }
    if (retval) {
        if (ioflag & IO_UNIT) {
            if (!cnode_locked) {
                hfs_lock(cp, HFS_FORCE_LOCK);
                cnode_locked = 1;
            }
            (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
                       0, 0, ap->a_context);
            // LP64todo - fix this!  resid needs to be user_ssize_t
            uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
            uio_setresid(uio, resid);
            filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
        }
    } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
        if (!cnode_locked) {
            hfs_lock(cp, HFS_FORCE_LOCK);
            cnode_locked = 1;
        }
        retval = hfs_update(vp, TRUE);
    }
    /* Updating vcbWrCnt doesn't need to be atomic. */
    hfsmp->vcbWrCnt++;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
        (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
exit:
    if (cnode_locked)
        hfs_unlock(cp);
    hfs_unlock_truncate(cp, exclusive_lock);
    return (retval);
}

/* support for the "bulk-access" fcntl */

#define CACHE_LEVELS 16
#define NUM_CACHE_ENTRIES (64*16)
#define PARENT_IDS_FLAG 0x100

struct access_cache {
    int numcached;
    int cachehits; /* these two for statistics gathering */
    int lookups;
    unsigned int *acache;
    unsigned char *haveaccess;
};
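/* acache is kept sorted by cnid so lookup_bucket() can binary-search it;
   haveaccess[i] holds the cached errno (0 == access granted) for acache[i]. */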

struct access_t {
    uid_t uid; /* IN: effective user id */
    short flags; /* IN: access requested (i.e. R_OK) */
    short num_groups; /* IN: number of groups user belongs to */
    int num_files; /* IN: number of files to process */
    int *file_ids; /* IN: array of file ids */
    gid_t *groups; /* IN: array of groups */
    short *access; /* OUT: access info for each file (0 for 'has access') */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_access_t {
    uid_t uid; /* IN: effective user id */
    short flags; /* IN: access requested (i.e. R_OK) */
    short num_groups; /* IN: number of groups user belongs to */
    int num_files; /* IN: number of files to process */
    user32_addr_t file_ids; /* IN: array of file ids */
    user32_addr_t groups; /* IN: array of groups */
    user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
};

struct user64_access_t {
    uid_t uid; /* IN: effective user id */
    short flags; /* IN: access requested (i.e. R_OK) */
    short num_groups; /* IN: number of groups user belongs to */
    int num_files; /* IN: number of files to process */
    user64_addr_t file_ids; /* IN: array of file ids */
    user64_addr_t groups; /* IN: array of groups */
    user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
};


// these are the "extended" versions of the above structures
// note that it is crucial that they be a different size than
// the regular versions
struct ext_access_t {
    uint32_t flags; /* IN: access requested (i.e. R_OK) */
    uint32_t num_files; /* IN: number of files to process */
    uint32_t map_size; /* IN: size of the bit map */
    uint32_t *file_ids; /* IN: Array of file ids */
    char *bitmap; /* OUT: hash-bitmap of interesting directory ids */
    short *access; /* OUT: access info for each file (0 for 'has access') */
    uint32_t num_parents; /* future use */
    cnid_t *parents; /* future use */
} __attribute__((unavailable)); // this structure is for reference purposes only

struct user32_ext_access_t {
    uint32_t flags; /* IN: access requested (i.e. R_OK) */
    uint32_t num_files; /* IN: number of files to process */
    uint32_t map_size; /* IN: size of the bit map */
    user32_addr_t file_ids; /* IN: Array of file ids */
    user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
    user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
    uint32_t num_parents; /* future use */
    user32_addr_t parents; /* future use */
};

struct user64_ext_access_t {
    uint32_t flags; /* IN: access requested (i.e. R_OK) */
    uint32_t num_files; /* IN: number of files to process */
    uint32_t map_size; /* IN: size of the bit map */
    user64_addr_t file_ids; /* IN: array of file ids */
    user64_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
    user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
    uint32_t num_parents;/* future use */
    user64_addr_t parents;/* future use */
};
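/* do_bulk_access_check() below distinguishes the plain and extended request
   formats purely by the argument size it is handed, which is why the structs
   above must not share a size. */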


/*
 * Perform a binary search for the given parent_id. Return value is
 * the index if there is a match.  If no_match_indexp is non-NULL it
 * will be assigned with the index to insert the item (even if it was
 * not found).
 */
static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
{
    int index=-1;
    unsigned int lo=0;

    do {
        unsigned int mid = ((hi - lo)/2) + lo;
        unsigned int this_id = array[mid];

        if (parent_id == this_id) {
            hi = mid;
            break;
        }

        if (parent_id < this_id) {
            hi = mid;
            continue;
        }

        if (parent_id > this_id) {
            lo = mid + 1;
            continue;
        }
    } while(lo < hi);

    /* check if lo and hi converged on the match */
    if (parent_id == array[hi]) {
        index = hi;
    }

    if (no_match_indexp) {
        *no_match_indexp = hi;
    }

    return index;
}
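
/* Example: for the sorted array {10, 20, 30}, cache_binSearch(a, 2, 20, NULL)
   returns 1, while searching for 25 returns -1 and sets *no_match_indexp to
   the insertion slot (2).  Note that 'hi' is the index of the last element,
   not the element count. */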

static int
lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
{
    unsigned int hi;
    int matches = 0;
    int index, no_match_index;

    if (cache->numcached == 0) {
        *indexp = 0;
        return 0;    // table is empty, so insert at index=0 and report no match
    }

    if (cache->numcached > NUM_CACHE_ENTRIES) {
        /*printf("hfs: EGAD! numcached is %d... cut our losses and trim to %d\n",
          cache->numcached, NUM_CACHE_ENTRIES);*/
        cache->numcached = NUM_CACHE_ENTRIES;
    }

    hi = cache->numcached - 1;

    index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);

    /* if no existing entry found, find index for new one */
    if (index == -1) {
        index = no_match_index;
        matches = 0;
    } else {
        matches = 1;
    }

    *indexp = index;
    return matches;
}
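/* i.e. lookup_bucket() returns 1 with *indexp set to the slot on a hit, and 0
   with *indexp set to the would-be insertion point on a miss. */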

/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in).  We currently do a replace rather
 * than an insert if the cache is full.
 */
static void
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
    int lookup_index = -1;

    /* need to do a lookup first if -1 passed for index */
    if (index == -1) {
        if (lookup_bucket(cache, &lookup_index, nodeID)) {
            if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
                // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
                cache->haveaccess[lookup_index] = access;
            }

            /* mission accomplished */
            return;
        } else {
            index = lookup_index;
        }

    }

    /* if the cache is full, do a replace rather than an insert */
    if (cache->numcached >= NUM_CACHE_ENTRIES) {
        //printf("hfs: cache is full (%d). replace at index %d\n", cache->numcached, index);
        cache->numcached = NUM_CACHE_ENTRIES-1;

        if (index > cache->numcached) {
            // printf("hfs: index %d pinned to %d\n", index, cache->numcached);
            index = cache->numcached;
        }
    }

    if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
        index++;
    }

    if (index >= 0 && index < cache->numcached) {
        /* only do bcopy if we're inserting */
        bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
        bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
    }

    cache->acache[index] = nodeID;
    cache->haveaccess[index] = access;
    cache->numcached++;
}


struct cinfo {
    uid_t uid;
    gid_t gid;
    mode_t mode;
    cnid_t parentcnid;
    u_int16_t recflags;
};

static int
snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
{
    struct cinfo *cip = (struct cinfo *)arg;

    cip->uid = attrp->ca_uid;
    cip->gid = attrp->ca_gid;
    cip->mode = attrp->ca_mode;
    cip->parentcnid = descp->cd_parentcnid;
    cip->recflags = attrp->ca_recflags;

    return (0);
}

/*
 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id.  If the item
 * isn't incore, then go to the catalog.
 */
static int
do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
    struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
{
    int error = 0;

    /* if this id matches the one the fsctl was called with, skip the lookup */
    if (cnid == skip_cp->c_cnid) {
        cnattrp->ca_uid = skip_cp->c_uid;
        cnattrp->ca_gid = skip_cp->c_gid;
        cnattrp->ca_mode = skip_cp->c_mode;
        cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
        keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
    } else {
        struct cinfo c_info;

        /* otherwise, check the cnode hash in case the file/dir is incore */
        if (hfs_chash_snoop(hfsmp, cnid, snoop_callback, &c_info) == 0) {
            cnattrp->ca_uid = c_info.uid;
            cnattrp->ca_gid = c_info.gid;
            cnattrp->ca_mode = c_info.mode;
            cnattrp->ca_recflags = c_info.recflags;
            keyp->hfsPlus.parentID = c_info.parentcnid;
        } else {
            int lockflags;

            lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

            /* lookup this cnid in the catalog */
            error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);

            hfs_systemfile_unlock(hfsmp, lockflags);

            cache->lookups++;
        }
    }

    return (error);
}


/*
 * Compute whether we have access to the given directory (nodeID) and all its parents.  Cache
 * up to CACHE_LEVELS as we progress towards the root.
 */
static int
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
    struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
    struct vfs_context *my_context,
    char *bitmap,
    uint32_t map_size,
    cnid_t* parents,
    uint32_t num_parents)
{
    int myErr = 0;
    int myResult;
    HFSCatalogNodeID thisNodeID;
    unsigned int myPerms;
    struct cat_attr cnattr;
    int cache_index = -1, scope_index = -1, scope_idx_start = -1;
    CatalogKey catkey;

    int i = 0, ids_to_cache = 0;
    int parent_ids[CACHE_LEVELS];

    thisNodeID = nodeID;
    while (thisNodeID >= kRootDirID) {
        myResult = 0;    /* default to "no access" */

        /* check the cache before resorting to hitting the catalog */

        /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
         * to look any further after hitting cached dir */

        if (lookup_bucket(cache, &cache_index, thisNodeID)) {
            cache->cachehits++;
            myErr = cache->haveaccess[cache_index];
            if (scope_index != -1) {
                if (myErr == ESRCH) {
                    myErr = 0;
                }
            } else {
                scope_index = 0;   // so we'll just use the cache result
                scope_idx_start = ids_to_cache;
            }
            myResult = (myErr == 0) ? 1 : 0;
            goto ExitThisRoutine;
        }


        if (parents) {
            int tmp;
            tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
            if (scope_index == -1)
                scope_index = tmp;
            if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
                scope_idx_start = ids_to_cache;
            }
        }

        /* remember which parents we want to cache */
        if (ids_to_cache < CACHE_LEVELS) {
            parent_ids[ids_to_cache] = thisNodeID;
            ids_to_cache++;
        }
        // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
        if (bitmap && map_size) {
            bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
        }
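        /* e.g. userspace can test whether a directory id might have been
           visited with: bitmap[(id/8) % map_size] & (1 << (id & 7)) */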


        /* do the lookup (checks the cnode hash, then the catalog) */
        myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
        if (myErr) {
            goto ExitThisRoutine;    /* no access */
        }

        /* Root always gets access. */
        if (suser(myp_ucred, NULL) == 0) {
            thisNodeID = catkey.hfsPlus.parentID;
            myResult = 1;
            continue;
        }

        // if the thing has acl's, do the full permission check
        if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
            struct vnode *vp;

            /* get the vnode for this cnid */
            myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0);
            if ( myErr ) {
                myResult = 0;
                goto ExitThisRoutine;
            }

            thisNodeID = VTOC(vp)->c_parentcnid;

            hfs_unlock(VTOC(vp));

            if (vnode_vtype(vp) == VDIR) {
                myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
            } else {
                myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
            }

            vnode_put(vp);
            if (myErr) {
                myResult = 0;
                goto ExitThisRoutine;
            }
        } else {
            unsigned int flags;

            myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
                    cnattr.ca_mode, hfsmp->hfs_mp,
                    myp_ucred, theProcPtr);

            if (cnattr.ca_mode & S_IFDIR) {
                flags = R_OK | X_OK;
            } else {
                flags = R_OK;
            }
            if ( (myPerms & flags) != flags) {
                myResult = 0;
                myErr = EACCES;
                goto ExitThisRoutine;    /* no access */
            }

            /* up the hierarchy we go */
            thisNodeID = catkey.hfsPlus.parentID;
        }
    }

    /* if here, we have access to this node */
    myResult = 1;

 ExitThisRoutine:
    if (parents && myErr == 0 && scope_index == -1) {
        myErr = ESRCH;
    }

    if (myErr) {
        myResult = 0;
    }
    *err = myErr;

    /* cache the parent directory(ies) */
    for (i = 0; i < ids_to_cache; i++) {
        if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
            add_node(cache, -1, parent_ids[i], ESRCH);
        } else {
            add_node(cache, -1, parent_ids[i], myErr);
        }
    }

    return (myResult);
}

static int
do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
    struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
{
    boolean_t is64bit;

    /*
     * NOTE: on entry, the vnode is locked.  In case this vnode
     * happens to be in our list of file_ids, we'll note it so we
     * avoid calling hfs_chashget_nowait() on that id, as that
     * would cause a "locking against myself" panic.
     */
    Boolean check_leaf = true;

    struct user64_ext_access_t *user_access_structp;
    struct user64_ext_access_t tmp_user_access;
    struct access_cache cache;

    int error = 0, prev_parent_check_ok=1;
    unsigned int i;

    short flags;
    unsigned int num_files = 0;
    int map_size = 0;
    int num_parents = 0;
    int *file_ids=NULL;
    short *access=NULL;
    char *bitmap=NULL;
    cnid_t *parents=NULL;
    int leaf_index;

    cnid_t cnid;
    cnid_t prevParent_cnid = 0;
    unsigned int myPerms;
    short myaccess = 0;
    struct cat_attr cnattr;
    CatalogKey catkey;
    struct cnode *skip_cp = VTOC(vp);
    kauth_cred_t cred = vfs_context_ucred(context);
    proc_t p = vfs_context_proc(context);

    is64bit = proc_is64bit(p);

    /* initialize the local cache and buffers */
    cache.numcached = 0;
    cache.cachehits = 0;
    cache.lookups = 0;
    cache.acache = NULL;
    cache.haveaccess = NULL;

    /* struct copyin done during dispatch... need to copy file_id array separately */
    if (ap->a_data == NULL) {
        error = EINVAL;
        goto err_exit_bulk_access;
    }

    if (is64bit) {
        if (arg_size != sizeof(struct user64_ext_access_t)) {
            error = EINVAL;
            goto err_exit_bulk_access;
        }

        user_access_structp = (struct user64_ext_access_t *)ap->a_data;

    } else if (arg_size == sizeof(struct user32_access_t)) {
        struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;

        // convert an old style bulk-access struct to the new style
        tmp_user_access.flags = accessp->flags;
        tmp_user_access.num_files = accessp->num_files;
        tmp_user_access.map_size = 0;
        tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
        tmp_user_access.bitmap = USER_ADDR_NULL;
        tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
        tmp_user_access.num_parents = 0;
        user_access_structp = &tmp_user_access;

    } else if (arg_size == sizeof(struct user32_ext_access_t)) {
        struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;

        // up-cast from a 32-bit version of the struct
        tmp_user_access.flags = accessp->flags;
        tmp_user_access.num_files = accessp->num_files;
        tmp_user_access.map_size = accessp->map_size;
        tmp_user_access.num_parents = accessp->num_parents;

        tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
        tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
        tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
        tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);

        user_access_structp = &tmp_user_access;
    } else {
        error = EINVAL;
        goto err_exit_bulk_access;
    }

    map_size = user_access_structp->map_size;

    num_files = user_access_structp->num_files;

    num_parents = user_access_structp->num_parents;

    if (num_files < 1) {
        goto err_exit_bulk_access;
    }
    if (num_files > 1024) {
        error = EINVAL;
        goto err_exit_bulk_access;
    }

    if (num_parents > 1024) {
        error = EINVAL;
        goto err_exit_bulk_access;
    }

    file_ids = (int *) kalloc(sizeof(int) * num_files);
    access = (short *) kalloc(sizeof(short) * num_files);
    if (map_size) {
        bitmap = (char *) kalloc(sizeof(char) * map_size);
    }

    if (num_parents) {
        parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
    }

    cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
    cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);

    if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
        if (file_ids) {
            kfree(file_ids, sizeof(int) * num_files);
        }
        if (bitmap) {
            kfree(bitmap, sizeof(char) * map_size);
        }
        if (access) {
            kfree(access, sizeof(short) * num_files);
        }
        if (cache.acache) {
            kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
        }
        if (cache.haveaccess) {
            kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
        }
        if (parents) {
            kfree(parents, sizeof(cnid_t) * num_parents);
        }
        return ENOMEM;
    }

    // make sure the bitmap is zero'ed out...
    if (bitmap) {
        bzero(bitmap, (sizeof(char) * map_size));
    }

    if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
        num_files * sizeof(int)))) {
        goto err_exit_bulk_access;
    }

    if (num_parents) {
        if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
            num_parents * sizeof(cnid_t)))) {
            goto err_exit_bulk_access;
        }
    }

    flags = user_access_structp->flags;
    if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
        flags = R_OK;
    }

    /* check if we've been passed leaf node ids or parent ids */
    if (flags & PARENT_IDS_FLAG) {
        check_leaf = false;
    }

    /* Check access to each file_id passed in */
    for (i = 0; i < num_files; i++) {
        leaf_index=-1;
        cnid = (cnid_t) file_ids[i];

        /* root always has access */
        if ((!parents) && (!suser(cred, NULL))) {
            access[i] = 0;
            continue;
        }

        if (check_leaf) {
            /* do the lookup (checks the cnode hash, then the catalog) */
            error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
            if (error) {
                access[i] = (short) error;
                continue;
            }

            if (parents) {
                // Check if the leaf matches one of the parent scopes
                leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
                if (leaf_index >= 0 && parents[leaf_index] == cnid)
                    prev_parent_check_ok = 0;
                else if (leaf_index >= 0)
                    prev_parent_check_ok = 1;
            }

            // if the thing has acl's, do the full permission check
            if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
                struct vnode *cvp;
                int myErr = 0;
                /* get the vnode for this cnid */
                myErr = hfs_vget(hfsmp, cnid, &cvp, 0);
                if ( myErr ) {
                    access[i] = myErr;
                    continue;
                }

                hfs_unlock(VTOC(cvp));

                if (vnode_vtype(cvp) == VDIR) {
                    myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
                } else {
                    myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
                }

                vnode_put(cvp);
                if (myErr) {
                    access[i] = myErr;
                    continue;
                }
            } else {
                /* before calling CheckAccess(), check the target file for read access */
                myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
                        cnattr.ca_mode, hfsmp->hfs_mp, cred, p);

                /* fail fast if no access */
                if ((myPerms & flags) == 0) {
                    access[i] = EACCES;
                    continue;
                }
            }
        } else {
            /* we were passed an array of parent ids */
            catkey.hfsPlus.parentID = cnid;
        }

        /* if the last guy had the same parent and had access, we're done */
        if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
            cache.cachehits++;
            access[i] = 0;
            continue;
        }

        myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
                skip_cp, p, cred, context, bitmap, map_size, parents, num_parents);

        if (myaccess || (error == ESRCH && leaf_index != -1)) {
            access[i] = 0;    // have access.. no errors to report
        } else {
            access[i] = (error != 0 ? (short) error : EACCES);
        }

        prevParent_cnid = catkey.hfsPlus.parentID;
    }

    /* copyout the access array */
    if ((error = copyout((caddr_t)access, user_access_structp->access,
        num_files * sizeof (short)))) {
        goto err_exit_bulk_access;
    }
    if (map_size && bitmap) {
        if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
            map_size * sizeof (char)))) {
            goto err_exit_bulk_access;
        }
    }


 err_exit_bulk_access:

    //printf("hfs: on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);

    if (file_ids)
        kfree(file_ids, sizeof(int) * num_files);
    if (parents)
        kfree(parents, sizeof(cnid_t) * num_parents);
    if (bitmap)
        kfree(bitmap, sizeof(char) * map_size);
    if (access)
        kfree(access, sizeof(short) * num_files);
    if (cache.acache)
        kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
    if (cache.haveaccess)
        kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);

    return (error);
}


/* end "bulk-access" support */
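
/* Roughly how a caller drives this (illustrative sketch only -- the ioctl
   constants and the user-visible struct layout come from hfs_fsctl.h):

	args.flags     = R_OK;                 // access being asked about
	args.num_files = n;                    // up to 1024 ids per call
	args.file_ids  = (addr of cnid array);
	args.access    = (addr of short[n] result array);
	err = fsctl(volume_path, HFS_EXT_BULKACCESS_FSCTL, &args, 0);

   On success each access[i] is 0 if access is granted, otherwise an errno
   such as EACCES. */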


/*
 * Callback for use with freeze ioctl.
 */
static int
hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
{
    vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");

    return 0;
}

/*
 * Control filesystem operating characteristics.
 */
int
hfs_vnop_ioctl( struct vnop_ioctl_args /* {
        vnode_t a_vp;
        int a_command;
        caddr_t a_data;
        int a_fflag;
        vfs_context_t a_context;
    } */ *ap)
{
    struct vnode * vp = ap->a_vp;
    struct hfsmount *hfsmp = VTOHFS(vp);
    vfs_context_t context = ap->a_context;
    kauth_cred_t cred = vfs_context_ucred(context);
    proc_t p = vfs_context_proc(context);
    struct vfsstatfs *vfsp;
    boolean_t is64bit;
    off_t jnl_start, jnl_size;
    struct hfs_journal_info *jip;
#if HFS_COMPRESSION
    int compressed = 0;
    off_t uncompressed_size = -1;
    int decmpfs_error = 0;

    if (ap->a_command == F_RDADVISE) {
        /* we need to inspect the decmpfs state of the file as early as possible */
        compressed = hfs_file_is_compressed(VTOC(vp), 0);
        if (compressed) {
            if (VNODE_IS_RSRC(vp)) {
                /* if this is the resource fork, treat it as if it were empty */
                uncompressed_size = 0;
            } else {
                decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
                if (decmpfs_error != 0) {
                    /* failed to get the uncompressed size, we'll check for this later */
                    uncompressed_size = -1;
                }
            }
        }
    }
#endif /* HFS_COMPRESSION */

    is64bit = proc_is64bit(p);

    switch (ap->a_command) {

    case HFS_GETPATH:
    {
        struct vnode *file_vp;
        cnid_t  cnid;
        int  outlen;
        char *bufptr;
        int error;

        /* Caller must be owner of file system. */
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);
        }
        /* Target vnode must be file system's root. */
        if (!vnode_isvroot(vp)) {
            return (EINVAL);
        }
        bufptr = (char *)ap->a_data;
        cnid = strtoul(bufptr, NULL, 10);
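        /* The caller passes the cnid as a decimal string in the ioctl buffer;
           build_path() below overwrites that same buffer with the resulting path. */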
1457
b0d623f7
A
1458 /* We need to call hfs_vfs_vget to leverage the code that will
1459 * fix the origin list for us if needed, as opposed to calling
1460 * hfs_vget, since we will need the parent for build_path call.
935ed37a 1461 */
b0d623f7 1462
935ed37a 1463 if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
2d21ac55
A
1464 return (error);
1465 }
1466 error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);
1467 vnode_put(file_vp);
1468
1469 return (error);
1470 }
1471
1472 case HFS_PREV_LINK:
1473 case HFS_NEXT_LINK:
1474 {
1475 cnid_t linkfileid;
1476 cnid_t nextlinkid;
1477 cnid_t prevlinkid;
1478 int error;
1479
1480 /* Caller must be owner of file system. */
1481 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1482 if (suser(cred, NULL) &&
1483 kauth_cred_getuid(cred) != vfsp->f_owner) {
1484 return (EACCES);
1485 }
1486 /* Target vnode must be file system's root. */
1487 if (!vnode_isvroot(vp)) {
1488 return (EINVAL);
1489 }
1490 linkfileid = *(cnid_t *)ap->a_data;
1491 if (linkfileid < kHFSFirstUserCatalogNodeID) {
1492 return (EINVAL);
1493 }
1494 if ((error = hfs_lookuplink(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
1495 return (error);
1496 }
1497 if (ap->a_command == HFS_NEXT_LINK) {
1498 *(cnid_t *)ap->a_data = nextlinkid;
1499 } else {
1500 *(cnid_t *)ap->a_data = prevlinkid;
1501 }
1502 return (0);
1503 }
1504
0c530ab8
A
1505 case HFS_RESIZE_PROGRESS: {
1506
1507 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1508 if (suser(cred, NULL) &&
1509 kauth_cred_getuid(cred) != vfsp->f_owner) {
1510 return (EACCES); /* must be owner of file system */
1511 }
1512 if (!vnode_isvroot(vp)) {
1513 return (EINVAL);
1514 }
b0d623f7
A
1515 /* file system must not be mounted read-only */
1516 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1517 return (EROFS);
1518 }
1519
0c530ab8
A
1520 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
1521 }
2d21ac55 1522
91447636
A
1523 case HFS_RESIZE_VOLUME: {
1524 u_int64_t newsize;
1525 u_int64_t cursize;
1526
1527 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1528 if (suser(cred, NULL) &&
1529 kauth_cred_getuid(cred) != vfsp->f_owner) {
1530 return (EACCES); /* must be owner of file system */
1531 }
1532 if (!vnode_isvroot(vp)) {
1533 return (EINVAL);
1534 }
b0d623f7
A
1535
1536 /* filesystem must not be mounted read only */
1537 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1538 return (EROFS);
1539 }
91447636
A
1540 newsize = *(u_int64_t *)ap->a_data;
1541 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
1542
1543 if (newsize > cursize) {
1544 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
1545 } else if (newsize < cursize) {
1546 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
1547 } else {
1548 return (0);
1549 }
1550 }
1551 case HFS_CHANGE_NEXT_ALLOCATION: {
2d21ac55 1552 int error = 0; /* Assume success */
91447636
A
1553 u_int32_t location;
1554
1555 if (vnode_vfsisrdonly(vp)) {
1556 return (EROFS);
1557 }
1558 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1559 if (suser(cred, NULL) &&
1560 kauth_cred_getuid(cred) != vfsp->f_owner) {
1561 return (EACCES); /* must be owner of file system */
1562 }
1563 if (!vnode_isvroot(vp)) {
1564 return (EINVAL);
1565 }
2d21ac55 1566 HFS_MOUNT_LOCK(hfsmp, TRUE);
91447636 1567 location = *(u_int32_t *)ap->a_data;
2d21ac55
A
1568 if ((location >= hfsmp->allocLimit) &&
1569 (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
1570 error = EINVAL;
1571 goto fail_change_next_allocation;
91447636
A
1572 }
1573 /* Return previous value. */
1574 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
2d21ac55
A
1575 if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
1576 /* On magic value for location, set nextAllocation to next block
1577 * after metadata zone and set flag in mount structure to indicate
1578 * that nextAllocation should not be updated again.
1579 */
b0d623f7
A
1580 if (hfsmp->hfs_metazone_end != 0) {
1581 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
1582 }
2d21ac55
A
1583 hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1584 } else {
1585 hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1586 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
1587 }
1588 MarkVCBDirty(hfsmp);
1589fail_change_next_allocation:
91447636 1590 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2d21ac55 1591 return (error);
91447636
A
1592 }
1593
55e303ae
A
1594#ifdef HFS_SPARSE_DEV
1595 case HFS_SETBACKINGSTOREINFO: {
55e303ae
A
1596 struct vnode * bsfs_rootvp;
1597 struct vnode * di_vp;
55e303ae
A
1598 struct hfs_backingstoreinfo *bsdata;
1599 int error = 0;
1600
b0d623f7
A
1601 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1602 return (EROFS);
1603 }
55e303ae
A
1604 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
1605 return (EALREADY);
1606 }
91447636
A
1607 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1608 if (suser(cred, NULL) &&
1609 kauth_cred_getuid(cred) != vfsp->f_owner) {
55e303ae
A
1610 return (EACCES); /* must be owner of file system */
1611 }
1612 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
1613 if (bsdata == NULL) {
1614 return (EINVAL);
1615 }
91447636 1616 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
55e303ae
A
1617 return (error);
1618 }
91447636
A
1619 if ((error = vnode_getwithref(di_vp))) {
1620 file_drop(bsdata->backingfd);
1621 return(error);
55e303ae 1622 }
91447636
A
1623
1624 if (vnode_mount(vp) == vnode_mount(di_vp)) {
1625 (void)vnode_put(di_vp);
1626 file_drop(bsdata->backingfd);
55e303ae
A
1627 return (EINVAL);
1628 }
1629
1630 /*
1631 * Obtain the backing fs root vnode and keep a reference
1632 * on it. This reference will be dropped in hfs_unmount.
1633 */
91447636 1634 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
55e303ae 1635 if (error) {
91447636
A
1636 (void)vnode_put(di_vp);
1637 file_drop(bsdata->backingfd);
55e303ae
A
1638 return (error);
1639 }
91447636
A
1640 vnode_ref(bsfs_rootvp);
1641 vnode_put(bsfs_rootvp);
55e303ae
A
1642
1643 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
1644 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
1645 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
1646 hfsmp->hfs_sparsebandblks *= 4;
1647
2d21ac55
A
1648 vfs_markdependency(hfsmp->hfs_mp);
1649
b0d623f7
A
1650 /*
1651 * If the sparse image is on a sparse image file (as opposed to a sparse
1652 * bundle), then we may need to limit the free space to the maximum size
1653 * of a file on that volume. So we query (using pathconf), and if we get
1654 * a meaningful result, we cache the number of blocks for later use in
1655 * hfs_freeblks().
1656 */
1657 hfsmp->hfs_backingfs_maxblocks = 0;
1658 if (vnode_vtype(di_vp) == VREG) {
1659 int terr;
1660 int hostbits;
1661 terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
1662 if (terr == 0 && hostbits != 0 && hostbits < 64) {
1663 u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;
1664
1665 hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
1666 }
1667 }
1668
91447636
A
1669 (void)vnode_put(di_vp);
1670 file_drop(bsdata->backingfd);
55e303ae
A
1671 return (0);
1672 }
1673 case HFS_CLRBACKINGSTOREINFO: {
55e303ae
A
1674 struct vnode * tmpvp;
1675
91447636
A
1676 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1677 if (suser(cred, NULL) &&
1678 kauth_cred_getuid(cred) != vfsp->f_owner) {
55e303ae
A
1679 return (EACCES); /* must be owner of file system */
1680 }
b0d623f7
A
1681 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1682 return (EROFS);
1683 }
1684
55e303ae
A
1685 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
1686 hfsmp->hfs_backingfs_rootvp) {
1687
1688 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
1689 tmpvp = hfsmp->hfs_backingfs_rootvp;
1690 hfsmp->hfs_backingfs_rootvp = NULLVP;
1691 hfsmp->hfs_sparsebandblks = 0;
91447636 1692 vnode_rele(tmpvp);
55e303ae
A
1693 }
1694 return (0);
1695 }
1696#endif /* HFS_SPARSE_DEV */
1697
    case F_FREEZE_FS: {
        struct mount *mp;

        mp = vnode_mount(vp);
        hfsmp = VFSTOHFS(mp);

        if (!(hfsmp->jnl))
            return (ENOTSUP);

        vfsp = vfs_statfs(mp);

        if (kauth_cred_getuid(cred) != vfsp->f_owner &&
            !kauth_cred_issuser(cred))
            return (EACCES);

        lck_rw_lock_exclusive(&hfsmp->hfs_insync);

        // flush things before we get started to try and prevent
        // dirty data from being paged out while we're frozen.
        // note: we can't do this after taking the lock as it will
        // deadlock against ourselves.
        vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
        hfs_global_exclusive_lock_acquire(hfsmp);

        // DO NOT call hfs_journal_flush() because that takes a
        // shared lock on the global exclusive lock!
        journal_flush(hfsmp->jnl);

        // don't need to iterate on all vnodes, we just need to
        // wait for writes to the system files and the device vnode
        if (HFSTOVCB(hfsmp)->extentsRefNum)
            vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
        if (HFSTOVCB(hfsmp)->catalogRefNum)
            vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
        if (HFSTOVCB(hfsmp)->allocationsRefNum)
            vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
        if (hfsmp->hfs_attribute_vp)
            vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
        vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");

        hfsmp->hfs_freezing_proc = current_proc();

        return (0);
    }

    case F_THAW_FS: {
        vfsp = vfs_statfs(vnode_mount(vp));
        if (kauth_cred_getuid(cred) != vfsp->f_owner &&
            !kauth_cred_issuser(cred))
            return (EACCES);

        // if we're not the one who froze the fs then we
        // can't thaw it.
        if (hfsmp->hfs_freezing_proc != current_proc()) {
            return EPERM;
        }

        // NOTE: if you add code here, also go check the
        // code that "thaws" the fs in hfs_vnop_close()
        //
        hfsmp->hfs_freezing_proc = NULL;
        hfs_global_exclusive_lock_release(hfsmp);
        lck_rw_unlock_exclusive(&hfsmp->hfs_insync);

        return (0);
    }
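    /*
     * A hedged userspace sketch of the freeze/thaw pairing.  Both
     * commands are issued via fcntl(2) against an open file on the
     * volume; only the volume owner or the superuser may call them,
     * and only the freezing process may thaw:
     *
     *     int fd = open("/Volumes/HFSVol", O_RDONLY);
     *     if (fcntl(fd, F_FREEZE_FS, 0) < 0)    // quiesce writes
     *         err(1, "freeze");
     *     // ... snapshot the block device here ...
     *     if (fcntl(fd, F_THAW_FS, 0) < 0)      // resume writes
     *         err(1, "thaw");
     */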

    case HFS_BULKACCESS_FSCTL: {
        int size;

        if (hfsmp->hfs_flags & HFS_STANDARD) {
            return EINVAL;
        }

        if (is64bit) {
            size = sizeof(struct user64_access_t);
        } else {
            size = sizeof(struct user32_access_t);
        }

        return do_bulk_access_check(hfsmp, vp, ap, size, context);
    }

    case HFS_EXT_BULKACCESS_FSCTL: {
        int size;

        if (hfsmp->hfs_flags & HFS_STANDARD) {
            return EINVAL;
        }

        if (is64bit) {
            size = sizeof(struct user64_ext_access_t);
        } else {
            size = sizeof(struct user32_ext_access_t);
        }

        return do_bulk_access_check(hfsmp, vp, ap, size, context);
    }

    case HFS_SETACLSTATE: {
        int state;

        if (ap->a_data == NULL) {
            return (EINVAL);
        }

        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        state = *(int *)ap->a_data;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return (EROFS);
        }
        // super-user can enable or disable acl's on a volume.
        // the volume owner can only enable acl's
        if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
            return (EPERM);
        }
        if (state == 0 || state == 1)
            return hfs_set_volxattr(hfsmp, HFS_SETACLSTATE, state);
        else
            return (EINVAL);
    }

    case HFS_SET_XATTREXTENTS_STATE: {
        int state;

        if (ap->a_data == NULL) {
            return (EINVAL);
        }

        state = *(int *)ap->a_data;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return (EROFS);
        }

        /* Super-user can enable or disable extent-based extended
         * attribute support on a volume
         */
        if (!is_suser()) {
            return (EPERM);
        }
        if (state == 0 || state == 1)
            return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
        else
            return (EINVAL);
    }

    case F_FULLFSYNC: {
        int error;

        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return (EROFS);
        }
        error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
        if (error == 0) {
            error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
            hfs_unlock(VTOC(vp));
        }

        return error;
    }
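    /*
     * F_FULLFSYNC is the fcntl applications use to push data past the
     * drive's write cache to the platter; plain fsync(2) does not make
     * that guarantee.  Typical (sketched) userspace usage:
     *
     *     if (fcntl(fd, F_FULLFSYNC) < 0)
     *         fsync(fd);    // not every filesystem supports it
     */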

    case F_CHKCLEAN: {
        register struct cnode *cp;
        int error;

        if (!vnode_isreg(vp))
            return EINVAL;

        error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
        if (error == 0) {
            cp = VTOC(vp);
            /*
             * used by regression test to determine if
             * all the dirty pages (via write) have been cleaned
             * after a call to 'fsync'.
             */
            error = is_file_clean(vp, VTOF(vp)->ff_size);
            hfs_unlock(cp);
        }
        return (error);
    }

    case F_RDADVISE: {
        register struct radvisory *ra;
        struct filefork *fp;
        int error;

        if (!vnode_isreg(vp))
            return EINVAL;

        ra = (struct radvisory *)(ap->a_data);
        fp = VTOF(vp);

        /* Protect against a size change. */
        hfs_lock_truncate(VTOC(vp), TRUE);

#if HFS_COMPRESSION
        if (compressed && (uncompressed_size == -1)) {
            /* fetching the uncompressed size failed above, so return the error */
            error = decmpfs_error;
        } else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
                   (!compressed && (ra->ra_offset >= fp->ff_size))) {
            error = EFBIG;
        }
#else /* HFS_COMPRESSION */
        if (ra->ra_offset >= fp->ff_size) {
            error = EFBIG;
        }
#endif /* HFS_COMPRESSION */
        else {
            error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
        }

        hfs_unlock_truncate(VTOC(vp), TRUE);
        return (error);
    }
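    /*
     * The advisory-read path above is reached from userspace with
     * fcntl(2) and a struct radvisory (declared in sys/fcntl.h).  A
     * minimal sketch that primes the cache with 1 MB from offset 0:
     *
     *     struct radvisory ra;
     *     ra.ra_offset = 0;            // must be inside the file, else EFBIG
     *     ra.ra_count  = 1024 * 1024;
     *     if (fcntl(fd, F_RDADVISE, &ra) < 0)
     *         warn("F_RDADVISE");
     */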

    case F_READBOOTSTRAP:
    case F_WRITEBOOTSTRAP:
    {
        struct vnode *devvp = NULL;
        user_fbootstraptransfer_t *user_bootstrapp;
        int devBlockSize;
        int error;
        uio_t auio;
        daddr64_t blockNumber;
        u_int32_t blockOffset;
        u_int32_t xfersize;
        struct buf *bp;
        user_fbootstraptransfer_t user_bootstrap;

        if (!vnode_isvroot(vp))
            return (EINVAL);
        /* LP64 - when caller is a 64 bit process then we are passed a pointer
         * to a user_fbootstraptransfer_t else we get a pointer to a
         * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
         */
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return (EROFS);
        }
        if (is64bit) {
            user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
        }
        else {
            user32_fbootstraptransfer_t *bootstrapp = (user32_fbootstraptransfer_t *)ap->a_data;
            user_bootstrapp = &user_bootstrap;
            user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
            user_bootstrap.fbt_length = bootstrapp->fbt_length;
            user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
        }

        if ((user_bootstrapp->fbt_offset < 0) || (user_bootstrapp->fbt_offset > 1024) ||
            (user_bootstrapp->fbt_length > 1024)) {
            return EINVAL;
        }

        if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
            return EINVAL;

        devvp = VTOHFS(vp)->hfs_devvp;
        auio = uio_create(1, user_bootstrapp->fbt_offset,
                          is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
                          (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
        uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);

        devBlockSize = vfs_devblocksize(vnode_mount(vp));

        while (uio_resid(auio) > 0) {
            blockNumber = uio_offset(auio) / devBlockSize;
            error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
            if (error) {
                if (bp) buf_brelse(bp);
                uio_free(auio);
                return error;
            }

            blockOffset = uio_offset(auio) % devBlockSize;
            xfersize = devBlockSize - blockOffset;
            error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
            if (error) {
                buf_brelse(bp);
                uio_free(auio);
                return error;
            }
            if (uio_rw(auio) == UIO_WRITE) {
                error = VNOP_BWRITE(bp);
                if (error) {
                    uio_free(auio);
                    return error;
                }
            } else {
                buf_brelse(bp);
            }
        }
        uio_free(auio);
    }
    return 0;

    case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
    {
        if (is64bit) {
            *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
        }
        else {
            *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
        }
        return 0;
    }

    case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
        *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
        break;

    case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
        *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
        break;

    case HFS_FSCTL_SET_VERY_LOW_DISK:
        if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
            return EINVAL;
        }

        hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
        break;

    case HFS_FSCTL_SET_LOW_DISK:
        if (   *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
            || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {

            return EINVAL;
        }

        hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
        break;

    case HFS_FSCTL_SET_DESIRED_DISK:
        if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
            return EINVAL;
        }

        hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
        break;
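    /*
     * The three free-space notification thresholds form an ordered
     * ladder, enforced one rung at a time by the checks above:
     *
     *     dangerlimit < warninglimit < desiredlevel
     *
     * so a caller moving several rungs must update them in an order
     * that keeps the inequality true at every step.
     */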

    case HFS_VOLUME_STATUS:
        *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
        break;

    case HFS_SET_BOOT_INFO:
        if (!vnode_isvroot(vp))
            return (EINVAL);
        if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
            return (EACCES);   /* must be superuser or owner of filesystem */
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return (EROFS);
        }
        HFS_MOUNT_LOCK(hfsmp, TRUE);
        bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
        HFS_MOUNT_UNLOCK(hfsmp, TRUE);
        (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
        break;

    case HFS_GET_BOOT_INFO:
        if (!vnode_isvroot(vp))
            return (EINVAL);
        HFS_MOUNT_LOCK(hfsmp, TRUE);
        bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
        HFS_MOUNT_UNLOCK(hfsmp, TRUE);
        break;

    case HFS_MARK_BOOT_CORRUPT:
        /* Mark the boot volume corrupt by setting
         * kHFSVolumeInconsistentBit in the volume header.  This will
         * force fsck_hfs on next mount.
         */
        if (!is_suser()) {
            return EACCES;
        }

        /* Allowed only on the root vnode of the boot volume */
        if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
            !vnode_isvroot(vp)) {
            return EINVAL;
        }
        if (hfsmp->hfs_flags & HFS_READ_ONLY) {
            return (EROFS);
        }
        printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
        hfs_mark_volume_inconsistent(hfsmp);
        break;

    case HFS_FSCTL_GET_JOURNAL_INFO:
        jip = (struct hfs_journal_info*)ap->a_data;

        if (vp == NULLVP)
            return EINVAL;

        if (hfsmp->jnl == NULL) {
            jnl_start = 0;
            jnl_size = 0;
        } else {
            jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
            jnl_size = (off_t)hfsmp->jnl_size;
        }

        jip->jstart = jnl_start;
        jip->jsize = jnl_size;
        break;

    case HFS_SET_ALWAYS_ZEROFILL: {
        struct cnode *cp = VTOC(vp);

        if (*(int *)ap->a_data) {
            cp->c_flag |= C_ALWAYS_ZEROFILL;
        } else {
            cp->c_flag &= ~C_ALWAYS_ZEROFILL;
        }
        break;
    }

    default:
        return (ENOTTY);
    }

    return 0;
}

/*
 * select
 */
int
hfs_vnop_select(__unused struct vnop_select_args *ap)
/*
    struct vnop_select_args {
        vnode_t a_vp;
        int  a_which;
        int  a_fflags;
        void *a_wql;
        vfs_context_t a_context;
    };
*/
{
    /*
     * We should really check to see if I/O is possible.
     */
    return (1);
}

/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run.  The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently it's 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
 */
int
hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
{
    struct filefork *fp = VTOF(vp);
    struct hfsmount *hfsmp = VTOHFS(vp);
    int retval = E_NONE;
    u_int32_t logBlockSize;
    size_t bytesContAvail = 0;
    off_t blockposition;
    int lockExtBtree;
    int lockflags = 0;

    /*
     * Check for underlying vnode requests and ensure that logical
     * to physical mapping is requested.
     */
    if (vpp != NULL)
        *vpp = hfsmp->hfs_devvp;
    if (bnp == NULL)
        return (0);

    logBlockSize = GetLogicalBlockSize(vp);
    blockposition = (off_t)bn * logBlockSize;

    lockExtBtree = overflow_extents(fp);

    if (lockExtBtree)
        lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);

    retval = MacToVFSError(
                 MapFileBlockC(HFSTOVCB(hfsmp),
                               (FCB*)fp,
                               MAXPHYSIO,
                               blockposition,
                               bnp,
                               &bytesContAvail));

    if (lockExtBtree)
        hfs_systemfile_unlock(hfsmp, lockflags);

    if (retval == E_NONE) {
        /* Figure out how many read ahead blocks there are */
        if (runp != NULL) {
            if (can_cluster(logBlockSize)) {
                /* Make sure this result never goes negative: */
                *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
            } else {
                *runp = 0;
            }
        }
    }
    return (retval);
}
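
/*
 * A worked example of the mapping arithmetic above, assuming a 4 KB
 * logical block size: logical block 10 becomes byte position
 * 10 * 4096 = 40960, which MapFileBlockC translates into a 512-byte
 * device block number.  If the extent still has 20000 contiguous bytes
 * available at that position, the run reported back is
 *
 *     (20000 / 4096) - 1 = 3
 *
 * i.e. three more whole logical blocks are readable without remapping.
 */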

/*
 * Convert logical block number to file offset.
 */
int
hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
/*
    struct vnop_blktooff_args {
        vnode_t a_vp;
        daddr64_t a_lblkno;
        off_t *a_offset;
    };
*/
{
    if (ap->a_vp == NULL)
        return (EINVAL);
    *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);

    return (0);
}

/*
 * Convert file offset to logical block number.
 */
int
hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
/*
    struct vnop_offtoblk_args {
        vnode_t a_vp;
        off_t a_offset;
        daddr64_t *a_lblkno;
    };
*/
{
    if (ap->a_vp == NULL)
        return (EINVAL);
    *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));

    return (0);
}
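
/*
 * These two conversions are inverses only on block boundaries, since
 * hfs_vnop_offtoblk truncates.  With a 4 KB logical block size, offset
 * 5000 maps to block 1, and block 1 maps back to offset 4096; in
 * general blktooff(offtoblk(off)) == off rounded down to a block.
 */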

/*
 * Map file offset to physical block number.
 *
 * If this function is called for write operation, and if the file
 * had virtual blocks allocated (delayed allocation), real blocks
 * are allocated by calling ExtendFileC().
 *
 * If this function is called for read operation, and if the file
 * had virtual blocks allocated (delayed allocation), no change
 * to the size of file is done, and if required, rangelist is
 * searched for mapping.
 *
 * System file cnodes are expected to be locked (shared or exclusive).
 */
int
hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
/*
    struct vnop_blockmap_args {
        vnode_t a_vp;
        off_t a_foffset;
        size_t a_size;
        daddr64_t *a_bpn;
        size_t *a_run;
        void *a_poff;
        int a_flags;
        vfs_context_t a_context;
    };
*/
{
    struct vnode *vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    struct hfsmount *hfsmp;
    size_t bytesContAvail = 0;
    int retval = E_NONE;
    int syslocks = 0;
    int lockflags = 0;
    struct rl_entry *invalid_range;
    enum rl_overlaptype overlaptype;
    int started_tr = 0;
    int tooklock = 0;

#if HFS_COMPRESSION
    if (VNODE_IS_RSRC(vp)) {
        /* allow blockmaps to the resource fork */
    } else {
        if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
            int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
            switch(state) {
                case FILE_IS_COMPRESSED:
                    return ENOTSUP;
                case FILE_IS_CONVERTING:
                    /* if FILE_IS_CONVERTING, we allow blockmap */
                    break;
                default:
                    printf("invalid state %d for compressed file\n", state);
                    /* fall through */
            }
        }
    }
#endif /* HFS_COMPRESSION */

    /* Do not allow blockmap operation on a directory */
    if (vnode_isdir(vp)) {
        return (ENOTSUP);
    }

    /*
     * Check for underlying vnode requests and ensure that logical
     * to physical mapping is requested.
     */
    if (ap->a_bpn == NULL)
        return (0);

    if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
        if (VTOC(vp)->c_lockowner != current_thread()) {
            hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
            tooklock = 1;
        }
    }
    hfsmp = VTOHFS(vp);
    cp = VTOC(vp);
    fp = VTOF(vp);

retry:
    /* Check virtual blocks only when performing write operation */
    if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
        if (hfs_start_transaction(hfsmp) != 0) {
            retval = EINVAL;
            goto exit;
        } else {
            started_tr = 1;
        }
        syslocks = SFL_EXTENTS | SFL_BITMAP;

    } else if (overflow_extents(fp)) {
        syslocks = SFL_EXTENTS;
    }

    if (syslocks)
        lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);

    /*
     * Check for any delayed allocations.
     */
    if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
        int64_t actbytes;
        u_int32_t loanedBlocks;

        //
        // Make sure we have a transaction.  It's possible
        // that we came in and fp->ff_unallocblocks was zero
        // but during the time we blocked acquiring the extents
        // btree, ff_unallocblocks became non-zero and so we
        // will need to start a transaction.
        //
        if (started_tr == 0) {
            if (syslocks) {
                hfs_systemfile_unlock(hfsmp, lockflags);
                syslocks = 0;
            }
            goto retry;
        }

        /*
         * Note: ExtendFileC will release any blocks on loan and
         * acquire real blocks.  So we ask to extend by zero bytes
         * since ExtendFileC will account for the virtual blocks.
         */

        loanedBlocks = fp->ff_unallocblocks;
        retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
                             kEFAllMask | kEFNoClumpMask, &actbytes);

        if (retval) {
            fp->ff_unallocblocks = loanedBlocks;
            cp->c_blocks += loanedBlocks;
            fp->ff_blocks += loanedBlocks;

            HFS_MOUNT_LOCK(hfsmp, TRUE);
            hfsmp->loanedBlocks += loanedBlocks;
            HFS_MOUNT_UNLOCK(hfsmp, TRUE);

            hfs_systemfile_unlock(hfsmp, lockflags);
            cp->c_flag |= C_MODIFIED;
            if (started_tr) {
                (void) hfs_update(vp, TRUE);
                (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

                hfs_end_transaction(hfsmp);
                started_tr = 0;
            }
            goto exit;
        }
    }

    retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
                           ap->a_bpn, &bytesContAvail);
    if (syslocks) {
        hfs_systemfile_unlock(hfsmp, lockflags);
        syslocks = 0;
    }

    if (started_tr) {
        (void) hfs_update(vp, TRUE);
        (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
        hfs_end_transaction(hfsmp);
        started_tr = 0;
    }
    if (retval) {
        /* On write, always return error because virtual blocks, if any,
         * should have been allocated in ExtendFileC().  We do not
         * allocate virtual blocks on read, therefore return error
         * only if no virtual blocks are allocated.  Otherwise we search
         * rangelist for zero-fills
         */
        if ((MacToVFSError(retval) != ERANGE) ||
            (ap->a_flags & VNODE_WRITE) ||
            ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
            goto exit;
        }

        /* Validate if the start offset is within logical file size */
        if (ap->a_foffset > fp->ff_size) {
            goto exit;
        }

        /* Searching file extents has failed for read operation, therefore
         * search rangelist for any uncommitted holes in the file.
         */
        overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
                              ap->a_foffset + (off_t)(ap->a_size - 1),
                              &invalid_range);
        switch(overlaptype) {
        case RL_OVERLAPISCONTAINED:
            /* start_offset <= rl_start, end_offset >= rl_end */
            if (ap->a_foffset != invalid_range->rl_start) {
                break;
            }
        case RL_MATCHINGOVERLAP:
            /* start_offset = rl_start, end_offset = rl_end */
        case RL_OVERLAPCONTAINSRANGE:
            /* start_offset >= rl_start, end_offset <= rl_end */
        case RL_OVERLAPSTARTSBEFORE:
            /* start_offset > rl_start, end_offset >= rl_start */
            if ((off_t)fp->ff_size > (invalid_range->rl_end + 1)) {
                bytesContAvail = (invalid_range->rl_end + 1) - ap->a_foffset;
            } else {
                bytesContAvail = fp->ff_size - ap->a_foffset;
            }
            if (bytesContAvail > ap->a_size) {
                bytesContAvail = ap->a_size;
            }
            *ap->a_bpn = (daddr64_t)-1;
            retval = 0;
            break;
        case RL_OVERLAPENDSAFTER:
            /* start_offset < rl_start, end_offset < rl_end */
        case RL_NOOVERLAP:
            break;
        }
        goto exit;
    }

    /* MapFileBlockC() found a valid extent in the filefork.  Search the
     * mapping information further for invalid file ranges
     */
    overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
                          ap->a_foffset + (off_t)bytesContAvail - 1,
                          &invalid_range);
    if (overlaptype != RL_NOOVERLAP) {
        switch(overlaptype) {
        case RL_MATCHINGOVERLAP:
        case RL_OVERLAPCONTAINSRANGE:
        case RL_OVERLAPSTARTSBEFORE:
            /* There's no valid block for this byte offset */
            *ap->a_bpn = (daddr64_t)-1;
            /* There's no point limiting the amount to be returned
             * if the invalid range that was hit extends all the way
             * to the EOF (i.e. there's no valid bytes between the
             * end of this range and the file's EOF):
             */
            if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
                ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
                bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
            }
            break;

        case RL_OVERLAPISCONTAINED:
        case RL_OVERLAPENDSAFTER:
            /* The range of interest hits an invalid block before the end: */
            if (invalid_range->rl_start == ap->a_foffset) {
                /* There's actually no valid information to be had starting here: */
                *ap->a_bpn = (daddr64_t)-1;
                if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
                    ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
                    bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
                }
            } else {
                bytesContAvail = invalid_range->rl_start - ap->a_foffset;
            }
            break;

        case RL_NOOVERLAP:
            break;
        } /* end switch */
        if (bytesContAvail > ap->a_size)
            bytesContAvail = ap->a_size;
    }

exit:
    if (retval == 0) {
        if (ap->a_run)
            *ap->a_run = bytesContAvail;

        if (ap->a_poff)
            *(int *)ap->a_poff = 0;
    }

    if (tooklock)
        hfs_unlock(cp);

    return (MacToVFSError(retval));
}
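
/*
 * How a caller interprets the results above: a returned block number of
 * -1 with a nonzero run means "this span is an uncommitted zero-fill
 * hole", so the cluster layer manufactures zeroes instead of issuing
 * device I/O.  A hypothetical trace for a file whose first 8 KB is an
 * invalid (not yet written) range:
 *
 *     VNODE_READ blockmap(foffset=0,    size=16384) -> *a_bpn == -1, run == 8192
 *     VNODE_READ blockmap(foffset=8192, size=8192)  -> real block,   run == 8192
 */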

/*
 * prepare and issue the I/O
 * buf_strategy knows how to deal
 * with requests that require
 * fragmented I/Os
 */
int
hfs_vnop_strategy(struct vnop_strategy_args *ap)
{
    buf_t   bp = ap->a_bp;
    vnode_t vp = buf_vnode(bp);

    return (buf_strategy(VTOHFS(vp)->hfs_devvp, ap));
}

static int
hfs_minorupdate(struct vnode *vp) {
    struct cnode *cp = VTOC(vp);
    cp->c_flag &= ~C_MODIFIED;
    cp->c_touch_acctime = 0;
    cp->c_touch_chgtime = 0;
    cp->c_touch_modtime = 0;

    return 0;
}

static int
do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context)
{
    register struct cnode *cp = VTOC(vp);
    struct filefork *fp = VTOF(vp);
    struct proc *p = vfs_context_proc(context);
    kauth_cred_t cred = vfs_context_ucred(context);
    int retval;
    off_t bytesToAdd;
    off_t actualBytesAdded;
    off_t filebytes;
    u_int32_t fileblocks;
    int blksize;
    struct hfsmount *hfsmp;
    int lockflags;

    blksize = VTOVCB(vp)->blockSize;
    fileblocks = fp->ff_blocks;
    filebytes = (off_t)fileblocks * (off_t)blksize;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
                 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

    if (length < 0)
        return (EINVAL);

    /* This should only happen with a corrupt filesystem */
    if ((off_t)fp->ff_size < 0)
        return (EINVAL);

    if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
        return (EFBIG);

    hfsmp = VTOHFS(vp);

    retval = E_NONE;

    /* Files that are changing size are not hot file candidates. */
    if (hfsmp->hfc_stage == HFC_RECORDING) {
        fp->ff_bytesread = 0;
    }

    /*
     * We cannot just check if fp->ff_size == length (as an optimization)
     * since there may be extra physical blocks that also need truncation.
     */
#if QUOTA
    if ((retval = hfs_getinoquota(cp)))
        return (retval);
#endif /* QUOTA */

    /*
     * Lengthen the size of the file. We must ensure that the
     * last byte of the file is allocated. Since the smallest
     * value of ff_size is 0, length will be at least 1.
     */
    if (length > (off_t)fp->ff_size) {
#if QUOTA
        retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
                           cred, 0);
        if (retval)
            goto Err_Exit;
#endif /* QUOTA */
        /*
         * If we don't have enough physical space then
         * we need to extend the physical size.
         */
        if (length > filebytes) {
            int eflags;
            u_int32_t blockHint = 0;

            /* All or nothing and don't round up to clumpsize. */
            eflags = kEFAllMask | kEFNoClumpMask;

            if (cred && suser(cred, NULL) != 0)
                eflags |= kEFReserveMask;  /* keep a reserve */

            /*
             * Allocate Journal and Quota files in metadata zone.
             */
            if (filebytes == 0 &&
                hfsmp->hfs_flags & HFS_METADATA_ZONE &&
                hfs_virtualmetafile(cp)) {
                eflags |= kEFMetadataMask;
                blockHint = hfsmp->hfs_metazone_start;
            }
            if (hfs_start_transaction(hfsmp) != 0) {
                retval = EINVAL;
                goto Err_Exit;
            }

            /* Protect extents b-tree and allocation bitmap */
            lockflags = SFL_BITMAP;
            if (overflow_extents(fp))
                lockflags |= SFL_EXTENTS;
            lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

            while ((length > filebytes) && (retval == E_NONE)) {
                bytesToAdd = length - filebytes;
                retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
                                                   (FCB*)fp,
                                                   bytesToAdd,
                                                   blockHint,
                                                   eflags,
                                                   &actualBytesAdded));

                filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
                if (actualBytesAdded == 0 && retval == E_NONE) {
                    if (length > filebytes)
                        length = filebytes;
                    break;
                }
            } /* endwhile */

            hfs_systemfile_unlock(hfsmp, lockflags);

            if (hfsmp->jnl) {
                if (skipupdate) {
                    (void) hfs_minorupdate(vp);
                }
                else {
                    (void) hfs_update(vp, TRUE);
                    (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
                }
            }

            hfs_end_transaction(hfsmp);

            if (retval)
                goto Err_Exit;

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
                         (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
        }

        if (!(flags & IO_NOZEROFILL)) {
            if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
                struct rl_entry *invalid_range;
                off_t zero_limit;

                zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
                if (length < zero_limit) zero_limit = length;

                if (length > (off_t)fp->ff_size) {
                    struct timeval tv;

                    /* Extending the file: time to fill out the current last page w. zeroes? */
                    if ((fp->ff_size & PAGE_MASK_64) &&
                        (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
                                 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {

                        /* There's some valid data at the start of the (current) last page
                           of the file, so zero out the remainder of that page to ensure the
                           entire page contains valid data.  Since there is no invalid range
                           possible past the (current) eof, there's no need to remove anything
                           from the invalid range list before calling cluster_write(): */
                        hfs_unlock(cp);
                        retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
                                               fp->ff_size, (off_t)0,
                                               (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
                        hfs_lock(cp, HFS_FORCE_LOCK);
                        if (retval) goto Err_Exit;

                        /* Merely invalidate the remaining area, if necessary: */
                        if (length > zero_limit) {
                            microuptime(&tv);
                            rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
                            cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
                        }
                    } else {
                        /* The page containing the (current) eof is invalid: just add the
                           remainder of the page to the invalid list, along with the area
                           being newly allocated:
                         */
                        microuptime(&tv);
                        rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
                        cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
                    }
                }
            } else {
                panic("hfs_truncate: invoked on non-UBC object?!");
            }
        }
        cp->c_touch_modtime = TRUE;
        fp->ff_size = length;

    } else { /* Shorten the size of the file */

        if ((off_t)fp->ff_size > length) {
            /* Any space previously marked as invalid is now irrelevant: */
            rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
        }

        /*
         * Account for any unmapped blocks. Note that the new
         * file length can still end up with unmapped blocks.
         */
        if (fp->ff_unallocblocks > 0) {
            u_int32_t finalblks;
            u_int32_t loanedBlocks;

            HFS_MOUNT_LOCK(hfsmp, TRUE);

            loanedBlocks = fp->ff_unallocblocks;
            cp->c_blocks -= loanedBlocks;
            fp->ff_blocks -= loanedBlocks;
            fp->ff_unallocblocks = 0;

            hfsmp->loanedBlocks -= loanedBlocks;

            finalblks = (length + blksize - 1) / blksize;
            if (finalblks > fp->ff_blocks) {
                /* calculate required unmapped blocks */
                loanedBlocks = finalblks - fp->ff_blocks;
                hfsmp->loanedBlocks += loanedBlocks;

                fp->ff_unallocblocks = loanedBlocks;
                cp->c_blocks += loanedBlocks;
                fp->ff_blocks += loanedBlocks;
            }
            HFS_MOUNT_UNLOCK(hfsmp, TRUE);
        }

        /*
         * For a TBE process the deallocation of the file blocks is
         * delayed until the file is closed. And hfs_close calls
         * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
         * isn't set, we make sure this isn't a TBE process.
         */
        if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
#if QUOTA
            off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
#endif /* QUOTA */
            if (hfs_start_transaction(hfsmp) != 0) {
                retval = EINVAL;
                goto Err_Exit;
            }

            if (fp->ff_unallocblocks == 0) {
                /* Protect extents b-tree and allocation bitmap */
                lockflags = SFL_BITMAP;
                if (overflow_extents(fp))
                    lockflags |= SFL_EXTENTS;
                lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

                retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
                                                     (FCB*)fp, length, false));

                hfs_systemfile_unlock(hfsmp, lockflags);
            }
            if (hfsmp->jnl) {
                if (retval == 0) {
                    fp->ff_size = length;
                }
                if (skipupdate) {
                    (void) hfs_minorupdate(vp);
                }
                else {
                    (void) hfs_update(vp, TRUE);
                    (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
                }
            }
            hfs_end_transaction(hfsmp);

            filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
            if (retval)
                goto Err_Exit;
#if QUOTA
            /* These are bytesreleased */
            (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
#endif /* QUOTA */
        }
        /* Only set update flag if the logical length changes */
        if ((off_t)fp->ff_size != length)
            cp->c_touch_modtime = TRUE;
        fp->ff_size = length;
    }
    if (cp->c_mode & (S_ISUID | S_ISGID)) {
        if (!vfs_context_issuser(context)) {
            cp->c_mode &= ~(S_ISUID | S_ISGID);
            skipupdate = 0;
        }
    }
    if (skipupdate) {
        retval = hfs_minorupdate(vp);
    }
    else {
        cp->c_touch_chgtime = TRUE;   /* status changed */
        cp->c_touch_modtime = TRUE;   /* file data was modified */
        retval = hfs_update(vp, MNT_WAIT);
    }
    if (retval) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
                     -1, -1, -1, retval, 0);
    }

Err_Exit:

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
                 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);

    return (retval);
}
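
/*
 * A worked example of the unmapped-block accounting in the shorten path,
 * with a 4 KB allocation block size: truncating a fork that holds 10
 * loaned (delayed-allocation) blocks down to 20000 bytes first returns
 * all 10 to the volume, then re-borrows
 *
 *     finalblks = (20000 + 4095) / 4096 = 5
 *
 * blocks if the fork now holds fewer than 5 real blocks, keeping
 * cp->c_blocks and hfsmp->loanedBlocks consistent with the new length.
 */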

/*
 * Truncate a cnode to at most length size, freeing (or adding) the
 * disk blocks.
 */
__private_extern__
int
hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
             int skipupdate, vfs_context_t context)
{
    struct filefork *fp = VTOF(vp);
    off_t filebytes;
    u_int32_t fileblocks;
    int blksize, error = 0;
    struct cnode *cp = VTOC(vp);

    /* Cannot truncate an HFS directory! */
    if (vnode_isdir(vp)) {
        return (EISDIR);
    }
    /* A swap file cannot change size. */
    if (vnode_isswap(vp) && (length != 0)) {
        return (EPERM);
    }

    blksize = VTOVCB(vp)->blockSize;
    fileblocks = fp->ff_blocks;
    filebytes = (off_t)fileblocks * (off_t)blksize;

    //
    // Have to do this here so that we don't wind up with
    // i/o pending for blocks that are about to be released
    // if we truncate the file.
    //
    // If skipsetsize is set, then the caller is responsible
    // for the ubc_setsize.
    //
    // Even if skipsetsize is set, if the length is zero we
    // want to call ubc_setsize() because as of SnowLeopard
    // it will no longer cause any page-ins and it will drop
    // any dirty pages so that we don't do any i/o that we
    // don't have to.  This also prevents a race where i/o
    // for truncated blocks may overwrite later data if the
    // blocks get reallocated to a different file.
    //
    if (!skipsetsize || length == 0)
        ubc_setsize(vp, length);

    // have to loop truncating or growing files that are
    // really big because otherwise transactions can get
    // enormous and consume too many kernel resources.

    if (length < filebytes) {
        while (filebytes > length) {
            if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
                filebytes -= HFS_BIGFILE_SIZE;
            } else {
                filebytes = length;
            }
            cp->c_flag |= C_FORCEUPDATE;
            error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
            if (error)
                break;
        }
    } else if (length > filebytes) {
        while (filebytes < length) {
            if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
                filebytes += HFS_BIGFILE_SIZE;
            } else {
                filebytes = length;
            }
            cp->c_flag |= C_FORCEUPDATE;
            error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
            if (error)
                break;
        }
    } else /* Same logical size */ {

        error = do_hfs_truncate(vp, length, flags, skipupdate, context);
    }
    /* Files that are changing size are not hot file candidates. */
    if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
        fp->ff_bytesread = 0;
    }

    return (error);
}
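
/*
 * The chunking above bounds the size of each journal transaction.  A
 * sketch assuming HFS_BIGFILE_SIZE is 1 GB: shrinking a 3.5 GB fork
 * with overflow extents to zero calls do_hfs_truncate() at 2.5 GB,
 * 1.5 GB, 0.5 GB and finally 0, i.e. four modest transactions instead
 * of one enormous one.
 */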

/*
 * Preallocate file storage space.
 */
int
hfs_vnop_allocate(struct vnop_allocate_args /* {
        vnode_t a_vp;
        off_t a_length;
        u_int32_t  a_flags;
        off_t *a_bytesallocated;
        off_t a_offset;
        vfs_context_t a_context;
    } */ *ap)
{
    struct vnode *vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    ExtendedVCB *vcb;
    off_t length = ap->a_length;
    off_t startingPEOF;
    off_t moreBytesRequested;
    off_t actualBytesAdded;
    off_t filebytes;
    u_int32_t fileblocks;
    int retval, retval2;
    u_int32_t blockHint;
    u_int32_t extendFlags;   /* For call to ExtendFileC */
    struct hfsmount *hfsmp;
    kauth_cred_t cred = vfs_context_ucred(ap->a_context);
    int lockflags;

    *(ap->a_bytesallocated) = 0;

    if (!vnode_isreg(vp))
        return (EISDIR);
    if (length < (off_t)0)
        return (EINVAL);

    cp = VTOC(vp);

    hfs_lock_truncate(cp, TRUE);

    if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
        goto Err_Exit;
    }

    fp = VTOF(vp);
    hfsmp = VTOHFS(vp);
    vcb = VTOVCB(vp);

    fileblocks = fp->ff_blocks;
    filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

    if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
        retval = EINVAL;
        goto Err_Exit;
    }

    /* Fill in the flags word for the call to Extend the file */

    extendFlags = kEFNoClumpMask;
    if (ap->a_flags & ALLOCATECONTIG)
        extendFlags |= kEFContigMask;
    if (ap->a_flags & ALLOCATEALL)
        extendFlags |= kEFAllMask;
    if (cred && suser(cred, NULL) != 0)
        extendFlags |= kEFReserveMask;
    if (hfs_virtualmetafile(cp))
        extendFlags |= kEFMetadataMask;

    retval = E_NONE;
    blockHint = 0;
    startingPEOF = filebytes;

    if (ap->a_flags & ALLOCATEFROMPEOF)
        length += filebytes;
    else if (ap->a_flags & ALLOCATEFROMVOL)
        blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

    /* If no changes are necessary, then we're done */
    if (filebytes == length)
        goto Std_Exit;

    /*
     * Lengthen the size of the file. We must ensure that the
     * last byte of the file is allocated. Since the smallest
     * value of filebytes is 0, length will be at least 1.
     */
    if (length > filebytes) {
        off_t total_bytes_added = 0, orig_request_size;

        orig_request_size = moreBytesRequested = length - filebytes;

#if QUOTA
        retval = hfs_chkdq(cp,
                           (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
                           cred, 0);
        if (retval)
            goto Err_Exit;

#endif /* QUOTA */
        /*
         * Metadata zone checks.
         */
        if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
            /*
             * Allocate Journal and Quota files in metadata zone.
             */
            if (hfs_virtualmetafile(cp)) {
                blockHint = hfsmp->hfs_metazone_start;
            } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
                       (blockHint <= hfsmp->hfs_metazone_end)) {
                /*
                 * Move blockHint outside metadata zone.
                 */
                blockHint = hfsmp->hfs_metazone_end + 1;
            }
        }


        while ((length > filebytes) && (retval == E_NONE)) {
            off_t bytesRequested;

            if (hfs_start_transaction(hfsmp) != 0) {
                retval = EINVAL;
                goto Err_Exit;
            }

            /* Protect extents b-tree and allocation bitmap */
            lockflags = SFL_BITMAP;
            if (overflow_extents(fp))
                lockflags |= SFL_EXTENTS;
            lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

            if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
                bytesRequested = HFS_BIGFILE_SIZE;
            } else {
                bytesRequested = moreBytesRequested;
            }

            if (extendFlags & kEFContigMask) {
                // if we're on a sparse device, this will force it to do a
                // full scan to find the space needed.
                hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
            }

            retval = MacToVFSError(ExtendFileC(vcb,
                                               (FCB*)fp,
                                               bytesRequested,
                                               blockHint,
                                               extendFlags,
                                               &actualBytesAdded));

            if (retval == E_NONE) {
                *(ap->a_bytesallocated) += actualBytesAdded;
                total_bytes_added += actualBytesAdded;
                moreBytesRequested -= actualBytesAdded;
                if (blockHint != 0) {
                    blockHint += actualBytesAdded / vcb->blockSize;
                }
            }
            filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

            hfs_systemfile_unlock(hfsmp, lockflags);

            if (hfsmp->jnl) {
                (void) hfs_update(vp, TRUE);
                (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
            }

            hfs_end_transaction(hfsmp);
        }


        /*
         * if we get an error and no changes were made then exit
         * otherwise we must do the hfs_update to reflect the changes
         */
        if (retval && (startingPEOF == filebytes))
            goto Err_Exit;

        /*
         * Adjust actualBytesAdded to be allocation block aligned, not
         * clump size aligned.
         * NOTE: So what we are reporting does not affect reality
         * until the file is closed, when we truncate the file to allocation
         * block size.
         */
        if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
            *(ap->a_bytesallocated) =
                roundup(orig_request_size, (off_t)vcb->blockSize);

    } else { /* Shorten the size of the file */

        if (fp->ff_size > length) {
            /*
             * Any buffers that are past the truncation point need to be
             * invalidated (to maintain buffer cache consistency).
             */
        }

        retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context);
        filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

        /*
         * if we get an error and no changes were made then exit
         * otherwise we must do the hfs_update to reflect the changes
         */
        if (retval && (startingPEOF == filebytes)) goto Err_Exit;
#if QUOTA
        /* These are bytesreleased */
        (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED, 0);
#endif /* QUOTA */

        if (fp->ff_size > filebytes) {
            fp->ff_size = filebytes;

            hfs_unlock(cp);
            ubc_setsize(vp, fp->ff_size);
            hfs_lock(cp, HFS_FORCE_LOCK);
        }
    }

Std_Exit:
    cp->c_touch_chgtime = TRUE;
    cp->c_touch_modtime = TRUE;
    retval2 = hfs_update(vp, MNT_WAIT);

    if (retval == 0)
        retval = retval2;
Err_Exit:
    hfs_unlock_truncate(cp, TRUE);
    hfs_unlock(cp);
    return (retval);
}
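
/*
 * This vnop backs the F_PREALLOCATE fcntl.  A hedged userspace sketch
 * requesting 10 MB of contiguous space past the current physical EOF:
 *
 *     fstore_t fst;
 *     fst.fst_flags      = F_ALLOCATECONTIG | F_ALLOCATEALL;
 *     fst.fst_posmode    = F_PEOFPOSMODE;   // assumed to map to ALLOCATEFROMPEOF
 *     fst.fst_offset     = 0;
 *     fst.fst_length     = 10 * 1024 * 1024;
 *     fst.fst_bytesalloc = 0;
 *     if (fcntl(fd, F_PREALLOCATE, &fst) < 0)
 *         warn("F_PREALLOCATE");  // e.g. ENOSPC when no contiguous run fits
 *
 * fst_bytesalloc comes back rounded to allocation blocks, per the
 * comment in the lengthen path above.
 */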

/*
 * Pagein for HFS filesystem
 */
int
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
    struct vnop_pagein_args {
        vnode_t       a_vp,
        upl_t         a_pl,
        vm_offset_t   a_pl_offset,
        off_t         a_f_offset,
        size_t        a_size,
        int           a_flags
        vfs_context_t a_context;
    };
*/
{
    vnode_t vp = ap->a_vp;
    int error;

#if HFS_COMPRESSION
    if (VNODE_IS_RSRC(vp)) {
        /* allow pageins of the resource fork */
    } else {
        int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
        if (compressed) {
            error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
            if (compressed) {
                if (error == 0) {
                    /* successful page-in, update the access time */
                    VTOC(vp)->c_touch_acctime = TRUE;

                    /* compressed files are not hot file candidates */
                    if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
                        VTOF(vp)->ff_bytesread = 0;
                    }
                }
                return error;
            }
            /* otherwise the file was converted back to a regular file while we were reading it */
        }
    }
#endif

    error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
                           ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
    /*
     * Keep track of blocks read.
     */
    if (!vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
        struct cnode *cp;
        struct filefork *fp;
        int bytesread;
        int took_cnode_lock = 0;

        cp = VTOC(vp);
        fp = VTOF(vp);

        if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
            bytesread = fp->ff_size;
        else
            bytesread = ap->a_size;

        /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
        if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
            hfs_lock(cp, HFS_FORCE_LOCK);
            took_cnode_lock = 1;
        }
        /*
         * If this file hasn't been seen since the start of
         * the current sampling period then start over.
         */
        if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
            struct timeval tv;

            fp->ff_bytesread = bytesread;
            microtime(&tv);
            cp->c_atime = tv.tv_sec;
        } else {
            fp->ff_bytesread += bytesread;
        }
        cp->c_touch_acctime = TRUE;
        if (took_cnode_lock)
            hfs_unlock(cp);
    }
    return (error);
}

/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
    struct vnop_pageout_args {
        vnode_t       a_vp,
        upl_t         a_pl,
        vm_offset_t   a_pl_offset,
        off_t         a_f_offset,
        size_t        a_size,
        int           a_flags
        vfs_context_t a_context;
    };
*/
{
    vnode_t vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    int retval = 0;
    off_t filesize;
    upl_t upl;
    upl_page_info_t* pl;
    vm_offset_t a_pl_offset;
    int a_flags;
    int is_pageoutv2 = 0;
    kern_return_t kret;

    cp = VTOC(vp);
    fp = VTOF(vp);

    /*
     * Figure out where the file ends, for pageout purposes.  If
     * ff_new_size > ff_size, then we're in the middle of extending the
     * file via a write, so it is safe (and necessary) that we be able
     * to pageout up to that point.
     */
    filesize = fp->ff_size;
    if (fp->ff_new_size > filesize)
        filesize = fp->ff_new_size;

    a_flags = ap->a_flags;
    a_pl_offset = ap->a_pl_offset;

    /*
     * we can tell if we're getting the new or old behavior from the UPL
     */
    if ((upl = ap->a_pl) == NULL) {
        int request_flags;

        is_pageoutv2 = 1;
        /*
         * we're in control of any UPL we commit
         * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
         */
        a_flags &= ~UPL_NOCOMMIT;
        a_pl_offset = 0;

        /*
         * take truncate lock (shared) to guard against
         * zero-fill thru fsync interfering, but only for v2
         */
        hfs_lock_truncate(cp, 0);

        if (a_flags & UPL_MSYNC) {
            request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
        }
        else {
            request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
        }
        kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);

        if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
            retval = EINVAL;
            goto pageout_done;
        }
    }
    /*
     * from this point forward upl points at the UPL we're working with
     * it was either passed in or we successfully created it
     */

    /*
     * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
     * UPL instead of relying on the UPL passed into us.  We go ahead and do that here,
     * scanning for dirty ranges.  We'll issue our own N cluster_pageout calls, for
     * N dirty ranges in the UPL.  Note that this is almost a direct copy of the
     * logic in vnode_pageout except that we need to do it after grabbing the truncate
     * lock in HFS so that we don't lock invert ourselves.
     *
     * Note that we can still get into this function on behalf of the default pager with
     * non-V2 behavior (swapfiles).  However in that case, we did not grab locks above
     * since fsync and other writing threads will grab the locks, then mark the
     * relevant pages as busy.  But the pageout codepath marks the pages as busy,
     * and THEN would attempt to grab the truncate lock, which would result in deadlock.  So
     * we do not try to grab anything for the pre-V2 case, which should only be accessed
     * by the paging/VM system.
     */

    if (is_pageoutv2) {
        off_t f_offset;
        int offset;
        int isize;
        int pg_index;
        int error;
        int error_ret = 0;

        isize = ap->a_size;
        f_offset = ap->a_f_offset;

        /*
         * Scan from the back to find the last page in the UPL, so that we
         * aren't looking at a UPL that may have already been freed by the
         * preceding aborts/completions.
         */
        for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
            if (upl_page_present(pl, --pg_index))
                break;
            if (pg_index == 0) {
                ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
                goto pageout_done;
            }
        }

        /*
         * initialize the offset variables before we touch the UPL.
         * a_f_offset is the position into the file, in bytes
         * offset is the position into the UPL, in bytes
         * pg_index is the pg# of the UPL we're operating on.
         * isize is the offset into the UPL of the last non-clean page.
         */
        isize = ((pg_index + 1) * PAGE_SIZE);

        offset = 0;
        pg_index = 0;

        while (isize) {
            int xsize;
            int num_of_pages;

            if ( !upl_page_present(pl, pg_index)) {
                /*
                 * we asked for RET_ONLY_DIRTY, so it's possible
                 * to get back empty slots in the UPL.
                 * just skip over them
                 */
                f_offset += PAGE_SIZE;
                offset += PAGE_SIZE;
                isize -= PAGE_SIZE;
                pg_index++;

                continue;
            }
            if ( !upl_dirty_page(pl, pg_index)) {
                panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
            }

            /*
             * We know that we have at least one dirty page.
             * Now checking to see how many in a row we have
             */
            num_of_pages = 1;
            xsize = isize - PAGE_SIZE;

            while (xsize) {
                if ( !upl_dirty_page(pl, pg_index + num_of_pages))
                    break;
                num_of_pages++;
                xsize -= PAGE_SIZE;
            }
            xsize = num_of_pages * PAGE_SIZE;

            if (!vnode_isswap(vp)) {
                off_t end_of_range;
                int tooklock;

                tooklock = 0;

                if (cp->c_lockowner != current_thread()) {
                    if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
                        /*
                         * we're in the v2 path, so we are the
                         * owner of the UPL... we may have already
                         * processed some of the UPL, so abort it
                         * from the current working offset to the
                         * end of the UPL
                         */
                        ubc_upl_abort_range(upl,
                                            offset,
                                            ap->a_size - offset,
                                            UPL_ABORT_FREE_ON_EMPTY);
                        goto pageout_done;
                    }
                    tooklock = 1;
                }
                end_of_range = f_offset + xsize - 1;

                if (end_of_range >= filesize) {
                    end_of_range = (off_t)(filesize - 1);
                }
                if (f_offset < filesize) {
                    rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
                    cp->c_flag |= C_MODIFIED;  /* leof is dirty */
                }
                if (tooklock) {
                    hfs_unlock(cp);
                }
            }
            if ((error = cluster_pageout(vp, upl, offset, f_offset,
                                         xsize, filesize, a_flags))) {
                if (error_ret == 0)
                    error_ret = error;
            }
            f_offset += xsize;
            offset += xsize;
            isize -= xsize;
            pg_index += num_of_pages;
        }
        /* capture errnos bubbled out of cluster_pageout if they occurred */
        if (error_ret != 0) {
            retval = error_ret;
        }
    } /* end block for v2 pageout behavior */
	else {
		if (!vnode_isswap(vp)) {
			off_t end_of_range;
			int tooklock = 0;

			if (cp->c_lockowner != current_thread()) {
				if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
					if (!(a_flags & UPL_NOCOMMIT)) {
						ubc_upl_abort_range(upl,
								    a_pl_offset,
								    ap->a_size,
								    UPL_ABORT_FREE_ON_EMPTY);
					}
					goto pageout_done;
				}
				tooklock = 1;
			}
			end_of_range = ap->a_f_offset + ap->a_size - 1;

			if (end_of_range >= filesize) {
				end_of_range = (off_t)(filesize - 1);
			}
			if (ap->a_f_offset < filesize) {
				rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
				cp->c_flag |= C_MODIFIED;  /* leof is dirty */
			}

			if (tooklock) {
				hfs_unlock(cp);
			}
		}
		/*
		 * just call cluster_pageout for old pre-v2 behavior
		 */
		retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
					 ap->a_size, filesize, a_flags);
	}

	/*
	 * If data was written, update the modification time of the file.
	 * If setuid or setgid bits are set and this process is not the
	 * superuser then clear the setuid and setgid bits as a precaution
	 * against tampering.
	 */
	if (retval == 0) {
		cp->c_touch_modtime = TRUE;
		cp->c_touch_chgtime = TRUE;
		if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
		    (vfs_context_suser(ap->a_context) != 0)) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			cp->c_mode &= ~(S_ISUID | S_ISGID);
			hfs_unlock(cp);
		}
	}

pageout_done:
	if (is_pageoutv2) {
		/* release truncate lock (shared) */
		hfs_unlock_truncate(cp, 0);
	}
	return (retval);
}
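
/*
 * Illustrative sketch (not compiled in): the run-coalescing walk used by the
 * v2 path above. Given a UPL page-info array, it batches maximal runs of
 * contiguous dirty pages so that each run becomes a single cluster_pageout
 * call instead of one call per page.
 */
#if 0
	int pg = 0, npages = isize / PAGE_SIZE;

	while (pg < npages) {
		if (!upl_page_present(pl, pg)) {	/* RET_ONLY_DIRTY can leave holes */
			pg++;
			continue;
		}
		int run = 1;
		while ((pg + run) < npages && upl_dirty_page(pl, pg + run))
			run++;
		/* one pageout covers PAGE_SIZE * run bytes starting at page 'pg' */
		pg += run;
	}
#endif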

/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
	int retval = 0;
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
	    (vp == VTOHFS(vp)->hfc_filevp)) {

		/*
		 * Swap and validate the node if it is in native byte order.
		 * This is always true on big endian, so we always validate
		 * before writing here. On little endian, the node typically has
		 * been swapped and validated when it was written to the journal,
		 * so we won't do anything here.
		 */
		if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = (char *)buf_dataptr(bp);
			block.blockNum = buf_lblkno(bp);
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (buf_fromcache(bp) == 0);
			block.blockSize = buf_count(bp);

			/* Endian un-swap B-Tree node */
			retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
			if (retval)
				panic("hfs_vnop_bwrite: about to write corrupt node!\n");
		}
	}

	/* This buffer shouldn't be locked anymore but if it is clear it */
	if ((buf_flags(bp) & B_LOCKED)) {
		// XXXdbg
		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp %p\n", bp);
		}
		buf_clearflags(bp, B_LOCKED);
	}
	retval = vn_bwrite (ap);

	return (retval);
}
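
/*
 * Illustrative sketch (not compiled in): why the 0x000e test above works.
 * A B-tree node stores its record offsets packed at the very end of the
 * node, last offset first, and the offset of record 0 is always
 * sizeof(BTNodeDescriptor) == 14. So if the final two bytes read 0x000e in
 * host order, the node has not yet been swapped to big-endian disk form.
 */
#if 0
	u_int16_t last = ((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0];

	if (last == 0x000e) {
		/* host order: swap to big-endian before it hits the disk */
	} else if (last == OSSwapInt16(0x000e)) {
		/* already big-endian (e.g. swapped when written to the journal) */
	}
#endif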

/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------
 * |///////////////|
 * -----------------
 * 0               N (file offset)
 *
 * -----------------     -----------------
 * |///////////////|     |               | STEP 1 (acquire new blocks)
 * -----------------     -----------------
 * 0               N     N+1              2N
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////| STEP 2 (clone data)
 * -----------------     -----------------
 * 0               N     N+1              2N
 *
 *                       -----------------
 *                       |///////////////| STEP 3 (head truncate blocks)
 *                       -----------------
 *                       0               N
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
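
/*
 * Illustrative sketch (not compiled in): the three steps diagrammed above,
 * reduced to their core calls as realized by hfs_relocate() below. Error
 * handling, cnode locking, and the journal transactions that bracket each
 * step are omitted here.
 */
#if 0
	/* STEP 1: extend the fork by its own size, contiguously if possible */
	retval = ExtendFileC(hfsmp, (FCB *)fp, growsize, blockHint,
	                     kEFContigMask | kEFAllMask | kEFNoClumpMask, &newbytes);

	/* STEP 2: copy bytes [0, N) into the newly appended blocks */
	retval = hfs_clonefile(vp, headblks, datablks, blksize);

	/* STEP 3: drop the original blocks; the clone becomes the file data */
	retval = HeadTruncateFile(hfsmp, (FCB *)fp, headblks);
#endif
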
__private_extern__
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
	struct proc *p)
{
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	u_int32_t headblks;
	u_int32_t datablks;
	u_int32_t blksize;
	u_int32_t growsize;
	u_int32_t nextallocsave;
	daddr64_t sector_a, sector_b;
	int eflags;
	off_t newbytes;
	int retval;
	int lockflags = 0;
	int took_trunc_lock = 0;
	int started_tr = 0;
	enum vtype vnodetype;

	vnodetype = vnode_vtype(vp);
	if (vnodetype != VREG && vnodetype != VLNK) {
		return (EPERM);
	}

	hfsmp = VTOHFS(vp);
	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
		return (ENOSPC);
	}

	cp = VTOC(vp);
	fp = VTOF(vp);
	if (fp->ff_unallocblocks)
		return (EINVAL);
	blksize = hfsmp->blockSize;
	if (blockHint == 0)
		blockHint = hfsmp->nextAllocation;

	if ((fp->ff_size > 0x7fffffff) ||
	    ((fp->ff_size > blksize) && vnodetype == VLNK)) {
		return (EFBIG);
	}

	//
	// We do not believe that this call to hfs_fsync() is
	// necessary, and it causes a journal transaction
	// deadlock, so we are removing it.
	//
	//if (vnodetype == VREG && !vnode_issystem(vp)) {
	//	retval = hfs_fsync(vp, MNT_WAIT, 0, p);
	//	if (retval)
	//		return (retval);
	//}

	if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
		hfs_unlock(cp);
		hfs_lock_truncate(cp, TRUE);
		/* Force lock since callers expect lock to be held. */
		if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
			hfs_unlock_truncate(cp, TRUE);
			return (retval);
		}
		/* No need to continue if file was removed. */
		if (cp->c_flag & C_NOEXISTS) {
			hfs_unlock_truncate(cp, TRUE);
			return (ENOENT);
		}
		took_trunc_lock = 1;
	}
	headblks = fp->ff_blocks;
	datablks = howmany(fp->ff_size, blksize);
	growsize = datablks * blksize;
	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
	if (blockHint >= hfsmp->hfs_metazone_start &&
	    blockHint <= hfsmp->hfs_metazone_end)
		eflags |= kEFMetadataMask;

	if (hfs_start_transaction(hfsmp) != 0) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, TRUE);
		return (EINVAL);
	}
	started_tr = 1;
	/*
	 * Protect the extents b-tree and the allocation bitmap
	 * during MapFileBlockC and ExtendFileC operations.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
	if (retval) {
		retval = MacToVFSError(retval);
		goto out;
	}

	/*
	 * STEP 1 - acquire new allocation blocks.
	 */
	nextallocsave = hfsmp->nextAllocation;
	retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
	if (eflags & kEFMetadataMask) {
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
		MarkVCBDirty(hfsmp);
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	}

	retval = MacToVFSError(retval);
	if (retval == 0) {
		cp->c_flag |= C_MODIFIED;
		if (newbytes < growsize) {
			retval = ENOSPC;
			goto restore;
		} else if (fp->ff_blocks < (headblks + datablks)) {
			printf("hfs_relocate: allocation failed");
			retval = ENOSPC;
			goto restore;
		}

		retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
		if (retval) {
			retval = MacToVFSError(retval);
		} else if ((sector_a + 1) == sector_b) {
			retval = ENOSPC;
			goto restore;
		} else if ((eflags & kEFMetadataMask) &&
		           ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
		              hfsmp->hfs_metazone_end)) {
#if 0
			const char * filestr;
			char emptystr = '\0';

			if (cp->c_desc.cd_nameptr != NULL) {
				filestr = (const char *)&cp->c_desc.cd_nameptr[0];
			} else if (vnode_name(vp) != NULL) {
				filestr = vnode_name(vp);
			} else {
				filestr = &emptystr;
			}
#endif
			retval = ENOSPC;
			goto restore;
		}
	}
	/* Done with system locks and journal for now. */
	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	hfs_end_transaction(hfsmp);
	started_tr = 0;

	if (retval) {
		/*
		 * Check to see if failure is due to excessive fragmentation.
		 */
		if ((retval == ENOSPC) &&
		    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
			hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
		}
		goto out;
	}
	/*
	 * STEP 2 - clone file data into the new allocation blocks.
	 */

	if (vnodetype == VLNK)
		retval = hfs_clonelink(vp, blksize, cred, p);
	else if (vnode_issystem(vp))
		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	else
		retval = hfs_clonefile(vp, headblks, datablks, blksize);

	/* Start transaction for step 3 or for a restore. */
	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto out;
	}
	started_tr = 1;

	if (retval)
		goto restore;

	/*
	 * STEP 3 - switch to cloned data and remove old blocks.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	if (retval)
		goto restore;
out:
	if (took_trunc_lock)
		hfs_unlock_truncate(cp, TRUE);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/* Push cnode's new extent data to disk. */
	if (retval == 0) {
		(void) hfs_update(vp, MNT_WAIT);
	}
	if (hfsmp->jnl) {
		if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
			(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
		else
			(void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
	}
exit:
	if (started_tr)
		hfs_end_transaction(hfsmp);

	return (retval);

restore:
	if (fp->ff_blocks == headblks) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, TRUE);
		goto exit;
	}
	/*
	 * Give back any newly allocated space.
	 */
	if (lockflags == 0) {
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	}

	(void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;

	if (took_trunc_lock)
		hfs_unlock_truncate(cp, TRUE);
	goto exit;
}
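
/*
 * Illustrative sketch (not compiled in): the transaction/lock discipline
 * hfs_relocate follows around every extent operation above. The extents
 * b-tree lock is only added when the fork actually has overflow extents.
 */
#if 0
	if (hfs_start_transaction(hfsmp) != 0)
		return (EINVAL);
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	/* ... MapFileBlockC / ExtendFileC / HeadTruncateFile ... */

	hfs_systemfile_unlock(hfsmp, lockflags);
	hfs_end_transaction(hfsmp);
#endif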


/*
 * Clone a symlink.
 *
 */
static int
hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
{
	struct buf *head_bp = NULL;
	struct buf *tail_bp = NULL;
	int error;


	error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
	if (error)
		goto out;

	tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
	if (tail_bp == NULL) {
		error = EIO;
		goto out;
	}
	bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
	error = (int)buf_bwrite(tail_bp);
out:
	if (head_bp) {
		buf_markinvalid(head_bp);
		buf_brelse(head_bp);
	}
	(void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);

	return (error);
}

/*
 * Clone a file's data within the file.
 *
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
	caddr_t bufp;
	size_t bufsize;
	size_t copysize;
	size_t iosize;
	size_t offset;
	off_t writebase;
	uio_t auio;
	int error = 0;

	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 128 * 1024);
	offset = 0;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}
	hfs_unlock(VTOC(vp));

	auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);

	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);

		uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_read(vp, auio, copysize, IO_NOCACHE);
		if (error) {
			printf("hfs_clonefile: cluster_read failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
			error = EIO;
			break;
		}

		uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_write(vp, auio, writebase + offset,
		                      writebase + offset + iosize,
		                      uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
		if (error) {
			printf("hfs_clonefile: cluster_write failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
			error = EIO;
			break;
		}
		offset += iosize;
	}
	uio_free(auio);

	if ((blksize & PAGE_MASK)) {
		/*
		 * since the copy may not have started on a PAGE
		 * boundary (or may not have ended on one), we
		 * may have pages left in the cache since NOCACHE
		 * will let partially written pages linger...
		 * let's just flush the entire range to make sure
		 * we don't have any pages left that are beyond
		 * (or intersect) the real LEOF of this file
		 */
		ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
	} else {
		/*
		 * No need to call ubc_sync_range or hfs_invalbuf
		 * since the file was copied using IO_NOCACHE and
		 * the copy was done starting and ending on a page
		 * boundary in the file.
		 */
	}
	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
	return (error);
}
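
/*
 * Illustrative sketch (not compiled in): a uio is consumed as cluster_read()
 * advances it, so hfs_clonefile must re-arm it with uio_reset()/uio_addiov()
 * on every pass; a nonzero uio_resid() afterwards signals a short transfer.
 * The loop below repeats only the read half of the copy to show the pattern.
 */
#if 0
	uio_t io = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);

	for (offset = 0; offset < copysize; offset += iosize) {
		uio_reset(io, offset, UIO_SYSSPACE, UIO_READ);
		uio_addiov(io, (uintptr_t)bufp, iosize);
		if (cluster_read(vp, io, copysize, IO_NOCACHE) || uio_resid(io))
			break;		/* error or short read */
	}
	uio_free(io);
#endif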

/*
 * Clone a system (metadata) file.
 *
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
	caddr_t bufp;
	char * offset;
	size_t bufsize;
	size_t iosize;
	struct buf *bp = NULL;
	daddr64_t blkno;
	daddr64_t blk;
	daddr64_t start_blk;
	daddr64_t last_blk;
	int breadcnt;
	int i;
	int error = 0;


	iosize = GetLogicalBlockSize(vp);
	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
	breadcnt = bufsize / iosize;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}
	start_blk = ((daddr64_t)blkstart * blksize) / iosize;
	last_blk  = ((daddr64_t)blkcnt * blksize) / iosize;
	blkno = 0;

	while (blkno < last_blk) {
		/*
		 * Read up to a megabyte
		 */
		offset = bufp;
		for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
			error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
			if (error) {
				printf("hfs_clonesysfile: meta_bread error %d\n", error);
				goto out;
			}
			if (buf_count(bp) != iosize) {
				printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
				goto out;
			}
			bcopy((char *)buf_dataptr(bp), offset, iosize);

			buf_markinvalid(bp);
			buf_brelse(bp);
			bp = NULL;

			offset += iosize;
		}

		/*
		 * Write up to a megabyte
		 */
		offset = bufp;
		for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
			bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
			if (bp == NULL) {
				printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
				error = EIO;
				goto out;
			}
			bcopy(offset, (char *)buf_dataptr(bp), iosize);
			error = (int)buf_bwrite(bp);
			bp = NULL;
			if (error)
				goto out;
			offset += iosize;
		}
	}
out:
	if (bp) {
		buf_brelse(bp);
	}

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	error = hfs_fsync(vp, MNT_WAIT, 0, p);

	return (error);
}