/*
 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
/* @(#)hfs_readwrite.c 1.0
 *
 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
 *
 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/kauth.h>
#include <sys/vnode.h>
#include <sys/vnode_internal.h>
#include <sys/uio.h>
#include <sys/vfs_context.h>
#include <sys/fsevents.h>
#include <kern/kalloc.h>
#include <sys/disk.h>
#include <sys/sysctl.h>

#include <miscfs/specfs/specdev.h>

#include <sys/ubc.h>
#include <sys/ubc_internal.h>

#include <vm/vm_pageout.h>
#include <vm/vm_kern.h>

#include <sys/kdebug.h>

#include "hfs.h"
#include "hfs_attrlist.h"
#include "hfs_endian.h"
#include "hfs_fsctl.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
#include "hfs_dbg.h"

#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
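/*
 * Worked example (illustrative; assumes MAXPHYSIO is 128 KB, as on
 * typical configurations): a 16384-byte transfer can_cluster() -- it is
 * a multiple of 4096 and no larger than MAXPHYSIO/2 (64 KB) -- while a
 * 6000-byte transfer cannot, since 6000 & 4095 != 0.
 */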

enum {
    MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
};

/* from bsd/vfs/vfs_cluster.c */
extern int is_file_clean(vnode_t vp, off_t filesize);

/* from bsd/hfs/hfs_vfsops.c */
extern int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);

static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
static int hfs_clonefile(struct vnode *, int, int, int);
static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);

int flush_cache_on_write = 0;
SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
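/*
 * The knob above surfaces as kern.flush_cache_on_write; for example,
 * from user space as root:
 *
 *     sysctl -w kern.flush_cache_on_write=1
 *
 * makes every write to an uncached file end with a
 * DKIOCSYNCHRONIZECACHE ioctl to the device (see hfs_vnop_write below).
 */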


/*
 * Read data from a file.
 */
int
hfs_vnop_read(struct vnop_read_args *ap)
{
    uio_t uio = ap->a_uio;
    struct vnode *vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    struct hfsmount *hfsmp;
    off_t filesize;
    off_t filebytes;
    off_t start_resid = uio_resid(uio);
    off_t offset = uio_offset(uio);
    int retval = 0;


    /* Preflight checks */
    if (!vnode_isreg(vp)) {
        /* can only read regular files */
        if (vnode_isdir(vp))
            return (EISDIR);
        else
            return (EPERM);
    }
    if (start_resid == 0)
        return (0);        /* Nothing left to do */
    if (offset < 0)
        return (EINVAL);   /* can't read from a negative offset */

    cp = VTOC(vp);
    fp = VTOF(vp);
    hfsmp = VTOHFS(vp);

    /* Protect against a size change. */
    hfs_lock_truncate(cp, 0);

    filesize = fp->ff_size;
    filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
    if (offset > filesize) {
        if ((hfsmp->hfs_flags & HFS_STANDARD) &&
            (offset > (off_t)MAXHFSFILESIZE)) {
            retval = EFBIG;
        }
        goto exit;
    }

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
        (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

    retval = cluster_read(vp, uio, filesize, ap->a_ioflag);

    cp->c_touch_acctime = TRUE;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
        (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);

    /*
     * Keep track of blocks read.
     */
    if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
        int took_cnode_lock = 0;
        off_t bytesread;

        bytesread = start_resid - uio_resid(uio);

        /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
        if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
            hfs_lock(cp, HFS_FORCE_LOCK);
            took_cnode_lock = 1;
        }
        /*
         * If this file hasn't been seen since the start of
         * the current sampling period then start over.
         */
        if (cp->c_atime < hfsmp->hfc_timebase) {
            struct timeval tv;

            fp->ff_bytesread = bytesread;
            microtime(&tv);
            cp->c_atime = tv.tv_sec;
        } else {
            fp->ff_bytesread += bytesread;
        }
        if (took_cnode_lock)
            hfs_unlock(cp);
    }
exit:
    hfs_unlock_truncate(cp, 0);
    return (retval);
}

/*
 * Write data to a file.
 */
int
hfs_vnop_write(struct vnop_write_args *ap)
{
    uio_t uio = ap->a_uio;
    struct vnode *vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    struct hfsmount *hfsmp;
    kauth_cred_t cred = NULL;
    off_t origFileSize;
    off_t writelimit;
    off_t bytesToAdd = 0;
    off_t actualBytesAdded;
    off_t filebytes;
    off_t offset;
    size_t resid;
    int eflags;
    int ioflag = ap->a_ioflag;
    int retval = 0;
    int lockflags;
    int cnode_locked = 0;
    int partialwrite = 0;
    int exclusive_lock = 0;

    // LP64todo - fix this! uio_resid may be 64-bit value
    resid = uio_resid(uio);
    offset = uio_offset(uio);

    if (ioflag & IO_APPEND) {
        exclusive_lock = 1;
    }

    if (offset < 0)
        return (EINVAL);
    if (resid == 0)
        return (E_NONE);
    if (!vnode_isreg(vp))
        return (EPERM);    /* Can only write regular files */

    cp = VTOC(vp);
    fp = VTOF(vp);
    hfsmp = VTOHFS(vp);

    eflags = kEFDeferMask;    /* defer file block allocations */
#ifdef HFS_SPARSE_DEV
    /*
     * When the underlying device is sparse and space
     * is low (< 8MB), stop doing delayed allocations
     * and begin doing synchronous I/O.
     */
    if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
        (hfs_freeblks(hfsmp, 0) < 2048)) {
        eflags &= ~kEFDeferMask;
        ioflag |= IO_SYNC;
    }
#endif /* HFS_SPARSE_DEV */

again:
    /* Protect against a size change. */
    hfs_lock_truncate(cp, exclusive_lock);

    if (ioflag & IO_APPEND) {
        uio_setoffset(uio, fp->ff_size);
        offset = fp->ff_size;
    }
    if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
        retval = EPERM;
        goto exit;
    }

    origFileSize = fp->ff_size;
    writelimit = offset + resid;
    filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;

    /* If the truncate lock is shared, and if we either have virtual
     * blocks or will need to extend the file, upgrade the truncate
     * to exclusive lock.  If the upgrade fails, we lose the lock and
     * have to get the exclusive lock again.
     */
    if ((exclusive_lock == 0) &&
        ((fp->ff_unallocblocks != 0) || (writelimit > filebytes))) {
        exclusive_lock = 1;
        /* Lock upgrade failed and we lost our shared lock, try again */
        if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
            goto again;
        }
    }

    if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
        goto exit;
    }
    cnode_locked = 1;

    if (!exclusive_lock) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
            (int)offset, uio_resid(uio), (int)fp->ff_size,
            (int)filebytes, 0);
    }

    /* Check if we do not need to extend the file */
    if (writelimit <= filebytes) {
        goto sizeok;
    }

    cred = vfs_context_ucred(ap->a_context);
    bytesToAdd = writelimit - filebytes;

#if QUOTA
    retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
                       cred, 0);
    if (retval)
        goto exit;
#endif /* QUOTA */

    if (hfs_start_transaction(hfsmp) != 0) {
        retval = EINVAL;
        goto exit;
    }

    while (writelimit > filebytes) {
        bytesToAdd = writelimit - filebytes;
        if (cred && suser(cred, NULL) != 0)
            eflags |= kEFReserveMask;

        /* Protect extents b-tree and allocation bitmap */
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

        /* Files that are changing size are not hot file candidates. */
        if (hfsmp->hfc_stage == HFC_RECORDING) {
            fp->ff_bytesread = 0;
        }
        retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
                0, eflags, &actualBytesAdded));

        hfs_systemfile_unlock(hfsmp, lockflags);

        if ((actualBytesAdded == 0) && (retval == E_NONE))
            retval = ENOSPC;
        if (retval != E_NONE)
            break;
        filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
            (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
    }
    (void) hfs_update(vp, TRUE);
    (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
    (void) hfs_end_transaction(hfsmp);

    /*
     * If we didn't grow the file enough, try a partial write.
     * POSIX expects this behavior.
     */
    if ((retval == ENOSPC) && (filebytes > offset)) {
        retval = 0;
        partialwrite = 1;
        uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
        resid -= bytesToAdd;
        writelimit = filebytes;
    }
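    /*
     * Illustrative example of the partial-write path above: with 40 KB
     * of allocatable space left and a 100 KB write request,
     * ExtendFileC() eventually returns ENOSPC, the request is trimmed
     * to the 40 KB that did get allocated (writelimit = filebytes),
     * and the unwritten remainder is added back to uio_resid() before
     * returning (see the partialwrite block near the end of this
     * function).
     */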
sizeok:
    if (retval == E_NONE) {
        off_t filesize;
        off_t zero_off;
        off_t tail_off;
        off_t inval_start;
        off_t inval_end;
        off_t io_start;
        int lflag;
        struct rl_entry *invalid_range;

        if (writelimit > fp->ff_size)
            filesize = writelimit;
        else
            filesize = fp->ff_size;

        lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);

        if (offset <= fp->ff_size) {
            zero_off = offset & ~PAGE_MASK_64;

            /* Check whether the area between zero_off and the start
               of the transfer is invalid and should be zero-filled
               as part of the transfer:
             */
            if (offset > zero_off) {
                if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
                    lflag |= IO_HEADZEROFILL;
            }
        } else {
            off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;

            /* The bytes between fp->ff_size and uio->uio_offset must never be
               read without being zeroed.  The current last block is filled with zeroes
               if it holds valid data, but in all cases merely do a little bookkeeping
               to track the area from the end of the current last page to the start of
               the area actually written.  For the same reason only the bytes up to the
               start of the page where this write will start are invalidated; any remainder
               before uio->uio_offset is explicitly zeroed as part of the cluster_write.

               Note that inval_start, the start of the page after the current EOF,
               may be past the start of the write, in which case the zeroing
               will be handled by the cluster_write of the actual data.
             */
            inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
            inval_end = offset & ~PAGE_MASK_64;
            zero_off = fp->ff_size;

            if ((fp->ff_size & PAGE_MASK_64) &&
                (rl_scan(&fp->ff_invalidranges,
                         eof_page_base,
                         fp->ff_size - 1,
                         &invalid_range) != RL_NOOVERLAP)) {
                /* The page containing the EOF is not valid, so the
                   entire page must be made inaccessible now.  If the write
                   starts on a page beyond the page containing the eof
                   (inval_end > eof_page_base), add the
                   whole page to the range to be invalidated.  Otherwise
                   (i.e. if the write starts on the same page), zero-fill
                   the entire page explicitly now:
                 */
                if (inval_end > eof_page_base) {
                    inval_start = eof_page_base;
                } else {
                    zero_off = eof_page_base;
                };
            };

            if (inval_start < inval_end) {
                struct timeval tv;
                /* There's some range of data that's going to be marked invalid */

                if (zero_off < inval_start) {
                    /* The pages between inval_start and inval_end are going to be invalidated,
                       and the actual write will start on a page past inval_end.  Now's the last
                       chance to zero-fill the page containing the EOF:
                     */
                    hfs_unlock(cp);
                    cnode_locked = 0;
                    retval = cluster_write(vp, (uio_t) 0,
                            fp->ff_size, inval_start,
                            zero_off, (off_t)0,
                            lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
                    hfs_lock(cp, HFS_FORCE_LOCK);
                    cnode_locked = 1;
                    if (retval) goto ioerr_exit;
                    offset = uio_offset(uio);
                };

                /* Mark the remaining area of the newly allocated space as invalid: */
                rl_add(inval_start, inval_end - 1, &fp->ff_invalidranges);
                microuptime(&tv);
                cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
                zero_off = fp->ff_size = inval_end;
            };

            if (offset > zero_off) lflag |= IO_HEADZEROFILL;
        };

        /* Check to see whether the area between the end of the write and the end of
           the page it falls in is invalid and should be zero-filled as part of the transfer:
         */
        tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
        if (tail_off > filesize) tail_off = filesize;
        if (tail_off > writelimit) {
            if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
                lflag |= IO_TAILZEROFILL;
            };
        };

        /*
         * if the write starts beyond the current EOF (possibly advanced in the
         * zeroing of the last block, above), then we'll zero fill from the current EOF
         * to where the write begins:
         *
         * NOTE: If (and ONLY if) the portion of the file about to be written is
         *   before the current EOF it might be marked as invalid now and must be
         *   made readable (removed from the invalid ranges) before cluster_write
         *   tries to write it:
         */
        io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
        if (io_start < fp->ff_size) {
            off_t io_end;

            io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
            rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
        };

        hfs_unlock(cp);
        cnode_locked = 0;
        retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
                tail_off, lflag | IO_NOZERODIRTY);
        if (retval) {
            goto ioerr_exit;
        }
        offset = uio_offset(uio);
        if (offset > fp->ff_size) {
            fp->ff_size = offset;

            ubc_setsize(vp, fp->ff_size);       /* XXX check errors */
            /* Files that are changing size are not hot file candidates. */
            if (hfsmp->hfc_stage == HFC_RECORDING)
                fp->ff_bytesread = 0;
        }
        if (resid > uio_resid(uio)) {
            cp->c_touch_chgtime = TRUE;
            cp->c_touch_modtime = TRUE;
        }
    }
    if (partialwrite) {
        uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
        resid += bytesToAdd;
    }

    // XXXdbg - see radar 4871353 for more info
    {
        if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
            VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
        }
    }
    HFS_KNOTE(vp, NOTE_WRITE);

ioerr_exit:
    /*
     * If we successfully wrote any data, and we are not the superuser
     * we clear the setuid and setgid bits as a precaution against
     * tampering.
     */
    if (cp->c_mode & (S_ISUID | S_ISGID)) {
        cred = vfs_context_ucred(ap->a_context);
        if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
            if (!cnode_locked) {
                hfs_lock(cp, HFS_FORCE_LOCK);
                cnode_locked = 1;
            }
            cp->c_mode &= ~(S_ISUID | S_ISGID);
        }
    }
    if (retval) {
        if (ioflag & IO_UNIT) {
            if (!cnode_locked) {
                hfs_lock(cp, HFS_FORCE_LOCK);
                cnode_locked = 1;
            }
            (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
                               0, ap->a_context);
            // LP64todo - fix this!  resid needs to be user_ssize_t
            uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
            uio_setresid(uio, resid);
            filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
        }
    } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
        if (!cnode_locked) {
            hfs_lock(cp, HFS_FORCE_LOCK);
            cnode_locked = 1;
        }
        retval = hfs_update(vp, TRUE);
    }
    /* Updating vcbWrCnt doesn't need to be atomic. */
    hfsmp->vcbWrCnt++;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
        (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
exit:
    if (cnode_locked)
        hfs_unlock(cp);
    hfs_unlock_truncate(cp, exclusive_lock);
    return (retval);
}

/* support for the "bulk-access" fcntl */

#define CACHE_LEVELS 16
#define NUM_CACHE_ENTRIES (64*16)
#define PARENT_IDS_FLAG 0x100

struct access_cache {
    int numcached;
    int cachehits; /* these two for statistics gathering */
    int lookups;
    unsigned int *acache;
    unsigned char *haveaccess;
};

struct access_t {
    uid_t uid;          /* IN: effective user id */
    short flags;        /* IN: access requested (i.e. R_OK) */
    short num_groups;   /* IN: number of groups user belongs to */
    int num_files;      /* IN: number of files to process */
    int *file_ids;      /* IN: array of file ids */
    gid_t *groups;      /* IN: array of groups */
    short *access;      /* OUT: access info for each file (0 for 'has access') */
};

struct user_access_t {
    uid_t uid;              /* IN: effective user id */
    short flags;            /* IN: access requested (i.e. R_OK) */
    short num_groups;       /* IN: number of groups user belongs to */
    int num_files;          /* IN: number of files to process */
    user_addr_t file_ids;   /* IN: array of file ids */
    user_addr_t groups;     /* IN: array of groups */
    user_addr_t access;     /* OUT: access info for each file (0 for 'has access') */
};


// these are the "extended" versions of the above structures
// note that it is crucial that they be a different size than
// the regular versions
struct ext_access_t {
    uint32_t flags;         /* IN: access requested (i.e. R_OK) */
    uint32_t num_files;     /* IN: number of files to process */
    uint32_t map_size;      /* IN: size of the bit map */
    uint32_t *file_ids;     /* IN: array of file ids */
    char *bitmap;           /* OUT: hash-bitmap of interesting directory ids */
    short *access;          /* OUT: access info for each file (0 for 'has access') */
    uint32_t num_parents;   /* future use */
    cnid_t *parents;        /* future use */
};

struct ext_user_access_t {
    uint32_t flags;         /* IN: access requested (i.e. R_OK) */
    uint32_t num_files;     /* IN: number of files to process */
    uint32_t map_size;      /* IN: size of the bit map */
    user_addr_t file_ids;   /* IN: array of file ids */
    user_addr_t bitmap;     /* OUT: hash-bitmap of interesting directory ids */
    user_addr_t access;     /* OUT: access info for each file (0 for 'has access') */
    uint32_t num_parents;   /* future use */
    user_addr_t parents;    /* future use */
};
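/*
 * Sketch of how user space drives this interface (illustrative only;
 * the user-level command constant and the fsctl(2) call shape are
 * assumptions, not defined in this file):
 *
 *     struct access_t args;
 *     int ids[2] = { file_id_1, file_id_2 };
 *     short result[2];
 *
 *     args.flags = R_OK;
 *     args.num_files = 2;
 *     args.file_ids = ids;
 *     args.access = result;
 *     // uid/num_groups/groups describe the user being checked
 *     fsctl(volume_path, HFS_BULKACCESS, &args, 0);
 *
 * On return, result[i] is 0 if the caller can access file_ids[i], and
 * otherwise holds an errno-style code (see do_bulk_access_check below).
 */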


/*
 * Perform a binary search for the given parent_id. Return value is
 * the index if there is a match.  If no_match_indexp is non-NULL it
 * will be assigned with the index to insert the item (even if it was
 * not found).
 */
static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
{
    int index = -1;
    unsigned int lo = 0;

    do {
        unsigned int mid = ((hi - lo)/2) + lo;
        unsigned int this_id = array[mid];

        if (parent_id == this_id) {
            hi = mid;
            break;
        }

        if (parent_id < this_id) {
            hi = mid;
            continue;
        }

        if (parent_id > this_id) {
            lo = mid + 1;
            continue;
        }
    } while (lo < hi);

    /* check if lo and hi converged on the match */
    if (parent_id == array[hi]) {
        index = hi;
    }

    if (no_match_indexp) {
        *no_match_indexp = hi;
    }

    return index;
}
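/*
 * Example: searching {10, 20, 30} (hi == 2) for parent_id 25 computes
 * mid = 1 (value 20, so lo becomes 2), the loop then exits with
 * hi == 2; array[2] == 30 != 25, so the function returns -1 and sets
 * *no_match_indexp to 2 -- the slot where 25 belongs, which add_node()
 * below uses as the insertion point.
 */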


static int
lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
{
    unsigned int hi;
    int matches = 0;
    int index, no_match_index;

    if (cache->numcached == 0) {
        *indexp = 0;
        return 0; // table is empty, so insert at index=0 and report no match
    }

    if (cache->numcached > NUM_CACHE_ENTRIES) {
        /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
          cache->numcached, NUM_CACHE_ENTRIES);*/
        cache->numcached = NUM_CACHE_ENTRIES;
    }

    hi = cache->numcached - 1;

    index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);

    /* if no existing entry found, find index for new one */
    if (index == -1) {
        index = no_match_index;
        matches = 0;
    } else {
        matches = 1;
    }

    *indexp = index;
    return matches;
}

/*
 * Add a node to the access_cache at the given index (or do a lookup first
 * to find the index if -1 is passed in).  We currently do a replace rather
 * than an insert if the cache is full.
 */
static void
add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
{
    int lookup_index = -1;

    /* need to do a lookup first if -1 passed for index */
    if (index == -1) {
        if (lookup_bucket(cache, &lookup_index, nodeID)) {
            if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
                // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
                cache->haveaccess[lookup_index] = access;
            }

            /* mission accomplished */
            return;
        } else {
            index = lookup_index;
        }

    }

    /* if the cache is full, do a replace rather than an insert */
    if (cache->numcached >= NUM_CACHE_ENTRIES) {
        //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
        cache->numcached = NUM_CACHE_ENTRIES-1;

        if (index > cache->numcached) {
            // printf("index %d pinned to %d\n", index, cache->numcached);
            index = cache->numcached;
        }
    }

    if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
        index++;
    }

    if (index >= 0 && index < cache->numcached) {
        /* only do bcopy if we're inserting */
        bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
        bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
    }

    cache->acache[index] = nodeID;
    cache->haveaccess[index] = access;
    cache->numcached++;
}


struct cinfo {
    uid_t uid;
    gid_t gid;
    mode_t mode;
    cnid_t parentcnid;
    u_int16_t recflags;
};

static int
snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
{
    struct cinfo *cip = (struct cinfo *)arg;

    cip->uid = attrp->ca_uid;
    cip->gid = attrp->ca_gid;
    cip->mode = attrp->ca_mode;
    cip->parentcnid = descp->cd_parentcnid;
    cip->recflags = attrp->ca_recflags;

    return (0);
}

/*
 * Look up the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
 * isn't incore, then go to the catalog.
 */
static int
do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
    struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
{
    int error = 0;

    /* if this id matches the one the fsctl was called with, skip the lookup */
    if (cnid == skip_cp->c_cnid) {
        cnattrp->ca_uid = skip_cp->c_uid;
        cnattrp->ca_gid = skip_cp->c_gid;
        cnattrp->ca_mode = skip_cp->c_mode;
        keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
    } else {
        struct cinfo c_info;

        /* otherwise, check the cnode hash in case the file/dir is incore */
        if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
            cnattrp->ca_uid = c_info.uid;
            cnattrp->ca_gid = c_info.gid;
            cnattrp->ca_mode = c_info.mode;
            cnattrp->ca_recflags = c_info.recflags;
            keyp->hfsPlus.parentID = c_info.parentcnid;
        } else {
            int lockflags;

            lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);

            /* lookup this cnid in the catalog */
            error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);

            hfs_systemfile_unlock(hfsmp, lockflags);

            cache->lookups++;
        }
    }

    return (error);
}


/*
 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
 * up to CACHE_LEVELS as we progress towards the root.
 */
static int
do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
    struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev,
    struct vfs_context *my_context,
    char *bitmap,
    uint32_t map_size,
    cnid_t* parents,
    uint32_t num_parents)
{
    int myErr = 0;
    int myResult;
    HFSCatalogNodeID thisNodeID;
    unsigned int myPerms;
    struct cat_attr cnattr;
    int cache_index = -1, scope_index = -1, scope_idx_start = -1;
    CatalogKey catkey;

    int i = 0, ids_to_cache = 0;
    int parent_ids[CACHE_LEVELS];

    thisNodeID = nodeID;
    while (thisNodeID >= kRootDirID) {
        myResult = 0;   /* default to "no access" */

        /* check the cache before resorting to hitting the catalog */

        /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
         * to look any further after hitting cached dir */

        if (lookup_bucket(cache, &cache_index, thisNodeID)) {
            cache->cachehits++;
            myErr = cache->haveaccess[cache_index];
            if (scope_index != -1) {
                if (myErr == ESRCH) {
                    myErr = 0;
                }
            } else {
                scope_index = 0;   // so we'll just use the cache result
                scope_idx_start = ids_to_cache;
            }
            myResult = (myErr == 0) ? 1 : 0;
            goto ExitThisRoutine;
        }


        if (parents) {
            int tmp;
            tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
            if (scope_index == -1)
                scope_index = tmp;
            if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
                scope_idx_start = ids_to_cache;
            }
        }

        /* remember which parents we want to cache */
        if (ids_to_cache < CACHE_LEVELS) {
            parent_ids[ids_to_cache] = thisNodeID;
            ids_to_cache++;
        }
        // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
        if (bitmap && map_size) {
            bitmap[(thisNodeID/8)%(map_size)] |= (1<<(thisNodeID&7));
        }
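        /*
         * Worked example of the bit-twiddling above: node id 73 sets
         * bit 73 & 7 == 1 in byte (73/8) % map_size == 9 % map_size,
         * so user space can later test the same bit to learn whether a
         * directory id was visited during the scope walk.
         */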


        /* do the lookup (checks the cnode hash, then the catalog) */
        myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr);
        if (myErr) {
            goto ExitThisRoutine; /* no access */
        }

        /* Root always gets access. */
        if (suser(myp_ucred, NULL) == 0) {
            thisNodeID = catkey.hfsPlus.parentID;
            myResult = 1;
            continue;
        }

        // if the thing has acl's, do the full permission check
        if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
            struct vnode *vp;

            /* get the vnode for this cnid */
            myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0);
            if ( myErr ) {
                myResult = 0;
                goto ExitThisRoutine;
            }

            thisNodeID = VTOC(vp)->c_parentcnid;

            hfs_unlock(VTOC(vp));

            if (vnode_vtype(vp) == VDIR) {
                myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
            } else {
                myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
            }

            vnode_put(vp);
            if (myErr) {
                myResult = 0;
                goto ExitThisRoutine;
            }
        } else {
            unsigned int flags;

            myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
                                              cnattr.ca_mode, hfsmp->hfs_mp,
                                              myp_ucred, theProcPtr);

            if (cnattr.ca_mode & S_IFDIR) {
                flags = R_OK | X_OK;
            } else {
                flags = R_OK;
            }
            if ( (myPerms & flags) != flags) {
                myResult = 0;
                myErr = EACCES;
                goto ExitThisRoutine;   /* no access */
            }

            /* up the hierarchy we go */
            thisNodeID = catkey.hfsPlus.parentID;
        }
    }

    /* if here, we have access to this node */
    myResult = 1;

 ExitThisRoutine:
    if (parents && myErr == 0 && scope_index == -1) {
        myErr = ESRCH;
    }

    if (myErr) {
        myResult = 0;
    }
    *err = myErr;

    /* cache the parent directory(ies) */
    for (i = 0; i < ids_to_cache; i++) {
        if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
            add_node(cache, -1, parent_ids[i], ESRCH);
        } else {
            add_node(cache, -1, parent_ids[i], myErr);
        }
    }

    return (myResult);
}

static int
do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
    struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
{
    boolean_t is64bit;

    /*
     * NOTE: on entry, the vnode is locked. In case this vnode
     * happens to be in our list of file_ids, we'll note it so we can
     * avoid calling hfs_chashget_nowait() on that id, as that
     * would cause a "locking against myself" panic.
     */
    Boolean check_leaf = true;

    struct ext_user_access_t *user_access_structp;
    struct ext_user_access_t tmp_user_access;
    struct access_cache cache;

    int error = 0;
    unsigned int i;

    dev_t dev = VTOC(vp)->c_dev;

    short flags;
    unsigned int num_files = 0;
    int map_size = 0;
    int num_parents = 0;
    int *file_ids = NULL;
    short *access = NULL;
    char *bitmap = NULL;
    cnid_t *parents = NULL;
    int leaf_index;

    cnid_t cnid;
    cnid_t prevParent_cnid = 0;
    unsigned int myPerms;
    short myaccess = 0;
    struct cat_attr cnattr;
    CatalogKey catkey;
    struct cnode *skip_cp = VTOC(vp);
    kauth_cred_t cred = vfs_context_ucred(context);
    proc_t p = vfs_context_proc(context);

    is64bit = proc_is64bit(p);

    /* initialize the local cache and buffers */
    cache.numcached = 0;
    cache.cachehits = 0;
    cache.lookups = 0;
    cache.acache = NULL;
    cache.haveaccess = NULL;

    /* struct copyin done during dispatch... need to copy file_id array separately */
    if (ap->a_data == NULL) {
        error = EINVAL;
        goto err_exit_bulk_access;
    }

    if (is64bit) {
        if (arg_size != sizeof(struct ext_user_access_t)) {
            error = EINVAL;
            goto err_exit_bulk_access;
        }

        user_access_structp = (struct ext_user_access_t *)ap->a_data;

    } else if (arg_size == sizeof(struct access_t)) {
        struct access_t *accessp = (struct access_t *)ap->a_data;

        // convert an old style bulk-access struct to the new style
        tmp_user_access.flags = accessp->flags;
        tmp_user_access.num_files = accessp->num_files;
        tmp_user_access.map_size = 0;
        tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
        tmp_user_access.bitmap = USER_ADDR_NULL;
        tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
        tmp_user_access.num_parents = 0;
        user_access_structp = &tmp_user_access;

    } else if (arg_size == sizeof(struct ext_access_t)) {
        struct ext_access_t *accessp = (struct ext_access_t *)ap->a_data;

        // up-cast from a 32-bit version of the struct
        tmp_user_access.flags = accessp->flags;
        tmp_user_access.num_files = accessp->num_files;
        tmp_user_access.map_size = accessp->map_size;
        tmp_user_access.num_parents = accessp->num_parents;

        tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
        tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
        tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
        tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);

        user_access_structp = &tmp_user_access;
    } else {
        error = EINVAL;
        goto err_exit_bulk_access;
    }

    map_size = user_access_structp->map_size;

    num_files = user_access_structp->num_files;

    num_parents = user_access_structp->num_parents;

    if (num_files < 1) {
        goto err_exit_bulk_access;
    }
    if (num_files > 1024) {
        error = EINVAL;
        goto err_exit_bulk_access;
    }

    if (num_parents > 1024) {
        error = EINVAL;
        goto err_exit_bulk_access;
    }

    file_ids = (int *) kalloc(sizeof(int) * num_files);
    access = (short *) kalloc(sizeof(short) * num_files);
    if (map_size) {
        bitmap = (char *) kalloc(sizeof(char) * map_size);
    }

    if (num_parents) {
        parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
    }

    cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
    cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);

    if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
        if (file_ids) {
            kfree(file_ids, sizeof(int) * num_files);
        }
        if (bitmap) {
            kfree(bitmap, sizeof(char) * map_size);
        }
        if (access) {
            kfree(access, sizeof(short) * num_files);
        }
        if (cache.acache) {
            kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
        }
        if (cache.haveaccess) {
            kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
        }
        if (parents) {
            kfree(parents, sizeof(cnid_t) * num_parents);
        }
        return ENOMEM;
    }

    // make sure the bitmap is zero'ed out...
    if (bitmap) {
        bzero(bitmap, (sizeof(char) * map_size));
    }

    if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
                        num_files * sizeof(int)))) {
        goto err_exit_bulk_access;
    }

    if (num_parents) {
        if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
                            num_parents * sizeof(cnid_t)))) {
            goto err_exit_bulk_access;
        }
    }

    flags = user_access_structp->flags;
    if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
        flags = R_OK;
    }

    /* check if we've been passed leaf node ids or parent ids */
    if (flags & PARENT_IDS_FLAG) {
        check_leaf = false;
    }

    /* Check access to each file_id passed in */
    for (i = 0; i < num_files; i++) {
        leaf_index = -1;
        cnid = (cnid_t) file_ids[i];

        /* root always has access */
        if ((!parents) && (!suser(cred, NULL))) {
            access[i] = 0;
            continue;
        }

        if (check_leaf) {
            /* do the lookup (checks the cnode hash, then the catalog) */
            error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr);
            if (error) {
                access[i] = (short) error;
                continue;
            }

            if (parents) {
                // Check if the leaf matches one of the parent scopes
                leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
            }

            // if the thing has acl's, do the full permission check
            if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
                struct vnode *cvp;
                int myErr = 0;
                /* get the vnode for this cnid */
                myErr = hfs_vget(hfsmp, cnid, &cvp, 0);
                if ( myErr ) {
                    access[i] = myErr;
                    continue;
                }

                hfs_unlock(VTOC(cvp));

                if (vnode_vtype(cvp) == VDIR) {
                    myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
                } else {
                    myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
                }

                vnode_put(cvp);
                if (myErr) {
                    access[i] = myErr;
                    continue;
                }
            } else {
                /* before calling CheckAccess(), check the target file for read access */
                myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
                                                  cnattr.ca_mode, hfsmp->hfs_mp, cred, p);

                /* fail fast if no access */
                if ((myPerms & flags) == 0) {
                    access[i] = EACCES;
                    continue;
                }
            }
        } else {
            /* we were passed an array of parent ids */
            catkey.hfsPlus.parentID = cnid;
        }

        /* if the last guy had the same parent and had access, we're done */
        if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
            cache.cachehits++;
            access[i] = 0;
            continue;
        }

        myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
                                   skip_cp, p, cred, dev, context, bitmap, map_size, parents, num_parents);

        if (myaccess || (error == ESRCH && leaf_index != -1)) {
            access[i] = 0;  // have access.. no errors to report
        } else {
            access[i] = (error != 0 ? (short) error : EACCES);
        }

        prevParent_cnid = catkey.hfsPlus.parentID;
    }

    /* copyout the access array */
    if ((error = copyout((caddr_t)access, user_access_structp->access,
                         num_files * sizeof (short)))) {
        goto err_exit_bulk_access;
    }
    if (map_size && bitmap) {
        if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
                             map_size * sizeof (char)))) {
            goto err_exit_bulk_access;
        }
    }


  err_exit_bulk_access:

    //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);

    if (file_ids)
        kfree(file_ids, sizeof(int) * num_files);
    if (parents)
        kfree(parents, sizeof(cnid_t) * num_parents);
    if (bitmap)
        kfree(bitmap, sizeof(char) * map_size);
    if (access)
        kfree(access, sizeof(short) * num_files);
    if (cache.acache)
        kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
    if (cache.haveaccess)
        kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);

    return (error);
}


/* end "bulk-access" support */


/*
 * Callback for use with freeze ioctl.
 */
static int
hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
{
    vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");

    return 0;
}
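/*
 * The callback above is driven by the F_FREEZE_FS case below via
 * vnode_iterate().  For reference, a privileged user-space process
 * reaches that path with plain fcntl(2) on a descriptor for the
 * volume, e.g. (illustrative):
 *
 *     fcntl(fd, F_FREEZE_FS, 0);   // freeze; requires superuser
 *     ...
 *     fcntl(fd, F_THAW_FS, 0);     // thaw, from the same process
 */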

/*
 * Control filesystem operating characteristics.
 */
int
hfs_vnop_ioctl( struct vnop_ioctl_args /* {
        vnode_t a_vp;
        int a_command;
        caddr_t a_data;
        int a_fflag;
        vfs_context_t a_context;
    } */ *ap)
{
    struct vnode * vp = ap->a_vp;
    struct hfsmount *hfsmp = VTOHFS(vp);
    vfs_context_t context = ap->a_context;
    kauth_cred_t cred = vfs_context_ucred(context);
    proc_t p = vfs_context_proc(context);
    struct vfsstatfs *vfsp;
    boolean_t is64bit;

    is64bit = proc_is64bit(p);

    switch (ap->a_command) {

    case HFS_GETPATH:
    {
        struct vnode *file_vp;
        cnid_t  cnid;
        int  outlen;
        char *bufptr;
        int error;

        /* Caller must be owner of file system. */
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);
        }
        /* Target vnode must be file system's root. */
        if (!vnode_isvroot(vp)) {
            return (EINVAL);
        }
        bufptr = (char *)ap->a_data;
        cnid = strtoul(bufptr, NULL, 10);

        /* We need to call hfs_vfs_vget to leverage the code that will fix the
         * origin list for us if needed, as opposed to calling hfs_vget, since
         * we will need it for the subsequent build_path call.
         */
        if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
            return (error);
        }
        error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);
        vnode_put(file_vp);

        return (error);
    }

    case HFS_PREV_LINK:
    case HFS_NEXT_LINK:
    {
        cnid_t linkfileid;
        cnid_t nextlinkid;
        cnid_t prevlinkid;
        int error;

        /* Caller must be owner of file system. */
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES);
        }
        /* Target vnode must be file system's root. */
        if (!vnode_isvroot(vp)) {
            return (EINVAL);
        }
        linkfileid = *(cnid_t *)ap->a_data;
        if (linkfileid < kHFSFirstUserCatalogNodeID) {
            return (EINVAL);
        }
        if ((error = hfs_lookuplink(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
            return (error);
        }
        if (ap->a_command == HFS_NEXT_LINK) {
            *(cnid_t *)ap->a_data = nextlinkid;
        } else {
            *(cnid_t *)ap->a_data = prevlinkid;
        }
        return (0);
    }

    case HFS_RESIZE_PROGRESS: {

        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES); /* must be owner of file system */
        }
        if (!vnode_isvroot(vp)) {
            return (EINVAL);
        }
        return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
    }

    case HFS_RESIZE_VOLUME: {
        u_int64_t newsize;
        u_int64_t cursize;

        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES); /* must be owner of file system */
        }
        if (!vnode_isvroot(vp)) {
            return (EINVAL);
        }
        newsize = *(u_int64_t *)ap->a_data;
        cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;

        if (newsize > cursize) {
            return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
        } else if (newsize < cursize) {
            return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
        } else {
            return (0);
        }
    }
    case HFS_CHANGE_NEXT_ALLOCATION: {
        int error = 0;      /* Assume success */
        u_int32_t location;

        if (vnode_vfsisrdonly(vp)) {
            return (EROFS);
        }
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES); /* must be owner of file system */
        }
        if (!vnode_isvroot(vp)) {
            return (EINVAL);
        }
        HFS_MOUNT_LOCK(hfsmp, TRUE);
        location = *(u_int32_t *)ap->a_data;
        if ((location >= hfsmp->allocLimit) &&
            (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
            error = EINVAL;
            goto fail_change_next_allocation;
        }
        /* Return previous value. */
        *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
        if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
            /* On magic value for location, set nextAllocation to next block
             * after metadata zone and set flag in mount structure to indicate
             * that nextAllocation should not be updated again.
             */
            HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
            hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
        } else {
            hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
            HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
        }
        MarkVCBDirty(hfsmp);
fail_change_next_allocation:
        HFS_MOUNT_UNLOCK(hfsmp, TRUE);
        return (error);
    }

#ifdef HFS_SPARSE_DEV
    case HFS_SETBACKINGSTOREINFO: {
        struct vnode * bsfs_rootvp;
        struct vnode * di_vp;
        struct hfs_backingstoreinfo *bsdata;
        int error = 0;

        if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
            return (EALREADY);
        }
        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES); /* must be owner of file system */
        }
        bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
        if (bsdata == NULL) {
            return (EINVAL);
        }
        if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
            return (error);
        }
        if ((error = vnode_getwithref(di_vp))) {
            file_drop(bsdata->backingfd);
            return(error);
        }

        if (vnode_mount(vp) == vnode_mount(di_vp)) {
            (void)vnode_put(di_vp);
            file_drop(bsdata->backingfd);
            return (EINVAL);
        }

        /*
         * Obtain the backing fs root vnode and keep a reference
         * on it.  This reference will be dropped in hfs_unmount.
         */
        error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
        if (error) {
            (void)vnode_put(di_vp);
            file_drop(bsdata->backingfd);
            return (error);
        }
        vnode_ref(bsfs_rootvp);
        vnode_put(bsfs_rootvp);

        hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
        hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
        hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
        hfsmp->hfs_sparsebandblks *= 4;

        vfs_markdependency(hfsmp->hfs_mp);

        (void)vnode_put(di_vp);
        file_drop(bsdata->backingfd);
        return (0);
    }
    case HFS_CLRBACKINGSTOREINFO: {
        struct vnode * tmpvp;

        vfsp = vfs_statfs(HFSTOVFS(hfsmp));
        if (suser(cred, NULL) &&
            kauth_cred_getuid(cred) != vfsp->f_owner) {
            return (EACCES); /* must be owner of file system */
        }
        if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
            hfsmp->hfs_backingfs_rootvp) {

            hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
            tmpvp = hfsmp->hfs_backingfs_rootvp;
            hfsmp->hfs_backingfs_rootvp = NULLVP;
            hfsmp->hfs_sparsebandblks = 0;
            vnode_rele(tmpvp);
        }
        return (0);
    }
#endif /* HFS_SPARSE_DEV */

    case F_FREEZE_FS: {
        struct mount *mp;

        if (!is_suser())
            return (EACCES);

        mp = vnode_mount(vp);
        hfsmp = VFSTOHFS(mp);

        if (!(hfsmp->jnl))
            return (ENOTSUP);

        lck_rw_lock_exclusive(&hfsmp->hfs_insync);

        // flush things before we get started to try and prevent
        // dirty data from being paged out while we're frozen.
        // note: can't do this after taking the lock as it will
        // deadlock against ourselves.
        vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
        hfs_global_exclusive_lock_acquire(hfsmp);
        journal_flush(hfsmp->jnl);

        // don't need to iterate on all vnodes, we just need to
        // wait for writes to the system files and the device vnode
        if (HFSTOVCB(hfsmp)->extentsRefNum)
            vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
        if (HFSTOVCB(hfsmp)->catalogRefNum)
            vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
        if (HFSTOVCB(hfsmp)->allocationsRefNum)
            vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
        if (hfsmp->hfs_attribute_vp)
            vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
        vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");

        hfsmp->hfs_freezing_proc = current_proc();

        return (0);
    }

    case F_THAW_FS: {
        if (!is_suser())
            return (EACCES);

        // if we're not the one who froze the fs then we
        // can't thaw it.
        if (hfsmp->hfs_freezing_proc != current_proc()) {
            return EPERM;
        }

        // NOTE: if you add code here, also go check the
        //       code that "thaws" the fs in hfs_vnop_close()
        //
        hfsmp->hfs_freezing_proc = NULL;
        hfs_global_exclusive_lock_release(hfsmp);
        lck_rw_unlock_exclusive(&hfsmp->hfs_insync);

        return (0);
    }
1592
2d21ac55
A
1593 case HFS_BULKACCESS_FSCTL: {
1594 int size;
1595
1596 if (hfsmp->hfs_flags & HFS_STANDARD) {
1597 return EINVAL;
1598 }
91447636 1599
2d21ac55
A
1600 if (is64bit) {
1601 size = sizeof(struct user_access_t);
1602 } else {
1603 size = sizeof(struct access_t);
1604 }
1605
1606 return do_bulk_access_check(hfsmp, vp, ap, size, context);
1607 }
91447636 1608
2d21ac55
A
1609 case HFS_EXT_BULKACCESS_FSCTL: {
1610 int size;
1611
1612 if (hfsmp->hfs_flags & HFS_STANDARD) {
1613 return EINVAL;
1614 }
91447636 1615
2d21ac55
A
1616 if (is64bit) {
1617 size = sizeof(struct ext_user_access_t);
1618 } else {
1619 size = sizeof(struct ext_access_t);
1620 }
1621
1622 return do_bulk_access_check(hfsmp, vp, ap, size, context);
1623 }
91447636
A
1624
1625 case HFS_SETACLSTATE: {
1626 int state;
1627
91447636
A
1628 if (ap->a_data == NULL) {
1629 return (EINVAL);
1630 }
3a60a9f5
A
1631
1632 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
91447636 1633 state = *(int *)ap->a_data;
3a60a9f5
A
1634
1635 // super-user can enable or disable acl's on a volume.
1636 // the volume owner can only enable acl's
1637 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1638 return (EPERM);
1639 }
91447636 1640 if (state == 0 || state == 1)
2d21ac55
A
1641 return hfs_set_volxattr(hfsmp, HFS_SETACLSTATE, state);
1642 else
1643 return (EINVAL);
1644 }
1645
1646 case HFS_SET_XATTREXTENTS_STATE: {
1647 int state;
1648
1649 if (ap->a_data == NULL) {
1650 return (EINVAL);
1651 }
1652
1653 state = *(int *)ap->a_data;
1654
1655 /* Super-user can enable or disable extent-based extended
1656 * attribute support on a volume
1657 */
1658 if (!is_suser()) {
1659 return (EPERM);
1660 }
1661 if (state == 0 || state == 1)
1662 return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
91447636
A
1663 else
1664 return (EINVAL);
1665 }
1666
1667 case F_FULLFSYNC: {
55e303ae
A
1668 int error;
1669
91447636
A
1670 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1671 if (error == 0) {
2d21ac55 1672 error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
91447636
A
1673 hfs_unlock(VTOC(vp));
1674 }
55e303ae
A
1675
1676 return error;
1677 }
91447636
A
1678
1679 case F_CHKCLEAN: {
9bccf70c 1680 register struct cnode *cp;
55e303ae
A
1681 int error;
1682
91447636 1683 if (!vnode_isreg(vp))
55e303ae
A
1684 return EINVAL;
1685
91447636
A
1686 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1687 if (error == 0) {
1688 cp = VTOC(vp);
1689 /*
1690 * used by regression test to determine if
1691 * all the dirty pages (via write) have been cleaned
1692 * after a call to 'fsysnc'.
1693 */
1694 error = is_file_clean(vp, VTOF(vp)->ff_size);
1695 hfs_unlock(cp);
1696 }
55e303ae
A
1697 return (error);
1698 }
1699
91447636 1700 case F_RDADVISE: {
9bccf70c
A
1701 register struct radvisory *ra;
1702 struct filefork *fp;
9bccf70c
A
1703 int error;
1704
91447636 1705 if (!vnode_isreg(vp))
9bccf70c
A
1706 return EINVAL;
1707
9bccf70c 1708 ra = (struct radvisory *)(ap->a_data);
9bccf70c
A
1709 fp = VTOF(vp);
1710
91447636
A
1711 /* Protect against a size change. */
1712 hfs_lock_truncate(VTOC(vp), TRUE);
1713
9bccf70c 1714 if (ra->ra_offset >= fp->ff_size) {
91447636
A
1715 error = EFBIG;
1716 } else {
1717 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
9bccf70c 1718 }
1c79356b 1719
2d21ac55 1720 hfs_unlock_truncate(VTOC(vp), TRUE);
9bccf70c 1721 return (error);
1c79356b 1722 }

	case F_READBOOTSTRAP:
	case F_WRITEBOOTSTRAP:
	{
		struct vnode *devvp = NULL;
		user_fbootstraptransfer_t *user_bootstrapp;
		int devBlockSize;
		int error;
		uio_t auio;
		daddr64_t blockNumber;
		u_long blockOffset;
		u_long xfersize;
		struct buf *bp;
		user_fbootstraptransfer_t user_bootstrap;

		if (!vnode_isvroot(vp))
			return (EINVAL);
		/* LP64 - when the caller is a 64-bit process we are passed a pointer
		 * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
		 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t.
		 */
		if (is64bit) {
			user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
		}
		else {
			fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
			user_bootstrapp = &user_bootstrap;
			user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
			user_bootstrap.fbt_length = bootstrapp->fbt_length;
			user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
		}
		if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
			return EINVAL;

		devvp = VTOHFS(vp)->hfs_devvp;
		auio = uio_create(1, user_bootstrapp->fbt_offset,
				  is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
				  (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
		uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);

		devBlockSize = vfs_devblocksize(vnode_mount(vp));

		while (uio_resid(auio) > 0) {
			blockNumber = uio_offset(auio) / devBlockSize;
			error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
			if (error) {
				if (bp) buf_brelse(bp);
				uio_free(auio);
				return error;
			};

			blockOffset = uio_offset(auio) % devBlockSize;
			xfersize = devBlockSize - blockOffset;
			error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
			if (error) {
				buf_brelse(bp);
				uio_free(auio);
				return error;
			};
			if (uio_rw(auio) == UIO_WRITE) {
				error = VNOP_BWRITE(bp);
				if (error) {
					uio_free(auio);
					return error;
				}
			} else {
				buf_brelse(bp);
			};
		};
		uio_free(auio);
	};
	return 0;

	case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
	{
		if (is64bit) {
			*(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
		}
		else {
			*(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
		}
		return 0;
	}

	case HFS_GET_MOUNT_TIME:
		return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
		break;

	case HFS_GET_LAST_MTIME:
		return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
		break;

	case HFS_SET_BOOT_INFO:
		if (!vnode_isvroot(vp))
			return(EINVAL);
		if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
			return(EACCES);	/* must be superuser or owner of filesystem */
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
		break;

	case HFS_GET_BOOT_INFO:
		if (!vnode_isvroot(vp))
			return(EINVAL);
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		break;

	case HFS_MARK_BOOT_CORRUPT:
		/* Mark the boot volume corrupt by setting
		 * kHFSVolumeInconsistentBit in the volume header.  This will
		 * force fsck_hfs on next mount.
		 */
		if (!is_suser()) {
			return EACCES;
		}

		/* Allowed only on the root vnode of the boot volume */
		if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
		    !vnode_isvroot(vp)) {
			return EINVAL;
		}

		printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
		hfs_mark_volume_inconsistent(hfsmp);
		break;

	default:
		return (ENOTTY);
	}

	/* Should never get here */
	return 0;
}

/*
 * select
 */
int
hfs_vnop_select(__unused struct vnop_select_args *ap)
/*
	struct vnop_select_args {
		vnode_t a_vp;
		int  a_which;
		int  a_fflags;
		void *a_wql;
		vfs_context_t a_context;
	};
*/
{
	/*
	 * We should really check to see if I/O is possible.
	 */
	return (1);
}

/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run.  The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size; currently it is 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks.
 */
int
hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
{
	struct filefork *fp = VTOF(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	int retval = E_NONE;
	u_int32_t logBlockSize;
	size_t bytesContAvail = 0;
	off_t blockposition;
	int lockExtBtree;
	int lockflags = 0;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (vpp != NULL)
		*vpp = hfsmp->hfs_devvp;
	if (bnp == NULL)
		return (0);

	logBlockSize = GetLogicalBlockSize(vp);
	blockposition = (off_t)bn * logBlockSize;

	lockExtBtree = overflow_extents(fp);

	if (lockExtBtree)
		lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);

	retval = MacToVFSError(
			MapFileBlockC(HFSTOVCB(hfsmp),
				      (FCB*)fp,
				      MAXPHYSIO,
				      blockposition,
				      bnp,
				      &bytesContAvail));

	if (lockExtBtree)
		hfs_systemfile_unlock(hfsmp, lockflags);

	if (retval == E_NONE) {
		/* Figure out how many read ahead blocks there are */
		if (runp != NULL) {
			if (can_cluster(logBlockSize)) {
				/* Make sure this result never goes negative: */
				*runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
			} else {
				*runp = 0;
			}
		}
	}
	return (retval);
}
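
/*
 * Worked example (annotation, not part of the original source): with a
 * 4096-byte logical block size and MapFileBlockC reporting
 * bytesContAvail = 20480, the run computed above is
 * (20480 / 4096) - 1 = 4, i.e. four more logical blocks follow `bn`
 * contiguously, so a caller can issue one five-block read without
 * another mapping call.
 */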

/*
 * Convert logical block number to file offset.
 */
int
hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
/*
	struct vnop_blktooff_args {
		vnode_t a_vp;
		daddr64_t a_lblkno;
		off_t *a_offset;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);
	*ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);

	return(0);
}

/*
 * Convert file offset to logical block number.
 */
int
hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
/*
	struct vnop_offtoblk_args {
		vnode_t a_vp;
		off_t a_offset;
		daddr64_t *a_lblkno;
	};
*/
{
	if (ap->a_vp == NULL)
		return (EINVAL);
	*ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));

	return(0);
}
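
/*
 * Worked example (annotation, not part of the original source): the two
 * VNOPs above are inverses over block-aligned offsets.  With a 4096-byte
 * logical block size, blktooff maps block 3 to offset 3 * 4096 = 12288,
 * and offtoblk maps 12288 back to block 3; a mid-block offset such as
 * 12290 truncates down to block 3 as well.
 */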

/*
 * Map file offset to physical block number.
 *
 * If this function is called for a write operation, and if the file
 * had virtual blocks allocated (delayed allocation), real blocks
 * are allocated by calling ExtendFileC().
 *
 * If this function is called for a read operation, and if the file
 * had virtual blocks allocated (delayed allocation), no change
 * to the size of the file is made, and if required, the rangelist is
 * searched for the mapping.
 *
 * System file cnodes are expected to be locked (shared or exclusive).
 */
int
hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
/*
	struct vnop_blockmap_args {
		vnode_t a_vp;
		off_t a_foffset;
		size_t a_size;
		daddr64_t *a_bpn;
		size_t *a_run;
		void *a_poff;
		int a_flags;
		vfs_context_t a_context;
	};
*/
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	size_t bytesContAvail = 0;
	int retval = E_NONE;
	int syslocks = 0;
	int lockflags = 0;
	struct rl_entry *invalid_range;
	enum rl_overlaptype overlaptype;
	int started_tr = 0;
	int tooklock = 0;

	/* Do not allow blockmap operation on a directory */
	if (vnode_isdir(vp)) {
		return (ENOTSUP);
	}

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (ap->a_bpn == NULL)
		return (0);

	if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
		if (VTOC(vp)->c_lockowner != current_thread()) {
			hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
			tooklock = 1;
		}
	}
	hfsmp = VTOHFS(vp);
	cp = VTOC(vp);
	fp = VTOF(vp);

retry:
	/* Check virtual blocks only when performing write operation */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		if (hfs_start_transaction(hfsmp) != 0) {
			retval = EINVAL;
			goto exit;
		} else {
			started_tr = 1;
		}
		syslocks = SFL_EXTENTS | SFL_BITMAP;

	} else if (overflow_extents(fp)) {
		syslocks = SFL_EXTENTS;
	}

	if (syslocks)
		lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);

	/*
	 * Check for any delayed allocations.
	 */
	if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
		int64_t actbytes;
		u_int32_t loanedBlocks;

		//
		// Make sure we have a transaction.  It's possible
		// that we came in and fp->ff_unallocblocks was zero
		// but during the time we blocked acquiring the extents
		// btree, ff_unallocblocks became non-zero and so we
		// will need to start a transaction.
		//
		if (started_tr == 0) {
			if (syslocks) {
				hfs_systemfile_unlock(hfsmp, lockflags);
				syslocks = 0;
			}
			goto retry;
		}

		/*
		 * Note: ExtendFileC will release any blocks on loan and
		 * acquire real blocks.  So we ask to extend by zero bytes
		 * since ExtendFileC will account for the virtual blocks.
		 */

		loanedBlocks = fp->ff_unallocblocks;
		retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
				     kEFAllMask | kEFNoClumpMask, &actbytes);

		if (retval) {
			fp->ff_unallocblocks = loanedBlocks;
			cp->c_blocks += loanedBlocks;
			fp->ff_blocks += loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);
			hfsmp->loanedBlocks += loanedBlocks;
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);

			hfs_systemfile_unlock(hfsmp, lockflags);
			cp->c_flag |= C_MODIFIED;
			if (started_tr) {
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

				hfs_end_transaction(hfsmp);
				started_tr = 0;
			}
			goto exit;
		}
	}

	retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
			       ap->a_bpn, &bytesContAvail);
	if (syslocks) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		syslocks = 0;
	}

	if (started_tr) {
		(void) hfs_update(vp, TRUE);
		(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
		hfs_end_transaction(hfsmp);
		started_tr = 0;
	}
	if (retval) {
		/* On write, always return error because virtual blocks, if any,
		 * should have been allocated in ExtendFileC().  We do not
		 * allocate virtual blocks on read, therefore return error
		 * only if no virtual blocks are allocated.  Otherwise we search
		 * the rangelist for zero-fills.
		 */
		if ((MacToVFSError(retval) != ERANGE) ||
		    (ap->a_flags & VNODE_WRITE) ||
		    ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
			goto exit;
		}

		/* Validate if the start offset is within logical file size */
		if (ap->a_foffset > fp->ff_size) {
			goto exit;
		}

		/* Searching file extents has failed for read operation, therefore
		 * search rangelist for any uncommitted holes in the file.
		 */
		overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
				      ap->a_foffset + (off_t)(ap->a_size - 1),
				      &invalid_range);
		switch(overlaptype) {
		case RL_OVERLAPISCONTAINED:
			/* start_offset <= rl_start, end_offset >= rl_end */
			if (ap->a_foffset != invalid_range->rl_start) {
				break;
			}
		case RL_MATCHINGOVERLAP:
			/* start_offset = rl_start, end_offset = rl_end */
		case RL_OVERLAPCONTAINSRANGE:
			/* start_offset >= rl_start, end_offset <= rl_end */
		case RL_OVERLAPSTARTSBEFORE:
			/* start_offset > rl_start, end_offset >= rl_start */
			if ((off_t)fp->ff_size > (invalid_range->rl_end + 1)) {
				bytesContAvail = (invalid_range->rl_end + 1) - ap->a_foffset;
			} else {
				bytesContAvail = fp->ff_size - ap->a_foffset;
			}
			if (bytesContAvail > ap->a_size) {
				bytesContAvail = ap->a_size;
			}
			*ap->a_bpn = (daddr64_t)-1;
			retval = 0;
			break;
		case RL_OVERLAPENDSAFTER:
			/* start_offset < rl_start, end_offset < rl_end */
		case RL_NOOVERLAP:
			break;
		}
		goto exit;
	}

	/* MapFileBlockC() found a valid extent in the filefork.  Search the
	 * mapping information further for invalid file ranges.
	 */
	overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
			      ap->a_foffset + (off_t)bytesContAvail - 1,
			      &invalid_range);
	if (overlaptype != RL_NOOVERLAP) {
		switch(overlaptype) {
		case RL_MATCHINGOVERLAP:
		case RL_OVERLAPCONTAINSRANGE:
		case RL_OVERLAPSTARTSBEFORE:
			/* There's no valid block for this byte offset */
			*ap->a_bpn = (daddr64_t)-1;
			/* There's no point limiting the amount to be returned
			 * if the invalid range that was hit extends all the way
			 * to the EOF (i.e. there's no valid bytes between the
			 * end of this range and the file's EOF):
			 */
			if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
			    (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
				bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
			}
			break;

		case RL_OVERLAPISCONTAINED:
		case RL_OVERLAPENDSAFTER:
			/* The range of interest hits an invalid block before the end: */
			if (invalid_range->rl_start == ap->a_foffset) {
				/* There's actually no valid information to be had starting here: */
				*ap->a_bpn = (daddr64_t)-1;
				if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
				    (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
					bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
				}
			} else {
				bytesContAvail = invalid_range->rl_start - ap->a_foffset;
			}
			break;

		case RL_NOOVERLAP:
			break;
		} /* end switch */
		if (bytesContAvail > ap->a_size)
			bytesContAvail = ap->a_size;
	}

exit:
	if (retval == 0) {
		if (ap->a_run)
			*ap->a_run = bytesContAvail;

		if (ap->a_poff)
			*(int *)ap->a_poff = 0;
	}

	if (tooklock)
		hfs_unlock(cp);

	return (MacToVFSError(retval));
}
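
/*
 * Illustrative sketch (not part of the original source): how a caller
 * might interpret the results of hfs_vnop_blockmap().  A returned block
 * number of -1 with a zero return value marks an unallocated or invalid
 * range that must be supplied as zeroes, and *a_run bounds the contiguous
 * transfer.  The helper name and its use of designated initializers are
 * assumptions for illustration.
 */
#if 0	/* example only */
static int
map_and_classify(vnode_t vp, off_t foffset, size_t size, vfs_context_t ctx)
{
	daddr64_t bpn;
	size_t run = 0;
	struct vnop_blockmap_args args = {
		.a_vp = vp, .a_foffset = foffset, .a_size = size,
		.a_bpn = &bpn, .a_run = &run, .a_poff = NULL,
		.a_flags = VNODE_READ, .a_context = ctx,
	};
	int error = hfs_vnop_blockmap(&args);

	if (error)
		return (error);
	if (bpn == (daddr64_t)-1) {
		/* Hole or invalid range: zero-fill the first `run` bytes. */
	} else {
		/* Read `run` bytes starting at physical block `bpn`. */
	}
	return (0);
}
#endif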


/*
 * prepare and issue the I/O
 * buf_strategy knows how to deal
 * with requests that require
 * fragmented I/Os
 */
int
hfs_vnop_strategy(struct vnop_strategy_args *ap)
{
	buf_t	bp = ap->a_bp;
	vnode_t	vp = buf_vnode(bp);

	return (buf_strategy(VTOHFS(vp)->hfs_devvp, ap));
}


static int
do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context)
{
	register struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	struct proc *p = vfs_context_proc(context);
	kauth_cred_t cred = vfs_context_ucred(context);
	int retval;
	off_t bytesToAdd;
	off_t actualBytesAdded;
	off_t filebytes;
	u_long fileblocks;
	int blksize;
	struct hfsmount *hfsmp;
	int lockflags;

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
		 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

	if (length < 0)
		return (EINVAL);

	/* This should only happen with a corrupt filesystem */
	if ((off_t)fp->ff_size < 0)
		return (EINVAL);

	if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
		return (EFBIG);

	hfsmp = VTOHFS(vp);

	retval = E_NONE;

	/* Files that are changing size are not hot file candidates. */
	if (hfsmp->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
#if QUOTA
	if ((retval = hfs_getinoquota(cp)))
		return(retval);
#endif /* QUOTA */

	/*
	 * Lengthen the size of the file.  We must ensure that the
	 * last byte of the file is allocated.  Since the smallest
	 * value of ff_size is 0, length will be at least 1.
	 */
	if (length > (off_t)fp->ff_size) {
#if QUOTA
		retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
				   cred, 0);
		if (retval)
			goto Err_Exit;
#endif /* QUOTA */
		/*
		 * If we don't have enough physical space then
		 * we need to extend the physical size.
		 */
		if (length > filebytes) {
			int eflags;
			u_long blockHint = 0;

			/* All or nothing and don't round up to clumpsize. */
			eflags = kEFAllMask | kEFNoClumpMask;

			if (cred && suser(cred, NULL) != 0)
				eflags |= kEFReserveMask;  /* keep a reserve */

			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (filebytes == 0 &&
			    hfsmp->hfs_flags & HFS_METADATA_ZONE &&
			    hfs_virtualmetafile(cp)) {
				eflags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;
			}
			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			while ((length > filebytes) && (retval == E_NONE)) {
				bytesToAdd = length - filebytes;
				retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
						(FCB*)fp,
						bytesToAdd,
						blockHint,
						eflags,
						&actualBytesAdded));

				filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
				if (actualBytesAdded == 0 && retval == E_NONE) {
					if (length > filebytes)
						length = filebytes;
					break;
				}
			} /* endwhile */

			hfs_systemfile_unlock(hfsmp, lockflags);

			if (hfsmp->jnl) {
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
			}

			hfs_end_transaction(hfsmp);

			if (retval)
				goto Err_Exit;

			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
				(int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
		}

		if (!(flags & IO_NOZEROFILL)) {
			if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
				struct rl_entry *invalid_range;
				off_t zero_limit;

				zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
				if (length < zero_limit) zero_limit = length;

				if (length > (off_t)fp->ff_size) {
					struct timeval tv;

					/* Extending the file: time to fill out the current last page w. zeroes? */
					if ((fp->ff_size & PAGE_MASK_64) &&
					    (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
					    fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {

						/* There's some valid data at the start of the (current) last page
						   of the file, so zero out the remainder of that page to ensure the
						   entire page contains valid data.  Since there is no invalid range
						   possible past the (current) eof, there's no need to remove anything
						   from the invalid range list before calling cluster_write():	*/
						hfs_unlock(cp);
						retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
								fp->ff_size, (off_t)0,
								(flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
						hfs_lock(cp, HFS_FORCE_LOCK);
						if (retval) goto Err_Exit;

						/* Merely invalidate the remaining area, if necessary: */
						if (length > zero_limit) {
							microuptime(&tv);
							rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
							cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
						}
					} else {
						/* The page containing the (current) eof is invalid: just add the
						   remainder of the page to the invalid list, along with the area
						   being newly allocated:
						 */
						microuptime(&tv);
						rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
						cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
					};
				}
			} else {
				panic("hfs_truncate: invoked on non-UBC object?!");
			};
		}
		cp->c_touch_modtime = TRUE;
		fp->ff_size = length;

	} else { /* Shorten the size of the file */

		if ((off_t)fp->ff_size > length) {
			/* Any space previously marked as invalid is now irrelevant: */
			rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
		}

		/*
		 * Account for any unmapped blocks.  Note that the new
		 * file length can still end up with unmapped blocks.
		 */
		if (fp->ff_unallocblocks > 0) {
			u_int32_t finalblks;
			u_int32_t loanedBlocks;

			HFS_MOUNT_LOCK(hfsmp, TRUE);

			loanedBlocks = fp->ff_unallocblocks;
			cp->c_blocks -= loanedBlocks;
			fp->ff_blocks -= loanedBlocks;
			fp->ff_unallocblocks = 0;

			hfsmp->loanedBlocks -= loanedBlocks;

			finalblks = (length + blksize - 1) / blksize;
			if (finalblks > fp->ff_blocks) {
				/* calculate required unmapped blocks */
				loanedBlocks = finalblks - fp->ff_blocks;
				hfsmp->loanedBlocks += loanedBlocks;

				fp->ff_unallocblocks = loanedBlocks;
				cp->c_blocks += loanedBlocks;
				fp->ff_blocks += loanedBlocks;
			}
			HFS_MOUNT_UNLOCK(hfsmp, TRUE);
		}

		/*
		 * For a TBE process the deallocation of the file blocks is
		 * delayed until the file is closed.  And hfs_close calls
		 * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
		 * isn't set, we make sure this isn't a TBE process.
		 */
		if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
#if QUOTA
			off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
#endif /* QUOTA */
			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}

			if (fp->ff_unallocblocks == 0) {
				/* Protect extents b-tree and allocation bitmap */
				lockflags = SFL_BITMAP;
				if (overflow_extents(fp))
					lockflags |= SFL_EXTENTS;
				lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

				retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
						(FCB*)fp, length, false));

				hfs_systemfile_unlock(hfsmp, lockflags);
			}
			if (hfsmp->jnl) {
				if (retval == 0) {
					fp->ff_size = length;
				}
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
			}

			hfs_end_transaction(hfsmp);

			filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
			if (retval)
				goto Err_Exit;
#if QUOTA
			/* These are bytesreleased */
			(void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
#endif /* QUOTA */
		}
		/* Only set update flag if the logical length changes */
		if ((off_t)fp->ff_size != length)
			cp->c_touch_modtime = TRUE;
		fp->ff_size = length;
	}
	cp->c_touch_chgtime = TRUE;	/* status changed */
	cp->c_touch_modtime = TRUE;	/* file data was modified */
	retval = hfs_update(vp, MNT_WAIT);
	if (retval) {
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
			-1, -1, -1, retval, 0);
	}

Err_Exit:

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
		 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);

	return (retval);
}


/*
 * Truncate a cnode to at most length size, freeing (or adding) the
 * disk blocks.
 */
__private_extern__
int
hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
             vfs_context_t context)
{
	struct filefork *fp = VTOF(vp);
	off_t filebytes;
	u_long fileblocks;
	int blksize, error = 0;
	struct cnode *cp = VTOC(vp);

	/* Cannot truncate an HFS directory! */
	if (vnode_isdir(vp)) {
		return (EISDIR);
	}
	/* A swap file cannot change size. */
	if (vnode_isswap(vp) && (length != 0)) {
		return (EPERM);
	}

	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	//
	// Have to do this here so that we don't wind up with
	// i/o pending for blocks that are about to be released
	// if we truncate the file.
	//
	// If skipsetsize is set, then the caller is responsible
	// for the ubc_setsize.
	//
	if (!skipsetsize)
		ubc_setsize(vp, length);

	// have to loop truncating or growing files that are
	// really big because otherwise transactions can get
	// enormous and consume too many kernel resources.

	if (length < filebytes) {
		while (filebytes > length) {
			if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes -= HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, context);
			if (error)
				break;
		}
	} else if (length > filebytes) {
		while (filebytes < length) {
			if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
				filebytes += HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}
			cp->c_flag |= C_FORCEUPDATE;
			error = do_hfs_truncate(vp, filebytes, flags, context);
			if (error)
				break;
		}
	} else /* Same logical size */ {

		error = do_hfs_truncate(vp, length, flags, context);
	}
	/* Files that are changing size are not hot file candidates. */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}

	return (error);
}
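
/*
 * Worked example (annotation, not part of the original source): assuming
 * HFS_BIGFILE_SIZE is 1 GB for illustration, shrinking a 10 GB fragmented
 * file to zero proceeds as ten do_hfs_truncate() calls -- to 9 GB, 8 GB,
 * ... , 0 -- so that no single journal transaction has to record the
 * release of more than roughly 1 GB worth of extents.
 */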



/*
 * Preallocate file storage space.
 */
int
hfs_vnop_allocate(struct vnop_allocate_args /* {
		vnode_t a_vp;
		off_t a_length;
		u_int32_t  a_flags;
		off_t *a_bytesallocated;
		off_t a_offset;
		vfs_context_t a_context;
	} */ *ap)
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	ExtendedVCB *vcb;
	off_t length = ap->a_length;
	off_t startingPEOF;
	off_t moreBytesRequested;
	off_t actualBytesAdded;
	off_t filebytes;
	u_long fileblocks;
	int retval, retval2;
	u_int32_t blockHint;
	u_int32_t extendFlags;   /* For call to ExtendFileC */
	struct hfsmount *hfsmp;
	kauth_cred_t cred = vfs_context_ucred(ap->a_context);
	int lockflags;

	*(ap->a_bytesallocated) = 0;

	if (!vnode_isreg(vp))
		return (EISDIR);
	if (length < (off_t)0)
		return (EINVAL);

	cp = VTOC(vp);

	hfs_lock_truncate(cp, TRUE);

	if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
		goto Err_Exit;
	}

	fp = VTOF(vp);
	hfsmp = VTOHFS(vp);
	vcb = VTOVCB(vp);

	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

	if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
		retval = EINVAL;
		goto Err_Exit;
	}

	/* Fill in the flags word for the call to Extend the file */

	extendFlags = kEFNoClumpMask;
	if (ap->a_flags & ALLOCATECONTIG)
		extendFlags |= kEFContigMask;
	if (ap->a_flags & ALLOCATEALL)
		extendFlags |= kEFAllMask;
	if (cred && suser(cred, NULL) != 0)
		extendFlags |= kEFReserveMask;

	retval = E_NONE;
	blockHint = 0;
	startingPEOF = filebytes;

	if (ap->a_flags & ALLOCATEFROMPEOF)
		length += filebytes;
	else if (ap->a_flags & ALLOCATEFROMVOL)
		blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

	/* If no changes are necessary, then we're done */
	if (filebytes == length)
		goto Std_Exit;

	/*
	 * Lengthen the size of the file.  We must ensure that the
	 * last byte of the file is allocated.  Since the smallest
	 * value of filebytes is 0, length will be at least 1.
	 */
	if (length > filebytes) {
		off_t total_bytes_added = 0, orig_request_size;

		orig_request_size = moreBytesRequested = length - filebytes;

#if QUOTA
		retval = hfs_chkdq(cp,
				(int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
				cred, 0);
		if (retval)
			goto Err_Exit;

#endif /* QUOTA */
		/*
		 * Metadata zone checks.
		 */
		if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (hfs_virtualmetafile(cp)) {
				extendFlags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;
			} else if ((blockHint >= hfsmp->hfs_metazone_start) &&
				   (blockHint <= hfsmp->hfs_metazone_end)) {
				/*
				 * Move blockHint outside metadata zone.
				 */
				blockHint = hfsmp->hfs_metazone_end + 1;
			}
		}


		while ((length > filebytes) && (retval == E_NONE)) {
			off_t bytesRequested;

			if (hfs_start_transaction(hfsmp) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}

			/* Protect extents b-tree and allocation bitmap */
			lockflags = SFL_BITMAP;
			if (overflow_extents(fp))
				lockflags |= SFL_EXTENTS;
			lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

			if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
				bytesRequested = HFS_BIGFILE_SIZE;
			} else {
				bytesRequested = moreBytesRequested;
			}

			retval = MacToVFSError(ExtendFileC(vcb,
						(FCB*)fp,
						bytesRequested,
						blockHint,
						extendFlags,
						&actualBytesAdded));

			if (retval == E_NONE) {
				*(ap->a_bytesallocated) += actualBytesAdded;
				total_bytes_added += actualBytesAdded;
				moreBytesRequested -= actualBytesAdded;
				if (blockHint != 0) {
					blockHint += actualBytesAdded / vcb->blockSize;
				}
			}
			filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

			hfs_systemfile_unlock(hfsmp, lockflags);

			if (hfsmp->jnl) {
				(void) hfs_update(vp, TRUE);
				(void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
			}

			hfs_end_transaction(hfsmp);
		}


		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes))
			goto Err_Exit;

		/*
		 * Adjust actualBytesAdded to be allocation block aligned, not
		 * clump size aligned.
		 * NOTE: So what we are reporting does not affect reality
		 * until the file is closed, when we truncate the file to allocation
		 * block size.
		 */
		if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
			*(ap->a_bytesallocated) =
				roundup(orig_request_size, (off_t)vcb->blockSize);

	} else { /* Shorten the size of the file */

		if (fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).
			 */
		}

		retval = hfs_truncate(vp, length, 0, 0, ap->a_context);
		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the hfs_update to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes)) goto Err_Exit;
#if QUOTA
		/* These are bytesreleased */
		(void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
#endif /* QUOTA */

		if (fp->ff_size > filebytes) {
			fp->ff_size = filebytes;

			hfs_unlock(cp);
			ubc_setsize(vp, fp->ff_size);
			hfs_lock(cp, HFS_FORCE_LOCK);
		}
	}

Std_Exit:
	cp->c_touch_chgtime = TRUE;
	cp->c_touch_modtime = TRUE;
	retval2 = hfs_update(vp, MNT_WAIT);

	if (retval == 0)
		retval = retval2;
Err_Exit:
	hfs_unlock_truncate(cp, TRUE);
	hfs_unlock(cp);
	return (retval);
}
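
/*
 * Illustrative sketch (not part of the original source): user space
 * reaches hfs_vnop_allocate() through the F_PREALLOCATE fcntl, whose
 * fstore_t flags correspond to the ALLOCATE* flags tested above.  The
 * descriptor, length, and helper name are hypothetical.
 */
#if 0	/* example only -- user-space code, not kernel code */
#include <fcntl.h>

static int
preallocate(int fd, off_t length)
{
	fstore_t fst;

	fst.fst_flags = F_ALLOCATECONTIG | F_ALLOCATEALL;	/* all-or-nothing, contiguous */
	fst.fst_posmode = F_PEOFPOSMODE;	/* allocate from the physical EOF */
	fst.fst_offset = 0;
	fst.fst_length = length;
	fst.fst_bytesalloc = 0;

	if (fcntl(fd, F_PREALLOCATE, &fst) == -1)
		return (-1);
	/* fst.fst_bytesalloc reports how much was actually reserved. */
	return (0);
}
#endif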


/*
 * Pagein for HFS filesystem
 */
int
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
	struct vnop_pagein_args {
		vnode_t       a_vp,
		upl_t         a_pl,
		vm_offset_t   a_pl_offset,
		off_t         a_f_offset,
		size_t        a_size,
		int           a_flags
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	int error;

	error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
	                       ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
	/*
	 * Keep track of blocks read.
	 */
	if (!vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
		struct cnode *cp;
		struct filefork *fp;
		int bytesread;
		int took_cnode_lock = 0;

		cp = VTOC(vp);
		fp = VTOF(vp);

		if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
			bytesread = fp->ff_size;
		else
			bytesread = ap->a_size;

		/* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
		if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
			hfs_lock(cp, HFS_FORCE_LOCK);
			took_cnode_lock = 1;
		}
		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
			struct timeval tv;

			fp->ff_bytesread = bytesread;
			microtime(&tv);
			cp->c_atime = tv.tv_sec;
		} else {
			fp->ff_bytesread += bytesread;
		}
		cp->c_touch_acctime = TRUE;
		if (took_cnode_lock)
			hfs_unlock(cp);
	}
	return (error);
}

/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
	struct vnop_pageout_args {
		vnode_t       a_vp,
		upl_t         a_pl,
		vm_offset_t   a_pl_offset,
		off_t         a_f_offset,
		size_t        a_size,
		int           a_flags
		vfs_context_t a_context;
	};
*/
{
	vnode_t vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	int retval;
	off_t filesize;

	cp = VTOC(vp);
	fp = VTOF(vp);

	if (vnode_isswap(vp)) {
		filesize = fp->ff_size;
	} else {
		off_t end_of_range;
		int tooklock = 0;

		if (cp->c_lockowner != current_thread()) {
			if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
				if (!(ap->a_flags & UPL_NOCOMMIT)) {
					ubc_upl_abort_range(ap->a_pl,
							    ap->a_pl_offset,
							    ap->a_size,
							    UPL_ABORT_FREE_ON_EMPTY);
				}
				return (retval);
			}
			tooklock = 1;
		}

		filesize = fp->ff_size;
		end_of_range = ap->a_f_offset + ap->a_size - 1;

		if (end_of_range >= filesize) {
			end_of_range = (off_t)(filesize - 1);
		}
		if (ap->a_f_offset < filesize) {
			rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
			cp->c_flag |= C_MODIFIED;  /* leof is dirty */
		}

		if (tooklock) {
			hfs_unlock(cp);
		}
	}

	retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
				 ap->a_size, filesize, ap->a_flags);

	/*
	 * If data was written, and setuid or setgid bits are set and
	 * this process is not the superuser then clear the setuid and
	 * setgid bits as a precaution against tampering.
	 */
	if ((retval == 0) &&
	    (cp->c_mode & (S_ISUID | S_ISGID)) &&
	    (vfs_context_suser(ap->a_context) != 0)) {
		hfs_lock(cp, HFS_FORCE_LOCK);
		cp->c_mode &= ~(S_ISUID | S_ISGID);
		cp->c_touch_chgtime = TRUE;
		hfs_unlock(cp);
	}
	return (retval);
}

/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
	int retval = 0;
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = buf_vnode(bp);
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
	    (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
	    (vp == VTOHFS(vp)->hfc_filevp)) {

		/*
		 * Swap and validate the node if it is in native byte order.
		 * This is always true on big endian, so we always validate
		 * before writing here.  On little endian, the node typically has
		 * been swapped and validated when it was written to the journal,
		 * so we won't do anything here.
		 */
		if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = (char *)buf_dataptr(bp);
			block.blockNum = buf_lblkno(bp);
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (buf_fromcache(bp) == 0);
			block.blockSize = buf_count(bp);

			/* Endian un-swap B-Tree node */
			retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
			if (retval)
				panic("hfs_vnop_bwrite: about to write corrupt node!\n");
		}
	}

	/* This buffer shouldn't be locked anymore but if it is clear it */
	if ((buf_flags(bp) & B_LOCKED)) {
		// XXXdbg
		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp %p\n", bp);
		}
		buf_clearflags(bp, B_LOCKED);
	}
	retval = vn_bwrite (ap);

	return (retval);
}

/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------
 * |///////////////|
 * -----------------
 * 0               N (file offset)
 *
 * -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 * -----------------     -----------------
 * 0               N     N+1             2N
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 * -----------------     -----------------
 * 0               N     N+1             2N
 *
 *                       -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                       -----------------
 *                       0               N
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
__private_extern__
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
	struct proc *p)
{
	struct cnode *cp;
	struct filefork *fp;
	struct hfsmount *hfsmp;
	u_int32_t headblks;
	u_int32_t datablks;
	u_int32_t blksize;
	u_int32_t growsize;
	u_int32_t nextallocsave;
	daddr64_t sector_a, sector_b;
	int eflags;
	off_t newbytes;
	int retval;
	int lockflags = 0;
	int took_trunc_lock = 0;
	int started_tr = 0;
	enum vtype vnodetype;

	vnodetype = vnode_vtype(vp);
	if (vnodetype != VREG && vnodetype != VLNK) {
		return (EPERM);
	}

	hfsmp = VTOHFS(vp);
	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
		return (ENOSPC);
	}

	cp = VTOC(vp);
	fp = VTOF(vp);
	if (fp->ff_unallocblocks)
		return (EINVAL);
	blksize = hfsmp->blockSize;
	if (blockHint == 0)
		blockHint = hfsmp->nextAllocation;

	if ((fp->ff_size > 0x7fffffff) ||
	    ((fp->ff_size > blksize) && vnodetype == VLNK)) {
		return (EFBIG);
	}

	//
	// We do not believe that this call to hfs_fsync() is
	// necessary and it causes a journal transaction
	// deadlock so we are removing it.
	//
	//if (vnodetype == VREG && !vnode_issystem(vp)) {
	//	retval = hfs_fsync(vp, MNT_WAIT, 0, p);
	//	if (retval)
	//		return (retval);
	//}

	if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
		hfs_unlock(cp);
		hfs_lock_truncate(cp, TRUE);
		/* Force lock since callers expect the lock to be held. */
		if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
			hfs_unlock_truncate(cp, TRUE);
			return (retval);
		}
		/* No need to continue if file was removed. */
		if (cp->c_flag & C_NOEXISTS) {
			hfs_unlock_truncate(cp, TRUE);
			return (ENOENT);
		}
		took_trunc_lock = 1;
	}
	headblks = fp->ff_blocks;
	datablks = howmany(fp->ff_size, blksize);
	growsize = datablks * blksize;
	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
	if (blockHint >= hfsmp->hfs_metazone_start &&
	    blockHint <= hfsmp->hfs_metazone_end)
		eflags |= kEFMetadataMask;

	if (hfs_start_transaction(hfsmp) != 0) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, TRUE);
		return (EINVAL);
	}
	started_tr = 1;
	/*
	 * Protect the extents b-tree and the allocation bitmap
	 * during MapFileBlockC and ExtendFileC operations.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
	if (retval) {
		retval = MacToVFSError(retval);
		goto out;
	}

	/*
	 * STEP 1 - acquire new allocation blocks.
	 */
	nextallocsave = hfsmp->nextAllocation;
	retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
	if (eflags & kEFMetadataMask) {
		HFS_MOUNT_LOCK(hfsmp, TRUE);
		HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
		MarkVCBDirty(hfsmp);
		HFS_MOUNT_UNLOCK(hfsmp, TRUE);
	}

	retval = MacToVFSError(retval);
	if (retval == 0) {
		cp->c_flag |= C_MODIFIED;
		if (newbytes < growsize) {
			retval = ENOSPC;
			goto restore;
		} else if (fp->ff_blocks < (headblks + datablks)) {
			printf("hfs_relocate: allocation failed");
			retval = ENOSPC;
			goto restore;
		}

		retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
		if (retval) {
			retval = MacToVFSError(retval);
		} else if ((sector_a + 1) == sector_b) {
			retval = ENOSPC;
			goto restore;
		} else if ((eflags & kEFMetadataMask) &&
		           ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
			      hfsmp->hfs_metazone_end)) {
			const char * filestr;
			char emptystr = '\0';

			if (cp->c_desc.cd_nameptr != NULL) {
				filestr = (const char *)&cp->c_desc.cd_nameptr[0];
			} else if (vnode_name(vp) != NULL) {
				filestr = vnode_name(vp);
			} else {
				filestr = &emptystr;
			}
			printf("hfs_relocate: %s didn't move into MDZ (%d blks)\n", filestr, fp->ff_blocks);
			retval = ENOSPC;
			goto restore;
		}
	}
	/* Done with system locks and journal for now. */
	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	hfs_end_transaction(hfsmp);
	started_tr = 0;

	if (retval) {
		/*
		 * Check to see if failure is due to excessive fragmentation.
		 */
		if ((retval == ENOSPC) &&
		    (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
			hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
		}
		goto out;
	}
	/*
	 * STEP 2 - clone file data into the new allocation blocks.
	 */

	if (vnodetype == VLNK)
		retval = hfs_clonelink(vp, blksize, cred, p);
	else if (vnode_issystem(vp))
		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	else
		retval = hfs_clonefile(vp, headblks, datablks, blksize);

	/* Start transaction for step 3 or for a restore. */
	if (hfs_start_transaction(hfsmp) != 0) {
		retval = EINVAL;
		goto out;
	}
	started_tr = 1;
	if (retval)
		goto restore;

	/*
	 * STEP 3 - switch to cloned data and remove old blocks.
	 */
	lockflags = SFL_BITMAP;
	if (overflow_extents(fp))
		lockflags |= SFL_EXTENTS;
	lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

	retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;
	if (retval)
		goto restore;
out:
	if (took_trunc_lock)
		hfs_unlock_truncate(cp, TRUE);

	if (lockflags) {
		hfs_systemfile_unlock(hfsmp, lockflags);
		lockflags = 0;
	}

	/* Push cnode's new extent data to disk. */
	if (retval == 0) {
		(void) hfs_update(vp, MNT_WAIT);
	}
	if (hfsmp->jnl) {
		if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
			(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
		else
			(void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
	}
exit:
	if (started_tr)
		hfs_end_transaction(hfsmp);

	return (retval);

restore:
	if (fp->ff_blocks == headblks) {
		if (took_trunc_lock)
			hfs_unlock_truncate(cp, TRUE);
		goto exit;
	}
	/*
	 * Give back any newly allocated space.
	 */
	if (lockflags == 0) {
		lockflags = SFL_BITMAP;
		if (overflow_extents(fp))
			lockflags |= SFL_EXTENTS;
		lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
	}

	(void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);

	hfs_systemfile_unlock(hfsmp, lockflags);
	lockflags = 0;

	if (took_trunc_lock)
		hfs_unlock_truncate(cp, TRUE);
	goto exit;
}


/*
 * Clone a symlink.
 *
 */
static int
hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
{
	struct buf *head_bp = NULL;
	struct buf *tail_bp = NULL;
	int error;


	error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
	if (error)
		goto out;

	tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
	if (tail_bp == NULL) {
		error = EIO;
		goto out;
	}
	bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
	error = (int)buf_bwrite(tail_bp);
out:
	if (head_bp) {
		buf_markinvalid(head_bp);
		buf_brelse(head_bp);
	}
	(void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);

	return (error);
}

/*
 * Clone a file's data within the file.
 *
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
	caddr_t bufp;
	size_t writebase;
	size_t bufsize;
	size_t copysize;
	size_t iosize;
	off_t filesize;
	size_t offset;
	uio_t auio;
	int error = 0;

	filesize = VTOF(vp)->ff_blocks * blksize;  /* virtual file size */
	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 128 * 1024);
	offset = 0;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}
	hfs_unlock(VTOC(vp));

	auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);

	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);

		uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_read(vp, auio, copysize, IO_NOCACHE);
		if (error) {
			printf("hfs_clonefile: cluster_read failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
			error = EIO;
			break;
		}

		uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
		uio_addiov(auio, (uintptr_t)bufp, iosize);

		error = cluster_write(vp, auio, filesize + offset,
		                      filesize + offset + iosize,
		                      uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
		if (error) {
			printf("hfs_clonefile: cluster_write failed - %d\n", error);
			break;
		}
		if (uio_resid(auio) != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
			error = EIO;
			break;
		}
		offset += iosize;
	}
	uio_free(auio);

	/*
	 * No need to call ubc_sync_range or hfs_invalbuf
	 * since the file was copied using IO_NOCACHE.
	 */

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
	return (error);
}

/*
 * Clone a system (metadata) file.
 *
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
	caddr_t bufp;
	char * offset;
	size_t bufsize;
	size_t iosize;
	struct buf *bp = NULL;
	daddr64_t blkno;
	daddr64_t blk;
	daddr64_t start_blk;
	daddr64_t last_blk;
	int breadcnt;
	int i;
	int error = 0;


	iosize = GetLogicalBlockSize(vp);
	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
	breadcnt = bufsize / iosize;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}
	start_blk = ((daddr64_t)blkstart * blksize) / iosize;
	last_blk  = ((daddr64_t)blkcnt * blksize) / iosize;
	blkno = 0;

	while (blkno < last_blk) {
		/*
		 * Read up to a megabyte
		 */
		offset = bufp;
		for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
			error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
			if (error) {
				printf("hfs_clonesysfile: meta_bread error %d\n", error);
				goto out;
			}
			if (buf_count(bp) != iosize) {
				printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
				goto out;
			}
			bcopy((char *)buf_dataptr(bp), offset, iosize);

			buf_markinvalid(bp);
			buf_brelse(bp);
			bp = NULL;

			offset += iosize;
		}

		/*
		 * Write up to a megabyte
		 */
		offset = bufp;
		for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
			bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
			if (bp == NULL) {
				printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
				error = EIO;
				goto out;
			}
			bcopy(offset, (char *)buf_dataptr(bp), iosize);
			error = (int)buf_bwrite(bp);
			bp = NULL;
			if (error)
				goto out;
			offset += iosize;
		}
	}
out:
	if (bp) {
		buf_brelse(bp);
	}

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	error = hfs_fsync(vp, MNT_WAIT, 0, p);

	return (error);
}