1/*
2 * Copyright (c) 2000-2009 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* @(#)hfs_readwrite.c 1.0
29 *
30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
31 *
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
33 *
34 */
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/resourcevar.h>
39#include <sys/kernel.h>
40#include <sys/fcntl.h>
41#include <sys/filedesc.h>
42#include <sys/stat.h>
43#include <sys/buf.h>
44#include <sys/proc.h>
45#include <sys/kauth.h>
46#include <sys/vnode.h>
47#include <sys/vnode_internal.h>
48#include <sys/uio.h>
49#include <sys/vfs_context.h>
50#include <sys/fsevents.h>
51#include <kern/kalloc.h>
52#include <sys/disk.h>
53#include <sys/sysctl.h>
54#include <sys/fsctl.h>
55
56#include <miscfs/specfs/specdev.h>
57
58#include <sys/ubc.h>
59#include <sys/ubc_internal.h>
60
61#include <vm/vm_pageout.h>
62#include <vm/vm_kern.h>
63
64#include <sys/kdebug.h>
65
66#include "hfs.h"
67#include "hfs_attrlist.h"
68#include "hfs_endian.h"
69#include "hfs_fsctl.h"
70#include "hfs_quota.h"
71#include "hfscommon/headers/FileMgrInternal.h"
72#include "hfscommon/headers/BTreesInternal.h"
73#include "hfs_cnode.h"
74#include "hfs_dbg.h"
75
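/*
 * can_cluster: a transfer qualifies for cluster I/O only when its size is
 * a multiple of 4096 bytes and no larger than MAXPHYSIO/2.
 */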
76#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
77
78enum {
79 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
80};
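/*
 * 0x7FFFFFFF is 2GB - 1. hfs_vnop_read() checks read offsets against this
 * limit (returning EFBIG) when the volume is HFS Standard (HFS_STANDARD flag).
 */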
81
82/* from bsd/hfs/hfs_vfsops.c */
83extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
84
85static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
86static int hfs_clonefile(struct vnode *, int, int, int);
87static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
88static int hfs_minorupdate(struct vnode *vp);
89static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
90
91
92int flush_cache_on_write = 0;
93SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
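/*
 * The knob above is exposed as kern.flush_cache_on_write and can typically be
 * toggled from user space with, e.g., "sysctl -w kern.flush_cache_on_write=1"
 * (illustrative usage).
 */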
94
95
96/*
97 * Read data from a file.
98 */
99int
100hfs_vnop_read(struct vnop_read_args *ap)
101{
102 uio_t uio = ap->a_uio;
103 struct vnode *vp = ap->a_vp;
104 struct cnode *cp;
105 struct filefork *fp;
106 struct hfsmount *hfsmp;
107 off_t filesize;
108 off_t filebytes;
109 off_t start_resid = uio_resid(uio);
110 off_t offset = uio_offset(uio);
111 int retval = 0;
112
113 /* Preflight checks */
114 if (!vnode_isreg(vp)) {
115 /* can only read regular files */
116 if (vnode_isdir(vp))
117 return (EISDIR);
118 else
119 return (EPERM);
120 }
121 if (start_resid == 0)
122 return (0); /* Nothing left to do */
123 if (offset < 0)
124		return (EINVAL);	/* can't read from a negative offset */
125
126#if HFS_COMPRESSION
127 if (VNODE_IS_RSRC(vp)) {
128 if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
129 return 0;
130 }
131 /* otherwise read the resource fork normally */
132 } else {
133 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
134 if (compressed) {
135 retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
136 if (compressed) {
137 if (retval == 0) {
138 /* successful read, update the access time */
139 VTOC(vp)->c_touch_acctime = TRUE;
140
141 /* compressed files are not hot file candidates */
142 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
143 VTOF(vp)->ff_bytesread = 0;
144 }
145 }
146 return retval;
147 }
148 /* otherwise the file was converted back to a regular file while we were reading it */
149 retval = 0;
150 }
151 }
152#endif /* HFS_COMPRESSION */
153
154 cp = VTOC(vp);
155 fp = VTOF(vp);
156 hfsmp = VTOHFS(vp);
157
158 /* Protect against a size change. */
159 hfs_lock_truncate(cp, 0);
160
161 filesize = fp->ff_size;
162 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
163 if (offset > filesize) {
164 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
165 (offset > (off_t)MAXHFSFILESIZE)) {
166 retval = EFBIG;
167 }
168 goto exit;
169 }
170
171 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
172 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
173
174 retval = cluster_read(vp, uio, filesize, ap->a_ioflag);
175
176 cp->c_touch_acctime = TRUE;
177
178 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
179 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
180
181 /*
182	 * Keep track of blocks read
183 */
184 if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
185 int took_cnode_lock = 0;
186 off_t bytesread;
187
188 bytesread = start_resid - uio_resid(uio);
189
190 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
191 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
192 hfs_lock(cp, HFS_FORCE_LOCK);
193 took_cnode_lock = 1;
194 }
195 /*
196 * If this file hasn't been seen since the start of
197 * the current sampling period then start over.
198 */
199 if (cp->c_atime < hfsmp->hfc_timebase) {
200 struct timeval tv;
201
202 fp->ff_bytesread = bytesread;
203 microtime(&tv);
204 cp->c_atime = tv.tv_sec;
205 } else {
206 fp->ff_bytesread += bytesread;
207 }
208 if (took_cnode_lock)
209 hfs_unlock(cp);
210 }
211exit:
212 hfs_unlock_truncate(cp, 0);
213 return (retval);
214}
215
216/*
217 * Write data to a file.
218 */
219int
220hfs_vnop_write(struct vnop_write_args *ap)
221{
222 uio_t uio = ap->a_uio;
223 struct vnode *vp = ap->a_vp;
224 struct cnode *cp;
225 struct filefork *fp;
226 struct hfsmount *hfsmp;
227 kauth_cred_t cred = NULL;
228 off_t origFileSize;
229 off_t writelimit;
230 off_t bytesToAdd = 0;
231 off_t actualBytesAdded;
232 off_t filebytes;
233 off_t offset;
234 ssize_t resid;
235 int eflags;
236 int ioflag = ap->a_ioflag;
237 int retval = 0;
238 int lockflags;
239 int cnode_locked = 0;
240 int partialwrite = 0;
241 int exclusive_lock = 0;
242
243#if HFS_COMPRESSION
244 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
245 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
246 switch(state) {
247 case FILE_IS_COMPRESSED:
248 return EACCES;
249 case FILE_IS_CONVERTING:
250 /* if FILE_IS_CONVERTING, we allow writes */
251 break;
252 default:
253 printf("invalid state %d for compressed file\n", state);
254 /* fall through */
255 }
256 }
257#endif
258
259	// LP64todo - fix this! uio_resid may be a 64-bit value
260 resid = uio_resid(uio);
261 offset = uio_offset(uio);
262
263 if (ioflag & IO_APPEND) {
264 exclusive_lock = 1;
265 }
266
267 if (offset < 0)
268 return (EINVAL);
269 if (resid == 0)
270 return (E_NONE);
271 if (!vnode_isreg(vp))
272 return (EPERM); /* Can only write regular files */
273
274 cp = VTOC(vp);
275 fp = VTOF(vp);
276 hfsmp = VTOHFS(vp);
277
278 eflags = kEFDeferMask; /* defer file block allocations */
279#ifdef HFS_SPARSE_DEV
280 /*
281 * When the underlying device is sparse and space
282 * is low (< 8MB), stop doing delayed allocations
283 * and begin doing synchronous I/O.
284 */
285 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
286 (hfs_freeblks(hfsmp, 0) < 2048)) {
287 eflags &= ~kEFDeferMask;
288 ioflag |= IO_SYNC;
289 }
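	/*
	 * Note: 2048 allocation blocks corresponds to the 8MB mentioned above
	 * only for the common 4KB allocation block size (2048 * 4096 = 8MB);
	 * volumes with a different block size hit this threshold at a
	 * different byte count.
	 */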
290#endif /* HFS_SPARSE_DEV */
291
292again:
293 /* Protect against a size change. */
294 hfs_lock_truncate(cp, exclusive_lock);
295
296 if (ioflag & IO_APPEND) {
297 uio_setoffset(uio, fp->ff_size);
298 offset = fp->ff_size;
299 }
300 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
301 retval = EPERM;
302 goto exit;
303 }
304
305 origFileSize = fp->ff_size;
306 writelimit = offset + resid;
307 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
308
309 /* If the truncate lock is shared, and if we either have virtual
310 * blocks or will need to extend the file, upgrade the truncate
311	 * lock to exclusive. If the upgrade fails, we lose the lock and
312	 * have to take the exclusive lock again. Note that we want to
313 * grab the truncate lock exclusive even if we're not allocating new blocks
314 * because we could still be growing past the LEOF.
315 */
316 if ((exclusive_lock == 0) &&
317 ((fp->ff_unallocblocks != 0) || (writelimit > origFileSize))) {
318 exclusive_lock = 1;
319 /* Lock upgrade failed and we lost our shared lock, try again */
320 if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
321 goto again;
322 }
323 }
324
325 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
326 goto exit;
327 }
328 cnode_locked = 1;
329
330 if (!exclusive_lock) {
331 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
332 (int)offset, uio_resid(uio), (int)fp->ff_size,
333 (int)filebytes, 0);
334 }
335
336 /* Check if we do not need to extend the file */
337 if (writelimit <= filebytes) {
338 goto sizeok;
339 }
340
341 cred = vfs_context_ucred(ap->a_context);
342 bytesToAdd = writelimit - filebytes;
343
344#if QUOTA
345 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
346 cred, 0);
347 if (retval)
348 goto exit;
349#endif /* QUOTA */
350
351 if (hfs_start_transaction(hfsmp) != 0) {
352 retval = EINVAL;
353 goto exit;
354 }
355
356 while (writelimit > filebytes) {
357 bytesToAdd = writelimit - filebytes;
358 if (cred && suser(cred, NULL) != 0)
359 eflags |= kEFReserveMask;
360
361 /* Protect extents b-tree and allocation bitmap */
362 lockflags = SFL_BITMAP;
363 if (overflow_extents(fp))
364 lockflags |= SFL_EXTENTS;
365 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
366
367 /* Files that are changing size are not hot file candidates. */
368 if (hfsmp->hfc_stage == HFC_RECORDING) {
369 fp->ff_bytesread = 0;
370 }
371 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
372 0, eflags, &actualBytesAdded));
373
374 hfs_systemfile_unlock(hfsmp, lockflags);
375
376 if ((actualBytesAdded == 0) && (retval == E_NONE))
377 retval = ENOSPC;
378 if (retval != E_NONE)
379 break;
380 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
381 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
382 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
383 }
384 (void) hfs_update(vp, TRUE);
385 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
386 (void) hfs_end_transaction(hfsmp);
387
388 /*
389	 * If we didn't grow the file enough, try a partial write.
390 * POSIX expects this behavior.
391 */
392 if ((retval == ENOSPC) && (filebytes > offset)) {
393 retval = 0;
394 partialwrite = 1;
395 uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
396 resid -= bytesToAdd;
397 writelimit = filebytes;
398 }
399sizeok:
400 if (retval == E_NONE) {
401 off_t filesize;
402 off_t zero_off;
403 off_t tail_off;
404 off_t inval_start;
405 off_t inval_end;
406 off_t io_start;
407 int lflag;
408 struct rl_entry *invalid_range;
409
410 if (writelimit > fp->ff_size)
411 filesize = writelimit;
412 else
413 filesize = fp->ff_size;
414
415 lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
416
417 if (offset <= fp->ff_size) {
418 zero_off = offset & ~PAGE_MASK_64;
419
420		/* Check whether the area between zero_off and the start of the
421		   transfer is invalid and should be zero-filled as part of the
422		   transfer:
423 */
424 if (offset > zero_off) {
425 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
426 lflag |= IO_HEADZEROFILL;
427 }
428 } else {
429 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
430
431 /* The bytes between fp->ff_size and uio->uio_offset must never be
432 read without being zeroed. The current last block is filled with zeroes
433 if it holds valid data but in all cases merely do a little bookkeeping
434 to track the area from the end of the current last page to the start of
435 the area actually written. For the same reason only the bytes up to the
436		   start of the page where this write will start are invalidated; any remainder
437 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
438
439 Note that inval_start, the start of the page after the current EOF,
440 may be past the start of the write, in which case the zeroing
441		   will be handled by the cluster_write of the actual data.
442 */
443 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
444 inval_end = offset & ~PAGE_MASK_64;
445 zero_off = fp->ff_size;
446
447 if ((fp->ff_size & PAGE_MASK_64) &&
448 (rl_scan(&fp->ff_invalidranges,
449 eof_page_base,
450 fp->ff_size - 1,
451 &invalid_range) != RL_NOOVERLAP)) {
452 /* The page containing the EOF is not valid, so the
453 entire page must be made inaccessible now. If the write
454 starts on a page beyond the page containing the eof
455 (inval_end > eof_page_base), add the
456 whole page to the range to be invalidated. Otherwise
457 (i.e. if the write starts on the same page), zero-fill
458 the entire page explicitly now:
459 */
460 if (inval_end > eof_page_base) {
461 inval_start = eof_page_base;
462 } else {
463 zero_off = eof_page_base;
464 };
465 };
466
467 if (inval_start < inval_end) {
468 struct timeval tv;
469 /* There's some range of data that's going to be marked invalid */
470
471 if (zero_off < inval_start) {
472 /* The pages between inval_start and inval_end are going to be invalidated,
473 and the actual write will start on a page past inval_end. Now's the last
474 chance to zero-fill the page containing the EOF:
475 */
476 hfs_unlock(cp);
477 cnode_locked = 0;
478 retval = cluster_write(vp, (uio_t) 0,
479 fp->ff_size, inval_start,
480 zero_off, (off_t)0,
481 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
482 hfs_lock(cp, HFS_FORCE_LOCK);
483 cnode_locked = 1;
484 if (retval) goto ioerr_exit;
485 offset = uio_offset(uio);
486 };
487
488 /* Mark the remaining area of the newly allocated space as invalid: */
489 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
490 microuptime(&tv);
491 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
492 zero_off = fp->ff_size = inval_end;
493 };
494
495 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
496 };
497
498 /* Check to see whether the area between the end of the write and the end of
499 the page it falls in is invalid and should be zero-filled as part of the transfer:
500 */
501 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
502 if (tail_off > filesize) tail_off = filesize;
503 if (tail_off > writelimit) {
504 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
505 lflag |= IO_TAILZEROFILL;
506 };
507 };
508
509 /*
510 * if the write starts beyond the current EOF (possibly advanced in the
511 * zeroing of the last block, above), then we'll zero fill from the current EOF
512 * to where the write begins:
513 *
514 * NOTE: If (and ONLY if) the portion of the file about to be written is
515 * before the current EOF it might be marked as invalid now and must be
516 * made readable (removed from the invalid ranges) before cluster_write
517 * tries to write it:
518 */
519 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
520 if (io_start < fp->ff_size) {
521 off_t io_end;
522
523 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
524 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
525 };
526
527 hfs_unlock(cp);
528 cnode_locked = 0;
529
530 /*
531 * We need to tell UBC the fork's new size BEFORE calling
532 * cluster_write, in case any of the new pages need to be
533 * paged out before cluster_write completes (which does happen
534 * in embedded systems due to extreme memory pressure).
535 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
536 * will be, so that it can pass that on to cluster_pageout, and
537 * allow those pageouts.
538 *
539 * We don't update ff_size yet since we don't want pageins to
540 * be able to see uninitialized data between the old and new
541 * EOF, until cluster_write has completed and initialized that
542 * part of the file.
543 *
544 * The vnode pager relies on the file size last given to UBC via
545 * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
546 * ff_size (whichever is larger). NOTE: ff_new_size is always
547 * zero, unless we are extending the file via write.
548 */
549 if (filesize > fp->ff_size) {
550 fp->ff_new_size = filesize;
551 ubc_setsize(vp, filesize);
552 }
553 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
554 tail_off, lflag | IO_NOZERODIRTY);
555 if (retval) {
556 fp->ff_new_size = 0; /* no longer extending; use ff_size */
557 if (filesize > origFileSize) {
558 ubc_setsize(vp, origFileSize);
559 }
560 goto ioerr_exit;
561 }
562
563 if (filesize > origFileSize) {
564 fp->ff_size = filesize;
565
566 /* Files that are changing size are not hot file candidates. */
567 if (hfsmp->hfc_stage == HFC_RECORDING) {
568 fp->ff_bytesread = 0;
569 }
570 }
571 fp->ff_new_size = 0; /* ff_size now has the correct size */
572
573 /* If we wrote some bytes, then touch the change and mod times */
574 if (resid > uio_resid(uio)) {
575 cp->c_touch_chgtime = TRUE;
576 cp->c_touch_modtime = TRUE;
577 }
578 }
579 if (partialwrite) {
580 uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
581 resid += bytesToAdd;
582 }
583
584 // XXXdbg - see radar 4871353 for more info
585 {
586 if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
587 VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
588 }
589 }
590
591ioerr_exit:
592 /*
593	 * If we successfully wrote any data, and we are not the superuser,
594 * we clear the setuid and setgid bits as a precaution against
595 * tampering.
596 */
597 if (cp->c_mode & (S_ISUID | S_ISGID)) {
598 cred = vfs_context_ucred(ap->a_context);
599 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
600 if (!cnode_locked) {
601 hfs_lock(cp, HFS_FORCE_LOCK);
602 cnode_locked = 1;
603 }
604 cp->c_mode &= ~(S_ISUID | S_ISGID);
605 }
606 }
607 if (retval) {
608 if (ioflag & IO_UNIT) {
609 if (!cnode_locked) {
610 hfs_lock(cp, HFS_FORCE_LOCK);
611 cnode_locked = 1;
612 }
613 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
614 0, 0, ap->a_context);
615			// LP64todo - fix this! resid needs to be user_ssize_t
616 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
617 uio_setresid(uio, resid);
618 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
619 }
620 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
621 if (!cnode_locked) {
622 hfs_lock(cp, HFS_FORCE_LOCK);
623 cnode_locked = 1;
624 }
625 retval = hfs_update(vp, TRUE);
626 }
627 /* Updating vcbWrCnt doesn't need to be atomic. */
628 hfsmp->vcbWrCnt++;
629
630 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
631 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
632exit:
633 if (cnode_locked)
634 hfs_unlock(cp);
635 hfs_unlock_truncate(cp, exclusive_lock);
636 return (retval);
637}
638
639/* support for the "bulk-access" fcntl */
640
641#define CACHE_LEVELS 16
642#define NUM_CACHE_ENTRIES (64*16)
643#define PARENT_IDS_FLAG 0x100
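/*
 * PARENT_IDS_FLAG can be set in the 'flags' field of the access structures
 * below; when it is set, the caller's file_ids array holds parent directory
 * ids rather than leaf ids (see the check_leaf handling in
 * do_bulk_access_check).
 */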
644
645struct access_cache {
646 int numcached;
647 int cachehits; /* these two for statistics gathering */
648 int lookups;
649 unsigned int *acache;
650 unsigned char *haveaccess;
651};
652
653struct access_t {
654 uid_t uid; /* IN: effective user id */
655 short flags; /* IN: access requested (i.e. R_OK) */
656 short num_groups; /* IN: number of groups user belongs to */
657 int num_files; /* IN: number of files to process */
658 int *file_ids; /* IN: array of file ids */
659 gid_t *groups; /* IN: array of groups */
660 short *access; /* OUT: access info for each file (0 for 'has access') */
661} __attribute__((unavailable)); // this structure is for reference purposes only
662
663struct user32_access_t {
664 uid_t uid; /* IN: effective user id */
665 short flags; /* IN: access requested (i.e. R_OK) */
666 short num_groups; /* IN: number of groups user belongs to */
667 int num_files; /* IN: number of files to process */
668 user32_addr_t file_ids; /* IN: array of file ids */
669 user32_addr_t groups; /* IN: array of groups */
670 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
671};
672
673struct user64_access_t {
674 uid_t uid; /* IN: effective user id */
675 short flags; /* IN: access requested (i.e. R_OK) */
676 short num_groups; /* IN: number of groups user belongs to */
677 int num_files; /* IN: number of files to process */
678 user64_addr_t file_ids; /* IN: array of file ids */
679 user64_addr_t groups; /* IN: array of groups */
680 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
681};
682
683
684// these are the "extended" versions of the above structures
685// note that it is crucial that they be a different size than
686// the regular versions
687struct ext_access_t {
688 uint32_t flags; /* IN: access requested (i.e. R_OK) */
689 uint32_t num_files; /* IN: number of files to process */
690 uint32_t map_size; /* IN: size of the bit map */
691 uint32_t *file_ids; /* IN: Array of file ids */
692 char *bitmap; /* OUT: hash-bitmap of interesting directory ids */
693 short *access; /* OUT: access info for each file (0 for 'has access') */
694 uint32_t num_parents; /* future use */
695 cnid_t *parents; /* future use */
696} __attribute__((unavailable)); // this structure is for reference purposes only
697
698struct user32_ext_access_t {
699 uint32_t flags; /* IN: access requested (i.e. R_OK) */
700 uint32_t num_files; /* IN: number of files to process */
701 uint32_t map_size; /* IN: size of the bit map */
702 user32_addr_t file_ids; /* IN: Array of file ids */
703 user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
704 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
705 uint32_t num_parents; /* future use */
706 user32_addr_t parents; /* future use */
707};
708
709struct user64_ext_access_t {
710 uint32_t flags; /* IN: access requested (i.e. R_OK) */
711 uint32_t num_files; /* IN: number of files to process */
712 uint32_t map_size; /* IN: size of the bit map */
713 user64_addr_t file_ids; /* IN: array of file ids */
714	 user64_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
715 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
716 uint32_t num_parents;/* future use */
717 user64_addr_t parents;/* future use */
718};
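/*
 * These user32/user64 variants arrive through the HFS_BULKACCESS_FSCTL and
 * HFS_EXT_BULKACCESS_FSCTL commands handled in hfs_vnop_ioctl();
 * do_bulk_access_check() distinguishes them by the size of the incoming
 * argument and up-casts the 32-bit forms into a user64_ext_access_t before
 * doing any work.
 */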
719
720
721/*
722 * Perform a binary search for the given parent_id. Return value is
723 * the index if there is a match. If no_match_indexp is non-NULL it
724 * is assigned the index at which to insert the item (it is set
725 * whether or not a match was found).
726 */
727static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
728{
729 int index=-1;
730 unsigned int lo=0;
731
732 do {
733 unsigned int mid = ((hi - lo)/2) + lo;
734 unsigned int this_id = array[mid];
735
736 if (parent_id == this_id) {
737 hi = mid;
738 break;
739 }
740
741 if (parent_id < this_id) {
742 hi = mid;
743 continue;
744 }
745
746 if (parent_id > this_id) {
747 lo = mid + 1;
748 continue;
749 }
750 } while(lo < hi);
751
752 /* check if lo and hi converged on the match */
753 if (parent_id == array[hi]) {
754 index = hi;
755 }
756
757 if (no_match_indexp) {
758 *no_match_indexp = hi;
759 }
760
761 return index;
762}
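/*
 * Illustrative example: with array = {2, 5, 9} and hi = 2, searching for 5
 * returns index 1; searching for 7 returns -1 and stores 2 in
 * *no_match_indexp, the slot at which 7 would be inserted to keep the
 * array sorted.
 */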
763
764
765static int
766lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
767{
768 unsigned int hi;
769 int matches = 0;
770 int index, no_match_index;
771
772 if (cache->numcached == 0) {
773 *indexp = 0;
774 return 0; // table is empty, so insert at index=0 and report no match
775 }
776
777 if (cache->numcached > NUM_CACHE_ENTRIES) {
778 /*printf("hfs: EGAD! numcached is %d... cut our losses and trim to %d\n",
779 cache->numcached, NUM_CACHE_ENTRIES);*/
780 cache->numcached = NUM_CACHE_ENTRIES;
781 }
782
783 hi = cache->numcached - 1;
784
785 index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);
786
787 /* if no existing entry found, find index for new one */
788 if (index == -1) {
789 index = no_match_index;
790 matches = 0;
791 } else {
792 matches = 1;
793 }
794
795 *indexp = index;
796 return matches;
797}
798
799/*
800 * Add a node to the access_cache at the given index (or do a lookup first
801 * to find the index if -1 is passed in). We currently do a replace rather
802 * than an insert if the cache is full.
803 */
804static void
805add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
806{
807 int lookup_index = -1;
808
809 /* need to do a lookup first if -1 passed for index */
810 if (index == -1) {
811 if (lookup_bucket(cache, &lookup_index, nodeID)) {
812 if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
813 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
814 cache->haveaccess[lookup_index] = access;
815 }
816
817 /* mission accomplished */
818 return;
819 } else {
820 index = lookup_index;
821 }
822
823 }
824
825 /* if the cache is full, do a replace rather than an insert */
826 if (cache->numcached >= NUM_CACHE_ENTRIES) {
827 //printf("hfs: cache is full (%d). replace at index %d\n", cache->numcached, index);
828 cache->numcached = NUM_CACHE_ENTRIES-1;
829
830 if (index > cache->numcached) {
831 // printf("hfs: index %d pinned to %d\n", index, cache->numcached);
832 index = cache->numcached;
833 }
834 }
835
836 if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
837 index++;
838 }
839
840 if (index >= 0 && index < cache->numcached) {
841 /* only do bcopy if we're inserting */
842 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
843 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
844 }
845
846 cache->acache[index] = nodeID;
847 cache->haveaccess[index] = access;
848 cache->numcached++;
849}
850
851
852struct cinfo {
853 uid_t uid;
854 gid_t gid;
855 mode_t mode;
856 cnid_t parentcnid;
857 u_int16_t recflags;
858};
859
860static int
861snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
862{
863 struct cinfo *cip = (struct cinfo *)arg;
864
865 cip->uid = attrp->ca_uid;
866 cip->gid = attrp->ca_gid;
867 cip->mode = attrp->ca_mode;
868 cip->parentcnid = descp->cd_parentcnid;
869 cip->recflags = attrp->ca_recflags;
870
871 return (0);
872}
873
874/*
875 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
876 * isn't incore, then go to the catalog.
877 */
878static int
879do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
880 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
881{
882 int error = 0;
883
884 /* if this id matches the one the fsctl was called with, skip the lookup */
885 if (cnid == skip_cp->c_cnid) {
886 cnattrp->ca_uid = skip_cp->c_uid;
887 cnattrp->ca_gid = skip_cp->c_gid;
888 cnattrp->ca_mode = skip_cp->c_mode;
889 cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
890 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
891 } else {
892 struct cinfo c_info;
893
894	 /* otherwise, check the cnode hash in case the file/dir is incore */
895 if (hfs_chash_snoop(hfsmp, cnid, snoop_callback, &c_info) == 0) {
896 cnattrp->ca_uid = c_info.uid;
897 cnattrp->ca_gid = c_info.gid;
898 cnattrp->ca_mode = c_info.mode;
899 cnattrp->ca_recflags = c_info.recflags;
900 keyp->hfsPlus.parentID = c_info.parentcnid;
901 } else {
902 int lockflags;
903
904 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
905
906 /* lookup this cnid in the catalog */
907 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
908
909 hfs_systemfile_unlock(hfsmp, lockflags);
910
911 cache->lookups++;
912 }
913 }
914
915 return (error);
916}
917
918
919/*
920 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
921 * up to CACHE_LEVELS as we progress towards the root.
922 */
923static int
924do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
925 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
926 struct vfs_context *my_context,
927 char *bitmap,
928 uint32_t map_size,
929 cnid_t* parents,
930 uint32_t num_parents)
931{
932 int myErr = 0;
933 int myResult;
934 HFSCatalogNodeID thisNodeID;
935 unsigned int myPerms;
936 struct cat_attr cnattr;
937 int cache_index = -1, scope_index = -1, scope_idx_start = -1;
938 CatalogKey catkey;
939
940 int i = 0, ids_to_cache = 0;
941 int parent_ids[CACHE_LEVELS];
942
943 thisNodeID = nodeID;
944 while (thisNodeID >= kRootDirID) {
945 myResult = 0; /* default to "no access" */
946
947 /* check the cache before resorting to hitting the catalog */
948
949 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
950 * to look any further after hitting cached dir */
951
952 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
953 cache->cachehits++;
954 myErr = cache->haveaccess[cache_index];
955 if (scope_index != -1) {
956 if (myErr == ESRCH) {
957 myErr = 0;
958 }
959 } else {
960 scope_index = 0; // so we'll just use the cache result
961 scope_idx_start = ids_to_cache;
962 }
963 myResult = (myErr == 0) ? 1 : 0;
964 goto ExitThisRoutine;
965 }
966
967
968 if (parents) {
969 int tmp;
970 tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
971 if (scope_index == -1)
972 scope_index = tmp;
973 if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
974 scope_idx_start = ids_to_cache;
975 }
976 }
977
978 /* remember which parents we want to cache */
979 if (ids_to_cache < CACHE_LEVELS) {
980 parent_ids[ids_to_cache] = thisNodeID;
981 ids_to_cache++;
982 }
983 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
984 if (bitmap && map_size) {
985 bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
986 }
987
988
989 /* do the lookup (checks the cnode hash, then the catalog) */
990 myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
991 if (myErr) {
992 goto ExitThisRoutine; /* no access */
993 }
994
995 /* Root always gets access. */
996 if (suser(myp_ucred, NULL) == 0) {
997 thisNodeID = catkey.hfsPlus.parentID;
998 myResult = 1;
999 continue;
1000 }
1001
1002 // if the thing has acl's, do the full permission check
1003 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1004 struct vnode *vp;
1005
1006 /* get the vnode for this cnid */
1007 myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0);
1008 if ( myErr ) {
1009 myResult = 0;
1010 goto ExitThisRoutine;
1011 }
1012
1013 thisNodeID = VTOC(vp)->c_parentcnid;
1014
1015 hfs_unlock(VTOC(vp));
1016
1017 if (vnode_vtype(vp) == VDIR) {
1018 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
1019 } else {
1020 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
1021 }
1022
1023 vnode_put(vp);
1024 if (myErr) {
1025 myResult = 0;
1026 goto ExitThisRoutine;
1027 }
1028 } else {
1029 unsigned int flags;
1030
1031 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1032 cnattr.ca_mode, hfsmp->hfs_mp,
1033 myp_ucred, theProcPtr);
1034
1035 if (cnattr.ca_mode & S_IFDIR) {
1036 flags = R_OK | X_OK;
1037 } else {
1038 flags = R_OK;
1039 }
1040 if ( (myPerms & flags) != flags) {
1041 myResult = 0;
1042 myErr = EACCES;
1043 goto ExitThisRoutine; /* no access */
1044 }
1045
1046 /* up the hierarchy we go */
1047 thisNodeID = catkey.hfsPlus.parentID;
1048 }
1049 }
1050
1051 /* if here, we have access to this node */
1052 myResult = 1;
1053
1054 ExitThisRoutine:
1055 if (parents && myErr == 0 && scope_index == -1) {
1056 myErr = ESRCH;
1057 }
1058
1059 if (myErr) {
1060 myResult = 0;
1061 }
1062 *err = myErr;
1063
1064 /* cache the parent directory(ies) */
1065 for (i = 0; i < ids_to_cache; i++) {
1066 if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
1067 add_node(cache, -1, parent_ids[i], ESRCH);
1068 } else {
1069 add_node(cache, -1, parent_ids[i], myErr);
1070 }
1071 }
1072
1073 return (myResult);
1074}
1075
1076static int
1077do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
1078 struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
1079{
1080 boolean_t is64bit;
1081
1082 /*
1083	 * NOTE: on entry, the vnode is locked. In case this vnode
1084	 * happens to be in our list of file_ids, we'll note it so we
1085	 * avoid calling hfs_chashget_nowait() on that id, as that
1086	 * would cause a "locking against myself" panic.
1087 */
1088 Boolean check_leaf = true;
1089
1090 struct user64_ext_access_t *user_access_structp;
1091 struct user64_ext_access_t tmp_user_access;
1092 struct access_cache cache;
1093
1094 int error = 0, prev_parent_check_ok=1;
1095 unsigned int i;
1096
1097 short flags;
1098 unsigned int num_files = 0;
1099 int map_size = 0;
1100 int num_parents = 0;
1101 int *file_ids=NULL;
1102 short *access=NULL;
1103 char *bitmap=NULL;
1104 cnid_t *parents=NULL;
1105 int leaf_index;
1106
1107 cnid_t cnid;
1108 cnid_t prevParent_cnid = 0;
1109 unsigned int myPerms;
1110 short myaccess = 0;
1111 struct cat_attr cnattr;
1112 CatalogKey catkey;
1113 struct cnode *skip_cp = VTOC(vp);
1114 kauth_cred_t cred = vfs_context_ucred(context);
1115 proc_t p = vfs_context_proc(context);
1116
1117 is64bit = proc_is64bit(p);
1118
1119 /* initialize the local cache and buffers */
1120 cache.numcached = 0;
1121 cache.cachehits = 0;
1122 cache.lookups = 0;
1123 cache.acache = NULL;
1124 cache.haveaccess = NULL;
1125
1126 /* struct copyin done during dispatch... need to copy file_id array separately */
1127 if (ap->a_data == NULL) {
1128 error = EINVAL;
1129 goto err_exit_bulk_access;
1130 }
1131
1132 if (is64bit) {
1133 if (arg_size != sizeof(struct user64_ext_access_t)) {
1134 error = EINVAL;
1135 goto err_exit_bulk_access;
1136 }
1137
1138 user_access_structp = (struct user64_ext_access_t *)ap->a_data;
1139
1140 } else if (arg_size == sizeof(struct user32_access_t)) {
1141 struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;
1142
1143 // convert an old style bulk-access struct to the new style
1144 tmp_user_access.flags = accessp->flags;
1145 tmp_user_access.num_files = accessp->num_files;
1146 tmp_user_access.map_size = 0;
1147 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1148 tmp_user_access.bitmap = USER_ADDR_NULL;
1149 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1150 tmp_user_access.num_parents = 0;
1151 user_access_structp = &tmp_user_access;
1152
1153 } else if (arg_size == sizeof(struct user32_ext_access_t)) {
1154 struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;
1155
1156 // up-cast from a 32-bit version of the struct
1157 tmp_user_access.flags = accessp->flags;
1158 tmp_user_access.num_files = accessp->num_files;
1159 tmp_user_access.map_size = accessp->map_size;
1160 tmp_user_access.num_parents = accessp->num_parents;
1161
1162 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1163 tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
1164 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1165 tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);
1166
1167 user_access_structp = &tmp_user_access;
1168 } else {
1169 error = EINVAL;
1170 goto err_exit_bulk_access;
1171 }
1172
1173 map_size = user_access_structp->map_size;
1174
1175 num_files = user_access_structp->num_files;
1176
1177	 num_parents = user_access_structp->num_parents;
1178
1179 if (num_files < 1) {
1180 goto err_exit_bulk_access;
1181 }
1182 if (num_files > 1024) {
1183 error = EINVAL;
1184 goto err_exit_bulk_access;
1185 }
1186
1187 if (num_parents > 1024) {
1188 error = EINVAL;
1189 goto err_exit_bulk_access;
1190 }
1191
1192 file_ids = (int *) kalloc(sizeof(int) * num_files);
1193 access = (short *) kalloc(sizeof(short) * num_files);
1194 if (map_size) {
1195 bitmap = (char *) kalloc(sizeof(char) * map_size);
1196 }
1197
1198 if (num_parents) {
1199 parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
1200 }
1201
1202 cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
1203 cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1204
1205 if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
1206 if (file_ids) {
1207 kfree(file_ids, sizeof(int) * num_files);
1208 }
1209 if (bitmap) {
1210 kfree(bitmap, sizeof(char) * map_size);
1211 }
1212 if (access) {
1213 kfree(access, sizeof(short) * num_files);
1214 }
1215 if (cache.acache) {
1216 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1217 }
1218 if (cache.haveaccess) {
1219 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1220 }
1221 if (parents) {
1222 kfree(parents, sizeof(cnid_t) * num_parents);
1223 }
1224 return ENOMEM;
1225 }
1226
1227 // make sure the bitmap is zero'ed out...
1228 if (bitmap) {
1229 bzero(bitmap, (sizeof(char) * map_size));
1230 }
1231
1232 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1233 num_files * sizeof(int)))) {
1234 goto err_exit_bulk_access;
1235 }
1236
1237 if (num_parents) {
1238 if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
1239 num_parents * sizeof(cnid_t)))) {
1240 goto err_exit_bulk_access;
1241 }
1242 }
1243
1244 flags = user_access_structp->flags;
1245 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1246 flags = R_OK;
1247 }
1248
1249 /* check if we've been passed leaf node ids or parent ids */
1250 if (flags & PARENT_IDS_FLAG) {
1251 check_leaf = false;
1252 }
1253
1254 /* Check access to each file_id passed in */
1255 for (i = 0; i < num_files; i++) {
1256 leaf_index=-1;
1257 cnid = (cnid_t) file_ids[i];
1258
1259 /* root always has access */
1260 if ((!parents) && (!suser(cred, NULL))) {
1261 access[i] = 0;
1262 continue;
1263 }
1264
1265 if (check_leaf) {
1266 /* do the lookup (checks the cnode hash, then the catalog) */
1267 error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
1268 if (error) {
1269 access[i] = (short) error;
1270 continue;
1271 }
1272
1273 if (parents) {
1274 // Check if the leaf matches one of the parent scopes
1275 leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
1276 if (leaf_index >= 0 && parents[leaf_index] == cnid)
1277 prev_parent_check_ok = 0;
1278 else if (leaf_index >= 0)
1279 prev_parent_check_ok = 1;
1280 }
1281
1282 // if the thing has acl's, do the full permission check
1283 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1284 struct vnode *cvp;
1285 int myErr = 0;
1286 /* get the vnode for this cnid */
1287 myErr = hfs_vget(hfsmp, cnid, &cvp, 0);
1288 if ( myErr ) {
1289 access[i] = myErr;
1290 continue;
1291 }
1292
1293 hfs_unlock(VTOC(cvp));
1294
1295 if (vnode_vtype(cvp) == VDIR) {
1296 myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
1297 } else {
1298 myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
1299 }
1300
1301 vnode_put(cvp);
1302 if (myErr) {
1303 access[i] = myErr;
1304 continue;
1305 }
1306 } else {
1307 /* before calling CheckAccess(), check the target file for read access */
1308 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1309 cnattr.ca_mode, hfsmp->hfs_mp, cred, p);
1310
1311 /* fail fast if no access */
1312 if ((myPerms & flags) == 0) {
1313 access[i] = EACCES;
1314 continue;
1315 }
1316 }
1317 } else {
1318 /* we were passed an array of parent ids */
1319 catkey.hfsPlus.parentID = cnid;
1320 }
1321
1322 /* if the last guy had the same parent and had access, we're done */
1323 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
1324 cache.cachehits++;
1325 access[i] = 0;
1326 continue;
1327 }
1328
1329 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1330 skip_cp, p, cred, context,bitmap, map_size, parents, num_parents);
1331
1332 if (myaccess || (error == ESRCH && leaf_index != -1)) {
1333 access[i] = 0; // have access.. no errors to report
1334 } else {
1335 access[i] = (error != 0 ? (short) error : EACCES);
1336 }
1337
1338 prevParent_cnid = catkey.hfsPlus.parentID;
1339 }
1340
1341 /* copyout the access array */
1342 if ((error = copyout((caddr_t)access, user_access_structp->access,
1343 num_files * sizeof (short)))) {
1344 goto err_exit_bulk_access;
1345 }
1346 if (map_size && bitmap) {
1347 if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
1348 map_size * sizeof (char)))) {
1349 goto err_exit_bulk_access;
1350 }
1351 }
1352
1353
1354 err_exit_bulk_access:
1355
1356 //printf("hfs: on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1357
1358 if (file_ids)
1359 kfree(file_ids, sizeof(int) * num_files);
1360 if (parents)
1361 kfree(parents, sizeof(cnid_t) * num_parents);
1362 if (bitmap)
1363 kfree(bitmap, sizeof(char) * map_size);
1364 if (access)
1365 kfree(access, sizeof(short) * num_files);
1366 if (cache.acache)
1367 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1368 if (cache.haveaccess)
1369 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1370
1371 return (error);
1372}
1373
1374
1375/* end "bulk-access" support */
1376
1377
1378/*
1379 * Callback for use with freeze ioctl.
1380 */
1381static int
1382hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
1383{
1384 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
1385
1386 return 0;
1387}
1388
1389/*
1390 * Control filesystem operating characteristics.
1391 */
1392int
1393hfs_vnop_ioctl( struct vnop_ioctl_args /* {
1394 vnode_t a_vp;
1395 int a_command;
1396 caddr_t a_data;
1397 int a_fflag;
1398 vfs_context_t a_context;
1399 } */ *ap)
1400{
1401 struct vnode * vp = ap->a_vp;
1402 struct hfsmount *hfsmp = VTOHFS(vp);
1403 vfs_context_t context = ap->a_context;
1404 kauth_cred_t cred = vfs_context_ucred(context);
1405 proc_t p = vfs_context_proc(context);
1406 struct vfsstatfs *vfsp;
1407 boolean_t is64bit;
1408 off_t jnl_start, jnl_size;
1409 struct hfs_journal_info *jip;
1410#if HFS_COMPRESSION
1411 int compressed = 0;
1412 off_t uncompressed_size = -1;
1413 int decmpfs_error = 0;
1414
1415 if (ap->a_command == F_RDADVISE) {
1416 /* we need to inspect the decmpfs state of the file as early as possible */
1417 compressed = hfs_file_is_compressed(VTOC(vp), 0);
1418 if (compressed) {
1419 if (VNODE_IS_RSRC(vp)) {
1420 /* if this is the resource fork, treat it as if it were empty */
1421 uncompressed_size = 0;
1422 } else {
1423 decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
1424 if (decmpfs_error != 0) {
1425 /* failed to get the uncompressed size, we'll check for this later */
1426 uncompressed_size = -1;
1427 }
1428 }
1429 }
1430 }
1431#endif /* HFS_COMPRESSION */
1432
1433 is64bit = proc_is64bit(p);
1434
1435 switch (ap->a_command) {
1436
1437 case HFS_GETPATH:
1438 {
1439 struct vnode *file_vp;
1440 cnid_t cnid;
1441 int outlen;
1442 char *bufptr;
1443 int error;
1444
1445 /* Caller must be owner of file system. */
1446 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1447 if (suser(cred, NULL) &&
1448 kauth_cred_getuid(cred) != vfsp->f_owner) {
1449 return (EACCES);
1450 }
1451 /* Target vnode must be file system's root. */
1452 if (!vnode_isvroot(vp)) {
1453 return (EINVAL);
1454 }
1455 bufptr = (char *)ap->a_data;
1456 cnid = strtoul(bufptr, NULL, 10);
1457
1458 /* We need to call hfs_vfs_vget to leverage the code that will
1459 * fix the origin list for us if needed, as opposed to calling
1460 * hfs_vget, since we will need the parent for build_path call.
1461 */
1462
1463 if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
1464 return (error);
1465 }
1466 error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);
1467 vnode_put(file_vp);
1468
1469 return (error);
1470 }
1471
1472 case HFS_PREV_LINK:
1473 case HFS_NEXT_LINK:
1474 {
1475 cnid_t linkfileid;
1476 cnid_t nextlinkid;
1477 cnid_t prevlinkid;
1478 int error;
1479
1480 /* Caller must be owner of file system. */
1481 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1482 if (suser(cred, NULL) &&
1483 kauth_cred_getuid(cred) != vfsp->f_owner) {
1484 return (EACCES);
1485 }
1486 /* Target vnode must be file system's root. */
1487 if (!vnode_isvroot(vp)) {
1488 return (EINVAL);
1489 }
1490 linkfileid = *(cnid_t *)ap->a_data;
1491 if (linkfileid < kHFSFirstUserCatalogNodeID) {
1492 return (EINVAL);
1493 }
1494 if ((error = hfs_lookuplink(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
1495 return (error);
1496 }
1497 if (ap->a_command == HFS_NEXT_LINK) {
1498 *(cnid_t *)ap->a_data = nextlinkid;
1499 } else {
1500 *(cnid_t *)ap->a_data = prevlinkid;
1501 }
1502 return (0);
1503 }
1504
1505 case HFS_RESIZE_PROGRESS: {
1506
1507 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1508 if (suser(cred, NULL) &&
1509 kauth_cred_getuid(cred) != vfsp->f_owner) {
1510 return (EACCES); /* must be owner of file system */
1511 }
1512 if (!vnode_isvroot(vp)) {
1513 return (EINVAL);
1514 }
1515 /* file system must not be mounted read-only */
1516 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1517 return (EROFS);
1518 }
1519
1520 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
1521 }
1522
1523 case HFS_RESIZE_VOLUME: {
1524 u_int64_t newsize;
1525 u_int64_t cursize;
1526
1527 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1528 if (suser(cred, NULL) &&
1529 kauth_cred_getuid(cred) != vfsp->f_owner) {
1530 return (EACCES); /* must be owner of file system */
1531 }
1532 if (!vnode_isvroot(vp)) {
1533 return (EINVAL);
1534 }
1535
1536 /* filesystem must not be mounted read only */
1537 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1538 return (EROFS);
1539 }
1540 newsize = *(u_int64_t *)ap->a_data;
1541 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
1542
1543 if (newsize > cursize) {
1544 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
1545 } else if (newsize < cursize) {
1546 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
1547 } else {
1548 return (0);
1549 }
1550 }
1551 case HFS_CHANGE_NEXT_ALLOCATION: {
1552 int error = 0; /* Assume success */
1553 u_int32_t location;
1554
1555 if (vnode_vfsisrdonly(vp)) {
1556 return (EROFS);
1557 }
1558 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1559 if (suser(cred, NULL) &&
1560 kauth_cred_getuid(cred) != vfsp->f_owner) {
1561 return (EACCES); /* must be owner of file system */
1562 }
1563 if (!vnode_isvroot(vp)) {
1564 return (EINVAL);
1565 }
1566 HFS_MOUNT_LOCK(hfsmp, TRUE);
1567 location = *(u_int32_t *)ap->a_data;
1568 if ((location >= hfsmp->allocLimit) &&
1569 (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
1570 error = EINVAL;
1571 goto fail_change_next_allocation;
1572 }
1573 /* Return previous value. */
1574 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
1575 if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
1576 /* On magic value for location, set nextAllocation to next block
1577 * after metadata zone and set flag in mount structure to indicate
1578 * that nextAllocation should not be updated again.
1579 */
1580 if (hfsmp->hfs_metazone_end != 0) {
1581 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
1582 }
1583 hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1584 } else {
1585 hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1586 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
1587 }
1588 MarkVCBDirty(hfsmp);
1589fail_change_next_allocation:
1590 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1591 return (error);
1592 }
1593
1594#ifdef HFS_SPARSE_DEV
1595 case HFS_SETBACKINGSTOREINFO: {
1596 struct vnode * bsfs_rootvp;
1597 struct vnode * di_vp;
1598 struct hfs_backingstoreinfo *bsdata;
1599 int error = 0;
1600
1601 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1602 return (EROFS);
1603 }
1604 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
1605 return (EALREADY);
1606 }
1607 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1608 if (suser(cred, NULL) &&
1609 kauth_cred_getuid(cred) != vfsp->f_owner) {
1610 return (EACCES); /* must be owner of file system */
1611 }
1612 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
1613 if (bsdata == NULL) {
1614 return (EINVAL);
1615 }
1616 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
1617 return (error);
1618 }
1619 if ((error = vnode_getwithref(di_vp))) {
1620 file_drop(bsdata->backingfd);
1621 return(error);
1622 }
1623
1624 if (vnode_mount(vp) == vnode_mount(di_vp)) {
1625 (void)vnode_put(di_vp);
1626 file_drop(bsdata->backingfd);
1627 return (EINVAL);
1628 }
1629
1630 /*
1631 * Obtain the backing fs root vnode and keep a reference
1632 * on it. This reference will be dropped in hfs_unmount.
1633 */
1634 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
1635 if (error) {
1636 (void)vnode_put(di_vp);
1637 file_drop(bsdata->backingfd);
1638 return (error);
1639 }
1640 vnode_ref(bsfs_rootvp);
1641 vnode_put(bsfs_rootvp);
1642
1643 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
1644 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
1645 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
1646 hfsmp->hfs_sparsebandblks *= 4;
1647
1648 vfs_markdependency(hfsmp->hfs_mp);
1649
1650 /*
1651 * If the sparse image is on a sparse image file (as opposed to a sparse
1652 * bundle), then we may need to limit the free space to the maximum size
1653 * of a file on that volume. So we query (using pathconf), and if we get
1654 * a meaningful result, we cache the number of blocks for later use in
1655 * hfs_freeblks().
1656 */
1657 hfsmp->hfs_backingfs_maxblocks = 0;
1658 if (vnode_vtype(di_vp) == VREG) {
1659 int terr;
1660 int hostbits;
1661 terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
1662 if (terr == 0 && hostbits != 0 && hostbits < 64) {
1663 u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;
1664
1665 hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
1666 }
1667 }
1668
1669 (void)vnode_put(di_vp);
1670 file_drop(bsdata->backingfd);
1671 return (0);
1672 }
1673 case HFS_CLRBACKINGSTOREINFO: {
1674 struct vnode * tmpvp;
1675
1676 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1677 if (suser(cred, NULL) &&
1678 kauth_cred_getuid(cred) != vfsp->f_owner) {
1679 return (EACCES); /* must be owner of file system */
1680 }
1681 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1682 return (EROFS);
1683 }
1684
1685 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
1686 hfsmp->hfs_backingfs_rootvp) {
1687
1688 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
1689 tmpvp = hfsmp->hfs_backingfs_rootvp;
1690 hfsmp->hfs_backingfs_rootvp = NULLVP;
1691 hfsmp->hfs_sparsebandblks = 0;
1692 vnode_rele(tmpvp);
1693 }
1694 return (0);
1695 }
1696#endif /* HFS_SPARSE_DEV */
1697
1698 case F_FREEZE_FS: {
1699 struct mount *mp;
1700
1701 mp = vnode_mount(vp);
1702 hfsmp = VFSTOHFS(mp);
1703
1704 if (!(hfsmp->jnl))
1705 return (ENOTSUP);
1706
1707 vfsp = vfs_statfs(mp);
1708
1709 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
1710 !kauth_cred_issuser(cred))
1711 return (EACCES);
1712
1713 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
1714
1715 // flush things before we get started to try and prevent
1716 // dirty data from being paged out while we're frozen.
1717 // note: can't do this after taking the lock as it will
1718 // deadlock against ourselves.
1719 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1720 hfs_global_exclusive_lock_acquire(hfsmp);
1721
1722 // DO NOT call hfs_journal_flush() because that takes a
1723 // shared lock on the global exclusive lock!
1724 journal_flush(hfsmp->jnl);
1725
1726 // don't need to iterate on all vnodes, we just need to
1727 // wait for writes to the system files and the device vnode
1728 if (HFSTOVCB(hfsmp)->extentsRefNum)
1729 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1730 if (HFSTOVCB(hfsmp)->catalogRefNum)
1731 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1732 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1733 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1734 if (hfsmp->hfs_attribute_vp)
1735 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1736 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1737
1738 hfsmp->hfs_freezing_proc = current_proc();
1739
1740 return (0);
1741 }
1742
1743 case F_THAW_FS: {
1744 vfsp = vfs_statfs(vnode_mount(vp));
1745 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
1746 !kauth_cred_issuser(cred))
1747 return (EACCES);
1748
1749 // if we're not the one who froze the fs then we
1750 // can't thaw it.
1751 if (hfsmp->hfs_freezing_proc != current_proc()) {
1752 return EPERM;
1753 }
1754
1755 // NOTE: if you add code here, also go check the
1756 // code that "thaws" the fs in hfs_vnop_close()
1757 //
1758 hfsmp->hfs_freezing_proc = NULL;
1759 hfs_global_exclusive_lock_release(hfsmp);
1760 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1761
1762 return (0);
1763 }
1764
1765 case HFS_BULKACCESS_FSCTL: {
1766 int size;
1767
1768 if (hfsmp->hfs_flags & HFS_STANDARD) {
1769 return EINVAL;
1770 }
1771
1772 if (is64bit) {
1773 size = sizeof(struct user64_access_t);
1774 } else {
1775 size = sizeof(struct user32_access_t);
1776 }
1777
1778 return do_bulk_access_check(hfsmp, vp, ap, size, context);
1779 }
1780
1781 case HFS_EXT_BULKACCESS_FSCTL: {
1782 int size;
1783
1784 if (hfsmp->hfs_flags & HFS_STANDARD) {
1785 return EINVAL;
1786 }
1787
1788 if (is64bit) {
1789 size = sizeof(struct user64_ext_access_t);
1790 } else {
1791 size = sizeof(struct user32_ext_access_t);
1792 }
1793
1794 return do_bulk_access_check(hfsmp, vp, ap, size, context);
1795 }
1796
1797 case HFS_SETACLSTATE: {
1798 int state;
1799
1800 if (ap->a_data == NULL) {
1801 return (EINVAL);
1802 }
1803
1804 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1805 state = *(int *)ap->a_data;
1806
1807 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1808 return (EROFS);
1809 }
1810 // super-user can enable or disable acl's on a volume.
1811 // the volume owner can only enable acl's
1812 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1813 return (EPERM);
1814 }
1815 if (state == 0 || state == 1)
1816 return hfs_set_volxattr(hfsmp, HFS_SETACLSTATE, state);
1817 else
1818 return (EINVAL);
1819 }
1820
1821 case HFS_SET_XATTREXTENTS_STATE: {
1822 int state;
1823
1824 if (ap->a_data == NULL) {
1825 return (EINVAL);
1826 }
1827
1828 state = *(int *)ap->a_data;
1829
1830 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1831 return (EROFS);
1832 }
1833
1834 /* Super-user can enable or disable extent-based extended
1835 * attribute support on a volume
1836 */
1837 if (!is_suser()) {
1838 return (EPERM);
1839 }
1840 if (state == 0 || state == 1)
1841 return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
1842 else
1843 return (EINVAL);
1844 }
1845
1846 case F_FULLFSYNC: {
1847 int error;
1848
1849 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1850 return (EROFS);
1851 }
1852 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1853 if (error == 0) {
1854 error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
1855 hfs_unlock(VTOC(vp));
1856 }
1857
1858 return error;
1859 }
1860
1861 case F_CHKCLEAN: {
1862 register struct cnode *cp;
1863 int error;
1864
1865 if (!vnode_isreg(vp))
1866 return EINVAL;
1867
1868 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1869 if (error == 0) {
1870 cp = VTOC(vp);
1871 /*
1872 * used by a regression test to determine whether
1873 * all the dirty pages (written via write) have been cleaned
1874 * after a call to 'fsync'.
1875 */
1876 error = is_file_clean(vp, VTOF(vp)->ff_size);
1877 hfs_unlock(cp);
1878 }
1879 return (error);
1880 }
1881
1882 case F_RDADVISE: {
1883 register struct radvisory *ra;
1884 struct filefork *fp;
1885 int error;
1886
1887 if (!vnode_isreg(vp))
1888 return EINVAL;
1889
1890 ra = (struct radvisory *)(ap->a_data);
1891 fp = VTOF(vp);
1892
1893 /* Protect against a size change. */
1894 hfs_lock_truncate(VTOC(vp), TRUE);
1895
1896#if HFS_COMPRESSION
1897 if (compressed && (uncompressed_size == -1)) {
1898 /* fetching the uncompressed size failed above, so return the error */
1899 error = decmpfs_error;
1900 } else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
1901 (!compressed && (ra->ra_offset >= fp->ff_size))) {
1902 error = EFBIG;
1903 }
1904#else /* HFS_COMPRESSION */
1905 if (ra->ra_offset >= fp->ff_size) {
1906 error = EFBIG;
1907 }
1908#endif /* HFS_COMPRESSION */
1909 else {
1910 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1911 }
1912
1913 hfs_unlock_truncate(VTOC(vp), TRUE);
1914 return (error);
1915 }
1916
1917 case F_READBOOTSTRAP:
1918 case F_WRITEBOOTSTRAP:
1919 {
1920 struct vnode *devvp = NULL;
1921 user_fbootstraptransfer_t *user_bootstrapp;
1922 int devBlockSize;
1923 int error;
1924 uio_t auio;
1925 daddr64_t blockNumber;
1926 u_int32_t blockOffset;
1927 u_int32_t xfersize;
1928 struct buf *bp;
1929 user_fbootstraptransfer_t user_bootstrap;
1930
1931 if (!vnode_isvroot(vp))
1932 return (EINVAL);
1933 /* LP64 - when the caller is a 64-bit process we are passed a pointer
1934 * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
1935 * fbootstraptransfer_t, which we munge into a user_fbootstraptransfer_t.
1936 */
1937 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1938 return (EROFS);
1939 }
1940 if (is64bit) {
1941 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1942 }
1943 else {
1944 user32_fbootstraptransfer_t *bootstrapp = (user32_fbootstraptransfer_t *)ap->a_data;
1945 user_bootstrapp = &user_bootstrap;
1946 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1947 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1948 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1949 }
1950 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1951 return EINVAL;
1952
1953 devvp = VTOHFS(vp)->hfs_devvp;
1954 auio = uio_create(1, user_bootstrapp->fbt_offset,
1955 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1956 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1957 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1958
1959 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1960
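 /*
 * Transfer the bootstrap area one device block at a time: read the
 * block containing the current offset, copy the overlapping bytes
 * to or from the user buffer with uiomove(), and then write the
 * block back for F_WRITEBOOTSTRAP (or simply release it for
 * F_READBOOTSTRAP).
 */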
1961 while (uio_resid(auio) > 0) {
1962 blockNumber = uio_offset(auio) / devBlockSize;
1963 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1964 if (error) {
1965 if (bp) buf_brelse(bp);
1966 uio_free(auio);
1967 return error;
1968 };
1969
1970 blockOffset = uio_offset(auio) % devBlockSize;
1971 xfersize = devBlockSize - blockOffset;
1972 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1973 if (error) {
1974 buf_brelse(bp);
1975 uio_free(auio);
1976 return error;
1977 };
1978 if (uio_rw(auio) == UIO_WRITE) {
1979 error = VNOP_BWRITE(bp);
1980 if (error) {
1981 uio_free(auio);
1982 return error;
1983 }
1984 } else {
1985 buf_brelse(bp);
1986 };
1987 };
1988 uio_free(auio);
1989 };
1990 return 0;
1991
1992 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1993 {
1994 if (is64bit) {
1995 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1996 }
1997 else {
1998 *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1999 }
2000 return 0;
2001 }
2002
2003 case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
2004 *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
2005 break;
2006
2007 case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
2008 *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
2009 break;
2010
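 /*
 * The free-space notification thresholds are kept strictly ordered:
 * dangerlimit < warninglimit < desiredlevel. Each of the setters
 * below rejects a value that would violate that ordering.
 */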
2011 case HFS_FSCTL_SET_VERY_LOW_DISK:
2012 if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
2013 return EINVAL;
2014 }
2015
2016 hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
2017 break;
2018
2019 case HFS_FSCTL_SET_LOW_DISK:
2020 if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
2021 || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
2022
2023 return EINVAL;
2024 }
2025
2026 hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
2027 break;
2028
2029 case HFS_FSCTL_SET_DESIRED_DISK:
2030 if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
2031 return EINVAL;
2032 }
2033
2034 hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
2035 break;
2036
2037 case HFS_VOLUME_STATUS:
2038 *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
2039 break;
2040
2041 case HFS_SET_BOOT_INFO:
2042 if (!vnode_isvroot(vp))
2043 return(EINVAL);
2044 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
2045 return(EACCES); /* must be superuser or owner of filesystem */
2046 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2047 return (EROFS);
2048 }
2049 HFS_MOUNT_LOCK(hfsmp, TRUE);
2050 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
2051 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2052 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
2053 break;
2054
2055 case HFS_GET_BOOT_INFO:
2056 if (!vnode_isvroot(vp))
2057 return(EINVAL);
2058 HFS_MOUNT_LOCK(hfsmp, TRUE);
2059 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
2060 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2061 break;
2062
2063 case HFS_MARK_BOOT_CORRUPT:
2064 /* Mark the boot volume corrupt by setting
2065 * kHFSVolumeInconsistentBit in the volume header. This will
2066 * force fsck_hfs on next mount.
2067 */
2068 if (!is_suser()) {
2069 return EACCES;
2070 }
2071
2072 /* Allowed only on the root vnode of the boot volume */
2073 if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
2074 !vnode_isvroot(vp)) {
2075 return EINVAL;
2076 }
2077 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2078 return (EROFS);
2079 }
2080 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
2081 hfs_mark_volume_inconsistent(hfsmp);
2082 break;
2083
2084 case HFS_FSCTL_GET_JOURNAL_INFO:
2085 jip = (struct hfs_journal_info*)ap->a_data;
2086
2087 if (vp == NULLVP)
2088 return EINVAL;
2089
2090 if (hfsmp->jnl == NULL) {
2091 jnl_start = 0;
2092 jnl_size = 0;
2093 } else {
2094 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
2095 jnl_size = (off_t)hfsmp->jnl_size;
2096 }
2097
2098 jip->jstart = jnl_start;
2099 jip->jsize = jnl_size;
2100 break;
2101
2102 case HFS_SET_ALWAYS_ZEROFILL: {
2103 struct cnode *cp = VTOC(vp);
2104
2105 if (*(int *)ap->a_data) {
2106 cp->c_flag |= C_ALWAYS_ZEROFILL;
2107 } else {
2108 cp->c_flag &= ~C_ALWAYS_ZEROFILL;
2109 }
2110 break;
2111 }
2112
2113 default:
2114 return (ENOTTY);
2115 }
2116
2117 return 0;
2118}
2119
2120/*
2121 * select
2122 */
2123int
2124hfs_vnop_select(__unused struct vnop_select_args *ap)
2125/*
2126 struct vnop_select_args {
2127 vnode_t a_vp;
2128 int a_which;
2129 int a_fflags;
2130 void *a_wql;
2131 vfs_context_t a_context;
2132 };
2133*/
2134{
2135 /*
2136 * We should really check to see if I/O is possible.
2137 */
2138 return (1);
2139}
2140
2141/*
2142 * Converts a logical block number to a physical block, and optionally returns
2143 * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
2144 * The physical block number is based on the device block size, currently 512 bytes.
2145 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
2146 */
2147int
2148hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
2149{
2150 struct filefork *fp = VTOF(vp);
2151 struct hfsmount *hfsmp = VTOHFS(vp);
2152 int retval = E_NONE;
2153 u_int32_t logBlockSize;
2154 size_t bytesContAvail = 0;
2155 off_t blockposition;
2156 int lockExtBtree;
2157 int lockflags = 0;
2158
2159 /*
2160 * Check for underlying vnode requests and ensure that logical
2161 * to physical mapping is requested.
2162 */
2163 if (vpp != NULL)
2164 *vpp = hfsmp->hfs_devvp;
2165 if (bnp == NULL)
2166 return (0);
2167
2168 logBlockSize = GetLogicalBlockSize(vp);
2169 blockposition = (off_t)bn * logBlockSize;
2170
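 /*
 * If the fork has extents in the overflow extents B-tree,
 * MapFileBlockC() may need to consult it, so take the extents
 * lock for the lookup below.
 */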
2171 lockExtBtree = overflow_extents(fp);
2172
2173 if (lockExtBtree)
2174 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
2175
2176 retval = MacToVFSError(
2177 MapFileBlockC (HFSTOVCB(hfsmp),
2178 (FCB*)fp,
2179 MAXPHYSIO,
2180 blockposition,
2181 bnp,
2182 &bytesContAvail));
2183
2184 if (lockExtBtree)
2185 hfs_systemfile_unlock(hfsmp, lockflags);
2186
2187 if (retval == E_NONE) {
2188 /* Figure out how many read ahead blocks there are */
2189 if (runp != NULL) {
2190 if (can_cluster(logBlockSize)) {
2191 /* Make sure this result never goes negative: */
2192 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
2193 } else {
2194 *runp = 0;
2195 }
2196 }
2197 }
2198 return (retval);
2199}
2200
2201/*
2202 * Convert logical block number to file offset.
2203 */
2204int
2205hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
2206/*
2207 struct vnop_blktooff_args {
2208 vnode_t a_vp;
2209 daddr64_t a_lblkno;
2210 off_t *a_offset;
2211 };
2212*/
2213{
2214 if (ap->a_vp == NULL)
2215 return (EINVAL);
2216 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
2217
2218 return(0);
2219}
2220
2221/*
2222 * Convert file offset to logical block number.
2223 */
2224int
2225hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
2226/*
2227 struct vnop_offtoblk_args {
2228 vnode_t a_vp;
2229 off_t a_offset;
2230 daddr64_t *a_lblkno;
2231 };
2232*/
2233{
2234 if (ap->a_vp == NULL)
2235 return (EINVAL);
2236 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
2237
2238 return(0);
2239}
2240
2241/*
2242 * Map file offset to physical block number.
2243 *
2244 * If this function is called for a write operation, and if the file
2245 * had virtual blocks allocated (delayed allocation), real blocks
2246 * are allocated by calling ExtendFileC().
2247 *
2248 * If this function is called for a read operation, and if the file
2249 * had virtual blocks allocated (delayed allocation), no change
2250 * to the size of the file is made, and if required, the rangelist is
2251 * searched for a mapping.
2252 *
2253 * System file cnodes are expected to be locked (shared or exclusive).
2254 */
2255int
2256hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
2257/*
2258 struct vnop_blockmap_args {
2259 vnode_t a_vp;
2260 off_t a_foffset;
2261 size_t a_size;
2262 daddr64_t *a_bpn;
2263 size_t *a_run;
2264 void *a_poff;
2265 int a_flags;
2266 vfs_context_t a_context;
2267 };
2268*/
2269{
2270 struct vnode *vp = ap->a_vp;
2271 struct cnode *cp;
2272 struct filefork *fp;
2273 struct hfsmount *hfsmp;
2274 size_t bytesContAvail = 0;
2275 int retval = E_NONE;
2276 int syslocks = 0;
2277 int lockflags = 0;
2278 struct rl_entry *invalid_range;
2279 enum rl_overlaptype overlaptype;
2280 int started_tr = 0;
2281 int tooklock = 0;
2282
2283#if HFS_COMPRESSION
2284 if (VNODE_IS_RSRC(vp)) {
2285 /* allow blockmaps to the resource fork */
2286 } else {
2287 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
2288 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
2289 switch(state) {
2290 case FILE_IS_COMPRESSED:
2291 return ENOTSUP;
2292 case FILE_IS_CONVERTING:
2293 /* if FILE_IS_CONVERTING, we allow blockmap */
2294 break;
2295 default:
2296 printf("invalid state %d for compressed file\n", state);
2297 /* fall through */
2298 }
2299 }
2300 }
2301#endif /* HFS_COMPRESSION */
2302
2303 /* Do not allow blockmap operation on a directory */
2304 if (vnode_isdir(vp)) {
2305 return (ENOTSUP);
2306 }
2307
2308 /*
2309 * Check for underlying vnode requests and ensure that logical
2310 * to physical mapping is requested.
2311 */
2312 if (ap->a_bpn == NULL)
2313 return (0);
2314
2315 if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
2316 if (VTOC(vp)->c_lockowner != current_thread()) {
2317 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2318 tooklock = 1;
2319 }
2320 }
2321 hfsmp = VTOHFS(vp);
2322 cp = VTOC(vp);
2323 fp = VTOF(vp);
2324
2325retry:
2326 /* Check virtual blocks only when performing write operation */
2327 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
2328 if (hfs_start_transaction(hfsmp) != 0) {
2329 retval = EINVAL;
2330 goto exit;
2331 } else {
2332 started_tr = 1;
2333 }
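 /*
 * Converting loaned (delayed-allocation) blocks into real blocks
 * below touches both the extents B-tree and the allocation bitmap,
 * so both system files must be locked.
 */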
2334 syslocks = SFL_EXTENTS | SFL_BITMAP;
2335
2336 } else if (overflow_extents(fp)) {
2337 syslocks = SFL_EXTENTS;
2338 }
2339
2340 if (syslocks)
2341 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
2342
2343 /*
2344 * Check for any delayed allocations.
2345 */
2346 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
2347 int64_t actbytes;
2348 u_int32_t loanedBlocks;
2349
2350 //
2351 // Make sure we have a transaction. It's possible
2352 // that we came in and fp->ff_unallocblocks was zero
2353 // but during the time we blocked acquiring the extents
2354 // btree, ff_unallocblocks became non-zero and so we
2355 // will need to start a transaction.
2356 //
2357 if (started_tr == 0) {
2358 if (syslocks) {
2359 hfs_systemfile_unlock(hfsmp, lockflags);
2360 syslocks = 0;
2361 }
2362 goto retry;
2363 }
2364
2365 /*
2366 * Note: ExtendFileC will release any blocks on loan and
2367 * acquire real blocks. So we ask to extend by zero bytes
2368 * since ExtendFileC will account for the virtual blocks.
2369 */
2370
2371 loanedBlocks = fp->ff_unallocblocks;
2372 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
2373 kEFAllMask | kEFNoClumpMask, &actbytes);
2374
2375 if (retval) {
2376 fp->ff_unallocblocks = loanedBlocks;
2377 cp->c_blocks += loanedBlocks;
2378 fp->ff_blocks += loanedBlocks;
2379
2380 HFS_MOUNT_LOCK(hfsmp, TRUE);
2381 hfsmp->loanedBlocks += loanedBlocks;
2382 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2383
2384 hfs_systemfile_unlock(hfsmp, lockflags);
2385 cp->c_flag |= C_MODIFIED;
2386 if (started_tr) {
2387 (void) hfs_update(vp, TRUE);
2388 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2389
2390 hfs_end_transaction(hfsmp);
2391 started_tr = 0;
2392 }
2393 goto exit;
2394 }
2395 }
2396
2397 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
2398 ap->a_bpn, &bytesContAvail);
2399 if (syslocks) {
2400 hfs_systemfile_unlock(hfsmp, lockflags);
2401 syslocks = 0;
2402 }
2403
2404 if (started_tr) {
2405 (void) hfs_update(vp, TRUE);
2406 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2407 hfs_end_transaction(hfsmp);
2408 started_tr = 0;
2409 }
2410 if (retval) {
2411 /* On write, always return error because virtual blocks, if any,
2412 * should have been allocated in ExtendFileC(). We do not
2413 * allocate virtual blocks on read, therefore return error
2414 * only if no virtual blocks are allocated. Otherwise we search
2415 * the rangelist for zero-fills.
2416 */
2417 if ((MacToVFSError(retval) != ERANGE) ||
2418 (ap->a_flags & VNODE_WRITE) ||
2419 ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
2420 goto exit;
2421 }
2422
2423 /* Validate if the start offset is within logical file size */
2424 if (ap->a_foffset > fp->ff_size) {
2425 goto exit;
2426 }
2427
2428 /* Searching the file extents has failed for a read operation, therefore
2429 * search the rangelist for any uncommitted holes in the file.
2430 */
2431 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
2432 ap->a_foffset + (off_t)(ap->a_size - 1),
2433 &invalid_range);
2434 switch(overlaptype) {
2435 case RL_OVERLAPISCONTAINED:
2436 /* start_offset <= rl_start, end_offset >= rl_end */
2437 if (ap->a_foffset != invalid_range->rl_start) {
2438 break;
2439 }
2440 case RL_MATCHINGOVERLAP:
2441 /* start_offset = rl_start, end_offset = rl_end */
2442 case RL_OVERLAPCONTAINSRANGE:
2443 /* start_offset >= rl_start, end_offset <= rl_end */
2444 case RL_OVERLAPSTARTSBEFORE:
2445 /* start_offset > rl_start, end_offset >= rl_start */
2446 if ((off_t)fp->ff_size > (invalid_range->rl_end + 1)) {
2447 bytesContAvail = (invalid_range->rl_end + 1) - ap->a_foffset;
2448 } else {
2449 bytesContAvail = fp->ff_size - ap->a_foffset;
2450 }
2451 if (bytesContAvail > ap->a_size) {
2452 bytesContAvail = ap->a_size;
2453 }
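 /*
 * A block number of -1 indicates there is no physical block backing
 * this range; the region is an uncommitted hole that reads back as
 * zeroes.
 */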
2454 *ap->a_bpn = (daddr64_t)-1;
2455 retval = 0;
2456 break;
2457 case RL_OVERLAPENDSAFTER:
2458 /* start_offset < rl_start, end_offset < rl_end */
2459 case RL_NOOVERLAP:
2460 break;
2461 }
2462 goto exit;
2463 }
2464
2465 /* MapFileBlockC() found a valid extent in the filefork. Search the
2466 * mapping information further for invalid file ranges.
2467 */
2468 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
2469 ap->a_foffset + (off_t)bytesContAvail - 1,
2470 &invalid_range);
2471 if (overlaptype != RL_NOOVERLAP) {
2472 switch(overlaptype) {
2473 case RL_MATCHINGOVERLAP:
2474 case RL_OVERLAPCONTAINSRANGE:
2475 case RL_OVERLAPSTARTSBEFORE:
2476 /* There's no valid block for this byte offset */
2477 *ap->a_bpn = (daddr64_t)-1;
2478 /* There's no point limiting the amount to be returned
2479 * if the invalid range that was hit extends all the way
2480 * to the EOF (i.e. there's no valid bytes between the
2481 * end of this range and the file's EOF):
2482 */
2483 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
2484 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
2485 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
2486 }
2487 break;
2488
2489 case RL_OVERLAPISCONTAINED:
2490 case RL_OVERLAPENDSAFTER:
2491 /* The range of interest hits an invalid block before the end: */
2492 if (invalid_range->rl_start == ap->a_foffset) {
2493 /* There's actually no valid information to be had starting here: */
2494 *ap->a_bpn = (daddr64_t)-1;
2495 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
2496 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
2497 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
2498 }
2499 } else {
2500 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
2501 }
2502 break;
2503
2504 case RL_NOOVERLAP:
2505 break;
2506 } /* end switch */
2507 if (bytesContAvail > ap->a_size)
2508 bytesContAvail = ap->a_size;
2509 }
2510
2511exit:
2512 if (retval == 0) {
2513 if (ap->a_run)
2514 *ap->a_run = bytesContAvail;
2515
2516 if (ap->a_poff)
2517 *(int *)ap->a_poff = 0;
2518 }
2519
2520 if (tooklock)
2521 hfs_unlock(cp);
2522
2523 return (MacToVFSError(retval));
2524}
2525
2526
2527/*
2528 * prepare and issue the I/O
2529 * buf_strategy knows how to deal
2530 * with requests that require
2531 * fragmented I/Os
2532 */
2533int
2534hfs_vnop_strategy(struct vnop_strategy_args *ap)
2535{
2536 buf_t bp = ap->a_bp;
2537 vnode_t vp = buf_vnode(bp);
2538
2539 return (buf_strategy(VTOHFS(vp)->hfs_devvp, ap));
2540}
2541
2542static int
2543hfs_minorupdate(struct vnode *vp) {
2544 struct cnode *cp = VTOC(vp);
2545 cp->c_flag &= ~C_MODIFIED;
2546 cp->c_touch_acctime = 0;
2547 cp->c_touch_chgtime = 0;
2548 cp->c_touch_modtime = 0;
2549
2550 return 0;
2551}
2552
2553static int
2554do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context)
2555{
2556 register struct cnode *cp = VTOC(vp);
2557 struct filefork *fp = VTOF(vp);
2558 struct proc *p = vfs_context_proc(context);
2559 kauth_cred_t cred = vfs_context_ucred(context);
2560 int retval;
2561 off_t bytesToAdd;
2562 off_t actualBytesAdded;
2563 off_t filebytes;
2564 u_int32_t fileblocks;
2565 int blksize;
2566 struct hfsmount *hfsmp;
2567 int lockflags;
2568
2569 blksize = VTOVCB(vp)->blockSize;
2570 fileblocks = fp->ff_blocks;
2571 filebytes = (off_t)fileblocks * (off_t)blksize;
2572
2573 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
2574 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
2575
2576 if (length < 0)
2577 return (EINVAL);
2578
2579 /* This should only happen with a corrupt filesystem */
2580 if ((off_t)fp->ff_size < 0)
2581 return (EINVAL);
2582
2583 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
2584 return (EFBIG);
2585
2586 hfsmp = VTOHFS(vp);
2587
2588 retval = E_NONE;
2589
2590 /* Files that are changing size are not hot file candidates. */
2591 if (hfsmp->hfc_stage == HFC_RECORDING) {
2592 fp->ff_bytesread = 0;
2593 }
2594
2595 /*
2596 * We cannot just check if fp->ff_size == length (as an optimization)
2597 * since there may be extra physical blocks that also need truncation.
2598 */
2599#if QUOTA
2600 if ((retval = hfs_getinoquota(cp)))
2601 return(retval);
2602#endif /* QUOTA */
2603
2604 /*
2605 * Lengthen the size of the file. We must ensure that the
2606 * last byte of the file is allocated. Since the smallest
2607 * value of ff_size is 0, length will be at least 1.
2608 */
2609 if (length > (off_t)fp->ff_size) {
2610#if QUOTA
2611 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
2612 cred, 0);
2613 if (retval)
2614 goto Err_Exit;
2615#endif /* QUOTA */
2616 /*
2617 * If we don't have enough physical space then
2618 * we need to extend the physical size.
2619 */
2620 if (length > filebytes) {
2621 int eflags;
2622 u_int32_t blockHint = 0;
2623
2624 /* All or nothing and don't round up to clumpsize. */
2625 eflags = kEFAllMask | kEFNoClumpMask;
2626
2627 if (cred && suser(cred, NULL) != 0)
2628 eflags |= kEFReserveMask; /* keep a reserve */
2629
2630 /*
2631 * Allocate Journal and Quota files in metadata zone.
2632 */
2633 if (filebytes == 0 &&
2634 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
2635 hfs_virtualmetafile(cp)) {
2636 eflags |= kEFMetadataMask;
2637 blockHint = hfsmp->hfs_metazone_start;
2638 }
2639 if (hfs_start_transaction(hfsmp) != 0) {
2640 retval = EINVAL;
2641 goto Err_Exit;
2642 }
2643
2644 /* Protect extents b-tree and allocation bitmap */
2645 lockflags = SFL_BITMAP;
2646 if (overflow_extents(fp))
2647 lockflags |= SFL_EXTENTS;
2648 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2649
2650 while ((length > filebytes) && (retval == E_NONE)) {
2651 bytesToAdd = length - filebytes;
2652 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
2653 (FCB*)fp,
2654 bytesToAdd,
2655 blockHint,
2656 eflags,
2657 &actualBytesAdded));
2658
2659 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2660 if (actualBytesAdded == 0 && retval == E_NONE) {
2661 if (length > filebytes)
2662 length = filebytes;
2663 break;
2664 }
2665 } /* endwhile */
2666
2667 hfs_systemfile_unlock(hfsmp, lockflags);
2668
2669 if (hfsmp->jnl) {
2670 if (skipupdate) {
2671 (void) hfs_minorupdate(vp);
2672 }
2673 else {
2674 (void) hfs_update(vp, TRUE);
2675 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2676 }
2677 }
2678
2679 hfs_end_transaction(hfsmp);
2680
2681 if (retval)
2682 goto Err_Exit;
2683
2684 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2685 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
2686 }
2687
2688 if (!(flags & IO_NOZEROFILL)) {
2689 if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
2690 struct rl_entry *invalid_range;
2691 off_t zero_limit;
2692
2693 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
2694 if (length < zero_limit) zero_limit = length;
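 /*
 * zero_limit is the end of the page containing the old EOF, capped
 * at the new length. When that page holds valid data, bytes from
 * the old EOF up to zero_limit are zeroed via cluster_write() below;
 * anything beyond (or the whole range, if the page is already
 * invalid) is merely recorded as an invalid range to be zero-filled
 * lazily.
 */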
2695
2696 if (length > (off_t)fp->ff_size) {
2697 struct timeval tv;
2698
2699 /* Extending the file: time to fill out the current last page with zeroes? */
2700 if ((fp->ff_size & PAGE_MASK_64) &&
2701 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
2702 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
2703
2704 /* There's some valid data at the start of the (current) last page
2705 of the file, so zero out the remainder of that page to ensure the
2706 entire page contains valid data. Since there is no invalid range
2707 possible past the (current) eof, there's no need to remove anything
2708 from the invalid range list before calling cluster_write(): */
2709 hfs_unlock(cp);
2710 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
2711 fp->ff_size, (off_t)0,
2712 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
2713 hfs_lock(cp, HFS_FORCE_LOCK);
2714 if (retval) goto Err_Exit;
2715
2716 /* Merely invalidate the remaining area, if necessary: */
2717 if (length > zero_limit) {
2718 microuptime(&tv);
2719 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
2720 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
2721 }
2722 } else {
2723 /* The page containing the (current) eof is invalid: just add the
2724 remainder of the page to the invalid list, along with the area
2725 being newly allocated:
2726 */
2727 microuptime(&tv);
2728 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
2729 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
2730 };
2731 }
2732 } else {
2733 panic("hfs_truncate: invoked on non-UBC object?!");
2734 };
2735 }
2736 cp->c_touch_modtime = TRUE;
2737 fp->ff_size = length;
2738
2739 } else { /* Shorten the size of the file */
2740
2741 if ((off_t)fp->ff_size > length) {
2742 /* Any space previously marked as invalid is now irrelevant: */
2743 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2744 }
2745
2746 /*
2747 * Account for any unmapped blocks. Note that the new
2748 * file length can still end up with unmapped blocks.
2749 */
2750 if (fp->ff_unallocblocks > 0) {
2751 u_int32_t finalblks;
2752 u_int32_t loanedBlocks;
2753
2754 HFS_MOUNT_LOCK(hfsmp, TRUE);
2755
2756 loanedBlocks = fp->ff_unallocblocks;
2757 cp->c_blocks -= loanedBlocks;
2758 fp->ff_blocks -= loanedBlocks;
2759 fp->ff_unallocblocks = 0;
2760
2761 hfsmp->loanedBlocks -= loanedBlocks;
2762
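 /*
 * Round the new length up to whole allocation blocks. If that still
 * exceeds the blocks actually mapped, keep the difference on loan as
 * unallocated blocks.
 */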
2763 finalblks = (length + blksize - 1) / blksize;
2764 if (finalblks > fp->ff_blocks) {
2765 /* calculate required unmapped blocks */
2766 loanedBlocks = finalblks - fp->ff_blocks;
2767 hfsmp->loanedBlocks += loanedBlocks;
2768
2769 fp->ff_unallocblocks = loanedBlocks;
2770 cp->c_blocks += loanedBlocks;
2771 fp->ff_blocks += loanedBlocks;
2772 }
2773 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2774 }
2775
2776 /*
2777 * For a TBE process the deallocation of the file blocks is
2778 * delayed until the file is closed. And hfs_close calls
2779 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2780 * isn't set, we make sure this isn't a TBE process.
2781 */
2782 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2783#if QUOTA
2784 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2785#endif /* QUOTA */
2786 if (hfs_start_transaction(hfsmp) != 0) {
2787 retval = EINVAL;
2788 goto Err_Exit;
2789 }
2790
2791 if (fp->ff_unallocblocks == 0) {
2792 /* Protect extents b-tree and allocation bitmap */
2793 lockflags = SFL_BITMAP;
2794 if (overflow_extents(fp))
2795 lockflags |= SFL_EXTENTS;
2796 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2797
2798 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2799 (FCB*)fp, length, false));
2800
2801 hfs_systemfile_unlock(hfsmp, lockflags);
2802 }
2803 if (hfsmp->jnl) {
2804 if (retval == 0) {
2805 fp->ff_size = length;
2806 }
2807 if (skipupdate) {
2808 (void) hfs_minorupdate(vp);
2809 }
2810 else {
2811 (void) hfs_update(vp, TRUE);
2812 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2813 }
2814 }
2815 hfs_end_transaction(hfsmp);
2816
2817 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2818 if (retval)
2819 goto Err_Exit;
2820#if QUOTA
2821 /* These are bytesreleased */
2822 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2823#endif /* QUOTA */
2824 }
2825 /* Only set update flag if the logical length changes */
2826 if ((off_t)fp->ff_size != length)
2827 cp->c_touch_modtime = TRUE;
2828 fp->ff_size = length;
2829 }
2830 if (cp->c_mode & (S_ISUID | S_ISGID)) {
2831 if (!vfs_context_issuser(context)) {
2832 cp->c_mode &= ~(S_ISUID | S_ISGID);
2833 skipupdate = 0;
2834 }
2835 }
2836 if (skipupdate) {
2837 retval = hfs_minorupdate(vp);
2838 }
2839 else {
2840 cp->c_touch_chgtime = TRUE; /* status changed */
2841 cp->c_touch_modtime = TRUE; /* file data was modified */
2842 retval = hfs_update(vp, MNT_WAIT);
2843 }
2844 if (retval) {
2845 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2846 -1, -1, -1, retval, 0);
2847 }
2848
2849Err_Exit:
2850
2851 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2852 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2853
2854 return (retval);
2855}
2856
2857
2858
2859/*
2860 * Truncate a cnode to at most length size, freeing (or adding) the
2861 * disk blocks.
2862 */
2863__private_extern__
2864int
2865hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2866 int skipupdate, vfs_context_t context)
2867{
2868 struct filefork *fp = VTOF(vp);
2869 off_t filebytes;
2870 u_int32_t fileblocks;
2871 int blksize, error = 0;
2872 struct cnode *cp = VTOC(vp);
2873
2874 /* Cannot truncate an HFS directory! */
2875 if (vnode_isdir(vp)) {
2876 return (EISDIR);
2877 }
2878 /* A swap file cannot change size. */
2879 if (vnode_isswap(vp) && (length != 0)) {
2880 return (EPERM);
2881 }
2882
2883 blksize = VTOVCB(vp)->blockSize;
2884 fileblocks = fp->ff_blocks;
2885 filebytes = (off_t)fileblocks * (off_t)blksize;
2886
2887 //
2888 // Have to do this here so that we don't wind up with
2889 // i/o pending for blocks that are about to be released
2890 // if we truncate the file.
2891 //
2892 // If skipsetsize is set, then the caller is responsible
2893 // for the ubc_setsize.
2894 //
2895 // Even if skipsetsize is set, if the length is zero we
2896 // want to call ubc_setsize() because as of SnowLeopard
2897 // it will no longer cause any page-ins and it will drop
2898 // any dirty pages so that we don't do any i/o that we
2899 // don't have to. This also prevents a race where i/o
2900 // for truncated blocks may overwrite later data if the
2901 // blocks get reallocated to a different file.
2902 //
2903 if (!skipsetsize || length == 0)
2904 ubc_setsize(vp, length);
2905
2906 // have to loop truncating or growing files that are
2907 // really big because otherwise transactions can get
2908 // enormous and consume too many kernel resources.
2909
2910 if (length < filebytes) {
2911 while (filebytes > length) {
2912 if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
2913 filebytes -= HFS_BIGFILE_SIZE;
2914 } else {
2915 filebytes = length;
2916 }
2917 cp->c_flag |= C_FORCEUPDATE;
2918 error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
2919 if (error)
2920 break;
2921 }
2922 } else if (length > filebytes) {
2923 while (filebytes < length) {
2924 if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
2925 filebytes += HFS_BIGFILE_SIZE;
2926 } else {
2927 filebytes = length;
2928 }
2929 cp->c_flag |= C_FORCEUPDATE;
2930 error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
2931 if (error)
2932 break;
2933 }
2934 } else /* Same logical size */ {
2935
2936 error = do_hfs_truncate(vp, length, flags, skipupdate, context);
2937 }
2938 /* Files that are changing size are not hot file candidates. */
2939 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2940 fp->ff_bytesread = 0;
2941 }
2942
2943 return (error);
2944}
2945
2946
2947
2948/*
2949 * Preallocate file storage space.
2950 */
2951int
2952hfs_vnop_allocate(struct vnop_allocate_args /* {
2953 vnode_t a_vp;
2954 off_t a_length;
2955 u_int32_t a_flags;
2956 off_t *a_bytesallocated;
2957 off_t a_offset;
2958 vfs_context_t a_context;
2959 } */ *ap)
2960{
2961 struct vnode *vp = ap->a_vp;
2962 struct cnode *cp;
2963 struct filefork *fp;
2964 ExtendedVCB *vcb;
2965 off_t length = ap->a_length;
2966 off_t startingPEOF;
2967 off_t moreBytesRequested;
2968 off_t actualBytesAdded;
2969 off_t filebytes;
2970 u_int32_t fileblocks;
2971 int retval, retval2;
2972 u_int32_t blockHint;
2973 u_int32_t extendFlags; /* For call to ExtendFileC */
2974 struct hfsmount *hfsmp;
2975 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2976 int lockflags;
2977
2978 *(ap->a_bytesallocated) = 0;
2979
2980 if (!vnode_isreg(vp))
2981 return (EISDIR);
2982 if (length < (off_t)0)
2983 return (EINVAL);
2984
2985 cp = VTOC(vp);
2986
2987 hfs_lock_truncate(cp, TRUE);
2988
2989 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2990 goto Err_Exit;
2991 }
2992
2993 fp = VTOF(vp);
2994 hfsmp = VTOHFS(vp);
2995 vcb = VTOVCB(vp);
2996
2997 fileblocks = fp->ff_blocks;
2998 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2999
3000 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
3001 retval = EINVAL;
3002 goto Err_Exit;
3003 }
3004
3005 /* Fill in the flags word for the call to Extend the file */
3006
3007 extendFlags = kEFNoClumpMask;
3008 if (ap->a_flags & ALLOCATECONTIG)
3009 extendFlags |= kEFContigMask;
3010 if (ap->a_flags & ALLOCATEALL)
3011 extendFlags |= kEFAllMask;
3012 if (cred && suser(cred, NULL) != 0)
3013 extendFlags |= kEFReserveMask;
3014 if (hfs_virtualmetafile(cp))
3015 extendFlags |= kEFMetadataMask;
3016
3017 retval = E_NONE;
3018 blockHint = 0;
3019 startingPEOF = filebytes;
3020
3021 if (ap->a_flags & ALLOCATEFROMPEOF)
3022 length += filebytes;
3023 else if (ap->a_flags & ALLOCATEFROMVOL)
3024 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
3025
3026 /* If no changes are necessary, then we're done */
3027 if (filebytes == length)
3028 goto Std_Exit;
3029
3030 /*
3031 * Lengthen the size of the file. We must ensure that the
3032 * last byte of the file is allocated. Since the smallest
3033 * value of filebytes is 0, length will be at least 1.
3034 */
3035 if (length > filebytes) {
3036 off_t total_bytes_added = 0, orig_request_size;
3037
3038 orig_request_size = moreBytesRequested = length - filebytes;
3039
3040#if QUOTA
3041 retval = hfs_chkdq(cp,
3042 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
3043 cred, 0);
3044 if (retval)
3045 goto Err_Exit;
3046
3047#endif /* QUOTA */
3048 /*
3049 * Metadata zone checks.
3050 */
3051 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
3052 /*
3053 * Allocate Journal and Quota files in metadata zone.
3054 */
3055 if (hfs_virtualmetafile(cp)) {
3056 blockHint = hfsmp->hfs_metazone_start;
3057 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
3058 (blockHint <= hfsmp->hfs_metazone_end)) {
3059 /*
3060 * Move blockHint outside metadata zone.
3061 */
3062 blockHint = hfsmp->hfs_metazone_end + 1;
3063 }
3064 }
3065
3066
3067 while ((length > filebytes) && (retval == E_NONE)) {
3068 off_t bytesRequested;
3069
3070 if (hfs_start_transaction(hfsmp) != 0) {
3071 retval = EINVAL;
3072 goto Err_Exit;
3073 }
3074
3075 /* Protect extents b-tree and allocation bitmap */
3076 lockflags = SFL_BITMAP;
3077 if (overflow_extents(fp))
3078 lockflags |= SFL_EXTENTS;
3079 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3080
3081 if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
3082 bytesRequested = HFS_BIGFILE_SIZE;
3083 } else {
3084 bytesRequested = moreBytesRequested;
3085 }
3086
3087 if (extendFlags & kEFContigMask) {
3088 // if we're on a sparse device, this will force it to do a
3089 // full scan to find the space needed.
3090 hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
3091 }
3092
3093 retval = MacToVFSError(ExtendFileC(vcb,
3094 (FCB*)fp,
3095 bytesRequested,
3096 blockHint,
3097 extendFlags,
3098 &actualBytesAdded));
3099
3100 if (retval == E_NONE) {
3101 *(ap->a_bytesallocated) += actualBytesAdded;
3102 total_bytes_added += actualBytesAdded;
3103 moreBytesRequested -= actualBytesAdded;
3104 if (blockHint != 0) {
3105 blockHint += actualBytesAdded / vcb->blockSize;
3106 }
3107 }
3108 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
3109
3110 hfs_systemfile_unlock(hfsmp, lockflags);
3111
3112 if (hfsmp->jnl) {
3113 (void) hfs_update(vp, TRUE);
3114 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3115 }
3116
3117 hfs_end_transaction(hfsmp);
3118 }
3119
3120
3121 /*
3122 * if we get an error and no changes were made then exit;
3123 * otherwise we must do the hfs_update to reflect the changes
3124 */
3125 if (retval && (startingPEOF == filebytes))
3126 goto Err_Exit;
3127
3128 /*
3129 * Adjust actualBytesAdded to be allocation block aligned, not
3130 * clump size aligned.
3131 * NOTE: What we are reporting here does not affect reality
3132 * until the file is closed, when we truncate the file to allocation
3133 * block size.
3134 */
3135 if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
3136 *(ap->a_bytesallocated) =
3137 roundup(orig_request_size, (off_t)vcb->blockSize);
3138
3139 } else { /* Shorten the size of the file */
3140
3141 if (fp->ff_size > length) {
3142 /*
3143 * Any buffers that are past the truncation point need to be
3144 * invalidated (to maintain buffer cache consistency).
3145 */
3146 }
3147
3148 retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context);
3149 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
3150
3151 /*
3152 * if we get an error and no changes were made then exit;
3153 * otherwise we must do the hfs_update to reflect the changes
3154 */
3155 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
3156#if QUOTA
3157 /* These are bytesreleased */
3158 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
3159#endif /* QUOTA */
3160
3161 if (fp->ff_size > filebytes) {
3162 fp->ff_size = filebytes;
3163
3164 hfs_unlock(cp);
3165 ubc_setsize(vp, fp->ff_size);
3166 hfs_lock(cp, HFS_FORCE_LOCK);
3167 }
3168 }
3169
3170Std_Exit:
3171 cp->c_touch_chgtime = TRUE;
3172 cp->c_touch_modtime = TRUE;
3173 retval2 = hfs_update(vp, MNT_WAIT);
3174
3175 if (retval == 0)
3176 retval = retval2;
3177Err_Exit:
3178 hfs_unlock_truncate(cp, TRUE);
3179 hfs_unlock(cp);
3180 return (retval);
3181}
3182
3183
3184/*
3185 * Pagein for HFS filesystem
3186 */
3187int
3188hfs_vnop_pagein(struct vnop_pagein_args *ap)
3189/*
3190 struct vnop_pagein_args {
3191 vnode_t a_vp,
3192 upl_t a_pl,
3193 vm_offset_t a_pl_offset,
3194 off_t a_f_offset,
3195 size_t a_size,
3196 int a_flags
3197 vfs_context_t a_context;
3198 };
3199*/
3200{
3201 vnode_t vp = ap->a_vp;
3202 int error;
3203
3204#if HFS_COMPRESSION
3205 if (VNODE_IS_RSRC(vp)) {
3206 /* allow pageins of the resource fork */
3207 } else {
3208 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
3209 if (compressed) {
3210 error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
3211 if (compressed) {
3212 if (error == 0) {
3213 /* successful page-in, update the access time */
3214 VTOC(vp)->c_touch_acctime = TRUE;
3215
3216 /* compressed files are not hot file candidates */
3217 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
3218 VTOF(vp)->ff_bytesread = 0;
3219 }
3220 }
3221 return error;
3222 }
3223 /* otherwise the file was converted back to a regular file while we were reading it */
3224 }
3225 }
3226#endif
3227
3228 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
3229 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
3230 /*
3231 * Keep track of blocks read.
3232 */
3233 if (!vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
3234 struct cnode *cp;
3235 struct filefork *fp;
3236 int bytesread;
3237 int took_cnode_lock = 0;
3238
3239 cp = VTOC(vp);
3240 fp = VTOF(vp);
3241
3242 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
3243 bytesread = fp->ff_size;
3244 else
3245 bytesread = ap->a_size;
3246
3247 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
3248 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
3249 hfs_lock(cp, HFS_FORCE_LOCK);
3250 took_cnode_lock = 1;
3251 }
3252 /*
3253 * If this file hasn't been seen since the start of
3254 * the current sampling period then start over.
3255 */
3256 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
3257 struct timeval tv;
3258
3259 fp->ff_bytesread = bytesread;
3260 microtime(&tv);
3261 cp->c_atime = tv.tv_sec;
3262 } else {
3263 fp->ff_bytesread += bytesread;
3264 }
3265 cp->c_touch_acctime = TRUE;
3266 if (took_cnode_lock)
3267 hfs_unlock(cp);
3268 }
3269 return (error);
3270}
3271
3272/*
3273 * Pageout for HFS filesystem.
3274 */
3275int
3276hfs_vnop_pageout(struct vnop_pageout_args *ap)
3277/*
3278 struct vnop_pageout_args {
3279 vnode_t a_vp,
3280 upl_t a_pl,
3281 vm_offset_t a_pl_offset,
3282 off_t a_f_offset,
3283 size_t a_size,
3284 int a_flags
3285 vfs_context_t a_context;
3286 };
3287*/
3288{
3289 vnode_t vp = ap->a_vp;
3290 struct cnode *cp;
3291 struct filefork *fp;
3292 int retval = 0;
3293 off_t filesize;
3294 upl_t upl;
3295 upl_page_info_t* pl;
3296 vm_offset_t a_pl_offset;
3297 int a_flags;
3298 int is_pageoutv2 = 0;
3299
3300 cp = VTOC(vp);
3301 fp = VTOF(vp);
3302
3303 /*
3304 * Figure out where the file ends, for pageout purposes. If
3305 * ff_new_size > ff_size, then we're in the middle of extending the
3306 * file via a write, so it is safe (and necessary) that we be able
3307 * to pageout up to that point.
3308 */
3309 filesize = fp->ff_size;
3310 if (fp->ff_new_size > filesize)
3311 filesize = fp->ff_new_size;
3312
3313 a_flags = ap->a_flags;
3314 a_pl_offset = ap->a_pl_offset;
3315
3316 /*
3317 * we can tell if we're getting the new or old behavior from the UPL
3318 */
3319 if ((upl = ap->a_pl) == NULL) {
3320 int request_flags;
3321
3322 is_pageoutv2 = 1;
3323 /*
3324 * we're in control of any UPL we commit
3325 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
3326 */
3327 a_flags &= ~UPL_NOCOMMIT;
3328 a_pl_offset = 0;
3329
3330 /*
3331 * take truncate lock (shared) to guard against
3332 * zero-fill thru fsync interfering, but only for v2
3333 */
3334 hfs_lock_truncate(cp, 0);
3335
3336 if (a_flags & UPL_MSYNC) {
3337 request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
3338 }
3339 else {
3340 request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
3341 }
3342 ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);
3343
3344 if (upl == (upl_t) NULL) {
3345 retval = EINVAL;
3346 goto pageout_done;
3347 }
3348 }
3349 /*
3350 * From this point forward upl points at the UPL we're working with;
3351 * it was either passed in or we successfully created it.
3352 */
3353
3354 /*
3355 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
3356 * UPL instead of relying on the UPL passed into us. We go ahead and do that here,
3357 * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for
3358 * N dirty ranges in the UPL. Note that this is almost a direct copy of the
3359 * logic in vnode_pageout except that we need to do it after grabbing the truncate
3360 * lock in HFS so that we don't lock invert ourselves.
3361 *
3362 * Note that we can still get into this function on behalf of the default pager with
3363 * non-V2 behavior (swapfiles). However in that case, we did not grab locks above
3364 * since fsync and other writing threads will grab the locks, then mark the
3365 * relevant pages as busy. But the pageout codepath marks the pages as busy,
3366 * and THEN would attempt to grab the truncate lock, which would result in deadlock. So
3367 * we do not try to grab anything for the pre-V2 case, which should only be accessed
3368 * by the paging/VM system.
3369 */
3370
3371 if (is_pageoutv2) {
3372 off_t f_offset;
3373 int offset;
3374 int isize;
3375 int pg_index;
3376 int error;
3377 int error_ret = 0;
3378
3379 isize = ap->a_size;
3380 f_offset = ap->a_f_offset;
3381
3382 /*
3383 * Scan from the back to find the last page in the UPL, so that we
3384 * aren't looking at a UPL that may have already been freed by the
3385 * preceding aborts/completions.
3386 */
3387 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
3388 if (upl_page_present(pl, --pg_index))
3389 break;
3390 if (pg_index == 0) {
3391 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
3392 goto pageout_done;
3393 }
3394 }
3395
3396 /*
3397 * initialize the offset variables before we touch the UPL.
3398 * a_f_offset is the position into the file, in bytes
3399 * offset is the position into the UPL, in bytes
3400 * pg_index is the pg# of the UPL we're operating on.
3401 * isize is the offset into the UPL of the last non-clean page.
3402 */
3403 isize = ((pg_index + 1) * PAGE_SIZE);
3404
3405 offset = 0;
3406 pg_index = 0;
3407
3408 while (isize) {
3409 int xsize;
3410 int num_of_pages;
3411
3412 if ( !upl_page_present(pl, pg_index)) {
3413 /*
3414 * we asked for RET_ONLY_DIRTY, so it's possible
3415 * to get back empty slots in the UPL.
3416 * just skip over them
3417 */
3418 f_offset += PAGE_SIZE;
3419 offset += PAGE_SIZE;
3420 isize -= PAGE_SIZE;
3421 pg_index++;
3422
3423 continue;
3424 }
3425 if ( !upl_dirty_page(pl, pg_index)) {
3426 panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
3427 }
3428
3429 /*
3430 * We know that we have at least one dirty page.
3431 * Now checking to see how many in a row we have
3432 */
3433 num_of_pages = 1;
3434 xsize = isize - PAGE_SIZE;
3435
3436 while (xsize) {
3437 if ( !upl_dirty_page(pl, pg_index + num_of_pages))
3438 break;
3439 num_of_pages++;
3440 xsize -= PAGE_SIZE;
3441 }
3442 xsize = num_of_pages * PAGE_SIZE;
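 /*
 * xsize now covers the run of contiguous dirty pages found above;
 * that run is handed to cluster_pageout() as a single I/O below.
 */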
3443
3444 if (!vnode_isswap(vp)) {
3445 off_t end_of_range;
3446 int tooklock;
3447
3448 tooklock = 0;
3449
3450 if (cp->c_lockowner != current_thread()) {
3451 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
3452 /*
3453 * we're in the v2 path, so we are the
3454 * owner of the UPL... we may have already
3455 * processed some of the UPL, so abort it
3456 * from the current working offset to the
3457 * end of the UPL
3458 */
3459 ubc_upl_abort_range(upl,
3460 offset,
3461 ap->a_size - offset,
3462 UPL_ABORT_FREE_ON_EMPTY);
3463 goto pageout_done;
3464 }
3465 tooklock = 1;
3466 }
3467 end_of_range = f_offset + xsize - 1;
3468
3469 if (end_of_range >= filesize) {
3470 end_of_range = (off_t)(filesize - 1);
3471 }
3472 if (f_offset < filesize) {
3473 rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
3474 cp->c_flag |= C_MODIFIED; /* leof is dirty */
3475 }
3476 if (tooklock) {
3477 hfs_unlock(cp);
3478 }
3479 }
3480 if ((error = cluster_pageout(vp, upl, offset, f_offset,
3481 xsize, filesize, a_flags))) {
3482 if (error_ret == 0)
3483 error_ret = error;
3484 }
3485 f_offset += xsize;
3486 offset += xsize;
3487 isize -= xsize;
3488 pg_index += num_of_pages;
3489 }
3490 /* capture errnos bubbled out of cluster_pageout if they occurred */
3491 if (error_ret != 0) {
3492 retval = error_ret;
3493 }
3494 } /* end block for v2 pageout behavior */
3495 else {
3496 if (!vnode_isswap(vp)) {
3497 off_t end_of_range;
3498 int tooklock = 0;
3499
3500 if (cp->c_lockowner != current_thread()) {
3501 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
3502 if (!(a_flags & UPL_NOCOMMIT)) {
3503 ubc_upl_abort_range(upl,
3504 a_pl_offset,
3505 ap->a_size,
3506 UPL_ABORT_FREE_ON_EMPTY);
3507 }
3508 goto pageout_done;
3509 }
3510 tooklock = 1;
3511 }
3512 end_of_range = ap->a_f_offset + ap->a_size - 1;
3513
3514 if (end_of_range >= filesize) {
3515 end_of_range = (off_t)(filesize - 1);
3516 }
3517 if (ap->a_f_offset < filesize) {
3518 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
3519 cp->c_flag |= C_MODIFIED; /* leof is dirty */
3520 }
3521
3522 if (tooklock) {
3523 hfs_unlock(cp);
3524 }
3525 }
3526 /*
3527 * just call cluster_pageout for old pre-v2 behavior
3528 */
3529 retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
3530 ap->a_size, filesize, a_flags);
3531 }
3532
3533 /*
3534 * If data was written, update the modification time of the file.
3535 * If setuid or setgid bits are set and this process is not the
3536 * superuser then clear the setuid and setgid bits as a precaution
3537 * against tampering.
3538 */
3539 if (retval == 0) {
3540 cp->c_touch_modtime = TRUE;
3541 cp->c_touch_chgtime = TRUE;
3542 if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
3543 (vfs_context_suser(ap->a_context) != 0)) {
3544 hfs_lock(cp, HFS_FORCE_LOCK);
3545 cp->c_mode &= ~(S_ISUID | S_ISGID);
3546 hfs_unlock(cp);
3547 }
3548 }
3549
3550pageout_done:
3551 if (is_pageoutv2) {
3552 /* release truncate lock (shared) */
3553 hfs_unlock_truncate(cp, 0);
3554 }
3555 return (retval);
3556}
3557
3558/*
3559 * Intercept B-Tree node writes to unswap them if necessary.
3560 */
3561int
3562hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
3563{
3564 int retval = 0;
3565 register struct buf *bp = ap->a_bp;
3566 register struct vnode *vp = buf_vnode(bp);
3567 BlockDescriptor block;
3568
3569 /* Trap B-Tree writes */
3570 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
3571 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
3572 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
3573 (vp == VTOHFS(vp)->hfc_filevp)) {
3574
3575 /*
3576 * Swap and validate the node if it is in native byte order.
3577 * This is always be true on big endian, so we always validate
3578 * before writing here. On little endian, the node typically has
3579 * been swapped and validated when it was written to the journal,
3580 * so we won't do anything here.
3581 */
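 /*
 * The first record in a B-tree node always starts at offset 14
 * (sizeof(BTNodeDescriptor)), and that offset is stored in the last
 * two bytes of the node. If those bytes read as 0x000e in host byte
 * order, the node is still in native (host) order and must be
 * swapped to big endian before it hits the disk.
 */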
3582 if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
3583 /* Prepare the block pointer */
3584 block.blockHeader = bp;
3585 block.buffer = (char *)buf_dataptr(bp);
3586 block.blockNum = buf_lblkno(bp);
3587 /* not found in cache ==> came from disk */
3588 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
3589 block.blockSize = buf_count(bp);
3590
3591 /* Endian un-swap B-Tree node */
3592 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
3593 if (retval)
3594 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
3595 }
3596 }
3597
3598 /* This buffer shouldn't be locked anymore but if it is clear it */
3599 if ((buf_flags(bp) & B_LOCKED)) {
3600 // XXXdbg
3601 if (VTOHFS(vp)->jnl) {
3602 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
3603 }
3604 buf_clearflags(bp, B_LOCKED);
3605 }
3606 retval = vn_bwrite (ap);
3607
3608 return (retval);
3609}
3610
3611/*
3612 * Relocate a file to a new location on disk
3613 * cnode must be locked on entry
3614 *
3615 * Relocation occurs by cloning the file's data from its
3616 * current set of blocks to a new set of blocks. During
3617 * the relocation all of the blocks (old and new) are
3618 * owned by the file.
3619 *
3620 * -----------------
3621 * |///////////////|
3622 * -----------------
3623 * 0 N (file offset)
3624 *
3625 * ----------------- -----------------
3626 * |///////////////| | | STEP 1 (acquire new blocks)
3627 * ----------------- -----------------
3628 * 0 N N+1 2N
3629 *
3630 * ----------------- -----------------
3631 * |///////////////| |///////////////| STEP 2 (clone data)
3632 * ----------------- -----------------
3633 * 0 N N+1 2N
3634 *
3635 * -----------------
3636 * |///////////////| STEP 3 (head truncate blocks)
3637 * -----------------
3638 * 0 N
3639 *
3640 * During steps 2 and 3 page-outs to file offsets less
3641 * than or equal to N are suspended.
3642 *
3643 * During step 3 page-ins to the file get suspended.
3644 */
3645__private_extern__
3646int
3647hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
3648 struct proc *p)
3649{
3650 struct cnode *cp;
3651 struct filefork *fp;
3652 struct hfsmount *hfsmp;
3653 u_int32_t headblks;
3654 u_int32_t datablks;
3655 u_int32_t blksize;
3656 u_int32_t growsize;
3657 u_int32_t nextallocsave;
3658 daddr64_t sector_a, sector_b;
3659 int eflags;
3660 off_t newbytes;
3661 int retval;
3662 int lockflags = 0;
3663 int took_trunc_lock = 0;
3664 int started_tr = 0;
3665 enum vtype vnodetype;
3666
3667 vnodetype = vnode_vtype(vp);
3668 if (vnodetype != VREG && vnodetype != VLNK) {
3669 return (EPERM);
3670 }
3671
3672 hfsmp = VTOHFS(vp);
3673 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
3674 return (ENOSPC);
3675 }
3676
3677 cp = VTOC(vp);
3678 fp = VTOF(vp);
3679 if (fp->ff_unallocblocks)
3680 return (EINVAL);
3681 blksize = hfsmp->blockSize;
3682 if (blockHint == 0)
3683 blockHint = hfsmp->nextAllocation;
3684
3685 if ((fp->ff_size > 0x7fffffff) ||
3686 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
3687 return (EFBIG);
3688 }
3689
3690 //
3691 // We do not believe that this call to hfs_fsync() is
3692 // necessary and it causes a journal transaction
3693 // deadlock so we are removing it.
3694 //
3695 //if (vnodetype == VREG && !vnode_issystem(vp)) {
3696 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
3697 // if (retval)
3698 // return (retval);
3699 //}
3700
3701 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
3702 hfs_unlock(cp);
3703 hfs_lock_truncate(cp, TRUE);
3704 /* Force lock since callers expects lock to be held. */
3705 if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
3706 hfs_unlock_truncate(cp, TRUE);
3707 return (retval);
3708 }
3709 /* No need to continue if file was removed. */
3710 if (cp->c_flag & C_NOEXISTS) {
3711 hfs_unlock_truncate(cp, TRUE);
3712 return (ENOENT);
3713 }
3714 took_trunc_lock = 1;
3715 }
3716 headblks = fp->ff_blocks;
3717 datablks = howmany(fp->ff_size, blksize);
3718 growsize = datablks * blksize;
3719 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
3720 if (blockHint >= hfsmp->hfs_metazone_start &&
3721 blockHint <= hfsmp->hfs_metazone_end)
3722 eflags |= kEFMetadataMask;
3723
3724 if (hfs_start_transaction(hfsmp) != 0) {
3725 if (took_trunc_lock)
3726 hfs_unlock_truncate(cp, TRUE);
3727 return (EINVAL);
3728 }
3729 started_tr = 1;
3730 /*
3731 * Protect the extents b-tree and the allocation bitmap
3732 * during MapFileBlockC and ExtendFileC operations.
3733 */
3734 lockflags = SFL_BITMAP;
3735 if (overflow_extents(fp))
3736 lockflags |= SFL_EXTENTS;
3737 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3738
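 /*
 * Record the physical sector of the file's current last block. If
 * the new allocation turns out to start in the very next sector
 * (sector_a + 1 == sector_b below), the new blocks are contiguous
 * with the old ones and the relocation is abandoned with ENOSPC.
 */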
3739 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
3740 if (retval) {
3741 retval = MacToVFSError(retval);
3742 goto out;
3743 }
3744
3745 /*
3746 * STEP 1 - acquire new allocation blocks.
3747 */
3748 nextallocsave = hfsmp->nextAllocation;
3749 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
3750 if (eflags & kEFMetadataMask) {
3751 HFS_MOUNT_LOCK(hfsmp, TRUE);
3752 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
3753 MarkVCBDirty(hfsmp);
3754 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3755 }
3756
3757 retval = MacToVFSError(retval);
3758 if (retval == 0) {
3759 cp->c_flag |= C_MODIFIED;
3760 if (newbytes < growsize) {
3761 retval = ENOSPC;
3762 goto restore;
3763 } else if (fp->ff_blocks < (headblks + datablks)) {
3764 printf("hfs_relocate: allocation failed");
3765 retval = ENOSPC;
3766 goto restore;
3767 }
3768
3769 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
3770 if (retval) {
3771 retval = MacToVFSError(retval);
3772 } else if ((sector_a + 1) == sector_b) {
3773 retval = ENOSPC;
3774 goto restore;
3775 } else if ((eflags & kEFMetadataMask) &&
3776 ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
3777 hfsmp->hfs_metazone_end)) {
3778#if 0
3779 const char * filestr;
3780 char emptystr = '\0';
3781
3782 if (cp->c_desc.cd_nameptr != NULL) {
3783 filestr = (const char *)&cp->c_desc.cd_nameptr[0];
3784 } else if (vnode_name(vp) != NULL) {
3785 filestr = vnode_name(vp);
3786 } else {
3787 filestr = &emptystr;
3788 }
3789#endif
3790 retval = ENOSPC;
3791 goto restore;
3792 }
3793 }
3794 /* Done with system locks and journal for now. */
3795 hfs_systemfile_unlock(hfsmp, lockflags);
3796 lockflags = 0;
3797 hfs_end_transaction(hfsmp);
3798 started_tr = 0;
3799
3800 if (retval) {
3801 /*
3802 * Check to see if failure is due to excessive fragmentation.
3803 */
3804 if ((retval == ENOSPC) &&
3805 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
3806 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
3807 }
3808 goto out;
3809 }
3810 /*
3811 * STEP 2 - clone file data into the new allocation blocks.
3812 */
3813
3814 if (vnodetype == VLNK)
3815 retval = hfs_clonelink(vp, blksize, cred, p);
3816 else if (vnode_issystem(vp))
3817 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
3818 else
3819 retval = hfs_clonefile(vp, headblks, datablks, blksize);
3820
3821 /* Start transaction for step 3 or for a restore. */
3822 if (hfs_start_transaction(hfsmp) != 0) {
3823 retval = EINVAL;
3824 goto out;
3825 }
3826 started_tr = 1;
3827 if (retval)
3828 goto restore;
3829
3830 /*
3831 * STEP 3 - switch to cloned data and remove old blocks.
3832 */
3833 lockflags = SFL_BITMAP;
3834 if (overflow_extents(fp))
3835 lockflags |= SFL_EXTENTS;
3836 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3837
3838 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
3839
3840 hfs_systemfile_unlock(hfsmp, lockflags);
3841 lockflags = 0;
3842 if (retval)
3843 goto restore;
3844out:
3845 if (took_trunc_lock)
3846 hfs_unlock_truncate(cp, TRUE);
3847
3848 if (lockflags) {
3849 hfs_systemfile_unlock(hfsmp, lockflags);
3850 lockflags = 0;
3851 }
3852
3853 /* Push cnode's new extent data to disk. */
3854 if (retval == 0) {
3855 (void) hfs_update(vp, MNT_WAIT);
3856 }
3857 if (hfsmp->jnl) {
3858 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
3859 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
3860 else
3861 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
3862 }
3863exit:
3864 if (started_tr)
3865 hfs_end_transaction(hfsmp);
3866
3867 return (retval);
3868
3869restore:
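	/* Nothing was allocated beyond the original blocks; nothing to give back. */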
3870 if (fp->ff_blocks == headblks) {
3871 if (took_trunc_lock)
3872 hfs_unlock_truncate(cp, TRUE);
3873 goto exit;
3874 }
3875 /*
3876 * Give back any newly allocated space.
3877 */
3878 if (lockflags == 0) {
3879 lockflags = SFL_BITMAP;
3880 if (overflow_extents(fp))
3881 lockflags |= SFL_EXTENTS;
3882 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3883 }
3884
3885 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
3886
3887 hfs_systemfile_unlock(hfsmp, lockflags);
3888 lockflags = 0;
3889
3890 if (took_trunc_lock)
3891 hfs_unlock_truncate(cp, TRUE);
3892 goto exit;
3893}
3894
3895
3896/*
3897 * Clone a symlink: copy the link's data from its current block (logical
3898 * block 0) into the newly allocated block that follows it (logical block 1).
3899 */
3900static int
3901hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
3902{
3903 struct buf *head_bp = NULL;
3904 struct buf *tail_bp = NULL;
3905 int error;
3906
3907
3908 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
3909 if (error)
3910 goto out;
3911
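	/*
	 * Get a buffer for the newly allocated block (logical block 1) and
	 * copy the link data into it.
	 */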
3912 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
3913 if (tail_bp == NULL) {
3914 error = EIO;
3915 goto out;
3916 }
3917 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
3918 error = (int)buf_bwrite(tail_bp);
3919out:
3920 if (head_bp) {
3921 buf_markinvalid(head_bp);
3922 buf_brelse(head_bp);
3923 }
3924 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
3925
3926 return (error);
3927}
3928
3929/*
3930 * Clone a file's data within the file: copy the first blkcnt allocation
3931 * blocks of data into the newly allocated region that begins at block blkstart.
3932 */
3933static int
3934hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
3935{
3936 caddr_t bufp;
3937 size_t bufsize;
3938 size_t copysize;
3939 size_t iosize;
3940 size_t offset;
3941 off_t writebase;
3942 uio_t auio;
3943 int error = 0;
3944
3945	writebase = (off_t)blkstart * blksize;
3946	copysize = (size_t)blkcnt * blksize;
3947 iosize = bufsize = MIN(copysize, 128 * 1024);
3948 offset = 0;
3949
3950 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3951 return (ENOMEM);
3952 }
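	/*
	 * Drop the cnode lock while the data is copied; it is re-acquired
	 * (with HFS_FORCE_LOCK) before returning.
	 */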
3953 hfs_unlock(VTOC(vp));
3954
3955 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
3956
3957 while (offset < copysize) {
3958 iosize = MIN(copysize - offset, iosize);
3959
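		/* Read the next chunk of the original data into the staging buffer. */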
3960 uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
3961 uio_addiov(auio, (uintptr_t)bufp, iosize);
3962
3963 error = cluster_read(vp, auio, copysize, IO_NOCACHE);
3964 if (error) {
3965 printf("hfs_clonefile: cluster_read failed - %d\n", error);
3966 break;
3967 }
3968 if (uio_resid(auio) != 0) {
3969 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
3970 error = EIO;
3971 break;
3972 }
3973
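		/* Write that chunk into the newly allocated region at writebase + offset. */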
3974 uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
3975 uio_addiov(auio, (uintptr_t)bufp, iosize);
3976
3977 error = cluster_write(vp, auio, writebase + offset,
3978 writebase + offset + iosize,
3979 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
3980 if (error) {
3981 printf("hfs_clonefile: cluster_write failed - %d\n", error);
3982 break;
3983 }
3984 if (uio_resid(auio) != 0) {
3985 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
3986 error = EIO;
3987 break;
3988 }
3989 offset += iosize;
3990 }
3991 uio_free(auio);
3992
3993 if ((blksize & PAGE_MASK)) {
3994 /*
3995		 * Since the copy may not have started on a PAGE
3996		 * boundary (or may not have ended on one), we
3997		 * may have pages left in the cache, because NOCACHE
3998		 * lets partially written pages linger.  Flush the
3999		 * entire range to make sure no pages are left that
4000		 * lie beyond (or intersect) the real LEOF of this
4001		 * file.
4002 */
4003 ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
4004 } else {
4005 /*
4006 * No need to call ubc_sync_range or hfs_invalbuf
4007 * since the file was copied using IO_NOCACHE and
4008 * the copy was done starting and ending on a page
4009 * boundary in the file.
4010 */
4011 }
4012 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
4013
4014 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
4015 return (error);
4016}
4017
4018/*
4019 * Clone a system (metadata) file: copy blkcnt allocation blocks through
4020 * the buffer cache (buf_meta_bread/buf_bwrite), up to a megabyte per pass.
4021 */
4022static int
4023hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
4024 kauth_cred_t cred, struct proc *p)
4025{
4026 caddr_t bufp;
4027 char * offset;
4028 size_t bufsize;
4029 size_t iosize;
4030 struct buf *bp = NULL;
4031 daddr64_t blkno;
4032 daddr64_t blk;
4033 daddr64_t start_blk;
4034 daddr64_t last_blk;
4035 int breadcnt;
4036 int i;
4037 int error = 0;
4038
4039
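	/*
	 * Copy in units of the device's logical block size, staging up to
	 * one megabyte (rounded down to a block multiple) per pass.
	 */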
4040 iosize = GetLogicalBlockSize(vp);
4041 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
4042 breadcnt = bufsize / iosize;
4043
4044 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
4045 return (ENOMEM);
4046 }
4047 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
4048 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
4049 blkno = 0;
4050
4051 while (blkno < last_blk) {
4052 /*
4053 * Read up to a megabyte
4054 */
4055 offset = bufp;
4056 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
4057 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
4058 if (error) {
4059 printf("hfs_clonesysfile: meta_bread error %d\n", error);
4060 goto out;
4061 }
4062 if (buf_count(bp) != iosize) {
4063				printf("hfs_clonesysfile: b_bcount is only %u\n", buf_count(bp));
				error = EIO;
4064				goto out;
4065 }
4066 bcopy((char *)buf_dataptr(bp), offset, iosize);
4067
4068 buf_markinvalid(bp);
4069 buf_brelse(bp);
4070 bp = NULL;
4071
4072 offset += iosize;
4073 }
4074
4075 /*
4076 * Write up to a megabyte
4077 */
4078 offset = bufp;
4079 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
4080 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
4081 if (bp == NULL) {
4082 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
4083 error = EIO;
4084 goto out;
4085 }
4086 bcopy(offset, (char *)buf_dataptr(bp), iosize);
4087 error = (int)buf_bwrite(bp);
4088 bp = NULL;
4089 if (error)
4090 goto out;
4091 offset += iosize;
4092 }
4093 }
4094out:
4095 if (bp) {
4096 buf_brelse(bp);
4097 }
4098
4099 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
4100
4101	/* Flush the copy to disk, but don't let a successful fsync mask an earlier error. */
	if (error == 0)
		error = hfs_fsync(vp, MNT_WAIT, 0, p);
	else
		(void) hfs_fsync(vp, MNT_WAIT, 0, p);
4102
4103 return (error);
4104}