1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* @(#)hfs_readwrite.c 1.0
29 *
30 * (c) 1998-2001 Apple Inc. All Rights Reserved
31 *
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
33 *
34 */
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/fcntl.h>
40 #include <sys/stat.h>
41 #include <sys/buf.h>
42 #include <sys/proc.h>
43 #include <sys/kauth.h>
44 #include <sys/vnode.h>
45 #include <sys/uio.h>
46 #include <sys/vfs_context.h>
47 #include <sys/disk.h>
48 #include <sys/sysctl.h>
49 #include <sys/fsctl.h>
50 #include <sys/ubc.h>
51 #include <sys/fsevents.h>
52 #include <uuid/uuid.h>
53
54 #include <libkern/OSDebug.h>
55
56 #include <miscfs/specfs/specdev.h>
57
58 #include <sys/ubc.h>
59
60 #include <vm/vm_pageout.h>
61 #include <vm/vm_kern.h>
62
63 #include <IOKit/IOBSD.h>
64
65 #include <sys/kdebug.h>
66
67 #include "hfs.h"
68 #include "hfs_attrlist.h"
69 #include "hfs_endian.h"
70 #include "hfs_fsctl.h"
71 #include "hfs_quota.h"
72 #include "FileMgrInternal.h"
73 #include "BTreesInternal.h"
74 #include "hfs_cnode.h"
75 #include "hfs_dbg.h"
76
77 #if HFS_CONFIG_KEY_ROLL
78 #include "hfs_key_roll.h"
79 #endif
80
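/*
 * can_cluster(size) is true when size is an exact multiple of 4 KiB and no
 * larger than MAXPHYSIO/2, i.e. a page-aligned transfer (assuming 4 KiB
 * pages) that stays within half of the physical I/O limit.
 */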
81 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
82
83 enum {
84 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
85 };
86
87 /* from bsd/hfs/hfs_vfsops.c */
88 extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
89
90 /* from hfs_hotfiles.c */
91 extern int hfs_pin_overflow_extents (struct hfsmount *hfsmp, uint32_t fileid,
92 uint8_t forktype, uint32_t *pinned);
93
94 static int hfs_clonefile(struct vnode *, int, int, int);
95 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
96 static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
97
98
99 /*
100 * Read data from a file.
101 */
102 int
103 hfs_vnop_read(struct vnop_read_args *ap)
104 {
105 /*
106 struct vnop_read_args {
107 struct vnodeop_desc *a_desc;
108 vnode_t a_vp;
109 struct uio *a_uio;
110 int a_ioflag;
111 vfs_context_t a_context;
112 };
113 */
114
115 uio_t uio = ap->a_uio;
116 struct vnode *vp = ap->a_vp;
117 struct cnode *cp;
118 struct filefork *fp;
119 struct hfsmount *hfsmp;
120 off_t filesize;
121 off_t filebytes;
122 off_t start_resid = uio_resid(uio);
123 off_t offset = uio_offset(uio);
124 int retval = 0;
125 int took_truncate_lock = 0;
126 int io_throttle = 0;
127 int throttled_count = 0;
128
129 /* Preflight checks */
130 if (!vnode_isreg(vp)) {
131 /* can only read regular files */
132 if (vnode_isdir(vp))
133 return (EISDIR);
134 else
135 return (EPERM);
136 }
137 if (start_resid == 0)
138 return (0); /* Nothing left to do */
139 if (offset < 0)
140 return (EINVAL); /* can't read from a negative offset */
141
142 #if SECURE_KERNEL
143 if ((ap->a_ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
144 (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
145 /* Don't allow unencrypted I/O requests from user space */
146 return EPERM;
147 }
148 #endif
149
150 #if HFS_COMPRESSION
151 if (VNODE_IS_RSRC(vp)) {
152 if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
153 return 0;
154 }
155 /* otherwise read the resource fork normally */
156 } else {
157 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
158 if (compressed) {
159 retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
160 if (retval == 0 && !(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
161 (void) hfs_addhotfile(vp);
162 }
163 if (compressed) {
164 if (retval == 0) {
165 /* successful read, update the access time */
166 VTOC(vp)->c_touch_acctime = TRUE;
167
168 //
169 // compressed files are not traditional hot file candidates
170 // but they may be for CF (which ignores the ff_bytesread
171 // field)
172 //
173 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
174 VTOF(vp)->ff_bytesread = 0;
175 }
176 }
177 return retval;
178 }
179 /* otherwise the file was converted back to a regular file while we were reading it */
180 retval = 0;
181 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
182 int error;
183
184 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
185 if (error) {
186 return error;
187 }
188
189 }
190 }
191 #endif /* HFS_COMPRESSION */
192
193 cp = VTOC(vp);
194 fp = VTOF(vp);
195 hfsmp = VTOHFS(vp);
196
197 #if CONFIG_PROTECT
198 if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {
199 goto exit;
200 }
201
202 #if HFS_CONFIG_KEY_ROLL
203 if (ISSET(ap->a_ioflag, IO_ENCRYPTED)) {
204 off_rsrc_t off_rsrc = off_rsrc_make(offset + start_resid,
205 VNODE_IS_RSRC(vp));
206
207 retval = hfs_key_roll_up_to(ap->a_context, vp, off_rsrc);
208 if (retval)
209 goto exit;
210 }
211 #endif // HFS_CONFIG_KEY_ROLL
212 #endif // CONFIG_PROTECT
213
214 /*
215 * If this read request originated from a syscall (as opposed to
216 * an in-kernel page fault or something), then set it up for
217 * throttle checks
218 */
219 if (ap->a_ioflag & IO_SYSCALL_DISPATCH) {
220 io_throttle = IO_RETURN_ON_THROTTLE;
221 }
222
223 read_again:
224
225 /* Protect against a size change. */
226 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
227 took_truncate_lock = 1;
228
229 filesize = fp->ff_size;
230 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
231
232 /*
233 * Check the file size. Note that per POSIX spec, we return 0 at
234 * file EOF, so attempting a read at an offset that is too big
235 * should just return 0 on HFS+. Since the return value was initialized
236 * to 0 above, we just jump to exit. HFS Standard has its own behavior.
237 */
238 if (offset > filesize) {
239 #if CONFIG_HFS_STD
240 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
241 (offset > (off_t)MAXHFSFILESIZE)) {
242 retval = EFBIG;
243 }
244 #endif
245 goto exit;
246 }
247
248 KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_START,
249 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
250
251 retval = cluster_read(vp, uio, filesize, ap->a_ioflag | io_throttle);
252
253 cp->c_touch_acctime = TRUE;
254
255 KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_END,
256 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
257
258 /*
259 * Keep track of blocks read
260 */
261 if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
262 int took_cnode_lock = 0;
263 off_t bytesread;
264
265 bytesread = start_resid - uio_resid(uio);
266
267 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
268 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
269 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
270 took_cnode_lock = 1;
271 }
272 /*
273 * If this file hasn't been seen since the start of
274 * the current sampling period then start over.
275 */
276 if (cp->c_atime < hfsmp->hfc_timebase) {
277 struct timeval tv;
278
279 fp->ff_bytesread = bytesread;
280 microtime(&tv);
281 cp->c_atime = tv.tv_sec;
282 } else {
283 fp->ff_bytesread += bytesread;
284 }
285
286 if (!(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
287 //
288 // We don't add hotfiles for processes doing IO_EVTONLY I/O
289 // on the assumption that they're system processes such as
290 // mdworker which scan everything in the system (and thus
291 // do not represent user-initiated access to files)
292 //
293 (void) hfs_addhotfile(vp);
294 }
295 if (took_cnode_lock)
296 hfs_unlock(cp);
297 }
298 exit:
299 if (took_truncate_lock) {
300 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
301 }
302 if (retval == EAGAIN) {
303 throttle_lowpri_io(1);
304 throttled_count++;
305
306 retval = 0;
307 goto read_again;
308 }
309 if (throttled_count)
310 throttle_info_reset_window(NULL);
311 return (retval);
312 }
313
314 /*
315 * Ideally, this wouldn't be necessary; the cluster code should be
316 * able to handle this on the read-side. See <rdar://20420068>.
317 */
318 static errno_t hfs_zero_eof_page(vnode_t vp, off_t zero_up_to)
319 {
320 hfs_assert(VTOC(vp)->c_lockowner != current_thread());
321 hfs_assert(VTOC(vp)->c_truncatelockowner == current_thread());
322
323 struct filefork *fp = VTOF(vp);
324
325 if (!(fp->ff_size & PAGE_MASK_64) || zero_up_to <= fp->ff_size) {
326 // Nothing to do
327 return 0;
328 }
329
330 zero_up_to = MIN(zero_up_to, (off_t)round_page_64(fp->ff_size));
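/*
 * Worked example (4 KiB pages): if ff_size is 0x1A00 and the caller asks to
 * zero up to 0x3000, the MIN() above clamps zero_up_to to
 * round_page_64(0x1A00) == 0x2000, so only the tail of the EOF page
 * (0x1A00..0x2000) gets zero-filled here.
 */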
331
332 /* N.B. At present, @zero_up_to is not important because the cluster
333 code will always zero up to the end of the page anyway. */
334 return cluster_write(vp, NULL, fp->ff_size, zero_up_to,
335 fp->ff_size, 0, IO_HEADZEROFILL);
336 }
337
338 /*
339 * Write data to a file.
340 */
341 int
342 hfs_vnop_write(struct vnop_write_args *ap)
343 {
344 uio_t uio = ap->a_uio;
345 struct vnode *vp = ap->a_vp;
346 struct cnode *cp;
347 struct filefork *fp;
348 struct hfsmount *hfsmp;
349 kauth_cred_t cred = NULL;
350 off_t origFileSize;
351 off_t writelimit;
352 off_t bytesToAdd = 0;
353 off_t actualBytesAdded;
354 off_t filebytes;
355 off_t offset;
356 ssize_t resid;
357 int eflags;
358 int ioflag = ap->a_ioflag;
359 int retval = 0;
360 int lockflags;
361 int cnode_locked = 0;
362 int partialwrite = 0;
363 int do_snapshot = 1;
364 time_t orig_ctime=VTOC(vp)->c_ctime;
365 int took_truncate_lock = 0;
366 int io_return_on_throttle = 0;
367 int throttled_count = 0;
368
369 #if HFS_COMPRESSION
370 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
371 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
372 switch(state) {
373 case FILE_IS_COMPRESSED:
374 return EACCES;
375 case FILE_IS_CONVERTING:
376 /* if FILE_IS_CONVERTING, we allow writes but do not
377 bother with snapshots or else we will deadlock.
378 */
379 do_snapshot = 0;
380 break;
381 default:
382 printf("invalid state %d for compressed file\n", state);
383 /* fall through */
384 }
385 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
386 int error;
387
388 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
389 if (error != 0) {
390 return error;
391 }
392 }
393
394 if (do_snapshot) {
395 nspace_snapshot_event(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);
396 }
397
398 #endif
399
400 #if SECURE_KERNEL
401 if ((ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
402 (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
403 /* Don't allow unencrypted I/O requests from user space */
404 return EPERM;
405 }
406 #endif
407
408 resid = uio_resid(uio);
409 offset = uio_offset(uio);
410
411 if (offset < 0)
412 return (EINVAL);
413 if (resid == 0)
414 return (E_NONE);
415 if (!vnode_isreg(vp))
416 return (EPERM); /* Can only write regular files */
417
418 cp = VTOC(vp);
419 fp = VTOF(vp);
420 hfsmp = VTOHFS(vp);
421
422 #if CONFIG_PROTECT
423 if ((retval = cp_handle_vnop (vp, CP_WRITE_ACCESS, 0)) != 0) {
424 goto exit;
425 }
426 #endif
427
428 eflags = kEFDeferMask; /* defer file block allocations */
429 #if HFS_SPARSE_DEV
430 /*
431 * When the underlying device is sparse and space
432 * is low (< 8MB), stop doing delayed allocations
433 * and begin doing synchronous I/O.
434 */
435 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
436 (hfs_freeblks(hfsmp, 0) < 2048)) {
437 eflags &= ~kEFDeferMask;
438 ioflag |= IO_SYNC;
439 }
440 #endif /* HFS_SPARSE_DEV */
441
442 if ((ioflag & (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) ==
443 (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) {
444 io_return_on_throttle = IO_RETURN_ON_THROTTLE;
445 }
446
447 again:
448 /*
449 * Protect against a size change.
450 *
451 * Note: If took_truncate_lock is true, then we previously got the lock shared
452 * but needed to upgrade to exclusive. So try getting it exclusive from the
453 * start.
454 */
455 if (ioflag & IO_APPEND || took_truncate_lock) {
456 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
457 }
458 else {
459 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
460 }
461 took_truncate_lock = 1;
462
463 /* Update UIO */
464 if (ioflag & IO_APPEND) {
465 uio_setoffset(uio, fp->ff_size);
466 offset = fp->ff_size;
467 }
468 if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
469 retval = EPERM;
470 goto exit;
471 }
472
473 cred = vfs_context_ucred(ap->a_context);
474 if (cred && suser(cred, NULL) != 0)
475 eflags |= kEFReserveMask;
476
477 origFileSize = fp->ff_size;
478 writelimit = offset + resid;
479
480 /*
481 * We may need an exclusive truncate lock for several reasons, all
482 * of which are because we may be writing to a (portion of a) block
483 * for the first time, and we need to make sure no readers see the
484 * prior, uninitialized contents of the block. The cases are:
485 *
486 * 1. We have unallocated (delayed allocation) blocks. We may be
487 * allocating new blocks to the file and writing to them.
488 * (A more precise check would be whether the range we're writing
489 * to contains delayed allocation blocks.)
490 * 2. We need to extend the file. The bytes between the old EOF
491 * and the new EOF are not yet initialized. This is important
492 * even if we're not allocating new blocks to the file. If the
493 * old EOF and new EOF are in the same block, we still need to
494 * protect that range of bytes until they are written for the
495 * first time.
496 *
497 * If we had a shared lock with the above cases, we need to try to upgrade
498 * to an exclusive lock. If the upgrade fails, we will lose the shared
499 * lock, and will need to take the truncate lock again; the took_truncate_lock
500 * flag will still be set, causing us to try for an exclusive lock next time.
501 */
502 if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
503 ((fp->ff_unallocblocks != 0) ||
504 (writelimit > origFileSize))) {
505 if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
506 /*
507 * Lock upgrade failed and we lost our shared lock, try again.
508 * Note: we do not set took_truncate_lock=0 here. Leaving it
509 * set to 1 will cause us to try to get the lock exclusive.
510 */
511 goto again;
512 }
513 else {
514 /* Store the owner in the c_truncatelockowner field if we successfully upgrade */
515 cp->c_truncatelockowner = current_thread();
516 }
517 }
518
519 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
520 goto exit;
521 }
522 cnode_locked = 1;
523
524 filebytes = hfs_blk_to_bytes(fp->ff_blocks, hfsmp->blockSize);
525
526 if (offset > filebytes
527 && (hfs_blk_to_bytes(hfs_freeblks(hfsmp, ISSET(eflags, kEFReserveMask)),
528 hfsmp->blockSize) < offset - filebytes)) {
529 retval = ENOSPC;
530 goto exit;
531 }
532
533 KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_START,
534 (int)offset, uio_resid(uio), (int)fp->ff_size,
535 (int)filebytes, 0);
536
537 /* If we don't need to extend the file, skip the allocation below. */
538 if (writelimit <= filebytes) {
539 goto sizeok;
540 }
541
542 bytesToAdd = writelimit - filebytes;
543
544 #if QUOTA
545 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
546 cred, 0);
547 if (retval)
548 goto exit;
549 #endif /* QUOTA */
550
551 if (hfs_start_transaction(hfsmp) != 0) {
552 retval = EINVAL;
553 goto exit;
554 }
555
556 while (writelimit > filebytes) {
557 bytesToAdd = writelimit - filebytes;
558
559 /* Protect extents b-tree and allocation bitmap */
560 lockflags = SFL_BITMAP;
561 if (overflow_extents(fp))
562 lockflags |= SFL_EXTENTS;
563 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
564
565 /* Files that are changing size are not hot file candidates. */
566 if (hfsmp->hfc_stage == HFC_RECORDING) {
567 fp->ff_bytesread = 0;
568 }
569 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
570 0, eflags, &actualBytesAdded));
571
572 hfs_systemfile_unlock(hfsmp, lockflags);
573
574 if ((actualBytesAdded == 0) && (retval == E_NONE))
575 retval = ENOSPC;
576 if (retval != E_NONE)
577 break;
578 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
579 KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_NONE,
580 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
581 }
582 (void) hfs_update(vp, 0);
583 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
584 (void) hfs_end_transaction(hfsmp);
585
586 /*
587 * If we didn't grow the file enough, try a partial write.
588 * POSIX expects this behavior.
589 */
590 if ((retval == ENOSPC) && (filebytes > offset)) {
591 retval = 0;
592 partialwrite = 1;
593 uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
594 resid -= bytesToAdd;
595 writelimit = filebytes;
596 }
597 sizeok:
598 if (retval == E_NONE) {
599 off_t filesize;
600 off_t head_off;
601 int lflag;
602
603 if (writelimit > fp->ff_size) {
604 filesize = writelimit;
605 struct timeval tv;
606 rl_add(fp->ff_size, writelimit - 1 , &fp->ff_invalidranges);
607 microuptime(&tv);
608 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
609 } else
610 filesize = fp->ff_size;
611
612 lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
613
614 /*
615 * We no longer use IO_HEADZEROFILL or IO_TAILZEROFILL (except
616 * for one case below). For the regions that lie before the
617 * beginning and after the end of this write that are in the
618 * same page, we let the cluster code handle zeroing that out
619 * if necessary. If those areas are not cached, the cluster
620 * code will try to read those areas in, and in the case
621 * where those regions have never been written to,
622 * hfs_vnop_blockmap will consult the invalid ranges and then
623 * indicate that. The cluster code will zero out those areas.
624 */
625
626 head_off = trunc_page_64(offset);
627
628 if (head_off < offset && head_off >= fp->ff_size) {
629 /*
630 * The first page is beyond current EOF, so as an
631 * optimisation, we can pass IO_HEADZEROFILL.
632 */
633 lflag |= IO_HEADZEROFILL;
634 }
635
636 hfs_unlock(cp);
637 cnode_locked = 0;
638
639 /*
640 * We need to tell UBC the fork's new size BEFORE calling
641 * cluster_write, in case any of the new pages need to be
642 * paged out before cluster_write completes (which does happen
643 * in embedded systems due to extreme memory pressure).
644 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
645 * will be, so that it can pass that on to cluster_pageout, and
646 * allow those pageouts.
647 *
648 * We don't update ff_size yet since we don't want pageins to
649 * be able to see uninitialized data between the old and new
650 * EOF, until cluster_write has completed and initialized that
651 * part of the file.
652 *
653 * The vnode pager relies on the file size last given to UBC via
654 * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
655 * ff_size (whichever is larger). NOTE: ff_new_size is always
656 * zero, unless we are extending the file via write.
657 */
658 if (filesize > fp->ff_size) {
659 retval = hfs_zero_eof_page(vp, offset);
660 if (retval)
661 goto exit;
662 fp->ff_new_size = filesize;
663 ubc_setsize(vp, filesize);
664 }
665 retval = cluster_write(vp, uio, fp->ff_size, filesize, head_off,
666 0, lflag | IO_NOZERODIRTY | io_return_on_throttle);
667 if (retval) {
668 fp->ff_new_size = 0; /* no longer extending; use ff_size */
669
670 if (retval == EAGAIN) {
671 /*
672 * EAGAIN indicates that we still have I/O to do, but
673 * that we now need to be throttled
674 */
675 if (resid != uio_resid(uio)) {
676 /*
677 * did manage to do some I/O before returning EAGAIN
678 */
679 resid = uio_resid(uio);
680 offset = uio_offset(uio);
681
682 cp->c_touch_chgtime = TRUE;
683 cp->c_touch_modtime = TRUE;
684 hfs_incr_gencount(cp);
685 }
686 if (filesize > fp->ff_size) {
687 /*
688 * we called ubc_setsize before the call to
689 * cluster_write... since we only partially
690 * completed the I/O, we need to
691 * re-adjust our idea of the filesize based
692 * on our interim EOF
693 */
694 ubc_setsize(vp, offset);
695
696 fp->ff_size = offset;
697 }
698 goto exit;
699 }
700 if (filesize > origFileSize) {
701 ubc_setsize(vp, origFileSize);
702 }
703 goto ioerr_exit;
704 }
705
706 if (filesize > origFileSize) {
707 fp->ff_size = filesize;
708
709 /* Files that are changing size are not hot file candidates. */
710 if (hfsmp->hfc_stage == HFC_RECORDING) {
711 fp->ff_bytesread = 0;
712 }
713 }
714 fp->ff_new_size = 0; /* ff_size now has the correct size */
715 }
716 if (partialwrite) {
717 uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
718 resid += bytesToAdd;
719 }
720
721 if (vnode_should_flush_after_write(vp, ioflag))
722 hfs_flush(hfsmp, HFS_FLUSH_CACHE);
723
724 ioerr_exit:
725 if (!cnode_locked) {
726 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
727 cnode_locked = 1;
728 }
729
730 if (resid > uio_resid(uio)) {
731 cp->c_touch_chgtime = TRUE;
732 cp->c_touch_modtime = TRUE;
733 hfs_incr_gencount(cp);
734
735 /*
736 * If we successfully wrote any data, and we are not the superuser,
737 * we clear the setuid and setgid bits as a precaution against
738 * tampering.
739 */
740 if (cp->c_mode & (S_ISUID | S_ISGID)) {
741 cred = vfs_context_ucred(ap->a_context);
742 if (cred && suser(cred, NULL)) {
743 cp->c_mode &= ~(S_ISUID | S_ISGID);
744 }
745 }
746 }
747 if (retval) {
748 if (ioflag & IO_UNIT) {
749 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
750 0, ap->a_context);
751 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
752 uio_setresid(uio, resid);
753 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
754 }
755 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio)))
756 retval = hfs_update(vp, 0);
757
758 /* Updating vcbWrCnt doesn't need to be atomic. */
759 hfsmp->vcbWrCnt++;
760
761 KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_END,
762 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
763 exit:
764 if (retval && took_truncate_lock
765 && cp->c_truncatelockowner == current_thread()) {
766 fp->ff_new_size = 0;
767 rl_remove(fp->ff_size, RL_INFINITY, &fp->ff_invalidranges);
768 }
769
770 if (cnode_locked)
771 hfs_unlock(cp);
772
773 if (took_truncate_lock) {
774 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
775 }
776 if (retval == EAGAIN) {
777 throttle_lowpri_io(1);
778 throttled_count++;
779
780 retval = 0;
781 goto again;
782 }
783 if (throttled_count)
784 throttle_info_reset_window(NULL);
785 return (retval);
786 }
787
788 /* support for the "bulk-access" fcntl */
789
790 #define CACHE_LEVELS 16
791 #define NUM_CACHE_ENTRIES (64*16)
792 #define PARENT_IDS_FLAG 0x100
793
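/*
 * The access cache is a pair of parallel arrays: acache[] holds directory
 * cnids kept in ascending order (so lookup_bucket() can binary-search it),
 * and haveaccess[] holds the corresponding result for each entry (0 for
 * 'has access', otherwise an errno such as EACCES or ESRCH).
 */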
794 struct access_cache {
795 int numcached;
796 int cachehits; /* these two for statistics gathering */
797 int lookups;
798 unsigned int *acache;
799 unsigned char *haveaccess;
800 };
801
802 struct access_t {
803 uid_t uid; /* IN: effective user id */
804 short flags; /* IN: access requested (i.e. R_OK) */
805 short num_groups; /* IN: number of groups user belongs to */
806 int num_files; /* IN: number of files to process */
807 int *file_ids; /* IN: array of file ids */
808 gid_t *groups; /* IN: array of groups */
809 short *access; /* OUT: access info for each file (0 for 'has access') */
810 } __attribute__((unavailable)); // this structure is for reference purposes only
811
812 struct user32_access_t {
813 uid_t uid; /* IN: effective user id */
814 short flags; /* IN: access requested (i.e. R_OK) */
815 short num_groups; /* IN: number of groups user belongs to */
816 int num_files; /* IN: number of files to process */
817 user32_addr_t file_ids; /* IN: array of file ids */
818 user32_addr_t groups; /* IN: array of groups */
819 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
820 };
821
822 struct user64_access_t {
823 uid_t uid; /* IN: effective user id */
824 short flags; /* IN: access requested (i.e. R_OK) */
825 short num_groups; /* IN: number of groups user belongs to */
826 int num_files; /* IN: number of files to process */
827 user64_addr_t file_ids; /* IN: array of file ids */
828 user64_addr_t groups; /* IN: array of groups */
829 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
830 };
831
832
833 // these are the "extended" versions of the above structures
834 // note that it is crucial that they be a different size than
835 // the regular versions
836 struct ext_access_t {
837 uint32_t flags; /* IN: access requested (i.e. R_OK) */
838 uint32_t num_files; /* IN: number of files to process */
839 uint32_t map_size; /* IN: size of the bit map */
840 uint32_t *file_ids; /* IN: Array of file ids */
841 char *bitmap; /* OUT: hash-bitmap of interesting directory ids */
842 short *access; /* OUT: access info for each file (0 for 'has access') */
843 uint32_t num_parents; /* future use */
844 cnid_t *parents; /* future use */
845 } __attribute__((unavailable)); // this structure is for reference purposes only
846
847 struct user32_ext_access_t {
848 uint32_t flags; /* IN: access requested (i.e. R_OK) */
849 uint32_t num_files; /* IN: number of files to process */
850 uint32_t map_size; /* IN: size of the bit map */
851 user32_addr_t file_ids; /* IN: Array of file ids */
852 user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
853 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
854 uint32_t num_parents; /* future use */
855 user32_addr_t parents; /* future use */
856 };
857
858 struct user64_ext_access_t {
859 uint32_t flags; /* IN: access requested (i.e. R_OK) */
860 uint32_t num_files; /* IN: number of files to process */
861 uint32_t map_size; /* IN: size of the bit map */
862 user64_addr_t file_ids; /* IN: array of file ids */
863 user64_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
864 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
865 uint32_t num_parents;/* future use */
866 user64_addr_t parents;/* future use */
867 };
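/*
 * Sketch of the user-space side of this fcntl (illustrative only; the exact
 * selector name and request struct come from hfs_fsctl.h and are assumed
 * here rather than defined in this file):
 *
 *     struct ext_access_t req = {
 *         .flags     = R_OK,
 *         .num_files = n,
 *         .file_ids  = ids,       // array of n catalog node ids
 *         .access    = results,   // array of n shorts filled in by the kernel
 *     };
 *     fsctl(volume_path, HFS_EXT_BULKACCESS_FSCTL, &req, 0);
 *
 * On return, results[i] is 0 if the caller can reach file ids[i], otherwise
 * an errno explaining why not.
 */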
868
869
870 /*
871 * Perform a binary search for the given parent_id. Return value is
872 * the index if there is a match. If no_match_indexp is non-NULL, it
873 * is set to the index at which the item should be inserted (whether or
874 * not a match was found).
875 */
876 static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
877 {
878 int index=-1;
879 unsigned int lo=0;
880
881 do {
882 unsigned int mid = ((hi - lo)/2) + lo;
883 unsigned int this_id = array[mid];
884
885 if (parent_id == this_id) {
886 hi = mid;
887 break;
888 }
889
890 if (parent_id < this_id) {
891 hi = mid;
892 continue;
893 }
894
895 if (parent_id > this_id) {
896 lo = mid + 1;
897 continue;
898 }
899 } while(lo < hi);
900
901 /* check if lo and hi converged on the match */
902 if (parent_id == array[hi]) {
903 index = hi;
904 }
905
906 if (no_match_indexp) {
907 *no_match_indexp = hi;
908 }
909
910 return index;
911 }
912
913
914 static int
915 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
916 {
917 unsigned int hi;
918 int matches = 0;
919 int index, no_match_index;
920
921 if (cache->numcached == 0) {
922 *indexp = 0;
923 return 0; // table is empty, so insert at index=0 and report no match
924 }
925
926 if (cache->numcached > NUM_CACHE_ENTRIES) {
927 cache->numcached = NUM_CACHE_ENTRIES;
928 }
929
930 hi = cache->numcached - 1;
931
932 index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);
933
934 /* if no existing entry found, find index for new one */
935 if (index == -1) {
936 index = no_match_index;
937 matches = 0;
938 } else {
939 matches = 1;
940 }
941
942 *indexp = index;
943 return matches;
944 }
945
946 /*
947 * Add a node to the access_cache at the given index (or do a lookup first
948 * to find the index if -1 is passed in). We currently do a replace rather
949 * than an insert if the cache is full.
950 */
951 static void
952 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
953 {
954 int lookup_index = -1;
955
956 /* need to do a lookup first if -1 passed for index */
957 if (index == -1) {
958 if (lookup_bucket(cache, &lookup_index, nodeID)) {
959 if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
960 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
961 cache->haveaccess[lookup_index] = access;
962 }
963
964 /* mission accomplished */
965 return;
966 } else {
967 index = lookup_index;
968 }
969
970 }
971
972 /* if the cache is full, do a replace rather than an insert */
973 if (cache->numcached >= NUM_CACHE_ENTRIES) {
974 cache->numcached = NUM_CACHE_ENTRIES-1;
975
976 if (index > cache->numcached) {
977 index = cache->numcached;
978 }
979 }
980
981 if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
982 index++;
983 }
984
985 if (index >= 0 && index < cache->numcached) {
986 /* only do bcopy if we're inserting */
987 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
988 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
989 }
990
991 cache->acache[index] = nodeID;
992 cache->haveaccess[index] = access;
993 cache->numcached++;
994 }
995
996
997 struct cinfo {
998 uid_t uid;
999 gid_t gid;
1000 mode_t mode;
1001 cnid_t parentcnid;
1002 u_int16_t recflags;
1003 };
1004
1005 static int
1006 snoop_callback(const cnode_t *cp, void *arg)
1007 {
1008 struct cinfo *cip = arg;
1009
1010 cip->uid = cp->c_uid;
1011 cip->gid = cp->c_gid;
1012 cip->mode = cp->c_mode;
1013 cip->parentcnid = cp->c_parentcnid;
1014 cip->recflags = cp->c_attr.ca_recflags;
1015
1016 return (0);
1017 }
1018
1019 /*
1020 * Look up the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
1021 * isn't incore, then go to the catalog.
1022 */
1023 static int
1024 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
1025 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
1026 {
1027 int error = 0;
1028
1029 /* if this id matches the one the fsctl was called with, skip the lookup */
1030 if (cnid == skip_cp->c_cnid) {
1031 cnattrp->ca_uid = skip_cp->c_uid;
1032 cnattrp->ca_gid = skip_cp->c_gid;
1033 cnattrp->ca_mode = skip_cp->c_mode;
1034 cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
1035 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
1036 } else {
1037 struct cinfo c_info;
1038
1039 /* otherwise, check the cnode hash in case the file/dir is incore */
1040 error = hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info);
1041
1042 if (error == EACCES) {
1043 // File is deleted
1044 return ENOENT;
1045 } else if (!error) {
1046 cnattrp->ca_uid = c_info.uid;
1047 cnattrp->ca_gid = c_info.gid;
1048 cnattrp->ca_mode = c_info.mode;
1049 cnattrp->ca_recflags = c_info.recflags;
1050 keyp->hfsPlus.parentID = c_info.parentcnid;
1051 } else {
1052 int lockflags;
1053
1054 if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
1055 throttle_lowpri_io(1);
1056
1057 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1058
1059 /* lookup this cnid in the catalog */
1060 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
1061
1062 hfs_systemfile_unlock(hfsmp, lockflags);
1063
1064 cache->lookups++;
1065 }
1066 }
1067
1068 return (error);
1069 }
1070
1071
1072 /*
1073 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
1074 * up to CACHE_LEVELS as we progress towards the root.
1075 */
1076 static int
1077 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
1078 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
1079 struct vfs_context *my_context,
1080 char *bitmap,
1081 uint32_t map_size,
1082 cnid_t* parents,
1083 uint32_t num_parents)
1084 {
1085 int myErr = 0;
1086 int myResult;
1087 HFSCatalogNodeID thisNodeID;
1088 unsigned int myPerms;
1089 struct cat_attr cnattr;
1090 int cache_index = -1, scope_index = -1, scope_idx_start = -1;
1091 CatalogKey catkey;
1092
1093 int i = 0, ids_to_cache = 0;
1094 int parent_ids[CACHE_LEVELS];
1095
1096 thisNodeID = nodeID;
1097 while (thisNodeID >= kRootDirID) {
1098 myResult = 0; /* default to "no access" */
1099
1100 /* check the cache before resorting to hitting the catalog */
1101
1102 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
1103 * to look any further after hitting cached dir */
1104
1105 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
1106 cache->cachehits++;
1107 myErr = cache->haveaccess[cache_index];
1108 if (scope_index != -1) {
1109 if (myErr == ESRCH) {
1110 myErr = 0;
1111 }
1112 } else {
1113 scope_index = 0; // so we'll just use the cache result
1114 scope_idx_start = ids_to_cache;
1115 }
1116 myResult = (myErr == 0) ? 1 : 0;
1117 goto ExitThisRoutine;
1118 }
1119
1120
1121 if (parents) {
1122 int tmp;
1123 tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
1124 if (scope_index == -1)
1125 scope_index = tmp;
1126 if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
1127 scope_idx_start = ids_to_cache;
1128 }
1129 }
1130
1131 /* remember which parents we want to cache */
1132 if (ids_to_cache < CACHE_LEVELS) {
1133 parent_ids[ids_to_cache] = thisNodeID;
1134 ids_to_cache++;
1135 }
1136 // Inefficient (uses modulo); we might want a real hash function here rather than relying on the node id to be "nice"...
1137 if (bitmap && map_size) {
1138 bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
1139 }
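// Example: for thisNodeID == 100, the bitmap update above sets bit
// (100 & 7) == 4 of byte (100 / 8) % map_size == 12 % map_size.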
1140
1141
1142 /* do the lookup (checks the cnode hash, then the catalog) */
1143 myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
1144 if (myErr) {
1145 goto ExitThisRoutine; /* no access */
1146 }
1147
1148 /* Root always gets access. */
1149 if (suser(myp_ucred, NULL) == 0) {
1150 thisNodeID = catkey.hfsPlus.parentID;
1151 myResult = 1;
1152 continue;
1153 }
1154
1155 // if the thing has acl's, do the full permission check
1156 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1157 struct vnode *vp;
1158
1159 /* get the vnode for this cnid */
1160 myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
1161 if ( myErr ) {
1162 myResult = 0;
1163 goto ExitThisRoutine;
1164 }
1165
1166 thisNodeID = VTOC(vp)->c_parentcnid;
1167
1168 hfs_unlock(VTOC(vp));
1169
1170 if (vnode_vtype(vp) == VDIR) {
1171 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
1172 } else {
1173 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
1174 }
1175
1176 vnode_put(vp);
1177 if (myErr) {
1178 myResult = 0;
1179 goto ExitThisRoutine;
1180 }
1181 } else {
1182 unsigned int flags;
1183 int mode = cnattr.ca_mode & S_IFMT;
1184 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp,myp_ucred, theProcPtr);
1185
1186 if (mode == S_IFDIR) {
1187 flags = R_OK | X_OK;
1188 } else {
1189 flags = R_OK;
1190 }
1191 if ( (myPerms & flags) != flags) {
1192 myResult = 0;
1193 myErr = EACCES;
1194 goto ExitThisRoutine; /* no access */
1195 }
1196
1197 /* up the hierarchy we go */
1198 thisNodeID = catkey.hfsPlus.parentID;
1199 }
1200 }
1201
1202 /* if here, we have access to this node */
1203 myResult = 1;
1204
1205 ExitThisRoutine:
1206 if (parents && myErr == 0 && scope_index == -1) {
1207 myErr = ESRCH;
1208 }
1209
1210 if (myErr) {
1211 myResult = 0;
1212 }
1213 *err = myErr;
1214
1215 /* cache the parent directory(ies) */
1216 for (i = 0; i < ids_to_cache; i++) {
1217 if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
1218 add_node(cache, -1, parent_ids[i], ESRCH);
1219 } else {
1220 add_node(cache, -1, parent_ids[i], myErr);
1221 }
1222 }
1223
1224 return (myResult);
1225 }
1226
1227 static int
1228 do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
1229 struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
1230 {
1231 boolean_t is64bit;
1232
1233 /*
1234 * NOTE: on entry, the vnode has an io_ref. In case this vnode
1235 * happens to be in our list of file_ids, we'll note it so we can
1236 * avoid calling hfs_chashget_nowait() on that id, as that
1237 * will cause a "locking against myself" panic.
1238 */
1239 Boolean check_leaf = true;
1240
1241 struct user64_ext_access_t *user_access_structp;
1242 struct user64_ext_access_t tmp_user_access;
1243 struct access_cache cache;
1244
1245 int error = 0, prev_parent_check_ok=1;
1246 unsigned int i;
1247
1248 short flags;
1249 unsigned int num_files = 0;
1250 int map_size = 0;
1251 int num_parents = 0;
1252 int *file_ids=NULL;
1253 short *access=NULL;
1254 char *bitmap=NULL;
1255 cnid_t *parents=NULL;
1256 int leaf_index;
1257
1258 cnid_t cnid;
1259 cnid_t prevParent_cnid = 0;
1260 unsigned int myPerms;
1261 short myaccess = 0;
1262 struct cat_attr cnattr;
1263 CatalogKey catkey;
1264 struct cnode *skip_cp = VTOC(vp);
1265 kauth_cred_t cred = vfs_context_ucred(context);
1266 proc_t p = vfs_context_proc(context);
1267
1268 is64bit = proc_is64bit(p);
1269
1270 /* initialize the local cache and buffers */
1271 cache.numcached = 0;
1272 cache.cachehits = 0;
1273 cache.lookups = 0;
1274 cache.acache = NULL;
1275 cache.haveaccess = NULL;
1276
1277 /* struct copyin done during dispatch... need to copy file_id array separately */
1278 if (ap->a_data == NULL) {
1279 error = EINVAL;
1280 goto err_exit_bulk_access;
1281 }
1282
1283 if (is64bit) {
1284 if (arg_size != sizeof(struct user64_ext_access_t)) {
1285 error = EINVAL;
1286 goto err_exit_bulk_access;
1287 }
1288
1289 user_access_structp = (struct user64_ext_access_t *)ap->a_data;
1290
1291 } else if (arg_size == sizeof(struct user32_access_t)) {
1292 struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;
1293
1294 // convert an old style bulk-access struct to the new style
1295 tmp_user_access.flags = accessp->flags;
1296 tmp_user_access.num_files = accessp->num_files;
1297 tmp_user_access.map_size = 0;
1298 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1299 tmp_user_access.bitmap = USER_ADDR_NULL;
1300 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1301 tmp_user_access.num_parents = 0;
1302 user_access_structp = &tmp_user_access;
1303
1304 } else if (arg_size == sizeof(struct user32_ext_access_t)) {
1305 struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;
1306
1307 // up-cast from a 32-bit version of the struct
1308 tmp_user_access.flags = accessp->flags;
1309 tmp_user_access.num_files = accessp->num_files;
1310 tmp_user_access.map_size = accessp->map_size;
1311 tmp_user_access.num_parents = accessp->num_parents;
1312
1313 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1314 tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
1315 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1316 tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);
1317
1318 user_access_structp = &tmp_user_access;
1319 } else {
1320 error = EINVAL;
1321 goto err_exit_bulk_access;
1322 }
1323
1324 map_size = user_access_structp->map_size;
1325
1326 num_files = user_access_structp->num_files;
1327
1328 num_parents = user_access_structp->num_parents;
1329
1330 if (num_files < 1) {
1331 goto err_exit_bulk_access;
1332 }
1333 if (num_files > 1024) {
1334 error = EINVAL;
1335 goto err_exit_bulk_access;
1336 }
1337
1338 if (num_parents > 1024) {
1339 error = EINVAL;
1340 goto err_exit_bulk_access;
1341 }
1342
1343 file_ids = hfs_malloc(sizeof(int) * num_files);
1344 access = hfs_malloc(sizeof(short) * num_files);
1345 if (map_size) {
1346 bitmap = hfs_mallocz(sizeof(char) * map_size);
1347 }
1348
1349 if (num_parents) {
1350 parents = hfs_malloc(sizeof(cnid_t) * num_parents);
1351 }
1352
1353 cache.acache = hfs_malloc(sizeof(int) * NUM_CACHE_ENTRIES);
1354 cache.haveaccess = hfs_malloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1355
1356 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1357 num_files * sizeof(int)))) {
1358 goto err_exit_bulk_access;
1359 }
1360
1361 if (num_parents) {
1362 if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
1363 num_parents * sizeof(cnid_t)))) {
1364 goto err_exit_bulk_access;
1365 }
1366 }
1367
1368 flags = user_access_structp->flags;
1369 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1370 flags = R_OK;
1371 }
1372
1373 /* check if we've been passed leaf node ids or parent ids */
1374 if (flags & PARENT_IDS_FLAG) {
1375 check_leaf = false;
1376 }
1377
1378 /* Check access to each file_id passed in */
1379 for (i = 0; i < num_files; i++) {
1380 leaf_index=-1;
1381 cnid = (cnid_t) file_ids[i];
1382
1383 /* root always has access */
1384 if ((!parents) && (!suser(cred, NULL))) {
1385 access[i] = 0;
1386 continue;
1387 }
1388
1389 if (check_leaf) {
1390 /* do the lookup (checks the cnode hash, then the catalog) */
1391 error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
1392 if (error) {
1393 access[i] = (short) error;
1394 continue;
1395 }
1396
1397 if (parents) {
1398 // Check if the leaf matches one of the parent scopes
1399 leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
1400 if (leaf_index >= 0 && parents[leaf_index] == cnid)
1401 prev_parent_check_ok = 0;
1402 else if (leaf_index >= 0)
1403 prev_parent_check_ok = 1;
1404 }
1405
1406 // if the thing has acl's, do the full permission check
1407 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1408 struct vnode *cvp;
1409 int myErr = 0;
1410 /* get the vnode for this cnid */
1411 myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);
1412 if ( myErr ) {
1413 access[i] = myErr;
1414 continue;
1415 }
1416
1417 hfs_unlock(VTOC(cvp));
1418
1419 if (vnode_vtype(cvp) == VDIR) {
1420 myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
1421 } else {
1422 myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
1423 }
1424
1425 vnode_put(cvp);
1426 if (myErr) {
1427 access[i] = myErr;
1428 continue;
1429 }
1430 } else {
1431 /* before calling CheckAccess(), check the target file for read access */
1432 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1433 cnattr.ca_mode, hfsmp->hfs_mp, cred, p);
1434
1435 /* fail fast if no access */
1436 if ((myPerms & flags) == 0) {
1437 access[i] = EACCES;
1438 continue;
1439 }
1440 }
1441 } else {
1442 /* we were passed an array of parent ids */
1443 catkey.hfsPlus.parentID = cnid;
1444 }
1445
1446 /* if the last guy had the same parent and had access, we're done */
1447 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
1448 cache.cachehits++;
1449 access[i] = 0;
1450 continue;
1451 }
1452
1453 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1454 skip_cp, p, cred, context, bitmap, map_size, parents, num_parents);
1455
1456 if (myaccess || (error == ESRCH && leaf_index != -1)) {
1457 access[i] = 0; // have access.. no errors to report
1458 } else {
1459 access[i] = (error != 0 ? (short) error : EACCES);
1460 }
1461
1462 prevParent_cnid = catkey.hfsPlus.parentID;
1463 }
1464
1465 /* copyout the access array */
1466 if ((error = copyout((caddr_t)access, user_access_structp->access,
1467 num_files * sizeof (short)))) {
1468 goto err_exit_bulk_access;
1469 }
1470 if (map_size && bitmap) {
1471 if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
1472 map_size * sizeof (char)))) {
1473 goto err_exit_bulk_access;
1474 }
1475 }
1476
1477
1478 err_exit_bulk_access:
1479
1480 hfs_free(file_ids, sizeof(int) * num_files);
1481 hfs_free(parents, sizeof(cnid_t) * num_parents);
1482 hfs_free(bitmap, sizeof(char) * map_size);
1483 hfs_free(access, sizeof(short) * num_files);
1484 hfs_free(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1485 hfs_free(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1486
1487 return (error);
1488 }
1489
1490
1491 /* end "bulk-access" support */
1492
1493
1494 /*
1495 * Control filesystem operating characteristics.
1496 */
1497 int
1498 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
1499 vnode_t a_vp;
1500 long a_command;
1501 caddr_t a_data;
1502 int a_fflag;
1503 vfs_context_t a_context;
1504 } */ *ap)
1505 {
1506 struct vnode * vp = ap->a_vp;
1507 struct hfsmount *hfsmp = VTOHFS(vp);
1508 vfs_context_t context = ap->a_context;
1509 kauth_cred_t cred = vfs_context_ucred(context);
1510 proc_t p = vfs_context_proc(context);
1511 struct vfsstatfs *vfsp;
1512 boolean_t is64bit;
1513 off_t jnl_start, jnl_size;
1514 struct hfs_journal_info *jip;
1515 #if HFS_COMPRESSION
1516 int compressed = 0;
1517 off_t uncompressed_size = -1;
1518 int decmpfs_error = 0;
1519
1520 if (ap->a_command == F_RDADVISE) {
1521 /* we need to inspect the decmpfs state of the file as early as possible */
1522 compressed = hfs_file_is_compressed(VTOC(vp), 0);
1523 if (compressed) {
1524 if (VNODE_IS_RSRC(vp)) {
1525 /* if this is the resource fork, treat it as if it were empty */
1526 uncompressed_size = 0;
1527 } else {
1528 decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
1529 if (decmpfs_error != 0) {
1530 /* failed to get the uncompressed size, we'll check for this later */
1531 uncompressed_size = -1;
1532 }
1533 }
1534 }
1535 }
1536 #endif /* HFS_COMPRESSION */
1537
1538 is64bit = proc_is64bit(p);
1539
1540 #if CONFIG_PROTECT
1541 #if HFS_CONFIG_KEY_ROLL
1542 // The HFSIOC_KEY_ROLL fsctl does its own access checks
1543 if (ap->a_command != HFSIOC_KEY_ROLL)
1544 #endif
1545 {
1546 int error = 0;
1547 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
1548 return error;
1549 }
1550 }
1551 #endif /* CONFIG_PROTECT */
1552
1553 switch (ap->a_command) {
1554
1555 case HFSIOC_GETPATH:
1556 {
1557 struct vnode *file_vp;
1558 cnid_t cnid;
1559 int outlen;
1560 char *bufptr;
1561 int error;
1562 int flags = 0;
1563
1564 /* Caller must be owner of file system. */
1565 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1566 if (suser(cred, NULL) &&
1567 kauth_cred_getuid(cred) != vfsp->f_owner) {
1568 return (EACCES);
1569 }
1570 /* Target vnode must be file system's root. */
1571 if (!vnode_isvroot(vp)) {
1572 return (EINVAL);
1573 }
1574 bufptr = (char *)ap->a_data;
1575 cnid = strtoul(bufptr, NULL, 10);
1576 if (ap->a_fflag & HFS_GETPATH_VOLUME_RELATIVE) {
1577 flags |= BUILDPATH_VOLUME_RELATIVE;
1578 }
1579
1580 /* We need to call hfs_vfs_vget to leverage the code that will
1581 * fix the origin list for us if needed, as opposed to calling
1582 * hfs_vget, since we will need the parent for the build_path call.
1583 */
1584
1585 if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
1586 return (error);
1587 }
1588
1589 error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, flags, context);
1590 vnode_put(file_vp);
1591
1592 return (error);
1593 }
1594
1595 case HFSIOC_SET_MAX_DEFRAG_SIZE:
1596 {
1597 int error = 0; /* Assume success */
1598 u_int32_t maxsize = 0;
1599
1600 if (vnode_vfsisrdonly(vp)) {
1601 return (EROFS);
1602 }
1603 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1604 if (!kauth_cred_issuser(cred)) {
1605 return (EACCES); /* must be root */
1606 }
1607
1608 maxsize = *(u_int32_t *)ap->a_data;
1609
1610 hfs_lock_mount(hfsmp);
1611 if (maxsize > HFS_MAX_DEFRAG_SIZE) {
1612 error = EINVAL;
1613 }
1614 else {
1615 hfsmp->hfs_defrag_max = maxsize;
1616 }
1617 hfs_unlock_mount(hfsmp);
1618
1619 return (error);
1620 }
1621
1622 case HFSIOC_FORCE_ENABLE_DEFRAG:
1623 {
1624 int error = 0; /* Assume success */
1625 u_int32_t do_enable = 0;
1626
1627 if (vnode_vfsisrdonly(vp)) {
1628 return (EROFS);
1629 }
1630 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1631 if (!kauth_cred_issuser(cred)) {
1632 return (EACCES); /* must be root */
1633 }
1634
1635 do_enable = *(u_int32_t *)ap->a_data;
1636
1637 hfs_lock_mount(hfsmp);
1638 if (do_enable != 0) {
1639 hfsmp->hfs_defrag_nowait = 1;
1640 }
1641 else {
1642 error = EINVAL;
1643 }
1644
1645 hfs_unlock_mount(hfsmp);
1646
1647 return (error);
1648 }
1649
1650
1651 case HFSIOC_TRANSFER_DOCUMENT_ID:
1652 {
1653 struct cnode *cp = NULL;
1654 int error;
1655 u_int32_t to_fd = *(u_int32_t *)ap->a_data;
1656 struct fileproc *to_fp;
1657 struct vnode *to_vp;
1658 struct cnode *to_cp;
1659
1660 cp = VTOC(vp);
1661
1662 if ((error = fp_getfvp(p, to_fd, &to_fp, &to_vp)) != 0) {
1663 //printf("could not get the vnode for fd %d (err %d)\n", to_fd, error);
1664 return error;
1665 }
1666 if ( (error = vnode_getwithref(to_vp)) ) {
1667 file_drop(to_fd);
1668 return error;
1669 }
1670
1671 if (VTOHFS(to_vp) != hfsmp) {
1672 error = EXDEV;
1673 goto transfer_cleanup;
1674 }
1675
1676 int need_unlock = 1;
1677 to_cp = VTOC(to_vp);
1678 error = hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
1679 if (error != 0) {
1680 //printf("could not lock the pair of cnodes (error %d)\n", error);
1681 goto transfer_cleanup;
1682 }
1683
1684 if (!(cp->c_bsdflags & UF_TRACKED)) {
1685 error = EINVAL;
1686 } else if (to_cp->c_bsdflags & UF_TRACKED) {
1687 //
1688 // if the destination is already tracked, return an error
1689 // as otherwise it's a silent deletion of the target's
1690 // document-id
1691 //
1692 error = EEXIST;
1693 } else if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
1694 //
1695 // we can use the FndrExtendedFileInfo because the doc-id is the first
1696 // thing in both it and the ExtendedDirInfo struct which is fixed in
1697 // format and can not change layout
1698 //
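// (c_finderinfo is the 32-byte Finder info; the extended info occupies its
// second 16 bytes, which is why both pointers below are taken at
// c_finderinfo + 16, and document_id is the first field there.)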
1699 struct FndrExtendedFileInfo *f_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16);
1700 struct FndrExtendedFileInfo *to_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)to_cp->c_finderinfo + 16);
1701
1702 if (f_extinfo->document_id == 0) {
1703 uint32_t new_id;
1704
1705 hfs_unlockpair(cp, to_cp); // have to unlock to be able to get a new-id
1706
1707 if ((error = hfs_generate_document_id(hfsmp, &new_id)) == 0) {
1708 //
1709 // re-lock the pair now that we have the document-id
1710 //
1711 hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
1712 f_extinfo->document_id = new_id;
1713 } else {
1714 goto transfer_cleanup;
1715 }
1716 }
1717
1718 to_extinfo->document_id = f_extinfo->document_id;
1719 f_extinfo->document_id = 0;
1720 //printf("TRANSFERRING: doc-id %d from ino %d to ino %d\n", to_extinfo->document_id, cp->c_fileid, to_cp->c_fileid);
1721
1722 // make sure the destination is also UF_TRACKED
1723 to_cp->c_bsdflags |= UF_TRACKED;
1724 cp->c_bsdflags &= ~UF_TRACKED;
1725
1726 // mark the cnodes dirty
1727 cp->c_flag |= C_MODIFIED;
1728 to_cp->c_flag |= C_MODIFIED;
1729
1730 int lockflags;
1731 if ((error = hfs_start_transaction(hfsmp)) == 0) {
1732
1733 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1734
1735 (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
1736 (void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, NULL, NULL);
1737
1738 hfs_systemfile_unlock (hfsmp, lockflags);
1739 (void) hfs_end_transaction(hfsmp);
1740 }
1741
1742 add_fsevent(FSE_DOCID_CHANGED, context,
1743 FSE_ARG_DEV, hfsmp->hfs_raw_dev,
1744 FSE_ARG_INO, (ino64_t)cp->c_fileid, // src inode #
1745 FSE_ARG_INO, (ino64_t)to_cp->c_fileid, // dst inode #
1746 FSE_ARG_INT32, to_extinfo->document_id,
1747 FSE_ARG_DONE);
1748
1749 hfs_unlockpair(cp, to_cp); // unlock this so we can send the fsevents
1750 need_unlock = 0;
1751
1752 if (need_fsevent(FSE_STAT_CHANGED, vp)) {
1753 add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
1754 }
1755 if (need_fsevent(FSE_STAT_CHANGED, to_vp)) {
1756 add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, to_vp, FSE_ARG_DONE);
1757 }
1758 }
1759
1760 if (need_unlock) {
1761 hfs_unlockpair(cp, to_cp);
1762 }
1763
1764 transfer_cleanup:
1765 vnode_put(to_vp);
1766 file_drop(to_fd);
1767
1768 return error;
1769 }
1770
1771
1772
1773 case HFSIOC_PREV_LINK:
1774 case HFSIOC_NEXT_LINK:
1775 {
1776 cnid_t linkfileid;
1777 cnid_t nextlinkid;
1778 cnid_t prevlinkid;
1779 int error;
1780
1781 /* Caller must be owner of file system. */
1782 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1783 if (suser(cred, NULL) &&
1784 kauth_cred_getuid(cred) != vfsp->f_owner) {
1785 return (EACCES);
1786 }
1787 /* Target vnode must be file system's root. */
1788 if (!vnode_isvroot(vp)) {
1789 return (EINVAL);
1790 }
1791 linkfileid = *(cnid_t *)ap->a_data;
1792 if (linkfileid < kHFSFirstUserCatalogNodeID) {
1793 return (EINVAL);
1794 }
1795 if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
1796 return (error);
1797 }
1798 if (ap->a_command == HFSIOC_NEXT_LINK) {
1799 *(cnid_t *)ap->a_data = nextlinkid;
1800 } else {
1801 *(cnid_t *)ap->a_data = prevlinkid;
1802 }
1803 return (0);
1804 }
1805
1806 case HFSIOC_RESIZE_PROGRESS: {
1807
1808 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1809 if (suser(cred, NULL) &&
1810 kauth_cred_getuid(cred) != vfsp->f_owner) {
1811 return (EACCES); /* must be owner of file system */
1812 }
1813 if (!vnode_isvroot(vp)) {
1814 return (EINVAL);
1815 }
1816 /* file system must not be mounted read-only */
1817 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1818 return (EROFS);
1819 }
1820
1821 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
1822 }
1823
1824 case HFSIOC_RESIZE_VOLUME: {
1825 u_int64_t newsize;
1826 u_int64_t cursize;
1827 int ret;
1828
1829 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1830 if (suser(cred, NULL) &&
1831 kauth_cred_getuid(cred) != vfsp->f_owner) {
1832 return (EACCES); /* must be owner of file system */
1833 }
1834 if (!vnode_isvroot(vp)) {
1835 return (EINVAL);
1836 }
1837
1838 /* filesystem must not be mounted read only */
1839 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1840 return (EROFS);
1841 }
1842 newsize = *(u_int64_t *)ap->a_data;
1843 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
1844
1845 if (newsize == cursize) {
1846 return (0);
1847 }
1848 IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeWillResize);
1849 if (newsize > cursize) {
1850 ret = hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
1851 } else {
1852 ret = hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
1853 }
1854 IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeDidResize);
1855 return (ret);
1856 }
1857 case HFSIOC_CHANGE_NEXT_ALLOCATION: {
1858 int error = 0; /* Assume success */
1859 u_int32_t location;
1860
1861 if (vnode_vfsisrdonly(vp)) {
1862 return (EROFS);
1863 }
1864 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1865 if (suser(cred, NULL) &&
1866 kauth_cred_getuid(cred) != vfsp->f_owner) {
1867 return (EACCES); /* must be owner of file system */
1868 }
1869 if (!vnode_isvroot(vp)) {
1870 return (EINVAL);
1871 }
1872 hfs_lock_mount(hfsmp);
1873 location = *(u_int32_t *)ap->a_data;
1874 if ((location >= hfsmp->allocLimit) &&
1875 (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
1876 error = EINVAL;
1877 goto fail_change_next_allocation;
1878 }
1879 /* Return previous value. */
1880 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
1881 if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
1882 /* If location is the magic 'no update' value, set nextAllocation to the
1883 * next block after the metadata zone and set a flag in the mount structure
1884 * to indicate that nextAllocation should not be updated again.
1885 */
1886 if (hfsmp->hfs_metazone_end != 0) {
1887 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
1888 }
1889 hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1890 } else {
1891 hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1892 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
1893 }
1894 MarkVCBDirty(hfsmp);
1895 fail_change_next_allocation:
1896 hfs_unlock_mount(hfsmp);
1897 return (error);
1898 }
1899
1900 #if HFS_SPARSE_DEV
1901 case HFSIOC_SETBACKINGSTOREINFO: {
1902 struct vnode * di_vp;
1903 struct hfs_backingstoreinfo *bsdata;
1904 int error = 0;
1905
1906 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1907 return (EROFS);
1908 }
1909 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
1910 return (EALREADY);
1911 }
1912 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1913 if (suser(cred, NULL) &&
1914 kauth_cred_getuid(cred) != vfsp->f_owner) {
1915 return (EACCES); /* must be owner of file system */
1916 }
1917 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
1918 if (bsdata == NULL) {
1919 return (EINVAL);
1920 }
1921 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
1922 return (error);
1923 }
1924 if ((error = vnode_getwithref(di_vp))) {
1925 file_drop(bsdata->backingfd);
1926 return(error);
1927 }
1928
1929 if (vnode_mount(vp) == vnode_mount(di_vp)) {
1930 (void)vnode_put(di_vp);
1931 file_drop(bsdata->backingfd);
1932 return (EINVAL);
1933 }
1934
1935 // Dropped in unmount
1936 vnode_ref(di_vp);
1937
1938 hfs_lock_mount(hfsmp);
1939 hfsmp->hfs_backingvp = di_vp;
1940 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
1941 hfsmp->hfs_sparsebandblks = bsdata->bandsize / hfsmp->blockSize * 4;
1942 hfs_unlock_mount(hfsmp);
1943
1944 /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */
1945
1946 /*
1947 * If the sparse image is on a sparse image file (as opposed to a sparse
1948 * bundle), then we may need to limit the free space to the maximum size
1949 * of a file on that volume. So we query (using pathconf), and if we get
1950 * a meaningful result, we cache the number of blocks for later use in
1951 * hfs_freeblks().
1952 */
1953 hfsmp->hfs_backingfs_maxblocks = 0;
1954 if (vnode_vtype(di_vp) == VREG) {
1955 int terr;
1956 int hostbits;
1957 terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
1958 if (terr == 0 && hostbits != 0 && hostbits < 64) {
1959 u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;
1960
1961 hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
1962 }
1963 }
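/*
 * Worked example of the cap computed above (hypothetical host values): if
 * pathconf() reports _PC_FILESIZEBITS = 32 for the host volume, then
 * hostfilesizemax = 1ULL << 32 = 4 GiB, and with blockSize = 4096 the cached
 * hfs_backingfs_maxblocks becomes 4 GiB / 4096 = 1,048,576 blocks, which
 * hfs_freeblks() can later use to limit the reported free space.
 */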
1964
1965 /* The free extent cache is managed differently for sparse devices.
1966 * There is a window between when the volume is mounted and when the
1967 * device is marked as sparse, so the free extent cache for this
1968 * volume is currently initialized as for a normal volume (sorted by
1969 * block count). Reset the cache so that it will be rebuilt again
1970 * for the sparse device (sorted by start block).
1971 */
1972 ResetVCBFreeExtCache(hfsmp);
1973
1974 (void)vnode_put(di_vp);
1975 file_drop(bsdata->backingfd);
1976 return (0);
1977 }
1978
1979 case HFSIOC_CLRBACKINGSTOREINFO: {
1980 struct vnode * tmpvp;
1981
1982 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1983 if (suser(cred, NULL) &&
1984 kauth_cred_getuid(cred) != vfsp->f_owner) {
1985 return (EACCES); /* must be owner of file system */
1986 }
1987 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1988 return (EROFS);
1989 }
1990
1991 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
1992 hfsmp->hfs_backingvp) {
1993
1994 hfs_lock_mount(hfsmp);
1995 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
1996 tmpvp = hfsmp->hfs_backingvp;
1997 hfsmp->hfs_backingvp = NULLVP;
1998 hfsmp->hfs_sparsebandblks = 0;
1999 hfs_unlock_mount(hfsmp);
2000
2001 vnode_rele(tmpvp);
2002 }
2003 return (0);
2004 }
2005 #endif /* HFS_SPARSE_DEV */
2006
2007 /* Change the next CNID stored in the VH */
2008 case HFSIOC_CHANGE_NEXTCNID: {
2009 int error = 0; /* Assume success */
2010 u_int32_t fileid;
2011 int wraparound = 0;
2012 int lockflags = 0;
2013
2014 if (vnode_vfsisrdonly(vp)) {
2015 return (EROFS);
2016 }
2017 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
2018 if (suser(cred, NULL) &&
2019 kauth_cred_getuid(cred) != vfsp->f_owner) {
2020 return (EACCES); /* must be owner of file system */
2021 }
2022
2023 fileid = *(u_int32_t *)ap->a_data;
2024
2025 /* Must have catalog lock excl. to advance the CNID pointer */
2026 lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG , HFS_EXCLUSIVE_LOCK);
2027
2028 hfs_lock_mount(hfsmp);
2029
2030 /* If it is less than the current next CNID, force the wraparound bit to be set */
2031 if (fileid < hfsmp->vcbNxtCNID) {
2032 wraparound=1;
2033 }
2034
2035 /* Return previous value. */
2036 *(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID;
2037
2038 hfsmp->vcbNxtCNID = fileid;
2039
2040 if (wraparound) {
2041 hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask;
2042 }
2043
2044 MarkVCBDirty(hfsmp);
2045 hfs_unlock_mount(hfsmp);
2046 hfs_systemfile_unlock (hfsmp, lockflags);
2047
2048 return (error);
2049 }
2050
2051 case F_FREEZE_FS: {
2052 struct mount *mp;
2053
2054 mp = vnode_mount(vp);
2055 hfsmp = VFSTOHFS(mp);
2056
2057 if (!(hfsmp->jnl))
2058 return (ENOTSUP);
2059
2060 vfsp = vfs_statfs(mp);
2061
2062 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
2063 !kauth_cred_issuser(cred))
2064 return (EACCES);
2065
2066 return hfs_freeze(hfsmp);
2067 }
2068
2069 case F_THAW_FS: {
2070 vfsp = vfs_statfs(vnode_mount(vp));
2071 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
2072 !kauth_cred_issuser(cred))
2073 return (EACCES);
2074
2075 return hfs_thaw(hfsmp, current_proc());
2076 }
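/*
 * A minimal userspace sketch of the freeze/thaw pair handled above, assuming
 * F_FREEZE_FS and F_THAW_FS are visible via <fcntl.h> and that the descriptor
 * refers to a file on the target volume ("/Volumes/MyHFS" is hypothetical).
 * As enforced above, only the file system owner or the super-user may call
 * either, and the volume must be journaled.
 *
 *     int fd = open("/Volumes/MyHFS", O_RDONLY);
 *     if (fd >= 0 && fcntl(fd, F_FREEZE_FS, 0) == 0) {
 *         // ... take a block-level snapshot of the quiesced device ...
 *         (void)fcntl(fd, F_THAW_FS, 0);
 *     }
 */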
2077
2078 case HFSIOC_EXT_BULKACCESS32:
2079 case HFSIOC_EXT_BULKACCESS64: {
2080 int size;
2081 #if CONFIG_HFS_STD
2082 if (hfsmp->hfs_flags & HFS_STANDARD) {
2083 return EINVAL;
2084 }
2085 #endif
2086
2087 if (is64bit) {
2088 size = sizeof(struct user64_ext_access_t);
2089 } else {
2090 size = sizeof(struct user32_ext_access_t);
2091 }
2092
2093 return do_bulk_access_check(hfsmp, vp, ap, size, context);
2094 }
2095
2096 case HFSIOC_SET_XATTREXTENTS_STATE: {
2097 int state;
2098
2099 if (ap->a_data == NULL) {
2100 return (EINVAL);
2101 }
2102
2103 state = *(int *)ap->a_data;
2104
2105 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2106 return (EROFS);
2107 }
2108
2109 /* The super-user can enable or disable extent-based extended
2110 * attribute support on a volume.
2111 * Note: Starting with Mac OS X 10.7, extent-based extended attributes
2112 * are enabled by default, so any change is transient and only
2113 * lasts until the volume is remounted.
2114 */
2115 if (!kauth_cred_issuser(kauth_cred_get())) {
2116 return (EPERM);
2117 }
2118 if (state == 0 || state == 1)
2119 return hfs_set_volxattr(hfsmp, HFSIOC_SET_XATTREXTENTS_STATE, state);
2120 else
2121 return (EINVAL);
2122 }
2123
2124 case F_SETSTATICCONTENT: {
2125 int error;
2126 int enable_static = 0;
2127 struct cnode *cp = NULL;
2128 /*
2129 * lock the cnode, decorate the cnode flag, and bail out.
2130 * VFS should have already authenticated the caller for us.
2131 */
2132
2133 if (ap->a_data) {
2134 /*
2135 * Note that even though ap->a_data is of type caddr_t,
2136 * the fcntl layer at the syscall handler will pass in NULL
2137 * or 1 depending on what the argument supplied to the fcntl
2138 * was. So it is in fact correct to check the ap->a_data
2139 * argument for zero or non-zero value when deciding whether or not
2140 * to enable the static bit in the cnode.
2141 */
2142 enable_static = 1;
2143 }
2144 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2145 return EROFS;
2146 }
2147 cp = VTOC(vp);
2148
2149 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2150 if (error == 0) {
2151 if (enable_static) {
2152 cp->c_flag |= C_SSD_STATIC;
2153 }
2154 else {
2155 cp->c_flag &= ~C_SSD_STATIC;
2156 }
2157 hfs_unlock (cp);
2158 }
2159 return error;
2160 }
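/*
 * A minimal userspace sketch of toggling the static-content hint set above,
 * assuming F_SETSTATICCONTENT is visible via <fcntl.h>; the path is
 * hypothetical.  As the comment above notes, the fcntl layer turns the third
 * argument into NULL (clear) or 1 (set) before it reaches ap->a_data.
 *
 *     int fd = open("/path/to/asset", O_RDONLY);
 *     (void)fcntl(fd, F_SETSTATICCONTENT, 1);   // mark as static content
 *     // ... stream the file ...
 *     (void)fcntl(fd, F_SETSTATICCONTENT, 0);   // clear the hint again
 */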
2161
2162 case F_SET_GREEDY_MODE: {
2163 int error;
2164 int enable_greedy_mode = 0;
2165 struct cnode *cp = NULL;
2166 /*
2167 * lock the cnode, decorate the cnode flag, and bail out.
2168 * VFS should have already authenticated the caller for us.
2169 */
2170
2171 if (ap->a_data) {
2172 /*
2173 * Note that even though ap->a_data is of type caddr_t,
2174 * the fcntl layer at the syscall handler will pass in NULL
2175 * or 1 depending on what the argument supplied to the fcntl
2176 * was. So it is in fact correct to check the ap->a_data
2177 * argument for zero or non-zero value when deciding whether or not
2178 * to enable the greedy mode bit in the cnode.
2179 */
2180 enable_greedy_mode = 1;
2181 }
2182 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2183 return EROFS;
2184 }
2185 cp = VTOC(vp);
2186
2187 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2188 if (error == 0) {
2189 if (enable_greedy_mode) {
2190 cp->c_flag |= C_SSD_GREEDY_MODE;
2191 }
2192 else {
2193 cp->c_flag &= ~C_SSD_GREEDY_MODE;
2194 }
2195 hfs_unlock (cp);
2196 }
2197 return error;
2198 }
2199
2200 case F_SETIOTYPE: {
2201 int error;
2202 uint32_t iotypeflag = 0;
2203
2204 struct cnode *cp = NULL;
2205 /*
2206 * lock the cnode, decorate the cnode flag, and bail out.
2207 * VFS should have already authenticated the caller for us.
2208 */
2209
2210 if (ap->a_data == NULL) {
2211 return EINVAL;
2212 }
2213
2214 /*
2215 * Note that even though ap->a_data is of type caddr_t, we
2216 * can only use 32 bits of flag values.
2217 */
2218 iotypeflag = (uint32_t) ap->a_data;
2219 switch (iotypeflag) {
2220 case F_IOTYPE_ISOCHRONOUS:
2221 break;
2222 default:
2223 return EINVAL;
2224 }
2225
2226
2227 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2228 return EROFS;
2229 }
2230 cp = VTOC(vp);
2231
2232 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2233 if (error == 0) {
2234 switch (iotypeflag) {
2235 case F_IOTYPE_ISOCHRONOUS:
2236 cp->c_flag |= C_IO_ISOCHRONOUS;
2237 break;
2238 default:
2239 break;
2240 }
2241 hfs_unlock (cp);
2242 }
2243 return error;
2244 }
2245
2246 case F_MAKECOMPRESSED: {
2247 int error = 0;
2248 uint32_t gen_counter;
2249 struct cnode *cp = NULL;
2250 int reset_decmp = 0;
2251
2252 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2253 return EROFS;
2254 }
2255
2256 /*
2257 * acquire & lock the cnode.
2258 * VFS should have already authenticated the caller for us.
2259 */
2260
2261 if (ap->a_data) {
2262 /*
2263 * Cast the pointer into a uint32_t so we can extract the
2264 * supplied generation counter.
2265 */
2266 gen_counter = *((uint32_t*)ap->a_data);
2267 }
2268 else {
2269 return EINVAL;
2270 }
2271
2272 #if HFS_COMPRESSION
2273 cp = VTOC(vp);
2274 /* Grab truncate lock first; we may truncate the file */
2275 hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2276
2277 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2278 if (error) {
2279 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2280 return error;
2281 }
2282
2283 /* Are there any other usecounts/FDs? */
2284 if (vnode_isinuse(vp, 1)) {
2285 hfs_unlock(cp);
2286 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2287 return EBUSY;
2288 }
2289
2290 /* now we have the cnode locked down; Validate arguments */
2291 if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) {
2292 /* EINVAL if you are trying to manipulate an IMMUTABLE file */
2293 hfs_unlock(cp);
2294 hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
2295 return EINVAL;
2296 }
2297
2298 if ((hfs_get_gencount (cp)) == gen_counter) {
2299 /*
2300 * OK, the gen_counter matched. Go for it:
2301 * Toggle state bits, truncate file, and suppress mtime update
2302 */
2303 reset_decmp = 1;
2304 cp->c_bsdflags |= UF_COMPRESSED;
2305
2306 error = hfs_truncate(vp, 0, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES,
2307 ap->a_context);
2308 }
2309 else {
2310 error = ESTALE;
2311 }
2312
2313 /* Unlock the cnode before calling into decmpfs; it may need to get an EA */
2314 hfs_unlock(cp);
2315
2316 /*
2317 * Reset the decmp state while still holding the truncate lock. We need to
2318 * serialize here against a listxattr on this node which may occur at any
2319 * time.
2320 *
2321 * Even if '0/skiplock' is passed as the 2nd argument to hfs_file_is_compressed,
2322 * it may still need to fetch the com.apple.decmpfs EA. If the
2323 * EA is required, then we can't hold the cnode lock, because the getxattr call is
2324 * generic (through VFS), and can't be told that we're already
2325 * holding the lock. If we don't serialize, then we risk listxattr stopping
2326 * and trying to fill in the hfs_file_is_compressed info during the callback
2327 * operation, which will result in a deadlock against the b-tree node.
2328 *
2329 * So, to serialize against listxattr (which will grab buf_t meta references on
2330 * the b-tree blocks), we hold the truncate lock as we're manipulating the
2331 * decmpfs payload.
2332 */
2333 if ((reset_decmp) && (error == 0)) {
2334 decmpfs_cnode *dp = VTOCMP (vp);
2335 if (dp != NULL) {
2336 decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
2337 }
2338
2339 /* Initialize the decmpfs node as needed */
2340 (void) hfs_file_is_compressed (cp, 0); /* ok to take lock */
2341 }
2342
2343 hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
2344
2345 #endif
2346 return error;
2347 }
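/*
 * A sketch of how a compression tool might drive the gen-counter check above;
 * the helpers and flow here are assumptions about the caller, not part of
 * this file.  The idea: read the file's generation count, write the
 * com.apple.decmpfs EA, then pass the previously read count to
 * F_MAKECOMPRESSED; an ESTALE return means the file changed in between and
 * the caller should start over.
 *
 *     uint32_t gen = read_gen_count(fd);          // hypothetical helper
 *     write_decmpfs_xattr(fd, payload, len);      // hypothetical helper
 *     if (fcntl(fd, F_MAKECOMPRESSED, &gen) == -1 && errno == ESTALE)
 *         retry();                                // file was modified; redo the work
 */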
2348
2349 case F_SETBACKINGSTORE: {
2350
2351 int error = 0;
2352
2353 /*
2354 * See comment in F_SETSTATICCONTENT re: using
2355 * a null check for a_data
2356 */
2357 if (ap->a_data) {
2358 error = hfs_set_backingstore (vp, 1);
2359 }
2360 else {
2361 error = hfs_set_backingstore (vp, 0);
2362 }
2363
2364 return error;
2365 }
2366
2367 case F_GETPATH_MTMINFO: {
2368 int error = 0;
2369
2370 int *data = (int*) ap->a_data;
2371
2372 /* Ask if this is a backingstore vnode */
2373 error = hfs_is_backingstore (vp, data);
2374
2375 return error;
2376 }
2377
2378 case F_FULLFSYNC: {
2379 int error;
2380
2381 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2382 return (EROFS);
2383 }
2384 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2385 if (error == 0) {
2386 error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_FULL, p);
2387 hfs_unlock(VTOC(vp));
2388 }
2389
2390 return error;
2391 }
2392
2393 case F_BARRIERFSYNC: {
2394 int error;
2395
2396 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2397 return (EROFS);
2398 }
2399 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2400 if (error == 0) {
2401 error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_BARRIER, p);
2402 hfs_unlock(VTOC(vp));
2403 }
2404
2405 return error;
2406 }
2407
2408 case F_CHKCLEAN: {
2409 register struct cnode *cp;
2410 int error;
2411
2412 if (!vnode_isreg(vp))
2413 return EINVAL;
2414
2415 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2416 if (error == 0) {
2417 cp = VTOC(vp);
2418 /*
2419 * used by the regression test to determine if
2420 * all the dirty pages (via write) have been cleaned
2421 * after a call to 'fsync'.
2422 */
2423 error = is_file_clean(vp, VTOF(vp)->ff_size);
2424 hfs_unlock(cp);
2425 }
2426 return (error);
2427 }
2428
2429 case F_RDADVISE: {
2430 register struct radvisory *ra;
2431 struct filefork *fp;
2432 int error;
2433
2434 if (!vnode_isreg(vp))
2435 return EINVAL;
2436
2437 ra = (struct radvisory *)(ap->a_data);
2438 fp = VTOF(vp);
2439
2440 /* Protect against a size change. */
2441 hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2442
2443 #if HFS_COMPRESSION
2444 if (compressed) {
2445 if (uncompressed_size == -1) {
2446 /* fetching the uncompressed size failed above, so return the error */
2447 error = decmpfs_error;
2448 } else if (ra->ra_offset >= uncompressed_size) {
2449 error = EFBIG;
2450 } else {
2451 error = advisory_read(vp, uncompressed_size, ra->ra_offset, ra->ra_count);
2452 }
2453 } else
2454 #endif /* HFS_COMPRESSION */
2455 if (ra->ra_offset >= fp->ff_size) {
2456 error = EFBIG;
2457 } else {
2458 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
2459 }
2460
2461 hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
2462 return (error);
2463 }
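/*
 * A minimal userspace sketch of the read advisory handled above, using
 * struct radvisory and F_RDADVISE as documented in fcntl(2); the 1 MiB
 * count is an arbitrary example.  Offsets at or past EOF earn EFBIG,
 * exactly as the checks above enforce.
 *
 *     struct radvisory ra = { .ra_offset = 0, .ra_count = 1 << 20 };
 *     if (fcntl(fd, F_RDADVISE, &ra) == -1)
 *         perror("F_RDADVISE");
 */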
2464
2465 case HFSIOC_GET_VOL_CREATE_TIME_32: {
2466 *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
2467 return 0;
2468 }
2469
2470 case HFSIOC_GET_VOL_CREATE_TIME_64: {
2471 *(user64_time_t *)(ap->a_data) = (user64_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
2472 return 0;
2473 }
2474
2475 case SPOTLIGHT_IOC_GET_MOUNT_TIME:
2476 *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
2477 break;
2478
2479 case SPOTLIGHT_IOC_GET_LAST_MTIME:
2480 *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
2481 break;
2482
2483 case HFSIOC_GET_VERY_LOW_DISK:
2484 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
2485 break;
2486
2487 case HFSIOC_SET_VERY_LOW_DISK:
2488 if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
2489 return EINVAL;
2490 }
2491
2492 hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
2493 break;
2494
2495 case HFSIOC_GET_LOW_DISK:
2496 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
2497 break;
2498
2499 case HFSIOC_SET_LOW_DISK:
2500 if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
2501 || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
2502
2503 return EINVAL;
2504 }
2505
2506 hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
2507 break;
2508
2509 /* The following two fsctls were ported from apfs. */
2510 case APFSIOC_GET_NEAR_LOW_DISK:
2511 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_nearwarninglimit;
2512 break;
2513
2514 case APFSIOC_SET_NEAR_LOW_DISK:
2515 if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
2516 || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
2517 return EINVAL;
2518 }
2519
2520 hfsmp->hfs_freespace_notify_nearwarninglimit = *(uint32_t *)ap->a_data;
2521 break;
2522
2523 case HFSIOC_GET_DESIRED_DISK:
2524 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
2525 break;
2526
2527 case HFSIOC_SET_DESIRED_DISK:
2528 if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
2529 return EINVAL;
2530 }
2531
2532 hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
2533 break;
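/*
 * The range checks in the SET_* cases above keep the free-space notification
 * thresholds ordered (the values are block counts).  A sketch of the intended
 * invariant, assuming each level was set through these fsctls:
 *
 *     assert(dangerlimit < warninglimit);
 *     assert(warninglimit < nearwarninglimit && nearwarninglimit < desiredlevel);
 *     assert(warninglimit < desiredlevel);
 */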
2534
2535 case HFSIOC_VOLUME_STATUS:
2536 *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
2537 break;
2538
2539 case HFS_SET_BOOT_INFO:
2540 if (!vnode_isvroot(vp))
2541 return(EINVAL);
2542 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
2543 return(EACCES); /* must be superuser or owner of filesystem */
2544 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2545 return (EROFS);
2546 }
2547 hfs_lock_mount (hfsmp);
2548 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
2549 /* Null out the cached UUID, to be safe */
2550 uuid_clear (hfsmp->hfs_full_uuid);
2551 hfs_unlock_mount (hfsmp);
2552 (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
2553 break;
2554
2555 case HFS_GET_BOOT_INFO:
2556 if (!vnode_isvroot(vp))
2557 return(EINVAL);
2558 hfs_lock_mount (hfsmp);
2559 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
2560 hfs_unlock_mount(hfsmp);
2561 break;
2562
2563 /* case HFS_MARK_BOOT_CORRUPT: _IO are the same */
2564 case HFSIOC_MARK_BOOT_CORRUPT:
2565 /* Mark the boot volume corrupt by setting
2566 * kHFSVolumeInconsistentBit in the volume header. This will
2567 * force fsck_hfs on next mount.
2568 */
2569 if (!kauth_cred_issuser(kauth_cred_get())) {
2570 return EACCES;
2571 }
2572
2573 /* Allowed only on the root vnode of the boot volume */
2574 if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
2575 !vnode_isvroot(vp)) {
2576 return EINVAL;
2577 }
2578 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2579 return (EROFS);
2580 }
2581 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
2582 hfs_mark_inconsistent(hfsmp, HFS_FSCK_FORCED);
2583 break;
2584
2585 case HFSIOC_GET_JOURNAL_INFO:
2586 jip = (struct hfs_journal_info*)ap->a_data;
2587
2588 if (vp == NULLVP)
2589 return EINVAL;
2590
2591 if (hfsmp->jnl == NULL) {
2592 jnl_start = 0;
2593 jnl_size = 0;
2594 } else {
2595 jnl_start = hfs_blk_to_bytes(hfsmp->jnl_start, hfsmp->blockSize) + hfsmp->hfsPlusIOPosOffset;
2596 jnl_size = hfsmp->jnl_size;
2597 }
2598
2599 jip->jstart = jnl_start;
2600 jip->jsize = jnl_size;
2601 break;
2602
2603 case HFSIOC_SET_ALWAYS_ZEROFILL: {
2604 struct cnode *cp = VTOC(vp);
2605
2606 if (*(int *)ap->a_data) {
2607 cp->c_flag |= C_ALWAYS_ZEROFILL;
2608 } else {
2609 cp->c_flag &= ~C_ALWAYS_ZEROFILL;
2610 }
2611 break;
2612 }
2613
2614 /* case HFS_DISABLE_METAZONE: _IO are the same */
2615 case HFSIOC_DISABLE_METAZONE: {
2616 /* Only root can disable metadata zone */
2617 if (!kauth_cred_issuser(kauth_cred_get())) {
2618 return EACCES;
2619 }
2620 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2621 return (EROFS);
2622 }
2623
2624 /* Disable metadata zone now */
2625 (void) hfs_metadatazone_init(hfsmp, true);
2626 printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
2627 break;
2628 }
2629
2630
2631 case HFSIOC_FSINFO_METADATA_BLOCKS: {
2632 int error;
2633 struct hfsinfo_metadata *hinfo;
2634
2635 hinfo = (struct hfsinfo_metadata *)ap->a_data;
2636
2637 /* Get information about number of metadata blocks */
2638 error = hfs_getinfo_metadata_blocks(hfsmp, hinfo);
2639 if (error) {
2640 return error;
2641 }
2642
2643 break;
2644 }
2645
2646 case HFSIOC_GET_FSINFO: {
2647 hfs_fsinfo *fsinfo = (hfs_fsinfo *)ap->a_data;
2648
2649 /* Only root is allowed to get fsinfo */
2650 if (!kauth_cred_issuser(kauth_cred_get())) {
2651 return EACCES;
2652 }
2653
2654 /*
2655 * Make sure that the caller's version number matches
2656 * the kernel's version number. This ensures that
2657 * if the structures being read/written are changed
2658 * by the kernel, the caller will not read incorrect data.
2659 *
2660 * The first three fields --- request_type, version and
2661 * flags --- are the same for all the hfs_fsinfo structures, so
2662 * we can access the version number through any
2663 * structure for now.
2664 */
2665 if (fsinfo->header.version != HFS_FSINFO_VERSION) {
2666 return ENOTSUP;
2667 }
2668
2669 /* Make sure that the current file system is not marked inconsistent */
2670 if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
2671 return EIO;
2672 }
2673
2674 return hfs_get_fsinfo(hfsmp, ap->a_data);
2675 }
2676
2677 case HFSIOC_CS_FREESPACE_TRIM: {
2678 int error = 0;
2679 int lockflags = 0;
2680
2681 /* Only root allowed */
2682 if (!kauth_cred_issuser(kauth_cred_get())) {
2683 return EACCES;
2684 }
2685
2686 /*
2687 * This core functionality is similar to hfs_scan_blocks().
2688 * The main difference is that hfs_scan_blocks() is called
2689 * as part of mount where we are assured that the journal is
2690 * empty to start with. This fcntl() can be called on a
2691 * mounted volume, therefore it has to flush the contents of
2692 * the journal as well as ensure the state of the summary table.
2693 *
2694 * This fcntl scans over the entire allocation bitmap,
2695 * creates a list of all the free blocks, and issues TRIM
2696 * down to the underlying device. This can take a long time
2697 * as it can generate up to 512MB of read I/O.
2698 */
2699
2700 if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) {
2701 error = hfs_init_summary(hfsmp);
2702 if (error) {
2703 printf("hfs: fsctl() could not initialize summary table for %s\n", hfsmp->vcbVN);
2704 return error;
2705 }
2706 }
2707
2708 /*
2709 * The journal maintains a list of recently deallocated blocks to
2710 * issue DKIOCUNMAPs when the corresponding journal transaction is
2711 * flushed to the disk. To avoid any race conditions, we only
2712 * want one active trim list and only one thread issuing DKIOCUNMAPs.
2713 * Therefore we make sure that the journal trim list is sync'ed,
2714 * empty, and not modifiable for the duration of our scan.
2715 *
2716 * Take the journal lock before flushing the journal to the disk.
2717 * We keep holding the journal lock until we acquire the
2718 * bitmap lock, to make sure that no new journal transactions can
2719 * start. This ensures that the journal trim list is not
2720 * modified after the journal flush and before getting the bitmap lock.
2721 * We can release the journal lock after we acquire the bitmap
2722 * lock as it will prevent any further block deallocations.
2723 */
2724 hfs_journal_lock(hfsmp);
2725
2726 /* Flush the journal and wait for all I/Os to finish up */
2727 error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
2728 if (error) {
2729 hfs_journal_unlock(hfsmp);
2730 return error;
2731 }
2732
2733 /* Take bitmap lock to ensure it is not being modified */
2734 lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
2735
2736 /* Release the journal lock */
2737 hfs_journal_unlock(hfsmp);
2738
2739 /*
2740 * ScanUnmapBlocks reads the bitmap in large block sizes
2741 * (up to 1MB), unlike the runtime which reads the bitmap
2742 * in 4K blocks. This can cause buf_t collisions
2743 * and potential data corruption. To avoid this, we
2744 * invalidate all the existing buffers associated with
2745 * the bitmap vnode before scanning it.
2746 *
2747 * Note: ScanUnmapBlocks() cleans up all the buffers
2748 * after itself, so there won't be any large buffers left
2749 * for us to clean up after it returns.
2750 */
2751 error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
2752 if (error) {
2753 hfs_systemfile_unlock(hfsmp, lockflags);
2754 return error;
2755 }
2756
2757 /* Traverse bitmap and issue DKIOCUNMAPs */
2758 error = ScanUnmapBlocks(hfsmp);
2759 hfs_systemfile_unlock(hfsmp, lockflags);
2760 if (error) {
2761 return error;
2762 }
2763
2764 break;
2765 }
2766
2767 case HFSIOC_SET_HOTFILE_STATE: {
2768 int error;
2769 struct cnode *cp = VTOC(vp);
2770 uint32_t hf_state = *((uint32_t*)ap->a_data);
2771 uint32_t num_unpinned = 0;
2772
2773 error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2774 if (error) {
2775 return error;
2776 }
2777
2778 // printf("hfs: setting hotfile state %d on %s\n", hf_state, vp->v_name);
2779 if (hf_state == HFS_MARK_FASTDEVCANDIDATE) {
2780 vnode_setfastdevicecandidate(vp);
2781
2782 cp->c_attr.ca_recflags |= kHFSFastDevCandidateMask;
2783 cp->c_attr.ca_recflags &= ~kHFSDoNotFastDevPinMask;
2784 cp->c_flag |= C_MODIFIED;
2785 } else if (hf_state == HFS_UNMARK_FASTDEVCANDIDATE || hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
2786 vnode_clearfastdevicecandidate(vp);
2787 hfs_removehotfile(vp);
2788
2789 if (cp->c_attr.ca_recflags & kHFSFastDevPinnedMask) {
2790 hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, &num_unpinned);
2791 }
2792
2793 if (hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
2794 cp->c_attr.ca_recflags |= kHFSDoNotFastDevPinMask;
2795 }
2796 cp->c_attr.ca_recflags &= ~(kHFSFastDevCandidateMask|kHFSFastDevPinnedMask);
2797 cp->c_flag |= C_MODIFIED;
2798
2799 } else {
2800 error = EINVAL;
2801 }
2802
2803 if (num_unpinned != 0) {
2804 lck_mtx_lock(&hfsmp->hfc_mutex);
2805 hfsmp->hfs_hotfile_freeblks += num_unpinned;
2806 lck_mtx_unlock(&hfsmp->hfc_mutex);
2807 }
2808
2809 hfs_unlock(cp);
2810 return error;
2811 }
2812
2813 case HFSIOC_REPIN_HOTFILE_STATE: {
2814 int error=0;
2815 uint32_t repin_what = *((uint32_t*)ap->a_data);
2816
2817 /* Only root allowed */
2818 if (!kauth_cred_issuser(kauth_cred_get())) {
2819 return EACCES;
2820 }
2821
2822 if (!(hfsmp->hfs_flags & (HFS_CS_METADATA_PIN | HFS_CS_HOTFILE_PIN))) {
2823 // this system is neither regular Fusion nor Cooperative Fusion
2824 // so this fsctl makes no sense.
2825 return EINVAL;
2826 }
2827
2828 //
2829 // After converting a CoreStorage volume to be encrypted, the
2830 // extents could have moved around underneath us. This call
2831 // allows corestoraged to re-pin everything that should be
2832 // pinned (it would happen on the next reboot too but that could
2833 // be a long time away).
2834 //
2835 if ((repin_what & HFS_REPIN_METADATA) && (hfsmp->hfs_flags & HFS_CS_METADATA_PIN)) {
2836 hfs_pin_fs_metadata(hfsmp);
2837 }
2838 if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) {
2839 hfs_repin_hotfiles(hfsmp);
2840 }
2841 if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_SWAPFILE_PIN)) {
2842 //XXX Swapfiles (marked SWAP_PINNED) may have moved too.
2843 //XXX Do we care? They have a more transient/dynamic nature/lifetime.
2844 }
2845
2846 return error;
2847 }
2848
2849 #if HFS_CONFIG_KEY_ROLL
2850
2851 case HFSIOC_KEY_ROLL: {
2852 if (!kauth_cred_issuser(kauth_cred_get()))
2853 return EACCES;
2854
2855 hfs_key_roll_args_t *args = (hfs_key_roll_args_t *)ap->a_data;
2856
2857 return hfs_key_roll_op(ap->a_context, ap->a_vp, args);
2858 }
2859
2860 case HFSIOC_GET_KEY_AUTO_ROLL: {
2861 if (!kauth_cred_issuser(kauth_cred_get()))
2862 return EACCES;
2863
2864 hfs_key_auto_roll_args_t *args = (hfs_key_auto_roll_args_t *)ap->a_data;
2865 if (args->api_version != HFS_KEY_AUTO_ROLL_API_VERSION_1)
2866 return ENOTSUP;
2867 args->flags = (ISSET(hfsmp->cproot_flags, CP_ROOT_AUTO_ROLL_OLD_CLASS_GENERATION)
2868 ? HFS_KEY_AUTO_ROLL_OLD_CLASS_GENERATION : 0);
2869 args->min_key_os_version = hfsmp->hfs_auto_roll_min_key_os_version;
2870 args->max_key_os_version = hfsmp->hfs_auto_roll_max_key_os_version;
2871 break;
2872 }
2873
2874 case HFSIOC_SET_KEY_AUTO_ROLL: {
2875 if (!kauth_cred_issuser(kauth_cred_get()))
2876 return EACCES;
2877
2878 hfs_key_auto_roll_args_t *args = (hfs_key_auto_roll_args_t *)ap->a_data;
2879 if (args->api_version != HFS_KEY_AUTO_ROLL_API_VERSION_1)
2880 return ENOTSUP;
2881 return cp_set_auto_roll(hfsmp, args);
2882 }
2883
2884 #endif // HFS_CONFIG_KEY_ROLL
2885
2886 #if CONFIG_PROTECT
2887 case F_TRANSCODEKEY:
2888 /*
2889 * This API is only supported when called via kernel so
2890 * a_fflag must be set to 1 (it's not possible to get here
2891 * with it set to 1 via fsctl).
2892 */
2893 if (ap->a_fflag != 1)
2894 return ENOTTY;
2895 return cp_vnode_transcode(vp, (cp_key_t *)ap->a_data);
2896
2897 case F_GETPROTECTIONLEVEL:
2898 return cp_get_root_major_vers (vp, (uint32_t *)ap->a_data);
2899
2900 case F_GETDEFAULTPROTLEVEL:
2901 return cp_get_default_level(vp, (uint32_t *)ap->a_data);
2902 #endif // CONFIG_PROTECT
2903
2904 case FIOPINSWAP:
2905 return hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT | HFS_DATALESS_PIN,
2906 NULL);
2907
2908 case FSIOC_CAS_BSDFLAGS: {
2909 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2910 return (EROFS);
2911 }
2912
2913 #if 0
2914 struct fsioc_cas_bsdflags *cas = (void *)ap->a_data;
2915 struct cnode *cp = VTOC(vp);
2916 u_int32_t document_id = 0;
2917 int decmpfs_reset_state = 0;
2918 int error;
2919
2920 /* Don't allow modification of the journal. */
2921 if (hfs_is_journal_file(hfsmp, cp)) {
2922 return (EPERM);
2923 }
2924
2925 if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
2926 return (error);
2927 }
2928
2929 cas->actual_flags = cp->c_bsdflags;
2930 if (cas->actual_flags != cas->expected_flags) {
2931 hfs_unlock(cp);
2932 return (0);
2933 }
2934
2935 //
2936 // Check if we'll need a document_id. If so, we need to drop the lock
2937 // (to avoid any possible deadlock with the root vnode which has to get
2938 // locked to get the document id), generate the document_id, re-acquire
2939 // the lock, and perform the CAS check again. We do it in this sequence
2940 // in order to avoid throwing away document_ids in the case where the
2941 // CAS check fails. Note that it can still happen, but by performing
2942 // the check first, hopefully we can reduce the occurrence.
2943 //
2944 if ((cas->new_flags & UF_TRACKED) && !(VTOC(vp)->c_bsdflags & UF_TRACKED)) {
2945 struct FndrExtendedDirInfo *fip = (struct FndrExtendedDirInfo *)((char *)&(VTOC(vp)->c_attr.ca_finderinfo) + 16);
2946 //
2947 // If the document_id is not set, get a new one. It will be set
2948 // on the file down below once we hold the cnode lock.
2949 //
2950 if (fip->document_id == 0) {
2951 //
2952 // Drat, we have to generate one. Unlock the cnode, do the
2953 // deed, re-lock the cnode, and then do the CAS check again
2954 // to see if we lost the race.
2955 //
2956 hfs_unlock(cp);
2957 if (hfs_generate_document_id(hfsmp, &document_id) != 0) {
2958 document_id = 0;
2959 }
2960 if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
2961 return (error);
2962 }
2963 cas->actual_flags = cp->c_bsdflags;
2964 if (cas->actual_flags != cas->expected_flags) {
2965 hfs_unlock(cp);
2966 return (0);
2967 }
2968 }
2969 }
2970
2971 bool setting_compression = false;
2972
2973 if (!(cas->actual_flags & UF_COMPRESSED) && (cas->new_flags & UF_COMPRESSED))
2974 setting_compression = true;
2975
2976 if (setting_compression) {
2977 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2978 if (VTOF(vp)->ff_size) {
2979 // hfs_truncate will deal with the cnode lock
2980 error = hfs_truncate(vp, 0, IO_NDELAY, 0, ap->a_context);
2981 }
2982 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2983 }
2984
2985 if (!error)
2986 error = hfs_set_bsd_flags(hfsmp, cp, cas->new_flags,
2987 document_id, ap->a_context,
2988 &decmpfs_reset_state);
2989 if (error == 0) {
2990 error = hfs_update(vp, 0);
2991 }
2992 hfs_unlock(cp);
2993 if (error) {
2994 return (error);
2995 }
2996
2997 #if HFS_COMPRESSION
2998 if (decmpfs_reset_state) {
2999 /*
3000 * we've changed the UF_COMPRESSED flag, so reset the decmpfs state for this cnode
3001 * but don't do it while holding the hfs cnode lock
3002 */
3003 decmpfs_cnode *dp = VTOCMP(vp);
3004 if (!dp) {
3005 /*
3006 * call hfs_lazy_init_decmpfs_cnode() to make sure that the decmpfs_cnode
3007 * is filled in; we need a decmpfs_cnode to prevent decmpfs state changes
3008 * on this file if it's locked
3009 */
3010 dp = hfs_lazy_init_decmpfs_cnode(VTOC(vp));
3011 if (!dp) {
3012 /* failed to allocate a decmpfs_cnode */
3013 return ENOMEM; /* what should this be? */
3014 }
3015 }
3016 decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
3017 }
3018 #endif
3019 break;
3020 #endif
3021 return ENOTSUP;
3022 }
3023
3024 default:
3025 return (ENOTTY);
3026 }
3027
3028 return 0;
3029 }
3030
3031 /*
3032 * select
3033 */
3034 int
3035 hfs_vnop_select(__unused struct vnop_select_args *ap)
3036 /*
3037 struct vnop_select_args {
3038 vnode_t a_vp;
3039 int a_which;
3040 int a_fflags;
3041 void *a_wql;
3042 vfs_context_t a_context;
3043 };
3044 */
3045 {
3046 /*
3047 * We should really check to see if I/O is possible.
3048 */
3049 return (1);
3050 }
3051
3052 /*
3053 * Converts a logical block number to a physical block, and optionally returns
3054 * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
3055 * The physical block number is based on the device block size, currently it's 512.
3056 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
3057 */
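/*
 * Worked example with hypothetical sizes: for logBlockSize = 4096 and bn = 10,
 * blockposition below is 10 * 4096 = 40960 bytes, and MapFileBlockC() returns
 * *bnp in device (512-byte) blocks.  If it reports bytesContAvail = 65536,
 * the run computation yields *runp = 65536 / 4096 - 1 = 15 further contiguous
 * logical blocks.
 */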
3058 int
3059 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
3060 {
3061 struct filefork *fp = VTOF(vp);
3062 struct hfsmount *hfsmp = VTOHFS(vp);
3063 int retval = E_NONE;
3064 u_int32_t logBlockSize;
3065 size_t bytesContAvail = 0;
3066 off_t blockposition;
3067 int lockExtBtree;
3068 int lockflags = 0;
3069
3070 /*
3071 * Check for underlying vnode requests and ensure that logical
3072 * to physical mapping is requested.
3073 */
3074 if (vpp != NULL)
3075 *vpp = hfsmp->hfs_devvp;
3076 if (bnp == NULL)
3077 return (0);
3078
3079 logBlockSize = GetLogicalBlockSize(vp);
3080 blockposition = (off_t)bn * logBlockSize;
3081
3082 lockExtBtree = overflow_extents(fp);
3083
3084 if (lockExtBtree)
3085 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
3086
3087 retval = MacToVFSError(
3088 MapFileBlockC (HFSTOVCB(hfsmp),
3089 (FCB*)fp,
3090 MAXPHYSIO,
3091 blockposition,
3092 bnp,
3093 &bytesContAvail));
3094
3095 if (lockExtBtree)
3096 hfs_systemfile_unlock(hfsmp, lockflags);
3097
3098 if (retval == E_NONE) {
3099 /* Figure out how many read ahead blocks there are */
3100 if (runp != NULL) {
3101 if (can_cluster(logBlockSize)) {
3102 /* Make sure this result never goes negative: */
3103 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
3104 } else {
3105 *runp = 0;
3106 }
3107 }
3108 }
3109 return (retval);
3110 }
3111
3112 /*
3113 * Convert logical block number to file offset.
3114 */
3115 int
3116 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
3117 /*
3118 struct vnop_blktooff_args {
3119 vnode_t a_vp;
3120 daddr64_t a_lblkno;
3121 off_t *a_offset;
3122 };
3123 */
3124 {
3125 if (ap->a_vp == NULL)
3126 return (EINVAL);
3127 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
3128
3129 return(0);
3130 }
3131
3132 /*
3133 * Convert file offset to logical block number.
3134 */
3135 int
3136 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
3137 /*
3138 struct vnop_offtoblk_args {
3139 vnode_t a_vp;
3140 off_t a_offset;
3141 daddr64_t *a_lblkno;
3142 };
3143 */
3144 {
3145 if (ap->a_vp == NULL)
3146 return (EINVAL);
3147 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
3148
3149 return(0);
3150 }
3151
3152 /*
3153 * Map file offset to physical block number.
3154 *
3155 * If this function is called for write operation, and if the file
3156 * had virtual blocks allocated (delayed allocation), real blocks
3157 * are allocated by calling ExtendFileC().
3158 *
3159 * If this function is called for read operation, and if the file
3160 * had virtual blocks allocated (delayed allocation), no change
3161 * to the size of file is done, and if required, rangelist is
3162 * searched for mapping.
3163 *
3164 * System file cnodes are expected to be locked (shared or exclusive).
3165 *
3166 * -- INVALID RANGES --
3167 *
3168 * Invalid ranges are used to keep track of where we have extended a
3169 * file, but have not yet written that data to disk. In the past we
3170 * would clear up the invalid ranges as we wrote to those areas, but
3171 * before data was actually flushed to disk. The problem with that
3172 * approach is that the data can be left in the cache and is therefore
3173 * still not valid on disk. So now we clear up the ranges here, when
3174 * the flags field has VNODE_WRITE set, indicating a write is about to
3175 * occur. This isn't ideal (ideally we want to clear them up when
3176 * we know the data has been successfully written), but it's the best we
3177 * can do.
3178 *
3179 * For reads, we use the invalid ranges here in block map to indicate
3180 * to the caller that the data should be zeroed (a_bpn == -1). We
3181 * have to be careful about what ranges we return to the cluster code.
3182 * Currently the cluster code can only handle non-rounded values for
3183 * the EOF; it cannot handle funny sized ranges in the middle of the
3184 * file (the main problem is that it sends down odd sized I/Os to the
3185 * disk). Our code currently works because whilst the very first
3186 * offset and the last offset in the invalid ranges are not aligned,
3187 * gaps in the invalid ranges between the first and last have to be
3188 * aligned (because we always write page sized blocks). For example,
3189 * consider this arrangement:
3190 *
3191 * +-------------+-----+-------+------+
3192 * | |XXXXX| |XXXXXX|
3193 * +-------------+-----+-------+------+
3194 * a b c d
3195 *
3196 * This shows two invalid ranges <a, b> and <c, d>. Whilst a and d
3197 * are not necessarily aligned, b and c *must* be.
3198 *
3199 * Zero-filling occurs in a number of ways:
3200 *
3201 * 1. When a read occurs and we return with a_bpn == -1.
3202 *
3203 * 2. When hfs_fsync or hfs_filedone calls hfs_flush_invalid_ranges
3204 * which will cause us to iterate over the ranges bringing in
3205 * pages that are not present in the cache and zeroing them. Any
3206 * pages that are already in the cache are left untouched. Note
3207 * that hfs_fsync does not always flush invalid ranges.
3208 *
3209 * 3. When we extend a file we zero out from the old EOF to the end
3210 * of the page. It would be nice if we didn't have to do this if
3211 * the page wasn't present (and could defer it), but because of
3212 * the problem described above, we have to.
3213 *
3214 * The invalid ranges are also used to restrict the size that we write
3215 * out on disk: see hfs_prepare_fork_for_update.
3216 *
3217 * Note that invalid ranges are ignored when neither the VNODE_READ nor
3218 * the VNODE_WRITE flag is specified. This is useful for the
3219 * F_LOG2PHYS* fcntls which are not interested in invalid ranges: they
3220 * just want to know whether blocks are physically allocated or not.
3221 */
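/*
 * Putting numbers on the diagram above (4 KiB pages, hypothetical offsets):
 * a first invalid range covering bytes 5000-8191 and a second covering
 * 12288-13000 is acceptable because the interior boundaries (8192 and 12288)
 * fall on page boundaries, while the outer offsets (5000 and 13001) need not,
 * since they touch only the start of the first range and the EOF.
 */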
3222 int
3223 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
3224 /*
3225 struct vnop_blockmap_args {
3226 vnode_t a_vp;
3227 off_t a_foffset;
3228 size_t a_size;
3229 daddr64_t *a_bpn;
3230 size_t *a_run;
3231 void *a_poff;
3232 int a_flags;
3233 vfs_context_t a_context;
3234 };
3235 */
3236 {
3237 struct vnode *vp = ap->a_vp;
3238 struct cnode *cp;
3239 struct filefork *fp;
3240 struct hfsmount *hfsmp;
3241 size_t bytesContAvail = ap->a_size;
3242 int retval = E_NONE;
3243 int syslocks = 0;
3244 int lockflags = 0;
3245 struct rl_entry *invalid_range;
3246 enum rl_overlaptype overlaptype;
3247 int started_tr = 0;
3248 int tooklock = 0;
3249
3250 #if HFS_COMPRESSION
3251 if (VNODE_IS_RSRC(vp)) {
3252 /* allow blockmaps to the resource fork */
3253 } else {
3254 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
3255 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
3256 switch(state) {
3257 case FILE_IS_COMPRESSED:
3258 return ENOTSUP;
3259 case FILE_IS_CONVERTING:
3260 /* if FILE_IS_CONVERTING, we allow blockmap */
3261 break;
3262 default:
3263 printf("invalid state %d for compressed file\n", state);
3264 /* fall through */
3265 }
3266 }
3267 }
3268 #endif /* HFS_COMPRESSION */
3269
3270 /* Do not allow blockmap operation on a directory */
3271 if (vnode_isdir(vp)) {
3272 return (ENOTSUP);
3273 }
3274
3275 /*
3276 * Check for underlying vnode requests and ensure that logical
3277 * to physical mapping is requested.
3278 */
3279 if (ap->a_bpn == NULL)
3280 return (0);
3281
3282 hfsmp = VTOHFS(vp);
3283 cp = VTOC(vp);
3284 fp = VTOF(vp);
3285
3286 if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
3287 if (cp->c_lockowner != current_thread()) {
3288 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
3289 tooklock = 1;
3290 }
3291
3292 // For reads, check the invalid ranges
3293 if (ISSET(ap->a_flags, VNODE_READ)) {
3294 if (ap->a_foffset >= fp->ff_size) {
3295 retval = ERANGE;
3296 goto exit;
3297 }
3298
3299 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
3300 ap->a_foffset + (off_t)bytesContAvail - 1,
3301 &invalid_range);
3302 switch(overlaptype) {
3303 case RL_MATCHINGOVERLAP:
3304 case RL_OVERLAPCONTAINSRANGE:
3305 case RL_OVERLAPSTARTSBEFORE:
3306 /* There's no valid block for this byte offset */
3307 *ap->a_bpn = (daddr64_t)-1;
3308 /* There's no point limiting the amount to be returned
3309 * if the invalid range that was hit extends all the way
3310 * to the EOF (i.e. there's no valid bytes between the
3311 * end of this range and the file's EOF):
3312 */
3313 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
3314 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
3315 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
3316 }
3317
3318 retval = 0;
3319 goto exit;
3320
3321 case RL_OVERLAPISCONTAINED:
3322 case RL_OVERLAPENDSAFTER:
3323 /* The range of interest hits an invalid block before the end: */
3324 if (invalid_range->rl_start == ap->a_foffset) {
3325 /* There's actually no valid information to be had starting here: */
3326 *ap->a_bpn = (daddr64_t)-1;
3327 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
3328 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
3329 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
3330 }
3331
3332 retval = 0;
3333 goto exit;
3334 } else {
3335 /*
3336 * Sadly, the lower layers don't like us to
3337 * return unaligned ranges, so we skip over
3338 * any invalid ranges here that are less than
3339 * a page: zeroing of those bits is not our
3340 * responsibility (it's dealt with elsewhere).
3341 */
3342 do {
3343 off_t rounded_start = round_page_64(invalid_range->rl_start);
3344 if ((off_t)bytesContAvail < rounded_start - ap->a_foffset)
3345 break;
3346 if (rounded_start < invalid_range->rl_end + 1) {
3347 bytesContAvail = rounded_start - ap->a_foffset;
3348 break;
3349 }
3350 } while ((invalid_range = TAILQ_NEXT(invalid_range,
3351 rl_link)));
3352 }
3353 break;
3354
3355 case RL_NOOVERLAP:
3356 break;
3357 } // switch
3358 }
3359 }
3360
3361 #if CONFIG_PROTECT
3362 if (cp->c_cpentry) {
3363 const int direction = (ISSET(ap->a_flags, VNODE_WRITE)
3364 ? VNODE_WRITE : VNODE_READ);
3365
3366 cp_io_params_t io_params;
3367 cp_io_params(hfsmp, cp->c_cpentry,
3368 off_rsrc_make(ap->a_foffset, VNODE_IS_RSRC(vp)),
3369 direction, &io_params);
3370
3371 if (io_params.max_len < (off_t)bytesContAvail)
3372 bytesContAvail = io_params.max_len;
3373
3374 if (io_params.phys_offset != -1) {
3375 *ap->a_bpn = ((io_params.phys_offset + hfsmp->hfsPlusIOPosOffset)
3376 / hfsmp->hfs_logical_block_size);
3377
3378 retval = 0;
3379 goto exit;
3380 }
3381 }
3382 #endif
3383
3384 retry:
3385
3386 /* Check virtual blocks only when performing write operation */
3387 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
3388 if (hfs_start_transaction(hfsmp) != 0) {
3389 retval = EINVAL;
3390 goto exit;
3391 } else {
3392 started_tr = 1;
3393 }
3394 syslocks = SFL_EXTENTS | SFL_BITMAP;
3395
3396 } else if (overflow_extents(fp)) {
3397 syslocks = SFL_EXTENTS;
3398 }
3399
3400 if (syslocks)
3401 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
3402
3403 /*
3404 * Check for any delayed allocations.
3405 */
3406 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
3407 int64_t actbytes;
3408 u_int32_t loanedBlocks;
3409
3410 //
3411 // Make sure we have a transaction. It's possible
3412 // that we came in and fp->ff_unallocblocks was zero
3413 // but during the time we blocked acquiring the extents
3414 // btree, ff_unallocblocks became non-zero and so we
3415 // will need to start a transaction.
3416 //
3417 if (started_tr == 0) {
3418 if (syslocks) {
3419 hfs_systemfile_unlock(hfsmp, lockflags);
3420 syslocks = 0;
3421 }
3422 goto retry;
3423 }
3424
3425 /*
3426 * Note: ExtendFileC will release any blocks on loan and
3427 * acquire real blocks. So we ask to extend by zero bytes
3428 * since ExtendFileC will account for the virtual blocks.
3429 */
3430
3431 loanedBlocks = fp->ff_unallocblocks;
3432 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
3433 kEFAllMask | kEFNoClumpMask, &actbytes);
3434
3435 if (retval) {
3436 fp->ff_unallocblocks = loanedBlocks;
3437 cp->c_blocks += loanedBlocks;
3438 fp->ff_blocks += loanedBlocks;
3439
3440 hfs_lock_mount (hfsmp);
3441 hfsmp->loanedBlocks += loanedBlocks;
3442 hfs_unlock_mount (hfsmp);
3443
3444 hfs_systemfile_unlock(hfsmp, lockflags);
3445 cp->c_flag |= C_MODIFIED;
3446 if (started_tr) {
3447 (void) hfs_update(vp, 0);
3448 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3449
3450 hfs_end_transaction(hfsmp);
3451 started_tr = 0;
3452 }
3453 goto exit;
3454 }
3455 }
3456
3457 retval = MapFileBlockC(hfsmp, (FCB *)fp, bytesContAvail, ap->a_foffset,
3458 ap->a_bpn, &bytesContAvail);
3459 if (syslocks) {
3460 hfs_systemfile_unlock(hfsmp, lockflags);
3461 syslocks = 0;
3462 }
3463
3464 if (retval) {
3465 /* On write, always return error because virtual blocks, if any,
3466 * should have been allocated in ExtendFileC(). We do not
3467 * allocate virtual blocks on read, therefore return an error
3468 * only if no virtual blocks are allocated. Otherwise we search
3469 * the rangelist for zero-fills.
3470 */
3471 if ((MacToVFSError(retval) != ERANGE) ||
3472 (ap->a_flags & VNODE_WRITE) ||
3473 ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
3474 goto exit;
3475 }
3476
3477 /* Validate if the start offset is within logical file size */
3478 if (ap->a_foffset >= fp->ff_size) {
3479 goto exit;
3480 }
3481
3482 /*
3483 * At this point, we have encountered a failure during
3484 * MapFileBlockC that resulted in ERANGE, and we are not
3485 * servicing a write, and there are borrowed blocks.
3486 *
3487 * However, the cluster layer will not call blockmap for
3488 * blocks that are borrowed and in-cache. We have to assume
3489 * that because we observed ERANGE being emitted from
3490 * MapFileBlockC, this extent range is not valid on-disk. So
3491 * we treat this as a mapping that needs to be zero-filled
3492 * prior to reading.
3493 */
3494
3495 if (fp->ff_size - ap->a_foffset < (off_t)bytesContAvail)
3496 bytesContAvail = fp->ff_size - ap->a_foffset;
3497
3498 *ap->a_bpn = (daddr64_t) -1;
3499 retval = 0;
3500
3501 goto exit;
3502 }
3503
3504 exit:
3505 if (retval == 0) {
3506 if (ISSET(ap->a_flags, VNODE_WRITE)) {
3507 struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges);
3508
3509 // See if we might be overlapping invalid ranges...
3510 if (r && (ap->a_foffset + (off_t)bytesContAvail) > r->rl_start) {
3511 /*
3512 * Mark the file as needing an update if we think the
3513 * on-disk EOF has changed.
3514 */
3515 if (ap->a_foffset <= r->rl_start)
3516 SET(cp->c_flag, C_MODIFIED);
3517
3518 /*
3519 * This isn't the ideal place to put this. Ideally, we
3520 * should do something *after* we have successfully
3521 * written to the range, but that's difficult to do
3522 * because we cannot take locks in the callback. At
3523 * present, the cluster code will call us with VNODE_WRITE
3524 * set just before it's about to write the data so we know
3525 * that data is about to be written. If we get an I/O
3526 * error at this point then chances are the metadata
3527 * update to follow will also have an I/O error so the
3528 * risk here is small.
3529 */
3530 rl_remove(ap->a_foffset, ap->a_foffset + bytesContAvail - 1,
3531 &fp->ff_invalidranges);
3532
3533 if (!TAILQ_FIRST(&fp->ff_invalidranges)) {
3534 cp->c_flag &= ~C_ZFWANTSYNC;
3535 cp->c_zftimeout = 0;
3536 }
3537 }
3538 }
3539
3540 if (ap->a_run)
3541 *ap->a_run = bytesContAvail;
3542
3543 if (ap->a_poff)
3544 *(int *)ap->a_poff = 0;
3545 }
3546
3547 if (started_tr) {
3548 hfs_update(vp, TRUE);
3549 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3550 hfs_end_transaction(hfsmp);
3551 started_tr = 0;
3552 }
3553
3554 if (tooklock)
3555 hfs_unlock(cp);
3556
3557 return (MacToVFSError(retval));
3558 }
3559
3560 /*
3561 * prepare and issue the I/O
3562 * buf_strategy knows how to deal
3563 * with requests that require
3564 * fragmented I/Os
3565 */
3566 int
3567 hfs_vnop_strategy(struct vnop_strategy_args *ap)
3568 {
3569 buf_t bp = ap->a_bp;
3570 vnode_t vp = buf_vnode(bp);
3571 int error = 0;
3572
3573 /* Mark buffer as containing static data if cnode flag set */
3574 if (VTOC(vp)->c_flag & C_SSD_STATIC) {
3575 buf_markstatic(bp);
3576 }
3577
3578 /* Mark buffer as containing greedy-mode data if cnode flag set */
3579 if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) {
3580 bufattr_markgreedymode(buf_attr(bp));
3581 }
3582
3583 /* mark buffer as containing burst mode data if cnode flag set */
3584 if (VTOC(vp)->c_flag & C_IO_ISOCHRONOUS) {
3585 bufattr_markisochronous(buf_attr(bp));
3586 }
3587
3588 #if CONFIG_PROTECT
3589 error = cp_handle_strategy(bp);
3590
3591 if (error)
3592 return error;
3593 #endif
3594
3595 error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
3596
3597 return error;
3598 }
3599
3600 int
3601 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vfs_context_t context)
3602 {
3603 register struct cnode *cp = VTOC(vp);
3604 struct filefork *fp = VTOF(vp);
3605 kauth_cred_t cred = vfs_context_ucred(context);
3606 int retval;
3607 off_t bytesToAdd;
3608 off_t actualBytesAdded;
3609 off_t filebytes;
3610 u_int32_t fileblocks;
3611 int blksize;
3612 struct hfsmount *hfsmp;
3613 int lockflags;
3614 int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES);
3615
3616 blksize = VTOVCB(vp)->blockSize;
3617 fileblocks = fp->ff_blocks;
3618 filebytes = (off_t)fileblocks * (off_t)blksize;
3619
3620 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_START,
3621 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
3622
3623 if (length < 0)
3624 return (EINVAL);
3625
3626 /* This should only happen with a corrupt filesystem */
3627 if ((off_t)fp->ff_size < 0)
3628 return (EINVAL);
3629
3630 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
3631 return (EFBIG);
3632
3633 hfsmp = VTOHFS(vp);
3634
3635 retval = E_NONE;
3636
3637 /* Files that are changing size are not hot file candidates. */
3638 if (hfsmp->hfc_stage == HFC_RECORDING) {
3639 fp->ff_bytesread = 0;
3640 }
3641
3642 /*
3643 * We cannot just check if fp->ff_size == length (as an optimization)
3644 * since there may be extra physical blocks that also need truncation.
3645 */
3646 #if QUOTA
3647 if ((retval = hfs_getinoquota(cp)))
3648 return(retval);
3649 #endif /* QUOTA */
3650
3651 /*
3652 * Lengthen the size of the file. We must ensure that the
3653 * last byte of the file is allocated. Since the smallest
3654 * value of ff_size is 0, length will be at least 1.
3655 */
3656 if (length > (off_t)fp->ff_size) {
3657 #if QUOTA
3658 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
3659 cred, 0);
3660 if (retval)
3661 goto Err_Exit;
3662 #endif /* QUOTA */
3663 /*
3664 * If we don't have enough physical space then
3665 * we need to extend the physical size.
3666 */
3667 if (length > filebytes) {
3668 int eflags;
3669 u_int32_t blockHint = 0;
3670
3671 /* All or nothing and don't round up to clumpsize. */
3672 eflags = kEFAllMask | kEFNoClumpMask;
3673
3674 if (cred && (suser(cred, NULL) != 0)) {
3675 eflags |= kEFReserveMask; /* keep a reserve */
3676 }
3677
3678 /*
3679 * Allocate Journal and Quota files in metadata zone.
3680 */
3681 if (filebytes == 0 &&
3682 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
3683 hfs_virtualmetafile(cp)) {
3684 eflags |= kEFMetadataMask;
3685 blockHint = hfsmp->hfs_metazone_start;
3686 }
3687 if (hfs_start_transaction(hfsmp) != 0) {
3688 retval = EINVAL;
3689 goto Err_Exit;
3690 }
3691
3692 /* Protect extents b-tree and allocation bitmap */
3693 lockflags = SFL_BITMAP;
3694 if (overflow_extents(fp))
3695 lockflags |= SFL_EXTENTS;
3696 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3697
3698 /*
3699 * Keep growing the file as long as the current EOF is
3700 * less than the desired value.
3701 */
3702 while ((length > filebytes) && (retval == E_NONE)) {
3703 bytesToAdd = length - filebytes;
3704 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
3705 (FCB*)fp,
3706 bytesToAdd,
3707 blockHint,
3708 eflags,
3709 &actualBytesAdded));
3710
3711 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
3712 if (actualBytesAdded == 0 && retval == E_NONE) {
3713 if (length > filebytes)
3714 length = filebytes;
3715 break;
3716 }
3717 } /* endwhile */
3718
3719 hfs_systemfile_unlock(hfsmp, lockflags);
3720
3721 if (hfsmp->jnl) {
3722 hfs_update(vp, 0);
3723 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3724 }
3725
3726 hfs_end_transaction(hfsmp);
3727
3728 if (retval)
3729 goto Err_Exit;
3730
3731 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE,
3732 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
3733 }
3734
3735 if (ISSET(flags, IO_NOZEROFILL)) {
3736 // An optimisation for the hibernation file
3737 if (vnode_isswap(vp))
3738 rl_remove_all(&fp->ff_invalidranges);
3739 } else {
3740 if (!vnode_issystem(vp) && retval == E_NONE) {
3741 if (length > (off_t)fp->ff_size) {
3742 struct timeval tv;
3743
3744 /* Extending the file: time to fill out the current last page w. zeroes? */
3745 if (fp->ff_size & PAGE_MASK_64) {
3746 /* There might be some valid data at the start of the (current) last page
3747 of the file, so zero out the remainder of that page to ensure the
3748 entire page contains valid data. */
3749 hfs_unlock(cp);
3750 retval = hfs_zero_eof_page(vp, length);
3751 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
3752 if (retval) goto Err_Exit;
3753 }
3754 microuptime(&tv);
3755 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
3756 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
3757 }
3758 } else {
3759 panic("hfs_truncate: invoked on non-UBC object?!");
3760 };
3761 }
3762 if (suppress_times == 0) {
3763 cp->c_touch_modtime = TRUE;
3764 }
3765 fp->ff_size = length;
3766
3767 } else { /* Shorten the size of the file */
3768
3769 // An optimisation for the hibernation file
3770 if (ISSET(flags, IO_NOZEROFILL) && vnode_isswap(vp)) {
3771 rl_remove_all(&fp->ff_invalidranges);
3772 } else if ((off_t)fp->ff_size > length) {
3773 /* Any space previously marked as invalid is now irrelevant: */
3774 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
3775 }
3776
3777 /*
3778 * Account for any unmapped blocks. Note that the new
3779 * file length can still end up with unmapped blocks.
3780 */
3781 if (fp->ff_unallocblocks > 0) {
3782 u_int32_t finalblks;
3783 u_int32_t loanedBlocks;
3784
3785 hfs_lock_mount(hfsmp);
3786 loanedBlocks = fp->ff_unallocblocks;
3787 cp->c_blocks -= loanedBlocks;
3788 fp->ff_blocks -= loanedBlocks;
3789 fp->ff_unallocblocks = 0;
3790
3791 hfsmp->loanedBlocks -= loanedBlocks;
3792
3793 finalblks = (length + blksize - 1) / blksize;
3794 if (finalblks > fp->ff_blocks) {
3795 /* calculate required unmapped blocks */
3796 loanedBlocks = finalblks - fp->ff_blocks;
3797 hfsmp->loanedBlocks += loanedBlocks;
3798
3799 fp->ff_unallocblocks = loanedBlocks;
3800 cp->c_blocks += loanedBlocks;
3801 fp->ff_blocks += loanedBlocks;
3802 }
3803 hfs_unlock_mount (hfsmp);
3804 }
3805
3806 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
3807 if (hfs_start_transaction(hfsmp) != 0) {
3808 retval = EINVAL;
3809 goto Err_Exit;
3810 }
3811
3812 if (fp->ff_unallocblocks == 0) {
3813 /* Protect extents b-tree and allocation bitmap */
3814 lockflags = SFL_BITMAP;
3815 if (overflow_extents(fp))
3816 lockflags |= SFL_EXTENTS;
3817 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3818
3819 retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0,
3820 FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));
3821
3822 hfs_systemfile_unlock(hfsmp, lockflags);
3823 }
3824 if (hfsmp->jnl) {
3825 if (retval == 0) {
3826 fp->ff_size = length;
3827 }
3828 hfs_update(vp, 0);
3829 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3830 }
3831 hfs_end_transaction(hfsmp);
3832
3833 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
3834 if (retval)
3835 goto Err_Exit;
3836 #if QUOTA
3837 /* These are bytesreleased */
3838 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
3839 #endif /* QUOTA */
3840
3841 //
3842 // Unlike when growing a file, we adjust the hotfile block count here
3843 // instead of deeper down in the block allocation code because we do
3844 // not necessarily have a vnode or "fcb" at the time we're deleting
3845 // the file and so we wouldn't know if it was hotfile cached or not
3846 //
3847 hfs_hotfile_adjust_blocks(vp, (int64_t)((savedbytes - filebytes) / blksize));
3848
3849
3850 /*
3851 * Only set update flag if the logical length changes & we aren't
3852 * suppressing modtime updates.
3853 */
3854 if (((off_t)fp->ff_size != length) && (suppress_times == 0)) {
3855 cp->c_touch_modtime = TRUE;
3856 }
3857 fp->ff_size = length;
3858 }
3859 if (cp->c_mode & (S_ISUID | S_ISGID)) {
3860 if (!vfs_context_issuser(context))
3861 cp->c_mode &= ~(S_ISUID | S_ISGID);
3862 }
3863 cp->c_flag |= C_MODIFIED;
3864 cp->c_touch_chgtime = TRUE; /* status changed */
3865 if (suppress_times == 0) {
3866 cp->c_touch_modtime = TRUE; /* file data was modified */
3867
3868 /*
3869 * If we are not suppressing the modtime update, then
3870 * update the gen count as well.
3871 */
3872 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) {
3873 hfs_incr_gencount(cp);
3874 }
3875 }
3876
3877 retval = hfs_update(vp, 0);
3878 if (retval) {
3879 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE,
3880 -1, -1, -1, retval, 0);
3881 }
3882
3883 Err_Exit:
3884
3885 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_END,
3886 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
3887
3888 return (retval);
3889 }
3890
3891 /*
3892 * Preparation which must be done prior to deleting the catalog record
3893 * of a file or directory. In order to make the on-disk state as safe as possible,
3894 * we remove the catalog entry before releasing the bitmap blocks and the
3895 * overflow extent records. However, some work must be done prior to deleting
3896 * the catalog record.
3897 *
3898 * When calling this function, the cnode must exist both in memory and on-disk.
3899 * If there are both resource fork and data fork vnodes, this function should
3900 * be called on both.
3901 */
3902
3903 int
3904 hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {
3905
3906 struct filefork *fp = VTOF(vp);
3907 struct cnode *cp = VTOC(vp);
3908 #if QUOTA
3909 int retval = 0;
3910 #endif /* QUOTA */
3911
3912 /* Cannot truncate an HFS directory! */
3913 if (vnode_isdir(vp)) {
3914 return (EISDIR);
3915 }
3916
3917 /*
3918 * See the comment below in hfs_truncate for why we need to call
3919 * setsize here. Essentially we want to avoid pending IO if we
3920 * already know that the blocks are going to be released here.
3921 * This function is only called when totally removing all storage for a file, so
3922 * we can take a shortcut and immediately setsize (0);
3923 */
3924 ubc_setsize(vp, 0);
3925
3926 /* This should only happen with a corrupt filesystem */
3927 if ((off_t)fp->ff_size < 0)
3928 return (EINVAL);
3929
3930 /*
3931 * We cannot just check if fp->ff_size == length (as an optimization)
3932 * since there may be extra physical blocks that also need truncation.
3933 */
3934 #if QUOTA
3935 if ((retval = hfs_getinoquota(cp))) {
3936 return(retval);
3937 }
3938 #endif /* QUOTA */
3939
3940 /* Wipe out any invalid ranges which have yet to be backed by disk */
3941 rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);
3942
3943 /*
3944 * Account for any unmapped blocks. Since we're deleting the
3945 * entire file, we don't have to worry about just shrinking
3946 * to a smaller number of borrowed blocks.
3947 */
3948 if (fp->ff_unallocblocks > 0) {
3949 u_int32_t loanedBlocks;
3950
3951 hfs_lock_mount (hfsmp);
3952 loanedBlocks = fp->ff_unallocblocks;
3953 cp->c_blocks -= loanedBlocks;
3954 fp->ff_blocks -= loanedBlocks;
3955 fp->ff_unallocblocks = 0;
3956
3957 hfsmp->loanedBlocks -= loanedBlocks;
3958
3959 hfs_unlock_mount (hfsmp);
3960 }
3961
3962 return 0;
3963 }
3964
3965
3966 /*
3967 * Special wrapper around calling TruncateFileC. This function is usable
3968 * even when the catalog record does not exist any longer, making it ideal
3969 * for use when deleting a file. The simplification here is that we know
3970 * that we are releasing all blocks.
3971 *
3972 * Note that this function may be called when there is no vnode backing
3973 * the file fork in question. We may call this from hfs_vnop_inactive
3974 * to clear out resource fork data (and may not want to clear out the data
3975 * fork yet). As a result, we pointer-check both sets of inputs before
3976 * doing anything with them.
3977 *
3978 * The caller is responsible for saving off a copy of the filefork(s)
3979 * embedded within the cnode prior to calling this function. The pointers
3980 * supplied as arguments must be valid even if the cnode is no longer valid.
3981 */
3982
3983 int
3984 hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
3985 struct filefork *rsrcfork, u_int32_t fileid) {
3986
3987 off_t filebytes;
3988 u_int32_t fileblocks;
3989 int blksize = 0;
3990 int error = 0;
3991 int lockflags;
3992
3993 blksize = hfsmp->blockSize;
3994
3995 /* Data Fork */
3996 if (datafork) {
3997 off_t prev_filebytes;
3998
3999 datafork->ff_size = 0;
4000
4001 fileblocks = datafork->ff_blocks;
4002 filebytes = (off_t)fileblocks * (off_t)blksize;
4003 prev_filebytes = filebytes;
4004
4005 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
4006
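/*
 * Truncate the data fork down in HFS_BIGFILE_SIZE steps, one journal
 * transaction per pass, so that no single transaction grows large enough
 * to exhaust journal/kernel resources.
 */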
4007 while (filebytes > 0) {
4008 if (filebytes > HFS_BIGFILE_SIZE) {
4009 filebytes -= HFS_BIGFILE_SIZE;
4010 } else {
4011 filebytes = 0;
4012 }
4013
4014 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
4015 if (hfs_start_transaction(hfsmp) != 0) {
4016 error = EINVAL;
4017 break;
4018 }
4019
4020 if (datafork->ff_unallocblocks == 0) {
4021 /* Protect extents b-tree and allocation bitmap */
4022 lockflags = SFL_BITMAP;
4023 if (overflow_extents(datafork))
4024 lockflags |= SFL_EXTENTS;
4025 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4026
4027 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false));
4028
4029 hfs_systemfile_unlock(hfsmp, lockflags);
4030 }
4031 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
4032
4033 struct cnode *cp = datafork ? FTOC(datafork) : NULL;
4034 struct vnode *vp;
4035 vp = cp ? CTOV(cp, 0) : NULL;
4036 hfs_hotfile_adjust_blocks(vp, (int64_t)((prev_filebytes - filebytes) / blksize));
4037 prev_filebytes = filebytes;
4038
4039 /* Finish the transaction and start over if necessary */
4040 hfs_end_transaction(hfsmp);
4041
4042 if (error) {
4043 break;
4044 }
4045 }
4046 }
4047
4048 /* Resource fork */
4049 if (error == 0 && rsrcfork) {
4050 rsrcfork->ff_size = 0;
4051
4052 fileblocks = rsrcfork->ff_blocks;
4053 filebytes = (off_t)fileblocks * (off_t)blksize;
4054
4055 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
4056
4057 while (filebytes > 0) {
4058 if (filebytes > HFS_BIGFILE_SIZE) {
4059 filebytes -= HFS_BIGFILE_SIZE;
4060 } else {
4061 filebytes = 0;
4062 }
4063
4064 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
4065 if (hfs_start_transaction(hfsmp) != 0) {
4066 error = EINVAL;
4067 break;
4068 }
4069
4070 if (rsrcfork->ff_unallocblocks == 0) {
4071 /* Protect extents b-tree and allocation bitmap */
4072 lockflags = SFL_BITMAP;
4073 if (overflow_extents(rsrcfork))
4074 lockflags |= SFL_EXTENTS;
4075 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4076
4077 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false));
4078
4079 hfs_systemfile_unlock(hfsmp, lockflags);
4080 }
4081 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
4082
4083 /* Finish the transaction and start over if necessary */
4084 hfs_end_transaction(hfsmp);
4085
4086 if (error) {
4087 break;
4088 }
4089 }
4090 }
4091
4092 return error;
4093 }
4094
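/*
 * Wrapper for ubc_setsize that tolerates being called with the cnode lock
 * held: it first tries ubc_setsize_ex with UBC_SETSIZE_NO_FS_REENTRY and,
 * if that returns EAGAIN, temporarily drops the cnode lock around a plain
 * ubc_setsize_ex call.  ENOENT (no UBC info, e.g. symlinks) is treated as
 * success.
 */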
4095 errno_t hfs_ubc_setsize(vnode_t vp, off_t len, bool have_cnode_lock)
4096 {
4097 errno_t error;
4098
4099 /*
4100 * Call ubc_setsize to give the VM subsystem a chance to do
4101 * whatever it needs to with existing pages before we delete
4102 * blocks. Note that symlinks don't use the UBC so we'll
4103 * get back ENOENT in that case.
4104 */
4105 if (have_cnode_lock) {
4106 error = ubc_setsize_ex(vp, len, UBC_SETSIZE_NO_FS_REENTRY);
4107 if (error == EAGAIN) {
4108 cnode_t *cp = VTOC(vp);
4109
4110 if (cp->c_truncatelockowner != current_thread())
4111 hfs_warn("hfs: hfs_ubc_setsize called without exclusive truncate lock!");
4112
4113 hfs_unlock(cp);
4114 error = ubc_setsize_ex(vp, len, 0);
4115 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
4116 }
4117 } else
4118 error = ubc_setsize_ex(vp, len, 0);
4119
4120 return error == ENOENT ? 0 : error;
4121 }
4122
4123 /*
4124 * Truncate a cnode to at most 'length' bytes, freeing (or adding) the
4125 * disk blocks.
4126 */
4127 int
4128 hfs_truncate(struct vnode *vp, off_t length, int flags,
4129 int truncateflags, vfs_context_t context)
4130 {
4131 struct filefork *fp = VTOF(vp);
4132 off_t filebytes;
4133 u_int32_t fileblocks;
4134 int blksize;
4135 errno_t error = 0;
4136 struct cnode *cp = VTOC(vp);
4137 hfsmount_t *hfsmp = VTOHFS(vp);
4138
4139 /* Cannot truncate an HFS directory! */
4140 if (vnode_isdir(vp)) {
4141 return (EISDIR);
4142 }
4143 /* A swap file cannot change size. */
4144 if (vnode_isswap(vp) && length && !ISSET(flags, IO_NOAUTH)) {
4145 return (EPERM);
4146 }
4147
4148 blksize = hfsmp->blockSize;
4149 fileblocks = fp->ff_blocks;
4150 filebytes = (off_t)fileblocks * (off_t)blksize;
4151
4152 bool caller_has_cnode_lock = (cp->c_lockowner == current_thread());
4153
4154 error = hfs_ubc_setsize(vp, length, caller_has_cnode_lock);
4155 if (error)
4156 return error;
4157
4158 if (!caller_has_cnode_lock) {
4159 error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
4160 if (error)
4161 return error;
4162 }
4163
4164 if (vnode_islnk(vp) && cp->c_datafork->ff_symlinkptr) {
4165 hfs_free(cp->c_datafork->ff_symlinkptr, cp->c_datafork->ff_size);
4166 cp->c_datafork->ff_symlinkptr = NULL;
4167 }
4168
4169 // have to loop truncating or growing files that are
4170 // really big because otherwise transactions can get
4171 // enormous and consume too many kernel resources.
4172
4173 if (length < filebytes) {
4174 while (filebytes > length) {
4175 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
4176 filebytes -= HFS_BIGFILE_SIZE;
4177 } else {
4178 filebytes = length;
4179 }
4180 error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
4181 if (error)
4182 break;
4183 }
4184 } else if (length > filebytes) {
4185 kauth_cred_t cred = vfs_context_ucred(context);
4186 const bool keep_reserve = cred && suser(cred, NULL) != 0;
4187
4188 if (hfs_freeblks(hfsmp, keep_reserve)
4189 < howmany(length - filebytes, blksize)) {
4190 error = ENOSPC;
4191 } else {
4192 while (filebytes < length) {
4193 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
4194 filebytes += HFS_BIGFILE_SIZE;
4195 } else {
4196 filebytes = length;
4197 }
4198 error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
4199 if (error)
4200 break;
4201 }
4202 }
4203 } else /* Same logical size */ {
4204
4205 error = do_hfs_truncate(vp, length, flags, truncateflags, context);
4206 }
4207 /* Files that are changing size are not hot file candidates. */
4208 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
4209 fp->ff_bytesread = 0;
4210 }
4211
4212 #if HFS_CONFIG_KEY_ROLL
4213 if (!error && cp->c_truncatelockowner == current_thread()) {
4214 hfs_key_roll_check(cp, true);
4215 }
4216 #endif
4217
4218 if (!caller_has_cnode_lock)
4219 hfs_unlock(cp);
4220
4221 // Make sure UBC's size matches up (in case we didn't completely succeed)
4222 errno_t err2 = hfs_ubc_setsize(vp, fp->ff_size, caller_has_cnode_lock);
4223 if (!error)
4224 error = err2;
4225
4226 return error;
4227 }
4228
4229
4230 /*
4231 * Preallocate file storage space.
4232 */
4233 int
4234 hfs_vnop_allocate(struct vnop_allocate_args /* {
4235 vnode_t a_vp;
4236 off_t a_length;
4237 u_int32_t a_flags;
4238 off_t *a_bytesallocated;
4239 off_t a_offset;
4240 vfs_context_t a_context;
4241 } */ *ap)
4242 {
4243 struct vnode *vp = ap->a_vp;
4244 struct cnode *cp;
4245 struct filefork *fp;
4246 ExtendedVCB *vcb;
4247 off_t length = ap->a_length;
4248 off_t startingPEOF;
4249 off_t moreBytesRequested;
4250 off_t actualBytesAdded;
4251 off_t filebytes;
4252 u_int32_t fileblocks;
4253 int retval, retval2;
4254 u_int32_t blockHint;
4255 u_int32_t extendFlags; /* For call to ExtendFileC */
4256 struct hfsmount *hfsmp;
4257 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
4258 int lockflags;
4259 time_t orig_ctime;
4260
4261 *(ap->a_bytesallocated) = 0;
4262
4263 if (!vnode_isreg(vp))
4264 return (EISDIR);
4265 if (length < (off_t)0)
4266 return (EINVAL);
4267
4268 cp = VTOC(vp);
4269
4270 orig_ctime = VTOC(vp)->c_ctime;
4271
4272 nspace_snapshot_event(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);
4273
4274 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
4275
4276 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
4277 goto Err_Exit;
4278 }
4279
4280 fp = VTOF(vp);
4281 hfsmp = VTOHFS(vp);
4282 vcb = VTOVCB(vp);
4283
4284 fileblocks = fp->ff_blocks;
4285 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
4286
4287 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
4288 retval = EINVAL;
4289 goto Err_Exit;
4290 }
4291
4292 /* Fill in the flags word for the call to Extend the file */
4293
4294 extendFlags = kEFNoClumpMask;
4295 if (ap->a_flags & ALLOCATECONTIG)
4296 extendFlags |= kEFContigMask;
4297 if (ap->a_flags & ALLOCATEALL)
4298 extendFlags |= kEFAllMask;
4299 if (cred && suser(cred, NULL) != 0)
4300 extendFlags |= kEFReserveMask;
4301 if (hfs_virtualmetafile(cp))
4302 extendFlags |= kEFMetadataMask;
4303
4304 retval = E_NONE;
4305 blockHint = 0;
4306 startingPEOF = filebytes;
4307
4308 if (ap->a_flags & ALLOCATEFROMPEOF)
4309 length += filebytes;
4310 else if (ap->a_flags & ALLOCATEFROMVOL)
4311 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
4312
4313 /* If no changes are necessary, then we're done */
4314 if (filebytes == length)
4315 goto Std_Exit;
4316
4317 /*
4318 * Lengthen the size of the file. We must ensure that the
4319 * last byte of the file is allocated. Since the smallest
4320 * value of filebytes is 0, length will be at least 1.
4321 */
4322 if (length > filebytes) {
4323 if (ISSET(extendFlags, kEFAllMask)
4324 && (hfs_freeblks(hfsmp, ISSET(extendFlags, kEFReserveMask))
4325 < howmany(length - filebytes, hfsmp->blockSize))) {
4326 retval = ENOSPC;
4327 goto Err_Exit;
4328 }
4329
4330 off_t total_bytes_added = 0, orig_request_size;
4331
4332 orig_request_size = moreBytesRequested = length - filebytes;
4333
4334 #if QUOTA
4335 retval = hfs_chkdq(cp,
4336 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
4337 cred, 0);
4338 if (retval)
4339 goto Err_Exit;
4340
4341 #endif /* QUOTA */
4342 /*
4343 * Metadata zone checks.
4344 */
4345 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
4346 /*
4347 * Allocate Journal and Quota files in metadata zone.
4348 */
4349 if (hfs_virtualmetafile(cp)) {
4350 blockHint = hfsmp->hfs_metazone_start;
4351 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
4352 (blockHint <= hfsmp->hfs_metazone_end)) {
4353 /*
4354 * Move blockHint outside metadata zone.
4355 */
4356 blockHint = hfsmp->hfs_metazone_end + 1;
4357 }
4358 }
4359
4360
4361 while ((length > filebytes) && (retval == E_NONE)) {
4362 off_t bytesRequested;
4363
4364 if (hfs_start_transaction(hfsmp) != 0) {
4365 retval = EINVAL;
4366 goto Err_Exit;
4367 }
4368
4369 /* Protect extents b-tree and allocation bitmap */
4370 lockflags = SFL_BITMAP;
4371 if (overflow_extents(fp))
4372 lockflags |= SFL_EXTENTS;
4373 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4374
4375 if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
4376 bytesRequested = HFS_BIGFILE_SIZE;
4377 } else {
4378 bytesRequested = moreBytesRequested;
4379 }
4380
4381 if (extendFlags & kEFContigMask) {
4382 // if we're on a sparse device, this will force it to do a
4383 // full scan to find the space needed.
4384 hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
4385 }
4386
4387 retval = MacToVFSError(ExtendFileC(vcb,
4388 (FCB*)fp,
4389 bytesRequested,
4390 blockHint,
4391 extendFlags,
4392 &actualBytesAdded));
4393
4394 if (retval == E_NONE) {
4395 *(ap->a_bytesallocated) += actualBytesAdded;
4396 total_bytes_added += actualBytesAdded;
4397 moreBytesRequested -= actualBytesAdded;
4398 if (blockHint != 0) {
4399 blockHint += actualBytesAdded / vcb->blockSize;
4400 }
4401 }
4402 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
4403
4404 hfs_systemfile_unlock(hfsmp, lockflags);
4405
4406 if (hfsmp->jnl) {
4407 (void) hfs_update(vp, 0);
4408 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
4409 }
4410
4411 hfs_end_transaction(hfsmp);
4412 }
4413
4414
4415 /*
4416 * if we get an error and no changes were made then exit
4417 * otherwise we must do the hfs_update to reflect the changes
4418 */
4419 if (retval && (startingPEOF == filebytes))
4420 goto Err_Exit;
4421
4422 /*
4423 * Adjust the reported *a_bytesallocated to be allocation block
4424 * aligned, not clump size aligned.
4425 * NOTE: So what we are reporting does not affect reality
4426 * until the file is closed, when we truncate the file to allocation
4427 * block size.
4428 */
4429 if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
4430 *(ap->a_bytesallocated) =
4431 roundup(orig_request_size, (off_t)vcb->blockSize);
4432
4433 } else { /* Shorten the size of the file */
4434
4435 /*
4436 * N.B. At present, this code is never called. If and when we
4437 * do start using it, it looks like there might be slightly
4438 * strange semantics with the file size: it's possible for the
4439 * file size to *increase* e.g. if current file size is 5,
4440 * length is 1024 and filebytes is 4096, the file size will
4441 * end up being 1024 bytes. This isn't necessarily a problem
4442 * but it's not consistent with the code above which doesn't
4443 * change the file size.
4444 */
4445
4446 retval = hfs_truncate(vp, length, 0, 0, ap->a_context);
4447 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
4448
4449 /*
4450 * if we get an error and no changes were made then exit
4451 * otherwise we must do the hfs_update to reflect the changes
4452 */
4453 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
4454 #if QUOTA
4455 /* These are bytesreleased */
4456 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
4457 #endif /* QUOTA */
4458
4459 if (fp->ff_size > filebytes) {
4460 fp->ff_size = filebytes;
4461
4462 hfs_ubc_setsize(vp, fp->ff_size, true);
4463 }
4464 }
4465
4466 Std_Exit:
4467 cp->c_flag |= C_MODIFIED;
4468 cp->c_touch_chgtime = TRUE;
4469 cp->c_touch_modtime = TRUE;
4470 retval2 = hfs_update(vp, 0);
4471
4472 if (retval == 0)
4473 retval = retval2;
4474 Err_Exit:
4475 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
4476 hfs_unlock(cp);
4477 return (retval);
4478 }
4479
4480
4481 /*
4482 * Pagein for HFS filesystem
4483 */
4484 int
4485 hfs_vnop_pagein(struct vnop_pagein_args *ap)
4486 /*
4487 struct vnop_pagein_args {
4488 vnode_t a_vp,
4489 upl_t a_pl,
4490 vm_offset_t a_pl_offset,
4491 off_t a_f_offset,
4492 size_t a_size,
4493 int a_flags
4494 vfs_context_t a_context;
4495 };
4496 */
4497 {
4498 vnode_t vp;
4499 struct cnode *cp;
4500 struct filefork *fp;
4501 int error = 0;
4502 upl_t upl;
4503 upl_page_info_t *pl;
4504 off_t f_offset;
4505 off_t page_needed_f_offset;
4506 int offset;
4507 int isize;
4508 int upl_size;
4509 int pg_index;
4510 boolean_t truncate_lock_held = FALSE;
4511 boolean_t file_converted = FALSE;
4512 kern_return_t kret;
4513
4514 vp = ap->a_vp;
4515 cp = VTOC(vp);
4516 fp = VTOF(vp);
4517
4518 #if CONFIG_PROTECT
4519 if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) {
4520 /*
4521 * If we errored here, then this means that one of two things occurred:
4522 * 1. there was a problem with the decryption of the key.
4523 * 2. the device is locked and we are not allowed to access this particular file.
4524 *
4525 * Either way, this means that we need to shut down this upl now. As long as
4526 * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves)
4527 * then we create a upl and immediately abort it.
4528 */
4529 if (ap->a_pl == NULL) {
4530 /* create the upl */
4531 ubc_create_upl (vp, ap->a_f_offset, ap->a_size, &upl, &pl,
4532 UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
4533 /* mark the range as needed so it doesn't immediately get discarded upon abort */
4534 ubc_upl_range_needed (upl, ap->a_pl_offset / PAGE_SIZE, 1);
4535
4536 /* Abort the range */
4537 ubc_upl_abort_range (upl, 0, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
4538 }
4539
4540
4541 return error;
4542 }
4543 #endif /* CONFIG_PROTECT */
4544
4545 if (ap->a_pl != NULL) {
4546 /*
4547 * this can only happen for swap files now that
4548 * we're asking for V2 paging behavior...
4549 * so don't need to worry about decompression, or
4550 * keeping track of blocks read or taking the truncate lock
4551 */
4552 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
4553 ap->a_size, (off_t)fp->ff_size, ap->a_flags);
4554 goto pagein_done;
4555 }
4556
4557 page_needed_f_offset = ap->a_f_offset + ap->a_pl_offset;
4558
4559 retry_pagein:
4560 /*
4561 * take truncate lock (shared/recursive) to guard against
4562 * zero-fill thru fsync interfering, but only for v2
4563 *
4564 * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
4565 * lock shared and we are allowed to recurse 1 level if this thread already
4566 * owns the lock exclusively... this can legally occur
4567 * if we are doing a shrinking ftruncate against a file
4568 * that is mapped private, and the pages being truncated
4569 * do not currently exist in the cache... in that case
4570 * we will have to page-in the missing pages in order
4571 * to provide them to the private mapping... we must
4572 * also call hfs_unlock_truncate with a positive been_recursed
4573 * arg to indicate that if we have recursed, there is no need to drop
4574 * the lock. Allowing this simple recursion is necessary
4575 * in order to avoid a certain deadlock... since the ftruncate
4576 * already holds the truncate lock exclusively, if we try
4577 * to acquire it shared to protect the pagein path, we will
4578 * hang this thread
4579 *
4580 * NOTE: The if () block below is a workaround in order to prevent a
4581 * VM deadlock. See rdar://7853471.
4582 *
4583 * If we are in a forced unmount, then launchd will still have the
4584 * dyld_shared_cache file mapped as it is trying to reboot. If we
4585 * take the truncate lock here to service a page fault, then our
4586 * thread could deadlock with the forced-unmount. The forced unmount
4587 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
4588 * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount
4589 * thread will think it needs to copy all of the data out of the file
4590 * and into a VM copy object. If we hold the cnode lock here, then that
4591 * VM operation will not be able to proceed, because we'll set a busy page
4592 * before attempting to grab the lock. Note that this isn't as simple as "don't
4593 * call ubc_setsize" because doing that would just shift the problem to the
4594 * ubc_msync done before the vnode is reclaimed.
4595 *
4596 * So, if a forced unmount on this volume is in flight AND the cnode is
4597 * marked C_DELETED, then just go ahead and do the page in without taking
4598 * the lock (thus suspending pagein_v2 semantics temporarily). Since it's on a file
4599 * that is not going to be available on the next mount, this seems like an
4600 * OK solution from a correctness point of view, even though it is hacky.
4601 */
4602 if (vfs_isforce(vnode_mount(vp))) {
4603 if (cp->c_flag & C_DELETED) {
4604 /* If we don't get it, then just go ahead and operate without the lock */
4605 truncate_lock_held = hfs_try_trunclock(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4606 }
4607 }
4608 else {
4609 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4610 truncate_lock_held = TRUE;
4611 }
4612
4613 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
4614
4615 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
4616 error = EINVAL;
4617 goto pagein_done;
4618 }
4619 ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);
4620
4621 upl_size = isize = ap->a_size;
4622
4623 /*
4624 * Scan from the back to find the last page in the UPL, so that we
4625 * aren't looking at a UPL that may have already been freed by the
4626 * preceding aborts/completions.
4627 */
4628 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
4629 if (upl_page_present(pl, --pg_index))
4630 break;
4631 if (pg_index == 0) {
4632 /*
4633 * no absent pages were found in the range specified
4634 * just abort the UPL to get rid of it and then we're done
4635 */
4636 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
4637 goto pagein_done;
4638 }
4639 }
4640 /*
4641 * initialize the offset variables before we touch the UPL.
4642 * f_offset is the position into the file, in bytes
4643 * offset is the position into the UPL, in bytes
4644 * pg_index is the pg# of the UPL we're operating on
4645 * isize is the offset into the UPL of the last page that is present.
4646 */
4647 isize = ((pg_index + 1) * PAGE_SIZE);
4648 pg_index = 0;
4649 offset = 0;
4650 f_offset = ap->a_f_offset;
4651
4652 while (isize) {
4653 int xsize;
4654 int num_of_pages;
4655
4656 if ( !upl_page_present(pl, pg_index)) {
4657 /*
4658 * we asked for RET_ONLY_ABSENT, so it's possible
4659 * to get back empty slots in the UPL.
4660 * just skip over them
4661 */
4662 f_offset += PAGE_SIZE;
4663 offset += PAGE_SIZE;
4664 isize -= PAGE_SIZE;
4665 pg_index++;
4666
4667 continue;
4668 }
4669 /*
4670 * We know that we have at least one absent page.
4671 * Now checking to see how many in a row we have
4672 */
4673 num_of_pages = 1;
4674 xsize = isize - PAGE_SIZE;
4675
4676 while (xsize) {
4677 if ( !upl_page_present(pl, pg_index + num_of_pages))
4678 break;
4679 num_of_pages++;
4680 xsize -= PAGE_SIZE;
4681 }
4682 xsize = num_of_pages * PAGE_SIZE;
4683
4684 #if HFS_COMPRESSION
4685 if (VNODE_IS_RSRC(vp)) {
4686 /* allow pageins of the resource fork */
4687 } else {
4688 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
4689
4690 if (compressed) {
4691
4692 if (truncate_lock_held) {
4693 /*
4694 * can't hold the truncate lock when calling into the decmpfs layer
4695 * since it calls back into this layer... even though we're only
4696 * holding the lock in shared mode, and the re-entrant path only
4697 * takes the lock shared, we can deadlock if some other thread
4698 * tries to grab the lock exclusively in between.
4699 */
4700 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4701 truncate_lock_held = FALSE;
4702 }
4703 ap->a_pl = upl;
4704 ap->a_pl_offset = offset;
4705 ap->a_f_offset = f_offset;
4706 ap->a_size = xsize;
4707
4708 error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
4709 /*
4710 * note that decmpfs_pagein_compressed can change the state of
4711 * 'compressed'... it will set it to 0 if the file is no longer
4712 * compressed once the compression lock is successfully taken
4713 * i.e. we would block on that lock while the file is being inflated
4714 */
4715 if (error == 0 && vnode_isfastdevicecandidate(vp)) {
4716 (void) hfs_addhotfile(vp);
4717 }
4718 if (compressed) {
4719 if (error == 0) {
4720 /* successful page-in, update the access time */
4721 VTOC(vp)->c_touch_acctime = TRUE;
4722
4723 //
4724 // compressed files are not traditional hot file candidates
4725 // but they may be for CF (which ignores the ff_bytesread
4726 // field)
4727 //
4728 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
4729 fp->ff_bytesread = 0;
4730 }
4731 } else if (error == EAGAIN) {
4732 /*
4733 * EAGAIN indicates someone else already holds the compression lock...
4734 * to avoid deadlocking, we'll abort this range of pages with an
4735 * indication that the pagein needs to be redriven
4736 */
4737 ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
4738 } else if (error == ENOSPC) {
4739
4740 if (upl_size == PAGE_SIZE)
4741 panic("decmpfs_pagein_compressed: couldn't ubc_upl_map a single page\n");
4742
4743 ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
4744
4745 ap->a_size = PAGE_SIZE;
4746 ap->a_pl = NULL;
4747 ap->a_pl_offset = 0;
4748 ap->a_f_offset = page_needed_f_offset;
4749
4750 goto retry_pagein;
4751 } else {
4752 ubc_upl_abort(upl, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
4753 goto pagein_done;
4754 }
4755 goto pagein_next_range;
4756 }
4757 else {
4758 /*
4759 * Set file_converted only if the file became decompressed while we were
4760 * paging in. If it were still compressed, we would re-start the loop using the goto
4761 * in the above block. This avoids overloading truncate_lock_held as our retry_pagein
4762 * condition below, since we could have avoided taking the truncate lock to prevent
4763 * a deadlock in the force unmount case.
4764 */
4765 file_converted = TRUE;
4766 }
4767 }
4768 if (file_converted == TRUE) {
4769 /*
4770 * the file was converted back to a regular file after we first saw it as compressed.
4771 * We need to abort the upl, retake the truncate lock, recreate the UPL and start over,
4772 * resetting a_size so that we consider what remains of the original request
4773 * and nulling out a_pl and a_pl_offset.
4774 *
4775 * We should only be able to get into this block if the decmpfs_pagein_compressed
4776 * successfully decompressed the range in question for this file.
4777 */
4778 ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
4779
4780 ap->a_size = isize;
4781 ap->a_pl = NULL;
4782 ap->a_pl_offset = 0;
4783
4784 /* Reset file_converted back to false so that we don't infinite-loop. */
4785 file_converted = FALSE;
4786 goto retry_pagein;
4787 }
4788 }
4789 #endif
4790 error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);
4791
4792 /*
4793 * Keep track of blocks read.
4794 */
4795 if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
4796 int bytesread;
4797 int took_cnode_lock = 0;
4798
4799 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
4800 bytesread = fp->ff_size;
4801 else
4802 bytesread = xsize;
4803
4804 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
4805 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
4806 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
4807 took_cnode_lock = 1;
4808 }
4809 /*
4810 * If this file hasn't been seen since the start of
4811 * the current sampling period then start over.
4812 */
4813 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
4814 struct timeval tv;
4815
4816 fp->ff_bytesread = bytesread;
4817 microtime(&tv);
4818 cp->c_atime = tv.tv_sec;
4819 } else {
4820 fp->ff_bytesread += bytesread;
4821 }
4822 cp->c_touch_acctime = TRUE;
4823
4824 if (vnode_isfastdevicecandidate(vp)) {
4825 (void) hfs_addhotfile(vp);
4826 }
4827 if (took_cnode_lock)
4828 hfs_unlock(cp);
4829 }
4830 pagein_next_range:
4831 f_offset += xsize;
4832 offset += xsize;
4833 isize -= xsize;
4834 pg_index += num_of_pages;
4835
4836 error = 0;
4837 }
4838
4839 pagein_done:
4840 if (truncate_lock_held == TRUE) {
4841 /* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */
4842 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4843 }
4844
4845 return (error);
4846 }
4847
4848 /*
4849 * Pageout for HFS filesystem.
4850 */
4851 int
4852 hfs_vnop_pageout(struct vnop_pageout_args *ap)
4853 /*
4854 struct vnop_pageout_args {
4855 vnode_t a_vp,
4856 upl_t a_pl,
4857 vm_offset_t a_pl_offset,
4858 off_t a_f_offset,
4859 size_t a_size,
4860 int a_flags
4861 vfs_context_t a_context;
4862 };
4863 */
4864 {
4865 vnode_t vp = ap->a_vp;
4866 struct cnode *cp;
4867 struct filefork *fp;
4868 int retval = 0;
4869 off_t filesize;
4870 upl_t upl;
4871 upl_page_info_t* pl = NULL;
4872 vm_offset_t a_pl_offset;
4873 int a_flags;
4874 int is_pageoutv2 = 0;
4875 kern_return_t kret;
4876
4877 cp = VTOC(vp);
4878 fp = VTOF(vp);
4879
4880 a_flags = ap->a_flags;
4881 a_pl_offset = ap->a_pl_offset;
4882
4883 /*
4884 * we can tell if we're getting the new or old behavior from the UPL
4885 */
4886 if ((upl = ap->a_pl) == NULL) {
4887 int request_flags;
4888
4889 is_pageoutv2 = 1;
4890 /*
4891 * we're in control of any UPL we commit
4892 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
4893 */
4894 a_flags &= ~UPL_NOCOMMIT;
4895 a_pl_offset = 0;
4896
4897 /*
4898 * For V2 semantics, we want to take the cnode truncate lock
4899 * shared to guard against the file size changing via zero-filling.
4900 *
4901 * However, we have to be careful because we may be invoked
4902 * via the ubc_msync path to write out dirty mmap'd pages
4903 * in response to a lock event on a content-protected
4904 * filesystem (e.g. to write out class A files).
4905 * As a result, we want to take the truncate lock 'SHARED' with
4906 * the mini-recursion locktype so that we don't deadlock/panic
4907 * because we may be already holding the truncate lock exclusive to force any other
4908 * IOs to have blocked behind us.
4909 */
4910 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4911
4912 if (a_flags & UPL_MSYNC) {
4913 request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
4914 }
4915 else {
4916 request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
4917 }
4918
4919 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);
4920
4921 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
4922 retval = EINVAL;
4923 goto pageout_done;
4924 }
4925 }
4926 /*
4927 * from this point forward upl points at the UPL we're working with;
4928 * it was either passed in or we successfully created it
4929 */
4930
4931 /*
4932 * Figure out where the file ends, for pageout purposes. If
4933 * ff_new_size > ff_size, then we're in the middle of extending the
4934 * file via a write, so it is safe (and necessary) that we be able
4935 * to pageout up to that point.
4936 */
4937 filesize = fp->ff_size;
4938 if (fp->ff_new_size > filesize)
4939 filesize = fp->ff_new_size;
4940
4941 /*
4942 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
4943 * UPL instead of relying on the UPL passed into us. We go ahead and do that here,
4944 * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for
4945 * N dirty ranges in the UPL. Note that this is almost a direct copy of the
4946 * logic in vnode_pageout except that we need to do it after grabbing the truncate
4947 * lock in HFS so that we don't lock invert ourselves.
4948 *
4949 * Note that we can still get into this function on behalf of the default pager with
4950 * non-V2 behavior (swapfiles). However in that case, we did not grab locks above
4951 * since fsync and other writing threads will grab the locks, then mark the
4952 * relevant pages as busy. But the pageout codepath marks the pages as busy,
4953 * and THEN would attempt to grab the truncate lock, which would result in deadlock. So
4954 * we do not try to grab anything for the pre-V2 case, which should only be accessed
4955 * by the paging/VM system.
4956 */
4957
4958 if (is_pageoutv2) {
4959 off_t f_offset;
4960 int offset;
4961 int isize;
4962 int pg_index;
4963 int error;
4964 int error_ret = 0;
4965
4966 isize = ap->a_size;
4967 f_offset = ap->a_f_offset;
4968
4969 /*
4970 * Scan from the back to find the last page in the UPL, so that we
4971 * aren't looking at a UPL that may have already been freed by the
4972 * preceding aborts/completions.
4973 */
4974 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
4975 if (upl_page_present(pl, --pg_index))
4976 break;
4977 if (pg_index == 0) {
4978 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
4979 goto pageout_done;
4980 }
4981 }
4982
4983 /*
4984 * initialize the offset variables before we touch the UPL.
4985 * a_f_offset is the position into the file, in bytes
4986 * offset is the position into the UPL, in bytes
4987 * pg_index is the pg# of the UPL we're operating on.
4988 * isize is the offset into the UPL of the last non-clean page.
4989 */
4990 isize = ((pg_index + 1) * PAGE_SIZE);
4991
4992 offset = 0;
4993 pg_index = 0;
4994
4995 while (isize) {
4996 int xsize;
4997 int num_of_pages;
4998
4999 if ( !upl_page_present(pl, pg_index)) {
5000 /*
5001 * we asked for RET_ONLY_DIRTY, so it's possible
5002 * to get back empty slots in the UPL.
5003 * just skip over them
5004 */
5005 f_offset += PAGE_SIZE;
5006 offset += PAGE_SIZE;
5007 isize -= PAGE_SIZE;
5008 pg_index++;
5009
5010 continue;
5011 }
5012 if ( !upl_dirty_page(pl, pg_index)) {
5013 panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
5014 }
5015
5016 /*
5017 * We know that we have at least one dirty page.
5018 * Now checking to see how many in a row we have
5019 */
5020 num_of_pages = 1;
5021 xsize = isize - PAGE_SIZE;
5022
5023 while (xsize) {
5024 if ( !upl_dirty_page(pl, pg_index + num_of_pages))
5025 break;
5026 num_of_pages++;
5027 xsize -= PAGE_SIZE;
5028 }
5029 xsize = num_of_pages * PAGE_SIZE;
5030
5031 if ((error = cluster_pageout(vp, upl, offset, f_offset,
5032 xsize, filesize, a_flags))) {
5033 if (error_ret == 0)
5034 error_ret = error;
5035 }
5036 f_offset += xsize;
5037 offset += xsize;
5038 isize -= xsize;
5039 pg_index += num_of_pages;
5040 }
5041 /* capture errnos bubbled out of cluster_pageout if they occurred */
5042 if (error_ret != 0) {
5043 retval = error_ret;
5044 }
5045 } /* end block for v2 pageout behavior */
5046 else {
5047 /*
5048 * just call cluster_pageout for old pre-v2 behavior
5049 */
5050 retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
5051 ap->a_size, filesize, a_flags);
5052 }
5053
5054 /*
5055 * If data was written, update the modification time of the file
5056 * but only if it's mapped writable; we will have touched the
5057 * modification time for direct writes.
5058 */
5059 if (retval == 0 && (ubc_is_mapped_writable(vp)
5060 || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING))) {
5061 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5062
5063 // Check again with lock
5064 bool mapped_writable = ubc_is_mapped_writable(vp);
5065 if (mapped_writable
5066 || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING)) {
5067 cp->c_touch_modtime = TRUE;
5068 cp->c_touch_chgtime = TRUE;
5069
5070 /*
5071 * We only need to increment the generation counter if
5072 * it's currently mapped writable; otherwise hfs_vnop_mnomap
5073 * already incremented the counter when the mapping went away.
5074 */
5075 if (mapped_writable)
5076 hfs_incr_gencount(VTOC(vp));
5077
5078 /*
5079 * If setuid or setgid bits are set and this process is
5080 * not the superuser then clear the setuid and setgid bits
5081 * as a precaution against tampering.
5082 */
5083 if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
5084 (vfs_context_suser(ap->a_context) != 0)) {
5085 cp->c_mode &= ~(S_ISUID | S_ISGID);
5086 }
5087 }
5088
5089 hfs_unlock(cp);
5090 }
5091
5092 pageout_done:
5093 if (is_pageoutv2) {
5094 /*
5095 * Release the truncate lock. Note that because
5096 * we may have taken the lock recursively by
5097 * being invoked via ubc_msync due to lockdown,
5098 * we should release it recursively, too.
5099 */
5100 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
5101 }
5102 return (retval);
5103 }
5104
5105 /*
5106 * Intercept B-Tree node writes to unswap them if necessary.
5107 */
5108 int
5109 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
5110 {
5111 int retval = 0;
5112 register struct buf *bp = ap->a_bp;
5113 register struct vnode *vp = buf_vnode(bp);
5114 BlockDescriptor block;
5115
5116 /* Trap B-Tree writes */
5117 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
5118 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
5119 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
5120 (vp == VTOHFS(vp)->hfc_filevp)) {
5121
5122 /*
5123 * Swap and validate the node if it is in native byte order.
5124 * This is always true on big endian, so we always validate
5125 * before writing here. On little endian, the node typically has
5126 * been swapped and validated when it was written to the journal,
5127 * so we won't do anything here.
5128 */
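/*
 * The last two bytes of a B-tree node hold the offset of record 0, which
 * is always 14 (0x000E, the size of the node descriptor).  If they read
 * as 0x000e in host byte order, the node has not been swapped yet.
 */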
5129 if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
5130 /* Prepare the block pointer */
5131 block.blockHeader = bp;
5132 block.buffer = (char *)buf_dataptr(bp);
5133 block.blockNum = buf_lblkno(bp);
5134 /* not found in cache ==> came from disk */
5135 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
5136 block.blockSize = buf_count(bp);
5137
5138 /* Endian un-swap B-Tree node */
5139 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
5140 if (retval)
5141 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
5142 }
5143 }
5144
5145 /* This buffer shouldn't be locked anymore but if it is clear it */
5146 if ((buf_flags(bp) & B_LOCKED)) {
5147 // XXXdbg
5148 if (VTOHFS(vp)->jnl) {
5149 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
5150 }
5151 buf_clearflags(bp, B_LOCKED);
5152 }
5153 retval = vn_bwrite (ap);
5154
5155 return (retval);
5156 }
5157
5158
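/*
 * Pin or unpin a contiguous range of allocation blocks on the underlying
 * device: pin_state selects the _DKIOCCSPINEXTENT / _DKIOCCSUNPINEXTENT
 * request and flags, the extent is converted to byte units, and the ioctl
 * is issued against hfs_devvp.
 */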
5159 int
5160 hfs_pin_block_range(struct hfsmount *hfsmp, int pin_state, uint32_t start_block, uint32_t nblocks)
5161 {
5162 _dk_cs_pin_t pin;
5163 unsigned ioc;
5164 int err;
5165
5166 memset(&pin, 0, sizeof(pin));
5167 pin.cp_extent.offset = ((uint64_t)start_block) * HFSTOVCB(hfsmp)->blockSize;
5168 pin.cp_extent.length = ((uint64_t)nblocks) * HFSTOVCB(hfsmp)->blockSize;
5169 switch (pin_state) {
5170 case HFS_PIN_IT:
5171 ioc = _DKIOCCSPINEXTENT;
5172 pin.cp_flags = _DKIOCCSPINTOFASTMEDIA;
5173 break;
5174 case HFS_PIN_IT | HFS_TEMP_PIN:
5175 ioc = _DKIOCCSPINEXTENT;
5176 pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSTEMPORARYPIN;
5177 break;
5178 case HFS_PIN_IT | HFS_DATALESS_PIN:
5179 ioc = _DKIOCCSPINEXTENT;
5180 pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSPINFORSWAPFILE;
5181 break;
5182 case HFS_UNPIN_IT:
5183 ioc = _DKIOCCSUNPINEXTENT;
5184 pin.cp_flags = 0;
5185 break;
5186 case HFS_UNPIN_IT | HFS_EVICT_PIN:
5187 ioc = _DKIOCCSPINEXTENT;
5188 pin.cp_flags = _DKIOCCSPINTOSLOWMEDIA;
5189 break;
5190 default:
5191 return EINVAL;
5192 }
5193 err = VNOP_IOCTL(hfsmp->hfs_devvp, ioc, (caddr_t)&pin, 0, vfs_context_kernel());
5194 return err;
5195 }
5196
5197 //
5198 // The cnode lock should already be held on entry to this function
5199 //
5200 int
5201 hfs_pin_vnode(struct hfsmount *hfsmp, struct vnode *vp, int pin_state, uint32_t *num_blocks_pinned)
5202 {
5203 struct filefork *fp = VTOF(vp);
5204 int i, err=0, need_put=0;
5205 struct vnode *rsrc_vp=NULL;
5206 uint32_t npinned = 0;
5207 off_t offset;
5208
5209 if (num_blocks_pinned) {
5210 *num_blocks_pinned = 0;
5211 }
5212
5213 if (vnode_vtype(vp) != VREG) {
5214 /* Not allowed to pin directories or symlinks */
5215 printf("hfs: can't pin vnode of type %d\n", vnode_vtype(vp));
5216 return (EPERM);
5217 }
5218
5219 if (fp->ff_unallocblocks) {
5220 printf("hfs: can't pin a vnode w/unalloced blocks (%d)\n", fp->ff_unallocblocks);
5221 return (EINVAL);
5222 }
5223
5224 /*
5225 * It is possible that, if the caller unlocked/re-locked the cnode after checking
5226 * for C_NOEXISTS|C_DELETED, the file could have been deleted while the
5227 * cnode was unlocked. So check the condition again and return ENOENT so that
5228 * the caller knows why we failed to pin the vnode.
5229 */
5230 if (VTOC(vp)->c_flag & (C_NOEXISTS|C_DELETED)) {
5231 // makes no sense to pin something that's pending deletion
5232 return ENOENT;
5233 }
5234
5235 if (fp->ff_blocks == 0 && (VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
5236 if (!VNODE_IS_RSRC(vp) && hfs_vgetrsrc(hfsmp, vp, &rsrc_vp) == 0) {
5237 //printf("hfs: fileid %d resource fork nblocks: %d / size: %lld\n", VTOC(vp)->c_fileid,
5238 // VTOC(rsrc_vp)->c_rsrcfork->ff_blocks,VTOC(rsrc_vp)->c_rsrcfork->ff_size);
5239
5240 fp = VTOC(rsrc_vp)->c_rsrcfork;
5241 need_put = 1;
5242 }
5243 }
5244 if (fp->ff_blocks == 0) {
5245 if (need_put) {
5246 //
5247 // use a distinct error code for a compressed file that has no resource fork;
5248 // we return EALREADY to indicate that the data is already probably hot file
5249 // cached because it's in an EA and the attributes btree is on the ssd
5250 //
5251 err = EALREADY;
5252 } else {
5253 err = EINVAL;
5254 }
5255 goto out;
5256 }
5257
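/*
 * Pin the extents recorded directly in the catalog record (up to
 * kHFSPlusExtentDensity of them); overflow extents are handled below.
 */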
5258 offset = 0;
5259 for (i = 0; i < kHFSPlusExtentDensity; i++) {
5260 if (fp->ff_extents[i].startBlock == 0) {
5261 break;
5262 }
5263
5264 err = hfs_pin_block_range(hfsmp, pin_state, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount);
5265 if (err) {
5266 break;
5267 } else {
5268 npinned += fp->ff_extents[i].blockCount;
5269 }
5270 }
5271
5272 if (err || npinned == 0) {
5273 goto out;
5274 }
5275
5276 if (fp->ff_extents[kHFSPlusExtentDensity-1].startBlock) {
5277 uint32_t pblocks;
5278 uint8_t forktype = 0;
5279
5280 if (fp == VTOC(vp)->c_rsrcfork) {
5281 forktype = 0xff;
5282 }
5283 /*
5284 * The file could have overflow extents; better pin those too.
5285 *
5286 * We assume that since we are holding the cnode lock for this cnode,
5287 * the file's extents cannot be manipulated, but the tree could, so we
5288 * need to ensure that it doesn't change behind our back as we iterate it.
5289 */
5290 int lockflags = hfs_systemfile_lock (hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
5291 err = hfs_pin_overflow_extents(hfsmp, VTOC(vp)->c_fileid, forktype, &pblocks);
5292 hfs_systemfile_unlock (hfsmp, lockflags);
5293
5294 if (err) {
5295 goto out;
5296 }
5297 npinned += pblocks;
5298 }
5299
5300 out:
5301 if (num_blocks_pinned) {
5302 *num_blocks_pinned = npinned;
5303 }
5304
5305 if (need_put && rsrc_vp) {
5306 //
5307 // have to unlock the cnode since it's shared between the
5308 // resource fork vnode and the data fork vnode (and the
5309 // vnode_put() may need to re-acquire the cnode lock to
5310 // reclaim the resource fork vnode)
5311 //
5312 hfs_unlock(VTOC(vp));
5313 vnode_put(rsrc_vp);
5314 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5315 }
5316 return err;
5317 }
5318
5319
5320 /*
5321 * Relocate a file to a new location on disk
5322 * cnode must be locked on entry
5323 *
5324 * Relocation occurs by cloning the file's data from its
5325 * current set of blocks to a new set of blocks. During
5326 * the relocation all of the blocks (old and new) are
5327 * owned by the file.
5328 *
5329 * -----------------
5330 * |///////////////|
5331 * -----------------
5332 * 0 N (file offset)
5333 *
5334 * ----------------- -----------------
5335 * |///////////////| | | STEP 1 (acquire new blocks)
5336 * ----------------- -----------------
5337 * 0 N N+1 2N
5338 *
5339 * ----------------- -----------------
5340 * |///////////////| |///////////////| STEP 2 (clone data)
5341 * ----------------- -----------------
5342 * 0 N N+1 2N
5343 *
5344 * -----------------
5345 * |///////////////| STEP 3 (head truncate blocks)
5346 * -----------------
5347 * 0 N
5348 *
5349 * During steps 2 and 3 page-outs to file offsets less
5350 * than or equal to N are suspended.
5351 *
5352 * During step 3 page-ins to the file get suspended.
5353 */
5354 int
5355 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
5356 struct proc *p)
5357 {
5358 struct cnode *cp;
5359 struct filefork *fp;
5360 struct hfsmount *hfsmp;
5361 u_int32_t headblks;
5362 u_int32_t datablks;
5363 u_int32_t blksize;
5364 u_int32_t growsize;
5365 u_int32_t nextallocsave;
5366 daddr64_t sector_a, sector_b;
5367 int eflags;
5368 off_t newbytes;
5369 int retval;
5370 int lockflags = 0;
5371 int took_trunc_lock = 0;
5372 int started_tr = 0;
5373 enum vtype vnodetype;
5374
5375 vnodetype = vnode_vtype(vp);
5376 if (vnodetype != VREG) {
5377 /* Only regular files can be relocated. */
5378 return (EPERM);
5379 }
5380
5381 hfsmp = VTOHFS(vp);
5382 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
5383 return (ENOSPC);
5384 }
5385
5386 cp = VTOC(vp);
5387 fp = VTOF(vp);
5388 if (fp->ff_unallocblocks)
5389 return (EINVAL);
5390
5391 #if CONFIG_PROTECT
5392 /*
5393 * <rdar://problem/9118426>
5394 * Disable HFS file relocation on content-protected filesystems
5395 */
5396 if (cp_fs_protected (hfsmp->hfs_mp)) {
5397 return EINVAL;
5398 }
5399 #endif
5400 /* If it's an SSD, also disable HFS relocation */
5401 if (hfsmp->hfs_flags & HFS_SSD) {
5402 return EINVAL;
5403 }
5404
5405
5406 blksize = hfsmp->blockSize;
5407 if (blockHint == 0)
5408 blockHint = hfsmp->nextAllocation;
5409
5410 if (fp->ff_size > 0x7fffffff) {
5411 return (EFBIG);
5412 }
5413
5414 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
5415 hfs_unlock(cp);
5416 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
5417 /* Force lock since callers expect the lock to be held. */
5418 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) {
5419 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5420 return (retval);
5421 }
5422 /* No need to continue if file was removed. */
5423 if (cp->c_flag & C_NOEXISTS) {
5424 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5425 return (ENOENT);
5426 }
5427 took_trunc_lock = 1;
5428 }
5429 headblks = fp->ff_blocks;
5430 datablks = howmany(fp->ff_size, blksize);
5431 growsize = datablks * blksize;
5432 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
5433 if (blockHint >= hfsmp->hfs_metazone_start &&
5434 blockHint <= hfsmp->hfs_metazone_end)
5435 eflags |= kEFMetadataMask;
5436
5437 if (hfs_start_transaction(hfsmp) != 0) {
5438 if (took_trunc_lock)
5439 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5440 return (EINVAL);
5441 }
5442 started_tr = 1;
5443 /*
5444 * Protect the extents b-tree and the allocation bitmap
5445 * during MapFileBlockC and ExtendFileC operations.
5446 */
5447 lockflags = SFL_BITMAP;
5448 if (overflow_extents(fp))
5449 lockflags |= SFL_EXTENTS;
5450 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5451
5452 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
5453 if (retval) {
5454 retval = MacToVFSError(retval);
5455 goto out;
5456 }
5457
5458 /*
5459 * STEP 1 - acquire new allocation blocks.
5460 */
5461 nextallocsave = hfsmp->nextAllocation;
5462 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
5463 if (eflags & kEFMetadataMask) {
5464 hfs_lock_mount(hfsmp);
5465 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
5466 MarkVCBDirty(hfsmp);
5467 hfs_unlock_mount(hfsmp);
5468 }
5469
5470 retval = MacToVFSError(retval);
5471 if (retval == 0) {
5472 cp->c_flag |= C_MODIFIED;
5473 if (newbytes < growsize) {
5474 retval = ENOSPC;
5475 goto restore;
5476 } else if (fp->ff_blocks < (headblks + datablks)) {
5477 printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN);
5478 retval = ENOSPC;
5479 goto restore;
5480 }
5481
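/*
 * sector_a is the device sector holding the last byte of the original
 * data; sector_b below is the first sector of the newly allocated space.
 * If the two are adjacent, the new blocks merely extend the old extent in
 * place, so nothing would actually move and we give the space back.
 */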
5482 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
5483 if (retval) {
5484 retval = MacToVFSError(retval);
5485 } else if ((sector_a + 1) == sector_b) {
5486 retval = ENOSPC;
5487 goto restore;
5488 } else if ((eflags & kEFMetadataMask) &&
5489 ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
5490 hfsmp->hfs_metazone_end)) {
5491 #if 0
5492 const char * filestr;
5493 char emptystr = '\0';
5494
5495 if (cp->c_desc.cd_nameptr != NULL) {
5496 filestr = (const char *)&cp->c_desc.cd_nameptr[0];
5497 } else if (vnode_name(vp) != NULL) {
5498 filestr = vnode_name(vp);
5499 } else {
5500 filestr = &emptystr;
5501 }
5502 #endif
5503 retval = ENOSPC;
5504 goto restore;
5505 }
5506 }
5507 /* Done with system locks and journal for now. */
5508 hfs_systemfile_unlock(hfsmp, lockflags);
5509 lockflags = 0;
5510 hfs_end_transaction(hfsmp);
5511 started_tr = 0;
5512
5513 if (retval) {
5514 /*
5515 * Check to see if failure is due to excessive fragmentation.
5516 */
5517 if ((retval == ENOSPC) &&
5518 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
5519 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
5520 }
5521 goto out;
5522 }
5523 /*
5524 * STEP 2 - clone file data into the new allocation blocks.
5525 */
5526
5527 if (vnodetype == VLNK)
5528 retval = EPERM;
5529 else if (vnode_issystem(vp))
5530 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
5531 else
5532 retval = hfs_clonefile(vp, headblks, datablks, blksize);
5533
5534 /* Start transaction for step 3 or for a restore. */
5535 if (hfs_start_transaction(hfsmp) != 0) {
5536 retval = EINVAL;
5537 goto out;
5538 }
5539 started_tr = 1;
5540 if (retval)
5541 goto restore;
5542
5543 /*
5544 * STEP 3 - switch to cloned data and remove old blocks.
5545 */
5546 lockflags = SFL_BITMAP;
5547 if (overflow_extents(fp))
5548 lockflags |= SFL_EXTENTS;
5549 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5550
5551 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
5552
5553 hfs_systemfile_unlock(hfsmp, lockflags);
5554 lockflags = 0;
5555 if (retval)
5556 goto restore;
5557 out:
5558 if (took_trunc_lock)
5559 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5560
5561 if (lockflags) {
5562 hfs_systemfile_unlock(hfsmp, lockflags);
5563 lockflags = 0;
5564 }
5565
5566 /* Push cnode's new extent data to disk. */
5567 if (retval == 0) {
5568 hfs_update(vp, 0);
5569 }
5570 if (hfsmp->jnl) {
5571 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
5572 (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
5573 else
5574 (void) hfs_flushvolumeheader(hfsmp, 0);
5575 }
5576 exit:
5577 if (started_tr)
5578 hfs_end_transaction(hfsmp);
5579
5580 return (retval);
5581
5582 restore:
5583 if (fp->ff_blocks == headblks) {
5584 if (took_trunc_lock)
5585 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5586 goto exit;
5587 }
5588 /*
5589 * Give back any newly allocated space.
5590 */
5591 if (lockflags == 0) {
5592 lockflags = SFL_BITMAP;
5593 if (overflow_extents(fp))
5594 lockflags |= SFL_EXTENTS;
5595 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5596 }
5597
5598 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
5599 FTOC(fp)->c_fileid, false);
5600
5601 hfs_systemfile_unlock(hfsmp, lockflags);
5602 lockflags = 0;
5603
5604 if (took_trunc_lock)
5605 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5606 goto exit;
5607 }
5608
5609
5610 /*
5611 * Clone a file's data within the file.
5612 *
5613 */
5614 static int
5615 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
5616 {
5617 caddr_t bufp;
5618 size_t bufsize;
5619 size_t copysize;
5620 size_t iosize;
5621 size_t offset;
5622 off_t writebase;
5623 uio_t auio;
5624 int error = 0;
5625
5626 writebase = blkstart * blksize;
5627 copysize = blkcnt * blksize;
5628 iosize = bufsize = MIN(copysize, 128 * 1024);
5629 offset = 0;
5630
5631 hfs_unlock(VTOC(vp));
5632
5633 #if CONFIG_PROTECT
5634 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
5635 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5636 return (error);
5637 }
5638 #endif /* CONFIG_PROTECT */
5639
5640 bufp = hfs_malloc(bufsize);
5641
5642 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
5643
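/*
 * Copy the data in chunks of at most 128 KB: read the existing
 * blocks with IO_NOCACHE, then write them at the new location with
 * IO_NOCACHE | IO_SYNC so the clone is on disk before the old
 * blocks are released.
 */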
5644 while (offset < copysize) {
5645 iosize = MIN(copysize - offset, iosize);
5646
5647 uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
5648 uio_addiov(auio, (uintptr_t)bufp, iosize);
5649
5650 error = cluster_read(vp, auio, copysize, IO_NOCACHE);
5651 if (error) {
5652 printf("hfs_clonefile: cluster_read failed - %d\n", error);
5653 break;
5654 }
5655 if (uio_resid(auio) != 0) {
5656 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
5657 error = EIO;
5658 break;
5659 }
5660
5661 uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
5662 uio_addiov(auio, (uintptr_t)bufp, iosize);
5663
5664 error = cluster_write(vp, auio, writebase + offset,
5665 writebase + offset + iosize,
5666 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
5667 if (error) {
5668 printf("hfs_clonefile: cluster_write failed - %d\n", error);
5669 break;
5670 }
5671 if (uio_resid(auio) != 0) {
5672 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
5673 error = EIO;
5674 break;
5675 }
5676 offset += iosize;
5677 }
5678 uio_free(auio);
5679
5680 if ((blksize & PAGE_MASK)) {
5681 /*
5682 * Since the copy may not have started on a PAGE
5683 * boundary (or may not have ended on one), we
5684 * may have pages left in the cache, since NOCACHE
5685 * will let partially written pages linger.
5686 * Let's just flush the entire range to make sure
5687 * we don't have any pages left that are beyond
5688 * (or intersect) the real LEOF of this file.
5689 */
5690 ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
5691 } else {
5692 /*
5693 * No need to call ubc_msync or hfs_invalbuf
5694 * since the file was copied using IO_NOCACHE and
5695 * the copy was done starting and ending on a page
5696 * boundary in the file.
5697 */
5698 }
5699 hfs_free(bufp, bufsize);
5700
5701 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5702 return (error);
5703 }
5704
5705 /*
5706 * Clone a system (metadata) file.
5707 * Copies the first (blkcnt * blksize) bytes of the fork, via the buffer cache, to the region beginning at allocation block blkstart, then syncs to disk.
5708 */
5709 static int
5710 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
5711 kauth_cred_t cred, struct proc *p)
5712 {
5713 caddr_t bufp;
5714 char * offset;
5715 size_t bufsize;
5716 size_t iosize;
5717 struct buf *bp = NULL;
5718 daddr64_t blkno;
5719 daddr64_t blk;
5720 daddr64_t start_blk;
5721 daddr64_t last_blk;
5722 int breadcnt;
5723 int i;
5724 int error = 0;
5725
5726
5727 iosize = GetLogicalBlockSize(vp);
5728 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
5729 breadcnt = bufsize / iosize;
5730
5731 bufp = hfs_malloc(bufsize);
5732
5733 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
5734 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
5735 blkno = 0;
5736
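/*
 * Copy the fork through the buffer cache in passes of up to a
 * megabyte: read logical blocks [0, last_blk) and rewrite them
 * starting at start_blk, then push everything to disk with the
 * hfs_fsync() below.
 */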
5737 while (blkno < last_blk) {
5738 /*
5739 * Read up to a megabyte
5740 */
5741 offset = bufp;
5742 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
5743 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
5744 if (error) {
5745 printf("hfs_clonesysfile: meta_bread error %d\n", error);
5746 goto out;
5747 }
5748 if (buf_count(bp) != iosize) {
5749 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
5750 goto out;
5751 }
5752 bcopy((char *)buf_dataptr(bp), offset, iosize);
5753
5754 buf_markinvalid(bp);
5755 buf_brelse(bp);
5756 bp = NULL;
5757
5758 offset += iosize;
5759 }
5760
5761 /*
5762 * Write up to a megabyte
5763 */
5764 offset = bufp;
5765 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
5766 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
5767 if (bp == NULL) {
5768 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
5769 error = EIO;
5770 goto out;
5771 }
5772 bcopy(offset, (char *)buf_dataptr(bp), iosize);
5773 error = (int)buf_bwrite(bp);
5774 bp = NULL;
5775 if (error)
5776 goto out;
5777 offset += iosize;
5778 }
5779 }
5780 out:
5781 if (bp) {
5782 buf_brelse(bp);
5783 }
5784
5785 hfs_free(bufp, bufsize);
5786
5787 error = hfs_fsync(vp, MNT_WAIT, 0, p);
5788
5789 return (error);
5790 }
5791
5792 errno_t hfs_flush_invalid_ranges(vnode_t vp)
5793 {
5794 cnode_t *cp = VTOC(vp);
5795
5796 hfs_assert(cp->c_lockowner == current_thread());
5797 hfs_assert(cp->c_truncatelockowner == current_thread());
5798
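/* Nothing to flush unless a zero-fill sync was requested or a zero-fill timeout is pending. */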
5799 if (!ISSET(cp->c_flag, C_ZFWANTSYNC) && !cp->c_zftimeout)
5800 return 0;
5801
5802 filefork_t *fp = VTOF(vp);
5803
5804 /*
5805 * We can't hold the cnode lock whilst we call cluster_write so we
5806 * need to copy the extents into a local buffer.
5807 */
5808 int max_exts = 16;
5809 struct ext {
5810 off_t start, end;
5811 } exts_buf[max_exts]; // 256 bytes
5812 struct ext *exts = exts_buf;
5813 int ext_count = 0;
5814 errno_t ret;
5815
5816 struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges);
5817
5818 while (r) {
5819 /* If we have more than can fit in our stack buffer, switch
5820 to a heap buffer. */
5821 if (exts == exts_buf && ext_count == max_exts) {
5822 max_exts = 256;
5823 exts = hfs_malloc(sizeof(struct ext) * max_exts);
5824 memcpy(exts, exts_buf, ext_count * sizeof(struct ext));
5825 }
5826
5827 struct rl_entry *next = TAILQ_NEXT(r, rl_link);
5828
5829 exts[ext_count++] = (struct ext){ r->rl_start, r->rl_end };
5830
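/*
 * Flush the batched ranges once we reach the end of the list or the
 * heap buffer fills up: each range is zeroed on disk by
 * cluster_write() with IO_HEADZEROFILL and no backing uio.
 */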
5831 if (!next || (ext_count == max_exts && exts != exts_buf)) {
5832 hfs_unlock(cp);
5833 for (int i = 0; i < ext_count; ++i) {
5834 ret = cluster_write(vp, NULL, fp->ff_size, exts[i].end + 1,
5835 exts[i].start, 0,
5836 IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE);
5837 if (ret) {
5838 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
5839 goto exit;
5840 }
5841 }
5842
5843 if (!next) {
5844 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
5845 break;
5846 }
5847
5848 /* Push any existing clusters, which should clean up our invalid
5849 ranges as they go through hfs_vnop_blockmap. */
5850 cluster_push(vp, 0);
5851
5852 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
5853
5854 /*
5855 * Get back to where we were (given that we dropped the lock).
5856 * There shouldn't be many ranges left because we pushed them above.
5857 */
5858 TAILQ_FOREACH(r, &fp->ff_invalidranges, rl_link) {
5859 if (r->rl_end > exts[ext_count - 1].end)
5860 break;
5861 }
5862
5863 ext_count = 0;
5864 } else
5865 r = next;
5866 }
5867
5868 ret = 0;
5869
5870 exit:
5871
5872 if (exts != exts_buf)
5873 hfs_free(exts, sizeof(struct ext) * max_exts);
5874
5875 return ret;
5876 }