1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* @(#)hfs_readwrite.c 1.0
29 *
30 * (c) 1998-2001 Apple Inc. All Rights Reserved
31 *
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
33 *
34 */
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/fcntl.h>
40 #include <sys/stat.h>
41 #include <sys/buf.h>
42 #include <sys/proc.h>
43 #include <sys/kauth.h>
44 #include <sys/vnode.h>
45 #include <sys/uio.h>
46 #include <sys/vfs_context.h>
47 #include <sys/disk.h>
48 #include <sys/sysctl.h>
49 #include <sys/fsctl.h>
50 #include <sys/ubc.h>
51 #include <sys/fsevents.h>
52 #include <uuid/uuid.h>
53
54 #include <libkern/OSDebug.h>
55
56 #include <miscfs/specfs/specdev.h>
57
60 #include <vm/vm_pageout.h>
61 #include <vm/vm_kern.h>
62
63 #include <IOKit/IOBSD.h>
64
65 #include <sys/kdebug.h>
66
67 #include "hfs.h"
68 #include "hfs_attrlist.h"
69 #include "hfs_endian.h"
70 #include "hfs_fsctl.h"
71 #include "hfs_quota.h"
72 #include "FileMgrInternal.h"
73 #include "BTreesInternal.h"
74 #include "hfs_cnode.h"
75 #include "hfs_dbg.h"
76
77 #if HFS_CONFIG_KEY_ROLL
78 #include "hfs_key_roll.h"
79 #endif
80
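/*
 * can_cluster(size) evaluates to true when the transfer size is a whole
 * multiple of 4 KiB and no larger than MAXPHYSIO/2.
 */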
81 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
82
83 enum {
84 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
85 };
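/*
 * MAXHFSFILESIZE is 0x7FFFFFFF bytes (2 GiB - 1); hfs_vnop_read() uses it
 * below as the HFS Standard per-file size limit when returning EFBIG.
 */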
86
87 /* from bsd/hfs/hfs_vfsops.c */
88 extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
89
90 /* from hfs_hotfiles.c */
91 extern int hfs_pin_overflow_extents (struct hfsmount *hfsmp, uint32_t fileid,
92 uint8_t forktype, uint32_t *pinned);
93
94 static int hfs_clonefile(struct vnode *, int, int, int);
95 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
96 static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
97
98
99 /*
100 * Read data from a file.
101 */
102 int
103 hfs_vnop_read(struct vnop_read_args *ap)
104 {
105 /*
106 struct vnop_read_args {
107 struct vnodeop_desc *a_desc;
108 vnode_t a_vp;
109 struct uio *a_uio;
110 int a_ioflag;
111 vfs_context_t a_context;
112 };
113 */
114
115 uio_t uio = ap->a_uio;
116 struct vnode *vp = ap->a_vp;
117 struct cnode *cp;
118 struct filefork *fp;
119 struct hfsmount *hfsmp;
120 off_t filesize;
121 off_t filebytes;
122 off_t start_resid = uio_resid(uio);
123 off_t offset = uio_offset(uio);
124 int retval = 0;
125 int took_truncate_lock = 0;
126 int io_throttle = 0;
127 int throttled_count = 0;
128
129 /* Preflight checks */
130 if (!vnode_isreg(vp)) {
131 /* can only read regular files */
132 if (vnode_isdir(vp))
133 return (EISDIR);
134 else
135 return (EPERM);
136 }
137 if (start_resid == 0)
138 return (0); /* Nothing left to do */
139 if (offset < 0)
140 return (EINVAL); /* can't read from a negative offset */
141
142 #if SECURE_KERNEL
143 if ((ap->a_ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
144 (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
145 /* Don't allow unencrypted io request from user space */
146 return EPERM;
147 }
148 #endif
149
150 #if HFS_COMPRESSION
151 if (VNODE_IS_RSRC(vp)) {
152 if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
153 return 0;
154 }
155 /* otherwise read the resource fork normally */
156 } else {
157 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
158 if (compressed) {
159 retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
160 if (retval == 0 && !(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
161 (void) hfs_addhotfile(vp);
162 }
163 if (compressed) {
164 if (retval == 0) {
165 /* successful read, update the access time */
166 VTOC(vp)->c_touch_acctime = TRUE;
167
168 //
169 // compressed files are not traditional hot file candidates
170 // but they may be for CF (which ignores the ff_bytesread
171 // field)
172 //
173 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
174 VTOF(vp)->ff_bytesread = 0;
175 }
176 }
177 return retval;
178 }
179 /* otherwise the file was converted back to a regular file while we were reading it */
180 retval = 0;
181 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
182 int error;
183
184 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
185 if (error) {
186 return error;
187 }
188
189 }
190 }
191 #endif /* HFS_COMPRESSION */
192
193 cp = VTOC(vp);
194 fp = VTOF(vp);
195 hfsmp = VTOHFS(vp);
196
197 #if CONFIG_PROTECT
198 if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {
199 goto exit;
200 }
201
202 #if HFS_CONFIG_KEY_ROLL
203 if (ISSET(ap->a_ioflag, IO_ENCRYPTED)) {
204 off_rsrc_t off_rsrc = off_rsrc_make(offset + start_resid,
205 VNODE_IS_RSRC(vp));
206
207 retval = hfs_key_roll_up_to(ap->a_context, vp, off_rsrc);
208 if (retval)
209 goto exit;
210 }
211 #endif // HFS_CONFIG_KEY_ROLL
212 #endif // CONFIG_PROTECT
213
214 /*
215 * If this read request originated from a syscall (as opposed to
216 * an in-kernel page fault or something), then set it up for
217 * throttle checks
218 */
219 if (ap->a_ioflag & IO_SYSCALL_DISPATCH) {
220 io_throttle = IO_RETURN_ON_THROTTLE;
221 }
222
223 read_again:
224
225 /* Protect against a size change. */
226 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
227 took_truncate_lock = 1;
228
229 filesize = fp->ff_size;
230 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
231
232 /*
233 * Check the file size. Note that per POSIX spec, we return 0 at
234 * file EOF, so attempting a read at an offset that is too big
235 * should just return 0 on HFS+. Since the return value was initialized
236 * to 0 above, we just jump to exit. HFS Standard has its own behavior.
237 */
238 if (offset > filesize) {
239 #if CONFIG_HFS_STD
240 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
241 (offset > (off_t)MAXHFSFILESIZE)) {
242 retval = EFBIG;
243 }
244 #endif
245 goto exit;
246 }
247
248 KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_START,
249 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
250
251 retval = cluster_read(vp, uio, filesize, ap->a_ioflag |io_throttle);
252
253 cp->c_touch_acctime = TRUE;
254
255 KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_END,
256 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
257
258 /*
259 * Keep track of blocks read
260 */
261 if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
262 int took_cnode_lock = 0;
263 off_t bytesread;
264
265 bytesread = start_resid - uio_resid(uio);
266
267 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
268 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
269 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
270 took_cnode_lock = 1;
271 }
272 /*
273 * If this file hasn't been seen since the start of
274 * the current sampling period then start over.
275 */
276 if (cp->c_atime < hfsmp->hfc_timebase) {
277 struct timeval tv;
278
279 fp->ff_bytesread = bytesread;
280 microtime(&tv);
281 cp->c_atime = tv.tv_sec;
282 } else {
283 fp->ff_bytesread += bytesread;
284 }
285
286 if (!(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
287 //
288 // We don't add hotfiles for processes doing IO_EVTONLY I/O
289 // on the assumption that they're system processes such as
290 // mdworker which scan everything in the system (and thus
291 // do not represent user-initiated access to files)
292 //
293 (void) hfs_addhotfile(vp);
294 }
295 if (took_cnode_lock)
296 hfs_unlock(cp);
297 }
298 exit:
299 if (took_truncate_lock) {
300 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
301 }
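/*
 * An EAGAIN from cluster_read() means the I/O would have been throttled
 * and we asked for IO_RETURN_ON_THROTTLE; sleep in throttle_lowpri_io()
 * and retry the whole read from read_again.
 */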
302 if (retval == EAGAIN) {
303 throttle_lowpri_io(1);
304 throttled_count++;
305
306 retval = 0;
307 goto read_again;
308 }
309 if (throttled_count)
310 throttle_info_reset_window(NULL);
311 return (retval);
312 }
313
314 /*
315 * Ideally, this wouldn't be necessary; the cluster code should be
316 * able to handle this on the read-side. See <rdar://20420068>.
317 */
318 static errno_t hfs_zero_eof_page(vnode_t vp, off_t zero_up_to)
319 {
320 hfs_assert(VTOC(vp)->c_lockowner != current_thread());
321 hfs_assert(VTOC(vp)->c_truncatelockowner == current_thread());
322
323 struct filefork *fp = VTOF(vp);
324
325 if (!(fp->ff_size & PAGE_MASK_64) || zero_up_to <= fp->ff_size) {
326 // Nothing to do
327 return 0;
328 }
329
330 zero_up_to = MIN(zero_up_to, (off_t)round_page_64(fp->ff_size));
331
332 /* N.B. At present, @zero_up_to is not important because the cluster
333 code will always zero up to the end of the page anyway. */
334 return cluster_write(vp, NULL, fp->ff_size, zero_up_to,
335 fp->ff_size, 0, IO_HEADZEROFILL);
336 }
337
338 /*
339 * Write data to a file.
340 */
341 int
342 hfs_vnop_write(struct vnop_write_args *ap)
343 {
344 uio_t uio = ap->a_uio;
345 struct vnode *vp = ap->a_vp;
346 struct cnode *cp;
347 struct filefork *fp;
348 struct hfsmount *hfsmp;
349 kauth_cred_t cred = NULL;
350 off_t origFileSize;
351 off_t writelimit;
352 off_t bytesToAdd = 0;
353 off_t actualBytesAdded;
354 off_t filebytes;
355 off_t offset;
356 ssize_t resid;
357 int eflags = 0;
358 int ioflag = ap->a_ioflag;
359 int retval = 0;
360 int lockflags;
361 int cnode_locked = 0;
362 int partialwrite = 0;
363 int do_snapshot = 1;
364 time_t orig_ctime=VTOC(vp)->c_ctime;
365 int took_truncate_lock = 0;
366 int io_return_on_throttle = 0;
367 int throttled_count = 0;
368
369 #if HFS_COMPRESSION
370 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
371 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
372 switch(state) {
373 case FILE_IS_COMPRESSED:
374 return EACCES;
375 case FILE_IS_CONVERTING:
376 /* if FILE_IS_CONVERTING, we allow writes but do not
377 bother with snapshots or else we will deadlock.
378 */
379 do_snapshot = 0;
380 break;
381 default:
382 printf("invalid state %d for compressed file\n", state);
383 /* fall through */
384 }
385 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
386 int error;
387
388 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
389 if (error != 0) {
390 return error;
391 }
392 }
393
394 if (do_snapshot) {
395 nspace_snapshot_event(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);
396 }
397
398 #endif
399
400 #if SECURE_KERNEL
401 if ((ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
402 (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
403 /* Don't allow unencrypted io request from user space */
404 return EPERM;
405 }
406 #endif
407
408 resid = uio_resid(uio);
409 offset = uio_offset(uio);
410
411 if (offset < 0)
412 return (EINVAL);
413 if (resid == 0)
414 return (E_NONE);
415 if (!vnode_isreg(vp))
416 return (EPERM); /* Can only write regular files */
417
418 cp = VTOC(vp);
419 fp = VTOF(vp);
420 hfsmp = VTOHFS(vp);
421
422 #if CONFIG_PROTECT
423 if ((retval = cp_handle_vnop (vp, CP_WRITE_ACCESS, 0)) != 0) {
424 goto exit;
425 }
426 #endif
427
428 eflags = kEFDeferMask; /* defer file block allocations */
429 #if HFS_SPARSE_DEV
430 /*
431 * When the underlying device is sparse and space
432 * is low (< 8MB), stop doing delayed allocations
433 * and begin doing synchronous I/O.
434 */
435 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
436 (hfs_freeblks(hfsmp, 0) < 2048)) {
437 eflags &= ~kEFDeferMask;
438 ioflag |= IO_SYNC;
439 }
440 #endif /* HFS_SPARSE_DEV */
441
442 if ((ioflag & (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) ==
443 (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) {
444 io_return_on_throttle = IO_RETURN_ON_THROTTLE;
445 }
446
447 again:
448 /*
449 * Protect against a size change.
450 *
451 * Note: If took_truncate_lock is true, then we previously got the lock shared
452 * but needed to upgrade to exclusive. So try getting it exclusive from the
453 * start.
454 */
455 if (ioflag & IO_APPEND || took_truncate_lock) {
456 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
457 }
458 else {
459 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
460 }
461 took_truncate_lock = 1;
462
463 /* Update UIO */
464 if (ioflag & IO_APPEND) {
465 uio_setoffset(uio, fp->ff_size);
466 offset = fp->ff_size;
467 }
468 if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
469 retval = EPERM;
470 goto exit;
471 }
472
473 cred = vfs_context_ucred(ap->a_context);
474 if (cred && suser(cred, NULL) != 0)
475 eflags |= kEFReserveMask;
476
477 origFileSize = fp->ff_size;
478 writelimit = offset + resid;
479
480 /*
481 * We may need an exclusive truncate lock for several reasons, all
482 * of which are because we may be writing to a (portion of a) block
483 * for the first time, and we need to make sure no readers see the
484 * prior, uninitialized contents of the block. The cases are:
485 *
486 * 1. We have unallocated (delayed allocation) blocks. We may be
487 * allocating new blocks to the file and writing to them.
488 * (A more precise check would be whether the range we're writing
489 * to contains delayed allocation blocks.)
490 * 2. We need to extend the file. The bytes between the old EOF
491 * and the new EOF are not yet initialized. This is important
492 * even if we're not allocating new blocks to the file. If the
493 * old EOF and new EOF are in the same block, we still need to
494 * protect that range of bytes until they are written for the
495 * first time.
496 *
497 * If we had a shared lock with the above cases, we need to try to upgrade
498 * to an exclusive lock. If the upgrade fails, we will lose the shared
499 * lock, and will need to take the truncate lock again; the took_truncate_lock
500 * flag will still be set, causing us to try for an exclusive lock next time.
501 */
502 if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
503 ((fp->ff_unallocblocks != 0) ||
504 (writelimit > origFileSize))) {
505 if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
506 /*
507 * Lock upgrade failed and we lost our shared lock, try again.
508 * Note: we do not set took_truncate_lock=0 here. Leaving it
509 * set to 1 will cause us to try to get the lock exclusive.
510 */
511 goto again;
512 }
513 else {
514 /* Store the owner in the c_truncatelockowner field if we successfully upgrade */
515 cp->c_truncatelockowner = current_thread();
516 }
517 }
518
519 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
520 goto exit;
521 }
522 cnode_locked = 1;
523
524 filebytes = hfs_blk_to_bytes(fp->ff_blocks, hfsmp->blockSize);
525
526 if (offset > filebytes
527 && (hfs_blk_to_bytes(hfs_freeblks(hfsmp, ISSET(eflags, kEFReserveMask)),
528 hfsmp->blockSize) < offset - filebytes)) {
529 retval = ENOSPC;
530 goto exit;
531 }
532
533 KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_START,
534 (int)offset, uio_resid(uio), (int)fp->ff_size,
535 (int)filebytes, 0);
536
537 /* Check if we do not need to extend the file */
538 if (writelimit <= filebytes) {
539 goto sizeok;
540 }
541
542 bytesToAdd = writelimit - filebytes;
543
544 #if QUOTA
545 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
546 cred, 0);
547 if (retval)
548 goto exit;
549 #endif /* QUOTA */
550
551 if (hfs_start_transaction(hfsmp) != 0) {
552 retval = EINVAL;
553 goto exit;
554 }
555
556 while (writelimit > filebytes) {
557 bytesToAdd = writelimit - filebytes;
558
559 /* Protect extents b-tree and allocation bitmap */
560 lockflags = SFL_BITMAP;
561 if (overflow_extents(fp))
562 lockflags |= SFL_EXTENTS;
563 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
564
565 /* Files that are changing size are not hot file candidates. */
566 if (hfsmp->hfc_stage == HFC_RECORDING) {
567 fp->ff_bytesread = 0;
568 }
569 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
570 0, eflags, &actualBytesAdded));
571
572 hfs_systemfile_unlock(hfsmp, lockflags);
573
574 if ((actualBytesAdded == 0) && (retval == E_NONE))
575 retval = ENOSPC;
576 if (retval != E_NONE)
577 break;
578 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
579 KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_NONE,
580 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
581 }
582 (void) hfs_update(vp, 0);
583 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
584 (void) hfs_end_transaction(hfsmp);
585
586 /*
587 * If we didn't grow the file enough, try a partial write.
588 * POSIX expects this behavior.
589 */
590 if ((retval == ENOSPC) && (filebytes > offset)) {
591 retval = 0;
592 partialwrite = 1;
593 uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
594 resid -= bytesToAdd;
595 writelimit = filebytes;
596 }
597 sizeok:
598 if (retval == E_NONE) {
599 off_t filesize;
600 off_t head_off;
601 int lflag;
602
603 if (writelimit > fp->ff_size) {
604 filesize = writelimit;
605 struct timeval tv;
606 rl_add(fp->ff_size, writelimit - 1 , &fp->ff_invalidranges);
607 microuptime(&tv);
608 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
609 } else
610 filesize = fp->ff_size;
611
612 lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
613
614 /*
615 * We no longer use IO_HEADZEROFILL or IO_TAILZEROFILL (except
616 * for one case below). For the regions that lie before the
617 * beginning and after the end of this write that are in the
618 * same page, we let the cluster code handle zeroing that out
619 * if necessary. If those areas are not cached, the cluster
620 * code will try and read those areas in, and in the case
621 * where those regions have never been written to,
622 * hfs_vnop_blockmap will consult the invalid ranges and then
623 * indicate that. The cluster code will zero out those areas.
624 */
625
626 head_off = trunc_page_64(offset);
627
628 if (head_off < offset && head_off >= fp->ff_size) {
629 /*
630 * The first page is beyond current EOF, so as an
631 * optimisation, we can pass IO_HEADZEROFILL.
632 */
633 lflag |= IO_HEADZEROFILL;
634 }
635
636 hfs_unlock(cp);
637 cnode_locked = 0;
638
639 /*
640 * We need to tell UBC the fork's new size BEFORE calling
641 * cluster_write, in case any of the new pages need to be
642 * paged out before cluster_write completes (which does happen
643 * in embedded systems due to extreme memory pressure).
644 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
645 * will be, so that it can pass that on to cluster_pageout, and
646 * allow those pageouts.
647 *
648 * We don't update ff_size yet since we don't want pageins to
649 * be able to see uninitialized data between the old and new
650 * EOF, until cluster_write has completed and initialized that
651 * part of the file.
652 *
653 * The vnode pager relies on the file size last given to UBC via
654 * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
655 * ff_size (whichever is larger). NOTE: ff_new_size is always
656 * zero, unless we are extending the file via write.
657 */
658 if (filesize > fp->ff_size) {
659 retval = hfs_zero_eof_page(vp, offset);
660 if (retval)
661 goto exit;
662 fp->ff_new_size = filesize;
663 ubc_setsize(vp, filesize);
664 }
665 retval = cluster_write(vp, uio, fp->ff_size, filesize, head_off,
666 0, lflag | IO_NOZERODIRTY | io_return_on_throttle);
667 if (retval) {
668 fp->ff_new_size = 0; /* no longer extending; use ff_size */
669
670 if (retval == EAGAIN) {
671 /*
672 * EAGAIN indicates that we still have I/O to do, but
673 * that we now need to be throttled
674 */
675 if (resid != uio_resid(uio)) {
676 /*
677 * did manage to do some I/O before returning EAGAIN
678 */
679 resid = uio_resid(uio);
680 offset = uio_offset(uio);
681
682 cp->c_touch_chgtime = TRUE;
683 cp->c_touch_modtime = TRUE;
684 hfs_incr_gencount(cp);
685 }
686 if (filesize > fp->ff_size) {
687 /*
688 * we called ubc_setsize before the call to
689 * cluster_write... since we only partially
690 * completed the I/O, we need to
691 * re-adjust our idea of the filesize based
692 * on our interim EOF
693 */
694 ubc_setsize(vp, offset);
695
696 fp->ff_size = offset;
697 }
698 goto exit;
699 }
700 if (filesize > origFileSize) {
701 ubc_setsize(vp, origFileSize);
702 }
703 goto ioerr_exit;
704 }
705
706 if (filesize > origFileSize) {
707 fp->ff_size = filesize;
708
709 /* Files that are changing size are not hot file candidates. */
710 if (hfsmp->hfc_stage == HFC_RECORDING) {
711 fp->ff_bytesread = 0;
712 }
713 }
714 fp->ff_new_size = 0; /* ff_size now has the correct size */
715 }
716 if (partialwrite) {
717 uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
718 resid += bytesToAdd;
719 }
720
721 if (vnode_should_flush_after_write(vp, ioflag))
722 hfs_flush(hfsmp, HFS_FLUSH_CACHE);
723
724 ioerr_exit:
725 if (!cnode_locked) {
726 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
727 cnode_locked = 1;
728 }
729
730 if (resid > uio_resid(uio)) {
731 cp->c_touch_chgtime = TRUE;
732 cp->c_touch_modtime = TRUE;
733 hfs_incr_gencount(cp);
734
735 /*
736 * If we successfully wrote any data and we are not the superuser,
737 * we clear the setuid and setgid bits as a precaution against
738 * tampering.
739 */
740 if (cp->c_mode & (S_ISUID | S_ISGID)) {
741 cred = vfs_context_ucred(ap->a_context);
742 if (cred && suser(cred, NULL)) {
743 cp->c_mode &= ~(S_ISUID | S_ISGID);
744 }
745 }
746 }
747 if (retval) {
748 if (ioflag & IO_UNIT) {
749 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
750 0, ap->a_context);
751 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
752 uio_setresid(uio, resid);
753 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
754 }
755 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio)))
756 retval = hfs_update(vp, 0);
757
758 /* Updating vcbWrCnt doesn't need to be atomic. */
759 hfsmp->vcbWrCnt++;
760
761 KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_END,
762 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
763 exit:
764 if (retval && took_truncate_lock
765 && cp->c_truncatelockowner == current_thread()) {
766 fp->ff_new_size = 0;
767 rl_remove(fp->ff_size, RL_INFINITY, &fp->ff_invalidranges);
768 }
769
770 if (cnode_locked)
771 hfs_unlock(cp);
772
773 if (took_truncate_lock) {
774 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
775 }
776 if (retval == EAGAIN) {
777 throttle_lowpri_io(1);
778 throttled_count++;
779
780 retval = 0;
781 goto again;
782 }
783 if (throttled_count)
784 throttle_info_reset_window(NULL);
785 return (retval);
786 }
787
788 /* support for the "bulk-access" fcntl */
789
790 #define CACHE_LEVELS 16
791 #define NUM_CACHE_ENTRIES (64*16)
792 #define PARENT_IDS_FLAG 0x100
793
794 struct access_cache {
795 int numcached;
796 int cachehits; /* these two for statistics gathering */
797 int lookups;
798 unsigned int *acache;
799 unsigned char *haveaccess;
800 };
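/*
 * acache[] holds directory cnids in ascending order; haveaccess[] is a
 * parallel array whose entry is 0 when access was granted, or an errno
 * (e.g. EACCES, or ESRCH for a scope-check miss) for the cnid at the
 * same index.
 */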
801
802 struct access_t {
803 uid_t uid; /* IN: effective user id */
804 short flags; /* IN: access requested (i.e. R_OK) */
805 short num_groups; /* IN: number of groups user belongs to */
806 int num_files; /* IN: number of files to process */
807 int *file_ids; /* IN: array of file ids */
808 gid_t *groups; /* IN: array of groups */
809 short *access; /* OUT: access info for each file (0 for 'has access') */
810 } __attribute__((unavailable)); // this structure is for reference purposes only
811
812 struct user32_access_t {
813 uid_t uid; /* IN: effective user id */
814 short flags; /* IN: access requested (i.e. R_OK) */
815 short num_groups; /* IN: number of groups user belongs to */
816 int num_files; /* IN: number of files to process */
817 user32_addr_t file_ids; /* IN: array of file ids */
818 user32_addr_t groups; /* IN: array of groups */
819 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
820 };
821
822 struct user64_access_t {
823 uid_t uid; /* IN: effective user id */
824 short flags; /* IN: access requested (i.e. R_OK) */
825 short num_groups; /* IN: number of groups user belongs to */
826 int num_files; /* IN: number of files to process */
827 user64_addr_t file_ids; /* IN: array of file ids */
828 user64_addr_t groups; /* IN: array of groups */
829 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
830 };
831
832
833 // these are the "extended" versions of the above structures
834 // note that it is crucial that they be a different size than
835 // the regular versions
836 struct ext_access_t {
837 uint32_t flags; /* IN: access requested (i.e. R_OK) */
838 uint32_t num_files; /* IN: number of files to process */
839 uint32_t map_size; /* IN: size of the bit map */
840 uint32_t *file_ids; /* IN: Array of file ids */
841 char *bitmap; /* OUT: hash-bitmap of interesting directory ids */
842 short *access; /* OUT: access info for each file (0 for 'has access') */
843 uint32_t num_parents; /* future use */
844 cnid_t *parents; /* future use */
845 } __attribute__((unavailable)); // this structure is for reference purposes only
846
847 struct user32_ext_access_t {
848 uint32_t flags; /* IN: access requested (i.e. R_OK) */
849 uint32_t num_files; /* IN: number of files to process */
850 uint32_t map_size; /* IN: size of the bit map */
851 user32_addr_t file_ids; /* IN: Array of file ids */
852 user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
853 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
854 uint32_t num_parents; /* future use */
855 user32_addr_t parents; /* future use */
856 };
857
858 struct user64_ext_access_t {
859 uint32_t flags; /* IN: access requested (i.e. R_OK) */
860 uint32_t num_files; /* IN: number of files to process */
861 uint32_t map_size; /* IN: size of the bit map */
862 user64_addr_t file_ids; /* IN: array of file ids */
863 user64_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
864 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
865 uint32_t num_parents;/* future use */
866 user64_addr_t parents;/* future use */
867 };
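/*
 * Illustrative sketch (not part of this file): a user-space caller fills
 * in one of the structures above and passes it to fsctl(2) on the volume.
 * The selector name used below is an assumption for illustration only;
 * the real constants are defined in hfs_fsctl.h.
 *
 *     uint32_t ids[2]   = { cnid_a, cnid_b };
 *     short    results[2];
 *     struct ext_access_t args = {
 *         .flags       = R_OK,
 *         .num_files   = 2,
 *         .map_size    = 0,        // no directory bitmap requested
 *         .file_ids    = ids,
 *         .bitmap      = NULL,
 *         .access      = results,  // 0 == caller can access that cnid
 *         .num_parents = 0,
 *         .parents     = NULL,
 *     };
 *     fsctl(volume_path, HFS_EXT_BULKACCESS_FSCTL, &args, 0); // assumed name
 *
 * On return, results[i] is 0 if the caller has access to file_ids[i],
 * otherwise an errno such as EACCES.
 */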
868
869
870 /*
871 * Perform a binary search for the given parent_id. Return value is
872 * the index if there is a match. If no_match_indexp is non-NULL it
873 * will be set to the index at which to insert the item (even if it was
874 * not found).
875 */
876 static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
877 {
878 int index=-1;
879 unsigned int lo=0;
880
881 do {
882 unsigned int mid = ((hi - lo)/2) + lo;
883 unsigned int this_id = array[mid];
884
885 if (parent_id == this_id) {
886 hi = mid;
887 break;
888 }
889
890 if (parent_id < this_id) {
891 hi = mid;
892 continue;
893 }
894
895 if (parent_id > this_id) {
896 lo = mid + 1;
897 continue;
898 }
899 } while(lo < hi);
900
901 /* check if lo and hi converged on the match */
902 if (parent_id == array[hi]) {
903 index = hi;
904 }
905
906 if (no_match_indexp) {
907 *no_match_indexp = hi;
908 }
909
910 return index;
911 }
912
913
914 static int
915 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
916 {
917 unsigned int hi;
918 int matches = 0;
919 int index, no_match_index;
920
921 if (cache->numcached == 0) {
922 *indexp = 0;
923 return 0; // table is empty, so insert at index=0 and report no match
924 }
925
926 if (cache->numcached > NUM_CACHE_ENTRIES) {
927 cache->numcached = NUM_CACHE_ENTRIES;
928 }
929
930 hi = cache->numcached - 1;
931
932 index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);
933
934 /* if no existing entry found, find index for new one */
935 if (index == -1) {
936 index = no_match_index;
937 matches = 0;
938 } else {
939 matches = 1;
940 }
941
942 *indexp = index;
943 return matches;
944 }
945
946 /*
947 * Add a node to the access_cache at the given index (or do a lookup first
948 * to find the index if -1 is passed in). We currently do a replace rather
949 * than an insert if the cache is full.
950 */
951 static void
952 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
953 {
954 int lookup_index = -1;
955
956 /* need to do a lookup first if -1 passed for index */
957 if (index == -1) {
958 if (lookup_bucket(cache, &lookup_index, nodeID)) {
959 if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
960 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
961 cache->haveaccess[lookup_index] = access;
962 }
963
964 /* mission accomplished */
965 return;
966 } else {
967 index = lookup_index;
968 }
969
970 }
971
972 /* if the cache is full, do a replace rather than an insert */
973 if (cache->numcached >= NUM_CACHE_ENTRIES) {
974 cache->numcached = NUM_CACHE_ENTRIES-1;
975
976 if (index > cache->numcached) {
977 index = cache->numcached;
978 }
979 }
980
981 if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
982 index++;
983 }
984
985 if (index >= 0 && index < cache->numcached) {
986 /* only do bcopy if we're inserting */
987 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
988 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
989 }
990
991 cache->acache[index] = nodeID;
992 cache->haveaccess[index] = access;
993 cache->numcached++;
994 }
995
996
997 struct cinfo {
998 uid_t uid;
999 gid_t gid;
1000 mode_t mode;
1001 cnid_t parentcnid;
1002 u_int16_t recflags;
1003 };
1004
1005 static int
1006 snoop_callback(const cnode_t *cp, void *arg)
1007 {
1008 struct cinfo *cip = arg;
1009
1010 cip->uid = cp->c_uid;
1011 cip->gid = cp->c_gid;
1012 cip->mode = cp->c_mode;
1013 cip->parentcnid = cp->c_parentcnid;
1014 cip->recflags = cp->c_attr.ca_recflags;
1015
1016 return (0);
1017 }
1018
1019 /*
1020 * Look up the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
1021 * isn't in core, then go to the catalog.
1022 */
1023 static int
1024 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
1025 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
1026 {
1027 int error = 0;
1028
1029 /* if this id matches the one the fsctl was called with, skip the lookup */
1030 if (cnid == skip_cp->c_cnid) {
1031 cnattrp->ca_uid = skip_cp->c_uid;
1032 cnattrp->ca_gid = skip_cp->c_gid;
1033 cnattrp->ca_mode = skip_cp->c_mode;
1034 cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
1035 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
1036 } else {
1037 struct cinfo c_info;
1038
1039 /* otherwise, check the cnode hash in case the file/dir is in core */
1040 error = hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info);
1041
1042 if (error == EACCES) {
1043 // File is deleted
1044 return ENOENT;
1045 } else if (!error) {
1046 cnattrp->ca_uid = c_info.uid;
1047 cnattrp->ca_gid = c_info.gid;
1048 cnattrp->ca_mode = c_info.mode;
1049 cnattrp->ca_recflags = c_info.recflags;
1050 keyp->hfsPlus.parentID = c_info.parentcnid;
1051 } else {
1052 int lockflags;
1053
1054 if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
1055 throttle_lowpri_io(1);
1056
1057 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1058
1059 /* lookup this cnid in the catalog */
1060 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
1061
1062 hfs_systemfile_unlock(hfsmp, lockflags);
1063
1064 cache->lookups++;
1065 }
1066 }
1067
1068 return (error);
1069 }
1070
1071
1072 /*
1073 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
1074 * up to CACHE_LEVELS as we progress towards the root.
1075 */
1076 static int
1077 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
1078 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
1079 struct vfs_context *my_context,
1080 char *bitmap,
1081 uint32_t map_size,
1082 cnid_t* parents,
1083 uint32_t num_parents)
1084 {
1085 int myErr = 0;
1086 int myResult;
1087 HFSCatalogNodeID thisNodeID;
1088 unsigned int myPerms;
1089 struct cat_attr cnattr;
1090 int cache_index = -1, scope_index = -1, scope_idx_start = -1;
1091 CatalogKey catkey;
1092
1093 int i = 0, ids_to_cache = 0;
1094 int parent_ids[CACHE_LEVELS];
1095
1096 thisNodeID = nodeID;
1097 while (thisNodeID >= kRootDirID) {
1098 myResult = 0; /* default to "no access" */
1099
1100 /* check the cache before resorting to hitting the catalog */
1101
1102 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
1103 * to look any further after hitting cached dir */
1104
1105 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
1106 cache->cachehits++;
1107 myErr = cache->haveaccess[cache_index];
1108 if (scope_index != -1) {
1109 if (myErr == ESRCH) {
1110 myErr = 0;
1111 }
1112 } else {
1113 scope_index = 0; // so we'll just use the cache result
1114 scope_idx_start = ids_to_cache;
1115 }
1116 myResult = (myErr == 0) ? 1 : 0;
1117 goto ExitThisRoutine;
1118 }
1119
1120
1121 if (parents) {
1122 int tmp;
1123 tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
1124 if (scope_index == -1)
1125 scope_index = tmp;
1126 if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
1127 scope_idx_start = ids_to_cache;
1128 }
1129 }
1130
1131 /* remember which parents we want to cache */
1132 if (ids_to_cache < CACHE_LEVELS) {
1133 parent_ids[ids_to_cache] = thisNodeID;
1134 ids_to_cache++;
1135 }
1136 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
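// Each directory id visited is hashed into the caller-supplied bitmap:
// byte (thisNodeID / 8) mod map_size, bit (thisNodeID mod 8) within that
// byte. E.g. node id 27 sets bit 3 of byte (3 % map_size).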
1137 if (bitmap && map_size) {
1138 bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
1139 }
1140
1141
1142 /* do the lookup (checks the cnode hash, then the catalog) */
1143 myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
1144 if (myErr) {
1145 goto ExitThisRoutine; /* no access */
1146 }
1147
1148 /* Root always gets access. */
1149 if (suser(myp_ucred, NULL) == 0) {
1150 thisNodeID = catkey.hfsPlus.parentID;
1151 myResult = 1;
1152 continue;
1153 }
1154
1155 // if the thing has acl's, do the full permission check
1156 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1157 struct vnode *vp;
1158
1159 /* get the vnode for this cnid */
1160 myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
1161 if ( myErr ) {
1162 myResult = 0;
1163 goto ExitThisRoutine;
1164 }
1165
1166 thisNodeID = VTOC(vp)->c_parentcnid;
1167
1168 hfs_unlock(VTOC(vp));
1169
1170 if (vnode_vtype(vp) == VDIR) {
1171 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
1172 } else {
1173 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
1174 }
1175
1176 vnode_put(vp);
1177 if (myErr) {
1178 myResult = 0;
1179 goto ExitThisRoutine;
1180 }
1181 } else {
1182 unsigned int flags;
1183 int mode = cnattr.ca_mode & S_IFMT;
1184 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp,myp_ucred, theProcPtr);
1185
1186 if (mode == S_IFDIR) {
1187 flags = R_OK | X_OK;
1188 } else {
1189 flags = R_OK;
1190 }
1191 if ( (myPerms & flags) != flags) {
1192 myResult = 0;
1193 myErr = EACCES;
1194 goto ExitThisRoutine; /* no access */
1195 }
1196
1197 /* up the hierarchy we go */
1198 thisNodeID = catkey.hfsPlus.parentID;
1199 }
1200 }
1201
1202 /* if here, we have access to this node */
1203 myResult = 1;
1204
1205 ExitThisRoutine:
1206 if (parents && myErr == 0 && scope_index == -1) {
1207 myErr = ESRCH;
1208 }
1209
1210 if (myErr) {
1211 myResult = 0;
1212 }
1213 *err = myErr;
1214
1215 /* cache the parent directory(ies) */
1216 for (i = 0; i < ids_to_cache; i++) {
1217 if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
1218 add_node(cache, -1, parent_ids[i], ESRCH);
1219 } else {
1220 add_node(cache, -1, parent_ids[i], myErr);
1221 }
1222 }
1223
1224 return (myResult);
1225 }
1226
1227 static int
1228 do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
1229 struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
1230 {
1231 boolean_t is64bit;
1232
1233 /*
1234 * NOTE: on entry, the vnode has an io_ref. In case this vnode
1235 * happens to be in our list of file_ids, we'll note it and
1236 * avoid calling hfs_chashget_nowait() on that id as that
1237 * will cause a "locking against myself" panic.
1238 */
1239 Boolean check_leaf = true;
1240
1241 struct user64_ext_access_t *user_access_structp;
1242 struct user64_ext_access_t tmp_user_access;
1243 struct access_cache cache;
1244
1245 int error = 0, prev_parent_check_ok=1;
1246 unsigned int i;
1247
1248 short flags;
1249 unsigned int num_files = 0;
1250 int map_size = 0;
1251 int num_parents = 0;
1252 int *file_ids=NULL;
1253 short *access=NULL;
1254 char *bitmap=NULL;
1255 cnid_t *parents=NULL;
1256 int leaf_index;
1257
1258 cnid_t cnid;
1259 cnid_t prevParent_cnid = 0;
1260 unsigned int myPerms;
1261 short myaccess = 0;
1262 struct cat_attr cnattr;
1263 CatalogKey catkey;
1264 struct cnode *skip_cp = VTOC(vp);
1265 kauth_cred_t cred = vfs_context_ucred(context);
1266 proc_t p = vfs_context_proc(context);
1267
1268 is64bit = proc_is64bit(p);
1269
1270 /* initialize the local cache and buffers */
1271 cache.numcached = 0;
1272 cache.cachehits = 0;
1273 cache.lookups = 0;
1274 cache.acache = NULL;
1275 cache.haveaccess = NULL;
1276
1277 /* struct copyin done during dispatch... need to copy file_id array separately */
1278 if (ap->a_data == NULL) {
1279 error = EINVAL;
1280 goto err_exit_bulk_access;
1281 }
1282
1283 if (is64bit) {
1284 if (arg_size != sizeof(struct user64_ext_access_t)) {
1285 error = EINVAL;
1286 goto err_exit_bulk_access;
1287 }
1288
1289 user_access_structp = (struct user64_ext_access_t *)ap->a_data;
1290
1291 } else if (arg_size == sizeof(struct user32_access_t)) {
1292 struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;
1293
1294 // convert an old style bulk-access struct to the new style
1295 tmp_user_access.flags = accessp->flags;
1296 tmp_user_access.num_files = accessp->num_files;
1297 tmp_user_access.map_size = 0;
1298 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1299 tmp_user_access.bitmap = USER_ADDR_NULL;
1300 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1301 tmp_user_access.num_parents = 0;
1302 user_access_structp = &tmp_user_access;
1303
1304 } else if (arg_size == sizeof(struct user32_ext_access_t)) {
1305 struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;
1306
1307 // up-cast from a 32-bit version of the struct
1308 tmp_user_access.flags = accessp->flags;
1309 tmp_user_access.num_files = accessp->num_files;
1310 tmp_user_access.map_size = accessp->map_size;
1311 tmp_user_access.num_parents = accessp->num_parents;
1312
1313 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1314 tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
1315 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1316 tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);
1317
1318 user_access_structp = &tmp_user_access;
1319 } else {
1320 error = EINVAL;
1321 goto err_exit_bulk_access;
1322 }
1323
1324 map_size = user_access_structp->map_size;
1325
1326 num_files = user_access_structp->num_files;
1327
1328 num_parents= user_access_structp->num_parents;
1329
1330 if (num_files < 1) {
1331 goto err_exit_bulk_access;
1332 }
1333 if (num_files > 1024) {
1334 error = EINVAL;
1335 goto err_exit_bulk_access;
1336 }
1337
1338 if (num_parents > 1024) {
1339 error = EINVAL;
1340 goto err_exit_bulk_access;
1341 }
1342
1343 file_ids = hfs_malloc(sizeof(int) * num_files);
1344 access = hfs_malloc(sizeof(short) * num_files);
1345 if (map_size) {
1346 bitmap = hfs_mallocz(sizeof(char) * map_size);
1347 }
1348
1349 if (num_parents) {
1350 parents = hfs_malloc(sizeof(cnid_t) * num_parents);
1351 }
1352
1353 cache.acache = hfs_malloc(sizeof(int) * NUM_CACHE_ENTRIES);
1354 cache.haveaccess = hfs_malloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1355
1356 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1357 num_files * sizeof(int)))) {
1358 goto err_exit_bulk_access;
1359 }
1360
1361 if (num_parents) {
1362 if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
1363 num_parents * sizeof(cnid_t)))) {
1364 goto err_exit_bulk_access;
1365 }
1366 }
1367
1368 flags = user_access_structp->flags;
1369 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1370 flags = R_OK;
1371 }
1372
1373 /* check if we've been passed leaf node ids or parent ids */
1374 if (flags & PARENT_IDS_FLAG) {
1375 check_leaf = false;
1376 }
1377
1378 /* Check access to each file_id passed in */
1379 for (i = 0; i < num_files; i++) {
1380 leaf_index=-1;
1381 cnid = (cnid_t) file_ids[i];
1382
1383 /* root always has access */
1384 if ((!parents) && (!suser(cred, NULL))) {
1385 access[i] = 0;
1386 continue;
1387 }
1388
1389 if (check_leaf) {
1390 /* do the lookup (checks the cnode hash, then the catalog) */
1391 error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
1392 if (error) {
1393 access[i] = (short) error;
1394 continue;
1395 }
1396
1397 if (parents) {
1398 // Check if the leaf matches one of the parent scopes
1399 leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
1400 if (leaf_index >= 0 && parents[leaf_index] == cnid)
1401 prev_parent_check_ok = 0;
1402 else if (leaf_index >= 0)
1403 prev_parent_check_ok = 1;
1404 }
1405
1406 // if the thing has acl's, do the full permission check
1407 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1408 struct vnode *cvp;
1409 int myErr = 0;
1410 /* get the vnode for this cnid */
1411 myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);
1412 if ( myErr ) {
1413 access[i] = myErr;
1414 continue;
1415 }
1416
1417 hfs_unlock(VTOC(cvp));
1418
1419 if (vnode_vtype(cvp) == VDIR) {
1420 myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
1421 } else {
1422 myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
1423 }
1424
1425 vnode_put(cvp);
1426 if (myErr) {
1427 access[i] = myErr;
1428 continue;
1429 }
1430 } else {
1431 /* before calling do_access_check(), check the target file for read access */
1432 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1433 cnattr.ca_mode, hfsmp->hfs_mp, cred, p);
1434
1435 /* fail fast if no access */
1436 if ((myPerms & flags) == 0) {
1437 access[i] = EACCES;
1438 continue;
1439 }
1440 }
1441 } else {
1442 /* we were passed an array of parent ids */
1443 catkey.hfsPlus.parentID = cnid;
1444 }
1445
1446 /* if the last guy had the same parent and had access, we're done */
1447 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
1448 cache.cachehits++;
1449 access[i] = 0;
1450 continue;
1451 }
1452
1453 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1454 skip_cp, p, cred, context,bitmap, map_size, parents, num_parents);
1455
1456 if (myaccess || (error == ESRCH && leaf_index != -1)) {
1457 access[i] = 0; // have access.. no errors to report
1458 } else {
1459 access[i] = (error != 0 ? (short) error : EACCES);
1460 }
1461
1462 prevParent_cnid = catkey.hfsPlus.parentID;
1463 }
1464
1465 /* copyout the access array */
1466 if ((error = copyout((caddr_t)access, user_access_structp->access,
1467 num_files * sizeof (short)))) {
1468 goto err_exit_bulk_access;
1469 }
1470 if (map_size && bitmap) {
1471 if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
1472 map_size * sizeof (char)))) {
1473 goto err_exit_bulk_access;
1474 }
1475 }
1476
1477
1478 err_exit_bulk_access:
1479
1480 hfs_free(file_ids, sizeof(int) * num_files);
1481 hfs_free(parents, sizeof(cnid_t) * num_parents);
1482 hfs_free(bitmap, sizeof(char) * map_size);
1483 hfs_free(access, sizeof(short) * num_files);
1484 hfs_free(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1485 hfs_free(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1486
1487 return (error);
1488 }
1489
1490
1491 /* end "bulk-access" support */
1492
1493
1494 /*
1495 * Control filesystem operating characteristics.
1496 */
1497 int
1498 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
1499 vnode_t a_vp;
1500 long a_command;
1501 caddr_t a_data;
1502 int a_fflag;
1503 vfs_context_t a_context;
1504 } */ *ap)
1505 {
1506 struct vnode * vp = ap->a_vp;
1507 struct hfsmount *hfsmp = VTOHFS(vp);
1508 vfs_context_t context = ap->a_context;
1509 kauth_cred_t cred = vfs_context_ucred(context);
1510 proc_t p = vfs_context_proc(context);
1511 struct vfsstatfs *vfsp;
1512 boolean_t is64bit;
1513 off_t jnl_start, jnl_size;
1514 struct hfs_journal_info *jip;
1515 #if HFS_COMPRESSION
1516 int compressed = 0;
1517 off_t uncompressed_size = -1;
1518 int decmpfs_error = 0;
1519
1520 if (ap->a_command == F_RDADVISE) {
1521 /* we need to inspect the decmpfs state of the file as early as possible */
1522 compressed = hfs_file_is_compressed(VTOC(vp), 0);
1523 if (compressed) {
1524 if (VNODE_IS_RSRC(vp)) {
1525 /* if this is the resource fork, treat it as if it were empty */
1526 uncompressed_size = 0;
1527 } else {
1528 decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
1529 if (decmpfs_error != 0) {
1530 /* failed to get the uncompressed size, we'll check for this later */
1531 uncompressed_size = -1;
1532 }
1533 }
1534 }
1535 }
1536 #endif /* HFS_COMPRESSION */
1537
1538 is64bit = proc_is64bit(p);
1539
1540 #if CONFIG_PROTECT
1541 #if HFS_CONFIG_KEY_ROLL
1542 // The HFSIOC_KEY_ROLL fsctl does its own access checks
1543 if (ap->a_command != HFSIOC_KEY_ROLL)
1544 #endif
1545 {
1546 int error = 0;
1547 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
1548 return error;
1549 }
1550 }
1551 #endif /* CONFIG_PROTECT */
1552
1553 switch (ap->a_command) {
1554
1555 case HFSIOC_GETPATH:
1556 {
1557 struct vnode *file_vp;
1558 cnid_t cnid;
1559 int error;
1560 int flags = 0;
1561 char *bufptr;
1562 #ifdef VN_GETPATH_NEW
1563 size_t outlen;
1564 #else // VN_GETPATH_NEW
1565 int outlen;
1566 #endif // VN_GETPATH_NEW
1567
1568 /* Caller must be owner of file system. */
1569 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1570 if (suser(cred, NULL) &&
1571 kauth_cred_getuid(cred) != vfsp->f_owner) {
1572 return (EACCES);
1573 }
1574 /* Target vnode must be file system's root. */
1575 if (!vnode_isvroot(vp)) {
1576 return (EINVAL);
1577 }
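/*
 * The a_data buffer is used in both directions: on input it holds the
 * target cnid as a decimal string, and on output vn_getpath_ext() below
 * overwrites it with the file's path (at most sizeof(pathname_t) bytes).
 */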
1578 bufptr = (char *)ap->a_data;
1579 cnid = strtoul(bufptr, NULL, 10);
1580 if (ap->a_fflag & HFS_GETPATH_VOLUME_RELATIVE) {
1581 flags |= BUILDPATH_VOLUME_RELATIVE;
1582 }
1583
1584 /* We need to call hfs_vfs_vget to leverage the code that will
1585 * fix the origin list for us if needed, as opposed to calling
1586 * hfs_vget, since we will need the parent for the vn_getpath_ext call.
1587 */
1588
1589 if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
1590 return (error);
1591 }
1592
1593 outlen = sizeof(pathname_t);
1594 error = vn_getpath_ext(file_vp, NULLVP, bufptr, &outlen, flags);
1595 vnode_put(file_vp);
1596
1597 return (error);
1598 }
1599
1600 case HFSIOC_SET_MAX_DEFRAG_SIZE:
1601 {
1602 int error = 0; /* Assume success */
1603 u_int32_t maxsize = 0;
1604
1605 if (vnode_vfsisrdonly(vp)) {
1606 return (EROFS);
1607 }
1608 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1609 if (!kauth_cred_issuser(cred)) {
1610 return (EACCES); /* must be root */
1611 }
1612
1613 maxsize = *(u_int32_t *)ap->a_data;
1614
1615 hfs_lock_mount(hfsmp);
1616 if (maxsize > HFS_MAX_DEFRAG_SIZE) {
1617 error = EINVAL;
1618 }
1619 else {
1620 hfsmp->hfs_defrag_max = maxsize;
1621 }
1622 hfs_unlock_mount(hfsmp);
1623
1624 return (error);
1625 }
1626
1627 case HFSIOC_FORCE_ENABLE_DEFRAG:
1628 {
1629 int error = 0; /* Assume success */
1630 u_int32_t do_enable = 0;
1631
1632 if (vnode_vfsisrdonly(vp)) {
1633 return (EROFS);
1634 }
1635 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1636 if (!kauth_cred_issuser(cred)) {
1637 return (EACCES); /* must be root */
1638 }
1639
1640 do_enable = *(u_int32_t *)ap->a_data;
1641
1642 hfs_lock_mount(hfsmp);
1643 if (do_enable != 0) {
1644 hfsmp->hfs_defrag_nowait = 1;
1645 }
1646 else {
1647 error = EINVAL;
1648 }
1649
1650 hfs_unlock_mount(hfsmp);
1651
1652 return (error);
1653 }
1654
1655
1656 case HFSIOC_TRANSFER_DOCUMENT_ID:
1657 {
1658 struct cnode *cp = NULL;
1659 int error;
1660 u_int32_t to_fd = *(u_int32_t *)ap->a_data;
1661 struct fileproc *to_fp;
1662 struct vnode *to_vp;
1663 struct cnode *to_cp;
1664
1665 cp = VTOC(vp);
1666
1667 if ((error = fp_getfvp(p, to_fd, &to_fp, &to_vp)) != 0) {
1668 //printf("could not get the vnode for fd %d (err %d)\n", to_fd, error);
1669 return error;
1670 }
1671 if ( (error = vnode_getwithref(to_vp)) ) {
1672 file_drop(to_fd);
1673 return error;
1674 }
1675
1676 if (VTOHFS(to_vp) != hfsmp) {
1677 error = EXDEV;
1678 goto transfer_cleanup;
1679 }
1680
1681 int need_unlock = 1;
1682 to_cp = VTOC(to_vp);
1683 error = hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
1684 if (error != 0) {
1685 //printf("could not lock the pair of cnodes (error %d)\n", error);
1686 goto transfer_cleanup;
1687 }
1688
1689 if (!(cp->c_bsdflags & UF_TRACKED)) {
1690 error = EINVAL;
1691 } else if (to_cp->c_bsdflags & UF_TRACKED) {
1692 //
1693 // if the destination is already tracked, return an error
1694 // as otherwise it's a silent deletion of the target's
1695 // document-id
1696 //
1697 error = EEXIST;
1698 } else if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
1699 //
1700 // we can use the FndrExtendedFileInfo because the doc-id is the first
1701 // thing in both it and the ExtendedDirInfo struct which is fixed in
1702 // format and can not change layout
1703 //
1704 struct FndrExtendedFileInfo *f_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16);
1705 struct FndrExtendedFileInfo *to_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)to_cp->c_finderinfo + 16);
1706
1707 if (f_extinfo->document_id == 0) {
1708 uint32_t new_id;
1709
1710 hfs_unlockpair(cp, to_cp); // have to unlock to be able to get a new-id
1711
1712 if ((error = hfs_generate_document_id(hfsmp, &new_id)) == 0) {
1713 //
1714 // re-lock the pair now that we have the document-id
1715 //
1716 hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
1717 f_extinfo->document_id = new_id;
1718 } else {
1719 goto transfer_cleanup;
1720 }
1721 }
1722
1723 to_extinfo->document_id = f_extinfo->document_id;
1724 f_extinfo->document_id = 0;
1725 //printf("TRANSFERRING: doc-id %d from ino %d to ino %d\n", to_extinfo->document_id, cp->c_fileid, to_cp->c_fileid);
1726
1727 // make sure the destination is also UF_TRACKED
1728 to_cp->c_bsdflags |= UF_TRACKED;
1729 cp->c_bsdflags &= ~UF_TRACKED;
1730
1731 // mark the cnodes dirty
1732 cp->c_flag |= C_MODIFIED;
1733 to_cp->c_flag |= C_MODIFIED;
1734
1735 int lockflags;
1736 if ((error = hfs_start_transaction(hfsmp)) == 0) {
1737
1738 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1739
1740 (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
1741 (void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, NULL, NULL);
1742
1743 hfs_systemfile_unlock (hfsmp, lockflags);
1744 (void) hfs_end_transaction(hfsmp);
1745 }
1746
1747 add_fsevent(FSE_DOCID_CHANGED, context,
1748 FSE_ARG_DEV, hfsmp->hfs_raw_dev,
1749 FSE_ARG_INO, (ino64_t)cp->c_fileid, // src inode #
1750 FSE_ARG_INO, (ino64_t)to_cp->c_fileid, // dst inode #
1751 FSE_ARG_INT32, to_extinfo->document_id,
1752 FSE_ARG_DONE);
1753
1754 hfs_unlockpair(cp, to_cp); // unlock this so we can send the fsevents
1755 need_unlock = 0;
1756
1757 if (need_fsevent(FSE_STAT_CHANGED, vp)) {
1758 add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
1759 }
1760 if (need_fsevent(FSE_STAT_CHANGED, to_vp)) {
1761 add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, to_vp, FSE_ARG_DONE);
1762 }
1763 }
1764
1765 if (need_unlock) {
1766 hfs_unlockpair(cp, to_cp);
1767 }
1768
1769 transfer_cleanup:
1770 vnode_put(to_vp);
1771 file_drop(to_fd);
1772
1773 return error;
1774 }
1775
1776
1777
1778 case HFSIOC_PREV_LINK:
1779 case HFSIOC_NEXT_LINK:
1780 {
1781 cnid_t linkfileid;
1782 cnid_t nextlinkid;
1783 cnid_t prevlinkid;
1784 int error;
1785
1786 /* Caller must be owner of file system. */
1787 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1788 if (suser(cred, NULL) &&
1789 kauth_cred_getuid(cred) != vfsp->f_owner) {
1790 return (EACCES);
1791 }
1792 /* Target vnode must be file system's root. */
1793 if (!vnode_isvroot(vp)) {
1794 return (EINVAL);
1795 }
1796 linkfileid = *(cnid_t *)ap->a_data;
1797 if (linkfileid < kHFSFirstUserCatalogNodeID) {
1798 return (EINVAL);
1799 }
1800 if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
1801 return (error);
1802 }
1803 if (ap->a_command == HFSIOC_NEXT_LINK) {
1804 *(cnid_t *)ap->a_data = nextlinkid;
1805 } else {
1806 *(cnid_t *)ap->a_data = prevlinkid;
1807 }
1808 return (0);
1809 }
1810
1811 case HFSIOC_RESIZE_PROGRESS: {
1812
1813 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1814 if (suser(cred, NULL) &&
1815 kauth_cred_getuid(cred) != vfsp->f_owner) {
1816 return (EACCES); /* must be owner of file system */
1817 }
1818 if (!vnode_isvroot(vp)) {
1819 return (EINVAL);
1820 }
1821 /* file system must not be mounted read-only */
1822 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1823 return (EROFS);
1824 }
1825
1826 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
1827 }
1828
1829 case HFSIOC_RESIZE_VOLUME: {
1830 u_int64_t newsize;
1831 u_int64_t cursize;
1832 int ret;
1833
1834 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1835 if (suser(cred, NULL) &&
1836 kauth_cred_getuid(cred) != vfsp->f_owner) {
1837 return (EACCES); /* must be owner of file system */
1838 }
1839 if (!vnode_isvroot(vp)) {
1840 return (EINVAL);
1841 }
1842
1843 /* filesystem must not be mounted read only */
1844 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1845 return (EROFS);
1846 }
1847 newsize = *(u_int64_t *)ap->a_data;
1848 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
1849
1850 if (newsize == cursize) {
1851 return (0);
1852 }
1853 IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeWillResize);
1854 if (newsize > cursize) {
1855 ret = hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
1856 } else {
1857 ret = hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
1858 }
1859 IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeDidResize);
1860 return (ret);
1861 }
1862 case HFSIOC_CHANGE_NEXT_ALLOCATION: {
1863 int error = 0; /* Assume success */
1864 u_int32_t location;
1865
1866 if (vnode_vfsisrdonly(vp)) {
1867 return (EROFS);
1868 }
1869 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1870 if (suser(cred, NULL) &&
1871 kauth_cred_getuid(cred) != vfsp->f_owner) {
1872 return (EACCES); /* must be owner of file system */
1873 }
1874 if (!vnode_isvroot(vp)) {
1875 return (EINVAL);
1876 }
1877 hfs_lock_mount(hfsmp);
1878 location = *(u_int32_t *)ap->a_data;
1879 if ((location >= hfsmp->allocLimit) &&
1880 (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
1881 error = EINVAL;
1882 goto fail_change_next_allocation;
1883 }
1884 /* Return previous value. */
1885 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
1886 if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
1887 /* On the magic value for location, set nextAllocation to the next block
1888 * after metadata zone and set flag in mount structure to indicate
1889 * that nextAllocation should not be updated again.
1890 */
1891 if (hfsmp->hfs_metazone_end != 0) {
1892 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
1893 }
1894 hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1895 } else {
1896 hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1897 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
1898 }
1899 MarkVCBDirty(hfsmp);
1900 fail_change_next_allocation:
1901 hfs_unlock_mount(hfsmp);
1902 return (error);
1903 }
1904
1905 #if HFS_SPARSE_DEV
1906 case HFSIOC_SETBACKINGSTOREINFO: {
1907 struct vnode * di_vp;
1908 struct hfs_backingstoreinfo *bsdata;
1909 int error = 0;
1910
1911 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1912 return (EROFS);
1913 }
1914 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
1915 return (EALREADY);
1916 }
1917 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1918 if (suser(cred, NULL) &&
1919 kauth_cred_getuid(cred) != vfsp->f_owner) {
1920 return (EACCES); /* must be owner of file system */
1921 }
1922 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
1923 if (bsdata == NULL) {
1924 return (EINVAL);
1925 }
1926 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
1927 return (error);
1928 }
1929 if ((error = vnode_getwithref(di_vp))) {
1930 file_drop(bsdata->backingfd);
1931 return(error);
1932 }
1933
1934 if (vnode_mount(vp) == vnode_mount(di_vp)) {
1935 (void)vnode_put(di_vp);
1936 file_drop(bsdata->backingfd);
1937 return (EINVAL);
1938 }
1939
1940 // Dropped in unmount
1941 vnode_ref(di_vp);
1942
1943 hfs_lock_mount(hfsmp);
1944 hfsmp->hfs_backingvp = di_vp;
1945 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
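/*
 * bandsize is in bytes; the expression below converts it to allocation
 * blocks and then scales the result by 4.  (The rationale for the factor
 * of 4 is not documented here.)
 */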
1946 hfsmp->hfs_sparsebandblks = bsdata->bandsize / hfsmp->blockSize * 4;
1947 hfs_unlock_mount(hfsmp);
1948
1949 /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */
1950
1951 /*
1952 * If the sparse image is on a sparse image file (as opposed to a sparse
1953 * bundle), then we may need to limit the free space to the maximum size
1954 * of a file on that volume. So we query (using pathconf), and if we get
1955 * a meaningful result, we cache the number of blocks for later use in
1956 * hfs_freeblks().
1957 */
1958 hfsmp->hfs_backingfs_maxblocks = 0;
1959 if (vnode_vtype(di_vp) == VREG) {
1960 int terr;
1961 int hostbits;
1962 terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
1963 if (terr == 0 && hostbits != 0 && hostbits < 64) {
1964 u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;
1965
1966 hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
1967 }
1968 }
1969
1970 /* The free extent cache is managed differently for sparse devices.
1971 * There is a window between when the volume is mounted and when the
1972 * device is marked as sparse, so the free extent cache for this
1973 * volume is currently initialized as for a normal volume (sorted by block
1974 * count). Reset the cache so that it will be rebuilt
1975 * for a sparse device (sorted by start block).
1976 */
1977 ResetVCBFreeExtCache(hfsmp);
1978
1979 (void)vnode_put(di_vp);
1980 file_drop(bsdata->backingfd);
1981 return (0);
1982 }
1983
1984 case HFSIOC_CLRBACKINGSTOREINFO: {
1985 struct vnode * tmpvp;
1986
1987 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1988 if (suser(cred, NULL) &&
1989 kauth_cred_getuid(cred) != vfsp->f_owner) {
1990 return (EACCES); /* must be owner of file system */
1991 }
1992 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1993 return (EROFS);
1994 }
1995
1996 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
1997 hfsmp->hfs_backingvp) {
1998
1999 hfs_lock_mount(hfsmp);
2000 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
2001 tmpvp = hfsmp->hfs_backingvp;
2002 hfsmp->hfs_backingvp = NULLVP;
2003 hfsmp->hfs_sparsebandblks = 0;
2004 hfs_unlock_mount(hfsmp);
2005
2006 vnode_rele(tmpvp);
2007 }
2008 return (0);
2009 }
2010 #endif /* HFS_SPARSE_DEV */
2011
2012 /* Change the next CNID stored in the VH */
2013 case HFSIOC_CHANGE_NEXTCNID: {
2014 int error = 0; /* Assume success */
2015 u_int32_t fileid;
2016 int wraparound = 0;
2017 int lockflags = 0;
2018
2019 if (vnode_vfsisrdonly(vp)) {
2020 return (EROFS);
2021 }
2022 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
2023 if (suser(cred, NULL) &&
2024 kauth_cred_getuid(cred) != vfsp->f_owner) {
2025 return (EACCES); /* must be owner of file system */
2026 }
2027
2028 fileid = *(u_int32_t *)ap->a_data;
2029
2030 /* Must have catalog lock excl. to advance the CNID pointer */
2031 lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG , HFS_EXCLUSIVE_LOCK);
2032
2033 hfs_lock_mount(hfsmp);
2034
2035 /* If it is less than the current next CNID, force the wraparound bit to be set */
2036 if (fileid < hfsmp->vcbNxtCNID) {
2037 wraparound=1;
2038 }
2039
2040 /* Return previous value. */
2041 *(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID;
2042
2043 hfsmp->vcbNxtCNID = fileid;
2044
2045 if (wraparound) {
2046 hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask;
2047 }
2048
2049 MarkVCBDirty(hfsmp);
2050 hfs_unlock_mount(hfsmp);
2051 hfs_systemfile_unlock (hfsmp, lockflags);
2052
2053 return (error);
2054 }
2055
2056 case F_FREEZE_FS: {
2057 struct mount *mp;
2058
2059 mp = vnode_mount(vp);
2060 hfsmp = VFSTOHFS(mp);
2061
2062 if (!(hfsmp->jnl))
2063 return (ENOTSUP);
2064
2065 vfsp = vfs_statfs(mp);
2066
2067 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
2068 !kauth_cred_issuser(cred))
2069 return (EACCES);
2070
2071 return hfs_freeze(hfsmp);
2072 }
2073
2074 case F_THAW_FS: {
2075 vfsp = vfs_statfs(vnode_mount(vp));
2076 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
2077 !kauth_cred_issuser(cred))
2078 return (EACCES);
2079
2080 return hfs_thaw(hfsmp, current_proc());
2081 }
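/*
 * A minimal user-space sketch of the freeze/thaw pair handled above.  It
 * assumes the F_FREEZE_FS and F_THAW_FS commands are visible to the caller
 * through <fcntl.h>; the path is hypothetical and, per the checks above, the
 * caller must be the volume owner or superuser.
 *
 *     int fd = open("/Volumes/HFSVol", O_RDONLY);
 *     if (fd >= 0) {
 *         if (fcntl(fd, F_FREEZE_FS, 0) == 0) {
 *             // ... snapshot the block device, etc. ...
 *             (void) fcntl(fd, F_THAW_FS, 0);
 *         }
 *         close(fd);
 *     }
 */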
2082
2083 case HFSIOC_EXT_BULKACCESS32:
2084 case HFSIOC_EXT_BULKACCESS64: {
2085 int size;
2086 #if CONFIG_HFS_STD
2087 if (hfsmp->hfs_flags & HFS_STANDARD) {
2088 return EINVAL;
2089 }
2090 #endif
2091
2092 if (is64bit) {
2093 size = sizeof(struct user64_ext_access_t);
2094 } else {
2095 size = sizeof(struct user32_ext_access_t);
2096 }
2097
2098 return do_bulk_access_check(hfsmp, vp, ap, size, context);
2099 }
2100
2101 case HFSIOC_SET_XATTREXTENTS_STATE: {
2102 int state;
2103
2104 if (ap->a_data == NULL) {
2105 return (EINVAL);
2106 }
2107
2108 state = *(int *)ap->a_data;
2109
2110 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2111 return (EROFS);
2112 }
2113
2114 /* The super-user can enable or disable extent-based extended
2115 * attribute support on a volume.
2116 * Note: Starting with Mac OS X 10.7, extent-based extended attributes
2117 * are enabled by default, so any change will be transient only
2118 * until the volume is remounted.
2119 */
2120 if (!kauth_cred_issuser(kauth_cred_get())) {
2121 return (EPERM);
2122 }
2123 if (state == 0 || state == 1)
2124 return hfs_set_volxattr(hfsmp, HFSIOC_SET_XATTREXTENTS_STATE, state);
2125 else
2126 return (EINVAL);
2127 }
2128
2129 case F_SETSTATICCONTENT: {
2130 int error;
2131 int enable_static = 0;
2132 struct cnode *cp = NULL;
2133 /*
2134 * lock the cnode, decorate the cnode flag, and bail out.
2135 * VFS should have already authenticated the caller for us.
2136 */
2137
2138 if (ap->a_data) {
2139 /*
2140 * Note that even though ap->a_data is of type caddr_t,
2141 * the fcntl layer at the syscall handler will pass in NULL
2142 * or 1 depending on what the argument supplied to the fcntl
2143 * was. So it is in fact correct to check the ap->a_data
2144 * argument for zero or non-zero value when deciding whether or not
2145 * to enable the static bit in the cnode.
2146 */
2147 enable_static = 1;
2148 }
2149 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2150 return EROFS;
2151 }
2152 cp = VTOC(vp);
2153
2154 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2155 if (error == 0) {
2156 if (enable_static) {
2157 cp->c_flag |= C_SSD_STATIC;
2158 }
2159 else {
2160 cp->c_flag &= ~C_SSD_STATIC;
2161 }
2162 hfs_unlock (cp);
2163 }
2164 return error;
2165 }
2166
2167 case F_SET_GREEDY_MODE: {
2168 int error;
2169 int enable_greedy_mode = 0;
2170 struct cnode *cp = NULL;
2171 /*
2172 * lock the cnode, decorate the cnode flag, and bail out.
2173 * VFS should have already authenticated the caller for us.
2174 */
2175
2176 if (ap->a_data) {
2177 /*
2178 * Note that even though ap->a_data is of type caddr_t,
2179 * the fcntl layer at the syscall handler will pass in NULL
2180 * or 1 depending on what the argument supplied to the fcntl
2181 * was. So it is in fact correct to check the ap->a_data
2182 * argument for zero or non-zero value when deciding whether or not
2183 * to enable the greedy mode bit in the cnode.
2184 */
2185 enable_greedy_mode = 1;
2186 }
2187 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2188 return EROFS;
2189 }
2190 cp = VTOC(vp);
2191
2192 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2193 if (error == 0) {
2194 if (enable_greedy_mode) {
2195 cp->c_flag |= C_SSD_GREEDY_MODE;
2196 }
2197 else {
2198 cp->c_flag &= ~C_SSD_GREEDY_MODE;
2199 }
2200 hfs_unlock (cp);
2201 }
2202 return error;
2203 }
2204
2205 case F_SETIOTYPE: {
2206 int error;
2207 uint32_t iotypeflag = 0;
2208
2209 struct cnode *cp = NULL;
2210 /*
2211 * lock the cnode, decorate the cnode flag, and bail out.
2212 * VFS should have already authenticated the caller for us.
2213 */
2214
2215 if (ap->a_data == NULL) {
2216 return EINVAL;
2217 }
2218
2219 /*
2220 * Note that even though ap->a_data is of type caddr_t, we
2221 * can only use 32 bits of flag values.
2222 */
2223 iotypeflag = (uint32_t) ap->a_data;
2224 switch (iotypeflag) {
2225 case F_IOTYPE_ISOCHRONOUS:
2226 break;
2227 default:
2228 return EINVAL;
2229 }
2230
2231
2232 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2233 return EROFS;
2234 }
2235 cp = VTOC(vp);
2236
2237 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2238 if (error == 0) {
2239 switch (iotypeflag) {
2240 case F_IOTYPE_ISOCHRONOUS:
2241 cp->c_flag |= C_IO_ISOCHRONOUS;
2242 break;
2243 default:
2244 break;
2245 }
2246 hfs_unlock (cp);
2247 }
2248 return error;
2249 }
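/*
 * Note: the per-cnode flags set by F_SETSTATICCONTENT, F_SET_GREEDY_MODE and
 * F_SETIOTYPE above (C_SSD_STATIC, C_SSD_GREEDY_MODE and C_IO_ISOCHRONOUS)
 * are consumed per-I/O in hfs_vnop_strategy() below, where the corresponding
 * buffer attributes are set via buf_markstatic(), bufattr_markgreedymode()
 * and bufattr_markisochronous().
 */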
2250
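/*
 * F_MAKECOMPRESSED, in summary: take the truncate and cnode locks, refuse
 * files that are busy, immutable or already compressed, compare the supplied
 * generation counter against the cnode's (ESTALE on mismatch), then set
 * UF_COMPRESSED, truncate the data fork to zero while suppressing time
 * updates, and finally reset the decmpfs state under the truncate lock.
 */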
2251 case F_MAKECOMPRESSED: {
2252 int error = 0;
2253 uint32_t gen_counter;
2254 struct cnode *cp = NULL;
2255 int reset_decmp = 0;
2256
2257 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2258 return EROFS;
2259 }
2260
2261 /*
2262 * acquire & lock the cnode.
2263 * VFS should have already authenticated the caller for us.
2264 */
2265
2266 if (ap->a_data) {
2267 /*
2268 * Cast the pointer into a uint32_t so we can extract the
2269 * supplied generation counter.
2270 */
2271 gen_counter = *((uint32_t*)ap->a_data);
2272 }
2273 else {
2274 return EINVAL;
2275 }
2276
2277 #if HFS_COMPRESSION
2278 cp = VTOC(vp);
2279 /* Grab truncate lock first; we may truncate the file */
2280 hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2281
2282 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2283 if (error) {
2284 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2285 return error;
2286 }
2287
2288 /* Are there any other usecounts/FDs? */
2289 if (vnode_isinuse(vp, 1)) {
2290 hfs_unlock(cp);
2291 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2292 return EBUSY;
2293 }
2294
2295 /* now we have the cnode locked down; Validate arguments */
2296 if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) {
2297 /* EINVAL if you are trying to manipulate an IMMUTABLE file */
2298 hfs_unlock(cp);
2299 hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
2300 return EINVAL;
2301 }
2302
2303 if ((hfs_get_gencount (cp)) == gen_counter) {
2304 /*
2305 * OK, the gen_counter matched. Go for it:
2306 * Toggle state bits, truncate file, and suppress mtime update
2307 */
2308 reset_decmp = 1;
2309 cp->c_bsdflags |= UF_COMPRESSED;
2310
2311 error = hfs_truncate(vp, 0, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES,
2312 ap->a_context);
2313 }
2314 else {
2315 error = ESTALE;
2316 }
2317
2318 /* Unlock the cnode before calling into decmpfs; it may need to get an EA */
2319 hfs_unlock(cp);
2320
2321 /*
2322 * Reset the decmp state while still holding the truncate lock. We need to
2323 * serialize here against a listxattr on this node which may occur at any
2324 * time.
2325 *
2326 * Even if '0/skiplock' is passed as the 2nd argument to hfs_file_is_compressed,
2327 * that will still potentially require getting the com.apple.decmpfs EA. If the
2328 * EA is required, then we can't hold the cnode lock, because the getxattr call is
2329 * generic (through VFS), and can't pass along any info telling it that we're already
2330 * holding it (the lock). If we don't serialize, then we risk listxattr stopping
2331 * and trying to fill in the hfs_file_is_compressed info during the callback
2332 * operation, which will result in deadlock against the b-tree node.
2333 *
2334 * So, to serialize against listxattr (which will grab buf_t meta references on
2335 * the b-tree blocks), we hold the truncate lock as we're manipulating the
2336 * decmpfs payload.
2337 */
2338 if ((reset_decmp) && (error == 0)) {
2339 decmpfs_cnode *dp = VTOCMP (vp);
2340 if (dp != NULL) {
2341 decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
2342 }
2343
2344 /* Initialize the decmpfs node as needed */
2345 (void) hfs_file_is_compressed (cp, 0); /* ok to take lock */
2346 }
2347
2348 hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
2349
2350 #endif
2351 return error;
2352 }
2353
2354 case F_SETBACKINGSTORE: {
2355
2356 int error = 0;
2357
2358 /*
2359 * See comment in F_SETSTATICCONTENT re: using
2360 * a null check for a_data
2361 */
2362 if (ap->a_data) {
2363 error = hfs_set_backingstore (vp, 1);
2364 }
2365 else {
2366 error = hfs_set_backingstore (vp, 0);
2367 }
2368
2369 return error;
2370 }
2371
2372 case F_GETPATH_MTMINFO: {
2373 int error = 0;
2374
2375 int *data = (int*) ap->a_data;
2376
2377 /* Ask if this is a backingstore vnode */
2378 error = hfs_is_backingstore (vp, data);
2379
2380 return error;
2381 }
2382
2383 case F_FULLFSYNC: {
2384 int error;
2385
2386 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2387 return (EROFS);
2388 }
2389 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2390 if (error == 0) {
2391 error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_FULL, p);
2392 hfs_unlock(VTOC(vp));
2393 }
2394
2395 return error;
2396 }
2397
2398 case F_BARRIERFSYNC: {
2399 int error;
2400
2401 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2402 return (EROFS);
2403 }
2404 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2405 if (error == 0) {
2406 error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_BARRIER, p);
2407 hfs_unlock(VTOC(vp));
2408 }
2409
2410 return error;
2411 }
2412
2413 case F_CHKCLEAN: {
2414 register struct cnode *cp;
2415 int error;
2416
2417 if (!vnode_isreg(vp))
2418 return EINVAL;
2419
2420 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2421 if (error == 0) {
2422 cp = VTOC(vp);
2423 /*
2424 * Used by regression tests to determine whether
2425 * all the dirty pages (via write) have been cleaned
2426 * after a call to 'fsync'.
2427 */
2428 error = is_file_clean(vp, VTOF(vp)->ff_size);
2429 hfs_unlock(cp);
2430 }
2431 return (error);
2432 }
2433
2434 case F_RDADVISE: {
2435 register struct radvisory *ra;
2436 struct filefork *fp;
2437 int error;
2438
2439 if (!vnode_isreg(vp))
2440 return EINVAL;
2441
2442 ra = (struct radvisory *)(ap->a_data);
2443 fp = VTOF(vp);
2444
2445 /* Protect against a size change. */
2446 hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2447
2448 #if HFS_COMPRESSION
2449 if (compressed) {
2450 if (uncompressed_size == -1) {
2451 /* fetching the uncompressed size failed above, so return the error */
2452 error = decmpfs_error;
2453 } else if (ra->ra_offset >= uncompressed_size) {
2454 error = EFBIG;
2455 } else {
2456 error = advisory_read(vp, uncompressed_size, ra->ra_offset, ra->ra_count);
2457 }
2458 } else
2459 #endif /* HFS_COMPRESSION */
2460 if (ra->ra_offset >= fp->ff_size) {
2461 error = EFBIG;
2462 } else {
2463 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
2464 }
2465
2466 hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
2467 return (error);
2468 }
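/*
 * A minimal user-space sketch of issuing the read advisory handled above,
 * using struct radvisory and F_RDADVISE from <fcntl.h>; the file path and
 * region are hypothetical.
 *
 *     int fd = open("/Volumes/HFSVol/bigfile", O_RDONLY);
 *     if (fd >= 0) {
 *         struct radvisory ra;
 *         ra.ra_offset = 0;            // offset of the region to pre-read
 *         ra.ra_count  = 16 * 1024;    // number of bytes to pre-read
 *         (void) fcntl(fd, F_RDADVISE, &ra);
 *         close(fd);
 *     }
 */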
2469
2470 case HFSIOC_GET_VOL_CREATE_TIME_32: {
2471 *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
2472 return 0;
2473 }
2474
2475 case HFSIOC_GET_VOL_CREATE_TIME_64: {
2476 *(user64_time_t *)(ap->a_data) = (user64_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
2477 return 0;
2478 }
2479
2480 case SPOTLIGHT_IOC_GET_MOUNT_TIME:
2481 *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
2482 break;
2483
2484 case SPOTLIGHT_IOC_GET_LAST_MTIME:
2485 *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
2486 break;
2487
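/*
 * The free-space notification thresholds managed below are intended to stay
 * ordered: dangerlimit < warninglimit < nearwarninglimit < desiredlevel.
 * Each setter rejects a value that would cross the limits it checks against.
 */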
2488 case HFSIOC_GET_VERY_LOW_DISK:
2489 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
2490 break;
2491
2492 case HFSIOC_SET_VERY_LOW_DISK:
2493 if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
2494 return EINVAL;
2495 }
2496
2497 hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
2498 break;
2499
2500 case HFSIOC_GET_LOW_DISK:
2501 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
2502 break;
2503
2504 case HFSIOC_SET_LOW_DISK:
2505 if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
2506 || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
2507
2508 return EINVAL;
2509 }
2510
2511 hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
2512 break;
2513
2514 /* The following two fsctls were ported from apfs. */
2515 case APFSIOC_GET_NEAR_LOW_DISK:
2516 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_nearwarninglimit;
2517 break;
2518
2519 case APFSIOC_SET_NEAR_LOW_DISK:
2520 if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
2521 || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
2522 return EINVAL;
2523 }
2524
2525 hfsmp->hfs_freespace_notify_nearwarninglimit = *(uint32_t *)ap->a_data;
2526 break;
2527
2528 case HFSIOC_GET_DESIRED_DISK:
2529 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
2530 break;
2531
2532 case HFSIOC_SET_DESIRED_DISK:
2533 if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
2534 return EINVAL;
2535 }
2536
2537 hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
2538 break;
2539
2540 case HFSIOC_VOLUME_STATUS:
2541 *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
2542 break;
2543
2544 case HFS_SET_BOOT_INFO:
2545 if (!vnode_isvroot(vp))
2546 return(EINVAL);
2547 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
2548 return(EACCES); /* must be superuser or owner of filesystem */
2549 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2550 return (EROFS);
2551 }
2552 hfs_lock_mount (hfsmp);
2553 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
2554 /* Null out the cached UUID, to be safe */
2555 uuid_clear (hfsmp->hfs_full_uuid);
2556 hfs_unlock_mount (hfsmp);
2557 (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
2558 break;
2559
2560 case HFS_GET_BOOT_INFO:
2561 if (!vnode_isvroot(vp))
2562 return(EINVAL);
2563 hfs_lock_mount (hfsmp);
2564 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
2565 hfs_unlock_mount(hfsmp);
2566 break;
2567
2568 /* case HFS_MARK_BOOT_CORRUPT: _IO are the same */
2569 case HFSIOC_MARK_BOOT_CORRUPT:
2570 /* Mark the boot volume corrupt by setting
2571 * kHFSVolumeInconsistentBit in the volume header. This will
2572 * force fsck_hfs on next mount.
2573 */
2574 if (!kauth_cred_issuser(kauth_cred_get())) {
2575 return EACCES;
2576 }
2577
2578 /* Allowed only on the root vnode of the boot volume */
2579 if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
2580 !vnode_isvroot(vp)) {
2581 return EINVAL;
2582 }
2583 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2584 return (EROFS);
2585 }
2586 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
2587 hfs_mark_inconsistent(hfsmp, HFS_FSCK_FORCED);
2588 break;
2589
2590 case HFSIOC_GET_JOURNAL_INFO:
2591 jip = (struct hfs_journal_info*)ap->a_data;
2592
2593 if (vp == NULLVP)
2594 return EINVAL;
2595
2596 if (hfsmp->jnl == NULL) {
2597 jnl_start = 0;
2598 jnl_size = 0;
2599 } else {
2600 jnl_start = hfs_blk_to_bytes(hfsmp->jnl_start, hfsmp->blockSize) + hfsmp->hfsPlusIOPosOffset;
2601 jnl_size = hfsmp->jnl_size;
2602 }
2603
2604 jip->jstart = jnl_start;
2605 jip->jsize = jnl_size;
2606 break;
2607
2608 case HFSIOC_SET_ALWAYS_ZEROFILL: {
2609 struct cnode *cp = VTOC(vp);
2610
2611 if (*(int *)ap->a_data) {
2612 cp->c_flag |= C_ALWAYS_ZEROFILL;
2613 } else {
2614 cp->c_flag &= ~C_ALWAYS_ZEROFILL;
2615 }
2616 break;
2617 }
2618
2619 /* case HFS_DISABLE_METAZONE: _IO are the same */
2620 case HFSIOC_DISABLE_METAZONE: {
2621 /* Only root can disable metadata zone */
2622 if (!kauth_cred_issuser(kauth_cred_get())) {
2623 return EACCES;
2624 }
2625 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2626 return (EROFS);
2627 }
2628
2629 /* Disable metadata zone now */
2630 (void) hfs_metadatazone_init(hfsmp, true);
2631 printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
2632 break;
2633 }
2634
2635
2636 case HFSIOC_FSINFO_METADATA_BLOCKS: {
2637 int error;
2638 struct hfsinfo_metadata *hinfo;
2639
2640 hinfo = (struct hfsinfo_metadata *)ap->a_data;
2641
2642 /* Get information about number of metadata blocks */
2643 error = hfs_getinfo_metadata_blocks(hfsmp, hinfo);
2644 if (error) {
2645 return error;
2646 }
2647
2648 break;
2649 }
2650
2651 case HFSIOC_GET_FSINFO: {
2652 hfs_fsinfo *fsinfo = (hfs_fsinfo *)ap->a_data;
2653
2654 /* Only root is allowed to get fsinfo */
2655 if (!kauth_cred_issuser(kauth_cred_get())) {
2656 return EACCES;
2657 }
2658
2659 /*
2660 * Make sure that the caller's version number matches
2661 * the kernel's version number. This will make sure that
2662 * if the structures being read/written into are changed
2663 * by the kernel, the caller will not read incorrect data.
2664 *
2665 * The first three fields --- request_type, version and
2666 * flags are the same for all the hfs_fsinfo structures, so
2667 * we can access the version number by assuming any
2668 * structure for now.
2669 */
2670 if (fsinfo->header.version != HFS_FSINFO_VERSION) {
2671 return ENOTSUP;
2672 }
2673
2674 /* Make sure that the current file system is not marked inconsistent */
2675 if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
2676 return EIO;
2677 }
2678
2679 return hfs_get_fsinfo(hfsmp, ap->a_data);
2680 }
2681
2682 case HFSIOC_CS_FREESPACE_TRIM: {
2683 int error = 0;
2684 int lockflags = 0;
2685
2686 /* Only root allowed */
2687 if (!kauth_cred_issuser(kauth_cred_get())) {
2688 return EACCES;
2689 }
2690
2691 /*
2692 * This core functionality is similar to hfs_scan_blocks().
2693 * The main difference is that hfs_scan_blocks() is called
2694 * as part of mount where we are assured that the journal is
2695 * empty to start with. This fcntl() can be called on a
2696 * mounted volume, therefore it has to flush the content of
2697 * the journal as well as ensure the state of the summary table.
2698 *
2699 * This fcntl scans over the entire allocation bitmap,
2700 * creates a list of all the free blocks, and issues TRIM
2701 * down to the underlying device. This can take a long time
2702 * as it can generate up to 512MB of read I/O.
2703 */
2704
2705 if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) {
2706 error = hfs_init_summary(hfsmp);
2707 if (error) {
2708 printf("hfs: fsctl() could not initialize summary table for %s\n", hfsmp->vcbVN);
2709 return error;
2710 }
2711 }
2712
2713 /*
2714 * The journal maintains list of recently deallocated blocks to
2715 * issue DKIOCUNMAPs when the corresponding journal transaction is
2716 * flushed to the disk. To avoid any race conditions, we only
2717 * want one active trim list and only one thread issuing DKIOCUNMAPs.
2718 * Therefore we make sure that the journal trim list is sync'ed,
2719 * empty, and not modifiable for the duration of our scan.
2720 *
2721 * Take the journal lock before flushing the journal to the disk.
2722 * We will keep holding the journal lock until we get the
2723 * bitmap lock to make sure that no new journal transactions can
2724 * start. This will make sure that the journal trim list is not
2725 * modified after the journal flush and before getting bitmap lock.
2726 * We can release the journal lock after we acquire the bitmap
2727 * lock as it will prevent any further block deallocations.
2728 */
2729 hfs_journal_lock(hfsmp);
2730
2731 /* Flush the journal and wait for all I/Os to finish up */
2732 error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
2733 if (error) {
2734 hfs_journal_unlock(hfsmp);
2735 return error;
2736 }
2737
2738 /* Take bitmap lock to ensure it is not being modified */
2739 lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
2740
2741 /* Release the journal lock */
2742 hfs_journal_unlock(hfsmp);
2743
2744 /*
2745 * ScanUnmapBlocks reads the bitmap in large blocks
2746 * (up to 1MB), unlike the runtime which reads the bitmap
2747 * in 4K blocks. This can cause buf_t collisions
2748 * and potential data corruption. To avoid this, we
2749 * invalidate all the existing buffers associated with
2750 * the bitmap vnode before scanning it.
2751 *
2752 * Note: ScanUnmapBlocks() cleans up all the buffers
2753 * after itself, so there won't be any large buffers left
2754 * for us to clean up after it returns.
2755 */
2756 error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
2757 if (error) {
2758 hfs_systemfile_unlock(hfsmp, lockflags);
2759 return error;
2760 }
2761
2762 /* Traverse bitmap and issue DKIOCUNMAPs */
2763 error = ScanUnmapBlocks(hfsmp);
2764 hfs_systemfile_unlock(hfsmp, lockflags);
2765 if (error) {
2766 return error;
2767 }
2768
2769 break;
2770 }
2771
2772 case HFSIOC_SET_HOTFILE_STATE: {
2773 int error;
2774 struct cnode *cp = VTOC(vp);
2775 uint32_t hf_state = *((uint32_t*)ap->a_data);
2776 uint32_t num_unpinned = 0;
2777
2778 error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2779 if (error) {
2780 return error;
2781 }
2782
2783 // printf("hfs: setting hotfile state %d on %s\n", hf_state, vp->v_name);
2784 if (hf_state == HFS_MARK_FASTDEVCANDIDATE) {
2785 vnode_setfastdevicecandidate(vp);
2786
2787 cp->c_attr.ca_recflags |= kHFSFastDevCandidateMask;
2788 cp->c_attr.ca_recflags &= ~kHFSDoNotFastDevPinMask;
2789 cp->c_flag |= C_MODIFIED;
2790 } else if (hf_state == HFS_UNMARK_FASTDEVCANDIDATE || hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
2791 vnode_clearfastdevicecandidate(vp);
2792 hfs_removehotfile(vp);
2793
2794 if (cp->c_attr.ca_recflags & kHFSFastDevPinnedMask) {
2795 hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, &num_unpinned);
2796 }
2797
2798 if (hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
2799 cp->c_attr.ca_recflags |= kHFSDoNotFastDevPinMask;
2800 }
2801 cp->c_attr.ca_recflags &= ~(kHFSFastDevCandidateMask|kHFSFastDevPinnedMask);
2802 cp->c_flag |= C_MODIFIED;
2803
2804 } else {
2805 error = EINVAL;
2806 }
2807
2808 if (num_unpinned != 0) {
2809 lck_mtx_lock(&hfsmp->hfc_mutex);
2810 hfsmp->hfs_hotfile_freeblks += num_unpinned;
2811 lck_mtx_unlock(&hfsmp->hfc_mutex);
2812 }
2813
2814 hfs_unlock(cp);
2815 return error;
2816 }
2817
2818 case HFSIOC_REPIN_HOTFILE_STATE: {
2819 int error=0;
2820 uint32_t repin_what = *((uint32_t*)ap->a_data);
2821
2822 /* Only root allowed */
2823 if (!kauth_cred_issuser(kauth_cred_get())) {
2824 return EACCES;
2825 }
2826
2827 if (!(hfsmp->hfs_flags & (HFS_CS_METADATA_PIN | HFS_CS_HOTFILE_PIN))) {
2828 // this system is neither regular Fusion nor Cooperative Fusion,
2829 // so this fsctl makes no sense.
2830 return EINVAL;
2831 }
2832
2833 //
2834 // After converting a CoreStorage volume to be encrypted, the
2835 // extents could have moved around underneath us. This call
2836 // allows corestoraged to re-pin everything that should be
2837 // pinned (it would happen on the next reboot too but that could
2838 // be a long time away).
2839 //
2840 if ((repin_what & HFS_REPIN_METADATA) && (hfsmp->hfs_flags & HFS_CS_METADATA_PIN)) {
2841 hfs_pin_fs_metadata(hfsmp);
2842 }
2843 if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) {
2844 hfs_repin_hotfiles(hfsmp);
2845 }
2846 if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_SWAPFILE_PIN)) {
2847 //XXX Swapfiles (marked SWAP_PINNED) may have moved too.
2848 //XXX Do we care? They have a more transient/dynamic nature/lifetime.
2849 }
2850
2851 return error;
2852 }
2853
2854 #if HFS_CONFIG_KEY_ROLL
2855
2856 case HFSIOC_KEY_ROLL: {
2857 if (!kauth_cred_issuser(kauth_cred_get()))
2858 return EACCES;
2859
2860 hfs_key_roll_args_t *args = (hfs_key_roll_args_t *)ap->a_data;
2861
2862 return hfs_key_roll_op(ap->a_context, ap->a_vp, args);
2863 }
2864
2865 case HFSIOC_GET_KEY_AUTO_ROLL: {
2866 if (!kauth_cred_issuser(kauth_cred_get()))
2867 return EACCES;
2868
2869 hfs_key_auto_roll_args_t *args = (hfs_key_auto_roll_args_t *)ap->a_data;
2870 if (args->api_version != HFS_KEY_AUTO_ROLL_API_VERSION_1)
2871 return ENOTSUP;
2872 args->flags = (ISSET(hfsmp->cproot_flags, CP_ROOT_AUTO_ROLL_OLD_CLASS_GENERATION)
2873 ? HFS_KEY_AUTO_ROLL_OLD_CLASS_GENERATION : 0);
2874 args->min_key_os_version = hfsmp->hfs_auto_roll_min_key_os_version;
2875 args->max_key_os_version = hfsmp->hfs_auto_roll_max_key_os_version;
2876 break;
2877 }
2878
2879 case HFSIOC_SET_KEY_AUTO_ROLL: {
2880 if (!kauth_cred_issuser(kauth_cred_get()))
2881 return EACCES;
2882
2883 hfs_key_auto_roll_args_t *args = (hfs_key_auto_roll_args_t *)ap->a_data;
2884 if (args->api_version != HFS_KEY_AUTO_ROLL_API_VERSION_1)
2885 return ENOTSUP;
2886 return cp_set_auto_roll(hfsmp, args);
2887 }
2888
2889 #endif // HFS_CONFIG_KEY_ROLL
2890
2891 #if CONFIG_PROTECT
2892 case F_TRANSCODEKEY:
2893 /*
2894 * This API is only supported when called via kernel so
2895 * a_fflag must be set to 1 (it's not possible to get here
2896 * with it set to 1 via fsctl).
2897 */
2898 if (ap->a_fflag != 1)
2899 return ENOTTY;
2900 return cp_vnode_transcode(vp, (cp_key_t *)ap->a_data);
2901
2902 case F_GETPROTECTIONLEVEL:
2903 return cp_get_root_major_vers (vp, (uint32_t *)ap->a_data);
2904
2905 case F_GETDEFAULTPROTLEVEL:
2906 return cp_get_default_level(vp, (uint32_t *)ap->a_data);
2907 #endif // CONFIG_PROTECT
2908
2909 case FIOPINSWAP:
2910 return hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT | HFS_DATALESS_PIN,
2911 NULL);
2912
2913 case FSIOC_CAS_BSDFLAGS: {
2914 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2915 return (EROFS);
2916 }
2917
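/*
 * Note: the FSIOC_CAS_BSDFLAGS implementation below is compiled out with
 * "#if 0", so after the read-only check this case currently falls through
 * to the "return ENOTSUP" at the end of the block.
 */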
2918 #if 0
2919 struct fsioc_cas_bsdflags *cas = (void *)ap->a_data;
2920 struct cnode *cp = VTOC(vp);
2921 u_int32_t document_id = 0;
2922 int decmpfs_reset_state = 0;
2923 int error;
2924
2925 /* Don't allow modification of the journal. */
2926 if (hfs_is_journal_file(hfsmp, cp)) {
2927 return (EPERM);
2928 }
2929
2930 if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
2931 return (error);
2932 }
2933
2934 cas->actual_flags = cp->c_bsdflags;
2935 if (cas->actual_flags != cas->expected_flags) {
2936 hfs_unlock(cp);
2937 return (0);
2938 }
2939
2940 //
2941 // Check if we'll need a document_id. If so, we need to drop the lock
2942 // (to avoid any possible deadlock with the root vnode which has to get
2943 // locked to get the document id), generate the document_id, re-acquire
2944 // the lock, and perform the CAS check again. We do it in this sequence
2945 // in order to avoid throwing away document_ids in the case where the
2946 // CAS check fails. Note that it can still happen, but by performing
2947 // the check first, hopefully we can reduce the ocurrence.
2948 //
2949 if ((cas->new_flags & UF_TRACKED) && !(VTOC(vp)->c_bsdflags & UF_TRACKED)) {
2950 struct FndrExtendedDirInfo *fip = (struct FndrExtendedDirInfo *)((char *)&(VTOC(vp)->c_attr.ca_finderinfo) + 16);
2951 //
2952 // If the document_id is not set, get a new one. It will be set
2953 // on the file down below once we hold the cnode lock.
2954 //
2955 if (fip->document_id == 0) {
2956 //
2957 // Drat, we have to generate one. Unlock the cnode, do the
2958 // deed, re-lock the cnode, and then do the CAS check again
2959 // to see if we lost the race.
2960 //
2961 hfs_unlock(cp);
2962 if (hfs_generate_document_id(hfsmp, &document_id) != 0) {
2963 document_id = 0;
2964 }
2965 if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
2966 return (error);
2967 }
2968 cas->actual_flags = cp->c_bsdflags;
2969 if (cas->actual_flags != cas->expected_flags) {
2970 hfs_unlock(cp);
2971 return (0);
2972 }
2973 }
2974 }
2975
2976 bool setting_compression = false;
2977
2978 if (!(cas->actual_flags & UF_COMPRESSED) && (cas->new_flags & UF_COMPRESSED))
2979 setting_compression = true;
2980
2981 if (setting_compression) {
2982 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2983 if (VTOF(vp)->ff_size) {
2984 // hfs_truncate will deal with the cnode lock
2985 error = hfs_truncate(vp, 0, IO_NDELAY, 0, ap->a_context);
2986 }
2987 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2988 }
2989
2990 if (!error)
2991 error = hfs_set_bsd_flags(hfsmp, cp, cas->new_flags,
2992 document_id, ap->a_context,
2993 &decmpfs_reset_state);
2994 if (error == 0) {
2995 error = hfs_update(vp, 0);
2996 }
2997 hfs_unlock(cp);
2998 if (error) {
2999 return (error);
3000 }
3001
3002 #if HFS_COMPRESSION
3003 if (decmpfs_reset_state) {
3004 /*
3005 * we've changed the UF_COMPRESSED flag, so reset the decmpfs state for this cnode
3006 * but don't do it while holding the hfs cnode lock
3007 */
3008 decmpfs_cnode *dp = VTOCMP(vp);
3009 if (!dp) {
3010 /*
3011 * call hfs_lazy_init_decmpfs_cnode() to make sure that the decmpfs_cnode
3012 * is filled in; we need a decmpfs_cnode to prevent decmpfs state changes
3013 * on this file if it's locked
3014 */
3015 dp = hfs_lazy_init_decmpfs_cnode(VTOC(vp));
3016 if (!dp) {
3017 /* failed to allocate a decmpfs_cnode */
3018 return ENOMEM; /* what should this be? */
3019 }
3020 }
3021 decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
3022 }
3023 #endif
3024 break;
3025 #endif
3026 return ENOTSUP;
3027 }
3028
3029 default:
3030 return (ENOTTY);
3031 }
3032
3033 return 0;
3034 }
3035
3036 /*
3037 * select
3038 */
3039 int
3040 hfs_vnop_select(__unused struct vnop_select_args *ap)
3041 /*
3042 struct vnop_select_args {
3043 vnode_t a_vp;
3044 int a_which;
3045 int a_fflags;
3046 void *a_wql;
3047 vfs_context_t a_context;
3048 };
3049 */
3050 {
3051 /*
3052 * We should really check to see if I/O is possible.
3053 */
3054 return (1);
3055 }
3056
3057 /*
3058 * Converts a logical block number to a physical block, and optionally returns
3059 * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
3060 * The physical block number is based on the device block size, currently 512 bytes.
3061 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
3062 */
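/*
 * Worked example for the run calculation below: with a logical block size of
 * 4096 and MapFileBlockC reporting bytesContAvail == 32768, the caller gets
 * *runp = (32768 / 4096) - 1 = 7, i.e. seven more logical blocks follow the
 * mapped block contiguously.
 */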
3063 int
3064 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
3065 {
3066 struct filefork *fp = VTOF(vp);
3067 struct hfsmount *hfsmp = VTOHFS(vp);
3068 int retval = E_NONE;
3069 u_int32_t logBlockSize;
3070 size_t bytesContAvail = 0;
3071 off_t blockposition;
3072 int lockExtBtree;
3073 int lockflags = 0;
3074
3075 /*
3076 * Check for underlying vnode requests and ensure that logical
3077 * to physical mapping is requested.
3078 */
3079 if (vpp != NULL)
3080 *vpp = hfsmp->hfs_devvp;
3081 if (bnp == NULL)
3082 return (0);
3083
3084 logBlockSize = GetLogicalBlockSize(vp);
3085 blockposition = (off_t)bn * logBlockSize;
3086
3087 lockExtBtree = overflow_extents(fp);
3088
3089 if (lockExtBtree)
3090 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
3091
3092 retval = MacToVFSError(
3093 MapFileBlockC (HFSTOVCB(hfsmp),
3094 (FCB*)fp,
3095 MAXPHYSIO,
3096 blockposition,
3097 bnp,
3098 &bytesContAvail));
3099
3100 if (lockExtBtree)
3101 hfs_systemfile_unlock(hfsmp, lockflags);
3102
3103 if (retval == E_NONE) {
3104 /* Figure out how many read ahead blocks there are */
3105 if (runp != NULL) {
3106 if (can_cluster(logBlockSize)) {
3107 /* Make sure this result never goes negative: */
3108 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
3109 } else {
3110 *runp = 0;
3111 }
3112 }
3113 }
3114 return (retval);
3115 }
3116
3117 /*
3118 * Convert logical block number to file offset.
3119 */
3120 int
3121 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
3122 /*
3123 struct vnop_blktooff_args {
3124 vnode_t a_vp;
3125 daddr64_t a_lblkno;
3126 off_t *a_offset;
3127 };
3128 */
3129 {
3130 if (ap->a_vp == NULL)
3131 return (EINVAL);
3132 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
3133
3134 return(0);
3135 }
3136
3137 /*
3138 * Convert file offset to logical block number.
3139 */
3140 int
3141 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
3142 /*
3143 struct vnop_offtoblk_args {
3144 vnode_t a_vp;
3145 off_t a_offset;
3146 daddr64_t *a_lblkno;
3147 };
3148 */
3149 {
3150 if (ap->a_vp == NULL)
3151 return (EINVAL);
3152 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
3153
3154 return(0);
3155 }
3156
3157 /*
3158 * Map file offset to physical block number.
3159 *
3160 * If this function is called for write operation, and if the file
3161 * had virtual blocks allocated (delayed allocation), real blocks
3162 * are allocated by calling ExtendFileC().
3163 *
3164 * If this function is called for read operation, and if the file
3165 * had virtual blocks allocated (delayed allocation), no change
3166 * to the size of file is done, and if required, rangelist is
3167 * searched for mapping.
3168 *
3169 * System file cnodes are expected to be locked (shared or exclusive).
3170 *
3171 * -- INVALID RANGES --
3172 *
3173 * Invalid ranges are used to keep track of where we have extended a
3174 * file, but have not yet written that data to disk. In the past we
3175 * would clear up the invalid ranges as we wrote to those areas, but
3176 * before data was actually flushed to disk. The problem with that
3177 * approach is that the data can be left in the cache and is therefore
3178 * still not valid on disk. So now we clear up the ranges here, when
3179 * the flags field has VNODE_WRITE set, indicating a write is about to
3180 * occur. This isn't ideal (ideally we want to clear them up when
3181 * we know the data has been successfully written), but it's the best we
3182 * can do.
3183 *
3184 * For reads, we use the invalid ranges here in block map to indicate
3185 * to the caller that the data should be zeroed (a_bpn == -1). We
3186 * have to be careful about what ranges we return to the cluster code.
3187 * Currently the cluster code can only handle non-rounded values for
3188 * the EOF; it cannot handle funny sized ranges in the middle of the
3189 * file (the main problem is that it sends down odd sized I/Os to the
3190 * disk). Our code currently works because whilst the very first
3191 * offset and the last offset in the invalid ranges are not aligned,
3192 * gaps in the invalid ranges between the first and last have to be
3193 * aligned (because we always write page sized blocks). For example,
3194 * consider this arrangement:
3195 *
3196 * +-------------+-----+-------+------+
3197 * | |XXXXX| |XXXXXX|
3198 * +-------------+-----+-------+------+
3199 * a b c d
3200 *
3201 * This shows two invalid ranges <a, b> and <c, d>. Whilst a and d
3202 * are not necessarily aligned, b and c *must* be.
3203 *
3204 * Zero-filling occurs in a number of ways:
3205 *
3206 * 1. When a read occurs and we return with a_bpn == -1.
3207 *
3208 * 2. When hfs_fsync or hfs_filedone calls hfs_flush_invalid_ranges
3209 * which will cause us to iterate over the ranges bringing in
3210 * pages that are not present in the cache and zeroing them. Any
3211 * pages that are already in the cache are left untouched. Note
3212 * that hfs_fsync does not always flush invalid ranges.
3213 *
3214 * 3. When we extend a file we zero out from the old EOF to the end
3215 * of the page. It would be nice if we didn't have to do this if
3216 * the page wasn't present (and could defer it), but because of
3217 * the problem described above, we have to.
3218 *
3219 * The invalid ranges are also used to restrict the size that we write
3220 * out on disk: see hfs_prepare_fork_for_update.
3221 *
3222 * Note that invalid ranges are ignored when neither the VNODE_READ nor
3223 * the VNODE_WRITE flag is specified. This is useful for the
3224 * F_LOG2PHYS* fcntls which are not interested in invalid ranges: they
3225 * just want to know whether blocks are physically allocated or not.
3226 */
3227 int
3228 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
3229 /*
3230 struct vnop_blockmap_args {
3231 vnode_t a_vp;
3232 off_t a_foffset;
3233 size_t a_size;
3234 daddr64_t *a_bpn;
3235 size_t *a_run;
3236 void *a_poff;
3237 int a_flags;
3238 vfs_context_t a_context;
3239 };
3240 */
3241 {
3242 struct vnode *vp = ap->a_vp;
3243 struct cnode *cp;
3244 struct filefork *fp;
3245 struct hfsmount *hfsmp;
3246 size_t bytesContAvail = ap->a_size;
3247 int retval = E_NONE;
3248 int syslocks = 0;
3249 int lockflags = 0;
3250 struct rl_entry *invalid_range;
3251 enum rl_overlaptype overlaptype;
3252 int started_tr = 0;
3253 int tooklock = 0;
3254
3255 #if HFS_COMPRESSION
3256 if (VNODE_IS_RSRC(vp)) {
3257 /* allow blockmaps to the resource fork */
3258 } else {
3259 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
3260 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
3261 switch(state) {
3262 case FILE_IS_COMPRESSED:
3263 return ENOTSUP;
3264 case FILE_IS_CONVERTING:
3265 /* if FILE_IS_CONVERTING, we allow blockmap */
3266 break;
3267 default:
3268 printf("invalid state %d for compressed file\n", state);
3269 /* fall through */
3270 }
3271 }
3272 }
3273 #endif /* HFS_COMPRESSION */
3274
3275 /* Do not allow blockmap operation on a directory */
3276 if (vnode_isdir(vp)) {
3277 return (ENOTSUP);
3278 }
3279
3280 /*
3281 * Check for underlying vnode requests and ensure that logical
3282 * to physical mapping is requested.
3283 */
3284 if (ap->a_bpn == NULL)
3285 return (0);
3286
3287 hfsmp = VTOHFS(vp);
3288 cp = VTOC(vp);
3289 fp = VTOF(vp);
3290
3291 if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
3292 if (cp->c_lockowner != current_thread()) {
3293 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
3294 tooklock = 1;
3295 }
3296
3297 // For reads, check the invalid ranges
3298 if (ISSET(ap->a_flags, VNODE_READ)) {
3299 if (ap->a_foffset >= fp->ff_size) {
3300 retval = ERANGE;
3301 goto exit;
3302 }
3303
3304 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
3305 ap->a_foffset + (off_t)bytesContAvail - 1,
3306 &invalid_range);
3307 switch(overlaptype) {
3308 case RL_MATCHINGOVERLAP:
3309 case RL_OVERLAPCONTAINSRANGE:
3310 case RL_OVERLAPSTARTSBEFORE:
3311 /* There's no valid block for this byte offset */
3312 *ap->a_bpn = (daddr64_t)-1;
3313 /* There's no point limiting the amount to be returned
3314 * if the invalid range that was hit extends all the way
3315 * to the EOF (i.e. there's no valid bytes between the
3316 * end of this range and the file's EOF):
3317 */
3318 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
3319 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
3320 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
3321 }
3322
3323 retval = 0;
3324 goto exit;
3325
3326 case RL_OVERLAPISCONTAINED:
3327 case RL_OVERLAPENDSAFTER:
3328 /* The range of interest hits an invalid block before the end: */
3329 if (invalid_range->rl_start == ap->a_foffset) {
3330 /* There's actually no valid information to be had starting here: */
3331 *ap->a_bpn = (daddr64_t)-1;
3332 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
3333 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
3334 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
3335 }
3336
3337 retval = 0;
3338 goto exit;
3339 } else {
3340 /*
3341 * Sadly, the lower layers don't like us to
3342 * return unaligned ranges, so we skip over
3343 * any invalid ranges here that are less than
3344 * a page: zeroing of those bits is not our
3345 * responsibility (it's dealt with elsewhere).
3346 */
3347 do {
3348 off_t rounded_start = round_page_64(invalid_range->rl_start);
3349 if ((off_t)bytesContAvail < rounded_start - ap->a_foffset)
3350 break;
3351 if (rounded_start < invalid_range->rl_end + 1) {
3352 bytesContAvail = rounded_start - ap->a_foffset;
3353 break;
3354 }
3355 } while ((invalid_range = TAILQ_NEXT(invalid_range,
3356 rl_link)));
3357 }
3358 break;
3359
3360 case RL_NOOVERLAP:
3361 break;
3362 } // switch
3363 }
3364 }
3365
3366 #if CONFIG_PROTECT
3367 if (cp->c_cpentry) {
3368 const int direction = (ISSET(ap->a_flags, VNODE_WRITE)
3369 ? VNODE_WRITE : VNODE_READ);
3370
3371 cp_io_params_t io_params;
3372 cp_io_params(hfsmp, cp->c_cpentry,
3373 off_rsrc_make(ap->a_foffset, VNODE_IS_RSRC(vp)),
3374 direction, &io_params);
3375
3376 if (io_params.max_len < (off_t)bytesContAvail)
3377 bytesContAvail = io_params.max_len;
3378
3379 if (io_params.phys_offset != -1) {
3380 *ap->a_bpn = ((io_params.phys_offset + hfsmp->hfsPlusIOPosOffset)
3381 / hfsmp->hfs_logical_block_size);
3382
3383 retval = 0;
3384 goto exit;
3385 }
3386 }
3387 #endif
3388
3389 retry:
3390
3391 /* Check virtual blocks only when performing write operation */
3392 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
3393 if (hfs_start_transaction(hfsmp) != 0) {
3394 retval = EINVAL;
3395 goto exit;
3396 } else {
3397 started_tr = 1;
3398 }
3399 syslocks = SFL_EXTENTS | SFL_BITMAP;
3400
3401 } else if (overflow_extents(fp)) {
3402 syslocks = SFL_EXTENTS;
3403 }
3404
3405 if (syslocks)
3406 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
3407
3408 /*
3409 * Check for any delayed allocations.
3410 */
3411 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
3412 int64_t actbytes;
3413 u_int32_t loanedBlocks;
3414
3415 //
3416 // Make sure we have a transaction. It's possible
3417 // that we came in and fp->ff_unallocblocks was zero
3418 // but during the time we blocked acquiring the extents
3419 // btree, ff_unallocblocks became non-zero and so we
3420 // will need to start a transaction.
3421 //
3422 if (started_tr == 0) {
3423 if (syslocks) {
3424 hfs_systemfile_unlock(hfsmp, lockflags);
3425 syslocks = 0;
3426 }
3427 goto retry;
3428 }
3429
3430 /*
3431 * Note: ExtendFileC will release any blocks on loan and
3432 * acquire real blocks. So we ask to extend by zero bytes
3433 * since ExtendFileC will account for the virtual blocks.
3434 */
3435
3436 loanedBlocks = fp->ff_unallocblocks;
3437 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
3438 kEFAllMask | kEFNoClumpMask, &actbytes);
3439
3440 if (retval) {
3441 fp->ff_unallocblocks = loanedBlocks;
3442 cp->c_blocks += loanedBlocks;
3443 fp->ff_blocks += loanedBlocks;
3444
3445 hfs_lock_mount (hfsmp);
3446 hfsmp->loanedBlocks += loanedBlocks;
3447 hfs_unlock_mount (hfsmp);
3448
3449 hfs_systemfile_unlock(hfsmp, lockflags);
3450 cp->c_flag |= C_MODIFIED;
3451 if (started_tr) {
3452 (void) hfs_update(vp, 0);
3453 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3454
3455 hfs_end_transaction(hfsmp);
3456 started_tr = 0;
3457 }
3458 goto exit;
3459 }
3460 }
3461
3462 retval = MapFileBlockC(hfsmp, (FCB *)fp, bytesContAvail, ap->a_foffset,
3463 ap->a_bpn, &bytesContAvail);
3464 if (syslocks) {
3465 hfs_systemfile_unlock(hfsmp, lockflags);
3466 syslocks = 0;
3467 }
3468
3469 if (retval) {
3470 /* On write, always return error because virtual blocks, if any,
3471 * should have been allocated in ExtendFileC(). We do not
3472 * allocate virtual blocks on read, therefore return error
3473 * only if no virtual blocks are allocated. Otherwise we search
3474 * the rangelist for zero-fills.
3475 */
3476 if ((MacToVFSError(retval) != ERANGE) ||
3477 (ap->a_flags & VNODE_WRITE) ||
3478 ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
3479 goto exit;
3480 }
3481
3482 /* Validate if the start offset is within logical file size */
3483 if (ap->a_foffset >= fp->ff_size) {
3484 goto exit;
3485 }
3486
3487 /*
3488 * At this point, we have encountered a failure during
3489 * MapFileBlockC that resulted in ERANGE, and we are not
3490 * servicing a write, and there are borrowed blocks.
3491 *
3492 * However, the cluster layer will not call blockmap for
3493 * blocks that are borrowed and in-cache. We have to assume
3494 * that because we observed ERANGE being emitted from
3495 * MapFileBlockC, this extent range is not valid on-disk. So
3496 * we treat this as a mapping that needs to be zero-filled
3497 * prior to reading.
3498 */
3499
3500 if (fp->ff_size - ap->a_foffset < (off_t)bytesContAvail)
3501 bytesContAvail = fp->ff_size - ap->a_foffset;
3502
3503 *ap->a_bpn = (daddr64_t) -1;
3504 retval = 0;
3505
3506 goto exit;
3507 }
3508
3509 exit:
3510 if (retval == 0) {
3511 if (ISSET(ap->a_flags, VNODE_WRITE)) {
3512 struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges);
3513
3514 // See if we might be overlapping invalid ranges...
3515 if (r && (ap->a_foffset + (off_t)bytesContAvail) > r->rl_start) {
3516 /*
3517 * Mark the file as needing an update if we think the
3518 * on-disk EOF has changed.
3519 */
3520 if (ap->a_foffset <= r->rl_start)
3521 SET(cp->c_flag, C_MODIFIED);
3522
3523 /*
3524 * This isn't the ideal place to put this. Ideally, we
3525 * should do something *after* we have successfully
3526 * written to the range, but that's difficult to do
3527 * because we cannot take locks in the callback. At
3528 * present, the cluster code will call us with VNODE_WRITE
3529 * set just before it's about to write the data so we know
3530 * that data is about to be written. If we get an I/O
3531 * error at this point then chances are the metadata
3532 * update to follow will also have an I/O error so the
3533 * risk here is small.
3534 */
3535 rl_remove(ap->a_foffset, ap->a_foffset + bytesContAvail - 1,
3536 &fp->ff_invalidranges);
3537
3538 if (!TAILQ_FIRST(&fp->ff_invalidranges)) {
3539 cp->c_flag &= ~C_ZFWANTSYNC;
3540 cp->c_zftimeout = 0;
3541 }
3542 }
3543 }
3544
3545 if (ap->a_run)
3546 *ap->a_run = bytesContAvail;
3547
3548 if (ap->a_poff)
3549 *(int *)ap->a_poff = 0;
3550 }
3551
3552 if (started_tr) {
3553 hfs_update(vp, TRUE);
3554 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3555 hfs_end_transaction(hfsmp);
3556 started_tr = 0;
3557 }
3558
3559 if (tooklock)
3560 hfs_unlock(cp);
3561
3562 return (MacToVFSError(retval));
3563 }
3564
3565 /*
3566 * prepare and issue the I/O
3567 * buf_strategy knows how to deal
3568 * with requests that require
3569 * fragmented I/Os
3570 */
3571 int
3572 hfs_vnop_strategy(struct vnop_strategy_args *ap)
3573 {
3574 buf_t bp = ap->a_bp;
3575 vnode_t vp = buf_vnode(bp);
3576 int error = 0;
3577
3578 /* Mark buffer as containing static data if cnode flag set */
3579 if (VTOC(vp)->c_flag & C_SSD_STATIC) {
3580 buf_markstatic(bp);
3581 }
3582
3583 /* Mark buffer for greedy-mode writes if cnode flag set */
3584 if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) {
3585 bufattr_markgreedymode(buf_attr(bp));
3586 }
3587
3588 /* mark buffer as containing burst mode data if cnode flag set */
3589 if (VTOC(vp)->c_flag & C_IO_ISOCHRONOUS) {
3590 bufattr_markisochronous(buf_attr(bp));
3591 }
3592
3593 #if CONFIG_PROTECT
3594 error = cp_handle_strategy(bp);
3595
3596 if (error)
3597 return error;
3598 #endif
3599
3600 error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
3601
3602 return error;
3603 }
3604
3605 int
3606 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vfs_context_t context)
3607 {
3608 register struct cnode *cp = VTOC(vp);
3609 struct filefork *fp = VTOF(vp);
3610 kauth_cred_t cred = vfs_context_ucred(context);
3611 int retval;
3612 off_t bytesToAdd;
3613 off_t actualBytesAdded;
3614 off_t filebytes;
3615 u_int32_t fileblocks;
3616 int blksize;
3617 struct hfsmount *hfsmp;
3618 int lockflags;
3619 int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES);
3620
3621 blksize = VTOVCB(vp)->blockSize;
3622 fileblocks = fp->ff_blocks;
3623 filebytes = (off_t)fileblocks * (off_t)blksize;
3624
3625 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_START,
3626 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
3627
3628 if (length < 0)
3629 return (EINVAL);
3630
3631 /* This should only happen with a corrupt filesystem */
3632 if ((off_t)fp->ff_size < 0)
3633 return (EINVAL);
3634
3635 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
3636 return (EFBIG);
3637
3638 hfsmp = VTOHFS(vp);
3639
3640 retval = E_NONE;
3641
3642 /* Files that are changing size are not hot file candidates. */
3643 if (hfsmp->hfc_stage == HFC_RECORDING) {
3644 fp->ff_bytesread = 0;
3645 }
3646
3647 /*
3648 * We cannot just check if fp->ff_size == length (as an optimization)
3649 * since there may be extra physical blocks that also need truncation.
3650 */
3651 #if QUOTA
3652 if ((retval = hfs_getinoquota(cp)))
3653 return(retval);
3654 #endif /* QUOTA */
3655
3656 /*
3657 * Lengthen the size of the file. We must ensure that the
3658 * last byte of the file is allocated. Since the smallest
3659 * value of ff_size is 0, length will be at least 1.
3660 */
3661 if (length > (off_t)fp->ff_size) {
3662 #if QUOTA
3663 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
3664 cred, 0);
3665 if (retval)
3666 goto Err_Exit;
3667 #endif /* QUOTA */
3668 /*
3669 * If we don't have enough physical space then
3670 * we need to extend the physical size.
3671 */
3672 if (length > filebytes) {
3673 int eflags;
3674 u_int32_t blockHint = 0;
3675
3676 /* All or nothing and don't round up to clumpsize. */
3677 eflags = kEFAllMask | kEFNoClumpMask;
3678
3679 if (cred && (suser(cred, NULL) != 0)) {
3680 eflags |= kEFReserveMask; /* keep a reserve */
3681 }
3682
3683 /*
3684 * Allocate Journal and Quota files in metadata zone.
3685 */
3686 if (filebytes == 0 &&
3687 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
3688 hfs_virtualmetafile(cp)) {
3689 eflags |= kEFMetadataMask;
3690 blockHint = hfsmp->hfs_metazone_start;
3691 }
3692 if (hfs_start_transaction(hfsmp) != 0) {
3693 retval = EINVAL;
3694 goto Err_Exit;
3695 }
3696
3697 /* Protect extents b-tree and allocation bitmap */
3698 lockflags = SFL_BITMAP;
3699 if (overflow_extents(fp))
3700 lockflags |= SFL_EXTENTS;
3701 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3702
3703 /*
3704 * Keep growing the file as long as the current EOF is
3705 * less than the desired value.
3706 */
3707 while ((length > filebytes) && (retval == E_NONE)) {
3708 bytesToAdd = length - filebytes;
3709 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
3710 (FCB*)fp,
3711 bytesToAdd,
3712 blockHint,
3713 eflags,
3714 &actualBytesAdded));
3715
3716 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
3717 if (actualBytesAdded == 0 && retval == E_NONE) {
3718 if (length > filebytes)
3719 length = filebytes;
3720 break;
3721 }
3722 } /* endwhile */
3723
3724 hfs_systemfile_unlock(hfsmp, lockflags);
3725
3726 if (hfsmp->jnl) {
3727 hfs_update(vp, 0);
3728 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3729 }
3730
3731 hfs_end_transaction(hfsmp);
3732
3733 if (retval)
3734 goto Err_Exit;
3735
3736 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE,
3737 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
3738 }
3739
3740 if (ISSET(flags, IO_NOZEROFILL)) {
3741 // An optimisation for the hibernation file
3742 if (vnode_isswap(vp))
3743 rl_remove_all(&fp->ff_invalidranges);
3744 } else {
3745 if (!vnode_issystem(vp) && retval == E_NONE) {
3746 if (length > (off_t)fp->ff_size) {
3747 struct timeval tv;
3748
3749 /* Extending the file: time to fill out the current last page with zeroes? */
3750 if (fp->ff_size & PAGE_MASK_64) {
3751 /* There might be some valid data at the start of the (current) last page
3752 of the file, so zero out the remainder of that page to ensure the
3753 entire page contains valid data. */
3754 hfs_unlock(cp);
3755 retval = hfs_zero_eof_page(vp, length);
3756 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
3757 if (retval) goto Err_Exit;
3758 }
3759 microuptime(&tv);
3760 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
3761 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
3762 }
3763 } else {
3764 panic("hfs_truncate: invoked on non-UBC object?!");
3765 };
3766 }
3767 if (suppress_times == 0) {
3768 cp->c_touch_modtime = TRUE;
3769 }
3770 fp->ff_size = length;
3771
3772 } else { /* Shorten the size of the file */
3773
3774 // An optimisation for the hibernation file
3775 if (ISSET(flags, IO_NOZEROFILL) && vnode_isswap(vp)) {
3776 rl_remove_all(&fp->ff_invalidranges);
3777 } else if ((off_t)fp->ff_size > length) {
3778 /* Any space previously marked as invalid is now irrelevant: */
3779 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
3780 }
3781
3782 /*
3783 * Account for any unmapped blocks. Note that the new
3784 * file length can still end up with unmapped blocks.
3785 */
3786 if (fp->ff_unallocblocks > 0) {
3787 u_int32_t finalblks;
3788 u_int32_t loanedBlocks;
3789
3790 hfs_lock_mount(hfsmp);
3791 loanedBlocks = fp->ff_unallocblocks;
3792 cp->c_blocks -= loanedBlocks;
3793 fp->ff_blocks -= loanedBlocks;
3794 fp->ff_unallocblocks = 0;
3795
3796 hfsmp->loanedBlocks -= loanedBlocks;
3797
3798 finalblks = (length + blksize - 1) / blksize;
3799 if (finalblks > fp->ff_blocks) {
3800 /* calculate required unmapped blocks */
3801 loanedBlocks = finalblks - fp->ff_blocks;
3802 hfsmp->loanedBlocks += loanedBlocks;
3803
3804 fp->ff_unallocblocks = loanedBlocks;
3805 cp->c_blocks += loanedBlocks;
3806 fp->ff_blocks += loanedBlocks;
3807 }
3808 hfs_unlock_mount (hfsmp);
3809 }
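/*
 * Editorial worked example (illustrative, not part of the original source):
 * finalblks above rounds the new length up to whole allocation blocks.
 * With blksize = 4096 and length = 10000, finalblks = (10000 + 4095) / 4096
 * = 3; if only one block were still attached to the fork at that point,
 * loanedBlocks = 3 - 1 = 2 blocks would be re-borrowed so the shortened
 * file still covers its logical length.
 */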
3810
3811 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
3812 if (hfs_start_transaction(hfsmp) != 0) {
3813 retval = EINVAL;
3814 goto Err_Exit;
3815 }
3816
3817 if (fp->ff_unallocblocks == 0) {
3818 /* Protect extents b-tree and allocation bitmap */
3819 lockflags = SFL_BITMAP;
3820 if (overflow_extents(fp))
3821 lockflags |= SFL_EXTENTS;
3822 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3823
3824 retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0,
3825 FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));
3826
3827 hfs_systemfile_unlock(hfsmp, lockflags);
3828 }
3829 if (hfsmp->jnl) {
3830 if (retval == 0) {
3831 fp->ff_size = length;
3832 }
3833 hfs_update(vp, 0);
3834 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3835 }
3836 hfs_end_transaction(hfsmp);
3837
3838 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
3839 if (retval)
3840 goto Err_Exit;
3841 #if QUOTA
3842 /* These are bytesreleased */
3843 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
3844 #endif /* QUOTA */
3845
3846 //
3847 // Unlike when growing a file, we adjust the hotfile block count here
3848 // instead of deeper down in the block allocation code because we do
3849 // not necessarily have a vnode or "fcb" at the time we're deleting
3850 // the file and so we wouldn't know if it was hotfile cached or not
3851 //
3852 hfs_hotfile_adjust_blocks(vp, (int64_t)((savedbytes - filebytes) / blksize));
3853
3854
3855 /*
3856 * Only set update flag if the logical length changes & we aren't
3857 * suppressing modtime updates.
3858 */
3859 if (((off_t)fp->ff_size != length) && (suppress_times == 0)) {
3860 cp->c_touch_modtime = TRUE;
3861 }
3862 fp->ff_size = length;
3863 }
3864 if (cp->c_mode & (S_ISUID | S_ISGID)) {
3865 if (!vfs_context_issuser(context))
3866 cp->c_mode &= ~(S_ISUID | S_ISGID);
3867 }
3868 cp->c_flag |= C_MODIFIED;
3869 cp->c_touch_chgtime = TRUE; /* status changed */
3870 if (suppress_times == 0) {
3871 cp->c_touch_modtime = TRUE; /* file data was modified */
3872
3873 /*
3874 * If we are not suppressing the modtime update, then
3875 * update the gen count as well.
3876 */
3877 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) {
3878 hfs_incr_gencount(cp);
3879 }
3880 }
3881
3882 retval = hfs_update(vp, 0);
3883 if (retval) {
3884 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE,
3885 -1, -1, -1, retval, 0);
3886 }
3887
3888 Err_Exit:
3889
3890 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_END,
3891 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
3892
3893 return (retval);
3894 }
3895
3896 /*
3897 * Preparation which must be done prior to deleting the catalog record
3898 * of a file or directory. In order to make the on-disk state as safe as possible,
3899 * we remove the catalog entry before releasing the bitmap blocks and the
3900 * overflow extent records. However, some work must be done prior to deleting
3901 * the catalog record.
3902 *
3903 * When calling this function, the cnode must exist both in memory and on-disk.
3904 * If there are both resource fork and data fork vnodes, this function should
3905 * be called on both.
3906 */
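/*
 * Editorial note, inferred from the comments in this file (a sketch of the
 * expected sequence rather than a statement from the original source):
 * callers run hfs_prepare_release_storage() first, then delete the catalog
 * record, and finally call hfs_release_storage() to free the bitmap blocks
 * and overflow extent records.
 */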
3907
3908 int
3909 hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {
3910
3911 struct filefork *fp = VTOF(vp);
3912 struct cnode *cp = VTOC(vp);
3913 #if QUOTA
3914 int retval = 0;
3915 #endif /* QUOTA */
3916
3917 /* Cannot truncate an HFS directory! */
3918 if (vnode_isdir(vp)) {
3919 return (EISDIR);
3920 }
3921
3922 /*
3923 * See the comment below in hfs_truncate for why we need to call
3924 * setsize here. Essentially we want to avoid pending IO if we
3925 * already know that the blocks are going to be released here.
3926 * This function is only called when totally removing all storage for a file, so
3927 * we can take a shortcut and immediately call ubc_setsize(vp, 0).
3928 */
3929 ubc_setsize(vp, 0);
3930
3931 /* This should only happen with a corrupt filesystem */
3932 if ((off_t)fp->ff_size < 0)
3933 return (EINVAL);
3934
3935 /*
3936 * We cannot just check if fp->ff_size == length (as an optimization)
3937 * since there may be extra physical blocks that also need truncation.
3938 */
3939 #if QUOTA
3940 if ((retval = hfs_getinoquota(cp))) {
3941 return(retval);
3942 }
3943 #endif /* QUOTA */
3944
3945 /* Wipe out any invalid ranges which have yet to be backed by disk */
3946 rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);
3947
3948 /*
3949 * Account for any unmapped blocks. Since we're deleting the
3950 * entire file, we don't have to worry about just shrinking
3951 * to a smaller number of borrowed blocks.
3952 */
3953 if (fp->ff_unallocblocks > 0) {
3954 u_int32_t loanedBlocks;
3955
3956 hfs_lock_mount (hfsmp);
3957 loanedBlocks = fp->ff_unallocblocks;
3958 cp->c_blocks -= loanedBlocks;
3959 fp->ff_blocks -= loanedBlocks;
3960 fp->ff_unallocblocks = 0;
3961
3962 hfsmp->loanedBlocks -= loanedBlocks;
3963
3964 hfs_unlock_mount (hfsmp);
3965 }
3966
3967 return 0;
3968 }
3969
3970
3971 /*
3972 * Special wrapper around calling TruncateFileC. This function is useable
3973 * even when the catalog record does not exist any longer, making it ideal
3974 * for use when deleting a file. The simplification here is that we know
3975 * that we are releasing all blocks.
3976 *
3977 * Note that this function may be called when there is no vnode backing
3978 * the file fork in question. We may call this from hfs_vnop_inactive
3979 * to clear out resource fork data (and may not want to clear out the data
3980 * fork yet). As a result, we pointer-check both sets of inputs before
3981 * doing anything with them.
3982 *
3983 * The caller is responsible for saving off a copy of the filefork(s)
3984 * embedded within the cnode prior to calling this function. The pointers
3985 * supplied as arguments must be valid even if the cnode is no longer valid.
3986 */
3987
3988 int
3989 hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
3990 struct filefork *rsrcfork, u_int32_t fileid) {
3991
3992 off_t filebytes;
3993 u_int32_t fileblocks;
3994 int blksize = 0;
3995 int error = 0;
3996 int lockflags;
3997
3998 blksize = hfsmp->blockSize;
3999
4000 /* Data Fork */
4001 if (datafork) {
4002 off_t prev_filebytes;
4003
4004 datafork->ff_size = 0;
4005
4006 fileblocks = datafork->ff_blocks;
4007 filebytes = (off_t)fileblocks * (off_t)blksize;
4008 prev_filebytes = filebytes;
4009
4010 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
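/*
 * Editorial note (illustrative only): each pass of the loop below lowers the
 * target size by at most HFS_BIGFILE_SIZE and truncates the fork down to that
 * target inside its own transaction, so a very large fork is released in a
 * series of bounded transactions rather than one enormous one.
 */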
4011
4012 while (filebytes > 0) {
4013 if (filebytes > HFS_BIGFILE_SIZE) {
4014 filebytes -= HFS_BIGFILE_SIZE;
4015 } else {
4016 filebytes = 0;
4017 }
4018
4019 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
4020 if (hfs_start_transaction(hfsmp) != 0) {
4021 error = EINVAL;
4022 break;
4023 }
4024
4025 if (datafork->ff_unallocblocks == 0) {
4026 /* Protect extents b-tree and allocation bitmap */
4027 lockflags = SFL_BITMAP;
4028 if (overflow_extents(datafork))
4029 lockflags |= SFL_EXTENTS;
4030 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4031
4032 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false));
4033
4034 hfs_systemfile_unlock(hfsmp, lockflags);
4035 }
4036 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
4037
4038 struct cnode *cp = datafork ? FTOC(datafork) : NULL;
4039 struct vnode *vp;
4040 vp = cp ? CTOV(cp, 0) : NULL;
4041 hfs_hotfile_adjust_blocks(vp, (int64_t)((prev_filebytes - filebytes) / blksize));
4042 prev_filebytes = filebytes;
4043
4044 /* Finish the transaction and start over if necessary */
4045 hfs_end_transaction(hfsmp);
4046
4047 if (error) {
4048 break;
4049 }
4050 }
4051 }
4052
4053 /* Resource fork */
4054 if (error == 0 && rsrcfork) {
4055 rsrcfork->ff_size = 0;
4056
4057 fileblocks = rsrcfork->ff_blocks;
4058 filebytes = (off_t)fileblocks * (off_t)blksize;
4059
4060 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
4061
4062 while (filebytes > 0) {
4063 if (filebytes > HFS_BIGFILE_SIZE) {
4064 filebytes -= HFS_BIGFILE_SIZE;
4065 } else {
4066 filebytes = 0;
4067 }
4068
4069 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
4070 if (hfs_start_transaction(hfsmp) != 0) {
4071 error = EINVAL;
4072 break;
4073 }
4074
4075 if (rsrcfork->ff_unallocblocks == 0) {
4076 /* Protect extents b-tree and allocation bitmap */
4077 lockflags = SFL_BITMAP;
4078 if (overflow_extents(rsrcfork))
4079 lockflags |= SFL_EXTENTS;
4080 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4081
4082 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false));
4083
4084 hfs_systemfile_unlock(hfsmp, lockflags);
4085 }
4086 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
4087
4088 /* Finish the transaction and start over if necessary */
4089 hfs_end_transaction(hfsmp);
4090
4091 if (error) {
4092 break;
4093 }
4094 }
4095 }
4096
4097 return error;
4098 }
4099
4100 errno_t hfs_ubc_setsize(vnode_t vp, off_t len, bool have_cnode_lock)
4101 {
4102 errno_t error;
4103
4104 /*
4105 * Call ubc_setsize to give the VM subsystem a chance to do
4106 * whatever it needs to with existing pages before we delete
4107 * blocks. Note that symlinks don't use the UBC so we'll
4108 * get back ENOENT in that case.
4109 */
4110 if (have_cnode_lock) {
4111 error = ubc_setsize_ex(vp, len, UBC_SETSIZE_NO_FS_REENTRY);
4112 if (error == EAGAIN) {
4113 cnode_t *cp = VTOC(vp);
4114
4115 if (cp->c_truncatelockowner != current_thread())
4116 hfs_warn("hfs: hfs_ubc_setsize called without exclusive truncate lock!");
4117
4118 hfs_unlock(cp);
4119 error = ubc_setsize_ex(vp, len, 0);
4120 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
4121 }
4122 } else
4123 error = ubc_setsize_ex(vp, len, 0);
4124
4125 return error == ENOENT ? 0 : error;
4126 }
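/*
 * Editorial note (illustrative usage, mirroring the call sites later in this
 * file; not from the original source): callers that already hold the cnode
 * lock pass true so the EAGAIN retry path may drop and retake it, e.g.
 *
 *	error = hfs_ubc_setsize(vp, length, caller_has_cnode_lock);
 *	if (error)
 *		return error;
 */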
4127
4128 /*
4129 * Truncate a cnode to at most length size, freeing (or adding) the
4130 * disk blocks.
4131 */
4132 int
4133 hfs_truncate(struct vnode *vp, off_t length, int flags,
4134 int truncateflags, vfs_context_t context)
4135 {
4136 struct filefork *fp = VTOF(vp);
4137 off_t filebytes;
4138 u_int32_t fileblocks;
4139 int blksize;
4140 errno_t error = 0;
4141 struct cnode *cp = VTOC(vp);
4142 hfsmount_t *hfsmp = VTOHFS(vp);
4143
4144 /* Cannot truncate an HFS directory! */
4145 if (vnode_isdir(vp)) {
4146 return (EISDIR);
4147 }
4148 /* A swap file cannot change size. */
4149 if (vnode_isswap(vp) && length && !ISSET(flags, IO_NOAUTH)) {
4150 return (EPERM);
4151 }
4152
4153 blksize = hfsmp->blockSize;
4154 fileblocks = fp->ff_blocks;
4155 filebytes = (off_t)fileblocks * (off_t)blksize;
4156
4157 bool caller_has_cnode_lock = (cp->c_lockowner == current_thread());
4158
4159 error = hfs_ubc_setsize(vp, length, caller_has_cnode_lock);
4160 if (error)
4161 return error;
4162
4163 if (!caller_has_cnode_lock) {
4164 error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
4165 if (error)
4166 return error;
4167 }
4168
4169 if (vnode_islnk(vp) && cp->c_datafork->ff_symlinkptr) {
4170 hfs_free(cp->c_datafork->ff_symlinkptr, cp->c_datafork->ff_size);
4171 cp->c_datafork->ff_symlinkptr = NULL;
4172 }
4173
4174 // have to loop truncating or growing files that are
4175 // really big because otherwise transactions can get
4176 // enormous and consume too many kernel resources.
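// Editorial note (illustrative only): when the change spans several multiples
// of HFS_BIGFILE_SIZE, each loop iteration below steps filebytes toward the
// requested length by at most HFS_BIGFILE_SIZE and calls do_hfs_truncate()
// for that intermediate size, so no single call has to cover the whole range.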
4177
4178 if (length < filebytes) {
4179 while (filebytes > length) {
4180 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
4181 filebytes -= HFS_BIGFILE_SIZE;
4182 } else {
4183 filebytes = length;
4184 }
4185 error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
4186 if (error)
4187 break;
4188 }
4189 } else if (length > filebytes) {
4190 kauth_cred_t cred = vfs_context_ucred(context);
4191 const bool keep_reserve = cred && suser(cred, NULL) != 0;
4192
4193 if (hfs_freeblks(hfsmp, keep_reserve)
4194 < howmany(length - filebytes, blksize)) {
4195 error = ENOSPC;
4196 } else {
4197 while (filebytes < length) {
4198 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
4199 filebytes += HFS_BIGFILE_SIZE;
4200 } else {
4201 filebytes = length;
4202 }
4203 error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
4204 if (error)
4205 break;
4206 }
4207 }
4208 } else /* Same logical size */ {
4209
4210 error = do_hfs_truncate(vp, length, flags, truncateflags, context);
4211 }
4212 /* Files that are changing size are not hot file candidates. */
4213 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
4214 fp->ff_bytesread = 0;
4215 }
4216
4217 #if HFS_CONFIG_KEY_ROLL
4218 if (!error && cp->c_truncatelockowner == current_thread()) {
4219 hfs_key_roll_check(cp, true);
4220 }
4221 #endif
4222
4223 if (!caller_has_cnode_lock)
4224 hfs_unlock(cp);
4225
4226 // Make sure UBC's size matches up (in case we didn't completely succeed)
4227 errno_t err2 = hfs_ubc_setsize(vp, fp->ff_size, caller_has_cnode_lock);
4228 if (!error)
4229 error = err2;
4230
4231 return error;
4232 }
4233
4234
4235 /*
4236 * Preallocate file storage space.
4237 */
4238 int
4239 hfs_vnop_allocate(struct vnop_allocate_args /* {
4240 vnode_t a_vp;
4241 off_t a_length;
4242 u_int32_t a_flags;
4243 off_t *a_bytesallocated;
4244 off_t a_offset;
4245 vfs_context_t a_context;
4246 } */ *ap)
4247 {
4248 struct vnode *vp = ap->a_vp;
4249 struct cnode *cp;
4250 struct filefork *fp;
4251 ExtendedVCB *vcb;
4252 off_t length = ap->a_length;
4253 off_t startingPEOF;
4254 off_t moreBytesRequested;
4255 off_t actualBytesAdded;
4256 off_t filebytes;
4257 u_int32_t fileblocks;
4258 int retval, retval2;
4259 u_int32_t blockHint;
4260 u_int32_t extendFlags; /* For call to ExtendFileC */
4261 struct hfsmount *hfsmp;
4262 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
4263 int lockflags;
4264 time_t orig_ctime;
4265
4266 *(ap->a_bytesallocated) = 0;
4267
4268 if (!vnode_isreg(vp))
4269 return (EISDIR);
4270 if (length < (off_t)0)
4271 return (EINVAL);
4272
4273 cp = VTOC(vp);
4274
4275 orig_ctime = VTOC(vp)->c_ctime;
4276
4277 nspace_snapshot_event(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);
4278
4279 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
4280
4281 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
4282 goto Err_Exit;
4283 }
4284
4285 fp = VTOF(vp);
4286 hfsmp = VTOHFS(vp);
4287 vcb = VTOVCB(vp);
4288
4289 fileblocks = fp->ff_blocks;
4290 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
4291
4292 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
4293 retval = EINVAL;
4294 goto Err_Exit;
4295 }
4296
4297 /* Fill in the flags word for the call to Extend the file */
4298
4299 extendFlags = kEFNoClumpMask;
4300 if (ap->a_flags & ALLOCATECONTIG)
4301 extendFlags |= kEFContigMask;
4302 if (ap->a_flags & ALLOCATEALL)
4303 extendFlags |= kEFAllMask;
4304 if (cred && suser(cred, NULL) != 0)
4305 extendFlags |= kEFReserveMask;
4306 if (hfs_virtualmetafile(cp))
4307 extendFlags |= kEFMetadataMask;
4308
4309 retval = E_NONE;
4310 blockHint = 0;
4311 startingPEOF = filebytes;
4312
4313 if (ap->a_flags & ALLOCATEFROMPEOF)
4314 length += filebytes;
4315 else if (ap->a_flags & ALLOCATEFROMVOL)
4316 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
4317
4318 /* If no changes are necessary, then we're done */
4319 if (filebytes == length)
4320 goto Std_Exit;
4321
4322 /*
4323 * Lengthen the size of the file. We must ensure that the
4324 * last byte of the file is allocated. Since the smallest
4325 * value of filebytes is 0, length will be at least 1.
4326 */
4327 if (length > filebytes) {
4328 if (ISSET(extendFlags, kEFAllMask)
4329 && (hfs_freeblks(hfsmp, ISSET(extendFlags, kEFReserveMask))
4330 < howmany(length - filebytes, hfsmp->blockSize))) {
4331 retval = ENOSPC;
4332 goto Err_Exit;
4333 }
4334
4335 off_t total_bytes_added = 0, orig_request_size;
4336
4337 orig_request_size = moreBytesRequested = length - filebytes;
4338
4339 #if QUOTA
4340 retval = hfs_chkdq(cp,
4341 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
4342 cred, 0);
4343 if (retval)
4344 goto Err_Exit;
4345
4346 #endif /* QUOTA */
4347 /*
4348 * Metadata zone checks.
4349 */
4350 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
4351 /*
4352 * Allocate Journal and Quota files in metadata zone.
4353 */
4354 if (hfs_virtualmetafile(cp)) {
4355 blockHint = hfsmp->hfs_metazone_start;
4356 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
4357 (blockHint <= hfsmp->hfs_metazone_end)) {
4358 /*
4359 * Move blockHint outside metadata zone.
4360 */
4361 blockHint = hfsmp->hfs_metazone_end + 1;
4362 }
4363 }
4364
4365
4366 while ((length > filebytes) && (retval == E_NONE)) {
4367 off_t bytesRequested;
4368
4369 if (hfs_start_transaction(hfsmp) != 0) {
4370 retval = EINVAL;
4371 goto Err_Exit;
4372 }
4373
4374 /* Protect extents b-tree and allocation bitmap */
4375 lockflags = SFL_BITMAP;
4376 if (overflow_extents(fp))
4377 lockflags |= SFL_EXTENTS;
4378 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4379
4380 if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
4381 bytesRequested = HFS_BIGFILE_SIZE;
4382 } else {
4383 bytesRequested = moreBytesRequested;
4384 }
4385
4386 if (extendFlags & kEFContigMask) {
4387 // if we're on a sparse device, this will force it to do a
4388 // full scan to find the space needed.
4389 hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
4390 }
4391
4392 retval = MacToVFSError(ExtendFileC(vcb,
4393 (FCB*)fp,
4394 bytesRequested,
4395 blockHint,
4396 extendFlags,
4397 &actualBytesAdded));
4398
4399 if (retval == E_NONE) {
4400 *(ap->a_bytesallocated) += actualBytesAdded;
4401 total_bytes_added += actualBytesAdded;
4402 moreBytesRequested -= actualBytesAdded;
4403 if (blockHint != 0) {
4404 blockHint += actualBytesAdded / vcb->blockSize;
4405 }
4406 }
4407 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
4408
4409 hfs_systemfile_unlock(hfsmp, lockflags);
4410
4411 if (hfsmp->jnl) {
4412 (void) hfs_update(vp, 0);
4413 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
4414 }
4415
4416 hfs_end_transaction(hfsmp);
4417 }
4418
4419
4420 /*
4421 * if we get an error and no changes were made then exit
4422 * otherwise we must do the hfs_update to reflect the changes
4423 */
4424 if (retval && (startingPEOF == filebytes))
4425 goto Err_Exit;
4426
4427 /*
4428 * Adjust actualBytesAdded to be allocation block aligned, not
4429 * clump size aligned.
4430 * NOTE: What we are reporting does not affect reality
4431 * until the file is closed, when we truncate the file to allocation
4432 * block size.
4433 */
4434 if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
4435 *(ap->a_bytesallocated) =
4436 roundup(orig_request_size, (off_t)vcb->blockSize);
4437
4438 } else { /* Shorten the size of the file */
4439
4440 /*
4441 * N.B. At present, this code is never called. If and when we
4442 * do start using it, it looks like there might be slightly
4443 * strange semantics with the file size: it's possible for the
4444 * file size to *increase* e.g. if current file size is 5,
4445 * length is 1024 and filebytes is 4096, the file size will
4446 * end up being 1024 bytes. This isn't necessarily a problem
4447 * but it's not consistent with the code above which doesn't
4448 * change the file size.
4449 */
4450
4451 retval = hfs_truncate(vp, length, 0, 0, ap->a_context);
4452 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
4453
4454 /*
4455 * if we get an error and no changes were made then exit
4456 * otherwise we must do the hfs_update to reflect the changes
4457 */
4458 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
4459 #if QUOTA
4460 /* These are bytesreleased */
4461 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
4462 #endif /* QUOTA */
4463
4464 if (fp->ff_size > filebytes) {
4465 fp->ff_size = filebytes;
4466
4467 hfs_ubc_setsize(vp, fp->ff_size, true);
4468 }
4469 }
4470
4471 Std_Exit:
4472 cp->c_flag |= C_MODIFIED;
4473 cp->c_touch_chgtime = TRUE;
4474 cp->c_touch_modtime = TRUE;
4475 retval2 = hfs_update(vp, 0);
4476
4477 if (retval == 0)
4478 retval = retval2;
4479 Err_Exit:
4480 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
4481 hfs_unlock(cp);
4482 return (retval);
4483 }
4484
4485
4486 /*
4487 * Pagein for HFS filesystem
4488 */
4489 int
4490 hfs_vnop_pagein(struct vnop_pagein_args *ap)
4491 /*
4492 struct vnop_pagein_args {
4493 vnode_t a_vp,
4494 upl_t a_pl,
4495 vm_offset_t a_pl_offset,
4496 off_t a_f_offset,
4497 size_t a_size,
4498 int a_flags
4499 vfs_context_t a_context;
4500 };
4501 */
4502 {
4503 vnode_t vp;
4504 struct cnode *cp;
4505 struct filefork *fp;
4506 int error = 0;
4507 upl_t upl;
4508 upl_page_info_t *pl;
4509 off_t f_offset;
4510 off_t page_needed_f_offset;
4511 int offset;
4512 int isize;
4513 int upl_size;
4514 int pg_index;
4515 boolean_t truncate_lock_held = FALSE;
4516 boolean_t file_converted = FALSE;
4517 kern_return_t kret;
4518
4519 vp = ap->a_vp;
4520 cp = VTOC(vp);
4521 fp = VTOF(vp);
4522
4523 #if CONFIG_PROTECT
4524 if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) {
4525 /*
4526 * If we errored here, then this means that one of two things occurred:
4527 * 1. there was a problem with the decryption of the key.
4528 * 2. the device is locked and we are not allowed to access this particular file.
4529 *
4530 * Either way, this means that we need to shut down this upl now. As long as
4531 * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves)
4532 * then we create a upl and immediately abort it.
4533 */
4534 if (ap->a_pl == NULL) {
4535 /* create the upl */
4536 ubc_create_upl (vp, ap->a_f_offset, ap->a_size, &upl, &pl,
4537 UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
4538 /* mark the range as needed so it doesn't immediately get discarded upon abort */
4539 ubc_upl_range_needed (upl, ap->a_pl_offset / PAGE_SIZE, 1);
4540
4541 /* Abort the range */
4542 ubc_upl_abort_range (upl, 0, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
4543 }
4544
4545
4546 return error;
4547 }
4548 #endif /* CONFIG_PROTECT */
4549
4550 if (ap->a_pl != NULL) {
4551 /*
4552 * this can only happen for swap files now that
4553 * we're asking for V2 paging behavior...
4554 * so don't need to worry about decompression, or
4555 * keeping track of blocks read or taking the truncate lock
4556 */
4557 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
4558 ap->a_size, (off_t)fp->ff_size, ap->a_flags);
4559 goto pagein_done;
4560 }
4561
4562 page_needed_f_offset = ap->a_f_offset + ap->a_pl_offset;
4563
4564 retry_pagein:
4565 /*
4566 * take truncate lock (shared/recursive) to guard against
4567 * zero-fill thru fsync interfering, but only for v2
4568 *
4569 * the HFS_LOCK_SKIP_IF_EXCLUSIVE arg indicates that we want the
4570 * lock shared and we are allowed to recurse 1 level if this thread already
4571 * owns the lock exclusively... this can legally occur
4572 * if we are doing a shrinking ftruncate against a file
4573 * that is mapped private, and the pages being truncated
4574 * do not currently exist in the cache... in that case
4575 * we will have to page-in the missing pages in order
4576 * to provide them to the private mapping... we must
4577 * also call hfs_unlock_truncate with a positive been_recursed
4578 * arg to indicate that if we have recursed, there is no need to drop
4579 * the lock. Allowing this simple recursion is necessary
4580 * in order to avoid a certain deadlock... since the ftruncate
4581 * already holds the truncate lock exclusively, if we try
4582 * to acquire it shared to protect the pagein path, we will
4583 * hang this thread
4584 *
4585 * NOTE: The if () block below is a workaround in order to prevent a
4586 * VM deadlock. See rdar://7853471.
4587 *
4588 * If we are in a forced unmount, then launchd will still have the
4589 * dyld_shared_cache file mapped as it is trying to reboot. If we
4590 * take the truncate lock here to service a page fault, then our
4591 * thread could deadlock with the forced-unmount. The forced unmount
4592 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
4593 * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount
4594 * thread will think it needs to copy all of the data out of the file
4595 * and into a VM copy object. If we hold the cnode lock here, then that
4596 * VM operation will not be able to proceed, because we'll set a busy page
4597 * before attempting to grab the lock. Note that this isn't as simple as "don't
4598 * call ubc_setsize" because doing that would just shift the problem to the
4599 * ubc_msync done before the vnode is reclaimed.
4600 *
4601 * So, if a forced unmount on this volume is in flight AND the cnode is
4602 * marked C_DELETED, then just go ahead and do the page in without taking
4603 * the lock (thus suspending pagein_v2 semantics temporarily). Since it's on a file
4604 * that is not going to be available on the next mount, this seems like an
4605 * OK solution from a correctness point of view, even though it is hacky.
4606 */
4607 if (vfs_isforce(vnode_mount(vp))) {
4608 if (cp->c_flag & C_DELETED) {
4609 /* If we don't get it, then just go ahead and operate without the lock */
4610 truncate_lock_held = hfs_try_trunclock(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4611 }
4612 }
4613 else {
4614 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4615 truncate_lock_held = TRUE;
4616 }
4617
4618 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
4619
4620 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
4621 error = EINVAL;
4622 goto pagein_done;
4623 }
4624 ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);
4625
4626 upl_size = isize = ap->a_size;
4627
4628 /*
4629 * Scan from the back to find the last page in the UPL, so that we
4630 * aren't looking at a UPL that may have already been freed by the
4631 * preceding aborts/completions.
4632 */
4633 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
4634 if (upl_page_present(pl, --pg_index))
4635 break;
4636 if (pg_index == 0) {
4637 /*
4638 * no absent pages were found in the range specified
4639 * just abort the UPL to get rid of it and then we're done
4640 */
4641 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
4642 goto pagein_done;
4643 }
4644 }
4645 /*
4646 * initialize the offset variables before we touch the UPL.
4647 * f_offset is the position into the file, in bytes
4648 * offset is the position into the UPL, in bytes
4649 * pg_index is the pg# of the UPL we're operating on
4650 * isize is the offset into the UPL of the last page that is present.
4651 */
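/*
 * Editorial worked example (assumes the common 4 KiB PAGE_SIZE; not part of
 * the original source): for a 64 KiB request the backward scan above starts
 * probing at pg_index 15; if the last present page is index 9, then
 * isize = (9 + 1) * PAGE_SIZE = 40960 and the loop below only walks the
 * first ten page slots of the UPL.
 */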
4652 isize = ((pg_index + 1) * PAGE_SIZE);
4653 pg_index = 0;
4654 offset = 0;
4655 f_offset = ap->a_f_offset;
4656
4657 while (isize) {
4658 int xsize;
4659 int num_of_pages;
4660
4661 if ( !upl_page_present(pl, pg_index)) {
4662 /*
4663 * we asked for RET_ONLY_ABSENT, so it's possible
4664 * to get back empty slots in the UPL.
4665 * just skip over them
4666 */
4667 f_offset += PAGE_SIZE;
4668 offset += PAGE_SIZE;
4669 isize -= PAGE_SIZE;
4670 pg_index++;
4671
4672 continue;
4673 }
4674 /*
4675 * We know that we have at least one absent page.
4676 * Now checking to see how many in a row we have
4677 */
4678 num_of_pages = 1;
4679 xsize = isize - PAGE_SIZE;
4680
4681 while (xsize) {
4682 if ( !upl_page_present(pl, pg_index + num_of_pages))
4683 break;
4684 num_of_pages++;
4685 xsize -= PAGE_SIZE;
4686 }
4687 xsize = num_of_pages * PAGE_SIZE;
4688
4689 #if HFS_COMPRESSION
4690 if (VNODE_IS_RSRC(vp)) {
4691 /* allow pageins of the resource fork */
4692 } else {
4693 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
4694
4695 if (compressed) {
4696
4697 if (truncate_lock_held) {
4698 /*
4699 * can't hold the truncate lock when calling into the decmpfs layer
4700 * since it calls back into this layer... even though we're only
4701 * holding the lock in shared mode, and the re-entrant path only
4702 * takes the lock shared, we can deadlock if some other thread
4703 * tries to grab the lock exclusively in between.
4704 */
4705 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4706 truncate_lock_held = FALSE;
4707 }
4708 ap->a_pl = upl;
4709 ap->a_pl_offset = offset;
4710 ap->a_f_offset = f_offset;
4711 ap->a_size = xsize;
4712
4713 error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
4714 /*
4715 * note that decmpfs_pagein_compressed can change the state of
4716 * 'compressed'... it will set it to 0 if the file is no longer
4717 * compressed once the compression lock is successfully taken
4718 * i.e. we would block on that lock while the file is being inflated
4719 */
4720 if (error == 0 && vnode_isfastdevicecandidate(vp)) {
4721 (void) hfs_addhotfile(vp);
4722 }
4723 if (compressed) {
4724 if (error == 0) {
4725 /* successful page-in, update the access time */
4726 VTOC(vp)->c_touch_acctime = TRUE;
4727
4728 //
4729 // compressed files are not traditional hot file candidates
4730 // but they may be for CF (which ignores the ff_bytesread
4731 // field)
4732 //
4733 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
4734 fp->ff_bytesread = 0;
4735 }
4736 } else if (error == EAGAIN) {
4737 /*
4738 * EAGAIN indicates someone else already holds the compression lock...
4739 * to avoid deadlocking, we'll abort this range of pages with an
4740 * indication that the pagein needs to be redriven
4741 */
4742 ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
4743 } else if (error == ENOSPC) {
4744
4745 if (upl_size == PAGE_SIZE)
4746 panic("decmpfs_pagein_compressed: couldn't ubc_upl_map a single page\n");
4747
4748 ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
4749
4750 ap->a_size = PAGE_SIZE;
4751 ap->a_pl = NULL;
4752 ap->a_pl_offset = 0;
4753 ap->a_f_offset = page_needed_f_offset;
4754
4755 goto retry_pagein;
4756 } else {
4757 ubc_upl_abort(upl, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
4758 goto pagein_done;
4759 }
4760 goto pagein_next_range;
4761 }
4762 else {
4763 /*
4764 * Set file_converted only if the file became decompressed while we were
4765 * paging in. If it were still compressed, we would re-start the loop using the goto
4766 * in the above block. This avoids overloading truncate_lock_held as our retry_pagein
4767 * condition below, since we could have avoided taking the truncate lock to prevent
4768 * a deadlock in the force unmount case.
4769 */
4770 file_converted = TRUE;
4771 }
4772 }
4773 if (file_converted == TRUE) {
4774 /*
4775 * the file was converted back to a regular file after we first saw it as compressed
4776 * we need to abort the upl, retake the truncate lock, recreate the UPL and start over
4777 * reset a_size so that we consider what remains of the original request
4778 * and null out a_upl and a_pl_offset.
4779 *
4780 * We should only be able to get into this block if the decmpfs_pagein_compressed
4781 * successfully decompressed the range in question for this file.
4782 */
4783 ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
4784
4785 ap->a_size = isize;
4786 ap->a_pl = NULL;
4787 ap->a_pl_offset = 0;
4788
4789 /* Reset file_converted back to false so that we don't infinite-loop. */
4790 file_converted = FALSE;
4791 goto retry_pagein;
4792 }
4793 }
4794 #endif
4795 error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);
4796
4797 /*
4798 * Keep track of blocks read.
4799 */
4800 if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
4801 int bytesread;
4802 int took_cnode_lock = 0;
4803
4804 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
4805 bytesread = fp->ff_size;
4806 else
4807 bytesread = xsize;
4808
4809 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
4810 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
4811 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
4812 took_cnode_lock = 1;
4813 }
4814 /*
4815 * If this file hasn't been seen since the start of
4816 * the current sampling period then start over.
4817 */
4818 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
4819 struct timeval tv;
4820
4821 fp->ff_bytesread = bytesread;
4822 microtime(&tv);
4823 cp->c_atime = tv.tv_sec;
4824 } else {
4825 fp->ff_bytesread += bytesread;
4826 }
4827 cp->c_touch_acctime = TRUE;
4828
4829 if (vnode_isfastdevicecandidate(vp)) {
4830 (void) hfs_addhotfile(vp);
4831 }
4832 if (took_cnode_lock)
4833 hfs_unlock(cp);
4834 }
4835 pagein_next_range:
4836 f_offset += xsize;
4837 offset += xsize;
4838 isize -= xsize;
4839 pg_index += num_of_pages;
4840
4841 error = 0;
4842 }
4843
4844 pagein_done:
4845 if (truncate_lock_held == TRUE) {
4846 /* Note HFS_LOCK_SKIP_IF_EXCLUSIVE is passed to hfs_unlock_truncate to handle any recursion */
4847 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4848 }
4849
4850 return (error);
4851 }
4852
4853 /*
4854 * Pageout for HFS filesystem.
4855 */
4856 int
4857 hfs_vnop_pageout(struct vnop_pageout_args *ap)
4858 /*
4859 struct vnop_pageout_args {
4860 vnode_t a_vp,
4861 upl_t a_pl,
4862 vm_offset_t a_pl_offset,
4863 off_t a_f_offset,
4864 size_t a_size,
4865 int a_flags
4866 vfs_context_t a_context;
4867 };
4868 */
4869 {
4870 vnode_t vp = ap->a_vp;
4871 struct cnode *cp;
4872 struct filefork *fp;
4873 int retval = 0;
4874 off_t filesize;
4875 upl_t upl;
4876 upl_page_info_t* pl = NULL;
4877 vm_offset_t a_pl_offset;
4878 int a_flags;
4879 int is_pageoutv2 = 0;
4880 kern_return_t kret;
4881
4882 cp = VTOC(vp);
4883 fp = VTOF(vp);
4884
4885 a_flags = ap->a_flags;
4886 a_pl_offset = ap->a_pl_offset;
4887
4888 /*
4889 * we can tell if we're getting the new or old behavior from the UPL
4890 */
4891 if ((upl = ap->a_pl) == NULL) {
4892 int request_flags;
4893
4894 is_pageoutv2 = 1;
4895 /*
4896 * we're in control of any UPL we commit
4897 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
4898 */
4899 a_flags &= ~UPL_NOCOMMIT;
4900 a_pl_offset = 0;
4901
4902 /*
4903 * For V2 semantics, we want to take the cnode truncate lock
4904 * shared to guard against the file size changing via zero-filling.
4905 *
4906 * However, we have to be careful because we may be invoked
4907 * via the ubc_msync path to write out dirty mmap'd pages
4908 * in response to a lock event on a content-protected
4909 * filesystem (e.g. to write out class A files).
4910 * As a result, we want to take the truncate lock 'SHARED' with
4911 * the mini-recursion locktype so that we don't deadlock/panic
4912 * because we may be already holding the truncate lock exclusive to force any other
4913 * IOs to have blocked behind us.
4914 */
4915 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4916
4917 if (a_flags & UPL_MSYNC) {
4918 request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
4919 }
4920 else {
4921 request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
4922 }
4923
4924 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);
4925
4926 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
4927 retval = EINVAL;
4928 goto pageout_done;
4929 }
4930 }
4931 /*
4932 * from this point forward upl points at the UPL we're working with
4933 * it was either passed in or we successfully created it
4934 */
4935
4936 /*
4937 * Figure out where the file ends, for pageout purposes. If
4938 * ff_new_size > ff_size, then we're in the middle of extending the
4939 * file via a write, so it is safe (and necessary) that we be able
4940 * to pageout up to that point.
4941 */
4942 filesize = fp->ff_size;
4943 if (fp->ff_new_size > filesize)
4944 filesize = fp->ff_new_size;
4945
4946 /*
4947 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
4948 * UPL instead of relying on the UPL passed into us. We go ahead and do that here,
4949 * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for
4950 * N dirty ranges in the UPL. Note that this is almost a direct copy of the
4951 * logic in vnode_pageout except that we need to do it after grabbing the truncate
4952 * lock in HFS so that we don't lock invert ourselves.
4953 *
4954 * Note that we can still get into this function on behalf of the default pager with
4955 * non-V2 behavior (swapfiles). However in that case, we did not grab locks above
4956 * since fsync and other writing threads will grab the locks, then mark the
4957 * relevant pages as busy. But the pageout codepath marks the pages as busy,
4958 * and THEN would attempt to grab the truncate lock, which would result in deadlock. So
4959 * we do not try to grab anything for the pre-V2 case, which should only be accessed
4960 * by the paging/VM system.
4961 */
4962
4963 if (is_pageoutv2) {
4964 off_t f_offset;
4965 int offset;
4966 int isize;
4967 int pg_index;
4968 int error;
4969 int error_ret = 0;
4970
4971 isize = ap->a_size;
4972 f_offset = ap->a_f_offset;
4973
4974 /*
4975 * Scan from the back to find the last page in the UPL, so that we
4976 * aren't looking at a UPL that may have already been freed by the
4977 * preceding aborts/completions.
4978 */
4979 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
4980 if (upl_page_present(pl, --pg_index))
4981 break;
4982 if (pg_index == 0) {
4983 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
4984 goto pageout_done;
4985 }
4986 }
4987
4988 /*
4989 * initialize the offset variables before we touch the UPL.
4990 * a_f_offset is the position into the file, in bytes
4991 * offset is the position into the UPL, in bytes
4992 * pg_index is the pg# of the UPL we're operating on.
4993 * isize is the offset into the UPL of the last non-clean page.
4994 */
4995 isize = ((pg_index + 1) * PAGE_SIZE);
4996
4997 offset = 0;
4998 pg_index = 0;
4999
5000 while (isize) {
5001 int xsize;
5002 int num_of_pages;
5003
5004 if ( !upl_page_present(pl, pg_index)) {
5005 /*
5006 * we asked for RET_ONLY_DIRTY, so it's possible
5007 * to get back empty slots in the UPL.
5008 * just skip over them
5009 */
5010 f_offset += PAGE_SIZE;
5011 offset += PAGE_SIZE;
5012 isize -= PAGE_SIZE;
5013 pg_index++;
5014
5015 continue;
5016 }
5017 if ( !upl_dirty_page(pl, pg_index)) {
5018 panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
5019 }
5020
5021 /*
5022 * We know that we have at least one dirty page.
5023 * Now checking to see how many in a row we have
5024 */
5025 num_of_pages = 1;
5026 xsize = isize - PAGE_SIZE;
5027
5028 while (xsize) {
5029 if ( !upl_dirty_page(pl, pg_index + num_of_pages))
5030 break;
5031 num_of_pages++;
5032 xsize -= PAGE_SIZE;
5033 }
5034 xsize = num_of_pages * PAGE_SIZE;
5035
5036 if ((error = cluster_pageout(vp, upl, offset, f_offset,
5037 xsize, filesize, a_flags))) {
5038 if (error_ret == 0)
5039 error_ret = error;
5040 }
5041 f_offset += xsize;
5042 offset += xsize;
5043 isize -= xsize;
5044 pg_index += num_of_pages;
5045 }
5046 /* capture errnos bubbled out of cluster_pageout if they occurred */
5047 if (error_ret != 0) {
5048 retval = error_ret;
5049 }
5050 } /* end block for v2 pageout behavior */
5051 else {
5052 /*
5053 * just call cluster_pageout for old pre-v2 behavior
5054 */
5055 retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
5056 ap->a_size, filesize, a_flags);
5057 }
5058
5059 /*
5060 * If data was written, update the modification time of the file
5061 * but only if it's mapped writable; we will have touched the
5062 * modification time for direct writes.
5063 */
5064 if (retval == 0 && (ubc_is_mapped_writable(vp)
5065 || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING))) {
5066 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5067
5068 // Check again with lock
5069 bool mapped_writable = ubc_is_mapped_writable(vp);
5070 if (mapped_writable
5071 || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING)) {
5072 cp->c_touch_modtime = TRUE;
5073 cp->c_touch_chgtime = TRUE;
5074
5075 /*
5076 * We only need to increment the generation counter if
5077 * it's currently mapped writable because we incremented
5078 * the counter in hfs_vnop_mnomap.
5079 */
5080 if (mapped_writable)
5081 hfs_incr_gencount(VTOC(vp));
5082
5083 /*
5084 * If setuid or setgid bits are set and this process is
5085 * not the superuser then clear the setuid and setgid bits
5086 * as a precaution against tampering.
5087 */
5088 if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
5089 (vfs_context_suser(ap->a_context) != 0)) {
5090 cp->c_mode &= ~(S_ISUID | S_ISGID);
5091 }
5092 }
5093
5094 hfs_unlock(cp);
5095 }
5096
5097 pageout_done:
5098 if (is_pageoutv2) {
5099 /*
5100 * Release the truncate lock. Note that because
5101 * we may have taken the lock recursively by
5102 * being invoked via ubc_msync due to lockdown,
5103 * we should release it recursively, too.
5104 */
5105 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
5106 }
5107 return (retval);
5108 }
5109
5110 /*
5111 * Intercept B-Tree node writes to unswap them if necessary.
5112 */
5113 int
5114 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
5115 {
5116 int retval = 0;
5117 register struct buf *bp = ap->a_bp;
5118 register struct vnode *vp = buf_vnode(bp);
5119 BlockDescriptor block;
5120
5121 /* Trap B-Tree writes */
5122 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
5123 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
5124 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
5125 (vp == VTOHFS(vp)->hfc_filevp)) {
5126
5127 /*
5128 * Swap and validate the node if it is in native byte order.
5129 * This is always true on big endian, so we always validate
5130 * before writing here. On little endian, the node typically has
5131 * been swapped and validated when it was written to the journal,
5132 * so we won't do anything here.
5133 */
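/*
 * Editorial note (background assumption, stated here for clarity): the last
 * two bytes of a B-tree node hold the offset of record 0, which equals the
 * 14-byte (0x000e) node descriptor size. Seeing 0x000e through a host-order
 * u_int16_t read therefore indicates the node is still in native byte order
 * and needs the swap below before it is written out.
 */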
5134 if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
5135 /* Prepare the block pointer */
5136 block.blockHeader = bp;
5137 block.buffer = (char *)buf_dataptr(bp);
5138 block.blockNum = buf_lblkno(bp);
5139 /* not found in cache ==> came from disk */
5140 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
5141 block.blockSize = buf_count(bp);
5142
5143 /* Endian un-swap B-Tree node */
5144 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
5145 if (retval)
5146 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
5147 }
5148 }
5149
5150 /* This buffer shouldn't be locked anymore but if it is clear it */
5151 if ((buf_flags(bp) & B_LOCKED)) {
5152 // XXXdbg
5153 if (VTOHFS(vp)->jnl) {
5154 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
5155 }
5156 buf_clearflags(bp, B_LOCKED);
5157 }
5158 retval = vn_bwrite (ap);
5159
5160 return (retval);
5161 }
5162
5163
5164 int
5165 hfs_pin_block_range(struct hfsmount *hfsmp, int pin_state, uint32_t start_block, uint32_t nblocks)
5166 {
5167 _dk_cs_pin_t pin;
5168 unsigned ioc;
5169 int err;
5170
5171 memset(&pin, 0, sizeof(pin));
5172 pin.cp_extent.offset = ((uint64_t)start_block) * HFSTOVCB(hfsmp)->blockSize;
5173 pin.cp_extent.length = ((uint64_t)nblocks) * HFSTOVCB(hfsmp)->blockSize;
5174 switch (pin_state) {
5175 case HFS_PIN_IT:
5176 ioc = _DKIOCCSPINEXTENT;
5177 pin.cp_flags = _DKIOCCSPINTOFASTMEDIA;
5178 break;
5179 case HFS_PIN_IT | HFS_TEMP_PIN:
5180 ioc = _DKIOCCSPINEXTENT;
5181 pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSTEMPORARYPIN;
5182 break;
5183 case HFS_PIN_IT | HFS_DATALESS_PIN:
5184 ioc = _DKIOCCSPINEXTENT;
5185 pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSPINFORSWAPFILE;
5186 break;
5187 case HFS_UNPIN_IT:
5188 ioc = _DKIOCCSUNPINEXTENT;
5189 pin.cp_flags = 0;
5190 break;
5191 case HFS_UNPIN_IT | HFS_EVICT_PIN:
5192 ioc = _DKIOCCSPINEXTENT;
5193 pin.cp_flags = _DKIOCCSPINTOSLOWMEDIA;
5194 break;
5195 default:
5196 return EINVAL;
5197 }
5198 err = VNOP_IOCTL(hfsmp->hfs_devvp, ioc, (caddr_t)&pin, 0, vfs_context_kernel());
5199 return err;
5200 }
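/*
 * Editorial note (hypothetical usage sketch, not from the original source):
 * a caller wanting to temporarily pin a file's first extent to fast media
 * could do something like
 *
 *	err = hfs_pin_block_range(hfsmp, HFS_PIN_IT | HFS_TEMP_PIN,
 *	                          fp->ff_extents[0].startBlock,
 *	                          fp->ff_extents[0].blockCount);
 *
 * which is essentially what hfs_pin_vnode() below does for each extent.
 */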
5201
5202 //
5203 // The cnode lock should already be held on entry to this function
5204 //
5205 int
5206 hfs_pin_vnode(struct hfsmount *hfsmp, struct vnode *vp, int pin_state, uint32_t *num_blocks_pinned)
5207 {
5208 struct filefork *fp = VTOF(vp);
5209 int i, err=0, need_put=0;
5210 struct vnode *rsrc_vp=NULL;
5211 uint32_t npinned = 0;
5212 off_t offset;
5213
5214 if (num_blocks_pinned) {
5215 *num_blocks_pinned = 0;
5216 }
5217
5218 if (vnode_vtype(vp) != VREG) {
5219 /* Not allowed to pin directories or symlinks */
5220 printf("hfs: can't pin vnode of type %d\n", vnode_vtype(vp));
5221 return (EPERM);
5222 }
5223
5224 if (fp->ff_unallocblocks) {
5225 printf("hfs: can't pin a vnode w/unalloced blocks (%d)\n", fp->ff_unallocblocks);
5226 return (EINVAL);
5227 }
5228
5229 /*
5230 * It is possible that if the caller unlocked/re-locked the cnode after checking
5231 * for C_NOEXISTS|C_DELETED that the file could have been deleted while the
5232 * cnode was unlocked. So check the condition again and return ENOENT so that
5233 * the caller knows why we failed to pin the vnode.
5234 */
5235 if (VTOC(vp)->c_flag & (C_NOEXISTS|C_DELETED)) {
5236 // makes no sense to pin something that's pending deletion
5237 return ENOENT;
5238 }
5239
5240 if (fp->ff_blocks == 0 && (VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
5241 if (!VNODE_IS_RSRC(vp) && hfs_vgetrsrc(hfsmp, vp, &rsrc_vp) == 0) {
5242 //printf("hfs: fileid %d resource fork nblocks: %d / size: %lld\n", VTOC(vp)->c_fileid,
5243 // VTOC(rsrc_vp)->c_rsrcfork->ff_blocks,VTOC(rsrc_vp)->c_rsrcfork->ff_size);
5244
5245 fp = VTOC(rsrc_vp)->c_rsrcfork;
5246 need_put = 1;
5247 }
5248 }
5249 if (fp->ff_blocks == 0) {
5250 if (need_put) {
5251 //
5252 // use a distinct error code for a compressed file that has no resource fork;
5253 // we return EALREADY to indicate that the data is probably already hot-file
5254 // cached because it's in an EA and the attributes btree is on the ssd
5255 //
5256 err = EALREADY;
5257 } else {
5258 err = EINVAL;
5259 }
5260 goto out;
5261 }
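/*
 * Editorial note (assumption): kHFSPlusExtentDensity is the count of extent
 * descriptors kept inline in the catalog record (eight on HFS Plus), so the
 * loop below pins at most the first eight extents; anything that spilled
 * into the overflow extents b-tree is pinned further down via
 * hfs_pin_overflow_extents().
 */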
5262
5263 offset = 0;
5264 for (i = 0; i < kHFSPlusExtentDensity; i++) {
5265 if (fp->ff_extents[i].startBlock == 0) {
5266 break;
5267 }
5268
5269 err = hfs_pin_block_range(hfsmp, pin_state, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount);
5270 if (err) {
5271 break;
5272 } else {
5273 npinned += fp->ff_extents[i].blockCount;
5274 }
5275 }
5276
5277 if (err || npinned == 0) {
5278 goto out;
5279 }
5280
5281 if (fp->ff_extents[kHFSPlusExtentDensity-1].startBlock) {
5282 uint32_t pblocks;
5283 uint8_t forktype = 0;
5284
5285 if (fp == VTOC(vp)->c_rsrcfork) {
5286 forktype = 0xff;
5287 }
5288 /*
5289 * The file could have overflow extents, better pin them.
5290 *
5291 * We assume that since we are holding the cnode lock for this cnode,
5292 * the file's extents cannot be manipulated, but the tree could, so we
5293 * need to ensure that it doesn't change behind our back as we iterate it.
5294 */
5295 int lockflags = hfs_systemfile_lock (hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
5296 err = hfs_pin_overflow_extents(hfsmp, VTOC(vp)->c_fileid, forktype, &pblocks);
5297 hfs_systemfile_unlock (hfsmp, lockflags);
5298
5299 if (err) {
5300 goto out;
5301 }
5302 npinned += pblocks;
5303 }
5304
5305 out:
5306 if (num_blocks_pinned) {
5307 *num_blocks_pinned = npinned;
5308 }
5309
5310 if (need_put && rsrc_vp) {
5311 //
5312 // have to unlock the cnode since it's shared between the
5313 // resource fork vnode and the data fork vnode (and the
5314 // vnode_put() may need to re-acquire the cnode lock to
5315 // reclaim the resource fork vnode)
5316 //
5317 hfs_unlock(VTOC(vp));
5318 vnode_put(rsrc_vp);
5319 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5320 }
5321 return err;
5322 }
5323
5324
5325 /*
5326 * Relocate a file to a new location on disk
5327 * cnode must be locked on entry
5328 *
5329 * Relocation occurs by cloning the file's data from its
5330 * current set of blocks to a new set of blocks. During
5331 * the relocation all of the blocks (old and new) are
5332 * owned by the file.
5333 *
5334 * -----------------
5335 * |///////////////|
5336 * -----------------
5337 * 0 N (file offset)
5338 *
5339 * ----------------- -----------------
5340 * |///////////////| | | STEP 1 (acquire new blocks)
5341 * ----------------- -----------------
5342 * 0 N N+1 2N
5343 *
5344 * ----------------- -----------------
5345 * |///////////////| |///////////////| STEP 2 (clone data)
5346 * ----------------- -----------------
5347 * 0 N N+1 2N
5348 *
5349 * -----------------
5350 * |///////////////| STEP 3 (head truncate blocks)
5351 * -----------------
5352 * 0 N
5353 *
5354 * During steps 2 and 3 page-outs to file offsets less
5355 * than or equal to N are suspended.
5356 *
5357 * During step 3 page-ins to the file get suspended.
5358 */
5359 int
5360 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
5361 struct proc *p)
5362 {
5363 struct cnode *cp;
5364 struct filefork *fp;
5365 struct hfsmount *hfsmp;
5366 u_int32_t headblks;
5367 u_int32_t datablks;
5368 u_int32_t blksize;
5369 u_int32_t growsize;
5370 u_int32_t nextallocsave;
5371 daddr64_t sector_a, sector_b;
5372 int eflags = 0;
5373 off_t newbytes;
5374 int retval;
5375 int lockflags = 0;
5376 int took_trunc_lock = 0;
5377 int started_tr = 0;
5378 enum vtype vnodetype;
5379
5380 vnodetype = vnode_vtype(vp);
5381 if (vnodetype != VREG) {
5382 /* Not allowed to move symlinks. */
5383 return (EPERM);
5384 }
5385
5386 hfsmp = VTOHFS(vp);
5387 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
5388 return (ENOSPC);
5389 }
5390
5391 cp = VTOC(vp);
5392 fp = VTOF(vp);
5393 if (fp->ff_unallocblocks)
5394 return (EINVAL);
5395
5396 #if CONFIG_PROTECT
5397 /*
5398 * <rdar://problem/9118426>
5399 * Disable HFS file relocation on content-protected filesystems
5400 */
5401 if (cp_fs_protected (hfsmp->hfs_mp)) {
5402 return EINVAL;
5403 }
5404 #endif
5405 /* If it's an SSD, also disable HFS relocation */
5406 if (hfsmp->hfs_flags & HFS_SSD) {
5407 return EINVAL;
5408 }
5409
5410
5411 blksize = hfsmp->blockSize;
5412 if (blockHint == 0)
5413 blockHint = hfsmp->nextAllocation;
5414
5415 if (fp->ff_size > 0x7fffffff) {
5416 return (EFBIG);
5417 }
5418
5419 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
5420 hfs_unlock(cp);
5421 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
5422 /* Force lock since callers expects lock to be held. */
5423 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) {
5424 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5425 return (retval);
5426 }
5427 /* No need to continue if file was removed. */
5428 if (cp->c_flag & C_NOEXISTS) {
5429 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5430 return (ENOENT);
5431 }
5432 took_trunc_lock = 1;
5433 }
5434 headblks = fp->ff_blocks;
5435 datablks = howmany(fp->ff_size, blksize);
5436 growsize = datablks * blksize;
5437 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
5438 if (blockHint >= hfsmp->hfs_metazone_start &&
5439 blockHint <= hfsmp->hfs_metazone_end)
5440 eflags |= kEFMetadataMask;
5441
5442 if (hfs_start_transaction(hfsmp) != 0) {
5443 if (took_trunc_lock)
5444 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5445 return (EINVAL);
5446 }
5447 started_tr = 1;
5448 /*
5449 * Protect the extents b-tree and the allocation bitmap
5450 * during MapFileBlockC and ExtendFileC operations.
5451 */
5452 lockflags = SFL_BITMAP;
5453 if (overflow_extents(fp))
5454 lockflags |= SFL_EXTENTS;
5455 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5456
5457 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
5458 if (retval) {
5459 retval = MacToVFSError(retval);
5460 goto out;
5461 }
5462
5463 /*
5464 * STEP 1 - acquire new allocation blocks.
5465 */
5466 nextallocsave = hfsmp->nextAllocation;
5467 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
5468 if (eflags & kEFMetadataMask) {
5469 hfs_lock_mount(hfsmp);
5470 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
5471 MarkVCBDirty(hfsmp);
5472 hfs_unlock_mount(hfsmp);
5473 }
5474
5475 retval = MacToVFSError(retval);
5476 if (retval == 0) {
5477 cp->c_flag |= C_MODIFIED;
5478 if (newbytes < growsize) {
5479 retval = ENOSPC;
5480 goto restore;
5481 } else if (fp->ff_blocks < (headblks + datablks)) {
5482 printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN);
5483 retval = ENOSPC;
5484 goto restore;
5485 }
5486
5487 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
5488 if (retval) {
5489 retval = MacToVFSError(retval);
5490 } else if ((sector_a + 1) == sector_b) {
5491 retval = ENOSPC;
5492 goto restore;
5493 } else if ((eflags & kEFMetadataMask) &&
5494 ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
5495 hfsmp->hfs_metazone_end)) {
5496 #if 0
5497 const char * filestr;
5498 char emptystr = '\0';
5499
5500 if (cp->c_desc.cd_nameptr != NULL) {
5501 filestr = (const char *)&cp->c_desc.cd_nameptr[0];
5502 } else if (vnode_name(vp) != NULL) {
5503 filestr = vnode_name(vp);
5504 } else {
5505 filestr = &emptystr;
5506 }
5507 #endif
5508 retval = ENOSPC;
5509 goto restore;
5510 }
5511 }
5512 /* Done with system locks and journal for now. */
5513 hfs_systemfile_unlock(hfsmp, lockflags);
5514 lockflags = 0;
5515 hfs_end_transaction(hfsmp);
5516 started_tr = 0;
5517
5518 if (retval) {
5519 /*
5520 * Check to see if failure is due to excessive fragmentation.
5521 */
5522 if ((retval == ENOSPC) &&
5523 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
5524 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
5525 }
5526 goto out;
5527 }
5528 /*
5529 * STEP 2 - clone file data into the new allocation blocks.
5530 */
5531
5532 if (vnodetype == VLNK)
5533 retval = EPERM;
5534 else if (vnode_issystem(vp))
5535 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
5536 else
5537 retval = hfs_clonefile(vp, headblks, datablks, blksize);
5538
5539 /* Start transaction for step 3 or for a restore. */
5540 if (hfs_start_transaction(hfsmp) != 0) {
5541 retval = EINVAL;
5542 goto out;
5543 }
5544 started_tr = 1;
5545 if (retval)
5546 goto restore;
5547
5548 /*
5549 * STEP 3 - switch to cloned data and remove old blocks.
5550 */
5551 lockflags = SFL_BITMAP;
5552 if (overflow_extents(fp))
5553 lockflags |= SFL_EXTENTS;
5554 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5555
5556 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
5557
5558 hfs_systemfile_unlock(hfsmp, lockflags);
5559 lockflags = 0;
5560 if (retval)
5561 goto restore;
5562 out:
5563 if (took_trunc_lock)
5564 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5565
5566 if (lockflags) {
5567 hfs_systemfile_unlock(hfsmp, lockflags);
5568 lockflags = 0;
5569 }
5570
5571 /* Push cnode's new extent data to disk. */
5572 if (retval == 0) {
5573 hfs_update(vp, 0);
5574 }
5575 if (hfsmp->jnl) {
5576 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
5577 (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
5578 else
5579 (void) hfs_flushvolumeheader(hfsmp, 0);
5580 }
5581 exit:
5582 if (started_tr)
5583 hfs_end_transaction(hfsmp);
5584
5585 return (retval);
5586
5587 restore:
5588 if (fp->ff_blocks == headblks) {
5589 if (took_trunc_lock)
5590 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5591 goto exit;
5592 }
5593 /*
5594 * Give back any newly allocated space.
5595 */
5596 if (lockflags == 0) {
5597 lockflags = SFL_BITMAP;
5598 if (overflow_extents(fp))
5599 lockflags |= SFL_EXTENTS;
5600 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5601 }
5602
5603 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
5604 FTOC(fp)->c_fileid, false);
5605
5606 hfs_systemfile_unlock(hfsmp, lockflags);
5607 lockflags = 0;
5608
5609 if (took_trunc_lock)
5610 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5611 goto exit;
5612 }
5613
5614
5615 /*
5616 * Clone a file's data within the file.
5617 *
5618 */
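/*
 * Editorial summary of the routine below (added for clarity): it copies
 * copysize = blkcnt * blksize bytes in chunks of at most 128 KiB, reading
 * each chunk from file offset `offset` with cluster_read(..., IO_NOCACHE)
 * and rewriting it at `writebase + offset` (into the newly allocated blocks)
 * with cluster_write(..., IO_NOCACHE | IO_SYNC).
 */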
5619 static int
5620 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
5621 {
5622 caddr_t bufp;
5623 size_t bufsize;
5624 size_t copysize;
5625 size_t iosize;
5626 size_t offset;
5627 off_t writebase;
5628 uio_t auio;
5629 int error = 0;
5630
5631 writebase = blkstart * blksize;
5632 copysize = blkcnt * blksize;
5633 iosize = bufsize = MIN(copysize, 128 * 1024);
5634 offset = 0;
5635
5636 hfs_unlock(VTOC(vp));
5637
5638 #if CONFIG_PROTECT
5639 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
5640 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5641 return (error);
5642 }
5643 #endif /* CONFIG_PROTECT */
5644
5645 bufp = hfs_malloc(bufsize);
5646
5647 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
5648
5649 while (offset < copysize) {
5650 iosize = MIN(copysize - offset, iosize);
5651
5652 uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
5653 uio_addiov(auio, (uintptr_t)bufp, iosize);
5654
5655 error = cluster_read(vp, auio, copysize, IO_NOCACHE);
5656 if (error) {
5657 printf("hfs_clonefile: cluster_read failed - %d\n", error);
5658 break;
5659 }
5660 if (uio_resid(auio) != 0) {
5661 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
5662 error = EIO;
5663 break;
5664 }
5665
5666 uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
5667 uio_addiov(auio, (uintptr_t)bufp, iosize);
5668
5669 error = cluster_write(vp, auio, writebase + offset,
5670 writebase + offset + iosize,
5671 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
5672 if (error) {
5673 printf("hfs_clonefile: cluster_write failed - %d\n", error);
5674 break;
5675 }
5676 if (uio_resid(auio) != 0) {
5677 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
5678 error = EIO;
5679 break;
5680 }
5681 offset += iosize;
5682 }
5683 uio_free(auio);
5684
5685 if ((blksize & PAGE_MASK)) {
5686 /*
5687 * Since the copy may not have started on a page
5688 * boundary (or may not have ended on one), we
5689 * may have pages left in the cache: IO_NOCACHE
5690 * lets partially written pages linger.  Let's
5691 * just flush the entire range to make sure we
5692 * don't leave any pages that lie beyond (or
5693 * intersect) the real LEOF of this file.
5694 */
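/*
 * Example: on a volume with 2048-byte allocation blocks and 4096-byte
 * pages, writebase = blkstart * 2048 can start halfway into a page, so
 * the first and last pages of the copied range may be only partially
 * written and are pushed and invalidated here.
 */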
5695 ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
5696 } else {
5697 /*
5698 * No need to call ubc_msync or hfs_invalbuf
5699 * since the file was copied using IO_NOCACHE and
5700 * the copy was done starting and ending on a page
5701 * boundary in the file.
5702 */
5703 }
5704 hfs_free(bufp, bufsize);
5705
5706 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5707 return (error);
5708 }
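
#if 0
/*
 * Illustrative sketch (not compiled): the uio copy pattern used by
 * hfs_clonefile() above, reduced to its core.  The helper name and the
 * 128 KiB chunk size are examples only, not an HFS interface, and the
 * error handling is abbreviated.
 */
static int
clone_range_sketch(struct vnode *vp, off_t src, off_t dst, size_t len)
{
	size_t chunk = MIN(len, 128 * 1024);
	caddr_t buf = hfs_malloc(chunk);
	uio_t auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
	size_t done = 0;
	int error = 0;

	while (done < len) {
		size_t io = MIN(len - done, chunk);

		/* Read one chunk from the source range into the kernel buffer. */
		uio_reset(auio, src + done, UIO_SYSSPACE, UIO_READ);
		uio_addiov(auio, (uintptr_t)buf, io);
		error = cluster_read(vp, auio, src + len, IO_NOCACHE);
		if (error)
			break;

		/* Write the same chunk back at the destination range. */
		uio_reset(auio, dst + done, UIO_SYSSPACE, UIO_WRITE);
		uio_addiov(auio, (uintptr_t)buf, io);
		error = cluster_write(vp, auio, dst + done, dst + done + io,
		                      uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
		if (error)
			break;

		done += io;
	}
	uio_free(auio);
	hfs_free(buf, chunk);
	return error;
}
#endif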
5709
5710 /*
5711 * Clone a system (metadata) file.
5712 *
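* Copies the first 'blkcnt' allocation blocks of the system file into
* the new allocation at 'blkstart', going through the buffer cache
* (buf_meta_bread / buf_bwrite) in batches of up to 1 MiB, and then
* fsyncs the vnode.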
5713 */
5714 static int
5715 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
5716 kauth_cred_t cred, struct proc *p)
5717 {
5718 caddr_t bufp;
5719 char * offset;
5720 size_t bufsize;
5721 size_t iosize;
5722 struct buf *bp = NULL;
5723 daddr64_t blkno;
5724 daddr64_t blk;
5725 daddr64_t start_blk;
5726 daddr64_t last_blk;
5727 int breadcnt;
5728 int i;
5729 int error = 0;
5730
5731
5732 iosize = GetLogicalBlockSize(vp);
5733 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
5734 breadcnt = bufsize / iosize;
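/*
 * Example: with a 4096-byte logical block size and plenty of data to
 * move, bufsize is 1 MiB rounded down to a block multiple and
 * breadcnt is 256 logical blocks per batch.
 */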
5735
5736 bufp = hfs_malloc(bufsize);
5737
5738 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
5739 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
5740 blkno = 0;
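/*
 * 'blkno' indexes the source data in units of the vnode's logical block
 * size, starting at block 0 of the fork; each block is rewritten below
 * at 'start_blk + blkno', i.e. the same data shifted into the new
 * allocation that begins at allocation block 'blkstart'.
 */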
5741
5742 while (blkno < last_blk) {
5743 /*
5744 * Read up to a megabyte
5745 */
5746 offset = bufp;
5747 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
5748 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
5749 if (error) {
5750 printf("hfs_clonesysfile: meta_bread error %d\n", error);
5751 goto out;
5752 }
5753 if (buf_count(bp) != iosize) {
5754 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
5755 goto out;
5756 }
5757 bcopy((char *)buf_dataptr(bp), offset, iosize);
5758
5759 buf_markinvalid(bp);
5760 buf_brelse(bp);
5761 bp = NULL;
5762
5763 offset += iosize;
5764 }
5765
5766 /*
5767 * Write up to a megabyte
5768 */
5769 offset = bufp;
5770 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
5771 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
5772 if (bp == NULL) {
5773 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
5774 error = EIO;
5775 goto out;
5776 }
5777 bcopy(offset, (char *)buf_dataptr(bp), iosize);
5778 error = (int)buf_bwrite(bp);
5779 bp = NULL;
5780 if (error)
5781 goto out;
5782 offset += iosize;
5783 }
5784 }
5785 out:
5786 if (bp) {
5787 buf_brelse(bp);
5788 }
5789
5790 hfs_free(bufp, bufsize);
5791
5792 error = hfs_fsync(vp, MNT_WAIT, 0, p);
5793
5794 return (error);
5795 }
5796
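/*
 * Flush a file's invalid (not-yet-written, must-read-as-zero) ranges out
 * to disk by zero-filling them through the cluster layer.  Does nothing
 * unless a zero-fill sync was requested (C_ZFWANTSYNC) or a zero-fill
 * timeout is pending.  Expects both the cnode lock and the truncate lock
 * to be held by the caller (see the asserts below); the cnode lock is
 * dropped and retaken around the cluster calls.  Returns 0 or an errno.
 */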
5797 errno_t hfs_flush_invalid_ranges(vnode_t vp)
5798 {
5799 cnode_t *cp = VTOC(vp);
5800
5801 hfs_assert(cp->c_lockowner == current_thread());
5802 hfs_assert(cp->c_truncatelockowner == current_thread());
5803
5804 if (!ISSET(cp->c_flag, C_ZFWANTSYNC) && !cp->c_zftimeout)
5805 return 0;
5806
5807 filefork_t *fp = VTOF(vp);
5808
5809 /*
5810 * We can't hold the cnode lock whilst we call cluster_write so we
5811 * need to copy the extents into a local buffer.
5812 */
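/*
 * The extents are gathered into a small on-stack array first (16
 * entries); if the file has more invalid ranges than that, the code
 * below switches to a 256-entry heap buffer and, if even that fills,
 * flushes in batches and rescans the list after retaking the lock.
 */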
5813 int max_exts = 16;
5814 struct ext {
5815 off_t start, end;
5816 } exts_buf[max_exts]; // 256 bytes
5817 struct ext *exts = exts_buf;
5818 int ext_count = 0;
5819 errno_t ret;
5820
5821 struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges);
5822
5823 while (r) {
5824 /* If we have more than can fit in our stack buffer, switch
5825 to a heap buffer. */
5826 if (exts == exts_buf && ext_count == max_exts) {
5827 max_exts = 256;
5828 exts = hfs_malloc(sizeof(struct ext) * max_exts);
5829 memcpy(exts, exts_buf, ext_count * sizeof(struct ext));
5830 }
5831
5832 struct rl_entry *next = TAILQ_NEXT(r, rl_link);
5833
5834 exts[ext_count++] = (struct ext){ r->rl_start, r->rl_end };
5835
5836 if (!next || (ext_count == max_exts && exts != exts_buf)) {
5837 hfs_unlock(cp);
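/*
 * Each cluster_write below is issued with a NULL uio and
 * IO_HEADZEROFILL, which zero-fills the invalid range [start, end]
 * rather than writing any caller-supplied data; the other flags limit
 * the cache side effects of the zero fill.
 */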
5838 for (int i = 0; i < ext_count; ++i) {
5839 ret = cluster_write(vp, NULL, fp->ff_size, exts[i].end + 1,
5840 exts[i].start, 0,
5841 IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE);
5842 if (ret) {
5843 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
5844 goto exit;
5845 }
5846 }
5847
5848 if (!next) {
5849 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
5850 break;
5851 }
5852
5853 /* Push any existing clusters, which should clean up our invalid
5854 ranges as they go through hfs_vnop_blockmap. */
5855 cluster_push(vp, 0);
5856
5857 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
5858
5859 /*
5860 * Get back to where we were (we dropped the lock).  There
5861 * shouldn't be many ranges left to skip because we pushed above.
5862 */
5863 TAILQ_FOREACH(r, &fp->ff_invalidranges, rl_link) {
5864 if (r->rl_end > exts[ext_count - 1].end)
5865 break;
5866 }
5867
5868 ext_count = 0;
5869 } else
5870 r = next;
5871 }
5872
5873 ret = 0;
5874
5875 exit:
5876
5877 if (exts != exts_buf)
5878 hfs_free(exts, sizeof(struct ext) * max_exts);
5879
5880 return ret;
5881 }