1 /*
2 * Copyright (c) 2000-2020 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* @(#)hfs_readwrite.c 1.0
29 *
30 * (c) 1998-2001 Apple Inc. All Rights Reserved
31 *
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
33 *
34 */
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/fcntl.h>
40 #include <sys/stat.h>
41 #include <sys/buf.h>
42 #include <sys/proc.h>
43 #include <sys/kauth.h>
44 #include <sys/vnode.h>
45 #include <sys/uio.h>
46 #include <sys/vfs_context.h>
47 #include <sys/disk.h>
48 #include <sys/sysctl.h>
49 #include <sys/fsctl.h>
50 #include <sys/ubc.h>
51 #include <sys/fsevents.h>
52 #include <uuid/uuid.h>
53
54 #include <libkern/OSDebug.h>
55
56 #include <miscfs/specfs/specdev.h>
57
58 #include <sys/ubc.h>
59
60 #include <vm/vm_pageout.h>
61 #include <vm/vm_kern.h>
62
63 #include <IOKit/IOBSD.h>
64
65 #include <sys/kdebug.h>
66
67 #include "hfs.h"
68 #include "hfs_attrlist.h"
69 #include "hfs_endian.h"
70 #include "hfs_fsctl.h"
71 #include "hfs_quota.h"
72 #include "FileMgrInternal.h"
73 #include "BTreesInternal.h"
74 #include "hfs_cnode.h"
75 #include "hfs_dbg.h"
76
77 #if HFS_CONFIG_KEY_ROLL
78 #include "hfs_key_roll.h"
79 #endif
80
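/* A transfer can use cluster I/O when its size is a multiple of 4 KB and no larger than half of MAXPHYSIO. */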
81 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
82
83 enum {
84 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
85 };
86
87 /* from bsd/hfs/hfs_vfsops.c */
88 extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
89
90 /* from hfs_hotfiles.c */
91 extern int hfs_pin_overflow_extents (struct hfsmount *hfsmp, uint32_t fileid,
92 uint8_t forktype, uint32_t *pinned);
93
94 static int hfs_clonefile(struct vnode *, int, int, int);
95 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
96 static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
97
98
99 /*
100 * Read data from a file.
101 */
102 int
103 hfs_vnop_read(struct vnop_read_args *ap)
104 {
105 /*
106 struct vnop_read_args {
107 struct vnodeop_desc *a_desc;
108 vnode_t a_vp;
109 struct uio *a_uio;
110 int a_ioflag;
111 vfs_context_t a_context;
112 };
113 */
114
115 uio_t uio = ap->a_uio;
116 struct vnode *vp = ap->a_vp;
117 struct cnode *cp;
118 struct filefork *fp;
119 struct hfsmount *hfsmp;
120 off_t filesize;
121 off_t filebytes;
122 off_t start_resid = uio_resid(uio);
123 off_t offset = uio_offset(uio);
124 int retval = 0;
125 int took_truncate_lock = 0;
126 int io_throttle = 0;
127 int throttled_count = 0;
128
129 /* Preflight checks */
130 if (!vnode_isreg(vp)) {
131 /* can only read regular files */
132 if (vnode_isdir(vp))
133 return (EISDIR);
134 else
135 return (EPERM);
136 }
137 if (start_resid == 0)
138 return (0); /* Nothing left to do */
139 if (offset < 0)
140 		return (EINVAL);	/* can't read from a negative offset */
141
142 #if SECURE_KERNEL
143 if ((ap->a_ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
144 (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
145 		/* Don't allow unencrypted I/O requests from user space */
146 return EPERM;
147 }
148 #endif
149
150 #if HFS_COMPRESSION
151 if (VNODE_IS_RSRC(vp)) {
152 if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
153 return 0;
154 }
155 /* otherwise read the resource fork normally */
156 } else {
157 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
158 if (compressed) {
159 retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
160 if (retval == 0 && !(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
161 (void) hfs_addhotfile(vp);
162 }
163 if (compressed) {
164 if (retval == 0) {
165 /* successful read, update the access time */
166 VTOC(vp)->c_touch_acctime = TRUE;
167
168 //
169 // compressed files are not traditional hot file candidates
170 // but they may be for CF (which ignores the ff_bytesread
171 // field)
172 //
173 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
174 VTOF(vp)->ff_bytesread = 0;
175 }
176 }
177 return retval;
178 }
179 /* otherwise the file was converted back to a regular file while we were reading it */
180 retval = 0;
181 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
182 int error;
183
184 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
185 if (error) {
186 return error;
187 }
188
189 }
190 }
191 #endif /* HFS_COMPRESSION */
192
193 cp = VTOC(vp);
194 fp = VTOF(vp);
195 hfsmp = VTOHFS(vp);
196
197 #if CONFIG_PROTECT
198 if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {
199 goto exit;
200 }
201
202 #if HFS_CONFIG_KEY_ROLL
203 if (ISSET(ap->a_ioflag, IO_ENCRYPTED)) {
204 off_rsrc_t off_rsrc = off_rsrc_make(offset + start_resid,
205 VNODE_IS_RSRC(vp));
206
207 retval = hfs_key_roll_up_to(ap->a_context, vp, off_rsrc);
208 if (retval)
209 goto exit;
210 }
211 #endif // HFS_CONFIG_KEY_ROLL
212 #endif // CONFIG_PROTECT
213
214 /*
215 * If this read request originated from a syscall (as opposed to
216 * an in-kernel page fault or something), then set it up for
217 * throttle checks
218 */
219 if (ap->a_ioflag & IO_SYSCALL_DISPATCH) {
220 io_throttle = IO_RETURN_ON_THROTTLE;
221 }
222
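	/*
	 * Restart point: if cluster_read returns EAGAIN because the I/O was
	 * throttled, we drop the truncate lock, call throttle_lowpri_io(),
	 * and retry from here.
	 */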
223 read_again:
224
225 /* Protect against a size change. */
226 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
227 took_truncate_lock = 1;
228
229 filesize = fp->ff_size;
230 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
231
232 /*
233 * Check the file size. Note that per POSIX spec, we return 0 at
234 * file EOF, so attempting a read at an offset that is too big
235 * should just return 0 on HFS+. Since the return value was initialized
236 * to 0 above, we just jump to exit. HFS Standard has its own behavior.
237 */
238 if (offset > filesize) {
239 #if CONFIG_HFS_STD
240 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
241 (offset > (off_t)MAXHFSFILESIZE)) {
242 retval = EFBIG;
243 }
244 #endif
245 goto exit;
246 }
247
248 KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_START,
249 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
250
251 	retval = cluster_read(vp, uio, filesize, ap->a_ioflag | io_throttle);
252
253 cp->c_touch_acctime = TRUE;
254
255 KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_END,
256 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
257
258 /*
259 	 * Keep track of blocks read
260 */
261 if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
262 int took_cnode_lock = 0;
263 off_t bytesread;
264
265 bytesread = start_resid - uio_resid(uio);
266
267 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
268 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
269 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
270 took_cnode_lock = 1;
271 }
272 /*
273 * If this file hasn't been seen since the start of
274 * the current sampling period then start over.
275 */
276 if (cp->c_atime < hfsmp->hfc_timebase) {
277 struct timeval tv;
278
279 fp->ff_bytesread = bytesread;
280 microtime(&tv);
281 cp->c_atime = tv.tv_sec;
282 } else {
283 fp->ff_bytesread += bytesread;
284 }
285
286 if (!(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
287 //
288 // We don't add hotfiles for processes doing IO_EVTONLY I/O
289 // on the assumption that they're system processes such as
290 // mdworker which scan everything in the system (and thus
291 // do not represent user-initiated access to files)
292 //
293 (void) hfs_addhotfile(vp);
294 }
295 if (took_cnode_lock)
296 hfs_unlock(cp);
297 }
298 exit:
299 if (took_truncate_lock) {
300 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
301 }
302 if (retval == EAGAIN) {
303 throttle_lowpri_io(1);
304 throttled_count++;
305
306 retval = 0;
307 goto read_again;
308 }
309 if (throttled_count)
310 throttle_info_reset_window(NULL);
311 return (retval);
312 }
313
314 /*
315 * Ideally, this wouldn't be necessary; the cluster code should be
316 * able to handle this on the read-side. See <rdar://20420068>.
317 */
318 static errno_t hfs_zero_eof_page(vnode_t vp, off_t zero_up_to)
319 {
320 hfs_assert(VTOC(vp)->c_lockowner != current_thread());
321 hfs_assert(VTOC(vp)->c_truncatelockowner == current_thread());
322
323 struct filefork *fp = VTOF(vp);
324
325 if (!(fp->ff_size & PAGE_MASK_64) || zero_up_to <= fp->ff_size) {
326 // Nothing to do
327 return 0;
328 }
329
330 zero_up_to = MIN(zero_up_to, (off_t)round_page_64(fp->ff_size));
331
332 /* N.B. At present, @zero_up_to is not important because the cluster
333 code will always zero up to the end of the page anyway. */
334 return cluster_write(vp, NULL, fp->ff_size, zero_up_to,
335 fp->ff_size, 0, IO_HEADZEROFILL);
336 }
337
338 /*
339 * Write data to a file.
340 */
341 int
342 hfs_vnop_write(struct vnop_write_args *ap)
343 {
344 uio_t uio = ap->a_uio;
345 struct vnode *vp = ap->a_vp;
346 struct cnode *cp;
347 struct filefork *fp;
348 struct hfsmount *hfsmp;
349 kauth_cred_t cred = NULL;
350 off_t origFileSize;
351 off_t writelimit;
352 off_t bytesToAdd = 0;
353 off_t actualBytesAdded;
354 off_t filebytes;
355 off_t offset;
356 ssize_t resid;
357 int eflags = 0;
358 int ioflag = ap->a_ioflag;
359 int retval = 0;
360 int lockflags;
361 int cnode_locked = 0;
362 int partialwrite = 0;
363 int do_snapshot = 1;
364 time_t orig_ctime=VTOC(vp)->c_ctime;
365 int took_truncate_lock = 0;
366 int io_return_on_throttle = 0;
367 int throttled_count = 0;
368
369 #if HFS_COMPRESSION
370 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
371 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
372 switch(state) {
373 case FILE_IS_COMPRESSED:
374 return EACCES;
375 case FILE_IS_CONVERTING:
376 /* if FILE_IS_CONVERTING, we allow writes but do not
377 bother with snapshots or else we will deadlock.
378 */
379 do_snapshot = 0;
380 break;
381 default:
382 printf("invalid state %d for compressed file\n", state);
383 /* fall through */
384 }
385 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
386 int error;
387
388 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
389 if (error != 0) {
390 return error;
391 }
392 }
393
394 if (do_snapshot) {
395 nspace_snapshot_event(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);
396 }
397
398 #endif
399
400 #if SECURE_KERNEL
401 if ((ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
402 (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
403 		/* Don't allow unencrypted I/O requests from user space */
404 return EPERM;
405 }
406 #endif
407
408 resid = uio_resid(uio);
409 offset = uio_offset(uio);
410
411 if (offset < 0)
412 return (EINVAL);
413 if (resid == 0)
414 return (E_NONE);
415 if (!vnode_isreg(vp))
416 return (EPERM); /* Can only write regular files */
417
418 cp = VTOC(vp);
419 fp = VTOF(vp);
420 hfsmp = VTOHFS(vp);
421
422 #if CONFIG_PROTECT
423 if ((retval = cp_handle_vnop (vp, CP_WRITE_ACCESS, 0)) != 0) {
424 goto exit;
425 }
426 #endif
427
428 eflags = kEFDeferMask; /* defer file block allocations */
429 #if HFS_SPARSE_DEV
430 /*
431 * When the underlying device is sparse and space
432 * is low (< 8MB), stop doing delayed allocations
433 * and begin doing synchronous I/O.
434 */
435 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
436 (hfs_freeblks(hfsmp, 0) < 2048)) {
437 eflags &= ~kEFDeferMask;
438 ioflag |= IO_SYNC;
439 }
440 #endif /* HFS_SPARSE_DEV */
441
442 if ((ioflag & (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) ==
443 (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) {
444 io_return_on_throttle = IO_RETURN_ON_THROTTLE;
445 }
446
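	/*
	 * Restart point: we also come back here if cluster_write returns
	 * EAGAIN because the I/O was throttled (see the bottom of this
	 * function).
	 */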
447 again:
448 /*
449 * Protect against a size change.
450 *
451 * Note: If took_truncate_lock is true, then we previously got the lock shared
452 * but needed to upgrade to exclusive. So try getting it exclusive from the
453 * start.
454 */
455 if (ioflag & IO_APPEND || took_truncate_lock) {
456 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
457 }
458 else {
459 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
460 }
461 took_truncate_lock = 1;
462
463 /* Update UIO */
464 if (ioflag & IO_APPEND) {
465 uio_setoffset(uio, fp->ff_size);
466 offset = fp->ff_size;
467 }
468 if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
469 retval = EPERM;
470 goto exit;
471 }
472
473 cred = vfs_context_ucred(ap->a_context);
474 if (cred && suser(cred, NULL) != 0)
475 eflags |= kEFReserveMask;
476
477 origFileSize = fp->ff_size;
478 writelimit = offset + resid;
479
480 /*
481 * We may need an exclusive truncate lock for several reasons, all
482 * of which are because we may be writing to a (portion of a) block
483 * for the first time, and we need to make sure no readers see the
484 * prior, uninitialized contents of the block. The cases are:
485 *
486 * 1. We have unallocated (delayed allocation) blocks. We may be
487 * allocating new blocks to the file and writing to them.
488 * (A more precise check would be whether the range we're writing
489 * to contains delayed allocation blocks.)
490 * 2. We need to extend the file. The bytes between the old EOF
491 * and the new EOF are not yet initialized. This is important
492 * even if we're not allocating new blocks to the file. If the
493 * old EOF and new EOF are in the same block, we still need to
494 * protect that range of bytes until they are written for the
495 * first time.
496 *
497 * If we had a shared lock with the above cases, we need to try to upgrade
498 * to an exclusive lock. If the upgrade fails, we will lose the shared
499 * lock, and will need to take the truncate lock again; the took_truncate_lock
500 * flag will still be set, causing us to try for an exclusive lock next time.
501 */
502 if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
503 ((fp->ff_unallocblocks != 0) ||
504 (writelimit > origFileSize))) {
505 if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
506 /*
507 * Lock upgrade failed and we lost our shared lock, try again.
508 * Note: we do not set took_truncate_lock=0 here. Leaving it
509 * set to 1 will cause us to try to get the lock exclusive.
510 */
511 goto again;
512 }
513 else {
514 /* Store the owner in the c_truncatelockowner field if we successfully upgrade */
515 cp->c_truncatelockowner = current_thread();
516 }
517 }
518
519 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
520 goto exit;
521 }
522 cnode_locked = 1;
523
524 filebytes = hfs_blk_to_bytes(fp->ff_blocks, hfsmp->blockSize);
525
526 if (offset > filebytes
527 && (hfs_blk_to_bytes(hfs_freeblks(hfsmp, ISSET(eflags, kEFReserveMask)),
528 hfsmp->blockSize) < offset - filebytes)) {
529 retval = ENOSPC;
530 goto exit;
531 }
532
533 KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_START,
534 (int)offset, uio_resid(uio), (int)fp->ff_size,
535 (int)filebytes, 0);
536
537 	/* If the write fits within the currently allocated blocks, we don't need to extend the file */
538 if (writelimit <= filebytes) {
539 goto sizeok;
540 }
541
542 bytesToAdd = writelimit - filebytes;
543
544 #if QUOTA
545 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
546 cred, 0);
547 if (retval)
548 goto exit;
549 #endif /* QUOTA */
550
551 if (hfs_start_transaction(hfsmp) != 0) {
552 retval = EINVAL;
553 goto exit;
554 }
555
556 while (writelimit > filebytes) {
557 bytesToAdd = writelimit - filebytes;
558
559 /* Protect extents b-tree and allocation bitmap */
560 lockflags = SFL_BITMAP;
561 if (overflow_extents(fp))
562 lockflags |= SFL_EXTENTS;
563 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
564
565 /* Files that are changing size are not hot file candidates. */
566 if (hfsmp->hfc_stage == HFC_RECORDING) {
567 fp->ff_bytesread = 0;
568 }
569 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
570 0, eflags, &actualBytesAdded));
571
572 hfs_systemfile_unlock(hfsmp, lockflags);
573
574 if ((actualBytesAdded == 0) && (retval == E_NONE))
575 retval = ENOSPC;
576 if (retval != E_NONE)
577 break;
578 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
579 KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_NONE,
580 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
581 }
582 (void) hfs_update(vp, 0);
583 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
584 (void) hfs_end_transaction(hfsmp);
585
586 /*
587 	 * If we didn't grow the file enough, try a partial write.
588 * POSIX expects this behavior.
589 */
590 if ((retval == ENOSPC) && (filebytes > offset)) {
591 retval = 0;
592 partialwrite = 1;
593 uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
594 resid -= bytesToAdd;
595 writelimit = filebytes;
596 }
597 sizeok:
598 if (retval == E_NONE) {
599 off_t filesize;
600 off_t head_off;
601 int lflag;
602
603 if (writelimit > fp->ff_size) {
604 filesize = writelimit;
605 struct timeval tv;
606 rl_add(fp->ff_size, writelimit - 1 , &fp->ff_invalidranges);
607 microuptime(&tv);
608 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
609 } else
610 filesize = fp->ff_size;
611
612 lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
613
614 /*
615 * We no longer use IO_HEADZEROFILL or IO_TAILZEROFILL (except
616 * for one case below). For the regions that lie before the
617 * beginning and after the end of this write that are in the
618 * same page, we let the cluster code handle zeroing that out
619 * if necessary. If those areas are not cached, the cluster
620 * code will try and read those areas in, and in the case
621 * where those regions have never been written to,
622 * hfs_vnop_blockmap will consult the invalid ranges and then
623 * indicate that. The cluster code will zero out those areas.
624 */
625
626 head_off = trunc_page_64(offset);
627
628 if (head_off < offset && head_off >= fp->ff_size) {
629 /*
630 * The first page is beyond current EOF, so as an
631 * optimisation, we can pass IO_HEADZEROFILL.
632 */
633 lflag |= IO_HEADZEROFILL;
634 }
635
636 hfs_unlock(cp);
637 cnode_locked = 0;
638
639 /*
640 * We need to tell UBC the fork's new size BEFORE calling
641 * cluster_write, in case any of the new pages need to be
642 * paged out before cluster_write completes (which does happen
643 * in embedded systems due to extreme memory pressure).
644 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
645 * will be, so that it can pass that on to cluster_pageout, and
646 * allow those pageouts.
647 *
648 * We don't update ff_size yet since we don't want pageins to
649 * be able to see uninitialized data between the old and new
650 * EOF, until cluster_write has completed and initialized that
651 * part of the file.
652 *
653 * The vnode pager relies on the file size last given to UBC via
654 * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
655 * ff_size (whichever is larger). NOTE: ff_new_size is always
656 * zero, unless we are extending the file via write.
657 */
658 if (filesize > fp->ff_size) {
659 retval = hfs_zero_eof_page(vp, offset);
660 if (retval)
661 goto exit;
662 fp->ff_new_size = filesize;
663 ubc_setsize(vp, filesize);
664 }
665 retval = cluster_write(vp, uio, fp->ff_size, filesize, head_off,
666 0, lflag | IO_NOZERODIRTY | io_return_on_throttle);
667 if (retval) {
668 fp->ff_new_size = 0; /* no longer extending; use ff_size */
669
670 if (retval == EAGAIN) {
671 /*
672 * EAGAIN indicates that we still have I/O to do, but
673 * that we now need to be throttled
674 */
675 if (resid != uio_resid(uio)) {
676 /*
677 * did manage to do some I/O before returning EAGAIN
678 */
679 resid = uio_resid(uio);
680 offset = uio_offset(uio);
681
682 cp->c_touch_chgtime = TRUE;
683 cp->c_touch_modtime = TRUE;
684 hfs_incr_gencount(cp);
685 }
686 if (filesize > fp->ff_size) {
687 /*
688 * we called ubc_setsize before the call to
689 * cluster_write... since we only partially
690 * completed the I/O, we need to
691 * re-adjust our idea of the filesize based
692 * on our interim EOF
693 */
694 ubc_setsize(vp, offset);
695
696 fp->ff_size = offset;
697 }
698 goto exit;
699 }
700 if (filesize > origFileSize) {
701 ubc_setsize(vp, origFileSize);
702 }
703 goto ioerr_exit;
704 }
705
706 if (filesize > origFileSize) {
707 fp->ff_size = filesize;
708
709 /* Files that are changing size are not hot file candidates. */
710 if (hfsmp->hfc_stage == HFC_RECORDING) {
711 fp->ff_bytesread = 0;
712 }
713 }
714 fp->ff_new_size = 0; /* ff_size now has the correct size */
715 }
716 if (partialwrite) {
717 uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
718 resid += bytesToAdd;
719 }
720
721 if (vnode_should_flush_after_write(vp, ioflag))
722 hfs_flush(hfsmp, HFS_FLUSH_CACHE);
723
724 ioerr_exit:
725 if (!cnode_locked) {
726 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
727 cnode_locked = 1;
728 }
729
730 if (resid > uio_resid(uio)) {
731 cp->c_touch_chgtime = TRUE;
732 cp->c_touch_modtime = TRUE;
733 hfs_incr_gencount(cp);
734
735 /*
736 		/*
737 		 * If we successfully wrote any data and we are not the superuser,
737 		 * we clear the setuid and setgid bits as a precaution against
738 * tampering.
739 */
740 if (cp->c_mode & (S_ISUID | S_ISGID)) {
741 cred = vfs_context_ucred(ap->a_context);
742 if (cred && suser(cred, NULL)) {
743 cp->c_mode &= ~(S_ISUID | S_ISGID);
744 }
745 }
746 }
747 if (retval) {
748 if (ioflag & IO_UNIT) {
749 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
750 0, ap->a_context);
751 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
752 uio_setresid(uio, resid);
753 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
754 }
755 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio)))
756 retval = hfs_update(vp, 0);
757
758 /* Updating vcbWrCnt doesn't need to be atomic. */
759 hfsmp->vcbWrCnt++;
760
761 KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_END,
762 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
763 exit:
764 if (retval && took_truncate_lock
765 && cp->c_truncatelockowner == current_thread()) {
766 fp->ff_new_size = 0;
767 rl_remove(fp->ff_size, RL_INFINITY, &fp->ff_invalidranges);
768 }
769
770 if (cnode_locked)
771 hfs_unlock(cp);
772
773 if (took_truncate_lock) {
774 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
775 }
776 if (retval == EAGAIN) {
777 throttle_lowpri_io(1);
778 throttled_count++;
779
780 retval = 0;
781 goto again;
782 }
783 if (throttled_count)
784 throttle_info_reset_window(NULL);
785 return (retval);
786 }
787
788 /* support for the "bulk-access" fcntl */
789
790 #define CACHE_LEVELS 16
791 #define NUM_CACHE_ENTRIES (64*16)
792 #define PARENT_IDS_FLAG 0x100
793
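/*
 * Per-call cache used by the bulk-access code: acache is a sorted array
 * of directory CNIDs and haveaccess holds the access result recorded for
 * each of them; numcached entries are valid.
 */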
794 struct access_cache {
795 int numcached;
796 int cachehits; /* these two for statistics gathering */
797 int lookups;
798 unsigned int *acache;
799 unsigned char *haveaccess;
800 };
801
802 struct access_t {
803 uid_t uid; /* IN: effective user id */
804 short flags; /* IN: access requested (i.e. R_OK) */
805 short num_groups; /* IN: number of groups user belongs to */
806 int num_files; /* IN: number of files to process */
807 int *file_ids; /* IN: array of file ids */
808 gid_t *groups; /* IN: array of groups */
809 short *access; /* OUT: access info for each file (0 for 'has access') */
810 } __attribute__((unavailable)); // this structure is for reference purposes only
811
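/*
 * 32-bit and 64-bit userland layouts of access_t; the pointer fields are
 * carried as user32_addr_t / user64_addr_t so the kernel can handle
 * callers of either word size.
 */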
812 struct user32_access_t {
813 uid_t uid; /* IN: effective user id */
814 short flags; /* IN: access requested (i.e. R_OK) */
815 short num_groups; /* IN: number of groups user belongs to */
816 int num_files; /* IN: number of files to process */
817 user32_addr_t file_ids; /* IN: array of file ids */
818 user32_addr_t groups; /* IN: array of groups */
819 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
820 };
821
822 struct user64_access_t {
823 uid_t uid; /* IN: effective user id */
824 short flags; /* IN: access requested (i.e. R_OK) */
825 short num_groups; /* IN: number of groups user belongs to */
826 int num_files; /* IN: number of files to process */
827 user64_addr_t file_ids; /* IN: array of file ids */
828 user64_addr_t groups; /* IN: array of groups */
829 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
830 };
831
832
833 // these are the "extended" versions of the above structures
834 // note that it is crucial that they be a different size than
835 // the regular versions
836 struct ext_access_t {
837 uint32_t flags; /* IN: access requested (i.e. R_OK) */
838 uint32_t num_files; /* IN: number of files to process */
839 uint32_t map_size; /* IN: size of the bit map */
840 uint32_t *file_ids; /* IN: Array of file ids */
841 char *bitmap; /* OUT: hash-bitmap of interesting directory ids */
842 short *access; /* OUT: access info for each file (0 for 'has access') */
843 uint32_t num_parents; /* future use */
844 cnid_t *parents; /* future use */
845 } __attribute__((unavailable)); // this structure is for reference purposes only
846
847 struct user32_ext_access_t {
848 uint32_t flags; /* IN: access requested (i.e. R_OK) */
849 uint32_t num_files; /* IN: number of files to process */
850 uint32_t map_size; /* IN: size of the bit map */
851 user32_addr_t file_ids; /* IN: Array of file ids */
852 user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
853 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
854 uint32_t num_parents; /* future use */
855 user32_addr_t parents; /* future use */
856 };
857
858 struct user64_ext_access_t {
859 uint32_t flags; /* IN: access requested (i.e. R_OK) */
860 uint32_t num_files; /* IN: number of files to process */
861 uint32_t map_size; /* IN: size of the bit map */
862 user64_addr_t file_ids; /* IN: array of file ids */
863 	user64_addr_t bitmap;           /* OUT: hash-bitmap of interesting directory ids */
864 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
865 uint32_t num_parents;/* future use */
866 user64_addr_t parents;/* future use */
867 };
868
869
870 /*
871 * Perform a binary search for the given parent_id. Return value is
872 * the index if there is a match. If no_match_indexp is non-NULL it
873 * will be assigned with the index to insert the item (even if it was
874 * not found).
875 */
876 static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
877 {
878 int index=-1;
879 unsigned int lo=0;
880
881 do {
882 unsigned int mid = ((hi - lo)/2) + lo;
883 unsigned int this_id = array[mid];
884
885 if (parent_id == this_id) {
886 hi = mid;
887 break;
888 }
889
890 if (parent_id < this_id) {
891 hi = mid;
892 continue;
893 }
894
895 if (parent_id > this_id) {
896 lo = mid + 1;
897 continue;
898 }
899 } while(lo < hi);
900
901 /* check if lo and hi converged on the match */
902 if (parent_id == array[hi]) {
903 index = hi;
904 }
905
906 if (no_match_indexp) {
907 *no_match_indexp = hi;
908 }
909
910 return index;
911 }
912
913
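/*
 * Look up parent_id in the cache. Returns 1 on a hit and 0 on a miss;
 * *indexp is set to the matching slot, or to the slot where a new entry
 * should be inserted.
 */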
914 static int
915 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
916 {
917 unsigned int hi;
918 int matches = 0;
919 int index, no_match_index;
920
921 if (cache->numcached == 0) {
922 *indexp = 0;
923 return 0; // table is empty, so insert at index=0 and report no match
924 }
925
926 if (cache->numcached > NUM_CACHE_ENTRIES) {
927 cache->numcached = NUM_CACHE_ENTRIES;
928 }
929
930 hi = cache->numcached - 1;
931
932 index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);
933
934 /* if no existing entry found, find index for new one */
935 if (index == -1) {
936 index = no_match_index;
937 matches = 0;
938 } else {
939 matches = 1;
940 }
941
942 *indexp = index;
943 return matches;
944 }
945
946 /*
947 * Add a node to the access_cache at the given index (or do a lookup first
948 * to find the index if -1 is passed in). We currently do a replace rather
949 * than an insert if the cache is full.
950 */
951 static void
952 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
953 {
954 int lookup_index = -1;
955
956 /* need to do a lookup first if -1 passed for index */
957 if (index == -1) {
958 if (lookup_bucket(cache, &lookup_index, nodeID)) {
959 if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
960 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
961 cache->haveaccess[lookup_index] = access;
962 }
963
964 /* mission accomplished */
965 return;
966 } else {
967 index = lookup_index;
968 }
969
970 }
971
972 /* if the cache is full, do a replace rather than an insert */
973 if (cache->numcached >= NUM_CACHE_ENTRIES) {
974 cache->numcached = NUM_CACHE_ENTRIES-1;
975
976 if (index > cache->numcached) {
977 index = cache->numcached;
978 }
979 }
980
981 if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
982 index++;
983 }
984
985 if (index >= 0 && index < cache->numcached) {
986 /* only do bcopy if we're inserting */
987 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
988 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
989 }
990
991 cache->acache[index] = nodeID;
992 cache->haveaccess[index] = access;
993 cache->numcached++;
994 }
995
996
997 struct cinfo {
998 uid_t uid;
999 gid_t gid;
1000 mode_t mode;
1001 cnid_t parentcnid;
1002 u_int16_t recflags;
1003 };
1004
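/*
 * Callback for hfs_chash_snoop: copy the in-core cnode's uid, gid, mode,
 * parent CNID, and record flags into the caller's struct cinfo.
 */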
1005 static int
1006 snoop_callback(const cnode_t *cp, void *arg)
1007 {
1008 struct cinfo *cip = arg;
1009
1010 cip->uid = cp->c_uid;
1011 cip->gid = cp->c_gid;
1012 cip->mode = cp->c_mode;
1013 cip->parentcnid = cp->c_parentcnid;
1014 cip->recflags = cp->c_attr.ca_recflags;
1015
1016 return (0);
1017 }
1018
1019 /*
1020  * Look up the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
1021  * isn't in core, then go to the catalog.
1022 */
1023 static int
1024 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
1025 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
1026 {
1027 int error = 0;
1028
1029 /* if this id matches the one the fsctl was called with, skip the lookup */
1030 if (cnid == skip_cp->c_cnid) {
1031 cnattrp->ca_uid = skip_cp->c_uid;
1032 cnattrp->ca_gid = skip_cp->c_gid;
1033 cnattrp->ca_mode = skip_cp->c_mode;
1034 cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
1035 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
1036 } else {
1037 struct cinfo c_info;
1038
1039 		/* otherwise, check the cnode hash in case the file/dir is in core */
1040 error = hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info);
1041
1042 if (error == EACCES) {
1043 // File is deleted
1044 return ENOENT;
1045 } else if (!error) {
1046 cnattrp->ca_uid = c_info.uid;
1047 cnattrp->ca_gid = c_info.gid;
1048 cnattrp->ca_mode = c_info.mode;
1049 cnattrp->ca_recflags = c_info.recflags;
1050 keyp->hfsPlus.parentID = c_info.parentcnid;
1051 } else {
1052 int lockflags;
1053
1054 if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
1055 throttle_lowpri_io(1);
1056
1057 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1058
1059 /* lookup this cnid in the catalog */
1060 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
1061
1062 hfs_systemfile_unlock(hfsmp, lockflags);
1063
1064 cache->lookups++;
1065 }
1066 }
1067
1068 return (error);
1069 }
1070
1071
1072 /*
1073 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
1074 * up to CACHE_LEVELS as we progress towards the root.
1075 */
1076 static int
1077 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
1078 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
1079 struct vfs_context *my_context,
1080 char *bitmap,
1081 uint32_t map_size,
1082 cnid_t* parents,
1083 uint32_t num_parents)
1084 {
1085 int myErr = 0;
1086 int myResult;
1087 HFSCatalogNodeID thisNodeID;
1088 unsigned int myPerms;
1089 struct cat_attr cnattr;
1090 int cache_index = -1, scope_index = -1, scope_idx_start = -1;
1091 CatalogKey catkey;
1092
1093 int i = 0, ids_to_cache = 0;
1094 int parent_ids[CACHE_LEVELS];
1095
1096 thisNodeID = nodeID;
1097 while (thisNodeID >= kRootDirID) {
1098 myResult = 0; /* default to "no access" */
1099
1100 /* check the cache before resorting to hitting the catalog */
1101
1102 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
1103 * to look any further after hitting cached dir */
1104
1105 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
1106 cache->cachehits++;
1107 myErr = cache->haveaccess[cache_index];
1108 if (scope_index != -1) {
1109 if (myErr == ESRCH) {
1110 myErr = 0;
1111 }
1112 } else {
1113 scope_index = 0; // so we'll just use the cache result
1114 scope_idx_start = ids_to_cache;
1115 }
1116 myResult = (myErr == 0) ? 1 : 0;
1117 goto ExitThisRoutine;
1118 }
1119
1120
1121 if (parents) {
1122 int tmp;
1123 tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
1124 if (scope_index == -1)
1125 scope_index = tmp;
1126 if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
1127 scope_idx_start = ids_to_cache;
1128 }
1129 }
1130
1131 /* remember which parents we want to cache */
1132 if (ids_to_cache < CACHE_LEVELS) {
1133 parent_ids[ids_to_cache] = thisNodeID;
1134 ids_to_cache++;
1135 }
1136 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
1137 if (bitmap && map_size) {
1138 bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
1139 }
1140
1141
1142 /* do the lookup (checks the cnode hash, then the catalog) */
1143 myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
1144 if (myErr) {
1145 goto ExitThisRoutine; /* no access */
1146 }
1147
1148 /* Root always gets access. */
1149 if (suser(myp_ucred, NULL) == 0) {
1150 thisNodeID = catkey.hfsPlus.parentID;
1151 myResult = 1;
1152 continue;
1153 }
1154
1155 // if the thing has acl's, do the full permission check
1156 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1157 struct vnode *vp;
1158
1159 /* get the vnode for this cnid */
1160 myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
1161 if ( myErr ) {
1162 myResult = 0;
1163 goto ExitThisRoutine;
1164 }
1165
1166 thisNodeID = VTOC(vp)->c_parentcnid;
1167
1168 hfs_unlock(VTOC(vp));
1169
1170 if (vnode_vtype(vp) == VDIR) {
1171 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
1172 } else {
1173 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
1174 }
1175
1176 vnode_put(vp);
1177 if (myErr) {
1178 myResult = 0;
1179 goto ExitThisRoutine;
1180 }
1181 } else {
1182 unsigned int flags;
1183 int mode = cnattr.ca_mode & S_IFMT;
1184 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp,myp_ucred, theProcPtr);
1185
1186 if (mode == S_IFDIR) {
1187 flags = R_OK | X_OK;
1188 } else {
1189 flags = R_OK;
1190 }
1191 if ( (myPerms & flags) != flags) {
1192 myResult = 0;
1193 myErr = EACCES;
1194 goto ExitThisRoutine; /* no access */
1195 }
1196
1197 /* up the hierarchy we go */
1198 thisNodeID = catkey.hfsPlus.parentID;
1199 }
1200 }
1201
1202 /* if here, we have access to this node */
1203 myResult = 1;
1204
1205 ExitThisRoutine:
1206 if (parents && myErr == 0 && scope_index == -1) {
1207 myErr = ESRCH;
1208 }
1209
1210 if (myErr) {
1211 myResult = 0;
1212 }
1213 *err = myErr;
1214
1215 /* cache the parent directory(ies) */
1216 for (i = 0; i < ids_to_cache; i++) {
1217 if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
1218 add_node(cache, -1, parent_ids[i], ESRCH);
1219 } else {
1220 add_node(cache, -1, parent_ids[i], myErr);
1221 }
1222 }
1223
1224 return (myResult);
1225 }
1226
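/*
 * Back end for the bulk-access fsctls: copy in the caller's array of
 * file IDs, check access to each one (and its parent directories, using
 * the access_cache), and copy the per-file results back out.
 */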
1227 static int
1228 do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
1229 struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
1230 {
1231 boolean_t is64bit;
1232
1233 /*
1234 * NOTE: on entry, the vnode has an io_ref. In case this vnode
1235 	 * happens to be in our list of file_ids, we'll note it so we
1236 	 * avoid calling hfs_chashget_nowait() on that id, as that
1237 	 * would cause a "locking against myself" panic.
1238 */
1239 Boolean check_leaf = true;
1240
1241 struct user64_ext_access_t *user_access_structp;
1242 struct user64_ext_access_t tmp_user_access;
1243 struct access_cache cache;
1244
1245 int error = 0, prev_parent_check_ok=1;
1246 unsigned int i;
1247
1248 short flags;
1249 unsigned int num_files = 0;
1250 int map_size = 0;
1251 int num_parents = 0;
1252 int *file_ids=NULL;
1253 short *access=NULL;
1254 char *bitmap=NULL;
1255 cnid_t *parents=NULL;
1256 int leaf_index;
1257
1258 cnid_t cnid;
1259 cnid_t prevParent_cnid = 0;
1260 unsigned int myPerms;
1261 short myaccess = 0;
1262 struct cat_attr cnattr;
1263 CatalogKey catkey;
1264 struct cnode *skip_cp = VTOC(vp);
1265 kauth_cred_t cred = vfs_context_ucred(context);
1266 proc_t p = vfs_context_proc(context);
1267
1268 is64bit = proc_is64bit(p);
1269
1270 /* initialize the local cache and buffers */
1271 cache.numcached = 0;
1272 cache.cachehits = 0;
1273 cache.lookups = 0;
1274 cache.acache = NULL;
1275 cache.haveaccess = NULL;
1276
1277 /* struct copyin done during dispatch... need to copy file_id array separately */
1278 if (ap->a_data == NULL) {
1279 error = EINVAL;
1280 goto err_exit_bulk_access;
1281 }
1282
1283 if (is64bit) {
1284 if (arg_size != sizeof(struct user64_ext_access_t)) {
1285 error = EINVAL;
1286 goto err_exit_bulk_access;
1287 }
1288
1289 user_access_structp = (struct user64_ext_access_t *)ap->a_data;
1290
1291 } else if (arg_size == sizeof(struct user32_access_t)) {
1292 struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;
1293
1294 // convert an old style bulk-access struct to the new style
1295 tmp_user_access.flags = accessp->flags;
1296 tmp_user_access.num_files = accessp->num_files;
1297 tmp_user_access.map_size = 0;
1298 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1299 tmp_user_access.bitmap = USER_ADDR_NULL;
1300 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1301 tmp_user_access.num_parents = 0;
1302 user_access_structp = &tmp_user_access;
1303
1304 } else if (arg_size == sizeof(struct user32_ext_access_t)) {
1305 struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;
1306
1307 // up-cast from a 32-bit version of the struct
1308 tmp_user_access.flags = accessp->flags;
1309 tmp_user_access.num_files = accessp->num_files;
1310 tmp_user_access.map_size = accessp->map_size;
1311 tmp_user_access.num_parents = accessp->num_parents;
1312
1313 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1314 tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
1315 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1316 tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);
1317
1318 user_access_structp = &tmp_user_access;
1319 } else {
1320 error = EINVAL;
1321 goto err_exit_bulk_access;
1322 }
1323
1324 map_size = user_access_structp->map_size;
1325
1326 num_files = user_access_structp->num_files;
1327
1328 num_parents= user_access_structp->num_parents;
1329
1330 if (num_files < 1) {
1331 goto err_exit_bulk_access;
1332 }
1333 if (num_files > 1024) {
1334 error = EINVAL;
1335 goto err_exit_bulk_access;
1336 }
1337
1338 if (num_parents > 1024) {
1339 error = EINVAL;
1340 goto err_exit_bulk_access;
1341 }
1342
1343 file_ids = hfs_malloc(sizeof(int) * num_files);
1344 access = hfs_malloc(sizeof(short) * num_files);
1345 if (map_size) {
1346 bitmap = hfs_mallocz(sizeof(char) * map_size);
1347 }
1348
1349 if (num_parents) {
1350 parents = hfs_malloc(sizeof(cnid_t) * num_parents);
1351 }
1352
1353 cache.acache = hfs_malloc(sizeof(int) * NUM_CACHE_ENTRIES);
1354 cache.haveaccess = hfs_malloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1355
1356 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1357 num_files * sizeof(int)))) {
1358 goto err_exit_bulk_access;
1359 }
1360
1361 if (num_parents) {
1362 if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
1363 num_parents * sizeof(cnid_t)))) {
1364 goto err_exit_bulk_access;
1365 }
1366 }
1367
1368 flags = user_access_structp->flags;
1369 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1370 flags = R_OK;
1371 }
1372
1373 /* check if we've been passed leaf node ids or parent ids */
1374 if (flags & PARENT_IDS_FLAG) {
1375 check_leaf = false;
1376 }
1377
1378 /* Check access to each file_id passed in */
1379 for (i = 0; i < num_files; i++) {
1380 leaf_index=-1;
1381 cnid = (cnid_t) file_ids[i];
1382
1383 /* root always has access */
1384 if ((!parents) && (!suser(cred, NULL))) {
1385 access[i] = 0;
1386 continue;
1387 }
1388
1389 if (check_leaf) {
1390 /* do the lookup (checks the cnode hash, then the catalog) */
1391 error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
1392 if (error) {
1393 access[i] = (short) error;
1394 continue;
1395 }
1396
1397 if (parents) {
1398 // Check if the leaf matches one of the parent scopes
1399 leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
1400 if (leaf_index >= 0 && parents[leaf_index] == cnid)
1401 prev_parent_check_ok = 0;
1402 else if (leaf_index >= 0)
1403 prev_parent_check_ok = 1;
1404 }
1405
1406 // if the thing has acl's, do the full permission check
1407 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1408 struct vnode *cvp;
1409 int myErr = 0;
1410 /* get the vnode for this cnid */
1411 myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);
1412 if ( myErr ) {
1413 access[i] = myErr;
1414 continue;
1415 }
1416
1417 hfs_unlock(VTOC(cvp));
1418
1419 if (vnode_vtype(cvp) == VDIR) {
1420 myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
1421 } else {
1422 myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
1423 }
1424
1425 vnode_put(cvp);
1426 if (myErr) {
1427 access[i] = myErr;
1428 continue;
1429 }
1430 } else {
1431 /* before calling CheckAccess(), check the target file for read access */
1432 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1433 cnattr.ca_mode, hfsmp->hfs_mp, cred, p);
1434
1435 /* fail fast if no access */
1436 if ((myPerms & flags) == 0) {
1437 access[i] = EACCES;
1438 continue;
1439 }
1440 }
1441 } else {
1442 /* we were passed an array of parent ids */
1443 catkey.hfsPlus.parentID = cnid;
1444 }
1445
1446 /* if the last guy had the same parent and had access, we're done */
1447 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
1448 cache.cachehits++;
1449 access[i] = 0;
1450 continue;
1451 }
1452
1453 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1454 skip_cp, p, cred, context,bitmap, map_size, parents, num_parents);
1455
1456 if (myaccess || (error == ESRCH && leaf_index != -1)) {
1457 access[i] = 0; // have access.. no errors to report
1458 } else {
1459 access[i] = (error != 0 ? (short) error : EACCES);
1460 }
1461
1462 prevParent_cnid = catkey.hfsPlus.parentID;
1463 }
1464
1465 /* copyout the access array */
1466 if ((error = copyout((caddr_t)access, user_access_structp->access,
1467 num_files * sizeof (short)))) {
1468 goto err_exit_bulk_access;
1469 }
1470 if (map_size && bitmap) {
1471 if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
1472 map_size * sizeof (char)))) {
1473 goto err_exit_bulk_access;
1474 }
1475 }
1476
1477
1478 err_exit_bulk_access:
1479
1480 hfs_free(file_ids, sizeof(int) * num_files);
1481 hfs_free(parents, sizeof(cnid_t) * num_parents);
1482 hfs_free(bitmap, sizeof(char) * map_size);
1483 hfs_free(access, sizeof(short) * num_files);
1484 hfs_free(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1485 hfs_free(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1486
1487 return (error);
1488 }
1489
1490
1491 /* end "bulk-access" support */
1492
1493
1494 /*
1495 * Control filesystem operating characteristics.
1496 */
1497 int
1498 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
1499 vnode_t a_vp;
1500 long a_command;
1501 caddr_t a_data;
1502 int a_fflag;
1503 vfs_context_t a_context;
1504 } */ *ap)
1505 {
1506 struct vnode * vp = ap->a_vp;
1507 struct hfsmount *hfsmp = VTOHFS(vp);
1508 vfs_context_t context = ap->a_context;
1509 kauth_cred_t cred = vfs_context_ucred(context);
1510 proc_t p = vfs_context_proc(context);
1511 struct vfsstatfs *vfsp;
1512 boolean_t is64bit;
1513 off_t jnl_start, jnl_size;
1514 struct hfs_journal_info *jip;
1515 #if HFS_COMPRESSION
1516 int compressed = 0;
1517 off_t uncompressed_size = -1;
1518 int decmpfs_error = 0;
1519
1520 if (ap->a_command == F_RDADVISE) {
1521 /* we need to inspect the decmpfs state of the file as early as possible */
1522 compressed = hfs_file_is_compressed(VTOC(vp), 0);
1523 if (compressed) {
1524 if (VNODE_IS_RSRC(vp)) {
1525 /* if this is the resource fork, treat it as if it were empty */
1526 uncompressed_size = 0;
1527 } else {
1528 decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
1529 if (decmpfs_error != 0) {
1530 /* failed to get the uncompressed size, we'll check for this later */
1531 uncompressed_size = -1;
1532 }
1533 }
1534 }
1535 }
1536 #endif /* HFS_COMPRESSION */
1537
1538 is64bit = proc_is64bit(p);
1539
1540 #if CONFIG_PROTECT
1541 #if HFS_CONFIG_KEY_ROLL
1542 // The HFSIOC_KEY_ROLL fsctl does its own access checks
1543 if (ap->a_command != HFSIOC_KEY_ROLL)
1544 #endif
1545 {
1546 int error = 0;
1547 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
1548 return error;
1549 }
1550 }
1551 #endif /* CONFIG_PROTECT */
1552
1553 switch (ap->a_command) {
1554
1555 case HFSIOC_GETPATH:
1556 {
1557 struct vnode *file_vp;
1558 cnid_t cnid;
1559 int error;
1560 int flags = 0;
1561 char *bufptr;
1562 #ifdef VN_GETPATH_NEW
1563 size_t outlen;
1564 #else // VN_GETPATH_NEW
1565 int outlen;
1566 #endif // VN_GETPATH_NEW
1567
1568 /* Caller must be owner of file system. */
1569 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1570 if (suser(cred, NULL) &&
1571 kauth_cred_getuid(cred) != vfsp->f_owner) {
1572 return (EACCES);
1573 }
1574 /* Target vnode must be file system's root. */
1575 if (!vnode_isvroot(vp)) {
1576 return (EINVAL);
1577 }
1578 bufptr = (char *)ap->a_data;
1579 cnid = strtoul(bufptr, NULL, 10);
1580 if (ap->a_fflag & HFS_GETPATH_VOLUME_RELATIVE) {
1581 flags |= BUILDPATH_VOLUME_RELATIVE;
1582 }
1583
1584 /* We need to call hfs_vfs_vget to leverage the code that will
1585 * fix the origin list for us if needed, as opposed to calling
1586 		 * hfs_vget, since we will need the parent for the vn_getpath_ext call.
1587 */
1588
1589 if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
1590 return (error);
1591 }
1592
1593 outlen = sizeof(pathname_t);
1594 error = vn_getpath_ext(file_vp, NULLVP, bufptr, &outlen, flags);
1595 vnode_put(file_vp);
1596
1597 return (error);
1598 }
1599
1600 case HFSIOC_SET_MAX_DEFRAG_SIZE:
1601 {
1602 int error = 0; /* Assume success */
1603 u_int32_t maxsize = 0;
1604
1605 if (vnode_vfsisrdonly(vp)) {
1606 return (EROFS);
1607 }
1608 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1609 if (!kauth_cred_issuser(cred)) {
1610 return (EACCES); /* must be root */
1611 }
1612
1613 maxsize = *(u_int32_t *)ap->a_data;
1614
1615 hfs_lock_mount(hfsmp);
1616 if (maxsize > HFS_MAX_DEFRAG_SIZE) {
1617 error = EINVAL;
1618 }
1619 else {
1620 hfsmp->hfs_defrag_max = maxsize;
1621 }
1622 hfs_unlock_mount(hfsmp);
1623
1624 return (error);
1625 }
1626
1627 case HFSIOC_FORCE_ENABLE_DEFRAG:
1628 {
1629 int error = 0; /* Assume success */
1630 u_int32_t do_enable = 0;
1631
1632 if (vnode_vfsisrdonly(vp)) {
1633 return (EROFS);
1634 }
1635 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1636 if (!kauth_cred_issuser(cred)) {
1637 return (EACCES); /* must be root */
1638 }
1639
1640 do_enable = *(u_int32_t *)ap->a_data;
1641
1642 hfs_lock_mount(hfsmp);
1643 if (do_enable != 0) {
1644 hfsmp->hfs_defrag_nowait = 1;
1645 }
1646 else {
1647 error = EINVAL;
1648 }
1649
1650 hfs_unlock_mount(hfsmp);
1651
1652 return (error);
1653 }
1654
1655
1656 case HFSIOC_TRANSFER_DOCUMENT_ID:
1657 {
1658 struct cnode *cp = NULL;
1659 int error;
1660 u_int32_t to_fd = *(u_int32_t *)ap->a_data;
1661 struct fileproc *to_fp;
1662 struct vnode *to_vp;
1663 struct cnode *to_cp;
1664
1665 cp = VTOC(vp);
1666
1667 if ((error = fp_getfvp(p, to_fd, &to_fp, &to_vp)) != 0) {
1668 //printf("could not get the vnode for fd %d (err %d)\n", to_fd, error);
1669 return error;
1670 }
1671 if ( (error = vnode_getwithref(to_vp)) ) {
1672 file_drop(to_fd);
1673 return error;
1674 }
1675
1676 if (VTOHFS(to_vp) != hfsmp) {
1677 error = EXDEV;
1678 goto transfer_cleanup;
1679 }
1680
1681 int need_unlock = 1;
1682 to_cp = VTOC(to_vp);
1683 error = hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
1684 if (error != 0) {
1685 //printf("could not lock the pair of cnodes (error %d)\n", error);
1686 goto transfer_cleanup;
1687 }
1688
1689 if (!(cp->c_bsdflags & UF_TRACKED)) {
1690 error = EINVAL;
1691 } else if (to_cp->c_bsdflags & UF_TRACKED) {
1692 //
1693 // if the destination is already tracked, return an error
1694 // as otherwise it's a silent deletion of the target's
1695 // document-id
1696 //
1697 error = EEXIST;
1698 } else if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
1699 //
1700 // we can use the FndrExtendedFileInfo because the doc-id is the first
1701 			// thing in both it and the ExtendedDirInfo struct, which is fixed in
1702 			// format and cannot change layout
1703 //
1704 struct FndrExtendedFileInfo *f_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16);
1705 struct FndrExtendedFileInfo *to_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)to_cp->c_finderinfo + 16);
1706
1707 if (f_extinfo->document_id == 0) {
1708 uint32_t new_id;
1709
1710 hfs_unlockpair(cp, to_cp); // have to unlock to be able to get a new-id
1711
1712 if ((error = hfs_generate_document_id(hfsmp, &new_id)) == 0) {
1713 //
1714 // re-lock the pair now that we have the document-id
1715 //
1716 hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
1717 f_extinfo->document_id = new_id;
1718 } else {
1719 goto transfer_cleanup;
1720 }
1721 }
1722
1723 to_extinfo->document_id = f_extinfo->document_id;
1724 f_extinfo->document_id = 0;
1725 //printf("TRANSFERRING: doc-id %d from ino %d to ino %d\n", to_extinfo->document_id, cp->c_fileid, to_cp->c_fileid);
1726
1727 // make sure the destination is also UF_TRACKED
1728 to_cp->c_bsdflags |= UF_TRACKED;
1729 cp->c_bsdflags &= ~UF_TRACKED;
1730
1731 // mark the cnodes dirty
1732 cp->c_flag |= C_MODIFIED;
1733 to_cp->c_flag |= C_MODIFIED;
1734
1735 int lockflags;
1736 if ((error = hfs_start_transaction(hfsmp)) == 0) {
1737
1738 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1739
1740 (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
1741 (void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, NULL, NULL);
1742
1743 hfs_systemfile_unlock (hfsmp, lockflags);
1744 (void) hfs_end_transaction(hfsmp);
1745 }
1746
1747 add_fsevent(FSE_DOCID_CHANGED, context,
1748 FSE_ARG_DEV, hfsmp->hfs_raw_dev,
1749 FSE_ARG_INO, (ino64_t)cp->c_fileid, // src inode #
1750 FSE_ARG_INO, (ino64_t)to_cp->c_fileid, // dst inode #
1751 FSE_ARG_INT32, to_extinfo->document_id,
1752 FSE_ARG_DONE);
1753
1754 hfs_unlockpair(cp, to_cp); // unlock this so we can send the fsevents
1755 need_unlock = 0;
1756
1757 if (need_fsevent(FSE_STAT_CHANGED, vp)) {
1758 add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
1759 }
1760 if (need_fsevent(FSE_STAT_CHANGED, to_vp)) {
1761 add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, to_vp, FSE_ARG_DONE);
1762 }
1763 }
1764
1765 if (need_unlock) {
1766 hfs_unlockpair(cp, to_cp);
1767 }
1768
1769 transfer_cleanup:
1770 vnode_put(to_vp);
1771 file_drop(to_fd);
1772
1773 return error;
1774 }
1775
1776
1777
1778 case HFSIOC_PREV_LINK:
1779 case HFSIOC_NEXT_LINK:
1780 {
1781 cnid_t linkfileid;
1782 cnid_t nextlinkid;
1783 cnid_t prevlinkid;
1784 int error;
1785
1786 /* Caller must be owner of file system. */
1787 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1788 if (suser(cred, NULL) &&
1789 kauth_cred_getuid(cred) != vfsp->f_owner) {
1790 return (EACCES);
1791 }
1792 /* Target vnode must be file system's root. */
1793 if (!vnode_isvroot(vp)) {
1794 return (EINVAL);
1795 }
1796 linkfileid = *(cnid_t *)ap->a_data;
1797 if (linkfileid < kHFSFirstUserCatalogNodeID) {
1798 return (EINVAL);
1799 }
1800 if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
1801 return (error);
1802 }
1803 if (ap->a_command == HFSIOC_NEXT_LINK) {
1804 *(cnid_t *)ap->a_data = nextlinkid;
1805 } else {
1806 *(cnid_t *)ap->a_data = prevlinkid;
1807 }
1808 return (0);
1809 }
1810
1811 case HFSIOC_RESIZE_PROGRESS: {
1812
1813 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1814 if (suser(cred, NULL) &&
1815 kauth_cred_getuid(cred) != vfsp->f_owner) {
1816 return (EACCES); /* must be owner of file system */
1817 }
1818 if (!vnode_isvroot(vp)) {
1819 return (EINVAL);
1820 }
1821 /* file system must not be mounted read-only */
1822 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1823 return (EROFS);
1824 }
1825
1826 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
1827 }
1828
1829 case HFSIOC_RESIZE_VOLUME: {
1830 u_int64_t newsize;
1831 u_int64_t cursize;
1832 int ret;
1833
1834 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1835 if (suser(cred, NULL) &&
1836 kauth_cred_getuid(cred) != vfsp->f_owner) {
1837 return (EACCES); /* must be owner of file system */
1838 }
1839 if (!vnode_isvroot(vp)) {
1840 return (EINVAL);
1841 }
1842
1843 /* filesystem must not be mounted read only */
1844 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1845 return (EROFS);
1846 }
1847 newsize = *(u_int64_t *)ap->a_data;
1848 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
1849
1850 if (newsize == cursize) {
1851 return (0);
1852 }
1853 IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeWillResize);
1854 if (newsize > cursize) {
1855 ret = hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
1856 } else {
1857 ret = hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
1858 }
1859 IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeDidResize);
1860 return (ret);
1861 }
1862 case HFSIOC_CHANGE_NEXT_ALLOCATION: {
1863 int error = 0; /* Assume success */
1864 u_int32_t location;
1865
1866 if (vnode_vfsisrdonly(vp)) {
1867 return (EROFS);
1868 }
1869 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1870 if (suser(cred, NULL) &&
1871 kauth_cred_getuid(cred) != vfsp->f_owner) {
1872 return (EACCES); /* must be owner of file system */
1873 }
1874 if (!vnode_isvroot(vp)) {
1875 return (EINVAL);
1876 }
1877 hfs_lock_mount(hfsmp);
1878 location = *(u_int32_t *)ap->a_data;
1879 if ((location >= hfsmp->allocLimit) &&
1880 (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
1881 error = EINVAL;
1882 goto fail_change_next_allocation;
1883 }
1884 /* Return previous value. */
1885 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
1886 if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
1887 /* On magic value for location, set nextAllocation to next block
1888 * after metadata zone and set flag in mount structure to indicate
1889 * that nextAllocation should not be updated again.
1890 */
1891 if (hfsmp->hfs_metazone_end != 0) {
1892 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
1893 }
1894 hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1895 } else {
1896 hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1897 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
1898 }
1899 MarkVCBDirty(hfsmp);
1900 fail_change_next_allocation:
1901 hfs_unlock_mount(hfsmp);
1902 return (error);
1903 }
1904
1905 #if HFS_SPARSE_DEV
1906 case HFSIOC_SETBACKINGSTOREINFO: {
1907 struct vnode * di_vp;
1908 struct hfs_backingstoreinfo *bsdata;
1909 int error = 0;
1910
1911 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1912 return (EROFS);
1913 }
1914 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
1915 return (EALREADY);
1916 }
1917 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1918 if (suser(cred, NULL) &&
1919 kauth_cred_getuid(cred) != vfsp->f_owner) {
1920 return (EACCES); /* must be owner of file system */
1921 }
1922 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
1923 if (bsdata == NULL) {
1924 return (EINVAL);
1925 }
1926 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
1927 return (error);
1928 }
1929 if ((error = vnode_getwithref(di_vp))) {
1930 file_drop(bsdata->backingfd);
1931 return(error);
1932 }
1933
1934 if (vnode_mount(vp) == vnode_mount(di_vp)) {
1935 (void)vnode_put(di_vp);
1936 file_drop(bsdata->backingfd);
1937 return (EINVAL);
1938 }
1939
1940 // Dropped in unmount
1941 vnode_ref(di_vp);
1942
1943 hfs_lock_mount(hfsmp);
1944 hfsmp->hfs_backingvp = di_vp;
1945 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
1946 hfsmp->hfs_sparsebandblks = bsdata->bandsize / hfsmp->blockSize * 4;
1947 hfs_unlock_mount(hfsmp);
1948
1949 /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */
1950
1951 /*
1952 * If the sparse image is on a sparse image file (as opposed to a sparse
1953 * bundle), then we may need to limit the free space to the maximum size
1954 * of a file on that volume. So we query (using pathconf), and if we get
1955 * a meaningful result, we cache the number of blocks for later use in
1956 * hfs_freeblks().
1957 */
1958 hfsmp->hfs_backingfs_maxblocks = 0;
1959 if (vnode_vtype(di_vp) == VREG) {
1960 int terr;
1961 int hostbits;
1962 terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
1963 if (terr == 0 && hostbits != 0 && hostbits < 64) {
1964 u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;
1965
1966 hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
1967 }
1968 }
1969
1970 /* The free extent cache is managed differently for sparse devices.
1971 * There is a window between when the volume is mounted and when the
1972 * device is marked as sparse, so the free extent cache for this
1973 * volume is currently initialized as a normal volume (sorted by block
1974 * count). Reset the cache so that it will be rebuilt
1975 * for a sparse device (sorted by start block).
1976 */
1977 ResetVCBFreeExtCache(hfsmp);
1978
1979 (void)vnode_put(di_vp);
1980 file_drop(bsdata->backingfd);
1981 return (0);
1982 }
1983
1984 case HFSIOC_CLRBACKINGSTOREINFO: {
1985 struct vnode * tmpvp;
1986
1987 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1988 if (suser(cred, NULL) &&
1989 kauth_cred_getuid(cred) != vfsp->f_owner) {
1990 return (EACCES); /* must be owner of file system */
1991 }
1992 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1993 return (EROFS);
1994 }
1995
1996 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
1997 hfsmp->hfs_backingvp) {
1998
1999 hfs_lock_mount(hfsmp);
2000 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
2001 tmpvp = hfsmp->hfs_backingvp;
2002 hfsmp->hfs_backingvp = NULLVP;
2003 hfsmp->hfs_sparsebandblks = 0;
2004 hfs_unlock_mount(hfsmp);
2005
2006 vnode_rele(tmpvp);
2007 }
2008 return (0);
2009 }
2010 #endif /* HFS_SPARSE_DEV */
2011
2012 /* Change the next CNID stored in the VH */
2013 case HFSIOC_CHANGE_NEXTCNID: {
2014 int error = 0; /* Assume success */
2015 u_int32_t fileid;
2016 int wraparound = 0;
2017 int lockflags = 0;
2018
2019 if (vnode_vfsisrdonly(vp)) {
2020 return (EROFS);
2021 }
2022 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
2023 if (suser(cred, NULL) &&
2024 kauth_cred_getuid(cred) != vfsp->f_owner) {
2025 return (EACCES); /* must be owner of file system */
2026 }
2027
2028 fileid = *(u_int32_t *)ap->a_data;
2029
2030 /* Must have catalog lock excl. to advance the CNID pointer */
2031 lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG , HFS_EXCLUSIVE_LOCK);
2032
2033 hfs_lock_mount(hfsmp);
2034
2035 /* If it is less than the current next CNID, force the wraparound bit to be set */
2036 if (fileid < hfsmp->vcbNxtCNID) {
2037 wraparound=1;
2038 }
2039
2040 /* Return previous value. */
2041 *(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID;
2042
2043 hfsmp->vcbNxtCNID = fileid;
2044
2045 if (wraparound) {
2046 hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask;
2047 }
2048
2049 MarkVCBDirty(hfsmp);
2050 hfs_unlock_mount(hfsmp);
2051 hfs_systemfile_unlock (hfsmp, lockflags);
2052
2053 return (error);
2054 }
2055
2056 case F_FREEZE_FS: {
2057 struct mount *mp;
2058
2059 mp = vnode_mount(vp);
2060 hfsmp = VFSTOHFS(mp);
2061
2062 if (!(hfsmp->jnl))
2063 return (ENOTSUP);
2064
2065 vfsp = vfs_statfs(mp);
2066
2067 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
2068 !kauth_cred_issuser(cred))
2069 return (EACCES);
2070
2071 return hfs_freeze(hfsmp);
2072 }
2073
2074 case F_THAW_FS: {
2075 vfsp = vfs_statfs(vnode_mount(vp));
2076 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
2077 !kauth_cred_issuser(cred))
2078 return (EACCES);
2079
2080 return hfs_thaw(hfsmp, current_proc());
2081 }
2082
2083 case HFSIOC_EXT_BULKACCESS32:
2084 case HFSIOC_EXT_BULKACCESS64: {
2085 int size;
2086 #if CONFIG_HFS_STD
2087 if (hfsmp->hfs_flags & HFS_STANDARD) {
2088 return EINVAL;
2089 }
2090 #endif
2091
2092 if (is64bit) {
2093 size = sizeof(struct user64_ext_access_t);
2094 } else {
2095 size = sizeof(struct user32_ext_access_t);
2096 }
2097
2098 return do_bulk_access_check(hfsmp, vp, ap, size, context);
2099 }
2100
2101 case HFSIOC_SET_XATTREXTENTS_STATE: {
2102 int state;
2103
2104 if (ap->a_data == NULL) {
2105 return (EINVAL);
2106 }
2107
2108 state = *(int *)ap->a_data;
2109
2110 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2111 return (EROFS);
2112 }
2113
2114 /* Super-user can enable or disable extent-based extended
2115 * attribute support on a volume.
2116 * Note: Starting with Mac OS X 10.7, extent-based extended attributes
2117 * are enabled by default, so any change is transient and only
2118 * lasts until the volume is remounted.
2119 */
2120 if (!kauth_cred_issuser(kauth_cred_get())) {
2121 return (EPERM);
2122 }
2123 if (state == 0 || state == 1)
2124 return hfs_set_volxattr(hfsmp, HFSIOC_SET_XATTREXTENTS_STATE, state);
2125 else
2126 return (EINVAL);
2127 }
2128
2129 case F_SETSTATICCONTENT: {
2130 int error;
2131 int enable_static = 0;
2132 struct cnode *cp = NULL;
2133 /*
2134 * lock the cnode, decorate the cnode flag, and bail out.
2135 * VFS should have already authenticated the caller for us.
2136 */
2137
2138 if (ap->a_data) {
2139 /*
2140 * Note that even though ap->a_data is of type caddr_t,
2141 * the fcntl layer at the syscall handler will pass in NULL
2142 * or 1 depending on what the argument supplied to the fcntl
2143 * was. So it is in fact correct to check the ap->a_data
2144 * argument for zero or non-zero value when deciding whether or not
2145 * to enable the static bit in the cnode.
2146 */
2147 enable_static = 1;
2148 }
2149 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2150 return EROFS;
2151 }
2152 cp = VTOC(vp);
2153
2154 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2155 if (error == 0) {
2156 if (enable_static) {
2157 cp->c_flag |= C_SSD_STATIC;
2158 }
2159 else {
2160 cp->c_flag &= ~C_SSD_STATIC;
2161 }
2162 hfs_unlock (cp);
2163 }
2164 return error;
2165 }
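/*
 * [Editor's note: illustrative sketch, not part of the original source.]
 * A hypothetical userspace caller would toggle this hint with fcntl(2),
 * passing 0 or 1 as the argument, matching the NULL-or-1 convention the
 * comment in the case above describes. This assumes the private
 * F_SETSTATICCONTENT command is visible to the caller's headers:
 *
 *     int fd = open("/path/to/file", O_RDWR);
 *     if (fd >= 0) {
 *         (void) fcntl(fd, F_SETSTATICCONTENT, 1);   // mark as static content
 *         (void) fcntl(fd, F_SETSTATICCONTENT, 0);   // clear the hint again
 *         close(fd);
 *     }
 */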
2166
2167 case F_SET_GREEDY_MODE: {
2168 int error;
2169 int enable_greedy_mode = 0;
2170 struct cnode *cp = NULL;
2171 /*
2172 * lock the cnode, decorate the cnode flag, and bail out.
2173 * VFS should have already authenticated the caller for us.
2174 */
2175
2176 if (ap->a_data) {
2177 /*
2178 * Note that even though ap->a_data is of type caddr_t,
2179 * the fcntl layer at the syscall handler will pass in NULL
2180 * or 1 depending on what the argument supplied to the fcntl
2181 * was. So it is in fact correct to check the ap->a_data
2182 * argument for zero or non-zero value when deciding whether or not
2183 * to enable the greedy mode bit in the cnode.
2184 */
2185 enable_greedy_mode = 1;
2186 }
2187 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2188 return EROFS;
2189 }
2190 cp = VTOC(vp);
2191
2192 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2193 if (error == 0) {
2194 if (enable_greedy_mode) {
2195 cp->c_flag |= C_SSD_GREEDY_MODE;
2196 }
2197 else {
2198 cp->c_flag &= ~C_SSD_GREEDY_MODE;
2199 }
2200 hfs_unlock (cp);
2201 }
2202 return error;
2203 }
2204
2205 case F_SETIOTYPE: {
2206 int error;
2207 uint32_t iotypeflag = 0;
2208
2209 struct cnode *cp = NULL;
2210 /*
2211 * lock the cnode, decorate the cnode flag, and bail out.
2212 * VFS should have already authenticated the caller for us.
2213 */
2214
2215 if (ap->a_data == NULL) {
2216 return EINVAL;
2217 }
2218
2219 /*
2220 * Note that even though ap->a_data is of type caddr_t, we
2221 * can only use 32 bits of flag values.
2222 */
2223 iotypeflag = (uint32_t) ap->a_data;
2224 switch (iotypeflag) {
2225 case F_IOTYPE_ISOCHRONOUS:
2226 break;
2227 default:
2228 return EINVAL;
2229 }
2230
2231
2232 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2233 return EROFS;
2234 }
2235 cp = VTOC(vp);
2236
2237 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2238 if (error == 0) {
2239 switch (iotypeflag) {
2240 case F_IOTYPE_ISOCHRONOUS:
2241 cp->c_flag |= C_IO_ISOCHRONOUS;
2242 break;
2243 default:
2244 break;
2245 }
2246 hfs_unlock (cp);
2247 }
2248 return error;
2249 }
2250
2251 case F_MAKECOMPRESSED: {
2252 int error = 0;
2253 uint32_t gen_counter;
2254 struct cnode *cp = NULL;
2255 int reset_decmp = 0;
2256
2257 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2258 return EROFS;
2259 }
2260
2261 /*
2262 * acquire & lock the cnode.
2263 * VFS should have already authenticated the caller for us.
2264 */
2265
2266 if (ap->a_data) {
2267 /*
2268 * Cast the pointer into a uint32_t so we can extract the
2269 * supplied generation counter.
2270 */
2271 gen_counter = *((uint32_t*)ap->a_data);
2272 }
2273 else {
2274 return EINVAL;
2275 }
2276
2277 #if HFS_COMPRESSION
2278 cp = VTOC(vp);
2279 /* Grab truncate lock first; we may truncate the file */
2280 hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2281
2282 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2283 if (error) {
2284 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2285 return error;
2286 }
2287
2288 /* Are there any other usecounts/FDs? */
2289 if (vnode_isinuse(vp, 1)) {
2290 hfs_unlock(cp);
2291 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2292 return EBUSY;
2293 }
2294
2295 /* now we have the cnode locked down; Validate arguments */
2296 if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) {
2297 /* EINVAL if you are trying to manipulate an IMMUTABLE file */
2298 hfs_unlock(cp);
2299 hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
2300 return EINVAL;
2301 }
2302
2303 if ((hfs_get_gencount (cp)) == gen_counter) {
2304 /*
2305 * OK, the gen_counter matched. Go for it:
2306 * Toggle state bits, truncate file, and suppress mtime update
2307 */
2308 reset_decmp = 1;
2309 cp->c_bsdflags |= UF_COMPRESSED;
2310
2311 error = hfs_truncate(vp, 0, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES,
2312 ap->a_context);
2313 }
2314 else {
2315 error = ESTALE;
2316 }
2317
2318 /* Unlock cnode before executing decmpfs; it may need to get an EA */
2319 hfs_unlock(cp);
2320
2321 /*
2322 * Reset the decmp state while still holding the truncate lock. We need to
2323 * serialize here against a listxattr on this node which may occur at any
2324 * time.
2325 *
2326 * Even if '0/skiplock' is passed as the 2nd argument to hfs_file_is_compressed,
2327 * that will still potentially require getting the com.apple.decmpfs EA. If the
2328 * EA is required, then we can't hold the cnode lock, because the getxattr call is
2329 * generic (through VFS), and can't pass along any info telling it that we're already
2330 * holding it (the lock). If we don't serialize, then we risk listxattr stopping
2331 * and trying to fill in the hfs_file_is_compressed info during the callback
2332 * operation, which will result in deadlock against the b-tree node.
2333 *
2334 * So, to serialize against listxattr (which will grab buf_t meta references on
2335 * the b-tree blocks), we hold the truncate lock as we're manipulating the
2336 * decmpfs payload.
2337 */
2338 if ((reset_decmp) && (error == 0)) {
2339 decmpfs_cnode *dp = VTOCMP (vp);
2340 if (dp != NULL) {
2341 decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
2342 }
2343
2344 /* Initialize the decmpfs node as needed */
2345 (void) hfs_file_is_compressed (cp, 0); /* ok to take lock */
2346 }
2347
2348 hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
2349
2350 #endif
2351 return error;
2352 }
2353
2354 case F_SETBACKINGSTORE: {
2355
2356 int error = 0;
2357
2358 /*
2359 * See comment in F_SETSTATICCONTENT re: using
2360 * a null check for a_data
2361 */
2362 if (ap->a_data) {
2363 error = hfs_set_backingstore (vp, 1);
2364 }
2365 else {
2366 error = hfs_set_backingstore (vp, 0);
2367 }
2368
2369 return error;
2370 }
2371
2372 case F_GETPATH_MTMINFO: {
2373 int error = 0;
2374
2375 int *data = (int*) ap->a_data;
2376
2377 /* Ask if this is a backingstore vnode */
2378 error = hfs_is_backingstore (vp, data);
2379
2380 return error;
2381 }
2382
2383 case F_FULLFSYNC: {
2384 int error;
2385
2386 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2387 return (EROFS);
2388 }
2389 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2390 if (error == 0) {
2391 error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_FULL, p);
2392 hfs_unlock(VTOC(vp));
2393 }
2394
2395 return error;
2396 }
2397
2398 case F_BARRIERFSYNC: {
2399 int error;
2400
2401 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2402 return (EROFS);
2403 }
2404 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2405 if (error == 0) {
2406 error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_BARRIER, p);
2407 hfs_unlock(VTOC(vp));
2408 }
2409
2410 return error;
2411 }
2412
2413 case F_CHKCLEAN: {
2414 register struct cnode *cp;
2415 int error;
2416
2417 if (!vnode_isreg(vp))
2418 return EINVAL;
2419
2420 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2421 if (error == 0) {
2422 cp = VTOC(vp);
2423 /*
2424 * used by regression tests to determine if
2425 * all the dirty pages (dirtied via write) have been cleaned
2426 * after a call to 'fsync'.
2427 */
2428 error = is_file_clean(vp, VTOF(vp)->ff_size);
2429 hfs_unlock(cp);
2430 }
2431 return (error);
2432 }
2433
2434 case F_RDADVISE: {
2435 register struct radvisory *ra;
2436 struct filefork *fp;
2437 int error;
2438
2439 if (!vnode_isreg(vp))
2440 return EINVAL;
2441
2442 ra = (struct radvisory *)(ap->a_data);
2443 fp = VTOF(vp);
2444
2445 /* Protect against a size change. */
2446 hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2447
2448 #if HFS_COMPRESSION
2449 if (compressed) {
2450 if (uncompressed_size == -1) {
2451 /* fetching the uncompressed size failed above, so return the error */
2452 error = decmpfs_error;
2453 } else if (ra->ra_offset >= uncompressed_size) {
2454 error = EFBIG;
2455 } else {
2456 error = advisory_read(vp, uncompressed_size, ra->ra_offset, ra->ra_count);
2457 }
2458 } else
2459 #endif /* HFS_COMPRESSION */
2460 if (ra->ra_offset >= fp->ff_size) {
2461 error = EFBIG;
2462 } else {
2463 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
2464 }
2465
2466 hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
2467 return (error);
2468 }
2469
2470 case HFSIOC_GET_VOL_CREATE_TIME_32: {
2471 *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
2472 return 0;
2473 }
2474
2475 case HFSIOC_GET_VOL_CREATE_TIME_64: {
2476 *(user64_time_t *)(ap->a_data) = (user64_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
2477 return 0;
2478 }
2479
2480 case SPOTLIGHT_IOC_GET_MOUNT_TIME:
2481 *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
2482 break;
2483
2484 case SPOTLIGHT_IOC_GET_LAST_MTIME:
2485 *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
2486 break;
2487
2488 case HFSIOC_GET_VERY_LOW_DISK:
2489 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
2490 break;
2491
2492 case HFSIOC_SET_VERY_LOW_DISK:
2493 if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
2494 return EINVAL;
2495 }
2496
2497 hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
2498 break;
2499
2500 case HFSIOC_GET_LOW_DISK:
2501 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
2502 break;
2503
2504 case HFSIOC_SET_LOW_DISK:
2505 if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
2506 || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
2507
2508 return EINVAL;
2509 }
2510
2511 hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
2512 break;
2513
2514 /* The following two fsctls were ported from apfs. */
2515 case APFSIOC_GET_NEAR_LOW_DISK:
2516 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_nearwarninglimit;
2517 break;
2518
2519 case APFSIOC_SET_NEAR_LOW_DISK:
2520 if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
2521 || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
2522 return EINVAL;
2523 }
2524
2525 hfsmp->hfs_freespace_notify_nearwarninglimit = *(uint32_t *)ap->a_data;
2526 break;
2527
2528 case HFSIOC_GET_DESIRED_DISK:
2529 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
2530 break;
2531
2532 case HFSIOC_SET_DESIRED_DISK:
2533 if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
2534 return EINVAL;
2535 }
2536
2537 hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
2538 break;
2539
2540 case HFSIOC_VOLUME_STATUS:
2541 *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
2542 break;
2543
2544 case HFS_SET_BOOT_INFO:
2545 if (!vnode_isvroot(vp))
2546 return(EINVAL);
2547 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
2548 return(EACCES); /* must be superuser or owner of filesystem */
2549 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2550 return (EROFS);
2551 }
2552 hfs_lock_mount (hfsmp);
2553 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
2554 /* Null out the cached UUID, to be safe */
2555 uuid_clear (hfsmp->hfs_full_uuid);
2556 hfs_unlock_mount (hfsmp);
2557 (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
2558 break;
2559
2560 case HFS_GET_BOOT_INFO:
2561 if (!vnode_isvroot(vp))
2562 return(EINVAL);
2563 hfs_lock_mount (hfsmp);
2564 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
2565 hfs_unlock_mount(hfsmp);
2566 break;
2567
2568 /* case HFS_MARK_BOOT_CORRUPT: _IO are the same */
2569 case HFSIOC_MARK_BOOT_CORRUPT:
2570 /* Mark the boot volume corrupt by setting
2571 * kHFSVolumeInconsistentBit in the volume header. This will
2572 * force fsck_hfs on next mount.
2573 */
2574 if (!kauth_cred_issuser(kauth_cred_get())) {
2575 return EACCES;
2576 }
2577
2578 /* Allowed only on the root vnode of the boot volume */
2579 if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
2580 !vnode_isvroot(vp)) {
2581 return EINVAL;
2582 }
2583 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2584 return (EROFS);
2585 }
2586 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
2587 hfs_mark_inconsistent(hfsmp, HFS_FSCK_FORCED);
2588 break;
2589
2590 case HFSIOC_GET_JOURNAL_INFO:
2591 jip = (struct hfs_journal_info*)ap->a_data;
2592
2593 if (vp == NULLVP)
2594 return EINVAL;
2595
2596 if (hfsmp->jnl == NULL) {
2597 jnl_start = 0;
2598 jnl_size = 0;
2599 } else {
2600 jnl_start = hfs_blk_to_bytes(hfsmp->jnl_start, hfsmp->blockSize) + hfsmp->hfsPlusIOPosOffset;
2601 jnl_size = hfsmp->jnl_size;
2602 }
2603
2604 jip->jstart = jnl_start;
2605 jip->jsize = jnl_size;
2606 break;
2607
2608 case HFSIOC_SET_ALWAYS_ZEROFILL: {
2609 struct cnode *cp = VTOC(vp);
2610
2611 if (*(int *)ap->a_data) {
2612 cp->c_flag |= C_ALWAYS_ZEROFILL;
2613 } else {
2614 cp->c_flag &= ~C_ALWAYS_ZEROFILL;
2615 }
2616 break;
2617 }
2618
2619 /* case HFS_DISABLE_METAZONE: _IO are the same */
2620 case HFSIOC_DISABLE_METAZONE: {
2621 /* Only root can disable metadata zone */
2622 if (!kauth_cred_issuser(kauth_cred_get())) {
2623 return EACCES;
2624 }
2625 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2626 return (EROFS);
2627 }
2628
2629 /* Disable metadata zone now */
2630 (void) hfs_metadatazone_init(hfsmp, true);
2631 printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
2632 break;
2633 }
2634
2635
2636 case HFSIOC_FSINFO_METADATA_BLOCKS: {
2637 int error;
2638 struct hfsinfo_metadata *hinfo;
2639
2640 hinfo = (struct hfsinfo_metadata *)ap->a_data;
2641
2642 /* Get information about number of metadata blocks */
2643 error = hfs_getinfo_metadata_blocks(hfsmp, hinfo);
2644 if (error) {
2645 return error;
2646 }
2647
2648 break;
2649 }
2650
2651 case HFSIOC_GET_FSINFO: {
2652 hfs_fsinfo *fsinfo = (hfs_fsinfo *)ap->a_data;
2653
2654 /* Only root is allowed to get fsinfo */
2655 if (!kauth_cred_issuser(kauth_cred_get())) {
2656 return EACCES;
2657 }
2658
2659 /*
2660 * Make sure that the caller's version number matches
2661 * the kernel's version number. This ensures that
2662 * if the structures being read/written are changed
2663 * by the kernel, the caller will not read incorrect data.
2664 *
2665 * The first three fields --- request_type, version and
2666 * flags --- are the same for all the hfs_fsinfo structures, so
2667 * we can access the version number through any of the
2668 * structures for now.
2669 */
2670 if (fsinfo->header.version != HFS_FSINFO_VERSION) {
2671 return ENOTSUP;
2672 }
2673
2674 /* Make sure that the current file system is not marked inconsistent */
2675 if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
2676 return EIO;
2677 }
2678
2679 return hfs_get_fsinfo(hfsmp, ap->a_data);
2680 }
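/*
 * [Editor's note: illustrative sketch, not part of the original source.]
 * A hypothetical userspace caller (running as root) might fetch fsinfo via
 * fsctl(2), setting header.version so the version check above passes. The
 * request_type value is left as a placeholder; the real values live in
 * hfs_fsctl.h:
 *
 *     hfs_fsinfo info;
 *     memset(&info, 0, sizeof(info));
 *     info.header.version = HFS_FSINFO_VERSION;
 *     info.header.request_type = ...;   // e.g. a metadata-blocks request
 *     if (fsctl("/Volumes/MyHFSVolume", HFSIOC_GET_FSINFO, &info, 0) == -1)
 *         perror("HFSIOC_GET_FSINFO");
 */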
2681
2682 case HFSIOC_CS_FREESPACE_TRIM: {
2683 int error = 0;
2684 int lockflags = 0;
2685
2686 /* Only root allowed */
2687 if (!kauth_cred_issuser(kauth_cred_get())) {
2688 return EACCES;
2689 }
2690
2691 /*
2692 * This core functionality is similar to hfs_scan_blocks().
2693 * The main difference is that hfs_scan_blocks() is called
2694 * as part of mount where we are assured that the journal is
2695 * empty to start with. This fcntl() can be called on a
2696 * mounted volume, therefore it has to flush the content of
2697 * the journal as well as ensure the state of the summary table.
2698 *
2699 * This fcntl scans over the entire allocation bitmap,
2700 * creates a list of all the free blocks, and issues TRIMs
2701 * down to the underlying device. This can take a long time
2702 * as it can generate up to 512MB of read I/O.
2703 */
2704
2705 if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) {
2706 error = hfs_init_summary(hfsmp);
2707 if (error) {
2708 printf("hfs: fsctl() could not initialize summary table for %s\n", hfsmp->vcbVN);
2709 return error;
2710 }
2711 }
2712
2713 /*
2714 * The journal maintains list of recently deallocated blocks to
2715 * issue DKIOCUNMAPs when the corresponding journal transaction is
2716 * flushed to the disk. To avoid any race conditions, we only
2717 * want one active trim list and only one thread issuing DKIOCUNMAPs.
2718 * Therefore we make sure that the journal trim list is sync'ed,
2719 * empty, and not modifiable for the duration of our scan.
2720 *
2721 * Take the journal lock before flushing the journal to the disk.
2722 * We keep holding the journal lock until we get the
2723 * bitmap lock, to make sure that no new journal transactions can
2724 * start. This will make sure that the journal trim list is not
2725 * modified after the journal flush and before getting the bitmap lock.
2726 * We can release the journal lock after we acquire the bitmap
2727 * lock as it will prevent any further block deallocations.
2728 */
2729 hfs_journal_lock(hfsmp);
2730
2731 /* Flush the journal and wait for all I/Os to finish up */
2732 error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
2733 if (error) {
2734 hfs_journal_unlock(hfsmp);
2735 return error;
2736 }
2737
2738 /* Take bitmap lock to ensure it is not being modified */
2739 lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
2740
2741 /* Release the journal lock */
2742 hfs_journal_unlock(hfsmp);
2743
2744 /*
2745 * ScanUnmapBlocks reads the bitmap in large blocks
2746 * (up to 1MB), unlike the runtime, which reads the bitmap
2747 * in 4K blocks. This can cause buf_t collisions
2748 * and potential data corruption. To avoid this, we
2749 * invalidate all the existing buffers associated with
2750 * the bitmap vnode before scanning it.
2751 *
2752 * Note: ScanUnmapBlock() cleans up all the buffers
2753 * after itself, so there won't be any large buffers left
2754 * for us to clean up after it returns.
2755 */
2756 error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
2757 if (error) {
2758 hfs_systemfile_unlock(hfsmp, lockflags);
2759 return error;
2760 }
2761
2762 /* Traverse bitmap and issue DKIOCUNMAPs */
2763 error = ScanUnmapBlocks(hfsmp);
2764 hfs_systemfile_unlock(hfsmp, lockflags);
2765 if (error) {
2766 return error;
2767 }
2768
2769 break;
2770 }
2771
2772 case HFSIOC_SET_HOTFILE_STATE: {
2773 int error;
2774 struct cnode *cp = VTOC(vp);
2775 uint32_t hf_state = *((uint32_t*)ap->a_data);
2776 uint32_t num_unpinned = 0;
2777
2778 error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2779 if (error) {
2780 return error;
2781 }
2782
2783 // printf("hfs: setting hotfile state %d on %s\n", hf_state, vp->v_name);
2784 if (hf_state == HFS_MARK_FASTDEVCANDIDATE) {
2785 vnode_setfastdevicecandidate(vp);
2786
2787 cp->c_attr.ca_recflags |= kHFSFastDevCandidateMask;
2788 cp->c_attr.ca_recflags &= ~kHFSDoNotFastDevPinMask;
2789 cp->c_flag |= C_MODIFIED;
2790 } else if (hf_state == HFS_UNMARK_FASTDEVCANDIDATE || hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
2791 vnode_clearfastdevicecandidate(vp);
2792 hfs_removehotfile(vp);
2793
2794 if (cp->c_attr.ca_recflags & kHFSFastDevPinnedMask) {
2795 hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, &num_unpinned);
2796 }
2797
2798 if (hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
2799 cp->c_attr.ca_recflags |= kHFSDoNotFastDevPinMask;
2800 }
2801 cp->c_attr.ca_recflags &= ~(kHFSFastDevCandidateMask|kHFSFastDevPinnedMask);
2802 cp->c_flag |= C_MODIFIED;
2803
2804 } else {
2805 error = EINVAL;
2806 }
2807
2808 if (num_unpinned != 0) {
2809 lck_mtx_lock(&hfsmp->hfc_mutex);
2810 hfsmp->hfs_hotfile_freeblks += num_unpinned;
2811 lck_mtx_unlock(&hfsmp->hfc_mutex);
2812 }
2813
2814 hfs_unlock(cp);
2815 return error;
2816 }
2817
2818 case HFSIOC_REPIN_HOTFILE_STATE: {
2819 int error=0;
2820 uint32_t repin_what = *((uint32_t*)ap->a_data);
2821
2822 /* Only root allowed */
2823 if (!kauth_cred_issuser(kauth_cred_get())) {
2824 return EACCES;
2825 }
2826
2827 if (!(hfsmp->hfs_flags & (HFS_CS_METADATA_PIN | HFS_CS_HOTFILE_PIN))) {
2828 // this system is neither regular Fusion nor Cooperative Fusion,
2829 // so this fsctl makes no sense.
2830 return EINVAL;
2831 }
2832
2833 //
2834 // After converting a CoreStorage volume to be encrypted, the
2835 // extents could have moved around underneath us. This call
2836 // allows corestoraged to re-pin everything that should be
2837 // pinned (it would happen on the next reboot too but that could
2838 // be a long time away).
2839 //
2840 if ((repin_what & HFS_REPIN_METADATA) && (hfsmp->hfs_flags & HFS_CS_METADATA_PIN)) {
2841 hfs_pin_fs_metadata(hfsmp);
2842 }
2843 if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) {
2844 hfs_repin_hotfiles(hfsmp);
2845 }
2846 if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_SWAPFILE_PIN)) {
2847 //XXX Swapfiles (marked SWAP_PINNED) may have moved too.
2848 //XXX Do we care? They have a more transient/dynamic nature/lifetime.
2849 }
2850
2851 return error;
2852 }
2853
2854 #if HFS_CONFIG_KEY_ROLL
2855
2856 case HFSIOC_KEY_ROLL: {
2857 if (!kauth_cred_issuser(kauth_cred_get()))
2858 return EACCES;
2859
2860 hfs_key_roll_args_t *args = (hfs_key_roll_args_t *)ap->a_data;
2861
2862 return hfs_key_roll_op(ap->a_context, ap->a_vp, args);
2863 }
2864
2865 case HFSIOC_GET_KEY_AUTO_ROLL: {
2866 if (!kauth_cred_issuser(kauth_cred_get()))
2867 return EACCES;
2868
2869 hfs_key_auto_roll_args_t *args = (hfs_key_auto_roll_args_t *)ap->a_data;
2870 if (args->api_version != HFS_KEY_AUTO_ROLL_API_VERSION_1)
2871 return ENOTSUP;
2872 args->flags = (ISSET(hfsmp->cproot_flags, CP_ROOT_AUTO_ROLL_OLD_CLASS_GENERATION)
2873 ? HFS_KEY_AUTO_ROLL_OLD_CLASS_GENERATION : 0);
2874 args->min_key_os_version = hfsmp->hfs_auto_roll_min_key_os_version;
2875 args->max_key_os_version = hfsmp->hfs_auto_roll_max_key_os_version;
2876 break;
2877 }
2878
2879 case HFSIOC_SET_KEY_AUTO_ROLL: {
2880 if (!kauth_cred_issuser(kauth_cred_get()))
2881 return EACCES;
2882
2883 hfs_key_auto_roll_args_t *args = (hfs_key_auto_roll_args_t *)ap->a_data;
2884 if (args->api_version != HFS_KEY_AUTO_ROLL_API_VERSION_1)
2885 return ENOTSUP;
2886 return cp_set_auto_roll(hfsmp, args);
2887 }
2888
2889 #endif // HFS_CONFIG_KEY_ROLL
2890
2891 #if CONFIG_PROTECT
2892 case F_TRANSCODEKEY:
2893 /*
2894 * This API is only supported when called via kernel so
2895 * a_fflag must be set to 1 (it's not possible to get here
2896 * with it set to 1 via fsctl).
2897 */
2898 if (ap->a_fflag != 1)
2899 return ENOTTY;
2900 return cp_vnode_transcode(vp, (cp_key_t *)ap->a_data);
2901
2902 case F_GETPROTECTIONLEVEL:
2903 return cp_get_root_major_vers (vp, (uint32_t *)ap->a_data);
2904
2905 case F_GETDEFAULTPROTLEVEL:
2906 return cp_get_default_level(vp, (uint32_t *)ap->a_data);
2907 #endif // CONFIG_PROTECT
2908
2909 case FIOPINSWAP:
2910 return hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT | HFS_DATALESS_PIN,
2911 NULL);
2912
2913 case FSIOC_CAS_BSDFLAGS: {
2914 struct fsioc_cas_bsdflags *cas = (void *)ap->a_data;
2915 struct cnode *cp = VTOC(vp);
2916 u_int32_t document_id = 0;
2917 bool need_truncate = false;
2918 int decmpfs_reset_state = 0;
2919 int error;
2920
2921 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2922 return (EROFS);
2923 }
2924
2925 /* Don't allow modification of the journal. */
2926 if (hfs_is_journal_file(hfsmp, cp)) {
2927 return (EPERM);
2928 }
2929
2930 // Check if we need to set UF_COMPRESSED.
2931 // If so, ask decmpfs if we're allowed to (and if so, if we need to truncate
2932 // the data fork to 0).
2933 if (!(cas->expected_flags & UF_COMPRESSED) && (cas->new_flags & UF_COMPRESSED)) {
2934 struct vnode_attr vap;
2935 VATTR_INIT(&vap);
2936 VATTR_SET(&vap, va_flags, cas->new_flags);
2937
2938 error = decmpfs_update_attributes(vp, &vap);
2939 if (error) {
2940 return (error);
2941 }
2942
2943 // Similar to hfs_vnop_setattr(), we call decmpfs_update_attributes()
2944 // as it is the ultimate arbiter of whether or not UF_COMPRESSED can be set.
2945 // (If the decmpfs xattr is not present or invalid, for example,
2946 // UF_COMPRESSED should *not* be set.)
2947 // It will also tell us if we need to truncate the data fork to 0.
2948 if (!(vap.va_flags & UF_COMPRESSED)) {
2949 // The request to update UF_COMPRESSED is denied.
2950 // (Note that decmpfs_update_attributes() won't touch va_active
2951 // in this case.) Error out.
2952 return (EPERM);
2953 }
2954
2955 if (VATTR_IS_ACTIVE(&vap, va_data_size) && (vap.va_data_size == 0)) {
2956 // We must also truncate this file's data fork to 0.
2957 need_truncate = true;
2958 }
2959 }
2960
2961 if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
2962 return (error);
2963 }
2964
2965 cas->actual_flags = cp->c_bsdflags;
2966 if (cas->actual_flags != cas->expected_flags) {
2967 hfs_unlock(cp);
2968 return (0);
2969 }
2970
2971 //
2972 // Check if we'll need a document_id. If so, we need to drop the lock
2973 // (to avoid any possible deadlock with the root vnode which has to get
2974 // locked to get the document id), generate the document_id, re-acquire
2975 // the lock, and perform the CAS check again. We do it in this sequence
2976 // in order to avoid throwing away document_ids in the case where the
2977 // CAS check fails. Note that it can still happen, but by performing
2978 // the check first, hopefully we can reduce the occurrence.
2979 //
2980 if ((cas->new_flags & UF_TRACKED) && !(VTOC(vp)->c_bsdflags & UF_TRACKED)) {
2981 struct FndrExtendedDirInfo *fip = (struct FndrExtendedDirInfo *)((char *)&(VTOC(vp)->c_attr.ca_finderinfo) + 16);
2982 //
2983 // If the document_id is not set, get a new one. It will be set
2984 // on the file down below once we hold the cnode lock.
2985 //
2986 if (fip->document_id == 0) {
2987 //
2988 // Drat, we have to generate one. Unlock the cnode, do the
2989 // deed, re-lock the cnode, and then do the CAS check again
2990 // to see if we lost the race.
2991 //
2992 hfs_unlock(cp);
2993 if (hfs_generate_document_id(hfsmp, &document_id) != 0) {
2994 document_id = 0;
2995 }
2996 if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
2997 return (error);
2998 }
2999 cas->actual_flags = cp->c_bsdflags;
3000 if (cas->actual_flags != cas->expected_flags) {
3001 hfs_unlock(cp);
3002 return (0);
3003 }
3004 }
3005 }
3006
3007 // Attempt to truncate our data fork to 0 length, if necessary.
3008 if (need_truncate && (VTOF(vp)->ff_size)) {
3009 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
3010 // hfs_truncate will deal with the cnode lock
3011 error = hfs_truncate(vp, 0, IO_NDELAY, 0, ap->a_context);
3012 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
3013 }
3014
3015 if (!error)
3016 error = hfs_set_bsd_flags(hfsmp, cp, cas->new_flags,
3017 document_id, ap->a_context,
3018 &decmpfs_reset_state);
3019 if (error == 0) {
3020 error = hfs_update(vp, 0);
3021 }
3022 hfs_unlock(cp);
3023 if (error) {
3024 return (error);
3025 }
3026
3027 #if HFS_COMPRESSION
3028 if (decmpfs_reset_state) {
3029 /*
3030 * we've changed the UF_COMPRESSED flag, so reset the decmpfs state for this cnode
3031 * but don't do it while holding the hfs cnode lock
3032 */
3033 decmpfs_cnode *dp = VTOCMP(vp);
3034 if (!dp) {
3035 /*
3036 * call hfs_lazy_init_decmpfs_cnode() to make sure that the decmpfs_cnode
3037 * is filled in; we need a decmpfs_cnode to prevent decmpfs state changes
3038 * on this file if it's locked
3039 */
3040 dp = hfs_lazy_init_decmpfs_cnode(VTOC(vp));
3041 if (!dp) {
3042 /* failed to allocate a decmpfs_cnode */
3043 return ENOMEM; /* what should this be? */
3044 }
3045 }
3046 decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
3047 }
3048 #endif
3049 break; // return 0 below
3050 }
3051
3052 default:
3053 return (ENOTTY);
3054 }
3055
3056 return 0;
3057 }
3058
3059 /*
3060 * select
3061 */
3062 int
3063 hfs_vnop_select(__unused struct vnop_select_args *ap)
3064 /*
3065 struct vnop_select_args {
3066 vnode_t a_vp;
3067 int a_which;
3068 int a_fflags;
3069 void *a_wql;
3070 vfs_context_t a_context;
3071 };
3072 */
3073 {
3074 /*
3075 * We should really check to see if I/O is possible.
3076 */
3077 return (1);
3078 }
3079
3080 /*
3081 * Converts a logical block number to a physical block, and optionally returns
3082 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
3083 * The physical block number is based on the device block size, currently it's 512.
3084 * The block run is returned in logical blocks, and is the REMAINING number of blocks in the run.
3085 */
3086 int
3087 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
3088 {
3089 struct filefork *fp = VTOF(vp);
3090 struct hfsmount *hfsmp = VTOHFS(vp);
3091 int retval = E_NONE;
3092 u_int32_t logBlockSize;
3093 size_t bytesContAvail = 0;
3094 off_t blockposition;
3095 int lockExtBtree;
3096 int lockflags = 0;
3097
3098 /*
3099 * Check for underlying vnode requests and ensure that logical
3100 * to physical mapping is requested.
3101 */
3102 if (vpp != NULL)
3103 *vpp = hfsmp->hfs_devvp;
3104 if (bnp == NULL)
3105 return (0);
3106
3107 logBlockSize = GetLogicalBlockSize(vp);
3108 blockposition = (off_t)bn * logBlockSize;
3109
3110 lockExtBtree = overflow_extents(fp);
3111
3112 if (lockExtBtree)
3113 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
3114
3115 retval = MacToVFSError(
3116 MapFileBlockC (HFSTOVCB(hfsmp),
3117 (FCB*)fp,
3118 MAXPHYSIO,
3119 blockposition,
3120 bnp,
3121 &bytesContAvail));
3122
3123 if (lockExtBtree)
3124 hfs_systemfile_unlock(hfsmp, lockflags);
3125
3126 if (retval == E_NONE) {
3127 /* Figure out how many read ahead blocks there are */
3128 if (runp != NULL) {
3129 if (can_cluster(logBlockSize)) {
3130 /* Make sure this result never goes negative: */
3131 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
3132 } else {
3133 *runp = 0;
3134 }
3135 }
3136 }
3137 return (retval);
3138 }
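/*
 * [Editor's note: worked example, not part of the original source.]
 * Assuming a logical block size of 4096 bytes and 512-byte device blocks:
 * a request for logical block bn = 10 gives blockposition = 10 * 4096 =
 * 40960 bytes into the fork. MapFileBlockC() returns in *bnp the 512-byte
 * device block holding that offset and, in bytesContAvail, how much of the
 * extent remains contiguous. If bytesContAvail came back as 65536 bytes,
 * the run reported to the caller would be *runp = 65536/4096 - 1 = 15
 * additional logical blocks readable without another mapping call.
 */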
3139
3140 /*
3141 * Convert logical block number to file offset.
3142 */
3143 int
3144 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
3145 /*
3146 struct vnop_blktooff_args {
3147 vnode_t a_vp;
3148 daddr64_t a_lblkno;
3149 off_t *a_offset;
3150 };
3151 */
3152 {
3153 if (ap->a_vp == NULL)
3154 return (EINVAL);
3155 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
3156
3157 return(0);
3158 }
3159
3160 /*
3161 * Convert file offset to logical block number.
3162 */
3163 int
3164 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
3165 /*
3166 struct vnop_offtoblk_args {
3167 vnode_t a_vp;
3168 off_t a_offset;
3169 daddr64_t *a_lblkno;
3170 };
3171 */
3172 {
3173 if (ap->a_vp == NULL)
3174 return (EINVAL);
3175 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
3176
3177 return(0);
3178 }
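/*
 * [Editor's note: worked example, not part of the original source.]
 * The two conversions above are inverses up to truncation. With a logical
 * block size of 4096: blktooff maps lblkno 3 to offset 3 * 4096 = 12288,
 * while offtoblk maps any offset in [12288, 16383] back to lblkno 3,
 * since the division truncates toward zero.
 */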
3179
3180 /*
3181 * Map file offset to physical block number.
3182 *
3183 * If this function is called for write operation, and if the file
3184 * had virtual blocks allocated (delayed allocation), real blocks
3185 * are allocated by calling ExtendFileC().
3186 *
3187 * If this function is called for read operation, and if the file
3188 * had virtual blocks allocated (delayed allocation), no change
3189 * to the size of file is done, and if required, rangelist is
3190 * searched for mapping.
3191 *
3192 * System file cnodes are expected to be locked (shared or exclusive).
3193 *
3194 * -- INVALID RANGES --
3195 *
3196 * Invalid ranges are used to keep track of where we have extended a
3197 * file, but have not yet written that data to disk. In the past we
3198 * would clear up the invalid ranges as we wrote to those areas, but
3199 * before data was actually flushed to disk. The problem with that
3200 * approach is that the data can be left in the cache and is therefore
3201 * still not valid on disk. So now we clear up the ranges here, when
3202 * the flags field has VNODE_WRITE set, indicating a write is about to
3203 * occur. This isn't ideal (ideally we want to clear them up when
3204 * we know the data has been successfully written), but it's the best we
3205 * can do.
3206 *
3207 * For reads, we use the invalid ranges here in block map to indicate
3208 * to the caller that the data should be zeroed (a_bpn == -1). We
3209 * have to be careful about what ranges we return to the cluster code.
3210 * Currently the cluster code can only handle non-rounded values for
3211 * the EOF; it cannot handle funny sized ranges in the middle of the
3212 * file (the main problem is that it sends down odd sized I/Os to the
3213 * disk). Our code currently works because whilst the very first
3214 * offset and the last offset in the invalid ranges are not aligned,
3215 * gaps in the invalid ranges between the first and last, have to be
3216 * aligned (because we always write page sized blocks). For example,
3217 * consider this arrangement:
3218 *
3219 * +-------------+-----+-------+------+
3220 * | |XXXXX| |XXXXXX|
3221 * +-------------+-----+-------+------+
3222 * a b c d
3223 *
3224 * This shows two invalid ranges <a, b> and <c, d>. Whilst a and d
3225 * are not necessarily aligned, b and c *must* be.
3226 *
3227 * Zero-filling occurs in a number of ways:
3228 *
3229 * 1. When a read occurs and we return with a_bpn == -1.
3230 *
3231 * 2. When hfs_fsync or hfs_filedone calls hfs_flush_invalid_ranges
3232 * which will cause us to iterate over the ranges bringing in
3233 * pages that are not present in the cache and zeroing them. Any
3234 * pages that are already in the cache are left untouched. Note
3235 * that hfs_fsync does not always flush invalid ranges.
3236 *
3237 * 3. When we extend a file we zero out from the old EOF to the end
3238 * of the page. It would be nice if we didn't have to do this if
3239 * the page wasn't present (and could defer it), but because of
3240 * the problem described above, we have to.
3241 *
3242 * The invalid ranges are also used to restrict the size that we write
3243 * out on disk: see hfs_prepare_fork_for_update.
3244 *
3245 * Note that invalid ranges are ignored when neither the VNODE_READ or
3246 * the VNODE_WRITE flag is specified. This is useful for the
3247 * F_LOG2PHYS* fcntls which are not interested in invalid ranges: they
3248 * just want to know whether blocks are physically allocated or not.
3249 */
3250 int
3251 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
3252 /*
3253 struct vnop_blockmap_args {
3254 vnode_t a_vp;
3255 off_t a_foffset;
3256 size_t a_size;
3257 daddr64_t *a_bpn;
3258 size_t *a_run;
3259 void *a_poff;
3260 int a_flags;
3261 vfs_context_t a_context;
3262 };
3263 */
3264 {
3265 struct vnode *vp = ap->a_vp;
3266 struct cnode *cp;
3267 struct filefork *fp;
3268 struct hfsmount *hfsmp;
3269 size_t bytesContAvail = ap->a_size;
3270 int retval = E_NONE;
3271 int syslocks = 0;
3272 int lockflags = 0;
3273 struct rl_entry *invalid_range;
3274 enum rl_overlaptype overlaptype;
3275 int started_tr = 0;
3276 int tooklock = 0;
3277
3278 #if HFS_COMPRESSION
3279 if (VNODE_IS_RSRC(vp)) {
3280 /* allow blockmaps to the resource fork */
3281 } else {
3282 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
3283 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
3284 switch(state) {
3285 case FILE_IS_COMPRESSED:
3286 return ENOTSUP;
3287 case FILE_IS_CONVERTING:
3288 /* if FILE_IS_CONVERTING, we allow blockmap */
3289 break;
3290 default:
3291 printf("invalid state %d for compressed file\n", state);
3292 /* fall through */
3293 }
3294 }
3295 }
3296 #endif /* HFS_COMPRESSION */
3297
3298 /* Do not allow blockmap operation on a directory */
3299 if (vnode_isdir(vp)) {
3300 return (ENOTSUP);
3301 }
3302
3303 /*
3304 * Check for underlying vnode requests and ensure that logical
3305 * to physical mapping is requested.
3306 */
3307 if (ap->a_bpn == NULL)
3308 return (0);
3309
3310 hfsmp = VTOHFS(vp);
3311 cp = VTOC(vp);
3312 fp = VTOF(vp);
3313
3314 if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
3315 if (cp->c_lockowner != current_thread()) {
3316 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
3317 tooklock = 1;
3318 }
3319
3320 // For reads, check the invalid ranges
3321 if (ISSET(ap->a_flags, VNODE_READ)) {
3322 if (ap->a_foffset >= fp->ff_size) {
3323 retval = ERANGE;
3324 goto exit;
3325 }
3326
3327 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
3328 ap->a_foffset + (off_t)bytesContAvail - 1,
3329 &invalid_range);
3330 switch(overlaptype) {
3331 case RL_MATCHINGOVERLAP:
3332 case RL_OVERLAPCONTAINSRANGE:
3333 case RL_OVERLAPSTARTSBEFORE:
3334 /* There's no valid block for this byte offset */
3335 *ap->a_bpn = (daddr64_t)-1;
3336 /* There's no point limiting the amount to be returned
3337 * if the invalid range that was hit extends all the way
3338 * to the EOF (i.e. there's no valid bytes between the
3339 * end of this range and the file's EOF):
3340 */
3341 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
3342 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
3343 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
3344 }
3345
3346 retval = 0;
3347 goto exit;
3348
3349 case RL_OVERLAPISCONTAINED:
3350 case RL_OVERLAPENDSAFTER:
3351 /* The range of interest hits an invalid block before the end: */
3352 if (invalid_range->rl_start == ap->a_foffset) {
3353 /* There's actually no valid information to be had starting here: */
3354 *ap->a_bpn = (daddr64_t)-1;
3355 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
3356 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
3357 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
3358 }
3359
3360 retval = 0;
3361 goto exit;
3362 } else {
3363 /*
3364 * Sadly, the lower layers don't like us to
3365 * return unaligned ranges, so we skip over
3366 * any invalid ranges here that are less than
3367 * a page: zeroing of those bits is not our
3368 * responsibility (it's dealt with elsewhere).
3369 */
3370 do {
3371 off_t rounded_start = round_page_64(invalid_range->rl_start);
3372 if ((off_t)bytesContAvail < rounded_start - ap->a_foffset)
3373 break;
3374 if (rounded_start < invalid_range->rl_end + 1) {
3375 bytesContAvail = rounded_start - ap->a_foffset;
3376 break;
3377 }
3378 } while ((invalid_range = TAILQ_NEXT(invalid_range,
3379 rl_link)));
3380 }
3381 break;
3382
3383 case RL_NOOVERLAP:
3384 break;
3385 } // switch
3386 }
3387 }
3388
3389 #if CONFIG_PROTECT
3390 if (cp->c_cpentry) {
3391 const int direction = (ISSET(ap->a_flags, VNODE_WRITE)
3392 ? VNODE_WRITE : VNODE_READ);
3393
3394 cp_io_params_t io_params;
3395 cp_io_params(hfsmp, cp->c_cpentry,
3396 off_rsrc_make(ap->a_foffset, VNODE_IS_RSRC(vp)),
3397 direction, &io_params);
3398
3399 if (io_params.max_len < (off_t)bytesContAvail)
3400 bytesContAvail = io_params.max_len;
3401
3402 if (io_params.phys_offset != -1) {
3403 *ap->a_bpn = ((io_params.phys_offset + hfsmp->hfsPlusIOPosOffset)
3404 / hfsmp->hfs_logical_block_size);
3405
3406 retval = 0;
3407 goto exit;
3408 }
3409 }
3410 #endif
3411
3412 retry:
3413
3414 /* Check virtual blocks only when performing write operation */
3415 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
3416 if (hfs_start_transaction(hfsmp) != 0) {
3417 retval = EINVAL;
3418 goto exit;
3419 } else {
3420 started_tr = 1;
3421 }
3422 syslocks = SFL_EXTENTS | SFL_BITMAP;
3423
3424 } else if (overflow_extents(fp)) {
3425 syslocks = SFL_EXTENTS;
3426 }
3427
3428 if (syslocks)
3429 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
3430
3431 /*
3432 * Check for any delayed allocations.
3433 */
3434 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
3435 int64_t actbytes;
3436 u_int32_t loanedBlocks;
3437
3438 //
3439 // Make sure we have a transaction. It's possible
3440 // that we came in and fp->ff_unallocblocks was zero
3441 // but during the time we blocked acquiring the extents
3442 // btree, ff_unallocblocks became non-zero and so we
3443 // will need to start a transaction.
3444 //
3445 if (started_tr == 0) {
3446 if (syslocks) {
3447 hfs_systemfile_unlock(hfsmp, lockflags);
3448 syslocks = 0;
3449 }
3450 goto retry;
3451 }
3452
3453 /*
3454 * Note: ExtendFileC will release any blocks on loan and
3455 * acquire real blocks. So we ask to extend by zero bytes
3456 * since ExtendFileC will account for the virtual blocks.
3457 */
3458
3459 loanedBlocks = fp->ff_unallocblocks;
3460 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
3461 kEFAllMask | kEFNoClumpMask, &actbytes);
3462
3463 if (retval) {
3464 fp->ff_unallocblocks = loanedBlocks;
3465 cp->c_blocks += loanedBlocks;
3466 fp->ff_blocks += loanedBlocks;
3467
3468 hfs_lock_mount (hfsmp);
3469 hfsmp->loanedBlocks += loanedBlocks;
3470 hfs_unlock_mount (hfsmp);
3471
3472 hfs_systemfile_unlock(hfsmp, lockflags);
3473 cp->c_flag |= C_MODIFIED;
3474 if (started_tr) {
3475 (void) hfs_update(vp, 0);
3476 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3477
3478 hfs_end_transaction(hfsmp);
3479 started_tr = 0;
3480 }
3481 goto exit;
3482 }
3483 }
3484
3485 #if NEW_XATTR
3486 // check for the alternate xattr vnode
3487 if (vp == hfsmp->hfs_attrdata_vp) {
3488 HFSPlusExtentDescriptor real_fext;
3489 size_t availableBytes;
3490 u_int32_t sectorsPerBlock; // Number of sectors per allocation block
3491 u_int32_t sectorSize;
3492 uint64_t f_offset;
3493
3494 if (!hfs_xattr_fext_find(&hfsmp->hfs_xattr_io, hfsmp->blockSize,
3495 ap->a_foffset, &real_fext, &f_offset)) {
3496 panic("cannot find xattr fext for %llu", f_offset);
3497 }
3498
3499 sectorSize = hfsmp->hfs_logical_block_size;
3500 // Compute the number of sectors in an allocation block
3501 sectorsPerBlock = hfsmp->blockSize / sectorSize;
3502
3503 *ap->a_bpn = (f_offset / hfsmp->blockSize) * sectorsPerBlock;
3504 availableBytes = real_fext.blockCount * hfsmp->blockSize - (f_offset - (real_fext.startBlock * hfsmp->blockSize));
3505 if (availableBytes < bytesContAvail) {
3506 bytesContAvail = availableBytes;
3507 }
3508
3509 goto got_fext;
3510 }
3511 #endif
3512
3513 retval = MapFileBlockC(hfsmp, (FCB *)fp, bytesContAvail, ap->a_foffset,
3514 ap->a_bpn, &bytesContAvail);
3515
3516 #if NEW_XATTR
3517 got_fext:
3518 #endif
3519
3520 if (syslocks) {
3521 hfs_systemfile_unlock(hfsmp, lockflags);
3522 syslocks = 0;
3523 }
3524
3525 if (retval) {
3526 /* On write, always return error because virtual blocks, if any,
3527 * should have been allocated in ExtendFileC(). We do not
3528 * allocate virtual blocks on read, therefore return error
3529 * only if no virtual blocks are allocated. Otherwise we search
3530 * rangelist for zero-fills
3531 */
3532 if ((MacToVFSError(retval) != ERANGE) ||
3533 (ap->a_flags & VNODE_WRITE) ||
3534 ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
3535 goto exit;
3536 }
3537
3538 /* Validate if the start offset is within logical file size */
3539 if (ap->a_foffset >= fp->ff_size) {
3540 goto exit;
3541 }
3542
3543 /*
3544 * At this point, we have encountered a failure during
3545 * MapFileBlockC that resulted in ERANGE, and we are not
3546 * servicing a write, and there are borrowed blocks.
3547 *
3548 * However, the cluster layer will not call blockmap for
3549 * blocks that are borrowed and in-cache. We have to assume
3550 * that because we observed ERANGE being emitted from
3551 * MapFileBlockC, this extent range is not valid on-disk. So
3552 * we treat this as a mapping that needs to be zero-filled
3553 * prior to reading.
3554 */
3555
3556 if (fp->ff_size - ap->a_foffset < (off_t)bytesContAvail)
3557 bytesContAvail = fp->ff_size - ap->a_foffset;
3558
3559 *ap->a_bpn = (daddr64_t) -1;
3560 retval = 0;
3561
3562 goto exit;
3563 }
3564
3565 exit:
3566 if (retval == 0) {
3567 if (ISSET(ap->a_flags, VNODE_WRITE)) {
3568 struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges);
3569
3570 // See if we might be overlapping invalid ranges...
3571 if (r && (ap->a_foffset + (off_t)bytesContAvail) > r->rl_start) {
3572 /*
3573 * Mark the file as needing an update if we think the
3574 * on-disk EOF has changed.
3575 */
3576 if (ap->a_foffset <= r->rl_start)
3577 SET(cp->c_flag, C_MODIFIED);
3578
3579 /*
3580 * This isn't the ideal place to put this. Ideally, we
3581 * should do something *after* we have successfully
3582 * written to the range, but that's difficult to do
3583 * because we cannot take locks in the callback. At
3584 * present, the cluster code will call us with VNODE_WRITE
3585 * set just before it's about to write the data so we know
3586 * that data is about to be written. If we get an I/O
3587 * error at this point then chances are the metadata
3588 * update to follow will also have an I/O error so the
3589 * risk here is small.
3590 */
3591 rl_remove(ap->a_foffset, ap->a_foffset + bytesContAvail - 1,
3592 &fp->ff_invalidranges);
3593
3594 if (!TAILQ_FIRST(&fp->ff_invalidranges)) {
3595 cp->c_flag &= ~C_ZFWANTSYNC;
3596 cp->c_zftimeout = 0;
3597 }
3598 }
3599 }
3600
3601 if (ap->a_run)
3602 *ap->a_run = bytesContAvail;
3603
3604 if (ap->a_poff)
3605 *(int *)ap->a_poff = 0;
3606 }
3607
3608 if (started_tr) {
3609 hfs_update(vp, TRUE);
3610 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3611 hfs_end_transaction(hfsmp);
3612 started_tr = 0;
3613 }
3614
3615 if (tooklock)
3616 hfs_unlock(cp);
3617
3618 return (MacToVFSError(retval));
3619 }
3620
3621 /*
3622 * prepare and issue the I/O
3623 * buf_strategy knows how to deal
3624 * with requests that require
3625 * fragmented I/Os
3626 */
3627 int
3628 hfs_vnop_strategy(struct vnop_strategy_args *ap)
3629 {
3630 buf_t bp = ap->a_bp;
3631 vnode_t vp = buf_vnode(bp);
3632 int error = 0;
3633
3634 /* Mark buffer as containing static data if cnode flag set */
3635 if (VTOC(vp)->c_flag & C_SSD_STATIC) {
3636 buf_markstatic(bp);
3637 }
3638
3639 /* Mark buffer as containing greedy-mode data if cnode flag set */
3640 if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) {
3641 bufattr_markgreedymode(buf_attr(bp));
3642 }
3643
3644 /* mark buffer as containing burst mode data if cnode flag set */
3645 if (VTOC(vp)->c_flag & C_IO_ISOCHRONOUS) {
3646 bufattr_markisochronous(buf_attr(bp));
3647 }
3648
3649 #if CONFIG_PROTECT
3650 error = cp_handle_strategy(bp);
3651
3652 if (error)
3653 return error;
3654 #endif
3655
3656 error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
3657
3658 return error;
3659 }
3660
3661 int
3662 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vfs_context_t context)
3663 {
3664 register struct cnode *cp = VTOC(vp);
3665 struct filefork *fp = VTOF(vp);
3666 kauth_cred_t cred = vfs_context_ucred(context);
3667 int retval;
3668 off_t bytesToAdd;
3669 off_t actualBytesAdded;
3670 off_t filebytes;
3671 u_int32_t fileblocks;
3672 int blksize;
3673 struct hfsmount *hfsmp;
3674 int lockflags;
3675 int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES);
3676
3677 blksize = VTOVCB(vp)->blockSize;
3678 fileblocks = fp->ff_blocks;
3679 filebytes = (off_t)fileblocks * (off_t)blksize;
3680
3681 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_START,
3682 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
3683
3684 if (length < 0)
3685 return (EINVAL);
3686
3687 /* This should only happen with a corrupt filesystem */
3688 if ((off_t)fp->ff_size < 0)
3689 return (EINVAL);
3690
3691 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
3692 return (EFBIG);
3693
3694 hfsmp = VTOHFS(vp);
3695
3696 retval = E_NONE;
3697
3698 /* Files that are changing size are not hot file candidates. */
3699 if (hfsmp->hfc_stage == HFC_RECORDING) {
3700 fp->ff_bytesread = 0;
3701 }
3702
3703 /*
3704 * We cannot just check if fp->ff_size == length (as an optimization)
3705 * since there may be extra physical blocks that also need truncation.
3706 */
3707 #if QUOTA
3708 if ((retval = hfs_getinoquota(cp)))
3709 return(retval);
3710 #endif /* QUOTA */
3711
3712 /*
3713 * Lengthen the size of the file. We must ensure that the
3714 * last byte of the file is allocated. Since the smallest
3715 * value of ff_size is 0, length will be at least 1.
3716 */
3717 if (length > (off_t)fp->ff_size) {
3718 #if QUOTA
3719 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
3720 cred, 0);
3721 if (retval)
3722 goto Err_Exit;
3723 #endif /* QUOTA */
3724 /*
3725 * If we don't have enough physical space then
3726 * we need to extend the physical size.
3727 */
3728 if (length > filebytes) {
3729 int eflags;
3730 u_int32_t blockHint = 0;
3731
3732 /* All or nothing and don't round up to clumpsize. */
3733 eflags = kEFAllMask | kEFNoClumpMask;
3734
3735 if (cred && (suser(cred, NULL) != 0)) {
3736 eflags |= kEFReserveMask; /* keep a reserve */
3737 }
3738
3739 /*
3740 * Allocate Journal and Quota files in metadata zone.
3741 */
3742 if (filebytes == 0 &&
3743 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
3744 hfs_virtualmetafile(cp)) {
3745 eflags |= kEFMetadataMask;
3746 blockHint = hfsmp->hfs_metazone_start;
3747 }
3748 if (hfs_start_transaction(hfsmp) != 0) {
3749 retval = EINVAL;
3750 goto Err_Exit;
3751 }
3752
3753 /* Protect extents b-tree and allocation bitmap */
3754 lockflags = SFL_BITMAP;
3755 if (overflow_extents(fp))
3756 lockflags |= SFL_EXTENTS;
3757 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3758
3759 /*
3760 * Keep growing the file as long as the current EOF is
3761 * less than the desired value.
3762 */
3763 while ((length > filebytes) && (retval == E_NONE)) {
3764 bytesToAdd = length - filebytes;
3765 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
3766 (FCB*)fp,
3767 bytesToAdd,
3768 blockHint,
3769 eflags,
3770 &actualBytesAdded));
3771
3772 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
3773 if (actualBytesAdded == 0 && retval == E_NONE) {
3774 if (length > filebytes)
3775 length = filebytes;
3776 break;
3777 }
3778 } /* endwhile */
3779
3780 hfs_systemfile_unlock(hfsmp, lockflags);
3781
3782 if (hfsmp->jnl) {
3783 hfs_update(vp, 0);
3784 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3785 }
3786
3787 hfs_end_transaction(hfsmp);
3788
3789 if (retval)
3790 goto Err_Exit;
3791
3792 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE,
3793 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
3794 }
3795
3796 if (ISSET(flags, IO_NOZEROFILL)) {
3797 // An optimisation for the hibernation file
3798 if (vnode_isswap(vp))
3799 rl_remove_all(&fp->ff_invalidranges);
3800 } else {
3801 if (!vnode_issystem(vp) && retval == E_NONE) {
3802 if (length > (off_t)fp->ff_size) {
3803 struct timeval tv;
3804
3805 /* Extending the file: time to fill out the current last page with zeroes? */
3806 if (fp->ff_size & PAGE_MASK_64) {
3807 /* There might be some valid data at the start of the (current) last page
3808 of the file, so zero out the remainder of that page to ensure the
3809 entire page contains valid data. */
3810 hfs_unlock(cp);
3811 retval = hfs_zero_eof_page(vp, length);
3812 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
3813 if (retval) goto Err_Exit;
3814 }
3815 microuptime(&tv);
3816 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
3817 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
3818 }
3819 } else {
3820 panic("hfs_truncate: invoked on non-UBC object?!");
3821 };
3822 }
3823 if (suppress_times == 0) {
3824 cp->c_touch_modtime = TRUE;
3825 }
3826 fp->ff_size = length;
3827
3828 } else { /* Shorten the size of the file */
3829
3830 // An optimisation for the hibernation file
3831 if (ISSET(flags, IO_NOZEROFILL) && vnode_isswap(vp)) {
3832 rl_remove_all(&fp->ff_invalidranges);
3833 } else if ((off_t)fp->ff_size > length) {
3834 /* Any space previously marked as invalid is now irrelevant: */
3835 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
3836 }
3837
3838 /*
3839 * Account for any unmapped blocks. Note that the new
3840 * file length can still end up with unmapped blocks.
3841 */
3842 if (fp->ff_unallocblocks > 0) {
3843 u_int32_t finalblks;
3844 u_int32_t loanedBlocks;
3845
3846 hfs_lock_mount(hfsmp);
3847 loanedBlocks = fp->ff_unallocblocks;
3848 cp->c_blocks -= loanedBlocks;
3849 fp->ff_blocks -= loanedBlocks;
3850 fp->ff_unallocblocks = 0;
3851
3852 hfsmp->loanedBlocks -= loanedBlocks;
3853
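/*
 * If the new EOF still extends past the physically allocated blocks,
 * re-loan just enough virtual blocks to cover it so that ff_blocks and
 * c_blocks continue to reflect the file's logical size.
 */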
3854 finalblks = (length + blksize - 1) / blksize;
3855 if (finalblks > fp->ff_blocks) {
3856 /* calculate required unmapped blocks */
3857 loanedBlocks = finalblks - fp->ff_blocks;
3858 hfsmp->loanedBlocks += loanedBlocks;
3859
3860 fp->ff_unallocblocks = loanedBlocks;
3861 cp->c_blocks += loanedBlocks;
3862 fp->ff_blocks += loanedBlocks;
3863 }
3864 hfs_unlock_mount (hfsmp);
3865 }
3866
3867 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
3868 if (hfs_start_transaction(hfsmp) != 0) {
3869 retval = EINVAL;
3870 goto Err_Exit;
3871 }
3872
3873 if (fp->ff_unallocblocks == 0) {
3874 /* Protect extents b-tree and allocation bitmap */
3875 lockflags = SFL_BITMAP;
3876 if (overflow_extents(fp))
3877 lockflags |= SFL_EXTENTS;
3878 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3879
3880 retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0,
3881 FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));
3882
3883 hfs_systemfile_unlock(hfsmp, lockflags);
3884 }
3885 if (hfsmp->jnl) {
3886 if (retval == 0) {
3887 fp->ff_size = length;
3888 }
3889 hfs_update(vp, 0);
3890 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3891 }
3892 hfs_end_transaction(hfsmp);
3893
3894 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
3895 if (retval)
3896 goto Err_Exit;
3897 #if QUOTA
3898 /* These are bytesreleased */
3899 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
3900 #endif /* QUOTA */
3901
3902 //
3903 // Unlike when growing a file, we adjust the hotfile block count here
3904 // instead of deeper down in the block allocation code because we do
3905 // not necessarily have a vnode or "fcb" at the time we're deleting
3906 // the file and so we wouldn't know if it was hotfile cached or not
3907 //
3908 hfs_hotfile_adjust_blocks(vp, (int64_t)((savedbytes - filebytes) / blksize));
3909
3910
3911 /*
3912 * Only set update flag if the logical length changes & we aren't
3913 * suppressing modtime updates.
3914 */
3915 if (((off_t)fp->ff_size != length) && (suppress_times == 0)) {
3916 cp->c_touch_modtime = TRUE;
3917 }
3918 fp->ff_size = length;
3919 }
3920 if (cp->c_mode & (S_ISUID | S_ISGID)) {
3921 if (!vfs_context_issuser(context))
3922 cp->c_mode &= ~(S_ISUID | S_ISGID);
3923 }
3924 cp->c_flag |= C_MODIFIED;
3925 cp->c_touch_chgtime = TRUE; /* status changed */
3926 if (suppress_times == 0) {
3927 cp->c_touch_modtime = TRUE; /* file data was modified */
3928
3929 /*
3930 * If we are not suppressing the modtime update, then
3931 * update the gen count as well.
3932 */
3933 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) {
3934 hfs_incr_gencount(cp);
3935 }
3936 }
3937
3938 retval = hfs_update(vp, 0);
3939 if (retval) {
3940 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE,
3941 -1, -1, -1, retval, 0);
3942 }
3943
3944 Err_Exit:
3945
3946 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_END,
3947 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
3948
3949 return (retval);
3950 }
3951
3952 /*
3953 * Preparation which must be done prior to deleting the catalog record
3954 * of a file or directory. In order to keep the on-disk state as safe as possible,
3955 * we remove the catalog entry before releasing the bitmap blocks and the
3956 * overflow extent records. However, some work must be done prior to deleting
3957 * the catalog record.
3958 *
3959 * When calling this function, the cnode must exist both in memory and on-disk.
3960 * If there are both resource fork and data fork vnodes, this function should
3961 * be called on both.
3962 */
3963
3964 int
3965 hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {
3966
3967 struct filefork *fp = VTOF(vp);
3968 struct cnode *cp = VTOC(vp);
3969 #if QUOTA
3970 int retval = 0;
3971 #endif /* QUOTA */
3972
3973 /* Cannot truncate an HFS directory! */
3974 if (vnode_isdir(vp)) {
3975 return (EISDIR);
3976 }
3977
3978 /*
3979 * See the comment below in hfs_truncate for why we need to call
3980 * setsize here. Essentially we want to avoid pending IO if we
3981 * already know that the blocks are going to be released here.
3982 * This function is only called when totally removing all storage for a file, so
3983 * we can take a shortcut and immediately setsize (0);
3984 */
3985 ubc_setsize(vp, 0);
3986
3987 /* This should only happen with a corrupt filesystem */
3988 if ((off_t)fp->ff_size < 0)
3989 return (EINVAL);
3990
3991 /*
3992 * We cannot just check if fp->ff_size == length (as an optimization)
3993 * since there may be extra physical blocks that also need truncation.
3994 */
3995 #if QUOTA
3996 if ((retval = hfs_getinoquota(cp))) {
3997 return(retval);
3998 }
3999 #endif /* QUOTA */
4000
4001 /* Wipe out any invalid ranges which have yet to be backed by disk */
4002 rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);
4003
4004 /*
4005 * Account for any unmapped blocks. Since we're deleting the
4006 * entire file, we don't have to worry about just shrinking
4007 * to a smaller number of borrowed blocks.
4008 */
4009 if (fp->ff_unallocblocks > 0) {
4010 u_int32_t loanedBlocks;
4011
4012 hfs_lock_mount (hfsmp);
4013 loanedBlocks = fp->ff_unallocblocks;
4014 cp->c_blocks -= loanedBlocks;
4015 fp->ff_blocks -= loanedBlocks;
4016 fp->ff_unallocblocks = 0;
4017
4018 hfsmp->loanedBlocks -= loanedBlocks;
4019
4020 hfs_unlock_mount (hfsmp);
4021 }
4022
4023 return 0;
4024 }
4025
4026
4027 /*
4028 * Special wrapper around calling TruncateFileC. This function is usable
4029 * even when the catalog record does not exist any longer, making it ideal
4030 * for use when deleting a file. The simplification here is that we know
4031 * that we are releasing all blocks.
4032 *
4033 * Note that this function may be called when there is no vnode backing
4034 * the file fork in question. We may call this from hfs_vnop_inactive
4035 * to clear out resource fork data (and may not want to clear out the data
4036 * fork yet). As a result, we pointer-check both sets of inputs before
4037 * doing anything with them.
4038 *
4039 * The caller is responsible for saving off a copy of the filefork(s)
4040 * embedded within the cnode prior to calling this function. The pointers
4041 * supplied as arguments must be valid even if the cnode is no longer valid.
4042 */
4043
4044 int
4045 hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
4046 struct filefork *rsrcfork, u_int32_t fileid) {
4047
4048 off_t filebytes;
4049 u_int32_t fileblocks;
4050 int blksize = 0;
4051 int error = 0;
4052 int lockflags;
4053
4054 blksize = hfsmp->blockSize;
4055
4056 /* Data Fork */
4057 if (datafork) {
4058 off_t prev_filebytes;
4059
4060 datafork->ff_size = 0;
4061
4062 fileblocks = datafork->ff_blocks;
4063 filebytes = (off_t)fileblocks * (off_t)blksize;
4064 prev_filebytes = filebytes;
4065
4066 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
4067
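/*
 * Release the fork's space in HFS_BIGFILE_SIZE chunks: each pass lowers
 * the target 'filebytes' and truncates down to it inside its own
 * transaction, so no single journal transaction grows too large.
 */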
4068 while (filebytes > 0) {
4069 if (filebytes > HFS_BIGFILE_SIZE) {
4070 filebytes -= HFS_BIGFILE_SIZE;
4071 } else {
4072 filebytes = 0;
4073 }
4074
4075 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
4076 if (hfs_start_transaction(hfsmp) != 0) {
4077 error = EINVAL;
4078 break;
4079 }
4080
4081 if (datafork->ff_unallocblocks == 0) {
4082 /* Protect extents b-tree and allocation bitmap */
4083 lockflags = SFL_BITMAP;
4084 if (overflow_extents(datafork))
4085 lockflags |= SFL_EXTENTS;
4086 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4087
4088 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false));
4089
4090 hfs_systemfile_unlock(hfsmp, lockflags);
4091 }
4092 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
4093
4094 struct cnode *cp = datafork ? FTOC(datafork) : NULL;
4095 struct vnode *vp;
4096 vp = cp ? CTOV(cp, 0) : NULL;
4097 hfs_hotfile_adjust_blocks(vp, (int64_t)((prev_filebytes - filebytes) / blksize));
4098 prev_filebytes = filebytes;
4099
4100 /* Finish the transaction and start over if necessary */
4101 hfs_end_transaction(hfsmp);
4102
4103 if (error) {
4104 break;
4105 }
4106 }
4107 }
4108
4109 /* Resource fork */
4110 if (error == 0 && rsrcfork) {
4111 rsrcfork->ff_size = 0;
4112
4113 fileblocks = rsrcfork->ff_blocks;
4114 filebytes = (off_t)fileblocks * (off_t)blksize;
4115
4116 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
4117
4118 while (filebytes > 0) {
4119 if (filebytes > HFS_BIGFILE_SIZE) {
4120 filebytes -= HFS_BIGFILE_SIZE;
4121 } else {
4122 filebytes = 0;
4123 }
4124
4125 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
4126 if (hfs_start_transaction(hfsmp) != 0) {
4127 error = EINVAL;
4128 break;
4129 }
4130
4131 if (rsrcfork->ff_unallocblocks == 0) {
4132 /* Protect extents b-tree and allocation bitmap */
4133 lockflags = SFL_BITMAP;
4134 if (overflow_extents(rsrcfork))
4135 lockflags |= SFL_EXTENTS;
4136 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4137
4138 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false));
4139
4140 hfs_systemfile_unlock(hfsmp, lockflags);
4141 }
4142 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
4143
4144 /* Finish the transaction and start over if necessary */
4145 hfs_end_transaction(hfsmp);
4146
4147 if (error) {
4148 break;
4149 }
4150 }
4151 }
4152
4153 return error;
4154 }
4155
4156 errno_t hfs_ubc_setsize(vnode_t vp, off_t len, bool have_cnode_lock)
4157 {
4158 errno_t error;
4159
4160 /*
4161 * Call ubc_setsize to give the VM subsystem a chance to do
4162 * whatever it needs to with existing pages before we delete
4163 * blocks. Note that symlinks don't use the UBC so we'll
4164 * get back ENOENT in that case.
4165 */
4166 if (have_cnode_lock) {
4167 error = ubc_setsize_ex(vp, len, UBC_SETSIZE_NO_FS_REENTRY);
4168 if (error == EAGAIN) {
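/*
 * EAGAIN presumably means ubc_setsize_ex would have had to re-enter
 * the filesystem (which UBC_SETSIZE_NO_FS_REENTRY forbids) while we
 * hold the cnode lock, so drop the lock and retry without the flag.
 */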
4169 cnode_t *cp = VTOC(vp);
4170
4171 if (cp->c_truncatelockowner != current_thread())
4172 hfs_warn("hfs: hfs_ubc_setsize called without exclusive truncate lock!");
4173
4174 hfs_unlock(cp);
4175 error = ubc_setsize_ex(vp, len, 0);
4176 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
4177 }
4178 } else
4179 error = ubc_setsize_ex(vp, len, 0);
4180
4181 return error == ENOENT ? 0 : error;
4182 }
4183
4184 /*
4185 * Truncate a cnode to at most 'length' bytes, freeing (or adding) the
4186 * disk blocks as needed.
4187 */
4188 int
4189 hfs_truncate(struct vnode *vp, off_t length, int flags,
4190 int truncateflags, vfs_context_t context)
4191 {
4192 struct filefork *fp = VTOF(vp);
4193 off_t filebytes;
4194 u_int32_t fileblocks;
4195 int blksize;
4196 errno_t error = 0;
4197 struct cnode *cp = VTOC(vp);
4198 hfsmount_t *hfsmp = VTOHFS(vp);
4199
4200 /* Cannot truncate an HFS directory! */
4201 if (vnode_isdir(vp)) {
4202 return (EISDIR);
4203 }
4204 /* A swap file cannot change size. */
4205 if (vnode_isswap(vp) && length && !ISSET(flags, IO_NOAUTH)) {
4206 return (EPERM);
4207 }
4208
4209 blksize = hfsmp->blockSize;
4210 fileblocks = fp->ff_blocks;
4211 filebytes = (off_t)fileblocks * (off_t)blksize;
4212
4213 bool caller_has_cnode_lock = (cp->c_lockowner == current_thread());
4214
4215 error = hfs_ubc_setsize(vp, length, caller_has_cnode_lock);
4216 if (error)
4217 return error;
4218
4219 if (!caller_has_cnode_lock) {
4220 error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
4221 if (error)
4222 return error;
4223 }
4224
4225 if (vnode_islnk(vp) && cp->c_datafork->ff_symlinkptr) {
4226 hfs_free(cp->c_datafork->ff_symlinkptr, cp->c_datafork->ff_size);
4227 cp->c_datafork->ff_symlinkptr = NULL;
4228 }
4229
4230 // have to loop truncating or growing files that are
4231 // really big because otherwise transactions can get
4232 // enormous and consume too many kernel resources.
4233
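/*
 * Each pass below moves 'filebytes' at most HFS_BIGFILE_SIZE closer to
 * the target and calls do_hfs_truncate once, so every step runs in its
 * own transaction.
 */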
4234 if (length < filebytes) {
4235 while (filebytes > length) {
4236 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
4237 filebytes -= HFS_BIGFILE_SIZE;
4238 } else {
4239 filebytes = length;
4240 }
4241 error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
4242 if (error)
4243 break;
4244 }
4245 } else if (length > filebytes) {
4246 kauth_cred_t cred = vfs_context_ucred(context);
4247 const bool keep_reserve = cred && suser(cred, NULL) != 0;
4248
4249 if (hfs_freeblks(hfsmp, keep_reserve)
4250 < howmany(length - filebytes, blksize)) {
4251 error = ENOSPC;
4252 } else {
4253 while (filebytes < length) {
4254 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
4255 filebytes += HFS_BIGFILE_SIZE;
4256 } else {
4257 filebytes = length;
4258 }
4259 error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
4260 if (error)
4261 break;
4262 }
4263 }
4264 } else /* Same logical size */ {
4265
4266 error = do_hfs_truncate(vp, length, flags, truncateflags, context);
4267 }
4268 /* Files that are changing size are not hot file candidates. */
4269 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
4270 fp->ff_bytesread = 0;
4271 }
4272
4273 #if HFS_CONFIG_KEY_ROLL
4274 if (!error && cp->c_truncatelockowner == current_thread()) {
4275 hfs_key_roll_check(cp, true);
4276 }
4277 #endif
4278
4279 if (!caller_has_cnode_lock)
4280 hfs_unlock(cp);
4281
4282 // Make sure UBC's size matches up (in case we didn't completely succeed)
4283 errno_t err2 = hfs_ubc_setsize(vp, fp->ff_size, caller_has_cnode_lock);
4284 if (!error)
4285 error = err2;
4286
4287 return error;
4288 }
4289
4290
4291 /*
4292 * Preallocate file storage space.
4293 */
4294 int
4295 hfs_vnop_allocate(struct vnop_allocate_args /* {
4296 vnode_t a_vp;
4297 off_t a_length;
4298 u_int32_t a_flags;
4299 off_t *a_bytesallocated;
4300 off_t a_offset;
4301 vfs_context_t a_context;
4302 } */ *ap)
4303 {
4304 struct vnode *vp = ap->a_vp;
4305 struct cnode *cp;
4306 struct filefork *fp;
4307 ExtendedVCB *vcb;
4308 off_t length = ap->a_length;
4309 off_t startingPEOF;
4310 off_t moreBytesRequested;
4311 off_t actualBytesAdded;
4312 off_t filebytes;
4313 u_int32_t fileblocks;
4314 int retval, retval2;
4315 u_int32_t blockHint;
4316 u_int32_t extendFlags; /* For call to ExtendFileC */
4317 struct hfsmount *hfsmp;
4318 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
4319 int lockflags;
4320 time_t orig_ctime;
4321
4322 *(ap->a_bytesallocated) = 0;
4323
4324 if (!vnode_isreg(vp))
4325 return (EISDIR);
4326 if (length < (off_t)0)
4327 return (EINVAL);
4328
4329 cp = VTOC(vp);
4330
4331 orig_ctime = VTOC(vp)->c_ctime;
4332
4333 nspace_snapshot_event(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);
4334
4335 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
4336
4337 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
4338 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
4339 return (retval);
4340 }
4341
4342 fp = VTOF(vp);
4343 hfsmp = VTOHFS(vp);
4344 vcb = VTOVCB(vp);
4345
4346 fileblocks = fp->ff_blocks;
4347 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
4348
4349 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
4350 retval = EINVAL;
4351 goto Err_Exit;
4352 }
4353
4354 /* Fill in the flags word for the call to Extend the file */
4355
4356 extendFlags = kEFNoClumpMask;
4357 if (ap->a_flags & ALLOCATECONTIG)
4358 extendFlags |= kEFContigMask;
4359 if (ap->a_flags & ALLOCATEALL)
4360 extendFlags |= kEFAllMask;
4361 if (cred && suser(cred, NULL) != 0)
4362 extendFlags |= kEFReserveMask;
4363 if (hfs_virtualmetafile(cp))
4364 extendFlags |= kEFMetadataMask;
4365
4366 retval = E_NONE;
4367 blockHint = 0;
4368 startingPEOF = filebytes;
4369
4370 if (ap->a_flags & ALLOCATEFROMPEOF)
4371 length += filebytes;
4372 else if (ap->a_flags & ALLOCATEFROMVOL)
4373 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
4374
4375 /* If no changes are necessary, then we're done */
4376 if (filebytes == length)
4377 goto Std_Exit;
4378
4379 /*
4380 * Lengthen the size of the file. We must ensure that the
4381 * last byte of the file is allocated. Since the smallest
4382 * value of filebytes is 0, length will be at least 1.
4383 */
4384 if (length > filebytes) {
4385 if (ISSET(extendFlags, kEFAllMask)
4386 && (hfs_freeblks(hfsmp, ISSET(extendFlags, kEFReserveMask))
4387 < howmany(length - filebytes, hfsmp->blockSize))) {
4388 retval = ENOSPC;
4389 goto Err_Exit;
4390 }
4391
4392 off_t total_bytes_added = 0, orig_request_size;
4393
4394 orig_request_size = moreBytesRequested = length - filebytes;
4395
4396 #if QUOTA
4397 retval = hfs_chkdq(cp,
4398 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
4399 cred, 0);
4400 if (retval)
4401 goto Err_Exit;
4402
4403 #endif /* QUOTA */
4404 /*
4405 * Metadata zone checks.
4406 */
4407 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
4408 /*
4409 * Allocate Journal and Quota files in metadata zone.
4410 */
4411 if (hfs_virtualmetafile(cp)) {
4412 blockHint = hfsmp->hfs_metazone_start;
4413 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
4414 (blockHint <= hfsmp->hfs_metazone_end)) {
4415 /*
4416 * Move blockHint outside metadata zone.
4417 */
4418 blockHint = hfsmp->hfs_metazone_end + 1;
4419 }
4420 }
4421
4422
4423 while ((length > filebytes) && (retval == E_NONE)) {
4424 off_t bytesRequested;
4425
4426 if (hfs_start_transaction(hfsmp) != 0) {
4427 retval = EINVAL;
4428 goto Err_Exit;
4429 }
4430
4431 /* Protect extents b-tree and allocation bitmap */
4432 lockflags = SFL_BITMAP;
4433 if (overflow_extents(fp))
4434 lockflags |= SFL_EXTENTS;
4435 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4436
4437 if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
4438 bytesRequested = HFS_BIGFILE_SIZE;
4439 } else {
4440 bytesRequested = moreBytesRequested;
4441 }
4442
4443 if (extendFlags & kEFContigMask) {
4444 // if we're on a sparse device, this will force it to do a
4445 // full scan to find the space needed.
4446 hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
4447 }
4448
4449 retval = MacToVFSError(ExtendFileC(vcb,
4450 (FCB*)fp,
4451 bytesRequested,
4452 blockHint,
4453 extendFlags,
4454 &actualBytesAdded));
4455
4456 if (retval == E_NONE) {
4457 *(ap->a_bytesallocated) += actualBytesAdded;
4458 total_bytes_added += actualBytesAdded;
4459 moreBytesRequested -= actualBytesAdded;
4460 if (blockHint != 0) {
4461 blockHint += actualBytesAdded / vcb->blockSize;
4462 }
4463 }
4464 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
4465
4466 hfs_systemfile_unlock(hfsmp, lockflags);
4467
4468 if (hfsmp->jnl) {
4469 (void) hfs_update(vp, 0);
4470 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
4471 }
4472
4473 hfs_end_transaction(hfsmp);
4474 }
4475
4476
4477 /*
4478 * if we get an error and no changes were made then exit
4479 * otherwise we must do the hfs_update to reflect the changes
4480 */
4481 if (retval && (startingPEOF == filebytes))
4482 goto Err_Exit;
4483
4484 /*
4485 * Adjust actualBytesAdded to be allocation block aligned, not
4486 * clump size aligned.
4487 * NOTE: So what we are reporting does not affect reality
4488 * until the file is closed, when we truncate the file to allocation
4489 * block size.
4490 */
4491 if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
4492 *(ap->a_bytesallocated) =
4493 roundup(orig_request_size, (off_t)vcb->blockSize);
4494
4495 } else { /* Shorten the size of the file */
4496
4497 /*
4498 * N.B. At present, this code is never called. If and when we
4499 * do start using it, it looks like there might be slightly
4500 * strange semantics with the file size: it's possible for the
4501 * file size to *increase* e.g. if current file size is 5,
4502 * length is 1024 and filebytes is 4096, the file size will
4503 * end up being 1024 bytes. This isn't necessarily a problem
4504 * but it's not consistent with the code above which doesn't
4505 * change the file size.
4506 */
4507
4508 retval = hfs_truncate(vp, length, 0, 0, ap->a_context);
4509 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
4510
4511 /*
4512 * if we get an error and no changes were made then exit
4513 * otherwise we must do the hfs_update to reflect the changes
4514 */
4515 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
4516 #if QUOTA
4517 /* These are bytesreleased */
4518 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
4519 #endif /* QUOTA */
4520
4521 if (fp->ff_size > filebytes) {
4522 fp->ff_size = filebytes;
4523
4524 hfs_ubc_setsize(vp, fp->ff_size, true);
4525 }
4526 }
4527
4528 Std_Exit:
4529 cp->c_flag |= C_MODIFIED;
4530 cp->c_touch_chgtime = TRUE;
4531 cp->c_touch_modtime = TRUE;
4532 retval2 = hfs_update(vp, 0);
4533
4534 if (retval == 0)
4535 retval = retval2;
4536 Err_Exit:
4537 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
4538 hfs_unlock(cp);
4539 return (retval);
4540 }
4541
4542
4543 /*
4544 * Pagein for HFS filesystem
4545 */
4546 int
4547 hfs_vnop_pagein(struct vnop_pagein_args *ap)
4548 /*
4549 struct vnop_pagein_args {
4550 vnode_t a_vp,
4551 upl_t a_pl,
4552 vm_offset_t a_pl_offset,
4553 off_t a_f_offset,
4554 size_t a_size,
4555 int a_flags
4556 vfs_context_t a_context;
4557 };
4558 */
4559 {
4560 vnode_t vp;
4561 struct cnode *cp;
4562 struct filefork *fp;
4563 int error = 0;
4564 upl_t upl;
4565 upl_page_info_t *pl;
4566 off_t f_offset;
4567 off_t page_needed_f_offset;
4568 int offset;
4569 int isize;
4570 int upl_size;
4571 int pg_index;
4572 boolean_t truncate_lock_held = FALSE;
4573 boolean_t file_converted = FALSE;
4574 kern_return_t kret;
4575
4576 vp = ap->a_vp;
4577 cp = VTOC(vp);
4578 fp = VTOF(vp);
4579
4580 #if CONFIG_PROTECT
4581 if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) {
4582 /*
4583 * If we errored here, then this means that one of two things occurred:
4584 * 1. there was a problem with the decryption of the key.
4585 * 2. the device is locked and we are not allowed to access this particular file.
4586 *
4587 * Either way, this means that we need to shut down this upl now. As long as
4588 * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves)
4589 * then we create a upl and immediately abort it.
4590 */
4591 if (ap->a_pl == NULL) {
4592 /* create the upl */
4593 ubc_create_upl (vp, ap->a_f_offset, ap->a_size, &upl, &pl,
4594 UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
4595 /* mark the range as needed so it doesn't immediately get discarded upon abort */
4596 ubc_upl_range_needed (upl, ap->a_pl_offset / PAGE_SIZE, 1);
4597
4598 /* Abort the range */
4599 ubc_upl_abort_range (upl, 0, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
4600 }
4601
4602
4603 return error;
4604 }
4605 #endif /* CONFIG_PROTECT */
4606
4607 if (ap->a_pl != NULL) {
4608 /*
4609 * this can only happen for swap files now that
4610 * we're asking for V2 paging behavior...
4611 * so don't need to worry about decompression, or
4612 * keeping track of blocks read or taking the truncate lock
4613 */
4614 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
4615 ap->a_size, (off_t)fp->ff_size, ap->a_flags);
4616 goto pagein_done;
4617 }
4618
4619 page_needed_f_offset = ap->a_f_offset + ap->a_pl_offset;
4620
4621 retry_pagein:
4622 /*
4623 * take truncate lock (shared/recursive) to guard against
4624 * zero-fill thru fsync interfering, but only for v2
4625 *
4626 * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
4627 * lock shared and we are allowed to recurse 1 level if this thread already
4628 * owns the lock exclusively... this can legally occur
4629 * if we are doing a shrinking ftruncate against a file
4630 * that is mapped private, and the pages being truncated
4631 * do not currently exist in the cache... in that case
4632 * we will have to page-in the missing pages in order
4633 * to provide them to the private mapping... we must
4634 * also call hfs_unlock_truncate with a positive been_recursed
4635 * arg to indicate that if we have recursed, there is no need to drop
4636 * the lock. Allowing this simple recursion is necessary
4637 * in order to avoid a certain deadlock... since the ftruncate
4638 * already holds the truncate lock exclusively, if we try
4639 * to acquire it shared to protect the pagein path, we will
4640 * hang this thread
4641 *
4642 * NOTE: The if () block below is a workaround in order to prevent a
4643 * VM deadlock. See rdar://7853471.
4644 *
4645 * If we are in a forced unmount, then launchd will still have the
4646 * dyld_shared_cache file mapped as it is trying to reboot. If we
4647 * take the truncate lock here to service a page fault, then our
4648 * thread could deadlock with the forced-unmount. The forced unmount
4649 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
4650 * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount
4651 * thread will think it needs to copy all of the data out of the file
4652 * and into a VM copy object. If we hold the cnode lock here, then that
4653 * VM operation will not be able to proceed, because we'll set a busy page
4654 * before attempting to grab the lock. Note that this isn't as simple as "don't
4655 * call ubc_setsize" because doing that would just shift the problem to the
4656 * ubc_msync done before the vnode is reclaimed.
4657 *
4658 * So, if a forced unmount on this volume is in flight AND the cnode is
4659 * marked C_DELETED, then just go ahead and do the page in without taking
4660 * the lock (thus suspending pagein_v2 semantics temporarily). Since it's on a file
4661 * that is not going to be available on the next mount, this seems like a
4662 * OK solution from a correctness point of view, even though it is hacky.
4663 */
4664 if (vfs_isforce(vnode_mount(vp))) {
4665 if (cp->c_flag & C_DELETED) {
4666 /* If we don't get it, then just go ahead and operate without the lock */
4667 truncate_lock_held = hfs_try_trunclock(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4668 }
4669 }
4670 else {
4671 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4672 truncate_lock_held = TRUE;
4673 }
4674
4675 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
4676
4677 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
4678 error = EINVAL;
4679 goto pagein_done;
4680 }
4681 ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);
4682
4683 upl_size = isize = ap->a_size;
4684
4685 /*
4686 * Scan from the back to find the last page in the UPL, so that we
4687 * aren't looking at a UPL that may have already been freed by the
4688 * preceding aborts/completions.
4689 */
4690 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
4691 if (upl_page_present(pl, --pg_index))
4692 break;
4693 if (pg_index == 0) {
4694 /*
4695 * no absent pages were found in the range specified
4696 * just abort the UPL to get rid of it and then we're done
4697 */
4698 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
4699 goto pagein_done;
4700 }
4701 }
4702 /*
4703 * initialize the offset variables before we touch the UPL.
4704 * f_offset is the position into the file, in bytes
4705 * offset is the position into the UPL, in bytes
4706 * pg_index is the pg# of the UPL we're operating on
4707 * isize is the offset into the UPL of the last page that is present.
4708 */
4709 isize = ((pg_index + 1) * PAGE_SIZE);
4710 pg_index = 0;
4711 offset = 0;
4712 f_offset = ap->a_f_offset;
4713
4714 while (isize) {
4715 int xsize;
4716 int num_of_pages;
4717
4718 if ( !upl_page_present(pl, pg_index)) {
4719 /*
4720 * we asked for RET_ONLY_ABSENT, so it's possible
4721 * to get back empty slots in the UPL.
4722 * just skip over them
4723 */
4724 f_offset += PAGE_SIZE;
4725 offset += PAGE_SIZE;
4726 isize -= PAGE_SIZE;
4727 pg_index++;
4728
4729 continue;
4730 }
4731 /*
4732 * We know that we have at least one absent page.
4733 * Now checking to see how many in a row we have
4734 */
4735 num_of_pages = 1;
4736 xsize = isize - PAGE_SIZE;
4737
4738 while (xsize) {
4739 if ( !upl_page_present(pl, pg_index + num_of_pages))
4740 break;
4741 num_of_pages++;
4742 xsize -= PAGE_SIZE;
4743 }
4744 xsize = num_of_pages * PAGE_SIZE;
4745
4746 #if HFS_COMPRESSION
4747 if (VNODE_IS_RSRC(vp)) {
4748 /* allow pageins of the resource fork */
4749 } else {
4750 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
4751
4752 if (compressed) {
4753
4754 if (truncate_lock_held) {
4755 /*
4756 * can't hold the truncate lock when calling into the decmpfs layer
4757 * since it calls back into this layer... even though we're only
4758 * holding the lock in shared mode, and the re-entrant path only
4759 * takes the lock shared, we can deadlock if some other thread
4760 * tries to grab the lock exclusively in between.
4761 */
4762 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4763 truncate_lock_held = FALSE;
4764 }
4765 ap->a_pl = upl;
4766 ap->a_pl_offset = offset;
4767 ap->a_f_offset = f_offset;
4768 ap->a_size = xsize;
4769
4770 error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
4771 /*
4772 * note that decmpfs_pagein_compressed can change the state of
4773 * 'compressed'... it will set it to 0 if the file is no longer
4774 * compressed once the compression lock is successfully taken
4775 * i.e. we would block on that lock while the file is being inflated
4776 */
4777 if (error == 0 && vnode_isfastdevicecandidate(vp)) {
4778 (void) hfs_addhotfile(vp);
4779 }
4780 if (compressed) {
4781 if (error == 0) {
4782 /* successful page-in, update the access time */
4783 VTOC(vp)->c_touch_acctime = TRUE;
4784
4785 //
4786 // compressed files are not traditional hot file candidates
4787 // but they may be for CF (which ignores the ff_bytesread
4788 // field)
4789 //
4790 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
4791 fp->ff_bytesread = 0;
4792 }
4793 } else if (error == EAGAIN) {
4794 /*
4795 * EAGAIN indicates someone else already holds the compression lock...
4796 * to avoid deadlocking, we'll abort this range of pages with an
4797 * indication that the pagein needs to be redriven
4798 */
4799 ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
4800 } else if (error == ENOSPC) {
4801
4802 if (upl_size == PAGE_SIZE)
4803 panic("decmpfs_pagein_compressed: couldn't ubc_upl_map a single page\n");
4804
4805 ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
4806
4807 ap->a_size = PAGE_SIZE;
4808 ap->a_pl = NULL;
4809 ap->a_pl_offset = 0;
4810 ap->a_f_offset = page_needed_f_offset;
4811
4812 goto retry_pagein;
4813 } else {
4814 ubc_upl_abort(upl, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
4815 goto pagein_done;
4816 }
4817 goto pagein_next_range;
4818 }
4819 else {
4820 /*
4821 * Set file_converted only if the file became decompressed while we were
4822 * paging in. If it were still compressed, we would re-start the loop using the goto
4823 * in the above block. This avoids overloading truncate_lock_held as our retry_pagein
4824 * condition below, since we could have avoided taking the truncate lock to prevent
4825 * a deadlock in the force unmount case.
4826 */
4827 file_converted = TRUE;
4828 }
4829 }
4830 if (file_converted == TRUE) {
4831 /*
4832 * the file was converted back to a regular file after we first saw it as compressed
4833 * we need to abort the upl, retake the truncate lock, recreate the UPL and start over
4834 * reset a_size so that we consider what remains of the original request
4835 * and null out a_pl and a_pl_offset.
4836 *
4837 * We should only be able to get into this block if the decmpfs_pagein_compressed
4838 * successfully decompressed the range in question for this file.
4839 */
4840 ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
4841
4842 ap->a_size = isize;
4843 ap->a_pl = NULL;
4844 ap->a_pl_offset = 0;
4845
4846 /* Reset file_converted back to false so that we don't infinite-loop. */
4847 file_converted = FALSE;
4848 goto retry_pagein;
4849 }
4850 }
4851 #endif
4852 error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);
4853
4854 /*
4855 * Keep track of blocks read.
4856 */
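/*
 * ff_bytesread is consumed by the hot-file recording code when ranking
 * candidates for promotion.
 */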
4857 if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
4858 int bytesread;
4859 int took_cnode_lock = 0;
4860
4861 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
4862 bytesread = fp->ff_size;
4863 else
4864 bytesread = xsize;
4865
4866 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
4867 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
4868 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
4869 took_cnode_lock = 1;
4870 }
4871 /*
4872 * If this file hasn't been seen since the start of
4873 * the current sampling period then start over.
4874 */
4875 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
4876 struct timeval tv;
4877
4878 fp->ff_bytesread = bytesread;
4879 microtime(&tv);
4880 cp->c_atime = tv.tv_sec;
4881 } else {
4882 fp->ff_bytesread += bytesread;
4883 }
4884 cp->c_touch_acctime = TRUE;
4885
4886 if (vnode_isfastdevicecandidate(vp)) {
4887 (void) hfs_addhotfile(vp);
4888 }
4889 if (took_cnode_lock)
4890 hfs_unlock(cp);
4891 }
4892 pagein_next_range:
4893 f_offset += xsize;
4894 offset += xsize;
4895 isize -= xsize;
4896 pg_index += num_of_pages;
4897
4898 error = 0;
4899 }
4900
4901 pagein_done:
4902 if (truncate_lock_held == TRUE) {
4903 /* HFS_LOCK_SKIP_IF_EXCLUSIVE is passed to hfs_unlock_truncate, matching how the truncate lock was taken above */
4904 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4905 }
4906
4907 return (error);
4908 }
4909
4910 /*
4911 * Pageout for HFS filesystem.
4912 */
4913 int
4914 hfs_vnop_pageout(struct vnop_pageout_args *ap)
4915 /*
4916 struct vnop_pageout_args {
4917 vnode_t a_vp,
4918 upl_t a_pl,
4919 vm_offset_t a_pl_offset,
4920 off_t a_f_offset,
4921 size_t a_size,
4922 int a_flags
4923 vfs_context_t a_context;
4924 };
4925 */
4926 {
4927 vnode_t vp = ap->a_vp;
4928 struct cnode *cp;
4929 struct filefork *fp;
4930 int retval = 0;
4931 off_t filesize;
4932 upl_t upl;
4933 upl_page_info_t* pl = NULL;
4934 vm_offset_t a_pl_offset;
4935 int a_flags;
4936 int is_pageoutv2 = 0;
4937 kern_return_t kret;
4938
4939 cp = VTOC(vp);
4940 fp = VTOF(vp);
4941
4942 a_flags = ap->a_flags;
4943 a_pl_offset = ap->a_pl_offset;
4944
4945 /*
4946 * we can tell if we're getting the new or old behavior from the UPL
4947 */
4948 if ((upl = ap->a_pl) == NULL) {
4949 int request_flags;
4950
4951 is_pageoutv2 = 1;
4952 /*
4953 * we're in control of any UPL we commit
4954 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
4955 */
4956 a_flags &= ~UPL_NOCOMMIT;
4957 a_pl_offset = 0;
4958
4959 /*
4960 * For V2 semantics, we want to take the cnode truncate lock
4961 * shared to guard against the file size changing via zero-filling.
4962 *
4963 * However, we have to be careful because we may be invoked
4964 * via the ubc_msync path to write out dirty mmap'd pages
4965 * in response to a lock event on a content-protected
4966 * filesystem (e.g. to write out class A files).
4967 * As a result, we want to take the truncate lock 'SHARED' with
4968 * the mini-recursion locktype so that we don't deadlock/panic
4969 * because we may be already holding the truncate lock exclusive to force any other
4970 * IOs to have blocked behind us.
4971 */
4972 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4973
4974 if (a_flags & UPL_MSYNC) {
4975 request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
4976 }
4977 else {
4978 request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
4979 }
4980
4981 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);
4982
4983 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
4984 retval = EINVAL;
4985 goto pageout_done;
4986 }
4987 }
4988 /*
4989 * from this point forward upl points at the UPL we're working with
4990 * it was either passed in or we succesfully created it
4991 */
4992
4993 /*
4994 * Figure out where the file ends, for pageout purposes. If
4995 * ff_new_size > ff_size, then we're in the middle of extending the
4996 * file via a write, so it is safe (and necessary) that we be able
4997 * to pageout up to that point.
4998 */
4999 filesize = fp->ff_size;
5000 if (fp->ff_new_size > filesize)
5001 filesize = fp->ff_new_size;
5002
5003 /*
5004 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
5005 * UPL instead of relying on the UPL passed into us. We go ahead and do that here,
5006 * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for
5007 * N dirty ranges in the UPL. Note that this is almost a direct copy of the
5008 * logic in vnode_pageout except that we need to do it after grabbing the truncate
5009 * lock in HFS so that we don't lock invert ourselves.
5010 *
5011 * Note that we can still get into this function on behalf of the default pager with
5012 * non-V2 behavior (swapfiles). However in that case, we did not grab locks above
5013 * since fsync and other writing threads will grab the locks, then mark the
5014 * relevant pages as busy. But the pageout codepath marks the pages as busy,
5015 * and THEN would attempt to grab the truncate lock, which would result in deadlock. So
5016 * we do not try to grab anything for the pre-V2 case, which should only be accessed
5017 * by the paging/VM system.
5018 */
5019
5020 if (is_pageoutv2) {
5021 off_t f_offset;
5022 int offset;
5023 int isize;
5024 int pg_index;
5025 int error;
5026 int error_ret = 0;
5027
5028 isize = ap->a_size;
5029 f_offset = ap->a_f_offset;
5030
5031 /*
5032 * Scan from the back to find the last page in the UPL, so that we
5033 * aren't looking at a UPL that may have already been freed by the
5034 * preceding aborts/completions.
5035 */
5036 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
5037 if (upl_page_present(pl, --pg_index))
5038 break;
5039 if (pg_index == 0) {
5040 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
5041 goto pageout_done;
5042 }
5043 }
5044
5045 /*
5046 * initialize the offset variables before we touch the UPL.
5047 * a_f_offset is the position into the file, in bytes
5048 * offset is the position into the UPL, in bytes
5049 * pg_index is the pg# of the UPL we're operating on.
5050 * isize is the offset into the UPL of the last non-clean page.
5051 */
5052 isize = ((pg_index + 1) * PAGE_SIZE);
5053
5054 offset = 0;
5055 pg_index = 0;
5056
5057 while (isize) {
5058 int xsize;
5059 int num_of_pages;
5060
5061 if ( !upl_page_present(pl, pg_index)) {
5062 /*
5063 * we asked for RET_ONLY_DIRTY, so it's possible
5064 * to get back empty slots in the UPL.
5065 * just skip over them
5066 */
5067 f_offset += PAGE_SIZE;
5068 offset += PAGE_SIZE;
5069 isize -= PAGE_SIZE;
5070 pg_index++;
5071
5072 continue;
5073 }
5074 if ( !upl_dirty_page(pl, pg_index)) {
5075 panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
5076 }
5077
5078 /*
5079 * We know that we have at least one dirty page.
5080 * Now checking to see how many in a row we have
5081 */
5082 num_of_pages = 1;
5083 xsize = isize - PAGE_SIZE;
5084
5085 while (xsize) {
5086 if ( !upl_dirty_page(pl, pg_index + num_of_pages))
5087 break;
5088 num_of_pages++;
5089 xsize -= PAGE_SIZE;
5090 }
5091 xsize = num_of_pages * PAGE_SIZE;
5092
5093 if ((error = cluster_pageout(vp, upl, offset, f_offset,
5094 xsize, filesize, a_flags))) {
5095 if (error_ret == 0)
5096 error_ret = error;
5097 }
5098 f_offset += xsize;
5099 offset += xsize;
5100 isize -= xsize;
5101 pg_index += num_of_pages;
5102 }
5103 /* capture errnos bubbled out of cluster_pageout if they occurred */
5104 if (error_ret != 0) {
5105 retval = error_ret;
5106 }
5107 } /* end block for v2 pageout behavior */
5108 else {
5109 /*
5110 * just call cluster_pageout for old pre-v2 behavior
5111 */
5112 retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
5113 ap->a_size, filesize, a_flags);
5114 }
5115
5116 /*
5117 * If data was written, update the modification time of the file
5118 * but only if it's mapped writable; we will have touched the
5119 * modification time for direct writes.
5120 */
5121 if (retval == 0 && (ubc_is_mapped_writable(vp)
5122 || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING))) {
5123 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5124
5125 // Check again with lock
5126 bool mapped_writable = ubc_is_mapped_writable(vp);
5127 if (mapped_writable
5128 || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING)) {
5129 cp->c_touch_modtime = TRUE;
5130 cp->c_touch_chgtime = TRUE;
5131
5132 /*
5133 * We only need to increment the generation counter if
5134 * it's currently mapped writable because we incremented
5135 * the counter in hfs_vnop_mnomap.
5136 */
5137 if (mapped_writable)
5138 hfs_incr_gencount(VTOC(vp));
5139
5140 /*
5141 * If setuid or setgid bits are set and this process is
5142 * not the superuser then clear the setuid and setgid bits
5143 * as a precaution against tampering.
5144 */
5145 if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
5146 (vfs_context_suser(ap->a_context) != 0)) {
5147 cp->c_mode &= ~(S_ISUID | S_ISGID);
5148 }
5149 }
5150
5151 hfs_unlock(cp);
5152 }
5153
5154 pageout_done:
5155 if (is_pageoutv2) {
5156 /*
5157 * Release the truncate lock. Note that because
5158 * we may have taken the lock recursively by
5159 * being invoked via ubc_msync due to lockdown,
5160 * we should release it recursively, too.
5161 */
5162 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
5163 }
5164 return (retval);
5165 }
5166
5167 /*
5168 * Intercept B-Tree node writes to unswap them if necessary.
5169 */
5170 int
5171 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
5172 {
5173 int retval = 0;
5174 register struct buf *bp = ap->a_bp;
5175 register struct vnode *vp = buf_vnode(bp);
5176 BlockDescriptor block;
5177
5178 /* Trap B-Tree writes */
5179 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
5180 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
5181 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
5182 (vp == VTOHFS(vp)->hfc_filevp)) {
5183
5184 /*
5185 * Swap and validate the node if it is in native byte order.
5186 * This is always true on big endian, so we always validate
5187 * before writing here. On little endian, the node typically has
5188 * been swapped and validated when it was written to the journal,
5189 * so we won't do anything here.
5190 */
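/*
 * The last two bytes of a B-tree node hold the offset of the node's
 * first record, which is always sizeof(BTNodeDescriptor) (14, i.e.
 * 0x000e); reading 0x000e here in host order means the node is still
 * in host byte order.
 */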
5191 if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
5192 /* Prepare the block pointer */
5193 block.blockHeader = bp;
5194 block.buffer = (char *)buf_dataptr(bp);
5195 block.blockNum = buf_lblkno(bp);
5196 /* not found in cache ==> came from disk */
5197 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
5198 block.blockSize = buf_count(bp);
5199
5200 /* Endian un-swap B-Tree node */
5201 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
5202 if (retval)
5203 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
5204 }
5205 }
5206
5207 /* This buffer shouldn't be locked anymore but if it is clear it */
5208 if ((buf_flags(bp) & B_LOCKED)) {
5209 // XXXdbg
5210 if (VTOHFS(vp)->jnl) {
5211 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
5212 }
5213 buf_clearflags(bp, B_LOCKED);
5214 }
5215 retval = vn_bwrite (ap);
5216
5217 return (retval);
5218 }
5219
5220
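/*
 * Pin (or unpin) a contiguous range of allocation blocks on the fast
 * tier of a composite device (e.g. a Fusion drive) by issuing the
 * corresponding _DKIOCCS* ioctl against the volume's device vnode.
 */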
5221 int
5222 hfs_pin_block_range(struct hfsmount *hfsmp, int pin_state, uint32_t start_block, uint32_t nblocks)
5223 {
5224 _dk_cs_pin_t pin;
5225 unsigned ioc;
5226 int err;
5227
5228 memset(&pin, 0, sizeof(pin));
5229 pin.cp_extent.offset = ((uint64_t)start_block) * HFSTOVCB(hfsmp)->blockSize;
5230 pin.cp_extent.length = ((uint64_t)nblocks) * HFSTOVCB(hfsmp)->blockSize;
5231 switch (pin_state) {
5232 case HFS_PIN_IT:
5233 ioc = _DKIOCCSPINEXTENT;
5234 pin.cp_flags = _DKIOCCSPINTOFASTMEDIA;
5235 break;
5236 case HFS_PIN_IT | HFS_TEMP_PIN:
5237 ioc = _DKIOCCSPINEXTENT;
5238 pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSTEMPORARYPIN;
5239 break;
5240 case HFS_PIN_IT | HFS_DATALESS_PIN:
5241 ioc = _DKIOCCSPINEXTENT;
5242 pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSPINFORSWAPFILE;
5243 break;
5244 case HFS_UNPIN_IT:
5245 ioc = _DKIOCCSUNPINEXTENT;
5246 pin.cp_flags = 0;
5247 break;
5248 case HFS_UNPIN_IT | HFS_EVICT_PIN:
5249 ioc = _DKIOCCSPINEXTENT;
5250 pin.cp_flags = _DKIOCCSPINTOSLOWMEDIA;
5251 break;
5252 default:
5253 return EINVAL;
5254 }
5255 err = VNOP_IOCTL(hfsmp->hfs_devvp, ioc, (caddr_t)&pin, 0, vfs_context_kernel());
5256 return err;
5257 }
5258
5259 //
5260 // The cnode lock should already be held on entry to this function
5261 //
5262 int
5263 hfs_pin_vnode(struct hfsmount *hfsmp, struct vnode *vp, int pin_state, uint32_t *num_blocks_pinned)
5264 {
5265 struct filefork *fp = VTOF(vp);
5266 int i, err=0, need_put=0;
5267 struct vnode *rsrc_vp=NULL;
5268 uint32_t npinned = 0;
5269 off_t offset;
5270
5271 if (num_blocks_pinned) {
5272 *num_blocks_pinned = 0;
5273 }
5274
5275 if (vnode_vtype(vp) != VREG) {
5276 /* Not allowed to pin directories or symlinks */
5277 printf("hfs: can't pin vnode of type %d\n", vnode_vtype(vp));
5278 return (EPERM);
5279 }
5280
5281 if (fp->ff_unallocblocks) {
5282 printf("hfs: can't pin a vnode w/unalloced blocks (%d)\n", fp->ff_unallocblocks);
5283 return (EINVAL);
5284 }
5285
5286 /*
5287 * It is possible that if the caller unlocked/re-locked the cnode after checking
5288 * for C_NOEXISTS|C_DELETED that the file could have been deleted while the
5289 * cnode was unlocked. So check the condition again and return ENOENT so that
5290 * the caller knows why we failed to pin the vnode.
5291 */
5292 if (VTOC(vp)->c_flag & (C_NOEXISTS|C_DELETED)) {
5293 // makes no sense to pin something that's pending deletion
5294 return ENOENT;
5295 }
5296
5297 if (fp->ff_blocks == 0 && (VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
5298 if (!VNODE_IS_RSRC(vp) && hfs_vgetrsrc(hfsmp, vp, &rsrc_vp) == 0) {
5299 //printf("hfs: fileid %d resource fork nblocks: %d / size: %lld\n", VTOC(vp)->c_fileid,
5300 // VTOC(rsrc_vp)->c_rsrcfork->ff_blocks,VTOC(rsrc_vp)->c_rsrcfork->ff_size);
5301
5302 fp = VTOC(rsrc_vp)->c_rsrcfork;
5303 need_put = 1;
5304 }
5305 }
5306 if (fp->ff_blocks == 0) {
5307 if (need_put) {
5308 //
5309 // use a distinct error code for a compressed file that has no resource fork;
5310 // we return EALREADY to indicate that the data is already probably hot file
5311 // cached because it's in an EA and the attributes btree is on the ssd
5312 //
5313 err = EALREADY;
5314 } else {
5315 err = EINVAL;
5316 }
5317 goto out;
5318 }
5319
5320 offset = 0;
5321 for (i = 0; i < kHFSPlusExtentDensity; i++) {
5322 if (fp->ff_extents[i].startBlock == 0) {
5323 break;
5324 }
5325
5326 err = hfs_pin_block_range(hfsmp, pin_state, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount);
5327 if (err) {
5328 break;
5329 } else {
5330 npinned += fp->ff_extents[i].blockCount;
5331 }
5332 }
5333
5334 if (err || npinned == 0) {
5335 goto out;
5336 }
5337
5338 if (fp->ff_extents[kHFSPlusExtentDensity-1].startBlock) {
5339 uint32_t pblocks;
5340 uint8_t forktype = 0;
5341
5342 if (fp == VTOC(vp)->c_rsrcfork) {
5343 forktype = 0xff;
5344 }
5345 /*
5346 * The file could have overflow extents, better pin them.
5347 *
5348 * We assume that since we are holding the cnode lock for this cnode,
5349 * the file's extents cannot be manipulated, but the tree could, so we
5350 * need to ensure that it doesn't change behind our back as we iterate it.
5351 */
5352 int lockflags = hfs_systemfile_lock (hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
5353 err = hfs_pin_overflow_extents(hfsmp, VTOC(vp)->c_fileid, forktype, &pblocks);
5354 hfs_systemfile_unlock (hfsmp, lockflags);
5355
5356 if (err) {
5357 goto out;
5358 }
5359 npinned += pblocks;
5360 }
5361
5362 out:
5363 if (num_blocks_pinned) {
5364 *num_blocks_pinned = npinned;
5365 }
5366
5367 if (need_put && rsrc_vp) {
5368 //
5369 // have to unlock the cnode since it's shared between the
5370 // resource fork vnode and the data fork vnode (and the
5371 // vnode_put() may need to re-acquire the cnode lock to
5372 // reclaim the resource fork vnode)
5373 //
5374 hfs_unlock(VTOC(vp));
5375 vnode_put(rsrc_vp);
5376 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5377 }
5378 return err;
5379 }
5380
5381
5382 /*
5383 * Relocate a file to a new location on disk
5384 * cnode must be locked on entry
5385 *
5386 * Relocation occurs by cloning the file's data from its
5387 * current set of blocks to a new set of blocks. During
5388 * the relocation all of the blocks (old and new) are
5389 * owned by the file.
5390 *
5391 * -----------------
5392 * |///////////////|
5393 * -----------------
5394 * 0 N (file offset)
5395 *
5396 * ----------------- -----------------
5397 * |///////////////| | | STEP 1 (acquire new blocks)
5398 * ----------------- -----------------
5399 * 0 N N+1 2N
5400 *
5401 * ----------------- -----------------
5402 * |///////////////| |///////////////| STEP 2 (clone data)
5403 * ----------------- -----------------
5404 * 0 N N+1 2N
5405 *
5406 * -----------------
5407 * |///////////////| STEP 3 (head truncate blocks)
5408 * -----------------
5409 * 0 N
5410 *
5411 * During steps 2 and 3 page-outs to file offsets less
5412 * than or equal to N are suspended.
5413 *
5414 * During step 3 page-ins to the file get suspended.
5415 */
5416 int
5417 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
5418 struct proc *p)
5419 {
5420 struct cnode *cp;
5421 struct filefork *fp;
5422 struct hfsmount *hfsmp;
5423 u_int32_t headblks;
5424 u_int32_t datablks;
5425 u_int32_t blksize;
5426 u_int32_t growsize;
5427 u_int32_t nextallocsave;
5428 daddr64_t sector_a, sector_b;
5429 int eflags = 0;
5430 off_t newbytes;
5431 int retval;
5432 int lockflags = 0;
5433 int took_trunc_lock = 0;
5434 int started_tr = 0;
5435 enum vtype vnodetype;
5436
5437 vnodetype = vnode_vtype(vp);
5438 if (vnodetype != VREG) {
5439 /* Not allowed to move symlinks. */
5440 return (EPERM);
5441 }
5442
5443 hfsmp = VTOHFS(vp);
5444 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
5445 return (ENOSPC);
5446 }
5447
5448 cp = VTOC(vp);
5449 fp = VTOF(vp);
5450 if (fp->ff_unallocblocks)
5451 return (EINVAL);
5452
5453 #if CONFIG_PROTECT
5454 /*
5455 * <rdar://problem/9118426>
5456 * Disable HFS file relocation on content-protected filesystems
5457 */
5458 if (cp_fs_protected (hfsmp->hfs_mp)) {
5459 return EINVAL;
5460 }
5461 #endif
5462 /* If it's an SSD, also disable HFS relocation */
5463 if (hfsmp->hfs_flags & HFS_SSD) {
5464 return EINVAL;
5465 }
5466
5467
5468 blksize = hfsmp->blockSize;
5469 if (blockHint == 0)
5470 blockHint = hfsmp->nextAllocation;
5471
5472 if (fp->ff_size > 0x7fffffff) {
5473 return (EFBIG);
5474 }
5475
5476 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
5477 hfs_unlock(cp);
5478 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
5479 /* Force lock since callers expect the lock to be held. */
5480 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) {
5481 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5482 return (retval);
5483 }
5484 /* No need to continue if file was removed. */
5485 if (cp->c_flag & C_NOEXISTS) {
5486 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5487 return (ENOENT);
5488 }
5489 took_trunc_lock = 1;
5490 }
5491 headblks = fp->ff_blocks;
5492 datablks = howmany(fp->ff_size, blksize);
5493 growsize = datablks * blksize;
5494 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
5495 if (blockHint >= hfsmp->hfs_metazone_start &&
5496 blockHint <= hfsmp->hfs_metazone_end)
5497 eflags |= kEFMetadataMask;
5498
5499 if (hfs_start_transaction(hfsmp) != 0) {
5500 if (took_trunc_lock)
5501 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5502 return (EINVAL);
5503 }
5504 started_tr = 1;
5505 /*
5506 * Protect the extents b-tree and the allocation bitmap
5507 * during MapFileBlockC and ExtendFileC operations.
5508 */
5509 lockflags = SFL_BITMAP;
5510 if (overflow_extents(fp))
5511 lockflags |= SFL_EXTENTS;
5512 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5513
5514 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
5515 if (retval) {
5516 retval = MacToVFSError(retval);
5517 goto out;
5518 }
5519
5520 /*
5521 * STEP 1 - acquire new allocation blocks.
5522 */
5523 nextallocsave = hfsmp->nextAllocation;
5524 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
5525 if (eflags & kEFMetadataMask) {
5526 hfs_lock_mount(hfsmp);
5527 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
5528 MarkVCBDirty(hfsmp);
5529 hfs_unlock_mount(hfsmp);
5530 }
5531
5532 retval = MacToVFSError(retval);
5533 if (retval == 0) {
5534 cp->c_flag |= C_MODIFIED;
5535 if (newbytes < growsize) {
5536 retval = ENOSPC;
5537 goto restore;
5538 } else if (fp->ff_blocks < (headblks + datablks)) {
5539 printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN);
5540 retval = ENOSPC;
5541 goto restore;
5542 }
5543
5544 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
5545 if (retval) {
5546 retval = MacToVFSError(retval);
5547 } else if ((sector_a + 1) == sector_b) {
5548 retval = ENOSPC;
5549 goto restore;
5550 } else if ((eflags & kEFMetadataMask) &&
5551 ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
5552 hfsmp->hfs_metazone_end)) {
5553 #if 0
5554 const char * filestr;
5555 char emptystr = '\0';
5556
5557 if (cp->c_desc.cd_nameptr != NULL) {
5558 filestr = (const char *)&cp->c_desc.cd_nameptr[0];
5559 } else if (vnode_name(vp) != NULL) {
5560 filestr = vnode_name(vp);
5561 } else {
5562 filestr = &emptystr;
5563 }
5564 #endif
5565 retval = ENOSPC;
5566 goto restore;
5567 }
5568 }
5569 /* Done with system locks and journal for now. */
5570 hfs_systemfile_unlock(hfsmp, lockflags);
5571 lockflags = 0;
5572 hfs_end_transaction(hfsmp);
5573 started_tr = 0;
5574
5575 if (retval) {
5576 /*
5577 * Check to see if failure is due to excessive fragmentation.
5578 */
5579 if ((retval == ENOSPC) &&
5580 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
5581 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
5582 }
5583 goto out;
5584 }
5585 /*
5586 * STEP 2 - clone file data into the new allocation blocks.
5587 */
5588
5589 if (vnodetype == VLNK)
5590 retval = EPERM;
5591 else if (vnode_issystem(vp))
5592 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
5593 else
5594 retval = hfs_clonefile(vp, headblks, datablks, blksize);
5595
5596 /* Start transaction for step 3 or for a restore. */
5597 if (hfs_start_transaction(hfsmp) != 0) {
5598 retval = EINVAL;
5599 goto out;
5600 }
5601 started_tr = 1;
5602 if (retval)
5603 goto restore;
5604
5605 /*
5606 * STEP 3 - switch to cloned data and remove old blocks.
5607 */
5608 lockflags = SFL_BITMAP;
5609 if (overflow_extents(fp))
5610 lockflags |= SFL_EXTENTS;
5611 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5612
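/*
 * HeadTruncateFile releases the first headblks allocation blocks, so the
 * fork's extents now describe only the cloned copy, completing the move.
 */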
5613 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
5614
5615 hfs_systemfile_unlock(hfsmp, lockflags);
5616 lockflags = 0;
5617 if (retval)
5618 goto restore;
5619 out:
5620 if (took_trunc_lock)
5621 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5622
5623 if (lockflags) {
5624 hfs_systemfile_unlock(hfsmp, lockflags);
5625 lockflags = 0;
5626 }
5627
5628 /* Push cnode's new extent data to disk. */
5629 if (retval == 0) {
5630 hfs_update(vp, 0);
5631 }
5632 if (hfsmp->jnl) {
5633 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
5634 (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
5635 else
5636 (void) hfs_flushvolumeheader(hfsmp, 0);
5637 }
5638 exit:
5639 if (started_tr)
5640 hfs_end_transaction(hfsmp);
5641
5642 return (retval);
5643
5644 restore:
5645 if (fp->ff_blocks == headblks) {
5646 if (took_trunc_lock)
5647 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5648 goto exit;
5649 }
5650 /*
5651 * Give back any newly allocated space.
5652 */
5653 if (lockflags == 0) {
5654 lockflags = SFL_BITMAP;
5655 if (overflow_extents(fp))
5656 lockflags |= SFL_EXTENTS;
5657 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5658 }
5659
5660 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
5661 FTOC(fp)->c_fileid, false);
5662
5663 hfs_systemfile_unlock(hfsmp, lockflags);
5664 lockflags = 0;
5665
5666 if (took_trunc_lock)
5667 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5668 goto exit;
5669 }
5670
5671
5672 /*
5673 * Clone a file's data within the file.
5674 *
5675 */
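/*
 * Worked example (assuming the fork has no extra preallocated blocks):
 * relocating a 1 MiB file on a volume with 4 KiB allocation blocks calls
 * hfs_clonefile(vp, 256, 256, 4096).  writebase = 256 * 4096 bytes and
 * copysize = 1 MiB, so the loop below copies bytes 0..1 MiB-1 of the fork
 * into the newly allocated blocks that start at offset 1 MiB; hfs_relocate
 * then uses HeadTruncateFile to discard the original 256 blocks.
 */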
5676 static int
5677 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
5678 {
5679 caddr_t bufp;
5680 size_t bufsize;
5681 size_t copysize;
5682 size_t iosize;
5683 size_t offset;
5684 off_t writebase;
5685 uio_t auio;
5686 int error = 0;
5687
5688 writebase = blkstart * blksize;
5689 copysize = blkcnt * blksize;
5690 iosize = bufsize = MIN(copysize, 128 * 1024);
5691 offset = 0;
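/*
 * Stage the copy through a buffer of at most 128 KiB: each pass reads the
 * next chunk from the front of the fork and rewrites it at writebase +
 * offset, i.e. into the newly allocated blocks that begin blkstart blocks
 * into the fork.  IO_NOCACHE keeps the staged data out of the cache.
 */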
5692
5693 hfs_unlock(VTOC(vp));
5694
5695 #if CONFIG_PROTECT
5696 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
5697 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5698 return (error);
5699 }
5700 #endif /* CONFIG_PROTECT */
5701
5702 bufp = hfs_malloc(bufsize);
5703
5704 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
5705
5706 while (offset < copysize) {
5707 iosize = MIN(copysize - offset, iosize);
5708
5709 uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
5710 uio_addiov(auio, (uintptr_t)bufp, iosize);
5711
5712 error = cluster_read(vp, auio, copysize, IO_NOCACHE);
5713 if (error) {
5714 printf("hfs_clonefile: cluster_read failed - %d\n", error);
5715 break;
5716 }
5717 if (uio_resid(auio) != 0) {
5718 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
5719 error = EIO;
5720 break;
5721 }
5722
5723 uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
5724 uio_addiov(auio, (uintptr_t)bufp, iosize);
5725
5726 error = cluster_write(vp, auio, writebase + offset,
5727 writebase + offset + iosize,
5728 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
5729 if (error) {
5730 printf("hfs_clonefile: cluster_write failed - %d\n", error);
5731 break;
5732 }
5733 if (uio_resid(auio) != 0) {
5734 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
5735 error = EIO;
5736 break;
5737 }
5738 offset += iosize;
5739 }
5740 uio_free(auio);
5741
5742 if ((blksize & PAGE_MASK)) {
5743 /*
5744 * Since the copy may not have started on a page
5745 * boundary (or may not have ended on one), we
5746 * may have pages left in the cache, since NOCACHE
5747 * will let partially written pages linger.
5748 * Just flush the entire range to make sure we
5749 * don't have any pages left that are beyond
5750 * (or intersect) the real LEOF of this file.
5751 */
5752 ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
5753 } else {
5754 /*
5755 * No need to call ubc_msync or hfs_invalbuf
5756 * since the file was copied using IO_NOCACHE and
5757 * the copy was done starting and ending on a page
5758 * boundary in the file.
5759 */
5760 }
5761 hfs_free(bufp, bufsize);
5762
5763 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5764 return (error);
5765 }
5766
5767 /*
5768 * Clone a system (metadata) file.
5769 *
5770 */
5771 static int
5772 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
5773 kauth_cred_t cred, struct proc *p)
5774 {
5775 caddr_t bufp;
5776 char * offset;
5777 size_t bufsize;
5778 size_t iosize;
5779 struct buf *bp = NULL;
5780 daddr64_t blkno;
5781 daddr64_t blk;
5782 daddr64_t start_blk;
5783 daddr64_t last_blk;
5784 int breadcnt;
5785 int i;
5786 int error = 0;
5787
5788
5789 iosize = GetLogicalBlockSize(vp);
5790 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
5791 breadcnt = bufsize / iosize;
5792
5793 bufp = hfs_malloc(bufsize);
5794
5795 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
5796 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
5797 blkno = 0;
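/*
 * Source logical blocks run from 0 to last_blk - 1; each one is rewritten
 * at start_blk + blkno, the logical offset of the newly allocated space.
 * buf_markinvalid below keeps stale copies of the source blocks from
 * lingering in the buffer cache.
 */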
5798
5799 while (blkno < last_blk) {
5800 /*
5801 * Read up to a megabyte
5802 */
5803 offset = bufp;
5804 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
5805 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
5806 if (error) {
5807 printf("hfs_clonesysfile: meta_bread error %d\n", error);
5808 goto out;
5809 }
5810 if (buf_count(bp) != iosize) {
5811 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
5812 goto out;
5813 }
5814 bcopy((char *)buf_dataptr(bp), offset, iosize);
5815
5816 buf_markinvalid(bp);
5817 buf_brelse(bp);
5818 bp = NULL;
5819
5820 offset += iosize;
5821 }
5822
5823 /*
5824 * Write up to a megabyte
5825 */
5826 offset = bufp;
5827 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
5828 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
5829 if (bp == NULL) {
5830 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
5831 error = EIO;
5832 goto out;
5833 }
5834 bcopy(offset, (char *)buf_dataptr(bp), iosize);
5835 error = (int)buf_bwrite(bp);
5836 bp = NULL;
5837 if (error)
5838 goto out;
5839 offset += iosize;
5840 }
5841 }
5842 out:
5843 if (bp) {
5844 buf_brelse(bp);
5845 }
5846
5847 hfs_free(bufp, bufsize);
5848
5849 error = hfs_fsync(vp, MNT_WAIT, 0, p);
5850
5851 return (error);
5852 }
5853
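/*
 * Write out (as zeroes) any invalid ranges still pending on this vnode.
 * There is nothing to do unless a zero-fill sync was requested
 * (C_ZFWANTSYNC) or the zero-fill timeout has been armed.  The caller
 * must hold both the cnode lock and the truncate lock (asserted below);
 * the cnode lock is dropped and re-taken around the cluster I/O.
 */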
5854 errno_t hfs_flush_invalid_ranges(vnode_t vp)
5855 {
5856 cnode_t *cp = VTOC(vp);
5857
5858 hfs_assert(cp->c_lockowner == current_thread());
5859 hfs_assert(cp->c_truncatelockowner == current_thread());
5860
5861 if (!ISSET(cp->c_flag, C_ZFWANTSYNC) && !cp->c_zftimeout)
5862 return 0;
5863
5864 filefork_t *fp = VTOF(vp);
5865
5866 /*
5867 * We can't hold the cnode lock while we call cluster_write, so we
5868 * need to copy the extents into a local buffer first.
5869 */
5870 int max_exts = 16;
5871 struct ext {
5872 off_t start, end;
5873 } exts_buf[max_exts]; // 256 bytes
5874 struct ext *exts = exts_buf;
5875 int ext_count = 0;
5876 errno_t ret;
5877
5878 struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges);
5879
5880 while (r) {
5881 /* If we have more than can fit in our stack buffer, switch
5882 to a heap buffer. */
5883 if (exts == exts_buf && ext_count == max_exts) {
5884 max_exts = 256;
5885 exts = hfs_malloc(sizeof(struct ext) * max_exts);
5886 memcpy(exts, exts_buf, ext_count * sizeof(struct ext));
5887 }
5888
5889 struct rl_entry *next = TAILQ_NEXT(r, rl_link);
5890
5891 exts[ext_count++] = (struct ext){ r->rl_start, r->rl_end };
5892
5893 if (!next || (ext_count == max_exts && exts != exts_buf)) {
5894 hfs_unlock(cp);
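/* With the cnode lock dropped, write zeroes over each saved invalid
   range; passing a NULL uio with IO_HEADZEROFILL makes cluster_write
   generate the zeroes itself. */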
5895 for (int i = 0; i < ext_count; ++i) {
5896 ret = cluster_write(vp, NULL, fp->ff_size, exts[i].end + 1,
5897 exts[i].start, 0,
5898 IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE);
5899 if (ret) {
5900 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
5901 goto exit;
5902 }
5903 }
5904
5905 if (!next) {
5906 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
5907 break;
5908 }
5909
5910 /* Push any existing clusters; that should clean up our invalid
5911 ranges as they pass through hfs_vnop_blockmap. */
5912 cluster_push(vp, 0);
5913
5914 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
5915
5916 /*
5917 * Get back to where we were in the list (the cnode lock was dropped above).
5918 * There shouldn't be many entries to skip because of the push above.
5919 */
5920 TAILQ_FOREACH(r, &fp->ff_invalidranges, rl_link) {
5921 if (r->rl_end > exts[ext_count - 1].end)
5922 break;
5923 }
5924
5925 ext_count = 0;
5926 } else
5927 r = next;
5928 }
5929
5930 ret = 0;
5931
5932 exit:
5933
5934 if (exts != exts_buf)
5935 hfs_free(exts, sizeof(struct ext) * max_exts);
5936
5937 return ret;
5938 }
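#if 0
/*
 * Illustrative sketch only, not part of the driver: the stack-buffer-with-
 * heap-fallback pattern used by hfs_flush_invalid_ranges above, shown in
 * isolation.  The names gather_ranges, range_t and next_range are
 * hypothetical and exist only for this example.
 */
typedef struct { off_t start, end; } range_t;

static int
gather_ranges(int (*next_range)(range_t *out))
{
int max_ranges = 16;
range_t stackbuf[16];     /* the common case stays on the stack */
range_t *buf = stackbuf;
int count = 0;
range_t r;

while (next_range(&r)) {
if (buf == stackbuf && count == max_ranges) {
/* Stack buffer is full: move everything to a larger heap buffer. */
max_ranges = 256;
buf = hfs_malloc(sizeof(range_t) * max_ranges);
memcpy(buf, stackbuf, count * sizeof(range_t));
}
if (count == max_ranges)
break;     /* heap buffer is full too; process what we have */
buf[count++] = r;
}

/* ... process buf[0..count - 1] here ... */

if (buf != stackbuf)
hfs_free(buf, sizeof(range_t) * max_ranges);
return count;
}
#endif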