1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* @(#)hfs_readwrite.c 1.0
29 *
30 * (c) 1998-2001 Apple Inc. All Rights Reserved
31 *
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
33 *
34 */
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/fcntl.h>
40 #include <sys/stat.h>
41 #include <sys/buf.h>
42 #include <sys/proc.h>
43 #include <sys/kauth.h>
44 #include <sys/vnode.h>
45 #include <sys/uio.h>
46 #include <sys/vfs_context.h>
47 #include <sys/disk.h>
48 #include <sys/sysctl.h>
49 #include <sys/fsctl.h>
50 #include <sys/ubc.h>
51 #include <sys/fsevents.h>
52
53 #include <libkern/OSDebug.h>
54
55 #include <miscfs/specfs/specdev.h>
56
57 #include <sys/ubc.h>
58
59 #include <vm/vm_pageout.h>
60 #include <vm/vm_kern.h>
61
62 #include <IOKit/IOBSD.h>
63
64 #include <sys/kdebug.h>
65
66 #include "hfs.h"
67 #include "hfs_attrlist.h"
68 #include "hfs_endian.h"
69 #include "hfs_fsctl.h"
70 #include "hfs_quota.h"
71 #include "FileMgrInternal.h"
72 #include "BTreesInternal.h"
73 #include "hfs_cnode.h"
74 #include "hfs_dbg.h"
75
76 #if HFS_CONFIG_KEY_ROLL
77 #include "hfs_key_roll.h"
78 #endif
79
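/* can_cluster: true when size is a multiple of 4 KiB and no more than MAXPHYSIO/2. */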
80 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
81
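/*
 * Largest file size representable on an HFS Standard volume (2^31 - 1 bytes);
 * used by the EFBIG check in hfs_vnop_read() below.
 */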
82 enum {
83 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
84 };
85
86 /* from bsd/hfs/hfs_vfsops.c */
87 extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
88
89 /* from hfs_hotfiles.c */
90 extern int hfs_pin_overflow_extents (struct hfsmount *hfsmp, uint32_t fileid,
91 uint8_t forktype, uint32_t *pinned);
92
93 static int hfs_clonefile(struct vnode *, int, int, int);
94 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
95 static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
96
97
98 /*
99 * Read data from a file.
100 */
101 int
102 hfs_vnop_read(struct vnop_read_args *ap)
103 {
104 /*
105 struct vnop_read_args {
106 struct vnodeop_desc *a_desc;
107 vnode_t a_vp;
108 struct uio *a_uio;
109 int a_ioflag;
110 vfs_context_t a_context;
111 };
112 */
113
114 uio_t uio = ap->a_uio;
115 struct vnode *vp = ap->a_vp;
116 struct cnode *cp;
117 struct filefork *fp;
118 struct hfsmount *hfsmp;
119 off_t filesize;
120 off_t filebytes;
121 off_t start_resid = uio_resid(uio);
122 off_t offset = uio_offset(uio);
123 int retval = 0;
124 int took_truncate_lock = 0;
125 int io_throttle = 0;
126 int throttled_count = 0;
127
128 /* Preflight checks */
129 if (!vnode_isreg(vp)) {
130 /* can only read regular files */
131 if (vnode_isdir(vp))
132 return (EISDIR);
133 else
134 return (EPERM);
135 }
136 if (start_resid == 0)
137 return (0); /* Nothing left to do */
138 if (offset < 0)
139 		return (EINVAL); /* can't read from a negative offset */
140
141 #if SECURE_KERNEL
142 if ((ap->a_ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
143 (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
144 		/* Don't allow unencrypted I/O requests from user space */
145 return EPERM;
146 }
147 #endif
148
149 #if HFS_COMPRESSION
150 if (VNODE_IS_RSRC(vp)) {
151 if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
152 return 0;
153 }
154 /* otherwise read the resource fork normally */
155 } else {
156 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
157 if (compressed) {
158 retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
159 if (retval == 0 && !(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
160 (void) hfs_addhotfile(vp);
161 }
162 if (compressed) {
163 if (retval == 0) {
164 /* successful read, update the access time */
165 VTOC(vp)->c_touch_acctime = TRUE;
166
167 //
168 // compressed files are not traditional hot file candidates
169 // but they may be for CF (which ignores the ff_bytesread
170 // field)
171 //
172 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
173 VTOF(vp)->ff_bytesread = 0;
174 }
175 }
176 return retval;
177 }
178 /* otherwise the file was converted back to a regular file while we were reading it */
179 retval = 0;
180 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
181 int error;
182
183 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
184 if (error) {
185 return error;
186 }
187
188 }
189 }
190 #endif /* HFS_COMPRESSION */
191
192 cp = VTOC(vp);
193 fp = VTOF(vp);
194 hfsmp = VTOHFS(vp);
195
196 #if CONFIG_PROTECT
197 if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {
198 goto exit;
199 }
200
201 #if HFS_CONFIG_KEY_ROLL
202 if (ISSET(ap->a_ioflag, IO_ENCRYPTED)) {
203 off_rsrc_t off_rsrc = off_rsrc_make(offset + start_resid,
204 VNODE_IS_RSRC(vp));
205
206 retval = hfs_key_roll_up_to(ap->a_context, vp, off_rsrc);
207 if (retval)
208 goto exit;
209 }
210 #endif // HFS_CONFIG_KEY_ROLL
211 #endif // CONFIG_PROTECT
212
213 /*
214 * If this read request originated from a syscall (as opposed to
215 * an in-kernel page fault or something), then set it up for
216 * throttle checks
217 */
218 if (ap->a_ioflag & IO_SYSCALL_DISPATCH) {
219 io_throttle = IO_RETURN_ON_THROTTLE;
220 }
221
222 read_again:
223
224 /* Protect against a size change. */
225 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
226 took_truncate_lock = 1;
227
228 filesize = fp->ff_size;
229 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
230
231 /*
232 * Check the file size. Note that per POSIX spec, we return 0 at
233 * file EOF, so attempting a read at an offset that is too big
234 * should just return 0 on HFS+. Since the return value was initialized
235 * to 0 above, we just jump to exit. HFS Standard has its own behavior.
236 */
237 if (offset > filesize) {
238 #if CONFIG_HFS_STD
239 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
240 (offset > (off_t)MAXHFSFILESIZE)) {
241 retval = EFBIG;
242 }
243 #endif
244 goto exit;
245 }
246
247 KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_START,
248 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
249
250 retval = cluster_read(vp, uio, filesize, ap->a_ioflag |io_throttle);
251
252 cp->c_touch_acctime = TRUE;
253
254 KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_END,
255 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
256
257 /*
258 	 * Keep track of blocks read
259 */
260 if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
261 int took_cnode_lock = 0;
262 off_t bytesread;
263
264 bytesread = start_resid - uio_resid(uio);
265
266 		/* When ff_bytesread would exceed 32 bits, update it behind the cnode lock. */
267 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
268 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
269 took_cnode_lock = 1;
270 }
271 /*
272 * If this file hasn't been seen since the start of
273 * the current sampling period then start over.
274 */
275 if (cp->c_atime < hfsmp->hfc_timebase) {
276 struct timeval tv;
277
278 fp->ff_bytesread = bytesread;
279 microtime(&tv);
280 cp->c_atime = tv.tv_sec;
281 } else {
282 fp->ff_bytesread += bytesread;
283 }
284
285 if (!(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
286 //
287 // We don't add hotfiles for processes doing IO_EVTONLY I/O
288 // on the assumption that they're system processes such as
289 // mdworker which scan everything in the system (and thus
290 // do not represent user-initiated access to files)
291 //
292 (void) hfs_addhotfile(vp);
293 }
294 if (took_cnode_lock)
295 hfs_unlock(cp);
296 }
297 exit:
298 if (took_truncate_lock) {
299 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
300 }
301 if (retval == EAGAIN) {
302 throttle_lowpri_io(1);
303 throttled_count++;
304
305 retval = 0;
306 goto read_again;
307 }
308 if (throttled_count)
309 throttle_info_reset_window(NULL);
310 return (retval);
311 }
312
313 /*
314 * Ideally, this wouldn't be necessary; the cluster code should be
315 * able to handle this on the read-side. See <rdar://20420068>.
316 */
317 static errno_t hfs_zero_eof_page(vnode_t vp, off_t zero_up_to)
318 {
319 hfs_assert(VTOC(vp)->c_lockowner != current_thread());
320 hfs_assert(VTOC(vp)->c_truncatelockowner == current_thread());
321
322 struct filefork *fp = VTOF(vp);
323
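	/*
	 * If the current EOF is already page aligned there is no partial
	 * page to zero, and if the zero range does not extend past EOF
	 * there is nothing new to expose; either way, return early.
	 */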
324 if (!(fp->ff_size & PAGE_MASK_64) || zero_up_to <= fp->ff_size) {
325 // Nothing to do
326 return 0;
327 }
328
329 zero_up_to = MIN(zero_up_to, (off_t)round_page_64(fp->ff_size));
330
331 /* N.B. At present, @zero_up_to is not important because the cluster
332 code will always zero up to the end of the page anyway. */
333 return cluster_write(vp, NULL, fp->ff_size, zero_up_to,
334 fp->ff_size, 0, IO_HEADZEROFILL);
335 }
336
337 /*
338 * Write data to a file.
339 */
340 int
341 hfs_vnop_write(struct vnop_write_args *ap)
342 {
343 uio_t uio = ap->a_uio;
344 struct vnode *vp = ap->a_vp;
345 struct cnode *cp;
346 struct filefork *fp;
347 struct hfsmount *hfsmp;
348 kauth_cred_t cred = NULL;
349 off_t origFileSize;
350 off_t writelimit;
351 off_t bytesToAdd = 0;
352 off_t actualBytesAdded;
353 off_t filebytes;
354 off_t offset;
355 ssize_t resid;
356 int eflags;
357 int ioflag = ap->a_ioflag;
358 int retval = 0;
359 int lockflags;
360 int cnode_locked = 0;
361 int partialwrite = 0;
362 int do_snapshot = 1;
363 time_t orig_ctime=VTOC(vp)->c_ctime;
364 int took_truncate_lock = 0;
365 int io_return_on_throttle = 0;
366 int throttled_count = 0;
367
368 #if HFS_COMPRESSION
369 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
370 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
371 switch(state) {
372 case FILE_IS_COMPRESSED:
373 return EACCES;
374 case FILE_IS_CONVERTING:
375 /* if FILE_IS_CONVERTING, we allow writes but do not
376 bother with snapshots or else we will deadlock.
377 */
378 do_snapshot = 0;
379 break;
380 default:
381 printf("invalid state %d for compressed file\n", state);
382 /* fall through */
383 }
384 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
385 int error;
386
387 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
388 if (error != 0) {
389 return error;
390 }
391 }
392
393 if (do_snapshot) {
394 nspace_snapshot_event(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);
395 }
396
397 #endif
398
399 #if SECURE_KERNEL
400 if ((ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
401 (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
402 		/* Don't allow unencrypted I/O requests from user space */
403 return EPERM;
404 }
405 #endif
406
407 resid = uio_resid(uio);
408 offset = uio_offset(uio);
409
410 if (offset < 0)
411 return (EINVAL);
412 if (resid == 0)
413 return (E_NONE);
414 if (!vnode_isreg(vp))
415 return (EPERM); /* Can only write regular files */
416
417 cp = VTOC(vp);
418 fp = VTOF(vp);
419 hfsmp = VTOHFS(vp);
420
421 #if CONFIG_PROTECT
422 if ((retval = cp_handle_vnop (vp, CP_WRITE_ACCESS, 0)) != 0) {
423 goto exit;
424 }
425 #endif
426
427 eflags = kEFDeferMask; /* defer file block allocations */
428 #if HFS_SPARSE_DEV
429 /*
430 * When the underlying device is sparse and space
431 	 * is low (fewer than 2048 free allocation blocks), stop doing
432 	 * delayed allocations and begin doing synchronous I/O.
433 */
434 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
435 (hfs_freeblks(hfsmp, 0) < 2048)) {
436 eflags &= ~kEFDeferMask;
437 ioflag |= IO_SYNC;
438 }
439 #endif /* HFS_SPARSE_DEV */
440
441 if ((ioflag & (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) ==
442 (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) {
443 io_return_on_throttle = IO_RETURN_ON_THROTTLE;
444 }
445
446 again:
447 /*
448 * Protect against a size change.
449 *
450 * Note: If took_truncate_lock is true, then we previously got the lock shared
451 * but needed to upgrade to exclusive. So try getting it exclusive from the
452 * start.
453 */
454 if (ioflag & IO_APPEND || took_truncate_lock) {
455 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
456 }
457 else {
458 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
459 }
460 took_truncate_lock = 1;
461
462 /* Update UIO */
463 if (ioflag & IO_APPEND) {
464 uio_setoffset(uio, fp->ff_size);
465 offset = fp->ff_size;
466 }
467 if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
468 retval = EPERM;
469 goto exit;
470 }
471
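	/*
	 * Non-superuser writers set kEFReserveMask so that block allocation
	 * honors the volume's reserved free space (the flag is passed through
	 * to the hfs_freeblks() call below).
	 */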
472 cred = vfs_context_ucred(ap->a_context);
473 if (cred && suser(cred, NULL) != 0)
474 eflags |= kEFReserveMask;
475
476 origFileSize = fp->ff_size;
477 writelimit = offset + resid;
478
479 /*
480 * We may need an exclusive truncate lock for several reasons, all
481 * of which are because we may be writing to a (portion of a) block
482 * for the first time, and we need to make sure no readers see the
483 * prior, uninitialized contents of the block. The cases are:
484 *
485 * 1. We have unallocated (delayed allocation) blocks. We may be
486 * allocating new blocks to the file and writing to them.
487 * (A more precise check would be whether the range we're writing
488 * to contains delayed allocation blocks.)
489 * 2. We need to extend the file. The bytes between the old EOF
490 * and the new EOF are not yet initialized. This is important
491 * even if we're not allocating new blocks to the file. If the
492 * old EOF and new EOF are in the same block, we still need to
493 * protect that range of bytes until they are written for the
494 * first time.
495 *
496 * If we had a shared lock with the above cases, we need to try to upgrade
497 * to an exclusive lock. If the upgrade fails, we will lose the shared
498 * lock, and will need to take the truncate lock again; the took_truncate_lock
499 * flag will still be set, causing us to try for an exclusive lock next time.
500 */
501 if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
502 ((fp->ff_unallocblocks != 0) ||
503 (writelimit > origFileSize))) {
504 if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
505 /*
506 * Lock upgrade failed and we lost our shared lock, try again.
507 * Note: we do not set took_truncate_lock=0 here. Leaving it
508 * set to 1 will cause us to try to get the lock exclusive.
509 */
510 goto again;
511 }
512 else {
513 /* Store the owner in the c_truncatelockowner field if we successfully upgrade */
514 cp->c_truncatelockowner = current_thread();
515 }
516 }
517
518 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
519 goto exit;
520 }
521 cnode_locked = 1;
522
523 filebytes = hfs_blk_to_bytes(fp->ff_blocks, hfsmp->blockSize);
524
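	/*
	 * Fail early with ENOSPC if the write begins beyond the currently
	 * allocated blocks and the volume's free space cannot even cover
	 * the gap between the allocated end and the write offset.
	 */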
525 if (offset > filebytes
526 && (hfs_blk_to_bytes(hfs_freeblks(hfsmp, ISSET(eflags, kEFReserveMask)),
527 hfsmp->blockSize) < offset - filebytes)) {
528 retval = ENOSPC;
529 goto exit;
530 }
531
532 KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_START,
533 (int)offset, uio_resid(uio), (int)fp->ff_size,
534 (int)filebytes, 0);
535
536 	/* If the write fits within the currently allocated blocks, we do not need to extend the file. */
537 if (writelimit <= filebytes) {
538 goto sizeok;
539 }
540
541 bytesToAdd = writelimit - filebytes;
542
543 #if QUOTA
544 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
545 cred, 0);
546 if (retval)
547 goto exit;
548 #endif /* QUOTA */
549
550 if (hfs_start_transaction(hfsmp) != 0) {
551 retval = EINVAL;
552 goto exit;
553 }
554
555 while (writelimit > filebytes) {
556 bytesToAdd = writelimit - filebytes;
557
558 /* Protect extents b-tree and allocation bitmap */
559 lockflags = SFL_BITMAP;
560 if (overflow_extents(fp))
561 lockflags |= SFL_EXTENTS;
562 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
563
564 /* Files that are changing size are not hot file candidates. */
565 if (hfsmp->hfc_stage == HFC_RECORDING) {
566 fp->ff_bytesread = 0;
567 }
568 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
569 0, eflags, &actualBytesAdded));
570
571 hfs_systemfile_unlock(hfsmp, lockflags);
572
573 if ((actualBytesAdded == 0) && (retval == E_NONE))
574 retval = ENOSPC;
575 if (retval != E_NONE)
576 break;
577 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
578 KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_NONE,
579 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
580 }
581 (void) hfs_update(vp, 0);
582 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
583 (void) hfs_end_transaction(hfsmp);
584
585 /*
586 	 * If we didn't grow the file enough, try a partial write.
587 * POSIX expects this behavior.
588 */
589 if ((retval == ENOSPC) && (filebytes > offset)) {
590 retval = 0;
591 partialwrite = 1;
592 uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
593 resid -= bytesToAdd;
594 writelimit = filebytes;
595 }
596 sizeok:
597 if (retval == E_NONE) {
598 off_t filesize;
599 off_t head_off;
600 int lflag;
601
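		/*
		 * If this write extends the fork, record the bytes between the
		 * old EOF and the end of the write as an invalid range (not yet
		 * initialized on disk) and arm the zero-fill timeout.
		 */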
602 if (writelimit > fp->ff_size) {
603 filesize = writelimit;
604 struct timeval tv;
605 rl_add(fp->ff_size, writelimit - 1 , &fp->ff_invalidranges);
606 microuptime(&tv);
607 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
608 } else
609 filesize = fp->ff_size;
610
611 lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
612
613 /*
614 * We no longer use IO_HEADZEROFILL or IO_TAILZEROFILL (except
615 * for one case below). For the regions that lie before the
616 * beginning and after the end of this write that are in the
617 * same page, we let the cluster code handle zeroing that out
618 * if necessary. If those areas are not cached, the cluster
619 		 * code will try to read those areas in; for regions that have
620 		 * never been written to, hfs_vnop_blockmap will consult the
621 		 * invalid ranges and report them as such. The cluster code will
622 		 * then zero out those areas.
623 		 */
624
625 head_off = trunc_page_64(offset);
626
627 if (head_off < offset && head_off >= fp->ff_size) {
628 /*
629 * The first page is beyond current EOF, so as an
630 * optimisation, we can pass IO_HEADZEROFILL.
631 */
632 lflag |= IO_HEADZEROFILL;
633 }
634
635 hfs_unlock(cp);
636 cnode_locked = 0;
637
638 /*
639 * We need to tell UBC the fork's new size BEFORE calling
640 * cluster_write, in case any of the new pages need to be
641 * paged out before cluster_write completes (which does happen
642 * in embedded systems due to extreme memory pressure).
643 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
644 * will be, so that it can pass that on to cluster_pageout, and
645 * allow those pageouts.
646 *
647 * We don't update ff_size yet since we don't want pageins to
648 * be able to see uninitialized data between the old and new
649 * EOF, until cluster_write has completed and initialized that
650 * part of the file.
651 *
652 * The vnode pager relies on the file size last given to UBC via
653 * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
654 * ff_size (whichever is larger). NOTE: ff_new_size is always
655 * zero, unless we are extending the file via write.
656 */
657 if (filesize > fp->ff_size) {
658 retval = hfs_zero_eof_page(vp, offset);
659 if (retval)
660 goto exit;
661 fp->ff_new_size = filesize;
662 ubc_setsize(vp, filesize);
663 }
664 retval = cluster_write(vp, uio, fp->ff_size, filesize, head_off,
665 0, lflag | IO_NOZERODIRTY | io_return_on_throttle);
666 if (retval) {
667 fp->ff_new_size = 0; /* no longer extending; use ff_size */
668
669 if (retval == EAGAIN) {
670 /*
671 * EAGAIN indicates that we still have I/O to do, but
672 * that we now need to be throttled
673 */
674 if (resid != uio_resid(uio)) {
675 /*
676 				 * we managed to do some I/O before returning EAGAIN
677 */
678 resid = uio_resid(uio);
679 offset = uio_offset(uio);
680
681 cp->c_touch_chgtime = TRUE;
682 cp->c_touch_modtime = TRUE;
683 hfs_incr_gencount(cp);
684 }
685 if (filesize > fp->ff_size) {
686 /*
687 * we called ubc_setsize before the call to
688 * cluster_write... since we only partially
689 * completed the I/O, we need to
690 * re-adjust our idea of the filesize based
691 * on our interim EOF
692 */
693 ubc_setsize(vp, offset);
694
695 fp->ff_size = offset;
696 }
697 goto exit;
698 }
699 if (filesize > origFileSize) {
700 ubc_setsize(vp, origFileSize);
701 }
702 goto ioerr_exit;
703 }
704
705 if (filesize > origFileSize) {
706 fp->ff_size = filesize;
707
708 /* Files that are changing size are not hot file candidates. */
709 if (hfsmp->hfc_stage == HFC_RECORDING) {
710 fp->ff_bytesread = 0;
711 }
712 }
713 fp->ff_new_size = 0; /* ff_size now has the correct size */
714 }
715 if (partialwrite) {
716 uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
717 resid += bytesToAdd;
718 }
719
720 if (vnode_should_flush_after_write(vp, ioflag))
721 hfs_flush(hfsmp, HFS_FLUSH_CACHE);
722
723 ioerr_exit:
724 if (!cnode_locked) {
725 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
726 cnode_locked = 1;
727 }
728
729 if (resid > uio_resid(uio)) {
730 cp->c_touch_chgtime = TRUE;
731 cp->c_touch_modtime = TRUE;
732 hfs_incr_gencount(cp);
733
734 /*
735 		 * If we successfully wrote any data, and we are not the superuser,
736 * we clear the setuid and setgid bits as a precaution against
737 * tampering.
738 */
739 if (cp->c_mode & (S_ISUID | S_ISGID)) {
740 cred = vfs_context_ucred(ap->a_context);
741 if (cred && suser(cred, NULL)) {
742 cp->c_mode &= ~(S_ISUID | S_ISGID);
743 }
744 }
745 }
746 if (retval) {
747 if (ioflag & IO_UNIT) {
748 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
749 0, ap->a_context);
750 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
751 uio_setresid(uio, resid);
752 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
753 }
754 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio)))
755 retval = hfs_update(vp, 0);
756
757 /* Updating vcbWrCnt doesn't need to be atomic. */
758 hfsmp->vcbWrCnt++;
759
760 KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_END,
761 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
762 exit:
763 if (retval && took_truncate_lock
764 && cp->c_truncatelockowner == current_thread()) {
765 fp->ff_new_size = 0;
766 rl_remove(fp->ff_size, RL_INFINITY, &fp->ff_invalidranges);
767 }
768
769 if (cnode_locked)
770 hfs_unlock(cp);
771
772 if (took_truncate_lock) {
773 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
774 }
775 if (retval == EAGAIN) {
776 throttle_lowpri_io(1);
777 throttled_count++;
778
779 retval = 0;
780 goto again;
781 }
782 if (throttled_count)
783 throttle_info_reset_window(NULL);
784 return (retval);
785 }
786
787 /* support for the "bulk-access" fcntl */
788
789 #define CACHE_LEVELS 16
790 #define NUM_CACHE_ENTRIES (64*16)
791 #define PARENT_IDS_FLAG 0x100
792
793 struct access_cache {
794 int numcached;
795 int cachehits; /* these two for statistics gathering */
796 int lookups;
797 unsigned int *acache;
798 unsigned char *haveaccess;
799 };
800
801 struct access_t {
802 uid_t uid; /* IN: effective user id */
803 short flags; /* IN: access requested (i.e. R_OK) */
804 short num_groups; /* IN: number of groups user belongs to */
805 int num_files; /* IN: number of files to process */
806 int *file_ids; /* IN: array of file ids */
807 gid_t *groups; /* IN: array of groups */
808 short *access; /* OUT: access info for each file (0 for 'has access') */
809 } __attribute__((unavailable)); // this structure is for reference purposes only
810
811 struct user32_access_t {
812 uid_t uid; /* IN: effective user id */
813 short flags; /* IN: access requested (i.e. R_OK) */
814 short num_groups; /* IN: number of groups user belongs to */
815 int num_files; /* IN: number of files to process */
816 user32_addr_t file_ids; /* IN: array of file ids */
817 user32_addr_t groups; /* IN: array of groups */
818 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
819 };
820
821 struct user64_access_t {
822 uid_t uid; /* IN: effective user id */
823 short flags; /* IN: access requested (i.e. R_OK) */
824 short num_groups; /* IN: number of groups user belongs to */
825 int num_files; /* IN: number of files to process */
826 user64_addr_t file_ids; /* IN: array of file ids */
827 user64_addr_t groups; /* IN: array of groups */
828 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
829 };
830
831
832 // these are the "extended" versions of the above structures
833 // note that it is crucial that they be a different size than
834 // the regular versions
835 struct ext_access_t {
836 uint32_t flags; /* IN: access requested (i.e. R_OK) */
837 uint32_t num_files; /* IN: number of files to process */
838 uint32_t map_size; /* IN: size of the bit map */
839 uint32_t *file_ids; /* IN: Array of file ids */
840 char *bitmap; /* OUT: hash-bitmap of interesting directory ids */
841 short *access; /* OUT: access info for each file (0 for 'has access') */
842 uint32_t num_parents; /* future use */
843 cnid_t *parents; /* future use */
844 } __attribute__((unavailable)); // this structure is for reference purposes only
845
846 struct user32_ext_access_t {
847 uint32_t flags; /* IN: access requested (i.e. R_OK) */
848 uint32_t num_files; /* IN: number of files to process */
849 uint32_t map_size; /* IN: size of the bit map */
850 user32_addr_t file_ids; /* IN: Array of file ids */
851 user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
852 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
853 uint32_t num_parents; /* future use */
854 user32_addr_t parents; /* future use */
855 };
856
857 struct user64_ext_access_t {
858 uint32_t flags; /* IN: access requested (i.e. R_OK) */
859 uint32_t num_files; /* IN: number of files to process */
860 uint32_t map_size; /* IN: size of the bit map */
861 user64_addr_t file_ids; /* IN: array of file ids */
862 	user64_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
863 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
864 uint32_t num_parents;/* future use */
865 user64_addr_t parents;/* future use */
866 };
867
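// The size difference matters because do_bulk_access_check() below uses the
// fsctl argument size to tell the old-style and extended structures apart
// when converting them to the common user64_ext_access_t form.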
868
869 /*
870 * Perform a binary search for the given parent_id. Return value is
871  * the index if there is a match. If no_match_indexp is non-NULL,
872  * it is assigned the index at which to insert the item (whether or
873  * not a match was found).
874 */
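/*
 * Example (hypothetical values): with array = {5, 9, 17} and hi = 2,
 * searching for 9 returns index 1; searching for 12 returns -1 and sets
 * *no_match_indexp to 2, the position at which 12 would be inserted.
 */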
875 static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
876 {
877 int index=-1;
878 unsigned int lo=0;
879
880 do {
881 unsigned int mid = ((hi - lo)/2) + lo;
882 unsigned int this_id = array[mid];
883
884 if (parent_id == this_id) {
885 hi = mid;
886 break;
887 }
888
889 if (parent_id < this_id) {
890 hi = mid;
891 continue;
892 }
893
894 if (parent_id > this_id) {
895 lo = mid + 1;
896 continue;
897 }
898 } while(lo < hi);
899
900 /* check if lo and hi converged on the match */
901 if (parent_id == array[hi]) {
902 index = hi;
903 }
904
905 if (no_match_indexp) {
906 *no_match_indexp = hi;
907 }
908
909 return index;
910 }
911
912
913 static int
914 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
915 {
916 unsigned int hi;
917 int matches = 0;
918 int index, no_match_index;
919
920 if (cache->numcached == 0) {
921 *indexp = 0;
922 return 0; // table is empty, so insert at index=0 and report no match
923 }
924
925 if (cache->numcached > NUM_CACHE_ENTRIES) {
926 cache->numcached = NUM_CACHE_ENTRIES;
927 }
928
929 hi = cache->numcached - 1;
930
931 index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);
932
933 /* if no existing entry found, find index for new one */
934 if (index == -1) {
935 index = no_match_index;
936 matches = 0;
937 } else {
938 matches = 1;
939 }
940
941 *indexp = index;
942 return matches;
943 }
944
945 /*
946 * Add a node to the access_cache at the given index (or do a lookup first
947 * to find the index if -1 is passed in). We currently do a replace rather
948 * than an insert if the cache is full.
949 */
950 static void
951 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
952 {
953 int lookup_index = -1;
954
955 /* need to do a lookup first if -1 passed for index */
956 if (index == -1) {
957 if (lookup_bucket(cache, &lookup_index, nodeID)) {
958 if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
959 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
960 cache->haveaccess[lookup_index] = access;
961 }
962
963 /* mission accomplished */
964 return;
965 } else {
966 index = lookup_index;
967 }
968
969 }
970
971 /* if the cache is full, do a replace rather than an insert */
972 if (cache->numcached >= NUM_CACHE_ENTRIES) {
973 cache->numcached = NUM_CACHE_ENTRIES-1;
974
975 if (index > cache->numcached) {
976 index = cache->numcached;
977 }
978 }
979
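	/*
	 * Keep the cache sorted: if the new nodeID sorts after the entry
	 * currently at this index, shift the insertion point one slot right.
	 */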
980 if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
981 index++;
982 }
983
984 if (index >= 0 && index < cache->numcached) {
985 /* only do bcopy if we're inserting */
986 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
987 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
988 }
989
990 cache->acache[index] = nodeID;
991 cache->haveaccess[index] = access;
992 cache->numcached++;
993 }
994
995
996 struct cinfo {
997 uid_t uid;
998 gid_t gid;
999 mode_t mode;
1000 cnid_t parentcnid;
1001 u_int16_t recflags;
1002 };
1003
1004 static int
1005 snoop_callback(const cnode_t *cp, void *arg)
1006 {
1007 struct cinfo *cip = arg;
1008
1009 cip->uid = cp->c_uid;
1010 cip->gid = cp->c_gid;
1011 cip->mode = cp->c_mode;
1012 cip->parentcnid = cp->c_parentcnid;
1013 cip->recflags = cp->c_attr.ca_recflags;
1014
1015 return (0);
1016 }
1017
1018 /*
1019  * Look up the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
1020  * isn't in core, go to the catalog.
1021 */
1022 static int
1023 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
1024 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
1025 {
1026 int error = 0;
1027
1028 /* if this id matches the one the fsctl was called with, skip the lookup */
1029 if (cnid == skip_cp->c_cnid) {
1030 cnattrp->ca_uid = skip_cp->c_uid;
1031 cnattrp->ca_gid = skip_cp->c_gid;
1032 cnattrp->ca_mode = skip_cp->c_mode;
1033 cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
1034 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
1035 } else {
1036 struct cinfo c_info;
1037
1038 		/* otherwise, check the cnode hash in case the file/dir is in core */
1039 error = hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info);
1040
1041 if (error == EACCES) {
1042 // File is deleted
1043 return ENOENT;
1044 } else if (!error) {
1045 cnattrp->ca_uid = c_info.uid;
1046 cnattrp->ca_gid = c_info.gid;
1047 cnattrp->ca_mode = c_info.mode;
1048 cnattrp->ca_recflags = c_info.recflags;
1049 keyp->hfsPlus.parentID = c_info.parentcnid;
1050 } else {
1051 int lockflags;
1052
1053 if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
1054 throttle_lowpri_io(1);
1055
1056 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1057
1058 /* lookup this cnid in the catalog */
1059 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
1060
1061 hfs_systemfile_unlock(hfsmp, lockflags);
1062
1063 cache->lookups++;
1064 }
1065 }
1066
1067 return (error);
1068 }
1069
1070
1071 /*
1072 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
1073 * up to CACHE_LEVELS as we progress towards the root.
1074 */
1075 static int
1076 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
1077 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
1078 struct vfs_context *my_context,
1079 char *bitmap,
1080 uint32_t map_size,
1081 cnid_t* parents,
1082 uint32_t num_parents)
1083 {
1084 int myErr = 0;
1085 int myResult;
1086 HFSCatalogNodeID thisNodeID;
1087 unsigned int myPerms;
1088 struct cat_attr cnattr;
1089 int cache_index = -1, scope_index = -1, scope_idx_start = -1;
1090 CatalogKey catkey;
1091
1092 int i = 0, ids_to_cache = 0;
1093 int parent_ids[CACHE_LEVELS];
1094
1095 thisNodeID = nodeID;
1096 while (thisNodeID >= kRootDirID) {
1097 myResult = 0; /* default to "no access" */
1098
1099 /* check the cache before resorting to hitting the catalog */
1100
1101 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
1102 * to look any further after hitting cached dir */
1103
1104 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
1105 cache->cachehits++;
1106 myErr = cache->haveaccess[cache_index];
1107 if (scope_index != -1) {
1108 if (myErr == ESRCH) {
1109 myErr = 0;
1110 }
1111 } else {
1112 scope_index = 0; // so we'll just use the cache result
1113 scope_idx_start = ids_to_cache;
1114 }
1115 myResult = (myErr == 0) ? 1 : 0;
1116 goto ExitThisRoutine;
1117 }
1118
1119
1120 if (parents) {
1121 int tmp;
1122 tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
1123 if (scope_index == -1)
1124 scope_index = tmp;
1125 if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
1126 scope_idx_start = ids_to_cache;
1127 }
1128 }
1129
1130 /* remember which parents we want to cache */
1131 if (ids_to_cache < CACHE_LEVELS) {
1132 parent_ids[ids_to_cache] = thisNodeID;
1133 ids_to_cache++;
1134 }
1135 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
1136 if (bitmap && map_size) {
1137 bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
1138 }
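		/*
		 * Worked example (hypothetical values): for thisNodeID = 37 and
		 * map_size = 16, byte (37/8) % 16 = 4 and bit 37 & 7 = 5 are set.
		 * Distinct ids can collide since the map wraps modulo map_size.
		 */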
1139
1140
1141 /* do the lookup (checks the cnode hash, then the catalog) */
1142 myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
1143 if (myErr) {
1144 goto ExitThisRoutine; /* no access */
1145 }
1146
1147 /* Root always gets access. */
1148 if (suser(myp_ucred, NULL) == 0) {
1149 thisNodeID = catkey.hfsPlus.parentID;
1150 myResult = 1;
1151 continue;
1152 }
1153
1154 		// if the thing has ACLs, do the full permission check
1155 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1156 struct vnode *vp;
1157
1158 /* get the vnode for this cnid */
1159 myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
1160 if ( myErr ) {
1161 myResult = 0;
1162 goto ExitThisRoutine;
1163 }
1164
1165 thisNodeID = VTOC(vp)->c_parentcnid;
1166
1167 hfs_unlock(VTOC(vp));
1168
1169 if (vnode_vtype(vp) == VDIR) {
1170 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
1171 } else {
1172 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
1173 }
1174
1175 vnode_put(vp);
1176 if (myErr) {
1177 myResult = 0;
1178 goto ExitThisRoutine;
1179 }
1180 } else {
1181 unsigned int flags;
1182 int mode = cnattr.ca_mode & S_IFMT;
1183 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp,myp_ucred, theProcPtr);
1184
1185 if (mode == S_IFDIR) {
1186 flags = R_OK | X_OK;
1187 } else {
1188 flags = R_OK;
1189 }
1190 if ( (myPerms & flags) != flags) {
1191 myResult = 0;
1192 myErr = EACCES;
1193 goto ExitThisRoutine; /* no access */
1194 }
1195
1196 /* up the hierarchy we go */
1197 thisNodeID = catkey.hfsPlus.parentID;
1198 }
1199 }
1200
1201 /* if here, we have access to this node */
1202 myResult = 1;
1203
1204 ExitThisRoutine:
1205 if (parents && myErr == 0 && scope_index == -1) {
1206 myErr = ESRCH;
1207 }
1208
1209 if (myErr) {
1210 myResult = 0;
1211 }
1212 *err = myErr;
1213
1214 /* cache the parent directory(ies) */
1215 for (i = 0; i < ids_to_cache; i++) {
1216 if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
1217 add_node(cache, -1, parent_ids[i], ESRCH);
1218 } else {
1219 add_node(cache, -1, parent_ids[i], myErr);
1220 }
1221 }
1222
1223 return (myResult);
1224 }
1225
1226 static int
1227 do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
1228 struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
1229 {
1230 boolean_t is64bit;
1231
1232 /*
1233 * NOTE: on entry, the vnode has an io_ref. In case this vnode
1234 	 * happens to be in our list of file_ids, we'll note it so we
1235 	 * avoid calling hfs_chashget_nowait() on that id, as that
1236 	 * would cause a "locking against myself" panic.
1237 */
1238 Boolean check_leaf = true;
1239
1240 struct user64_ext_access_t *user_access_structp;
1241 struct user64_ext_access_t tmp_user_access;
1242 struct access_cache cache;
1243
1244 int error = 0, prev_parent_check_ok=1;
1245 unsigned int i;
1246
1247 short flags;
1248 unsigned int num_files = 0;
1249 int map_size = 0;
1250 int num_parents = 0;
1251 int *file_ids=NULL;
1252 short *access=NULL;
1253 char *bitmap=NULL;
1254 cnid_t *parents=NULL;
1255 int leaf_index;
1256
1257 cnid_t cnid;
1258 cnid_t prevParent_cnid = 0;
1259 unsigned int myPerms;
1260 short myaccess = 0;
1261 struct cat_attr cnattr;
1262 CatalogKey catkey;
1263 struct cnode *skip_cp = VTOC(vp);
1264 kauth_cred_t cred = vfs_context_ucred(context);
1265 proc_t p = vfs_context_proc(context);
1266
1267 is64bit = proc_is64bit(p);
1268
1269 /* initialize the local cache and buffers */
1270 cache.numcached = 0;
1271 cache.cachehits = 0;
1272 cache.lookups = 0;
1273 cache.acache = NULL;
1274 cache.haveaccess = NULL;
1275
1276 /* struct copyin done during dispatch... need to copy file_id array separately */
1277 if (ap->a_data == NULL) {
1278 error = EINVAL;
1279 goto err_exit_bulk_access;
1280 }
1281
1282 if (is64bit) {
1283 if (arg_size != sizeof(struct user64_ext_access_t)) {
1284 error = EINVAL;
1285 goto err_exit_bulk_access;
1286 }
1287
1288 user_access_structp = (struct user64_ext_access_t *)ap->a_data;
1289
1290 } else if (arg_size == sizeof(struct user32_access_t)) {
1291 struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;
1292
1293 // convert an old style bulk-access struct to the new style
1294 tmp_user_access.flags = accessp->flags;
1295 tmp_user_access.num_files = accessp->num_files;
1296 tmp_user_access.map_size = 0;
1297 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1298 tmp_user_access.bitmap = USER_ADDR_NULL;
1299 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1300 tmp_user_access.num_parents = 0;
1301 user_access_structp = &tmp_user_access;
1302
1303 } else if (arg_size == sizeof(struct user32_ext_access_t)) {
1304 struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;
1305
1306 // up-cast from a 32-bit version of the struct
1307 tmp_user_access.flags = accessp->flags;
1308 tmp_user_access.num_files = accessp->num_files;
1309 tmp_user_access.map_size = accessp->map_size;
1310 tmp_user_access.num_parents = accessp->num_parents;
1311
1312 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1313 tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
1314 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1315 tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);
1316
1317 user_access_structp = &tmp_user_access;
1318 } else {
1319 error = EINVAL;
1320 goto err_exit_bulk_access;
1321 }
1322
1323 map_size = user_access_structp->map_size;
1324
1325 num_files = user_access_structp->num_files;
1326
1327 	num_parents = user_access_structp->num_parents;
1328
1329 if (num_files < 1) {
1330 goto err_exit_bulk_access;
1331 }
1332 if (num_files > 1024) {
1333 error = EINVAL;
1334 goto err_exit_bulk_access;
1335 }
1336
1337 if (num_parents > 1024) {
1338 error = EINVAL;
1339 goto err_exit_bulk_access;
1340 }
1341
1342 file_ids = hfs_malloc(sizeof(int) * num_files);
1343 access = hfs_malloc(sizeof(short) * num_files);
1344 if (map_size) {
1345 bitmap = hfs_mallocz(sizeof(char) * map_size);
1346 }
1347
1348 if (num_parents) {
1349 parents = hfs_malloc(sizeof(cnid_t) * num_parents);
1350 }
1351
1352 cache.acache = hfs_malloc(sizeof(int) * NUM_CACHE_ENTRIES);
1353 cache.haveaccess = hfs_malloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1354
1355 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1356 num_files * sizeof(int)))) {
1357 goto err_exit_bulk_access;
1358 }
1359
1360 if (num_parents) {
1361 if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
1362 num_parents * sizeof(cnid_t)))) {
1363 goto err_exit_bulk_access;
1364 }
1365 }
1366
1367 flags = user_access_structp->flags;
1368 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1369 flags = R_OK;
1370 }
1371
1372 /* check if we've been passed leaf node ids or parent ids */
1373 if (flags & PARENT_IDS_FLAG) {
1374 check_leaf = false;
1375 }
1376
1377 /* Check access to each file_id passed in */
1378 for (i = 0; i < num_files; i++) {
1379 leaf_index=-1;
1380 cnid = (cnid_t) file_ids[i];
1381
1382 /* root always has access */
1383 if ((!parents) && (!suser(cred, NULL))) {
1384 access[i] = 0;
1385 continue;
1386 }
1387
1388 if (check_leaf) {
1389 /* do the lookup (checks the cnode hash, then the catalog) */
1390 error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
1391 if (error) {
1392 access[i] = (short) error;
1393 continue;
1394 }
1395
1396 if (parents) {
1397 // Check if the leaf matches one of the parent scopes
1398 leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
1399 if (leaf_index >= 0 && parents[leaf_index] == cnid)
1400 prev_parent_check_ok = 0;
1401 else if (leaf_index >= 0)
1402 prev_parent_check_ok = 1;
1403 }
1404
1405 			// if the thing has ACLs, do the full permission check
1406 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1407 struct vnode *cvp;
1408 int myErr = 0;
1409 /* get the vnode for this cnid */
1410 myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);
1411 if ( myErr ) {
1412 access[i] = myErr;
1413 continue;
1414 }
1415
1416 hfs_unlock(VTOC(cvp));
1417
1418 if (vnode_vtype(cvp) == VDIR) {
1419 myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
1420 } else {
1421 myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
1422 }
1423
1424 vnode_put(cvp);
1425 if (myErr) {
1426 access[i] = myErr;
1427 continue;
1428 }
1429 } else {
1430 /* before calling CheckAccess(), check the target file for read access */
1431 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1432 cnattr.ca_mode, hfsmp->hfs_mp, cred, p);
1433
1434 /* fail fast if no access */
1435 if ((myPerms & flags) == 0) {
1436 access[i] = EACCES;
1437 continue;
1438 }
1439 }
1440 } else {
1441 /* we were passed an array of parent ids */
1442 catkey.hfsPlus.parentID = cnid;
1443 }
1444
1445 /* if the last guy had the same parent and had access, we're done */
1446 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
1447 cache.cachehits++;
1448 access[i] = 0;
1449 continue;
1450 }
1451
1452 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1453 skip_cp, p, cred, context,bitmap, map_size, parents, num_parents);
1454
1455 if (myaccess || (error == ESRCH && leaf_index != -1)) {
1456 access[i] = 0; // have access.. no errors to report
1457 } else {
1458 access[i] = (error != 0 ? (short) error : EACCES);
1459 }
1460
1461 prevParent_cnid = catkey.hfsPlus.parentID;
1462 }
1463
1464 /* copyout the access array */
1465 if ((error = copyout((caddr_t)access, user_access_structp->access,
1466 num_files * sizeof (short)))) {
1467 goto err_exit_bulk_access;
1468 }
1469 if (map_size && bitmap) {
1470 if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
1471 map_size * sizeof (char)))) {
1472 goto err_exit_bulk_access;
1473 }
1474 }
1475
1476
1477 err_exit_bulk_access:
1478
1479 hfs_free(file_ids, sizeof(int) * num_files);
1480 hfs_free(parents, sizeof(cnid_t) * num_parents);
1481 hfs_free(bitmap, sizeof(char) * map_size);
1482 hfs_free(access, sizeof(short) * num_files);
1483 hfs_free(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1484 hfs_free(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1485
1486 return (error);
1487 }
1488
1489
1490 /* end "bulk-access" support */
1491
1492
1493 /*
1494 * Control filesystem operating characteristics.
1495 */
1496 int
1497 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
1498 vnode_t a_vp;
1499 long a_command;
1500 caddr_t a_data;
1501 int a_fflag;
1502 vfs_context_t a_context;
1503 } */ *ap)
1504 {
1505 struct vnode * vp = ap->a_vp;
1506 struct hfsmount *hfsmp = VTOHFS(vp);
1507 vfs_context_t context = ap->a_context;
1508 kauth_cred_t cred = vfs_context_ucred(context);
1509 proc_t p = vfs_context_proc(context);
1510 struct vfsstatfs *vfsp;
1511 boolean_t is64bit;
1512 off_t jnl_start, jnl_size;
1513 struct hfs_journal_info *jip;
1514 #if HFS_COMPRESSION
1515 int compressed = 0;
1516 off_t uncompressed_size = -1;
1517 int decmpfs_error = 0;
1518
1519 if (ap->a_command == F_RDADVISE) {
1520 /* we need to inspect the decmpfs state of the file as early as possible */
1521 compressed = hfs_file_is_compressed(VTOC(vp), 0);
1522 if (compressed) {
1523 if (VNODE_IS_RSRC(vp)) {
1524 /* if this is the resource fork, treat it as if it were empty */
1525 uncompressed_size = 0;
1526 } else {
1527 decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
1528 if (decmpfs_error != 0) {
1529 /* failed to get the uncompressed size, we'll check for this later */
1530 uncompressed_size = -1;
1531 }
1532 }
1533 }
1534 }
1535 #endif /* HFS_COMPRESSION */
1536
1537 is64bit = proc_is64bit(p);
1538
1539 #if CONFIG_PROTECT
1540 #if HFS_CONFIG_KEY_ROLL
1541 // The HFS_KEY_ROLL fsctl does its own access checks
1542 if (ap->a_command != HFS_KEY_ROLL)
1543 #endif
1544 {
1545 int error = 0;
1546 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
1547 return error;
1548 }
1549 }
1550 #endif /* CONFIG_PROTECT */
1551
1552 switch (ap->a_command) {
1553
1554 case HFS_GETPATH:
1555 {
1556 struct vnode *file_vp;
1557 cnid_t cnid;
1558 int outlen;
1559 char *bufptr;
1560 int error;
1561 int flags = 0;
1562
1563 /* Caller must be owner of file system. */
1564 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1565 if (suser(cred, NULL) &&
1566 kauth_cred_getuid(cred) != vfsp->f_owner) {
1567 return (EACCES);
1568 }
1569 /* Target vnode must be file system's root. */
1570 if (!vnode_isvroot(vp)) {
1571 return (EINVAL);
1572 }
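		/*
		 * The fsctl data buffer is reused: on input it carries the target
		 * cnid as a decimal string, and on output build_path() overwrites
		 * it with the resulting path (up to sizeof(pathname_t) bytes).
		 */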
1573 bufptr = (char *)ap->a_data;
1574 cnid = strtoul(bufptr, NULL, 10);
1575 if (ap->a_fflag & HFS_GETPATH_VOLUME_RELATIVE) {
1576 flags |= BUILDPATH_VOLUME_RELATIVE;
1577 }
1578
1579 /* We need to call hfs_vfs_vget to leverage the code that will
1580 * fix the origin list for us if needed, as opposed to calling
1581 		 * hfs_vget, since we will need the parent for the build_path call.
1582 */
1583
1584 if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
1585 return (error);
1586 }
1587
1588 error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, flags, context);
1589 vnode_put(file_vp);
1590
1591 return (error);
1592 }
1593
1594 case HFS_SET_MAX_DEFRAG_SIZE:
1595 {
1596 int error = 0; /* Assume success */
1597 u_int32_t maxsize = 0;
1598
1599 if (vnode_vfsisrdonly(vp)) {
1600 return (EROFS);
1601 }
1602 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1603 if (!kauth_cred_issuser(cred)) {
1604 return (EACCES); /* must be root */
1605 }
1606
1607 maxsize = *(u_int32_t *)ap->a_data;
1608
1609 hfs_lock_mount(hfsmp);
1610 if (maxsize > HFS_MAX_DEFRAG_SIZE) {
1611 error = EINVAL;
1612 }
1613 else {
1614 hfsmp->hfs_defrag_max = maxsize;
1615 }
1616 hfs_unlock_mount(hfsmp);
1617
1618 return (error);
1619 }
1620
1621 case HFS_FORCE_ENABLE_DEFRAG:
1622 {
1623 int error = 0; /* Assume success */
1624 u_int32_t do_enable = 0;
1625
1626 if (vnode_vfsisrdonly(vp)) {
1627 return (EROFS);
1628 }
1629 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1630 if (!kauth_cred_issuser(cred)) {
1631 return (EACCES); /* must be root */
1632 }
1633
1634 do_enable = *(u_int32_t *)ap->a_data;
1635
1636 hfs_lock_mount(hfsmp);
1637 if (do_enable != 0) {
1638 hfsmp->hfs_defrag_nowait = 1;
1639 }
1640 else {
1641 error = EINVAL;
1642 }
1643
1644 hfs_unlock_mount(hfsmp);
1645
1646 return (error);
1647 }
1648
1649
1650 case HFS_TRANSFER_DOCUMENT_ID:
1651 {
1652 struct cnode *cp = NULL;
1653 int error;
1654 u_int32_t to_fd = *(u_int32_t *)ap->a_data;
1655 struct fileproc *to_fp;
1656 struct vnode *to_vp;
1657 struct cnode *to_cp;
1658
1659 cp = VTOC(vp);
1660
1661 if ((error = fp_getfvp(p, to_fd, &to_fp, &to_vp)) != 0) {
1662 //printf("could not get the vnode for fd %d (err %d)\n", to_fd, error);
1663 return error;
1664 }
1665 if ( (error = vnode_getwithref(to_vp)) ) {
1666 file_drop(to_fd);
1667 return error;
1668 }
1669
1670 if (VTOHFS(to_vp) != hfsmp) {
1671 error = EXDEV;
1672 goto transfer_cleanup;
1673 }
1674
1675 int need_unlock = 1;
1676 to_cp = VTOC(to_vp);
1677 error = hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
1678 if (error != 0) {
1679 //printf("could not lock the pair of cnodes (error %d)\n", error);
1680 goto transfer_cleanup;
1681 }
1682
1683 if (!(cp->c_bsdflags & UF_TRACKED)) {
1684 error = EINVAL;
1685 } else if (to_cp->c_bsdflags & UF_TRACKED) {
1686 //
1687 // if the destination is already tracked, return an error
1688 // as otherwise it's a silent deletion of the target's
1689 // document-id
1690 //
1691 error = EEXIST;
1692 } else if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
1693 //
1694 // we can use the FndrExtendedFileInfo because the doc-id is the first
1695 // thing in both it and the ExtendedDirInfo struct which is fixed in
1696 			// format and cannot change layout
1697 //
1698 struct FndrExtendedFileInfo *f_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16);
1699 struct FndrExtendedFileInfo *to_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)to_cp->c_finderinfo + 16);
1700
1701 if (f_extinfo->document_id == 0) {
1702 uint32_t new_id;
1703
1704 hfs_unlockpair(cp, to_cp); // have to unlock to be able to get a new-id
1705
1706 if ((error = hfs_generate_document_id(hfsmp, &new_id)) == 0) {
1707 //
1708 // re-lock the pair now that we have the document-id
1709 //
1710 hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
1711 f_extinfo->document_id = new_id;
1712 } else {
1713 goto transfer_cleanup;
1714 }
1715 }
1716
1717 to_extinfo->document_id = f_extinfo->document_id;
1718 f_extinfo->document_id = 0;
1719 //printf("TRANSFERRING: doc-id %d from ino %d to ino %d\n", to_extinfo->document_id, cp->c_fileid, to_cp->c_fileid);
1720
1721 // make sure the destination is also UF_TRACKED
1722 to_cp->c_bsdflags |= UF_TRACKED;
1723 cp->c_bsdflags &= ~UF_TRACKED;
1724
1725 // mark the cnodes dirty
1726 cp->c_flag |= C_MODIFIED;
1727 to_cp->c_flag |= C_MODIFIED;
1728
1729 int lockflags;
1730 if ((error = hfs_start_transaction(hfsmp)) == 0) {
1731
1732 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1733
1734 (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
1735 (void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, NULL, NULL);
1736
1737 hfs_systemfile_unlock (hfsmp, lockflags);
1738 (void) hfs_end_transaction(hfsmp);
1739 }
1740
1741 add_fsevent(FSE_DOCID_CHANGED, context,
1742 FSE_ARG_DEV, hfsmp->hfs_raw_dev,
1743 FSE_ARG_INO, (ino64_t)cp->c_fileid, // src inode #
1744 FSE_ARG_INO, (ino64_t)to_cp->c_fileid, // dst inode #
1745 FSE_ARG_INT32, to_extinfo->document_id,
1746 FSE_ARG_DONE);
1747
1748 hfs_unlockpair(cp, to_cp); // unlock this so we can send the fsevents
1749 need_unlock = 0;
1750
1751 if (need_fsevent(FSE_STAT_CHANGED, vp)) {
1752 add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
1753 }
1754 if (need_fsevent(FSE_STAT_CHANGED, to_vp)) {
1755 add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, to_vp, FSE_ARG_DONE);
1756 }
1757 }
1758
1759 if (need_unlock) {
1760 hfs_unlockpair(cp, to_cp);
1761 }
1762
1763 transfer_cleanup:
1764 vnode_put(to_vp);
1765 file_drop(to_fd);
1766
1767 return error;
1768 }
1769
1770
1771
1772 case HFS_PREV_LINK:
1773 case HFS_NEXT_LINK:
1774 {
1775 cnid_t linkfileid;
1776 cnid_t nextlinkid;
1777 cnid_t prevlinkid;
1778 int error;
1779
1780 /* Caller must be owner of file system. */
1781 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1782 if (suser(cred, NULL) &&
1783 kauth_cred_getuid(cred) != vfsp->f_owner) {
1784 return (EACCES);
1785 }
1786 /* Target vnode must be file system's root. */
1787 if (!vnode_isvroot(vp)) {
1788 return (EINVAL);
1789 }
1790 linkfileid = *(cnid_t *)ap->a_data;
1791 if (linkfileid < kHFSFirstUserCatalogNodeID) {
1792 return (EINVAL);
1793 }
1794 if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
1795 return (error);
1796 }
1797 if (ap->a_command == HFS_NEXT_LINK) {
1798 *(cnid_t *)ap->a_data = nextlinkid;
1799 } else {
1800 *(cnid_t *)ap->a_data = prevlinkid;
1801 }
1802 return (0);
1803 }
1804
1805 case HFS_RESIZE_PROGRESS: {
1806
1807 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1808 if (suser(cred, NULL) &&
1809 kauth_cred_getuid(cred) != vfsp->f_owner) {
1810 return (EACCES); /* must be owner of file system */
1811 }
1812 if (!vnode_isvroot(vp)) {
1813 return (EINVAL);
1814 }
1815 /* file system must not be mounted read-only */
1816 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1817 return (EROFS);
1818 }
1819
1820 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
1821 }
1822
1823 case HFS_RESIZE_VOLUME: {
1824 u_int64_t newsize;
1825 u_int64_t cursize;
1826 int ret;
1827
1828 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1829 if (suser(cred, NULL) &&
1830 kauth_cred_getuid(cred) != vfsp->f_owner) {
1831 return (EACCES); /* must be owner of file system */
1832 }
1833 if (!vnode_isvroot(vp)) {
1834 return (EINVAL);
1835 }
1836
1837 /* filesystem must not be mounted read only */
1838 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1839 return (EROFS);
1840 }
1841 newsize = *(u_int64_t *)ap->a_data;
1842 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
1843
1844 if (newsize == cursize) {
1845 return (0);
1846 }
1847 IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeWillResize);
1848 if (newsize > cursize) {
1849 ret = hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
1850 } else {
1851 ret = hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
1852 }
1853 IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeDidResize);
1854 return (ret);
1855 }
1856 case HFS_CHANGE_NEXT_ALLOCATION: {
1857 int error = 0; /* Assume success */
1858 u_int32_t location;
1859
1860 if (vnode_vfsisrdonly(vp)) {
1861 return (EROFS);
1862 }
1863 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1864 if (suser(cred, NULL) &&
1865 kauth_cred_getuid(cred) != vfsp->f_owner) {
1866 return (EACCES); /* must be owner of file system */
1867 }
1868 if (!vnode_isvroot(vp)) {
1869 return (EINVAL);
1870 }
1871 hfs_lock_mount(hfsmp);
1872 location = *(u_int32_t *)ap->a_data;
1873 if ((location >= hfsmp->allocLimit) &&
1874 (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
1875 error = EINVAL;
1876 goto fail_change_next_allocation;
1877 }
1878 /* Return previous value. */
1879 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
1880 if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
1881 /* On magic value for location, set nextAllocation to next block
1882 * after metadata zone and set flag in mount structure to indicate
1883 * that nextAllocation should not be updated again.
1884 */
1885 if (hfsmp->hfs_metazone_end != 0) {
1886 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
1887 }
1888 hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1889 } else {
1890 hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1891 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
1892 }
1893 MarkVCBDirty(hfsmp);
1894 fail_change_next_allocation:
1895 hfs_unlock_mount(hfsmp);
1896 return (error);
1897 }
1898
1899 #if HFS_SPARSE_DEV
1900 case HFS_SETBACKINGSTOREINFO: {
1901 struct vnode * di_vp;
1902 struct hfs_backingstoreinfo *bsdata;
1903 int error = 0;
1904
1905 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1906 return (EROFS);
1907 }
1908 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
1909 return (EALREADY);
1910 }
1911 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1912 if (suser(cred, NULL) &&
1913 kauth_cred_getuid(cred) != vfsp->f_owner) {
1914 return (EACCES); /* must be owner of file system */
1915 }
1916 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
1917 if (bsdata == NULL) {
1918 return (EINVAL);
1919 }
1920 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
1921 return (error);
1922 }
1923 if ((error = vnode_getwithref(di_vp))) {
1924 file_drop(bsdata->backingfd);
1925 return(error);
1926 }
1927
1928 if (vnode_mount(vp) == vnode_mount(di_vp)) {
1929 (void)vnode_put(di_vp);
1930 file_drop(bsdata->backingfd);
1931 return (EINVAL);
1932 }
1933
1934 // Dropped in unmount
1935 vnode_ref(di_vp);
1936
1937 hfs_lock_mount(hfsmp);
1938 hfsmp->hfs_backingvp = di_vp;
1939 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
1940 hfsmp->hfs_sparsebandblks = bsdata->bandsize / hfsmp->blockSize * 4;
1941 hfs_unlock_mount(hfsmp);
1942
1943 /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */
1944
1945 /*
1946 * If the sparse image is on a sparse image file (as opposed to a sparse
1947 * bundle), then we may need to limit the free space to the maximum size
1948 * of a file on that volume. So we query (using pathconf), and if we get
1949 * a meaningful result, we cache the number of blocks for later use in
1950 * hfs_freeblks().
1951 */
1952 hfsmp->hfs_backingfs_maxblocks = 0;
1953 if (vnode_vtype(di_vp) == VREG) {
1954 int terr;
1955 int hostbits;
1956 terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
1957 if (terr == 0 && hostbits != 0 && hostbits < 64) {
1958 u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;
1959
1960 hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
1961 }
1962 }
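			/*
			 * For example (illustrative arithmetic): if the host file system
			 * reports _PC_FILESIZEBITS == 31 and this volume's allocation
			 * block size is 4096 bytes, hfs_backingfs_maxblocks becomes
			 * (1ULL << 31) / 4096 = 524288 blocks.
			 */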
1963
1964 /* The free extent cache is managed differently for sparse devices.
1965 * There is a window between when the volume is mounted and when the
1966 * device is marked as sparse, so the free extent cache for this
1967 * volume is currently initialized as for a normal volume (sorted by
1968 * block count). Reset the cache so that it will be rebuilt for a
1969 * sparse device (sorted by start block).
1970 */
1971 ResetVCBFreeExtCache(hfsmp);
1972
1973 (void)vnode_put(di_vp);
1974 file_drop(bsdata->backingfd);
1975 return (0);
1976 }
1977 case HFS_CLRBACKINGSTOREINFO: {
1978 struct vnode * tmpvp;
1979
1980 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1981 if (suser(cred, NULL) &&
1982 kauth_cred_getuid(cred) != vfsp->f_owner) {
1983 return (EACCES); /* must be owner of file system */
1984 }
1985 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1986 return (EROFS);
1987 }
1988
1989 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
1990 hfsmp->hfs_backingvp) {
1991
1992 hfs_lock_mount(hfsmp);
1993 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
1994 tmpvp = hfsmp->hfs_backingvp;
1995 hfsmp->hfs_backingvp = NULLVP;
1996 hfsmp->hfs_sparsebandblks = 0;
1997 hfs_unlock_mount(hfsmp);
1998
1999 vnode_rele(tmpvp);
2000 }
2001 return (0);
2002 }
2003 #endif /* HFS_SPARSE_DEV */
2004
2005 /* Change the next CNID stored in the VH */
2006 case HFS_CHANGE_NEXTCNID: {
2007 int error = 0; /* Assume success */
2008 u_int32_t fileid;
2009 int wraparound = 0;
2010 int lockflags = 0;
2011
2012 if (vnode_vfsisrdonly(vp)) {
2013 return (EROFS);
2014 }
2015 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
2016 if (suser(cred, NULL) &&
2017 kauth_cred_getuid(cred) != vfsp->f_owner) {
2018 return (EACCES); /* must be owner of file system */
2019 }
2020
2021 fileid = *(u_int32_t *)ap->a_data;
2022
2023 /* Must have catalog lock excl. to advance the CNID pointer */
2024 lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG , HFS_EXCLUSIVE_LOCK);
2025
2026 hfs_lock_mount(hfsmp);
2027
2028 /* If it is less than the current next CNID, force the wraparound bit to be set */
2029 if (fileid < hfsmp->vcbNxtCNID) {
2030 wraparound=1;
2031 }
2032
2033 /* Return previous value. */
2034 *(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID;
2035
2036 hfsmp->vcbNxtCNID = fileid;
2037
2038 if (wraparound) {
2039 hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask;
2040 }
2041
2042 MarkVCBDirty(hfsmp);
2043 hfs_unlock_mount(hfsmp);
2044 hfs_systemfile_unlock (hfsmp, lockflags);
2045
2046 return (error);
2047 }
2048
2049 case F_FREEZE_FS: {
2050 struct mount *mp;
2051
2052 mp = vnode_mount(vp);
2053 hfsmp = VFSTOHFS(mp);
2054
2055 if (!(hfsmp->jnl))
2056 return (ENOTSUP);
2057
2058 vfsp = vfs_statfs(mp);
2059
2060 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
2061 !kauth_cred_issuser(cred))
2062 return (EACCES);
2063
2064 return hfs_freeze(hfsmp);
2065 }
2066
2067 case F_THAW_FS: {
2068 vfsp = vfs_statfs(vnode_mount(vp));
2069 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
2070 !kauth_cred_issuser(cred))
2071 return (EACCES);
2072
2073 return hfs_thaw(hfsmp, current_proc());
2074 }
2075
2076 case HFS_EXT_BULKACCESS_FSCTL: {
2077 int size;
2078 #if CONFIG_HFS_STD
2079 if (hfsmp->hfs_flags & HFS_STANDARD) {
2080 return EINVAL;
2081 }
2082 #endif
2083
2084 if (is64bit) {
2085 size = sizeof(struct user64_ext_access_t);
2086 } else {
2087 size = sizeof(struct user32_ext_access_t);
2088 }
2089
2090 return do_bulk_access_check(hfsmp, vp, ap, size, context);
2091 }
2092
2093 case HFS_SET_XATTREXTENTS_STATE: {
2094 int state;
2095
2096 if (ap->a_data == NULL) {
2097 return (EINVAL);
2098 }
2099
2100 state = *(int *)ap->a_data;
2101
2102 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2103 return (EROFS);
2104 }
2105
2106 /* The super-user can enable or disable extent-based extended
2107 * attribute support on a volume.
2108 * Note: Starting with Mac OS X 10.7, extent-based extended attributes
2109 * are enabled by default, so any change is transient and only lasts
2110 * until the volume is remounted.
2111 */
2112 if (!kauth_cred_issuser(kauth_cred_get())) {
2113 return (EPERM);
2114 }
2115 if (state == 0 || state == 1)
2116 return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
2117 else
2118 return (EINVAL);
2119 }
2120
2121 case F_SETSTATICCONTENT: {
2122 int error;
2123 int enable_static = 0;
2124 struct cnode *cp = NULL;
2125 /*
2126 * lock the cnode, decorate the cnode flag, and bail out.
2127 * VFS should have already authenticated the caller for us.
2128 */
2129
2130 if (ap->a_data) {
2131 /*
2132 * Note that even though ap->a_data is of type caddr_t,
2133 * the fcntl layer at the syscall handler will pass in NULL
2134 * or 1 depending on what the argument supplied to the fcntl
2135 * was. So it is in fact correct to check the ap->a_data
2136 * argument for zero or non-zero value when deciding whether or not
2137 * to enable the static bit in the cnode.
2138 */
2139 enable_static = 1;
2140 }
2141 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2142 return EROFS;
2143 }
2144 cp = VTOC(vp);
2145
2146 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2147 if (error == 0) {
2148 if (enable_static) {
2149 cp->c_flag |= C_SSD_STATIC;
2150 }
2151 else {
2152 cp->c_flag &= ~C_SSD_STATIC;
2153 }
2154 hfs_unlock (cp);
2155 }
2156 return error;
2157 }
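	/*
	 * Illustrative sketch (not part of this file): a userland caller that knows
	 * a file's contents will rarely change could hint this with the (private)
	 * F_SETSTATICCONTENT fcntl; a non-zero argument sets C_SSD_STATIC and zero
	 * clears it, as implemented above.
	 *
	 *	fcntl(fd, F_SETSTATICCONTENT, 1);	// mark as static content
	 *	fcntl(fd, F_SETSTATICCONTENT, 0);	// clear the hint
	 */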
2158
2159 case F_SET_GREEDY_MODE: {
2160 int error;
2161 int enable_greedy_mode = 0;
2162 struct cnode *cp = NULL;
2163 /*
2164 * lock the cnode, decorate the cnode flag, and bail out.
2165 * VFS should have already authenticated the caller for us.
2166 */
2167
2168 if (ap->a_data) {
2169 /*
2170 * Note that even though ap->a_data is of type caddr_t,
2171 * the fcntl layer at the syscall handler will pass in NULL
2172 * or 1 depending on what the argument supplied to the fcntl
2173 * was. So it is in fact correct to check the ap->a_data
2174 * argument for zero or non-zero value when deciding whether or not
2175 * to enable the greedy mode bit in the cnode.
2176 */
2177 enable_greedy_mode = 1;
2178 }
2179 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2180 return EROFS;
2181 }
2182 cp = VTOC(vp);
2183
2184 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2185 if (error == 0) {
2186 if (enable_greedy_mode) {
2187 cp->c_flag |= C_SSD_GREEDY_MODE;
2188 }
2189 else {
2190 cp->c_flag &= ~C_SSD_GREEDY_MODE;
2191 }
2192 hfs_unlock (cp);
2193 }
2194 return error;
2195 }
2196
2197 case F_SETIOTYPE: {
2198 int error;
2199 uint32_t iotypeflag = 0;
2200
2201 struct cnode *cp = NULL;
2202 /*
2203 * lock the cnode, decorate the cnode flag, and bail out.
2204 * VFS should have already authenticated the caller for us.
2205 */
2206
2207 if (ap->a_data == NULL) {
2208 return EINVAL;
2209 }
2210
2211 /*
2212 * Note that even though ap->a_data is of type caddr_t, we
2213 * can only use 32 bits of flag values.
2214 */
2215 iotypeflag = (uint32_t) ap->a_data;
2216 switch (iotypeflag) {
2217 case F_IOTYPE_ISOCHRONOUS:
2218 break;
2219 default:
2220 return EINVAL;
2221 }
2222
2223
2224 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2225 return EROFS;
2226 }
2227 cp = VTOC(vp);
2228
2229 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2230 if (error == 0) {
2231 switch (iotypeflag) {
2232 case F_IOTYPE_ISOCHRONOUS:
2233 cp->c_flag |= C_IO_ISOCHRONOUS;
2234 break;
2235 default:
2236 break;
2237 }
2238 hfs_unlock (cp);
2239 }
2240 return error;
2241 }
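	/*
	 * Illustrative sketch (not part of this file): the only I/O type currently
	 * accepted above is F_IOTYPE_ISOCHRONOUS, which tags the cnode so that
	 * hfs_vnop_strategy() marks its buffers as isochronous.
	 *
	 *	fcntl(fd, F_SETIOTYPE, F_IOTYPE_ISOCHRONOUS);
	 */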
2242
2243 case F_MAKECOMPRESSED: {
2244 int error = 0;
2245 uint32_t gen_counter;
2246 struct cnode *cp = NULL;
2247 int reset_decmp = 0;
2248
2249 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2250 return EROFS;
2251 }
2252
2253 /*
2254 * acquire & lock the cnode.
2255 * VFS should have already authenticated the caller for us.
2256 */
2257
2258 if (ap->a_data) {
2259 /*
2260 * Cast the pointer into a uint32_t so we can extract the
2261 * supplied generation counter.
2262 */
2263 gen_counter = *((uint32_t*)ap->a_data);
2264 }
2265 else {
2266 return EINVAL;
2267 }
2268
2269 #if HFS_COMPRESSION
2270 cp = VTOC(vp);
2271 /* Grab truncate lock first; we may truncate the file */
2272 hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2273
2274 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2275 if (error) {
2276 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2277 return error;
2278 }
2279
2280 /* Are there any other usecounts/FDs? */
2281 if (vnode_isinuse(vp, 1)) {
2282 hfs_unlock(cp);
2283 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2284 return EBUSY;
2285 }
2286
2287 /* now we have the cnode locked down; Validate arguments */
2288 if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) {
2289 /* EINVAL if you are trying to manipulate an IMMUTABLE file */
2290 hfs_unlock(cp);
2291 hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
2292 return EINVAL;
2293 }
2294
2295 if ((hfs_get_gencount (cp)) == gen_counter) {
2296 /*
2297 * OK, the gen_counter matched. Go for it:
2298 * Toggle state bits, truncate file, and suppress mtime update
2299 */
2300 reset_decmp = 1;
2301 cp->c_bsdflags |= UF_COMPRESSED;
2302
2303 error = hfs_truncate(vp, 0, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES,
2304 ap->a_context);
2305 }
2306 else {
2307 error = ESTALE;
2308 }
2309
2310 /* Unlock the cnode before calling into decmpfs; it may need to get an EA */
2311 hfs_unlock(cp);
2312
2313 /*
2314 * Reset the decmp state while still holding the truncate lock. We need to
2315 * serialize here against a listxattr on this node which may occur at any
2316 * time.
2317 *
2318 * Even if '0/skiplock' is passed as the 2nd argument to hfs_file_is_compressed,
2319 * that will still potentially require getting the com.apple.decmpfs EA. If the
2320 * EA is required, then we can't hold the cnode lock, because the getxattr call is
2321 * generic(through VFS), and can't pass along any info telling it that we're already
2322 * holding it (the lock). If we don't serialize, then we risk listxattr stopping
2323 * and trying to fill in the hfs_file_is_compressed info during the callback
2324 * operation, which will result in deadlock against the b-tree node.
2325 *
2326 * So, to serialize against listxattr (which will grab buf_t meta references on
2327 * the b-tree blocks), we hold the truncate lock as we're manipulating the
2328 * decmpfs payload.
2329 */
2330 if ((reset_decmp) && (error == 0)) {
2331 decmpfs_cnode *dp = VTOCMP (vp);
2332 if (dp != NULL) {
2333 decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
2334 }
2335
2336 /* Initialize the decmpfs node as needed */
2337 (void) hfs_file_is_compressed (cp, 0); /* ok to take lock */
2338 }
2339
2340 hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
2341
2342 #endif
2343 return error;
2344 }
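	/*
	 * Protocol summary for F_MAKECOMPRESSED, as implemented above: the caller
	 * supplies the generation count it observed before writing the decmpfs
	 * payload; if the count still matches, UF_COMPRESSED is set and the data
	 * fork is truncated with the mtime update suppressed, otherwise ESTALE is
	 * returned. EBUSY is returned if the file has other users, and EINVAL if it
	 * is already immutable or compressed. A hypothetical userland sketch:
	 *
	 *	uint32_t gen = ...;	// generation count read before writing the EA
	 *	// write the com.apple.decmpfs EA, then:
	 *	if (fcntl(fd, F_MAKECOMPRESSED, &gen) == -1 && errno == ESTALE) {
	 *		// the file changed underneath us; retry or give up
	 *	}
	 */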
2345
2346 case F_SETBACKINGSTORE: {
2347
2348 int error = 0;
2349
2350 /*
2351 * See comment in F_SETSTATICCONTENT re: using
2352 * a null check for a_data
2353 */
2354 if (ap->a_data) {
2355 error = hfs_set_backingstore (vp, 1);
2356 }
2357 else {
2358 error = hfs_set_backingstore (vp, 0);
2359 }
2360
2361 return error;
2362 }
2363
2364 case F_GETPATH_MTMINFO: {
2365 int error = 0;
2366
2367 int *data = (int*) ap->a_data;
2368
2369 /* Ask if this is a backingstore vnode */
2370 error = hfs_is_backingstore (vp, data);
2371
2372 return error;
2373 }
2374
2375 case F_FULLFSYNC: {
2376 int error;
2377
2378 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2379 return (EROFS);
2380 }
2381 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2382 if (error == 0) {
2383 error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_FULL, p);
2384 hfs_unlock(VTOC(vp));
2385 }
2386
2387 return error;
2388 }
2389
2390 case F_BARRIERFSYNC: {
2391 int error;
2392
2393 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2394 return (EROFS);
2395 }
2396 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2397 if (error == 0) {
2398 error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_BARRIER, p);
2399 hfs_unlock(VTOC(vp));
2400 }
2401
2402 return error;
2403 }
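	/*
	 * Usage note: as implemented above, both requests take the cnode lock and
	 * call hfs_fsync() with HFS_FSYNC_FULL or HFS_FSYNC_BARRIER respectively.
	 * A minimal (illustrative) userland call:
	 *
	 *	fcntl(fd, F_FULLFSYNC, 0);	// full flush to stable storage
	 *	fcntl(fd, F_BARRIERFSYNC, 0);	// cheaper ordering barrier
	 */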
2404
2405 case F_CHKCLEAN: {
2406 register struct cnode *cp;
2407 int error;
2408
2409 if (!vnode_isreg(vp))
2410 return EINVAL;
2411
2412 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2413 if (error == 0) {
2414 cp = VTOC(vp);
2415 /*
2416 * Used by the regression tests to determine whether
2417 * all the dirty pages (written via write) have been cleaned
2418 * after a call to 'fsync'.
2419 */
2420 error = is_file_clean(vp, VTOF(vp)->ff_size);
2421 hfs_unlock(cp);
2422 }
2423 return (error);
2424 }
2425
2426 case F_RDADVISE: {
2427 register struct radvisory *ra;
2428 struct filefork *fp;
2429 int error;
2430
2431 if (!vnode_isreg(vp))
2432 return EINVAL;
2433
2434 ra = (struct radvisory *)(ap->a_data);
2435 fp = VTOF(vp);
2436
2437 /* Protect against a size change. */
2438 hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2439
2440 #if HFS_COMPRESSION
2441 if (compressed) {
2442 if (uncompressed_size == -1) {
2443 /* fetching the uncompressed size failed above, so return the error */
2444 error = decmpfs_error;
2445 } else if (ra->ra_offset >= uncompressed_size) {
2446 error = EFBIG;
2447 } else {
2448 error = advisory_read(vp, uncompressed_size, ra->ra_offset, ra->ra_count);
2449 }
2450 } else
2451 #endif /* HFS_COMPRESSION */
2452 if (ra->ra_offset >= fp->ff_size) {
2453 error = EFBIG;
2454 } else {
2455 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
2456 }
2457
2458 hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
2459 return (error);
2460 }
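	/*
	 * Illustrative sketch (not part of this file): F_RDADVISE takes a struct
	 * radvisory describing the region to read ahead; as implemented above, the
	 * offset is validated against the (possibly uncompressed) file size before
	 * advisory_read() is issued.
	 *
	 *	struct radvisory ra;
	 *	ra.ra_offset = 0;		// start of the region
	 *	ra.ra_count  = 1024 * 1024;	// bytes to read ahead
	 *	fcntl(fd, F_RDADVISE, &ra);
	 */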
2461
2462 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
2463 {
2464 if (is64bit) {
2465 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
2466 }
2467 else {
2468 *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
2469 }
2470 return 0;
2471 }
2472
2473 case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
2474 *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
2475 break;
2476
2477 case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
2478 *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
2479 break;
2480
2481 case HFS_FSCTL_GET_VERY_LOW_DISK:
2482 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
2483 break;
2484
2485 case HFS_FSCTL_SET_VERY_LOW_DISK:
2486 if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
2487 return EINVAL;
2488 }
2489
2490 hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
2491 break;
2492
2493 case HFS_FSCTL_GET_LOW_DISK:
2494 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
2495 break;
2496
2497 case HFS_FSCTL_SET_LOW_DISK:
2498 if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
2499 || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
2500
2501 return EINVAL;
2502 }
2503
2504 hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
2505 break;
2506
2507 case HFS_FSCTL_GET_DESIRED_DISK:
2508 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
2509 break;
2510
2511 case HFS_FSCTL_SET_DESIRED_DISK:
2512 if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
2513 return EINVAL;
2514 }
2515
2516 hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
2517 break;
2518
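	/*
	 * Note on the three thresholds above: the SET handlers keep them strictly
	 * ordered, i.e. hfs_freespace_notify_dangerlimit <
	 * hfs_freespace_notify_warninglimit < hfs_freespace_notify_desiredlevel;
	 * attempts to violate that ordering return EINVAL.
	 */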
2519 case HFS_VOLUME_STATUS:
2520 *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
2521 break;
2522
2523 case HFS_SET_BOOT_INFO:
2524 if (!vnode_isvroot(vp))
2525 return(EINVAL);
2526 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
2527 return(EACCES); /* must be superuser or owner of filesystem */
2528 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2529 return (EROFS);
2530 }
2531 hfs_lock_mount (hfsmp);
2532 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
2533 hfs_unlock_mount (hfsmp);
2534 (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
2535 break;
2536
2537 case HFS_GET_BOOT_INFO:
2538 if (!vnode_isvroot(vp))
2539 return(EINVAL);
2540 hfs_lock_mount (hfsmp);
2541 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
2542 hfs_unlock_mount(hfsmp);
2543 break;
2544
2545 case HFS_MARK_BOOT_CORRUPT:
2546 /* Mark the boot volume corrupt by setting
2547 * kHFSVolumeInconsistentBit in the volume header. This will
2548 * force fsck_hfs on next mount.
2549 */
2550 if (!kauth_cred_issuser(kauth_cred_get())) {
2551 return EACCES;
2552 }
2553
2554 /* Allowed only on the root vnode of the boot volume */
2555 if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
2556 !vnode_isvroot(vp)) {
2557 return EINVAL;
2558 }
2559 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2560 return (EROFS);
2561 }
2562 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
2563 hfs_mark_inconsistent(hfsmp, HFS_FSCK_FORCED);
2564 break;
2565
2566 case HFS_FSCTL_GET_JOURNAL_INFO:
2567 jip = (struct hfs_journal_info*)ap->a_data;
2568
2569 if (vp == NULLVP)
2570 return EINVAL;
2571
2572 if (hfsmp->jnl == NULL) {
2573 jnl_start = 0;
2574 jnl_size = 0;
2575 } else {
2576 jnl_start = hfs_blk_to_bytes(hfsmp->jnl_start, hfsmp->blockSize) + hfsmp->hfsPlusIOPosOffset;
2577 jnl_size = hfsmp->jnl_size;
2578 }
2579
2580 jip->jstart = jnl_start;
2581 jip->jsize = jnl_size;
2582 break;
2583
2584 case HFS_SET_ALWAYS_ZEROFILL: {
2585 struct cnode *cp = VTOC(vp);
2586
2587 if (*(int *)ap->a_data) {
2588 cp->c_flag |= C_ALWAYS_ZEROFILL;
2589 } else {
2590 cp->c_flag &= ~C_ALWAYS_ZEROFILL;
2591 }
2592 break;
2593 }
2594
2595 case HFS_DISABLE_METAZONE: {
2596 /* Only root can disable metadata zone */
2597 if (!kauth_cred_issuser(kauth_cred_get())) {
2598 return EACCES;
2599 }
2600 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2601 return (EROFS);
2602 }
2603
2604 /* Disable metadata zone now */
2605 (void) hfs_metadatazone_init(hfsmp, true);
2606 printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
2607 break;
2608 }
2609
2610
2611 case HFS_FSINFO_METADATA_BLOCKS: {
2612 int error;
2613 struct hfsinfo_metadata *hinfo;
2614
2615 hinfo = (struct hfsinfo_metadata *)ap->a_data;
2616
2617 /* Get information about number of metadata blocks */
2618 error = hfs_getinfo_metadata_blocks(hfsmp, hinfo);
2619 if (error) {
2620 return error;
2621 }
2622
2623 break;
2624 }
2625
2626 case HFS_GET_FSINFO: {
2627 hfs_fsinfo *fsinfo = (hfs_fsinfo *)ap->a_data;
2628
2629 /* Only root is allowed to get fsinfo */
2630 if (!kauth_cred_issuser(kauth_cred_get())) {
2631 return EACCES;
2632 }
2633
2634 /*
2635 * Make sure that the caller's version number matches
2636 * the kernel's version number. This will make sure that
2637 * if the structures being read/written into are changed
2638 * by the kernel, the caller will not read incorrect data.
2639 *
2640 * The first three fields --- request_type, version and
2641 * flags --- are the same for all the hfs_fsinfo structures, so
2642 * we can read the version number through any of the
2643 * structures.
2644 */
2645 if (fsinfo->header.version != HFS_FSINFO_VERSION) {
2646 return ENOTSUP;
2647 }
2648
2649 /* Make sure that the current file system is not marked inconsistent */
2650 if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
2651 return EIO;
2652 }
2653
2654 return hfs_get_fsinfo(hfsmp, ap->a_data);
2655 }
2656
2657 case HFS_CS_FREESPACE_TRIM: {
2658 int error = 0;
2659 int lockflags = 0;
2660
2661 /* Only root allowed */
2662 if (!kauth_cred_issuser(kauth_cred_get())) {
2663 return EACCES;
2664 }
2665
2666 /*
2667 * This core functionality is similar to hfs_scan_blocks().
2668 * The main difference is that hfs_scan_blocks() is called
2669 * as part of mount where we are assured that the journal is
2670 * empty to start with. This fcntl() can be called on a
2671 * mounted volume, therefore it has to flush the contents of
2672 * the journal and ensure the state of the summary table.
2673 *
2674 * This fcntl scans over the entire allocation bitmap,
2675 * creates a list of all the free blocks, and issues TRIM
2676 * down to the underlying device. This can take a long time
2677 * as it can generate up to 512MB of read I/O.
2678 */
2679
2680 if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) {
2681 error = hfs_init_summary(hfsmp);
2682 if (error) {
2683 printf("hfs: fsctl() could not initialize summary table for %s\n", hfsmp->vcbVN);
2684 return error;
2685 }
2686 }
2687
2688 /*
2689 * The journal maintains a list of recently deallocated blocks so it can
2690 * issue DKIOCUNMAPs when the corresponding journal transaction is
2691 * flushed to the disk. To avoid any race conditions, we only
2692 * want one active trim list and only one thread issuing DKIOCUNMAPs.
2693 * Therefore we make sure that the journal trim list is sync'ed,
2694 * empty, and not modifiable for the duration of our scan.
2695 *
2696 * Take the journal lock before flushing the journal to the disk.
2697 * We keep holding the journal lock until we acquire the
2698 * bitmap lock, to make sure that no new journal transactions can
2699 * start. This also ensures that the journal trim list is not
2700 * modified after the journal flush and before we get the bitmap lock.
2701 * We can release the journal lock after we acquire the bitmap
2702 * lock as it will prevent any further block deallocations.
2703 */
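	/*
	 * Lock ordering used below: hfs_journal_lock() -> hfs_flush(JOURNAL_META)
	 * -> hfs_systemfile_lock(SFL_BITMAP) -> hfs_journal_unlock() ->
	 * buf_invalidateblks()/ScanUnmapBlocks() -> hfs_systemfile_unlock().
	 */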
2704 hfs_journal_lock(hfsmp);
2705
2706 /* Flush the journal and wait for all I/Os to finish up */
2707 error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
2708 if (error) {
2709 hfs_journal_unlock(hfsmp);
2710 return error;
2711 }
2712
2713 /* Take bitmap lock to ensure it is not being modified */
2714 lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
2715
2716 /* Release the journal lock */
2717 hfs_journal_unlock(hfsmp);
2718
2719 /*
2720 * ScanUnmapBlocks reads the bitmap in large blocks
2721 * (up to 1MB), unlike the runtime, which reads the bitmap
2722 * in 4K blocks. This can cause buf_t collisions
2723 * and potential data corruption. To avoid this, we
2724 * invalidate all the existing buffers associated with
2725 * the bitmap vnode before scanning it.
2726 *
2727 * Note: ScanUnmapBlocks() cleans up all the buffers
2728 * after itself, so there won't be any large buffers left
2729 * for us to clean up after it returns.
2730 */
2731 error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
2732 if (error) {
2733 hfs_systemfile_unlock(hfsmp, lockflags);
2734 return error;
2735 }
2736
2737 /* Traverse bitmap and issue DKIOCUNMAPs */
2738 error = ScanUnmapBlocks(hfsmp);
2739 hfs_systemfile_unlock(hfsmp, lockflags);
2740 if (error) {
2741 return error;
2742 }
2743
2744 break;
2745 }
2746
2747 case HFS_SET_HOTFILE_STATE: {
2748 int error;
2749 struct cnode *cp = VTOC(vp);
2750 uint32_t hf_state = *((uint32_t*)ap->a_data);
2751 uint32_t num_unpinned = 0;
2752
2753 error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2754 if (error) {
2755 return error;
2756 }
2757
2758 // printf("hfs: setting hotfile state %d on %s\n", hf_state, vp->v_name);
2759 if (hf_state == HFS_MARK_FASTDEVCANDIDATE) {
2760 vnode_setfastdevicecandidate(vp);
2761
2762 cp->c_attr.ca_recflags |= kHFSFastDevCandidateMask;
2763 cp->c_attr.ca_recflags &= ~kHFSDoNotFastDevPinMask;
2764 cp->c_flag |= C_MODIFIED;
2765 } else if (hf_state == HFS_UNMARK_FASTDEVCANDIDATE || hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
2766 vnode_clearfastdevicecandidate(vp);
2767 hfs_removehotfile(vp);
2768
2769 if (cp->c_attr.ca_recflags & kHFSFastDevPinnedMask) {
2770 hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, &num_unpinned);
2771 }
2772
2773 if (hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
2774 cp->c_attr.ca_recflags |= kHFSDoNotFastDevPinMask;
2775 }
2776 cp->c_attr.ca_recflags &= ~(kHFSFastDevCandidateMask|kHFSFastDevPinnedMask);
2777 cp->c_flag |= C_MODIFIED;
2778
2779 } else {
2780 error = EINVAL;
2781 }
2782
2783 if (num_unpinned != 0) {
2784 lck_mtx_lock(&hfsmp->hfc_mutex);
2785 hfsmp->hfs_hotfile_freeblks += num_unpinned;
2786 lck_mtx_unlock(&hfsmp->hfc_mutex);
2787 }
2788
2789 hfs_unlock(cp);
2790 return error;
2791 }
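	/*
	 * State summary for HFS_SET_HOTFILE_STATE, as implemented above:
	 * HFS_MARK_FASTDEVCANDIDATE marks the vnode/cnode as a fast-device
	 * candidate; HFS_UNMARK_FASTDEVCANDIDATE and HFS_NEVER_FASTDEVCANDIDATE
	 * clear the candidate/pinned flags and unpin any pinned blocks (crediting
	 * them back to hfs_hotfile_freeblks), with the latter additionally setting
	 * kHFSDoNotFastDevPinMask so the file is never pinned again.
	 */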
2792
2793 case HFS_REPIN_HOTFILE_STATE: {
2794 int error=0;
2795 uint32_t repin_what = *((uint32_t*)ap->a_data);
2796
2797 /* Only root allowed */
2798 if (!kauth_cred_issuser(kauth_cred_get())) {
2799 return EACCES;
2800 }
2801
2802 if (!(hfsmp->hfs_flags & (HFS_CS_METADATA_PIN | HFS_CS_HOTFILE_PIN))) {
2803 // this system is neither regular Fusion nor Cooperative Fusion,
2804 // so this fsctl makes no sense.
2805 return EINVAL;
2806 }
2807
2808 //
2809 // After converting a CoreStorage volume to be encrypted, the
2810 // extents could have moved around underneath us. This call
2811 // allows corestoraged to re-pin everything that should be
2812 // pinned (it would happen on the next reboot too but that could
2813 // be a long time away).
2814 //
2815 if ((repin_what & HFS_REPIN_METADATA) && (hfsmp->hfs_flags & HFS_CS_METADATA_PIN)) {
2816 hfs_pin_fs_metadata(hfsmp);
2817 }
2818 if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) {
2819 hfs_repin_hotfiles(hfsmp);
2820 }
2821 if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_SWAPFILE_PIN)) {
2822 //XXX Swapfiles (marked SWAP_PINNED) may have moved too.
2823 //XXX Do we care? They have a more transient/dynamic nature/lifetime.
2824 }
2825
2826 return error;
2827 }
2828
2829 #if HFS_CONFIG_KEY_ROLL
2830
2831 case HFS_KEY_ROLL: {
2832 if (!kauth_cred_issuser(kauth_cred_get()))
2833 return EACCES;
2834
2835 hfs_key_roll_args_t *args = (hfs_key_roll_args_t *)ap->a_data;
2836
2837 return hfs_key_roll_op(ap->a_context, ap->a_vp, args);
2838 }
2839
2840 case HFS_GET_KEY_AUTO_ROLL: {
2841 if (!kauth_cred_issuser(kauth_cred_get()))
2842 return EACCES;
2843
2844 hfs_key_auto_roll_args_t *args = (hfs_key_auto_roll_args_t *)ap->a_data;
2845 if (args->api_version != HFS_KEY_AUTO_ROLL_API_VERSION_1)
2846 return ENOTSUP;
2847 args->flags = (ISSET(hfsmp->cproot_flags, CP_ROOT_AUTO_ROLL_OLD_CLASS_GENERATION)
2848 ? HFS_KEY_AUTO_ROLL_OLD_CLASS_GENERATION : 0);
2849 args->min_key_os_version = hfsmp->hfs_auto_roll_min_key_os_version;
2850 args->max_key_os_version = hfsmp->hfs_auto_roll_max_key_os_version;
2851 break;
2852 }
2853
2854 case HFS_SET_KEY_AUTO_ROLL: {
2855 if (!kauth_cred_issuser(kauth_cred_get()))
2856 return EACCES;
2857
2858 hfs_key_auto_roll_args_t *args = (hfs_key_auto_roll_args_t *)ap->a_data;
2859 if (args->api_version != HFS_KEY_AUTO_ROLL_API_VERSION_1)
2860 return ENOTSUP;
2861 return cp_set_auto_roll(hfsmp, args);
2862 }
2863
2864 #endif // HFS_CONFIG_KEY_ROLL
2865
2866 #if CONFIG_PROTECT
2867 case F_TRANSCODEKEY:
2868 /*
2869 * This API is only supported when called via kernel so
2870 * a_fflag must be set to 1 (it's not possible to get here
2871 * with it set to 1 via fsctl).
2872 */
2873 if (ap->a_fflag != 1)
2874 return ENOTTY;
2875 return cp_vnode_transcode(vp, (cp_key_t *)ap->a_data);
2876
2877 case F_GETPROTECTIONLEVEL:
2878 return cp_get_root_major_vers (vp, (uint32_t *)ap->a_data);
2879
2880 case F_GETDEFAULTPROTLEVEL:
2881 return cp_get_default_level(vp, (uint32_t *)ap->a_data);
2882 #endif // CONFIG_PROTECT
2883
2884 case FIOPINSWAP:
2885 return hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT | HFS_DATALESS_PIN,
2886 NULL);
2887
2888 default:
2889 return (ENOTTY);
2890 }
2891
2892 return 0;
2893 }
2894
2895 /*
2896 * select
2897 */
2898 int
2899 hfs_vnop_select(__unused struct vnop_select_args *ap)
2900 /*
2901 struct vnop_select_args {
2902 vnode_t a_vp;
2903 int a_which;
2904 int a_fflags;
2905 void *a_wql;
2906 vfs_context_t a_context;
2907 };
2908 */
2909 {
2910 /*
2911 * We should really check to see if I/O is possible.
2912 */
2913 return (1);
2914 }
2915
2916 /*
2917 * Converts a logical block number to a physical block, and optionally returns
2918 * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
2919 * The physical block number is based on the device block size, currently 512 bytes.
2920 * The block run is returned in logical blocks, and is the REMAINING number of blocks
2921 */
2922 int
2923 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
2924 {
2925 struct filefork *fp = VTOF(vp);
2926 struct hfsmount *hfsmp = VTOHFS(vp);
2927 int retval = E_NONE;
2928 u_int32_t logBlockSize;
2929 size_t bytesContAvail = 0;
2930 off_t blockposition;
2931 int lockExtBtree;
2932 int lockflags = 0;
2933
2934 /*
2935 * Check for underlying vnode requests and ensure that logical
2936 * to physical mapping is requested.
2937 */
2938 if (vpp != NULL)
2939 *vpp = hfsmp->hfs_devvp;
2940 if (bnp == NULL)
2941 return (0);
2942
2943 logBlockSize = GetLogicalBlockSize(vp);
2944 blockposition = (off_t)bn * logBlockSize;
2945
2946 lockExtBtree = overflow_extents(fp);
2947
2948 if (lockExtBtree)
2949 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
2950
2951 retval = MacToVFSError(
2952 MapFileBlockC (HFSTOVCB(hfsmp),
2953 (FCB*)fp,
2954 MAXPHYSIO,
2955 blockposition,
2956 bnp,
2957 &bytesContAvail));
2958
2959 if (lockExtBtree)
2960 hfs_systemfile_unlock(hfsmp, lockflags);
2961
2962 if (retval == E_NONE) {
2963 /* Figure out how many read ahead blocks there are */
2964 if (runp != NULL) {
2965 if (can_cluster(logBlockSize)) {
2966 /* Make sure this result never goes negative: */
2967 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
2968 } else {
2969 *runp = 0;
2970 }
2971 }
2972 }
2973 return (retval);
2974 }
2975
2976 /*
2977 * Convert logical block number to file offset.
2978 */
2979 int
2980 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
2981 /*
2982 struct vnop_blktooff_args {
2983 vnode_t a_vp;
2984 daddr64_t a_lblkno;
2985 off_t *a_offset;
2986 };
2987 */
2988 {
2989 if (ap->a_vp == NULL)
2990 return (EINVAL);
2991 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
2992
2993 return(0);
2994 }
2995
2996 /*
2997 * Convert file offset to logical block number.
2998 */
2999 int
3000 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
3001 /*
3002 struct vnop_offtoblk_args {
3003 vnode_t a_vp;
3004 off_t a_offset;
3005 daddr64_t *a_lblkno;
3006 };
3007 */
3008 {
3009 if (ap->a_vp == NULL)
3010 return (EINVAL);
3011 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
3012
3013 return(0);
3014 }
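/*
 * Worked example for the two conversions above: with a logical block size of
 * 4096 bytes, logical block 3 corresponds to offset 3 * 4096 = 12288, and
 * offset 12288 maps back to logical block 12288 / 4096 = 3.
 */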
3015
3016 /*
3017 * Map file offset to physical block number.
3018 *
3019 * If this function is called for write operation, and if the file
3020 * had virtual blocks allocated (delayed allocation), real blocks
3021 * are allocated by calling ExtendFileC().
3022 *
3023 * If this function is called for read operation, and if the file
3024 * had virtual blocks allocated (delayed allocation), no change
3025 * to the size of file is done, and if required, rangelist is
3026 * searched for mapping.
3027 *
3028 * System file cnodes are expected to be locked (shared or exclusive).
3029 *
3030 * -- INVALID RANGES --
3031 *
3032 * Invalid ranges are used to keep track of where we have extended a
3033 * file, but have not yet written that data to disk. In the past we
3034 * would clear up the invalid ranges as we wrote to those areas, but
3035 * before data was actually flushed to disk. The problem with that
3036 * approach is that the data can be left in the cache and is therefore
3037 * still not valid on disk. So now we clear up the ranges here, when
3038 * the flags field has VNODE_WRITE set, indicating a write is about to
3039 * occur. This isn't ideal (ideally we want to clear them up when we
3040 * know the data has been successfully written), but it's the best we
3041 * can do.
3042 *
3043 * For reads, we use the invalid ranges here in block map to indicate
3044 * to the caller that the data should be zeroed (a_bpn == -1). We
3045 * have to be careful about what ranges we return to the cluster code.
3046 * Currently the cluster code can only handle non-rounded values for
3047 * the EOF; it cannot handle funny sized ranges in the middle of the
3048 * file (the main problem is that it sends down odd sized I/Os to the
3049 * disk). Our code currently works because whilst the very first
3050 * offset and the last offset in the invalid ranges are not aligned,
3051 * gaps in the invalid ranges between the first and last, have to be
3052 * aligned (because we always write page sized blocks). For example,
3053 * consider this arrangement:
3054 *
3055 * +-------------+-----+-------+------+
3056 * | |XXXXX| |XXXXXX|
3057 * +-------------+-----+-------+------+
3058 * a b c d
3059 *
3060 * This shows two invalid ranges <a, b> and <c, d>. Whilst a and d
3061 * are not necessarily aligned, b and c *must* be.
3062 *
3063 * Zero-filling occurs in a number of ways:
3064 *
3065 * 1. When a read occurs and we return with a_bpn == -1.
3066 *
3067 * 2. When hfs_fsync or hfs_filedone calls hfs_flush_invalid_ranges
3068 * which will cause us to iterate over the ranges bringing in
3069 * pages that are not present in the cache and zeroing them. Any
3070 * pages that are already in the cache are left untouched. Note
3071 * that hfs_fsync does not always flush invalid ranges.
3072 *
3073 * 3. When we extend a file we zero out from the old EOF to the end
3074 * of the page. It would be nice if we didn't have to do this if
3075 * the page wasn't present (and could defer it), but because of
3076 * the problem described above, we have to.
3077 *
3078 * The invalid ranges are also used to restrict the size that we write
3079 * out on disk: see hfs_prepare_fork_for_update.
3080 *
3081 * Note that invalid ranges are ignored when neither the VNODE_READ or
3082 * the VNODE_WRITE flag is specified. This is useful for the
3083 * F_LOG2PHYS* fcntls which are not interested in invalid ranges: they
3084 * just want to know whether blocks are physically allocated or not.
3085 */
3086 int
3087 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
3088 /*
3089 struct vnop_blockmap_args {
3090 vnode_t a_vp;
3091 off_t a_foffset;
3092 size_t a_size;
3093 daddr64_t *a_bpn;
3094 size_t *a_run;
3095 void *a_poff;
3096 int a_flags;
3097 vfs_context_t a_context;
3098 };
3099 */
3100 {
3101 struct vnode *vp = ap->a_vp;
3102 struct cnode *cp;
3103 struct filefork *fp;
3104 struct hfsmount *hfsmp;
3105 size_t bytesContAvail = ap->a_size;
3106 int retval = E_NONE;
3107 int syslocks = 0;
3108 int lockflags = 0;
3109 struct rl_entry *invalid_range;
3110 enum rl_overlaptype overlaptype;
3111 int started_tr = 0;
3112 int tooklock = 0;
3113
3114 #if HFS_COMPRESSION
3115 if (VNODE_IS_RSRC(vp)) {
3116 /* allow blockmaps to the resource fork */
3117 } else {
3118 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
3119 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
3120 switch(state) {
3121 case FILE_IS_COMPRESSED:
3122 return ENOTSUP;
3123 case FILE_IS_CONVERTING:
3124 /* if FILE_IS_CONVERTING, we allow blockmap */
3125 break;
3126 default:
3127 printf("invalid state %d for compressed file\n", state);
3128 /* fall through */
3129 }
3130 }
3131 }
3132 #endif /* HFS_COMPRESSION */
3133
3134 /* Do not allow blockmap operation on a directory */
3135 if (vnode_isdir(vp)) {
3136 return (ENOTSUP);
3137 }
3138
3139 /*
3140 * Check for underlying vnode requests and ensure that logical
3141 * to physical mapping is requested.
3142 */
3143 if (ap->a_bpn == NULL)
3144 return (0);
3145
3146 hfsmp = VTOHFS(vp);
3147 cp = VTOC(vp);
3148 fp = VTOF(vp);
3149
3150 if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
3151 if (cp->c_lockowner != current_thread()) {
3152 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
3153 tooklock = 1;
3154 }
3155
3156 // For reads, check the invalid ranges
3157 if (ISSET(ap->a_flags, VNODE_READ)) {
3158 if (ap->a_foffset >= fp->ff_size) {
3159 retval = ERANGE;
3160 goto exit;
3161 }
3162
3163 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
3164 ap->a_foffset + (off_t)bytesContAvail - 1,
3165 &invalid_range);
3166 switch(overlaptype) {
3167 case RL_MATCHINGOVERLAP:
3168 case RL_OVERLAPCONTAINSRANGE:
3169 case RL_OVERLAPSTARTSBEFORE:
3170 /* There's no valid block for this byte offset */
3171 *ap->a_bpn = (daddr64_t)-1;
3172 /* There's no point limiting the amount to be returned
3173 * if the invalid range that was hit extends all the way
3174 * to the EOF (i.e. there's no valid bytes between the
3175 * end of this range and the file's EOF):
3176 */
3177 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
3178 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
3179 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
3180 }
3181
3182 retval = 0;
3183 goto exit;
3184
3185 case RL_OVERLAPISCONTAINED:
3186 case RL_OVERLAPENDSAFTER:
3187 /* The range of interest hits an invalid block before the end: */
3188 if (invalid_range->rl_start == ap->a_foffset) {
3189 /* There's actually no valid information to be had starting here: */
3190 *ap->a_bpn = (daddr64_t)-1;
3191 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
3192 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
3193 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
3194 }
3195
3196 retval = 0;
3197 goto exit;
3198 } else {
3199 /*
3200 * Sadly, the lower layers don't like us to
3201 * return unaligned ranges, so we skip over
3202 * any invalid ranges here that are less than
3203 * a page: zeroing of those bits is not our
3204 * responsibility (it's dealt with elsewhere).
3205 */
3206 do {
3207 off_t rounded_start = round_page_64(invalid_range->rl_start);
3208 if ((off_t)bytesContAvail < rounded_start - ap->a_foffset)
3209 break;
3210 if (rounded_start < invalid_range->rl_end + 1) {
3211 bytesContAvail = rounded_start - ap->a_foffset;
3212 break;
3213 }
3214 } while ((invalid_range = TAILQ_NEXT(invalid_range,
3215 rl_link)));
3216 }
3217 break;
3218
3219 case RL_NOOVERLAP:
3220 break;
3221 } // switch
3222 }
3223 }
3224
3225 #if CONFIG_PROTECT
3226 if (cp->c_cpentry) {
3227 const int direction = (ISSET(ap->a_flags, VNODE_WRITE)
3228 ? VNODE_WRITE : VNODE_READ);
3229
3230 cp_io_params_t io_params;
3231 cp_io_params(hfsmp, cp->c_cpentry,
3232 off_rsrc_make(ap->a_foffset, VNODE_IS_RSRC(vp)),
3233 direction, &io_params);
3234
3235 if (io_params.max_len < (off_t)bytesContAvail)
3236 bytesContAvail = io_params.max_len;
3237
3238 if (io_params.phys_offset != -1) {
3239 *ap->a_bpn = ((io_params.phys_offset + hfsmp->hfsPlusIOPosOffset)
3240 / hfsmp->hfs_logical_block_size);
3241
3242 retval = 0;
3243 goto exit;
3244 }
3245 }
3246 #endif
3247
3248 retry:
3249
3250 /* Check virtual blocks only when performing write operation */
3251 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
3252 if (hfs_start_transaction(hfsmp) != 0) {
3253 retval = EINVAL;
3254 goto exit;
3255 } else {
3256 started_tr = 1;
3257 }
3258 syslocks = SFL_EXTENTS | SFL_BITMAP;
3259
3260 } else if (overflow_extents(fp)) {
3261 syslocks = SFL_EXTENTS;
3262 }
3263
3264 if (syslocks)
3265 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
3266
3267 /*
3268 * Check for any delayed allocations.
3269 */
3270 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
3271 int64_t actbytes;
3272 u_int32_t loanedBlocks;
3273
3274 //
3275 // Make sure we have a transaction. It's possible
3276 // that we came in and fp->ff_unallocblocks was zero
3277 // but during the time we blocked acquiring the extents
3278 // btree, ff_unallocblocks became non-zero and so we
3279 // will need to start a transaction.
3280 //
3281 if (started_tr == 0) {
3282 if (syslocks) {
3283 hfs_systemfile_unlock(hfsmp, lockflags);
3284 syslocks = 0;
3285 }
3286 goto retry;
3287 }
3288
3289 /*
3290 * Note: ExtendFileC will release any blocks on loan and
3291 * acquire real blocks. So we ask to extend by zero bytes,
3292 * since ExtendFileC will account for the virtual blocks.
3293 */
3294
3295 loanedBlocks = fp->ff_unallocblocks;
3296 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
3297 kEFAllMask | kEFNoClumpMask, &actbytes);
3298
3299 if (retval) {
3300 fp->ff_unallocblocks = loanedBlocks;
3301 cp->c_blocks += loanedBlocks;
3302 fp->ff_blocks += loanedBlocks;
3303
3304 hfs_lock_mount (hfsmp);
3305 hfsmp->loanedBlocks += loanedBlocks;
3306 hfs_unlock_mount (hfsmp);
3307
3308 hfs_systemfile_unlock(hfsmp, lockflags);
3309 cp->c_flag |= C_MODIFIED;
3310 if (started_tr) {
3311 (void) hfs_update(vp, 0);
3312 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3313
3314 hfs_end_transaction(hfsmp);
3315 started_tr = 0;
3316 }
3317 goto exit;
3318 }
3319 }
3320
3321 retval = MapFileBlockC(hfsmp, (FCB *)fp, bytesContAvail, ap->a_foffset,
3322 ap->a_bpn, &bytesContAvail);
3323 if (syslocks) {
3324 hfs_systemfile_unlock(hfsmp, lockflags);
3325 syslocks = 0;
3326 }
3327
3328 if (retval) {
3329 /* On write, always return the error because virtual blocks, if any,
3330 * should have been allocated in ExtendFileC(). We do not
3331 * allocate virtual blocks on read, therefore return the error
3332 * only if no virtual blocks are allocated. Otherwise we search
3333 * the rangelist for zero-fills.
3334 */
3335 if ((MacToVFSError(retval) != ERANGE) ||
3336 (ap->a_flags & VNODE_WRITE) ||
3337 ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
3338 goto exit;
3339 }
3340
3341 /* Validate if the start offset is within logical file size */
3342 if (ap->a_foffset >= fp->ff_size) {
3343 goto exit;
3344 }
3345
3346 /*
3347 * At this point, we have encountered a failure during
3348 * MapFileBlockC that resulted in ERANGE, and we are not
3349 * servicing a write, and there are borrowed blocks.
3350 *
3351 * However, the cluster layer will not call blockmap for
3352 * blocks that are borrowed and in-cache. We have to assume
3353 * that because we observed ERANGE being emitted from
3354 * MapFileBlockC, this extent range is not valid on-disk. So
3355 * we treat this as a mapping that needs to be zero-filled
3356 * prior to reading.
3357 */
3358
3359 if (fp->ff_size - ap->a_foffset < (off_t)bytesContAvail)
3360 bytesContAvail = fp->ff_size - ap->a_foffset;
3361
3362 *ap->a_bpn = (daddr64_t) -1;
3363 retval = 0;
3364
3365 goto exit;
3366 }
3367
3368 exit:
3369 if (retval == 0) {
3370 if (ISSET(ap->a_flags, VNODE_WRITE)) {
3371 struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges);
3372
3373 // See if we might be overlapping invalid ranges...
3374 if (r && (ap->a_foffset + (off_t)bytesContAvail) > r->rl_start) {
3375 /*
3376 * Mark the file as needing an update if we think the
3377 * on-disk EOF has changed.
3378 */
3379 if (ap->a_foffset <= r->rl_start)
3380 SET(cp->c_flag, C_MODIFIED);
3381
3382 /*
3383 * This isn't the ideal place to put this. Ideally, we
3384 * should do something *after* we have successfully
3385 * written to the range, but that's difficult to do
3386 * because we cannot take locks in the callback. At
3387 * present, the cluster code will call us with VNODE_WRITE
3388 * set just before it's about to write the data so we know
3389 * that data is about to be written. If we get an I/O
3390 * error at this point then chances are the metadata
3391 * update to follow will also have an I/O error so the
3392 * risk here is small.
3393 */
3394 rl_remove(ap->a_foffset, ap->a_foffset + bytesContAvail - 1,
3395 &fp->ff_invalidranges);
3396
3397 if (!TAILQ_FIRST(&fp->ff_invalidranges)) {
3398 cp->c_flag &= ~C_ZFWANTSYNC;
3399 cp->c_zftimeout = 0;
3400 }
3401 }
3402 }
3403
3404 if (ap->a_run)
3405 *ap->a_run = bytesContAvail;
3406
3407 if (ap->a_poff)
3408 *(int *)ap->a_poff = 0;
3409 }
3410
3411 if (started_tr) {
3412 hfs_update(vp, TRUE);
3413 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3414 hfs_end_transaction(hfsmp);
3415 started_tr = 0;
3416 }
3417
3418 if (tooklock)
3419 hfs_unlock(cp);
3420
3421 return (MacToVFSError(retval));
3422 }
3423
3424 /*
3425 * prepare and issue the I/O
3426 * buf_strategy knows how to deal
3427 * with requests that require
3428 * fragmented I/Os
3429 */
3430 int
3431 hfs_vnop_strategy(struct vnop_strategy_args *ap)
3432 {
3433 buf_t bp = ap->a_bp;
3434 vnode_t vp = buf_vnode(bp);
3435 int error = 0;
3436
3437 /* Mark buffer as containing static data if cnode flag set */
3438 if (VTOC(vp)->c_flag & C_SSD_STATIC) {
3439 buf_markstatic(bp);
3440 }
3441
3442 /* Mark buffer for greedy mode writes if cnode flag set */
3443 if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) {
3444 bufattr_markgreedymode(buf_attr(bp));
3445 }
3446
3447 /* mark buffer as containing burst mode data if cnode flag set */
3448 if (VTOC(vp)->c_flag & C_IO_ISOCHRONOUS) {
3449 bufattr_markisochronous(buf_attr(bp));
3450 }
3451
3452 #if CONFIG_PROTECT
3453 error = cp_handle_strategy(bp);
3454
3455 if (error)
3456 return error;
3457 #endif
3458
3459 error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
3460
3461 return error;
3462 }
3463
3464 int
3465 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vfs_context_t context)
3466 {
3467 register struct cnode *cp = VTOC(vp);
3468 struct filefork *fp = VTOF(vp);
3469 kauth_cred_t cred = vfs_context_ucred(context);
3470 int retval;
3471 off_t bytesToAdd;
3472 off_t actualBytesAdded;
3473 off_t filebytes;
3474 u_int32_t fileblocks;
3475 int blksize;
3476 struct hfsmount *hfsmp;
3477 int lockflags;
3478 int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES);
3479
3480 blksize = VTOVCB(vp)->blockSize;
3481 fileblocks = fp->ff_blocks;
3482 filebytes = (off_t)fileblocks * (off_t)blksize;
3483
3484 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_START,
3485 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
3486
3487 if (length < 0)
3488 return (EINVAL);
3489
3490 /* This should only happen with a corrupt filesystem */
3491 if ((off_t)fp->ff_size < 0)
3492 return (EINVAL);
3493
3494 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
3495 return (EFBIG);
3496
3497 hfsmp = VTOHFS(vp);
3498
3499 retval = E_NONE;
3500
3501 /* Files that are changing size are not hot file candidates. */
3502 if (hfsmp->hfc_stage == HFC_RECORDING) {
3503 fp->ff_bytesread = 0;
3504 }
3505
3506 /*
3507 * We cannot just check if fp->ff_size == length (as an optimization)
3508 * since there may be extra physical blocks that also need truncation.
3509 */
3510 #if QUOTA
3511 if ((retval = hfs_getinoquota(cp)))
3512 return(retval);
3513 #endif /* QUOTA */
3514
3515 /*
3516 * Lengthen the size of the file. We must ensure that the
3517 * last byte of the file is allocated. Since the smallest
3518 * value of ff_size is 0, length will be at least 1.
3519 */
3520 if (length > (off_t)fp->ff_size) {
3521 #if QUOTA
3522 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
3523 cred, 0);
3524 if (retval)
3525 goto Err_Exit;
3526 #endif /* QUOTA */
3527 /*
3528 * If we don't have enough physical space then
3529 * we need to extend the physical size.
3530 */
3531 if (length > filebytes) {
3532 int eflags;
3533 u_int32_t blockHint = 0;
3534
3535 /* All or nothing and don't round up to clumpsize. */
3536 eflags = kEFAllMask | kEFNoClumpMask;
3537
3538 if (cred && (suser(cred, NULL) != 0)) {
3539 eflags |= kEFReserveMask; /* keep a reserve */
3540 }
3541
3542 /*
3543 * Allocate Journal and Quota files in metadata zone.
3544 */
3545 if (filebytes == 0 &&
3546 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
3547 hfs_virtualmetafile(cp)) {
3548 eflags |= kEFMetadataMask;
3549 blockHint = hfsmp->hfs_metazone_start;
3550 }
3551 if (hfs_start_transaction(hfsmp) != 0) {
3552 retval = EINVAL;
3553 goto Err_Exit;
3554 }
3555
3556 /* Protect extents b-tree and allocation bitmap */
3557 lockflags = SFL_BITMAP;
3558 if (overflow_extents(fp))
3559 lockflags |= SFL_EXTENTS;
3560 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3561
3562 /*
3563 * Keep growing the file as long as the current EOF is
3564 * less than the desired value.
3565 */
3566 while ((length > filebytes) && (retval == E_NONE)) {
3567 bytesToAdd = length - filebytes;
3568 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
3569 (FCB*)fp,
3570 bytesToAdd,
3571 blockHint,
3572 eflags,
3573 &actualBytesAdded));
3574
3575 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
3576 if (actualBytesAdded == 0 && retval == E_NONE) {
3577 if (length > filebytes)
3578 length = filebytes;
3579 break;
3580 }
3581 } /* endwhile */
3582
3583 hfs_systemfile_unlock(hfsmp, lockflags);
3584
3585 if (hfsmp->jnl) {
3586 hfs_update(vp, 0);
3587 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3588 }
3589
3590 hfs_end_transaction(hfsmp);
3591
3592 if (retval)
3593 goto Err_Exit;
3594
3595 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE,
3596 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
3597 }
3598
3599 if (ISSET(flags, IO_NOZEROFILL)) {
3600 // An optimisation for the hibernation file
3601 if (vnode_isswap(vp))
3602 rl_remove_all(&fp->ff_invalidranges);
3603 } else {
3604 if (!vnode_issystem(vp) && retval == E_NONE) {
3605 if (length > (off_t)fp->ff_size) {
3606 struct timeval tv;
3607
3608 /* Extending the file: time to fill out the current last page with zeroes? */
3609 if (fp->ff_size & PAGE_MASK_64) {
3610 /* There might be some valid data at the start of the (current) last page
3611 of the file, so zero out the remainder of that page to ensure the
3612 entire page contains valid data. */
3613 hfs_unlock(cp);
3614 retval = hfs_zero_eof_page(vp, length);
3615 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
3616 if (retval) goto Err_Exit;
3617 }
3618 microuptime(&tv);
3619 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
3620 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
3621 }
3622 } else {
3623 panic("hfs_truncate: invoked on non-UBC object?!");
3624 };
3625 }
3626 if (suppress_times == 0) {
3627 cp->c_touch_modtime = TRUE;
3628 }
3629 fp->ff_size = length;
3630
3631 } else { /* Shorten the size of the file */
3632
3633 // An optimisation for the hibernation file
3634 if (ISSET(flags, IO_NOZEROFILL) && vnode_isswap(vp)) {
3635 rl_remove_all(&fp->ff_invalidranges);
3636 } else if ((off_t)fp->ff_size > length) {
3637 /* Any space previously marked as invalid is now irrelevant: */
3638 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
3639 }
3640
3641 /*
3642 * Account for any unmapped blocks. Note that the new
3643 * file length can still end up with unmapped blocks.
3644 */
3645 if (fp->ff_unallocblocks > 0) {
3646 u_int32_t finalblks;
3647 u_int32_t loanedBlocks;
3648
3649 hfs_lock_mount(hfsmp);
3650 loanedBlocks = fp->ff_unallocblocks;
3651 cp->c_blocks -= loanedBlocks;
3652 fp->ff_blocks -= loanedBlocks;
3653 fp->ff_unallocblocks = 0;
3654
3655 hfsmp->loanedBlocks -= loanedBlocks;
3656
3657 finalblks = (length + blksize - 1) / blksize;
3658 if (finalblks > fp->ff_blocks) {
3659 /* calculate required unmapped blocks */
3660 loanedBlocks = finalblks - fp->ff_blocks;
3661 hfsmp->loanedBlocks += loanedBlocks;
3662
3663 fp->ff_unallocblocks = loanedBlocks;
3664 cp->c_blocks += loanedBlocks;
3665 fp->ff_blocks += loanedBlocks;
3666 }
3667 hfs_unlock_mount (hfsmp);
3668 }
3669
3670 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
3671 if (hfs_start_transaction(hfsmp) != 0) {
3672 retval = EINVAL;
3673 goto Err_Exit;
3674 }
3675
3676 if (fp->ff_unallocblocks == 0) {
3677 /* Protect extents b-tree and allocation bitmap */
3678 lockflags = SFL_BITMAP;
3679 if (overflow_extents(fp))
3680 lockflags |= SFL_EXTENTS;
3681 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3682
3683 retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0,
3684 FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));
3685
3686 hfs_systemfile_unlock(hfsmp, lockflags);
3687 }
3688 if (hfsmp->jnl) {
3689 if (retval == 0) {
3690 fp->ff_size = length;
3691 }
3692 hfs_update(vp, 0);
3693 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3694 }
3695 hfs_end_transaction(hfsmp);
3696
3697 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
3698 if (retval)
3699 goto Err_Exit;
3700 #if QUOTA
3701 /* These are bytesreleased */
3702 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
3703 #endif /* QUOTA */
3704
3705 //
3706 // Unlike when growing a file, we adjust the hotfile block count here
3707 // instead of deeper down in the block allocation code because we do
3708 // not necessarily have a vnode or "fcb" at the time we're deleting
3709 // the file and so we wouldn't know if it was hotfile cached or not
3710 //
3711 hfs_hotfile_adjust_blocks(vp, (int64_t)((savedbytes - filebytes) / blksize));
3712
3713
3714 /*
3715 * Only set update flag if the logical length changes & we aren't
3716 * suppressing modtime updates.
3717 */
3718 if (((off_t)fp->ff_size != length) && (suppress_times == 0)) {
3719 cp->c_touch_modtime = TRUE;
3720 }
3721 fp->ff_size = length;
3722 }
3723 if (cp->c_mode & (S_ISUID | S_ISGID)) {
3724 if (!vfs_context_issuser(context))
3725 cp->c_mode &= ~(S_ISUID | S_ISGID);
3726 }
3727 cp->c_flag |= C_MODIFIED;
3728 cp->c_touch_chgtime = TRUE; /* status changed */
3729 if (suppress_times == 0) {
3730 cp->c_touch_modtime = TRUE; /* file data was modified */
3731
3732 /*
3733 * If we are not suppressing the modtime update, then
3734 * update the gen count as well.
3735 */
3736 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) {
3737 hfs_incr_gencount(cp);
3738 }
3739 }
3740
3741 retval = hfs_update(vp, 0);
3742 if (retval) {
3743 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE,
3744 -1, -1, -1, retval, 0);
3745 }
3746
3747 Err_Exit:
3748
3749 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_END,
3750 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
3751
3752 return (retval);
3753 }
3754
3755 /*
3756 * Preparation which must be done prior to deleting the catalog record
3757 * of a file or directory. In order to make the on-disk as safe as possible,
3758 * we remove the catalog entry before releasing the bitmap blocks and the
3759 * overflow extent records. However, some work must be done prior to deleting
3760 * the catalog record.
3761 *
3762 * When calling this function, the cnode must exist both in memory and on-disk.
3763 * If there are both resource fork and data fork vnodes, this function should
3764 * be called on both.
3765 */
3766
3767 int
3768 hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {
3769
3770 struct filefork *fp = VTOF(vp);
3771 struct cnode *cp = VTOC(vp);
3772 #if QUOTA
3773 int retval = 0;
3774 #endif /* QUOTA */
3775
3776 /* Cannot truncate an HFS directory! */
3777 if (vnode_isdir(vp)) {
3778 return (EISDIR);
3779 }
3780
3781 /*
3782 * See the comment below in hfs_truncate for why we need to call
3783 * setsize here. Essentially we want to avoid pending I/O if we
3784 * already know that the blocks are going to be released here.
3785 * This function is only called when totally removing all storage for a file, so
3786 * we can take a shortcut and call ubc_setsize(vp, 0) immediately.
3787 */
3788 ubc_setsize(vp, 0);
3789
3790 /* This should only happen with a corrupt filesystem */
3791 if ((off_t)fp->ff_size < 0)
3792 return (EINVAL);
3793
3794 /*
3795 * We cannot just check if fp->ff_size == length (as an optimization)
3796 * since there may be extra physical blocks that also need truncation.
3797 */
3798 #if QUOTA
3799 if ((retval = hfs_getinoquota(cp))) {
3800 return(retval);
3801 }
3802 #endif /* QUOTA */
3803
3804 /* Wipe out any invalid ranges which have yet to be backed by disk */
3805 rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);
3806
3807 /*
3808 * Account for any unmapped blocks. Since we're deleting the
3809 * entire file, we don't have to worry about just shrinking
3810 * to a smaller number of borrowed blocks.
3811 */
3812 if (fp->ff_unallocblocks > 0) {
3813 u_int32_t loanedBlocks;
3814
3815 hfs_lock_mount (hfsmp);
3816 loanedBlocks = fp->ff_unallocblocks;
3817 cp->c_blocks -= loanedBlocks;
3818 fp->ff_blocks -= loanedBlocks;
3819 fp->ff_unallocblocks = 0;
3820
3821 hfsmp->loanedBlocks -= loanedBlocks;
3822
3823 hfs_unlock_mount (hfsmp);
3824 }
3825
3826 return 0;
3827 }
3828
3829
3830 /*
3831 * Special wrapper around calling TruncateFileC. This function is usable
3832 * even when the catalog record does not exist any longer, making it ideal
3833 * for use when deleting a file. The simplification here is that we know
3834 * that we are releasing all blocks.
3835 *
3836 * Note that this function may be called when there is no vnode backing
3837 * the file fork in question. We may call this from hfs_vnop_inactive
3838 * to clear out resource fork data (and may not want to clear out the data
3839 * fork yet). As a result, we pointer-check both sets of inputs before
3840 * doing anything with them.
3841 *
3842 * The caller is responsible for saving off a copy of the filefork(s)
3843 * embedded within the cnode prior to calling this function. The pointers
3844 * supplied as arguments must be valid even if the cnode is no longer valid.
3845 */
3846
3847 int
3848 hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
3849 struct filefork *rsrcfork, u_int32_t fileid) {
3850
3851 off_t filebytes;
3852 u_int32_t fileblocks;
3853 int blksize = 0;
3854 int error = 0;
3855 int lockflags;
3856
3857 blksize = hfsmp->blockSize;
3858
3859 /* Data Fork */
3860 if (datafork) {
3861 off_t prev_filebytes;
3862
3863 datafork->ff_size = 0;
3864
3865 fileblocks = datafork->ff_blocks;
3866 filebytes = (off_t)fileblocks * (off_t)blksize;
3867 prev_filebytes = filebytes;
3868
3869 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3870
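/*
 * Release the data fork in HFS_BIGFILE_SIZE chunks: each pass lowers the
 * filebytes target and calls TruncateFileC inside its own transaction so
 * that no single transaction has to free an unbounded number of blocks.
 */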
3871 while (filebytes > 0) {
3872 if (filebytes > HFS_BIGFILE_SIZE) {
3873 filebytes -= HFS_BIGFILE_SIZE;
3874 } else {
3875 filebytes = 0;
3876 }
3877
3878 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3879 if (hfs_start_transaction(hfsmp) != 0) {
3880 error = EINVAL;
3881 break;
3882 }
3883
3884 if (datafork->ff_unallocblocks == 0) {
3885 /* Protect extents b-tree and allocation bitmap */
3886 lockflags = SFL_BITMAP;
3887 if (overflow_extents(datafork))
3888 lockflags |= SFL_EXTENTS;
3889 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3890
3891 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false));
3892
3893 hfs_systemfile_unlock(hfsmp, lockflags);
3894 }
3895 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3896
3897 struct cnode *cp = datafork ? FTOC(datafork) : NULL;
3898 struct vnode *vp;
3899 vp = cp ? CTOV(cp, 0) : NULL;
3900 hfs_hotfile_adjust_blocks(vp, (int64_t)((prev_filebytes - filebytes) / blksize));
3901 prev_filebytes = filebytes;
3902
3903 /* Finish the transaction and start over if necessary */
3904 hfs_end_transaction(hfsmp);
3905
3906 if (error) {
3907 break;
3908 }
3909 }
3910 }
3911
3912 /* Resource fork */
3913 if (error == 0 && rsrcfork) {
3914 rsrcfork->ff_size = 0;
3915
3916 fileblocks = rsrcfork->ff_blocks;
3917 filebytes = (off_t)fileblocks * (off_t)blksize;
3918
3919 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3920
3921 while (filebytes > 0) {
3922 if (filebytes > HFS_BIGFILE_SIZE) {
3923 filebytes -= HFS_BIGFILE_SIZE;
3924 } else {
3925 filebytes = 0;
3926 }
3927
3928 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3929 if (hfs_start_transaction(hfsmp) != 0) {
3930 error = EINVAL;
3931 break;
3932 }
3933
3934 if (rsrcfork->ff_unallocblocks == 0) {
3935 /* Protect extents b-tree and allocation bitmap */
3936 lockflags = SFL_BITMAP;
3937 if (overflow_extents(rsrcfork))
3938 lockflags |= SFL_EXTENTS;
3939 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3940
3941 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false));
3942
3943 hfs_systemfile_unlock(hfsmp, lockflags);
3944 }
3945 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3946
3947 /* Finish the transaction and start over if necessary */
3948 hfs_end_transaction(hfsmp);
3949
3950 if (error) {
3951 break;
3952 }
3953 }
3954 }
3955
3956 return error;
3957 }
3958
3959 errno_t hfs_ubc_setsize(vnode_t vp, off_t len, bool have_cnode_lock)
3960 {
3961 errno_t error;
3962
3963 /*
3964 * Call ubc_setsize to give the VM subsystem a chance to do
3965 * whatever it needs to with existing pages before we delete
3966 * blocks. Note that symlinks don't use the UBC so we'll
3967 * get back ENOENT in that case.
3968 */
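/*
 * If the caller holds the cnode lock, first try ubc_setsize_ex with
 * UBC_SETSIZE_NO_FS_REENTRY; EAGAIN means the resize could not complete
 * without re-entering the filesystem, so drop the cnode lock, retry
 * without the restriction, and re-take the lock afterwards.
 */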
3969 if (have_cnode_lock) {
3970 error = ubc_setsize_ex(vp, len, UBC_SETSIZE_NO_FS_REENTRY);
3971 if (error == EAGAIN) {
3972 cnode_t *cp = VTOC(vp);
3973
3974 if (cp->c_truncatelockowner != current_thread())
3975 hfs_warn("hfs: hfs_ubc_setsize called without exclusive truncate lock!");
3976
3977 hfs_unlock(cp);
3978 error = ubc_setsize_ex(vp, len, 0);
3979 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
3980 }
3981 } else
3982 error = ubc_setsize_ex(vp, len, 0);
3983
3984 return error == ENOENT ? 0 : error;
3985 }
3986
3987 /*
3988 * Truncate a cnode to at most length size, freeing (or adding) the
3989 * disk blocks.
3990 */
3991 int
3992 hfs_truncate(struct vnode *vp, off_t length, int flags,
3993 int truncateflags, vfs_context_t context)
3994 {
3995 struct filefork *fp = VTOF(vp);
3996 off_t filebytes;
3997 u_int32_t fileblocks;
3998 int blksize;
3999 errno_t error = 0;
4000 struct cnode *cp = VTOC(vp);
4001 hfsmount_t *hfsmp = VTOHFS(vp);
4002
4003 /* Cannot truncate an HFS directory! */
4004 if (vnode_isdir(vp)) {
4005 return (EISDIR);
4006 }
4007 /* A swap file cannot change size. */
4008 if (vnode_isswap(vp) && length && !ISSET(flags, IO_NOAUTH)) {
4009 return (EPERM);
4010 }
4011
4012 blksize = hfsmp->blockSize;
4013 fileblocks = fp->ff_blocks;
4014 filebytes = (off_t)fileblocks * (off_t)blksize;
4015
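/* If this thread already holds the cnode lock, leave lock management to the
   caller and let hfs_ubc_setsize drop and re-take it when necessary. */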
4016 bool caller_has_cnode_lock = (cp->c_lockowner == current_thread());
4017
4018 error = hfs_ubc_setsize(vp, length, caller_has_cnode_lock);
4019 if (error)
4020 return error;
4021
4022 if (!caller_has_cnode_lock) {
4023 error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
4024 if (error)
4025 return error;
4026 }
4027
4028 if (vnode_islnk(vp) && cp->c_datafork->ff_symlinkptr) {
4029 hfs_free(cp->c_datafork->ff_symlinkptr, cp->c_datafork->ff_size);
4030 cp->c_datafork->ff_symlinkptr = NULL;
4031 }
4032
4033 // have to loop truncating or growing files that are
4034 // really big because otherwise transactions can get
4035 // enormous and consume too many kernel resources.
4036
4037 if (length < filebytes) {
4038 while (filebytes > length) {
4039 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
4040 filebytes -= HFS_BIGFILE_SIZE;
4041 } else {
4042 filebytes = length;
4043 }
4044 error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
4045 if (error)
4046 break;
4047 }
4048 } else if (length > filebytes) {
4049 kauth_cred_t cred = vfs_context_ucred(context);
4050 const bool keep_reserve = cred && suser(cred, NULL) != 0;
4051
4052 if (hfs_freeblks(hfsmp, keep_reserve)
4053 < howmany(length - filebytes, blksize)) {
4054 error = ENOSPC;
4055 } else {
4056 while (filebytes < length) {
4057 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
4058 filebytes += HFS_BIGFILE_SIZE;
4059 } else {
4060 filebytes = length;
4061 }
4062 error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
4063 if (error)
4064 break;
4065 }
4066 }
4067 } else /* Same logical size */ {
4068
4069 error = do_hfs_truncate(vp, length, flags, truncateflags, context);
4070 }
4071 /* Files that are changing size are not hot file candidates. */
4072 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
4073 fp->ff_bytesread = 0;
4074 }
4075
4076 #if HFS_CONFIG_KEY_ROLL
4077 if (!error && cp->c_truncatelockowner == current_thread()) {
4078 hfs_key_roll_check(cp, true);
4079 }
4080 #endif
4081
4082 if (!caller_has_cnode_lock)
4083 hfs_unlock(cp);
4084
4085 // Make sure UBC's size matches up (in case we didn't completely succeed)
4086 errno_t err2 = hfs_ubc_setsize(vp, fp->ff_size, caller_has_cnode_lock);
4087 if (!error)
4088 error = err2;
4089
4090 return error;
4091 }
4092
4093
4094 /*
4095 * Preallocate file storage space.
4096 */
4097 int
4098 hfs_vnop_allocate(struct vnop_allocate_args /* {
4099 vnode_t a_vp;
4100 off_t a_length;
4101 u_int32_t a_flags;
4102 off_t *a_bytesallocated;
4103 off_t a_offset;
4104 vfs_context_t a_context;
4105 } */ *ap)
4106 {
4107 struct vnode *vp = ap->a_vp;
4108 struct cnode *cp;
4109 struct filefork *fp;
4110 ExtendedVCB *vcb;
4111 off_t length = ap->a_length;
4112 off_t startingPEOF;
4113 off_t moreBytesRequested;
4114 off_t actualBytesAdded;
4115 off_t filebytes;
4116 u_int32_t fileblocks;
4117 int retval, retval2;
4118 u_int32_t blockHint;
4119 u_int32_t extendFlags; /* For call to ExtendFileC */
4120 struct hfsmount *hfsmp;
4121 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
4122 int lockflags;
4123 time_t orig_ctime;
4124
4125 *(ap->a_bytesallocated) = 0;
4126
4127 if (!vnode_isreg(vp))
4128 return (EISDIR);
4129 if (length < (off_t)0)
4130 return (EINVAL);
4131
4132 cp = VTOC(vp);
4133
4134 orig_ctime = VTOC(vp)->c_ctime;
4135
4136 nspace_snapshot_event(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);
4137
4138 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
4139
4140 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
4141 goto Err_Exit;
4142 }
4143
4144 fp = VTOF(vp);
4145 hfsmp = VTOHFS(vp);
4146 vcb = VTOVCB(vp);
4147
4148 fileblocks = fp->ff_blocks;
4149 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
4150
4151 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
4152 retval = EINVAL;
4153 goto Err_Exit;
4154 }
4155
4156 /* Fill in the flags word for the call to Extend the file */
4157
4158 extendFlags = kEFNoClumpMask;
4159 if (ap->a_flags & ALLOCATECONTIG)
4160 extendFlags |= kEFContigMask;
4161 if (ap->a_flags & ALLOCATEALL)
4162 extendFlags |= kEFAllMask;
4163 if (cred && suser(cred, NULL) != 0)
4164 extendFlags |= kEFReserveMask;
4165 if (hfs_virtualmetafile(cp))
4166 extendFlags |= kEFMetadataMask;
4167
4168 retval = E_NONE;
4169 blockHint = 0;
4170 startingPEOF = filebytes;
4171
4172 if (ap->a_flags & ALLOCATEFROMPEOF)
4173 length += filebytes;
4174 else if (ap->a_flags & ALLOCATEFROMVOL)
4175 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
4176
4177 /* If no changes are necessary, then we're done */
4178 if (filebytes == length)
4179 goto Std_Exit;
4180
4181 /*
4182 * Lengthen the size of the file. We must ensure that the
4183 * last byte of the file is allocated. Since the smallest
4184 * value of filebytes is 0, length will be at least 1.
4185 */
4186 if (length > filebytes) {
4187 if (ISSET(extendFlags, kEFAllMask)
4188 && (hfs_freeblks(hfsmp, ISSET(extendFlags, kEFReserveMask))
4189 < howmany(length - filebytes, hfsmp->blockSize))) {
4190 retval = ENOSPC;
4191 goto Err_Exit;
4192 }
4193
4194 off_t total_bytes_added = 0, orig_request_size;
4195
4196 orig_request_size = moreBytesRequested = length - filebytes;
4197
4198 #if QUOTA
4199 retval = hfs_chkdq(cp,
4200 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
4201 cred, 0);
4202 if (retval)
4203 goto Err_Exit;
4204
4205 #endif /* QUOTA */
4206 /*
4207 * Metadata zone checks.
4208 */
4209 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
4210 /*
4211 * Allocate Journal and Quota files in metadata zone.
4212 */
4213 if (hfs_virtualmetafile(cp)) {
4214 blockHint = hfsmp->hfs_metazone_start;
4215 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
4216 (blockHint <= hfsmp->hfs_metazone_end)) {
4217 /*
4218 * Move blockHint outside metadata zone.
4219 */
4220 blockHint = hfsmp->hfs_metazone_end + 1;
4221 }
4222 }
4223
4224
4225 while ((length > filebytes) && (retval == E_NONE)) {
4226 off_t bytesRequested;
4227
4228 if (hfs_start_transaction(hfsmp) != 0) {
4229 retval = EINVAL;
4230 goto Err_Exit;
4231 }
4232
4233 /* Protect extents b-tree and allocation bitmap */
4234 lockflags = SFL_BITMAP;
4235 if (overflow_extents(fp))
4236 lockflags |= SFL_EXTENTS;
4237 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4238
4239 if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
4240 bytesRequested = HFS_BIGFILE_SIZE;
4241 } else {
4242 bytesRequested = moreBytesRequested;
4243 }
4244
4245 if (extendFlags & kEFContigMask) {
4246 // if we're on a sparse device, this will force it to do a
4247 // full scan to find the space needed.
4248 hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
4249 }
4250
4251 retval = MacToVFSError(ExtendFileC(vcb,
4252 (FCB*)fp,
4253 bytesRequested,
4254 blockHint,
4255 extendFlags,
4256 &actualBytesAdded));
4257
4258 if (retval == E_NONE) {
4259 *(ap->a_bytesallocated) += actualBytesAdded;
4260 total_bytes_added += actualBytesAdded;
4261 moreBytesRequested -= actualBytesAdded;
4262 if (blockHint != 0) {
4263 blockHint += actualBytesAdded / vcb->blockSize;
4264 }
4265 }
4266 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
4267
4268 hfs_systemfile_unlock(hfsmp, lockflags);
4269
4270 if (hfsmp->jnl) {
4271 (void) hfs_update(vp, 0);
4272 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
4273 }
4274
4275 hfs_end_transaction(hfsmp);
4276 }
4277
4278
4279 /*
4280 * if we get an error and no changes were made then exit
4281 * otherwise we must do the hfs_update to reflect the changes
4282 */
4283 if (retval && (startingPEOF == filebytes))
4284 goto Err_Exit;
4285
4286 /*
4287 * Adjust actualBytesAdded to be allocation block aligned, not
4288 * clump size aligned.
4289 * NOTE: What we are reporting does not affect reality
4290 * until the file is closed, when we truncate the file to allocation
4291 * block size.
4292 */
4293 if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
4294 *(ap->a_bytesallocated) =
4295 roundup(orig_request_size, (off_t)vcb->blockSize);
4296
4297 } else { /* Shorten the size of the file */
4298
4299 /*
4300 * N.B. At present, this code is never called. If and when we
4301 * do start using it, it looks like there might be slightly
4302 * strange semantics with the file size: it's possible for the
4303 * file size to *increase* e.g. if current file size is 5,
4304 * length is 1024 and filebytes is 4096, the file size will
4305 * end up being 1024 bytes. This isn't necessarily a problem
4306 * but it's not consistent with the code above which doesn't
4307 * change the file size.
4308 */
4309
4310 retval = hfs_truncate(vp, length, 0, 0, ap->a_context);
4311 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
4312
4313 /*
4314 * if we get an error and no changes were made then exit
4315 * otherwise we must do the hfs_update to reflect the changes
4316 */
4317 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
4318 #if QUOTA
4319 /* These are bytesreleased */
4320 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
4321 #endif /* QUOTA */
4322
4323 if (fp->ff_size > filebytes) {
4324 fp->ff_size = filebytes;
4325
4326 hfs_ubc_setsize(vp, fp->ff_size, true);
4327 }
4328 }
4329
4330 Std_Exit:
4331 cp->c_flag |= C_MODIFIED;
4332 cp->c_touch_chgtime = TRUE;
4333 cp->c_touch_modtime = TRUE;
4334 retval2 = hfs_update(vp, 0);
4335
4336 if (retval == 0)
4337 retval = retval2;
4338 Err_Exit:
4339 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
4340 hfs_unlock(cp);
4341 return (retval);
4342 }
4343
4344
4345 /*
4346 * Pagein for HFS filesystem
4347 */
4348 int
4349 hfs_vnop_pagein(struct vnop_pagein_args *ap)
4350 /*
4351 struct vnop_pagein_args {
4352 vnode_t a_vp,
4353 upl_t a_pl,
4354 vm_offset_t a_pl_offset,
4355 off_t a_f_offset,
4356 size_t a_size,
4357 int a_flags
4358 vfs_context_t a_context;
4359 };
4360 */
4361 {
4362 vnode_t vp;
4363 struct cnode *cp;
4364 struct filefork *fp;
4365 int error = 0;
4366 upl_t upl;
4367 upl_page_info_t *pl;
4368 off_t f_offset;
4369 off_t page_needed_f_offset;
4370 int offset;
4371 int isize;
4372 int upl_size;
4373 int pg_index;
4374 boolean_t truncate_lock_held = FALSE;
4375 boolean_t file_converted = FALSE;
4376 kern_return_t kret;
4377
4378 vp = ap->a_vp;
4379 cp = VTOC(vp);
4380 fp = VTOF(vp);
4381
4382 #if CONFIG_PROTECT
4383 if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) {
4384 /*
4385 * If we errored here, then this means that one of two things occurred:
4386 * 1. there was a problem with the decryption of the key.
4387 * 2. the device is locked and we are not allowed to access this particular file.
4388 *
4389 * Either way, this means that we need to shut down this upl now. As long as
4390 * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves)
4391 * then we create a upl and immediately abort it.
4392 */
4393 if (ap->a_pl == NULL) {
4394 /* create the upl */
4395 ubc_create_upl (vp, ap->a_f_offset, ap->a_size, &upl, &pl,
4396 UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
4397 /* mark the range as needed so it doesn't immediately get discarded upon abort */
4398 ubc_upl_range_needed (upl, ap->a_pl_offset / PAGE_SIZE, 1);
4399
4400 /* Abort the range */
4401 ubc_upl_abort_range (upl, 0, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
4402 }
4403
4404
4405 return error;
4406 }
4407 #endif /* CONFIG_PROTECT */
4408
4409 if (ap->a_pl != NULL) {
4410 /*
4411 * this can only happen for swap files now that
4412 * we're asking for V2 paging behavior...
4413 * so don't need to worry about decompression, or
4414 * keeping track of blocks read or taking the truncate lock
4415 */
4416 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
4417 ap->a_size, (off_t)fp->ff_size, ap->a_flags);
4418 goto pagein_done;
4419 }
4420
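/*
 * Remember the file offset of the page that actually triggered this fault;
 * the ENOSPC retry path below redrives the pagein for just that one page.
 */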
4421 page_needed_f_offset = ap->a_f_offset + ap->a_pl_offset;
4422
4423 retry_pagein:
4424 /*
4425 * take truncate lock (shared/recursive) to guard against
4426 * zero-fill thru fsync interfering, but only for v2
4427 *
4428 * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
4429 * lock shared and we are allowed to recurse 1 level if this thread already
4430 * owns the lock exclusively... this can legally occur
4431 * if we are doing a shrinking ftruncate against a file
4432 * that is mapped private, and the pages being truncated
4433 * do not currently exist in the cache... in that case
4434 * we will have to page-in the missing pages in order
4435 * to provide them to the private mapping... we must
4436 * also call hfs_unlock_truncate with a positive been_recursed
4437 * arg to indicate that if we have recursed, there is no need to drop
4438 * the lock. Allowing this simple recursion is necessary
4439 * in order to avoid a certain deadlock... since the ftruncate
4440 * already holds the truncate lock exclusively, if we try
4441 * to acquire it shared to protect the pagein path, we will
4442 * hang this thread
4443 *
4444 * NOTE: The if () block below is a workaround in order to prevent a
4445 * VM deadlock. See rdar://7853471.
4446 *
4447 * If we are in a forced unmount, then launchd will still have the
4448 * dyld_shared_cache file mapped as it is trying to reboot. If we
4449 * take the truncate lock here to service a page fault, then our
4450 * thread could deadlock with the forced-unmount. The forced unmount
4451 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
4452 * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount
4453 * thread will think it needs to copy all of the data out of the file
4454 * and into a VM copy object. If we hold the cnode lock here, then that
4455 * VM operation will not be able to proceed, because we'll set a busy page
4456 * before attempting to grab the lock. Note that this isn't as simple as "don't
4457 * call ubc_setsize" because doing that would just shift the problem to the
4458 * ubc_msync done before the vnode is reclaimed.
4459 *
4460 * So, if a forced unmount on this volume is in flight AND the cnode is
4461 * marked C_DELETED, then just go ahead and do the page in without taking
4462 * the lock (thus suspending pagein_v2 semantics temporarily). Since it's on a file
4463 * that is not going to be available on the next mount, this seems like an
4464 * OK solution from a correctness point of view, even though it is hacky.
4465 */
4466 if (vfs_isforce(vnode_mount(vp))) {
4467 if (cp->c_flag & C_DELETED) {
4468 /* If we don't get it, then just go ahead and operate without the lock */
4469 truncate_lock_held = hfs_try_trunclock(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4470 }
4471 }
4472 else {
4473 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4474 truncate_lock_held = TRUE;
4475 }
4476
4477 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
4478
4479 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
4480 error = EINVAL;
4481 goto pagein_done;
4482 }
4483 ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);
4484
4485 upl_size = isize = ap->a_size;
4486
4487 /*
4488 * Scan from the back to find the last page in the UPL, so that we
4489 * aren't looking at a UPL that may have already been freed by the
4490 * preceding aborts/completions.
4491 */
4492 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
4493 if (upl_page_present(pl, --pg_index))
4494 break;
4495 if (pg_index == 0) {
4496 /*
4497 * no absent pages were found in the range specified
4498 * just abort the UPL to get rid of it and then we're done
4499 */
4500 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
4501 goto pagein_done;
4502 }
4503 }
4504 /*
4505 * initialize the offset variables before we touch the UPL.
4506 * f_offset is the position into the file, in bytes
4507 * offset is the position into the UPL, in bytes
4508 * pg_index is the pg# of the UPL we're operating on
4509 * isize is the offset into the UPL of the last page that is present.
4510 */
4511 isize = ((pg_index + 1) * PAGE_SIZE);
4512 pg_index = 0;
4513 offset = 0;
4514 f_offset = ap->a_f_offset;
4515
4516 while (isize) {
4517 int xsize;
4518 int num_of_pages;
4519
4520 if ( !upl_page_present(pl, pg_index)) {
4521 /*
4522 * we asked for RET_ONLY_ABSENT, so it's possible
4523 * to get back empty slots in the UPL.
4524 * just skip over them
4525 */
4526 f_offset += PAGE_SIZE;
4527 offset += PAGE_SIZE;
4528 isize -= PAGE_SIZE;
4529 pg_index++;
4530
4531 continue;
4532 }
4533 /*
4534 * We know that we have at least one absent page.
4535 * Now checking to see how many in a row we have
4536 */
4537 num_of_pages = 1;
4538 xsize = isize - PAGE_SIZE;
4539
4540 while (xsize) {
4541 if ( !upl_page_present(pl, pg_index + num_of_pages))
4542 break;
4543 num_of_pages++;
4544 xsize -= PAGE_SIZE;
4545 }
4546 xsize = num_of_pages * PAGE_SIZE;
4547
4548 #if HFS_COMPRESSION
4549 if (VNODE_IS_RSRC(vp)) {
4550 /* allow pageins of the resource fork */
4551 } else {
4552 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
4553
4554 if (compressed) {
4555
4556 if (truncate_lock_held) {
4557 /*
4558 * can't hold the truncate lock when calling into the decmpfs layer
4559 * since it calls back into this layer... even though we're only
4560 * holding the lock in shared mode, and the re-entrant path only
4561 * takes the lock shared, we can deadlock if some other thread
4562 * tries to grab the lock exclusively in between.
4563 */
4564 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4565 truncate_lock_held = FALSE;
4566 }
4567 ap->a_pl = upl;
4568 ap->a_pl_offset = offset;
4569 ap->a_f_offset = f_offset;
4570 ap->a_size = xsize;
4571
4572 error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
4573 /*
4574 * note that decmpfs_pagein_compressed can change the state of
4575 * 'compressed'... it will set it to 0 if the file is no longer
4576 * compressed once the compression lock is successfully taken
4577 * i.e. we would block on that lock while the file is being inflated
4578 */
4579 if (error == 0 && vnode_isfastdevicecandidate(vp)) {
4580 (void) hfs_addhotfile(vp);
4581 }
4582 if (compressed) {
4583 if (error == 0) {
4584 /* successful page-in, update the access time */
4585 VTOC(vp)->c_touch_acctime = TRUE;
4586
4587 //
4588 // compressed files are not traditional hot file candidates
4589 // but they may be for CF (which ignores the ff_bytesread
4590 // field)
4591 //
4592 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
4593 fp->ff_bytesread = 0;
4594 }
4595 } else if (error == EAGAIN) {
4596 /*
4597 * EAGAIN indicates someone else already holds the compression lock...
4598 * to avoid deadlocking, we'll abort this range of pages with an
4599 * indication that the pagein needs to be redriven
4600 */
4601 ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
4602 } else if (error == ENOSPC) {
4603
4604 if (upl_size == PAGE_SIZE)
4605 panic("decmpfs_pagein_compressed: couldn't ubc_upl_map a single page\n");
4606
4607 ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
4608
4609 ap->a_size = PAGE_SIZE;
4610 ap->a_pl = NULL;
4611 ap->a_pl_offset = 0;
4612 ap->a_f_offset = page_needed_f_offset;
4613
4614 goto retry_pagein;
4615 } else {
4616 ubc_upl_abort(upl, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
4617 goto pagein_done;
4618 }
4619 goto pagein_next_range;
4620 }
4621 else {
4622 /*
4623 * Set file_converted only if the file became decompressed while we were
4624 * paging in. If it were still compressed, we would re-start the loop using the goto
4625 * in the above block. This avoids overloading truncate_lock_held as our retry_pagein
4626 * condition below, since we could have avoided taking the truncate lock to prevent
4627 * a deadlock in the force unmount case.
4628 */
4629 file_converted = TRUE;
4630 }
4631 }
4632 if (file_converted == TRUE) {
4633 /*
4634 * the file was converted back to a regular file after we first saw it as compressed
4635 * we need to abort the upl, retake the truncate lock, recreate the UPL and start over
4636 * reset a_size so that we consider what remains of the original request
4637 * and null out a_upl and a_pl_offset.
4638 *
4639 * We should only be able to get into this block if the decmpfs_pagein_compressed
4640 * successfully decompressed the range in question for this file.
4641 */
4642 ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
4643
4644 ap->a_size = isize;
4645 ap->a_pl = NULL;
4646 ap->a_pl_offset = 0;
4647
4648 /* Reset file_converted back to false so that we don't infinite-loop. */
4649 file_converted = FALSE;
4650 goto retry_pagein;
4651 }
4652 }
4653 #endif
4654 error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);
4655
4656 /*
4657 * Keep track of blocks read.
4658 */
4659 if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
4660 int bytesread;
4661 int took_cnode_lock = 0;
4662
4663 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
4664 bytesread = fp->ff_size;
4665 else
4666 bytesread = xsize;
4667
4668 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
4669 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
4670 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
4671 took_cnode_lock = 1;
4672 }
4673 /*
4674 * If this file hasn't been seen since the start of
4675 * the current sampling period then start over.
4676 */
4677 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
4678 struct timeval tv;
4679
4680 fp->ff_bytesread = bytesread;
4681 microtime(&tv);
4682 cp->c_atime = tv.tv_sec;
4683 } else {
4684 fp->ff_bytesread += bytesread;
4685 }
4686 cp->c_touch_acctime = TRUE;
4687
4688 if (vnode_isfastdevicecandidate(vp)) {
4689 (void) hfs_addhotfile(vp);
4690 }
4691 if (took_cnode_lock)
4692 hfs_unlock(cp);
4693 }
4694 pagein_next_range:
4695 f_offset += xsize;
4696 offset += xsize;
4697 isize -= xsize;
4698 pg_index += num_of_pages;
4699
4700 error = 0;
4701 }
4702
4703 pagein_done:
4704 if (truncate_lock_held == TRUE) {
4705 /* HFS_LOCK_SKIP_IF_EXCLUSIVE acts as the been_recursed argument to hfs_unlock_truncate */
4706 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4707 }
4708
4709 return (error);
4710 }
4711
4712 /*
4713 * Pageout for HFS filesystem.
4714 */
4715 int
4716 hfs_vnop_pageout(struct vnop_pageout_args *ap)
4717 /*
4718 struct vnop_pageout_args {
4719 vnode_t a_vp,
4720 upl_t a_pl,
4721 vm_offset_t a_pl_offset,
4722 off_t a_f_offset,
4723 size_t a_size,
4724 int a_flags
4725 vfs_context_t a_context;
4726 };
4727 */
4728 {
4729 vnode_t vp = ap->a_vp;
4730 struct cnode *cp;
4731 struct filefork *fp;
4732 int retval = 0;
4733 off_t filesize;
4734 upl_t upl;
4735 upl_page_info_t* pl = NULL;
4736 vm_offset_t a_pl_offset;
4737 int a_flags;
4738 int is_pageoutv2 = 0;
4739 kern_return_t kret;
4740
4741 cp = VTOC(vp);
4742 fp = VTOF(vp);
4743
4744 a_flags = ap->a_flags;
4745 a_pl_offset = ap->a_pl_offset;
4746
4747 /*
4748 * we can tell if we're getting the new or old behavior from the UPL
4749 */
4750 if ((upl = ap->a_pl) == NULL) {
4751 int request_flags;
4752
4753 is_pageoutv2 = 1;
4754 /*
4755 * we're in control of any UPL we commit
4756 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
4757 */
4758 a_flags &= ~UPL_NOCOMMIT;
4759 a_pl_offset = 0;
4760
4761 /*
4762 * For V2 semantics, we want to take the cnode truncate lock
4763 * shared to guard against the file size changing via zero-filling.
4764 *
4765 * However, we have to be careful because we may be invoked
4766 * via the ubc_msync path to write out dirty mmap'd pages
4767 * in response to a lock event on a content-protected
4768 * filesystem (e.g. to write out class A files).
4769 * As a result, we want to take the truncate lock 'SHARED' with
4770 * the mini-recursion locktype so that we don't deadlock/panic
4771 * because we may be already holding the truncate lock exclusive to force any other
4772 * IOs to have blocked behind us.
4773 */
4774 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4775
4776 if (a_flags & UPL_MSYNC) {
4777 request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
4778 }
4779 else {
4780 request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
4781 }
4782
4783 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);
4784
4785 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
4786 retval = EINVAL;
4787 goto pageout_done;
4788 }
4789 }
4790 /*
4791 * from this point forward upl points at the UPL we're working with
4792 * it was either passed in or we successfully created it
4793 */
4794
4795 /*
4796 * Figure out where the file ends, for pageout purposes. If
4797 * ff_new_size > ff_size, then we're in the middle of extending the
4798 * file via a write, so it is safe (and necessary) that we be able
4799 * to pageout up to that point.
4800 */
4801 filesize = fp->ff_size;
4802 if (fp->ff_new_size > filesize)
4803 filesize = fp->ff_new_size;
4804
4805 /*
4806 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
4807 * UPL instead of relying on the UPL passed into us. We go ahead and do that here,
4808 * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for
4809 * N dirty ranges in the UPL. Note that this is almost a direct copy of the
4810 * logic in vnode_pageout except that we need to do it after grabbing the truncate
4811 * lock in HFS so that we don't lock invert ourselves.
4812 *
4813 * Note that we can still get into this function on behalf of the default pager with
4814 * non-V2 behavior (swapfiles). However in that case, we did not grab locks above
4815 * since fsync and other writing threads will grab the locks, then mark the
4816 * relevant pages as busy. But the pageout codepath marks the pages as busy,
4817 * and THEN would attempt to grab the truncate lock, which would result in deadlock. So
4818 * we do not try to grab anything for the pre-V2 case, which should only be accessed
4819 * by the paging/VM system.
4820 */
4821
4822 if (is_pageoutv2) {
4823 off_t f_offset;
4824 int offset;
4825 int isize;
4826 int pg_index;
4827 int error;
4828 int error_ret = 0;
4829
4830 isize = ap->a_size;
4831 f_offset = ap->a_f_offset;
4832
4833 /*
4834 * Scan from the back to find the last page in the UPL, so that we
4835 * aren't looking at a UPL that may have already been freed by the
4836 * preceding aborts/completions.
4837 */
4838 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
4839 if (upl_page_present(pl, --pg_index))
4840 break;
4841 if (pg_index == 0) {
4842 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
4843 goto pageout_done;
4844 }
4845 }
4846
4847 /*
4848 * initialize the offset variables before we touch the UPL.
4849 * a_f_offset is the position into the file, in bytes
4850 * offset is the position into the UPL, in bytes
4851 * pg_index is the pg# of the UPL we're operating on.
4852 * isize is the offset into the UPL of the last non-clean page.
4853 */
4854 isize = ((pg_index + 1) * PAGE_SIZE);
4855
4856 offset = 0;
4857 pg_index = 0;
4858
4859 while (isize) {
4860 int xsize;
4861 int num_of_pages;
4862
4863 if ( !upl_page_present(pl, pg_index)) {
4864 /*
4865 * we asked for RET_ONLY_DIRTY, so it's possible
4866 * to get back empty slots in the UPL.
4867 * just skip over them
4868 */
4869 f_offset += PAGE_SIZE;
4870 offset += PAGE_SIZE;
4871 isize -= PAGE_SIZE;
4872 pg_index++;
4873
4874 continue;
4875 }
4876 if ( !upl_dirty_page(pl, pg_index)) {
4877 panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
4878 }
4879
4880 /*
4881 * We know that we have at least one dirty page.
4882 * Now checking to see how many in a row we have
4883 */
4884 num_of_pages = 1;
4885 xsize = isize - PAGE_SIZE;
4886
4887 while (xsize) {
4888 if ( !upl_dirty_page(pl, pg_index + num_of_pages))
4889 break;
4890 num_of_pages++;
4891 xsize -= PAGE_SIZE;
4892 }
4893 xsize = num_of_pages * PAGE_SIZE;
4894
4895 if ((error = cluster_pageout(vp, upl, offset, f_offset,
4896 xsize, filesize, a_flags))) {
4897 if (error_ret == 0)
4898 error_ret = error;
4899 }
4900 f_offset += xsize;
4901 offset += xsize;
4902 isize -= xsize;
4903 pg_index += num_of_pages;
4904 }
4905 /* capture errnos bubbled out of cluster_pageout if they occurred */
4906 if (error_ret != 0) {
4907 retval = error_ret;
4908 }
4909 } /* end block for v2 pageout behavior */
4910 else {
4911 /*
4912 * just call cluster_pageout for old pre-v2 behavior
4913 */
4914 retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
4915 ap->a_size, filesize, a_flags);
4916 }
4917
4918 /*
4919 * If data was written, update the modification time of the file
4920 * but only if it's mapped writable; we will have touched the
4921 * modification time for direct writes.
4922 */
4923 if (retval == 0 && (ubc_is_mapped_writable(vp)
4924 || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING))) {
4925 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
4926
4927 // Check again with lock
4928 bool mapped_writable = ubc_is_mapped_writable(vp);
4929 if (mapped_writable
4930 || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING)) {
4931 cp->c_touch_modtime = TRUE;
4932 cp->c_touch_chgtime = TRUE;
4933
4934 /*
4935 * We only need to increment the generation counter if
4936 * it's currently mapped writable because we incremented
4937 * the counter in hfs_vnop_mnomap.
4938 */
4939 if (mapped_writable)
4940 hfs_incr_gencount(VTOC(vp));
4941
4942 /*
4943 * If setuid or setgid bits are set and this process is
4944 * not the superuser then clear the setuid and setgid bits
4945 * as a precaution against tampering.
4946 */
4947 if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
4948 (vfs_context_suser(ap->a_context) != 0)) {
4949 cp->c_mode &= ~(S_ISUID | S_ISGID);
4950 }
4951 }
4952
4953 hfs_unlock(cp);
4954 }
4955
4956 pageout_done:
4957 if (is_pageoutv2) {
4958 /*
4959 * Release the truncate lock. Note that because
4960 * we may have taken the lock recursively by
4961 * being invoked via ubc_msync due to lockdown,
4962 * we should release it recursively, too.
4963 */
4964 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4965 }
4966 return (retval);
4967 }
4968
4969 /*
4970 * Intercept B-Tree node writes to unswap them if necessary.
4971 */
4972 int
4973 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
4974 {
4975 int retval = 0;
4976 register struct buf *bp = ap->a_bp;
4977 register struct vnode *vp = buf_vnode(bp);
4978 BlockDescriptor block;
4979
4980 /* Trap B-Tree writes */
4981 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
4982 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
4983 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
4984 (vp == VTOHFS(vp)->hfc_filevp)) {
4985
4986 /*
4987 * Swap and validate the node if it is in native byte order.
4988 * This is always true on big endian, so we always validate
4989 * before writing here. On little endian, the node typically has
4990 * been swapped and validated when it was written to the journal,
4991 * so we won't do anything here.
4992 */
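/*
 * The last u_int16_t of a B-tree node is the offset of record 0, which is
 * always sizeof(BTNodeDescriptor) == 14 (0x000e); reading 0x000e here in
 * host byte order indicates the node has not yet been swapped to big endian.
 */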
4993 if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
4994 /* Prepare the block pointer */
4995 block.blockHeader = bp;
4996 block.buffer = (char *)buf_dataptr(bp);
4997 block.blockNum = buf_lblkno(bp);
4998 /* not found in cache ==> came from disk */
4999 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
5000 block.blockSize = buf_count(bp);
5001
5002 /* Endian un-swap B-Tree node */
5003 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
5004 if (retval)
5005 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
5006 }
5007 }
5008
5009 /* This buffer shouldn't be locked anymore but if it is clear it */
5010 if ((buf_flags(bp) & B_LOCKED)) {
5011 // XXXdbg
5012 if (VTOHFS(vp)->jnl) {
5013 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
5014 }
5015 buf_clearflags(bp, B_LOCKED);
5016 }
5017 retval = vn_bwrite (ap);
5018
5019 return (retval);
5020 }
5021
5022
5023 int
5024 hfs_pin_block_range(struct hfsmount *hfsmp, int pin_state, uint32_t start_block, uint32_t nblocks)
5025 {
5026 _dk_cs_pin_t pin;
5027 unsigned ioc;
5028 int err;
5029
5030 memset(&pin, 0, sizeof(pin));
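/* Express the extent to (un)pin as a byte offset and length on the device. */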
5031 pin.cp_extent.offset = ((uint64_t)start_block) * HFSTOVCB(hfsmp)->blockSize;
5032 pin.cp_extent.length = ((uint64_t)nblocks) * HFSTOVCB(hfsmp)->blockSize;
5033 switch (pin_state) {
5034 case HFS_PIN_IT:
5035 ioc = _DKIOCCSPINEXTENT;
5036 pin.cp_flags = _DKIOCCSPINTOFASTMEDIA;
5037 break;
5038 case HFS_PIN_IT | HFS_TEMP_PIN:
5039 ioc = _DKIOCCSPINEXTENT;
5040 pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSTEMPORARYPIN;
5041 break;
5042 case HFS_PIN_IT | HFS_DATALESS_PIN:
5043 ioc = _DKIOCCSPINEXTENT;
5044 pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSPINFORSWAPFILE;
5045 break;
5046 case HFS_UNPIN_IT:
5047 ioc = _DKIOCCSUNPINEXTENT;
5048 pin.cp_flags = 0;
5049 break;
5050 case HFS_UNPIN_IT | HFS_EVICT_PIN:
5051 ioc = _DKIOCCSPINEXTENT;
5052 pin.cp_flags = _DKIOCCSPINTOSLOWMEDIA;
5053 break;
5054 default:
5055 return EINVAL;
5056 }
5057 err = VNOP_IOCTL(hfsmp->hfs_devvp, ioc, (caddr_t)&pin, 0, vfs_context_kernel());
5058 return err;
5059 }
5060
5061 //
5062 // The cnode lock should already be held on entry to this function
5063 //
5064 int
5065 hfs_pin_vnode(struct hfsmount *hfsmp, struct vnode *vp, int pin_state, uint32_t *num_blocks_pinned)
5066 {
5067 struct filefork *fp = VTOF(vp);
5068 int i, err=0, need_put=0;
5069 struct vnode *rsrc_vp=NULL;
5070 uint32_t npinned = 0;
5071 off_t offset;
5072
5073 if (num_blocks_pinned) {
5074 *num_blocks_pinned = 0;
5075 }
5076
5077 if (vnode_vtype(vp) != VREG) {
5078 /* Not allowed to pin directories or symlinks */
5079 printf("hfs: can't pin vnode of type %d\n", vnode_vtype(vp));
5080 return (EPERM);
5081 }
5082
5083 if (fp->ff_unallocblocks) {
5084 printf("hfs: can't pin a vnode w/unalloced blocks (%d)\n", fp->ff_unallocblocks);
5085 return (EINVAL);
5086 }
5087
5088 /*
5089 * It is possible that if the caller unlocked/re-locked the cnode after checking
5090 * for C_NOEXISTS|C_DELETED that the file could have been deleted while the
5091 * cnode was unlocked. So check the condition again and return ENOENT so that
5092 * the caller knows why we failed to pin the vnode.
5093 */
5094 if (VTOC(vp)->c_flag & (C_NOEXISTS|C_DELETED)) {
5095 // makes no sense to pin something that's pending deletion
5096 return ENOENT;
5097 }
5098
5099 if (fp->ff_blocks == 0 && (VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
5100 if (!VNODE_IS_RSRC(vp) && hfs_vgetrsrc(hfsmp, vp, &rsrc_vp) == 0) {
5101 //printf("hfs: fileid %d resource fork nblocks: %d / size: %lld\n", VTOC(vp)->c_fileid,
5102 // VTOC(rsrc_vp)->c_rsrcfork->ff_blocks,VTOC(rsrc_vp)->c_rsrcfork->ff_size);
5103
5104 fp = VTOC(rsrc_vp)->c_rsrcfork;
5105 need_put = 1;
5106 }
5107 }
5108 if (fp->ff_blocks == 0) {
5109 if (need_put) {
5110 //
5111 // use a distinct error code for a compressed file that has no resource fork;
5112 // we return EALREADY to indicate that the data is already probably hot file
5113 // cached because it's in an EA and the attributes btree is on the ssd
5114 //
5115 err = EALREADY;
5116 } else {
5117 err = EINVAL;
5118 }
5119 goto out;
5120 }
5121
5122 offset = 0;
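/*
 * Pin the extents stored inline in the catalog record (at most
 * kHFSPlusExtentDensity of them); a zero startBlock ends the list, and any
 * overflow extents are handled separately below.
 */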
5123 for (i = 0; i < kHFSPlusExtentDensity; i++) {
5124 if (fp->ff_extents[i].startBlock == 0) {
5125 break;
5126 }
5127
5128 err = hfs_pin_block_range(hfsmp, pin_state, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount);
5129 if (err) {
5130 break;
5131 } else {
5132 npinned += fp->ff_extents[i].blockCount;
5133 }
5134 }
5135
5136 if (err || npinned == 0) {
5137 goto out;
5138 }
5139
5140 if (fp->ff_extents[kHFSPlusExtentDensity-1].startBlock) {
5141 uint32_t pblocks;
5142 uint8_t forktype = 0;
5143
5144 if (fp == VTOC(vp)->c_rsrcfork) {
5145 forktype = 0xff;
5146 }
5147 /*
5148 * The file could have overflow extents, better pin them.
5149 *
5150 * We assume that since we are holding the cnode lock for this cnode,
5151 * the file's extents cannot be manipulated, but the tree could, so we
5152 * need to ensure that it doesn't change behind our back as we iterate it.
5153 */
5154 int lockflags = hfs_systemfile_lock (hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
5155 err = hfs_pin_overflow_extents(hfsmp, VTOC(vp)->c_fileid, forktype, &pblocks);
5156 hfs_systemfile_unlock (hfsmp, lockflags);
5157
5158 if (err) {
5159 goto out;
5160 }
5161 npinned += pblocks;
5162 }
5163
5164 out:
5165 if (num_blocks_pinned) {
5166 *num_blocks_pinned = npinned;
5167 }
5168
5169 if (need_put && rsrc_vp) {
5170 //
5171 // have to unlock the cnode since it's shared between the
5172 // resource fork vnode and the data fork vnode (and the
5173 // vnode_put() may need to re-acquire the cnode lock to
5174 // reclaim the resource fork vnode)
5175 //
5176 hfs_unlock(VTOC(vp));
5177 vnode_put(rsrc_vp);
5178 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5179 }
5180 return err;
5181 }
5182
5183
5184 /*
5185 * Relocate a file to a new location on disk
5186 * cnode must be locked on entry
5187 *
5188 * Relocation occurs by cloning the file's data from its
5189 * current set of blocks to a new set of blocks. During
5190 * the relocation all of the blocks (old and new) are
5191 * owned by the file.
5192 *
5193 * -----------------
5194 * |///////////////|
5195 * -----------------
5196 * 0 N (file offset)
5197 *
5198 * ----------------- -----------------
5199 * |///////////////| | | STEP 1 (acquire new blocks)
5200 * ----------------- -----------------
5201 * 0 N N+1 2N
5202 *
5203 * ----------------- -----------------
5204 * |///////////////| |///////////////| STEP 2 (clone data)
5205 * ----------------- -----------------
5206 * 0 N N+1 2N
5207 *
5208 * -----------------
5209 * |///////////////| STEP 3 (head truncate blocks)
5210 * -----------------
5211 * 0 N
5212 *
5213 * During steps 2 and 3 page-outs to file offsets less
5214 * than or equal to N are suspended.
5215 *
5216 * During step 3 page-ins to the file get suspended.
5217 */
5218 int
5219 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
5220 struct proc *p)
5221 {
5222 struct cnode *cp;
5223 struct filefork *fp;
5224 struct hfsmount *hfsmp;
5225 u_int32_t headblks;
5226 u_int32_t datablks;
5227 u_int32_t blksize;
5228 u_int32_t growsize;
5229 u_int32_t nextallocsave;
5230 daddr64_t sector_a, sector_b;
5231 int eflags;
5232 off_t newbytes;
5233 int retval;
5234 int lockflags = 0;
5235 int took_trunc_lock = 0;
5236 int started_tr = 0;
5237 enum vtype vnodetype;
5238
5239 vnodetype = vnode_vtype(vp);
5240 if (vnodetype != VREG) {
5241 /* Not allowed to move symlinks. */
5242 return (EPERM);
5243 }
5244
5245 hfsmp = VTOHFS(vp);
5246 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
5247 return (ENOSPC);
5248 }
5249
5250 cp = VTOC(vp);
5251 fp = VTOF(vp);
5252 if (fp->ff_unallocblocks)
5253 return (EINVAL);
5254
5255 #if CONFIG_PROTECT
5256 /*
5257 * <rdar://problem/9118426>
5258 * Disable HFS file relocation on content-protected filesystems
5259 */
5260 if (cp_fs_protected (hfsmp->hfs_mp)) {
5261 return EINVAL;
5262 }
5263 #endif
5264 /* If it's an SSD, also disable HFS relocation */
5265 if (hfsmp->hfs_flags & HFS_SSD) {
5266 return EINVAL;
5267 }
5268
5269
5270 blksize = hfsmp->blockSize;
5271 if (blockHint == 0)
5272 blockHint = hfsmp->nextAllocation;
5273
5274 if (fp->ff_size > 0x7fffffff) {
5275 return (EFBIG);
5276 }
5277
5278 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
5279 hfs_unlock(cp);
5280 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
5281 /* Force lock since callers expects lock to be held. */
5282 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) {
5283 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5284 return (retval);
5285 }
5286 /* No need to continue if file was removed. */
5287 if (cp->c_flag & C_NOEXISTS) {
5288 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5289 return (ENOENT);
5290 }
5291 took_trunc_lock = 1;
5292 }
5293 headblks = fp->ff_blocks;
5294 datablks = howmany(fp->ff_size, blksize);
5295 growsize = datablks * blksize;
5296 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
5297 if (blockHint >= hfsmp->hfs_metazone_start &&
5298 blockHint <= hfsmp->hfs_metazone_end)
5299 eflags |= kEFMetadataMask;
5300
5301 if (hfs_start_transaction(hfsmp) != 0) {
5302 if (took_trunc_lock)
5303 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5304 return (EINVAL);
5305 }
5306 started_tr = 1;
5307 /*
5308 * Protect the extents b-tree and the allocation bitmap
5309 * during MapFileBlockC and ExtendFileC operations.
5310 */
5311 lockflags = SFL_BITMAP;
5312 if (overflow_extents(fp))
5313 lockflags |= SFL_EXTENTS;
5314 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5315
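/*
 * Map the last data block (offset growsize - 1) to its device sector
 * (sector_a); after extending, it is compared with the first sector of the
 * new space (sector_b), so an allocation that merely continues the existing
 * extent can be rejected, since relocating into it would not move the file.
 */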
5316 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
5317 if (retval) {
5318 retval = MacToVFSError(retval);
5319 goto out;
5320 }
5321
5322 /*
5323 * STEP 1 - acquire new allocation blocks.
5324 */
5325 nextallocsave = hfsmp->nextAllocation;
5326 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
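/*
 * A metadata-zone allocation used the zone as its block hint; restore the
 * saved roving allocation pointer, presumably so that ordinary allocations
 * do not start scanning from inside the metadata zone.
 */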
5327 if (eflags & kEFMetadataMask) {
5328 hfs_lock_mount(hfsmp);
5329 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
5330 MarkVCBDirty(hfsmp);
5331 hfs_unlock_mount(hfsmp);
5332 }
5333
5334 retval = MacToVFSError(retval);
5335 if (retval == 0) {
5336 cp->c_flag |= C_MODIFIED;
5337 if (newbytes < growsize) {
5338 retval = ENOSPC;
5339 goto restore;
5340 } else if (fp->ff_blocks < (headblks + datablks)) {
5341 printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN);
5342 retval = ENOSPC;
5343 goto restore;
5344 }
5345
5346 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
5347 if (retval) {
5348 retval = MacToVFSError(retval);
5349 } else if ((sector_a + 1) == sector_b) {
5350 retval = ENOSPC;
5351 goto restore;
5352 } else if ((eflags & kEFMetadataMask) &&
5353 ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
5354 hfsmp->hfs_metazone_end)) {
5355 #if 0
5356 const char * filestr;
5357 char emptystr = '\0';
5358
5359 if (cp->c_desc.cd_nameptr != NULL) {
5360 filestr = (const char *)&cp->c_desc.cd_nameptr[0];
5361 } else if (vnode_name(vp) != NULL) {
5362 filestr = vnode_name(vp);
5363 } else {
5364 filestr = &emptystr;
5365 }
5366 #endif
5367 retval = ENOSPC;
5368 goto restore;
5369 }
5370 }
5371 /* Done with system locks and journal for now. */
5372 hfs_systemfile_unlock(hfsmp, lockflags);
5373 lockflags = 0;
5374 hfs_end_transaction(hfsmp);
5375 started_tr = 0;
5376
5377 if (retval) {
5378 /*
5379 * Check to see if failure is due to excessive fragmentation.
5380 */
5381 if ((retval == ENOSPC) &&
5382 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
5383 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
5384 }
5385 goto out;
5386 }
5387 /*
5388 * STEP 2 - clone file data into the new allocation blocks.
5389 */
5390
5391 if (vnodetype == VLNK)
5392 retval = EPERM;
5393 else if (vnode_issystem(vp))
5394 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
5395 else
5396 retval = hfs_clonefile(vp, headblks, datablks, blksize);
5397
5398 /* Start transaction for step 3 or for a restore. */
5399 if (hfs_start_transaction(hfsmp) != 0) {
5400 retval = EINVAL;
5401 goto out;
5402 }
5403 started_tr = 1;
5404 if (retval)
5405 goto restore;
5406
5407 /*
5408 * STEP 3 - switch to cloned data and remove old blocks.
5409 */
5410 lockflags = SFL_BITMAP;
5411 if (overflow_extents(fp))
5412 lockflags |= SFL_EXTENTS;
5413 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5414
5415 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
5416
5417 hfs_systemfile_unlock(hfsmp, lockflags);
5418 lockflags = 0;
5419 if (retval)
5420 goto restore;
5421 out:
5422 if (took_trunc_lock)
5423 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5424
5425 if (lockflags) {
5426 hfs_systemfile_unlock(hfsmp, lockflags);
5427 lockflags = 0;
5428 }
5429
5430 /* Push cnode's new extent data to disk. */
5431 if (retval == 0) {
5432 hfs_update(vp, 0);
5433 }
5434 if (hfsmp->jnl) {
5435 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
5436 (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
5437 else
5438 (void) hfs_flushvolumeheader(hfsmp, 0);
5439 }
5440 exit:
5441 if (started_tr)
5442 hfs_end_transaction(hfsmp);
5443
5444 return (retval);
5445
5446 restore:
5447 if (fp->ff_blocks == headblks) {
5448 if (took_trunc_lock)
5449 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5450 goto exit;
5451 }
5452 /*
5453 * Give back any newly allocated space.
5454 */
5455 if (lockflags == 0) {
5456 lockflags = SFL_BITMAP;
5457 if (overflow_extents(fp))
5458 lockflags |= SFL_EXTENTS;
5459 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5460 }
5461
5462 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
5463 FTOC(fp)->c_fileid, false);
5464
5465 hfs_systemfile_unlock(hfsmp, lockflags);
5466 lockflags = 0;
5467
5468 if (took_trunc_lock)
5469 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5470 goto exit;
5471 }
5472
5473
5474 /*
5475 * Clone a file's data within the file.
5476 *
5477 */
5478 static int
5479 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
5480 {
5481 caddr_t bufp;
5482 size_t bufsize;
5483 size_t copysize;
5484 size_t iosize;
5485 size_t offset;
5486 off_t writebase;
5487 uio_t auio;
5488 int error = 0;
5489
5490 writebase = blkstart * blksize;
5491 copysize = blkcnt * blksize;
5492 iosize = bufsize = MIN(copysize, 128 * 1024);
5493 offset = 0;
5494
5495 hfs_unlock(VTOC(vp));
5496
5497 #if CONFIG_PROTECT
5498 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
5499 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5500 return (error);
5501 }
5502 #endif /* CONFIG_PROTECT */
5503
5504 bufp = hfs_malloc(bufsize);
5505
5506 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
5507
5508 while (offset < copysize) {
5509 iosize = MIN(copysize - offset, iosize);
5510
5511 uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
5512 uio_addiov(auio, (uintptr_t)bufp, iosize);
5513
5514 error = cluster_read(vp, auio, copysize, IO_NOCACHE);
5515 if (error) {
5516 printf("hfs_clonefile: cluster_read failed - %d\n", error);
5517 break;
5518 }
5519 if (uio_resid(auio) != 0) {
5520 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
5521 error = EIO;
5522 break;
5523 }
5524
5525 uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
5526 uio_addiov(auio, (uintptr_t)bufp, iosize);
5527
5528 error = cluster_write(vp, auio, writebase + offset,
5529 writebase + offset + iosize,
5530 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
5531 if (error) {
5532 printf("hfs_clonefile: cluster_write failed - %d\n", error);
5533 break;
5534 }
5535 if (uio_resid(auio) != 0) {
5536 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
5537 error = EIO;
5538 break;
5539 }
5540 offset += iosize;
5541 }
5542 uio_free(auio);
5543
5544 if ((blksize & PAGE_MASK)) {
5545 /*
5546 * since the copy may not have started on a PAGE
5547 * boundary (or may not have ended on one), we
5548 * may have pages left in the cache since NOCACHE
5549 * will let partially written pages linger...
5550 * let's just flush the entire range to make sure
5551 * we don't have any pages left that are beyond
5552 * (or intersect) the real LEOF of this file
5553 */
5554 ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
5555 } else {
5556 /*
5557 * No need to call ubc_msync or hfs_invalbuf
5558 * since the file was copied using IO_NOCACHE and
5559 * the copy was done starting and ending on a page
5560 * boundary in the file.
5561 */
5562 }
5563 hfs_free(bufp, bufsize);
5564
5565 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5566 return (error);
5567 }
5568
5569 /*
5570 * Clone a system (metadata) file.
5571 *
5572 */
5573 static int
5574 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
5575 kauth_cred_t cred, struct proc *p)
5576 {
5577 caddr_t bufp;
5578 char * offset;
5579 size_t bufsize;
5580 size_t iosize;
5581 struct buf *bp = NULL;
5582 daddr64_t blkno;
5583 daddr64_t blk;
5584 daddr64_t start_blk;
5585 daddr64_t last_blk;
5586 int breadcnt;
5587 int i;
5588 int error = 0;
5589
5590
5591 iosize = GetLogicalBlockSize(vp);
5592 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
5593 breadcnt = bufsize / iosize;
5594
5595 bufp = hfs_malloc(bufsize);
5596
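/*
 * start_blk and last_blk are in device logical blocks (iosize units) rather
 * than allocation blocks, since buf_meta_bread and buf_getblk address the
 * fork in logical blocks.
 */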
5597 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
5598 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
5599 blkno = 0;
5600
5601 while (blkno < last_blk) {
5602 /*
5603 * Read up to a megabyte
5604 */
5605 offset = bufp;
5606 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
5607 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
5608 if (error) {
5609 printf("hfs_clonesysfile: meta_bread error %d\n", error);
5610 goto out;
5611 }
5612 if (buf_count(bp) != iosize) {
5613 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
5614 goto out;
5615 }
5616 bcopy((char *)buf_dataptr(bp), offset, iosize);
5617
5618 buf_markinvalid(bp);
5619 buf_brelse(bp);
5620 bp = NULL;
5621
5622 offset += iosize;
5623 }
5624
5625 /*
5626 * Write up to a megabyte
5627 */
5628 offset = bufp;
5629 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
5630 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
5631 if (bp == NULL) {
5632 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
5633 error = EIO;
5634 goto out;
5635 }
5636 bcopy(offset, (char *)buf_dataptr(bp), iosize);
5637 error = (int)buf_bwrite(bp);
5638 bp = NULL;
5639 if (error)
5640 goto out;
5641 offset += iosize;
5642 }
5643 }
5644 out:
5645 if (bp) {
5646 buf_brelse(bp);
5647 }
5648
5649 hfs_free(bufp, bufsize);
5650
5651 error = hfs_fsync(vp, MNT_WAIT, 0, p);
5652
5653 return (error);
5654 }
5655
5656 errno_t hfs_flush_invalid_ranges(vnode_t vp)
5657 {
5658 cnode_t *cp = VTOC(vp);
5659
5660 hfs_assert(cp->c_lockowner == current_thread());
5661 hfs_assert(cp->c_truncatelockowner == current_thread());
5662
5663 if (!ISSET(cp->c_flag, C_ZFWANTSYNC) && !cp->c_zftimeout)
5664 return 0;
5665
5666 filefork_t *fp = VTOF(vp);
5667
5668 /*
5669 * We can't hold the cnode lock whilst we call cluster_write so we
5670 * need to copy the extents into a local buffer.
5671 */
5672 int max_exts = 16;
5673 struct ext {
5674 off_t start, end;
5675 } exts_buf[max_exts]; // 256 bytes
5676 struct ext *exts = exts_buf;
5677 int ext_count = 0;
5678 errno_t ret;
5679
5680 struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges);
5681
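/*
 * Walk the invalid-range list, batching ranges into exts[]; once the batch
 * is full (or the list ends), drop the cnode lock, zero-fill each range on
 * disk via cluster_write with IO_HEADZEROFILL, then re-take the lock and
 * resume from where we left off in the list.
 */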
5682 while (r) {
5683 /* If we have more than can fit in our stack buffer, switch
5684 to a heap buffer. */
5685 if (exts == exts_buf && ext_count == max_exts) {
5686 max_exts = 256;
5687 exts = hfs_malloc(sizeof(struct ext) * max_exts);
5688 memcpy(exts, exts_buf, ext_count * sizeof(struct ext));
5689 }
5690
5691 struct rl_entry *next = TAILQ_NEXT(r, rl_link);
5692
5693 exts[ext_count++] = (struct ext){ r->rl_start, r->rl_end };
5694
5695 if (!next || (ext_count == max_exts && exts != exts_buf)) {
5696 hfs_unlock(cp);
5697 for (int i = 0; i < ext_count; ++i) {
5698 ret = cluster_write(vp, NULL, fp->ff_size, exts[i].end + 1,
5699 exts[i].start, 0,
5700 IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE);
5701 if (ret) {
5702 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
5703 goto exit;
5704 }
5705 }
5706
5707 if (!next) {
5708 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
5709 break;
5710 }
5711
5712 /* Push any existing clusters which should clean up our invalid
5713 ranges as they go through hfs_vnop_blockmap. */
5714 cluster_push(vp, 0);
5715
5716 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
5717
5718 /*
5719 * Get back to where we were (given we dropped the lock).
5720 * This shouldn't be many because we pushed above.
5721 */
5722 TAILQ_FOREACH(r, &fp->ff_invalidranges, rl_link) {
5723 if (r->rl_end > exts[ext_count - 1].end)
5724 break;
5725 }
5726
5727 ext_count = 0;
5728 } else
5729 r = next;
5730 }
5731
5732 ret = 0;
5733
5734 exit:
5735
5736 if (exts != exts_buf)
5737 hfs_free(exts, sizeof(struct ext) * max_exts);
5738
5739 return ret;
5740 }