1 /*
2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* @(#)hfs_readwrite.c 1.0
29 *
30 * (c) 1998-2001 Apple Inc. All Rights Reserved
31 *
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
33 *
34 */
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/kernel.h>
39 #include <sys/fcntl.h>
40 #include <sys/stat.h>
41 #include <sys/buf.h>
42 #include <sys/proc.h>
43 #include <sys/kauth.h>
44 #include <sys/vnode.h>
45 #include <sys/uio.h>
46 #include <sys/vfs_context.h>
47 #include <sys/disk.h>
48 #include <sys/sysctl.h>
49 #include <sys/fsctl.h>
50 #include <sys/ubc.h>
51 #include <sys/fsevents.h>
52 #include <uuid/uuid.h>
53
54 #include <libkern/OSDebug.h>
55
56 #include <miscfs/specfs/specdev.h>
57
58 #include <sys/ubc.h>
59
60 #include <vm/vm_pageout.h>
61 #include <vm/vm_kern.h>
62
63 #include <IOKit/IOBSD.h>
64
65 #include <sys/kdebug.h>
66
67 #include "hfs.h"
68 #include "hfs_attrlist.h"
69 #include "hfs_endian.h"
70 #include "hfs_fsctl.h"
71 #include "hfs_quota.h"
72 #include "FileMgrInternal.h"
73 #include "BTreesInternal.h"
74 #include "hfs_cnode.h"
75 #include "hfs_dbg.h"
76
77 #if HFS_CONFIG_KEY_ROLL
78 #include "hfs_key_roll.h"
79 #endif
80
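/* True when size is a multiple of 4 KiB and no more than half of MAXPHYSIO. */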
81 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
82
83 enum {
84 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
85 };
86
87 /* from bsd/hfs/hfs_vfsops.c */
88 extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
89
90 /* from hfs_hotfiles.c */
91 extern int hfs_pin_overflow_extents (struct hfsmount *hfsmp, uint32_t fileid,
92 uint8_t forktype, uint32_t *pinned);
93
94 static int hfs_clonefile(struct vnode *, int, int, int);
95 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
96 static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
97
98
99 /*
100 * Read data from a file.
101 */
102 int
103 hfs_vnop_read(struct vnop_read_args *ap)
104 {
105 /*
106 struct vnop_read_args {
107 struct vnodeop_desc *a_desc;
108 vnode_t a_vp;
109 struct uio *a_uio;
110 int a_ioflag;
111 vfs_context_t a_context;
112 };
113 */
114
115 uio_t uio = ap->a_uio;
116 struct vnode *vp = ap->a_vp;
117 struct cnode *cp;
118 struct filefork *fp;
119 struct hfsmount *hfsmp;
120 off_t filesize;
121 off_t filebytes;
122 off_t start_resid = uio_resid(uio);
123 off_t offset = uio_offset(uio);
124 int retval = 0;
125 int took_truncate_lock = 0;
126 int io_throttle = 0;
127 int throttled_count = 0;
128
129 /* Preflight checks */
130 if (!vnode_isreg(vp)) {
131 /* can only read regular files */
132 if (vnode_isdir(vp))
133 return (EISDIR);
134 else
135 return (EPERM);
136 }
137 if (start_resid == 0)
138 return (0); /* Nothing left to do */
139 if (offset < 0)
140 return (EINVAL); /* can't read from a negative offset */
141
142 #if SECURE_KERNEL
143 if ((ap->a_ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
144 (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
145 /* Don't allow unencrypted I/O requests from user space */
146 return EPERM;
147 }
148 #endif
149
150 #if HFS_COMPRESSION
151 if (VNODE_IS_RSRC(vp)) {
152 if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
153 return 0;
154 }
155 /* otherwise read the resource fork normally */
156 } else {
157 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
158 if (compressed) {
159 retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
160 if (retval == 0 && !(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
161 (void) hfs_addhotfile(vp);
162 }
163 if (compressed) {
164 if (retval == 0) {
165 /* successful read, update the access time */
166 VTOC(vp)->c_touch_acctime = TRUE;
167
168 //
169 // compressed files are not traditional hot file candidates
170 // but they may be for CF (which ignores the ff_bytesread
171 // field)
172 //
173 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
174 VTOF(vp)->ff_bytesread = 0;
175 }
176 }
177 return retval;
178 }
179 /* otherwise the file was converted back to a regular file while we were reading it */
180 retval = 0;
181 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
182 int error;
183
184 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
185 if (error) {
186 return error;
187 }
188
189 }
190 }
191 #endif /* HFS_COMPRESSION */
192
193 cp = VTOC(vp);
194 fp = VTOF(vp);
195 hfsmp = VTOHFS(vp);
196
197 #if CONFIG_PROTECT
198 if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {
199 goto exit;
200 }
201
202 #if HFS_CONFIG_KEY_ROLL
203 if (ISSET(ap->a_ioflag, IO_ENCRYPTED)) {
204 off_rsrc_t off_rsrc = off_rsrc_make(offset + start_resid,
205 VNODE_IS_RSRC(vp));
206
207 retval = hfs_key_roll_up_to(ap->a_context, vp, off_rsrc);
208 if (retval)
209 goto exit;
210 }
211 #endif // HFS_CONFIG_KEY_ROLL
212 #endif // CONFIG_PROTECT
213
214 /*
215 * If this read request originated from a syscall (as opposed to
216 * an in-kernel page fault or something), then set it up for
217 * throttle checks
218 */
219 if (ap->a_ioflag & IO_SYSCALL_DISPATCH) {
220 io_throttle = IO_RETURN_ON_THROTTLE;
221 }
222
223 read_again:
224
225 /* Protect against a size change. */
226 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
227 took_truncate_lock = 1;
228
229 filesize = fp->ff_size;
230 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
231
232 /*
233 * Check the file size. Note that per POSIX spec, we return 0 at
234 * file EOF, so attempting a read at an offset that is too big
235 * should just return 0 on HFS+. Since the return value was initialized
236 * to 0 above, we just jump to exit. HFS Standard has its own behavior.
237 */
238 if (offset > filesize) {
239 #if CONFIG_HFS_STD
240 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
241 (offset > (off_t)MAXHFSFILESIZE)) {
242 retval = EFBIG;
243 }
244 #endif
245 goto exit;
246 }
247
248 KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_START,
249 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
250
251 retval = cluster_read(vp, uio, filesize, ap->a_ioflag |io_throttle);
252
253 cp->c_touch_acctime = TRUE;
254
255 KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_END,
256 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
257
258 /*
259 * Keep track of blocks read
260 */
261 if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
262 int took_cnode_lock = 0;
263 off_t bytesread;
264
265 bytesread = start_resid - uio_resid(uio);
266
267 /* When ff_bytesread exceeds 32 bits, update it behind the cnode lock. */
268 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
269 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
270 took_cnode_lock = 1;
271 }
272 /*
273 * If this file hasn't been seen since the start of
274 * the current sampling period then start over.
275 */
276 if (cp->c_atime < hfsmp->hfc_timebase) {
277 struct timeval tv;
278
279 fp->ff_bytesread = bytesread;
280 microtime(&tv);
281 cp->c_atime = tv.tv_sec;
282 } else {
283 fp->ff_bytesread += bytesread;
284 }
285
286 if (!(ap->a_ioflag & IO_EVTONLY) && vnode_isfastdevicecandidate(vp)) {
287 //
288 // We don't add hotfiles for processes doing IO_EVTONLY I/O
289 // on the assumption that they're system processes such as
290 // mdworker which scan everything in the system (and thus
291 // do not represent user-initiated access to files)
292 //
293 (void) hfs_addhotfile(vp);
294 }
295 if (took_cnode_lock)
296 hfs_unlock(cp);
297 }
298 exit:
299 if (took_truncate_lock) {
300 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
301 }
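/* cluster_read returned EAGAIN because the I/O was throttled; wait out the throttle window, then retry from read_again. */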
302 if (retval == EAGAIN) {
303 throttle_lowpri_io(1);
304 throttled_count++;
305
306 retval = 0;
307 goto read_again;
308 }
309 if (throttled_count)
310 throttle_info_reset_window(NULL);
311 return (retval);
312 }
313
314 /*
315 * Ideally, this wouldn't be necessary; the cluster code should be
316 * able to handle this on the read-side. See <rdar://20420068>.
317 */
318 static errno_t hfs_zero_eof_page(vnode_t vp, off_t zero_up_to)
319 {
320 hfs_assert(VTOC(vp)->c_lockowner != current_thread());
321 hfs_assert(VTOC(vp)->c_truncatelockowner == current_thread());
322
323 struct filefork *fp = VTOF(vp);
324
325 if (!(fp->ff_size & PAGE_MASK_64) || zero_up_to <= fp->ff_size) {
326 // Nothing to do
327 return 0;
328 }
329
330 zero_up_to = MIN(zero_up_to, (off_t)round_page_64(fp->ff_size));
331
332 /* N.B. At present, @zero_up_to is not important because the cluster
333 code will always zero up to the end of the page anyway. */
334 return cluster_write(vp, NULL, fp->ff_size, zero_up_to,
335 fp->ff_size, 0, IO_HEADZEROFILL);
336 }
337
338 /*
339 * Write data to a file.
340 */
341 int
342 hfs_vnop_write(struct vnop_write_args *ap)
343 {
344 uio_t uio = ap->a_uio;
345 struct vnode *vp = ap->a_vp;
346 struct cnode *cp;
347 struct filefork *fp;
348 struct hfsmount *hfsmp;
349 kauth_cred_t cred = NULL;
350 off_t origFileSize;
351 off_t writelimit;
352 off_t bytesToAdd = 0;
353 off_t actualBytesAdded;
354 off_t filebytes;
355 off_t offset;
356 ssize_t resid;
357 int eflags;
358 int ioflag = ap->a_ioflag;
359 int retval = 0;
360 int lockflags;
361 int cnode_locked = 0;
362 int partialwrite = 0;
363 int do_snapshot = 1;
364 time_t orig_ctime=VTOC(vp)->c_ctime;
365 int took_truncate_lock = 0;
366 int io_return_on_throttle = 0;
367 int throttled_count = 0;
368
369 #if HFS_COMPRESSION
370 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
371 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
372 switch(state) {
373 case FILE_IS_COMPRESSED:
374 return EACCES;
375 case FILE_IS_CONVERTING:
376 /* if FILE_IS_CONVERTING, we allow writes but do not
377 bother with snapshots or else we will deadlock.
378 */
379 do_snapshot = 0;
380 break;
381 default:
382 printf("invalid state %d for compressed file\n", state);
383 /* fall through */
384 }
385 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
386 int error;
387
388 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
389 if (error != 0) {
390 return error;
391 }
392 }
393
394 if (do_snapshot) {
395 nspace_snapshot_event(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);
396 }
397
398 #endif
399
400 #if SECURE_KERNEL
401 if ((ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
402 (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
403 /* Don't allow unencrypted I/O requests from user space */
404 return EPERM;
405 }
406 #endif
407
408 resid = uio_resid(uio);
409 offset = uio_offset(uio);
410
411 if (offset < 0)
412 return (EINVAL);
413 if (resid == 0)
414 return (E_NONE);
415 if (!vnode_isreg(vp))
416 return (EPERM); /* Can only write regular files */
417
418 cp = VTOC(vp);
419 fp = VTOF(vp);
420 hfsmp = VTOHFS(vp);
421
422 #if CONFIG_PROTECT
423 if ((retval = cp_handle_vnop (vp, CP_WRITE_ACCESS, 0)) != 0) {
424 goto exit;
425 }
426 #endif
427
428 eflags = kEFDeferMask; /* defer file block allocations */
429 #if HFS_SPARSE_DEV
430 /*
431 * When the underlying device is sparse and space
432 * is low (< 8MB), stop doing delayed allocations
433 * and begin doing synchronous I/O.
434 */
435 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
436 (hfs_freeblks(hfsmp, 0) < 2048)) {
437 eflags &= ~kEFDeferMask;
438 ioflag |= IO_SYNC;
439 }
440 #endif /* HFS_SPARSE_DEV */
441
442 if ((ioflag & (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) ==
443 (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) {
444 io_return_on_throttle = IO_RETURN_ON_THROTTLE;
445 }
446
447 again:
448 /*
449 * Protect against a size change.
450 *
451 * Note: If took_truncate_lock is true, then we previously got the lock shared
452 * but needed to upgrade to exclusive. So try getting it exclusive from the
453 * start.
454 */
455 if (ioflag & IO_APPEND || took_truncate_lock) {
456 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
457 }
458 else {
459 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
460 }
461 took_truncate_lock = 1;
462
463 /* Update UIO */
464 if (ioflag & IO_APPEND) {
465 uio_setoffset(uio, fp->ff_size);
466 offset = fp->ff_size;
467 }
468 if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
469 retval = EPERM;
470 goto exit;
471 }
472
473 cred = vfs_context_ucred(ap->a_context);
474 if (cred && suser(cred, NULL) != 0)
475 eflags |= kEFReserveMask;
476
477 origFileSize = fp->ff_size;
478 writelimit = offset + resid;
479
480 /*
481 * We may need an exclusive truncate lock for several reasons, all
482 * of which are because we may be writing to a (portion of a) block
483 * for the first time, and we need to make sure no readers see the
484 * prior, uninitialized contents of the block. The cases are:
485 *
486 * 1. We have unallocated (delayed allocation) blocks. We may be
487 * allocating new blocks to the file and writing to them.
488 * (A more precise check would be whether the range we're writing
489 * to contains delayed allocation blocks.)
490 * 2. We need to extend the file. The bytes between the old EOF
491 * and the new EOF are not yet initialized. This is important
492 * even if we're not allocating new blocks to the file. If the
493 * old EOF and new EOF are in the same block, we still need to
494 * protect that range of bytes until they are written for the
495 * first time.
496 *
497 * If we hold a shared lock and any of the above cases apply, we need to try to upgrade
498 * to an exclusive lock. If the upgrade fails, we will lose the shared
499 * lock, and will need to take the truncate lock again; the took_truncate_lock
500 * flag will still be set, causing us to try for an exclusive lock next time.
501 */
502 if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
503 ((fp->ff_unallocblocks != 0) ||
504 (writelimit > origFileSize))) {
505 if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
506 /*
507 * Lock upgrade failed and we lost our shared lock, try again.
508 * Note: we do not set took_truncate_lock=0 here. Leaving it
509 * set to 1 will cause us to try to get the lock exclusive.
510 */
511 goto again;
512 }
513 else {
514 /* Store the owner in the c_truncatelockowner field if we successfully upgrade */
515 cp->c_truncatelockowner = current_thread();
516 }
517 }
518
519 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
520 goto exit;
521 }
522 cnode_locked = 1;
523
524 filebytes = hfs_blk_to_bytes(fp->ff_blocks, hfsmp->blockSize);
525
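/* If the write begins past the currently allocated bytes, make sure there is enough free space to cover the gap before going any further. */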
526 if (offset > filebytes
527 && (hfs_blk_to_bytes(hfs_freeblks(hfsmp, ISSET(eflags, kEFReserveMask)),
528 hfsmp->blockSize) < offset - filebytes)) {
529 retval = ENOSPC;
530 goto exit;
531 }
532
533 KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_START,
534 (int)offset, uio_resid(uio), (int)fp->ff_size,
535 (int)filebytes, 0);
536
537 /* If the write fits within the currently allocated blocks, we do not need to extend the file */
538 if (writelimit <= filebytes) {
539 goto sizeok;
540 }
541
542 bytesToAdd = writelimit - filebytes;
543
544 #if QUOTA
545 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
546 cred, 0);
547 if (retval)
548 goto exit;
549 #endif /* QUOTA */
550
551 if (hfs_start_transaction(hfsmp) != 0) {
552 retval = EINVAL;
553 goto exit;
554 }
555
556 while (writelimit > filebytes) {
557 bytesToAdd = writelimit - filebytes;
558
559 /* Protect extents b-tree and allocation bitmap */
560 lockflags = SFL_BITMAP;
561 if (overflow_extents(fp))
562 lockflags |= SFL_EXTENTS;
563 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
564
565 /* Files that are changing size are not hot file candidates. */
566 if (hfsmp->hfc_stage == HFC_RECORDING) {
567 fp->ff_bytesread = 0;
568 }
569 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
570 0, eflags, &actualBytesAdded));
571
572 hfs_systemfile_unlock(hfsmp, lockflags);
573
574 if ((actualBytesAdded == 0) && (retval == E_NONE))
575 retval = ENOSPC;
576 if (retval != E_NONE)
577 break;
578 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
579 KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_NONE,
580 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
581 }
582 (void) hfs_update(vp, 0);
583 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
584 (void) hfs_end_transaction(hfsmp);
585
586 /*
587 * If we didn't grow the file enough, try a partial write.
588 * POSIX expects this behavior.
589 */
590 if ((retval == ENOSPC) && (filebytes > offset)) {
591 retval = 0;
592 partialwrite = 1;
593 uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
594 resid -= bytesToAdd;
595 writelimit = filebytes;
596 }
597 sizeok:
598 if (retval == E_NONE) {
599 off_t filesize;
600 off_t head_off;
601 int lflag;
602
603 if (writelimit > fp->ff_size) {
604 filesize = writelimit;
605 struct timeval tv;
606 rl_add(fp->ff_size, writelimit - 1 , &fp->ff_invalidranges);
607 microuptime(&tv);
608 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
609 } else
610 filesize = fp->ff_size;
611
612 lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
613
614 /*
615 * We no longer use IO_HEADZEROFILL or IO_TAILZEROFILL (except
616 * for one case below). For the regions that lie before the
617 * beginning and after the end of this write that are in the
618 * same page, we let the cluster code handle zeroing that out
619 * if necessary. If those areas are not cached, the cluster
620 * code will try to read those areas in, and in the case
621 * where those regions have never been written to,
622 * hfs_vnop_blockmap will consult the invalid ranges and then
623 * indicate that. The cluster code will zero out those areas.
624 */
625
626 head_off = trunc_page_64(offset);
627
628 if (head_off < offset && head_off >= fp->ff_size) {
629 /*
630 * The first page is beyond current EOF, so as an
631 * optimisation, we can pass IO_HEADZEROFILL.
632 */
633 lflag |= IO_HEADZEROFILL;
634 }
635
636 hfs_unlock(cp);
637 cnode_locked = 0;
638
639 /*
640 * We need to tell UBC the fork's new size BEFORE calling
641 * cluster_write, in case any of the new pages need to be
642 * paged out before cluster_write completes (which does happen
643 * in embedded systems due to extreme memory pressure).
644 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
645 * will be, so that it can pass that on to cluster_pageout, and
646 * allow those pageouts.
647 *
648 * We don't update ff_size yet since we don't want pageins to
649 * be able to see uninitialized data between the old and new
650 * EOF, until cluster_write has completed and initialized that
651 * part of the file.
652 *
653 * The vnode pager relies on the file size last given to UBC via
654 * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
655 * ff_size (whichever is larger). NOTE: ff_new_size is always
656 * zero, unless we are extending the file via write.
657 */
658 if (filesize > fp->ff_size) {
659 retval = hfs_zero_eof_page(vp, offset);
660 if (retval)
661 goto exit;
662 fp->ff_new_size = filesize;
663 ubc_setsize(vp, filesize);
664 }
665 retval = cluster_write(vp, uio, fp->ff_size, filesize, head_off,
666 0, lflag | IO_NOZERODIRTY | io_return_on_throttle);
667 if (retval) {
668 fp->ff_new_size = 0; /* no longer extending; use ff_size */
669
670 if (retval == EAGAIN) {
671 /*
672 * EAGAIN indicates that we still have I/O to do, but
673 * that we now need to be throttled
674 */
675 if (resid != uio_resid(uio)) {
676 /*
677 * did manage to do some I/O before returning EAGAIN
678 */
679 resid = uio_resid(uio);
680 offset = uio_offset(uio);
681
682 cp->c_touch_chgtime = TRUE;
683 cp->c_touch_modtime = TRUE;
684 hfs_incr_gencount(cp);
685 }
686 if (filesize > fp->ff_size) {
687 /*
688 * we called ubc_setsize before the call to
689 * cluster_write... since we only partially
690 * completed the I/O, we need to
691 * re-adjust our idea of the filesize based
692 * on our interim EOF
693 */
694 ubc_setsize(vp, offset);
695
696 fp->ff_size = offset;
697 }
698 goto exit;
699 }
700 if (filesize > origFileSize) {
701 ubc_setsize(vp, origFileSize);
702 }
703 goto ioerr_exit;
704 }
705
706 if (filesize > origFileSize) {
707 fp->ff_size = filesize;
708
709 /* Files that are changing size are not hot file candidates. */
710 if (hfsmp->hfc_stage == HFC_RECORDING) {
711 fp->ff_bytesread = 0;
712 }
713 }
714 fp->ff_new_size = 0; /* ff_size now has the correct size */
715 }
716 if (partialwrite) {
717 uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
718 resid += bytesToAdd;
719 }
720
721 if (vnode_should_flush_after_write(vp, ioflag))
722 hfs_flush(hfsmp, HFS_FLUSH_CACHE);
723
724 ioerr_exit:
725 if (!cnode_locked) {
726 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
727 cnode_locked = 1;
728 }
729
730 if (resid > uio_resid(uio)) {
731 cp->c_touch_chgtime = TRUE;
732 cp->c_touch_modtime = TRUE;
733 hfs_incr_gencount(cp);
734
735 /*
736 * If we successfully wrote any data and we are not the superuser,
737 * we clear the setuid and setgid bits as a precaution against
738 * tampering.
739 */
740 if (cp->c_mode & (S_ISUID | S_ISGID)) {
741 cred = vfs_context_ucred(ap->a_context);
742 if (cred && suser(cred, NULL)) {
743 cp->c_mode &= ~(S_ISUID | S_ISGID);
744 }
745 }
746 }
747 if (retval) {
748 if (ioflag & IO_UNIT) {
749 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
750 0, ap->a_context);
751 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
752 uio_setresid(uio, resid);
753 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
754 }
755 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio)))
756 retval = hfs_update(vp, 0);
757
758 /* Updating vcbWrCnt doesn't need to be atomic. */
759 hfsmp->vcbWrCnt++;
760
761 KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_END,
762 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
763 exit:
764 if (retval && took_truncate_lock
765 && cp->c_truncatelockowner == current_thread()) {
766 fp->ff_new_size = 0;
767 rl_remove(fp->ff_size, RL_INFINITY, &fp->ff_invalidranges);
768 }
769
770 if (cnode_locked)
771 hfs_unlock(cp);
772
773 if (took_truncate_lock) {
774 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
775 }
776 if (retval == EAGAIN) {
777 throttle_lowpri_io(1);
778 throttled_count++;
779
780 retval = 0;
781 goto again;
782 }
783 if (throttled_count)
784 throttle_info_reset_window(NULL);
785 return (retval);
786 }
787
788 /* support for the "bulk-access" fcntl */
789
790 #define CACHE_LEVELS 16
791 #define NUM_CACHE_ENTRIES (64*16)
792 #define PARENT_IDS_FLAG 0x100
793
794 struct access_cache {
795 int numcached;
796 int cachehits; /* these two for statistics gathering */
797 int lookups;
798 unsigned int *acache;
799 unsigned char *haveaccess;
800 };
801
802 struct access_t {
803 uid_t uid; /* IN: effective user id */
804 short flags; /* IN: access requested (i.e. R_OK) */
805 short num_groups; /* IN: number of groups user belongs to */
806 int num_files; /* IN: number of files to process */
807 int *file_ids; /* IN: array of file ids */
808 gid_t *groups; /* IN: array of groups */
809 short *access; /* OUT: access info for each file (0 for 'has access') */
810 } __attribute__((unavailable)); // this structure is for reference purposes only
811
812 struct user32_access_t {
813 uid_t uid; /* IN: effective user id */
814 short flags; /* IN: access requested (i.e. R_OK) */
815 short num_groups; /* IN: number of groups user belongs to */
816 int num_files; /* IN: number of files to process */
817 user32_addr_t file_ids; /* IN: array of file ids */
818 user32_addr_t groups; /* IN: array of groups */
819 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
820 };
821
822 struct user64_access_t {
823 uid_t uid; /* IN: effective user id */
824 short flags; /* IN: access requested (i.e. R_OK) */
825 short num_groups; /* IN: number of groups user belongs to */
826 int num_files; /* IN: number of files to process */
827 user64_addr_t file_ids; /* IN: array of file ids */
828 user64_addr_t groups; /* IN: array of groups */
829 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
830 };
831
832
833 // these are the "extended" versions of the above structures
834 // note that it is crucial that they have a different size than
835 // the regular versions
836 struct ext_access_t {
837 uint32_t flags; /* IN: access requested (i.e. R_OK) */
838 uint32_t num_files; /* IN: number of files to process */
839 uint32_t map_size; /* IN: size of the bit map */
840 uint32_t *file_ids; /* IN: Array of file ids */
841 char *bitmap; /* OUT: hash-bitmap of interesting directory ids */
842 short *access; /* OUT: access info for each file (0 for 'has access') */
843 uint32_t num_parents; /* future use */
844 cnid_t *parents; /* future use */
845 } __attribute__((unavailable)); // this structure is for reference purposes only
846
847 struct user32_ext_access_t {
848 uint32_t flags; /* IN: access requested (i.e. R_OK) */
849 uint32_t num_files; /* IN: number of files to process */
850 uint32_t map_size; /* IN: size of the bit map */
851 user32_addr_t file_ids; /* IN: Array of file ids */
852 user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
853 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
854 uint32_t num_parents; /* future use */
855 user32_addr_t parents; /* future use */
856 };
857
858 struct user64_ext_access_t {
859 uint32_t flags; /* IN: access requested (i.e. R_OK) */
860 uint32_t num_files; /* IN: number of files to process */
861 uint32_t map_size; /* IN: size of the bit map */
862 user64_addr_t file_ids; /* IN: array of file ids */
863 user64_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
864 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
865 uint32_t num_parents;/* future use */
866 user64_addr_t parents;/* future use */
867 };
868
869
870 /*
871 * Perform a binary search for the given parent_id. Return value is
872 * the index if there is a match. If no_match_indexp is non-NULL it
873 * will be assigned the index at which to insert the item (even if no
874 * match was found).
875 */
876 static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
877 {
878 int index=-1;
879 unsigned int lo=0;
880
881 do {
882 unsigned int mid = ((hi - lo)/2) + lo;
883 unsigned int this_id = array[mid];
884
885 if (parent_id == this_id) {
886 hi = mid;
887 break;
888 }
889
890 if (parent_id < this_id) {
891 hi = mid;
892 continue;
893 }
894
895 if (parent_id > this_id) {
896 lo = mid + 1;
897 continue;
898 }
899 } while(lo < hi);
900
901 /* check if lo and hi converged on the match */
902 if (parent_id == array[hi]) {
903 index = hi;
904 }
905
906 if (no_match_indexp) {
907 *no_match_indexp = hi;
908 }
909
910 return index;
911 }
912
913
914 static int
915 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
916 {
917 unsigned int hi;
918 int matches = 0;
919 int index, no_match_index;
920
921 if (cache->numcached == 0) {
922 *indexp = 0;
923 return 0; // table is empty, so insert at index=0 and report no match
924 }
925
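/* Defensive clamp so we never index past the allocated cache arrays. */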
926 if (cache->numcached > NUM_CACHE_ENTRIES) {
927 cache->numcached = NUM_CACHE_ENTRIES;
928 }
929
930 hi = cache->numcached - 1;
931
932 index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);
933
934 /* if no existing entry found, find index for new one */
935 if (index == -1) {
936 index = no_match_index;
937 matches = 0;
938 } else {
939 matches = 1;
940 }
941
942 *indexp = index;
943 return matches;
944 }
945
946 /*
947 * Add a node to the access_cache at the given index (or do a lookup first
948 * to find the index if -1 is passed in). We currently do a replace rather
949 * than an insert if the cache is full.
950 */
951 static void
952 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
953 {
954 int lookup_index = -1;
955
956 /* need to do a lookup first if -1 passed for index */
957 if (index == -1) {
958 if (lookup_bucket(cache, &lookup_index, nodeID)) {
959 if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
960 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
961 cache->haveaccess[lookup_index] = access;
962 }
963
964 /* mission accomplished */
965 return;
966 } else {
967 index = lookup_index;
968 }
969
970 }
971
972 /* if the cache is full, do a replace rather than an insert */
973 if (cache->numcached >= NUM_CACHE_ENTRIES) {
974 cache->numcached = NUM_CACHE_ENTRIES-1;
975
976 if (index > cache->numcached) {
977 index = cache->numcached;
978 }
979 }
980
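/* Keep the cache sorted: if the new node id sorts after the entry currently at index, move the insertion point one slot to the right. */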
981 if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
982 index++;
983 }
984
985 if (index >= 0 && index < cache->numcached) {
986 /* only do bcopy if we're inserting */
987 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
988 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
989 }
990
991 cache->acache[index] = nodeID;
992 cache->haveaccess[index] = access;
993 cache->numcached++;
994 }
995
996
997 struct cinfo {
998 uid_t uid;
999 gid_t gid;
1000 mode_t mode;
1001 cnid_t parentcnid;
1002 u_int16_t recflags;
1003 };
1004
1005 static int
1006 snoop_callback(const cnode_t *cp, void *arg)
1007 {
1008 struct cinfo *cip = arg;
1009
1010 cip->uid = cp->c_uid;
1011 cip->gid = cp->c_gid;
1012 cip->mode = cp->c_mode;
1013 cip->parentcnid = cp->c_parentcnid;
1014 cip->recflags = cp->c_attr.ca_recflags;
1015
1016 return (0);
1017 }
1018
1019 /*
1020 * Look up the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
1021 * isn't in core, then go to the catalog.
1022 */
1023 static int
1024 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
1025 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
1026 {
1027 int error = 0;
1028
1029 /* if this id matches the one the fsctl was called with, skip the lookup */
1030 if (cnid == skip_cp->c_cnid) {
1031 cnattrp->ca_uid = skip_cp->c_uid;
1032 cnattrp->ca_gid = skip_cp->c_gid;
1033 cnattrp->ca_mode = skip_cp->c_mode;
1034 cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
1035 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
1036 } else {
1037 struct cinfo c_info;
1038
1039 /* otherwise, check the cnode hash in case the file/dir is in core */
1040 error = hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info);
1041
1042 if (error == EACCES) {
1043 // File is deleted
1044 return ENOENT;
1045 } else if (!error) {
1046 cnattrp->ca_uid = c_info.uid;
1047 cnattrp->ca_gid = c_info.gid;
1048 cnattrp->ca_mode = c_info.mode;
1049 cnattrp->ca_recflags = c_info.recflags;
1050 keyp->hfsPlus.parentID = c_info.parentcnid;
1051 } else {
1052 int lockflags;
1053
1054 if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
1055 throttle_lowpri_io(1);
1056
1057 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1058
1059 /* lookup this cnid in the catalog */
1060 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
1061
1062 hfs_systemfile_unlock(hfsmp, lockflags);
1063
1064 cache->lookups++;
1065 }
1066 }
1067
1068 return (error);
1069 }
1070
1071
1072 /*
1073 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
1074 * up to CACHE_LEVELS as we progress towards the root.
1075 */
1076 static int
1077 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
1078 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
1079 struct vfs_context *my_context,
1080 char *bitmap,
1081 uint32_t map_size,
1082 cnid_t* parents,
1083 uint32_t num_parents)
1084 {
1085 int myErr = 0;
1086 int myResult;
1087 HFSCatalogNodeID thisNodeID;
1088 unsigned int myPerms;
1089 struct cat_attr cnattr;
1090 int cache_index = -1, scope_index = -1, scope_idx_start = -1;
1091 CatalogKey catkey;
1092
1093 int i = 0, ids_to_cache = 0;
1094 int parent_ids[CACHE_LEVELS];
1095
1096 thisNodeID = nodeID;
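/* Walk from nodeID up toward the root, checking access at each directory level. */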
1097 while (thisNodeID >= kRootDirID) {
1098 myResult = 0; /* default to "no access" */
1099
1100 /* check the cache before resorting to hitting the catalog */
1101
1102 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
1103 * to look any further after hitting cached dir */
1104
1105 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
1106 cache->cachehits++;
1107 myErr = cache->haveaccess[cache_index];
1108 if (scope_index != -1) {
1109 if (myErr == ESRCH) {
1110 myErr = 0;
1111 }
1112 } else {
1113 scope_index = 0; // so we'll just use the cache result
1114 scope_idx_start = ids_to_cache;
1115 }
1116 myResult = (myErr == 0) ? 1 : 0;
1117 goto ExitThisRoutine;
1118 }
1119
1120
1121 if (parents) {
1122 int tmp;
1123 tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
1124 if (scope_index == -1)
1125 scope_index = tmp;
1126 if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
1127 scope_idx_start = ids_to_cache;
1128 }
1129 }
1130
1131 /* remember which parents we want to cache */
1132 if (ids_to_cache < CACHE_LEVELS) {
1133 parent_ids[ids_to_cache] = thisNodeID;
1134 ids_to_cache++;
1135 }
1136 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
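/* Mark this directory id in the caller-supplied bitmap: bit (id & 7) of byte ((id / 8) % map_size). */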
1137 if (bitmap && map_size) {
1138 bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
1139 }
1140
1141
1142 /* do the lookup (checks the cnode hash, then the catalog) */
1143 myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
1144 if (myErr) {
1145 goto ExitThisRoutine; /* no access */
1146 }
1147
1148 /* Root always gets access. */
1149 if (suser(myp_ucred, NULL) == 0) {
1150 thisNodeID = catkey.hfsPlus.parentID;
1151 myResult = 1;
1152 continue;
1153 }
1154
1155 // if the thing has ACLs, do the full permission check
1156 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1157 struct vnode *vp;
1158
1159 /* get the vnode for this cnid */
1160 myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
1161 if ( myErr ) {
1162 myResult = 0;
1163 goto ExitThisRoutine;
1164 }
1165
1166 thisNodeID = VTOC(vp)->c_parentcnid;
1167
1168 hfs_unlock(VTOC(vp));
1169
1170 if (vnode_vtype(vp) == VDIR) {
1171 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
1172 } else {
1173 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
1174 }
1175
1176 vnode_put(vp);
1177 if (myErr) {
1178 myResult = 0;
1179 goto ExitThisRoutine;
1180 }
1181 } else {
1182 unsigned int flags;
1183 int mode = cnattr.ca_mode & S_IFMT;
1184 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp,myp_ucred, theProcPtr);
1185
1186 if (mode == S_IFDIR) {
1187 flags = R_OK | X_OK;
1188 } else {
1189 flags = R_OK;
1190 }
1191 if ( (myPerms & flags) != flags) {
1192 myResult = 0;
1193 myErr = EACCES;
1194 goto ExitThisRoutine; /* no access */
1195 }
1196
1197 /* up the hierarchy we go */
1198 thisNodeID = catkey.hfsPlus.parentID;
1199 }
1200 }
1201
1202 /* if here, we have access to this node */
1203 myResult = 1;
1204
1205 ExitThisRoutine:
1206 if (parents && myErr == 0 && scope_index == -1) {
1207 myErr = ESRCH;
1208 }
1209
1210 if (myErr) {
1211 myResult = 0;
1212 }
1213 *err = myErr;
1214
1215 /* cache the parent directory(ies) */
1216 for (i = 0; i < ids_to_cache; i++) {
1217 if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
1218 add_node(cache, -1, parent_ids[i], ESRCH);
1219 } else {
1220 add_node(cache, -1, parent_ids[i], myErr);
1221 }
1222 }
1223
1224 return (myResult);
1225 }
1226
1227 static int
1228 do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
1229 struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
1230 {
1231 boolean_t is64bit;
1232
1233 /*
1234 * NOTE: on entry, the vnode has an io_ref. In case this vnode
1235 * happens to be in our list of file_ids, we'll note it and
1236 * avoid calling hfs_chashget_nowait() on that id, as that
1237 * will cause a "locking against myself" panic.
1238 */
1239 Boolean check_leaf = true;
1240
1241 struct user64_ext_access_t *user_access_structp;
1242 struct user64_ext_access_t tmp_user_access;
1243 struct access_cache cache;
1244
1245 int error = 0, prev_parent_check_ok=1;
1246 unsigned int i;
1247
1248 short flags;
1249 unsigned int num_files = 0;
1250 int map_size = 0;
1251 int num_parents = 0;
1252 int *file_ids=NULL;
1253 short *access=NULL;
1254 char *bitmap=NULL;
1255 cnid_t *parents=NULL;
1256 int leaf_index;
1257
1258 cnid_t cnid;
1259 cnid_t prevParent_cnid = 0;
1260 unsigned int myPerms;
1261 short myaccess = 0;
1262 struct cat_attr cnattr;
1263 CatalogKey catkey;
1264 struct cnode *skip_cp = VTOC(vp);
1265 kauth_cred_t cred = vfs_context_ucred(context);
1266 proc_t p = vfs_context_proc(context);
1267
1268 is64bit = proc_is64bit(p);
1269
1270 /* initialize the local cache and buffers */
1271 cache.numcached = 0;
1272 cache.cachehits = 0;
1273 cache.lookups = 0;
1274 cache.acache = NULL;
1275 cache.haveaccess = NULL;
1276
1277 /* struct copyin done during dispatch... need to copy file_id array separately */
1278 if (ap->a_data == NULL) {
1279 error = EINVAL;
1280 goto err_exit_bulk_access;
1281 }
1282
1283 if (is64bit) {
1284 if (arg_size != sizeof(struct user64_ext_access_t)) {
1285 error = EINVAL;
1286 goto err_exit_bulk_access;
1287 }
1288
1289 user_access_structp = (struct user64_ext_access_t *)ap->a_data;
1290
1291 } else if (arg_size == sizeof(struct user32_access_t)) {
1292 struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;
1293
1294 // convert an old style bulk-access struct to the new style
1295 tmp_user_access.flags = accessp->flags;
1296 tmp_user_access.num_files = accessp->num_files;
1297 tmp_user_access.map_size = 0;
1298 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1299 tmp_user_access.bitmap = USER_ADDR_NULL;
1300 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1301 tmp_user_access.num_parents = 0;
1302 user_access_structp = &tmp_user_access;
1303
1304 } else if (arg_size == sizeof(struct user32_ext_access_t)) {
1305 struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;
1306
1307 // up-cast from a 32-bit version of the struct
1308 tmp_user_access.flags = accessp->flags;
1309 tmp_user_access.num_files = accessp->num_files;
1310 tmp_user_access.map_size = accessp->map_size;
1311 tmp_user_access.num_parents = accessp->num_parents;
1312
1313 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1314 tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
1315 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1316 tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);
1317
1318 user_access_structp = &tmp_user_access;
1319 } else {
1320 error = EINVAL;
1321 goto err_exit_bulk_access;
1322 }
1323
1324 map_size = user_access_structp->map_size;
1325
1326 num_files = user_access_structp->num_files;
1327
1328 num_parents= user_access_structp->num_parents;
1329
1330 if (num_files < 1) {
1331 goto err_exit_bulk_access;
1332 }
1333 if (num_files > 1024) {
1334 error = EINVAL;
1335 goto err_exit_bulk_access;
1336 }
1337
1338 if (num_parents > 1024) {
1339 error = EINVAL;
1340 goto err_exit_bulk_access;
1341 }
1342
1343 file_ids = hfs_malloc(sizeof(int) * num_files);
1344 access = hfs_malloc(sizeof(short) * num_files);
1345 if (map_size) {
1346 bitmap = hfs_mallocz(sizeof(char) * map_size);
1347 }
1348
1349 if (num_parents) {
1350 parents = hfs_malloc(sizeof(cnid_t) * num_parents);
1351 }
1352
1353 cache.acache = hfs_malloc(sizeof(int) * NUM_CACHE_ENTRIES);
1354 cache.haveaccess = hfs_malloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1355
1356 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1357 num_files * sizeof(int)))) {
1358 goto err_exit_bulk_access;
1359 }
1360
1361 if (num_parents) {
1362 if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
1363 num_parents * sizeof(cnid_t)))) {
1364 goto err_exit_bulk_access;
1365 }
1366 }
1367
1368 flags = user_access_structp->flags;
1369 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1370 flags = R_OK;
1371 }
1372
1373 /* check if we've been passed leaf node ids or parent ids */
1374 if (flags & PARENT_IDS_FLAG) {
1375 check_leaf = false;
1376 }
1377
1378 /* Check access to each file_id passed in */
1379 for (i = 0; i < num_files; i++) {
1380 leaf_index=-1;
1381 cnid = (cnid_t) file_ids[i];
1382
1383 /* root always has access */
1384 if ((!parents) && (!suser(cred, NULL))) {
1385 access[i] = 0;
1386 continue;
1387 }
1388
1389 if (check_leaf) {
1390 /* do the lookup (checks the cnode hash, then the catalog) */
1391 error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
1392 if (error) {
1393 access[i] = (short) error;
1394 continue;
1395 }
1396
1397 if (parents) {
1398 // Check if the leaf matches one of the parent scopes
1399 leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
1400 if (leaf_index >= 0 && parents[leaf_index] == cnid)
1401 prev_parent_check_ok = 0;
1402 else if (leaf_index >= 0)
1403 prev_parent_check_ok = 1;
1404 }
1405
1406 // if the thing has ACLs, do the full permission check
1407 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1408 struct vnode *cvp;
1409 int myErr = 0;
1410 /* get the vnode for this cnid */
1411 myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);
1412 if ( myErr ) {
1413 access[i] = myErr;
1414 continue;
1415 }
1416
1417 hfs_unlock(VTOC(cvp));
1418
1419 if (vnode_vtype(cvp) == VDIR) {
1420 myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
1421 } else {
1422 myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
1423 }
1424
1425 vnode_put(cvp);
1426 if (myErr) {
1427 access[i] = myErr;
1428 continue;
1429 }
1430 } else {
1431 /* before calling CheckAccess(), check the target file for read access */
1432 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1433 cnattr.ca_mode, hfsmp->hfs_mp, cred, p);
1434
1435 /* fail fast if no access */
1436 if ((myPerms & flags) == 0) {
1437 access[i] = EACCES;
1438 continue;
1439 }
1440 }
1441 } else {
1442 /* we were passed an array of parent ids */
1443 catkey.hfsPlus.parentID = cnid;
1444 }
1445
1446 /* if the last guy had the same parent and had access, we're done */
1447 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
1448 cache.cachehits++;
1449 access[i] = 0;
1450 continue;
1451 }
1452
1453 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1454 skip_cp, p, cred, context,bitmap, map_size, parents, num_parents);
1455
1456 if (myaccess || (error == ESRCH && leaf_index != -1)) {
1457 access[i] = 0; // have access.. no errors to report
1458 } else {
1459 access[i] = (error != 0 ? (short) error : EACCES);
1460 }
1461
1462 prevParent_cnid = catkey.hfsPlus.parentID;
1463 }
1464
1465 /* copyout the access array */
1466 if ((error = copyout((caddr_t)access, user_access_structp->access,
1467 num_files * sizeof (short)))) {
1468 goto err_exit_bulk_access;
1469 }
1470 if (map_size && bitmap) {
1471 if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
1472 map_size * sizeof (char)))) {
1473 goto err_exit_bulk_access;
1474 }
1475 }
1476
1477
1478 err_exit_bulk_access:
1479
1480 hfs_free(file_ids, sizeof(int) * num_files);
1481 hfs_free(parents, sizeof(cnid_t) * num_parents);
1482 hfs_free(bitmap, sizeof(char) * map_size);
1483 hfs_free(access, sizeof(short) * num_files);
1484 hfs_free(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1485 hfs_free(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1486
1487 return (error);
1488 }
1489
1490
1491 /* end "bulk-access" support */
1492
1493
1494 /*
1495 * Control filesystem operating characteristics.
1496 */
1497 int
1498 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
1499 vnode_t a_vp;
1500 long a_command;
1501 caddr_t a_data;
1502 int a_fflag;
1503 vfs_context_t a_context;
1504 } */ *ap)
1505 {
1506 struct vnode * vp = ap->a_vp;
1507 struct hfsmount *hfsmp = VTOHFS(vp);
1508 vfs_context_t context = ap->a_context;
1509 kauth_cred_t cred = vfs_context_ucred(context);
1510 proc_t p = vfs_context_proc(context);
1511 struct vfsstatfs *vfsp;
1512 boolean_t is64bit;
1513 off_t jnl_start, jnl_size;
1514 struct hfs_journal_info *jip;
1515 #if HFS_COMPRESSION
1516 int compressed = 0;
1517 off_t uncompressed_size = -1;
1518 int decmpfs_error = 0;
1519
1520 if (ap->a_command == F_RDADVISE) {
1521 /* we need to inspect the decmpfs state of the file as early as possible */
1522 compressed = hfs_file_is_compressed(VTOC(vp), 0);
1523 if (compressed) {
1524 if (VNODE_IS_RSRC(vp)) {
1525 /* if this is the resource fork, treat it as if it were empty */
1526 uncompressed_size = 0;
1527 } else {
1528 decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
1529 if (decmpfs_error != 0) {
1530 /* failed to get the uncompressed size, we'll check for this later */
1531 uncompressed_size = -1;
1532 }
1533 }
1534 }
1535 }
1536 #endif /* HFS_COMPRESSION */
1537
1538 is64bit = proc_is64bit(p);
1539
1540 #if CONFIG_PROTECT
1541 #if HFS_CONFIG_KEY_ROLL
1542 // The HFS_KEY_ROLL fsctl does its own access checks
1543 if (ap->a_command != HFS_KEY_ROLL)
1544 #endif
1545 {
1546 int error = 0;
1547 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
1548 return error;
1549 }
1550 }
1551 #endif /* CONFIG_PROTECT */
1552
1553 switch (ap->a_command) {
1554
1555 case HFS_GETPATH:
1556 {
1557 struct vnode *file_vp;
1558 cnid_t cnid;
1559 int outlen;
1560 char *bufptr;
1561 int error;
1562 int flags = 0;
1563
1564 /* Caller must be owner of file system. */
1565 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1566 if (suser(cred, NULL) &&
1567 kauth_cred_getuid(cred) != vfsp->f_owner) {
1568 return (EACCES);
1569 }
1570 /* Target vnode must be file system's root. */
1571 if (!vnode_isvroot(vp)) {
1572 return (EINVAL);
1573 }
1574 bufptr = (char *)ap->a_data;
1575 cnid = strtoul(bufptr, NULL, 10);
1576 if (ap->a_fflag & HFS_GETPATH_VOLUME_RELATIVE) {
1577 flags |= BUILDPATH_VOLUME_RELATIVE;
1578 }
1579
1580 /* We need to call hfs_vfs_vget to leverage the code that will
1581 * fix the origin list for us if needed, as opposed to calling
1582 * hfs_vget, since we will need the parent for the build_path call.
1583 */
1584
1585 if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
1586 return (error);
1587 }
1588
1589 error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, flags, context);
1590 vnode_put(file_vp);
1591
1592 return (error);
1593 }
1594
1595 case HFS_SET_MAX_DEFRAG_SIZE:
1596 {
1597 int error = 0; /* Assume success */
1598 u_int32_t maxsize = 0;
1599
1600 if (vnode_vfsisrdonly(vp)) {
1601 return (EROFS);
1602 }
1603 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1604 if (!kauth_cred_issuser(cred)) {
1605 return (EACCES); /* must be root */
1606 }
1607
1608 maxsize = *(u_int32_t *)ap->a_data;
1609
1610 hfs_lock_mount(hfsmp);
1611 if (maxsize > HFS_MAX_DEFRAG_SIZE) {
1612 error = EINVAL;
1613 }
1614 else {
1615 hfsmp->hfs_defrag_max = maxsize;
1616 }
1617 hfs_unlock_mount(hfsmp);
1618
1619 return (error);
1620 }
1621
1622 case HFS_FORCE_ENABLE_DEFRAG:
1623 {
1624 int error = 0; /* Assume success */
1625 u_int32_t do_enable = 0;
1626
1627 if (vnode_vfsisrdonly(vp)) {
1628 return (EROFS);
1629 }
1630 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1631 if (!kauth_cred_issuser(cred)) {
1632 return (EACCES); /* must be root */
1633 }
1634
1635 do_enable = *(u_int32_t *)ap->a_data;
1636
1637 hfs_lock_mount(hfsmp);
1638 if (do_enable != 0) {
1639 hfsmp->hfs_defrag_nowait = 1;
1640 }
1641 else {
1642 error = EINVAL;
1643 }
1644
1645 hfs_unlock_mount(hfsmp);
1646
1647 return (error);
1648 }
1649
1650
1651 case HFS_TRANSFER_DOCUMENT_ID:
1652 {
1653 struct cnode *cp = NULL;
1654 int error;
1655 u_int32_t to_fd = *(u_int32_t *)ap->a_data;
1656 struct fileproc *to_fp;
1657 struct vnode *to_vp;
1658 struct cnode *to_cp;
1659
1660 cp = VTOC(vp);
1661
1662 if ((error = fp_getfvp(p, to_fd, &to_fp, &to_vp)) != 0) {
1663 //printf("could not get the vnode for fd %d (err %d)\n", to_fd, error);
1664 return error;
1665 }
1666 if ( (error = vnode_getwithref(to_vp)) ) {
1667 file_drop(to_fd);
1668 return error;
1669 }
1670
1671 if (VTOHFS(to_vp) != hfsmp) {
1672 error = EXDEV;
1673 goto transfer_cleanup;
1674 }
1675
1676 int need_unlock = 1;
1677 to_cp = VTOC(to_vp);
1678 error = hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
1679 if (error != 0) {
1680 //printf("could not lock the pair of cnodes (error %d)\n", error);
1681 goto transfer_cleanup;
1682 }
1683
1684 if (!(cp->c_bsdflags & UF_TRACKED)) {
1685 error = EINVAL;
1686 } else if (to_cp->c_bsdflags & UF_TRACKED) {
1687 //
1688 // if the destination is already tracked, return an error
1689 // as otherwise it's a silent deletion of the target's
1690 // document-id
1691 //
1692 error = EEXIST;
1693 } else if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
1694 //
1695 // we can use the FndrExtendedFileInfo because the doc-id is the first
1695 // thing in both it and the ExtendedDirInfo struct, which is fixed in
1696 // format and cannot change layout
1698 //
1699 struct FndrExtendedFileInfo *f_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16);
1700 struct FndrExtendedFileInfo *to_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)to_cp->c_finderinfo + 16);
1701
1702 if (f_extinfo->document_id == 0) {
1703 uint32_t new_id;
1704
1705 hfs_unlockpair(cp, to_cp); // have to unlock to be able to get a new-id
1706
1707 if ((error = hfs_generate_document_id(hfsmp, &new_id)) == 0) {
1708 //
1709 // re-lock the pair now that we have the document-id
1710 //
1711 hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
1712 f_extinfo->document_id = new_id;
1713 } else {
1714 goto transfer_cleanup;
1715 }
1716 }
1717
1718 to_extinfo->document_id = f_extinfo->document_id;
1719 f_extinfo->document_id = 0;
1720 //printf("TRANSFERRING: doc-id %d from ino %d to ino %d\n", to_extinfo->document_id, cp->c_fileid, to_cp->c_fileid);
1721
1722 // make sure the destination is also UF_TRACKED
1723 to_cp->c_bsdflags |= UF_TRACKED;
1724 cp->c_bsdflags &= ~UF_TRACKED;
1725
1726 // mark the cnodes dirty
1727 cp->c_flag |= C_MODIFIED;
1728 to_cp->c_flag |= C_MODIFIED;
1729
1730 int lockflags;
1731 if ((error = hfs_start_transaction(hfsmp)) == 0) {
1732
1733 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1734
1735 (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
1736 (void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, NULL, NULL);
1737
1738 hfs_systemfile_unlock (hfsmp, lockflags);
1739 (void) hfs_end_transaction(hfsmp);
1740 }
1741
1742 add_fsevent(FSE_DOCID_CHANGED, context,
1743 FSE_ARG_DEV, hfsmp->hfs_raw_dev,
1744 FSE_ARG_INO, (ino64_t)cp->c_fileid, // src inode #
1745 FSE_ARG_INO, (ino64_t)to_cp->c_fileid, // dst inode #
1746 FSE_ARG_INT32, to_extinfo->document_id,
1747 FSE_ARG_DONE);
1748
1749 hfs_unlockpair(cp, to_cp); // unlock this so we can send the fsevents
1750 need_unlock = 0;
1751
1752 if (need_fsevent(FSE_STAT_CHANGED, vp)) {
1753 add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
1754 }
1755 if (need_fsevent(FSE_STAT_CHANGED, to_vp)) {
1756 add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, to_vp, FSE_ARG_DONE);
1757 }
1758 }
1759
1760 if (need_unlock) {
1761 hfs_unlockpair(cp, to_cp);
1762 }
1763
1764 transfer_cleanup:
1765 vnode_put(to_vp);
1766 file_drop(to_fd);
1767
1768 return error;
1769 }
1770
1771
1772
1773 case HFS_PREV_LINK:
1774 case HFS_NEXT_LINK:
1775 {
1776 cnid_t linkfileid;
1777 cnid_t nextlinkid;
1778 cnid_t prevlinkid;
1779 int error;
1780
1781 /* Caller must be owner of file system. */
1782 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1783 if (suser(cred, NULL) &&
1784 kauth_cred_getuid(cred) != vfsp->f_owner) {
1785 return (EACCES);
1786 }
1787 /* Target vnode must be file system's root. */
1788 if (!vnode_isvroot(vp)) {
1789 return (EINVAL);
1790 }
1791 linkfileid = *(cnid_t *)ap->a_data;
1792 if (linkfileid < kHFSFirstUserCatalogNodeID) {
1793 return (EINVAL);
1794 }
1795 if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
1796 return (error);
1797 }
1798 if (ap->a_command == HFS_NEXT_LINK) {
1799 *(cnid_t *)ap->a_data = nextlinkid;
1800 } else {
1801 *(cnid_t *)ap->a_data = prevlinkid;
1802 }
1803 return (0);
1804 }
1805
1806 case HFS_RESIZE_PROGRESS: {
1807
1808 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1809 if (suser(cred, NULL) &&
1810 kauth_cred_getuid(cred) != vfsp->f_owner) {
1811 return (EACCES); /* must be owner of file system */
1812 }
1813 if (!vnode_isvroot(vp)) {
1814 return (EINVAL);
1815 }
1816 /* file system must not be mounted read-only */
1817 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1818 return (EROFS);
1819 }
1820
1821 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
1822 }
1823
1824 case HFS_RESIZE_VOLUME: {
1825 u_int64_t newsize;
1826 u_int64_t cursize;
1827 int ret;
1828
1829 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1830 if (suser(cred, NULL) &&
1831 kauth_cred_getuid(cred) != vfsp->f_owner) {
1832 return (EACCES); /* must be owner of file system */
1833 }
1834 if (!vnode_isvroot(vp)) {
1835 return (EINVAL);
1836 }
1837
1838 /* filesystem must not be mounted read only */
1839 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1840 return (EROFS);
1841 }
1842 newsize = *(u_int64_t *)ap->a_data;
1843 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
1844
1845 if (newsize == cursize) {
1846 return (0);
1847 }
1848 IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeWillResize);
1849 if (newsize > cursize) {
1850 ret = hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
1851 } else {
1852 ret = hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
1853 }
1854 IOBSDMountChange(hfsmp->hfs_mp, kIOMountChangeDidResize);
1855 return (ret);
1856 }
1857 case HFS_CHANGE_NEXT_ALLOCATION: {
1858 int error = 0; /* Assume success */
1859 u_int32_t location;
1860
1861 if (vnode_vfsisrdonly(vp)) {
1862 return (EROFS);
1863 }
1864 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1865 if (suser(cred, NULL) &&
1866 kauth_cred_getuid(cred) != vfsp->f_owner) {
1867 return (EACCES); /* must be owner of file system */
1868 }
1869 if (!vnode_isvroot(vp)) {
1870 return (EINVAL);
1871 }
1872 hfs_lock_mount(hfsmp);
1873 location = *(u_int32_t *)ap->a_data;
1874 if ((location >= hfsmp->allocLimit) &&
1875 (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
1876 error = EINVAL;
1877 goto fail_change_next_allocation;
1878 }
1879 /* Return previous value. */
1880 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
1881 if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
1882 /* On magic value for location, set nextAllocation to next block
1883 * after metadata zone and set flag in mount structure to indicate
1884 * that nextAllocation should not be updated again.
1885 */
1886 if (hfsmp->hfs_metazone_end != 0) {
1887 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
1888 }
1889 hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1890 } else {
1891 hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1892 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
1893 }
1894 MarkVCBDirty(hfsmp);
1895 fail_change_next_allocation:
1896 hfs_unlock_mount(hfsmp);
1897 return (error);
1898 }
1899
1900 #if HFS_SPARSE_DEV
1901 case HFS_SETBACKINGSTOREINFO: {
1902 struct vnode * di_vp;
1903 struct hfs_backingstoreinfo *bsdata;
1904 int error = 0;
1905
1906 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1907 return (EROFS);
1908 }
1909 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
1910 return (EALREADY);
1911 }
1912 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1913 if (suser(cred, NULL) &&
1914 kauth_cred_getuid(cred) != vfsp->f_owner) {
1915 return (EACCES); /* must be owner of file system */
1916 }
1917 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
1918 if (bsdata == NULL) {
1919 return (EINVAL);
1920 }
1921 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
1922 return (error);
1923 }
1924 if ((error = vnode_getwithref(di_vp))) {
1925 file_drop(bsdata->backingfd);
1926 return(error);
1927 }
1928
1929 if (vnode_mount(vp) == vnode_mount(di_vp)) {
1930 (void)vnode_put(di_vp);
1931 file_drop(bsdata->backingfd);
1932 return (EINVAL);
1933 }
1934
1935 // Dropped in unmount
1936 vnode_ref(di_vp);
1937
1938 hfs_lock_mount(hfsmp);
1939 hfsmp->hfs_backingvp = di_vp;
1940 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
1941 hfsmp->hfs_sparsebandblks = bsdata->bandsize / hfsmp->blockSize * 4;
1942 hfs_unlock_mount(hfsmp);
1943
1944 /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */
1945
1946 /*
1947 * If the sparse image is on a sparse image file (as opposed to a sparse
1948 * bundle), then we may need to limit the free space to the maximum size
1949 * of a file on that volume. So we query (using pathconf), and if we get
1950 * a meaningful result, we cache the number of blocks for later use in
1951 * hfs_freeblks().
1952 */
1953 hfsmp->hfs_backingfs_maxblocks = 0;
1954 if (vnode_vtype(di_vp) == VREG) {
1955 int terr;
1956 int hostbits;
1957 terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
1958 if (terr == 0 && hostbits != 0 && hostbits < 64) {
1959 u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;
1960
1961 hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
1962 }
1963 }
1964
1965 /* The free extent cache is managed differently for sparse devices.
1966 * There is a window between when the volume is mounted and when the
1967 * device is marked as sparse, so the free extent cache for this
1968 * volume is currently initialized as for a normal volume (sorted by block
1969 * count). Reset the cache so that it will be rebuilt
1970 * for a sparse device (sorted by start block).
1971 */
1972 ResetVCBFreeExtCache(hfsmp);
1973
1974 (void)vnode_put(di_vp);
1975 file_drop(bsdata->backingfd);
1976 return (0);
1977 }
1978 case HFS_CLRBACKINGSTOREINFO: {
1979 struct vnode * tmpvp;
1980
1981 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1982 if (suser(cred, NULL) &&
1983 kauth_cred_getuid(cred) != vfsp->f_owner) {
1984 return (EACCES); /* must be owner of file system */
1985 }
1986 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1987 return (EROFS);
1988 }
1989
1990 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
1991 hfsmp->hfs_backingvp) {
1992
1993 hfs_lock_mount(hfsmp);
1994 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
1995 tmpvp = hfsmp->hfs_backingvp;
1996 hfsmp->hfs_backingvp = NULLVP;
1997 hfsmp->hfs_sparsebandblks = 0;
1998 hfs_unlock_mount(hfsmp);
1999
2000 vnode_rele(tmpvp);
2001 }
2002 return (0);
2003 }
2004 #endif /* HFS_SPARSE_DEV */
2005
2006 /* Change the next CNID stored in the VH */
2007 case HFS_CHANGE_NEXTCNID: {
2008 int error = 0; /* Assume success */
2009 u_int32_t fileid;
2010 int wraparound = 0;
2011 int lockflags = 0;
2012
2013 if (vnode_vfsisrdonly(vp)) {
2014 return (EROFS);
2015 }
2016 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
2017 if (suser(cred, NULL) &&
2018 kauth_cred_getuid(cred) != vfsp->f_owner) {
2019 return (EACCES); /* must be owner of file system */
2020 }
2021
2022 fileid = *(u_int32_t *)ap->a_data;
2023
2024 /* Must have catalog lock excl. to advance the CNID pointer */
2025 lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG , HFS_EXCLUSIVE_LOCK);
2026
2027 hfs_lock_mount(hfsmp);
2028
2029 /* If it is less than the current next CNID, force the wraparound bit to be set */
2030 if (fileid < hfsmp->vcbNxtCNID) {
2031 wraparound=1;
2032 }
2033
2034 /* Return previous value. */
2035 *(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID;
2036
2037 hfsmp->vcbNxtCNID = fileid;
2038
2039 if (wraparound) {
2040 hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask;
2041 }
2042
2043 MarkVCBDirty(hfsmp);
2044 hfs_unlock_mount(hfsmp);
2045 hfs_systemfile_unlock (hfsmp, lockflags);
2046
2047 return (error);
2048 }
2049
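/*
 * Freeze the file system.  Only journaled volumes can be frozen, and
 * only the superuser or the owner of the file system may do so.
 */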
2050 case F_FREEZE_FS: {
2051 struct mount *mp;
2052
2053 mp = vnode_mount(vp);
2054 hfsmp = VFSTOHFS(mp);
2055
2056 if (!(hfsmp->jnl))
2057 return (ENOTSUP);
2058
2059 vfsp = vfs_statfs(mp);
2060
2061 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
2062 !kauth_cred_issuser(cred))
2063 return (EACCES);
2064
2065 return hfs_freeze(hfsmp);
2066 }
2067
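/* Thaw a previously frozen file system; same ownership check as freeze. */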
2068 case F_THAW_FS: {
2069 vfsp = vfs_statfs(vnode_mount(vp));
2070 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
2071 !kauth_cred_issuser(cred))
2072 return (EACCES);
2073
2074 return hfs_thaw(hfsmp, current_proc());
2075 }
2076
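/*
 * Bulk access check: evaluate access for a batch of items in one call,
 * using the 32-bit or 64-bit argument layout as appropriate.
 */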
2077 case HFS_EXT_BULKACCESS_FSCTL: {
2078 int size;
2079 #if CONFIG_HFS_STD
2080 if (hfsmp->hfs_flags & HFS_STANDARD) {
2081 return EINVAL;
2082 }
2083 #endif
2084
2085 if (is64bit) {
2086 size = sizeof(struct user64_ext_access_t);
2087 } else {
2088 size = sizeof(struct user32_ext_access_t);
2089 }
2090
2091 return do_bulk_access_check(hfsmp, vp, ap, size, context);
2092 }
2093
2094 case HFS_SET_XATTREXTENTS_STATE: {
2095 int state;
2096
2097 if (ap->a_data == NULL) {
2098 return (EINVAL);
2099 }
2100
2101 state = *(int *)ap->a_data;
2102
2103 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2104 return (EROFS);
2105 }
2106
2107 /* The super-user can enable or disable extent-based extended
2108 * attribute support on a volume.
2109 * Note: Starting with Mac OS X 10.7, extent-based extended attributes
2110 * are enabled by default, so any change is transient and lasts only
2111 * until the volume is remounted.
2112 */
2113 if (!kauth_cred_issuser(kauth_cred_get())) {
2114 return (EPERM);
2115 }
2116 if (state == 0 || state == 1)
2117 return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
2118 else
2119 return (EINVAL);
2120 }
2121
2122 case F_SETSTATICCONTENT: {
2123 int error;
2124 int enable_static = 0;
2125 struct cnode *cp = NULL;
2126 /*
2127 * lock the cnode, decorate the cnode flag, and bail out.
2128 * VFS should have already authenticated the caller for us.
2129 */
2130
2131 if (ap->a_data) {
2132 /*
2133 * Note that even though ap->a_data is of type caddr_t,
2134 * the fcntl layer at the syscall handler will pass in NULL
2135 * or 1 depending on what the argument supplied to the fcntl
2136 * was. So it is in fact correct to check the ap->a_data
2137 * argument for zero or non-zero value when deciding whether or not
2138 * to enable the static bit in the cnode.
2139 */
2140 enable_static = 1;
2141 }
2142 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2143 return EROFS;
2144 }
2145 cp = VTOC(vp);
2146
2147 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2148 if (error == 0) {
2149 if (enable_static) {
2150 cp->c_flag |= C_SSD_STATIC;
2151 }
2152 else {
2153 cp->c_flag &= ~C_SSD_STATIC;
2154 }
2155 hfs_unlock (cp);
2156 }
2157 return error;
2158 }
2159
2160 case F_SET_GREEDY_MODE: {
2161 int error;
2162 int enable_greedy_mode = 0;
2163 struct cnode *cp = NULL;
2164 /*
2165 * lock the cnode, decorate the cnode flag, and bail out.
2166 * VFS should have already authenticated the caller for us.
2167 */
2168
2169 if (ap->a_data) {
2170 /*
2171 * Note that even though ap->a_data is of type caddr_t,
2172 * the fcntl layer at the syscall handler will pass in NULL
2173 * or 1 depending on what the argument supplied to the fcntl
2174 * was. So it is in fact correct to check the ap->a_data
2175 * argument for zero or non-zero value when deciding whether or not
2176 * to enable the greedy mode bit in the cnode.
2177 */
2178 enable_greedy_mode = 1;
2179 }
2180 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2181 return EROFS;
2182 }
2183 cp = VTOC(vp);
2184
2185 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2186 if (error == 0) {
2187 if (enable_greedy_mode) {
2188 cp->c_flag |= C_SSD_GREEDY_MODE;
2189 }
2190 else {
2191 cp->c_flag &= ~C_SSD_GREEDY_MODE;
2192 }
2193 hfs_unlock (cp);
2194 }
2195 return error;
2196 }
2197
2198 case F_SETIOTYPE: {
2199 int error;
2200 uint32_t iotypeflag = 0;
2201
2202 struct cnode *cp = NULL;
2203 /*
2204 * lock the cnode, decorate the cnode flag, and bail out.
2205 * VFS should have already authenticated the caller for us.
2206 */
2207
2208 if (ap->a_data == NULL) {
2209 return EINVAL;
2210 }
2211
2212 /*
2213 * Note that even though ap->a_data is of type caddr_t, we
2214 * can only use 32 bits of flag values.
2215 */
2216 iotypeflag = (uint32_t) ap->a_data;
2217 switch (iotypeflag) {
2218 case F_IOTYPE_ISOCHRONOUS:
2219 break;
2220 default:
2221 return EINVAL;
2222 }
2223
2224
2225 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2226 return EROFS;
2227 }
2228 cp = VTOC(vp);
2229
2230 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2231 if (error == 0) {
2232 switch (iotypeflag) {
2233 case F_IOTYPE_ISOCHRONOUS:
2234 cp->c_flag |= C_IO_ISOCHRONOUS;
2235 break;
2236 default:
2237 break;
2238 }
2239 hfs_unlock (cp);
2240 }
2241 return error;
2242 }
2243
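/*
 * Convert a file to decmpfs-compressed state: verify the caller's
 * generation count, set UF_COMPRESSED, and truncate the now-stale
 * uncompressed data fork.
 */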
2244 case F_MAKECOMPRESSED: {
2245 int error = 0;
2246 uint32_t gen_counter;
2247 struct cnode *cp = NULL;
2248 int reset_decmp = 0;
2249
2250 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2251 return EROFS;
2252 }
2253
2254 /*
2255 * acquire & lock the cnode.
2256 * VFS should have already authenticated the caller for us.
2257 */
2258
2259 if (ap->a_data) {
2260 /*
2261 * Cast the pointer into a uint32_t so we can extract the
2262 * supplied generation counter.
2263 */
2264 gen_counter = *((uint32_t*)ap->a_data);
2265 }
2266 else {
2267 return EINVAL;
2268 }
2269
2270 #if HFS_COMPRESSION
2271 cp = VTOC(vp);
2272 /* Grab truncate lock first; we may truncate the file */
2273 hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2274
2275 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2276 if (error) {
2277 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2278 return error;
2279 }
2280
2281 /* Are there any other usecounts/FDs? */
2282 if (vnode_isinuse(vp, 1)) {
2283 hfs_unlock(cp);
2284 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2285 return EBUSY;
2286 }
2287
2288 /* Now that we have the cnode locked down, validate the arguments */
2289 if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) {
2290 /* EINVAL if you are trying to manipulate an IMMUTABLE file */
2291 hfs_unlock(cp);
2292 hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
2293 return EINVAL;
2294 }
2295
2296 if ((hfs_get_gencount (cp)) == gen_counter) {
2297 /*
2298 * OK, the gen_counter matched. Go for it:
2299 * Toggle state bits, truncate file, and suppress mtime update
2300 */
2301 reset_decmp = 1;
2302 cp->c_bsdflags |= UF_COMPRESSED;
2303
2304 error = hfs_truncate(vp, 0, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES,
2305 ap->a_context);
2306 }
2307 else {
2308 error = ESTALE;
2309 }
2310
2311 /* Unlock the cnode before executing decmpfs; it may need to get an EA */
2312 hfs_unlock(cp);
2313
2314 /*
2315 * Reset the decmp state while still holding the truncate lock. We need to
2316 * serialize here against a listxattr on this node which may occur at any
2317 * time.
2318 *
2319 * Even if '0/skiplock' is passed in 2nd argument to hfs_file_is_compressed,
2320 * that will still potentially require getting the com.apple.decmpfs EA. If the
2321 * EA is required, then we can't hold the cnode lock, because the getxattr call is
2322 * generic(through VFS), and can't pass along any info telling it that we're already
2323 * holding it (the lock). If we don't serialize, then we risk listxattr stopping
2324 * and trying to fill in the hfs_file_is_compressed info during the callback
2325 * operation, which will result in deadlock against the b-tree node.
2326 *
2327 * So, to serialize against listxattr (which will grab buf_t meta references on
2328 * the b-tree blocks), we hold the truncate lock as we're manipulating the
2329 * decmpfs payload.
2330 */
2331 if ((reset_decmp) && (error == 0)) {
2332 decmpfs_cnode *dp = VTOCMP (vp);
2333 if (dp != NULL) {
2334 decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
2335 }
2336
2337 /* Initialize the decmpfs node as needed */
2338 (void) hfs_file_is_compressed (cp, 0); /* ok to take lock */
2339 }
2340
2341 hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
2342
2343 #endif
2344 return error;
2345 }
2346
2347 case F_SETBACKINGSTORE: {
2348
2349 int error = 0;
2350
2351 /*
2352 * See comment in F_SETSTATICCONTENT re: using
2353 * a null check for a_data
2354 */
2355 if (ap->a_data) {
2356 error = hfs_set_backingstore (vp, 1);
2357 }
2358 else {
2359 error = hfs_set_backingstore (vp, 0);
2360 }
2361
2362 return error;
2363 }
2364
2365 case F_GETPATH_MTMINFO: {
2366 int error = 0;
2367
2368 int *data = (int*) ap->a_data;
2369
2370 /* Ask if this is a backingstore vnode */
2371 error = hfs_is_backingstore (vp, data);
2372
2373 return error;
2374 }
2375
2376 case F_FULLFSYNC: {
2377 int error;
2378
2379 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2380 return (EROFS);
2381 }
2382 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2383 if (error == 0) {
2384 error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_FULL, p);
2385 hfs_unlock(VTOC(vp));
2386 }
2387
2388 return error;
2389 }
2390
2391 case F_BARRIERFSYNC: {
2392 int error;
2393
2394 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2395 return (EROFS);
2396 }
2397 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2398 if (error == 0) {
2399 error = hfs_fsync(vp, MNT_WAIT, HFS_FSYNC_BARRIER, p);
2400 hfs_unlock(VTOC(vp));
2401 }
2402
2403 return error;
2404 }
2405
2406 case F_CHKCLEAN: {
2407 register struct cnode *cp;
2408 int error;
2409
2410 if (!vnode_isreg(vp))
2411 return EINVAL;
2412
2413 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2414 if (error == 0) {
2415 cp = VTOC(vp);
2416 /*
2417 * Used by the regression test to determine if
2418 * all the dirty pages (via write) have been cleaned
2419 * after a call to 'fsync'.
2420 */
2421 error = is_file_clean(vp, VTOF(vp)->ff_size);
2422 hfs_unlock(cp);
2423 }
2424 return (error);
2425 }
2426
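/*
 * Issue a read-ahead advisory for the byte range described by the
 * radvisory argument, bounded by the (possibly uncompressed) file size.
 */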
2427 case F_RDADVISE: {
2428 register struct radvisory *ra;
2429 struct filefork *fp;
2430 int error;
2431
2432 if (!vnode_isreg(vp))
2433 return EINVAL;
2434
2435 ra = (struct radvisory *)(ap->a_data);
2436 fp = VTOF(vp);
2437
2438 /* Protect against a size change. */
2439 hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2440
2441 #if HFS_COMPRESSION
2442 if (compressed) {
2443 if (uncompressed_size == -1) {
2444 /* fetching the uncompressed size failed above, so return the error */
2445 error = decmpfs_error;
2446 } else if (ra->ra_offset >= uncompressed_size) {
2447 error = EFBIG;
2448 } else {
2449 error = advisory_read(vp, uncompressed_size, ra->ra_offset, ra->ra_count);
2450 }
2451 } else
2452 #endif /* HFS_COMPRESSION */
2453 if (ra->ra_offset >= fp->ff_size) {
2454 error = EFBIG;
2455 } else {
2456 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
2457 }
2458
2459 hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
2460 return (error);
2461 }
2462
2463 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
2464 {
2465 if (is64bit) {
2466 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
2467 }
2468 else {
2469 *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
2470 }
2471 return 0;
2472 }
2473
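/* Spotlight support: report the volume's mount time and last-mounted mtime. */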
2474 case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
2475 *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
2476 break;
2477
2478 case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
2479 *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
2480 break;
2481
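/*
 * Free-space notification thresholds.  The levels must stay ordered:
 * danger limit < warning limit < desired level.
 */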
2482 case HFS_FSCTL_GET_VERY_LOW_DISK:
2483 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
2484 break;
2485
2486 case HFS_FSCTL_SET_VERY_LOW_DISK:
2487 if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
2488 return EINVAL;
2489 }
2490
2491 hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
2492 break;
2493
2494 case HFS_FSCTL_GET_LOW_DISK:
2495 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
2496 break;
2497
2498 case HFS_FSCTL_SET_LOW_DISK:
2499 if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
2500 || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
2501
2502 return EINVAL;
2503 }
2504
2505 hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
2506 break;
2507
2508 case HFS_FSCTL_GET_DESIRED_DISK:
2509 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
2510 break;
2511
2512 case HFS_FSCTL_SET_DESIRED_DISK:
2513 if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
2514 return EINVAL;
2515 }
2516
2517 hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
2518 break;
2519
2520 case HFS_VOLUME_STATUS:
2521 *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
2522 break;
2523
2524 case HFS_SET_BOOT_INFO:
2525 if (!vnode_isvroot(vp))
2526 return(EINVAL);
2527 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
2528 return(EACCES); /* must be superuser or owner of filesystem */
2529 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2530 return (EROFS);
2531 }
2532 hfs_lock_mount (hfsmp);
2533 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
2534 /* Null out the cached UUID, to be safe */
2535 uuid_clear (hfsmp->hfs_full_uuid);
2536 hfs_unlock_mount (hfsmp);
2537 (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT);
2538 break;
2539
2540 case HFS_GET_BOOT_INFO:
2541 if (!vnode_isvroot(vp))
2542 return(EINVAL);
2543 hfs_lock_mount (hfsmp);
2544 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
2545 hfs_unlock_mount(hfsmp);
2546 break;
2547
2548 case HFS_MARK_BOOT_CORRUPT:
2549 /* Mark the boot volume corrupt by setting
2550 * kHFSVolumeInconsistentBit in the volume header. This will
2551 * force fsck_hfs on next mount.
2552 */
2553 if (!kauth_cred_issuser(kauth_cred_get())) {
2554 return EACCES;
2555 }
2556
2557 /* Allowed only on the root vnode of the boot volume */
2558 if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
2559 !vnode_isvroot(vp)) {
2560 return EINVAL;
2561 }
2562 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2563 return (EROFS);
2564 }
2565 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
2566 hfs_mark_inconsistent(hfsmp, HFS_FSCK_FORCED);
2567 break;
2568
2569 case HFS_FSCTL_GET_JOURNAL_INFO:
2570 jip = (struct hfs_journal_info*)ap->a_data;
2571
2572 if (vp == NULLVP)
2573 return EINVAL;
2574
2575 if (hfsmp->jnl == NULL) {
2576 jnl_start = 0;
2577 jnl_size = 0;
2578 } else {
2579 jnl_start = hfs_blk_to_bytes(hfsmp->jnl_start, hfsmp->blockSize) + hfsmp->hfsPlusIOPosOffset;
2580 jnl_size = hfsmp->jnl_size;
2581 }
2582
2583 jip->jstart = jnl_start;
2584 jip->jsize = jnl_size;
2585 break;
2586
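/* Set or clear the C_ALWAYS_ZEROFILL flag on this cnode. */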
2587 case HFS_SET_ALWAYS_ZEROFILL: {
2588 struct cnode *cp = VTOC(vp);
2589
2590 if (*(int *)ap->a_data) {
2591 cp->c_flag |= C_ALWAYS_ZEROFILL;
2592 } else {
2593 cp->c_flag &= ~C_ALWAYS_ZEROFILL;
2594 }
2595 break;
2596 }
2597
2598 case HFS_DISABLE_METAZONE: {
2599 /* Only root can disable metadata zone */
2600 if (!kauth_cred_issuser(kauth_cred_get())) {
2601 return EACCES;
2602 }
2603 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2604 return (EROFS);
2605 }
2606
2607 /* Disable metadata zone now */
2608 (void) hfs_metadatazone_init(hfsmp, true);
2609 printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
2610 break;
2611 }
2612
2613
2614 case HFS_FSINFO_METADATA_BLOCKS: {
2615 int error;
2616 struct hfsinfo_metadata *hinfo;
2617
2618 hinfo = (struct hfsinfo_metadata *)ap->a_data;
2619
2620 /* Get information about number of metadata blocks */
2621 error = hfs_getinfo_metadata_blocks(hfsmp, hinfo);
2622 if (error) {
2623 return error;
2624 }
2625
2626 break;
2627 }
2628
2629 case HFS_GET_FSINFO: {
2630 hfs_fsinfo *fsinfo = (hfs_fsinfo *)ap->a_data;
2631
2632 /* Only root is allowed to get fsinfo */
2633 if (!kauth_cred_issuser(kauth_cred_get())) {
2634 return EACCES;
2635 }
2636
2637 /*
2638 * Make sure that the caller's version number matches with
2639 * the kernel's version number. This will make sure that
2640 * if the structures being read/written into are changed
2641 * by the kernel, the caller will not read incorrect data.
2642 *
2643 * The first three fields --- request_type, version and
2644 * flags are same for all the hfs_fsinfo structures, so
2645 * we can access the version number by assuming any
2646 * structure for now.
2647 */
2648 if (fsinfo->header.version != HFS_FSINFO_VERSION) {
2649 return ENOTSUP;
2650 }
2651
2652 /* Make sure that the current file system is not marked inconsistent */
2653 if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
2654 return EIO;
2655 }
2656
2657 return hfs_get_fsinfo(hfsmp, ap->a_data);
2658 }
2659
2660 case HFS_CS_FREESPACE_TRIM: {
2661 int error = 0;
2662 int lockflags = 0;
2663
2664 /* Only root allowed */
2665 if (!kauth_cred_issuser(kauth_cred_get())) {
2666 return EACCES;
2667 }
2668
2669 /*
2670 * This core functionality is similar to hfs_scan_blocks().
2671 * The main difference is that hfs_scan_blocks() is called
2672 * as part of mount where we are assured that the journal is
2673 * empty to start with. This fcntl() can be called on a
2674 * mounted volume, therefore it has to flush the content of
2675 * the journal as well as ensure the state of summary table.
2676 *
2677 * This fcntl scans over the entire allocation bitmap,
2678 * creates a list of all the free blocks, and issues TRIM
2679 * down to the underlying device. This can take a long time
2680 * as it can generate up to 512MB of read I/O.
2681 */
2682
2683 if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) {
2684 error = hfs_init_summary(hfsmp);
2685 if (error) {
2686 printf("hfs: fsctl() could not initialize summary table for %s\n", hfsmp->vcbVN);
2687 return error;
2688 }
2689 }
2690
2691 /*
2692 * The journal maintains a list of recently deallocated blocks to
2693 * issue DKIOCUNMAPs when the corresponding journal transaction is
2694 * flushed to the disk. To avoid any race conditions, we only
2695 * want one active trim list and only one thread issuing DKIOCUNMAPs.
2696 * Therefore we make sure that the journal trim list is sync'ed,
2697 * empty, and not modifiable for the duration of our scan.
2698 *
2699 * Take the journal lock before flushing the journal to the disk.
2700 * We keep holding the journal lock until we acquire the
2701 * bitmap lock, to make sure that no new journal transactions can
2702 * start. This guarantees that the journal trim list is not
2703 * modified after the journal flush and before we get the bitmap lock.
2704 * We can release the journal lock after we acquire the bitmap
2705 * lock as it will prevent any further block deallocations.
2706 */
2707 hfs_journal_lock(hfsmp);
2708
2709 /* Flush the journal and wait for all I/Os to finish up */
2710 error = hfs_flush(hfsmp, HFS_FLUSH_JOURNAL_META);
2711 if (error) {
2712 hfs_journal_unlock(hfsmp);
2713 return error;
2714 }
2715
2716 /* Take bitmap lock to ensure it is not being modified */
2717 lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
2718
2719 /* Release the journal lock */
2720 hfs_journal_unlock(hfsmp);
2721
2722 /*
2723 * ScanUnmapBlocks reads the bitmap in large blocks
2724 * (up to 1MB), unlike the runtime which reads the bitmap
2725 * in 4K blocks. This can cause buf_t collisions
2726 * and potential data corruption. To avoid this, we
2727 * invalidate all the existing buffers associated with
2728 * the bitmap vnode before scanning it.
2729 *
2730 * Note: ScanUnmapBlocks() cleans up all the buffers
2731 * after itself, so there won't be any large buffers left
2732 * for us to clean up after it returns.
2733 */
2734 error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
2735 if (error) {
2736 hfs_systemfile_unlock(hfsmp, lockflags);
2737 return error;
2738 }
2739
2740 /* Traverse bitmap and issue DKIOCUNMAPs */
2741 error = ScanUnmapBlocks(hfsmp);
2742 hfs_systemfile_unlock(hfsmp, lockflags);
2743 if (error) {
2744 return error;
2745 }
2746
2747 break;
2748 }
2749
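/*
 * Mark or unmark this file as a fast-device (hotfile) candidate,
 * unpinning any already-pinned blocks when candidacy is cleared.
 */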
2750 case HFS_SET_HOTFILE_STATE: {
2751 int error;
2752 struct cnode *cp = VTOC(vp);
2753 uint32_t hf_state = *((uint32_t*)ap->a_data);
2754 uint32_t num_unpinned = 0;
2755
2756 error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2757 if (error) {
2758 return error;
2759 }
2760
2761 // printf("hfs: setting hotfile state %d on %s\n", hf_state, vp->v_name);
2762 if (hf_state == HFS_MARK_FASTDEVCANDIDATE) {
2763 vnode_setfastdevicecandidate(vp);
2764
2765 cp->c_attr.ca_recflags |= kHFSFastDevCandidateMask;
2766 cp->c_attr.ca_recflags &= ~kHFSDoNotFastDevPinMask;
2767 cp->c_flag |= C_MODIFIED;
2768 } else if (hf_state == HFS_UNMARK_FASTDEVCANDIDATE || hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
2769 vnode_clearfastdevicecandidate(vp);
2770 hfs_removehotfile(vp);
2771
2772 if (cp->c_attr.ca_recflags & kHFSFastDevPinnedMask) {
2773 hfs_pin_vnode(hfsmp, vp, HFS_UNPIN_IT, &num_unpinned);
2774 }
2775
2776 if (hf_state == HFS_NEVER_FASTDEVCANDIDATE) {
2777 cp->c_attr.ca_recflags |= kHFSDoNotFastDevPinMask;
2778 }
2779 cp->c_attr.ca_recflags &= ~(kHFSFastDevCandidateMask|kHFSFastDevPinnedMask);
2780 cp->c_flag |= C_MODIFIED;
2781
2782 } else {
2783 error = EINVAL;
2784 }
2785
2786 if (num_unpinned != 0) {
2787 lck_mtx_lock(&hfsmp->hfc_mutex);
2788 hfsmp->hfs_hotfile_freeblks += num_unpinned;
2789 lck_mtx_unlock(&hfsmp->hfc_mutex);
2790 }
2791
2792 hfs_unlock(cp);
2793 return error;
2794 }
2795
2796 case HFS_REPIN_HOTFILE_STATE: {
2797 int error=0;
2798 uint32_t repin_what = *((uint32_t*)ap->a_data);
2799
2800 /* Only root allowed */
2801 if (!kauth_cred_issuser(kauth_cred_get())) {
2802 return EACCES;
2803 }
2804
2805 if (!(hfsmp->hfs_flags & (HFS_CS_METADATA_PIN | HFS_CS_HOTFILE_PIN))) {
2806 // this system is neither regular Fusion nor Cooperative Fusion,
2807 // so this fsctl makes no sense.
2808 return EINVAL;
2809 }
2810
2811 //
2812 // After converting a CoreStorage volume to be encrypted, the
2813 // extents could have moved around underneath us. This call
2814 // allows corestoraged to re-pin everything that should be
2815 // pinned (it would happen on the next reboot too but that could
2816 // be a long time away).
2817 //
2818 if ((repin_what & HFS_REPIN_METADATA) && (hfsmp->hfs_flags & HFS_CS_METADATA_PIN)) {
2819 hfs_pin_fs_metadata(hfsmp);
2820 }
2821 if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_HOTFILE_PIN)) {
2822 hfs_repin_hotfiles(hfsmp);
2823 }
2824 if ((repin_what & HFS_REPIN_USERDATA) && (hfsmp->hfs_flags & HFS_CS_SWAPFILE_PIN)) {
2825 //XXX Swapfiles (marked SWAP_PINNED) may have moved too.
2826 //XXX Do we care? They have a more transient/dynamic nature/lifetime.
2827 }
2828
2829 return error;
2830 }
2831
2832 #if HFS_CONFIG_KEY_ROLL
2833
2834 case HFS_KEY_ROLL: {
2835 if (!kauth_cred_issuser(kauth_cred_get()))
2836 return EACCES;
2837
2838 hfs_key_roll_args_t *args = (hfs_key_roll_args_t *)ap->a_data;
2839
2840 return hfs_key_roll_op(ap->a_context, ap->a_vp, args);
2841 }
2842
2843 case HFS_GET_KEY_AUTO_ROLL: {
2844 if (!kauth_cred_issuser(kauth_cred_get()))
2845 return EACCES;
2846
2847 hfs_key_auto_roll_args_t *args = (hfs_key_auto_roll_args_t *)ap->a_data;
2848 if (args->api_version != HFS_KEY_AUTO_ROLL_API_VERSION_1)
2849 return ENOTSUP;
2850 args->flags = (ISSET(hfsmp->cproot_flags, CP_ROOT_AUTO_ROLL_OLD_CLASS_GENERATION)
2851 ? HFS_KEY_AUTO_ROLL_OLD_CLASS_GENERATION : 0);
2852 args->min_key_os_version = hfsmp->hfs_auto_roll_min_key_os_version;
2853 args->max_key_os_version = hfsmp->hfs_auto_roll_max_key_os_version;
2854 break;
2855 }
2856
2857 case HFS_SET_KEY_AUTO_ROLL: {
2858 if (!kauth_cred_issuser(kauth_cred_get()))
2859 return EACCES;
2860
2861 hfs_key_auto_roll_args_t *args = (hfs_key_auto_roll_args_t *)ap->a_data;
2862 if (args->api_version != HFS_KEY_AUTO_ROLL_API_VERSION_1)
2863 return ENOTSUP;
2864 return cp_set_auto_roll(hfsmp, args);
2865 }
2866
2867 #endif // HFS_CONFIG_KEY_ROLL
2868
2869 #if CONFIG_PROTECT
2870 case F_TRANSCODEKEY:
2871 /*
2872 * This API is only supported when called from within the kernel, so
2873 * a_fflag must be set to 1 (it's not possible to get here
2874 * with a_fflag set to 1 via fsctl).
2875 */
2876 if (ap->a_fflag != 1)
2877 return ENOTTY;
2878 return cp_vnode_transcode(vp, (cp_key_t *)ap->a_data);
2879
2880 case F_GETPROTECTIONLEVEL:
2881 return cp_get_root_major_vers (vp, (uint32_t *)ap->a_data);
2882
2883 case F_GETDEFAULTPROTLEVEL:
2884 return cp_get_default_level(vp, (uint32_t *)ap->a_data);
2885 #endif // CONFIG_PROTECT
2886
2887 case FIOPINSWAP:
2888 return hfs_pin_vnode(hfsmp, vp, HFS_PIN_IT | HFS_DATALESS_PIN,
2889 NULL);
2890
2891 default:
2892 return (ENOTTY);
2893 }
2894
2895 return 0;
2896 }
2897
2898 /*
2899 * select
2900 */
2901 int
2902 hfs_vnop_select(__unused struct vnop_select_args *ap)
2903 /*
2904 struct vnop_select_args {
2905 vnode_t a_vp;
2906 int a_which;
2907 int a_fflags;
2908 void *a_wql;
2909 vfs_context_t a_context;
2910 };
2911 */
2912 {
2913 /*
2914 * We should really check to see if I/O is possible.
2915 */
2916 return (1);
2917 }
2918
2919 /*
2920 * Converts a logical block number to a physical block, and optionally returns
2921 * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
2922 * The physical block number is based on the device block size, currently 512 bytes.
2923 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
2924 */
2925 int
2926 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
2927 {
2928 struct filefork *fp = VTOF(vp);
2929 struct hfsmount *hfsmp = VTOHFS(vp);
2930 int retval = E_NONE;
2931 u_int32_t logBlockSize;
2932 size_t bytesContAvail = 0;
2933 off_t blockposition;
2934 int lockExtBtree;
2935 int lockflags = 0;
2936
2937 /*
2938 * Check for underlying vnode requests and ensure that logical
2939 * to physical mapping is requested.
2940 */
2941 if (vpp != NULL)
2942 *vpp = hfsmp->hfs_devvp;
2943 if (bnp == NULL)
2944 return (0);
2945
2946 logBlockSize = GetLogicalBlockSize(vp);
2947 blockposition = (off_t)bn * logBlockSize;
2948
2949 lockExtBtree = overflow_extents(fp);
2950
2951 if (lockExtBtree)
2952 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
2953
2954 retval = MacToVFSError(
2955 MapFileBlockC (HFSTOVCB(hfsmp),
2956 (FCB*)fp,
2957 MAXPHYSIO,
2958 blockposition,
2959 bnp,
2960 &bytesContAvail));
2961
2962 if (lockExtBtree)
2963 hfs_systemfile_unlock(hfsmp, lockflags);
2964
2965 if (retval == E_NONE) {
2966 /* Figure out how many read ahead blocks there are */
2967 if (runp != NULL) {
2968 if (can_cluster(logBlockSize)) {
2969 /* Make sure this result never goes negative: */
2970 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
2971 } else {
2972 *runp = 0;
2973 }
2974 }
2975 }
2976 return (retval);
2977 }
2978
2979 /*
2980 * Convert logical block number to file offset.
2981 */
2982 int
2983 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
2984 /*
2985 struct vnop_blktooff_args {
2986 vnode_t a_vp;
2987 daddr64_t a_lblkno;
2988 off_t *a_offset;
2989 };
2990 */
2991 {
2992 if (ap->a_vp == NULL)
2993 return (EINVAL);
2994 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
2995
2996 return(0);
2997 }
2998
2999 /*
3000 * Convert file offset to logical block number.
3001 */
3002 int
3003 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
3004 /*
3005 struct vnop_offtoblk_args {
3006 vnode_t a_vp;
3007 off_t a_offset;
3008 daddr64_t *a_lblkno;
3009 };
3010 */
3011 {
3012 if (ap->a_vp == NULL)
3013 return (EINVAL);
3014 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
3015
3016 return(0);
3017 }
3018
3019 /*
3020 * Map file offset to physical block number.
3021 *
3022 * If this function is called for write operation, and if the file
3023 * had virtual blocks allocated (delayed allocation), real blocks
3024 * are allocated by calling ExtendFileC().
3025 *
3026 * If this function is called for read operation, and if the file
3027 * had virtual blocks allocated (delayed allocation), no change
3028 * to the size of file is done, and if required, rangelist is
3029 * searched for mapping.
3030 *
3031 * System file cnodes are expected to be locked (shared or exclusive).
3032 *
3033 * -- INVALID RANGES --
3034 *
3035 * Invalid ranges are used to keep track of where we have extended a
3036 * file, but have not yet written that data to disk. In the past we
3037 * would clear up the invalid ranges as we wrote to those areas, but
3038 * before data was actually flushed to disk. The problem with that
3039 * approach is that the data can be left in the cache and is therefore
3040 * still not valid on disk. So now we clear up the ranges here, when
3041 * the flags field has VNODE_WRITE set, indicating a write is about to
3042 * occur. This isn't ideal (ideally we want to clear them up when
3043 * we know the data has been successfully written), but it's the best we
3044 * can do.
3045 *
3046 * For reads, we use the invalid ranges here in block map to indicate
3047 * to the caller that the data should be zeroed (a_bpn == -1). We
3048 * have to be careful about what ranges we return to the cluster code.
3049 * Currently the cluster code can only handle non-rounded values for
3050 * the EOF; it cannot handle funny sized ranges in the middle of the
3051 * file (the main problem is that it sends down odd sized I/Os to the
3052 * disk). Our code currently works because whilst the very first
3053 * offset and the last offset in the invalid ranges are not aligned,
3054 * gaps in the invalid ranges between the first and last have to be
3055 * aligned (because we always write page sized blocks). For example,
3056 * consider this arrangement:
3057 *
3058 * +-------------+-----+-------+------+
3059 * | |XXXXX| |XXXXXX|
3060 * +-------------+-----+-------+------+
3061 * a b c d
3062 *
3063 * This shows two invalid ranges <a, b> and <c, d>. Whilst a and d
3064 * are not necessarily aligned, b and c *must* be.
3065 *
3066 * Zero-filling occurs in a number of ways:
3067 *
3068 * 1. When a read occurs and we return with a_bpn == -1.
3069 *
3070 * 2. When hfs_fsync or hfs_filedone calls hfs_flush_invalid_ranges
3071 * which will cause us to iterate over the ranges bringing in
3072 * pages that are not present in the cache and zeroing them. Any
3073 * pages that are already in the cache are left untouched. Note
3074 * that hfs_fsync does not always flush invalid ranges.
3075 *
3076 * 3. When we extend a file we zero out from the old EOF to the end
3077 * of the page. It would be nice if we didn't have to do this if
3078 * the page wasn't present (and could defer it), but because of
3079 * the problem described above, we have to.
3080 *
3081 * The invalid ranges are also used to restrict the size that we write
3082 * out on disk: see hfs_prepare_fork_for_update.
3083 *
3084 * Note that invalid ranges are ignored when neither the VNODE_READ or
3085 * the VNODE_WRITE flag is specified. This is useful for the
3086 * F_LOG2PHYS* fcntls which are not interested in invalid ranges: they
3087 * just want to know whether blocks are physically allocated or not.
3088 */
3089 int
3090 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
3091 /*
3092 struct vnop_blockmap_args {
3093 vnode_t a_vp;
3094 off_t a_foffset;
3095 size_t a_size;
3096 daddr64_t *a_bpn;
3097 size_t *a_run;
3098 void *a_poff;
3099 int a_flags;
3100 vfs_context_t a_context;
3101 };
3102 */
3103 {
3104 struct vnode *vp = ap->a_vp;
3105 struct cnode *cp;
3106 struct filefork *fp;
3107 struct hfsmount *hfsmp;
3108 size_t bytesContAvail = ap->a_size;
3109 int retval = E_NONE;
3110 int syslocks = 0;
3111 int lockflags = 0;
3112 struct rl_entry *invalid_range;
3113 enum rl_overlaptype overlaptype;
3114 int started_tr = 0;
3115 int tooklock = 0;
3116
3117 #if HFS_COMPRESSION
3118 if (VNODE_IS_RSRC(vp)) {
3119 /* allow blockmaps to the resource fork */
3120 } else {
3121 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
3122 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
3123 switch(state) {
3124 case FILE_IS_COMPRESSED:
3125 return ENOTSUP;
3126 case FILE_IS_CONVERTING:
3127 /* if FILE_IS_CONVERTING, we allow blockmap */
3128 break;
3129 default:
3130 printf("invalid state %d for compressed file\n", state);
3131 /* fall through */
3132 }
3133 }
3134 }
3135 #endif /* HFS_COMPRESSION */
3136
3137 /* Do not allow blockmap operation on a directory */
3138 if (vnode_isdir(vp)) {
3139 return (ENOTSUP);
3140 }
3141
3142 /*
3143 * Check for underlying vnode requests and ensure that logical
3144 * to physical mapping is requested.
3145 */
3146 if (ap->a_bpn == NULL)
3147 return (0);
3148
3149 hfsmp = VTOHFS(vp);
3150 cp = VTOC(vp);
3151 fp = VTOF(vp);
3152
3153 if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
3154 if (cp->c_lockowner != current_thread()) {
3155 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
3156 tooklock = 1;
3157 }
3158
3159 // For reads, check the invalid ranges
3160 if (ISSET(ap->a_flags, VNODE_READ)) {
3161 if (ap->a_foffset >= fp->ff_size) {
3162 retval = ERANGE;
3163 goto exit;
3164 }
3165
3166 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
3167 ap->a_foffset + (off_t)bytesContAvail - 1,
3168 &invalid_range);
3169 switch(overlaptype) {
3170 case RL_MATCHINGOVERLAP:
3171 case RL_OVERLAPCONTAINSRANGE:
3172 case RL_OVERLAPSTARTSBEFORE:
3173 /* There's no valid block for this byte offset */
3174 *ap->a_bpn = (daddr64_t)-1;
3175 /* There's no point limiting the amount to be returned
3176 * if the invalid range that was hit extends all the way
3177 * to the EOF (i.e. there's no valid bytes between the
3178 * end of this range and the file's EOF):
3179 */
3180 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
3181 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
3182 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
3183 }
3184
3185 retval = 0;
3186 goto exit;
3187
3188 case RL_OVERLAPISCONTAINED:
3189 case RL_OVERLAPENDSAFTER:
3190 /* The range of interest hits an invalid block before the end: */
3191 if (invalid_range->rl_start == ap->a_foffset) {
3192 /* There's actually no valid information to be had starting here: */
3193 *ap->a_bpn = (daddr64_t)-1;
3194 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
3195 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
3196 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
3197 }
3198
3199 retval = 0;
3200 goto exit;
3201 } else {
3202 /*
3203 * Sadly, the lower layers don't like us to
3204 * return unaligned ranges, so we skip over
3205 * any invalid ranges here that are less than
3206 * a page: zeroing of those bits is not our
3207 * responsibility (it's dealt with elsewhere).
3208 */
3209 do {
3210 off_t rounded_start = round_page_64(invalid_range->rl_start);
3211 if ((off_t)bytesContAvail < rounded_start - ap->a_foffset)
3212 break;
3213 if (rounded_start < invalid_range->rl_end + 1) {
3214 bytesContAvail = rounded_start - ap->a_foffset;
3215 break;
3216 }
3217 } while ((invalid_range = TAILQ_NEXT(invalid_range,
3218 rl_link)));
3219 }
3220 break;
3221
3222 case RL_NOOVERLAP:
3223 break;
3224 } // switch
3225 }
3226 }
3227
3228 #if CONFIG_PROTECT
3229 if (cp->c_cpentry) {
3230 const int direction = (ISSET(ap->a_flags, VNODE_WRITE)
3231 ? VNODE_WRITE : VNODE_READ);
3232
3233 cp_io_params_t io_params;
3234 cp_io_params(hfsmp, cp->c_cpentry,
3235 off_rsrc_make(ap->a_foffset, VNODE_IS_RSRC(vp)),
3236 direction, &io_params);
3237
3238 if (io_params.max_len < (off_t)bytesContAvail)
3239 bytesContAvail = io_params.max_len;
3240
3241 if (io_params.phys_offset != -1) {
3242 *ap->a_bpn = ((io_params.phys_offset + hfsmp->hfsPlusIOPosOffset)
3243 / hfsmp->hfs_logical_block_size);
3244
3245 retval = 0;
3246 goto exit;
3247 }
3248 }
3249 #endif
3250
3251 retry:
3252
3253 /* Check virtual blocks only when performing write operation */
3254 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
3255 if (hfs_start_transaction(hfsmp) != 0) {
3256 retval = EINVAL;
3257 goto exit;
3258 } else {
3259 started_tr = 1;
3260 }
3261 syslocks = SFL_EXTENTS | SFL_BITMAP;
3262
3263 } else if (overflow_extents(fp)) {
3264 syslocks = SFL_EXTENTS;
3265 }
3266
3267 if (syslocks)
3268 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
3269
3270 /*
3271 * Check for any delayed allocations.
3272 */
3273 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
3274 int64_t actbytes;
3275 u_int32_t loanedBlocks;
3276
3277 //
3278 // Make sure we have a transaction. It's possible
3279 // that we came in and fp->ff_unallocblocks was zero
3280 // but during the time we blocked acquiring the extents
3281 // btree, ff_unallocblocks became non-zero and so we
3282 // will need to start a transaction.
3283 //
3284 if (started_tr == 0) {
3285 if (syslocks) {
3286 hfs_systemfile_unlock(hfsmp, lockflags);
3287 syslocks = 0;
3288 }
3289 goto retry;
3290 }
3291
3292 /*
3293 * Note: ExtendFileC will release any blocks on loan and
3294 * acquire real blocks. So we ask to extend by zero bytes
3295 * since ExtendFileC will account for the virtual blocks.
3296 */
3297
3298 loanedBlocks = fp->ff_unallocblocks;
3299 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
3300 kEFAllMask | kEFNoClumpMask, &actbytes);
3301
3302 if (retval) {
3303 fp->ff_unallocblocks = loanedBlocks;
3304 cp->c_blocks += loanedBlocks;
3305 fp->ff_blocks += loanedBlocks;
3306
3307 hfs_lock_mount (hfsmp);
3308 hfsmp->loanedBlocks += loanedBlocks;
3309 hfs_unlock_mount (hfsmp);
3310
3311 hfs_systemfile_unlock(hfsmp, lockflags);
3312 cp->c_flag |= C_MODIFIED;
3313 if (started_tr) {
3314 (void) hfs_update(vp, 0);
3315 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3316
3317 hfs_end_transaction(hfsmp);
3318 started_tr = 0;
3319 }
3320 goto exit;
3321 }
3322 }
3323
3324 retval = MapFileBlockC(hfsmp, (FCB *)fp, bytesContAvail, ap->a_foffset,
3325 ap->a_bpn, &bytesContAvail);
3326 if (syslocks) {
3327 hfs_systemfile_unlock(hfsmp, lockflags);
3328 syslocks = 0;
3329 }
3330
3331 if (retval) {
3332 /* On write, always return error because virtual blocks, if any,
3333 * should have been allocated in ExtendFileC(). We do not
3334 * allocate virtual blocks on read, therefore return error
3335 * only if no virtual blocks are allocated. Otherwise we search
3336 * the rangelist for zero-fills.
3337 */
3338 if ((MacToVFSError(retval) != ERANGE) ||
3339 (ap->a_flags & VNODE_WRITE) ||
3340 ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
3341 goto exit;
3342 }
3343
3344 /* Validate if the start offset is within logical file size */
3345 if (ap->a_foffset >= fp->ff_size) {
3346 goto exit;
3347 }
3348
3349 /*
3350 * At this point, we have encountered a failure during
3351 * MapFileBlockC that resulted in ERANGE, and we are not
3352 * servicing a write, and there are borrowed blocks.
3353 *
3354 * However, the cluster layer will not call blockmap for
3355 * blocks that are borrowed and in-cache. We have to assume
3356 * that because we observed ERANGE being emitted from
3357 * MapFileBlockC, this extent range is not valid on-disk. So
3358 * we treat this as a mapping that needs to be zero-filled
3359 * prior to reading.
3360 */
3361
3362 if (fp->ff_size - ap->a_foffset < (off_t)bytesContAvail)
3363 bytesContAvail = fp->ff_size - ap->a_foffset;
3364
3365 *ap->a_bpn = (daddr64_t) -1;
3366 retval = 0;
3367
3368 goto exit;
3369 }
3370
3371 exit:
3372 if (retval == 0) {
3373 if (ISSET(ap->a_flags, VNODE_WRITE)) {
3374 struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges);
3375
3376 // See if we might be overlapping invalid ranges...
3377 if (r && (ap->a_foffset + (off_t)bytesContAvail) > r->rl_start) {
3378 /*
3379 * Mark the file as needing an update if we think the
3380 * on-disk EOF has changed.
3381 */
3382 if (ap->a_foffset <= r->rl_start)
3383 SET(cp->c_flag, C_MODIFIED);
3384
3385 /*
3386 * This isn't the ideal place to put this. Ideally, we
3387 * should do something *after* we have successfully
3388 * written to the range, but that's difficult to do
3389 * because we cannot take locks in the callback. At
3390 * present, the cluster code will call us with VNODE_WRITE
3391 * set just before it's about to write the data so we know
3392 * that data is about to be written. If we get an I/O
3393 * error at this point then chances are the metadata
3394 * update to follow will also have an I/O error so the
3395 * risk here is small.
3396 */
3397 rl_remove(ap->a_foffset, ap->a_foffset + bytesContAvail - 1,
3398 &fp->ff_invalidranges);
3399
3400 if (!TAILQ_FIRST(&fp->ff_invalidranges)) {
3401 cp->c_flag &= ~C_ZFWANTSYNC;
3402 cp->c_zftimeout = 0;
3403 }
3404 }
3405 }
3406
3407 if (ap->a_run)
3408 *ap->a_run = bytesContAvail;
3409
3410 if (ap->a_poff)
3411 *(int *)ap->a_poff = 0;
3412 }
3413
3414 if (started_tr) {
3415 hfs_update(vp, TRUE);
3416 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3417 hfs_end_transaction(hfsmp);
3418 started_tr = 0;
3419 }
3420
3421 if (tooklock)
3422 hfs_unlock(cp);
3423
3424 return (MacToVFSError(retval));
3425 }
3426
3427 /*
3428 * prepare and issue the I/O
3429 * buf_strategy knows how to deal
3430 * with requests that require
3431 * fragmented I/Os
3432 */
3433 int
3434 hfs_vnop_strategy(struct vnop_strategy_args *ap)
3435 {
3436 buf_t bp = ap->a_bp;
3437 vnode_t vp = buf_vnode(bp);
3438 int error = 0;
3439
3440 /* Mark buffer as containing static data if cnode flag set */
3441 if (VTOC(vp)->c_flag & C_SSD_STATIC) {
3442 buf_markstatic(bp);
3443 }
3444
3445 /* Mark buffer as containing static data if cnode flag set */
3446 if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) {
3447 bufattr_markgreedymode(buf_attr(bp));
3448 }
3449
3450 /* mark buffer as containing burst mode data if cnode flag set */
3451 if (VTOC(vp)->c_flag & C_IO_ISOCHRONOUS) {
3452 bufattr_markisochronous(buf_attr(bp));
3453 }
3454
3455 #if CONFIG_PROTECT
3456 error = cp_handle_strategy(bp);
3457
3458 if (error)
3459 return error;
3460 #endif
3461
3462 error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
3463
3464 return error;
3465 }
3466
3467 int
3468 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vfs_context_t context)
3469 {
3470 register struct cnode *cp = VTOC(vp);
3471 struct filefork *fp = VTOF(vp);
3472 kauth_cred_t cred = vfs_context_ucred(context);
3473 int retval;
3474 off_t bytesToAdd;
3475 off_t actualBytesAdded;
3476 off_t filebytes;
3477 u_int32_t fileblocks;
3478 int blksize;
3479 struct hfsmount *hfsmp;
3480 int lockflags;
3481 int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES);
3482
3483 blksize = VTOVCB(vp)->blockSize;
3484 fileblocks = fp->ff_blocks;
3485 filebytes = (off_t)fileblocks * (off_t)blksize;
3486
3487 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_START,
3488 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
3489
3490 if (length < 0)
3491 return (EINVAL);
3492
3493 /* This should only happen with a corrupt filesystem */
3494 if ((off_t)fp->ff_size < 0)
3495 return (EINVAL);
3496
3497 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
3498 return (EFBIG);
3499
3500 hfsmp = VTOHFS(vp);
3501
3502 retval = E_NONE;
3503
3504 /* Files that are changing size are not hot file candidates. */
3505 if (hfsmp->hfc_stage == HFC_RECORDING) {
3506 fp->ff_bytesread = 0;
3507 }
3508
3509 /*
3510 * We cannot just check if fp->ff_size == length (as an optimization)
3511 * since there may be extra physical blocks that also need truncation.
3512 */
3513 #if QUOTA
3514 if ((retval = hfs_getinoquota(cp)))
3515 return(retval);
3516 #endif /* QUOTA */
3517
3518 /*
3519 * Lengthen the size of the file. We must ensure that the
3520 * last byte of the file is allocated. Since the smallest
3521 * value of ff_size is 0, length will be at least 1.
3522 */
3523 if (length > (off_t)fp->ff_size) {
3524 #if QUOTA
3525 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
3526 cred, 0);
3527 if (retval)
3528 goto Err_Exit;
3529 #endif /* QUOTA */
3530 /*
3531 * If we don't have enough physical space then
3532 * we need to extend the physical size.
3533 */
3534 if (length > filebytes) {
3535 int eflags;
3536 u_int32_t blockHint = 0;
3537
3538 /* All or nothing and don't round up to clumpsize. */
3539 eflags = kEFAllMask | kEFNoClumpMask;
3540
3541 if (cred && (suser(cred, NULL) != 0)) {
3542 eflags |= kEFReserveMask; /* keep a reserve */
3543 }
3544
3545 /*
3546 * Allocate Journal and Quota files in metadata zone.
3547 */
3548 if (filebytes == 0 &&
3549 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
3550 hfs_virtualmetafile(cp)) {
3551 eflags |= kEFMetadataMask;
3552 blockHint = hfsmp->hfs_metazone_start;
3553 }
3554 if (hfs_start_transaction(hfsmp) != 0) {
3555 retval = EINVAL;
3556 goto Err_Exit;
3557 }
3558
3559 /* Protect extents b-tree and allocation bitmap */
3560 lockflags = SFL_BITMAP;
3561 if (overflow_extents(fp))
3562 lockflags |= SFL_EXTENTS;
3563 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3564
3565 /*
3566 * Keep growing the file as long as the current EOF is
3567 * less than the desired value.
3568 */
3569 while ((length > filebytes) && (retval == E_NONE)) {
3570 bytesToAdd = length - filebytes;
3571 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
3572 (FCB*)fp,
3573 bytesToAdd,
3574 blockHint,
3575 eflags,
3576 &actualBytesAdded));
3577
3578 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
3579 if (actualBytesAdded == 0 && retval == E_NONE) {
3580 if (length > filebytes)
3581 length = filebytes;
3582 break;
3583 }
3584 } /* endwhile */
3585
3586 hfs_systemfile_unlock(hfsmp, lockflags);
3587
3588 if (hfsmp->jnl) {
3589 hfs_update(vp, 0);
3590 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3591 }
3592
3593 hfs_end_transaction(hfsmp);
3594
3595 if (retval)
3596 goto Err_Exit;
3597
3598 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE,
3599 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
3600 }
3601
3602 if (ISSET(flags, IO_NOZEROFILL)) {
3603 // An optimisation for the hibernation file
3604 if (vnode_isswap(vp))
3605 rl_remove_all(&fp->ff_invalidranges);
3606 } else {
3607 if (!vnode_issystem(vp) && retval == E_NONE) {
3608 if (length > (off_t)fp->ff_size) {
3609 struct timeval tv;
3610
3611 /* Extending the file: time to fill out the current last page w. zeroes? */
3612 if (fp->ff_size & PAGE_MASK_64) {
3613 /* There might be some valid data at the start of the (current) last page
3614 of the file, so zero out the remainder of that page to ensure the
3615 entire page contains valid data. */
3616 hfs_unlock(cp);
3617 retval = hfs_zero_eof_page(vp, length);
3618 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
3619 if (retval) goto Err_Exit;
3620 }
3621 microuptime(&tv);
3622 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
3623 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
3624 }
3625 } else {
3626 panic("hfs_truncate: invoked on non-UBC object?!");
3627 };
3628 }
3629 if (suppress_times == 0) {
3630 cp->c_touch_modtime = TRUE;
3631 }
3632 fp->ff_size = length;
3633
3634 } else { /* Shorten the size of the file */
3635
3636 // An optimisation for the hibernation file
3637 if (ISSET(flags, IO_NOZEROFILL) && vnode_isswap(vp)) {
3638 rl_remove_all(&fp->ff_invalidranges);
3639 } else if ((off_t)fp->ff_size > length) {
3640 /* Any space previously marked as invalid is now irrelevant: */
3641 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
3642 }
3643
3644 /*
3645 * Account for any unmapped blocks. Note that the new
3646 * file length can still end up with unmapped blocks.
3647 */
3648 if (fp->ff_unallocblocks > 0) {
3649 u_int32_t finalblks;
3650 u_int32_t loanedBlocks;
3651
3652 hfs_lock_mount(hfsmp);
3653 loanedBlocks = fp->ff_unallocblocks;
3654 cp->c_blocks -= loanedBlocks;
3655 fp->ff_blocks -= loanedBlocks;
3656 fp->ff_unallocblocks = 0;
3657
3658 hfsmp->loanedBlocks -= loanedBlocks;
3659
3660 finalblks = (length + blksize - 1) / blksize;
3661 if (finalblks > fp->ff_blocks) {
3662 /* calculate required unmapped blocks */
3663 loanedBlocks = finalblks - fp->ff_blocks;
3664 hfsmp->loanedBlocks += loanedBlocks;
3665
3666 fp->ff_unallocblocks = loanedBlocks;
3667 cp->c_blocks += loanedBlocks;
3668 fp->ff_blocks += loanedBlocks;
3669 }
3670 hfs_unlock_mount (hfsmp);
3671 }
3672
3673 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
3674 if (hfs_start_transaction(hfsmp) != 0) {
3675 retval = EINVAL;
3676 goto Err_Exit;
3677 }
3678
3679 if (fp->ff_unallocblocks == 0) {
3680 /* Protect extents b-tree and allocation bitmap */
3681 lockflags = SFL_BITMAP;
3682 if (overflow_extents(fp))
3683 lockflags |= SFL_EXTENTS;
3684 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3685
3686 retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0,
3687 FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));
3688
3689 hfs_systemfile_unlock(hfsmp, lockflags);
3690 }
3691 if (hfsmp->jnl) {
3692 if (retval == 0) {
3693 fp->ff_size = length;
3694 }
3695 hfs_update(vp, 0);
3696 hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3697 }
3698 hfs_end_transaction(hfsmp);
3699
3700 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
3701 if (retval)
3702 goto Err_Exit;
3703 #if QUOTA
3704 /* These are bytesreleased */
3705 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
3706 #endif /* QUOTA */
3707
3708 //
3709 // Unlike when growing a file, we adjust the hotfile block count here
3710 // instead of deeper down in the block allocation code because we do
3711 // not necessarily have a vnode or "fcb" at the time we're deleting
3712 // the file and so we wouldn't know if it was hotfile cached or not
3713 //
3714 hfs_hotfile_adjust_blocks(vp, (int64_t)((savedbytes - filebytes) / blksize));
3715
3716
3717 /*
3718 * Only set update flag if the logical length changes & we aren't
3719 * suppressing modtime updates.
3720 */
3721 if (((off_t)fp->ff_size != length) && (suppress_times == 0)) {
3722 cp->c_touch_modtime = TRUE;
3723 }
3724 fp->ff_size = length;
3725 }
3726 if (cp->c_mode & (S_ISUID | S_ISGID)) {
3727 if (!vfs_context_issuser(context))
3728 cp->c_mode &= ~(S_ISUID | S_ISGID);
3729 }
3730 cp->c_flag |= C_MODIFIED;
3731 cp->c_touch_chgtime = TRUE; /* status changed */
3732 if (suppress_times == 0) {
3733 cp->c_touch_modtime = TRUE; /* file data was modified */
3734
3735 /*
3736 * If we are not suppressing the modtime update, then
3737 * update the gen count as well.
3738 */
3739 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) {
3740 hfs_incr_gencount(cp);
3741 }
3742 }
3743
3744 retval = hfs_update(vp, 0);
3745 if (retval) {
3746 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE,
3747 -1, -1, -1, retval, 0);
3748 }
3749
3750 Err_Exit:
3751
3752 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_END,
3753 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
3754
3755 return (retval);
3756 }
3757
3758 /*
3759 * Preparation which must be done prior to deleting the catalog record
3760 * of a file or directory. In order to make the on-disk as safe as possible,
3761 * we remove the catalog entry before releasing the bitmap blocks and the
3762 * overflow extent records. However, some work must be done prior to deleting
3763 * the catalog record.
3764 *
3765 * When calling this function, the cnode must exist both in memory and on-disk.
3766 * If there are both resource fork and data fork vnodes, this function should
3767 * be called on both.
3768 */
3769
3770 int
3771 hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {
3772
3773 struct filefork *fp = VTOF(vp);
3774 struct cnode *cp = VTOC(vp);
3775 #if QUOTA
3776 int retval = 0;
3777 #endif /* QUOTA */
3778
3779 /* Cannot truncate an HFS directory! */
3780 if (vnode_isdir(vp)) {
3781 return (EISDIR);
3782 }
3783
3784 /*
3785 * See the comment below in hfs_truncate for why we need to call
3786 * setsize here. Essentially we want to avoid pending IO if we
3787 * already know that the blocks are going to be released here.
3788 * This function is only called when totally removing all storage for a file, so
3789 * we can take a shortcut and immediately setsize (0);
3790 */
3791 ubc_setsize(vp, 0);
3792
3793 /* This should only happen with a corrupt filesystem */
3794 if ((off_t)fp->ff_size < 0)
3795 return (EINVAL);
3796
3797 /*
3798 * We cannot just check if fp->ff_size == length (as an optimization)
3799 * since there may be extra physical blocks that also need truncation.
3800 */
3801 #if QUOTA
3802 if ((retval = hfs_getinoquota(cp))) {
3803 return(retval);
3804 }
3805 #endif /* QUOTA */
3806
3807 /* Wipe out any invalid ranges which have yet to be backed by disk */
3808 rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);
3809
3810 /*
3811 * Account for any unmapped blocks. Since we're deleting the
3812 * entire file, we don't have to worry about just shrinking
3813 * to a smaller number of borrowed blocks.
3814 */
3815 if (fp->ff_unallocblocks > 0) {
3816 u_int32_t loanedBlocks;
3817
3818 hfs_lock_mount (hfsmp);
3819 loanedBlocks = fp->ff_unallocblocks;
3820 cp->c_blocks -= loanedBlocks;
3821 fp->ff_blocks -= loanedBlocks;
3822 fp->ff_unallocblocks = 0;
3823
3824 hfsmp->loanedBlocks -= loanedBlocks;
3825
3826 hfs_unlock_mount (hfsmp);
3827 }
3828
3829 return 0;
3830 }
3831
3832
3833 /*
3834 * Special wrapper around calling TruncateFileC. This function is usable
3835 * even when the catalog record does not exist any longer, making it ideal
3836 * for use when deleting a file. The simplification here is that we know
3837 * that we are releasing all blocks.
3838 *
3839 * Note that this function may be called when there is no vnode backing
3840 * the file fork in question. We may call this from hfs_vnop_inactive
3841 * to clear out resource fork data (and may not want to clear out the data
3842 * fork yet). As a result, we pointer-check both sets of inputs before
3843 * doing anything with them.
3844 *
3845 * The caller is responsible for saving off a copy of the filefork(s)
3846 * embedded within the cnode prior to calling this function. The pointers
3847 * supplied as arguments must be valid even if the cnode is no longer valid.
3848 */
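/*
 * A caller is expected to snapshot the fork structures before the cnode
 * can go away and then pass the copies in, roughly along these lines
 * (illustrative sketch only; the real call sites live in the
 * inactive/reclaim path mentioned above):
 *
 *	struct filefork dfork, rfork;
 *	if (cp->c_datafork)
 *		dfork = *cp->c_datafork;
 *	if (cp->c_rsrcfork)
 *		rfork = *cp->c_rsrcfork;
 *	error = hfs_release_storage(hfsmp, cp->c_datafork ? &dfork : NULL,
 *	                            cp->c_rsrcfork ? &rfork : NULL,
 *	                            cp->c_fileid);
 */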
3849
3850 int
3851 hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
3852 struct filefork *rsrcfork, u_int32_t fileid) {
3853
3854 off_t filebytes;
3855 u_int32_t fileblocks;
3856 int blksize = 0;
3857 int error = 0;
3858 int lockflags;
3859
3860 blksize = hfsmp->blockSize;
3861
3862 /* Data Fork */
3863 if (datafork) {
3864 off_t prev_filebytes;
3865
3866 datafork->ff_size = 0;
3867
3868 fileblocks = datafork->ff_blocks;
3869 filebytes = (off_t)fileblocks * (off_t)blksize;
3870 prev_filebytes = filebytes;
3871
3872 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3873
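/*
 * Release the fork in steps: each pass lowers the target size by at
 * most HFS_BIGFILE_SIZE and truncates down to it inside its own
 * transaction, so one very large fork never produces one enormous
 * journal transaction.
 */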
3874 while (filebytes > 0) {
3875 if (filebytes > HFS_BIGFILE_SIZE) {
3876 filebytes -= HFS_BIGFILE_SIZE;
3877 } else {
3878 filebytes = 0;
3879 }
3880
3881 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3882 if (hfs_start_transaction(hfsmp) != 0) {
3883 error = EINVAL;
3884 break;
3885 }
3886
3887 if (datafork->ff_unallocblocks == 0) {
3888 /* Protect extents b-tree and allocation bitmap */
3889 lockflags = SFL_BITMAP;
3890 if (overflow_extents(datafork))
3891 lockflags |= SFL_EXTENTS;
3892 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3893
3894 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false));
3895
3896 hfs_systemfile_unlock(hfsmp, lockflags);
3897 }
3898 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3899
3900 struct cnode *cp = datafork ? FTOC(datafork) : NULL;
3901 struct vnode *vp;
3902 vp = cp ? CTOV(cp, 0) : NULL;
3903 hfs_hotfile_adjust_blocks(vp, (int64_t)((prev_filebytes - filebytes) / blksize));
3904 prev_filebytes = filebytes;
3905
3906 /* Finish the transaction and start over if necessary */
3907 hfs_end_transaction(hfsmp);
3908
3909 if (error) {
3910 break;
3911 }
3912 }
3913 }
3914
3915 /* Resource fork */
3916 if (error == 0 && rsrcfork) {
3917 rsrcfork->ff_size = 0;
3918
3919 fileblocks = rsrcfork->ff_blocks;
3920 filebytes = (off_t)fileblocks * (off_t)blksize;
3921
3922 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3923
3924 while (filebytes > 0) {
3925 if (filebytes > HFS_BIGFILE_SIZE) {
3926 filebytes -= HFS_BIGFILE_SIZE;
3927 } else {
3928 filebytes = 0;
3929 }
3930
3931 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3932 if (hfs_start_transaction(hfsmp) != 0) {
3933 error = EINVAL;
3934 break;
3935 }
3936
3937 if (rsrcfork->ff_unallocblocks == 0) {
3938 /* Protect extents b-tree and allocation bitmap */
3939 lockflags = SFL_BITMAP;
3940 if (overflow_extents(rsrcfork))
3941 lockflags |= SFL_EXTENTS;
3942 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3943
3944 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false));
3945
3946 hfs_systemfile_unlock(hfsmp, lockflags);
3947 }
3948 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3949
3950 /* Finish the transaction and start over if necessary */
3951 hfs_end_transaction(hfsmp);
3952
3953 if (error) {
3954 break;
3955 }
3956 }
3957 }
3958
3959 return error;
3960 }
3961
3962 errno_t hfs_ubc_setsize(vnode_t vp, off_t len, bool have_cnode_lock)
3963 {
3964 errno_t error;
3965
3966 /*
3967 * Call ubc_setsize to give the VM subsystem a chance to do
3968 * whatever it needs to with existing pages before we delete
3969 * blocks. Note that symlinks don't use the UBC so we'll
3970 * get back ENOENT in that case.
3971 */
3972 if (have_cnode_lock) {
3973 error = ubc_setsize_ex(vp, len, UBC_SETSIZE_NO_FS_REENTRY);
3974 if (error == EAGAIN) {
3975 cnode_t *cp = VTOC(vp);
3976
3977 if (cp->c_truncatelockowner != current_thread())
3978 hfs_warn("hfs: hfs_ubc_setsize called without exclusive truncate lock!");
3979
3980 hfs_unlock(cp);
3981 error = ubc_setsize_ex(vp, len, 0);
3982 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
3983 }
3984 } else
3985 error = ubc_setsize_ex(vp, len, 0);
3986
3987 return error == ENOENT ? 0 : error;
3988 }
3989
3990 /*
3991 * Truncate a cnode to at most length size, freeing (or adding) the
3992 * disk blocks.
3993 */
3994 int
3995 hfs_truncate(struct vnode *vp, off_t length, int flags,
3996 int truncateflags, vfs_context_t context)
3997 {
3998 struct filefork *fp = VTOF(vp);
3999 off_t filebytes;
4000 u_int32_t fileblocks;
4001 int blksize;
4002 errno_t error = 0;
4003 struct cnode *cp = VTOC(vp);
4004 hfsmount_t *hfsmp = VTOHFS(vp);
4005
4006 /* Cannot truncate an HFS directory! */
4007 if (vnode_isdir(vp)) {
4008 return (EISDIR);
4009 }
4010 /* A swap file cannot change size. */
4011 if (vnode_isswap(vp) && length && !ISSET(flags, IO_NOAUTH)) {
4012 return (EPERM);
4013 }
4014
4015 blksize = hfsmp->blockSize;
4016 fileblocks = fp->ff_blocks;
4017 filebytes = (off_t)fileblocks * (off_t)blksize;
4018
4019 bool caller_has_cnode_lock = (cp->c_lockowner == current_thread());
4020
4021 error = hfs_ubc_setsize(vp, length, caller_has_cnode_lock);
4022 if (error)
4023 return error;
4024
4025 if (!caller_has_cnode_lock) {
4026 error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
4027 if (error)
4028 return error;
4029 }
4030
4031 if (vnode_islnk(vp) && cp->c_datafork->ff_symlinkptr) {
4032 hfs_free(cp->c_datafork->ff_symlinkptr, cp->c_datafork->ff_size);
4033 cp->c_datafork->ff_symlinkptr = NULL;
4034 }
4035
4036 // have to loop truncating or growing files that are
4037 // really big because otherwise transactions can get
4038 // enormous and consume too many kernel resources.
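// Each pass below moves filebytes by at most HFS_BIGFILE_SIZE, so a
// single call to do_hfs_truncate (and the transaction it starts) never
// has to shrink or grow the fork by more than that amount.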
4039
4040 if (length < filebytes) {
4041 while (filebytes > length) {
4042 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
4043 filebytes -= HFS_BIGFILE_SIZE;
4044 } else {
4045 filebytes = length;
4046 }
4047 error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
4048 if (error)
4049 break;
4050 }
4051 } else if (length > filebytes) {
4052 kauth_cred_t cred = vfs_context_ucred(context);
4053 const bool keep_reserve = cred && suser(cred, NULL) != 0;
4054
4055 if (hfs_freeblks(hfsmp, keep_reserve)
4056 < howmany(length - filebytes, blksize)) {
4057 error = ENOSPC;
4058 } else {
4059 while (filebytes < length) {
4060 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
4061 filebytes += HFS_BIGFILE_SIZE;
4062 } else {
4063 filebytes = length;
4064 }
4065 error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
4066 if (error)
4067 break;
4068 }
4069 }
4070 } else /* Same logical size */ {
4071
4072 error = do_hfs_truncate(vp, length, flags, truncateflags, context);
4073 }
4074 /* Files that are changing size are not hot file candidates. */
4075 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
4076 fp->ff_bytesread = 0;
4077 }
4078
4079 #if HFS_CONFIG_KEY_ROLL
4080 if (!error && cp->c_truncatelockowner == current_thread()) {
4081 hfs_key_roll_check(cp, true);
4082 }
4083 #endif
4084
4085 if (!caller_has_cnode_lock)
4086 hfs_unlock(cp);
4087
4088 // Make sure UBC's size matches up (in case we didn't completely succeed)
4089 errno_t err2 = hfs_ubc_setsize(vp, fp->ff_size, caller_has_cnode_lock);
4090 if (!error)
4091 error = err2;
4092
4093 return error;
4094 }
4095
4096
4097 /*
4098 * Preallocate file storage space.
4099 */
4100 int
4101 hfs_vnop_allocate(struct vnop_allocate_args /* {
4102 vnode_t a_vp;
4103 off_t a_length;
4104 u_int32_t a_flags;
4105 off_t *a_bytesallocated;
4106 off_t a_offset;
4107 vfs_context_t a_context;
4108 } */ *ap)
4109 {
4110 struct vnode *vp = ap->a_vp;
4111 struct cnode *cp;
4112 struct filefork *fp;
4113 ExtendedVCB *vcb;
4114 off_t length = ap->a_length;
4115 off_t startingPEOF;
4116 off_t moreBytesRequested;
4117 off_t actualBytesAdded;
4118 off_t filebytes;
4119 u_int32_t fileblocks;
4120 int retval, retval2;
4121 u_int32_t blockHint;
4122 u_int32_t extendFlags; /* For call to ExtendFileC */
4123 struct hfsmount *hfsmp;
4124 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
4125 int lockflags;
4126 time_t orig_ctime;
4127
4128 *(ap->a_bytesallocated) = 0;
4129
4130 if (!vnode_isreg(vp))
4131 return (EISDIR);
4132 if (length < (off_t)0)
4133 return (EINVAL);
4134
4135 cp = VTOC(vp);
4136
4137 orig_ctime = VTOC(vp)->c_ctime;
4138
4139 nspace_snapshot_event(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);
4140
4141 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
4142
4143 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
4144 goto Err_Exit;
4145 }
4146
4147 fp = VTOF(vp);
4148 hfsmp = VTOHFS(vp);
4149 vcb = VTOVCB(vp);
4150
4151 fileblocks = fp->ff_blocks;
4152 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
4153
4154 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
4155 retval = EINVAL;
4156 goto Err_Exit;
4157 }
4158
4159 /* Fill in the flags word for the call to Extend the file */
4160
4161 extendFlags = kEFNoClumpMask;
4162 if (ap->a_flags & ALLOCATECONTIG)
4163 extendFlags |= kEFContigMask;
4164 if (ap->a_flags & ALLOCATEALL)
4165 extendFlags |= kEFAllMask;
4166 if (cred && suser(cred, NULL) != 0)
4167 extendFlags |= kEFReserveMask;
4168 if (hfs_virtualmetafile(cp))
4169 extendFlags |= kEFMetadataMask;
4170
4171 retval = E_NONE;
4172 blockHint = 0;
4173 startingPEOF = filebytes;
4174
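/*
 * ALLOCATEFROMPEOF means a_length is relative to the current physical
 * EOF, so convert it into an absolute size; ALLOCATEFROMVOL means
 * a_offset is a volume-relative position to use as an allocation hint.
 */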
4175 if (ap->a_flags & ALLOCATEFROMPEOF)
4176 length += filebytes;
4177 else if (ap->a_flags & ALLOCATEFROMVOL)
4178 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
4179
4180 /* If no changes are necessary, then we're done */
4181 if (filebytes == length)
4182 goto Std_Exit;
4183
4184 /*
4185 * Lengthen the size of the file. We must ensure that the
4186 * last byte of the file is allocated. Since the smallest
4187 * value of filebytes is 0, length will be at least 1.
4188 */
4189 if (length > filebytes) {
4190 if (ISSET(extendFlags, kEFAllMask)
4191 && (hfs_freeblks(hfsmp, ISSET(extendFlags, kEFReserveMask))
4192 < howmany(length - filebytes, hfsmp->blockSize))) {
4193 retval = ENOSPC;
4194 goto Err_Exit;
4195 }
4196
4197 off_t total_bytes_added = 0, orig_request_size;
4198
4199 orig_request_size = moreBytesRequested = length - filebytes;
4200
4201 #if QUOTA
4202 retval = hfs_chkdq(cp,
4203 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
4204 cred, 0);
4205 if (retval)
4206 goto Err_Exit;
4207
4208 #endif /* QUOTA */
4209 /*
4210 * Metadata zone checks.
4211 */
4212 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
4213 /*
4214 * Allocate Journal and Quota files in metadata zone.
4215 */
4216 if (hfs_virtualmetafile(cp)) {
4217 blockHint = hfsmp->hfs_metazone_start;
4218 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
4219 (blockHint <= hfsmp->hfs_metazone_end)) {
4220 /*
4221 * Move blockHint outside metadata zone.
4222 */
4223 blockHint = hfsmp->hfs_metazone_end + 1;
4224 }
4225 }
4226
4227
4228 while ((length > filebytes) && (retval == E_NONE)) {
4229 off_t bytesRequested;
4230
4231 if (hfs_start_transaction(hfsmp) != 0) {
4232 retval = EINVAL;
4233 goto Err_Exit;
4234 }
4235
4236 /* Protect extents b-tree and allocation bitmap */
4237 lockflags = SFL_BITMAP;
4238 if (overflow_extents(fp))
4239 lockflags |= SFL_EXTENTS;
4240 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4241
4242 if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
4243 bytesRequested = HFS_BIGFILE_SIZE;
4244 } else {
4245 bytesRequested = moreBytesRequested;
4246 }
4247
4248 if (extendFlags & kEFContigMask) {
4249 // if we're on a sparse device, this will force it to do a
4250 // full scan to find the space needed.
4251 hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
4252 }
4253
4254 retval = MacToVFSError(ExtendFileC(vcb,
4255 (FCB*)fp,
4256 bytesRequested,
4257 blockHint,
4258 extendFlags,
4259 &actualBytesAdded));
4260
4261 if (retval == E_NONE) {
4262 *(ap->a_bytesallocated) += actualBytesAdded;
4263 total_bytes_added += actualBytesAdded;
4264 moreBytesRequested -= actualBytesAdded;
4265 if (blockHint != 0) {
4266 blockHint += actualBytesAdded / vcb->blockSize;
4267 }
4268 }
4269 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
4270
4271 hfs_systemfile_unlock(hfsmp, lockflags);
4272
4273 if (hfsmp->jnl) {
4274 (void) hfs_update(vp, 0);
4275 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
4276 }
4277
4278 hfs_end_transaction(hfsmp);
4279 }
4280
4281
4282 /*
4283 * if we get an error and no changes were made then exit;
4284 * otherwise we must do the hfs_update to reflect the changes
4285 */
4286 if (retval && (startingPEOF == filebytes))
4287 goto Err_Exit;
4288
4289 /*
4290 * Adjust actualBytesAdded to be allocation block aligned, not
4291 * clump size aligned.
4292 * NOTE: So what we are reporting does not affect reality
4293 * until the file is closed, when we truncate the file to allocation
4294 * block size.
4295 */
4296 if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
4297 *(ap->a_bytesallocated) =
4298 roundup(orig_request_size, (off_t)vcb->blockSize);
4299
4300 } else { /* Shorten the size of the file */
4301
4302 /*
4303 * N.B. At present, this code is never called. If and when we
4304 * do start using it, it looks like there might be slightly
4305 * strange semantics with the file size: it's possible for the
4306 * file size to *increase* e.g. if current file size is 5,
4307 * length is 1024 and filebytes is 4096, the file size will
4308 * end up being 1024 bytes. This isn't necessarily a problem
4309 * but it's not consistent with the code above which doesn't
4310 * change the file size.
4311 */
4312
4313 retval = hfs_truncate(vp, length, 0, 0, ap->a_context);
4314 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
4315
4316 /*
4317 * if we get an error and no changes were made then exit;
4318 * otherwise we must do the hfs_update to reflect the changes
4319 */
4320 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
4321 #if QUOTA
4322 /* These are bytesreleased */
4323 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
4324 #endif /* QUOTA */
4325
4326 if (fp->ff_size > filebytes) {
4327 fp->ff_size = filebytes;
4328
4329 hfs_ubc_setsize(vp, fp->ff_size, true);
4330 }
4331 }
4332
4333 Std_Exit:
4334 cp->c_flag |= C_MODIFIED;
4335 cp->c_touch_chgtime = TRUE;
4336 cp->c_touch_modtime = TRUE;
4337 retval2 = hfs_update(vp, 0);
4338
4339 if (retval == 0)
4340 retval = retval2;
4341 Err_Exit:
4342 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
4343 hfs_unlock(cp);
4344 return (retval);
4345 }
4346
4347
4348 /*
4349 * Pagein for HFS filesystem
4350 */
4351 int
4352 hfs_vnop_pagein(struct vnop_pagein_args *ap)
4353 /*
4354 struct vnop_pagein_args {
4355 vnode_t a_vp,
4356 upl_t a_pl,
4357 vm_offset_t a_pl_offset,
4358 off_t a_f_offset,
4359 size_t a_size,
4360 int a_flags
4361 vfs_context_t a_context;
4362 };
4363 */
4364 {
4365 vnode_t vp;
4366 struct cnode *cp;
4367 struct filefork *fp;
4368 int error = 0;
4369 upl_t upl;
4370 upl_page_info_t *pl;
4371 off_t f_offset;
4372 off_t page_needed_f_offset;
4373 int offset;
4374 int isize;
4375 int upl_size;
4376 int pg_index;
4377 boolean_t truncate_lock_held = FALSE;
4378 boolean_t file_converted = FALSE;
4379 kern_return_t kret;
4380
4381 vp = ap->a_vp;
4382 cp = VTOC(vp);
4383 fp = VTOF(vp);
4384
4385 #if CONFIG_PROTECT
4386 if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) {
4387 /*
4388 * If we errored here, then this means that one of two things occurred:
4389 * 1. there was a problem with the decryption of the key.
4390 * 2. the device is locked and we are not allowed to access this particular file.
4391 *
4392 * Either way, this means that we need to shut down this upl now. As long as
4393 * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves),
4394 * we create a UPL and immediately abort it.
4395 */
4396 if (ap->a_pl == NULL) {
4397 /* create the upl */
4398 ubc_create_upl (vp, ap->a_f_offset, ap->a_size, &upl, &pl,
4399 UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
4400 /* mark the range as needed so it doesn't immediately get discarded upon abort */
4401 ubc_upl_range_needed (upl, ap->a_pl_offset / PAGE_SIZE, 1);
4402
4403 /* Abort the range */
4404 ubc_upl_abort_range (upl, 0, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
4405 }
4406
4407
4408 return error;
4409 }
4410 #endif /* CONFIG_PROTECT */
4411
4412 if (ap->a_pl != NULL) {
4413 /*
4414 * this can only happen for swap files now that
4415 * we're asking for V2 paging behavior...
4416 * so don't need to worry about decompression, or
4417 * keeping track of blocks read or taking the truncate lock
4418 */
4419 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
4420 ap->a_size, (off_t)fp->ff_size, ap->a_flags);
4421 goto pagein_done;
4422 }
4423
4424 page_needed_f_offset = ap->a_f_offset + ap->a_pl_offset;
4425
4426 retry_pagein:
4427 /*
4428 * take truncate lock (shared/recursive) to guard against
4429 * zero-fill thru fsync interfering, but only for v2
4430 *
4431 * the HFS_LOCK_SKIP_IF_EXCLUSIVE arg indicates that we want the
4432 * lock shared and we are allowed to recurse 1 level if this thread already
4433 * owns the lock exclusively... this can legally occur
4434 * if we are doing a shrinking ftruncate against a file
4435 * that is mapped private, and the pages being truncated
4436 * do not currently exist in the cache... in that case
4437 * we will have to page-in the missing pages in order
4438 * to provide them to the private mapping... we must
4439 * also call hfs_unlock_truncate with a positive been_recursed
4440 * arg to indicate that if we have recursed, there is no need to drop
4441 * the lock. Allowing this simple recursion is necessary
4442 * in order to avoid a certain deadlock... since the ftruncate
4443 * already holds the truncate lock exclusively, if we try
4444 * to acquire it shared to protect the pagein path, we will
4445 * hang this thread
4446 *
4447 * NOTE: The if () block below is a workaround in order to prevent a
4448 * VM deadlock. See rdar://7853471.
4449 *
4450 * If we are in a forced unmount, then launchd will still have the
4451 * dyld_shared_cache file mapped as it is trying to reboot. If we
4452 * take the truncate lock here to service a page fault, then our
4453 * thread could deadlock with the forced-unmount. The forced unmount
4454 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
4455 * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount
4456 * thread will think it needs to copy all of the data out of the file
4457 * and into a VM copy object. If we hold the cnode lock here, then that
4458 * VM operation will not be able to proceed, because we'll set a busy page
4459 * before attempting to grab the lock. Note that this isn't as simple as "don't
4460 * call ubc_setsize" because doing that would just shift the problem to the
4461 * ubc_msync done before the vnode is reclaimed.
4462 *
4463 * So, if a forced unmount on this volume is in flight AND the cnode is
4464 * marked C_DELETED, then just go ahead and do the page in without taking
4465 * the lock (thus suspending pagein_v2 semantics temporarily). Since it's on a file
4466 * that is not going to be available on the next mount, this seems like an
4467 * OK solution from a correctness point of view, even though it is hacky.
4468 */
4469 if (vfs_isforce(vnode_mount(vp))) {
4470 if (cp->c_flag & C_DELETED) {
4471 /* If we don't get it, then just go ahead and operate without the lock */
4472 truncate_lock_held = hfs_try_trunclock(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4473 }
4474 }
4475 else {
4476 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4477 truncate_lock_held = TRUE;
4478 }
4479
4480 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
4481
4482 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
4483 error = EINVAL;
4484 goto pagein_done;
4485 }
4486 ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);
4487
4488 upl_size = isize = ap->a_size;
4489
4490 /*
4491 * Scan from the back to find the last page in the UPL, so that we
4492 * aren't looking at a UPL that may have already been freed by the
4493 * preceding aborts/completions.
4494 */
4495 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
4496 if (upl_page_present(pl, --pg_index))
4497 break;
4498 if (pg_index == 0) {
4499 /*
4500 * no absent pages were found in the range specified
4501 * just abort the UPL to get rid of it and then we're done
4502 */
4503 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
4504 goto pagein_done;
4505 }
4506 }
4507 /*
4508 * initialize the offset variables before we touch the UPL.
4509 * f_offset is the position into the file, in bytes
4510 * offset is the position into the UPL, in bytes
4511 * pg_index is the pg# of the UPL we're operating on
4512 * isize is the offset into the UPL of the last page that is present.
4513 */
4514 isize = ((pg_index + 1) * PAGE_SIZE);
4515 pg_index = 0;
4516 offset = 0;
4517 f_offset = ap->a_f_offset;
4518
4519 while (isize) {
4520 int xsize;
4521 int num_of_pages;
4522
4523 if ( !upl_page_present(pl, pg_index)) {
4524 /*
4525 * we asked for RET_ONLY_ABSENT, so it's possible
4526 * to get back empty slots in the UPL.
4527 * just skip over them
4528 */
4529 f_offset += PAGE_SIZE;
4530 offset += PAGE_SIZE;
4531 isize -= PAGE_SIZE;
4532 pg_index++;
4533
4534 continue;
4535 }
4536 /*
4537 * We know that we have at least one absent page.
4538 * Now checking to see how many in a row we have
4539 */
4540 num_of_pages = 1;
4541 xsize = isize - PAGE_SIZE;
4542
4543 while (xsize) {
4544 if ( !upl_page_present(pl, pg_index + num_of_pages))
4545 break;
4546 num_of_pages++;
4547 xsize -= PAGE_SIZE;
4548 }
4549 xsize = num_of_pages * PAGE_SIZE;
4550
4551 #if HFS_COMPRESSION
4552 if (VNODE_IS_RSRC(vp)) {
4553 /* allow pageins of the resource fork */
4554 } else {
4555 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
4556
4557 if (compressed) {
4558
4559 if (truncate_lock_held) {
4560 /*
4561 * can't hold the truncate lock when calling into the decmpfs layer
4562 * since it calls back into this layer... even though we're only
4563 * holding the lock in shared mode, and the re-entrant path only
4564 * takes the lock shared, we can deadlock if some other thread
4565 * tries to grab the lock exclusively in between.
4566 */
4567 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4568 truncate_lock_held = FALSE;
4569 }
4570 ap->a_pl = upl;
4571 ap->a_pl_offset = offset;
4572 ap->a_f_offset = f_offset;
4573 ap->a_size = xsize;
4574
4575 error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
4576 /*
4577 * note that decmpfs_pagein_compressed can change the state of
4578 * 'compressed'... it will set it to 0 if the file is no longer
4579 * compressed once the compression lock is successfully taken
4580 * i.e. we would block on that lock while the file is being inflated
4581 */
4582 if (error == 0 && vnode_isfastdevicecandidate(vp)) {
4583 (void) hfs_addhotfile(vp);
4584 }
4585 if (compressed) {
4586 if (error == 0) {
4587 /* successful page-in, update the access time */
4588 VTOC(vp)->c_touch_acctime = TRUE;
4589
4590 //
4591 // compressed files are not traditional hot file candidates
4592 // but they may be for CF (which ignores the ff_bytesread
4593 // field)
4594 //
4595 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
4596 fp->ff_bytesread = 0;
4597 }
4598 } else if (error == EAGAIN) {
4599 /*
4600 * EAGAIN indicates someone else already holds the compression lock...
4601 * to avoid deadlocking, we'll abort this range of pages with an
4602 * indication that the pagein needs to be redriven
4603 */
4604 ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
4605 } else if (error == ENOSPC) {
4606
4607 if (upl_size == PAGE_SIZE)
4608 panic("decmpfs_pagein_compressed: couldn't ubc_upl_map a single page\n");
4609
4610 ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
4611
4612 ap->a_size = PAGE_SIZE;
4613 ap->a_pl = NULL;
4614 ap->a_pl_offset = 0;
4615 ap->a_f_offset = page_needed_f_offset;
4616
4617 goto retry_pagein;
4618 } else {
4619 ubc_upl_abort(upl, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
4620 goto pagein_done;
4621 }
4622 goto pagein_next_range;
4623 }
4624 else {
4625 /*
4626 * Set file_converted only if the file became decompressed while we were
4627 * paging in. If it were still compressed, we would re-start the loop using the goto
4628 * in the above block. This avoids overloading truncate_lock_held as our retry_pagein
4629 * condition below, since we could have avoided taking the truncate lock to prevent
4630 * a deadlock in the force unmount case.
4631 */
4632 file_converted = TRUE;
4633 }
4634 }
4635 if (file_converted == TRUE) {
4636 /*
4637 * the file was converted back to a regular file after we first saw it as compressed
4638 * we need to abort the upl, retake the truncate lock, recreate the UPL and start over
4639 * reset a_size so that we consider what remains of the original request
4640 * and null out a_upl and a_pl_offset.
4641 *
4642 * We should only be able to get into this block if the decmpfs_pagein_compressed
4643 * successfully decompressed the range in question for this file.
4644 */
4645 ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
4646
4647 ap->a_size = isize;
4648 ap->a_pl = NULL;
4649 ap->a_pl_offset = 0;
4650
4651 /* Reset file_converted back to false so that we don't infinite-loop. */
4652 file_converted = FALSE;
4653 goto retry_pagein;
4654 }
4655 }
4656 #endif
4657 error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);
4658
4659 /*
4660 * Keep track of blocks read.
4661 */
4662 if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
4663 int bytesread;
4664 int took_cnode_lock = 0;
4665
4666 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
4667 bytesread = fp->ff_size;
4668 else
4669 bytesread = xsize;
4670
4671 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
4672 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
4673 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
4674 took_cnode_lock = 1;
4675 }
4676 /*
4677 * If this file hasn't been seen since the start of
4678 * the current sampling period then start over.
4679 */
4680 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
4681 struct timeval tv;
4682
4683 fp->ff_bytesread = bytesread;
4684 microtime(&tv);
4685 cp->c_atime = tv.tv_sec;
4686 } else {
4687 fp->ff_bytesread += bytesread;
4688 }
4689 cp->c_touch_acctime = TRUE;
4690
4691 if (vnode_isfastdevicecandidate(vp)) {
4692 (void) hfs_addhotfile(vp);
4693 }
4694 if (took_cnode_lock)
4695 hfs_unlock(cp);
4696 }
4697 pagein_next_range:
4698 f_offset += xsize;
4699 offset += xsize;
4700 isize -= xsize;
4701 pg_index += num_of_pages;
4702
4703 error = 0;
4704 }
4705
4706 pagein_done:
4707 if (truncate_lock_held == TRUE) {
4708 /* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */
4709 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4710 }
4711
4712 return (error);
4713 }
4714
4715 /*
4716 * Pageout for HFS filesystem.
4717 */
4718 int
4719 hfs_vnop_pageout(struct vnop_pageout_args *ap)
4720 /*
4721 struct vnop_pageout_args {
4722 vnode_t a_vp,
4723 upl_t a_pl,
4724 vm_offset_t a_pl_offset,
4725 off_t a_f_offset,
4726 size_t a_size,
4727 int a_flags
4728 vfs_context_t a_context;
4729 };
4730 */
4731 {
4732 vnode_t vp = ap->a_vp;
4733 struct cnode *cp;
4734 struct filefork *fp;
4735 int retval = 0;
4736 off_t filesize;
4737 upl_t upl;
4738 upl_page_info_t* pl = NULL;
4739 vm_offset_t a_pl_offset;
4740 int a_flags;
4741 int is_pageoutv2 = 0;
4742 kern_return_t kret;
4743
4744 cp = VTOC(vp);
4745 fp = VTOF(vp);
4746
4747 a_flags = ap->a_flags;
4748 a_pl_offset = ap->a_pl_offset;
4749
4750 /*
4751 * we can tell if we're getting the new or old behavior from the UPL
4752 */
4753 if ((upl = ap->a_pl) == NULL) {
4754 int request_flags;
4755
4756 is_pageoutv2 = 1;
4757 /*
4758 * we're in control of any UPL we commit
4759 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
4760 */
4761 a_flags &= ~UPL_NOCOMMIT;
4762 a_pl_offset = 0;
4763
4764 /*
4765 * For V2 semantics, we want to take the cnode truncate lock
4766 * shared to guard against the file size changing via zero-filling.
4767 *
4768 * However, we have to be careful because we may be invoked
4769 * via the ubc_msync path to write out dirty mmap'd pages
4770 * in response to a lock event on a content-protected
4771 * filesystem (e.g. to write out class A files).
4772 * As a result, we want to take the truncate lock 'SHARED' with
4773 * the mini-recursion locktype so that we don't deadlock/panic
4774 * because we may be already holding the truncate lock exclusive to force any other
4775 * IOs to have blocked behind us.
4776 */
4777 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4778
4779 if (a_flags & UPL_MSYNC) {
4780 request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
4781 }
4782 else {
4783 request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
4784 }
4785
4786 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);
4787
4788 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
4789 retval = EINVAL;
4790 goto pageout_done;
4791 }
4792 }
4793 /*
4794 * from this point forward upl points at the UPL we're working with
4795 * it was either passed in or we successfully created it
4796 */
4797
4798 /*
4799 * Figure out where the file ends, for pageout purposes. If
4800 * ff_new_size > ff_size, then we're in the middle of extending the
4801 * file via a write, so it is safe (and necessary) that we be able
4802 * to pageout up to that point.
4803 */
4804 filesize = fp->ff_size;
4805 if (fp->ff_new_size > filesize)
4806 filesize = fp->ff_new_size;
4807
4808 /*
4809 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
4810 * UPL instead of relying on the UPL passed into us. We go ahead and do that here,
4811 * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for
4812 * N dirty ranges in the UPL. Note that this is almost a direct copy of the
4813 * logic in vnode_pageout except that we need to do it after grabbing the truncate
4814 * lock in HFS so that we don't lock invert ourselves.
4815 *
4816 * Note that we can still get into this function on behalf of the default pager with
4817 * non-V2 behavior (swapfiles). However in that case, we did not grab locks above
4818 * since fsync and other writing threads will grab the locks, then mark the
4819 * relevant pages as busy. But the pageout codepath marks the pages as busy,
4820 * and THEN would attempt to grab the truncate lock, which would result in deadlock. So
4821 * we do not try to grab anything for the pre-V2 case, which should only be accessed
4822 * by the paging/VM system.
4823 */
4824
4825 if (is_pageoutv2) {
4826 off_t f_offset;
4827 int offset;
4828 int isize;
4829 int pg_index;
4830 int error;
4831 int error_ret = 0;
4832
4833 isize = ap->a_size;
4834 f_offset = ap->a_f_offset;
4835
4836 /*
4837 * Scan from the back to find the last page in the UPL, so that we
4838 * aren't looking at a UPL that may have already been freed by the
4839 * preceding aborts/completions.
4840 */
4841 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
4842 if (upl_page_present(pl, --pg_index))
4843 break;
4844 if (pg_index == 0) {
4845 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
4846 goto pageout_done;
4847 }
4848 }
4849
4850 /*
4851 * initialize the offset variables before we touch the UPL.
4852 * a_f_offset is the position into the file, in bytes
4853 * offset is the position into the UPL, in bytes
4854 * pg_index is the pg# of the UPL we're operating on.
4855 * isize is the offset into the UPL of the last non-clean page.
4856 */
4857 isize = ((pg_index + 1) * PAGE_SIZE);
4858
4859 offset = 0;
4860 pg_index = 0;
4861
4862 while (isize) {
4863 int xsize;
4864 int num_of_pages;
4865
4866 if ( !upl_page_present(pl, pg_index)) {
4867 /*
4868 * we asked for RET_ONLY_DIRTY, so it's possible
4869 * to get back empty slots in the UPL.
4870 * just skip over them
4871 */
4872 f_offset += PAGE_SIZE;
4873 offset += PAGE_SIZE;
4874 isize -= PAGE_SIZE;
4875 pg_index++;
4876
4877 continue;
4878 }
4879 if ( !upl_dirty_page(pl, pg_index)) {
4880 panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
4881 }
4882
4883 /*
4884 * We know that we have at least one dirty page.
4885 * Now checking to see how many in a row we have
4886 */
4887 num_of_pages = 1;
4888 xsize = isize - PAGE_SIZE;
4889
4890 while (xsize) {
4891 if ( !upl_dirty_page(pl, pg_index + num_of_pages))
4892 break;
4893 num_of_pages++;
4894 xsize -= PAGE_SIZE;
4895 }
4896 xsize = num_of_pages * PAGE_SIZE;
4897
4898 if ((error = cluster_pageout(vp, upl, offset, f_offset,
4899 xsize, filesize, a_flags))) {
4900 if (error_ret == 0)
4901 error_ret = error;
4902 }
4903 f_offset += xsize;
4904 offset += xsize;
4905 isize -= xsize;
4906 pg_index += num_of_pages;
4907 }
4908 /* capture errnos bubbled out of cluster_pageout if they occurred */
4909 if (error_ret != 0) {
4910 retval = error_ret;
4911 }
4912 } /* end block for v2 pageout behavior */
4913 else {
4914 /*
4915 * just call cluster_pageout for old pre-v2 behavior
4916 */
4917 retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
4918 ap->a_size, filesize, a_flags);
4919 }
4920
4921 /*
4922 * If data was written, update the modification time of the file
4923 * but only if it's mapped writable; we will have touched the
4924 * modification time for direct writes.
4925 */
4926 if (retval == 0 && (ubc_is_mapped_writable(vp)
4927 || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING))) {
4928 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
4929
4930 // Check again with lock
4931 bool mapped_writable = ubc_is_mapped_writable(vp);
4932 if (mapped_writable
4933 || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING)) {
4934 cp->c_touch_modtime = TRUE;
4935 cp->c_touch_chgtime = TRUE;
4936
4937 /*
4938 * We only need to increment the generation counter if
4939 * it's currently mapped writable because we incremented
4940 * the counter in hfs_vnop_mnomap.
4941 */
4942 if (mapped_writable)
4943 hfs_incr_gencount(VTOC(vp));
4944
4945 /*
4946 * If setuid or setgid bits are set and this process is
4947 * not the superuser then clear the setuid and setgid bits
4948 * as a precaution against tampering.
4949 */
4950 if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
4951 (vfs_context_suser(ap->a_context) != 0)) {
4952 cp->c_mode &= ~(S_ISUID | S_ISGID);
4953 }
4954 }
4955
4956 hfs_unlock(cp);
4957 }
4958
4959 pageout_done:
4960 if (is_pageoutv2) {
4961 /*
4962 * Release the truncate lock. Note that because
4963 * we may have taken the lock recursively by
4964 * being invoked via ubc_msync due to lockdown,
4965 * we should release it recursively, too.
4966 */
4967 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4968 }
4969 return (retval);
4970 }
4971
4972 /*
4973 * Intercept B-Tree node writes to unswap them if necessary.
4974 */
4975 int
4976 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
4977 {
4978 int retval = 0;
4979 register struct buf *bp = ap->a_bp;
4980 register struct vnode *vp = buf_vnode(bp);
4981 BlockDescriptor block;
4982
4983 /* Trap B-Tree writes */
4984 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
4985 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
4986 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
4987 (vp == VTOHFS(vp)->hfc_filevp)) {
4988
4989 /*
4990 * Swap and validate the node if it is in native byte order.
4991 * This is always true on big endian, so we always validate
4992 * before writing here. On little endian, the node typically has
4993 * been swapped and validated when it was written to the journal,
4994 * so we won't do anything here.
4995 */
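/*
 * The last two bytes of a B-tree node hold the offset of the node's
 * first record, which is always sizeof(BTNodeDescriptor), i.e. 14
 * (0x000e).  Seeing 0x000e there when the buffer is read in host byte
 * order means the node is still in host byte order, so it gets
 * swapped/validated here before the write.
 */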
4996 if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
4997 /* Prepare the block pointer */
4998 block.blockHeader = bp;
4999 block.buffer = (char *)buf_dataptr(bp);
5000 block.blockNum = buf_lblkno(bp);
5001 /* not found in cache ==> came from disk */
5002 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
5003 block.blockSize = buf_count(bp);
5004
5005 /* Endian un-swap B-Tree node */
5006 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
5007 if (retval)
5008 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
5009 }
5010 }
5011
5012 /* This buffer shouldn't be locked anymore but if it is clear it */
5013 if ((buf_flags(bp) & B_LOCKED)) {
5014 // XXXdbg
5015 if (VTOHFS(vp)->jnl) {
5016 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
5017 }
5018 buf_clearflags(bp, B_LOCKED);
5019 }
5020 retval = vn_bwrite (ap);
5021
5022 return (retval);
5023 }
5024
5025
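/*
 * Pin or unpin a run of allocation blocks on the fast media of a
 * composite (Fusion) device: translate pin_state into the matching
 * CoreStorage _DKIOCCSPINEXTENT / _DKIOCCSUNPINEXTENT request and issue
 * it against the device vnode.
 */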
5026 int
5027 hfs_pin_block_range(struct hfsmount *hfsmp, int pin_state, uint32_t start_block, uint32_t nblocks)
5028 {
5029 _dk_cs_pin_t pin;
5030 unsigned ioc;
5031 int err;
5032
5033 memset(&pin, 0, sizeof(pin));
5034 pin.cp_extent.offset = ((uint64_t)start_block) * HFSTOVCB(hfsmp)->blockSize;
5035 pin.cp_extent.length = ((uint64_t)nblocks) * HFSTOVCB(hfsmp)->blockSize;
5036 switch (pin_state) {
5037 case HFS_PIN_IT:
5038 ioc = _DKIOCCSPINEXTENT;
5039 pin.cp_flags = _DKIOCCSPINTOFASTMEDIA;
5040 break;
5041 case HFS_PIN_IT | HFS_TEMP_PIN:
5042 ioc = _DKIOCCSPINEXTENT;
5043 pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSTEMPORARYPIN;
5044 break;
5045 case HFS_PIN_IT | HFS_DATALESS_PIN:
5046 ioc = _DKIOCCSPINEXTENT;
5047 pin.cp_flags = _DKIOCCSPINTOFASTMEDIA | _DKIOCCSPINFORSWAPFILE;
5048 break;
5049 case HFS_UNPIN_IT:
5050 ioc = _DKIOCCSUNPINEXTENT;
5051 pin.cp_flags = 0;
5052 break;
5053 case HFS_UNPIN_IT | HFS_EVICT_PIN:
5054 ioc = _DKIOCCSPINEXTENT;
5055 pin.cp_flags = _DKIOCCSPINTOSLOWMEDIA;
5056 break;
5057 default:
5058 return EINVAL;
5059 }
5060 err = VNOP_IOCTL(hfsmp->hfs_devvp, ioc, (caddr_t)&pin, 0, vfs_context_kernel());
5061 return err;
5062 }
5063
5064 //
5065 // The cnode lock should already be held on entry to this function
5066 //
5067 int
5068 hfs_pin_vnode(struct hfsmount *hfsmp, struct vnode *vp, int pin_state, uint32_t *num_blocks_pinned)
5069 {
5070 struct filefork *fp = VTOF(vp);
5071 int i, err=0, need_put=0;
5072 struct vnode *rsrc_vp=NULL;
5073 uint32_t npinned = 0;
5074 off_t offset;
5075
5076 if (num_blocks_pinned) {
5077 *num_blocks_pinned = 0;
5078 }
5079
5080 if (vnode_vtype(vp) != VREG) {
5081 /* Not allowed to pin directories or symlinks */
5082 printf("hfs: can't pin vnode of type %d\n", vnode_vtype(vp));
5083 return (EPERM);
5084 }
5085
5086 if (fp->ff_unallocblocks) {
5087 printf("hfs: can't pin a vnode w/unalloced blocks (%d)\n", fp->ff_unallocblocks);
5088 return (EINVAL);
5089 }
5090
5091 /*
5092 * It is possible that if the caller unlocked/re-locked the cnode after checking
5093 * for C_NOEXISTS|C_DELETED that the file could have been deleted while the
5094 * cnode was unlocked. So check the condition again and return ENOENT so that
5095 * the caller knows why we failed to pin the vnode.
5096 */
5097 if (VTOC(vp)->c_flag & (C_NOEXISTS|C_DELETED)) {
5098 // makes no sense to pin something that's pending deletion
5099 return ENOENT;
5100 }
5101
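/*
 * A compressed file with an empty data fork keeps its payload either in
 * an extended attribute or in the resource fork; if there is a resource
 * fork, pin that instead of the (empty) data fork.
 */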
5102 if (fp->ff_blocks == 0 && (VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
5103 if (!VNODE_IS_RSRC(vp) && hfs_vgetrsrc(hfsmp, vp, &rsrc_vp) == 0) {
5104 //printf("hfs: fileid %d resource fork nblocks: %d / size: %lld\n", VTOC(vp)->c_fileid,
5105 // VTOC(rsrc_vp)->c_rsrcfork->ff_blocks,VTOC(rsrc_vp)->c_rsrcfork->ff_size);
5106
5107 fp = VTOC(rsrc_vp)->c_rsrcfork;
5108 need_put = 1;
5109 }
5110 }
5111 if (fp->ff_blocks == 0) {
5112 if (need_put) {
5113 //
5114 // use a distinct error code for a compressed file that has no resource fork;
5115 // we return EALREADY to indicate that the data is probably already hot-file
5116 // cached because it's in an EA and the attributes btree is on the ssd
5117 //
5118 err = EALREADY;
5119 } else {
5120 err = EINVAL;
5121 }
5122 goto out;
5123 }
5124
5125 offset = 0;
5126 for (i = 0; i < kHFSPlusExtentDensity; i++) {
5127 if (fp->ff_extents[i].startBlock == 0) {
5128 break;
5129 }
5130
5131 err = hfs_pin_block_range(hfsmp, pin_state, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount);
5132 if (err) {
5133 break;
5134 } else {
5135 npinned += fp->ff_extents[i].blockCount;
5136 }
5137 }
5138
5139 if (err || npinned == 0) {
5140 goto out;
5141 }
5142
5143 if (fp->ff_extents[kHFSPlusExtentDensity-1].startBlock) {
5144 uint32_t pblocks;
5145 uint8_t forktype = 0;
5146
5147 if (fp == VTOC(vp)->c_rsrcfork) {
5148 forktype = 0xff;
5149 }
5150 /*
5151 * The file could have overflow extents, better pin them.
5152 *
5153 * We assume that since we are holding the cnode lock for this cnode,
5154 * the file's extents cannot be manipulated, but the tree could, so we
5155 * need to ensure that it doesn't change behind our back as we iterate it.
5156 */
5157 int lockflags = hfs_systemfile_lock (hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
5158 err = hfs_pin_overflow_extents(hfsmp, VTOC(vp)->c_fileid, forktype, &pblocks);
5159 hfs_systemfile_unlock (hfsmp, lockflags);
5160
5161 if (err) {
5162 goto out;
5163 }
5164 npinned += pblocks;
5165 }
5166
5167 out:
5168 if (num_blocks_pinned) {
5169 *num_blocks_pinned = npinned;
5170 }
5171
5172 if (need_put && rsrc_vp) {
5173 //
5174 // have to unlock the cnode since it's shared between the
5175 // resource fork vnode and the data fork vnode (and the
5176 // vnode_put() may need to re-acquire the cnode lock to
5177 // reclaim the resource fork vnode)
5178 //
5179 hfs_unlock(VTOC(vp));
5180 vnode_put(rsrc_vp);
5181 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5182 }
5183 return err;
5184 }
5185
5186
5187 /*
5188 * Relocate a file to a new location on disk
5189 * cnode must be locked on entry
5190 *
5191 * Relocation occurs by cloning the file's data from its
5192 * current set of blocks to a new set of blocks. During
5193 * the relocation all of the blocks (old and new) are
5194 * owned by the file.
5195 *
5196 * -----------------
5197 * |///////////////|
5198 * -----------------
5199 * 0 N (file offset)
5200 *
5201 * ----------------- -----------------
5202 * |///////////////| | | STEP 1 (acquire new blocks)
5203 * ----------------- -----------------
5204 * 0 N N+1 2N
5205 *
5206 * ----------------- -----------------
5207 * |///////////////| |///////////////| STEP 2 (clone data)
5208 * ----------------- -----------------
5209 * 0 N N+1 2N
5210 *
5211 * -----------------
5212 * |///////////////| STEP 3 (head truncate blocks)
5213 * -----------------
5214 * 0 N
5215 *
5216 * During steps 2 and 3 page-outs to file offsets less
5217 * than or equal to N are suspended.
5218 *
5219 * During step 3 page-ins to the file get suspended.
5220 */
5221 int
5222 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
5223 struct proc *p)
5224 {
5225 struct cnode *cp;
5226 struct filefork *fp;
5227 struct hfsmount *hfsmp;
5228 u_int32_t headblks;
5229 u_int32_t datablks;
5230 u_int32_t blksize;
5231 u_int32_t growsize;
5232 u_int32_t nextallocsave;
5233 daddr64_t sector_a, sector_b;
5234 int eflags;
5235 off_t newbytes;
5236 int retval;
5237 int lockflags = 0;
5238 int took_trunc_lock = 0;
5239 int started_tr = 0;
5240 enum vtype vnodetype;
5241
5242 vnodetype = vnode_vtype(vp);
5243 if (vnodetype != VREG) {
5244 /* Not allowed to move symlinks. */
5245 return (EPERM);
5246 }
5247
5248 hfsmp = VTOHFS(vp);
5249 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
5250 return (ENOSPC);
5251 }
5252
5253 cp = VTOC(vp);
5254 fp = VTOF(vp);
5255 if (fp->ff_unallocblocks)
5256 return (EINVAL);
5257
5258 #if CONFIG_PROTECT
5259 /*
5260 * <rdar://problem/9118426>
5261 * Disable HFS file relocation on content-protected filesystems
5262 */
5263 if (cp_fs_protected (hfsmp->hfs_mp)) {
5264 return EINVAL;
5265 }
5266 #endif
5267 /* If it's an SSD, also disable HFS relocation */
5268 if (hfsmp->hfs_flags & HFS_SSD) {
5269 return EINVAL;
5270 }
5271
5272
5273 blksize = hfsmp->blockSize;
5274 if (blockHint == 0)
5275 blockHint = hfsmp->nextAllocation;
5276
5277 if (fp->ff_size > 0x7fffffff) {
5278 return (EFBIG);
5279 }
5280
5281 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
5282 hfs_unlock(cp);
5283 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
5284 /* Force lock since callers expect the lock to be held. */
5285 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) {
5286 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5287 return (retval);
5288 }
5289 /* No need to continue if file was removed. */
5290 if (cp->c_flag & C_NOEXISTS) {
5291 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5292 return (ENOENT);
5293 }
5294 took_trunc_lock = 1;
5295 }
5296 headblks = fp->ff_blocks;
5297 datablks = howmany(fp->ff_size, blksize);
5298 growsize = datablks * blksize;
5299 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
5300 if (blockHint >= hfsmp->hfs_metazone_start &&
5301 blockHint <= hfsmp->hfs_metazone_end)
5302 eflags |= kEFMetadataMask;
5303
5304 if (hfs_start_transaction(hfsmp) != 0) {
5305 if (took_trunc_lock)
5306 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5307 return (EINVAL);
5308 }
5309 started_tr = 1;
5310 /*
5311 * Protect the extents b-tree and the allocation bitmap
5312 * during MapFileBlockC and ExtendFileC operations.
5313 */
5314 lockflags = SFL_BITMAP;
5315 if (overflow_extents(fp))
5316 lockflags |= SFL_EXTENTS;
5317 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5318
5319 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
5320 if (retval) {
5321 retval = MacToVFSError(retval);
5322 goto out;
5323 }
5324
5325 /*
5326 * STEP 1 - acquire new allocation blocks.
5327 */
5328 nextallocsave = hfsmp->nextAllocation;
5329 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
5330 if (eflags & kEFMetadataMask) {
5331 hfs_lock_mount(hfsmp);
5332 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
5333 MarkVCBDirty(hfsmp);
5334 hfs_unlock_mount(hfsmp);
5335 }
5336
5337 retval = MacToVFSError(retval);
5338 if (retval == 0) {
5339 cp->c_flag |= C_MODIFIED;
5340 if (newbytes < growsize) {
5341 retval = ENOSPC;
5342 goto restore;
5343 } else if (fp->ff_blocks < (headblks + datablks)) {
5344 printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN);
5345 retval = ENOSPC;
5346 goto restore;
5347 }
5348
5349 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
5350 if (retval) {
5351 retval = MacToVFSError(retval);
5352 } else if ((sector_a + 1) == sector_b) {
5353 retval = ENOSPC;
5354 goto restore;
5355 } else if ((eflags & kEFMetadataMask) &&
5356 ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
5357 hfsmp->hfs_metazone_end)) {
5358 #if 0
5359 const char * filestr;
5360 char emptystr = '\0';
5361
5362 if (cp->c_desc.cd_nameptr != NULL) {
5363 filestr = (const char *)&cp->c_desc.cd_nameptr[0];
5364 } else if (vnode_name(vp) != NULL) {
5365 filestr = vnode_name(vp);
5366 } else {
5367 filestr = &emptystr;
5368 }
5369 #endif
5370 retval = ENOSPC;
5371 goto restore;
5372 }
5373 }
5374 /* Done with system locks and journal for now. */
5375 hfs_systemfile_unlock(hfsmp, lockflags);
5376 lockflags = 0;
5377 hfs_end_transaction(hfsmp);
5378 started_tr = 0;
5379
5380 if (retval) {
5381 /*
5382 * Check to see if failure is due to excessive fragmentation.
5383 */
5384 if ((retval == ENOSPC) &&
5385 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
5386 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
5387 }
5388 goto out;
5389 }
5390 /*
5391 * STEP 2 - clone file data into the new allocation blocks.
5392 */
5393
5394 if (vnodetype == VLNK)
5395 retval = EPERM;
5396 else if (vnode_issystem(vp))
5397 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
5398 else
5399 retval = hfs_clonefile(vp, headblks, datablks, blksize);
5400
5401 /* Start transaction for step 3 or for a restore. */
5402 if (hfs_start_transaction(hfsmp) != 0) {
5403 retval = EINVAL;
5404 goto out;
5405 }
5406 started_tr = 1;
5407 if (retval)
5408 goto restore;
5409
5410 /*
5411 * STEP 3 - switch to cloned data and remove old blocks.
5412 */
5413 lockflags = SFL_BITMAP;
5414 if (overflow_extents(fp))
5415 lockflags |= SFL_EXTENTS;
5416 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5417
5418 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
5419
5420 hfs_systemfile_unlock(hfsmp, lockflags);
5421 lockflags = 0;
5422 if (retval)
5423 goto restore;
5424 out:
5425 if (took_trunc_lock)
5426 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5427
5428 if (lockflags) {
5429 hfs_systemfile_unlock(hfsmp, lockflags);
5430 lockflags = 0;
5431 }
5432
5433 /* Push cnode's new extent data to disk. */
5434 if (retval == 0) {
5435 hfs_update(vp, 0);
5436 }
5437 if (hfsmp->jnl) {
5438 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
5439 (void) hfs_flushvolumeheader(hfsmp, HFS_FVH_WAIT | HFS_FVH_WRITE_ALT);
5440 else
5441 (void) hfs_flushvolumeheader(hfsmp, 0);
5442 }
5443 exit:
5444 if (started_tr)
5445 hfs_end_transaction(hfsmp);
5446
5447 return (retval);
5448
5449 restore:
5450 if (fp->ff_blocks == headblks) {
5451 if (took_trunc_lock)
5452 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5453 goto exit;
5454 }
5455 /*
5456 * Give back any newly allocated space.
5457 */
5458 if (lockflags == 0) {
5459 lockflags = SFL_BITMAP;
5460 if (overflow_extents(fp))
5461 lockflags |= SFL_EXTENTS;
5462 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5463 }
5464
5465 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
5466 FTOC(fp)->c_fileid, false);
5467
5468 hfs_systemfile_unlock(hfsmp, lockflags);
5469 lockflags = 0;
5470
5471 if (took_trunc_lock)
5472 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5473 goto exit;
5474 }
5475
5476
5477 /*
5478 * Clone a file's data within the file.
5479 *
5480 */
5481 static int
5482 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
5483 {
5484 caddr_t bufp;
5485 size_t bufsize;
5486 size_t copysize;
5487 size_t iosize;
5488 size_t offset;
5489 off_t writebase;
5490 uio_t auio;
5491 int error = 0;
5492
5493 writebase = blkstart * blksize;
5494 copysize = blkcnt * blksize;
5495 iosize = bufsize = MIN(copysize, 128 * 1024);
5496 offset = 0;
5497
5498 hfs_unlock(VTOC(vp));
5499
5500 #if CONFIG_PROTECT
5501 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
5502 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5503 return (error);
5504 }
5505 #endif /* CONFIG_PROTECT */
5506
5507 bufp = hfs_malloc(bufsize);
5508
5509 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
5510
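/*
 * Copy the fork's data in bufsize chunks: read at logical offset
 * `offset` (the original blocks) and rewrite the same bytes at
 * writebase + offset, which falls in the newly allocated blocks that
 * hfs_relocate appended to the fork.
 */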
5511 while (offset < copysize) {
5512 iosize = MIN(copysize - offset, iosize);
5513
5514 uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
5515 uio_addiov(auio, (uintptr_t)bufp, iosize);
5516
5517 error = cluster_read(vp, auio, copysize, IO_NOCACHE);
5518 if (error) {
5519 printf("hfs_clonefile: cluster_read failed - %d\n", error);
5520 break;
5521 }
5522 if (uio_resid(auio) != 0) {
5523 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
5524 error = EIO;
5525 break;
5526 }
5527
5528 uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
5529 uio_addiov(auio, (uintptr_t)bufp, iosize);
5530
5531 error = cluster_write(vp, auio, writebase + offset,
5532 writebase + offset + iosize,
5533 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
5534 if (error) {
5535 printf("hfs_clonefile: cluster_write failed - %d\n", error);
5536 break;
5537 }
5538 if (uio_resid(auio) != 0) {
5539 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
5540 error = EIO;
5541 break;
5542 }
5543 offset += iosize;
5544 }
5545 uio_free(auio);
5546
5547 if ((blksize & PAGE_MASK)) {
5548 /*
5549 * since the copy may not have started on a PAGE
5550 * boundary (or may not have ended on one), we
5551 * may have pages left in the cache since NOCACHE
5552 * will let partially written pages linger...
5553 * let's just flush the entire range to make sure
5554 * we don't have any pages left that are beyond
5555 * (or intersect) the real LEOF of this file
5556 */
5557 ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
5558 } else {
5559 /*
5560 * No need to call ubc_msync or hfs_invalbuf
5561 * since the file was copied using IO_NOCACHE and
5562 * the copy was done starting and ending on a page
5563 * boundary in the file.
5564 */
5565 }
5566 hfs_free(bufp, bufsize);
5567
5568 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5569 return (error);
5570 }
5571
5572 /*
5573 * Clone a system (metadata) file.
5574 *
5575 */
5576 static int
5577 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
5578 kauth_cred_t cred, struct proc *p)
5579 {
5580 caddr_t bufp;
5581 char * offset;
5582 size_t bufsize;
5583 size_t iosize;
5584 struct buf *bp = NULL;
5585 daddr64_t blkno;
5586 daddr64_t blk;
5587 daddr64_t start_blk;
5588 daddr64_t last_blk;
5589 int breadcnt;
5590 int i;
5591 int error = 0;
5592
5593
5594 iosize = GetLogicalBlockSize(vp);
5595 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
5596 breadcnt = bufsize / iosize;
5597
5598 bufp = hfs_malloc(bufsize);
5599
5600 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
5601 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
5602 blkno = 0;
5603
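/*
 * Copy last_blk logical-size blocks of metadata: each outer pass reads
 * up to bufsize worth of blocks from the front of the fork and rewrites
 * them at start_blk + blkno, i.e. into the newly allocated region.
 */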
5604 while (blkno < last_blk) {
5605 /*
5606 * Read up to a megabyte
5607 */
5608 offset = bufp;
5609 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
5610 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
5611 if (error) {
5612 printf("hfs_clonesysfile: meta_bread error %d\n", error);
5613 goto out;
5614 }
5615 if (buf_count(bp) != iosize) {
5616 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
5617 goto out;
5618 }
5619 bcopy((char *)buf_dataptr(bp), offset, iosize);
5620
5621 buf_markinvalid(bp);
5622 buf_brelse(bp);
5623 bp = NULL;
5624
5625 offset += iosize;
5626 }
5627
5628 /*
5629 * Write up to a megabyte
5630 */
5631 offset = bufp;
5632 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
5633 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
5634 if (bp == NULL) {
5635 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
5636 error = EIO;
5637 goto out;
5638 }
5639 bcopy(offset, (char *)buf_dataptr(bp), iosize);
5640 error = (int)buf_bwrite(bp);
5641 bp = NULL;
5642 if (error)
5643 goto out;
5644 offset += iosize;
5645 }
5646 }
5647 out:
5648 if (bp) {
5649 buf_brelse(bp);
5650 }
5651
5652 hfs_free(bufp, bufsize);
5653
5654 error = hfs_fsync(vp, MNT_WAIT, 0, p);
5655
5656 return (error);
5657 }
5658
5659 errno_t hfs_flush_invalid_ranges(vnode_t vp)
5660 {
5661 cnode_t *cp = VTOC(vp);
5662
5663 hfs_assert(cp->c_lockowner == current_thread());
5664 hfs_assert(cp->c_truncatelockowner == current_thread());
5665
5666 if (!ISSET(cp->c_flag, C_ZFWANTSYNC) && !cp->c_zftimeout)
5667 return 0;
5668
5669 filefork_t *fp = VTOF(vp);
5670
5671 /*
5672 * We can't hold the cnode lock whilst we call cluster_write so we
5673 * need to copy the extents into a local buffer.
5674 */
5675 int max_exts = 16;
5676 struct ext {
5677 off_t start, end;
5678 } exts_buf[max_exts]; // 256 bytes
5679 struct ext *exts = exts_buf;
5680 int ext_count = 0;
5681 errno_t ret;
5682
5683 struct rl_entry *r = TAILQ_FIRST(&fp->ff_invalidranges);
5684
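/*
 * Walk the invalid-range list in batches: copy up to max_exts ranges
 * into exts[], drop the cnode lock, zero-fill each batched range with
 * cluster_write(IO_HEADZEROFILL), then retake the lock and resume the
 * walk just past the last range flushed.
 */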
5685 while (r) {
5686 /* If we have more than can fit in our stack buffer, switch
5687 to a heap buffer. */
5688 if (exts == exts_buf && ext_count == max_exts) {
5689 max_exts = 256;
5690 exts = hfs_malloc(sizeof(struct ext) * max_exts);
5691 memcpy(exts, exts_buf, ext_count * sizeof(struct ext));
5692 }
5693
5694 struct rl_entry *next = TAILQ_NEXT(r, rl_link);
5695
5696 exts[ext_count++] = (struct ext){ r->rl_start, r->rl_end };
5697
5698 if (!next || (ext_count == max_exts && exts != exts_buf)) {
5699 hfs_unlock(cp);
5700 for (int i = 0; i < ext_count; ++i) {
5701 ret = cluster_write(vp, NULL, fp->ff_size, exts[i].end + 1,
5702 exts[i].start, 0,
5703 IO_HEADZEROFILL | IO_NOZERODIRTY | IO_NOCACHE);
5704 if (ret) {
5705 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
5706 goto exit;
5707 }
5708 }
5709
5710 if (!next) {
5711 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
5712 break;
5713 }
5714
5715 /* Push any existing clusters which should clean up our invalid
5716 ranges as they go through hfs_vnop_blockmap. */
5717 cluster_push(vp, 0);
5718
5719 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
5720
5721 /*
5722 * Get back to where we were (given we dropped the lock).
5723 * This shouldn't be many because we pushed above.
5724 */
5725 TAILQ_FOREACH(r, &fp->ff_invalidranges, rl_link) {
5726 if (r->rl_end > exts[ext_count - 1].end)
5727 break;
5728 }
5729
5730 ext_count = 0;
5731 } else
5732 r = next;
5733 }
5734
5735 ret = 0;
5736
5737 exit:
5738
5739 if (exts != exts_buf)
5740 hfs_free(exts, sizeof(struct ext) * max_exts);
5741
5742 return ret;
5743 }