1 /*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* @(#)hfs_readwrite.c 1.0
29 *
30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
31 *
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
33 *
34 */
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/resourcevar.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/filedesc.h>
42 #include <sys/stat.h>
43 #include <sys/buf.h>
44 #include <sys/buf_internal.h>
45 #include <sys/proc.h>
46 #include <sys/kauth.h>
47 #include <sys/vnode.h>
48 #include <sys/vnode_internal.h>
49 #include <sys/uio.h>
50 #include <sys/vfs_context.h>
51 #include <sys/fsevents.h>
52 #include <kern/kalloc.h>
53 #include <sys/disk.h>
54 #include <sys/sysctl.h>
55 #include <sys/fsctl.h>
56 #include <sys/mount_internal.h>
57
58 #include <miscfs/specfs/specdev.h>
59
60 #include <sys/ubc.h>
61 #include <sys/ubc_internal.h>
62
63 #include <vm/vm_pageout.h>
64 #include <vm/vm_kern.h>
65
66 #include <sys/kdebug.h>
67
68 #include "hfs.h"
69 #include "hfs_attrlist.h"
70 #include "hfs_endian.h"
71 #include "hfs_fsctl.h"
72 #include "hfs_quota.h"
73 #include "hfscommon/headers/FileMgrInternal.h"
74 #include "hfscommon/headers/BTreesInternal.h"
75 #include "hfs_cnode.h"
76 #include "hfs_dbg.h"
77
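/*
 * A transfer can use the cluster layer only when its size is a whole
 * multiple of 4 KB and no larger than half of MAXPHYSIO.
 */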
78 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
79
80 enum {
81 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
82 };
83
84 /* from bsd/hfs/hfs_vfsops.c */
85 extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
86
87 static int hfs_clonefile(struct vnode *, int, int, int);
88 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
89 static int hfs_minorupdate(struct vnode *vp);
90 static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
91
92 /* from bsd/hfs/hfs_vnops.c */
93 extern decmpfs_cnode* hfs_lazy_init_decmpfs_cnode (struct cnode *cp);
94
95
96
97 int flush_cache_on_write = 0;
98 SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW | CTLFLAG_LOCKED, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
99
100 /*
101 * Read data from a file.
102 */
103 int
104 hfs_vnop_read(struct vnop_read_args *ap)
105 {
106 /*
107 struct vnop_read_args {
108 struct vnodeop_desc *a_desc;
109 vnode_t a_vp;
110 struct uio *a_uio;
111 int a_ioflag;
112 vfs_context_t a_context;
113 };
114 */
115
116 uio_t uio = ap->a_uio;
117 struct vnode *vp = ap->a_vp;
118 struct cnode *cp;
119 struct filefork *fp;
120 struct hfsmount *hfsmp;
121 off_t filesize;
122 off_t filebytes;
123 off_t start_resid = uio_resid(uio);
124 off_t offset = uio_offset(uio);
125 int retval = 0;
126 int took_truncate_lock = 0;
127 int io_throttle = 0;
128
129 /* Preflight checks */
130 if (!vnode_isreg(vp)) {
131 /* can only read regular files */
132 if (vnode_isdir(vp))
133 return (EISDIR);
134 else
135 return (EPERM);
136 }
137 if (start_resid == 0)
138 return (0); /* Nothing left to do */
139 if (offset < 0)
140 	        return (EINVAL);        /* can't read from a negative offset */
141
142
143
144 #if HFS_COMPRESSION
145 if (VNODE_IS_RSRC(vp)) {
146 if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
147 return 0;
148 }
149 /* otherwise read the resource fork normally */
150 } else {
151 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
152 if (compressed) {
153 retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
154 if (compressed) {
155 if (retval == 0) {
156 /* successful read, update the access time */
157 VTOC(vp)->c_touch_acctime = TRUE;
158
159 /* compressed files are not hot file candidates */
160 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
161 VTOF(vp)->ff_bytesread = 0;
162 }
163 }
164 return retval;
165 }
166 /* otherwise the file was converted back to a regular file while we were reading it */
167 retval = 0;
168 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
169 int error;
170
171 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
172 if (error) {
173 return error;
174 }
175
176 }
177 }
178 #endif /* HFS_COMPRESSION */
179
180 cp = VTOC(vp);
181 fp = VTOF(vp);
182 hfsmp = VTOHFS(vp);
183
184 #if CONFIG_PROTECT
185 if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {
186 goto exit;
187 }
188 #endif
189
190 /*
191 * If this read request originated from a syscall (as opposed to
192 	 * an in-kernel page fault or other kernel-originated request), then set it up for
193 * throttle checks
194 */
195 if (ap->a_ioflag & IO_SYSCALL_DISPATCH) {
196 io_throttle = IO_RETURN_ON_THROTTLE;
197 }
198
199 read_again:
200
201 /* Protect against a size change. */
202 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
203 took_truncate_lock = 1;
204
205 filesize = fp->ff_size;
206 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
207 if (offset > filesize) {
208 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
209 (offset > (off_t)MAXHFSFILESIZE)) {
210 retval = EFBIG;
211 }
212 goto exit;
213 }
214
215 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
216 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
217
218 retval = cluster_read(vp, uio, filesize, ap->a_ioflag |io_throttle);
219
220 cp->c_touch_acctime = TRUE;
221
222 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
223 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
224
225 /*
226 	 * Keep track of blocks read
227 */
228 if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
229 int took_cnode_lock = 0;
230 off_t bytesread;
231
232 bytesread = start_resid - uio_resid(uio);
233
234 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
235 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
236 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
237 took_cnode_lock = 1;
238 }
239 /*
240 * If this file hasn't been seen since the start of
241 * the current sampling period then start over.
242 */
243 if (cp->c_atime < hfsmp->hfc_timebase) {
244 struct timeval tv;
245
246 fp->ff_bytesread = bytesread;
247 microtime(&tv);
248 cp->c_atime = tv.tv_sec;
249 } else {
250 fp->ff_bytesread += bytesread;
251 }
252 if (took_cnode_lock)
253 hfs_unlock(cp);
254 }
255 exit:
256 if (took_truncate_lock) {
257 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
258 }
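	/*
	 * When IO_RETURN_ON_THROTTLE was requested above, cluster_read() hands
	 * back EAGAIN instead of blocking while the thread is being throttled;
	 * honor the throttle here and then retry the read from the top.
	 */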
259 if (retval == EAGAIN) {
260 throttle_lowpri_io(1);
261
262 retval = 0;
263 goto read_again;
264 }
265 return (retval);
266 }
267
268 /*
269 * Write data to a file.
270 */
271 int
272 hfs_vnop_write(struct vnop_write_args *ap)
273 {
274 uio_t uio = ap->a_uio;
275 struct vnode *vp = ap->a_vp;
276 struct cnode *cp;
277 struct filefork *fp;
278 struct hfsmount *hfsmp;
279 kauth_cred_t cred = NULL;
280 off_t origFileSize;
281 off_t writelimit;
282 off_t bytesToAdd = 0;
283 off_t actualBytesAdded;
284 off_t filebytes;
285 off_t offset;
286 ssize_t resid;
287 int eflags;
288 int ioflag = ap->a_ioflag;
289 int retval = 0;
290 int lockflags;
291 int cnode_locked = 0;
292 int partialwrite = 0;
293 int do_snapshot = 1;
294 time_t orig_ctime=VTOC(vp)->c_ctime;
295 int took_truncate_lock = 0;
296 int io_return_on_throttle = 0;
297 struct rl_entry *invalid_range;
298
299 #if HFS_COMPRESSION
300 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
301 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
302 switch(state) {
303 case FILE_IS_COMPRESSED:
304 return EACCES;
305 case FILE_IS_CONVERTING:
306 /* if FILE_IS_CONVERTING, we allow writes but do not
307 bother with snapshots or else we will deadlock.
308 */
309 do_snapshot = 0;
310 break;
311 default:
312 printf("invalid state %d for compressed file\n", state);
313 /* fall through */
314 }
315 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
316 int error;
317
318 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
319 if (error != 0) {
320 return error;
321 }
322 }
323
324 if (do_snapshot) {
325 check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);
326 }
327
328 #endif
329
330 resid = uio_resid(uio);
331 offset = uio_offset(uio);
332
333 if (offset < 0)
334 return (EINVAL);
335 if (resid == 0)
336 return (E_NONE);
337 if (!vnode_isreg(vp))
338 return (EPERM); /* Can only write regular files */
339
340 cp = VTOC(vp);
341 fp = VTOF(vp);
342 hfsmp = VTOHFS(vp);
343
344 #if CONFIG_PROTECT
345 if ((retval = cp_handle_vnop (vp, CP_WRITE_ACCESS, 0)) != 0) {
346 goto exit;
347 }
348 #endif
349
350 eflags = kEFDeferMask; /* defer file block allocations */
351 #if HFS_SPARSE_DEV
352 /*
353 * When the underlying device is sparse and space
354 * is low (< 8MB), stop doing delayed allocations
355 * and begin doing synchronous I/O.
356 */
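	/* (2048 allocation blocks is 8 MB at the common 4 KB block size.) */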
357 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
358 (hfs_freeblks(hfsmp, 0) < 2048)) {
359 eflags &= ~kEFDeferMask;
360 ioflag |= IO_SYNC;
361 }
362 #endif /* HFS_SPARSE_DEV */
363
364 if ((ioflag & (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) ==
365 (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) {
366 io_return_on_throttle = IO_RETURN_ON_THROTTLE;
367 }
368
369 again:
371 /*
372 * Protect against a size change.
373 *
374 * Note: If took_truncate_lock is true, then we previously got the lock shared
375 * but needed to upgrade to exclusive. So try getting it exclusive from the
376 * start.
377 */
378 if (ioflag & IO_APPEND || took_truncate_lock) {
379 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
380 }
381 else {
382 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
383 }
384 took_truncate_lock = 1;
385
386 /* Update UIO */
387 if (ioflag & IO_APPEND) {
388 uio_setoffset(uio, fp->ff_size);
389 offset = fp->ff_size;
390 }
391 if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
392 retval = EPERM;
393 goto exit;
394 }
395
396 origFileSize = fp->ff_size;
397 writelimit = offset + resid;
398 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
399
400 /*
401 * We may need an exclusive truncate lock for several reasons, all
402 * of which are because we may be writing to a (portion of a) block
403 * for the first time, and we need to make sure no readers see the
404 * prior, uninitialized contents of the block. The cases are:
405 *
406 * 1. We have unallocated (delayed allocation) blocks. We may be
407 * allocating new blocks to the file and writing to them.
408 * (A more precise check would be whether the range we're writing
409 * to contains delayed allocation blocks.)
410 * 2. We need to extend the file. The bytes between the old EOF
411 * and the new EOF are not yet initialized. This is important
412 * even if we're not allocating new blocks to the file. If the
413 * old EOF and new EOF are in the same block, we still need to
414 * protect that range of bytes until they are written for the
415 * first time.
416 * 3. The write overlaps some invalid ranges (delayed zero fill; that
417 * part of the file has been allocated, but not yet written).
418 *
419 * If we had a shared lock with the above cases, we need to try to upgrade
420 * to an exclusive lock. If the upgrade fails, we will lose the shared
421 * lock, and will need to take the truncate lock again; the took_truncate_lock
422 * flag will still be set, causing us to try for an exclusive lock next time.
423 *
424 * NOTE: Testing for #3 (delayed zero fill) needs to be done while the cnode
425 * lock is held, since it protects the range lists.
426 */
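	/*
	 * Concrete instance of case 2 above: extending a 10-byte file to 20
	 * bytes stays within the first allocation block, yet bytes 10-19 hold
	 * uninitialized contents until cluster_write fills them, so the
	 * exclusive truncate lock is still required even though no new block
	 * is allocated.
	 */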
427 if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
428 ((fp->ff_unallocblocks != 0) ||
429 (writelimit > origFileSize))) {
430 if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
431 /*
432 * Lock upgrade failed and we lost our shared lock, try again.
433 * Note: we do not set took_truncate_lock=0 here. Leaving it
434 * set to 1 will cause us to try to get the lock exclusive.
435 */
436 goto again;
437 }
438 else {
439 /* Store the owner in the c_truncatelockowner field if we successfully upgrade */
440 cp->c_truncatelockowner = current_thread();
441 }
442 }
443
444 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
445 goto exit;
446 }
447 cnode_locked = 1;
448
449 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
450 hfs_incr_gencount (cp);
451 }
452
453 /*
454 * Now that we have the cnode lock, see if there are delayed zero fill ranges
455 * overlapping our write. If so, we need the truncate lock exclusive (see above).
456 */
457 if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
458 (rl_scan(&fp->ff_invalidranges, offset, writelimit-1, &invalid_range) != RL_NOOVERLAP)) {
459 /*
460 * When testing, it appeared that calling lck_rw_lock_shared_to_exclusive() causes
461 * a deadlock, rather than simply returning failure. (That is, it apparently does
462 * not behave like a "try_lock"). Since this condition is rare, just drop the
463 * cnode lock and try again. Since took_truncate_lock is set, we will
464 * automatically take the truncate lock exclusive.
465 */
466 hfs_unlock(cp);
467 cnode_locked = 0;
468 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
469 goto again;
470 }
471
472 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
473 (int)offset, uio_resid(uio), (int)fp->ff_size,
474 (int)filebytes, 0);
475
476 /* Check if we do not need to extend the file */
477 if (writelimit <= filebytes) {
478 goto sizeok;
479 }
480
481 cred = vfs_context_ucred(ap->a_context);
482 bytesToAdd = writelimit - filebytes;
483
484 #if QUOTA
485 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
486 cred, 0);
487 if (retval)
488 goto exit;
489 #endif /* QUOTA */
490
491 if (hfs_start_transaction(hfsmp) != 0) {
492 retval = EINVAL;
493 goto exit;
494 }
495
496 while (writelimit > filebytes) {
497 bytesToAdd = writelimit - filebytes;
498 if (cred && suser(cred, NULL) != 0)
499 eflags |= kEFReserveMask;
500
501 /* Protect extents b-tree and allocation bitmap */
502 lockflags = SFL_BITMAP;
503 if (overflow_extents(fp))
504 lockflags |= SFL_EXTENTS;
505 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
506
507 /* Files that are changing size are not hot file candidates. */
508 if (hfsmp->hfc_stage == HFC_RECORDING) {
509 fp->ff_bytesread = 0;
510 }
511 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
512 0, eflags, &actualBytesAdded));
513
514 hfs_systemfile_unlock(hfsmp, lockflags);
515
516 if ((actualBytesAdded == 0) && (retval == E_NONE))
517 retval = ENOSPC;
518 if (retval != E_NONE)
519 break;
520 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
521 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
522 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
523 }
524 (void) hfs_update(vp, TRUE);
525 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
526 (void) hfs_end_transaction(hfsmp);
527
528 /*
529 	 * If we didn't grow the file enough, try a partial write.
530 * POSIX expects this behavior.
531 */
532 if ((retval == ENOSPC) && (filebytes > offset)) {
533 retval = 0;
534 partialwrite = 1;
535 uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
536 resid -= bytesToAdd;
537 writelimit = filebytes;
538 }
539 sizeok:
540 if (retval == E_NONE) {
541 off_t filesize;
542 off_t zero_off;
543 off_t tail_off;
544 off_t inval_start;
545 off_t inval_end;
546 off_t io_start;
547 int lflag;
548
549 if (writelimit > fp->ff_size)
550 filesize = writelimit;
551 else
552 filesize = fp->ff_size;
553
554 lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
555
556 if (offset <= fp->ff_size) {
557 zero_off = offset & ~PAGE_MASK_64;
558
559 			/* Check whether the area between zero_off and the start of the
560 			   transfer is invalid and should be zero-filled as part of the
561 			   transfer:
562 */
563 if (offset > zero_off) {
564 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
565 lflag |= IO_HEADZEROFILL;
566 }
567 } else {
568 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
569
570 /* The bytes between fp->ff_size and uio->uio_offset must never be
571 read without being zeroed. The current last block is filled with zeroes
572 if it holds valid data but in all cases merely do a little bookkeeping
573 to track the area from the end of the current last page to the start of
574 the area actually written. For the same reason only the bytes up to the
575 			   start of the page where this write will start are invalidated; any remainder
576 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
577
578 Note that inval_start, the start of the page after the current EOF,
579 may be past the start of the write, in which case the zeroing
580 			   will be handled by the cluster_write of the actual data.
581 */
582 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
583 inval_end = offset & ~PAGE_MASK_64;
584 zero_off = fp->ff_size;
585
586 if ((fp->ff_size & PAGE_MASK_64) &&
587 (rl_scan(&fp->ff_invalidranges,
588 eof_page_base,
589 fp->ff_size - 1,
590 &invalid_range) != RL_NOOVERLAP)) {
591 /* The page containing the EOF is not valid, so the
592 entire page must be made inaccessible now. If the write
593 starts on a page beyond the page containing the eof
594 (inval_end > eof_page_base), add the
595 whole page to the range to be invalidated. Otherwise
596 (i.e. if the write starts on the same page), zero-fill
597 the entire page explicitly now:
598 */
599 if (inval_end > eof_page_base) {
600 inval_start = eof_page_base;
601 } else {
602 zero_off = eof_page_base;
603 };
604 };
605
606 if (inval_start < inval_end) {
607 struct timeval tv;
608 /* There's some range of data that's going to be marked invalid */
609
610 if (zero_off < inval_start) {
611 /* The pages between inval_start and inval_end are going to be invalidated,
612 and the actual write will start on a page past inval_end. Now's the last
613 chance to zero-fill the page containing the EOF:
614 */
615 hfs_unlock(cp);
616 cnode_locked = 0;
617 retval = cluster_write(vp, (uio_t) 0,
618 fp->ff_size, inval_start,
619 zero_off, (off_t)0,
620 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
621 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
622 cnode_locked = 1;
623 if (retval) goto ioerr_exit;
624 offset = uio_offset(uio);
625 };
626
627 /* Mark the remaining area of the newly allocated space as invalid: */
628 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
629 microuptime(&tv);
630 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
631 zero_off = fp->ff_size = inval_end;
632 };
633
634 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
635 };
636
637 /* Check to see whether the area between the end of the write and the end of
638 the page it falls in is invalid and should be zero-filled as part of the transfer:
639 */
640 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
641 if (tail_off > filesize) tail_off = filesize;
642 if (tail_off > writelimit) {
643 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
644 lflag |= IO_TAILZEROFILL;
645 };
646 };
647
648 /*
649 * if the write starts beyond the current EOF (possibly advanced in the
650 * zeroing of the last block, above), then we'll zero fill from the current EOF
651 * to where the write begins:
652 *
653 * NOTE: If (and ONLY if) the portion of the file about to be written is
654 * before the current EOF it might be marked as invalid now and must be
655 * made readable (removed from the invalid ranges) before cluster_write
656 * tries to write it:
657 */
658 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
659 if (io_start < fp->ff_size) {
660 off_t io_end;
661
662 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
663 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
664 };
665
666 hfs_unlock(cp);
667 cnode_locked = 0;
668
669 /*
670 * We need to tell UBC the fork's new size BEFORE calling
671 * cluster_write, in case any of the new pages need to be
672 * paged out before cluster_write completes (which does happen
673 * in embedded systems due to extreme memory pressure).
674 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
675 * will be, so that it can pass that on to cluster_pageout, and
676 * allow those pageouts.
677 *
678 * We don't update ff_size yet since we don't want pageins to
679 * be able to see uninitialized data between the old and new
680 * EOF, until cluster_write has completed and initialized that
681 * part of the file.
682 *
683 * The vnode pager relies on the file size last given to UBC via
684 * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
685 * ff_size (whichever is larger). NOTE: ff_new_size is always
686 * zero, unless we are extending the file via write.
687 */
688 if (filesize > fp->ff_size) {
689 fp->ff_new_size = filesize;
690 ubc_setsize(vp, filesize);
691 }
692 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
693 tail_off, lflag | IO_NOZERODIRTY | io_return_on_throttle);
694 if (retval) {
695 fp->ff_new_size = 0; /* no longer extending; use ff_size */
696
697 if (retval == EAGAIN) {
698 /*
699 * EAGAIN indicates that we still have I/O to do, but
700 * that we now need to be throttled
701 */
702 if (resid != uio_resid(uio)) {
703 /*
704 				 * we did manage to do some I/O before returning EAGAIN
705 */
706 resid = uio_resid(uio);
707 offset = uio_offset(uio);
708
709 cp->c_touch_chgtime = TRUE;
710 cp->c_touch_modtime = TRUE;
711 }
712 if (filesize > fp->ff_size) {
713 /*
714 * we called ubc_setsize before the call to
715 * cluster_write... since we only partially
716 * completed the I/O, we need to
717 * re-adjust our idea of the filesize based
718 * on our interim EOF
719 */
720 ubc_setsize(vp, offset);
721
722 fp->ff_size = offset;
723 }
724 goto exit;
725 }
726 if (filesize > origFileSize) {
727 ubc_setsize(vp, origFileSize);
728 }
729 goto ioerr_exit;
730 }
731
732 if (filesize > origFileSize) {
733 fp->ff_size = filesize;
734
735 /* Files that are changing size are not hot file candidates. */
736 if (hfsmp->hfc_stage == HFC_RECORDING) {
737 fp->ff_bytesread = 0;
738 }
739 }
740 fp->ff_new_size = 0; /* ff_size now has the correct size */
741
742 /* If we wrote some bytes, then touch the change and mod times */
743 if (resid > uio_resid(uio)) {
744 cp->c_touch_chgtime = TRUE;
745 cp->c_touch_modtime = TRUE;
746 }
747 }
748 if (partialwrite) {
749 uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
750 resid += bytesToAdd;
751 }
752
753 // XXXdbg - see radar 4871353 for more info
754 {
755 if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
756 VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
757 }
758 }
759
760 ioerr_exit:
761 /*
762 	 * If we successfully wrote any data and we are not the superuser,
763 * we clear the setuid and setgid bits as a precaution against
764 * tampering.
765 */
766 if (cp->c_mode & (S_ISUID | S_ISGID)) {
767 cred = vfs_context_ucred(ap->a_context);
768 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
769 if (!cnode_locked) {
770 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
771 cnode_locked = 1;
772 }
773 cp->c_mode &= ~(S_ISUID | S_ISGID);
774 }
775 }
776 if (retval) {
777 if (ioflag & IO_UNIT) {
778 if (!cnode_locked) {
779 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
780 cnode_locked = 1;
781 }
782 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
783 0, 0, ap->a_context);
784 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
785 uio_setresid(uio, resid);
786 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
787 }
788 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
789 if (!cnode_locked) {
790 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
791 cnode_locked = 1;
792 }
793 retval = hfs_update(vp, TRUE);
794 }
795 /* Updating vcbWrCnt doesn't need to be atomic. */
796 hfsmp->vcbWrCnt++;
797
798 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
799 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
800 exit:
801 if (cnode_locked)
802 hfs_unlock(cp);
803
804 if (took_truncate_lock) {
805 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
806 }
807 if (retval == EAGAIN) {
808 throttle_lowpri_io(1);
809
810 retval = 0;
811 goto again;
812 }
813 return (retval);
814 }
815
816 /* support for the "bulk-access" fcntl */
817
818 #define CACHE_LEVELS 16
819 #define NUM_CACHE_ENTRIES (64*16)
820 #define PARENT_IDS_FLAG 0x100
821
822 struct access_cache {
823 int numcached;
824 int cachehits; /* these two for statistics gathering */
825 int lookups;
826 unsigned int *acache;
827 unsigned char *haveaccess;
828 };
829
830 struct access_t {
831 uid_t uid; /* IN: effective user id */
832 short flags; /* IN: access requested (i.e. R_OK) */
833 short num_groups; /* IN: number of groups user belongs to */
834 int num_files; /* IN: number of files to process */
835 int *file_ids; /* IN: array of file ids */
836 gid_t *groups; /* IN: array of groups */
837 short *access; /* OUT: access info for each file (0 for 'has access') */
838 } __attribute__((unavailable)); // this structure is for reference purposes only
839
840 struct user32_access_t {
841 uid_t uid; /* IN: effective user id */
842 short flags; /* IN: access requested (i.e. R_OK) */
843 short num_groups; /* IN: number of groups user belongs to */
844 int num_files; /* IN: number of files to process */
845 user32_addr_t file_ids; /* IN: array of file ids */
846 user32_addr_t groups; /* IN: array of groups */
847 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
848 };
849
850 struct user64_access_t {
851 uid_t uid; /* IN: effective user id */
852 short flags; /* IN: access requested (i.e. R_OK) */
853 short num_groups; /* IN: number of groups user belongs to */
854 int num_files; /* IN: number of files to process */
855 user64_addr_t file_ids; /* IN: array of file ids */
856 user64_addr_t groups; /* IN: array of groups */
857 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
858 };
859
860
861 // these are the "extended" versions of the above structures
862 // note that it is crucial that they have a different size than
863 // the regular versions
864 struct ext_access_t {
865 uint32_t flags; /* IN: access requested (i.e. R_OK) */
866 uint32_t num_files; /* IN: number of files to process */
867 uint32_t map_size; /* IN: size of the bit map */
868 uint32_t *file_ids; /* IN: Array of file ids */
869 char *bitmap; /* OUT: hash-bitmap of interesting directory ids */
870 short *access; /* OUT: access info for each file (0 for 'has access') */
871 uint32_t num_parents; /* future use */
872 cnid_t *parents; /* future use */
873 } __attribute__((unavailable)); // this structure is for reference purposes only
874
875 struct user32_ext_access_t {
876 uint32_t flags; /* IN: access requested (i.e. R_OK) */
877 uint32_t num_files; /* IN: number of files to process */
878 uint32_t map_size; /* IN: size of the bit map */
879 user32_addr_t file_ids; /* IN: Array of file ids */
880 user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
881 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
882 uint32_t num_parents; /* future use */
883 user32_addr_t parents; /* future use */
884 };
885
886 struct user64_ext_access_t {
887 uint32_t flags; /* IN: access requested (i.e. R_OK) */
888 uint32_t num_files; /* IN: number of files to process */
889 uint32_t map_size; /* IN: size of the bit map */
890 user64_addr_t file_ids; /* IN: array of file ids */
891 	user64_addr_t bitmap;         /* OUT: hash-bitmap of interesting directory ids */
892 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
893 uint32_t num_parents;/* future use */
894 user64_addr_t parents;/* future use */
895 };
896
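/*
 * Illustrative sketch only: a user-space caller typically fills in a request
 * laid out like ext_access_t above and submits it with fsctl(2).  The command
 * name used here (HFS_EXT_BULKACCESS_FSCTL) and the exact user-visible struct
 * are assumptions; hfs_fsctl.h is the authoritative source for both.
 *
 *	struct ext_access_t args;
 *	memset(&args, 0, sizeof(args));
 *	args.flags     = R_OK;        // access being tested
 *	args.num_files = nfiles;      // entries in the file_ids array
 *	args.file_ids  = ids;         // catalog node ids to check
 *	args.access    = results;     // filled with 0 or an errno per id
 *	error = fsctl(volume_path, HFS_EXT_BULKACCESS_FSCTL, &args, 0);
 */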
897
898 /*
899 * Perform a binary search for the given parent_id. Return value is
900 * the index if there is a match. If no_match_indexp is non-NULL it
901  * will be assigned the index at which to insert the item (even if it was
902 * not found).
903 */
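/*
 * For example, with acache = { 5, 9, 17, 30 } and hi == 3, searching for 17
 * returns index 2, while searching for 12 returns -1 and sets
 * *no_match_indexp to 2, the slot where 12 would be inserted.
 */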
904 static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
905 {
906 int index=-1;
907 unsigned int lo=0;
908
909 do {
910 unsigned int mid = ((hi - lo)/2) + lo;
911 unsigned int this_id = array[mid];
912
913 if (parent_id == this_id) {
914 hi = mid;
915 break;
916 }
917
918 if (parent_id < this_id) {
919 hi = mid;
920 continue;
921 }
922
923 if (parent_id > this_id) {
924 lo = mid + 1;
925 continue;
926 }
927 } while(lo < hi);
928
929 /* check if lo and hi converged on the match */
930 if (parent_id == array[hi]) {
931 index = hi;
932 }
933
934 if (no_match_indexp) {
935 *no_match_indexp = hi;
936 }
937
938 return index;
939 }
940
941
942 static int
943 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
944 {
945 unsigned int hi;
946 int matches = 0;
947 int index, no_match_index;
948
949 if (cache->numcached == 0) {
950 *indexp = 0;
951 return 0; // table is empty, so insert at index=0 and report no match
952 }
953
954 if (cache->numcached > NUM_CACHE_ENTRIES) {
955 cache->numcached = NUM_CACHE_ENTRIES;
956 }
957
958 hi = cache->numcached - 1;
959
960 index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);
961
962 /* if no existing entry found, find index for new one */
963 if (index == -1) {
964 index = no_match_index;
965 matches = 0;
966 } else {
967 matches = 1;
968 }
969
970 *indexp = index;
971 return matches;
972 }
973
974 /*
975 * Add a node to the access_cache at the given index (or do a lookup first
976 * to find the index if -1 is passed in). We currently do a replace rather
977 * than an insert if the cache is full.
978 */
979 static void
980 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
981 {
982 int lookup_index = -1;
983
984 /* need to do a lookup first if -1 passed for index */
985 if (index == -1) {
986 if (lookup_bucket(cache, &lookup_index, nodeID)) {
987 if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
988 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
989 cache->haveaccess[lookup_index] = access;
990 }
991
992 /* mission accomplished */
993 return;
994 } else {
995 index = lookup_index;
996 }
997
998 }
999
1000 /* if the cache is full, do a replace rather than an insert */
1001 if (cache->numcached >= NUM_CACHE_ENTRIES) {
1002 cache->numcached = NUM_CACHE_ENTRIES-1;
1003
1004 if (index > cache->numcached) {
1005 index = cache->numcached;
1006 }
1007 }
1008
1009 if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
1010 index++;
1011 }
1012
1013 if (index >= 0 && index < cache->numcached) {
1014 /* only do bcopy if we're inserting */
1015 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
1016 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
1017 }
1018
1019 cache->acache[index] = nodeID;
1020 cache->haveaccess[index] = access;
1021 cache->numcached++;
1022 }
1023
1024
1025 struct cinfo {
1026 uid_t uid;
1027 gid_t gid;
1028 mode_t mode;
1029 cnid_t parentcnid;
1030 u_int16_t recflags;
1031 };
1032
1033 static int
1034 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
1035 {
1036 struct cinfo *cip = (struct cinfo *)arg;
1037
1038 cip->uid = attrp->ca_uid;
1039 cip->gid = attrp->ca_gid;
1040 cip->mode = attrp->ca_mode;
1041 cip->parentcnid = descp->cd_parentcnid;
1042 cip->recflags = attrp->ca_recflags;
1043
1044 return (0);
1045 }
1046
1047 /*
1048  * Look up the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
1049  * isn't in core, go to the catalog.
1050 */
1051 static int
1052 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
1053 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
1054 {
1055 int error = 0;
1056
1057 /* if this id matches the one the fsctl was called with, skip the lookup */
1058 if (cnid == skip_cp->c_cnid) {
1059 cnattrp->ca_uid = skip_cp->c_uid;
1060 cnattrp->ca_gid = skip_cp->c_gid;
1061 cnattrp->ca_mode = skip_cp->c_mode;
1062 cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
1063 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
1064 } else {
1065 struct cinfo c_info;
1066
1067 		/* otherwise, check the cnode hash in case the file/dir is in core */
1068 if (hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info) == 0) {
1069 cnattrp->ca_uid = c_info.uid;
1070 cnattrp->ca_gid = c_info.gid;
1071 cnattrp->ca_mode = c_info.mode;
1072 cnattrp->ca_recflags = c_info.recflags;
1073 keyp->hfsPlus.parentID = c_info.parentcnid;
1074 } else {
1075 int lockflags;
1076
1077 if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
1078 throttle_lowpri_io(1);
1079
1080 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1081
1082 /* lookup this cnid in the catalog */
1083 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
1084
1085 hfs_systemfile_unlock(hfsmp, lockflags);
1086
1087 cache->lookups++;
1088 }
1089 }
1090
1091 return (error);
1092 }
1093
1094
1095 /*
1096 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
1097 * up to CACHE_LEVELS as we progress towards the root.
1098 */
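/*
 * Returns 1 when access is granted along the entire chain and 0 otherwise;
 * the errno for a failure (or ESRCH for a parent-scope miss) is passed back
 * through *err.
 */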
1099 static int
1100 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
1101 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
1102 struct vfs_context *my_context,
1103 char *bitmap,
1104 uint32_t map_size,
1105 cnid_t* parents,
1106 uint32_t num_parents)
1107 {
1108 int myErr = 0;
1109 int myResult;
1110 HFSCatalogNodeID thisNodeID;
1111 unsigned int myPerms;
1112 struct cat_attr cnattr;
1113 int cache_index = -1, scope_index = -1, scope_idx_start = -1;
1114 CatalogKey catkey;
1115
1116 int i = 0, ids_to_cache = 0;
1117 int parent_ids[CACHE_LEVELS];
1118
1119 thisNodeID = nodeID;
1120 while (thisNodeID >= kRootDirID) {
1121 myResult = 0; /* default to "no access" */
1122
1123 /* check the cache before resorting to hitting the catalog */
1124
1125 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
1126 * to look any further after hitting cached dir */
1127
1128 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
1129 cache->cachehits++;
1130 myErr = cache->haveaccess[cache_index];
1131 if (scope_index != -1) {
1132 if (myErr == ESRCH) {
1133 myErr = 0;
1134 }
1135 } else {
1136 scope_index = 0; // so we'll just use the cache result
1137 scope_idx_start = ids_to_cache;
1138 }
1139 myResult = (myErr == 0) ? 1 : 0;
1140 goto ExitThisRoutine;
1141 }
1142
1143
1144 if (parents) {
1145 int tmp;
1146 tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
1147 if (scope_index == -1)
1148 scope_index = tmp;
1149 if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
1150 scope_idx_start = ids_to_cache;
1151 }
1152 }
1153
1154 /* remember which parents we want to cache */
1155 if (ids_to_cache < CACHE_LEVELS) {
1156 parent_ids[ids_to_cache] = thisNodeID;
1157 ids_to_cache++;
1158 }
1159 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
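		// Each id sets bit (thisNodeID & 7) of byte (thisNodeID / 8) % map_size, so
		// ids that differ by a multiple of 8 * map_size land on the same bit.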
1160 if (bitmap && map_size) {
1161 bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
1162 }
1163
1164
1165 /* do the lookup (checks the cnode hash, then the catalog) */
1166 myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
1167 if (myErr) {
1168 goto ExitThisRoutine; /* no access */
1169 }
1170
1171 /* Root always gets access. */
1172 if (suser(myp_ucred, NULL) == 0) {
1173 thisNodeID = catkey.hfsPlus.parentID;
1174 myResult = 1;
1175 continue;
1176 }
1177
1178 		// if the item has ACLs, do the full permission check
1179 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1180 struct vnode *vp;
1181
1182 /* get the vnode for this cnid */
1183 myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
1184 if ( myErr ) {
1185 myResult = 0;
1186 goto ExitThisRoutine;
1187 }
1188
1189 thisNodeID = VTOC(vp)->c_parentcnid;
1190
1191 hfs_unlock(VTOC(vp));
1192
1193 if (vnode_vtype(vp) == VDIR) {
1194 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
1195 } else {
1196 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
1197 }
1198
1199 vnode_put(vp);
1200 if (myErr) {
1201 myResult = 0;
1202 goto ExitThisRoutine;
1203 }
1204 } else {
1205 unsigned int flags;
1206 int mode = cnattr.ca_mode & S_IFMT;
1207 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp,myp_ucred, theProcPtr);
1208
1209 if (mode == S_IFDIR) {
1210 flags = R_OK | X_OK;
1211 } else {
1212 flags = R_OK;
1213 }
1214 if ( (myPerms & flags) != flags) {
1215 myResult = 0;
1216 myErr = EACCES;
1217 goto ExitThisRoutine; /* no access */
1218 }
1219
1220 /* up the hierarchy we go */
1221 thisNodeID = catkey.hfsPlus.parentID;
1222 }
1223 }
1224
1225 /* if here, we have access to this node */
1226 myResult = 1;
1227
1228 ExitThisRoutine:
1229 if (parents && myErr == 0 && scope_index == -1) {
1230 myErr = ESRCH;
1231 }
1232
1233 if (myErr) {
1234 myResult = 0;
1235 }
1236 *err = myErr;
1237
1238 /* cache the parent directory(ies) */
1239 for (i = 0; i < ids_to_cache; i++) {
1240 if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
1241 add_node(cache, -1, parent_ids[i], ESRCH);
1242 } else {
1243 add_node(cache, -1, parent_ids[i], myErr);
1244 }
1245 }
1246
1247 return (myResult);
1248 }
1249
1250 static int
1251 do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
1252 struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
1253 {
1254 boolean_t is64bit;
1255
1256 /*
1257 * NOTE: on entry, the vnode has an io_ref. In case this vnode
1258 * happens to be in our list of file_ids, we'll note it
1259 * avoid calling hfs_chashget_nowait() on that id as that
1260 * will cause a "locking against myself" panic.
1261 */
1262 Boolean check_leaf = true;
1263
1264 struct user64_ext_access_t *user_access_structp;
1265 struct user64_ext_access_t tmp_user_access;
1266 struct access_cache cache;
1267
1268 int error = 0, prev_parent_check_ok=1;
1269 unsigned int i;
1270
1271 short flags;
1272 unsigned int num_files = 0;
1273 int map_size = 0;
1274 int num_parents = 0;
1275 int *file_ids=NULL;
1276 short *access=NULL;
1277 char *bitmap=NULL;
1278 cnid_t *parents=NULL;
1279 int leaf_index;
1280
1281 cnid_t cnid;
1282 cnid_t prevParent_cnid = 0;
1283 unsigned int myPerms;
1284 short myaccess = 0;
1285 struct cat_attr cnattr;
1286 CatalogKey catkey;
1287 struct cnode *skip_cp = VTOC(vp);
1288 kauth_cred_t cred = vfs_context_ucred(context);
1289 proc_t p = vfs_context_proc(context);
1290
1291 is64bit = proc_is64bit(p);
1292
1293 /* initialize the local cache and buffers */
1294 cache.numcached = 0;
1295 cache.cachehits = 0;
1296 cache.lookups = 0;
1297 cache.acache = NULL;
1298 cache.haveaccess = NULL;
1299
1300 /* struct copyin done during dispatch... need to copy file_id array separately */
1301 if (ap->a_data == NULL) {
1302 error = EINVAL;
1303 goto err_exit_bulk_access;
1304 }
1305
1306 if (is64bit) {
1307 if (arg_size != sizeof(struct user64_ext_access_t)) {
1308 error = EINVAL;
1309 goto err_exit_bulk_access;
1310 }
1311
1312 user_access_structp = (struct user64_ext_access_t *)ap->a_data;
1313
1314 } else if (arg_size == sizeof(struct user32_access_t)) {
1315 struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;
1316
1317 // convert an old style bulk-access struct to the new style
1318 tmp_user_access.flags = accessp->flags;
1319 tmp_user_access.num_files = accessp->num_files;
1320 tmp_user_access.map_size = 0;
1321 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1322 tmp_user_access.bitmap = USER_ADDR_NULL;
1323 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1324 tmp_user_access.num_parents = 0;
1325 user_access_structp = &tmp_user_access;
1326
1327 } else if (arg_size == sizeof(struct user32_ext_access_t)) {
1328 struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;
1329
1330 // up-cast from a 32-bit version of the struct
1331 tmp_user_access.flags = accessp->flags;
1332 tmp_user_access.num_files = accessp->num_files;
1333 tmp_user_access.map_size = accessp->map_size;
1334 tmp_user_access.num_parents = accessp->num_parents;
1335
1336 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1337 tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
1338 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1339 tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);
1340
1341 user_access_structp = &tmp_user_access;
1342 } else {
1343 error = EINVAL;
1344 goto err_exit_bulk_access;
1345 }
1346
1347 map_size = user_access_structp->map_size;
1348
1349 num_files = user_access_structp->num_files;
1350
1351 num_parents= user_access_structp->num_parents;
1352
1353 if (num_files < 1) {
1354 goto err_exit_bulk_access;
1355 }
1356 if (num_files > 1024) {
1357 error = EINVAL;
1358 goto err_exit_bulk_access;
1359 }
1360
1361 if (num_parents > 1024) {
1362 error = EINVAL;
1363 goto err_exit_bulk_access;
1364 }
1365
1366 file_ids = (int *) kalloc(sizeof(int) * num_files);
1367 access = (short *) kalloc(sizeof(short) * num_files);
1368 if (map_size) {
1369 bitmap = (char *) kalloc(sizeof(char) * map_size);
1370 }
1371
1372 if (num_parents) {
1373 parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
1374 }
1375
1376 cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
1377 cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1378
1379 if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
1380 if (file_ids) {
1381 kfree(file_ids, sizeof(int) * num_files);
1382 }
1383 if (bitmap) {
1384 kfree(bitmap, sizeof(char) * map_size);
1385 }
1386 if (access) {
1387 kfree(access, sizeof(short) * num_files);
1388 }
1389 if (cache.acache) {
1390 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1391 }
1392 if (cache.haveaccess) {
1393 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1394 }
1395 if (parents) {
1396 kfree(parents, sizeof(cnid_t) * num_parents);
1397 }
1398 return ENOMEM;
1399 }
1400
1401 	// make sure the bitmap is zeroed out...
1402 if (bitmap) {
1403 bzero(bitmap, (sizeof(char) * map_size));
1404 }
1405
1406 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1407 num_files * sizeof(int)))) {
1408 goto err_exit_bulk_access;
1409 }
1410
1411 if (num_parents) {
1412 if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
1413 num_parents * sizeof(cnid_t)))) {
1414 goto err_exit_bulk_access;
1415 }
1416 }
1417
1418 flags = user_access_structp->flags;
1419 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1420 flags = R_OK;
1421 }
1422
1423 /* check if we've been passed leaf node ids or parent ids */
1424 if (flags & PARENT_IDS_FLAG) {
1425 check_leaf = false;
1426 }
1427
1428 /* Check access to each file_id passed in */
1429 for (i = 0; i < num_files; i++) {
1430 leaf_index=-1;
1431 cnid = (cnid_t) file_ids[i];
1432
1433 /* root always has access */
1434 if ((!parents) && (!suser(cred, NULL))) {
1435 access[i] = 0;
1436 continue;
1437 }
1438
1439 if (check_leaf) {
1440 /* do the lookup (checks the cnode hash, then the catalog) */
1441 error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
1442 if (error) {
1443 access[i] = (short) error;
1444 continue;
1445 }
1446
1447 if (parents) {
1448 // Check if the leaf matches one of the parent scopes
1449 leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
1450 if (leaf_index >= 0 && parents[leaf_index] == cnid)
1451 prev_parent_check_ok = 0;
1452 else if (leaf_index >= 0)
1453 prev_parent_check_ok = 1;
1454 }
1455
1456 			// if the item has ACLs, do the full permission check
1457 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1458 struct vnode *cvp;
1459 int myErr = 0;
1460 /* get the vnode for this cnid */
1461 myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);
1462 if ( myErr ) {
1463 access[i] = myErr;
1464 continue;
1465 }
1466
1467 hfs_unlock(VTOC(cvp));
1468
1469 if (vnode_vtype(cvp) == VDIR) {
1470 myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
1471 } else {
1472 myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
1473 }
1474
1475 vnode_put(cvp);
1476 if (myErr) {
1477 access[i] = myErr;
1478 continue;
1479 }
1480 } else {
1481 /* before calling CheckAccess(), check the target file for read access */
1482 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1483 cnattr.ca_mode, hfsmp->hfs_mp, cred, p);
1484
1485 /* fail fast if no access */
1486 if ((myPerms & flags) == 0) {
1487 access[i] = EACCES;
1488 continue;
1489 }
1490 }
1491 } else {
1492 /* we were passed an array of parent ids */
1493 catkey.hfsPlus.parentID = cnid;
1494 }
1495
1496 		/* if the previous entry had the same parent and had access, we're done */
1497 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
1498 cache.cachehits++;
1499 access[i] = 0;
1500 continue;
1501 }
1502
1503 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1504 skip_cp, p, cred, context,bitmap, map_size, parents, num_parents);
1505
1506 if (myaccess || (error == ESRCH && leaf_index != -1)) {
1507 access[i] = 0; // have access.. no errors to report
1508 } else {
1509 access[i] = (error != 0 ? (short) error : EACCES);
1510 }
1511
1512 prevParent_cnid = catkey.hfsPlus.parentID;
1513 }
1514
1515 /* copyout the access array */
1516 if ((error = copyout((caddr_t)access, user_access_structp->access,
1517 num_files * sizeof (short)))) {
1518 goto err_exit_bulk_access;
1519 }
1520 if (map_size && bitmap) {
1521 if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
1522 map_size * sizeof (char)))) {
1523 goto err_exit_bulk_access;
1524 }
1525 }
1526
1527
1528 err_exit_bulk_access:
1529
1530 if (file_ids)
1531 kfree(file_ids, sizeof(int) * num_files);
1532 if (parents)
1533 kfree(parents, sizeof(cnid_t) * num_parents);
1534 if (bitmap)
1535 kfree(bitmap, sizeof(char) * map_size);
1536 if (access)
1537 kfree(access, sizeof(short) * num_files);
1538 if (cache.acache)
1539 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1540 if (cache.haveaccess)
1541 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1542
1543 return (error);
1544 }
1545
1546
1547 /* end "bulk-access" support */
1548
1549
1550 /*
1551 * Callback for use with freeze ioctl.
1552 */
1553 static int
1554 hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
1555 {
1556 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
1557
1558 return 0;
1559 }
1560
1561 /*
1562 * Control filesystem operating characteristics.
1563 */
1564 int
1565 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
1566 vnode_t a_vp;
1567 int a_command;
1568 caddr_t a_data;
1569 int a_fflag;
1570 vfs_context_t a_context;
1571 } */ *ap)
1572 {
1573 struct vnode * vp = ap->a_vp;
1574 struct hfsmount *hfsmp = VTOHFS(vp);
1575 vfs_context_t context = ap->a_context;
1576 kauth_cred_t cred = vfs_context_ucred(context);
1577 proc_t p = vfs_context_proc(context);
1578 struct vfsstatfs *vfsp;
1579 boolean_t is64bit;
1580 off_t jnl_start, jnl_size;
1581 struct hfs_journal_info *jip;
1582 #if HFS_COMPRESSION
1583 int compressed = 0;
1584 off_t uncompressed_size = -1;
1585 int decmpfs_error = 0;
1586
1587 if (ap->a_command == F_RDADVISE) {
1588 /* we need to inspect the decmpfs state of the file as early as possible */
1589 compressed = hfs_file_is_compressed(VTOC(vp), 0);
1590 if (compressed) {
1591 if (VNODE_IS_RSRC(vp)) {
1592 /* if this is the resource fork, treat it as if it were empty */
1593 uncompressed_size = 0;
1594 } else {
1595 decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
1596 if (decmpfs_error != 0) {
1597 /* failed to get the uncompressed size, we'll check for this later */
1598 uncompressed_size = -1;
1599 }
1600 }
1601 }
1602 }
1603 #endif /* HFS_COMPRESSION */
1604
1605 is64bit = proc_is64bit(p);
1606
1607 #if CONFIG_PROTECT
1608 {
1609 int error = 0;
1610 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
1611 return error;
1612 }
1613 }
1614 #endif /* CONFIG_PROTECT */
1615
1616 switch (ap->a_command) {
1617
1618 case HFS_GETPATH:
1619 {
1620 struct vnode *file_vp;
1621 cnid_t cnid;
1622 int outlen;
1623 char *bufptr;
1624 int error;
1625 int flags = 0;
1626
1627 /* Caller must be owner of file system. */
1628 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1629 if (suser(cred, NULL) &&
1630 kauth_cred_getuid(cred) != vfsp->f_owner) {
1631 return (EACCES);
1632 }
1633 /* Target vnode must be file system's root. */
1634 if (!vnode_isvroot(vp)) {
1635 return (EINVAL);
1636 }
1637 bufptr = (char *)ap->a_data;
1638 cnid = strtoul(bufptr, NULL, 10);
1639 if (ap->a_fflag & HFS_GETPATH_VOLUME_RELATIVE) {
1640 flags |= BUILDPATH_VOLUME_RELATIVE;
1641 }
1642
1643 /* We need to call hfs_vfs_vget to leverage the code that will
1644 * fix the origin list for us if needed, as opposed to calling
1645 		 * hfs_vget, since we will need the parent for the build_path call.
1646 */
1647
1648 if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
1649 return (error);
1650 }
1651 error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, flags, context);
1652 vnode_put(file_vp);
1653
1654 return (error);
1655 }
1656
1657 case HFS_GET_WRITE_GEN_COUNTER:
1658 {
1659 struct cnode *cp = NULL;
1660 int error;
1661 u_int32_t *counter = (u_int32_t *)ap->a_data;
1662
1663 cp = VTOC(vp);
1664
1665 if (vnode_isdir (vp)) {
1666 error = EISDIR;
1667 *counter = 0;
1668 return error;
1669 }
1670
1671 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1672 if (error == 0) {
1673 struct ubc_info *uip;
1674 int is_mapped = 0;
1675
1676 if (UBCINFOEXISTS(vp)) {
1677 uip = vp->v_ubcinfo;
1678 if (uip->ui_flags & UI_ISMAPPED) {
1679 is_mapped = 1;
1680 }
1681 }
1682
1683
1684 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
1685 uint32_t gcount = hfs_get_gencount(cp);
1686 //
1687 // Even though we return EBUSY for files that are mmap'ed
1688 // we also want to bump the value so that the write-gen
1689 // counter will always be different once the file is unmapped
1690 // (since the file may be unmapped but the pageouts have not
1691 // yet happened).
1692 //
1693 if (is_mapped) {
1694 hfs_incr_gencount (cp);
1695 gcount = hfs_get_gencount(cp);
1696 }
1697
1698 *counter = gcount;
1699
1700 }
1701 else {
1702 				/* not a regular file or symlink; silently return */
1703 *counter = 0;
1704 }
1705 hfs_unlock (cp);
1706
1707 if (is_mapped) {
1708 error = EBUSY;
1709 }
1710 }
1711
1712 return error;
1713 }
1714
1715 case HFS_PREV_LINK:
1716 case HFS_NEXT_LINK:
1717 {
1718 cnid_t linkfileid;
1719 cnid_t nextlinkid;
1720 cnid_t prevlinkid;
1721 int error;
1722
1723 /* Caller must be owner of file system. */
1724 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1725 if (suser(cred, NULL) &&
1726 kauth_cred_getuid(cred) != vfsp->f_owner) {
1727 return (EACCES);
1728 }
1729 /* Target vnode must be file system's root. */
1730 if (!vnode_isvroot(vp)) {
1731 return (EINVAL);
1732 }
1733 linkfileid = *(cnid_t *)ap->a_data;
1734 if (linkfileid < kHFSFirstUserCatalogNodeID) {
1735 return (EINVAL);
1736 }
1737 if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
1738 return (error);
1739 }
1740 if (ap->a_command == HFS_NEXT_LINK) {
1741 *(cnid_t *)ap->a_data = nextlinkid;
1742 } else {
1743 *(cnid_t *)ap->a_data = prevlinkid;
1744 }
1745 return (0);
1746 }
1747
1748 case HFS_RESIZE_PROGRESS: {
1749
1750 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1751 if (suser(cred, NULL) &&
1752 kauth_cred_getuid(cred) != vfsp->f_owner) {
1753 return (EACCES); /* must be owner of file system */
1754 }
1755 if (!vnode_isvroot(vp)) {
1756 return (EINVAL);
1757 }
1758 /* file system must not be mounted read-only */
1759 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1760 return (EROFS);
1761 }
1762
1763 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
1764 }
1765
1766 case HFS_RESIZE_VOLUME: {
1767 u_int64_t newsize;
1768 u_int64_t cursize;
1769
1770 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1771 if (suser(cred, NULL) &&
1772 kauth_cred_getuid(cred) != vfsp->f_owner) {
1773 return (EACCES); /* must be owner of file system */
1774 }
1775 if (!vnode_isvroot(vp)) {
1776 return (EINVAL);
1777 }
1778
1779 /* filesystem must not be mounted read only */
1780 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1781 return (EROFS);
1782 }
1783 newsize = *(u_int64_t *)ap->a_data;
1784 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
1785
1786 if (newsize > cursize) {
1787 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
1788 } else if (newsize < cursize) {
1789 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
1790 } else {
1791 return (0);
1792 }
1793 }
1794 case HFS_CHANGE_NEXT_ALLOCATION: {
1795 int error = 0; /* Assume success */
1796 u_int32_t location;
1797
1798 if (vnode_vfsisrdonly(vp)) {
1799 return (EROFS);
1800 }
1801 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1802 if (suser(cred, NULL) &&
1803 kauth_cred_getuid(cred) != vfsp->f_owner) {
1804 return (EACCES); /* must be owner of file system */
1805 }
1806 if (!vnode_isvroot(vp)) {
1807 return (EINVAL);
1808 }
1809 hfs_lock_mount(hfsmp);
1810 location = *(u_int32_t *)ap->a_data;
1811 if ((location >= hfsmp->allocLimit) &&
1812 (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
1813 error = EINVAL;
1814 goto fail_change_next_allocation;
1815 }
1816 /* Return previous value. */
1817 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
1818 if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
1819 /* On magic value for location, set nextAllocation to next block
1820 * after metadata zone and set flag in mount structure to indicate
1821 * that nextAllocation should not be updated again.
1822 */
1823 if (hfsmp->hfs_metazone_end != 0) {
1824 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
1825 }
1826 hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1827 } else {
1828 hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1829 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
1830 }
1831 MarkVCBDirty(hfsmp);
1832 fail_change_next_allocation:
1833 hfs_unlock_mount(hfsmp);
1834 return (error);
1835 }
1836
1837 #if HFS_SPARSE_DEV
1838 case HFS_SETBACKINGSTOREINFO: {
1839 struct vnode * bsfs_rootvp;
1840 struct vnode * di_vp;
1841 struct hfs_backingstoreinfo *bsdata;
1842 int error = 0;
1843
1844 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1845 return (EROFS);
1846 }
1847 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
1848 return (EALREADY);
1849 }
1850 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1851 if (suser(cred, NULL) &&
1852 kauth_cred_getuid(cred) != vfsp->f_owner) {
1853 return (EACCES); /* must be owner of file system */
1854 }
1855 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
1856 if (bsdata == NULL) {
1857 return (EINVAL);
1858 }
1859 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
1860 return (error);
1861 }
1862 if ((error = vnode_getwithref(di_vp))) {
1863 file_drop(bsdata->backingfd);
1864 return(error);
1865 }
1866
1867 if (vnode_mount(vp) == vnode_mount(di_vp)) {
1868 (void)vnode_put(di_vp);
1869 file_drop(bsdata->backingfd);
1870 return (EINVAL);
1871 }
1872
1873 /*
1874 * Obtain the backing fs root vnode and keep a reference
1875 * on it. This reference will be dropped in hfs_unmount.
1876 */
1877 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
1878 if (error) {
1879 (void)vnode_put(di_vp);
1880 file_drop(bsdata->backingfd);
1881 return (error);
1882 }
1883 vnode_ref(bsfs_rootvp);
1884 vnode_put(bsfs_rootvp);
1885
1886 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
1887
1888 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
1889 /* The free extent cache is managed differently for sparse devices.
1890 * There is a window between when the volume is mounted and when the
1891 * device is marked as sparse, so the free extent cache for this
1892 * volume was initialized as for a normal volume (sorted by block
1893 * count). Reset the cache so that it is rebuilt for a sparse
1894 * device (sorted by start block).
1895 */
1896 ResetVCBFreeExtCache(hfsmp);
1897
1898 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
1899 hfsmp->hfs_sparsebandblks *= 4;
1900
1901 /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */
1902
1903 /*
1904 * If the sparse image is on a sparse image file (as opposed to a sparse
1905 * bundle), then we may need to limit the free space to the maximum size
1906 * of a file on that volume. So we query (using pathconf), and if we get
1907 * a meaningful result, we cache the number of blocks for later use in
1908 * hfs_freeblks().
1909 */
1910 hfsmp->hfs_backingfs_maxblocks = 0;
1911 if (vnode_vtype(di_vp) == VREG) {
1912 int terr;
1913 int hostbits;
1914 terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
1915 if (terr == 0 && hostbits != 0 && hostbits < 64) {
1916 u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;
1917
1918 hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
1919 }
1920 }
1921
1922 (void)vnode_put(di_vp);
1923 file_drop(bsdata->backingfd);
1924 return (0);
1925 }
1926 case HFS_CLRBACKINGSTOREINFO: {
1927 struct vnode * tmpvp;
1928
1929 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1930 if (suser(cred, NULL) &&
1931 kauth_cred_getuid(cred) != vfsp->f_owner) {
1932 return (EACCES); /* must be owner of file system */
1933 }
1934 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1935 return (EROFS);
1936 }
1937
1938 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
1939 hfsmp->hfs_backingfs_rootvp) {
1940
1941 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
1942 tmpvp = hfsmp->hfs_backingfs_rootvp;
1943 hfsmp->hfs_backingfs_rootvp = NULLVP;
1944 hfsmp->hfs_sparsebandblks = 0;
1945 vnode_rele(tmpvp);
1946 }
1947 return (0);
1948 }
1949 #endif /* HFS_SPARSE_DEV */
1950
1951 /* Change the next CNID stored in the VH */
1952 case HFS_CHANGE_NEXTCNID: {
1953 int error = 0; /* Assume success */
1954 u_int32_t fileid;
1955 int wraparound = 0;
1956 int lockflags = 0;
1957
1958 if (vnode_vfsisrdonly(vp)) {
1959 return (EROFS);
1960 }
1961 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1962 if (suser(cred, NULL) &&
1963 kauth_cred_getuid(cred) != vfsp->f_owner) {
1964 return (EACCES); /* must be owner of file system */
1965 }
1966
1967 fileid = *(u_int32_t *)ap->a_data;
1968
1969 /* Must have catalog lock excl. to advance the CNID pointer */
1970 lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG , HFS_EXCLUSIVE_LOCK);
1971
1972 hfs_lock_mount(hfsmp);
1973
1974 /* If it is less than the current next CNID, force the wraparound bit to be set */
1975 if (fileid < hfsmp->vcbNxtCNID) {
1976 wraparound=1;
1977 }
1978
1979 /* Return previous value. */
1980 *(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID;
1981
1982 hfsmp->vcbNxtCNID = fileid;
1983
1984 if (wraparound) {
1985 hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask;
1986 }
1987
1988 MarkVCBDirty(hfsmp);
1989 hfs_unlock_mount(hfsmp);
1990 hfs_systemfile_unlock (hfsmp, lockflags);
1991
1992 return (error);
1993 }
1994
1995 case F_FREEZE_FS: {
1996 struct mount *mp;
1997
1998 mp = vnode_mount(vp);
1999 hfsmp = VFSTOHFS(mp);
2000
2001 if (!(hfsmp->jnl))
2002 return (ENOTSUP);
2003
2004 vfsp = vfs_statfs(mp);
2005
2006 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
2007 !kauth_cred_issuser(cred))
2008 return (EACCES);
2009
2010 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
2011
2012 // flush things before we get started to try and prevent
2013 // dirty data from being paged out while we're frozen.
2014 // note: can't do this after taking the lock as it will
2015 // deadlock against ourselves.
2016 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
2017 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
2018
2019 // DO NOT call hfs_journal_flush() because it takes a shared
2020 // lock on the global lock, which we already hold exclusive here!
2021 journal_flush(hfsmp->jnl, TRUE);
2022
2023 // don't need to iterate on all vnodes, we just need to
2024 // wait for writes to the system files and the device vnode
2025 //
2026 // Now that journal flush waits for all metadata blocks to
2027 // be written out, waiting for btree writes is probably no
2028 // longer required.
2029 if (HFSTOVCB(hfsmp)->extentsRefNum)
2030 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
2031 if (HFSTOVCB(hfsmp)->catalogRefNum)
2032 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
2033 if (HFSTOVCB(hfsmp)->allocationsRefNum)
2034 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
2035 if (hfsmp->hfs_attribute_vp)
2036 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
2037 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
2038
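// record which process froze the volume; F_THAW_FS (and the thaw
// path in hfs_vnop_close) checks this before unfreezing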
2039 hfsmp->hfs_freezing_proc = current_proc();
2040
2041 return (0);
2042 }
2043
2044 case F_THAW_FS: {
2045 vfsp = vfs_statfs(vnode_mount(vp));
2046 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
2047 !kauth_cred_issuser(cred))
2048 return (EACCES);
2049
2050 // if we're not the one who froze the fs then we
2051 // can't thaw it.
2052 if (hfsmp->hfs_freezing_proc != current_proc()) {
2053 return EPERM;
2054 }
2055
2056 // NOTE: if you add code here, also go check the
2057 // code that "thaws" the fs in hfs_vnop_close()
2058 //
2059 hfsmp->hfs_freezing_proc = NULL;
2060 hfs_unlock_global (hfsmp);
2061 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
2062
2063 return (0);
2064 }
2065
2066 case HFS_BULKACCESS_FSCTL: {
2067 int size;
2068
2069 if (hfsmp->hfs_flags & HFS_STANDARD) {
2070 return EINVAL;
2071 }
2072
2073 if (is64bit) {
2074 size = sizeof(struct user64_access_t);
2075 } else {
2076 size = sizeof(struct user32_access_t);
2077 }
2078
2079 return do_bulk_access_check(hfsmp, vp, ap, size, context);
2080 }
2081
2082 case HFS_EXT_BULKACCESS_FSCTL: {
2083 int size;
2084
2085 if (hfsmp->hfs_flags & HFS_STANDARD) {
2086 return EINVAL;
2087 }
2088
2089 if (is64bit) {
2090 size = sizeof(struct user64_ext_access_t);
2091 } else {
2092 size = sizeof(struct user32_ext_access_t);
2093 }
2094
2095 return do_bulk_access_check(hfsmp, vp, ap, size, context);
2096 }
2097
2098 case HFS_SET_XATTREXTENTS_STATE: {
2099 int state;
2100
2101 if (ap->a_data == NULL) {
2102 return (EINVAL);
2103 }
2104
2105 state = *(int *)ap->a_data;
2106
2107 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2108 return (EROFS);
2109 }
2110
2111 /* The super-user can enable or disable extent-based extended
2112 * attribute support on a volume.
2113 * Note: Starting with Mac OS X 10.7, extent-based extended attributes
2114 * are enabled by default, so any change is transient and only lasts
2115 * until the volume is remounted.
2116 */
2117 if (!kauth_cred_issuser(kauth_cred_get())) {
2118 return (EPERM);
2119 }
2120 if (state == 0 || state == 1)
2121 return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
2122 else
2123 return (EINVAL);
2124 }
2125
2126 case F_SETSTATICCONTENT: {
2127 int error;
2128 int enable_static = 0;
2129 struct cnode *cp = NULL;
2130 /*
2131 * lock the cnode, decorate the cnode flag, and bail out.
2132 * VFS should have already authenticated the caller for us.
2133 */
2134
2135 if (ap->a_data) {
2136 /*
2137 * Note that even though ap->a_data is of type caddr_t,
2138 * the fcntl layer at the syscall handler will pass in NULL
2139 * or 1 depending on what the argument supplied to the fcntl
2140 * was. So it is in fact correct to check the ap->a_data
2141 * argument for zero or non-zero value when deciding whether or not
2142 * to enable the static bit in the cnode.
2143 */
2144 enable_static = 1;
2145 }
2146 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2147 return EROFS;
2148 }
2149 cp = VTOC(vp);
2150
2151 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2152 if (error == 0) {
2153 if (enable_static) {
2154 cp->c_flag |= C_SSD_STATIC;
2155 }
2156 else {
2157 cp->c_flag &= ~C_SSD_STATIC;
2158 }
2159 hfs_unlock (cp);
2160 }
2161 return error;
2162 }
2163
2164 case F_SET_GREEDY_MODE: {
2165 int error;
2166 int enable_greedy_mode = 0;
2167 struct cnode *cp = NULL;
2168 /*
2169 * lock the cnode, decorate the cnode flag, and bail out.
2170 * VFS should have already authenticated the caller for us.
2171 */
2172
2173 if (ap->a_data) {
2174 /*
2175 * Note that even though ap->a_data is of type caddr_t,
2176 * the fcntl layer at the syscall handler will pass in NULL
2177 * or 1 depending on what the argument supplied to the fcntl
2178 * was. So it is in fact correct to check the ap->a_data
2179 * argument for zero or non-zero value when deciding whether or not
2180 * to enable the greedy mode bit in the cnode.
2181 */
2182 enable_greedy_mode = 1;
2183 }
2184 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2185 return EROFS;
2186 }
2187 cp = VTOC(vp);
2188
2189 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2190 if (error == 0) {
2191 if (enable_greedy_mode) {
2192 cp->c_flag |= C_SSD_GREEDY_MODE;
2193 }
2194 else {
2195 cp->c_flag &= ~C_SSD_GREEDY_MODE;
2196 }
2197 hfs_unlock (cp);
2198 }
2199 return error;
2200 }
2201
2202 case F_MAKECOMPRESSED: {
2203 int error = 0;
2204 uint32_t gen_counter;
2205 struct cnode *cp = NULL;
2206 int reset_decmp = 0;
2207
2208 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2209 return EROFS;
2210 }
2211
2212 /*
2213 * acquire & lock the cnode.
2214 * VFS should have already authenticated the caller for us.
2215 */
2216
2217 if (ap->a_data) {
2218 /*
2219 * Cast the pointer into a uint32_t so we can extract the
2220 * supplied generation counter.
2221 */
2222 gen_counter = *((uint32_t*)ap->a_data);
2223 }
2224 else {
2225 return EINVAL;
2226 }
2227
2228 #if HFS_COMPRESSION
2229 cp = VTOC(vp);
2230 /* Grab truncate lock first; we may truncate the file */
2231 hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2232
2233 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2234 if (error) {
2235 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2236 return error;
2237 }
2238
2239 /* Are there any other usecounts/FDs? */
2240 if (vnode_isinuse(vp, 1)) {
2241 hfs_unlock(cp);
2242 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2243 return EBUSY;
2244 }
2245
2246
2247 /* Now that we have the cnode locked down, validate the arguments */
2248 if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) {
2249 /* EINVAL if you are trying to manipulate an IMMUTABLE file */
2250 hfs_unlock(cp);
2251 hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
2252 return EINVAL;
2253 }
2254
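/* Only flip the file to compressed if the caller's generation counter
 * still matches, i.e. the file has not changed since the caller sampled
 * it; otherwise report ESTALE. */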
2255 if ((hfs_get_gencount (cp)) == gen_counter) {
2256 /*
2257 * OK, the gen_counter matched. Go for it:
2258 * Toggle state bits, truncate file, and suppress mtime update
2259 */
2260 reset_decmp = 1;
2261 cp->c_bsdflags |= UF_COMPRESSED;
2262
2263 error = hfs_truncate(vp, 0, IO_NDELAY, 0, (HFS_TRUNCATE_SKIPTIMES), ap->a_context);
2264 }
2265 else {
2266 error = ESTALE;
2267 }
2268
2269 /* Unlock the cnode before calling into decmpfs; it may need to get an EA */
2270 hfs_unlock(cp);
2271
2272 /*
2273 * Reset the decmp state while still holding the truncate lock. We need to
2274 * serialize here against a listxattr on this node which may occur at any
2275 * time.
2276 *
2277 * Even if '0/skiplock' is passed as the 2nd argument to hfs_file_is_compressed,
2278 * it may still need to fetch the com.apple.decmpfs EA. If the EA is
2279 * required, then we can't hold the cnode lock, because the getxattr call is
2280 * generic (it goes through VFS) and can't be told that we already hold the
2281 * lock. If we don't serialize, then we risk listxattr stepping in and trying
2282 * to fill in the hfs_file_is_compressed info during the callback operation,
2283 * which would deadlock against the b-tree node.
2284 *
2285 * So, to serialize against listxattr (which will grab buf_t meta references on
2286 * the b-tree blocks), we hold the truncate lock as we're manipulating the
2287 * decmpfs payload.
2288 */
2289 if ((reset_decmp) && (error == 0)) {
2290 decmpfs_cnode *dp = VTOCMP (vp);
2291 if (dp != NULL) {
2292 decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
2293 }
2294
2295 /* Initialize the decmpfs node as needed */
2296 (void) hfs_file_is_compressed (cp, 0); /* ok to take lock */
2297 }
2298
2299 hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
2300
2301 #endif
2302 return error;
2303 }
2304
2305 case F_SETBACKINGSTORE: {
2306
2307 int error = 0;
2308
2309 /*
2310 * See comment in F_SETSTATICCONTENT re: using
2311 * a null check for a_data
2312 */
2313 if (ap->a_data) {
2314 error = hfs_set_backingstore (vp, 1);
2315 }
2316 else {
2317 error = hfs_set_backingstore (vp, 0);
2318 }
2319
2320 return error;
2321 }
2322
2323 case F_GETPATH_MTMINFO: {
2324 int error = 0;
2325
2326 int *data = (int*) ap->a_data;
2327
2328 /* Ask if this is a backingstore vnode */
2329 error = hfs_is_backingstore (vp, data);
2330
2331 return error;
2332 }
2333
2334 case F_FULLFSYNC: {
2335 int error;
2336
2337 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2338 return (EROFS);
2339 }
2340 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2341 if (error == 0) {
2342 error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
2343 hfs_unlock(VTOC(vp));
2344 }
2345
2346 return error;
2347 }
2348
2349 case F_CHKCLEAN: {
2350 register struct cnode *cp;
2351 int error;
2352
2353 if (!vnode_isreg(vp))
2354 return EINVAL;
2355
2356 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2357 if (error == 0) {
2358 cp = VTOC(vp);
2359 /*
2360 * Used by regression tests to determine whether
2361 * all the dirty pages (written via write) have been cleaned
2362 * after a call to 'fsync'.
2363 */
2364 error = is_file_clean(vp, VTOF(vp)->ff_size);
2365 hfs_unlock(cp);
2366 }
2367 return (error);
2368 }
2369
2370 case F_RDADVISE: {
2371 register struct radvisory *ra;
2372 struct filefork *fp;
2373 int error;
2374
2375 if (!vnode_isreg(vp))
2376 return EINVAL;
2377
2378 ra = (struct radvisory *)(ap->a_data);
2379 fp = VTOF(vp);
2380
2381 /* Protect against a size change. */
2382 hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2383
2384 #if HFS_COMPRESSION
2385 if (compressed && (uncompressed_size == -1)) {
2386 /* fetching the uncompressed size failed above, so return the error */
2387 error = decmpfs_error;
2388 } else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
2389 (!compressed && (ra->ra_offset >= fp->ff_size))) {
2390 error = EFBIG;
2391 }
2392 #else /* HFS_COMPRESSION */
2393 if (ra->ra_offset >= fp->ff_size) {
2394 error = EFBIG;
2395 }
2396 #endif /* HFS_COMPRESSION */
2397 else {
2398 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
2399 }
2400
2401 hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
2402 return (error);
2403 }
2404
2405 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
2406 {
2407 if (is64bit) {
2408 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
2409 }
2410 else {
2411 *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
2412 }
2413 return 0;
2414 }
2415
2416 case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
2417 *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
2418 break;
2419
2420 case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
2421 *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
2422 break;
2423
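/* The free-space notification levels are kept ordered:
 * dangerlimit < warninglimit < desiredlevel. The setters below reject
 * values that would violate this ordering. */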
2424 case HFS_FSCTL_GET_VERY_LOW_DISK:
2425 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
2426 break;
2427
2428 case HFS_FSCTL_SET_VERY_LOW_DISK:
2429 if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
2430 return EINVAL;
2431 }
2432
2433 hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
2434 break;
2435
2436 case HFS_FSCTL_GET_LOW_DISK:
2437 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
2438 break;
2439
2440 case HFS_FSCTL_SET_LOW_DISK:
2441 if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
2442 || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
2443
2444 return EINVAL;
2445 }
2446
2447 hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
2448 break;
2449
2450 case HFS_FSCTL_GET_DESIRED_DISK:
2451 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
2452 break;
2453
2454 case HFS_FSCTL_SET_DESIRED_DISK:
2455 if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
2456 return EINVAL;
2457 }
2458
2459 hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
2460 break;
2461
2462 case HFS_VOLUME_STATUS:
2463 *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
2464 break;
2465
2466 case HFS_SET_BOOT_INFO:
2467 if (!vnode_isvroot(vp))
2468 return(EINVAL);
2469 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
2470 return(EACCES); /* must be superuser or owner of filesystem */
2471 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2472 return (EROFS);
2473 }
2474 hfs_lock_mount (hfsmp);
2475 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
2476 hfs_unlock_mount (hfsmp);
2477 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
2478 break;
2479
2480 case HFS_GET_BOOT_INFO:
2481 if (!vnode_isvroot(vp))
2482 return(EINVAL);
2483 hfs_lock_mount (hfsmp);
2484 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
2485 hfs_unlock_mount(hfsmp);
2486 break;
2487
2488 case HFS_MARK_BOOT_CORRUPT:
2489 /* Mark the boot volume corrupt by setting
2490 * kHFSVolumeInconsistentBit in the volume header. This will
2491 * force fsck_hfs on next mount.
2492 */
2493 if (!kauth_cred_issuser(kauth_cred_get())) {
2494 return EACCES;
2495 }
2496
2497 /* Allowed only on the root vnode of the boot volume */
2498 if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
2499 !vnode_isvroot(vp)) {
2500 return EINVAL;
2501 }
2502 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2503 return (EROFS);
2504 }
2505 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
2506 hfs_mark_volume_inconsistent(hfsmp);
2507 break;
2508
2509 case HFS_FSCTL_GET_JOURNAL_INFO:
2510 jip = (struct hfs_journal_info*)ap->a_data;
2511
2512 if (vp == NULLVP)
2513 return EINVAL;
2514
2515 if (hfsmp->jnl == NULL) {
2516 jnl_start = 0;
2517 jnl_size = 0;
2518 } else {
2519 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
2520 jnl_size = (off_t)hfsmp->jnl_size;
2521 }
2522
2523 jip->jstart = jnl_start;
2524 jip->jsize = jnl_size;
2525 break;
2526
2527 case HFS_SET_ALWAYS_ZEROFILL: {
2528 struct cnode *cp = VTOC(vp);
2529
2530 if (*(int *)ap->a_data) {
2531 cp->c_flag |= C_ALWAYS_ZEROFILL;
2532 } else {
2533 cp->c_flag &= ~C_ALWAYS_ZEROFILL;
2534 }
2535 break;
2536 }
2537
2538 case HFS_DISABLE_METAZONE: {
2539 /* Only root can disable metadata zone */
2540 if (!kauth_cred_issuser(kauth_cred_get())) {
2541 return EACCES;
2542 }
2543 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2544 return (EROFS);
2545 }
2546
2547 /* Disable metadata zone now */
2548 (void) hfs_metadatazone_init(hfsmp, true);
2549 printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
2550 break;
2551 }
2552
2553 default:
2554 return (ENOTTY);
2555 }
2556
2557 return 0;
2558 }
2559
2560 /*
2561 * select
2562 */
2563 int
2564 hfs_vnop_select(__unused struct vnop_select_args *ap)
2565 /*
2566 struct vnop_select_args {
2567 vnode_t a_vp;
2568 int a_which;
2569 int a_fflags;
2570 void *a_wql;
2571 vfs_context_t a_context;
2572 };
2573 */
2574 {
2575 /*
2576 * We should really check to see if I/O is possible.
2577 */
2578 return (1);
2579 }
2580
2581 /*
2582 * Converts a logical block number to a physical block, and optionally returns
2583 * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
2584 * The physical block number is based on the device block size, which is currently 512 bytes.
2585 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
2586 */
2587 int
2588 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
2589 {
2590 struct filefork *fp = VTOF(vp);
2591 struct hfsmount *hfsmp = VTOHFS(vp);
2592 int retval = E_NONE;
2593 u_int32_t logBlockSize;
2594 size_t bytesContAvail = 0;
2595 off_t blockposition;
2596 int lockExtBtree;
2597 int lockflags = 0;
2598
2599 /*
2600 * Check for underlying vnode requests and ensure that logical
2601 * to physical mapping is requested.
2602 */
2603 if (vpp != NULL)
2604 *vpp = hfsmp->hfs_devvp;
2605 if (bnp == NULL)
2606 return (0);
2607
2608 logBlockSize = GetLogicalBlockSize(vp);
2609 blockposition = (off_t)bn * logBlockSize;
2610
2611 lockExtBtree = overflow_extents(fp);
2612
2613 if (lockExtBtree)
2614 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
2615
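/* Map the file-relative position to a device block; MapFileBlockC also
 * reports how many contiguous bytes are available starting at that
 * position (the request here is bounded by MAXPHYSIO). */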
2616 retval = MacToVFSError(
2617 MapFileBlockC (HFSTOVCB(hfsmp),
2618 (FCB*)fp,
2619 MAXPHYSIO,
2620 blockposition,
2621 bnp,
2622 &bytesContAvail));
2623
2624 if (lockExtBtree)
2625 hfs_systemfile_unlock(hfsmp, lockflags);
2626
2627 if (retval == E_NONE) {
2628 /* Figure out how many read ahead blocks there are */
2629 if (runp != NULL) {
2630 if (can_cluster(logBlockSize)) {
2631 /* Make sure this result never goes negative: */
2632 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
2633 } else {
2634 *runp = 0;
2635 }
2636 }
2637 }
2638 return (retval);
2639 }
2640
2641 /*
2642 * Convert logical block number to file offset.
2643 */
2644 int
2645 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
2646 /*
2647 struct vnop_blktooff_args {
2648 vnode_t a_vp;
2649 daddr64_t a_lblkno;
2650 off_t *a_offset;
2651 };
2652 */
2653 {
2654 if (ap->a_vp == NULL)
2655 return (EINVAL);
2656 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
2657
2658 return(0);
2659 }
2660
2661 /*
2662 * Convert file offset to logical block number.
2663 */
2664 int
2665 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
2666 /*
2667 struct vnop_offtoblk_args {
2668 vnode_t a_vp;
2669 off_t a_offset;
2670 daddr64_t *a_lblkno;
2671 };
2672 */
2673 {
2674 if (ap->a_vp == NULL)
2675 return (EINVAL);
2676 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
2677
2678 return(0);
2679 }
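/*
 * Illustrative example of the two conversions above (assuming a logical
 * block size of 4096 bytes for this file):
 *
 *   blktooff: lblkno 3      ->  offset 3 * 4096   = 12288
 *   offtoblk: offset 12288  ->  lblkno 12288/4096 = 3
 */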
2680
2681 /*
2682 * Map file offset to physical block number.
2683 *
2684 * If this function is called for a write operation, and if the file
2685 * had virtual blocks allocated (delayed allocation), real blocks
2686 * are allocated by calling ExtendFileC().
2687 *
2688 * If this function is called for a read operation, and if the file
2689 * had virtual blocks allocated (delayed allocation), no change
2690 * to the size of the file is made, and, if required, the rangelist is
2691 * searched for the mapping.
2692 *
2693 * System file cnodes are expected to be locked (shared or exclusive).
2694 */
2695 int
2696 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
2697 /*
2698 struct vnop_blockmap_args {
2699 vnode_t a_vp;
2700 off_t a_foffset;
2701 size_t a_size;
2702 daddr64_t *a_bpn;
2703 size_t *a_run;
2704 void *a_poff;
2705 int a_flags;
2706 vfs_context_t a_context;
2707 };
2708 */
2709 {
2710 struct vnode *vp = ap->a_vp;
2711 struct cnode *cp;
2712 struct filefork *fp;
2713 struct hfsmount *hfsmp;
2714 size_t bytesContAvail = 0;
2715 int retval = E_NONE;
2716 int syslocks = 0;
2717 int lockflags = 0;
2718 struct rl_entry *invalid_range;
2719 enum rl_overlaptype overlaptype;
2720 int started_tr = 0;
2721 int tooklock = 0;
2722
2723 #if HFS_COMPRESSION
2724 if (VNODE_IS_RSRC(vp)) {
2725 /* allow blockmaps to the resource fork */
2726 } else {
2727 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
2728 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
2729 switch(state) {
2730 case FILE_IS_COMPRESSED:
2731 return ENOTSUP;
2732 case FILE_IS_CONVERTING:
2733 /* if FILE_IS_CONVERTING, we allow blockmap */
2734 break;
2735 default:
2736 printf("invalid state %d for compressed file\n", state);
2737 /* fall through */
2738 }
2739 }
2740 }
2741 #endif /* HFS_COMPRESSION */
2742
2743 /* Do not allow blockmap operation on a directory */
2744 if (vnode_isdir(vp)) {
2745 return (ENOTSUP);
2746 }
2747
2748 /*
2749 * Check for underlying vnode requests and ensure that logical
2750 * to physical mapping is requested.
2751 */
2752 if (ap->a_bpn == NULL)
2753 return (0);
2754
2755 if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
2756 if (VTOC(vp)->c_lockowner != current_thread()) {
2757 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
2758 tooklock = 1;
2759 }
2760 }
2761 hfsmp = VTOHFS(vp);
2762 cp = VTOC(vp);
2763 fp = VTOF(vp);
2764
2765 retry:
2766 /* Check virtual blocks only when performing write operation */
2767 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
2768 if (hfs_start_transaction(hfsmp) != 0) {
2769 retval = EINVAL;
2770 goto exit;
2771 } else {
2772 started_tr = 1;
2773 }
2774 syslocks = SFL_EXTENTS | SFL_BITMAP;
2775
2776 } else if (overflow_extents(fp)) {
2777 syslocks = SFL_EXTENTS;
2778 }
2779
2780 if (syslocks)
2781 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
2782
2783 /*
2784 * Check for any delayed allocations.
2785 */
2786 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
2787 int64_t actbytes;
2788 u_int32_t loanedBlocks;
2789
2790 //
2791 // Make sure we have a transaction. It's possible
2792 // that we came in and fp->ff_unallocblocks was zero
2793 // but during the time we blocked acquiring the extents
2794 // btree, ff_unallocblocks became non-zero and so we
2795 // will need to start a transaction.
2796 //
2797 if (started_tr == 0) {
2798 if (syslocks) {
2799 hfs_systemfile_unlock(hfsmp, lockflags);
2800 syslocks = 0;
2801 }
2802 goto retry;
2803 }
2804
2805 /*
2806 * Note: ExtendFileC will Release any blocks on loan and
2807 * aquire real blocks. So we ask to extend by zero bytes
2808 * since ExtendFileC will account for the virtual blocks.
2809 */
2810
2811 loanedBlocks = fp->ff_unallocblocks;
2812 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
2813 kEFAllMask | kEFNoClumpMask, &actbytes);
2814
2815 if (retval) {
2816 fp->ff_unallocblocks = loanedBlocks;
2817 cp->c_blocks += loanedBlocks;
2818 fp->ff_blocks += loanedBlocks;
2819
2820 hfs_lock_mount (hfsmp);
2821 hfsmp->loanedBlocks += loanedBlocks;
2822 hfs_unlock_mount (hfsmp);
2823
2824 hfs_systemfile_unlock(hfsmp, lockflags);
2825 cp->c_flag |= C_MODIFIED;
2826 if (started_tr) {
2827 (void) hfs_update(vp, TRUE);
2828 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2829
2830 hfs_end_transaction(hfsmp);
2831 started_tr = 0;
2832 }
2833 goto exit;
2834 }
2835 }
2836
2837 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
2838 ap->a_bpn, &bytesContAvail);
2839 if (syslocks) {
2840 hfs_systemfile_unlock(hfsmp, lockflags);
2841 syslocks = 0;
2842 }
2843
2844 if (started_tr) {
2845 (void) hfs_update(vp, TRUE);
2846 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2847 hfs_end_transaction(hfsmp);
2848 started_tr = 0;
2849 }
2850 if (retval) {
2851 /* On write, always return error because virtual blocks, if any,
2852 * should have been allocated in ExtendFileC(). We do not
2853 * allocate virtual blocks on read, therefore return error
2854 * only if no virtual blocks are allocated. Otherwise we search
2855 * the rangelist for zero-fills.
2856 */
2857 if ((MacToVFSError(retval) != ERANGE) ||
2858 (ap->a_flags & VNODE_WRITE) ||
2859 ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
2860 goto exit;
2861 }
2862
2863 /* Validate if the start offset is within logical file size */
2864 if (ap->a_foffset >= fp->ff_size) {
2865 goto exit;
2866 }
2867
2868 /*
2869 * At this point, we have encountered a failure during
2870 * MapFileBlockC that resulted in ERANGE, and we are not servicing
2871 * a write, and there are borrowed blocks.
2872 *
2873 * However, the cluster layer will not call blockmap for
2874 * blocks that are borrowed and in-cache. We have to assume that
2875 * because we observed ERANGE being emitted from MapFileBlockC, this
2876 * extent range is not valid on-disk. So we treat this as a
2877 * mapping that needs to be zero-filled prior to reading.
2878 *
2879 * Note that under certain circumstances (such as non-contiguous
2880 * userland VM mappings in the calling process), cluster_io
2881 * may be forced to split a large I/O driven by hfs_vnop_write
2882 * into multiple sub-I/Os that necessitate a RMW cycle. If this is
2883 * the case here, then we have already removed the invalid range list
2884 * mapping prior to getting to this blockmap call, so we should not
2885 * search the invalid rangelist for this byte range.
2886 */
2887
2888 bytesContAvail = fp->ff_size - ap->a_foffset;
2889 /*
2890 * Clip the contiguous available bytes to, at most, the allowable
2891 * maximum or the amount requested.
2892 */
2893
2894 if (bytesContAvail > ap->a_size) {
2895 bytesContAvail = ap->a_size;
2896 }
2897
2898 *ap->a_bpn = (daddr64_t) -1;
2899 retval = 0;
2900
2901 goto exit;
2902 }
2903
2904 /* MapFileBlockC() found a valid extent in the filefork. Search the
2905 * mapping information further for invalid file ranges.
2906 */
2907 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
2908 ap->a_foffset + (off_t)bytesContAvail - 1,
2909 &invalid_range);
2910 if (overlaptype != RL_NOOVERLAP) {
2911 switch(overlaptype) {
2912 case RL_MATCHINGOVERLAP:
2913 case RL_OVERLAPCONTAINSRANGE:
2914 case RL_OVERLAPSTARTSBEFORE:
2915 /* There's no valid block for this byte offset */
2916 *ap->a_bpn = (daddr64_t)-1;
2917 /* There's no point limiting the amount to be returned
2918 * if the invalid range that was hit extends all the way
2919 * to the EOF (i.e. there's no valid bytes between the
2920 * end of this range and the file's EOF):
2921 */
2922 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
2923 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
2924 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
2925 }
2926 break;
2927
2928 case RL_OVERLAPISCONTAINED:
2929 case RL_OVERLAPENDSAFTER:
2930 /* The range of interest hits an invalid block before the end: */
2931 if (invalid_range->rl_start == ap->a_foffset) {
2932 /* There's actually no valid information to be had starting here: */
2933 *ap->a_bpn = (daddr64_t)-1;
2934 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
2935 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
2936 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
2937 }
2938 } else {
2939 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
2940 }
2941 break;
2942
2943 case RL_NOOVERLAP:
2944 break;
2945 } /* end switch */
2946 if (bytesContAvail > ap->a_size)
2947 bytesContAvail = ap->a_size;
2948 }
2949
2950 exit:
2951 if (retval == 0) {
2952 if (ap->a_run)
2953 *ap->a_run = bytesContAvail;
2954
2955 if (ap->a_poff)
2956 *(int *)ap->a_poff = 0;
2957 }
2958
2959 if (tooklock)
2960 hfs_unlock(cp);
2961
2962 return (MacToVFSError(retval));
2963 }
2964
2965 /*
2966 * prepare and issue the I/O
2967 * buf_strategy knows how to deal
2968 * with requests that require
2969 * fragmented I/Os
2970 */
2971 int
2972 hfs_vnop_strategy(struct vnop_strategy_args *ap)
2973 {
2974 buf_t bp = ap->a_bp;
2975 vnode_t vp = buf_vnode(bp);
2976 int error = 0;
2977
2978 /* Mark buffer as containing static data if cnode flag set */
2979 if (VTOC(vp)->c_flag & C_SSD_STATIC) {
2980 buf_markstatic(bp);
2981 }
2982
2983 /* Mark buffer for greedy mode if cnode flag set */
2984 if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) {
2985 bufattr_markgreedymode((bufattr_t)(&bp->b_attr));
2986 }
2987
2988 #if CONFIG_PROTECT
2989 cnode_t *cp = NULL;
2990
2991 if ((cp = cp_get_protected_cnode(vp)) != NULL) {
2992 /*
2993 * We rely upon the truncate lock to protect the
2994 * CP cache key from getting tossed prior to our IO finishing here.
2995 * Nearly all cluster io calls to manipulate file payload from HFS
2996 * take the truncate lock before calling into the cluster
2997 * layer to ensure the file size does not change, or that they
2998 * have exclusive right to change the EOF of the file.
2999 * That same guarantee protects us here since the code that
3000 * deals with CP lock events must now take the truncate lock
3001 * before doing anything.
3002 *
3003 * There is 1 exception here:
3004 * 1) One exception should be the VM swapfile IO, because HFS will
3005 * funnel the VNOP_PAGEOUT directly into a cluster_pageout call for the
3006 * swapfile code only without holding the truncate lock. This is because
3007 * individual swapfiles are maintained at fixed-length sizes by the VM code.
3008 * In non-swapfile IO we use PAGEOUT_V2 semantics which allow us to
3009 * create our own UPL and thus take the truncate lock before calling
3010 * into the cluster layer. In that case, however, we are not concerned
3011 * with the CP blob being wiped out in the middle of the IO
3012 * because there isn't anything to toss; the VM swapfile key stays
3013 * in-core as long as the file is open.
3014 *
3015 * NB:
3016 * For filesystem resize, we may not have access to the underlying
3017 * file's cache key for whatever reason (device may be locked). However,
3018 * we do not need it since we are going to use the temporary HFS-wide resize key
3019 * which is generated once we start relocating file content. If this file's I/O
3020 * should be done using the resize key, it will have been supplied already, so
3021 * do not attach the file's cp blob to the buffer.
3022 */
3023 if ((cp->c_cpentry->cp_flags & CP_RELOCATION_INFLIGHT) == 0) {
3024 buf_setcpaddr(bp, cp->c_cpentry);
3025 }
3026 }
3027 #endif /* CONFIG_PROTECT */
3028
3029 error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
3030
3031 return error;
3032 }
3033
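/*
 * Clear the cnode's modified and touch-time state in lieu of a full
 * hfs_update(); used by the HFS_TRUNCATE_SKIPUPDATE path in
 * do_hfs_truncate() below.
 */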
3034 static int
3035 hfs_minorupdate(struct vnode *vp) {
3036 struct cnode *cp = VTOC(vp);
3037 cp->c_flag &= ~C_MODIFIED;
3038 cp->c_touch_acctime = 0;
3039 cp->c_touch_chgtime = 0;
3040 cp->c_touch_modtime = 0;
3041
3042 return 0;
3043 }
3044
3045 int
3046 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vfs_context_t context)
3047 {
3048 register struct cnode *cp = VTOC(vp);
3049 struct filefork *fp = VTOF(vp);
3050 struct proc *p = vfs_context_proc(context);
3051 kauth_cred_t cred = vfs_context_ucred(context);
3052 int retval;
3053 off_t bytesToAdd;
3054 off_t actualBytesAdded;
3055 off_t filebytes;
3056 u_int32_t fileblocks;
3057 int blksize;
3058 struct hfsmount *hfsmp;
3059 int lockflags;
3060 int skipupdate = (truncateflags & HFS_TRUNCATE_SKIPUPDATE);
3061 int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES);
3062
3063 blksize = VTOVCB(vp)->blockSize;
3064 fileblocks = fp->ff_blocks;
3065 filebytes = (off_t)fileblocks * (off_t)blksize;
3066
3067 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
3068 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
3069
3070 if (length < 0)
3071 return (EINVAL);
3072
3073 /* This should only happen with a corrupt filesystem */
3074 if ((off_t)fp->ff_size < 0)
3075 return (EINVAL);
3076
3077 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
3078 return (EFBIG);
3079
3080 hfsmp = VTOHFS(vp);
3081
3082 retval = E_NONE;
3083
3084 /* Files that are changing size are not hot file candidates. */
3085 if (hfsmp->hfc_stage == HFC_RECORDING) {
3086 fp->ff_bytesread = 0;
3087 }
3088
3089 /*
3090 * We cannot just check if fp->ff_size == length (as an optimization)
3091 * since there may be extra physical blocks that also need truncation.
3092 */
3093 #if QUOTA
3094 if ((retval = hfs_getinoquota(cp)))
3095 return(retval);
3096 #endif /* QUOTA */
3097
3098 /*
3099 * Lengthen the size of the file. We must ensure that the
3100 * last byte of the file is allocated. Since the smallest
3101 * value of ff_size is 0, length will be at least 1.
3102 */
3103 if (length > (off_t)fp->ff_size) {
3104 #if QUOTA
3105 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
3106 cred, 0);
3107 if (retval)
3108 goto Err_Exit;
3109 #endif /* QUOTA */
3110 /*
3111 * If we don't have enough physical space then
3112 * we need to extend the physical size.
3113 */
3114 if (length > filebytes) {
3115 int eflags;
3116 u_int32_t blockHint = 0;
3117
3118 /* All or nothing and don't round up to clumpsize. */
3119 eflags = kEFAllMask | kEFNoClumpMask;
3120
3121 if (cred && suser(cred, NULL) != 0)
3122 eflags |= kEFReserveMask; /* keep a reserve */
3123
3124 /*
3125 * Allocate Journal and Quota files in metadata zone.
3126 */
3127 if (filebytes == 0 &&
3128 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
3129 hfs_virtualmetafile(cp)) {
3130 eflags |= kEFMetadataMask;
3131 blockHint = hfsmp->hfs_metazone_start;
3132 }
3133 if (hfs_start_transaction(hfsmp) != 0) {
3134 retval = EINVAL;
3135 goto Err_Exit;
3136 }
3137
3138 /* Protect extents b-tree and allocation bitmap */
3139 lockflags = SFL_BITMAP;
3140 if (overflow_extents(fp))
3141 lockflags |= SFL_EXTENTS;
3142 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3143
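/* ExtendFileC may add fewer bytes than requested per call, so loop
 * until the physical size reaches the target; if a call makes no
 * progress, clamp the target to what was actually allocated. */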
3144 while ((length > filebytes) && (retval == E_NONE)) {
3145 bytesToAdd = length - filebytes;
3146 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
3147 (FCB*)fp,
3148 bytesToAdd,
3149 blockHint,
3150 eflags,
3151 &actualBytesAdded));
3152
3153 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
3154 if (actualBytesAdded == 0 && retval == E_NONE) {
3155 if (length > filebytes)
3156 length = filebytes;
3157 break;
3158 }
3159 } /* endwhile */
3160
3161 hfs_systemfile_unlock(hfsmp, lockflags);
3162
3163 if (hfsmp->jnl) {
3164 if (skipupdate) {
3165 (void) hfs_minorupdate(vp);
3166 }
3167 else {
3168 (void) hfs_update(vp, TRUE);
3169 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3170 }
3171 }
3172
3173 hfs_end_transaction(hfsmp);
3174
3175 if (retval)
3176 goto Err_Exit;
3177
3178 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
3179 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
3180 }
3181
3182 if (!(flags & IO_NOZEROFILL)) {
3183 if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
3184 struct rl_entry *invalid_range;
3185 off_t zero_limit;
3186
3187 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
3188 if (length < zero_limit) zero_limit = length;
3189
3190 if (length > (off_t)fp->ff_size) {
3191 struct timeval tv;
3192
3193 /* Extending the file: time to fill out the current last page with zeroes? */
3194 if ((fp->ff_size & PAGE_MASK_64) &&
3195 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
3196 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
3197
3198 /* There's some valid data at the start of the (current) last page
3199 of the file, so zero out the remainder of that page to ensure the
3200 entire page contains valid data. Since there is no invalid range
3201 possible past the (current) eof, there's no need to remove anything
3202 from the invalid range list before calling cluster_write(): */
3203 hfs_unlock(cp);
3204 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
3205 fp->ff_size, (off_t)0,
3206 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
3207 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
3208 if (retval) goto Err_Exit;
3209
3210 /* Merely invalidate the remaining area, if necessary: */
3211 if (length > zero_limit) {
3212 microuptime(&tv);
3213 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
3214 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
3215 }
3216 } else {
3217 /* The page containing the (current) eof is invalid: just add the
3218 remainder of the page to the invalid list, along with the area
3219 being newly allocated:
3220 */
3221 microuptime(&tv);
3222 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
3223 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
3224 };
3225 }
3226 } else {
3227 panic("hfs_truncate: invoked on non-UBC object?!");
3228 };
3229 }
3230 if (suppress_times == 0) {
3231 cp->c_touch_modtime = TRUE;
3232 }
3233 fp->ff_size = length;
3234
3235 } else { /* Shorten the size of the file */
3236
3237 if ((off_t)fp->ff_size > length) {
3238 /* Any space previously marked as invalid is now irrelevant: */
3239 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
3240 }
3241
3242 /*
3243 * Account for any unmapped blocks. Note that the new
3244 * file length can still end up with unmapped blocks.
3245 */
3246 if (fp->ff_unallocblocks > 0) {
3247 u_int32_t finalblks;
3248 u_int32_t loanedBlocks;
3249
3250 hfs_lock_mount(hfsmp);
3251 loanedBlocks = fp->ff_unallocblocks;
3252 cp->c_blocks -= loanedBlocks;
3253 fp->ff_blocks -= loanedBlocks;
3254 fp->ff_unallocblocks = 0;
3255
3256 hfsmp->loanedBlocks -= loanedBlocks;
3257
3258 finalblks = (length + blksize - 1) / blksize;
3259 if (finalblks > fp->ff_blocks) {
3260 /* calculate required unmapped blocks */
3261 loanedBlocks = finalblks - fp->ff_blocks;
3262 hfsmp->loanedBlocks += loanedBlocks;
3263
3264 fp->ff_unallocblocks = loanedBlocks;
3265 cp->c_blocks += loanedBlocks;
3266 fp->ff_blocks += loanedBlocks;
3267 }
3268 hfs_unlock_mount (hfsmp);
3269 }
3270
3271 /*
3272 * For a TBE process the deallocation of the file blocks is
3273 * delayed until the file is closed, and hfs_close calls
3274 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
3275 * isn't set, we make sure this isn't a TBE process.
3276 */
3277 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
3278 #if QUOTA
3279 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
3280 #endif /* QUOTA */
3281 if (hfs_start_transaction(hfsmp) != 0) {
3282 retval = EINVAL;
3283 goto Err_Exit;
3284 }
3285
3286 if (fp->ff_unallocblocks == 0) {
3287 /* Protect extents b-tree and allocation bitmap */
3288 lockflags = SFL_BITMAP;
3289 if (overflow_extents(fp))
3290 lockflags |= SFL_EXTENTS;
3291 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3292
3293 retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0,
3294 FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));
3295
3296 hfs_systemfile_unlock(hfsmp, lockflags);
3297 }
3298 if (hfsmp->jnl) {
3299 if (retval == 0) {
3300 fp->ff_size = length;
3301 }
3302 if (skipupdate) {
3303 (void) hfs_minorupdate(vp);
3304 }
3305 else {
3306 (void) hfs_update(vp, TRUE);
3307 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3308 }
3309 }
3310 hfs_end_transaction(hfsmp);
3311
3312 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
3313 if (retval)
3314 goto Err_Exit;
3315 #if QUOTA
3316 /* These are bytesreleased */
3317 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
3318 #endif /* QUOTA */
3319 }
3320 /*
3321 * Only set update flag if the logical length changes & we aren't
3322 * suppressing modtime updates.
3323 */
3324 if (((off_t)fp->ff_size != length) && (suppress_times == 0)) {
3325 cp->c_touch_modtime = TRUE;
3326 }
3327 fp->ff_size = length;
3328 }
3329 if (cp->c_mode & (S_ISUID | S_ISGID)) {
3330 if (!vfs_context_issuser(context)) {
3331 cp->c_mode &= ~(S_ISUID | S_ISGID);
3332 skipupdate = 0;
3333 }
3334 }
3335 if (skipupdate) {
3336 retval = hfs_minorupdate(vp);
3337 }
3338 else {
3339 cp->c_touch_chgtime = TRUE; /* status changed */
3340 if (suppress_times == 0) {
3341 cp->c_touch_modtime = TRUE; /* file data was modified */
3342
3343 /*
3344 * If we are not suppressing the modtime update, then
3345 * update the gen count as well.
3346 */
3347 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) {
3348 hfs_incr_gencount(cp);
3349 }
3350 }
3351
3352 retval = hfs_update(vp, MNT_WAIT);
3353 }
3354 if (retval) {
3355 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
3356 -1, -1, -1, retval, 0);
3357 }
3358
3359 Err_Exit:
3360
3361 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
3362 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
3363
3364 return (retval);
3365 }
3366
3367 /*
3368 * Preparation which must be done prior to deleting the catalog record
3369 * of a file or directory. In order to keep the on-disk state as safe as possible,
3370 * we remove the catalog entry before releasing the bitmap blocks and the
3371 * overflow extent records. However, some work must be done prior to deleting
3372 * the catalog record.
3373 *
3374 * When calling this function, the cnode must exist both in memory and on-disk.
3375 * If there are both resource fork and data fork vnodes, this function should
3376 * be called on both.
3377 */
3378
3379 int
3380 hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {
3381
3382 struct filefork *fp = VTOF(vp);
3383 struct cnode *cp = VTOC(vp);
3384 #if QUOTA
3385 int retval = 0;
3386 #endif /* QUOTA */
3387
3388 /* Cannot truncate an HFS directory! */
3389 if (vnode_isdir(vp)) {
3390 return (EISDIR);
3391 }
3392
3393 /*
3394 * See the comment below in hfs_truncate for why we need to call
3395 * setsize here. Essentially we want to avoid pending IO if we
3396 * already know that the blocks are going to be released here.
3397 * This function is only called when removing all storage for a file, so
3398 * we can take a shortcut and immediately call ubc_setsize(vp, 0).
3399 */
3400 ubc_setsize(vp, 0);
3401
3402 /* This should only happen with a corrupt filesystem */
3403 if ((off_t)fp->ff_size < 0)
3404 return (EINVAL);
3405
3406 /*
3407 * We cannot just check if fp->ff_size == length (as an optimization)
3408 * since there may be extra physical blocks that also need truncation.
3409 */
3410 #if QUOTA
3411 if ((retval = hfs_getinoquota(cp))) {
3412 return(retval);
3413 }
3414 #endif /* QUOTA */
3415
3416 /* Wipe out any invalid ranges which have yet to be backed by disk */
3417 rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);
3418
3419 /*
3420 * Account for any unmapped blocks. Since we're deleting the
3421 * entire file, we don't have to worry about just shrinking
3422 * to a smaller number of borrowed blocks.
3423 */
3424 if (fp->ff_unallocblocks > 0) {
3425 u_int32_t loanedBlocks;
3426
3427 hfs_lock_mount (hfsmp);
3428 loanedBlocks = fp->ff_unallocblocks;
3429 cp->c_blocks -= loanedBlocks;
3430 fp->ff_blocks -= loanedBlocks;
3431 fp->ff_unallocblocks = 0;
3432
3433 hfsmp->loanedBlocks -= loanedBlocks;
3434
3435 hfs_unlock_mount (hfsmp);
3436 }
3437
3438 return 0;
3439 }
3440
3441
3442 /*
3443 * Special wrapper around calling TruncateFileC. This function is usable
3444 * even when the catalog record does not exist any longer, making it ideal
3445 * for use when deleting a file. The simplification here is that we know
3446 * that we are releasing all blocks.
3447 *
3448 * Note that this function may be called when there is no vnode backing
3449 * the file fork in question. We may call this from hfs_vnop_inactive
3450 * to clear out resource fork data (and may not want to clear out the data
3451 * fork yet). As a result, we pointer-check both sets of inputs before
3452 * doing anything with them.
3453 *
3454 * The caller is responsible for saving off a copy of the filefork(s)
3455 * embedded within the cnode prior to calling this function. The pointers
3456 * supplied as arguments must be valid even if the cnode is no longer valid.
3457 */
3458
3459 int
3460 hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
3461 struct filefork *rsrcfork, u_int32_t fileid) {
3462
3463 off_t filebytes;
3464 u_int32_t fileblocks;
3465 int blksize = 0;
3466 int error = 0;
3467 int lockflags;
3468
3469 blksize = hfsmp->blockSize;
3470
3471 /* Data Fork */
3472 if ((datafork != NULL) && (datafork->ff_blocks > 0)) {
3473 fileblocks = datafork->ff_blocks;
3474 filebytes = (off_t)fileblocks * (off_t)blksize;
3475
3476 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3477
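/* When the fork has overflow extents, release it in steps of at most
 * HFS_BIGFILE_SIZE per transaction so individual journal transactions
 * stay bounded; otherwise drop straight to zero. */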
3478 while (filebytes > 0) {
3479 if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(datafork)) {
3480 filebytes -= HFS_BIGFILE_SIZE;
3481 } else {
3482 filebytes = 0;
3483 }
3484
3485 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3486 if (hfs_start_transaction(hfsmp) != 0) {
3487 error = EINVAL;
3488 break;
3489 }
3490
3491 if (datafork->ff_unallocblocks == 0) {
3492 /* Protect extents b-tree and allocation bitmap */
3493 lockflags = SFL_BITMAP;
3494 if (overflow_extents(datafork))
3495 lockflags |= SFL_EXTENTS;
3496 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3497
3498 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false));
3499
3500 hfs_systemfile_unlock(hfsmp, lockflags);
3501 }
3502 if (error == 0) {
3503 datafork->ff_size = filebytes;
3504 }
3505 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3506
3507 /* Finish the transaction and start over if necessary */
3508 hfs_end_transaction(hfsmp);
3509
3510 if (error) {
3511 break;
3512 }
3513 }
3514 }
3515
3516 /* Resource fork */
3517 if (error == 0 && (rsrcfork != NULL) && rsrcfork->ff_blocks > 0) {
3518 fileblocks = rsrcfork->ff_blocks;
3519 filebytes = (off_t)fileblocks * (off_t)blksize;
3520
3521 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3522
3523 while (filebytes > 0) {
3524 if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(rsrcfork)) {
3525 filebytes -= HFS_BIGFILE_SIZE;
3526 } else {
3527 filebytes = 0;
3528 }
3529
3530 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3531 if (hfs_start_transaction(hfsmp) != 0) {
3532 error = EINVAL;
3533 break;
3534 }
3535
3536 if (rsrcfork->ff_unallocblocks == 0) {
3537 /* Protect extents b-tree and allocation bitmap */
3538 lockflags = SFL_BITMAP;
3539 if (overflow_extents(rsrcfork))
3540 lockflags |= SFL_EXTENTS;
3541 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3542
3543 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false));
3544
3545 hfs_systemfile_unlock(hfsmp, lockflags);
3546 }
3547 if (error == 0) {
3548 rsrcfork->ff_size = filebytes;
3549 }
3550 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3551
3552 /* Finish the transaction and start over if necessary */
3553 hfs_end_transaction(hfsmp);
3554
3555 if (error) {
3556 break;
3557 }
3558 }
3559 }
3560
3561 return error;
3562 }
3563
3564
3565 /*
3566 * Truncate a cnode to at most length size, freeing (or adding) the
3567 * disk blocks.
3568 */
3569 int
3570 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
3571 int truncateflags, vfs_context_t context)
3572 {
3573 struct filefork *fp = VTOF(vp);
3574 off_t filebytes;
3575 u_int32_t fileblocks;
3576 int blksize, error = 0;
3577 struct cnode *cp = VTOC(vp);
3578
3579 /* Cannot truncate an HFS directory! */
3580 if (vnode_isdir(vp)) {
3581 return (EISDIR);
3582 }
3583 /* A swap file cannot change size. */
3584 if (vnode_isswap(vp) && (length != 0)) {
3585 return (EPERM);
3586 }
3587
3588 blksize = VTOVCB(vp)->blockSize;
3589 fileblocks = fp->ff_blocks;
3590 filebytes = (off_t)fileblocks * (off_t)blksize;
3591
3592 //
3593 // Have to do this here so that we don't wind up with
3594 // i/o pending for blocks that are about to be released
3595 // if we truncate the file.
3596 //
3597 // If skipsetsize is set, then the caller is responsible
3598 // for the ubc_setsize.
3599 //
3600 // Even if skipsetsize is set, if the length is zero we
3601 // want to call ubc_setsize() because as of SnowLeopard
3602 // it will no longer cause any page-ins and it will drop
3603 // any dirty pages so that we don't do any i/o that we
3604 // don't have to. This also prevents a race where i/o
3605 // for truncated blocks may overwrite later data if the
3606 // blocks get reallocated to a different file.
3607 //
3608 if (!skipsetsize || length == 0)
3609 ubc_setsize(vp, length);
3610
3611 // have to loop truncating or growing files that are
3612 // really big because otherwise transactions can get
3613 // enormous and consume too many kernel resources.
3614
3615 if (length < filebytes) {
3616 while (filebytes > length) {
3617 if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
3618 filebytes -= HFS_BIGFILE_SIZE;
3619 } else {
3620 filebytes = length;
3621 }
3622 cp->c_flag |= C_FORCEUPDATE;
3623 error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
3624 if (error)
3625 break;
3626 }
3627 } else if (length > filebytes) {
3628 while (filebytes < length) {
3629 if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
3630 filebytes += HFS_BIGFILE_SIZE;
3631 } else {
3632 filebytes = length;
3633 }
3634 cp->c_flag |= C_FORCEUPDATE;
3635 error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
3636 if (error)
3637 break;
3638 }
3639 } else /* Same logical size */ {
3640
3641 error = do_hfs_truncate(vp, length, flags, truncateflags, context);
3642 }
3643 /* Files that are changing size are not hot file candidates. */
3644 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
3645 fp->ff_bytesread = 0;
3646 }
3647
3648 return (error);
3649 }
3650
3651
3652
3653 /*
3654 * Preallocate file storage space.
3655 */
3656 int
3657 hfs_vnop_allocate(struct vnop_allocate_args /* {
3658 vnode_t a_vp;
3659 off_t a_length;
3660 u_int32_t a_flags;
3661 off_t *a_bytesallocated;
3662 off_t a_offset;
3663 vfs_context_t a_context;
3664 } */ *ap)
3665 {
3666 struct vnode *vp = ap->a_vp;
3667 struct cnode *cp;
3668 struct filefork *fp;
3669 ExtendedVCB *vcb;
3670 off_t length = ap->a_length;
3671 off_t startingPEOF;
3672 off_t moreBytesRequested;
3673 off_t actualBytesAdded;
3674 off_t filebytes;
3675 u_int32_t fileblocks;
3676 int retval, retval2;
3677 u_int32_t blockHint;
3678 u_int32_t extendFlags; /* For call to ExtendFileC */
3679 struct hfsmount *hfsmp;
3680 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
3681 int lockflags;
3682 time_t orig_ctime;
3683
3684 *(ap->a_bytesallocated) = 0;
3685
3686 if (!vnode_isreg(vp))
3687 return (EISDIR);
3688 if (length < (off_t)0)
3689 return (EINVAL);
3690
3691 cp = VTOC(vp);
3692
3693 orig_ctime = VTOC(vp)->c_ctime;
3694
3695 check_for_tracked_file(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);
3696
3697 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
3698
3699 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
3700 goto Err_Exit;
3701 }
3702
3703 fp = VTOF(vp);
3704 hfsmp = VTOHFS(vp);
3705 vcb = VTOVCB(vp);
3706
3707 fileblocks = fp->ff_blocks;
3708 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
3709
3710 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
3711 retval = EINVAL;
3712 goto Err_Exit;
3713 }
3714
3715 /* Fill in the flags word for the call to Extend the file */
3716
3717 extendFlags = kEFNoClumpMask;
3718 if (ap->a_flags & ALLOCATECONTIG)
3719 extendFlags |= kEFContigMask;
3720 if (ap->a_flags & ALLOCATEALL)
3721 extendFlags |= kEFAllMask;
3722 if (cred && suser(cred, NULL) != 0)
3723 extendFlags |= kEFReserveMask;
3724 if (hfs_virtualmetafile(cp))
3725 extendFlags |= kEFMetadataMask;
3726
3727 retval = E_NONE;
3728 blockHint = 0;
3729 startingPEOF = filebytes;
3730
3731 if (ap->a_flags & ALLOCATEFROMPEOF)
3732 length += filebytes;
3733 else if (ap->a_flags & ALLOCATEFROMVOL)
3734 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
3735
3736 /* If no changes are necessary, then we're done */
3737 if (filebytes == length)
3738 goto Std_Exit;
3739
3740 /*
3741 * Lengthen the size of the file. We must ensure that the
3742 * last byte of the file is allocated. Since the smallest
3743 * value of filebytes is 0, length will be at least 1.
3744 */
3745 if (length > filebytes) {
3746 off_t total_bytes_added = 0, orig_request_size;
3747
3748 orig_request_size = moreBytesRequested = length - filebytes;
3749
3750 #if QUOTA
3751 retval = hfs_chkdq(cp,
3752 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
3753 cred, 0);
3754 if (retval)
3755 goto Err_Exit;
3756
3757 #endif /* QUOTA */
3758 /*
3759 * Metadata zone checks.
3760 */
3761 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
3762 /*
3763 * Allocate Journal and Quota files in metadata zone.
3764 */
3765 if (hfs_virtualmetafile(cp)) {
3766 blockHint = hfsmp->hfs_metazone_start;
3767 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
3768 (blockHint <= hfsmp->hfs_metazone_end)) {
3769 /*
3770 * Move blockHint outside metadata zone.
3771 */
3772 blockHint = hfsmp->hfs_metazone_end + 1;
3773 }
3774 }
3775
3776
3777 while ((length > filebytes) && (retval == E_NONE)) {
3778 off_t bytesRequested;
3779
3780 if (hfs_start_transaction(hfsmp) != 0) {
3781 retval = EINVAL;
3782 goto Err_Exit;
3783 }
3784
3785 /* Protect extents b-tree and allocation bitmap */
3786 lockflags = SFL_BITMAP;
3787 if (overflow_extents(fp))
3788 lockflags |= SFL_EXTENTS;
3789 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3790
3791 if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
3792 bytesRequested = HFS_BIGFILE_SIZE;
3793 } else {
3794 bytesRequested = moreBytesRequested;
3795 }
3796
3797 if (extendFlags & kEFContigMask) {
3798 // if we're on a sparse device, this will force it to do a
3799 // full scan to find the space needed.
3800 hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
3801 }
3802
3803 retval = MacToVFSError(ExtendFileC(vcb,
3804 (FCB*)fp,
3805 bytesRequested,
3806 blockHint,
3807 extendFlags,
3808 &actualBytesAdded));
3809
3810 if (retval == E_NONE) {
3811 *(ap->a_bytesallocated) += actualBytesAdded;
3812 total_bytes_added += actualBytesAdded;
3813 moreBytesRequested -= actualBytesAdded;
3814 if (blockHint != 0) {
3815 blockHint += actualBytesAdded / vcb->blockSize;
3816 }
3817 }
3818 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
3819
3820 hfs_systemfile_unlock(hfsmp, lockflags);
3821
3822 if (hfsmp->jnl) {
3823 (void) hfs_update(vp, TRUE);
3824 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3825 }
3826
3827 hfs_end_transaction(hfsmp);
3828 }
3829
3830
3831 /*
3832 * if we get an error and no changes were made, then exit;
3833 * otherwise we must do the hfs_update to reflect the changes
3834 */
3835 if (retval && (startingPEOF == filebytes))
3836 goto Err_Exit;
3837
3838 /*
3839 * Adjust actualBytesAdded to be allocation block aligned, not
3840 * clump size aligned.
3841 * NOTE: what we are reporting here does not affect reality
3842 * until the file is closed, when we truncate the file to allocation
3843 * block size.
3844 */
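/*
 * Worked example with hypothetical numbers: a 10,000 byte request on a
 * volume with 4,096 byte allocation blocks that was rounded up to a whole
 * clump gets reported as roundup(10000, 4096) == 12,288 bytes (3 blocks).
 */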
3845 if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
3846 *(ap->a_bytesallocated) =
3847 roundup(orig_request_size, (off_t)vcb->blockSize);
3848
3849 } else { /* Shorten the size of the file */
3850
3851 if (fp->ff_size > length) {
3852 /*
3853 * Any buffers that are past the truncation point need to be
3854 * invalidated (to maintain buffer cache consistency).
3855 */
3856 }
3857
3858 retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context);
3859 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
3860
3861 /*
3862 * if we get an error and no changes were made, then exit;
3863 * otherwise we must do the hfs_update to reflect the changes
3864 */
3865 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
3866 #if QUOTA
3867 /* These are bytesreleased */
3868 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
3869 #endif /* QUOTA */
3870
3871 if (fp->ff_size > filebytes) {
3872 fp->ff_size = filebytes;
3873
3874 hfs_unlock(cp);
3875 ubc_setsize(vp, fp->ff_size);
3876 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
3877 }
3878 }
3879
3880 Std_Exit:
3881 cp->c_touch_chgtime = TRUE;
3882 cp->c_touch_modtime = TRUE;
3883 retval2 = hfs_update(vp, MNT_WAIT);
3884
3885 if (retval == 0)
3886 retval = retval2;
3887 Err_Exit:
3888 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
3889 hfs_unlock(cp);
3890 return (retval);
3891 }
3892
3893
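/*
 * Hypothetical userspace sketch: touching pages of an mmap'd file is what
 * typically drives hfs_vnop_pagein() below (the VM layer builds the UPL
 * and calls the vnop). All names here are illustrative only.
 */
#if 0
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>

static unsigned long
sum_mapped_file(const char *path)
{
	struct stat st;
	unsigned long sum = 0;
	unsigned char *p;
	off_t i;
	int fd = open(path, O_RDONLY);

	if (fd < 0 || fstat(fd, &st) != 0 || st.st_size == 0) {
		if (fd >= 0)
			close(fd);
		return (0);
	}
	p = mmap(NULL, (size_t)st.st_size, PROT_READ, MAP_PRIVATE, fd, 0);
	if (p != MAP_FAILED) {
		/*
		 * The first touch of each page faults and is satisfied by a
		 * pagein; the 4096 stride assumes 4 KB pages (fine for a sketch).
		 */
		for (i = 0; i < st.st_size; i += 4096)
			sum += p[i];
		munmap(p, (size_t)st.st_size);
	}
	close(fd);
	return (sum);
}
#endif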
3894 /*
3895 * Pagein for HFS filesystem
3896 */
3897 int
3898 hfs_vnop_pagein(struct vnop_pagein_args *ap)
3899 /*
3900 struct vnop_pagein_args {
3901 vnode_t a_vp,
3902 upl_t a_pl,
3903 vm_offset_t a_pl_offset,
3904 off_t a_f_offset,
3905 size_t a_size,
3906 int a_flags
3907 vfs_context_t a_context;
3908 };
3909 */
3910 {
3911 vnode_t vp;
3912 struct cnode *cp;
3913 struct filefork *fp;
3914 int error = 0;
3915 upl_t upl;
3916 upl_page_info_t *pl;
3917 off_t f_offset;
3918 int offset;
3919 int isize;
3920 int pg_index;
3921 boolean_t truncate_lock_held = FALSE;
3922 boolean_t file_converted = FALSE;
3923 kern_return_t kret;
3924
3925 vp = ap->a_vp;
3926 cp = VTOC(vp);
3927 fp = VTOF(vp);
3928
3929 #if CONFIG_PROTECT
3930 if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) {
3931 /*
3932 * If we errored here, then this means that one of two things occurred:
3933 * 1. there was a problem with the decryption of the key.
3934 * 2. the device is locked and we are not allowed to access this particular file.
3935 *
3936 * Either way, this means that we need to shut down this upl now. As long as
3937 * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves)
3938 * then we create a upl and immediately abort it.
3939 */
3940 if (ap->a_pl == NULL) {
3941 /* create the upl */
3942 ubc_create_upl (vp, ap->a_f_offset, ap->a_size, &upl, &pl,
3943 UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
3944 /* mark the range as needed so it doesn't immediately get discarded upon abort */
3945 ubc_upl_range_needed (upl, ap->a_pl_offset / PAGE_SIZE, 1);
3946
3947 /* Abort the range */
3948 ubc_upl_abort_range (upl, 0, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
3949 }
3950
3951
3952 return error;
3953 }
3954 #endif /* CONFIG_PROTECT */
3955
3956 if (ap->a_pl != NULL) {
3957 /*
3958 * this can only happen for swap files now that
3959 * we're asking for V2 paging behavior...
3960 * so we don't need to worry about decompression, or
3961 * keeping track of blocks read or taking the truncate lock
3962 */
3963 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
3964 ap->a_size, (off_t)fp->ff_size, ap->a_flags);
3965 goto pagein_done;
3966 }
3967
3968 retry_pagein:
3969 /*
3970 * take truncate lock (shared/recursive) to guard against
3971 * zero-fill thru fsync interfering, but only for v2
3972 *
3973 * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
3974 * lock shared and we are allowed to recurse 1 level if this thread already
3975 * owns the lock exclusively... this can legally occur
3976 * if we are doing a shrinking ftruncate against a file
3977 * that is mapped private, and the pages being truncated
3978 * do not currently exist in the cache... in that case
3979 * we will have to page-in the missing pages in order
3980 * to provide them to the private mapping... we must
3981 * also call hfs_unlock_truncate with a positive been_recursed
3982 * arg to indicate that if we have recursed, there is no need to drop
3983 * the lock. Allowing this simple recursion is necessary
3984 * in order to avoid a certain deadlock... since the ftruncate
3985 * already holds the truncate lock exclusively, if we try
3986 * to acquire it shared to protect the pagein path, we will
3987 * hang this thread
3988 *
3989 * NOTE: The if () block below is a workaround in order to prevent a
3990 * VM deadlock. See rdar://7853471.
3991 *
3992 * If we are in a forced unmount, then launchd will still have the
3993 * dyld_shared_cache file mapped as it is trying to reboot. If we
3994 * take the truncate lock here to service a page fault, then our
3995 * thread could deadlock with the forced-unmount. The forced unmount
3996 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
3997 * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount
3998 * thread will think it needs to copy all of the data out of the file
3999 * and into a VM copy object. If we hold the cnode lock here, then that
4000 * VM operation will not be able to proceed, because we'll set a busy page
4001 * before attempting to grab the lock. Note that this isn't as simple as "don't
4002 * call ubc_setsize" because doing that would just shift the problem to the
4003 * ubc_msync done before the vnode is reclaimed.
4004 *
4005 * So, if a forced unmount on this volume is in flight AND the cnode is
4006 * marked C_DELETED, then just go ahead and do the page in without taking
4007 * the lock (thus suspending pagein_v2 semantics temporarily). Since it's on a file
4008 * that is not going to be available on the next mount, this seems like an
4009 * OK solution from a correctness point of view, even though it is hacky.
4010 */
4011 if (vfs_isforce(vp->v_mount)) {
4012 if (cp->c_flag & C_DELETED) {
4013 /* If we don't get it, then just go ahead and operate without the lock */
4014 truncate_lock_held = hfs_try_trunclock(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4015 }
4016 }
4017 else {
4018 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4019 truncate_lock_held = TRUE;
4020 }
4021
4022 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
4023
4024 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
4025 error = EINVAL;
4026 goto pagein_done;
4027 }
4028 ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);
4029
4030 isize = ap->a_size;
4031
4032 /*
4033 * Scan from the back to find the last page in the UPL, so that we
4034 * aren't looking at a UPL that may have already been freed by the
4035 * preceding aborts/completions.
4036 */
4037 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
4038 if (upl_page_present(pl, --pg_index))
4039 break;
4040 if (pg_index == 0) {
4041 /*
4042 * no absent pages were found in the range specified
4043 * just abort the UPL to get rid of it and then we're done
4044 */
4045 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
4046 goto pagein_done;
4047 }
4048 }
4049 /*
4050 * initialize the offset variables before we touch the UPL.
4051 * f_offset is the position into the file, in bytes
4052 * offset is the position into the UPL, in bytes
4053 * pg_index is the pg# of the UPL we're operating on
4054 * isize is the offset into the UPL of the last page that is present.
4055 */
4056 isize = ((pg_index + 1) * PAGE_SIZE);
4057 pg_index = 0;
4058 offset = 0;
4059 f_offset = ap->a_f_offset;
4060
4061 while (isize) {
4062 int xsize;
4063 int num_of_pages;
4064
4065 if ( !upl_page_present(pl, pg_index)) {
4066 /*
4067 * we asked for RET_ONLY_ABSENT, so it's possible
4068 * to get back empty slots in the UPL.
4069 * just skip over them
4070 */
4071 f_offset += PAGE_SIZE;
4072 offset += PAGE_SIZE;
4073 isize -= PAGE_SIZE;
4074 pg_index++;
4075
4076 continue;
4077 }
4078 /*
4079 * We know that we have at least one absent page.
4080 * Now checking to see how many in a row we have
4081 */
4082 num_of_pages = 1;
4083 xsize = isize - PAGE_SIZE;
4084
4085 while (xsize) {
4086 if ( !upl_page_present(pl, pg_index + num_of_pages))
4087 break;
4088 num_of_pages++;
4089 xsize -= PAGE_SIZE;
4090 }
4091 xsize = num_of_pages * PAGE_SIZE;
4092
4093 #if HFS_COMPRESSION
4094 if (VNODE_IS_RSRC(vp)) {
4095 /* allow pageins of the resource fork */
4096 } else {
4097 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
4098
4099 if (compressed) {
4100 if (truncate_lock_held) {
4101 /*
4102 * can't hold the truncate lock when calling into the decmpfs layer
4103 * since it calls back into this layer... even though we're only
4104 * holding the lock in shared mode, and the re-entrant path only
4105 * takes the lock shared, we can deadlock if some other thread
4106 * tries to grab the lock exclusively in between.
4107 */
4108 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4109 truncate_lock_held = FALSE;
4110 }
4111 ap->a_pl = upl;
4112 ap->a_pl_offset = offset;
4113 ap->a_f_offset = f_offset;
4114 ap->a_size = xsize;
4115
4116 error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
4117 /*
4118 * note that decmpfs_pagein_compressed can change the state of
4119 * 'compressed'... it will set it to 0 if the file is no longer
4120 * compressed once the compression lock is successfully taken
4121 * i.e. we would block on that lock while the file is being inflated
4122 */
4123 if (compressed) {
4124 if (error == 0) {
4125 /* successful page-in, update the access time */
4126 VTOC(vp)->c_touch_acctime = TRUE;
4127
4128 /* compressed files are not hot file candidates */
4129 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
4130 fp->ff_bytesread = 0;
4131 }
4132 } else if (error == EAGAIN) {
4133 /*
4134 * EAGAIN indicates someone else already holds the compression lock...
4135 * to avoid deadlocking, we'll abort this range of pages with an
4136 * indication that the pagein needs to be redriven
4137 */
4138 ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
4139 }
4140 goto pagein_next_range;
4141 }
4142 else {
4143 /*
4144 * Set file_converted only if the file became decompressed while we were
4145 * paging in. If it were still compressed, we would re-start the loop using the goto
4146 * in the above block. This avoids overloading truncate_lock_held as our retry_pagein
4147 * condition below, since we could have avoided taking the truncate lock to prevent
4148 * a deadlock in the force unmount case.
4149 */
4150 file_converted = TRUE;
4151 }
4152 }
4153 if (file_converted == TRUE) {
4154 /*
4155 * the file was converted back to a regular file after we first saw it as compressed;
4156 * we need to abort the upl, retake the truncate lock, recreate the UPL and start over,
4157 * resetting a_size so that we consider what remains of the original request
4158 * and nulling out a_pl and a_pl_offset.
4159 *
4160 * We should only be able to get into this block if the decmpfs_pagein_compressed
4161 * successfully decompressed the range in question for this file.
4162 */
4163 ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
4164
4165 ap->a_size = isize;
4166 ap->a_pl = NULL;
4167 ap->a_pl_offset = 0;
4168
4169 /* Reset file_converted back to false so that we don't infinite-loop. */
4170 file_converted = FALSE;
4171 goto retry_pagein;
4172 }
4173 }
4174 #endif
4175 error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);
4176
4177 /*
4178 * Keep track of blocks read.
4179 */
4180 if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
4181 int bytesread;
4182 int took_cnode_lock = 0;
4183
4184 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
4185 bytesread = fp->ff_size;
4186 else
4187 bytesread = xsize;
4188
4189 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
4190 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
4191 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
4192 took_cnode_lock = 1;
4193 }
4194 /*
4195 * If this file hasn't been seen since the start of
4196 * the current sampling period then start over.
4197 */
4198 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
4199 struct timeval tv;
4200
4201 fp->ff_bytesread = bytesread;
4202 microtime(&tv);
4203 cp->c_atime = tv.tv_sec;
4204 } else {
4205 fp->ff_bytesread += bytesread;
4206 }
4207 cp->c_touch_acctime = TRUE;
4208 if (took_cnode_lock)
4209 hfs_unlock(cp);
4210 }
4211 pagein_next_range:
4212 f_offset += xsize;
4213 offset += xsize;
4214 isize -= xsize;
4215 pg_index += num_of_pages;
4216
4217 error = 0;
4218 }
4219
4220 pagein_done:
4221 if (truncate_lock_held == TRUE) {
4222 /* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */
4223 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4224 }
4225
4226 return (error);
4227 }
4228
4229 /*
4230 * Pageout for HFS filesystem.
4231 */
4232 int
4233 hfs_vnop_pageout(struct vnop_pageout_args *ap)
4234 /*
4235 struct vnop_pageout_args {
4236 vnode_t a_vp,
4237 upl_t a_pl,
4238 vm_offset_t a_pl_offset,
4239 off_t a_f_offset,
4240 size_t a_size,
4241 int a_flags
4242 vfs_context_t a_context;
4243 };
4244 */
4245 {
4246 vnode_t vp = ap->a_vp;
4247 struct cnode *cp;
4248 struct filefork *fp;
4249 int retval = 0;
4250 off_t filesize;
4251 upl_t upl;
4252 upl_page_info_t* pl;
4253 vm_offset_t a_pl_offset;
4254 int a_flags;
4255 int is_pageoutv2 = 0;
4256 kern_return_t kret;
4257
4258 cp = VTOC(vp);
4259 fp = VTOF(vp);
4260
4261 /*
4262 * Figure out where the file ends, for pageout purposes. If
4263 * ff_new_size > ff_size, then we're in the middle of extending the
4264 * file via a write, so it is safe (and necessary) that we be able
4265 * to pageout up to that point.
4266 */
4267 filesize = fp->ff_size;
4268 if (fp->ff_new_size > filesize)
4269 filesize = fp->ff_new_size;
4270
4271 a_flags = ap->a_flags;
4272 a_pl_offset = ap->a_pl_offset;
4273
4274 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
4275 hfs_incr_gencount (cp);
4276 }
4277
4278 /*
4279 * we can tell if we're getting the new or old behavior from the UPL
4280 */
4281 if ((upl = ap->a_pl) == NULL) {
4282 int request_flags;
4283
4284 is_pageoutv2 = 1;
4285 /*
4286 * we're in control of any UPL we commit
4287 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
4288 */
4289 a_flags &= ~UPL_NOCOMMIT;
4290 a_pl_offset = 0;
4291
4292 /*
4293 * For V2 semantics, we want to take the cnode truncate lock
4294 * shared to guard against the file size changing via zero-filling.
4295 *
4296 * However, we have to be careful because we may be invoked
4297 * via the ubc_msync path to write out dirty mmap'd pages
4298 * in response to a lock event on a content-protected
4299 * filesystem (e.g. to write out class A files).
4300 * As a result, we want to take the truncate lock 'SHARED' with
4301 * the mini-recursion locktype so that we don't deadlock/panic
4302 * because we may be already holding the truncate lock exclusive to force any other
4303 * IOs to have blocked behind us.
4304 */
4305 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4306
4307 if (a_flags & UPL_MSYNC) {
4308 request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
4309 }
4310 else {
4311 request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
4312 }
4313
4314 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);
4315
4316 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
4317 retval = EINVAL;
4318 goto pageout_done;
4319 }
4320 }
4321 /*
4322 * from this point forward upl points at the UPL we're working with
4323 * it was either passed in or we successfully created it
4324 */
4325
4326 /*
4327 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
4328 * UPL instead of relying on the UPL passed into us. We go ahead and do that here,
4329 * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for
4330 * N dirty ranges in the UPL. Note that this is almost a direct copy of the
4331 * logic in vnode_pageout except that we need to do it after grabbing the truncate
4332 * lock in HFS so that we don't lock invert ourselves.
4333 *
4334 * Note that we can still get into this function on behalf of the default pager with
4335 * non-V2 behavior (swapfiles). However in that case, we did not grab locks above
4336 * since fsync and other writing threads will grab the locks, then mark the
4337 * relevant pages as busy. But the pageout codepath marks the pages as busy,
4338 * and THEN would attempt to grab the truncate lock, which would result in deadlock. So
4339 * we do not try to grab anything for the pre-V2 case, which should only be accessed
4340 * by the paging/VM system.
4341 */
4342
4343 if (is_pageoutv2) {
4344 off_t f_offset;
4345 int offset;
4346 int isize;
4347 int pg_index;
4348 int error;
4349 int error_ret = 0;
4350
4351 isize = ap->a_size;
4352 f_offset = ap->a_f_offset;
4353
4354 /*
4355 * Scan from the back to find the last page in the UPL, so that we
4356 * aren't looking at a UPL that may have already been freed by the
4357 * preceding aborts/completions.
4358 */
4359 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
4360 if (upl_page_present(pl, --pg_index))
4361 break;
4362 if (pg_index == 0) {
4363 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
4364 goto pageout_done;
4365 }
4366 }
4367
4368 /*
4369 * initialize the offset variables before we touch the UPL.
4370 * a_f_offset is the position into the file, in bytes
4371 * offset is the position into the UPL, in bytes
4372 * pg_index is the pg# of the UPL we're operating on.
4373 * isize is the offset into the UPL of the last non-clean page.
4374 */
4375 isize = ((pg_index + 1) * PAGE_SIZE);
4376
4377 offset = 0;
4378 pg_index = 0;
4379
4380 while (isize) {
4381 int xsize;
4382 int num_of_pages;
4383
4384 if ( !upl_page_present(pl, pg_index)) {
4385 /*
4386 * we asked for RET_ONLY_DIRTY, so it's possible
4387 * to get back empty slots in the UPL.
4388 * just skip over them
4389 */
4390 f_offset += PAGE_SIZE;
4391 offset += PAGE_SIZE;
4392 isize -= PAGE_SIZE;
4393 pg_index++;
4394
4395 continue;
4396 }
4397 if ( !upl_dirty_page(pl, pg_index)) {
4398 panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
4399 }
4400
4401 /*
4402 * We know that we have at least one dirty page.
4403 * Now check to see how many in a row we have
4404 */
4405 num_of_pages = 1;
4406 xsize = isize - PAGE_SIZE;
4407
4408 while (xsize) {
4409 if ( !upl_dirty_page(pl, pg_index + num_of_pages))
4410 break;
4411 num_of_pages++;
4412 xsize -= PAGE_SIZE;
4413 }
4414 xsize = num_of_pages * PAGE_SIZE;
4415
4416 if (!vnode_isswap(vp)) {
4417 off_t end_of_range;
4418 int tooklock;
4419
4420 tooklock = 0;
4421
4422 if (cp->c_lockowner != current_thread()) {
4423 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
4424 /*
4425 * we're in the v2 path, so we are the
4426 * owner of the UPL... we may have already
4427 * processed some of the UPL, so abort it
4428 * from the current working offset to the
4429 * end of the UPL
4430 */
4431 ubc_upl_abort_range(upl,
4432 offset,
4433 ap->a_size - offset,
4434 UPL_ABORT_FREE_ON_EMPTY);
4435 goto pageout_done;
4436 }
4437 tooklock = 1;
4438 }
4439 end_of_range = f_offset + xsize - 1;
4440
4441 if (end_of_range >= filesize) {
4442 end_of_range = (off_t)(filesize - 1);
4443 }
4444 if (f_offset < filesize) {
4445 rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
4446 cp->c_flag |= C_MODIFIED; /* leof is dirty */
4447 }
4448 if (tooklock) {
4449 hfs_unlock(cp);
4450 }
4451 }
4452 if ((error = cluster_pageout(vp, upl, offset, f_offset,
4453 xsize, filesize, a_flags))) {
4454 if (error_ret == 0)
4455 error_ret = error;
4456 }
4457 f_offset += xsize;
4458 offset += xsize;
4459 isize -= xsize;
4460 pg_index += num_of_pages;
4461 }
4462 /* capture errnos bubbled out of cluster_pageout if they occurred */
4463 if (error_ret != 0) {
4464 retval = error_ret;
4465 }
4466 } /* end block for v2 pageout behavior */
4467 else {
4468 if (!vnode_isswap(vp)) {
4469 off_t end_of_range;
4470 int tooklock = 0;
4471
4472 if (cp->c_lockowner != current_thread()) {
4473 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
4474 if (!(a_flags & UPL_NOCOMMIT)) {
4475 ubc_upl_abort_range(upl,
4476 a_pl_offset,
4477 ap->a_size,
4478 UPL_ABORT_FREE_ON_EMPTY);
4479 }
4480 goto pageout_done;
4481 }
4482 tooklock = 1;
4483 }
4484 end_of_range = ap->a_f_offset + ap->a_size - 1;
4485
4486 if (end_of_range >= filesize) {
4487 end_of_range = (off_t)(filesize - 1);
4488 }
4489 if (ap->a_f_offset < filesize) {
4490 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
4491 cp->c_flag |= C_MODIFIED; /* leof is dirty */
4492 }
4493
4494 if (tooklock) {
4495 hfs_unlock(cp);
4496 }
4497 }
4498 /*
4499 * just call cluster_pageout for old pre-v2 behavior
4500 */
4501 retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
4502 ap->a_size, filesize, a_flags);
4503 }
4504
4505 /*
4506 * If data was written, update the modification time of the file.
4507 * If setuid or setgid bits are set and this process is not the
4508 * superuser then clear the setuid and setgid bits as a precaution
4509 * against tampering.
4510 */
4511 if (retval == 0) {
4512 cp->c_touch_modtime = TRUE;
4513 cp->c_touch_chgtime = TRUE;
4514 if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
4515 (vfs_context_suser(ap->a_context) != 0)) {
4516 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
4517 cp->c_mode &= ~(S_ISUID | S_ISGID);
4518 hfs_unlock(cp);
4519 }
4520 }
4521
4522 pageout_done:
4523 if (is_pageoutv2) {
4524 /*
4525 * Release the truncate lock. Note that because
4526 * we may have taken the lock recursively by
4527 * being invoked via ubc_msync due to lockdown,
4528 * we should release it recursively, too.
4529 */
4530 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4531 }
4532 return (retval);
4533 }
4534
4535 /*
4536 * Intercept B-Tree node writes to unswap them if necessary.
4537 */
4538 int
4539 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
4540 {
4541 int retval = 0;
4542 register struct buf *bp = ap->a_bp;
4543 register struct vnode *vp = buf_vnode(bp);
4544 BlockDescriptor block;
4545
4546 /* Trap B-Tree writes */
4547 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
4548 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
4549 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
4550 (vp == VTOHFS(vp)->hfc_filevp)) {
4551
4552 /*
4553 * Swap and validate the node if it is in native byte order.
4554 * This is always true on big endian, so we always validate
4555 * before writing here. On little endian, the node typically has
4556 * been swapped and validated when it was written to the journal,
4557 * so we won't do anything here.
4558 */
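/*
 * The last two bytes of a B-tree node hold the offset of record 0, which
 * is always sizeof(BTNodeDescriptor) == 14 (0x000e). Reading 0x000e here
 * in host byte order therefore means the node is still in native order
 * and must be swapped to big endian before it goes to disk.
 */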
4559 if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
4560 /* Prepare the block pointer */
4561 block.blockHeader = bp;
4562 block.buffer = (char *)buf_dataptr(bp);
4563 block.blockNum = buf_lblkno(bp);
4564 /* not found in cache ==> came from disk */
4565 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
4566 block.blockSize = buf_count(bp);
4567
4568 /* Endian un-swap B-Tree node */
4569 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
4570 if (retval)
4571 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
4572 }
4573 }
4574
4575 /* This buffer shouldn't be locked anymore but if it is clear it */
4576 if ((buf_flags(bp) & B_LOCKED)) {
4577 // XXXdbg
4578 if (VTOHFS(vp)->jnl) {
4579 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
4580 }
4581 buf_clearflags(bp, B_LOCKED);
4582 }
4583 retval = vn_bwrite (ap);
4584
4585 return (retval);
4586 }
4587
4588 /*
4589 * Relocate a file to a new location on disk
4590 * cnode must be locked on entry
4591 *
4592 * Relocation occurs by cloning the file's data from its
4593 * current set of blocks to a new set of blocks. During
4594 * the relocation all of the blocks (old and new) are
4595 * owned by the file.
4596 *
4597 * -----------------
4598 * |///////////////|
4599 * -----------------
4600 * 0 N (file offset)
4601 *
4602 * ----------------- -----------------
4603 * |///////////////| | | STEP 1 (acquire new blocks)
4604 * ----------------- -----------------
4605 * 0 N N+1 2N
4606 *
4607 * ----------------- -----------------
4608 * |///////////////| |///////////////| STEP 2 (clone data)
4609 * ----------------- -----------------
4610 * 0 N N+1 2N
4611 *
4612 * -----------------
4613 * |///////////////| STEP 3 (head truncate blocks)
4614 * -----------------
4615 * 0 N
4616 *
4617 * During steps 2 and 3 page-outs to file offsets less
4618 * than or equal to N are suspended.
4619 *
4620 * During step 3 page-ins to the file get suspended.
4621 */
4622 int
4623 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
4624 struct proc *p)
4625 {
4626 struct cnode *cp;
4627 struct filefork *fp;
4628 struct hfsmount *hfsmp;
4629 u_int32_t headblks;
4630 u_int32_t datablks;
4631 u_int32_t blksize;
4632 u_int32_t growsize;
4633 u_int32_t nextallocsave;
4634 daddr64_t sector_a, sector_b;
4635 int eflags;
4636 off_t newbytes;
4637 int retval;
4638 int lockflags = 0;
4639 int took_trunc_lock = 0;
4640 int started_tr = 0;
4641 enum vtype vnodetype;
4642
4643 vnodetype = vnode_vtype(vp);
4644 if (vnodetype != VREG) {
4645 /* Not allowed to move symlinks. */
4646 return (EPERM);
4647 }
4648
4649 hfsmp = VTOHFS(vp);
4650 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
4651 return (ENOSPC);
4652 }
4653
4654 cp = VTOC(vp);
4655 fp = VTOF(vp);
4656 if (fp->ff_unallocblocks)
4657 return (EINVAL);
4658
4659 #if CONFIG_PROTECT
4660 /*
4661 * <rdar://problem/9118426>
4662 * Disable HFS file relocation on content-protected filesystems
4663 */
4664 if (cp_fs_protected (hfsmp->hfs_mp)) {
4665 return EINVAL;
4666 }
4667 #endif
4668 /* If it's an SSD, also disable HFS relocation */
4669 if (hfsmp->hfs_flags & HFS_SSD) {
4670 return EINVAL;
4671 }
4672
4673
4674 blksize = hfsmp->blockSize;
4675 if (blockHint == 0)
4676 blockHint = hfsmp->nextAllocation;
4677
4678 if (fp->ff_size > 0x7fffffff) {
4679 return (EFBIG);
4680 }
4681
4682 //
4683 // We do not believe that this call to hfs_fsync() is
4684 // necessary and it causes a journal transaction
4685 // deadlock so we are removing it.
4686 //
4687 //if (vnodetype == VREG && !vnode_issystem(vp)) {
4688 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
4689 // if (retval)
4690 // return (retval);
4691 //}
4692
4693 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
4694 hfs_unlock(cp);
4695 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
4696 /* Force lock since callers expect the lock to be held. */
4697 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) {
4698 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
4699 return (retval);
4700 }
4701 /* No need to continue if file was removed. */
4702 if (cp->c_flag & C_NOEXISTS) {
4703 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
4704 return (ENOENT);
4705 }
4706 took_trunc_lock = 1;
4707 }
4708 headblks = fp->ff_blocks;
4709 datablks = howmany(fp->ff_size, blksize);
4710 growsize = datablks * blksize;
4711 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
4712 if (blockHint >= hfsmp->hfs_metazone_start &&
4713 blockHint <= hfsmp->hfs_metazone_end)
4714 eflags |= kEFMetadataMask;
4715
4716 if (hfs_start_transaction(hfsmp) != 0) {
4717 if (took_trunc_lock)
4718 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
4719 return (EINVAL);
4720 }
4721 started_tr = 1;
4722 /*
4723 * Protect the extents b-tree and the allocation bitmap
4724 * during MapFileBlockC and ExtendFileC operations.
4725 */
4726 lockflags = SFL_BITMAP;
4727 if (overflow_extents(fp))
4728 lockflags |= SFL_EXTENTS;
4729 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4730
4731 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
4732 if (retval) {
4733 retval = MacToVFSError(retval);
4734 goto out;
4735 }
4736
4737 /*
4738 * STEP 1 - acquire new allocation blocks.
4739 */
4740 nextallocsave = hfsmp->nextAllocation;
4741 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
4742 if (eflags & kEFMetadataMask) {
4743 hfs_lock_mount(hfsmp);
4744 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
4745 MarkVCBDirty(hfsmp);
4746 hfs_unlock_mount(hfsmp);
4747 }
4748
4749 retval = MacToVFSError(retval);
4750 if (retval == 0) {
4751 cp->c_flag |= C_MODIFIED;
4752 if (newbytes < growsize) {
4753 retval = ENOSPC;
4754 goto restore;
4755 } else if (fp->ff_blocks < (headblks + datablks)) {
4756 printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN);
4757 retval = ENOSPC;
4758 goto restore;
4759 }
4760
4761 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
4762 if (retval) {
4763 retval = MacToVFSError(retval);
4764 } else if ((sector_a + 1) == sector_b) {
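/*
 * The first newly allocated block begins right where the original data
 * ends on disk, i.e. the file was effectively extended in place rather
 * than given a new location, so relocating would accomplish nothing;
 * treat it like a failure to find usable space.
 */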
4765 retval = ENOSPC;
4766 goto restore;
4767 } else if ((eflags & kEFMetadataMask) &&
4768 ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
4769 hfsmp->hfs_metazone_end)) {
4770 #if 0
4771 const char * filestr;
4772 char emptystr = '\0';
4773
4774 if (cp->c_desc.cd_nameptr != NULL) {
4775 filestr = (const char *)&cp->c_desc.cd_nameptr[0];
4776 } else if (vnode_name(vp) != NULL) {
4777 filestr = vnode_name(vp);
4778 } else {
4779 filestr = &emptystr;
4780 }
4781 #endif
4782 retval = ENOSPC;
4783 goto restore;
4784 }
4785 }
4786 /* Done with system locks and journal for now. */
4787 hfs_systemfile_unlock(hfsmp, lockflags);
4788 lockflags = 0;
4789 hfs_end_transaction(hfsmp);
4790 started_tr = 0;
4791
4792 if (retval) {
4793 /*
4794 * Check to see if failure is due to excessive fragmentation.
4795 */
4796 if ((retval == ENOSPC) &&
4797 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
4798 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
4799 }
4800 goto out;
4801 }
4802 /*
4803 * STEP 2 - clone file data into the new allocation blocks.
4804 */
4805
4806 if (vnodetype == VLNK)
4807 retval = EPERM;
4808 else if (vnode_issystem(vp))
4809 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
4810 else
4811 retval = hfs_clonefile(vp, headblks, datablks, blksize);
4812
4813 /* Start transaction for step 3 or for a restore. */
4814 if (hfs_start_transaction(hfsmp) != 0) {
4815 retval = EINVAL;
4816 goto out;
4817 }
4818 started_tr = 1;
4819 if (retval)
4820 goto restore;
4821
4822 /*
4823 * STEP 3 - switch to cloned data and remove old blocks.
4824 */
4825 lockflags = SFL_BITMAP;
4826 if (overflow_extents(fp))
4827 lockflags |= SFL_EXTENTS;
4828 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4829
4830 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
4831
4832 hfs_systemfile_unlock(hfsmp, lockflags);
4833 lockflags = 0;
4834 if (retval)
4835 goto restore;
4836 out:
4837 if (took_trunc_lock)
4838 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
4839
4840 if (lockflags) {
4841 hfs_systemfile_unlock(hfsmp, lockflags);
4842 lockflags = 0;
4843 }
4844
4845 /* Push cnode's new extent data to disk. */
4846 if (retval == 0) {
4847 (void) hfs_update(vp, MNT_WAIT);
4848 }
4849 if (hfsmp->jnl) {
4850 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
4851 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
4852 else
4853 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
4854 }
4855 exit:
4856 if (started_tr)
4857 hfs_end_transaction(hfsmp);
4858
4859 return (retval);
4860
4861 restore:
4862 if (fp->ff_blocks == headblks) {
4863 if (took_trunc_lock)
4864 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
4865 goto exit;
4866 }
4867 /*
4868 * Give back any newly allocated space.
4869 */
4870 if (lockflags == 0) {
4871 lockflags = SFL_BITMAP;
4872 if (overflow_extents(fp))
4873 lockflags |= SFL_EXTENTS;
4874 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4875 }
4876
4877 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
4878 FTOC(fp)->c_fileid, false);
4879
4880 hfs_systemfile_unlock(hfsmp, lockflags);
4881 lockflags = 0;
4882
4883 if (took_trunc_lock)
4884 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
4885 goto exit;
4886 }
4887
4888
4889 /*
4890 * Clone a file's data within the file.
4891 *
4892 */
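/*
 * The copy loop below goes through cluster_read()/cluster_write() with
 * IO_NOCACHE (plus IO_SYNC on the write side), presumably to avoid
 * filling the unified buffer cache with a second copy of the data and to
 * ensure the clone is on disk before the caller head-truncates away the
 * original blocks.
 */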
4893 static int
4894 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
4895 {
4896 caddr_t bufp;
4897 size_t bufsize;
4898 size_t copysize;
4899 size_t iosize;
4900 size_t offset;
4901 off_t writebase;
4902 uio_t auio;
4903 int error = 0;
4904
4905 writebase = blkstart * blksize;
4906 copysize = blkcnt * blksize;
4907 iosize = bufsize = MIN(copysize, 128 * 1024);
4908 offset = 0;
4909
4910 hfs_unlock(VTOC(vp));
4911
4912 #if CONFIG_PROTECT
4913 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
4914 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
4915 return (error);
4916 }
4917 #endif /* CONFIG_PROTECT */
4918
4919 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
4920 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
4921 return (ENOMEM);
4922 }
4923
4924 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
4925
4926 while (offset < copysize) {
4927 iosize = MIN(copysize - offset, iosize);
4928
4929 uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
4930 uio_addiov(auio, (uintptr_t)bufp, iosize);
4931
4932 error = cluster_read(vp, auio, copysize, IO_NOCACHE);
4933 if (error) {
4934 printf("hfs_clonefile: cluster_read failed - %d\n", error);
4935 break;
4936 }
4937 if (uio_resid(auio) != 0) {
4938 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
4939 error = EIO;
4940 break;
4941 }
4942
4943 uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
4944 uio_addiov(auio, (uintptr_t)bufp, iosize);
4945
4946 error = cluster_write(vp, auio, writebase + offset,
4947 writebase + offset + iosize,
4948 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
4949 if (error) {
4950 printf("hfs_clonefile: cluster_write failed - %d\n", error);
4951 break;
4952 }
4953 if (uio_resid(auio) != 0) {
4954 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
4955 error = EIO;
4956 break;
4957 }
4958 offset += iosize;
4959 }
4960 uio_free(auio);
4961
4962 if ((blksize & PAGE_MASK)) {
4963 /*
4964 * since the copy may not have started on a PAGE
4965 * boundary (or may not have ended on one), we
4966 * may have pages left in the cache since NOCACHE
4967 * will let partially written pages linger...
4968 * let's just flush the entire range to make sure
4969 * we don't have any pages left that are beyond
4970 * (or intersect) the real LEOF of this file
4971 */
4972 ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
4973 } else {
4974 /*
4975 * No need to call ubc_sync_range or hfs_invalbuf
4976 * since the file was copied using IO_NOCACHE and
4977 * the copy was done starting and ending on a page
4978 * boundary in the file.
4979 */
4980 }
4981 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
4982
4983 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
4984 return (error);
4985 }
4986
4987 /*
4988 * Clone a system (metadata) file.
4989 *
4990 */
4991 static int
4992 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
4993 kauth_cred_t cred, struct proc *p)
4994 {
4995 caddr_t bufp;
4996 char * offset;
4997 size_t bufsize;
4998 size_t iosize;
4999 struct buf *bp = NULL;
5000 daddr64_t blkno;
5001 daddr64_t blk;
5002 daddr64_t start_blk;
5003 daddr64_t last_blk;
5004 int breadcnt;
5005 int i;
5006 int error = 0;
5007
5008
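/*
 * Copy in chunks of at most 1 MB, rounded down to a multiple of the
 * logical I/O block size returned by GetLogicalBlockSize(); breadcnt is
 * the number of logical blocks read (and written) per chunk.
 */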
5009 iosize = GetLogicalBlockSize(vp);
5010 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
5011 breadcnt = bufsize / iosize;
5012
5013 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
5014 return (ENOMEM);
5015 }
5016 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
5017 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
5018 blkno = 0;
5019
5020 while (blkno < last_blk) {
5021 /*
5022 * Read up to a megabyte
5023 */
5024 offset = bufp;
5025 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
5026 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
5027 if (error) {
5028 printf("hfs_clonesysfile: meta_bread error %d\n", error);
5029 goto out;
5030 }
5031 if (buf_count(bp) != iosize) {
5032 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
5033 goto out;
5034 }
5035 bcopy((char *)buf_dataptr(bp), offset, iosize);
5036
5037 buf_markinvalid(bp);
5038 buf_brelse(bp);
5039 bp = NULL;
5040
5041 offset += iosize;
5042 }
5043
5044 /*
5045 * Write up to a megabyte
5046 */
5047 offset = bufp;
5048 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
5049 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
5050 if (bp == NULL) {
5051 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
5052 error = EIO;
5053 goto out;
5054 }
5055 bcopy(offset, (char *)buf_dataptr(bp), iosize);
5056 error = (int)buf_bwrite(bp);
5057 bp = NULL;
5058 if (error)
5059 goto out;
5060 offset += iosize;
5061 }
5062 }
5063 out:
5064 if (bp) {
5065 buf_brelse(bp);
5066 }
5067
5068 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
5069
5070 error = hfs_fsync(vp, MNT_WAIT, 0, p);
5071
5072 return (error);
5073 }