apple/xnu (xnu-2422.100.13): bsd/hfs/hfs_readwrite.c
1 /*
2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* @(#)hfs_readwrite.c 1.0
29 *
30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
31 *
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
33 *
34 */
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/resourcevar.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/filedesc.h>
42 #include <sys/stat.h>
43 #include <sys/buf.h>
44 #include <sys/buf_internal.h>
45 #include <sys/proc.h>
46 #include <sys/kauth.h>
47 #include <sys/vnode.h>
48 #include <sys/vnode_internal.h>
49 #include <sys/uio.h>
50 #include <sys/vfs_context.h>
51 #include <sys/fsevents.h>
52 #include <kern/kalloc.h>
53 #include <sys/disk.h>
54 #include <sys/sysctl.h>
55 #include <sys/fsctl.h>
56 #include <sys/mount_internal.h>
57 #include <sys/file_internal.h>
58
59 #include <miscfs/specfs/specdev.h>
60
61 #include <sys/ubc.h>
62 #include <sys/ubc_internal.h>
63
64 #include <vm/vm_pageout.h>
65 #include <vm/vm_kern.h>
66
67 #include <sys/kdebug.h>
68
69 #include "hfs.h"
70 #include "hfs_attrlist.h"
71 #include "hfs_endian.h"
72 #include "hfs_fsctl.h"
73 #include "hfs_quota.h"
74 #include "hfscommon/headers/FileMgrInternal.h"
75 #include "hfscommon/headers/BTreesInternal.h"
76 #include "hfs_cnode.h"
77 #include "hfs_dbg.h"
78
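/*
 * can_cluster(size): cluster I/O is only used when the transfer size is a
 * multiple of 4 KB and no larger than half of MAXPHYSIO.
 */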
79 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
80
81 enum {
82 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
83 };
84
85 /* from bsd/hfs/hfs_vfsops.c */
86 extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
87
88 static int hfs_clonefile(struct vnode *, int, int, int);
89 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
90 static int hfs_minorupdate(struct vnode *vp);
91 static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
92
93 /* from bsd/hfs/hfs_vnops.c */
94 extern decmpfs_cnode* hfs_lazy_init_decmpfs_cnode (struct cnode *cp);
95
96
97
98 int flush_cache_on_write = 0;
99 SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW | CTLFLAG_LOCKED, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
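/*
 * The SYSCTL_INT above publishes this knob as kern.flush_cache_on_write.
 * When it is non-zero (for example via "sysctl -w kern.flush_cache_on_write=1",
 * assuming the usual sysctl(8) interface), hfs_vnop_write() issues a
 * DKIOCSYNCHRONIZECACHE ioctl to the underlying device after uncached writes.
 */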
100
101 /*
102 * Read data from a file.
103 */
104 int
105 hfs_vnop_read(struct vnop_read_args *ap)
106 {
107 /*
108 struct vnop_read_args {
109 struct vnodeop_desc *a_desc;
110 vnode_t a_vp;
111 struct uio *a_uio;
112 int a_ioflag;
113 vfs_context_t a_context;
114 };
115 */
116
117 uio_t uio = ap->a_uio;
118 struct vnode *vp = ap->a_vp;
119 struct cnode *cp;
120 struct filefork *fp;
121 struct hfsmount *hfsmp;
122 off_t filesize;
123 off_t filebytes;
124 off_t start_resid = uio_resid(uio);
125 off_t offset = uio_offset(uio);
126 int retval = 0;
127 int took_truncate_lock = 0;
128 int io_throttle = 0;
129
130 /* Preflight checks */
131 if (!vnode_isreg(vp)) {
132 /* can only read regular files */
133 if (vnode_isdir(vp))
134 return (EISDIR);
135 else
136 return (EPERM);
137 }
138 if (start_resid == 0)
139 return (0); /* Nothing left to do */
140 if (offset < 0)
141 	        return (EINVAL);        /* can't read from a negative offset */
142
143
144
145 #if HFS_COMPRESSION
146 if (VNODE_IS_RSRC(vp)) {
147 if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
148 return 0;
149 }
150 /* otherwise read the resource fork normally */
151 } else {
152 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
153 if (compressed) {
154 retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
155 if (compressed) {
156 if (retval == 0) {
157 /* successful read, update the access time */
158 VTOC(vp)->c_touch_acctime = TRUE;
159
160 /* compressed files are not hot file candidates */
161 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
162 VTOF(vp)->ff_bytesread = 0;
163 }
164 }
165 return retval;
166 }
167 /* otherwise the file was converted back to a regular file while we were reading it */
168 retval = 0;
169 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
170 int error;
171
172 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
173 if (error) {
174 return error;
175 }
176
177 }
178 }
179 #endif /* HFS_COMPRESSION */
180
181 cp = VTOC(vp);
182 fp = VTOF(vp);
183 hfsmp = VTOHFS(vp);
184
185 #if CONFIG_PROTECT
186 if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {
187 goto exit;
188 }
189 #endif
190
191 /*
192 * If this read request originated from a syscall (as opposed to
193 * an in-kernel page fault or something), then set it up for
194 * throttle checks
195 */
196 if (ap->a_ioflag & IO_SYSCALL_DISPATCH) {
197 io_throttle = IO_RETURN_ON_THROTTLE;
198 }
199
200 read_again:
201
202 /* Protect against a size change. */
203 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
204 took_truncate_lock = 1;
205
206 filesize = fp->ff_size;
207 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
208 if (offset > filesize) {
209 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
210 (offset > (off_t)MAXHFSFILESIZE)) {
211 retval = EFBIG;
212 }
213 goto exit;
214 }
215
216 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
217 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
218
219 	retval = cluster_read(vp, uio, filesize, ap->a_ioflag | io_throttle);
220
221 cp->c_touch_acctime = TRUE;
222
223 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
224 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
225
226 /*
227 	 * Keep track of blocks read
228 */
229 if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
230 int took_cnode_lock = 0;
231 off_t bytesread;
232
233 bytesread = start_resid - uio_resid(uio);
234
235 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
236 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
237 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
238 took_cnode_lock = 1;
239 }
240 /*
241 * If this file hasn't been seen since the start of
242 * the current sampling period then start over.
243 */
244 if (cp->c_atime < hfsmp->hfc_timebase) {
245 struct timeval tv;
246
247 fp->ff_bytesread = bytesread;
248 microtime(&tv);
249 cp->c_atime = tv.tv_sec;
250 } else {
251 fp->ff_bytesread += bytesread;
252 }
253 if (took_cnode_lock)
254 hfs_unlock(cp);
255 }
256 exit:
257 if (took_truncate_lock) {
258 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
259 }
260 if (retval == EAGAIN) {
261 throttle_lowpri_io(1);
262
263 retval = 0;
264 goto read_again;
265 }
266 return (retval);
267 }
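/*
 * Illustrative sketch (not compiled): hfs_vnop_read() and hfs_vnop_write()
 * share the same throttling pattern for syscall-originated I/O.  They pass
 * IO_RETURN_ON_THROTTLE down to the cluster layer and, if it comes back
 * with EAGAIN, wait out the throttle window and retry what is left:
 *
 *	retry:
 *		error = cluster_read(vp, uio, filesize, ioflag | IO_RETURN_ON_THROTTLE);
 *		if (error == EAGAIN) {
 *			throttle_lowpri_io(1);	/* sleep until we may issue I/O again */
 *			error = 0;
 *			goto retry;		/* uio already reflects the bytes done */
 *		}
 */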
268
269 /*
270 * Write data to a file.
271 */
272 int
273 hfs_vnop_write(struct vnop_write_args *ap)
274 {
275 uio_t uio = ap->a_uio;
276 struct vnode *vp = ap->a_vp;
277 struct cnode *cp;
278 struct filefork *fp;
279 struct hfsmount *hfsmp;
280 kauth_cred_t cred = NULL;
281 off_t origFileSize;
282 off_t writelimit;
283 off_t bytesToAdd = 0;
284 off_t actualBytesAdded;
285 off_t filebytes;
286 off_t offset;
287 ssize_t resid;
288 int eflags;
289 int ioflag = ap->a_ioflag;
290 int retval = 0;
291 int lockflags;
292 int cnode_locked = 0;
293 int partialwrite = 0;
294 int do_snapshot = 1;
295 time_t orig_ctime=VTOC(vp)->c_ctime;
296 int took_truncate_lock = 0;
297 int io_return_on_throttle = 0;
298 struct rl_entry *invalid_range;
299
300 #if HFS_COMPRESSION
301 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
302 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
303 switch(state) {
304 case FILE_IS_COMPRESSED:
305 return EACCES;
306 case FILE_IS_CONVERTING:
307 /* if FILE_IS_CONVERTING, we allow writes but do not
308 bother with snapshots or else we will deadlock.
309 */
310 do_snapshot = 0;
311 break;
312 default:
313 printf("invalid state %d for compressed file\n", state);
314 /* fall through */
315 }
316 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
317 int error;
318
319 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
320 if (error != 0) {
321 return error;
322 }
323 }
324
325 if (do_snapshot) {
326 check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);
327 }
328
329 #endif
330
331 resid = uio_resid(uio);
332 offset = uio_offset(uio);
333
334 if (offset < 0)
335 return (EINVAL);
336 if (resid == 0)
337 return (E_NONE);
338 if (!vnode_isreg(vp))
339 return (EPERM); /* Can only write regular files */
340
341 cp = VTOC(vp);
342 fp = VTOF(vp);
343 hfsmp = VTOHFS(vp);
344
345 #if CONFIG_PROTECT
346 if ((retval = cp_handle_vnop (vp, CP_WRITE_ACCESS, 0)) != 0) {
347 goto exit;
348 }
349 #endif
350
351 eflags = kEFDeferMask; /* defer file block allocations */
352 #if HFS_SPARSE_DEV
353 /*
354 * When the underlying device is sparse and space
355 * is low (< 8MB), stop doing delayed allocations
356 * and begin doing synchronous I/O.
357 */
358 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
359 (hfs_freeblks(hfsmp, 0) < 2048)) {
360 eflags &= ~kEFDeferMask;
361 ioflag |= IO_SYNC;
362 }
363 #endif /* HFS_SPARSE_DEV */
364
365 if ((ioflag & (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) ==
366 (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) {
367 io_return_on_throttle = IO_RETURN_ON_THROTTLE;
368 }
369
370 again:
371
372 /*
373 * Protect against a size change.
374 *
375 * Note: If took_truncate_lock is true, then we previously got the lock shared
376 * but needed to upgrade to exclusive. So try getting it exclusive from the
377 * start.
378 */
379 if (ioflag & IO_APPEND || took_truncate_lock) {
380 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
381 }
382 else {
383 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
384 }
385 took_truncate_lock = 1;
386
387 /* Update UIO */
388 if (ioflag & IO_APPEND) {
389 uio_setoffset(uio, fp->ff_size);
390 offset = fp->ff_size;
391 }
392 if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
393 retval = EPERM;
394 goto exit;
395 }
396
397 origFileSize = fp->ff_size;
398 writelimit = offset + resid;
399 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
400
401 /*
402 * We may need an exclusive truncate lock for several reasons, all
403 * of which are because we may be writing to a (portion of a) block
404 * for the first time, and we need to make sure no readers see the
405 * prior, uninitialized contents of the block. The cases are:
406 *
407 * 1. We have unallocated (delayed allocation) blocks. We may be
408 * allocating new blocks to the file and writing to them.
409 * (A more precise check would be whether the range we're writing
410 * to contains delayed allocation blocks.)
411 * 2. We need to extend the file. The bytes between the old EOF
412 * and the new EOF are not yet initialized. This is important
413 * even if we're not allocating new blocks to the file. If the
414 * old EOF and new EOF are in the same block, we still need to
415 * protect that range of bytes until they are written for the
416 * first time.
417 * 3. The write overlaps some invalid ranges (delayed zero fill; that
418 * part of the file has been allocated, but not yet written).
419 *
420 * If we had a shared lock with the above cases, we need to try to upgrade
421 * to an exclusive lock. If the upgrade fails, we will lose the shared
422 * lock, and will need to take the truncate lock again; the took_truncate_lock
423 * flag will still be set, causing us to try for an exclusive lock next time.
424 *
425 * NOTE: Testing for #3 (delayed zero fill) needs to be done while the cnode
426 * lock is held, since it protects the range lists.
427 */
428 if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
429 ((fp->ff_unallocblocks != 0) ||
430 (writelimit > origFileSize))) {
431 if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
432 /*
433 * Lock upgrade failed and we lost our shared lock, try again.
434 * Note: we do not set took_truncate_lock=0 here. Leaving it
435 * set to 1 will cause us to try to get the lock exclusive.
436 */
437 goto again;
438 }
439 else {
440 /* Store the owner in the c_truncatelockowner field if we successfully upgrade */
441 cp->c_truncatelockowner = current_thread();
442 }
443 }
444
445 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
446 goto exit;
447 }
448 cnode_locked = 1;
449
450 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
451 hfs_incr_gencount (cp);
452 }
453
454 /*
455 * Now that we have the cnode lock, see if there are delayed zero fill ranges
456 * overlapping our write. If so, we need the truncate lock exclusive (see above).
457 */
458 if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
459 (rl_scan(&fp->ff_invalidranges, offset, writelimit-1, &invalid_range) != RL_NOOVERLAP)) {
460 /*
461 * When testing, it appeared that calling lck_rw_lock_shared_to_exclusive() causes
462 * a deadlock, rather than simply returning failure. (That is, it apparently does
463 * not behave like a "try_lock"). Since this condition is rare, just drop the
464 * cnode lock and try again. Since took_truncate_lock is set, we will
465 * automatically take the truncate lock exclusive.
466 */
467 hfs_unlock(cp);
468 cnode_locked = 0;
469 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
470 goto again;
471 }
472
473 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
474 (int)offset, uio_resid(uio), (int)fp->ff_size,
475 (int)filebytes, 0);
476
477 	/* See if the write fits within the currently allocated space (i.e. no need to extend the file) */
478 if (writelimit <= filebytes) {
479 goto sizeok;
480 }
481
482 cred = vfs_context_ucred(ap->a_context);
483 bytesToAdd = writelimit - filebytes;
484
485 #if QUOTA
486 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
487 cred, 0);
488 if (retval)
489 goto exit;
490 #endif /* QUOTA */
491
492 if (hfs_start_transaction(hfsmp) != 0) {
493 retval = EINVAL;
494 goto exit;
495 }
496
497 while (writelimit > filebytes) {
498 bytesToAdd = writelimit - filebytes;
499 if (cred && suser(cred, NULL) != 0)
500 eflags |= kEFReserveMask;
501
502 /* Protect extents b-tree and allocation bitmap */
503 lockflags = SFL_BITMAP;
504 if (overflow_extents(fp))
505 lockflags |= SFL_EXTENTS;
506 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
507
508 /* Files that are changing size are not hot file candidates. */
509 if (hfsmp->hfc_stage == HFC_RECORDING) {
510 fp->ff_bytesread = 0;
511 }
512 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
513 0, eflags, &actualBytesAdded));
514
515 hfs_systemfile_unlock(hfsmp, lockflags);
516
517 if ((actualBytesAdded == 0) && (retval == E_NONE))
518 retval = ENOSPC;
519 if (retval != E_NONE)
520 break;
521 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
522 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
523 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
524 }
525 (void) hfs_update(vp, TRUE);
526 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
527 (void) hfs_end_transaction(hfsmp);
528
529 /*
530 	 * If we didn't grow the file enough, try a partial write.
531 * POSIX expects this behavior.
532 */
533 if ((retval == ENOSPC) && (filebytes > offset)) {
534 retval = 0;
535 partialwrite = 1;
536 uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
537 resid -= bytesToAdd;
538 writelimit = filebytes;
539 }
540 sizeok:
541 if (retval == E_NONE) {
542 off_t filesize;
543 off_t zero_off;
544 off_t tail_off;
545 off_t inval_start;
546 off_t inval_end;
547 off_t io_start;
548 int lflag;
549
550 if (writelimit > fp->ff_size)
551 filesize = writelimit;
552 else
553 filesize = fp->ff_size;
554
555 lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
556
557 if (offset <= fp->ff_size) {
558 zero_off = offset & ~PAGE_MASK_64;
559
560 			/* Check whether the area between zero_off and the start
561 			   of the transfer is invalid and should be zero-filled
562 			   as part of the transfer:
563 */
564 if (offset > zero_off) {
565 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
566 lflag |= IO_HEADZEROFILL;
567 }
568 } else {
569 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
570
571 /* The bytes between fp->ff_size and uio->uio_offset must never be
572 read without being zeroed. The current last block is filled with zeroes
573 			   if it holds valid data, but in all cases we merely do a little bookkeeping
574 to track the area from the end of the current last page to the start of
575 the area actually written. For the same reason only the bytes up to the
576 			   start of the page where this write will start are invalidated; any remainder
577 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
578
579 Note that inval_start, the start of the page after the current EOF,
580 may be past the start of the write, in which case the zeroing
581 			   will be handled by the cluster_write of the actual data.
582 */
583 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
584 inval_end = offset & ~PAGE_MASK_64;
585 zero_off = fp->ff_size;
586
587 if ((fp->ff_size & PAGE_MASK_64) &&
588 (rl_scan(&fp->ff_invalidranges,
589 eof_page_base,
590 fp->ff_size - 1,
591 &invalid_range) != RL_NOOVERLAP)) {
592 /* The page containing the EOF is not valid, so the
593 entire page must be made inaccessible now. If the write
594 starts on a page beyond the page containing the eof
595 (inval_end > eof_page_base), add the
596 whole page to the range to be invalidated. Otherwise
597 (i.e. if the write starts on the same page), zero-fill
598 the entire page explicitly now:
599 */
600 if (inval_end > eof_page_base) {
601 inval_start = eof_page_base;
602 } else {
603 zero_off = eof_page_base;
604 };
605 };
606
607 if (inval_start < inval_end) {
608 struct timeval tv;
609 /* There's some range of data that's going to be marked invalid */
610
611 if (zero_off < inval_start) {
612 /* The pages between inval_start and inval_end are going to be invalidated,
613 and the actual write will start on a page past inval_end. Now's the last
614 chance to zero-fill the page containing the EOF:
615 */
616 hfs_unlock(cp);
617 cnode_locked = 0;
618 retval = cluster_write(vp, (uio_t) 0,
619 fp->ff_size, inval_start,
620 zero_off, (off_t)0,
621 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
622 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
623 cnode_locked = 1;
624 if (retval) goto ioerr_exit;
625 offset = uio_offset(uio);
626 };
627
628 /* Mark the remaining area of the newly allocated space as invalid: */
629 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
630 microuptime(&tv);
631 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
632 zero_off = fp->ff_size = inval_end;
633 };
634
635 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
636 };
637
638 /* Check to see whether the area between the end of the write and the end of
639 the page it falls in is invalid and should be zero-filled as part of the transfer:
640 */
641 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
642 if (tail_off > filesize) tail_off = filesize;
643 if (tail_off > writelimit) {
644 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
645 lflag |= IO_TAILZEROFILL;
646 };
647 };
648
649 /*
650 * if the write starts beyond the current EOF (possibly advanced in the
651 * zeroing of the last block, above), then we'll zero fill from the current EOF
652 * to where the write begins:
653 *
654 * NOTE: If (and ONLY if) the portion of the file about to be written is
655 * before the current EOF it might be marked as invalid now and must be
656 * made readable (removed from the invalid ranges) before cluster_write
657 * tries to write it:
658 */
659 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
660 if (io_start < fp->ff_size) {
661 off_t io_end;
662
663 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
664 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
665 };
666
667 hfs_unlock(cp);
668 cnode_locked = 0;
669
670 /*
671 * We need to tell UBC the fork's new size BEFORE calling
672 * cluster_write, in case any of the new pages need to be
673 * paged out before cluster_write completes (which does happen
674 * in embedded systems due to extreme memory pressure).
675 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
676 * will be, so that it can pass that on to cluster_pageout, and
677 * allow those pageouts.
678 *
679 * We don't update ff_size yet since we don't want pageins to
680 * be able to see uninitialized data between the old and new
681 * EOF, until cluster_write has completed and initialized that
682 * part of the file.
683 *
684 * The vnode pager relies on the file size last given to UBC via
685 * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
686 * ff_size (whichever is larger). NOTE: ff_new_size is always
687 * zero, unless we are extending the file via write.
688 */
689 if (filesize > fp->ff_size) {
690 fp->ff_new_size = filesize;
691 ubc_setsize(vp, filesize);
692 }
693 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
694 tail_off, lflag | IO_NOZERODIRTY | io_return_on_throttle);
695 if (retval) {
696 fp->ff_new_size = 0; /* no longer extending; use ff_size */
697
698 if (retval == EAGAIN) {
699 /*
700 * EAGAIN indicates that we still have I/O to do, but
701 * that we now need to be throttled
702 */
703 if (resid != uio_resid(uio)) {
704 /*
705 				 * we did manage to do some I/O before returning EAGAIN
706 */
707 resid = uio_resid(uio);
708 offset = uio_offset(uio);
709
710 cp->c_touch_chgtime = TRUE;
711 cp->c_touch_modtime = TRUE;
712 }
713 if (filesize > fp->ff_size) {
714 /*
715 * we called ubc_setsize before the call to
716 * cluster_write... since we only partially
717 * completed the I/O, we need to
718 * re-adjust our idea of the filesize based
719 * on our interim EOF
720 */
721 ubc_setsize(vp, offset);
722
723 fp->ff_size = offset;
724 }
725 goto exit;
726 }
727 if (filesize > origFileSize) {
728 ubc_setsize(vp, origFileSize);
729 }
730 goto ioerr_exit;
731 }
732
733 if (filesize > origFileSize) {
734 fp->ff_size = filesize;
735
736 /* Files that are changing size are not hot file candidates. */
737 if (hfsmp->hfc_stage == HFC_RECORDING) {
738 fp->ff_bytesread = 0;
739 }
740 }
741 fp->ff_new_size = 0; /* ff_size now has the correct size */
742
743 /* If we wrote some bytes, then touch the change and mod times */
744 if (resid > uio_resid(uio)) {
745 cp->c_touch_chgtime = TRUE;
746 cp->c_touch_modtime = TRUE;
747 }
748 }
749 if (partialwrite) {
750 uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
751 resid += bytesToAdd;
752 }
753
754 // XXXdbg - see radar 4871353 for more info
755 {
756 if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
757 VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
758 }
759 }
760
761 ioerr_exit:
762 /*
763 * If we successfully wrote any data, and we are not the superuser
764 * we clear the setuid and setgid bits as a precaution against
765 * tampering.
766 */
767 if (cp->c_mode & (S_ISUID | S_ISGID)) {
768 cred = vfs_context_ucred(ap->a_context);
769 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
770 if (!cnode_locked) {
771 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
772 cnode_locked = 1;
773 }
774 cp->c_mode &= ~(S_ISUID | S_ISGID);
775 }
776 }
777 if (retval) {
778 if (ioflag & IO_UNIT) {
779 if (!cnode_locked) {
780 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
781 cnode_locked = 1;
782 }
783 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
784 0, 0, ap->a_context);
785 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
786 uio_setresid(uio, resid);
787 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
788 }
789 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
790 if (!cnode_locked) {
791 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
792 cnode_locked = 1;
793 }
794 retval = hfs_update(vp, TRUE);
795 }
796 /* Updating vcbWrCnt doesn't need to be atomic. */
797 hfsmp->vcbWrCnt++;
798
799 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
800 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
801 exit:
802 if (cnode_locked)
803 hfs_unlock(cp);
804
805 if (took_truncate_lock) {
806 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
807 }
808 if (retval == EAGAIN) {
809 throttle_lowpri_io(1);
810
811 retval = 0;
812 goto again;
813 }
814 return (retval);
815 }
816
817 /* support for the "bulk-access" fcntl */
818
819 #define CACHE_LEVELS 16
820 #define NUM_CACHE_ENTRIES (64*16)
821 #define PARENT_IDS_FLAG 0x100
822
823 struct access_cache {
824 int numcached;
825 int cachehits; /* these two for statistics gathering */
826 int lookups;
827 unsigned int *acache;
828 unsigned char *haveaccess;
829 };
830
831 struct access_t {
832 uid_t uid; /* IN: effective user id */
833 short flags; /* IN: access requested (i.e. R_OK) */
834 short num_groups; /* IN: number of groups user belongs to */
835 int num_files; /* IN: number of files to process */
836 int *file_ids; /* IN: array of file ids */
837 gid_t *groups; /* IN: array of groups */
838 short *access; /* OUT: access info for each file (0 for 'has access') */
839 } __attribute__((unavailable)); // this structure is for reference purposes only
840
841 struct user32_access_t {
842 uid_t uid; /* IN: effective user id */
843 short flags; /* IN: access requested (i.e. R_OK) */
844 short num_groups; /* IN: number of groups user belongs to */
845 int num_files; /* IN: number of files to process */
846 user32_addr_t file_ids; /* IN: array of file ids */
847 user32_addr_t groups; /* IN: array of groups */
848 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
849 };
850
851 struct user64_access_t {
852 uid_t uid; /* IN: effective user id */
853 short flags; /* IN: access requested (i.e. R_OK) */
854 short num_groups; /* IN: number of groups user belongs to */
855 int num_files; /* IN: number of files to process */
856 user64_addr_t file_ids; /* IN: array of file ids */
857 user64_addr_t groups; /* IN: array of groups */
858 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
859 };
860
861
862 // these are the "extended" versions of the above structures
863 // note that it is crucial that they be a different size than
864 // the regular versions
865 struct ext_access_t {
866 uint32_t flags; /* IN: access requested (i.e. R_OK) */
867 uint32_t num_files; /* IN: number of files to process */
868 uint32_t map_size; /* IN: size of the bit map */
869 uint32_t *file_ids; /* IN: Array of file ids */
870 char *bitmap; /* OUT: hash-bitmap of interesting directory ids */
871 short *access; /* OUT: access info for each file (0 for 'has access') */
872 uint32_t num_parents; /* future use */
873 cnid_t *parents; /* future use */
874 } __attribute__((unavailable)); // this structure is for reference purposes only
875
876 struct user32_ext_access_t {
877 uint32_t flags; /* IN: access requested (i.e. R_OK) */
878 uint32_t num_files; /* IN: number of files to process */
879 uint32_t map_size; /* IN: size of the bit map */
880 user32_addr_t file_ids; /* IN: Array of file ids */
881 user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
882 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
883 uint32_t num_parents; /* future use */
884 user32_addr_t parents; /* future use */
885 };
886
887 struct user64_ext_access_t {
888 uint32_t flags; /* IN: access requested (i.e. R_OK) */
889 uint32_t num_files; /* IN: number of files to process */
890 uint32_t map_size; /* IN: size of the bit map */
891 user64_addr_t file_ids; /* IN: array of file ids */
892 	user64_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
893 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
894 	uint32_t num_parents; /* future use */
895 	user64_addr_t parents; /* future use */
896 };
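#if 0	/* Illustrative user-space sketch only; never compiled here.  The fsctl
	 * selector (written as HFS_EXT_BULKACCESS_FSCTL below) and the exported
	 * struct name are assumptions -- see hfs_fsctl.h for the real definitions.
	 * The point is the calling convention: hand in an array of file IDs and
	 * get back one short per file, 0 meaning "has access", else an errno.
	 */
	uint32_t ids[3]     = { 16, 22, 37 };	/* catalog node IDs to check */
	short    results[3] = { 0, 0, 0 };	/* filled in by the kernel */
	struct ext_access_t args = {
		.flags       = R_OK,		/* access being asked about */
		.num_files   = 3,
		.map_size    = 0,		/* no directory bitmap wanted */
		.file_ids    = ids,
		.bitmap      = NULL,
		.access      = results,
		.num_parents = 0,
		.parents     = NULL,
	};
	if (fsctl("/Volumes/MyHFS", HFS_EXT_BULKACCESS_FSCTL, &args, 0) == 0) {
		/* results[i] is 0 if ids[i] is reachable/readable by the caller */
	}
#endif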
897
898
899 /*
900 * Perform a binary search for the given parent_id. Return value is
901 * the index if there is a match. If no_match_indexp is non-NULL it
902 * will be assigned with the index to insert the item (even if it was
903 * not found).
904 */
905 static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
906 {
907 int index=-1;
908 unsigned int lo=0;
909
910 do {
911 unsigned int mid = ((hi - lo)/2) + lo;
912 unsigned int this_id = array[mid];
913
914 if (parent_id == this_id) {
915 hi = mid;
916 break;
917 }
918
919 if (parent_id < this_id) {
920 hi = mid;
921 continue;
922 }
923
924 if (parent_id > this_id) {
925 lo = mid + 1;
926 continue;
927 }
928 } while(lo < hi);
929
930 /* check if lo and hi converged on the match */
931 if (parent_id == array[hi]) {
932 index = hi;
933 }
934
935 if (no_match_indexp) {
936 *no_match_indexp = hi;
937 }
938
939 return index;
940 }
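/*
 * Worked example (illustrative): with array = { 5, 9, 12 } and hi == 2,
 * cache_binSearch(array, 2, 9, &idx) returns 1 (a hit).  Searching for 10
 * instead returns -1 and leaves idx == 2, the slot where 10 would need to
 * be inserted to keep the array sorted.
 */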
941
942
943 static int
944 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
945 {
946 unsigned int hi;
947 int matches = 0;
948 int index, no_match_index;
949
950 if (cache->numcached == 0) {
951 *indexp = 0;
952 return 0; // table is empty, so insert at index=0 and report no match
953 }
954
955 if (cache->numcached > NUM_CACHE_ENTRIES) {
956 cache->numcached = NUM_CACHE_ENTRIES;
957 }
958
959 hi = cache->numcached - 1;
960
961 index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);
962
963 /* if no existing entry found, find index for new one */
964 if (index == -1) {
965 index = no_match_index;
966 matches = 0;
967 } else {
968 matches = 1;
969 }
970
971 *indexp = index;
972 return matches;
973 }
974
975 /*
976 * Add a node to the access_cache at the given index (or do a lookup first
977 * to find the index if -1 is passed in). We currently do a replace rather
978 * than an insert if the cache is full.
979 */
980 static void
981 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
982 {
983 int lookup_index = -1;
984
985 /* need to do a lookup first if -1 passed for index */
986 if (index == -1) {
987 if (lookup_bucket(cache, &lookup_index, nodeID)) {
988 if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
989 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
990 cache->haveaccess[lookup_index] = access;
991 }
992
993 /* mission accomplished */
994 return;
995 } else {
996 index = lookup_index;
997 }
998
999 }
1000
1001 /* if the cache is full, do a replace rather than an insert */
1002 if (cache->numcached >= NUM_CACHE_ENTRIES) {
1003 cache->numcached = NUM_CACHE_ENTRIES-1;
1004
1005 if (index > cache->numcached) {
1006 index = cache->numcached;
1007 }
1008 }
1009
1010 if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
1011 index++;
1012 }
1013
1014 if (index >= 0 && index < cache->numcached) {
1015 /* only do bcopy if we're inserting */
1016 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
1017 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
1018 }
1019
1020 cache->acache[index] = nodeID;
1021 cache->haveaccess[index] = access;
1022 cache->numcached++;
1023 }
1024
1025
1026 struct cinfo {
1027 uid_t uid;
1028 gid_t gid;
1029 mode_t mode;
1030 cnid_t parentcnid;
1031 u_int16_t recflags;
1032 };
1033
1034 static int
1035 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
1036 {
1037 struct cinfo *cip = (struct cinfo *)arg;
1038
1039 cip->uid = attrp->ca_uid;
1040 cip->gid = attrp->ca_gid;
1041 cip->mode = attrp->ca_mode;
1042 cip->parentcnid = descp->cd_parentcnid;
1043 cip->recflags = attrp->ca_recflags;
1044
1045 return (0);
1046 }
1047
1048 /*
1049 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
1050  * isn't in core, then go to the catalog.
1051 */
1052 static int
1053 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
1054 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
1055 {
1056 int error = 0;
1057
1058 /* if this id matches the one the fsctl was called with, skip the lookup */
1059 if (cnid == skip_cp->c_cnid) {
1060 cnattrp->ca_uid = skip_cp->c_uid;
1061 cnattrp->ca_gid = skip_cp->c_gid;
1062 cnattrp->ca_mode = skip_cp->c_mode;
1063 cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
1064 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
1065 } else {
1066 struct cinfo c_info;
1067
1068 	/* otherwise, check the cnode hash in case the file/dir is in core */
1069 if (hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info) == 0) {
1070 cnattrp->ca_uid = c_info.uid;
1071 cnattrp->ca_gid = c_info.gid;
1072 cnattrp->ca_mode = c_info.mode;
1073 cnattrp->ca_recflags = c_info.recflags;
1074 keyp->hfsPlus.parentID = c_info.parentcnid;
1075 } else {
1076 int lockflags;
1077
1078 if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
1079 throttle_lowpri_io(1);
1080
1081 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1082
1083 /* lookup this cnid in the catalog */
1084 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
1085
1086 hfs_systemfile_unlock(hfsmp, lockflags);
1087
1088 cache->lookups++;
1089 }
1090 }
1091
1092 return (error);
1093 }
1094
1095
1096 /*
1097 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
1098 * up to CACHE_LEVELS as we progress towards the root.
1099 */
1100 static int
1101 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
1102 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
1103 struct vfs_context *my_context,
1104 char *bitmap,
1105 uint32_t map_size,
1106 cnid_t* parents,
1107 uint32_t num_parents)
1108 {
1109 int myErr = 0;
1110 int myResult;
1111 HFSCatalogNodeID thisNodeID;
1112 unsigned int myPerms;
1113 struct cat_attr cnattr;
1114 int cache_index = -1, scope_index = -1, scope_idx_start = -1;
1115 CatalogKey catkey;
1116
1117 int i = 0, ids_to_cache = 0;
1118 int parent_ids[CACHE_LEVELS];
1119
1120 thisNodeID = nodeID;
1121 while (thisNodeID >= kRootDirID) {
1122 myResult = 0; /* default to "no access" */
1123
1124 /* check the cache before resorting to hitting the catalog */
1125
1126 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
1127 * to look any further after hitting cached dir */
1128
1129 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
1130 cache->cachehits++;
1131 myErr = cache->haveaccess[cache_index];
1132 if (scope_index != -1) {
1133 if (myErr == ESRCH) {
1134 myErr = 0;
1135 }
1136 } else {
1137 scope_index = 0; // so we'll just use the cache result
1138 scope_idx_start = ids_to_cache;
1139 }
1140 myResult = (myErr == 0) ? 1 : 0;
1141 goto ExitThisRoutine;
1142 }
1143
1144
1145 if (parents) {
1146 int tmp;
1147 tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
1148 if (scope_index == -1)
1149 scope_index = tmp;
1150 if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
1151 scope_idx_start = ids_to_cache;
1152 }
1153 }
1154
1155 /* remember which parents we want to cache */
1156 if (ids_to_cache < CACHE_LEVELS) {
1157 parent_ids[ids_to_cache] = thisNodeID;
1158 ids_to_cache++;
1159 }
1160 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
1161 if (bitmap && map_size) {
1162 bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
1163 }
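	/* Example (illustrative): with map_size == 16 and thisNodeID == 37, the
	   store above sets bit (37 & 7) == 5 in bitmap[(37/8) % 16] == bitmap[4]. */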
1164
1165
1166 /* do the lookup (checks the cnode hash, then the catalog) */
1167 myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
1168 if (myErr) {
1169 goto ExitThisRoutine; /* no access */
1170 }
1171
1172 /* Root always gets access. */
1173 if (suser(myp_ucred, NULL) == 0) {
1174 thisNodeID = catkey.hfsPlus.parentID;
1175 myResult = 1;
1176 continue;
1177 }
1178
1179 		// if the thing has ACLs, do the full permission check
1180 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1181 struct vnode *vp;
1182
1183 /* get the vnode for this cnid */
1184 myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
1185 if ( myErr ) {
1186 myResult = 0;
1187 goto ExitThisRoutine;
1188 }
1189
1190 thisNodeID = VTOC(vp)->c_parentcnid;
1191
1192 hfs_unlock(VTOC(vp));
1193
1194 if (vnode_vtype(vp) == VDIR) {
1195 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
1196 } else {
1197 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
1198 }
1199
1200 vnode_put(vp);
1201 if (myErr) {
1202 myResult = 0;
1203 goto ExitThisRoutine;
1204 }
1205 } else {
1206 unsigned int flags;
1207 int mode = cnattr.ca_mode & S_IFMT;
1208 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp,myp_ucred, theProcPtr);
1209
1210 if (mode == S_IFDIR) {
1211 flags = R_OK | X_OK;
1212 } else {
1213 flags = R_OK;
1214 }
1215 if ( (myPerms & flags) != flags) {
1216 myResult = 0;
1217 myErr = EACCES;
1218 goto ExitThisRoutine; /* no access */
1219 }
1220
1221 /* up the hierarchy we go */
1222 thisNodeID = catkey.hfsPlus.parentID;
1223 }
1224 }
1225
1226 /* if here, we have access to this node */
1227 myResult = 1;
1228
1229 ExitThisRoutine:
1230 if (parents && myErr == 0 && scope_index == -1) {
1231 myErr = ESRCH;
1232 }
1233
1234 if (myErr) {
1235 myResult = 0;
1236 }
1237 *err = myErr;
1238
1239 /* cache the parent directory(ies) */
1240 for (i = 0; i < ids_to_cache; i++) {
1241 if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
1242 add_node(cache, -1, parent_ids[i], ESRCH);
1243 } else {
1244 add_node(cache, -1, parent_ids[i], myErr);
1245 }
1246 }
1247
1248 return (myResult);
1249 }
1250
1251 static int
1252 do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
1253 struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
1254 {
1255 boolean_t is64bit;
1256
1257 /*
1258 * NOTE: on entry, the vnode has an io_ref. In case this vnode
1259  * happens to be in our list of file_ids, we'll note it so we
1260 * avoid calling hfs_chashget_nowait() on that id as that
1261 * will cause a "locking against myself" panic.
1262 */
1263 Boolean check_leaf = true;
1264
1265 struct user64_ext_access_t *user_access_structp;
1266 struct user64_ext_access_t tmp_user_access;
1267 struct access_cache cache;
1268
1269 int error = 0, prev_parent_check_ok=1;
1270 unsigned int i;
1271
1272 short flags;
1273 unsigned int num_files = 0;
1274 int map_size = 0;
1275 int num_parents = 0;
1276 int *file_ids=NULL;
1277 short *access=NULL;
1278 char *bitmap=NULL;
1279 cnid_t *parents=NULL;
1280 int leaf_index;
1281
1282 cnid_t cnid;
1283 cnid_t prevParent_cnid = 0;
1284 unsigned int myPerms;
1285 short myaccess = 0;
1286 struct cat_attr cnattr;
1287 CatalogKey catkey;
1288 struct cnode *skip_cp = VTOC(vp);
1289 kauth_cred_t cred = vfs_context_ucred(context);
1290 proc_t p = vfs_context_proc(context);
1291
1292 is64bit = proc_is64bit(p);
1293
1294 /* initialize the local cache and buffers */
1295 cache.numcached = 0;
1296 cache.cachehits = 0;
1297 cache.lookups = 0;
1298 cache.acache = NULL;
1299 cache.haveaccess = NULL;
1300
1301 /* struct copyin done during dispatch... need to copy file_id array separately */
1302 if (ap->a_data == NULL) {
1303 error = EINVAL;
1304 goto err_exit_bulk_access;
1305 }
1306
1307 if (is64bit) {
1308 if (arg_size != sizeof(struct user64_ext_access_t)) {
1309 error = EINVAL;
1310 goto err_exit_bulk_access;
1311 }
1312
1313 user_access_structp = (struct user64_ext_access_t *)ap->a_data;
1314
1315 } else if (arg_size == sizeof(struct user32_access_t)) {
1316 struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;
1317
1318 // convert an old style bulk-access struct to the new style
1319 tmp_user_access.flags = accessp->flags;
1320 tmp_user_access.num_files = accessp->num_files;
1321 tmp_user_access.map_size = 0;
1322 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1323 tmp_user_access.bitmap = USER_ADDR_NULL;
1324 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1325 tmp_user_access.num_parents = 0;
1326 user_access_structp = &tmp_user_access;
1327
1328 } else if (arg_size == sizeof(struct user32_ext_access_t)) {
1329 struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;
1330
1331 // up-cast from a 32-bit version of the struct
1332 tmp_user_access.flags = accessp->flags;
1333 tmp_user_access.num_files = accessp->num_files;
1334 tmp_user_access.map_size = accessp->map_size;
1335 tmp_user_access.num_parents = accessp->num_parents;
1336
1337 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1338 tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
1339 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1340 tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);
1341
1342 user_access_structp = &tmp_user_access;
1343 } else {
1344 error = EINVAL;
1345 goto err_exit_bulk_access;
1346 }
1347
1348 map_size = user_access_structp->map_size;
1349
1350 num_files = user_access_structp->num_files;
1351
1352 num_parents= user_access_structp->num_parents;
1353
1354 if (num_files < 1) {
1355 goto err_exit_bulk_access;
1356 }
1357 if (num_files > 1024) {
1358 error = EINVAL;
1359 goto err_exit_bulk_access;
1360 }
1361
1362 if (num_parents > 1024) {
1363 error = EINVAL;
1364 goto err_exit_bulk_access;
1365 }
1366
1367 file_ids = (int *) kalloc(sizeof(int) * num_files);
1368 access = (short *) kalloc(sizeof(short) * num_files);
1369 if (map_size) {
1370 bitmap = (char *) kalloc(sizeof(char) * map_size);
1371 }
1372
1373 if (num_parents) {
1374 parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
1375 }
1376
1377 cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
1378 cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1379
1380 if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
1381 if (file_ids) {
1382 kfree(file_ids, sizeof(int) * num_files);
1383 }
1384 if (bitmap) {
1385 kfree(bitmap, sizeof(char) * map_size);
1386 }
1387 if (access) {
1388 kfree(access, sizeof(short) * num_files);
1389 }
1390 if (cache.acache) {
1391 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1392 }
1393 if (cache.haveaccess) {
1394 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1395 }
1396 if (parents) {
1397 kfree(parents, sizeof(cnid_t) * num_parents);
1398 }
1399 return ENOMEM;
1400 }
1401
1402 	// make sure the bitmap is zeroed out...
1403 if (bitmap) {
1404 bzero(bitmap, (sizeof(char) * map_size));
1405 }
1406
1407 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1408 num_files * sizeof(int)))) {
1409 goto err_exit_bulk_access;
1410 }
1411
1412 if (num_parents) {
1413 if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
1414 num_parents * sizeof(cnid_t)))) {
1415 goto err_exit_bulk_access;
1416 }
1417 }
1418
1419 flags = user_access_structp->flags;
1420 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1421 flags = R_OK;
1422 }
1423
1424 /* check if we've been passed leaf node ids or parent ids */
1425 if (flags & PARENT_IDS_FLAG) {
1426 check_leaf = false;
1427 }
1428
1429 /* Check access to each file_id passed in */
1430 for (i = 0; i < num_files; i++) {
1431 leaf_index=-1;
1432 cnid = (cnid_t) file_ids[i];
1433
1434 /* root always has access */
1435 if ((!parents) && (!suser(cred, NULL))) {
1436 access[i] = 0;
1437 continue;
1438 }
1439
1440 if (check_leaf) {
1441 /* do the lookup (checks the cnode hash, then the catalog) */
1442 error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
1443 if (error) {
1444 access[i] = (short) error;
1445 continue;
1446 }
1447
1448 if (parents) {
1449 // Check if the leaf matches one of the parent scopes
1450 leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
1451 if (leaf_index >= 0 && parents[leaf_index] == cnid)
1452 prev_parent_check_ok = 0;
1453 else if (leaf_index >= 0)
1454 prev_parent_check_ok = 1;
1455 }
1456
1457 			// if the thing has ACLs, do the full permission check
1458 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1459 struct vnode *cvp;
1460 int myErr = 0;
1461 /* get the vnode for this cnid */
1462 myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);
1463 if ( myErr ) {
1464 access[i] = myErr;
1465 continue;
1466 }
1467
1468 hfs_unlock(VTOC(cvp));
1469
1470 if (vnode_vtype(cvp) == VDIR) {
1471 myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
1472 } else {
1473 myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
1474 }
1475
1476 vnode_put(cvp);
1477 if (myErr) {
1478 access[i] = myErr;
1479 continue;
1480 }
1481 } else {
1482 /* before calling CheckAccess(), check the target file for read access */
1483 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1484 cnattr.ca_mode, hfsmp->hfs_mp, cred, p);
1485
1486 /* fail fast if no access */
1487 if ((myPerms & flags) == 0) {
1488 access[i] = EACCES;
1489 continue;
1490 }
1491 }
1492 } else {
1493 /* we were passed an array of parent ids */
1494 catkey.hfsPlus.parentID = cnid;
1495 }
1496
1497 /* if the last guy had the same parent and had access, we're done */
1498 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
1499 cache.cachehits++;
1500 access[i] = 0;
1501 continue;
1502 }
1503
1504 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1505 skip_cp, p, cred, context,bitmap, map_size, parents, num_parents);
1506
1507 if (myaccess || (error == ESRCH && leaf_index != -1)) {
1508 access[i] = 0; // have access.. no errors to report
1509 } else {
1510 access[i] = (error != 0 ? (short) error : EACCES);
1511 }
1512
1513 prevParent_cnid = catkey.hfsPlus.parentID;
1514 }
1515
1516 /* copyout the access array */
1517 if ((error = copyout((caddr_t)access, user_access_structp->access,
1518 num_files * sizeof (short)))) {
1519 goto err_exit_bulk_access;
1520 }
1521 if (map_size && bitmap) {
1522 if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
1523 map_size * sizeof (char)))) {
1524 goto err_exit_bulk_access;
1525 }
1526 }
1527
1528
1529 err_exit_bulk_access:
1530
1531 if (file_ids)
1532 kfree(file_ids, sizeof(int) * num_files);
1533 if (parents)
1534 kfree(parents, sizeof(cnid_t) * num_parents);
1535 if (bitmap)
1536 kfree(bitmap, sizeof(char) * map_size);
1537 if (access)
1538 kfree(access, sizeof(short) * num_files);
1539 if (cache.acache)
1540 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1541 if (cache.haveaccess)
1542 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1543
1544 return (error);
1545 }
1546
1547
1548 /* end "bulk-access" support */
1549
1550
1551 /*
1552 * Callback for use with freeze ioctl.
1553 */
1554 static int
1555 hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
1556 {
1557 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
1558
1559 return 0;
1560 }
1561
1562 /*
1563 * Control filesystem operating characteristics.
1564 */
1565 int
1566 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
1567 vnode_t a_vp;
1568 int a_command;
1569 caddr_t a_data;
1570 int a_fflag;
1571 vfs_context_t a_context;
1572 } */ *ap)
1573 {
1574 struct vnode * vp = ap->a_vp;
1575 struct hfsmount *hfsmp = VTOHFS(vp);
1576 vfs_context_t context = ap->a_context;
1577 kauth_cred_t cred = vfs_context_ucred(context);
1578 proc_t p = vfs_context_proc(context);
1579 struct vfsstatfs *vfsp;
1580 boolean_t is64bit;
1581 off_t jnl_start, jnl_size;
1582 struct hfs_journal_info *jip;
1583 #if HFS_COMPRESSION
1584 int compressed = 0;
1585 off_t uncompressed_size = -1;
1586 int decmpfs_error = 0;
1587
1588 if (ap->a_command == F_RDADVISE) {
1589 /* we need to inspect the decmpfs state of the file as early as possible */
1590 compressed = hfs_file_is_compressed(VTOC(vp), 0);
1591 if (compressed) {
1592 if (VNODE_IS_RSRC(vp)) {
1593 /* if this is the resource fork, treat it as if it were empty */
1594 uncompressed_size = 0;
1595 } else {
1596 decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
1597 if (decmpfs_error != 0) {
1598 /* failed to get the uncompressed size, we'll check for this later */
1599 uncompressed_size = -1;
1600 }
1601 }
1602 }
1603 }
1604 #endif /* HFS_COMPRESSION */
1605
1606 is64bit = proc_is64bit(p);
1607
1608 #if CONFIG_PROTECT
1609 {
1610 int error = 0;
1611 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
1612 return error;
1613 }
1614 }
1615 #endif /* CONFIG_PROTECT */
1616
1617 switch (ap->a_command) {
1618
1619 case HFS_GETPATH:
1620 {
1621 struct vnode *file_vp;
1622 cnid_t cnid;
1623 int outlen;
1624 char *bufptr;
1625 int error;
1626 int flags = 0;
1627
1628 /* Caller must be owner of file system. */
1629 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1630 if (suser(cred, NULL) &&
1631 kauth_cred_getuid(cred) != vfsp->f_owner) {
1632 return (EACCES);
1633 }
1634 /* Target vnode must be file system's root. */
1635 if (!vnode_isvroot(vp)) {
1636 return (EINVAL);
1637 }
1638 bufptr = (char *)ap->a_data;
1639 cnid = strtoul(bufptr, NULL, 10);
1640 if (ap->a_fflag & HFS_GETPATH_VOLUME_RELATIVE) {
1641 flags |= BUILDPATH_VOLUME_RELATIVE;
1642 }
1643
1644 /* We need to call hfs_vfs_vget to leverage the code that will
1645 * fix the origin list for us if needed, as opposed to calling
1646 		 * hfs_vget, since we will need the parent for the build_path call.
1647 */
1648
1649 if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
1650 return (error);
1651 }
1652 error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, flags, context);
1653 vnode_put(file_vp);
1654
1655 return (error);
1656 }
1657
1658 case HFS_GET_WRITE_GEN_COUNTER:
1659 {
1660 struct cnode *cp = NULL;
1661 int error;
1662 u_int32_t *counter = (u_int32_t *)ap->a_data;
1663
1664 cp = VTOC(vp);
1665
1666 if (!vnode_isdir(vp) && !(vnode_isreg(vp)) &&
1667 !(vnode_islnk(vp))) {
1668 error = EBADF;
1669 *counter = 0;
1670 return error;
1671 }
1672
1673 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1674 if (error == 0) {
1675 struct ubc_info *uip;
1676 int is_mapped_writable = 0;
1677
1678 if (UBCINFOEXISTS(vp)) {
1679 uip = vp->v_ubcinfo;
1680 if ((uip->ui_flags & UI_ISMAPPED) && (uip->ui_flags & UI_MAPPEDWRITE)) {
1681 is_mapped_writable = 1;
1682 }
1683 }
1684
1685
1686 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
1687 uint32_t gcount = hfs_get_gencount(cp);
1688 //
1689 // Even though we return EBUSY for files that are mmap'ed
1690 // we also want to bump the value so that the write-gen
1691 // counter will always be different once the file is unmapped
1692 // (since the file may be unmapped but the pageouts have not
1693 // yet happened).
1694 //
1695 if (is_mapped_writable) {
1696 hfs_incr_gencount (cp);
1697 gcount = hfs_get_gencount(cp);
1698 }
1699
1700 *counter = gcount;
1701 } else if (S_ISDIR(cp->c_attr.ca_mode)) {
1702 *counter = hfs_get_gencount(cp);
1703 } else {
1704 /* not a file or dir? silently return */
1705 *counter = 0;
1706 }
1707 hfs_unlock (cp);
1708
1709 if (is_mapped_writable) {
1710 error = EBUSY;
1711 }
1712 }
1713
1714 return error;
1715 }
1716
1717 case HFS_GET_DOCUMENT_ID:
1718 {
1719 struct cnode *cp = NULL;
1720 int error=0;
1721 u_int32_t *document_id = (u_int32_t *)ap->a_data;
1722
1723 cp = VTOC(vp);
1724
1725 if (cp->c_desc.cd_cnid == kHFSRootFolderID) {
1726 // the root-dir always has document id '2' (aka kHFSRootFolderID)
1727 *document_id = kHFSRootFolderID;
1728
1729 } else if ((S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode))) {
1730 int mark_it = 0;
1731 uint32_t tmp_doc_id;
1732
1733 //
1734 // we can use the FndrExtendedFileInfo because the doc-id is the first
1735 // thing in both it and the FndrExtendedDirInfo struct which is fixed
1736 			// in format and cannot change layout
1737 //
1738 struct FndrExtendedFileInfo *extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16);
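			//
			// c_finderinfo holds the 32 bytes of Finder info; the classic
			// FndrFileInfo/FndrDirInfo occupies the first 16 bytes, so offset 16
			// is where the extended info (and therefore document_id) begins.
			//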
1739
1740 hfs_lock(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
1741
1742 //
1743 // if the cnode isn't UF_TRACKED and the doc-id-allocate flag isn't set
1744 // then just return a zero for the doc-id
1745 //
1746 if (!(cp->c_bsdflags & UF_TRACKED) && !(ap->a_fflag & HFS_DOCUMENT_ID_ALLOCATE)) {
1747 *document_id = 0;
1748 hfs_unlock(cp);
1749 return 0;
1750 }
1751
1752 //
1753 // if the cnode isn't UF_TRACKED and the doc-id-allocate flag IS set,
1754 // then set mark_it so we know to set the UF_TRACKED flag once the
1755 // cnode is locked.
1756 //
1757 if (!(cp->c_bsdflags & UF_TRACKED) && (ap->a_fflag & HFS_DOCUMENT_ID_ALLOCATE)) {
1758 mark_it = 1;
1759 }
1760
1761 tmp_doc_id = extinfo->document_id; // get a copy of this
1762
1763 hfs_unlock(cp); // in case we have to call hfs_generate_document_id()
1764
1765 //
1766 // If the document_id isn't set, get a new one and then set it.
1767 // Note: we first get the document id, then lock the cnode to
1768 // avoid any deadlock potential between cp and the root vnode.
1769 //
1770 uint32_t new_id;
1771 if (tmp_doc_id == 0 && (error = hfs_generate_document_id(hfsmp, &new_id)) == 0) {
1772
1773 if ((error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) == 0) {
1774 extinfo->document_id = tmp_doc_id = new_id;
1775 //printf("ASSIGNING: doc-id %d to ino %d\n", extinfo->document_id, cp->c_fileid);
1776
1777 if (mark_it) {
1778 cp->c_bsdflags |= UF_TRACKED;
1779 }
1780
1781 // mark the cnode dirty
1782 cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
1783
1784 int lockflags;
1785 if ((error = hfs_start_transaction(hfsmp)) == 0) {
1786 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1787
1788 (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
1789
1790 hfs_systemfile_unlock (hfsmp, lockflags);
1791 (void) hfs_end_transaction(hfsmp);
1792 }
1793
1794 #if CONFIG_FSE
1795 add_fsevent(FSE_DOCID_CHANGED, context,
1796 FSE_ARG_DEV, hfsmp->hfs_raw_dev,
1797 FSE_ARG_INO, (ino64_t)0, // src inode #
1798 FSE_ARG_INO, (ino64_t)cp->c_fileid, // dst inode #
1799 FSE_ARG_INT32, extinfo->document_id,
1800 FSE_ARG_DONE);
1801
1802 hfs_unlock (cp); // so we can send the STAT_CHANGED event without deadlocking
1803
1804 if (need_fsevent(FSE_STAT_CHANGED, vp)) {
1805 add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
1806 }
1807 #else
1808 hfs_unlock (cp);
1809 #endif
1810 }
1811 }
1812
1813 *document_id = tmp_doc_id;
1814 } else {
1815 *document_id = 0;
1816 }
1817
1818 return error;
1819 }
1820
1821 case HFS_TRANSFER_DOCUMENT_ID:
1822 {
1823 struct cnode *cp = NULL;
1824 int error;
1825 u_int32_t to_fd = *(u_int32_t *)ap->a_data;
1826 struct fileproc *to_fp;
1827 struct vnode *to_vp;
1828 struct cnode *to_cp;
1829
1830 cp = VTOC(vp);
1831
1832 if ((error = fp_getfvp(p, to_fd, &to_fp, &to_vp)) != 0) {
1833 //printf("could not get the vnode for fd %d (err %d)\n", to_fd, error);
1834 return error;
1835 }
1836 if ( (error = vnode_getwithref(to_vp)) ) {
1837 file_drop(to_fd);
1838 return error;
1839 }
1840
1841 if (VTOHFS(to_vp) != hfsmp) {
1842 error = EXDEV;
1843 goto transfer_cleanup;
1844 }
1845
1846 int need_unlock = 1;
1847 to_cp = VTOC(to_vp);
1848 error = hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
1849 if (error != 0) {
1850 //printf("could not lock the pair of cnodes (error %d)\n", error);
1851 goto transfer_cleanup;
1852 }
1853
1854 if (!(cp->c_bsdflags & UF_TRACKED)) {
1855 error = EINVAL;
1856 } else if (to_cp->c_bsdflags & UF_TRACKED) {
1857 //
1858 // if the destination is already tracked, return an error
1859 // as otherwise it's a silent deletion of the target's
1860 // document-id
1861 //
1862 error = EEXIST;
1863 } else if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
1864 //
1865 // we can use the FndrExtendedFileInfo because the doc-id is the first
1866 // thing in both it and the FndrExtendedDirInfo struct which is fixed in
1867 // format and cannot change layout
1868 //
1869 struct FndrExtendedFileInfo *f_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16);
1870 struct FndrExtendedFileInfo *to_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)to_cp->c_finderinfo + 16);
1871
1872 if (f_extinfo->document_id == 0) {
1873 uint32_t new_id;
1874
1875 hfs_unlockpair(cp, to_cp); // have to unlock to be able to get a new-id
1876
1877 if ((error = hfs_generate_document_id(hfsmp, &new_id)) == 0) {
1878 //
1879 // re-lock the pair now that we have the document-id
1880 //
1881 hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
1882 f_extinfo->document_id = new_id;
1883 } else {
1884 goto transfer_cleanup;
1885 }
1886 }
1887
1888 to_extinfo->document_id = f_extinfo->document_id;
1889 f_extinfo->document_id = 0;
1890 //printf("TRANSFERRING: doc-id %d from ino %d to ino %d\n", to_extinfo->document_id, cp->c_fileid, to_cp->c_fileid);
1891
1892 // make sure the destination is also UF_TRACKED
1893 to_cp->c_bsdflags |= UF_TRACKED;
1894 cp->c_bsdflags &= ~UF_TRACKED;
1895
1896 // mark the cnodes dirty
1897 cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
1898 to_cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
1899
1900 int lockflags;
1901 if ((error = hfs_start_transaction(hfsmp)) == 0) {
1902
1903 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1904
1905 (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
1906 (void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, NULL, NULL);
1907
1908 hfs_systemfile_unlock (hfsmp, lockflags);
1909 (void) hfs_end_transaction(hfsmp);
1910 }
1911
1912 #if CONFIG_FSE
1913 add_fsevent(FSE_DOCID_CHANGED, context,
1914 FSE_ARG_DEV, hfsmp->hfs_raw_dev,
1915 FSE_ARG_INO, (ino64_t)cp->c_fileid, // src inode #
1916 FSE_ARG_INO, (ino64_t)to_cp->c_fileid, // dst inode #
1917 FSE_ARG_INT32, to_extinfo->document_id,
1918 FSE_ARG_DONE);
1919
1920 hfs_unlockpair(cp, to_cp); // unlock this so we can send the fsevents
1921 need_unlock = 0;
1922
1923 if (need_fsevent(FSE_STAT_CHANGED, vp)) {
1924 add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
1925 }
1926 if (need_fsevent(FSE_STAT_CHANGED, to_vp)) {
1927 add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, to_vp, FSE_ARG_DONE);
1928 }
1929 #else
1930 hfs_unlockpair(cp, to_cp); // unlock this so we can send the fsevents
1931 need_unlock = 0;
1932 #endif
1933 }
1934
1935 if (need_unlock) {
1936 hfs_unlockpair(cp, to_cp);
1937 }
1938
1939 transfer_cleanup:
1940 vnode_put(to_vp);
1941 file_drop(to_fd);
1942
1943 return error;
1944 }
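	/*
	 * HFS_TRANSFER_DOCUMENT_ID, summarized: the destination file
	 * descriptor arrives in ap->a_data; both files must live on the same
	 * HFS volume (EXDEV otherwise), the source must be UF_TRACKED and
	 * the destination must not be (EEXIST otherwise).  The document-id
	 * is copied to the destination, zeroed on the source, and UF_TRACKED
	 * moves with it.
	 *
	 * Hypothetical user-space sketch (assumes ffsctl(2) from
	 * <sys/fsctl.h>; the fd names are illustrative only):
	 *
	 *     u_int32_t dst_fd = (u_int32_t)new_fd;
	 *     int err = ffsctl(old_fd, HFS_TRANSFER_DOCUMENT_ID, &dst_fd, 0);
	 */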
1945
1946 case HFS_PREV_LINK:
1947 case HFS_NEXT_LINK:
1948 {
1949 cnid_t linkfileid;
1950 cnid_t nextlinkid;
1951 cnid_t prevlinkid;
1952 int error;
1953
1954 /* Caller must be owner of file system. */
1955 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1956 if (suser(cred, NULL) &&
1957 kauth_cred_getuid(cred) != vfsp->f_owner) {
1958 return (EACCES);
1959 }
1960 /* Target vnode must be file system's root. */
1961 if (!vnode_isvroot(vp)) {
1962 return (EINVAL);
1963 }
1964 linkfileid = *(cnid_t *)ap->a_data;
1965 if (linkfileid < kHFSFirstUserCatalogNodeID) {
1966 return (EINVAL);
1967 }
1968 if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
1969 return (error);
1970 }
1971 if (ap->a_command == HFS_NEXT_LINK) {
1972 *(cnid_t *)ap->a_data = nextlinkid;
1973 } else {
1974 *(cnid_t *)ap->a_data = prevlinkid;
1975 }
1976 return (0);
1977 }
1978
1979 case HFS_RESIZE_PROGRESS: {
1980
1981 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1982 if (suser(cred, NULL) &&
1983 kauth_cred_getuid(cred) != vfsp->f_owner) {
1984 return (EACCES); /* must be owner of file system */
1985 }
1986 if (!vnode_isvroot(vp)) {
1987 return (EINVAL);
1988 }
1989 /* file system must not be mounted read-only */
1990 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1991 return (EROFS);
1992 }
1993
1994 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
1995 }
1996
1997 case HFS_RESIZE_VOLUME: {
1998 u_int64_t newsize;
1999 u_int64_t cursize;
2000
2001 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
2002 if (suser(cred, NULL) &&
2003 kauth_cred_getuid(cred) != vfsp->f_owner) {
2004 return (EACCES); /* must be owner of file system */
2005 }
2006 if (!vnode_isvroot(vp)) {
2007 return (EINVAL);
2008 }
2009
2010 /* filesystem must not be mounted read only */
2011 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2012 return (EROFS);
2013 }
2014 newsize = *(u_int64_t *)ap->a_data;
2015 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
2016
2017 if (newsize > cursize) {
2018 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
2019 } else if (newsize < cursize) {
2020 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
2021 } else {
2022 return (0);
2023 }
2024 }
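	/*
	 * HFS_RESIZE_VOLUME, summarized: the caller (file-system owner or
	 * super-user, issuing the request against the volume root) supplies
	 * the desired volume size in bytes; the case above grows via
	 * hfs_extendfs() or shrinks via hfs_truncatefs() relative to
	 * totalBlocks * blockSize.
	 *
	 * Hypothetical user-space sketch (assumes fsctl(2) from
	 * <sys/fsctl.h>; the path and size are illustrative only):
	 *
	 *     u_int64_t newsize = 250ULL << 30;   // e.g. 250 GiB
	 *     int err = fsctl("/Volumes/Example", HFS_RESIZE_VOLUME,
	 *                     &newsize, 0);
	 */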
2025 case HFS_CHANGE_NEXT_ALLOCATION: {
2026 int error = 0; /* Assume success */
2027 u_int32_t location;
2028
2029 if (vnode_vfsisrdonly(vp)) {
2030 return (EROFS);
2031 }
2032 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
2033 if (suser(cred, NULL) &&
2034 kauth_cred_getuid(cred) != vfsp->f_owner) {
2035 return (EACCES); /* must be owner of file system */
2036 }
2037 if (!vnode_isvroot(vp)) {
2038 return (EINVAL);
2039 }
2040 hfs_lock_mount(hfsmp);
2041 location = *(u_int32_t *)ap->a_data;
2042 if ((location >= hfsmp->allocLimit) &&
2043 (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
2044 error = EINVAL;
2045 goto fail_change_next_allocation;
2046 }
2047 /* Return previous value. */
2048 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
2049 if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
2050 /* On magic value for location, set nextAllocation to next block
2051 * after metadata zone and set flag in mount structure to indicate
2052 * that nextAllocation should not be updated again.
2053 */
2054 if (hfsmp->hfs_metazone_end != 0) {
2055 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
2056 }
2057 hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
2058 } else {
2059 hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
2060 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
2061 }
2062 MarkVCBDirty(hfsmp);
2063 fail_change_next_allocation:
2064 hfs_unlock_mount(hfsmp);
2065 return (error);
2066 }
2067
2068 #if HFS_SPARSE_DEV
2069 case HFS_SETBACKINGSTOREINFO: {
2070 struct vnode * bsfs_rootvp;
2071 struct vnode * di_vp;
2072 struct hfs_backingstoreinfo *bsdata;
2073 int error = 0;
2074
2075 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2076 return (EROFS);
2077 }
2078 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
2079 return (EALREADY);
2080 }
2081 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
2082 if (suser(cred, NULL) &&
2083 kauth_cred_getuid(cred) != vfsp->f_owner) {
2084 return (EACCES); /* must be owner of file system */
2085 }
2086 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
2087 if (bsdata == NULL) {
2088 return (EINVAL);
2089 }
2090 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
2091 return (error);
2092 }
2093 if ((error = vnode_getwithref(di_vp))) {
2094 file_drop(bsdata->backingfd);
2095 return(error);
2096 }
2097
2098 if (vnode_mount(vp) == vnode_mount(di_vp)) {
2099 (void)vnode_put(di_vp);
2100 file_drop(bsdata->backingfd);
2101 return (EINVAL);
2102 }
2103
2104 /*
2105 * Obtain the backing fs root vnode and keep a reference
2106 * on it. This reference will be dropped in hfs_unmount.
2107 */
2108 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
2109 if (error) {
2110 (void)vnode_put(di_vp);
2111 file_drop(bsdata->backingfd);
2112 return (error);
2113 }
2114 vnode_ref(bsfs_rootvp);
2115 vnode_put(bsfs_rootvp);
2116
2117 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
2118
2119 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
2120 /* The free extent cache is managed differently for sparse devices.
2121 * There is a window between when the volume is mounted and when the
2122 * device is marked as sparse, so the free extent cache for this
2123 * volume was initialized as for a normal volume (sorted by block
2124 * count). Reset the cache so that it will be rebuilt again
2125 * for the sparse device (sorted by start block).
2126 */
2127 ResetVCBFreeExtCache(hfsmp);
2128
2129 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
2130 hfsmp->hfs_sparsebandblks *= 4;
2131
2132 /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */
2133
2134 /*
2135 * If the sparse image is on a sparse image file (as opposed to a sparse
2136 * bundle), then we may need to limit the free space to the maximum size
2137 * of a file on that volume. So we query (using pathconf), and if we get
2138 * a meaningful result, we cache the number of blocks for later use in
2139 * hfs_freeblks().
2140 */
2141 hfsmp->hfs_backingfs_maxblocks = 0;
2142 if (vnode_vtype(di_vp) == VREG) {
2143 int terr;
2144 int hostbits;
2145 terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
2146 if (terr == 0 && hostbits != 0 && hostbits < 64) {
2147 u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;
2148
2149 hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
2150 }
2151 }
2152
2153 (void)vnode_put(di_vp);
2154 file_drop(bsdata->backingfd);
2155 return (0);
2156 }
2157 case HFS_CLRBACKINGSTOREINFO: {
2158 struct vnode * tmpvp;
2159
2160 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
2161 if (suser(cred, NULL) &&
2162 kauth_cred_getuid(cred) != vfsp->f_owner) {
2163 return (EACCES); /* must be owner of file system */
2164 }
2165 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2166 return (EROFS);
2167 }
2168
2169 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
2170 hfsmp->hfs_backingfs_rootvp) {
2171
2172 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
2173 tmpvp = hfsmp->hfs_backingfs_rootvp;
2174 hfsmp->hfs_backingfs_rootvp = NULLVP;
2175 hfsmp->hfs_sparsebandblks = 0;
2176 vnode_rele(tmpvp);
2177 }
2178 return (0);
2179 }
2180 #endif /* HFS_SPARSE_DEV */
2181
2182 /* Change the next CNID stored in the VH */
2183 case HFS_CHANGE_NEXTCNID: {
2184 int error = 0; /* Assume success */
2185 u_int32_t fileid;
2186 int wraparound = 0;
2187 int lockflags = 0;
2188
2189 if (vnode_vfsisrdonly(vp)) {
2190 return (EROFS);
2191 }
2192 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
2193 if (suser(cred, NULL) &&
2194 kauth_cred_getuid(cred) != vfsp->f_owner) {
2195 return (EACCES); /* must be owner of file system */
2196 }
2197
2198 fileid = *(u_int32_t *)ap->a_data;
2199
2200 /* Must have catalog lock excl. to advance the CNID pointer */
2201 lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG , HFS_EXCLUSIVE_LOCK);
2202
2203 hfs_lock_mount(hfsmp);
2204
2205 /* If it is less than the current next CNID, force the wraparound bit to be set */
2206 if (fileid < hfsmp->vcbNxtCNID) {
2207 wraparound=1;
2208 }
2209
2210 /* Return previous value. */
2211 *(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID;
2212
2213 hfsmp->vcbNxtCNID = fileid;
2214
2215 if (wraparound) {
2216 hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask;
2217 }
2218
2219 MarkVCBDirty(hfsmp);
2220 hfs_unlock_mount(hfsmp);
2221 hfs_systemfile_unlock (hfsmp, lockflags);
2222
2223 return (error);
2224 }
2225
2226 case F_FREEZE_FS: {
2227 struct mount *mp;
2228
2229 mp = vnode_mount(vp);
2230 hfsmp = VFSTOHFS(mp);
2231
2232 if (!(hfsmp->jnl))
2233 return (ENOTSUP);
2234
2235 vfsp = vfs_statfs(mp);
2236
2237 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
2238 !kauth_cred_issuser(cred))
2239 return (EACCES);
2240
2241 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
2242
2243 // flush things before we get started to try and prevent
2244 // dirty data from being paged out while we're frozen.
2245 // note: can't do this after taking the lock as it will
2246 // deadlock against ourselves.
2247 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
2248 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
2249
2250 // DO NOT call hfs_journal_flush() because it takes a
2251 // shared lock on the global lock, which we already hold exclusively here!
2252 journal_flush(hfsmp->jnl, TRUE);
2253
2254 // don't need to iterate on all vnodes, we just need to
2255 // wait for writes to the system files and the device vnode
2256 //
2257 // Now that journal flush waits for all metadata blocks to
2258 // be written out, waiting for btree writes is probably no
2259 // longer required.
2260 if (HFSTOVCB(hfsmp)->extentsRefNum)
2261 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
2262 if (HFSTOVCB(hfsmp)->catalogRefNum)
2263 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
2264 if (HFSTOVCB(hfsmp)->allocationsRefNum)
2265 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
2266 if (hfsmp->hfs_attribute_vp)
2267 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
2268 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
2269
2270 hfsmp->hfs_freezing_proc = current_proc();
2271
2272 return (0);
2273 }
2274
2275 case F_THAW_FS: {
2276 vfsp = vfs_statfs(vnode_mount(vp));
2277 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
2278 !kauth_cred_issuser(cred))
2279 return (EACCES);
2280
2281 // if we're not the one who froze the fs then we
2282 // can't thaw it.
2283 if (hfsmp->hfs_freezing_proc != current_proc()) {
2284 return EPERM;
2285 }
2286
2287 // NOTE: if you add code here, also go check the
2288 // code that "thaws" the fs in hfs_vnop_close()
2289 //
2290 hfsmp->hfs_freezing_proc = NULL;
2291 hfs_unlock_global (hfsmp);
2292 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
2293
2294 return (0);
2295 }
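	/*
	 * F_FREEZE_FS / F_THAW_FS, summarized: freeze flushes dirty vnodes,
	 * takes the global lock exclusive, flushes the journal, waits for
	 * writes to the system files and the device vnode, and records the
	 * freezing process; only that same process may thaw (EPERM
	 * otherwise), and hfs_vnop_close() contains matching thaw logic.
	 *
	 * Hypothetical user-space sketch (these fcntl commands are private;
	 * fd refers to any open file on the volume):
	 *
	 *     fcntl(fd, F_FREEZE_FS, 0);
	 *     // ... capture a block-level snapshot of the device ...
	 *     fcntl(fd, F_THAW_FS, 0);
	 */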
2296
2297 case HFS_BULKACCESS_FSCTL: {
2298 int size;
2299
2300 if (hfsmp->hfs_flags & HFS_STANDARD) {
2301 return EINVAL;
2302 }
2303
2304 if (is64bit) {
2305 size = sizeof(struct user64_access_t);
2306 } else {
2307 size = sizeof(struct user32_access_t);
2308 }
2309
2310 return do_bulk_access_check(hfsmp, vp, ap, size, context);
2311 }
2312
2313 case HFS_EXT_BULKACCESS_FSCTL: {
2314 int size;
2315
2316 if (hfsmp->hfs_flags & HFS_STANDARD) {
2317 return EINVAL;
2318 }
2319
2320 if (is64bit) {
2321 size = sizeof(struct user64_ext_access_t);
2322 } else {
2323 size = sizeof(struct user32_ext_access_t);
2324 }
2325
2326 return do_bulk_access_check(hfsmp, vp, ap, size, context);
2327 }
2328
2329 case HFS_SET_XATTREXTENTS_STATE: {
2330 int state;
2331
2332 if (ap->a_data == NULL) {
2333 return (EINVAL);
2334 }
2335
2336 state = *(int *)ap->a_data;
2337
2338 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2339 return (EROFS);
2340 }
2341
2342 /* Super-user can enable or disable extent-based extended
2343 * attribute support on a volume.
2344 * Note: Starting with Mac OS X 10.7, extent-based extended attributes
2345 * are enabled by default, so any change will only be transient
2346 * until the volume is remounted.
2347 */
2348 if (!kauth_cred_issuser(kauth_cred_get())) {
2349 return (EPERM);
2350 }
2351 if (state == 0 || state == 1)
2352 return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
2353 else
2354 return (EINVAL);
2355 }
2356
2357 case F_SETSTATICCONTENT: {
2358 int error;
2359 int enable_static = 0;
2360 struct cnode *cp = NULL;
2361 /*
2362 * lock the cnode, decorate the cnode flag, and bail out.
2363 * VFS should have already authenticated the caller for us.
2364 */
2365
2366 if (ap->a_data) {
2367 /*
2368 * Note that even though ap->a_data is of type caddr_t,
2369 * the fcntl layer at the syscall handler will pass in NULL
2370 * or 1 depending on what the argument supplied to the fcntl
2371 * was. So it is in fact correct to check the ap->a_data
2372 * argument for zero or non-zero value when deciding whether or not
2373 * to enable the static bit in the cnode.
2374 */
2375 enable_static = 1;
2376 }
2377 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2378 return EROFS;
2379 }
2380 cp = VTOC(vp);
2381
2382 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2383 if (error == 0) {
2384 if (enable_static) {
2385 cp->c_flag |= C_SSD_STATIC;
2386 }
2387 else {
2388 cp->c_flag &= ~C_SSD_STATIC;
2389 }
2390 hfs_unlock (cp);
2391 }
2392 return error;
2393 }
2394
2395 case F_SET_GREEDY_MODE: {
2396 int error;
2397 int enable_greedy_mode = 0;
2398 struct cnode *cp = NULL;
2399 /*
2400 * lock the cnode, decorate the cnode flag, and bail out.
2401 * VFS should have already authenticated the caller for us.
2402 */
2403
2404 if (ap->a_data) {
2405 /*
2406 * Note that even though ap->a_data is of type caddr_t,
2407 * the fcntl layer at the syscall handler will pass in NULL
2408 * or 1 depending on what the argument supplied to the fcntl
2409 * was. So it is in fact correct to check the ap->a_data
2410 * argument for zero or non-zero value when deciding whether or not
2411 * to enable the greedy mode bit in the cnode.
2412 */
2413 enable_greedy_mode = 1;
2414 }
2415 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2416 return EROFS;
2417 }
2418 cp = VTOC(vp);
2419
2420 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2421 if (error == 0) {
2422 if (enable_greedy_mode) {
2423 cp->c_flag |= C_SSD_GREEDY_MODE;
2424 }
2425 else {
2426 cp->c_flag &= ~C_SSD_GREEDY_MODE;
2427 }
2428 hfs_unlock (cp);
2429 }
2430 return error;
2431 }
2432
2433 case F_MAKECOMPRESSED: {
2434 int error = 0;
2435 uint32_t gen_counter;
2436 struct cnode *cp = NULL;
2437 int reset_decmp = 0;
2438
2439 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2440 return EROFS;
2441 }
2442
2443 /*
2444 * acquire & lock the cnode.
2445 * VFS should have already authenticated the caller for us.
2446 */
2447
2448 if (ap->a_data) {
2449 /*
2450 * Cast the pointer into a uint32_t so we can extract the
2451 * supplied generation counter.
2452 */
2453 gen_counter = *((uint32_t*)ap->a_data);
2454 }
2455 else {
2456 return EINVAL;
2457 }
2458
2459 #if HFS_COMPRESSION
2460 cp = VTOC(vp);
2461 /* Grab truncate lock first; we may truncate the file */
2462 hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2463
2464 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2465 if (error) {
2466 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2467 return error;
2468 }
2469
2470 /* Are there any other usecounts/FDs? */
2471 if (vnode_isinuse(vp, 1)) {
2472 hfs_unlock(cp);
2473 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2474 return EBUSY;
2475 }
2476
2477
2478 /* now we have the cnode locked down; Validate arguments */
2479 if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) {
2480 /* EINVAL if you are trying to manipulate an IMMUTABLE file */
2481 hfs_unlock(cp);
2482 hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
2483 return EINVAL;
2484 }
2485
2486 if ((hfs_get_gencount (cp)) == gen_counter) {
2487 /*
2488 * OK, the gen_counter matched. Go for it:
2489 * Toggle state bits, truncate file, and suppress mtime update
2490 */
2491 reset_decmp = 1;
2492 cp->c_bsdflags |= UF_COMPRESSED;
2493
2494 error = hfs_truncate(vp, 0, IO_NDELAY, 0, (HFS_TRUNCATE_SKIPTIMES), ap->a_context);
2495 }
2496 else {
2497 error = ESTALE;
2498 }
2499
2500 /* Unlock cnode before calling into decmpfs; it may need to get an EA */
2501 hfs_unlock(cp);
2502
2503 /*
2504 * Reset the decmp state while still holding the truncate lock. We need to
2505 * serialize here against a listxattr on this node which may occur at any
2506 * time.
2507 *
2508 * Even if '0/skiplock' is passed in 2nd argument to hfs_file_is_compressed,
2509 * that will still potentially require getting the com.apple.decmpfs EA. If the
2510 * EA is required, then we can't hold the cnode lock, because the getxattr call is
2511 * generic (through VFS), and can't pass along any info telling it that we're already
2512 * holding it (the lock). If we don't serialize, then we risk listxattr stopping
2513 * and trying to fill in the hfs_file_is_compressed info during the callback
2514 * operation, which will result in deadlock against the b-tree node.
2515 *
2516 * So, to serialize against listxattr (which will grab buf_t meta references on
2517 * the b-tree blocks), we hold the truncate lock as we're manipulating the
2518 * decmpfs payload.
2519 */
2520 if ((reset_decmp) && (error == 0)) {
2521 decmpfs_cnode *dp = VTOCMP (vp);
2522 if (dp != NULL) {
2523 decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
2524 }
2525
2526 /* Initialize the decmpfs node as needed */
2527 (void) hfs_file_is_compressed (cp, 0); /* ok to take lock */
2528 }
2529
2530 hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
2531
2532 #endif
2533 return error;
2534 }
2535
2536 case F_SETBACKINGSTORE: {
2537
2538 int error = 0;
2539
2540 /*
2541 * See comment in F_SETSTATICCONTENT re: using
2542 * a null check for a_data
2543 */
2544 if (ap->a_data) {
2545 error = hfs_set_backingstore (vp, 1);
2546 }
2547 else {
2548 error = hfs_set_backingstore (vp, 0);
2549 }
2550
2551 return error;
2552 }
2553
2554 case F_GETPATH_MTMINFO: {
2555 int error = 0;
2556
2557 int *data = (int*) ap->a_data;
2558
2559 /* Ask if this is a backingstore vnode */
2560 error = hfs_is_backingstore (vp, data);
2561
2562 return error;
2563 }
2564
2565 case F_FULLFSYNC: {
2566 int error;
2567
2568 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2569 return (EROFS);
2570 }
2571 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2572 if (error == 0) {
2573 error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
2574 hfs_unlock(VTOC(vp));
2575 }
2576
2577 return error;
2578 }
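	/*
	 * F_FULLFSYNC, summarized: the case above calls hfs_fsync() with the
	 * full-sync flag, which also asks the device to flush its write
	 * cache, giving a stronger durability guarantee than fsync(2) alone.
	 *
	 * Typical user-space usage (per the fcntl(2) man page; the fallback
	 * to fsync is a common convention, not mandated here):
	 *
	 *     if (fcntl(fd, F_FULLFSYNC, 0) == -1)
	 *         (void) fsync(fd);
	 */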
2579
2580 case F_CHKCLEAN: {
2581 register struct cnode *cp;
2582 int error;
2583
2584 if (!vnode_isreg(vp))
2585 return EINVAL;
2586
2587 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2588 if (error == 0) {
2589 cp = VTOC(vp);
2590 /*
2591 * used by a regression test to determine if
2592 * all the dirty pages (written via write(2)) have been cleaned
2593 * after a call to 'fsync'.
2594 */
2595 error = is_file_clean(vp, VTOF(vp)->ff_size);
2596 hfs_unlock(cp);
2597 }
2598 return (error);
2599 }
2600
2601 case F_RDADVISE: {
2602 register struct radvisory *ra;
2603 struct filefork *fp;
2604 int error;
2605
2606 if (!vnode_isreg(vp))
2607 return EINVAL;
2608
2609 ra = (struct radvisory *)(ap->a_data);
2610 fp = VTOF(vp);
2611
2612 /* Protect against a size change. */
2613 hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2614
2615 #if HFS_COMPRESSION
2616 if (compressed && (uncompressed_size == -1)) {
2617 /* fetching the uncompressed size failed above, so return the error */
2618 error = decmpfs_error;
2619 } else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
2620 (!compressed && (ra->ra_offset >= fp->ff_size))) {
2621 error = EFBIG;
2622 }
2623 #else /* HFS_COMPRESSION */
2624 if (ra->ra_offset >= fp->ff_size) {
2625 error = EFBIG;
2626 }
2627 #endif /* HFS_COMPRESSION */
2628 else {
2629 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
2630 }
2631
2632 hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
2633 return (error);
2634 }
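	/*
	 * F_RDADVISE, summarized: issues an advisory read-ahead for the byte
	 * range described by struct radvisory, under the truncate lock so
	 * the size cannot change; an offset at or beyond EOF (or beyond the
	 * uncompressed size for a compressed file) returns EFBIG.
	 *
	 * Typical user-space usage (per the fcntl(2) man page; the range is
	 * illustrative only):
	 *
	 *     struct radvisory ra = { .ra_offset = 0, .ra_count = 1 << 20 };
	 *     (void) fcntl(fd, F_RDADVISE, &ra);
	 */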
2635
2636 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
2637 {
2638 if (is64bit) {
2639 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
2640 }
2641 else {
2642 *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
2643 }
2644 return 0;
2645 }
2646
2647 case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
2648 *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
2649 break;
2650
2651 case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
2652 *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
2653 break;
2654
2655 case HFS_FSCTL_GET_VERY_LOW_DISK:
2656 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
2657 break;
2658
2659 case HFS_FSCTL_SET_VERY_LOW_DISK:
2660 if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
2661 return EINVAL;
2662 }
2663
2664 hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
2665 break;
2666
2667 case HFS_FSCTL_GET_LOW_DISK:
2668 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
2669 break;
2670
2671 case HFS_FSCTL_SET_LOW_DISK:
2672 if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
2673 || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
2674
2675 return EINVAL;
2676 }
2677
2678 hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
2679 break;
2680
2681 case HFS_FSCTL_GET_DESIRED_DISK:
2682 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
2683 break;
2684
2685 case HFS_FSCTL_SET_DESIRED_DISK:
2686 if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
2687 return EINVAL;
2688 }
2689
2690 hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
2691 break;
2692
2693 case HFS_VOLUME_STATUS:
2694 *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
2695 break;
2696
2697 case HFS_SET_BOOT_INFO:
2698 if (!vnode_isvroot(vp))
2699 return(EINVAL);
2700 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
2701 return(EACCES); /* must be superuser or owner of filesystem */
2702 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2703 return (EROFS);
2704 }
2705 hfs_lock_mount (hfsmp);
2706 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
2707 hfs_unlock_mount (hfsmp);
2708 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
2709 break;
2710
2711 case HFS_GET_BOOT_INFO:
2712 if (!vnode_isvroot(vp))
2713 return(EINVAL);
2714 hfs_lock_mount (hfsmp);
2715 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
2716 hfs_unlock_mount(hfsmp);
2717 break;
2718
2719 case HFS_MARK_BOOT_CORRUPT:
2720 /* Mark the boot volume corrupt by setting
2721 * kHFSVolumeInconsistentBit in the volume header. This will
2722 * force fsck_hfs on next mount.
2723 */
2724 if (!kauth_cred_issuser(kauth_cred_get())) {
2725 return EACCES;
2726 }
2727
2728 /* Allowed only on the root vnode of the boot volume */
2729 if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
2730 !vnode_isvroot(vp)) {
2731 return EINVAL;
2732 }
2733 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2734 return (EROFS);
2735 }
2736 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
2737 hfs_mark_volume_inconsistent(hfsmp);
2738 break;
2739
2740 case HFS_FSCTL_GET_JOURNAL_INFO:
2741 jip = (struct hfs_journal_info*)ap->a_data;
2742
2743 if (vp == NULLVP)
2744 return EINVAL;
2745
2746 if (hfsmp->jnl == NULL) {
2747 jnl_start = 0;
2748 jnl_size = 0;
2749 } else {
2750 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
2751 jnl_size = (off_t)hfsmp->jnl_size;
2752 }
2753
2754 jip->jstart = jnl_start;
2755 jip->jsize = jnl_size;
2756 break;
2757
2758 case HFS_SET_ALWAYS_ZEROFILL: {
2759 struct cnode *cp = VTOC(vp);
2760
2761 if (*(int *)ap->a_data) {
2762 cp->c_flag |= C_ALWAYS_ZEROFILL;
2763 } else {
2764 cp->c_flag &= ~C_ALWAYS_ZEROFILL;
2765 }
2766 break;
2767 }
2768
2769 case HFS_DISABLE_METAZONE: {
2770 /* Only root can disable metadata zone */
2771 if (!kauth_cred_issuser(kauth_cred_get())) {
2772 return EACCES;
2773 }
2774 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2775 return (EROFS);
2776 }
2777
2778 /* Disable metadata zone now */
2779 (void) hfs_metadatazone_init(hfsmp, true);
2780 printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
2781 break;
2782 }
2783
2784 default:
2785 return (ENOTTY);
2786 }
2787
2788 return 0;
2789 }
2790
2791 /*
2792 * select
2793 */
2794 int
2795 hfs_vnop_select(__unused struct vnop_select_args *ap)
2796 /*
2797 struct vnop_select_args {
2798 vnode_t a_vp;
2799 int a_which;
2800 int a_fflags;
2801 void *a_wql;
2802 vfs_context_t a_context;
2803 };
2804 */
2805 {
2806 /*
2807 * We should really check to see if I/O is possible.
2808 */
2809 return (1);
2810 }
2811
2812 /*
2813 * Converts a logical block number to a physical block, and optionally returns
2814 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
2815 * The physical block number is based on the device block size, which is currently 512 bytes.
2816 * The block run is returned in logical blocks, and is the REMAINING number of blocks
2817 */
2818 int
2819 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
2820 {
2821 struct filefork *fp = VTOF(vp);
2822 struct hfsmount *hfsmp = VTOHFS(vp);
2823 int retval = E_NONE;
2824 u_int32_t logBlockSize;
2825 size_t bytesContAvail = 0;
2826 off_t blockposition;
2827 int lockExtBtree;
2828 int lockflags = 0;
2829
2830 /*
2831 * Check for underlying vnode requests and ensure that logical
2832 * to physical mapping is requested.
2833 */
2834 if (vpp != NULL)
2835 *vpp = hfsmp->hfs_devvp;
2836 if (bnp == NULL)
2837 return (0);
2838
2839 logBlockSize = GetLogicalBlockSize(vp);
2840 blockposition = (off_t)bn * logBlockSize;
2841
2842 lockExtBtree = overflow_extents(fp);
2843
2844 if (lockExtBtree)
2845 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
2846
2847 retval = MacToVFSError(
2848 MapFileBlockC (HFSTOVCB(hfsmp),
2849 (FCB*)fp,
2850 MAXPHYSIO,
2851 blockposition,
2852 bnp,
2853 &bytesContAvail));
2854
2855 if (lockExtBtree)
2856 hfs_systemfile_unlock(hfsmp, lockflags);
2857
2858 if (retval == E_NONE) {
2859 /* Figure out how many read ahead blocks there are */
2860 if (runp != NULL) {
2861 if (can_cluster(logBlockSize)) {
2862 /* Make sure this result never goes negative: */
2863 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
2864 } else {
2865 *runp = 0;
2866 }
2867 }
2868 }
2869 return (retval);
2870 }
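/*
 * Worked example (illustrative numbers only): with a logical block size of
 * 4096 bytes, a request for logical block 10 maps byte offset 40960.  If
 * MapFileBlockC reports, say, 32768 contiguous bytes, *bnp receives the
 * 512-byte device block for that offset and *runp is 32768/4096 - 1 = 7
 * additional logical blocks; when can_cluster() rejects the block size,
 * *runp is simply 0.
 */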
2871
2872 /*
2873 * Convert logical block number to file offset.
2874 */
2875 int
2876 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
2877 /*
2878 struct vnop_blktooff_args {
2879 vnode_t a_vp;
2880 daddr64_t a_lblkno;
2881 off_t *a_offset;
2882 };
2883 */
2884 {
2885 if (ap->a_vp == NULL)
2886 return (EINVAL);
2887 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
2888
2889 return(0);
2890 }
2891
2892 /*
2893 * Convert file offset to logical block number.
2894 */
2895 int
2896 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
2897 /*
2898 struct vnop_offtoblk_args {
2899 vnode_t a_vp;
2900 off_t a_offset;
2901 daddr64_t *a_lblkno;
2902 };
2903 */
2904 {
2905 if (ap->a_vp == NULL)
2906 return (EINVAL);
2907 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
2908
2909 return(0);
2910 }
2911
2912 /*
2913 * Map file offset to physical block number.
2914 *
2915 * If this function is called for a write operation, and if the file
2916 * had virtual blocks allocated (delayed allocation), real blocks
2917 * are allocated by calling ExtendFileC().
2918 *
2919 * If this function is called for a read operation, and if the file
2920 * had virtual blocks allocated (delayed allocation), no change
2921 * to the size of the file is done, and if required, the rangelist is
2922 * searched for the mapping.
2923 *
2924 * System file cnodes are expected to be locked (shared or exclusive).
2925 */
2926 int
2927 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
2928 /*
2929 struct vnop_blockmap_args {
2930 vnode_t a_vp;
2931 off_t a_foffset;
2932 size_t a_size;
2933 daddr64_t *a_bpn;
2934 size_t *a_run;
2935 void *a_poff;
2936 int a_flags;
2937 vfs_context_t a_context;
2938 };
2939 */
2940 {
2941 struct vnode *vp = ap->a_vp;
2942 struct cnode *cp;
2943 struct filefork *fp;
2944 struct hfsmount *hfsmp;
2945 size_t bytesContAvail = 0;
2946 int retval = E_NONE;
2947 int syslocks = 0;
2948 int lockflags = 0;
2949 struct rl_entry *invalid_range;
2950 enum rl_overlaptype overlaptype;
2951 int started_tr = 0;
2952 int tooklock = 0;
2953
2954 #if HFS_COMPRESSION
2955 if (VNODE_IS_RSRC(vp)) {
2956 /* allow blockmaps to the resource fork */
2957 } else {
2958 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
2959 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
2960 switch(state) {
2961 case FILE_IS_COMPRESSED:
2962 return ENOTSUP;
2963 case FILE_IS_CONVERTING:
2964 /* if FILE_IS_CONVERTING, we allow blockmap */
2965 break;
2966 default:
2967 printf("invalid state %d for compressed file\n", state);
2968 /* fall through */
2969 }
2970 }
2971 }
2972 #endif /* HFS_COMPRESSION */
2973
2974 /* Do not allow blockmap operation on a directory */
2975 if (vnode_isdir(vp)) {
2976 return (ENOTSUP);
2977 }
2978
2979 /*
2980 * Check for underlying vnode requests and ensure that logical
2981 * to physical mapping is requested.
2982 */
2983 if (ap->a_bpn == NULL)
2984 return (0);
2985
2986 if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
2987 if (VTOC(vp)->c_lockowner != current_thread()) {
2988 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
2989 tooklock = 1;
2990 }
2991 }
2992 hfsmp = VTOHFS(vp);
2993 cp = VTOC(vp);
2994 fp = VTOF(vp);
2995
2996 retry:
2997 /* Check virtual blocks only when performing write operation */
2998 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
2999 if (hfs_start_transaction(hfsmp) != 0) {
3000 retval = EINVAL;
3001 goto exit;
3002 } else {
3003 started_tr = 1;
3004 }
3005 syslocks = SFL_EXTENTS | SFL_BITMAP;
3006
3007 } else if (overflow_extents(fp)) {
3008 syslocks = SFL_EXTENTS;
3009 }
3010
3011 if (syslocks)
3012 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
3013
3014 /*
3015 * Check for any delayed allocations.
3016 */
3017 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
3018 int64_t actbytes;
3019 u_int32_t loanedBlocks;
3020
3021 //
3022 // Make sure we have a transaction. It's possible
3023 // that we came in and fp->ff_unallocblocks was zero
3024 // but during the time we blocked acquiring the extents
3025 // btree, ff_unallocblocks became non-zero and so we
3026 // will need to start a transaction.
3027 //
3028 if (started_tr == 0) {
3029 if (syslocks) {
3030 hfs_systemfile_unlock(hfsmp, lockflags);
3031 syslocks = 0;
3032 }
3033 goto retry;
3034 }
3035
3036 /*
3037 * Note: ExtendFileC will release any blocks on loan and
3038 * acquire real blocks. So we ask to extend by zero bytes
3039 * since ExtendFileC will account for the virtual blocks.
3040 */
3041
3042 loanedBlocks = fp->ff_unallocblocks;
3043 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
3044 kEFAllMask | kEFNoClumpMask, &actbytes);
3045
3046 if (retval) {
3047 fp->ff_unallocblocks = loanedBlocks;
3048 cp->c_blocks += loanedBlocks;
3049 fp->ff_blocks += loanedBlocks;
3050
3051 hfs_lock_mount (hfsmp);
3052 hfsmp->loanedBlocks += loanedBlocks;
3053 hfs_unlock_mount (hfsmp);
3054
3055 hfs_systemfile_unlock(hfsmp, lockflags);
3056 cp->c_flag |= C_MODIFIED;
3057 if (started_tr) {
3058 (void) hfs_update(vp, TRUE);
3059 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3060
3061 hfs_end_transaction(hfsmp);
3062 started_tr = 0;
3063 }
3064 goto exit;
3065 }
3066 }
3067
3068 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
3069 ap->a_bpn, &bytesContAvail);
3070 if (syslocks) {
3071 hfs_systemfile_unlock(hfsmp, lockflags);
3072 syslocks = 0;
3073 }
3074
3075 if (started_tr) {
3076 (void) hfs_update(vp, TRUE);
3077 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3078 hfs_end_transaction(hfsmp);
3079 started_tr = 0;
3080 }
3081 if (retval) {
3082 /* On write, always return error because virtual blocks, if any,
3083 * should have been allocated in ExtendFileC(). We do not
3084 * allocate virtual blocks on read, therefore return error
3085 * only if no virtual blocks are allocated. Otherwise we search
3086 * rangelist for zero-fills
3087 */
3088 if ((MacToVFSError(retval) != ERANGE) ||
3089 (ap->a_flags & VNODE_WRITE) ||
3090 ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
3091 goto exit;
3092 }
3093
3094 /* Validate if the start offset is within logical file size */
3095 if (ap->a_foffset >= fp->ff_size) {
3096 goto exit;
3097 }
3098
3099 /*
3100 * At this point, we have encountered a failure during
3101 * MapFileBlockC that resulted in ERANGE, and we are not servicing
3102 * a write, and there are borrowed blocks.
3103 *
3104 * However, the cluster layer will not call blockmap for
3105 * blocks that are borrowed and in-cache. We have to assume that
3106 * because we observed ERANGE being emitted from MapFileBlockC, this
3107 * extent range is not valid on-disk. So we treat this as a
3108 * mapping that needs to be zero-filled prior to reading.
3109 *
3110 * Note that under certain circumstances (such as non-contiguous
3111 * userland VM mappings in the calling process), cluster_io
3112 * may be forced to split a large I/O driven by hfs_vnop_write
3113 * into multiple sub-I/Os that necessitate a RMW cycle. If this is
3114 * the case here, then we have already removed the invalid range list
3115 * mapping prior to getting to this blockmap call, so we should not
3116 * search the invalid rangelist for this byte range.
3117 */
3118
3119 bytesContAvail = fp->ff_size - ap->a_foffset;
3120 /*
3121 * Clip the contiguous available bytes to, at most, the allowable
3122 * maximum or the amount requested.
3123 */
3124
3125 if (bytesContAvail > ap->a_size) {
3126 bytesContAvail = ap->a_size;
3127 }
3128
3129 *ap->a_bpn = (daddr64_t) -1;
3130 retval = 0;
3131
3132 goto exit;
3133 }
3134
3135 /* MapFileBlockC() found a valid extent in the filefork. Search the
3136 * mapping information further for invalid file ranges
3137 */
3138 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
3139 ap->a_foffset + (off_t)bytesContAvail - 1,
3140 &invalid_range);
3141 if (overlaptype != RL_NOOVERLAP) {
3142 switch(overlaptype) {
3143 case RL_MATCHINGOVERLAP:
3144 case RL_OVERLAPCONTAINSRANGE:
3145 case RL_OVERLAPSTARTSBEFORE:
3146 /* There's no valid block for this byte offset */
3147 *ap->a_bpn = (daddr64_t)-1;
3148 /* There's no point limiting the amount to be returned
3149 * if the invalid range that was hit extends all the way
3150 * to the EOF (i.e. there are no valid bytes between the
3151 * end of this range and the file's EOF):
3152 */
3153 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
3154 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
3155 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
3156 }
3157 break;
3158
3159 case RL_OVERLAPISCONTAINED:
3160 case RL_OVERLAPENDSAFTER:
3161 /* The range of interest hits an invalid block before the end: */
3162 if (invalid_range->rl_start == ap->a_foffset) {
3163 /* There's actually no valid information to be had starting here: */
3164 *ap->a_bpn = (daddr64_t)-1;
3165 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
3166 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
3167 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
3168 }
3169 } else {
3170 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
3171 }
3172 break;
3173
3174 case RL_NOOVERLAP:
3175 break;
3176 } /* end switch */
3177 if (bytesContAvail > ap->a_size)
3178 bytesContAvail = ap->a_size;
3179 }
3180
3181 exit:
3182 if (retval == 0) {
3183 if (ap->a_run)
3184 *ap->a_run = bytesContAvail;
3185
3186 if (ap->a_poff)
3187 *(int *)ap->a_poff = 0;
3188 }
3189
3190 if (tooklock)
3191 hfs_unlock(cp);
3192
3193 return (MacToVFSError(retval));
3194 }
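/*
 * Note on the hole convention used above (a summary, not new behavior): when
 * the routine succeeds but sets *ap->a_bpn to -1, the cluster layer treats
 * the range as having no valid on-disk backing and zero-fills it for the
 * length reported in *ap->a_run; this is how delayed-allocation ("loaned")
 * blocks and invalid ranges are surfaced to readers without allocating real
 * storage.
 */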
3195
3196 /*
3197 * prepare and issue the I/O
3198 * buf_strategy knows how to deal
3199 * with requests that require
3200 * fragmented I/Os
3201 */
3202 int
3203 hfs_vnop_strategy(struct vnop_strategy_args *ap)
3204 {
3205 buf_t bp = ap->a_bp;
3206 vnode_t vp = buf_vnode(bp);
3207 int error = 0;
3208
3209 /* Mark buffer as containing static data if cnode flag set */
3210 if (VTOC(vp)->c_flag & C_SSD_STATIC) {
3211 buf_markstatic(bp);
3212 }
3213
3214 /* Mark buffer for greedy-mode writes if cnode flag set */
3215 if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) {
3216 bufattr_markgreedymode((bufattr_t)(&bp->b_attr));
3217 }
3218
3219 #if CONFIG_PROTECT
3220 cnode_t *cp = NULL;
3221
3222 if ((cp = cp_get_protected_cnode(vp)) != NULL) {
3223 /*
3224 * We rely upon the truncate lock to protect the
3225 * CP cache key from getting tossed prior to our IO finishing here.
3226 * Nearly all cluster io calls to manipulate file payload from HFS
3227 * take the truncate lock before calling into the cluster
3228 * layer to ensure the file size does not change, or that they
3229 * have exclusive right to change the EOF of the file.
3230 * That same guarantee protects us here since the code that
3231 * deals with CP lock events must now take the truncate lock
3232 * before doing anything.
3233 *
3234 * There is one exception here:
3235 * 1) VM swapfile IO, because HFS will funnel the
3236 * VNOP_PAGEOUT directly into a cluster_pageout call for the
3237 * swapfile code only, without holding the truncate lock. This is because
3238 * individual swapfiles are maintained at fixed-length sizes by the VM code.
3239 * In non-swapfile IO we use PAGEOUT_V2 semantics which allow us to
3240 * create our own UPL and thus take the truncate lock before calling
3241 * into the cluster layer. In that case, however, we are not concerned
3242 * with the CP blob being wiped out in the middle of the IO
3243 * because there isn't anything to toss; the VM swapfile key stays
3244 * in-core as long as the file is open.
3245 *
3246 * NB:
3247 * For filesystem resize, we may not have access to the underlying
3248 * file's cache key for whatever reason (device may be locked). However,
3249 * we do not need it since we are going to use the temporary HFS-wide resize key
3250 * which is generated once we start relocating file content. If this file's I/O
3251 * should be done using the resize key, it will have been supplied already, so
3252 * do not attach the file's cp blob to the buffer.
3253 */
3254 if ((cp->c_cpentry->cp_flags & CP_RELOCATION_INFLIGHT) == 0) {
3255 buf_setcpaddr(bp, cp->c_cpentry);
3256 }
3257 }
3258 #endif /* CONFIG_PROTECT */
3259
3260 error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
3261
3262 return error;
3263 }
3264
3265 static int
3266 hfs_minorupdate(struct vnode *vp) {
3267 struct cnode *cp = VTOC(vp);
3268 cp->c_flag &= ~C_MODIFIED;
3269 cp->c_touch_acctime = 0;
3270 cp->c_touch_chgtime = 0;
3271 cp->c_touch_modtime = 0;
3272
3273 return 0;
3274 }
3275
3276 int
3277 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vfs_context_t context)
3278 {
3279 register struct cnode *cp = VTOC(vp);
3280 struct filefork *fp = VTOF(vp);
3281 struct proc *p = vfs_context_proc(context);
3282 kauth_cred_t cred = vfs_context_ucred(context);
3283 int retval;
3284 off_t bytesToAdd;
3285 off_t actualBytesAdded;
3286 off_t filebytes;
3287 u_int32_t fileblocks;
3288 int blksize;
3289 struct hfsmount *hfsmp;
3290 int lockflags;
3291 int skipupdate = (truncateflags & HFS_TRUNCATE_SKIPUPDATE);
3292 int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES);
3293
3294 blksize = VTOVCB(vp)->blockSize;
3295 fileblocks = fp->ff_blocks;
3296 filebytes = (off_t)fileblocks * (off_t)blksize;
3297
3298 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
3299 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
3300
3301 if (length < 0)
3302 return (EINVAL);
3303
3304 /* This should only happen with a corrupt filesystem */
3305 if ((off_t)fp->ff_size < 0)
3306 return (EINVAL);
3307
3308 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
3309 return (EFBIG);
3310
3311 hfsmp = VTOHFS(vp);
3312
3313 retval = E_NONE;
3314
3315 /* Files that are changing size are not hot file candidates. */
3316 if (hfsmp->hfc_stage == HFC_RECORDING) {
3317 fp->ff_bytesread = 0;
3318 }
3319
3320 /*
3321 * We cannot just check if fp->ff_size == length (as an optimization)
3322 * since there may be extra physical blocks that also need truncation.
3323 */
3324 #if QUOTA
3325 if ((retval = hfs_getinoquota(cp)))
3326 return(retval);
3327 #endif /* QUOTA */
3328
3329 /*
3330 * Lengthen the size of the file. We must ensure that the
3331 * last byte of the file is allocated. Since the smallest
3332 * value of ff_size is 0, length will be at least 1.
3333 */
3334 if (length > (off_t)fp->ff_size) {
3335 #if QUOTA
3336 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
3337 cred, 0);
3338 if (retval)
3339 goto Err_Exit;
3340 #endif /* QUOTA */
3341 /*
3342 * If we don't have enough physical space then
3343 * we need to extend the physical size.
3344 */
3345 if (length > filebytes) {
3346 int eflags;
3347 u_int32_t blockHint = 0;
3348
3349 /* All or nothing and don't round up to clumpsize. */
3350 eflags = kEFAllMask | kEFNoClumpMask;
3351
3352 if (cred && suser(cred, NULL) != 0)
3353 eflags |= kEFReserveMask; /* keep a reserve */
3354
3355 /*
3356 * Allocate Journal and Quota files in metadata zone.
3357 */
3358 if (filebytes == 0 &&
3359 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
3360 hfs_virtualmetafile(cp)) {
3361 eflags |= kEFMetadataMask;
3362 blockHint = hfsmp->hfs_metazone_start;
3363 }
3364 if (hfs_start_transaction(hfsmp) != 0) {
3365 retval = EINVAL;
3366 goto Err_Exit;
3367 }
3368
3369 /* Protect extents b-tree and allocation bitmap */
3370 lockflags = SFL_BITMAP;
3371 if (overflow_extents(fp))
3372 lockflags |= SFL_EXTENTS;
3373 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3374
3375 while ((length > filebytes) && (retval == E_NONE)) {
3376 bytesToAdd = length - filebytes;
3377 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
3378 (FCB*)fp,
3379 bytesToAdd,
3380 blockHint,
3381 eflags,
3382 &actualBytesAdded));
3383
3384 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
3385 if (actualBytesAdded == 0 && retval == E_NONE) {
3386 if (length > filebytes)
3387 length = filebytes;
3388 break;
3389 }
3390 } /* endwhile */
3391
3392 hfs_systemfile_unlock(hfsmp, lockflags);
3393
3394 if (hfsmp->jnl) {
3395 if (skipupdate) {
3396 (void) hfs_minorupdate(vp);
3397 }
3398 else {
3399 (void) hfs_update(vp, TRUE);
3400 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3401 }
3402 }
3403
3404 hfs_end_transaction(hfsmp);
3405
3406 if (retval)
3407 goto Err_Exit;
3408
3409 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
3410 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
3411 }
3412
3413 if (!(flags & IO_NOZEROFILL)) {
3414 if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
3415 struct rl_entry *invalid_range;
3416 off_t zero_limit;
3417
3418 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
3419 if (length < zero_limit) zero_limit = length;
3420
3421 if (length > (off_t)fp->ff_size) {
3422 struct timeval tv;
3423
3424 /* Extending the file: time to fill out the current last page with zeroes? */
3425 if ((fp->ff_size & PAGE_MASK_64) &&
3426 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
3427 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
3428
3429 /* There's some valid data at the start of the (current) last page
3430 of the file, so zero out the remainder of that page to ensure the
3431 entire page contains valid data. Since there is no invalid range
3432 possible past the (current) eof, there's no need to remove anything
3433 from the invalid range list before calling cluster_write(): */
3434 hfs_unlock(cp);
3435 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
3436 fp->ff_size, (off_t)0,
3437 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
3438 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
3439 if (retval) goto Err_Exit;
3440
3441 /* Merely invalidate the remaining area, if necessary: */
3442 if (length > zero_limit) {
3443 microuptime(&tv);
3444 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
3445 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
3446 }
3447 } else {
3448 /* The page containing the (current) eof is invalid: just add the
3449 remainder of the page to the invalid list, along with the area
3450 being newly allocated:
3451 */
3452 microuptime(&tv);
3453 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
3454 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
3455 }
3456 }
3457 } else {
3458 panic("hfs_truncate: invoked on non-UBC object?!");
3459 }
3460 }
3461 if (suppress_times == 0) {
3462 cp->c_touch_modtime = TRUE;
3463 }
3464 fp->ff_size = length;
3465
3466 } else { /* Shorten the size of the file */
3467
3468 if ((off_t)fp->ff_size > length) {
3469 /* Any space previously marked as invalid is now irrelevant: */
3470 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
3471 }
3472
3473 /*
3474 * Account for any unmapped blocks. Note that the new
3475 * file length can still end up with unmapped blocks.
3476 */
3477 if (fp->ff_unallocblocks > 0) {
3478 u_int32_t finalblks;
3479 u_int32_t loanedBlocks;
3480
3481 hfs_lock_mount(hfsmp);
3482 loanedBlocks = fp->ff_unallocblocks;
3483 cp->c_blocks -= loanedBlocks;
3484 fp->ff_blocks -= loanedBlocks;
3485 fp->ff_unallocblocks = 0;
3486
3487 hfsmp->loanedBlocks -= loanedBlocks;
3488
3489 finalblks = (length + blksize - 1) / blksize;
3490 if (finalblks > fp->ff_blocks) {
3491 /* calculate required unmapped blocks */
3492 loanedBlocks = finalblks - fp->ff_blocks;
3493 hfsmp->loanedBlocks += loanedBlocks;
3494
3495 fp->ff_unallocblocks = loanedBlocks;
3496 cp->c_blocks += loanedBlocks;
3497 fp->ff_blocks += loanedBlocks;
3498 }
3499 hfs_unlock_mount (hfsmp);
3500 }
3501
3502 /*
3503 * For a TBE process the deallocation of the file blocks is
3504 * delayed until the file is closed. And hfs_close calls
3505 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
3506 * isn't set, we make sure this isn't a TBE process.
3507 */
3508 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
3509 #if QUOTA
3510 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
3511 #endif /* QUOTA */
3512 if (hfs_start_transaction(hfsmp) != 0) {
3513 retval = EINVAL;
3514 goto Err_Exit;
3515 }
3516
3517 if (fp->ff_unallocblocks == 0) {
3518 /* Protect extents b-tree and allocation bitmap */
3519 lockflags = SFL_BITMAP;
3520 if (overflow_extents(fp))
3521 lockflags |= SFL_EXTENTS;
3522 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3523
3524 retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0,
3525 FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));
3526
3527 hfs_systemfile_unlock(hfsmp, lockflags);
3528 }
3529 if (hfsmp->jnl) {
3530 if (retval == 0) {
3531 fp->ff_size = length;
3532 }
3533 if (skipupdate) {
3534 (void) hfs_minorupdate(vp);
3535 }
3536 else {
3537 (void) hfs_update(vp, TRUE);
3538 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3539 }
3540 }
3541 hfs_end_transaction(hfsmp);
3542
3543 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
3544 if (retval)
3545 goto Err_Exit;
3546 #if QUOTA
3547 /* These are bytesreleased */
3548 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
3549 #endif /* QUOTA */
3550 }
3551 /*
3552 * Only set update flag if the logical length changes & we aren't
3553 * suppressing modtime updates.
3554 */
3555 if (((off_t)fp->ff_size != length) && (suppress_times == 0)) {
3556 cp->c_touch_modtime = TRUE;
3557 }
3558 fp->ff_size = length;
3559 }
3560 if (cp->c_mode & (S_ISUID | S_ISGID)) {
3561 if (!vfs_context_issuser(context)) {
3562 cp->c_mode &= ~(S_ISUID | S_ISGID);
3563 skipupdate = 0;
3564 }
3565 }
3566 if (skipupdate) {
3567 retval = hfs_minorupdate(vp);
3568 }
3569 else {
3570 cp->c_touch_chgtime = TRUE; /* status changed */
3571 if (suppress_times == 0) {
3572 cp->c_touch_modtime = TRUE; /* file data was modified */
3573
3574 /*
3575 * If we are not suppressing the modtime update, then
3576 * update the gen count as well.
3577 */
3578 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) {
3579 hfs_incr_gencount(cp);
3580 }
3581 }
3582
3583 retval = hfs_update(vp, MNT_WAIT);
3584 }
3585 if (retval) {
3586 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
3587 -1, -1, -1, retval, 0);
3588 }
3589
3590 Err_Exit:
3591
3592 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
3593 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
3594
3595 return (retval);
3596 }
3597
3598 /*
3599 * Preparation which must be done prior to deleting the catalog record
3600 * of a file or directory. In order to make the on-disk as safe as possible,
3601 * we remove the catalog entry before releasing the bitmap blocks and the
3602 * overflow extent records. However, some work must be done prior to deleting
3603 * the catalog record.
3604 *
3605 * When calling this function, the cnode must exist both in memory and on-disk.
3606 * If there are both resource fork and data fork vnodes, this function should
3607 * be called on both.
3608 */
3609
3610 int
3611 hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {
3612
3613 struct filefork *fp = VTOF(vp);
3614 struct cnode *cp = VTOC(vp);
3615 #if QUOTA
3616 int retval = 0;
3617 #endif /* QUOTA */
3618
3619 /* Cannot truncate an HFS directory! */
3620 if (vnode_isdir(vp)) {
3621 return (EISDIR);
3622 }
3623
3624 /*
3625 * See the comment below in hfs_truncate for why we need to call
3626 * setsize here. Essentially we want to avoid pending IO if we
3627 * already know that the blocks are going to be released here.
3628 * This function is only called when totally removing all storage for a file, so
3629 * we can take a shortcut and immediately setsize (0);
3630 */
3631 ubc_setsize(vp, 0);
3632
3633 /* This should only happen with a corrupt filesystem */
3634 if ((off_t)fp->ff_size < 0)
3635 return (EINVAL);
3636
3637 /*
3638 * We cannot just check if fp->ff_size == length (as an optimization)
3639 * since there may be extra physical blocks that also need truncation.
3640 */
3641 #if QUOTA
3642 if ((retval = hfs_getinoquota(cp))) {
3643 return(retval);
3644 }
3645 #endif /* QUOTA */
3646
3647 /* Wipe out any invalid ranges which have yet to be backed by disk */
3648 rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);
3649
3650 /*
3651 * Account for any unmapped blocks. Since we're deleting the
3652 * entire file, we don't have to worry about just shrinking
3653 * to a smaller number of borrowed blocks.
3654 */
3655 if (fp->ff_unallocblocks > 0) {
3656 u_int32_t loanedBlocks;
3657
3658 hfs_lock_mount (hfsmp);
3659 loanedBlocks = fp->ff_unallocblocks;
3660 cp->c_blocks -= loanedBlocks;
3661 fp->ff_blocks -= loanedBlocks;
3662 fp->ff_unallocblocks = 0;
3663
3664 hfsmp->loanedBlocks -= loanedBlocks;
3665
3666 hfs_unlock_mount (hfsmp);
3667 }
3668
3669 return 0;
3670 }
3671
3672
3673 /*
3674 * Special wrapper around calling TruncateFileC. This function is usable
3675 * even when the catalog record does not exist any longer, making it ideal
3676 * for use when deleting a file. The simplification here is that we know
3677 * that we are releasing all blocks.
3678 *
3679 * Note that this function may be called when there is no vnode backing
3680 * the file fork in question. We may call this from hfs_vnop_inactive
3681 * to clear out resource fork data (and may not want to clear out the data
3682 * fork yet). As a result, we pointer-check both sets of inputs before
3683 * doing anything with them.
3684 *
3685 * The caller is responsible for saving off a copy of the filefork(s)
3686 * embedded within the cnode prior to calling this function. The pointers
3687 * supplied as arguments must be valid even if the cnode is no longer valid.
3688 */
3689
3690 int
3691 hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
3692 struct filefork *rsrcfork, u_int32_t fileid) {
3693
3694 off_t filebytes;
3695 u_int32_t fileblocks;
3696 int blksize = 0;
3697 int error = 0;
3698 int lockflags;
3699
3700 blksize = hfsmp->blockSize;
3701
3702 /* Data Fork */
3703 if ((datafork != NULL) && (datafork->ff_blocks > 0)) {
3704 fileblocks = datafork->ff_blocks;
3705 filebytes = (off_t)fileblocks * (off_t)blksize;
3706
3707 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3708
3709 while (filebytes > 0) {
3710 if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(datafork)) {
3711 filebytes -= HFS_BIGFILE_SIZE;
3712 } else {
3713 filebytes = 0;
3714 }
3715
3716 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3717 if (hfs_start_transaction(hfsmp) != 0) {
3718 error = EINVAL;
3719 break;
3720 }
3721
3722 if (datafork->ff_unallocblocks == 0) {
3723 /* Protect extents b-tree and allocation bitmap */
3724 lockflags = SFL_BITMAP;
3725 if (overflow_extents(datafork))
3726 lockflags |= SFL_EXTENTS;
3727 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3728
3729 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false));
3730
3731 hfs_systemfile_unlock(hfsmp, lockflags);
3732 }
3733 if (error == 0) {
3734 datafork->ff_size = filebytes;
3735 }
3736 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3737
3738 /* Finish the transaction and start over if necessary */
3739 hfs_end_transaction(hfsmp);
3740
3741 if (error) {
3742 break;
3743 }
3744 }
3745 }
3746
3747 /* Resource fork */
3748 if (error == 0 && (rsrcfork != NULL) && rsrcfork->ff_blocks > 0) {
3749 fileblocks = rsrcfork->ff_blocks;
3750 filebytes = (off_t)fileblocks * (off_t)blksize;
3751
3752 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3753
3754 while (filebytes > 0) {
3755 if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(rsrcfork)) {
3756 filebytes -= HFS_BIGFILE_SIZE;
3757 } else {
3758 filebytes = 0;
3759 }
3760
3761 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3762 if (hfs_start_transaction(hfsmp) != 0) {
3763 error = EINVAL;
3764 break;
3765 }
3766
3767 if (rsrcfork->ff_unallocblocks == 0) {
3768 /* Protect extents b-tree and allocation bitmap */
3769 lockflags = SFL_BITMAP;
3770 if (overflow_extents(rsrcfork))
3771 lockflags |= SFL_EXTENTS;
3772 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3773
3774 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false));
3775
3776 hfs_systemfile_unlock(hfsmp, lockflags);
3777 }
3778 if (error == 0) {
3779 rsrcfork->ff_size = filebytes;
3780 }
3781 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3782
3783 /* Finish the transaction and start over if necessary */
3784 hfs_end_transaction(hfsmp);
3785
3786 if (error) {
3787 break;
3788 }
3789 }
3790 }
3791
3792 return error;
3793 }
3794
3795
3796 /*
3797 * Truncate a cnode to at most length bytes, freeing (or adding) the
3798 * disk blocks as needed.
3799 */
3800 int
3801 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
3802 int truncateflags, vfs_context_t context)
3803 {
3804 struct filefork *fp = VTOF(vp);
3805 off_t filebytes;
3806 u_int32_t fileblocks;
3807 int blksize, error = 0;
3808 struct cnode *cp = VTOC(vp);
3809
3810 /* Cannot truncate an HFS directory! */
3811 if (vnode_isdir(vp)) {
3812 return (EISDIR);
3813 }
3814 /* A swap file cannot change size. */
3815 if (vnode_isswap(vp) && (length != 0)) {
3816 return (EPERM);
3817 }
3818
3819 blksize = VTOVCB(vp)->blockSize;
3820 fileblocks = fp->ff_blocks;
3821 filebytes = (off_t)fileblocks * (off_t)blksize;
3822
3823 //
3824 // Have to do this here so that we don't wind up with
3825 // i/o pending for blocks that are about to be released
3826 // if we truncate the file.
3827 //
3828 // If skipsetsize is set, then the caller is responsible
3829 // for the ubc_setsize.
3830 //
3831 // Even if skipsetsize is set, if the length is zero we
3832 // want to call ubc_setsize() because as of SnowLeopard
3833 // it will no longer cause any page-ins and it will drop
3834 // any dirty pages so that we don't do any i/o that we
3835 // don't have to. This also prevents a race where i/o
3836 // for truncated blocks may overwrite later data if the
3837 // blocks get reallocated to a different file.
3838 //
3839 if (!skipsetsize || length == 0)
3840 ubc_setsize(vp, length);
3841
3842 // have to loop truncating or growing files that are
3843 // really big because otherwise transactions can get
3844 // enormous and consume too many kernel resources.
3845
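// Each pass below moves filebytes by at most HFS_BIGFILE_SIZE when the fork
// has overflow extents, so each do_hfs_truncate() call stays within a
// reasonably sized journal transaction.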
3846 if (length < filebytes) {
3847 while (filebytes > length) {
3848 if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
3849 filebytes -= HFS_BIGFILE_SIZE;
3850 } else {
3851 filebytes = length;
3852 }
3853 cp->c_flag |= C_FORCEUPDATE;
3854 error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
3855 if (error)
3856 break;
3857 }
3858 } else if (length > filebytes) {
3859 while (filebytes < length) {
3860 if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
3861 filebytes += HFS_BIGFILE_SIZE;
3862 } else {
3863 filebytes = length;
3864 }
3865 cp->c_flag |= C_FORCEUPDATE;
3866 error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
3867 if (error)
3868 break;
3869 }
3870 } else /* Same logical size */ {
3871
3872 error = do_hfs_truncate(vp, length, flags, truncateflags, context);
3873 }
3874 /* Files that are changing size are not hot file candidates. */
3875 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
3876 fp->ff_bytesread = 0;
3877 }
3878
3879 return (error);
3880 }
3881
3882
3883
3884 /*
3885 * Preallocate file storage space.
3886 */
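/*
 * Typically reached via VNOP_ALLOCATE; a (hypothetical) user-space sketch would
 * be an fcntl(fd, F_PREALLOCATE, &fst) whose positional mode and flags map onto
 * the ALLOCATEFROMPEOF/ALLOCATEFROMVOL and ALLOCATECONTIG/ALLOCATEALL flags
 * consumed below.
 */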
3887 int
3888 hfs_vnop_allocate(struct vnop_allocate_args /* {
3889 vnode_t a_vp;
3890 off_t a_length;
3891 u_int32_t a_flags;
3892 off_t *a_bytesallocated;
3893 off_t a_offset;
3894 vfs_context_t a_context;
3895 } */ *ap)
3896 {
3897 struct vnode *vp = ap->a_vp;
3898 struct cnode *cp;
3899 struct filefork *fp;
3900 ExtendedVCB *vcb;
3901 off_t length = ap->a_length;
3902 off_t startingPEOF;
3903 off_t moreBytesRequested;
3904 off_t actualBytesAdded;
3905 off_t filebytes;
3906 u_int32_t fileblocks;
3907 int retval, retval2;
3908 u_int32_t blockHint;
3909 u_int32_t extendFlags; /* For call to ExtendFileC */
3910 struct hfsmount *hfsmp;
3911 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
3912 int lockflags;
3913 time_t orig_ctime;
3914
3915 *(ap->a_bytesallocated) = 0;
3916
3917 if (!vnode_isreg(vp))
3918 return (EISDIR);
3919 if (length < (off_t)0)
3920 return (EINVAL);
3921
3922 cp = VTOC(vp);
3923
3924 orig_ctime = VTOC(vp)->c_ctime;
3925
3926 check_for_tracked_file(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);
3927
3928 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
3929
3930 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
3931 goto Err_Exit;
3932 }
3933
3934 fp = VTOF(vp);
3935 hfsmp = VTOHFS(vp);
3936 vcb = VTOVCB(vp);
3937
3938 fileblocks = fp->ff_blocks;
3939 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
3940
3941 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
3942 retval = EINVAL;
3943 goto Err_Exit;
3944 }
3945
3946 /* Fill in the flags word for the call to Extend the file */
3947
3948 extendFlags = kEFNoClumpMask;
3949 if (ap->a_flags & ALLOCATECONTIG)
3950 extendFlags |= kEFContigMask;
3951 if (ap->a_flags & ALLOCATEALL)
3952 extendFlags |= kEFAllMask;
3953 if (cred && suser(cred, NULL) != 0)
3954 extendFlags |= kEFReserveMask;
3955 if (hfs_virtualmetafile(cp))
3956 extendFlags |= kEFMetadataMask;
3957
3958 retval = E_NONE;
3959 blockHint = 0;
3960 startingPEOF = filebytes;
3961
3962 if (ap->a_flags & ALLOCATEFROMPEOF)
3963 length += filebytes;
3964 else if (ap->a_flags & ALLOCATEFROMVOL)
3965 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
3966
3967 /* If no changes are necessary, then we're done */
3968 if (filebytes == length)
3969 goto Std_Exit;
3970
3971 /*
3972 * Lengthen the size of the file. We must ensure that the
3973 * last byte of the file is allocated. Since the smallest
3974 * value of filebytes is 0, length will be at least 1.
3975 */
3976 if (length > filebytes) {
3977 off_t total_bytes_added = 0, orig_request_size;
3978
3979 orig_request_size = moreBytesRequested = length - filebytes;
3980
3981 #if QUOTA
3982 retval = hfs_chkdq(cp,
3983 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
3984 cred, 0);
3985 if (retval)
3986 goto Err_Exit;
3987
3988 #endif /* QUOTA */
3989 /*
3990 * Metadata zone checks.
3991 */
3992 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
3993 /*
3994 * Allocate Journal and Quota files in metadata zone.
3995 */
3996 if (hfs_virtualmetafile(cp)) {
3997 blockHint = hfsmp->hfs_metazone_start;
3998 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
3999 (blockHint <= hfsmp->hfs_metazone_end)) {
4000 /*
4001 * Move blockHint outside metadata zone.
4002 */
4003 blockHint = hfsmp->hfs_metazone_end + 1;
4004 }
4005 }
4006
4007
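/*
 * Extend in chunks of at most HFS_BIGFILE_SIZE, each within its own
 * transaction, mirroring the chunked truncate paths elsewhere in this file.
 */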
4008 while ((length > filebytes) && (retval == E_NONE)) {
4009 off_t bytesRequested;
4010
4011 if (hfs_start_transaction(hfsmp) != 0) {
4012 retval = EINVAL;
4013 goto Err_Exit;
4014 }
4015
4016 /* Protect extents b-tree and allocation bitmap */
4017 lockflags = SFL_BITMAP;
4018 if (overflow_extents(fp))
4019 lockflags |= SFL_EXTENTS;
4020 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4021
4022 if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
4023 bytesRequested = HFS_BIGFILE_SIZE;
4024 } else {
4025 bytesRequested = moreBytesRequested;
4026 }
4027
4028 if (extendFlags & kEFContigMask) {
4029 // if we're on a sparse device, this will force it to do a
4030 // full scan to find the space needed.
4031 hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
4032 }
4033
4034 retval = MacToVFSError(ExtendFileC(vcb,
4035 (FCB*)fp,
4036 bytesRequested,
4037 blockHint,
4038 extendFlags,
4039 &actualBytesAdded));
4040
4041 if (retval == E_NONE) {
4042 *(ap->a_bytesallocated) += actualBytesAdded;
4043 total_bytes_added += actualBytesAdded;
4044 moreBytesRequested -= actualBytesAdded;
4045 if (blockHint != 0) {
4046 blockHint += actualBytesAdded / vcb->blockSize;
4047 }
4048 }
4049 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
4050
4051 hfs_systemfile_unlock(hfsmp, lockflags);
4052
4053 if (hfsmp->jnl) {
4054 (void) hfs_update(vp, TRUE);
4055 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
4056 }
4057
4058 hfs_end_transaction(hfsmp);
4059 }
4060
4061
4062 /*
4063 * if we get an error and no changes were made then exit
4064 * otherwise we must do the hfs_update to reflect the changes
4065 */
4066 if (retval && (startingPEOF == filebytes))
4067 goto Err_Exit;
4068
4069 /*
4070 * Adjust actualBytesAdded to be allocation block aligned, not
4071 * clump size aligned.
4072 * NOTE: what we report here does not affect reality
4073 * until the file is closed, when we truncate the file to allocation
4074 * block size.
4075 */
4076 if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
4077 *(ap->a_bytesallocated) =
4078 roundup(orig_request_size, (off_t)vcb->blockSize);
4079
4080 } else { /* Shorten the size of the file */
4081
4082 if (fp->ff_size > length) {
4083 /*
4084 * Any buffers that are past the truncation point need to be
4085 * invalidated (to maintain buffer cache consistency).
4086 */
4087 }
4088
4089 retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context);
4090 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
4091
4092 /*
4093 * if we get an error and no changes were made then exit
4094 * otherwise we must do the hfs_update to reflect the changes
4095 */
4096 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
4097 #if QUOTA
4098 /* These are bytesreleased */
4099 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
4100 #endif /* QUOTA */
4101
4102 if (fp->ff_size > filebytes) {
4103 fp->ff_size = filebytes;
4104
4105 hfs_unlock(cp);
4106 ubc_setsize(vp, fp->ff_size);
4107 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
4108 }
4109 }
4110
4111 Std_Exit:
4112 cp->c_touch_chgtime = TRUE;
4113 cp->c_touch_modtime = TRUE;
4114 retval2 = hfs_update(vp, MNT_WAIT);
4115
4116 if (retval == 0)
4117 retval = retval2;
4118 Err_Exit:
4119 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
4120 hfs_unlock(cp);
4121 return (retval);
4122 }
4123
4124
4125 /*
4126 * Pagein for HFS filesystem
4127 */
4128 int
4129 hfs_vnop_pagein(struct vnop_pagein_args *ap)
4130 /*
4131 struct vnop_pagein_args {
4132 vnode_t a_vp,
4133 upl_t a_pl,
4134 vm_offset_t a_pl_offset,
4135 off_t a_f_offset,
4136 size_t a_size,
4137 int a_flags
4138 vfs_context_t a_context;
4139 };
4140 */
4141 {
4142 vnode_t vp;
4143 struct cnode *cp;
4144 struct filefork *fp;
4145 int error = 0;
4146 upl_t upl;
4147 upl_page_info_t *pl;
4148 off_t f_offset;
4149 int offset;
4150 int isize;
4151 int pg_index;
4152 boolean_t truncate_lock_held = FALSE;
4153 boolean_t file_converted = FALSE;
4154 kern_return_t kret;
4155
4156 vp = ap->a_vp;
4157 cp = VTOC(vp);
4158 fp = VTOF(vp);
4159
4160 #if CONFIG_PROTECT
4161 if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) {
4162 /*
4163 * If we errored here, then this means that one of two things occurred:
4164 * 1. there was a problem with the decryption of the key.
4165 * 2. the device is locked and we are not allowed to access this particular file.
4166 *
4167 * Either way, this means that we need to shut down this upl now. If the
4168 * pl pointer is NULL (meaning that we're supposed to create the UPL ourselves),
4169 * we create a upl and immediately abort it.
4170 */
4171 if (ap->a_pl == NULL) {
4172 /* create the upl */
4173 ubc_create_upl (vp, ap->a_f_offset, ap->a_size, &upl, &pl,
4174 UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
4175 /* mark the range as needed so it doesn't immediately get discarded upon abort */
4176 ubc_upl_range_needed (upl, ap->a_pl_offset / PAGE_SIZE, 1);
4177
4178 /* Abort the range */
4179 ubc_upl_abort_range (upl, 0, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
4180 }
4181
4182
4183 return error;
4184 }
4185 #endif /* CONFIG_PROTECT */
4186
4187 if (ap->a_pl != NULL) {
4188 /*
4189 * this can only happen for swap files now that
4190 * we're asking for V2 paging behavior...
4191 * so we don't need to worry about decompression,
4192 * keeping track of blocks read, or taking the truncate lock
4193 */
4194 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
4195 ap->a_size, (off_t)fp->ff_size, ap->a_flags);
4196 goto pagein_done;
4197 }
4198
4199 retry_pagein:
4200 /*
4201 * take truncate lock (shared/recursive) to guard against
4202 * zero-fill thru fsync interfering, but only for v2
4203 *
4204 * the HFS_LOCK_SKIP_IF_EXCLUSIVE arg indicates that we want the
4205 * lock shared and we are allowed to recurse 1 level if this thread already
4206 * owns the lock exclusively... this can legally occur
4207 * if we are doing a shrinking ftruncate against a file
4208 * that is mapped private, and the pages being truncated
4209 * do not currently exist in the cache... in that case
4210 * we will have to page-in the missing pages in order
4211 * to provide them to the private mapping... we must
4212 * also call hfs_unlock_truncate with a positive been_recursed
4213 * arg to indicate that if we have recursed, there is no need to drop
4214 * the lock. Allowing this simple recursion is necessary
4215 * in order to avoid a certain deadlock... since the ftruncate
4216 * already holds the truncate lock exclusively, if we try
4217 * to acquire it shared to protect the pagein path, we will
4218 * hang this thread
4219 *
4220 * NOTE: The if () block below is a workaround in order to prevent a
4221 * VM deadlock. See rdar://7853471.
4222 *
4223 * If we are in a forced unmount, then launchd will still have the
4224 * dyld_shared_cache file mapped as it is trying to reboot. If we
4225 * take the truncate lock here to service a page fault, then our
4226 * thread could deadlock with the forced-unmount. The forced unmount
4227 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
4228 * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount
4229 * thread will think it needs to copy all of the data out of the file
4230 * and into a VM copy object. If we hold the cnode lock here, then that
4231 * VM operation will not be able to proceed, because we'll set a busy page
4232 * before attempting to grab the lock. Note that this isn't as simple as "don't
4233 * call ubc_setsize" because doing that would just shift the problem to the
4234 * ubc_msync done before the vnode is reclaimed.
4235 *
4236 * So, if a forced unmount on this volume is in flight AND the cnode is
4237 * marked C_DELETED, then just go ahead and do the page in without taking
4238 * the lock (thus suspending pagein_v2 semantics temporarily). Since it's on a file
4239 * that is not going to be available on the next mount, this seems like an
4240 * OK solution from a correctness point of view, even though it is hacky.
4241 */
4242 if (vfs_isforce(vp->v_mount)) {
4243 if (cp->c_flag & C_DELETED) {
4244 /* If we don't get it, then just go ahead and operate without the lock */
4245 truncate_lock_held = hfs_try_trunclock(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4246 }
4247 }
4248 else {
4249 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4250 truncate_lock_held = TRUE;
4251 }
4252
4253 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
4254
4255 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
4256 error = EINVAL;
4257 goto pagein_done;
4258 }
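/*
 * Mark the requested page as needed so it is not immediately discarded
 * if we end up aborting ranges of this UPL below.
 */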
4259 ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);
4260
4261 isize = ap->a_size;
4262
4263 /*
4264 * Scan from the back to find the last page in the UPL, so that we
4265 * aren't looking at a UPL that may have already been freed by the
4266 * preceding aborts/completions.
4267 */
4268 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
4269 if (upl_page_present(pl, --pg_index))
4270 break;
4271 if (pg_index == 0) {
4272 /*
4273 * no absent pages were found in the range specified
4274 * just abort the UPL to get rid of it and then we're done
4275 */
4276 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
4277 goto pagein_done;
4278 }
4279 }
4280 /*
4281 * initialize the offset variables before we touch the UPL.
4282 * f_offset is the position into the file, in bytes
4283 * offset is the position into the UPL, in bytes
4284 * pg_index is the pg# of the UPL we're operating on
4285 * isize is the offset into the UPL of the last page that is present.
4286 */
4287 isize = ((pg_index + 1) * PAGE_SIZE);
4288 pg_index = 0;
4289 offset = 0;
4290 f_offset = ap->a_f_offset;
4291
4292 while (isize) {
4293 int xsize;
4294 int num_of_pages;
4295
4296 if ( !upl_page_present(pl, pg_index)) {
4297 /*
4298 * we asked for RET_ONLY_ABSENT, so it's possible
4299 * to get back empty slots in the UPL.
4300 * just skip over them
4301 */
4302 f_offset += PAGE_SIZE;
4303 offset += PAGE_SIZE;
4304 isize -= PAGE_SIZE;
4305 pg_index++;
4306
4307 continue;
4308 }
4309 /*
4310 * We know that we have at least one absent page.
4311 * Now check to see how many we have in a row.
4312 */
4313 num_of_pages = 1;
4314 xsize = isize - PAGE_SIZE;
4315
4316 while (xsize) {
4317 if ( !upl_page_present(pl, pg_index + num_of_pages))
4318 break;
4319 num_of_pages++;
4320 xsize -= PAGE_SIZE;
4321 }
4322 xsize = num_of_pages * PAGE_SIZE;
4323
4324 #if HFS_COMPRESSION
4325 if (VNODE_IS_RSRC(vp)) {
4326 /* allow pageins of the resource fork */
4327 } else {
4328 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
4329
4330 if (compressed) {
4331 if (truncate_lock_held) {
4332 /*
4333 * can't hold the truncate lock when calling into the decmpfs layer
4334 * since it calls back into this layer... even though we're only
4335 * holding the lock in shared mode, and the re-entrant path only
4336 * takes the lock shared, we can deadlock if some other thread
4337 * tries to grab the lock exclusively in between.
4338 */
4339 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4340 truncate_lock_held = FALSE;
4341 }
4342 ap->a_pl = upl;
4343 ap->a_pl_offset = offset;
4344 ap->a_f_offset = f_offset;
4345 ap->a_size = xsize;
4346
4347 error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
4348 /*
4349 * note that decmpfs_pagein_compressed can change the state of
4350 * 'compressed'... it will set it to 0 if the file is no longer
4351 * compressed once the compression lock is successfully taken
4352 * i.e. we would block on that lock while the file is being inflated
4353 */
4354 if (compressed) {
4355 if (error == 0) {
4356 /* successful page-in, update the access time */
4357 VTOC(vp)->c_touch_acctime = TRUE;
4358
4359 /* compressed files are not hot file candidates */
4360 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
4361 fp->ff_bytesread = 0;
4362 }
4363 } else if (error == EAGAIN) {
4364 /*
4365 * EAGAIN indicates someone else already holds the compression lock...
4366 * to avoid deadlocking, we'll abort this range of pages with an
4367 * indication that the pagein needs to be redriven
4368 */
4369 ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
4370 }
4371 goto pagein_next_range;
4372 }
4373 else {
4374 /*
4375 * Set file_converted only if the file became decompressed while we were
4376 * paging in. If it were still compressed, we would re-start the loop using the goto
4377 * in the above block. This avoids overloading truncate_lock_held as our retry_pagein
4378 * condition below, since we could have avoided taking the truncate lock to prevent
4379 * a deadlock in the force unmount case.
4380 */
4381 file_converted = TRUE;
4382 }
4383 }
4384 if (file_converted == TRUE) {
4385 /*
4386 * the file was converted back to a regular file after we first saw it as compressed;
4387 * we need to abort the upl, retake the truncate lock, recreate the UPL and start over.
4388 * Reset a_size so that we consider what remains of the original request
4389 * and null out a_upl and a_pl_offset.
4390 *
4391 * We should only be able to get into this block if the decmpfs_pagein_compressed
4392 * successfully decompressed the range in question for this file.
4393 */
4394 ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
4395
4396 ap->a_size = isize;
4397 ap->a_pl = NULL;
4398 ap->a_pl_offset = 0;
4399
4400 /* Reset file_converted back to false so that we don't infinite-loop. */
4401 file_converted = FALSE;
4402 goto retry_pagein;
4403 }
4404 }
4405 #endif
4406 error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);
4407
4408 /*
4409 * Keep track of blocks read.
4410 */
4411 if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
4412 int bytesread;
4413 int took_cnode_lock = 0;
4414
4415 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
4416 bytesread = fp->ff_size;
4417 else
4418 bytesread = xsize;
4419
4420 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
4421 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
4422 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
4423 took_cnode_lock = 1;
4424 }
4425 /*
4426 * If this file hasn't been seen since the start of
4427 * the current sampling period then start over.
4428 */
4429 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
4430 struct timeval tv;
4431
4432 fp->ff_bytesread = bytesread;
4433 microtime(&tv);
4434 cp->c_atime = tv.tv_sec;
4435 } else {
4436 fp->ff_bytesread += bytesread;
4437 }
4438 cp->c_touch_acctime = TRUE;
4439 if (took_cnode_lock)
4440 hfs_unlock(cp);
4441 }
4442 pagein_next_range:
4443 f_offset += xsize;
4444 offset += xsize;
4445 isize -= xsize;
4446 pg_index += num_of_pages;
4447
4448 error = 0;
4449 }
4450
4451 pagein_done:
4452 if (truncate_lock_held == TRUE) {
4453 /* Note HFS_LOCK_SKIP_IF_EXCLUSIVE is passed to hfs_unlock_truncate to match the recursive acquisition above */
4454 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4455 }
4456
4457 return (error);
4458 }
4459
4460 /*
4461 * Pageout for HFS filesystem.
4462 */
4463 int
4464 hfs_vnop_pageout(struct vnop_pageout_args *ap)
4465 /*
4466 struct vnop_pageout_args {
4467 vnode_t a_vp,
4468 upl_t a_pl,
4469 vm_offset_t a_pl_offset,
4470 off_t a_f_offset,
4471 size_t a_size,
4472 int a_flags
4473 vfs_context_t a_context;
4474 };
4475 */
4476 {
4477 vnode_t vp = ap->a_vp;
4478 struct cnode *cp;
4479 struct filefork *fp;
4480 int retval = 0;
4481 off_t filesize;
4482 upl_t upl;
4483 upl_page_info_t* pl;
4484 vm_offset_t a_pl_offset;
4485 int a_flags;
4486 int is_pageoutv2 = 0;
4487 kern_return_t kret;
4488
4489 cp = VTOC(vp);
4490 fp = VTOF(vp);
4491
4492 /*
4493 * Figure out where the file ends, for pageout purposes. If
4494 * ff_new_size > ff_size, then we're in the middle of extending the
4495 * file via a write, so it is safe (and necessary) that we be able
4496 * to pageout up to that point.
4497 */
4498 filesize = fp->ff_size;
4499 if (fp->ff_new_size > filesize)
4500 filesize = fp->ff_new_size;
4501
4502 a_flags = ap->a_flags;
4503 a_pl_offset = ap->a_pl_offset;
4504
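/*
 * Paging out dirty pages changes the file's content on disk, so bump the
 * generation count for regular files and symlinks.
 */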
4505 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
4506 hfs_incr_gencount (cp);
4507 }
4508
4509 /*
4510 * we can tell if we're getting the new or old behavior from the UPL
4511 */
4512 if ((upl = ap->a_pl) == NULL) {
4513 int request_flags;
4514
4515 is_pageoutv2 = 1;
4516 /*
4517 * we're in control of any UPL we commit
4518 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
4519 */
4520 a_flags &= ~UPL_NOCOMMIT;
4521 a_pl_offset = 0;
4522
4523 /*
4524 * For V2 semantics, we want to take the cnode truncate lock
4525 * shared to guard against the file size changing via zero-filling.
4526 *
4527 * However, we have to be careful because we may be invoked
4528 * via the ubc_msync path to write out dirty mmap'd pages
4529 * in response to a lock event on a content-protected
4530 * filesystem (e.g. to write out class A files).
4531 * As a result, we want to take the truncate lock 'SHARED' with
4532 * the mini-recursion locktype so that we don't deadlock/panic
4533 * because we may already be holding the truncate lock exclusive to force any other
4534 * IOs to have blocked behind us.
4535 */
4536 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4537
4538 if (a_flags & UPL_MSYNC) {
4539 request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
4540 }
4541 else {
4542 request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
4543 }
4544
4545 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);
4546
4547 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
4548 retval = EINVAL;
4549 goto pageout_done;
4550 }
4551 }
4552 /*
4553 * from this point forward upl points at the UPL we're working with
4554 * it was either passed in or we successfully created it
4555 */
4556
4557 /*
4558 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
4559 * UPL instead of relying on the UPL passed into us. We go ahead and do that here,
4560 * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for
4561 * N dirty ranges in the UPL. Note that this is almost a direct copy of the
4562 * logic in vnode_pageout except that we need to do it after grabbing the truncate
4563 * lock in HFS so that we don't invert the lock ordering.
4564 *
4565 * Note that we can still get into this function on behalf of the default pager with
4566 * non-V2 behavior (swapfiles). However in that case, we did not grab locks above
4567 * since fsync and other writing threads will grab the locks, then mark the
4568 * relevant pages as busy. But the pageout codepath marks the pages as busy,
4569 * and THEN would attempt to grab the truncate lock, which would result in deadlock. So
4570 * we do not try to grab anything for the pre-V2 case, which should only be accessed
4571 * by the paging/VM system.
4572 */
4573
4574 if (is_pageoutv2) {
4575 off_t f_offset;
4576 int offset;
4577 int isize;
4578 int pg_index;
4579 int error;
4580 int error_ret = 0;
4581
4582 isize = ap->a_size;
4583 f_offset = ap->a_f_offset;
4584
4585 /*
4586 * Scan from the back to find the last page in the UPL, so that we
4587 * aren't looking at a UPL that may have already been freed by the
4588 * preceding aborts/completions.
4589 */
4590 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
4591 if (upl_page_present(pl, --pg_index))
4592 break;
4593 if (pg_index == 0) {
4594 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
4595 goto pageout_done;
4596 }
4597 }
4598
4599 /*
4600 * initialize the offset variables before we touch the UPL.
4601 * a_f_offset is the position into the file, in bytes
4602 * offset is the position into the UPL, in bytes
4603 * pg_index is the pg# of the UPL we're operating on.
4604 * isize is the offset into the UPL of the last non-clean page.
4605 */
4606 isize = ((pg_index + 1) * PAGE_SIZE);
4607
4608 offset = 0;
4609 pg_index = 0;
4610
4611 while (isize) {
4612 int xsize;
4613 int num_of_pages;
4614
4615 if ( !upl_page_present(pl, pg_index)) {
4616 /*
4617 * we asked for RET_ONLY_DIRTY, so it's possible
4618 * to get back empty slots in the UPL.
4619 * just skip over them
4620 */
4621 f_offset += PAGE_SIZE;
4622 offset += PAGE_SIZE;
4623 isize -= PAGE_SIZE;
4624 pg_index++;
4625
4626 continue;
4627 }
4628 if ( !upl_dirty_page(pl, pg_index)) {
4629 panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
4630 }
4631
4632 /*
4633 * We know that we have at least one dirty page.
4634 * Now check to see how many we have in a row.
4635 */
4636 num_of_pages = 1;
4637 xsize = isize - PAGE_SIZE;
4638
4639 while (xsize) {
4640 if ( !upl_dirty_page(pl, pg_index + num_of_pages))
4641 break;
4642 num_of_pages++;
4643 xsize -= PAGE_SIZE;
4644 }
4645 xsize = num_of_pages * PAGE_SIZE;
4646
4647 if (!vnode_isswap(vp)) {
4648 off_t end_of_range;
4649 int tooklock;
4650
4651 tooklock = 0;
4652
4653 if (cp->c_lockowner != current_thread()) {
4654 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
4655 /*
4656 * we're in the v2 path, so we are the
4657 * owner of the UPL... we may have already
4658 * processed some of the UPL, so abort it
4659 * from the current working offset to the
4660 * end of the UPL
4661 */
4662 ubc_upl_abort_range(upl,
4663 offset,
4664 ap->a_size - offset,
4665 UPL_ABORT_FREE_ON_EMPTY);
4666 goto pageout_done;
4667 }
4668 tooklock = 1;
4669 }
4670 end_of_range = f_offset + xsize - 1;
4671
4672 if (end_of_range >= filesize) {
4673 end_of_range = (off_t)(filesize - 1);
4674 }
4675 if (f_offset < filesize) {
4676 rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
4677 cp->c_flag |= C_MODIFIED; /* leof is dirty */
4678 }
4679 if (tooklock) {
4680 hfs_unlock(cp);
4681 }
4682 }
4683 if ((error = cluster_pageout(vp, upl, offset, f_offset,
4684 xsize, filesize, a_flags))) {
4685 if (error_ret == 0)
4686 error_ret = error;
4687 }
4688 f_offset += xsize;
4689 offset += xsize;
4690 isize -= xsize;
4691 pg_index += num_of_pages;
4692 }
4693 /* capture errnos bubbled out of cluster_pageout if they occurred */
4694 if (error_ret != 0) {
4695 retval = error_ret;
4696 }
4697 } /* end block for v2 pageout behavior */
4698 else {
4699 if (!vnode_isswap(vp)) {
4700 off_t end_of_range;
4701 int tooklock = 0;
4702
4703 if (cp->c_lockowner != current_thread()) {
4704 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
4705 if (!(a_flags & UPL_NOCOMMIT)) {
4706 ubc_upl_abort_range(upl,
4707 a_pl_offset,
4708 ap->a_size,
4709 UPL_ABORT_FREE_ON_EMPTY);
4710 }
4711 goto pageout_done;
4712 }
4713 tooklock = 1;
4714 }
4715 end_of_range = ap->a_f_offset + ap->a_size - 1;
4716
4717 if (end_of_range >= filesize) {
4718 end_of_range = (off_t)(filesize - 1);
4719 }
4720 if (ap->a_f_offset < filesize) {
4721 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
4722 cp->c_flag |= C_MODIFIED; /* leof is dirty */
4723 }
4724
4725 if (tooklock) {
4726 hfs_unlock(cp);
4727 }
4728 }
4729 /*
4730 * just call cluster_pageout for old pre-v2 behavior
4731 */
4732 retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
4733 ap->a_size, filesize, a_flags);
4734 }
4735
4736 /*
4737 * If data was written, update the modification time of the file.
4738 * If setuid or setgid bits are set and this process is not the
4739 * superuser then clear the setuid and setgid bits as a precaution
4740 * against tampering.
4741 */
4742 if (retval == 0) {
4743 cp->c_touch_modtime = TRUE;
4744 cp->c_touch_chgtime = TRUE;
4745 if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
4746 (vfs_context_suser(ap->a_context) != 0)) {
4747 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
4748 cp->c_mode &= ~(S_ISUID | S_ISGID);
4749 hfs_unlock(cp);
4750 }
4751 }
4752
4753 pageout_done:
4754 if (is_pageoutv2) {
4755 /*
4756 * Release the truncate lock. Note that because
4757 * we may have taken the lock recursively by
4758 * being invoked via ubc_msync due to lockdown,
4759 * we should release it recursively, too.
4760 */
4761 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
4762 }
4763 return (retval);
4764 }
4765
4766 /*
4767 * Intercept B-Tree node writes to unswap them if necessary.
4768 */
4769 int
4770 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
4771 {
4772 int retval = 0;
4773 register struct buf *bp = ap->a_bp;
4774 register struct vnode *vp = buf_vnode(bp);
4775 BlockDescriptor block;
4776
4777 /* Trap B-Tree writes */
4778 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
4779 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
4780 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
4781 (vp == VTOHFS(vp)->hfc_filevp)) {
4782
4783 /*
4784 * Swap and validate the node if it is in native byte order.
4785 * This is always true on big endian, so we always validate
4786 * before writing here. On little endian, the node typically has
4787 * been swapped and validated when it was written to the journal,
4788 * so we won't do anything here.
4789 */
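/*
 * The last two bytes of a B-tree node hold the offset of record 0, which
 * always equals sizeof(BTNodeDescriptor) (0x000e), so reading 0x000e here
 * means the node is still in host byte order.
 */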
4790 if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
4791 /* Prepare the block pointer */
4792 block.blockHeader = bp;
4793 block.buffer = (char *)buf_dataptr(bp);
4794 block.blockNum = buf_lblkno(bp);
4795 /* not found in cache ==> came from disk */
4796 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
4797 block.blockSize = buf_count(bp);
4798
4799 /* Endian un-swap B-Tree node */
4800 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
4801 if (retval)
4802 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
4803 }
4804 }
4805
4806 /* This buffer shouldn't be locked anymore, but if it is, clear the flag */
4807 if ((buf_flags(bp) & B_LOCKED)) {
4808 // XXXdbg
4809 if (VTOHFS(vp)->jnl) {
4810 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
4811 }
4812 buf_clearflags(bp, B_LOCKED);
4813 }
4814 retval = vn_bwrite (ap);
4815
4816 return (retval);
4817 }
4818
4819 /*
4820 * Relocate a file to a new location on disk
4821 * cnode must be locked on entry
4822 *
4823 * Relocation occurs by cloning the file's data from its
4824 * current set of blocks to a new set of blocks. During
4825 * the relocation all of the blocks (old and new) are
4826 * owned by the file.
4827 *
4828 * -----------------
4829 * |///////////////|
4830 * -----------------
4831 * 0 N (file offset)
4832 *
4833 * ----------------- -----------------
4834 * |///////////////| | | STEP 1 (acquire new blocks)
4835 * ----------------- -----------------
4836 * 0 N N+1 2N
4837 *
4838 * ----------------- -----------------
4839 * |///////////////| |///////////////| STEP 2 (clone data)
4840 * ----------------- -----------------
4841 * 0 N N+1 2N
4842 *
4843 * -----------------
4844 * |///////////////| STEP 3 (head truncate blocks)
4845 * -----------------
4846 * 0 N
4847 *
4848 * During steps 2 and 3 page-outs to file offsets less
4849 * than or equal to N are suspended.
4850 *
4851 * During step 3 page-ins to the file get suspended.
4852 */
4853 int
4854 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
4855 struct proc *p)
4856 {
4857 struct cnode *cp;
4858 struct filefork *fp;
4859 struct hfsmount *hfsmp;
4860 u_int32_t headblks;
4861 u_int32_t datablks;
4862 u_int32_t blksize;
4863 u_int32_t growsize;
4864 u_int32_t nextallocsave;
4865 daddr64_t sector_a, sector_b;
4866 int eflags;
4867 off_t newbytes;
4868 int retval;
4869 int lockflags = 0;
4870 int took_trunc_lock = 0;
4871 int started_tr = 0;
4872 enum vtype vnodetype;
4873
4874 vnodetype = vnode_vtype(vp);
4875 if (vnodetype != VREG) {
4876 /* Only regular files may be relocated. */
4877 return (EPERM);
4878 }
4879
4880 hfsmp = VTOHFS(vp);
4881 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
4882 return (ENOSPC);
4883 }
4884
4885 cp = VTOC(vp);
4886 fp = VTOF(vp);
4887 if (fp->ff_unallocblocks)
4888 return (EINVAL);
4889
4890 #if CONFIG_PROTECT
4891 /*
4892 * <rdar://problem/9118426>
4893 * Disable HFS file relocation on content-protected filesystems
4894 */
4895 if (cp_fs_protected (hfsmp->hfs_mp)) {
4896 return EINVAL;
4897 }
4898 #endif
4899 /* If it's an SSD, also disable HFS relocation */
4900 if (hfsmp->hfs_flags & HFS_SSD) {
4901 return EINVAL;
4902 }
4903
4904
4905 blksize = hfsmp->blockSize;
4906 if (blockHint == 0)
4907 blockHint = hfsmp->nextAllocation;
4908
4909 if (fp->ff_size > 0x7fffffff) {
4910 return (EFBIG);
4911 }
4912
4913 //
4914 // We do not believe that this call to hfs_fsync() is
4915 // necessary and it causes a journal transaction
4916 // deadlock so we are removing it.
4917 //
4918 //if (vnodetype == VREG && !vnode_issystem(vp)) {
4919 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
4920 // if (retval)
4921 // return (retval);
4922 //}
4923
4924 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
4925 hfs_unlock(cp);
4926 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
4927 /* Force the lock since the caller expects it to be held. */
4928 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) {
4929 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
4930 return (retval);
4931 }
4932 /* No need to continue if file was removed. */
4933 if (cp->c_flag & C_NOEXISTS) {
4934 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
4935 return (ENOENT);
4936 }
4937 took_trunc_lock = 1;
4938 }
4939 headblks = fp->ff_blocks;
4940 datablks = howmany(fp->ff_size, blksize);
4941 growsize = datablks * blksize;
4942 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
4943 if (blockHint >= hfsmp->hfs_metazone_start &&
4944 blockHint <= hfsmp->hfs_metazone_end)
4945 eflags |= kEFMetadataMask;
4946
4947 if (hfs_start_transaction(hfsmp) != 0) {
4948 if (took_trunc_lock)
4949 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
4950 return (EINVAL);
4951 }
4952 started_tr = 1;
4953 /*
4954 * Protect the extents b-tree and the allocation bitmap
4955 * during MapFileBlockC and ExtendFileC operations.
4956 */
4957 lockflags = SFL_BITMAP;
4958 if (overflow_extents(fp))
4959 lockflags |= SFL_EXTENTS;
4960 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4961
4962 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
4963 if (retval) {
4964 retval = MacToVFSError(retval);
4965 goto out;
4966 }
4967
4968 /*
4969 * STEP 1 - acquire new allocation blocks.
4970 */
4971 nextallocsave = hfsmp->nextAllocation;
4972 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
4973 if (eflags & kEFMetadataMask) {
4974 hfs_lock_mount(hfsmp);
4975 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
4976 MarkVCBDirty(hfsmp);
4977 hfs_unlock_mount(hfsmp);
4978 }
4979
4980 retval = MacToVFSError(retval);
4981 if (retval == 0) {
4982 cp->c_flag |= C_MODIFIED;
4983 if (newbytes < growsize) {
4984 retval = ENOSPC;
4985 goto restore;
4986 } else if (fp->ff_blocks < (headblks + datablks)) {
4987 printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN);
4988 retval = ENOSPC;
4989 goto restore;
4990 }
4991
4992 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
4993 if (retval) {
4994 retval = MacToVFSError(retval);
4995 } else if ((sector_a + 1) == sector_b) {
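/*
 * The new allocation begins in the sector immediately following the old
 * one, so the data would not actually move anywhere new; give the space
 * back and treat this as a failure.
 */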
4996 retval = ENOSPC;
4997 goto restore;
4998 } else if ((eflags & kEFMetadataMask) &&
4999 ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
5000 hfsmp->hfs_metazone_end)) {
5001 #if 0
5002 const char * filestr;
5003 char emptystr = '\0';
5004
5005 if (cp->c_desc.cd_nameptr != NULL) {
5006 filestr = (const char *)&cp->c_desc.cd_nameptr[0];
5007 } else if (vnode_name(vp) != NULL) {
5008 filestr = vnode_name(vp);
5009 } else {
5010 filestr = &emptystr;
5011 }
5012 #endif
5013 retval = ENOSPC;
5014 goto restore;
5015 }
5016 }
5017 /* Done with system locks and journal for now. */
5018 hfs_systemfile_unlock(hfsmp, lockflags);
5019 lockflags = 0;
5020 hfs_end_transaction(hfsmp);
5021 started_tr = 0;
5022
5023 if (retval) {
5024 /*
5025 * Check to see if failure is due to excessive fragmentation.
5026 */
5027 if ((retval == ENOSPC) &&
5028 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
5029 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
5030 }
5031 goto out;
5032 }
5033 /*
5034 * STEP 2 - clone file data into the new allocation blocks.
5035 */
5036
5037 if (vnodetype == VLNK)
5038 retval = EPERM;
5039 else if (vnode_issystem(vp))
5040 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
5041 else
5042 retval = hfs_clonefile(vp, headblks, datablks, blksize);
5043
5044 /* Start transaction for step 3 or for a restore. */
5045 if (hfs_start_transaction(hfsmp) != 0) {
5046 retval = EINVAL;
5047 goto out;
5048 }
5049 started_tr = 1;
5050 if (retval)
5051 goto restore;
5052
5053 /*
5054 * STEP 3 - switch to cloned data and remove old blocks.
5055 */
5056 lockflags = SFL_BITMAP;
5057 if (overflow_extents(fp))
5058 lockflags |= SFL_EXTENTS;
5059 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5060
5061 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
5062
5063 hfs_systemfile_unlock(hfsmp, lockflags);
5064 lockflags = 0;
5065 if (retval)
5066 goto restore;
5067 out:
5068 if (took_trunc_lock)
5069 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5070
5071 if (lockflags) {
5072 hfs_systemfile_unlock(hfsmp, lockflags);
5073 lockflags = 0;
5074 }
5075
5076 /* Push cnode's new extent data to disk. */
5077 if (retval == 0) {
5078 (void) hfs_update(vp, MNT_WAIT);
5079 }
5080 if (hfsmp->jnl) {
5081 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
5082 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
5083 else
5084 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
5085 }
5086 exit:
5087 if (started_tr)
5088 hfs_end_transaction(hfsmp);
5089
5090 return (retval);
5091
5092 restore:
5093 if (fp->ff_blocks == headblks) {
5094 if (took_trunc_lock)
5095 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5096 goto exit;
5097 }
5098 /*
5099 * Give back any newly allocated space.
5100 */
5101 if (lockflags == 0) {
5102 lockflags = SFL_BITMAP;
5103 if (overflow_extents(fp))
5104 lockflags |= SFL_EXTENTS;
5105 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5106 }
5107
5108 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
5109 FTOC(fp)->c_fileid, false);
5110
5111 hfs_systemfile_unlock(hfsmp, lockflags);
5112 lockflags = 0;
5113
5114 if (took_trunc_lock)
5115 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
5116 goto exit;
5117 }
5118
5119
5120 /*
5121 * Clone a file's data within the file.
5122 *
5123 */
5124 static int
5125 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
5126 {
5127 caddr_t bufp;
5128 size_t bufsize;
5129 size_t copysize;
5130 size_t iosize;
5131 size_t offset;
5132 off_t writebase;
5133 uio_t auio;
5134 int error = 0;
5135
5136 writebase = blkstart * blksize;
5137 copysize = blkcnt * blksize;
5138 iosize = bufsize = MIN(copysize, 128 * 1024);
5139 offset = 0;
5140
5141 hfs_unlock(VTOC(vp));
5142
5143 #if CONFIG_PROTECT
5144 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
5145 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5146 return (error);
5147 }
5148 #endif /* CONFIG_PROTECT */
5149
5150 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
5151 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5152 return (ENOMEM);
5153 }
5154
5155 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
5156
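/*
 * Copy the fork in bufsize-sized chunks: read each chunk from the start of
 * the file, then write the same bytes at writebase, i.e. into the newly
 * allocated blocks that follow the original ones.
 */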
5157 while (offset < copysize) {
5158 iosize = MIN(copysize - offset, iosize);
5159
5160 uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
5161 uio_addiov(auio, (uintptr_t)bufp, iosize);
5162
5163 error = cluster_read(vp, auio, copysize, IO_NOCACHE);
5164 if (error) {
5165 printf("hfs_clonefile: cluster_read failed - %d\n", error);
5166 break;
5167 }
5168 if (uio_resid(auio) != 0) {
5169 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
5170 error = EIO;
5171 break;
5172 }
5173
5174 uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
5175 uio_addiov(auio, (uintptr_t)bufp, iosize);
5176
5177 error = cluster_write(vp, auio, writebase + offset,
5178 writebase + offset + iosize,
5179 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
5180 if (error) {
5181 printf("hfs_clonefile: cluster_write failed - %d\n", error);
5182 break;
5183 }
5184 if (uio_resid(auio) != 0) {
5185 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
5186 error = EIO;
5187 break;
5188 }
5189 offset += iosize;
5190 }
5191 uio_free(auio);
5192
5193 if ((blksize & PAGE_MASK)) {
5194 /*
5195 * since the copy may not have started on a PAGE
5196 * boundary (or may not have ended on one), we
5197 * may have pages left in the cache since NOCACHE
5198 * will let partially written pages linger...
5199 * let's just flush the entire range to make sure
5200 * we don't have any pages left that are beyond
5201 * (or intersect) the real LEOF of this file
5202 */
5203 ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
5204 } else {
5205 /*
5206 * No need to call ubc_sync_range or hfs_invalbuf
5207 * since the file was copied using IO_NOCACHE and
5208 * the copy was done starting and ending on a page
5209 * boundary in the file.
5210 */
5211 }
5212 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
5213
5214 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5215 return (error);
5216 }
5217
5218 /*
5219 * Clone a system (metadata) file.
5220 *
5221 */
5222 static int
5223 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
5224 kauth_cred_t cred, struct proc *p)
5225 {
5226 caddr_t bufp;
5227 char * offset;
5228 size_t bufsize;
5229 size_t iosize;
5230 struct buf *bp = NULL;
5231 daddr64_t blkno;
5232 daddr64_t blk;
5233 daddr64_t start_blk;
5234 daddr64_t last_blk;
5235 int breadcnt;
5236 int i;
5237 int error = 0;
5238
5239
5240 iosize = GetLogicalBlockSize(vp);
5241 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
5242 breadcnt = bufsize / iosize;
5243
5244 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
5245 return (ENOMEM);
5246 }
5247 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
5248 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
5249 blkno = 0;
5250
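/*
 * Reads walk the original logical blocks [0, last_blk); each block is then
 * written back at the same offset shifted by start_blk, i.e. into the newly
 * allocated tail of the system file.
 */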
5251 while (blkno < last_blk) {
5252 /*
5253 * Read up to a megabyte
5254 */
5255 offset = bufp;
5256 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
5257 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
5258 if (error) {
5259 printf("hfs_clonesysfile: meta_bread error %d\n", error);
5260 goto out;
5261 }
5262 if (buf_count(bp) != iosize) {
5263 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
5264 goto out;
5265 }
5266 bcopy((char *)buf_dataptr(bp), offset, iosize);
5267
5268 buf_markinvalid(bp);
5269 buf_brelse(bp);
5270 bp = NULL;
5271
5272 offset += iosize;
5273 }
5274
5275 /*
5276 * Write up to a megabyte
5277 */
5278 offset = bufp;
5279 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
5280 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
5281 if (bp == NULL) {
5282 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
5283 error = EIO;
5284 goto out;
5285 }
5286 bcopy(offset, (char *)buf_dataptr(bp), iosize);
5287 error = (int)buf_bwrite(bp);
5288 bp = NULL;
5289 if (error)
5290 goto out;
5291 offset += iosize;
5292 }
5293 }
5294 out:
5295 if (bp) {
5296 buf_brelse(bp);
5297 }
5298
5299 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
5300
5301 error = hfs_fsync(vp, MNT_WAIT, 0, p);
5302
5303 return (error);
5304 }