1 /*
2 * Copyright (c) 2000-2012 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* @(#)hfs_readwrite.c 1.0
29 *
30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
31 *
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
33 *
34 */
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/resourcevar.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/filedesc.h>
42 #include <sys/stat.h>
43 #include <sys/buf.h>
44 #include <sys/buf_internal.h>
45 #include <sys/proc.h>
46 #include <sys/kauth.h>
47 #include <sys/vnode.h>
48 #include <sys/vnode_internal.h>
49 #include <sys/uio.h>
50 #include <sys/vfs_context.h>
51 #include <sys/fsevents.h>
52 #include <kern/kalloc.h>
53 #include <sys/disk.h>
54 #include <sys/sysctl.h>
55 #include <sys/fsctl.h>
56 #include <sys/mount_internal.h>
57
58 #include <miscfs/specfs/specdev.h>
59
60 #include <sys/ubc.h>
61 #include <sys/ubc_internal.h>
62
63 #include <vm/vm_pageout.h>
64 #include <vm/vm_kern.h>
65
66 #include <sys/kdebug.h>
67
68 #include "hfs.h"
69 #include "hfs_attrlist.h"
70 #include "hfs_endian.h"
71 #include "hfs_fsctl.h"
72 #include "hfs_quota.h"
73 #include "hfscommon/headers/FileMgrInternal.h"
74 #include "hfscommon/headers/BTreesInternal.h"
75 #include "hfs_cnode.h"
76 #include "hfs_dbg.h"
77
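/*
 * can_cluster() gates cluster I/O: the transfer size must be a multiple
 * of 4K and no larger than half of MAXPHYSIO.
 */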
78 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
79
80 enum {
81 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
82 };
83
84 /* from bsd/hfs/hfs_vfsops.c */
85 extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
86
87 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
88 static int hfs_clonefile(struct vnode *, int, int, int);
89 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
90 static int hfs_minorupdate(struct vnode *vp);
91 static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
92
93
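/*
 * Exported as the kern.flush_cache_on_write sysctl, so it can be toggled
 * from userspace, e.g. "sysctl -w kern.flush_cache_on_write=1".
 */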
94 int flush_cache_on_write = 0;
95 SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW | CTLFLAG_LOCKED, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
96
97 /*
98 * Read data from a file.
99 */
100 int
101 hfs_vnop_read(struct vnop_read_args *ap)
102 {
103 /*
104 struct vnop_read_args {
105 struct vnodeop_desc *a_desc;
106 vnode_t a_vp;
107 struct uio *a_uio;
108 int a_ioflag;
109 vfs_context_t a_context;
110 };
111 */
112
113 uio_t uio = ap->a_uio;
114 struct vnode *vp = ap->a_vp;
115 struct cnode *cp;
116 struct filefork *fp;
117 struct hfsmount *hfsmp;
118 off_t filesize;
119 off_t filebytes;
120 off_t start_resid = uio_resid(uio);
121 off_t offset = uio_offset(uio);
122 int retval = 0;
123 int took_truncate_lock = 0;
124 int io_throttle = 0;
125
126 /* Preflight checks */
127 if (!vnode_isreg(vp)) {
128 /* can only read regular files */
129 if (vnode_isdir(vp))
130 return (EISDIR);
131 else
132 return (EPERM);
133 }
134 if (start_resid == 0)
135 return (0); /* Nothing left to do */
136 if (offset < 0)
137 return (EINVAL); /* can't read from a negative offset */
138
139 #if HFS_COMPRESSION
140 if (VNODE_IS_RSRC(vp)) {
141 if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
142 return 0;
143 }
144 /* otherwise read the resource fork normally */
145 } else {
146 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
147 if (compressed) {
148 retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
149 if (compressed) {
150 if (retval == 0) {
151 /* successful read, update the access time */
152 VTOC(vp)->c_touch_acctime = TRUE;
153
154 /* compressed files are not hot file candidates */
155 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
156 VTOF(vp)->ff_bytesread = 0;
157 }
158 }
159 return retval;
160 }
161 /* otherwise the file was converted back to a regular file while we were reading it */
162 retval = 0;
163 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
164 int error;
165
166 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
167 if (error) {
168 return error;
169 }
170
171 }
172 }
173 #endif /* HFS_COMPRESSION */
174
175 cp = VTOC(vp);
176 fp = VTOF(vp);
177 hfsmp = VTOHFS(vp);
178
179 #if CONFIG_PROTECT
180 if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {
181 goto exit;
182 }
183 #endif
184
185 /*
186 * If this read request originated from a syscall (as opposed to
187 * an in-kernel page fault or something), then set it up for
188 * throttle checks. For example, large EAs may cause a VNOP_READ
189 * to occur, and we wouldn't want to throttle I/O while holding the
190 * EA B-Tree lock.
191 */
192 if (ap->a_ioflag & IO_SYSCALL_DISPATCH) {
193 io_throttle = IO_RETURN_ON_THROTTLE;
194 }
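/*
 * If the read does get throttled, cluster_read() hands back EAGAIN
 * instead of blocking; we drop the truncate lock, sleep in
 * throttle_lowpri_io(), and retry from read_again (see the exit path).
 */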
195
196 read_again:
197
198 /* Protect against a size change. */
199 hfs_lock_truncate(cp, HFS_SHARED_LOCK);
200 took_truncate_lock = 1;
201
202 filesize = fp->ff_size;
203 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
204 if (offset > filesize) {
205 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
206 (offset > (off_t)MAXHFSFILESIZE)) {
207 retval = EFBIG;
208 }
209 goto exit;
210 }
211
212 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
213 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
214
215 retval = cluster_read(vp, uio, filesize, ap->a_ioflag | (io_throttle));
216
217 cp->c_touch_acctime = TRUE;
218
219 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
220 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
221
222 /*
223 * Keep track of blocks read
224 */
225 if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
226 int took_cnode_lock = 0;
227 off_t bytesread;
228
229 bytesread = start_resid - uio_resid(uio);
230
231 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
232 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
233 hfs_lock(cp, HFS_FORCE_LOCK);
234 took_cnode_lock = 1;
235 }
236 /*
237 * If this file hasn't been seen since the start of
238 * the current sampling period then start over.
239 */
240 if (cp->c_atime < hfsmp->hfc_timebase) {
241 struct timeval tv;
242
243 fp->ff_bytesread = bytesread;
244 microtime(&tv);
245 cp->c_atime = tv.tv_sec;
246 } else {
247 fp->ff_bytesread += bytesread;
248 }
249 if (took_cnode_lock)
250 hfs_unlock(cp);
251 }
252 exit:
253 if (took_truncate_lock) {
254 hfs_unlock_truncate(cp, 0);
255 }
256 if (retval == EAGAIN) {
257 throttle_lowpri_io(1);
258
259 retval = 0;
260 goto read_again;
261 }
262 return (retval);
263 }
264
265 /*
266 * Write data to a file.
267 */
268 int
269 hfs_vnop_write(struct vnop_write_args *ap)
270 {
271 uio_t uio = ap->a_uio;
272 struct vnode *vp = ap->a_vp;
273 struct cnode *cp;
274 struct filefork *fp;
275 struct hfsmount *hfsmp;
276 kauth_cred_t cred = NULL;
277 off_t origFileSize;
278 off_t writelimit;
279 off_t bytesToAdd = 0;
280 off_t actualBytesAdded;
281 off_t filebytes;
282 off_t offset;
283 ssize_t resid;
284 int eflags;
285 int ioflag = ap->a_ioflag;
286 int retval = 0;
287 int lockflags;
288 int cnode_locked = 0;
289 int partialwrite = 0;
290 int do_snapshot = 1;
291 time_t orig_ctime=VTOC(vp)->c_ctime;
292 int took_truncate_lock = 0;
293 int io_return_on_throttle = 0;
294 struct rl_entry *invalid_range;
295
296 #if HFS_COMPRESSION
297 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
298 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
299 switch(state) {
300 case FILE_IS_COMPRESSED:
301 return EACCES;
302 case FILE_IS_CONVERTING:
303 /* if FILE_IS_CONVERTING, we allow writes but do not
304 bother with snapshots or else we will deadlock.
305 */
306 do_snapshot = 0;
307 break;
308 default:
309 printf("invalid state %d for compressed file\n", state);
310 /* fall through */
311 }
312 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
313 int error;
314
315 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
316 if (error != 0) {
317 return error;
318 }
319 }
320
321 if (do_snapshot) {
322 check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);
323 }
324
325 #endif
326
327 // LP64todo - fix this! uio_resid may be 64-bit value
328 resid = uio_resid(uio);
329 offset = uio_offset(uio);
330
331 if (offset < 0)
332 return (EINVAL);
333 if (resid == 0)
334 return (E_NONE);
335 if (!vnode_isreg(vp))
336 return (EPERM); /* Can only write regular files */
337
338 cp = VTOC(vp);
339 fp = VTOF(vp);
340 hfsmp = VTOHFS(vp);
341
342 #if CONFIG_PROTECT
343 if ((retval = cp_handle_vnop (vp, CP_WRITE_ACCESS, 0)) != 0) {
344 goto exit;
345 }
346 #endif
347
348 eflags = kEFDeferMask; /* defer file block allocations */
349 #if HFS_SPARSE_DEV
350 /*
351 * When the underlying device is sparse and space
352 * is low (< 8MB), stop doing delayed allocations
353 * and begin doing synchronous I/O.
354 */
355 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
356 (hfs_freeblks(hfsmp, 0) < 2048)) {
357 eflags &= ~kEFDeferMask;
358 ioflag |= IO_SYNC;
359 }
360 #endif /* HFS_SPARSE_DEV */
361
362 if ((ioflag & (IO_SINGLE_WRITER | IO_RETURN_ON_THROTTLE)) ==
363 (IO_SINGLE_WRITER | IO_RETURN_ON_THROTTLE)) {
364 io_return_on_throttle = IO_RETURN_ON_THROTTLE;
365 }
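/*
 * With IO_RETURN_ON_THROTTLE set, cluster_write() returns EAGAIN rather
 * than blocking when throttled; we then unwind our locks, wait in
 * throttle_lowpri_io(), and restart at "again" (see the exit path).
 */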
366 again:
368 /*
369 * Protect against a size change.
370 *
371 * Note: If took_truncate_lock is true, then we previously got the lock shared
372 * but needed to upgrade to exclusive. So try getting it exclusive from the
373 * start.
374 */
375 if (ioflag & IO_APPEND || took_truncate_lock) {
376 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
377 }
378 else {
379 hfs_lock_truncate(cp, HFS_SHARED_LOCK);
380 }
381 took_truncate_lock = 1;
382
383 /* Update UIO */
384 if (ioflag & IO_APPEND) {
385 uio_setoffset(uio, fp->ff_size);
386 offset = fp->ff_size;
387 }
388 if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
389 retval = EPERM;
390 goto exit;
391 }
392
393 origFileSize = fp->ff_size;
394 writelimit = offset + resid;
395 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
396
397 /*
398 * We may need an exclusive truncate lock for several reasons, all
399 * of which are because we may be writing to a (portion of a) block
400 * for the first time, and we need to make sure no readers see the
401 * prior, uninitialized contents of the block. The cases are:
402 *
403 * 1. We have unallocated (delayed allocation) blocks. We may be
404 * allocating new blocks to the file and writing to them.
405 * (A more precise check would be whether the range we're writing
406 * to contains delayed allocation blocks.)
407 * 2. We need to extend the file. The bytes between the old EOF
408 * and the new EOF are not yet initialized. This is important
409 * even if we're not allocating new blocks to the file. If the
410 * old EOF and new EOF are in the same block, we still need to
411 * protect that range of bytes until they are written for the
412 * first time.
413 * 3. The write overlaps some invalid ranges (delayed zero fill; that
414 * part of the file has been allocated, but not yet written).
415 *
416 * If we had a shared lock with the above cases, we need to try to upgrade
417 * to an exclusive lock. If the upgrade fails, we will lose the shared
418 * lock, and will need to take the truncate lock again; the took_truncate_lock
419 * flag will still be set, causing us to try for an exclusive lock next time.
420 *
421 * NOTE: Testing for #3 (delayed zero fill) needs to be done while the cnode
422 * lock is held, since it protects the range lists.
423 */
424 if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
425 ((fp->ff_unallocblocks != 0) ||
426 (writelimit > origFileSize))) {
427 if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
428 /*
429 * Lock upgrade failed and we lost our shared lock, try again.
430 * Note: we do not set took_truncate_lock=0 here. Leaving it
431 * set to 1 will cause us to try to get the lock exclusive.
432 */
433 goto again;
434 }
435 else {
436 /* Store the owner in the c_truncatelockowner field if we successfully upgrade */
437 cp->c_truncatelockowner = current_thread();
438 }
439 }
440
441 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
442 goto exit;
443 }
444 cnode_locked = 1;
445
446 /*
447 * Now that we have the cnode lock, see if there are delayed zero fill ranges
448 * overlapping our write. If so, we need the truncate lock exclusive (see above).
449 */
450 if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
451 (rl_scan(&fp->ff_invalidranges, offset, writelimit-1, &invalid_range) != RL_NOOVERLAP)) {
452 /*
453 * When testing, it appeared that calling lck_rw_lock_shared_to_exclusive() causes
454 * a deadlock, rather than simply returning failure. (That is, it apparently does
455 * not behave like a "try_lock"). Since this condition is rare, just drop the
456 * cnode lock and try again. Since took_truncate_lock is set, we will
457 * automatically take the truncate lock exclusive.
458 */
459 hfs_unlock(cp);
460 cnode_locked = 0;
461 hfs_unlock_truncate(cp, 0);
462 goto again;
463 }
464
465 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
466 (int)offset, uio_resid(uio), (int)fp->ff_size,
467 (int)filebytes, 0);
468
469 /* Check if we do not need to extend the file */
470 if (writelimit <= filebytes) {
471 goto sizeok;
472 }
473
474 cred = vfs_context_ucred(ap->a_context);
475 bytesToAdd = writelimit - filebytes;
476
477 #if QUOTA
478 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
479 cred, 0);
480 if (retval)
481 goto exit;
482 #endif /* QUOTA */
483
484 if (hfs_start_transaction(hfsmp) != 0) {
485 retval = EINVAL;
486 goto exit;
487 }
488
489 while (writelimit > filebytes) {
490 bytesToAdd = writelimit - filebytes;
491 if (cred && suser(cred, NULL) != 0)
492 eflags |= kEFReserveMask;
493
494 /* Protect extents b-tree and allocation bitmap */
495 lockflags = SFL_BITMAP;
496 if (overflow_extents(fp))
497 lockflags |= SFL_EXTENTS;
498 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
499
500 /* Files that are changing size are not hot file candidates. */
501 if (hfsmp->hfc_stage == HFC_RECORDING) {
502 fp->ff_bytesread = 0;
503 }
504 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
505 0, eflags, &actualBytesAdded));
506
507 hfs_systemfile_unlock(hfsmp, lockflags);
508
509 if ((actualBytesAdded == 0) && (retval == E_NONE))
510 retval = ENOSPC;
511 if (retval != E_NONE)
512 break;
513 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
514 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
515 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
516 }
517 (void) hfs_update(vp, TRUE);
518 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
519 (void) hfs_end_transaction(hfsmp);
520
521 /*
522 * If we didn't grow the file enough, try a partial write.
523 * POSIX expects this behavior.
524 */
525 if ((retval == ENOSPC) && (filebytes > offset)) {
526 retval = 0;
527 partialwrite = 1;
528 uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
529 resid -= bytesToAdd;
530 writelimit = filebytes;
531 }
532 sizeok:
533 if (retval == E_NONE) {
534 off_t filesize;
535 off_t zero_off;
536 off_t tail_off;
537 off_t inval_start;
538 off_t inval_end;
539 off_t io_start;
540 int lflag;
541
542 if (writelimit > fp->ff_size)
543 filesize = writelimit;
544 else
545 filesize = fp->ff_size;
546
547 lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
548
549 if (offset <= fp->ff_size) {
550 zero_off = offset & ~PAGE_MASK_64;
551
552 /* Check whether the area between zero_off and the start
553 of the transfer is invalid and should be zero-filled
554 as part of the transfer:
555 */
556 if (offset > zero_off) {
557 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
558 lflag |= IO_HEADZEROFILL;
559 }
560 } else {
561 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
562
563 /* The bytes between fp->ff_size and uio->uio_offset must never be
564 read without being zeroed. The current last block is filled with zeroes
565 if it holds valid data but in all cases merely do a little bookkeeping
566 to track the area from the end of the current last page to the start of
567 the area actually written. For the same reason only the bytes up to the
568 start of the page where this write will start are invalidated; any remainder
569 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
570
571 Note that inval_start, the start of the page after the current EOF,
572 may be past the start of the write, in which case the zeroing
573 will be handled by the cluster_write of the actual data.
574 */
575 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
576 inval_end = offset & ~PAGE_MASK_64;
577 zero_off = fp->ff_size;
578
579 if ((fp->ff_size & PAGE_MASK_64) &&
580 (rl_scan(&fp->ff_invalidranges,
581 eof_page_base,
582 fp->ff_size - 1,
583 &invalid_range) != RL_NOOVERLAP)) {
584 /* The page containing the EOF is not valid, so the
585 entire page must be made inaccessible now. If the write
586 starts on a page beyond the page containing the eof
587 (inval_end > eof_page_base), add the
588 whole page to the range to be invalidated. Otherwise
589 (i.e. if the write starts on the same page), zero-fill
590 the entire page explicitly now:
591 */
592 if (inval_end > eof_page_base) {
593 inval_start = eof_page_base;
594 } else {
595 zero_off = eof_page_base;
596 };
597 };
598
599 if (inval_start < inval_end) {
600 struct timeval tv;
601 /* There's some range of data that's going to be marked invalid */
602
603 if (zero_off < inval_start) {
604 /* The pages between inval_start and inval_end are going to be invalidated,
605 and the actual write will start on a page past inval_end. Now's the last
606 chance to zero-fill the page containing the EOF:
607 */
608 hfs_unlock(cp);
609 cnode_locked = 0;
610 retval = cluster_write(vp, (uio_t) 0,
611 fp->ff_size, inval_start,
612 zero_off, (off_t)0,
613 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
614 hfs_lock(cp, HFS_FORCE_LOCK);
615 cnode_locked = 1;
616 if (retval) goto ioerr_exit;
617 offset = uio_offset(uio);
618 };
619
620 /* Mark the remaining area of the newly allocated space as invalid: */
621 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
622 microuptime(&tv);
623 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
624 zero_off = fp->ff_size = inval_end;
625 };
626
627 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
628 };
629
630 /* Check to see whether the area between the end of the write and the end of
631 the page it falls in is invalid and should be zero-filled as part of the transfer:
632 */
633 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
634 if (tail_off > filesize) tail_off = filesize;
635 if (tail_off > writelimit) {
636 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
637 lflag |= IO_TAILZEROFILL;
638 };
639 };
640
641 /*
642 * if the write starts beyond the current EOF (possibly advanced in the
643 * zeroing of the last block, above), then we'll zero fill from the current EOF
644 * to where the write begins:
645 *
646 * NOTE: If (and ONLY if) the portion of the file about to be written is
647 * before the current EOF it might be marked as invalid now and must be
648 * made readable (removed from the invalid ranges) before cluster_write
649 * tries to write it:
650 */
651 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
652 if (io_start < fp->ff_size) {
653 off_t io_end;
654
655 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
656 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
657 };
658
659 hfs_unlock(cp);
660 cnode_locked = 0;
661
662 /*
663 * We need to tell UBC the fork's new size BEFORE calling
664 * cluster_write, in case any of the new pages need to be
665 * paged out before cluster_write completes (which does happen
666 * in embedded systems due to extreme memory pressure).
667 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
668 * will be, so that it can pass that on to cluster_pageout, and
669 * allow those pageouts.
670 *
671 * We don't update ff_size yet since we don't want pageins to
672 * be able to see uninitialized data between the old and new
673 * EOF, until cluster_write has completed and initialized that
674 * part of the file.
675 *
676 * The vnode pager relies on the file size last given to UBC via
677 * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
678 * ff_size (whichever is larger). NOTE: ff_new_size is always
679 * zero, unless we are extending the file via write.
680 */
681 if (filesize > fp->ff_size) {
682 fp->ff_new_size = filesize;
683 ubc_setsize(vp, filesize);
684 }
685 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
686 tail_off, lflag | IO_NOZERODIRTY | io_return_on_throttle);
687 if (retval) {
688 fp->ff_new_size = 0; /* no longer extending; use ff_size */
689
690 if (retval == EAGAIN) {
691 /*
692 * EAGAIN indicates that we still have I/O to do, but
693 * that we now need to be throttled
694 */
695 if (resid != uio_resid(uio)) {
696 /*
697 * did manage to do some I/O before returning EAGAIN
698 */
699 resid = uio_resid(uio);
700 offset = uio_offset(uio);
701
702 cp->c_touch_chgtime = TRUE;
703 cp->c_touch_modtime = TRUE;
704 }
705 if (filesize > fp->ff_size) {
706 /*
707 * we called ubc_setsize before the call to
708 * cluster_write... since we only partially
709 * completed the I/O, we need to
710 * re-adjust our idea of the filesize based
711 * on our interim EOF
712 */
713 ubc_setsize(vp, offset);
714
715 fp->ff_size = offset;
716 }
717 goto exit;
718 }
719 if (filesize > origFileSize) {
720 ubc_setsize(vp, origFileSize);
721 }
722 goto ioerr_exit;
723 }
724
725 if (filesize > origFileSize) {
726 fp->ff_size = filesize;
727
728 /* Files that are changing size are not hot file candidates. */
729 if (hfsmp->hfc_stage == HFC_RECORDING) {
730 fp->ff_bytesread = 0;
731 }
732 }
733 fp->ff_new_size = 0; /* ff_size now has the correct size */
734
735 /* If we wrote some bytes, then touch the change and mod times */
736 if (resid > uio_resid(uio)) {
737 cp->c_touch_chgtime = TRUE;
738 cp->c_touch_modtime = TRUE;
739 }
740 }
741 if (partialwrite) {
742 uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
743 resid += bytesToAdd;
744 }
745
746 // XXXdbg - see radar 4871353 for more info
747 {
748 if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
749 VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
750 }
751 }
752
753 ioerr_exit:
754 /*
755 * If we successfully wrote any data and we are not the superuser,
756 * we clear the setuid and setgid bits as a precaution against
757 * tampering.
758 */
759 if (cp->c_mode & (S_ISUID | S_ISGID)) {
760 cred = vfs_context_ucred(ap->a_context);
761 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
762 if (!cnode_locked) {
763 hfs_lock(cp, HFS_FORCE_LOCK);
764 cnode_locked = 1;
765 }
766 cp->c_mode &= ~(S_ISUID | S_ISGID);
767 }
768 }
769 if (retval) {
770 if (ioflag & IO_UNIT) {
771 if (!cnode_locked) {
772 hfs_lock(cp, HFS_FORCE_LOCK);
773 cnode_locked = 1;
774 }
775 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
776 0, 0, ap->a_context);
777 // LP64todo - fix this! resid needs to be user_ssize_t
778 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
779 uio_setresid(uio, resid);
780 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
781 }
782 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
783 if (!cnode_locked) {
784 hfs_lock(cp, HFS_FORCE_LOCK);
785 cnode_locked = 1;
786 }
787 retval = hfs_update(vp, TRUE);
788 }
789 /* Updating vcbWrCnt doesn't need to be atomic. */
790 hfsmp->vcbWrCnt++;
791
792 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
793 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
794 exit:
795 if (cnode_locked)
796 hfs_unlock(cp);
797
798 if (took_truncate_lock) {
799 hfs_unlock_truncate(cp, 0);
800 }
801 if (retval == EAGAIN) {
802 throttle_lowpri_io(1);
803
804 retval = 0;
805 goto again;
806 }
807 return (retval);
808 }
809
810 /* support for the "bulk-access" fcntl */
811
812 #define CACHE_LEVELS 16
813 #define NUM_CACHE_ENTRIES (64*16)
814 #define PARENT_IDS_FLAG 0x100
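/*
 * CACHE_LEVELS bounds how many ancestor directory ids a single access
 * check will remember; NUM_CACHE_ENTRIES (64*16 = 1024) is the size of
 * the per-call access cache; PARENT_IDS_FLAG in the request flags means
 * the file_ids array holds parent directory ids rather than leaf ids.
 */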
815
816 struct access_cache {
817 int numcached;
818 int cachehits; /* these two for statistics gathering */
819 int lookups;
820 unsigned int *acache;
821 unsigned char *haveaccess;
822 };
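/*
 * acache[] is kept sorted by cnid and haveaccess[] is the parallel array
 * of cached results: 0 means access was granted, anything else is an
 * errno; ESRCH marks a scope-check miss that add_node() may later
 * overwrite with a real result.
 */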
823
824 struct access_t {
825 uid_t uid; /* IN: effective user id */
826 short flags; /* IN: access requested (i.e. R_OK) */
827 short num_groups; /* IN: number of groups user belongs to */
828 int num_files; /* IN: number of files to process */
829 int *file_ids; /* IN: array of file ids */
830 gid_t *groups; /* IN: array of groups */
831 short *access; /* OUT: access info for each file (0 for 'has access') */
832 } __attribute__((unavailable)); // this structure is for reference purposes only
833
834 struct user32_access_t {
835 uid_t uid; /* IN: effective user id */
836 short flags; /* IN: access requested (i.e. R_OK) */
837 short num_groups; /* IN: number of groups user belongs to */
838 int num_files; /* IN: number of files to process */
839 user32_addr_t file_ids; /* IN: array of file ids */
840 user32_addr_t groups; /* IN: array of groups */
841 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
842 };
843
844 struct user64_access_t {
845 uid_t uid; /* IN: effective user id */
846 short flags; /* IN: access requested (i.e. R_OK) */
847 short num_groups; /* IN: number of groups user belongs to */
848 int num_files; /* IN: number of files to process */
849 user64_addr_t file_ids; /* IN: array of file ids */
850 user64_addr_t groups; /* IN: array of groups */
851 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
852 };
853
854
855 // these are the "extended" versions of the above structures
856 // note that it is crucial that they be a different size than
857 // the regular versions
858 struct ext_access_t {
859 uint32_t flags; /* IN: access requested (i.e. R_OK) */
860 uint32_t num_files; /* IN: number of files to process */
861 uint32_t map_size; /* IN: size of the bit map */
862 uint32_t *file_ids; /* IN: Array of file ids */
863 char *bitmap; /* OUT: hash-bitmap of interesting directory ids */
864 short *access; /* OUT: access info for each file (0 for 'has access') */
865 uint32_t num_parents; /* future use */
866 cnid_t *parents; /* future use */
867 } __attribute__((unavailable)); // this structure is for reference purposes only
868
869 struct user32_ext_access_t {
870 uint32_t flags; /* IN: access requested (i.e. R_OK) */
871 uint32_t num_files; /* IN: number of files to process */
872 uint32_t map_size; /* IN: size of the bit map */
873 user32_addr_t file_ids; /* IN: Array of file ids */
874 user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
875 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
876 uint32_t num_parents; /* future use */
877 user32_addr_t parents; /* future use */
878 };
879
880 struct user64_ext_access_t {
881 uint32_t flags; /* IN: access requested (i.e. R_OK) */
882 uint32_t num_files; /* IN: number of files to process */
883 uint32_t map_size; /* IN: size of the bit map */
884 user64_addr_t file_ids; /* IN: array of file ids */
885 user64_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
886 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
887 uint32_t num_parents;/* future use */
888 user64_addr_t parents;/* future use */
889 };
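/*
 * Rough userspace usage sketch (struct layout per the reference
 * definitions above; command constants live in hfs_fsctl.h; variable
 * names here are illustrative only):
 *
 *	struct ext_access_t args = { 0 };
 *	args.flags     = R_OK;
 *	args.num_files = n;
 *	args.file_ids  = ids;      // n catalog node ids to test
 *	args.access    = results;  // n shorts; 0 == caller has access
 *	err = fsctl(volume_path, HFS_EXT_BULKACCESS_FSCTL, &args, 0);
 *
 * The kernel fills results[] with 0 or an errno for each file id.
 */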
890
891
892 /*
893 * Perform a binary search for the given parent_id. Return value is
894 * the index if there is a match. If no_match_indexp is non-NULL it
895 * will be assigned with the index to insert the item (even if it was
896 * not found).
897 */
898 static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
899 {
900 int index=-1;
901 unsigned int lo=0;
902
903 do {
904 unsigned int mid = ((hi - lo)/2) + lo;
905 unsigned int this_id = array[mid];
906
907 if (parent_id == this_id) {
908 hi = mid;
909 break;
910 }
911
912 if (parent_id < this_id) {
913 hi = mid;
914 continue;
915 }
916
917 if (parent_id > this_id) {
918 lo = mid + 1;
919 continue;
920 }
921 } while(lo < hi);
922
923 /* check if lo and hi converged on the match */
924 if (parent_id == array[hi]) {
925 index = hi;
926 }
927
928 if (no_match_indexp) {
929 *no_match_indexp = hi;
930 }
931
932 return index;
933 }
934
935
936 static int
937 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
938 {
939 unsigned int hi;
940 int matches = 0;
941 int index, no_match_index;
942
943 if (cache->numcached == 0) {
944 *indexp = 0;
945 return 0; // table is empty, so insert at index=0 and report no match
946 }
947
948 if (cache->numcached > NUM_CACHE_ENTRIES) {
949 /*printf("hfs: EGAD! numcached is %d... cut our losses and trim to %d\n",
950 cache->numcached, NUM_CACHE_ENTRIES);*/
951 cache->numcached = NUM_CACHE_ENTRIES;
952 }
953
954 hi = cache->numcached - 1;
955
956 index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);
957
958 /* if no existing entry found, find index for new one */
959 if (index == -1) {
960 index = no_match_index;
961 matches = 0;
962 } else {
963 matches = 1;
964 }
965
966 *indexp = index;
967 return matches;
968 }
969
970 /*
971 * Add a node to the access_cache at the given index (or do a lookup first
972 * to find the index if -1 is passed in). We currently do a replace rather
973 * than an insert if the cache is full.
974 */
975 static void
976 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
977 {
978 int lookup_index = -1;
979
980 /* need to do a lookup first if -1 passed for index */
981 if (index == -1) {
982 if (lookup_bucket(cache, &lookup_index, nodeID)) {
983 if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
984 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
985 cache->haveaccess[lookup_index] = access;
986 }
987
988 /* mission accomplished */
989 return;
990 } else {
991 index = lookup_index;
992 }
993
994 }
995
996 /* if the cache is full, do a replace rather than an insert */
997 if (cache->numcached >= NUM_CACHE_ENTRIES) {
998 //printf("hfs: cache is full (%d). replace at index %d\n", cache->numcached, index);
999 cache->numcached = NUM_CACHE_ENTRIES-1;
1000
1001 if (index > cache->numcached) {
1002 // printf("hfs: index %d pinned to %d\n", index, cache->numcached);
1003 index = cache->numcached;
1004 }
1005 }
1006
1007 if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
1008 index++;
1009 }
1010
1011 if (index >= 0 && index < cache->numcached) {
1012 /* only do bcopy if we're inserting */
1013 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
1014 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
1015 }
1016
1017 cache->acache[index] = nodeID;
1018 cache->haveaccess[index] = access;
1019 cache->numcached++;
1020 }
1021
1022
1023 struct cinfo {
1024 uid_t uid;
1025 gid_t gid;
1026 mode_t mode;
1027 cnid_t parentcnid;
1028 u_int16_t recflags;
1029 };
1030
1031 static int
1032 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
1033 {
1034 struct cinfo *cip = (struct cinfo *)arg;
1035
1036 cip->uid = attrp->ca_uid;
1037 cip->gid = attrp->ca_gid;
1038 cip->mode = attrp->ca_mode;
1039 cip->parentcnid = descp->cd_parentcnid;
1040 cip->recflags = attrp->ca_recflags;
1041
1042 return (0);
1043 }
1044
1045 /*
1046 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
1047 * isn't incore, then go to the catalog.
1048 */
1049 static int
1050 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
1051 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
1052 {
1053 int error = 0;
1054
1055 /* if this id matches the one the fsctl was called with, skip the lookup */
1056 if (cnid == skip_cp->c_cnid) {
1057 cnattrp->ca_uid = skip_cp->c_uid;
1058 cnattrp->ca_gid = skip_cp->c_gid;
1059 cnattrp->ca_mode = skip_cp->c_mode;
1060 cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
1061 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
1062 } else {
1063 struct cinfo c_info;
1064
1065 /* otherwise, check the cnode hash in case the file/dir is in core */
1066 if (hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info) == 0) {
1067 cnattrp->ca_uid = c_info.uid;
1068 cnattrp->ca_gid = c_info.gid;
1069 cnattrp->ca_mode = c_info.mode;
1070 cnattrp->ca_recflags = c_info.recflags;
1071 keyp->hfsPlus.parentID = c_info.parentcnid;
1072 } else {
1073 int lockflags;
1074
1075 if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
1076 throttle_lowpri_io(1);
1077
1078 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
1079
1080 /* lookup this cnid in the catalog */
1081 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
1082
1083 hfs_systemfile_unlock(hfsmp, lockflags);
1084
1085 cache->lookups++;
1086 }
1087 }
1088
1089 return (error);
1090 }
1091
1092
1093 /*
1094 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
1095 * up to CACHE_LEVELS as we progress towards the root.
1096 */
1097 static int
1098 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
1099 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
1100 struct vfs_context *my_context,
1101 char *bitmap,
1102 uint32_t map_size,
1103 cnid_t* parents,
1104 uint32_t num_parents)
1105 {
1106 int myErr = 0;
1107 int myResult;
1108 HFSCatalogNodeID thisNodeID;
1109 unsigned int myPerms;
1110 struct cat_attr cnattr;
1111 int cache_index = -1, scope_index = -1, scope_idx_start = -1;
1112 CatalogKey catkey;
1113
1114 int i = 0, ids_to_cache = 0;
1115 int parent_ids[CACHE_LEVELS];
1116
1117 thisNodeID = nodeID;
1118 while (thisNodeID >= kRootDirID) {
1119 myResult = 0; /* default to "no access" */
1120
1121 /* check the cache before resorting to hitting the catalog */
1122
1123 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
1124 * to look any further after hitting cached dir */
1125
1126 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
1127 cache->cachehits++;
1128 myErr = cache->haveaccess[cache_index];
1129 if (scope_index != -1) {
1130 if (myErr == ESRCH) {
1131 myErr = 0;
1132 }
1133 } else {
1134 scope_index = 0; // so we'll just use the cache result
1135 scope_idx_start = ids_to_cache;
1136 }
1137 myResult = (myErr == 0) ? 1 : 0;
1138 goto ExitThisRoutine;
1139 }
1140
1141
1142 if (parents) {
1143 int tmp;
1144 tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
1145 if (scope_index == -1)
1146 scope_index = tmp;
1147 if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
1148 scope_idx_start = ids_to_cache;
1149 }
1150 }
1151
1152 /* remember which parents we want to cache */
1153 if (ids_to_cache < CACHE_LEVELS) {
1154 parent_ids[ids_to_cache] = thisNodeID;
1155 ids_to_cache++;
1156 }
1157 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
1158 if (bitmap && map_size) {
1159 bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
1160 }
1161
1162
1163 /* do the lookup (checks the cnode hash, then the catalog) */
1164 myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
1165 if (myErr) {
1166 goto ExitThisRoutine; /* no access */
1167 }
1168
1169 /* Root always gets access. */
1170 if (suser(myp_ucred, NULL) == 0) {
1171 thisNodeID = catkey.hfsPlus.parentID;
1172 myResult = 1;
1173 continue;
1174 }
1175
1176 // if the thing has acl's, do the full permission check
1177 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1178 struct vnode *vp;
1179
1180 /* get the vnode for this cnid */
1181 myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
1182 if ( myErr ) {
1183 myResult = 0;
1184 goto ExitThisRoutine;
1185 }
1186
1187 thisNodeID = VTOC(vp)->c_parentcnid;
1188
1189 hfs_unlock(VTOC(vp));
1190
1191 if (vnode_vtype(vp) == VDIR) {
1192 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
1193 } else {
1194 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
1195 }
1196
1197 vnode_put(vp);
1198 if (myErr) {
1199 myResult = 0;
1200 goto ExitThisRoutine;
1201 }
1202 } else {
1203 unsigned int flags;
1204 int mode = cnattr.ca_mode & S_IFMT;
1205 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp,myp_ucred, theProcPtr);
1206
1207 if (mode == S_IFDIR) {
1208 flags = R_OK | X_OK;
1209 } else {
1210 flags = R_OK;
1211 }
1212 if ( (myPerms & flags) != flags) {
1213 myResult = 0;
1214 myErr = EACCES;
1215 goto ExitThisRoutine; /* no access */
1216 }
1217
1218 /* up the hierarchy we go */
1219 thisNodeID = catkey.hfsPlus.parentID;
1220 }
1221 }
1222
1223 /* if here, we have access to this node */
1224 myResult = 1;
1225
1226 ExitThisRoutine:
1227 if (parents && myErr == 0 && scope_index == -1) {
1228 myErr = ESRCH;
1229 }
1230
1231 if (myErr) {
1232 myResult = 0;
1233 }
1234 *err = myErr;
1235
1236 /* cache the parent directory(ies) */
1237 for (i = 0; i < ids_to_cache; i++) {
1238 if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
1239 add_node(cache, -1, parent_ids[i], ESRCH);
1240 } else {
1241 add_node(cache, -1, parent_ids[i], myErr);
1242 }
1243 }
1244
1245 return (myResult);
1246 }
1247
1248 static int
1249 do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
1250 struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
1251 {
1252 boolean_t is64bit;
1253
1254 /*
1255 * NOTE: on entry, the vnode has an io_ref. In case this vnode
1256 * happens to be in our list of file_ids, we note it so that we
1257 * avoid calling hfs_chashget_nowait() on that id, as that
1258 * would cause a "locking against myself" panic.
1259 */
1260 Boolean check_leaf = true;
1261
1262 struct user64_ext_access_t *user_access_structp;
1263 struct user64_ext_access_t tmp_user_access;
1264 struct access_cache cache;
1265
1266 int error = 0, prev_parent_check_ok=1;
1267 unsigned int i;
1268
1269 short flags;
1270 unsigned int num_files = 0;
1271 int map_size = 0;
1272 int num_parents = 0;
1273 int *file_ids=NULL;
1274 short *access=NULL;
1275 char *bitmap=NULL;
1276 cnid_t *parents=NULL;
1277 int leaf_index;
1278
1279 cnid_t cnid;
1280 cnid_t prevParent_cnid = 0;
1281 unsigned int myPerms;
1282 short myaccess = 0;
1283 struct cat_attr cnattr;
1284 CatalogKey catkey;
1285 struct cnode *skip_cp = VTOC(vp);
1286 kauth_cred_t cred = vfs_context_ucred(context);
1287 proc_t p = vfs_context_proc(context);
1288
1289 is64bit = proc_is64bit(p);
1290
1291 /* initialize the local cache and buffers */
1292 cache.numcached = 0;
1293 cache.cachehits = 0;
1294 cache.lookups = 0;
1295 cache.acache = NULL;
1296 cache.haveaccess = NULL;
1297
1298 /* struct copyin done during dispatch... need to copy file_id array separately */
1299 if (ap->a_data == NULL) {
1300 error = EINVAL;
1301 goto err_exit_bulk_access;
1302 }
1303
1304 if (is64bit) {
1305 if (arg_size != sizeof(struct user64_ext_access_t)) {
1306 error = EINVAL;
1307 goto err_exit_bulk_access;
1308 }
1309
1310 user_access_structp = (struct user64_ext_access_t *)ap->a_data;
1311
1312 } else if (arg_size == sizeof(struct user32_access_t)) {
1313 struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;
1314
1315 // convert an old style bulk-access struct to the new style
1316 tmp_user_access.flags = accessp->flags;
1317 tmp_user_access.num_files = accessp->num_files;
1318 tmp_user_access.map_size = 0;
1319 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1320 tmp_user_access.bitmap = USER_ADDR_NULL;
1321 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1322 tmp_user_access.num_parents = 0;
1323 user_access_structp = &tmp_user_access;
1324
1325 } else if (arg_size == sizeof(struct user32_ext_access_t)) {
1326 struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;
1327
1328 // up-cast from a 32-bit version of the struct
1329 tmp_user_access.flags = accessp->flags;
1330 tmp_user_access.num_files = accessp->num_files;
1331 tmp_user_access.map_size = accessp->map_size;
1332 tmp_user_access.num_parents = accessp->num_parents;
1333
1334 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1335 tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
1336 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1337 tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);
1338
1339 user_access_structp = &tmp_user_access;
1340 } else {
1341 error = EINVAL;
1342 goto err_exit_bulk_access;
1343 }
1344
1345 map_size = user_access_structp->map_size;
1346
1347 num_files = user_access_structp->num_files;
1348
1349 num_parents= user_access_structp->num_parents;
1350
1351 if (num_files < 1) {
1352 goto err_exit_bulk_access;
1353 }
1354 if (num_files > 1024) {
1355 error = EINVAL;
1356 goto err_exit_bulk_access;
1357 }
1358
1359 if (num_parents > 1024) {
1360 error = EINVAL;
1361 goto err_exit_bulk_access;
1362 }
1363
1364 file_ids = (int *) kalloc(sizeof(int) * num_files);
1365 access = (short *) kalloc(sizeof(short) * num_files);
1366 if (map_size) {
1367 bitmap = (char *) kalloc(sizeof(char) * map_size);
1368 }
1369
1370 if (num_parents) {
1371 parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
1372 }
1373
1374 cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
1375 cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1376
1377 if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
1378 if (file_ids) {
1379 kfree(file_ids, sizeof(int) * num_files);
1380 }
1381 if (bitmap) {
1382 kfree(bitmap, sizeof(char) * map_size);
1383 }
1384 if (access) {
1385 kfree(access, sizeof(short) * num_files);
1386 }
1387 if (cache.acache) {
1388 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1389 }
1390 if (cache.haveaccess) {
1391 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1392 }
1393 if (parents) {
1394 kfree(parents, sizeof(cnid_t) * num_parents);
1395 }
1396 return ENOMEM;
1397 }
1398
1399 // make sure the bitmap is zero'ed out...
1400 if (bitmap) {
1401 bzero(bitmap, (sizeof(char) * map_size));
1402 }
1403
1404 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1405 num_files * sizeof(int)))) {
1406 goto err_exit_bulk_access;
1407 }
1408
1409 if (num_parents) {
1410 if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
1411 num_parents * sizeof(cnid_t)))) {
1412 goto err_exit_bulk_access;
1413 }
1414 }
1415
1416 flags = user_access_structp->flags;
1417 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1418 flags = R_OK;
1419 }
1420
1421 /* check if we've been passed leaf node ids or parent ids */
1422 if (flags & PARENT_IDS_FLAG) {
1423 check_leaf = false;
1424 }
1425
1426 /* Check access to each file_id passed in */
1427 for (i = 0; i < num_files; i++) {
1428 leaf_index=-1;
1429 cnid = (cnid_t) file_ids[i];
1430
1431 /* root always has access */
1432 if ((!parents) && (!suser(cred, NULL))) {
1433 access[i] = 0;
1434 continue;
1435 }
1436
1437 if (check_leaf) {
1438 /* do the lookup (checks the cnode hash, then the catalog) */
1439 error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
1440 if (error) {
1441 access[i] = (short) error;
1442 continue;
1443 }
1444
1445 if (parents) {
1446 // Check if the leaf matches one of the parent scopes
1447 leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
1448 if (leaf_index >= 0 && parents[leaf_index] == cnid)
1449 prev_parent_check_ok = 0;
1450 else if (leaf_index >= 0)
1451 prev_parent_check_ok = 1;
1452 }
1453
1454 // if the thing has acl's, do the full permission check
1455 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1456 struct vnode *cvp;
1457 int myErr = 0;
1458 /* get the vnode for this cnid */
1459 myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);
1460 if ( myErr ) {
1461 access[i] = myErr;
1462 continue;
1463 }
1464
1465 hfs_unlock(VTOC(cvp));
1466
1467 if (vnode_vtype(cvp) == VDIR) {
1468 myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
1469 } else {
1470 myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
1471 }
1472
1473 vnode_put(cvp);
1474 if (myErr) {
1475 access[i] = myErr;
1476 continue;
1477 }
1478 } else {
1479 /* before calling do_access_check(), check the target file for read access */
1480 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1481 cnattr.ca_mode, hfsmp->hfs_mp, cred, p);
1482
1483 /* fail fast if no access */
1484 if ((myPerms & flags) == 0) {
1485 access[i] = EACCES;
1486 continue;
1487 }
1488 }
1489 } else {
1490 /* we were passed an array of parent ids */
1491 catkey.hfsPlus.parentID = cnid;
1492 }
1493
1494 /* if the last guy had the same parent and had access, we're done */
1495 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
1496 cache.cachehits++;
1497 access[i] = 0;
1498 continue;
1499 }
1500
1501 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1502 skip_cp, p, cred, context,bitmap, map_size, parents, num_parents);
1503
1504 if (myaccess || (error == ESRCH && leaf_index != -1)) {
1505 access[i] = 0; // have access.. no errors to report
1506 } else {
1507 access[i] = (error != 0 ? (short) error : EACCES);
1508 }
1509
1510 prevParent_cnid = catkey.hfsPlus.parentID;
1511 }
1512
1513 /* copyout the access array */
1514 if ((error = copyout((caddr_t)access, user_access_structp->access,
1515 num_files * sizeof (short)))) {
1516 goto err_exit_bulk_access;
1517 }
1518 if (map_size && bitmap) {
1519 if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
1520 map_size * sizeof (char)))) {
1521 goto err_exit_bulk_access;
1522 }
1523 }
1524
1525
1526 err_exit_bulk_access:
1527
1528 //printf("hfs: on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1529
1530 if (file_ids)
1531 kfree(file_ids, sizeof(int) * num_files);
1532 if (parents)
1533 kfree(parents, sizeof(cnid_t) * num_parents);
1534 if (bitmap)
1535 kfree(bitmap, sizeof(char) * map_size);
1536 if (access)
1537 kfree(access, sizeof(short) * num_files);
1538 if (cache.acache)
1539 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1540 if (cache.haveaccess)
1541 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1542
1543 return (error);
1544 }
1545
1546
1547 /* end "bulk-access" support */
1548
1549
1550 /*
1551 * Callback for use with freeze ioctl.
1552 */
1553 static int
1554 hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
1555 {
1556 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
1557
1558 return 0;
1559 }
1560
1561 /*
1562 * Control filesystem operating characteristics.
1563 */
1564 int
1565 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
1566 vnode_t a_vp;
1567 int a_command;
1568 caddr_t a_data;
1569 int a_fflag;
1570 vfs_context_t a_context;
1571 } */ *ap)
1572 {
1573 struct vnode * vp = ap->a_vp;
1574 struct hfsmount *hfsmp = VTOHFS(vp);
1575 vfs_context_t context = ap->a_context;
1576 kauth_cred_t cred = vfs_context_ucred(context);
1577 proc_t p = vfs_context_proc(context);
1578 struct vfsstatfs *vfsp;
1579 boolean_t is64bit;
1580 off_t jnl_start, jnl_size;
1581 struct hfs_journal_info *jip;
1582 #if HFS_COMPRESSION
1583 int compressed = 0;
1584 off_t uncompressed_size = -1;
1585 int decmpfs_error = 0;
1586
1587 if (ap->a_command == F_RDADVISE) {
1588 /* we need to inspect the decmpfs state of the file as early as possible */
1589 compressed = hfs_file_is_compressed(VTOC(vp), 0);
1590 if (compressed) {
1591 if (VNODE_IS_RSRC(vp)) {
1592 /* if this is the resource fork, treat it as if it were empty */
1593 uncompressed_size = 0;
1594 } else {
1595 decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
1596 if (decmpfs_error != 0) {
1597 /* failed to get the uncompressed size, we'll check for this later */
1598 uncompressed_size = -1;
1599 }
1600 }
1601 }
1602 }
1603 #endif /* HFS_COMPRESSION */
1604
1605 is64bit = proc_is64bit(p);
1606
1607 #if CONFIG_PROTECT
1608 {
1609 int error = 0;
1610 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
1611 return error;
1612 }
1613 }
1614 #endif /* CONFIG_PROTECT */
1615
1616 switch (ap->a_command) {
1617
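/*
 * HFS_GETPATH: a_data is a pathname_t-sized buffer whose initial
 * contents are the decimal cnid of interest; on success it is
 * overwritten with that item's path on the volume.
 */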
1618 case HFS_GETPATH:
1619 {
1620 struct vnode *file_vp;
1621 cnid_t cnid;
1622 int outlen;
1623 char *bufptr;
1624 int error;
1625
1626 /* Caller must be owner of file system. */
1627 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1628 if (suser(cred, NULL) &&
1629 kauth_cred_getuid(cred) != vfsp->f_owner) {
1630 return (EACCES);
1631 }
1632 /* Target vnode must be file system's root. */
1633 if (!vnode_isvroot(vp)) {
1634 return (EINVAL);
1635 }
1636 bufptr = (char *)ap->a_data;
1637 cnid = strtoul(bufptr, NULL, 10);
1638
1639 /* We need to call hfs_vfs_vget to leverage the code that will
1640 * fix the origin list for us if needed, as opposed to calling
1641 * hfs_vget, since we will need the parent for the build_path call.
1642 */
1643
1644 if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
1645 return (error);
1646 }
1647 error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);
1648 vnode_put(file_vp);
1649
1650 return (error);
1651 }
1652
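/*
 * HFS_PREV_LINK / HFS_NEXT_LINK: *(cnid_t *)a_data holds a hard-link
 * file id on input and is replaced with the previous or next link in
 * the same sibling-link chain, so repeated HFS_NEXT_LINK calls walk
 * every hard link that shares the underlying inode.
 */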
1653 case HFS_PREV_LINK:
1654 case HFS_NEXT_LINK:
1655 {
1656 cnid_t linkfileid;
1657 cnid_t nextlinkid;
1658 cnid_t prevlinkid;
1659 int error;
1660
1661 /* Caller must be owner of file system. */
1662 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1663 if (suser(cred, NULL) &&
1664 kauth_cred_getuid(cred) != vfsp->f_owner) {
1665 return (EACCES);
1666 }
1667 /* Target vnode must be file system's root. */
1668 if (!vnode_isvroot(vp)) {
1669 return (EINVAL);
1670 }
1671 linkfileid = *(cnid_t *)ap->a_data;
1672 if (linkfileid < kHFSFirstUserCatalogNodeID) {
1673 return (EINVAL);
1674 }
1675 if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
1676 return (error);
1677 }
1678 if (ap->a_command == HFS_NEXT_LINK) {
1679 *(cnid_t *)ap->a_data = nextlinkid;
1680 } else {
1681 *(cnid_t *)ap->a_data = prevlinkid;
1682 }
1683 return (0);
1684 }
1685
1686 case HFS_RESIZE_PROGRESS: {
1687
1688 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1689 if (suser(cred, NULL) &&
1690 kauth_cred_getuid(cred) != vfsp->f_owner) {
1691 return (EACCES); /* must be owner of file system */
1692 }
1693 if (!vnode_isvroot(vp)) {
1694 return (EINVAL);
1695 }
1696 /* file system must not be mounted read-only */
1697 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1698 return (EROFS);
1699 }
1700
1701 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
1702 }
1703
1704 case HFS_RESIZE_VOLUME: {
1705 u_int64_t newsize;
1706 u_int64_t cursize;
1707
1708 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1709 if (suser(cred, NULL) &&
1710 kauth_cred_getuid(cred) != vfsp->f_owner) {
1711 return (EACCES); /* must be owner of file system */
1712 }
1713 if (!vnode_isvroot(vp)) {
1714 return (EINVAL);
1715 }
1716
1717 /* file system must not be mounted read-only */
1718 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1719 return (EROFS);
1720 }
1721 newsize = *(u_int64_t *)ap->a_data;
1722 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
1723
1724 if (newsize > cursize) {
1725 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
1726 } else if (newsize < cursize) {
1727 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
1728 } else {
1729 return (0);
1730 }
1731 }
1732 case HFS_CHANGE_NEXT_ALLOCATION: {
1733 int error = 0; /* Assume success */
1734 u_int32_t location;
1735
1736 if (vnode_vfsisrdonly(vp)) {
1737 return (EROFS);
1738 }
1739 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1740 if (suser(cred, NULL) &&
1741 kauth_cred_getuid(cred) != vfsp->f_owner) {
1742 return (EACCES); /* must be owner of file system */
1743 }
1744 if (!vnode_isvroot(vp)) {
1745 return (EINVAL);
1746 }
1747 HFS_MOUNT_LOCK(hfsmp, TRUE);
1748 location = *(u_int32_t *)ap->a_data;
1749 if ((location >= hfsmp->allocLimit) &&
1750 (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
1751 error = EINVAL;
1752 goto fail_change_next_allocation;
1753 }
1754 /* Return previous value. */
1755 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
1756 if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
1757 /* On magic value for location, set nextAllocation to next block
1758 * after metadata zone and set flag in mount structure to indicate
1759 * that nextAllocation should not be updated again.
1760 */
1761 if (hfsmp->hfs_metazone_end != 0) {
1762 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
1763 }
1764 hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1765 } else {
1766 hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1767 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
1768 }
1769 MarkVCBDirty(hfsmp);
1770 fail_change_next_allocation:
1771 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1772 return (error);
1773 }
1774
1775 #if HFS_SPARSE_DEV
1776 case HFS_SETBACKINGSTOREINFO: {
1777 struct vnode * bsfs_rootvp;
1778 struct vnode * di_vp;
1779 struct hfs_backingstoreinfo *bsdata;
1780 int error = 0;
1781
1782 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1783 return (EROFS);
1784 }
1785 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
1786 return (EALREADY);
1787 }
1788 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1789 if (suser(cred, NULL) &&
1790 kauth_cred_getuid(cred) != vfsp->f_owner) {
1791 return (EACCES); /* must be owner of file system */
1792 }
1793 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
1794 if (bsdata == NULL) {
1795 return (EINVAL);
1796 }
1797 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
1798 return (error);
1799 }
1800 if ((error = vnode_getwithref(di_vp))) {
1801 file_drop(bsdata->backingfd);
1802 return(error);
1803 }
1804
1805 if (vnode_mount(vp) == vnode_mount(di_vp)) {
1806 (void)vnode_put(di_vp);
1807 file_drop(bsdata->backingfd);
1808 return (EINVAL);
1809 }
1810
1811 /*
1812 * Obtain the backing fs root vnode and keep a reference
1813 * on it. This reference will be dropped in hfs_unmount.
1814 */
1815 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
1816 if (error) {
1817 (void)vnode_put(di_vp);
1818 file_drop(bsdata->backingfd);
1819 return (error);
1820 }
1821 vnode_ref(bsfs_rootvp);
1822 vnode_put(bsfs_rootvp);
1823
1824 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
1825
1826 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
1827 /* The free extent cache is managed differently for sparse devices.
1828 * There is a window between when the volume is mounted and when the
1829 * device is marked as sparse, so the free extent cache for this
1830 * volume is currently initialized as normal volume (sorted by block
1831 * count). Reset the cache so that it will be rebuilt again
1832 * for sparse device (sorted by start block).
1833 */
1834 ResetVCBFreeExtCache(hfsmp);
1835
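/* Remember the sparse band size in allocation blocks, scaled by 4, for later free-space accounting on the sparse-backed volume. */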
1836 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
1837 hfsmp->hfs_sparsebandblks *= 4;
1838
1839 vfs_markdependency(hfsmp->hfs_mp);
1840
1841 /*
1842 * If the backing store is a sparse image file (as opposed to a sparse
1843 * bundle), then we may need to limit the free space to the maximum size
1844 * of a file on that volume. So we query (using pathconf), and if we get
1845 * a meaningful result, we cache the number of blocks for later use in
1846 * hfs_freeblks().
1847 */
1848 hfsmp->hfs_backingfs_maxblocks = 0;
1849 if (vnode_vtype(di_vp) == VREG) {
1850 int terr;
1851 int hostbits;
1852 terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
1853 if (terr == 0 && hostbits != 0 && hostbits < 64) {
1854 u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;
1855
1856 hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
1857 }
1858 }
1859
1860 (void)vnode_put(di_vp);
1861 file_drop(bsdata->backingfd);
1862 return (0);
1863 }
1864 case HFS_CLRBACKINGSTOREINFO: {
1865 struct vnode * tmpvp;
1866
1867 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1868 if (suser(cred, NULL) &&
1869 kauth_cred_getuid(cred) != vfsp->f_owner) {
1870 return (EACCES); /* must be owner of file system */
1871 }
1872 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1873 return (EROFS);
1874 }
1875
1876 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
1877 hfsmp->hfs_backingfs_rootvp) {
1878
1879 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
1880 tmpvp = hfsmp->hfs_backingfs_rootvp;
1881 hfsmp->hfs_backingfs_rootvp = NULLVP;
1882 hfsmp->hfs_sparsebandblks = 0;
1883 vnode_rele(tmpvp);
1884 }
1885 return (0);
1886 }
1887 #endif /* HFS_SPARSE_DEV */
1888
1889 /* Change the next CNID stored in the VH */
1890 case HFS_CHANGE_NEXTCNID: {
1891 int error = 0; /* Assume success */
1892 u_int32_t fileid;
1893 int wraparound = 0;
1894 int lockflags = 0;
1895
1896 if (vnode_vfsisrdonly(vp)) {
1897 return (EROFS);
1898 }
1899 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1900 if (suser(cred, NULL) &&
1901 kauth_cred_getuid(cred) != vfsp->f_owner) {
1902 return (EACCES); /* must be owner of file system */
1903 }
1904
1905 fileid = *(u_int32_t *)ap->a_data;
1906
1907 /* Must have catalog lock excl. to advance the CNID pointer */
1908 lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG , HFS_EXCLUSIVE_LOCK);
1909
1910 HFS_MOUNT_LOCK(hfsmp, TRUE);
1911
1912 /* If it is less than the current next CNID, force the wraparound bit to be set */
1913 if (fileid < hfsmp->vcbNxtCNID) {
1914 wraparound=1;
1915 }
1916
1917 /* Return previous value. */
1918 *(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID;
1919
1920 hfsmp->vcbNxtCNID = fileid;
1921
1922 if (wraparound) {
1923 hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask;
1924 }
1925
1926 MarkVCBDirty(hfsmp);
1927 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1928 hfs_systemfile_unlock (hfsmp, lockflags);
1929
1930 return (error);
1931 }
1932
1933 case F_FREEZE_FS: {
1934 struct mount *mp;
1935
1936 mp = vnode_mount(vp);
1937 hfsmp = VFSTOHFS(mp);
1938
1939 if (!(hfsmp->jnl))
1940 return (ENOTSUP);
1941
1942 vfsp = vfs_statfs(mp);
1943
1944 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
1945 !kauth_cred_issuser(cred))
1946 return (EACCES);
1947
1948 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
1949
1950 // flush things before we get started to try and prevent
1951 // dirty data from being paged out while we're frozen.
1952 // note: can't do this after taking the lock as it will
1953 // deadlock against ourselves.
1954 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1955 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
1956
1957 // DO NOT call hfs_journal_flush() because that takes a
1958 // shared lock on the global exclusive lock!
1959 journal_flush(hfsmp->jnl, TRUE);
1960
1961 // don't need to iterate on all vnodes, we just need to
1962 // wait for writes to the system files and the device vnode
1963 //
1964 // Now that journal flush waits for all metadata blocks to
1965 // be written out, waiting for btree writes is probably no
1966 // longer required.
1967 if (HFSTOVCB(hfsmp)->extentsRefNum)
1968 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1969 if (HFSTOVCB(hfsmp)->catalogRefNum)
1970 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1971 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1972 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1973 if (hfsmp->hfs_attribute_vp)
1974 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1975 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1976
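// record the freezing process; only this process (or hfs_vnop_close
// on its behalf) is allowed to thaw the volume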
1977 hfsmp->hfs_freezing_proc = current_proc();
1978
1979 return (0);
1980 }
1981
1982 case F_THAW_FS: {
1983 vfsp = vfs_statfs(vnode_mount(vp));
1984 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
1985 !kauth_cred_issuser(cred))
1986 return (EACCES);
1987
1988 // if we're not the one who froze the fs then we
1989 // can't thaw it.
1990 if (hfsmp->hfs_freezing_proc != current_proc()) {
1991 return EPERM;
1992 }
1993
1994 // NOTE: if you add code here, also go check the
1995 // code that "thaws" the fs in hfs_vnop_close()
1996 //
1997 hfsmp->hfs_freezing_proc = NULL;
1998 hfs_unlock_global (hfsmp);
1999 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
2000
2001 return (0);
2002 }
2003
2004 case HFS_BULKACCESS_FSCTL: {
2005 int size;
2006
2007 if (hfsmp->hfs_flags & HFS_STANDARD) {
2008 return EINVAL;
2009 }
2010
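/* The access_t structure layout differs for 32-bit and 64-bit callers,
* so pick the size that matches the calling process.
*/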
2011 if (is64bit) {
2012 size = sizeof(struct user64_access_t);
2013 } else {
2014 size = sizeof(struct user32_access_t);
2015 }
2016
2017 return do_bulk_access_check(hfsmp, vp, ap, size, context);
2018 }
2019
2020 case HFS_EXT_BULKACCESS_FSCTL: {
2021 int size;
2022
2023 if (hfsmp->hfs_flags & HFS_STANDARD) {
2024 return EINVAL;
2025 }
2026
2027 if (is64bit) {
2028 size = sizeof(struct user64_ext_access_t);
2029 } else {
2030 size = sizeof(struct user32_ext_access_t);
2031 }
2032
2033 return do_bulk_access_check(hfsmp, vp, ap, size, context);
2034 }
2035
2036 case HFS_SET_XATTREXTENTS_STATE: {
2037 int state;
2038
2039 if (ap->a_data == NULL) {
2040 return (EINVAL);
2041 }
2042
2043 state = *(int *)ap->a_data;
2044
2045 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2046 return (EROFS);
2047 }
2048
2049 /* Only the super-user can enable or disable extent-based extended
2050 * attribute support on a volume.
2051 * Note: Starting with Mac OS X 10.7, extent-based extended attributes
2052 * are enabled by default, so any change is transient and lasts
2053 * only until the volume is remounted.
2054 */
2055 if (!is_suser()) {
2056 return (EPERM);
2057 }
2058 if (state == 0 || state == 1)
2059 return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
2060 else
2061 return (EINVAL);
2062 }
2063
2064 case F_SETSTATICCONTENT: {
2065 int error;
2066 int enable_static = 0;
2067 struct cnode *cp = NULL;
2068 /*
2069 * lock the cnode, decorate the cnode flag, and bail out.
2070 * VFS should have already authenticated the caller for us.
2071 */
2072
2073 if (ap->a_data) {
2074 /*
2075 * Note that even though ap->a_data is of type caddr_t,
2076 * the fcntl layer at the syscall handler will pass in NULL
2077 * or 1 depending on what the argument supplied to the fcntl
2078 * was. So it is in fact correct to check the ap->a_data
2079 * argument for zero or non-zero value when deciding whether or not
2080 * to enable the static bit in the cnode.
2081 */
2082 enable_static = 1;
2083 }
2084 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2085 return EROFS;
2086 }
2087 cp = VTOC(vp);
2088
2089 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK);
2090 if (error == 0) {
2091 if (enable_static) {
2092 cp->c_flag |= C_SSD_STATIC;
2093 }
2094 else {
2095 cp->c_flag &= ~C_SSD_STATIC;
2096 }
2097 hfs_unlock (cp);
2098 }
2099 return error;
2100 }
2101
2102 case F_SETBACKINGSTORE: {
2103
2104 int error = 0;
2105
2106 /*
2107 * See comment in F_SETSTATICCONTENT re: using
2108 * a null check for a_data
2109 */
2110 if (ap->a_data) {
2111 error = hfs_set_backingstore (vp, 1);
2112 }
2113 else {
2114 error = hfs_set_backingstore (vp, 0);
2115 }
2116
2117 return error;
2118 }
2119
2120 case F_GETPATH_MTMINFO: {
2121 int error = 0;
2122
2123 int *data = (int*) ap->a_data;
2124
2125 /* Ask if this is a backingstore vnode */
2126 error = hfs_is_backingstore (vp, data);
2127
2128 return error;
2129 }
2130
2131 case F_FULLFSYNC: {
2132 int error;
2133
2134 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2135 return (EROFS);
2136 }
2137 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
2138 if (error == 0) {
2139 error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
2140 hfs_unlock(VTOC(vp));
2141 }
2142
2143 return error;
2144 }
2145
2146 case F_CHKCLEAN: {
2147 register struct cnode *cp;
2148 int error;
2149
2150 if (!vnode_isreg(vp))
2151 return EINVAL;
2152
2153 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
2154 if (error == 0) {
2155 cp = VTOC(vp);
2156 /*
2157 * Used by regression tests to determine whether
2158 * all the dirty pages (via write) have been cleaned
2159 * after a call to 'fsync'.
2160 */
2161 error = is_file_clean(vp, VTOF(vp)->ff_size);
2162 hfs_unlock(cp);
2163 }
2164 return (error);
2165 }
2166
2167 case F_RDADVISE: {
2168 register struct radvisory *ra;
2169 struct filefork *fp;
2170 int error;
2171
2172 if (!vnode_isreg(vp))
2173 return EINVAL;
2174
2175 ra = (struct radvisory *)(ap->a_data);
2176 fp = VTOF(vp);
2177
2178 /* Protect against a size change. */
2179 hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK);
2180
2181 #if HFS_COMPRESSION
2182 if (compressed && (uncompressed_size == -1)) {
2183 /* fetching the uncompressed size failed above, so return the error */
2184 error = decmpfs_error;
2185 } else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
2186 (!compressed && (ra->ra_offset >= fp->ff_size))) {
2187 error = EFBIG;
2188 }
2189 #else /* HFS_COMPRESSION */
2190 if (ra->ra_offset >= fp->ff_size) {
2191 error = EFBIG;
2192 }
2193 #endif /* HFS_COMPRESSION */
2194 else {
2195 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
2196 }
2197
2198 hfs_unlock_truncate(VTOC(vp), 0);
2199 return (error);
2200 }
2201
2202 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
2203 {
2204 if (is64bit) {
2205 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
2206 }
2207 else {
2208 *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
2209 }
2210 return 0;
2211 }
2212
2213 case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
2214 *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
2215 break;
2216
2217 case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
2218 *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
2219 break;
2220
2221 case HFS_FSCTL_GET_VERY_LOW_DISK:
2222 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
2223 break;
2224
2225 case HFS_FSCTL_SET_VERY_LOW_DISK:
2226 if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
2227 return EINVAL;
2228 }
2229
2230 hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
2231 break;
2232
2233 case HFS_FSCTL_GET_LOW_DISK:
2234 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
2235 break;
2236
2237 case HFS_FSCTL_SET_LOW_DISK:
2238 if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
2239 || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
2240
2241 return EINVAL;
2242 }
2243
2244 hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
2245 break;
2246
2247 case HFS_FSCTL_GET_DESIRED_DISK:
2248 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
2249 break;
2250
2251 case HFS_FSCTL_SET_DESIRED_DISK:
2252 if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
2253 return EINVAL;
2254 }
2255
2256 hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
2257 break;
2258
2259 case HFS_VOLUME_STATUS:
2260 *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
2261 break;
2262
2263 case HFS_SET_BOOT_INFO:
2264 if (!vnode_isvroot(vp))
2265 return(EINVAL);
2266 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
2267 return(EACCES); /* must be superuser or owner of filesystem */
2268 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2269 return (EROFS);
2270 }
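/* Copy the caller-supplied Finder info into the VCB and flush the volume header to disk. */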
2271 HFS_MOUNT_LOCK(hfsmp, TRUE);
2272 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
2273 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2274 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
2275 break;
2276
2277 case HFS_GET_BOOT_INFO:
2278 if (!vnode_isvroot(vp))
2279 return(EINVAL);
2280 HFS_MOUNT_LOCK(hfsmp, TRUE);
2281 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
2282 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2283 break;
2284
2285 case HFS_MARK_BOOT_CORRUPT:
2286 /* Mark the boot volume corrupt by setting
2287 * kHFSVolumeInconsistentBit in the volume header. This will
2288 * force fsck_hfs on next mount.
2289 */
2290 if (!is_suser()) {
2291 return EACCES;
2292 }
2293
2294 /* Allowed only on the root vnode of the boot volume */
2295 if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
2296 !vnode_isvroot(vp)) {
2297 return EINVAL;
2298 }
2299 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2300 return (EROFS);
2301 }
2302 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
2303 hfs_mark_volume_inconsistent(hfsmp);
2304 break;
2305
2306 case HFS_FSCTL_GET_JOURNAL_INFO:
2307 jip = (struct hfs_journal_info*)ap->a_data;
2308
2309 if (vp == NULLVP)
2310 return EINVAL;
2311
2312 if (hfsmp->jnl == NULL) {
2313 jnl_start = 0;
2314 jnl_size = 0;
2315 } else {
2316 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
2317 jnl_size = (off_t)hfsmp->jnl_size;
2318 }
2319
2320 jip->jstart = jnl_start;
2321 jip->jsize = jnl_size;
2322 break;
2323
2324 case HFS_SET_ALWAYS_ZEROFILL: {
2325 struct cnode *cp = VTOC(vp);
2326
2327 if (*(int *)ap->a_data) {
2328 cp->c_flag |= C_ALWAYS_ZEROFILL;
2329 } else {
2330 cp->c_flag &= ~C_ALWAYS_ZEROFILL;
2331 }
2332 break;
2333 }
2334
2335 case HFS_DISABLE_METAZONE: {
2336 /* Only root can disable metadata zone */
2337 if (!is_suser()) {
2338 return EACCES;
2339 }
2340 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2341 return (EROFS);
2342 }
2343
2344 /* Disable metadata zone now */
2345 (void) hfs_metadatazone_init(hfsmp, true);
2346 printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
2347 break;
2348 }
2349
2350 default:
2351 return (ENOTTY);
2352 }
2353
2354 return 0;
2355 }
2356
2357 /*
2358 * select
2359 */
2360 int
2361 hfs_vnop_select(__unused struct vnop_select_args *ap)
2362 /*
2363 struct vnop_select_args {
2364 vnode_t a_vp;
2365 int a_which;
2366 int a_fflags;
2367 void *a_wql;
2368 vfs_context_t a_context;
2369 };
2370 */
2371 {
2372 /*
2373 * We should really check to see if I/O is possible.
2374 */
2375 return (1);
2376 }
2377
2378 /*
2379 * Converts a logical block number to a physical block, and optionally returns
2380 * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
2381 * The physical block number is based on the device block size, currently 512 bytes.
2382 * The block run is returned in logical blocks, and is the REMAINING number of blocks in the run.
2383 */
2384 int
2385 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
2386 {
2387 struct filefork *fp = VTOF(vp);
2388 struct hfsmount *hfsmp = VTOHFS(vp);
2389 int retval = E_NONE;
2390 u_int32_t logBlockSize;
2391 size_t bytesContAvail = 0;
2392 off_t blockposition;
2393 int lockExtBtree;
2394 int lockflags = 0;
2395
2396 /*
2397 * Check for underlying vnode requests and ensure that logical
2398 * to physical mapping is requested.
2399 */
2400 if (vpp != NULL)
2401 *vpp = hfsmp->hfs_devvp;
2402 if (bnp == NULL)
2403 return (0);
2404
2405 logBlockSize = GetLogicalBlockSize(vp);
2406 blockposition = (off_t)bn * logBlockSize;
2407
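/* If the fork has overflow extents, MapFileBlockC may need to consult the extents B-tree, so take its lock first. */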
2408 lockExtBtree = overflow_extents(fp);
2409
2410 if (lockExtBtree)
2411 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
2412
2413 retval = MacToVFSError(
2414 MapFileBlockC (HFSTOVCB(hfsmp),
2415 (FCB*)fp,
2416 MAXPHYSIO,
2417 blockposition,
2418 bnp,
2419 &bytesContAvail));
2420
2421 if (lockExtBtree)
2422 hfs_systemfile_unlock(hfsmp, lockflags);
2423
2424 if (retval == E_NONE) {
2425 /* Figure out how many read ahead blocks there are */
2426 if (runp != NULL) {
2427 if (can_cluster(logBlockSize)) {
2428 /* Make sure this result never goes negative: */
2429 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
2430 } else {
2431 *runp = 0;
2432 }
2433 }
2434 }
2435 return (retval);
2436 }
2437
2438 /*
2439 * Convert logical block number to file offset.
2440 */
2441 int
2442 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
2443 /*
2444 struct vnop_blktooff_args {
2445 vnode_t a_vp;
2446 daddr64_t a_lblkno;
2447 off_t *a_offset;
2448 };
2449 */
2450 {
2451 if (ap->a_vp == NULL)
2452 return (EINVAL);
2453 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
2454
2455 return(0);
2456 }
2457
2458 /*
2459 * Convert file offset to logical block number.
2460 */
2461 int
2462 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
2463 /*
2464 struct vnop_offtoblk_args {
2465 vnode_t a_vp;
2466 off_t a_offset;
2467 daddr64_t *a_lblkno;
2468 };
2469 */
2470 {
2471 if (ap->a_vp == NULL)
2472 return (EINVAL);
2473 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
2474
2475 return(0);
2476 }
2477
2478 /*
2479 * Map file offset to physical block number.
2480 *
2481 * If this function is called for write operation, and if the file
2482 * had virtual blocks allocated (delayed allocation), real blocks
2483 * are allocated by calling ExtendFileC().
2484 *
2485 * If this function is called for read operation, and if the file
2486 * had virtual blocks allocated (delayed allocation), no change
2487 * to the size of file is done, and if required, rangelist is
2488 * searched for mapping.
2489 *
2490 * System file cnodes are expected to be locked (shared or exclusive).
2491 */
2492 int
2493 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
2494 /*
2495 struct vnop_blockmap_args {
2496 vnode_t a_vp;
2497 off_t a_foffset;
2498 size_t a_size;
2499 daddr64_t *a_bpn;
2500 size_t *a_run;
2501 void *a_poff;
2502 int a_flags;
2503 vfs_context_t a_context;
2504 };
2505 */
2506 {
2507 struct vnode *vp = ap->a_vp;
2508 struct cnode *cp;
2509 struct filefork *fp;
2510 struct hfsmount *hfsmp;
2511 size_t bytesContAvail = 0;
2512 int retval = E_NONE;
2513 int syslocks = 0;
2514 int lockflags = 0;
2515 struct rl_entry *invalid_range;
2516 enum rl_overlaptype overlaptype;
2517 int started_tr = 0;
2518 int tooklock = 0;
2519
2520 #if HFS_COMPRESSION
2521 if (VNODE_IS_RSRC(vp)) {
2522 /* allow blockmaps to the resource fork */
2523 } else {
2524 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
2525 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
2526 switch(state) {
2527 case FILE_IS_COMPRESSED:
2528 return ENOTSUP;
2529 case FILE_IS_CONVERTING:
2530 /* if FILE_IS_CONVERTING, we allow blockmap */
2531 break;
2532 default:
2533 printf("invalid state %d for compressed file\n", state);
2534 /* fall through */
2535 }
2536 }
2537 }
2538 #endif /* HFS_COMPRESSION */
2539
2540 /* Do not allow blockmap operation on a directory */
2541 if (vnode_isdir(vp)) {
2542 return (ENOTSUP);
2543 }
2544
2545 /*
2546 * Check for underlying vnode requests and ensure that logical
2547 * to physical mapping is requested.
2548 */
2549 if (ap->a_bpn == NULL)
2550 return (0);
2551
2552 if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
2553 if (VTOC(vp)->c_lockowner != current_thread()) {
2554 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2555 tooklock = 1;
2556 }
2557 }
2558 hfsmp = VTOHFS(vp);
2559 cp = VTOC(vp);
2560 fp = VTOF(vp);
2561
2562 retry:
2563 /* Check virtual blocks only when performing write operation */
2564 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
2565 if (hfs_start_transaction(hfsmp) != 0) {
2566 retval = EINVAL;
2567 goto exit;
2568 } else {
2569 started_tr = 1;
2570 }
2571 syslocks = SFL_EXTENTS | SFL_BITMAP;
2572
2573 } else if (overflow_extents(fp)) {
2574 syslocks = SFL_EXTENTS;
2575 }
2576
2577 if (syslocks)
2578 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
2579
2580 /*
2581 * Check for any delayed allocations.
2582 */
2583 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
2584 int64_t actbytes;
2585 u_int32_t loanedBlocks;
2586
2587 //
2588 // Make sure we have a transaction. It's possible
2589 // that we came in and fp->ff_unallocblocks was zero
2590 // but during the time we blocked acquiring the extents
2591 // btree, ff_unallocblocks became non-zero and so we
2592 // will need to start a transaction.
2593 //
2594 if (started_tr == 0) {
2595 if (syslocks) {
2596 hfs_systemfile_unlock(hfsmp, lockflags);
2597 syslocks = 0;
2598 }
2599 goto retry;
2600 }
2601
2602 /*
2603 * Note: ExtendFileC will release any blocks on loan and
2604 * acquire real blocks. So we ask to extend by zero bytes
2605 * since ExtendFileC will account for the virtual blocks.
2606 */
2607
2608 loanedBlocks = fp->ff_unallocblocks;
2609 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
2610 kEFAllMask | kEFNoClumpMask, &actbytes);
2611
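/* ExtendFileC failed: restore the loaned (virtual) block accounting
* on the fork, the cnode, and the mount so nothing is lost.
*/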
2612 if (retval) {
2613 fp->ff_unallocblocks = loanedBlocks;
2614 cp->c_blocks += loanedBlocks;
2615 fp->ff_blocks += loanedBlocks;
2616
2617 HFS_MOUNT_LOCK(hfsmp, TRUE);
2618 hfsmp->loanedBlocks += loanedBlocks;
2619 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2620
2621 hfs_systemfile_unlock(hfsmp, lockflags);
2622 cp->c_flag |= C_MODIFIED;
2623 if (started_tr) {
2624 (void) hfs_update(vp, TRUE);
2625 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2626
2627 hfs_end_transaction(hfsmp);
2628 started_tr = 0;
2629 }
2630 goto exit;
2631 }
2632 }
2633
2634 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
2635 ap->a_bpn, &bytesContAvail);
2636 if (syslocks) {
2637 hfs_systemfile_unlock(hfsmp, lockflags);
2638 syslocks = 0;
2639 }
2640
2641 if (started_tr) {
2642 (void) hfs_update(vp, TRUE);
2643 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2644 hfs_end_transaction(hfsmp);
2645 started_tr = 0;
2646 }
2647 if (retval) {
2648 /* On write, always return error because virtual blocks, if any,
2649 * should have been allocated in ExtendFileC(). We do not
2650 * allocate virtual blocks on read, therefore return error
2651 * only if no virtual blocks are allocated. Otherwise we search
2652 * rangelist for zero-fills
2653 */
2654 if ((MacToVFSError(retval) != ERANGE) ||
2655 (ap->a_flags & VNODE_WRITE) ||
2656 ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
2657 goto exit;
2658 }
2659
2660 /* Validate if the start offset is within logical file size */
2661 if (ap->a_foffset >= fp->ff_size) {
2662 goto exit;
2663 }
2664
2665 /*
2666 * At this point, we have encountered a failure during
2667 * MapFileBlockC that resulted in ERANGE, and we are not servicing
2668 * a write, and there are borrowed blocks.
2669 *
2670 * However, the cluster layer will not call blockmap for
2671 * blocks that are borrowed and in-cache. We have to assume that
2672 * because we observed ERANGE being emitted from MapFileBlockC, this
2673 * extent range is not valid on-disk. So we treat this as a
2674 * mapping that needs to be zero-filled prior to reading.
2675 *
2676 * Note that under certain circumstances (such as non-contiguous
2677 * userland VM mappings in the calling process), cluster_io
2678 * may be forced to split a large I/O driven by hfs_vnop_write
2679 * into multiple sub-I/Os that necessitate a RMW cycle. If this is
2680 * the case here, then we have already removed the invalid range list
2681 * mapping prior to getting to this blockmap call, so we should not
2682 * search the invalid rangelist for this byte range.
2683 */
2684
2685 bytesContAvail = fp->ff_size - ap->a_foffset;
2686 /*
2687 * Clip the contiguous available bytes to, at most, the allowable
2688 * maximum or the amount requested.
2689 */
2690
2691 if (bytesContAvail > ap->a_size) {
2692 bytesContAvail = ap->a_size;
2693 }
2694
2695 *ap->a_bpn = (daddr64_t) -1;
2696 retval = 0;
2697
2698 goto exit;
2699 }
2700
2701 /* MapFileBlockC() found a valid extent in the filefork. Search the
2702 * mapping information further for invalid file ranges.
2703 */
2704 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
2705 ap->a_foffset + (off_t)bytesContAvail - 1,
2706 &invalid_range);
2707 if (overlaptype != RL_NOOVERLAP) {
2708 switch(overlaptype) {
2709 case RL_MATCHINGOVERLAP:
2710 case RL_OVERLAPCONTAINSRANGE:
2711 case RL_OVERLAPSTARTSBEFORE:
2712 /* There's no valid block for this byte offset */
2713 *ap->a_bpn = (daddr64_t)-1;
2714 /* There's no point limiting the amount to be returned
2715 * if the invalid range that was hit extends all the way
2716 * to the EOF (i.e. there's no valid bytes between the
2717 * end of this range and the file's EOF):
2718 */
2719 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
2720 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
2721 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
2722 }
2723 break;
2724
2725 case RL_OVERLAPISCONTAINED:
2726 case RL_OVERLAPENDSAFTER:
2727 /* The range of interest hits an invalid block before the end: */
2728 if (invalid_range->rl_start == ap->a_foffset) {
2729 /* There's actually no valid information to be had starting here: */
2730 *ap->a_bpn = (daddr64_t)-1;
2731 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
2732 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
2733 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
2734 }
2735 } else {
2736 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
2737 }
2738 break;
2739
2740 case RL_NOOVERLAP:
2741 break;
2742 } /* end switch */
2743 if (bytesContAvail > ap->a_size)
2744 bytesContAvail = ap->a_size;
2745 }
2746
2747 exit:
2748 if (retval == 0) {
2749 if (ap->a_run)
2750 *ap->a_run = bytesContAvail;
2751
2752 if (ap->a_poff)
2753 *(int *)ap->a_poff = 0;
2754 }
2755
2756 if (tooklock)
2757 hfs_unlock(cp);
2758
2759 return (MacToVFSError(retval));
2760 }
2761
2762 /*
2763 * Prepare and issue the I/O.
2764 * buf_strategy knows how to deal
2765 * with requests that require
2766 * fragmented I/Os.
2767 */
2768 int
2769 hfs_vnop_strategy(struct vnop_strategy_args *ap)
2770 {
2771 buf_t bp = ap->a_bp;
2772 vnode_t vp = buf_vnode(bp);
2773 int error = 0;
2774
2775 /* Mark buffer as containing static data if cnode flag set */
2776 if (VTOC(vp)->c_flag & C_SSD_STATIC) {
2777 buf_markstatic(bp);
2778 }
2779
2780 #if CONFIG_PROTECT
2781 cnode_t *cp = NULL;
2782
2783 if ((cp = cp_get_protected_cnode(vp)) != NULL) {
2784 /*
2785 * We rely upon the truncate lock to protect the
2786 * CP cache key from getting tossed prior to our IO finishing here.
2787 * Nearly all cluster io calls to manipulate file payload from HFS
2788 * take the truncate lock before calling into the cluster
2789 * layer to ensure the file size does not change, or that they
2790 * have exclusive right to change the EOF of the file.
2791 * That same guarantee protects us here since the code that
2792 * deals with CP lock events must now take the truncate lock
2793 * before doing anything.
2794 *
2795 * There is one exception here:
2796 * 1) VM swapfile I/O, because HFS will
2797 * funnel the VNOP_PAGEOUT directly into a cluster_pageout call for the
2798 * swapfile code only, without holding the truncate lock. This is because
2799 * individual swapfiles are maintained at fixed-length sizes by the VM code.
2800 * In non-swapfile IO we use PAGEOUT_V2 semantics which allow us to
2801 * create our own UPL and thus take the truncate lock before calling
2802 * into the cluster layer. In that case, however, we are not concerned
2803 * with the CP blob being wiped out in the middle of the IO
2804 * because there isn't anything to toss; the VM swapfile key stays
2805 * in-core as long as the file is open.
2806 *
2807 * NB:
2808 * For filesystem resize, we may not have access to the underlying
2809 * file's cache key for whatever reason (device may be locked). However,
2810 * we do not need it since we are going to use the temporary HFS-wide resize key
2811 * which is generated once we start relocating file content. If this file's I/O
2812 * should be done using the resize key, it will have been supplied already, so
2813 * do not attach the file's cp blob to the buffer.
2814 */
2815 if ((cp->c_cpentry->cp_flags & CP_RELOCATION_INFLIGHT) == 0) {
2816 buf_setcpaddr(bp, cp->c_cpentry);
2817 }
2818 }
2819 #endif /* CONFIG_PROTECT */
2820
2821 error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
2822
2823 return error;
2824 }
2825
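/*
 * Clear the cnode's modified state and timestamp-touch flags without
 * forcing a full catalog update; used by truncate paths that want to
 * skip updating the cnode's times.
 */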
2826 static int
2827 hfs_minorupdate(struct vnode *vp) {
2828 struct cnode *cp = VTOC(vp);
2829 cp->c_flag &= ~C_MODIFIED;
2830 cp->c_touch_acctime = 0;
2831 cp->c_touch_chgtime = 0;
2832 cp->c_touch_modtime = 0;
2833
2834 return 0;
2835 }
2836
2837 int
2838 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipupdate, vfs_context_t context)
2839 {
2840 register struct cnode *cp = VTOC(vp);
2841 struct filefork *fp = VTOF(vp);
2842 struct proc *p = vfs_context_proc(context);
2843 kauth_cred_t cred = vfs_context_ucred(context);
2844 int retval;
2845 off_t bytesToAdd;
2846 off_t actualBytesAdded;
2847 off_t filebytes;
2848 u_int32_t fileblocks;
2849 int blksize;
2850 struct hfsmount *hfsmp;
2851 int lockflags;
2852
2853 blksize = VTOVCB(vp)->blockSize;
2854 fileblocks = fp->ff_blocks;
2855 filebytes = (off_t)fileblocks * (off_t)blksize;
2856
2857 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
2858 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
2859
2860 if (length < 0)
2861 return (EINVAL);
2862
2863 /* This should only happen with a corrupt filesystem */
2864 if ((off_t)fp->ff_size < 0)
2865 return (EINVAL);
2866
2867 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
2868 return (EFBIG);
2869
2870 hfsmp = VTOHFS(vp);
2871
2872 retval = E_NONE;
2873
2874 /* Files that are changing size are not hot file candidates. */
2875 if (hfsmp->hfc_stage == HFC_RECORDING) {
2876 fp->ff_bytesread = 0;
2877 }
2878
2879 /*
2880 * We cannot just check if fp->ff_size == length (as an optimization)
2881 * since there may be extra physical blocks that also need truncation.
2882 */
2883 #if QUOTA
2884 if ((retval = hfs_getinoquota(cp)))
2885 return(retval);
2886 #endif /* QUOTA */
2887
2888 /*
2889 * Lengthen the size of the file. We must ensure that the
2890 * last byte of the file is allocated. Since the smallest
2891 * value of ff_size is 0, length will be at least 1.
2892 */
2893 if (length > (off_t)fp->ff_size) {
2894 #if QUOTA
2895 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
2896 cred, 0);
2897 if (retval)
2898 goto Err_Exit;
2899 #endif /* QUOTA */
2900 /*
2901 * If we don't have enough physical space then
2902 * we need to extend the physical size.
2903 */
2904 if (length > filebytes) {
2905 int eflags;
2906 u_int32_t blockHint = 0;
2907
2908 /* All or nothing and don't round up to clumpsize. */
2909 eflags = kEFAllMask | kEFNoClumpMask;
2910
2911 if (cred && suser(cred, NULL) != 0)
2912 eflags |= kEFReserveMask; /* keep a reserve */
2913
2914 /*
2915 * Allocate Journal and Quota files in metadata zone.
2916 */
2917 if (filebytes == 0 &&
2918 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
2919 hfs_virtualmetafile(cp)) {
2920 eflags |= kEFMetadataMask;
2921 blockHint = hfsmp->hfs_metazone_start;
2922 }
2923 if (hfs_start_transaction(hfsmp) != 0) {
2924 retval = EINVAL;
2925 goto Err_Exit;
2926 }
2927
2928 /* Protect extents b-tree and allocation bitmap */
2929 lockflags = SFL_BITMAP;
2930 if (overflow_extents(fp))
2931 lockflags |= SFL_EXTENTS;
2932 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2933
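/* Keep extending until the physical EOF reaches the requested length;
* bail out of the loop if ExtendFileC stops making progress.
*/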
2934 while ((length > filebytes) && (retval == E_NONE)) {
2935 bytesToAdd = length - filebytes;
2936 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
2937 (FCB*)fp,
2938 bytesToAdd,
2939 blockHint,
2940 eflags,
2941 &actualBytesAdded));
2942
2943 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2944 if (actualBytesAdded == 0 && retval == E_NONE) {
2945 if (length > filebytes)
2946 length = filebytes;
2947 break;
2948 }
2949 } /* endwhile */
2950
2951 hfs_systemfile_unlock(hfsmp, lockflags);
2952
2953 if (hfsmp->jnl) {
2954 if (skipupdate) {
2955 (void) hfs_minorupdate(vp);
2956 }
2957 else {
2958 (void) hfs_update(vp, TRUE);
2959 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2960 }
2961 }
2962
2963 hfs_end_transaction(hfsmp);
2964
2965 if (retval)
2966 goto Err_Exit;
2967
2968 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2969 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
2970 }
2971
2972 if (!(flags & IO_NOZEROFILL)) {
2973 if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
2974 struct rl_entry *invalid_range;
2975 off_t zero_limit;
2976
2977 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
2978 if (length < zero_limit) zero_limit = length;
2979
2980 if (length > (off_t)fp->ff_size) {
2981 struct timeval tv;
2982
2983 /* Extending the file: time to fill out the current last page with zeroes? */
2984 if ((fp->ff_size & PAGE_MASK_64) &&
2985 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
2986 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
2987
2988 /* There's some valid data at the start of the (current) last page
2989 of the file, so zero out the remainder of that page to ensure the
2990 entire page contains valid data. Since there is no invalid range
2991 possible past the (current) eof, there's no need to remove anything
2992 from the invalid range list before calling cluster_write(): */
2993 hfs_unlock(cp);
2994 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
2995 fp->ff_size, (off_t)0,
2996 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
2997 hfs_lock(cp, HFS_FORCE_LOCK);
2998 if (retval) goto Err_Exit;
2999
3000 /* Merely invalidate the remaining area, if necessary: */
3001 if (length > zero_limit) {
3002 microuptime(&tv);
3003 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
3004 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
3005 }
3006 } else {
3007 /* The page containing the (current) eof is invalid: just add the
3008 remainder of the page to the invalid list, along with the area
3009 being newly allocated:
3010 */
3011 microuptime(&tv);
3012 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
3013 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
3014 }
3015 }
3016 } else {
3017 panic("hfs_truncate: invoked on non-UBC object?!");
3018 }
3019 }
3020 cp->c_touch_modtime = TRUE;
3021 fp->ff_size = length;
3022
3023 } else { /* Shorten the size of the file */
3024
3025 if ((off_t)fp->ff_size > length) {
3026 /* Any space previously marked as invalid is now irrelevant: */
3027 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
3028 }
3029
3030 /*
3031 * Account for any unmapped blocks. Note that the new
3032 * file length can still end up with unmapped blocks.
3033 */
3034 if (fp->ff_unallocblocks > 0) {
3035 u_int32_t finalblks;
3036 u_int32_t loanedBlocks;
3037
3038 HFS_MOUNT_LOCK(hfsmp, TRUE);
3039
3040 loanedBlocks = fp->ff_unallocblocks;
3041 cp->c_blocks -= loanedBlocks;
3042 fp->ff_blocks -= loanedBlocks;
3043 fp->ff_unallocblocks = 0;
3044
3045 hfsmp->loanedBlocks -= loanedBlocks;
3046
3047 finalblks = (length + blksize - 1) / blksize;
3048 if (finalblks > fp->ff_blocks) {
3049 /* calculate required unmapped blocks */
3050 loanedBlocks = finalblks - fp->ff_blocks;
3051 hfsmp->loanedBlocks += loanedBlocks;
3052
3053 fp->ff_unallocblocks = loanedBlocks;
3054 cp->c_blocks += loanedBlocks;
3055 fp->ff_blocks += loanedBlocks;
3056 }
3057 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3058 }
3059
3060 /*
3061 * For a TBE process the deallocation of the file blocks is
3062 * delayed until the file is closed. And hfs_close calls
3063 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
3064 * isn't set, we make sure this isn't a TBE process.
3065 */
3066 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
3067 #if QUOTA
3068 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
3069 #endif /* QUOTA */
3070 if (hfs_start_transaction(hfsmp) != 0) {
3071 retval = EINVAL;
3072 goto Err_Exit;
3073 }
3074
3075 if (fp->ff_unallocblocks == 0) {
3076 /* Protect extents b-tree and allocation bitmap */
3077 lockflags = SFL_BITMAP;
3078 if (overflow_extents(fp))
3079 lockflags |= SFL_EXTENTS;
3080 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3081
3082 retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0,
3083 FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));
3084
3085 hfs_systemfile_unlock(hfsmp, lockflags);
3086 }
3087 if (hfsmp->jnl) {
3088 if (retval == 0) {
3089 fp->ff_size = length;
3090 }
3091 if (skipupdate) {
3092 (void) hfs_minorupdate(vp);
3093 }
3094 else {
3095 (void) hfs_update(vp, TRUE);
3096 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3097 }
3098 }
3099 hfs_end_transaction(hfsmp);
3100
3101 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
3102 if (retval)
3103 goto Err_Exit;
3104 #if QUOTA
3105 /* These are bytesreleased */
3106 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
3107 #endif /* QUOTA */
3108 }
3109 /* Only set update flag if the logical length changes */
3110 if ((off_t)fp->ff_size != length)
3111 cp->c_touch_modtime = TRUE;
3112 fp->ff_size = length;
3113 }
3114 if (cp->c_mode & (S_ISUID | S_ISGID)) {
3115 if (!vfs_context_issuser(context)) {
3116 cp->c_mode &= ~(S_ISUID | S_ISGID);
3117 skipupdate = 0;
3118 }
3119 }
3120 if (skipupdate) {
3121 retval = hfs_minorupdate(vp);
3122 }
3123 else {
3124 cp->c_touch_chgtime = TRUE; /* status changed */
3125 cp->c_touch_modtime = TRUE; /* file data was modified */
3126 retval = hfs_update(vp, MNT_WAIT);
3127 }
3128 if (retval) {
3129 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
3130 -1, -1, -1, retval, 0);
3131 }
3132
3133 Err_Exit:
3134
3135 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
3136 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
3137
3138 return (retval);
3139 }
3140
3141 /*
3142 * Preparation which must be done prior to deleting the catalog record
3143 * of a file or directory. In order to keep the on-disk state as safe as possible,
3144 * we remove the catalog entry before releasing the bitmap blocks and the
3145 * overflow extent records. However, some work must be done prior to deleting
3146 * the catalog record.
3147 *
3148 * When calling this function, the cnode must exist both in memory and on-disk.
3149 * If there are both resource fork and data fork vnodes, this function should
3150 * be called on both.
3151 */
3152
3153 int
3154 hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {
3155
3156 struct filefork *fp = VTOF(vp);
3157 struct cnode *cp = VTOC(vp);
3158 #if QUOTA
3159 int retval = 0;
3160 #endif /* QUOTA */
3161
3162 /* Cannot truncate an HFS directory! */
3163 if (vnode_isdir(vp)) {
3164 return (EISDIR);
3165 }
3166
3167 /*
3168 * See the comment below in hfs_truncate for why we need to call
3169 * setsize here. Essentially we want to avoid pending IO if we
3170 * already know that the blocks are going to be released here.
3171 * This function is only called when totally removing all storage for a file, so
3172 * we can take a shortcut and immediately call ubc_setsize(vp, 0).
3173 */
3174 ubc_setsize(vp, 0);
3175
3176 /* This should only happen with a corrupt filesystem */
3177 if ((off_t)fp->ff_size < 0)
3178 return (EINVAL);
3179
3180 /*
3181 * We cannot just check if fp->ff_size == length (as an optimization)
3182 * since there may be extra physical blocks that also need truncation.
3183 */
3184 #if QUOTA
3185 if ((retval = hfs_getinoquota(cp))) {
3186 return(retval);
3187 }
3188 #endif /* QUOTA */
3189
3190 /* Wipe out any invalid ranges which have yet to be backed by disk */
3191 rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);
3192
3193 /*
3194 * Account for any unmapped blocks. Since we're deleting the
3195 * entire file, we don't have to worry about just shrinking
3196 * to a smaller number of borrowed blocks.
3197 */
3198 if (fp->ff_unallocblocks > 0) {
3199 u_int32_t loanedBlocks;
3200
3201 HFS_MOUNT_LOCK(hfsmp, TRUE);
3202
3203 loanedBlocks = fp->ff_unallocblocks;
3204 cp->c_blocks -= loanedBlocks;
3205 fp->ff_blocks -= loanedBlocks;
3206 fp->ff_unallocblocks = 0;
3207
3208 hfsmp->loanedBlocks -= loanedBlocks;
3209
3210 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3211 }
3212
3213 return 0;
3214 }
3215
3216
3217 /*
3218 * Special wrapper around calling TruncateFileC. This function is usable
3219 * even when the catalog record does not exist any longer, making it ideal
3220 * for use when deleting a file. The simplification here is that we know
3221 * that we are releasing all blocks.
3222 *
3223 * Note that this function may be called when there is no vnode backing
3224 * the file fork in question. We may call this from hfs_vnop_inactive
3225 * to clear out resource fork data (and may not want to clear out the data
3226 * fork yet). As a result, we pointer-check both sets of inputs before
3227 * doing anything with them.
3228 *
3229 * The caller is responsible for saving off a copy of the filefork(s)
3230 * embedded within the cnode prior to calling this function. The pointers
3231 * supplied as arguments must be valid even if the cnode is no longer valid.
3232 */
3233
3234 int
3235 hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
3236 struct filefork *rsrcfork, u_int32_t fileid) {
3237
3238 off_t filebytes;
3239 u_int32_t fileblocks;
3240 int blksize = 0;
3241 int error = 0;
3242 int lockflags;
3243
3244 blksize = hfsmp->blockSize;
3245
3246 /* Data Fork */
3247 if ((datafork != NULL) && (datafork->ff_blocks > 0)) {
3248 fileblocks = datafork->ff_blocks;
3249 filebytes = (off_t)fileblocks * (off_t)blksize;
3250
3251 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3252
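/* Release the fork's blocks in chunks of at most HFS_BIGFILE_SIZE per
* transaction so a single transaction never grows too large.
*/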
3253 while (filebytes > 0) {
3254 if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(datafork)) {
3255 filebytes -= HFS_BIGFILE_SIZE;
3256 } else {
3257 filebytes = 0;
3258 }
3259
3260 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3261 if (hfs_start_transaction(hfsmp) != 0) {
3262 error = EINVAL;
3263 break;
3264 }
3265
3266 if (datafork->ff_unallocblocks == 0) {
3267 /* Protect extents b-tree and allocation bitmap */
3268 lockflags = SFL_BITMAP;
3269 if (overflow_extents(datafork))
3270 lockflags |= SFL_EXTENTS;
3271 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3272
3273 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false));
3274
3275 hfs_systemfile_unlock(hfsmp, lockflags);
3276 }
3277 if (error == 0) {
3278 datafork->ff_size = filebytes;
3279 }
3280 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3281
3282 /* Finish the transaction and start over if necessary */
3283 hfs_end_transaction(hfsmp);
3284
3285 if (error) {
3286 break;
3287 }
3288 }
3289 }
3290
3291 /* Resource fork */
3292 if (error == 0 && (rsrcfork != NULL) && rsrcfork->ff_blocks > 0) {
3293 fileblocks = rsrcfork->ff_blocks;
3294 filebytes = (off_t)fileblocks * (off_t)blksize;
3295
3296 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3297
3298 while (filebytes > 0) {
3299 if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(rsrcfork)) {
3300 filebytes -= HFS_BIGFILE_SIZE;
3301 } else {
3302 filebytes = 0;
3303 }
3304
3305 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3306 if (hfs_start_transaction(hfsmp) != 0) {
3307 error = EINVAL;
3308 break;
3309 }
3310
3311 if (rsrcfork->ff_unallocblocks == 0) {
3312 /* Protect extents b-tree and allocation bitmap */
3313 lockflags = SFL_BITMAP;
3314 if (overflow_extents(rsrcfork))
3315 lockflags |= SFL_EXTENTS;
3316 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3317
3318 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false));
3319
3320 hfs_systemfile_unlock(hfsmp, lockflags);
3321 }
3322 if (error == 0) {
3323 rsrcfork->ff_size = filebytes;
3324 }
3325 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3326
3327 /* Finish the transaction and start over if necessary */
3328 hfs_end_transaction(hfsmp);
3329
3330 if (error) {
3331 break;
3332 }
3333 }
3334 }
3335
3336 return error;
3337 }
3338
3339
3340 /*
3341 * Truncate a cnode to at most length size, freeing (or adding) the
3342 * disk blocks.
3343 */
3344 int
3345 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
3346 int skipupdate, vfs_context_t context)
3347 {
3348 struct filefork *fp = VTOF(vp);
3349 off_t filebytes;
3350 u_int32_t fileblocks;
3351 int blksize, error = 0;
3352 struct cnode *cp = VTOC(vp);
3353
3354 /* Cannot truncate an HFS directory! */
3355 if (vnode_isdir(vp)) {
3356 return (EISDIR);
3357 }
3358 /* A swap file cannot change size. */
3359 if (vnode_isswap(vp) && (length != 0)) {
3360 return (EPERM);
3361 }
3362
3363 blksize = VTOVCB(vp)->blockSize;
3364 fileblocks = fp->ff_blocks;
3365 filebytes = (off_t)fileblocks * (off_t)blksize;
3366
3367 //
3368 // Have to do this here so that we don't wind up with
3369 // i/o pending for blocks that are about to be released
3370 // if we truncate the file.
3371 //
3372 // If skipsetsize is set, then the caller is responsible
3373 // for the ubc_setsize.
3374 //
3375 // Even if skipsetsize is set, if the length is zero we
3376 // want to call ubc_setsize() because as of SnowLeopard
3377 // it will no longer cause any page-ins and it will drop
3378 // any dirty pages so that we don't do any i/o that we
3379 // don't have to. This also prevents a race where i/o
3380 // for truncated blocks may overwrite later data if the
3381 // blocks get reallocated to a different file.
3382 //
3383 if (!skipsetsize || length == 0)
3384 ubc_setsize(vp, length);
3385
3386 // have to loop truncating or growing files that are
3387 // really big because otherwise transactions can get
3388 // enormous and consume too many kernel resources.
3389
3390 if (length < filebytes) {
3391 while (filebytes > length) {
3392 if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
3393 filebytes -= HFS_BIGFILE_SIZE;
3394 } else {
3395 filebytes = length;
3396 }
3397 cp->c_flag |= C_FORCEUPDATE;
3398 error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
3399 if (error)
3400 break;
3401 }
3402 } else if (length > filebytes) {
3403 while (filebytes < length) {
3404 if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
3405 filebytes += HFS_BIGFILE_SIZE;
3406 } else {
3407 filebytes = length;
3408 }
3409 cp->c_flag |= C_FORCEUPDATE;
3410 error = do_hfs_truncate(vp, filebytes, flags, skipupdate, context);
3411 if (error)
3412 break;
3413 }
3414 } else /* Same logical size */ {
3415
3416 error = do_hfs_truncate(vp, length, flags, skipupdate, context);
3417 }
3418 /* Files that are changing size are not hot file candidates. */
3419 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
3420 fp->ff_bytesread = 0;
3421 }
3422
3423 return (error);
3424 }
3425
3426
3427
3428 /*
3429 * Preallocate file storage space.
3430 */
3431 int
3432 hfs_vnop_allocate(struct vnop_allocate_args /* {
3433 vnode_t a_vp;
3434 off_t a_length;
3435 u_int32_t a_flags;
3436 off_t *a_bytesallocated;
3437 off_t a_offset;
3438 vfs_context_t a_context;
3439 } */ *ap)
3440 {
3441 struct vnode *vp = ap->a_vp;
3442 struct cnode *cp;
3443 struct filefork *fp;
3444 ExtendedVCB *vcb;
3445 off_t length = ap->a_length;
3446 off_t startingPEOF;
3447 off_t moreBytesRequested;
3448 off_t actualBytesAdded;
3449 off_t filebytes;
3450 u_int32_t fileblocks;
3451 int retval, retval2;
3452 u_int32_t blockHint;
3453 u_int32_t extendFlags; /* For call to ExtendFileC */
3454 struct hfsmount *hfsmp;
3455 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
3456 int lockflags;
3457 time_t orig_ctime;
3458
3459 *(ap->a_bytesallocated) = 0;
3460
3461 if (!vnode_isreg(vp))
3462 return (EISDIR);
3463 if (length < (off_t)0)
3464 return (EINVAL);
3465
3466 cp = VTOC(vp);
3467
3468 orig_ctime = VTOC(vp)->c_ctime;
3469
3470 check_for_tracked_file(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);
3471
3472 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
3473
3474 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
3475 goto Err_Exit;
3476 }
3477
3478 fp = VTOF(vp);
3479 hfsmp = VTOHFS(vp);
3480 vcb = VTOVCB(vp);
3481
3482 fileblocks = fp->ff_blocks;
3483 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
3484
3485 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
3486 retval = EINVAL;
3487 goto Err_Exit;
3488 }
3489
3490 /* Fill in the flags word for the call to Extend the file */
3491
3492 extendFlags = kEFNoClumpMask;
3493 if (ap->a_flags & ALLOCATECONTIG)
3494 extendFlags |= kEFContigMask;
3495 if (ap->a_flags & ALLOCATEALL)
3496 extendFlags |= kEFAllMask;
3497 if (cred && suser(cred, NULL) != 0)
3498 extendFlags |= kEFReserveMask;
3499 if (hfs_virtualmetafile(cp))
3500 extendFlags |= kEFMetadataMask;
3501
3502 retval = E_NONE;
3503 blockHint = 0;
3504 startingPEOF = filebytes;
3505
3506 if (ap->a_flags & ALLOCATEFROMPEOF)
3507 length += filebytes;
3508 else if (ap->a_flags & ALLOCATEFROMVOL)
3509 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
3510
3511 /* If no changes are necessary, then we're done */
3512 if (filebytes == length)
3513 goto Std_Exit;
3514
3515 /*
3516 * Lengthen the size of the file. We must ensure that the
3517 * last byte of the file is allocated. Since the smallest
3518 * value of filebytes is 0, length will be at least 1.
3519 */
3520 if (length > filebytes) {
3521 off_t total_bytes_added = 0, orig_request_size;
3522
3523 orig_request_size = moreBytesRequested = length - filebytes;
3524
3525 #if QUOTA
3526 retval = hfs_chkdq(cp,
3527 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
3528 cred, 0);
3529 if (retval)
3530 goto Err_Exit;
3531
3532 #endif /* QUOTA */
3533 /*
3534 * Metadata zone checks.
3535 */
3536 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
3537 /*
3538 * Allocate Journal and Quota files in metadata zone.
3539 */
3540 if (hfs_virtualmetafile(cp)) {
3541 blockHint = hfsmp->hfs_metazone_start;
3542 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
3543 (blockHint <= hfsmp->hfs_metazone_end)) {
3544 /*
3545 * Move blockHint outside metadata zone.
3546 */
3547 blockHint = hfsmp->hfs_metazone_end + 1;
3548 }
3549 }
3550
3551
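/* Allocate in chunks of at most HFS_BIGFILE_SIZE per transaction to
* keep individual transactions bounded.
*/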
3552 while ((length > filebytes) && (retval == E_NONE)) {
3553 off_t bytesRequested;
3554
3555 if (hfs_start_transaction(hfsmp) != 0) {
3556 retval = EINVAL;
3557 goto Err_Exit;
3558 }
3559
3560 /* Protect extents b-tree and allocation bitmap */
3561 lockflags = SFL_BITMAP;
3562 if (overflow_extents(fp))
3563 lockflags |= SFL_EXTENTS;
3564 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3565
3566 if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
3567 bytesRequested = HFS_BIGFILE_SIZE;
3568 } else {
3569 bytesRequested = moreBytesRequested;
3570 }
3571
3572 if (extendFlags & kEFContigMask) {
3573 // if we're on a sparse device, this will force it to do a
3574 // full scan to find the space needed.
3575 hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
3576 }
3577
3578 retval = MacToVFSError(ExtendFileC(vcb,
3579 (FCB*)fp,
3580 bytesRequested,
3581 blockHint,
3582 extendFlags,
3583 &actualBytesAdded));
3584
3585 if (retval == E_NONE) {
3586 *(ap->a_bytesallocated) += actualBytesAdded;
3587 total_bytes_added += actualBytesAdded;
3588 moreBytesRequested -= actualBytesAdded;
3589 if (blockHint != 0) {
3590 blockHint += actualBytesAdded / vcb->blockSize;
3591 }
3592 }
3593 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
3594
3595 hfs_systemfile_unlock(hfsmp, lockflags);
3596
3597 if (hfsmp->jnl) {
3598 (void) hfs_update(vp, TRUE);
3599 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3600 }
3601
3602 hfs_end_transaction(hfsmp);
3603 }
3604
3605
3606 /*
3607 * if we get an error and no changes were made then exit
3608 * otherwise we must do the hfs_update to reflect the changes
3609 */
3610 if (retval && (startingPEOF == filebytes))
3611 goto Err_Exit;
3612
3613 /*
3614 * Adjust actualBytesAdded to be allocation block aligned, not
3615 * clump size aligned.
3616 * NOTE: So what we are reporting does not affect reality
3617 * until the file is closed, when we truncate the file to allocation
3618 * block size.
3619 */
3620 if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
3621 *(ap->a_bytesallocated) =
3622 roundup(orig_request_size, (off_t)vcb->blockSize);
3623
3624 } else { /* Shorten the size of the file */
3625
3626 if (fp->ff_size > length) {
3627 /*
3628 * Any buffers that are past the truncation point need to be
3629 * invalidated (to maintain buffer cache consistency).
3630 */
3631 }
3632
3633 retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context);
3634 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
3635
3636 /*
3637 * if we get an error and no changes were made then exit
3638 * otherwise we must do the hfs_update to reflect the changes
3639 */
3640 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
3641 #if QUOTA
3642 /* These are bytesreleased */
3643 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
3644 #endif /* QUOTA */
3645
3646 if (fp->ff_size > filebytes) {
3647 fp->ff_size = filebytes;
3648
3649 hfs_unlock(cp);
3650 ubc_setsize(vp, fp->ff_size);
3651 hfs_lock(cp, HFS_FORCE_LOCK);
3652 }
3653 }
3654
3655 Std_Exit:
3656 cp->c_touch_chgtime = TRUE;
3657 cp->c_touch_modtime = TRUE;
3658 retval2 = hfs_update(vp, MNT_WAIT);
3659
3660 if (retval == 0)
3661 retval = retval2;
3662 Err_Exit:
3663 hfs_unlock_truncate(cp, 0);
3664 hfs_unlock(cp);
3665 return (retval);
3666 }
3667
3668
3669 /*
3670 * Pagein for HFS filesystem
3671 */
3672 int
3673 hfs_vnop_pagein(struct vnop_pagein_args *ap)
3674 /*
3675 struct vnop_pagein_args {
3676 vnode_t a_vp,
3677 upl_t a_pl,
3678 vm_offset_t a_pl_offset,
3679 off_t a_f_offset,
3680 size_t a_size,
3681 int a_flags
3682 vfs_context_t a_context;
3683 };
3684 */
3685 {
3686 vnode_t vp;
3687 struct cnode *cp;
3688 struct filefork *fp;
3689 int error = 0;
3690 upl_t upl;
3691 upl_page_info_t *pl;
3692 off_t f_offset;
3693 int offset;
3694 int isize;
3695 int pg_index;
3696 boolean_t truncate_lock_held = FALSE;
3697 boolean_t file_converted = FALSE;
3698 kern_return_t kret;
3699
3700 vp = ap->a_vp;
3701 cp = VTOC(vp);
3702 fp = VTOF(vp);
3703
3704 #if CONFIG_PROTECT
3705 if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) {
3706 return error;
3707 }
3708 #endif /* CONFIG_PROTECT */
3709
3710 if (ap->a_pl != NULL) {
3711 /*
3712 * this can only happen for swap files now that
3713 * we're asking for V2 paging behavior...
3714 * so don't need to worry about decompression, or
3715 * keeping track of blocks read or taking the truncate lock
3716 */
3717 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
3718 ap->a_size, (off_t)fp->ff_size, ap->a_flags);
3719 goto pagein_done;
3720 }
3721
3722 retry_pagein:
3723 /*
3724 * take truncate lock (shared/recursive) to guard against
3725 * zero-fill thru fsync interfering, but only for v2
3726 *
3727 * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
3728 * lock shared and we are allowed to recurse 1 level if this thread already
3729 * owns the lock exclusively... this can legally occur
3730 * if we are doing a shrinking ftruncate against a file
3731 * that is mapped private, and the pages being truncated
3732 * do not currently exist in the cache... in that case
3733 * we will have to page-in the missing pages in order
3734 * to provide them to the private mapping... we must
3735 * also call hfs_unlock_truncate with a positive been_recursed
3736 * arg to indicate that if we have recursed, there is no need to drop
3737 * the lock. Allowing this simple recursion is necessary
3738 * in order to avoid a certain deadlock... since the ftruncate
3739 * already holds the truncate lock exclusively, if we try
3740 * to acquire it shared to protect the pagein path, we will
3741 * hang this thread
3742 *
3743 * NOTE: The if () block below is a workaround in order to prevent a
3744 * VM deadlock. See rdar://7853471.
3745 *
3746 * If we are in a forced unmount, then launchd will still have the
3747 * dyld_shared_cache file mapped as it is trying to reboot. If we
3748 * take the truncate lock here to service a page fault, then our
3749 * thread could deadlock with the forced-unmount. The forced unmount
3750 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
3751 * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount
3752 * thread will think it needs to copy all of the data out of the file
3753 * and into a VM copy object. If we hold the cnode lock here, then that
3754 * VM operation will not be able to proceed, because we'll set a busy page
3755 * before attempting to grab the lock. Note that this isn't as simple as "don't
3756 * call ubc_setsize" because doing that would just shift the problem to the
3757 * ubc_msync done before the vnode is reclaimed.
3758 *
3759 * So, if a forced unmount on this volume is in flight AND the cnode is
3760 * marked C_DELETED, then just go ahead and do the page in without taking
3761 * the lock (thus suspending pagein_v2 semantics temporarily). Since it's on a file
3762 * that is not going to be available on the next mount, this seems like an
3763 * OK solution from a correctness point of view, even though it is hacky.
3764 */
3765 if (vfs_isforce(vp->v_mount)) {
3766 if (cp->c_flag & C_DELETED) {
3767 /* If we don't get it, then just go ahead and operate without the lock */
3768 truncate_lock_held = hfs_try_trunclock(cp, HFS_RECURSE_TRUNCLOCK);
3769 }
3770 }
3771 else {
3772 hfs_lock_truncate(cp, HFS_RECURSE_TRUNCLOCK);
3773 truncate_lock_held = TRUE;
3774 }
3775
3776 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
3777
3778 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
3779 error = EINVAL;
3780 goto pagein_done;
3781 }
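	/*
	 * Flag the page at a_pl_offset -- the one the faulting thread is actually
	 * waiting on -- as 'needed' within this UPL, giving the VM layer a hint
	 * about which part of the speculative range matters most.
	 */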
3782 ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);
3783
3784 isize = ap->a_size;
3785
3786 /*
3787 * Scan from the back to find the last page in the UPL, so that we
3788 * aren't looking at a UPL that may have already been freed by the
3789 * preceding aborts/completions.
3790 */
3791 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
3792 if (upl_page_present(pl, --pg_index))
3793 break;
3794 if (pg_index == 0) {
3795 /*
3796 * no absent pages were found in the range specified
3797 * just abort the UPL to get rid of it and then we're done
3798 */
3799 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
3800 goto pagein_done;
3801 }
3802 }
3803 /*
3804 * initialize the offset variables before we touch the UPL.
3805 * f_offset is the position into the file, in bytes
3806 * offset is the position into the UPL, in bytes
3807 * pg_index is the pg# of the UPL we're operating on
3808 * isize is the offset into the UPL just past the last page that is present.
3809 */
3810 isize = ((pg_index + 1) * PAGE_SIZE);
3811 pg_index = 0;
3812 offset = 0;
3813 f_offset = ap->a_f_offset;
3814
3815 while (isize) {
3816 int xsize;
3817 int num_of_pages;
3818
3819 if ( !upl_page_present(pl, pg_index)) {
3820 /*
3821 * we asked for RET_ONLY_ABSENT, so it's possible
3822 * to get back empty slots in the UPL.
3823 * just skip over them
3824 */
3825 f_offset += PAGE_SIZE;
3826 offset += PAGE_SIZE;
3827 isize -= PAGE_SIZE;
3828 pg_index++;
3829
3830 continue;
3831 }
3832 /*
3833 * We know that we have at least one absent page.
3834 * Now check how many we have in a row.
3835 */
3836 num_of_pages = 1;
3837 xsize = isize - PAGE_SIZE;
3838
3839 while (xsize) {
3840 if ( !upl_page_present(pl, pg_index + num_of_pages))
3841 break;
3842 num_of_pages++;
3843 xsize -= PAGE_SIZE;
3844 }
3845 xsize = num_of_pages * PAGE_SIZE;
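	/*
	 * xsize now covers the whole run of contiguous present pages (e.g. three
	 * pages in a row gives num_of_pages == 3 and xsize == 3 * PAGE_SIZE), so
	 * the pagein below can service the entire run rather than one page at a time.
	 */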
3846
3847 #if HFS_COMPRESSION
3848 if (VNODE_IS_RSRC(vp)) {
3849 /* allow pageins of the resource fork */
3850 } else {
3851 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
3852
3853 if (compressed) {
3854 if (truncate_lock_held) {
3855 /*
3856 * can't hold the truncate lock when calling into the decmpfs layer
3857 * since it calls back into this layer... even though we're only
3858 * holding the lock in shared mode, and the re-entrant path only
3859 * takes the lock shared, we can deadlock if some other thread
3860 * tries to grab the lock exclusively in between.
3861 */
3862 hfs_unlock_truncate(cp, 1);
3863 truncate_lock_held = FALSE;
3864 }
3865 ap->a_pl = upl;
3866 ap->a_pl_offset = offset;
3867 ap->a_f_offset = f_offset;
3868 ap->a_size = xsize;
3869
3870 error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
3871 /*
3872 * note that decmpfs_pagein_compressed can change the state of
3873 * 'compressed'... it will set it to 0 if the file is no longer
3874 * compressed once the compression lock is successfully taken
3875 * i.e. we would block on that lock while the file is being inflated
3876 */
3877 if (compressed) {
3878 if (error == 0) {
3879 /* successful page-in, update the access time */
3880 VTOC(vp)->c_touch_acctime = TRUE;
3881
3882 /* compressed files are not hot file candidates */
3883 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
3884 fp->ff_bytesread = 0;
3885 }
3886 } else if (error == EAGAIN) {
3887 /*
3888 * EAGAIN indicates someone else already holds the compression lock...
3889 * to avoid deadlocking, we'll abort this range of pages with an
3890 * indication that the pagein needs to be redriven
3891 */
3892 ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
3893 }
3894 goto pagein_next_range;
3895 }
3896 else {
3897 /*
3898 * Set file_converted only if the file became decompressed while we were
3899 * paging in. If it were still compressed, we would re-start the loop using the goto
3900 * in the above block. This avoids overloading truncate_lock_held as our retry_pagein
3901 * condition below, since we could have avoided taking the truncate lock to prevent
3902 * a deadlock in the force unmount case.
3903 */
3904 file_converted = TRUE;
3905 }
3906 }
3907 if (file_converted == TRUE) {
3908 /*
3909 * the file was converted back to a regular file after we first saw it as compressed,
3910 * so we need to abort the UPL, retake the truncate lock, recreate the UPL and start over:
3911 * reset a_size so that we consider what remains of the original request,
3912 * and null out a_pl and a_pl_offset.
3913 *
3914 * We should only be able to get into this block if decmpfs_pagein_compressed
3915 * successfully decompressed the range in question for this file.
3916 */
3917 ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
3918
3919 ap->a_size = isize;
3920 ap->a_pl = NULL;
3921 ap->a_pl_offset = 0;
3922
3923 /* Reset file_converted back to false so that we don't infinite-loop. */
3924 file_converted = FALSE;
3925 goto retry_pagein;
3926 }
3927 }
3928 #endif
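	/*
	 * Hand this run of absent pages to the cluster layer.  cluster_pagein
	 * issues the disk reads and, unless UPL_NOCOMMIT was requested in the
	 * flags, commits (or aborts on error) the pages it covers, so we don't
	 * touch this part of the UPL again.
	 */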
3929 error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);
3930
3931 /*
3932 * Keep track of blocks read.
3933 */
3934 if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
3935 int bytesread;
3936 int took_cnode_lock = 0;
3937
3938 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
3939 bytesread = fp->ff_size;
3940 else
3941 bytesread = xsize;
3942
3943 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
3944 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
3945 hfs_lock(cp, HFS_FORCE_LOCK);
3946 took_cnode_lock = 1;
3947 }
3948 /*
3949 * If this file hasn't been seen since the start of
3950 * the current sampling period then start over.
3951 */
3952 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
3953 struct timeval tv;
3954
3955 fp->ff_bytesread = bytesread;
3956 microtime(&tv);
3957 cp->c_atime = tv.tv_sec;
3958 } else {
3959 fp->ff_bytesread += bytesread;
3960 }
3961 cp->c_touch_acctime = TRUE;
3962 if (took_cnode_lock)
3963 hfs_unlock(cp);
3964 }
3965 pagein_next_range:
3966 f_offset += xsize;
3967 offset += xsize;
3968 isize -= xsize;
3969 pg_index += num_of_pages;
3970
3971 error = 0;
3972 }
3973
3974 pagein_done:
3975 if (truncate_lock_held == TRUE) {
3976 /* Note: 1 is passed to hfs_unlock_truncate as the been_recursed argument */
3977 hfs_unlock_truncate(cp, 1);
3978 }
3979
3980 return (error);
3981 }
3982
3983 /*
3984 * Pageout for HFS filesystem.
3985 */
3986 int
3987 hfs_vnop_pageout(struct vnop_pageout_args *ap)
3988 /*
3989 struct vnop_pageout_args {
3990 vnode_t a_vp,
3991 upl_t a_pl,
3992 vm_offset_t a_pl_offset,
3993 off_t a_f_offset,
3994 size_t a_size,
3995 int a_flags
3996 vfs_context_t a_context;
3997 };
3998 */
3999 {
4000 vnode_t vp = ap->a_vp;
4001 struct cnode *cp;
4002 struct filefork *fp;
4003 int retval = 0;
4004 off_t filesize;
4005 upl_t upl;
4006 upl_page_info_t* pl;
4007 vm_offset_t a_pl_offset;
4008 int a_flags;
4009 int is_pageoutv2 = 0;
4010 kern_return_t kret;
4011
4012 cp = VTOC(vp);
4013 fp = VTOF(vp);
4014
4015 /*
4016 * Figure out where the file ends, for pageout purposes. If
4017 * ff_new_size > ff_size, then we're in the middle of extending the
4018 * file via a write, so it is safe (and necessary) that we be able
4019 * to pageout up to that point.
4020 */
4021 filesize = fp->ff_size;
4022 if (fp->ff_new_size > filesize)
4023 filesize = fp->ff_new_size;
4024
4025 a_flags = ap->a_flags;
4026 a_pl_offset = ap->a_pl_offset;
4027
4028 /*
4029 * we can tell if we're getting the new or old behavior from the UPL
4030 */
4031 if ((upl = ap->a_pl) == NULL) {
4032 int request_flags;
4033
4034 is_pageoutv2 = 1;
4035 /*
4036 * we're in control of any UPL we commit
4037 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
4038 */
4039 a_flags &= ~UPL_NOCOMMIT;
4040 a_pl_offset = 0;
4041
4042 /*
4043 * For V2 semantics, we want to take the cnode truncate lock
4044 * shared to guard against the file size changing via zero-filling.
4045 *
4046 * However, we have to be careful because we may be invoked
4047 * via the ubc_msync path to write out dirty mmap'd pages
4048 * in response to a lock event on a content-protected
4049 * filesystem (e.g. to write out class A files).
4050 * As a result, we want to take the truncate lock 'SHARED' with
4051 * the mini-recursion locktype so that we don't deadlock/panic
4052 * because we may already be holding the truncate lock exclusive, forcing any
4053 * other IOs to block behind us.
4054 */
4055 hfs_lock_truncate(cp, HFS_RECURSE_TRUNCLOCK);
4056
4057 if (a_flags & UPL_MSYNC) {
4058 request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
4059 }
4060 else {
4061 request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
4062 }
4063
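		/*
		 * Build our own UPL over the requested range, asking only for dirty
		 * pages; clean pages come back as empty slots and are skipped in the
		 * scan below.
		 */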
4064 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);
4065
4066 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
4067 retval = EINVAL;
4068 goto pageout_done;
4069 }
4070 }
4071 /*
4072 * from this point forward, upl points at the UPL we're working with;
4073 * it was either passed in or we successfully created it
4074 */
4075
4076 /*
4077 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
4078 * UPL instead of relying on the UPL passed into us. We go ahead and do that here,
4079 * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for
4080 * N dirty ranges in the UPL. Note that this is almost a direct copy of the
4081 * logic in vnode_pageout except that we need to do it after grabbing the truncate
4082 * lock in HFS so that we don't lock invert ourselves.
4083 *
4084 * Note that we can still get into this function on behalf of the default pager with
4085 * non-V2 behavior (swapfiles). However, in that case, we did not grab locks above
4086 * since fsync and other writing threads will grab the locks, then mark the
4087 * relevant pages as busy. But the pageout codepath marks the pages as busy,
4088 * and THEN would attempt to grab the truncate lock, which would result in deadlock. So
4089 * we do not try to grab anything for the pre-V2 case, which should only be accessed
4090 * by the paging/VM system.
4091 */
4092
4093 if (is_pageoutv2) {
4094 off_t f_offset;
4095 int offset;
4096 int isize;
4097 int pg_index;
4098 int error;
4099 int error_ret = 0;
4100
4101 isize = ap->a_size;
4102 f_offset = ap->a_f_offset;
4103
4104 /*
4105 * Scan from the back to find the last page in the UPL, so that we
4106 * aren't looking at a UPL that may have already been freed by the
4107 * preceding aborts/completions.
4108 */
4109 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
4110 if (upl_page_present(pl, --pg_index))
4111 break;
4112 if (pg_index == 0) {
4113 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
4114 goto pageout_done;
4115 }
4116 }
4117
4118 /*
4119 * initialize the offset variables before we touch the UPL.
4120 * a_f_offset is the position into the file, in bytes
4121 * offset is the position into the UPL, in bytes
4122 * pg_index is the pg# of the UPL we're operating on.
4123 * isize is the offset into the UPL just past the last non-clean page.
4124 */
4125 isize = ((pg_index + 1) * PAGE_SIZE);
4126
4127 offset = 0;
4128 pg_index = 0;
4129
4130 while (isize) {
4131 int xsize;
4132 int num_of_pages;
4133
4134 if ( !upl_page_present(pl, pg_index)) {
4135 /*
4136 * we asked for RET_ONLY_DIRTY, so it's possible
4137 * to get back empty slots in the UPL.
4138 * just skip over them
4139 */
4140 f_offset += PAGE_SIZE;
4141 offset += PAGE_SIZE;
4142 isize -= PAGE_SIZE;
4143 pg_index++;
4144
4145 continue;
4146 }
4147 if ( !upl_dirty_page(pl, pg_index)) {
4148 panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
4149 }
4150
4151 /*
4152 * We know that we have at least one dirty page.
4153 * Now check how many we have in a row.
4154 */
4155 num_of_pages = 1;
4156 xsize = isize - PAGE_SIZE;
4157
4158 while (xsize) {
4159 if ( !upl_dirty_page(pl, pg_index + num_of_pages))
4160 break;
4161 num_of_pages++;
4162 xsize -= PAGE_SIZE;
4163 }
4164 xsize = num_of_pages * PAGE_SIZE;
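			/*
			 * xsize now spans the whole run of contiguous dirty pages, so a
			 * single cluster_pageout call below pushes the entire run.
			 */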
4165
4166 if (!vnode_isswap(vp)) {
4167 off_t end_of_range;
4168 int tooklock;
4169
4170 tooklock = 0;
4171
4172 if (cp->c_lockowner != current_thread()) {
4173 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
4174 /*
4175 * we're in the v2 path, so we are the
4176 * owner of the UPL... we may have already
4177 * processed some of the UPL, so abort it
4178 * from the current working offset to the
4179 * end of the UPL
4180 */
4181 ubc_upl_abort_range(upl,
4182 offset,
4183 ap->a_size - offset,
4184 UPL_ABORT_FREE_ON_EMPTY);
4185 goto pageout_done;
4186 }
4187 tooklock = 1;
4188 }
4189 end_of_range = f_offset + xsize - 1;
4190
4191 if (end_of_range >= filesize) {
4192 end_of_range = (off_t)(filesize - 1);
4193 }
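				/*
				 * The range being pushed to disk will now contain live data,
				 * so take any overlapping entries out of the invalid
				 * (not-yet-zero-filled) range list before issuing the pageout.
				 */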
4194 if (f_offset < filesize) {
4195 rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
4196 cp->c_flag |= C_MODIFIED; /* leof is dirty */
4197 }
4198 if (tooklock) {
4199 hfs_unlock(cp);
4200 }
4201 }
4202 if ((error = cluster_pageout(vp, upl, offset, f_offset,
4203 xsize, filesize, a_flags))) {
4204 if (error_ret == 0)
4205 error_ret = error;
4206 }
4207 f_offset += xsize;
4208 offset += xsize;
4209 isize -= xsize;
4210 pg_index += num_of_pages;
4211 }
4212 /* capture errnos bubbled out of cluster_pageout if they occurred */
4213 if (error_ret != 0) {
4214 retval = error_ret;
4215 }
4216 } /* end block for v2 pageout behavior */
4217 else {
4218 if (!vnode_isswap(vp)) {
4219 off_t end_of_range;
4220 int tooklock = 0;
4221
4222 if (cp->c_lockowner != current_thread()) {
4223 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
4224 if (!(a_flags & UPL_NOCOMMIT)) {
4225 ubc_upl_abort_range(upl,
4226 a_pl_offset,
4227 ap->a_size,
4228 UPL_ABORT_FREE_ON_EMPTY);
4229 }
4230 goto pageout_done;
4231 }
4232 tooklock = 1;
4233 }
4234 end_of_range = ap->a_f_offset + ap->a_size - 1;
4235
4236 if (end_of_range >= filesize) {
4237 end_of_range = (off_t)(filesize - 1);
4238 }
4239 if (ap->a_f_offset < filesize) {
4240 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
4241 cp->c_flag |= C_MODIFIED; /* leof is dirty */
4242 }
4243
4244 if (tooklock) {
4245 hfs_unlock(cp);
4246 }
4247 }
4248 /*
4249 * just call cluster_pageout for old pre-v2 behavior
4250 */
4251 retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
4252 ap->a_size, filesize, a_flags);
4253 }
4254
4255 /*
4256 * If data was written, update the modification time of the file.
4257 * If setuid or setgid bits are set and this process is not the
4258 * superuser then clear the setuid and setgid bits as a precaution
4259 * against tampering.
4260 */
4261 if (retval == 0) {
4262 cp->c_touch_modtime = TRUE;
4263 cp->c_touch_chgtime = TRUE;
4264 if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
4265 (vfs_context_suser(ap->a_context) != 0)) {
4266 hfs_lock(cp, HFS_FORCE_LOCK);
4267 cp->c_mode &= ~(S_ISUID | S_ISGID);
4268 hfs_unlock(cp);
4269 }
4270 }
4271
4272 pageout_done:
4273 if (is_pageoutv2) {
4274 /*
4275 * Release the truncate lock. Note that because
4276 * we may have taken the lock recursively by
4277 * being invoked via ubc_msync due to lockdown,
4278 * we should release it recursively, too.
4279 */
4280 hfs_unlock_truncate(cp, 1);
4281 }
4282 return (retval);
4283 }
4284
4285 /*
4286 * Intercept B-Tree node writes to unswap them if necessary.
4287 */
4288 int
4289 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
4290 {
4291 int retval = 0;
4292 register struct buf *bp = ap->a_bp;
4293 register struct vnode *vp = buf_vnode(bp);
4294 BlockDescriptor block;
4295
4296 /* Trap B-Tree writes */
4297 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
4298 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
4299 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
4300 (vp == VTOHFS(vp)->hfc_filevp)) {
4301
4302 /*
4303 * Swap and validate the node if it is in native byte order.
4304 * This is always true on big endian, so we always validate
4305 * before writing here. On little endian, the node typically has
4306 * been swapped and validated when it was written to the journal,
4307 * so we won't do anything here.
4308 */
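		/*
		 * The last u_int16_t in a B-tree node is the offset of record 0, which
		 * is always sizeof(BTNodeDescriptor), i.e. 14 (0x000e).  If it reads as
		 * 0x000e in host byte order, the node is still in native order and has
		 * to be swapped to big endian before it reaches the disk.
		 */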
4309 if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
4310 /* Prepare the block pointer */
4311 block.blockHeader = bp;
4312 block.buffer = (char *)buf_dataptr(bp);
4313 block.blockNum = buf_lblkno(bp);
4314 /* not found in cache ==> came from disk */
4315 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
4316 block.blockSize = buf_count(bp);
4317
4318 /* Endian un-swap B-Tree node */
4319 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
4320 if (retval)
4321 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
4322 }
4323 }
4324
4325 /* This buffer shouldn't be locked anymore, but if it is, clear it */
4326 if ((buf_flags(bp) & B_LOCKED)) {
4327 // XXXdbg
4328 if (VTOHFS(vp)->jnl) {
4329 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
4330 }
4331 buf_clearflags(bp, B_LOCKED);
4332 }
4333 retval = vn_bwrite (ap);
4334
4335 return (retval);
4336 }
4337
4338 /*
4339 * Relocate a file to a new location on disk
4340 * cnode must be locked on entry
4341 *
4342 * Relocation occurs by cloning the file's data from its
4343 * current set of blocks to a new set of blocks. During
4344 * the relocation all of the blocks (old and new) are
4345 * owned by the file.
4346 *
4347 * -----------------
4348 * |///////////////|
4349 * -----------------
4350 * 0 N (file offset)
4351 *
4352 * ----------------- -----------------
4353 * |///////////////| | | STEP 1 (acquire new blocks)
4354 * ----------------- -----------------
4355 * 0 N N+1 2N
4356 *
4357 * ----------------- -----------------
4358 * |///////////////| |///////////////| STEP 2 (clone data)
4359 * ----------------- -----------------
4360 * 0 N N+1 2N
4361 *
4362 * -----------------
4363 * |///////////////| STEP 3 (head truncate blocks)
4364 * -----------------
4365 * 0 N
4366 *
4367 * During steps 2 and 3 page-outs to file offsets less
4368 * than or equal to N are suspended.
4369 *
4370 * During step 3 page-ins to the file get suspended.
4371 */
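/*
 * In the code below, STEP 1 corresponds to the ExtendFileC() call (with
 * kEFContigMask set), STEP 2 to hfs_clonelink()/hfs_clonefile()/
 * hfs_clonesysfile(), and STEP 3 to HeadTruncateFile().
 */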
4372 int
4373 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
4374 struct proc *p)
4375 {
4376 struct cnode *cp;
4377 struct filefork *fp;
4378 struct hfsmount *hfsmp;
4379 u_int32_t headblks;
4380 u_int32_t datablks;
4381 u_int32_t blksize;
4382 u_int32_t growsize;
4383 u_int32_t nextallocsave;
4384 daddr64_t sector_a, sector_b;
4385 int eflags;
4386 off_t newbytes;
4387 int retval;
4388 int lockflags = 0;
4389 int took_trunc_lock = 0;
4390 int started_tr = 0;
4391 enum vtype vnodetype;
4392
4393 vnodetype = vnode_vtype(vp);
4394 if (vnodetype != VREG && vnodetype != VLNK) {
4395 return (EPERM);
4396 }
4397
4398 hfsmp = VTOHFS(vp);
4399 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
4400 return (ENOSPC);
4401 }
4402
4403 cp = VTOC(vp);
4404 fp = VTOF(vp);
4405 if (fp->ff_unallocblocks)
4406 return (EINVAL);
4407
4408 #if CONFIG_PROTECT
4409 /*
4410 * <rdar://problem/9118426>
4411 * Disable HFS file relocation on content-protected filesystems
4412 */
4413 if (cp_fs_protected (hfsmp->hfs_mp)) {
4414 return EINVAL;
4415 }
4416 #endif
4417 /* If it's an SSD, also disable HFS relocation */
4418 if (hfsmp->hfs_flags & HFS_SSD) {
4419 return EINVAL;
4420 }
4421
4422
4423 blksize = hfsmp->blockSize;
4424 if (blockHint == 0)
4425 blockHint = hfsmp->nextAllocation;
4426
4427 if ((fp->ff_size > 0x7fffffff) ||
4428 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
4429 return (EFBIG);
4430 }
4431
4432 //
4433 // We do not believe that this call to hfs_fsync() is
4434 // necessary, and it causes a journal transaction
4435 // deadlock, so we are removing it.
4436 //
4437 //if (vnodetype == VREG && !vnode_issystem(vp)) {
4438 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
4439 // if (retval)
4440 // return (retval);
4441 //}
4442
4443 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
4444 hfs_unlock(cp);
4445 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK);
4446 /* Force lock since callers expect the lock to be held. */
4447 if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
4448 hfs_unlock_truncate(cp, 0);
4449 return (retval);
4450 }
4451 /* No need to continue if file was removed. */
4452 if (cp->c_flag & C_NOEXISTS) {
4453 hfs_unlock_truncate(cp, 0);
4454 return (ENOENT);
4455 }
4456 took_trunc_lock = 1;
4457 }
4458 headblks = fp->ff_blocks;
4459 datablks = howmany(fp->ff_size, blksize);
4460 growsize = datablks * blksize;
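	/*
	 * headblks is the file's current allocation; only the blocks actually
	 * covering ff_size (datablks) are newly allocated and cloned, so the
	 * request below is for growsize == datablks * blksize bytes of
	 * contiguous space.
	 */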
4461 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
4462 if (blockHint >= hfsmp->hfs_metazone_start &&
4463 blockHint <= hfsmp->hfs_metazone_end)
4464 eflags |= kEFMetadataMask;
4465
4466 if (hfs_start_transaction(hfsmp) != 0) {
4467 if (took_trunc_lock)
4468 hfs_unlock_truncate(cp, 0);
4469 return (EINVAL);
4470 }
4471 started_tr = 1;
4472 /*
4473 * Protect the extents b-tree and the allocation bitmap
4474 * during MapFileBlockC and ExtendFileC operations.
4475 */
4476 lockflags = SFL_BITMAP;
4477 if (overflow_extents(fp))
4478 lockflags |= SFL_EXTENTS;
4479 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4480
4481 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
4482 if (retval) {
4483 retval = MacToVFSError(retval);
4484 goto out;
4485 }
4486
4487 /*
4488 * STEP 1 - acquire new allocation blocks.
4489 */
4490 nextallocsave = hfsmp->nextAllocation;
4491 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
4492 if (eflags & kEFMetadataMask) {
4493 HFS_MOUNT_LOCK(hfsmp, TRUE);
4494 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
4495 MarkVCBDirty(hfsmp);
4496 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
4497 }
4498
4499 retval = MacToVFSError(retval);
4500 if (retval == 0) {
4501 cp->c_flag |= C_MODIFIED;
4502 if (newbytes < growsize) {
4503 retval = ENOSPC;
4504 goto restore;
4505 } else if (fp->ff_blocks < (headblks + datablks)) {
4506 printf("hfs_relocate: allocation failed\n");
4507 retval = ENOSPC;
4508 goto restore;
4509 }
4510
4511 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
4512 if (retval) {
4513 retval = MacToVFSError(retval);
4514 } else if ((sector_a + 1) == sector_b) {
4515 retval = ENOSPC;
4516 goto restore;
4517 } else if ((eflags & kEFMetadataMask) &&
4518 ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
4519 hfsmp->hfs_metazone_end)) {
4520 #if 0
4521 const char * filestr;
4522 char emptystr = '\0';
4523
4524 if (cp->c_desc.cd_nameptr != NULL) {
4525 filestr = (const char *)&cp->c_desc.cd_nameptr[0];
4526 } else if (vnode_name(vp) != NULL) {
4527 filestr = vnode_name(vp);
4528 } else {
4529 filestr = &emptystr;
4530 }
4531 #endif
4532 retval = ENOSPC;
4533 goto restore;
4534 }
4535 }
4536 /* Done with system locks and journal for now. */
4537 hfs_systemfile_unlock(hfsmp, lockflags);
4538 lockflags = 0;
4539 hfs_end_transaction(hfsmp);
4540 started_tr = 0;
4541
4542 if (retval) {
4543 /*
4544 * Check to see if failure is due to excessive fragmentation.
4545 */
4546 if ((retval == ENOSPC) &&
4547 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
4548 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
4549 }
4550 goto out;
4551 }
4552 /*
4553 * STEP 2 - clone file data into the new allocation blocks.
4554 */
4555
4556 if (vnodetype == VLNK)
4557 retval = hfs_clonelink(vp, blksize, cred, p);
4558 else if (vnode_issystem(vp))
4559 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
4560 else
4561 retval = hfs_clonefile(vp, headblks, datablks, blksize);
4562
4563 /* Start transaction for step 3 or for a restore. */
4564 if (hfs_start_transaction(hfsmp) != 0) {
4565 retval = EINVAL;
4566 goto out;
4567 }
4568 started_tr = 1;
4569 if (retval)
4570 goto restore;
4571
4572 /*
4573 * STEP 3 - switch to cloned data and remove old blocks.
4574 */
4575 lockflags = SFL_BITMAP;
4576 if (overflow_extents(fp))
4577 lockflags |= SFL_EXTENTS;
4578 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4579
4580 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
4581
4582 hfs_systemfile_unlock(hfsmp, lockflags);
4583 lockflags = 0;
4584 if (retval)
4585 goto restore;
4586 out:
4587 if (took_trunc_lock)
4588 hfs_unlock_truncate(cp, 0);
4589
4590 if (lockflags) {
4591 hfs_systemfile_unlock(hfsmp, lockflags);
4592 lockflags = 0;
4593 }
4594
4595 /* Push cnode's new extent data to disk. */
4596 if (retval == 0) {
4597 (void) hfs_update(vp, MNT_WAIT);
4598 }
4599 if (hfsmp->jnl) {
4600 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
4601 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
4602 else
4603 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
4604 }
4605 exit:
4606 if (started_tr)
4607 hfs_end_transaction(hfsmp);
4608
4609 return (retval);
4610
4611 restore:
4612 if (fp->ff_blocks == headblks) {
4613 if (took_trunc_lock)
4614 hfs_unlock_truncate(cp, 0);
4615 goto exit;
4616 }
4617 /*
4618 * Give back any newly allocated space.
4619 */
4620 if (lockflags == 0) {
4621 lockflags = SFL_BITMAP;
4622 if (overflow_extents(fp))
4623 lockflags |= SFL_EXTENTS;
4624 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4625 }
4626
4627 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
4628 FTOC(fp)->c_fileid, false);
4629
4630 hfs_systemfile_unlock(hfsmp, lockflags);
4631 lockflags = 0;
4632
4633 if (took_trunc_lock)
4634 hfs_unlock_truncate(cp, 0);
4635 goto exit;
4636 }
4637
4638
4639 /*
4640 * Clone a symlink.
4641 *
4642 */
4643 static int
4644 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
4645 {
4646 struct buf *head_bp = NULL;
4647 struct buf *tail_bp = NULL;
4648 int error;
4649
4650
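	/*
	 * A symlink's data fits in a single allocation block (hfs_relocate
	 * rejects anything larger), so just read logical block 0 and rewrite it
	 * at logical block 1 -- the first of the newly allocated blocks -- which
	 * the head truncate in step 3 then makes the file's only block.
	 */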
4651 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
4652 if (error)
4653 goto out;
4654
4655 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
4656 if (tail_bp == NULL) {
4657 error = EIO;
4658 goto out;
4659 }
4660 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
4661 error = (int)buf_bwrite(tail_bp);
4662 out:
4663 if (head_bp) {
4664 buf_markinvalid(head_bp);
4665 buf_brelse(head_bp);
4666 }
4667 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
4668
4669 return (error);
4670 }
4671
4672 /*
4673 * Clone a file's data within the file.
4674 *
4675 */
4676 static int
4677 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
4678 {
4679 caddr_t bufp;
4680 size_t bufsize;
4681 size_t copysize;
4682 size_t iosize;
4683 size_t offset;
4684 off_t writebase;
4685 uio_t auio;
4686 int error = 0;
4687
4688 writebase = blkstart * blksize;
4689 copysize = blkcnt * blksize;
4690 iosize = bufsize = MIN(copysize, 128 * 1024);
4691 offset = 0;
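	/*
	 * Copy the fork in chunks of at most 128 KB: each pass reads from the
	 * old blocks at 'offset' and rewrites the same bytes at
	 * 'writebase + offset', i.e. into the newly allocated blocks, using
	 * IO_NOCACHE so the clone doesn't pollute the buffer cache.
	 */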
4692
4693 hfs_unlock(VTOC(vp));
4694
4695 #if CONFIG_PROTECT
4696 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
4697 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
4698 return (error);
4699 }
4700 #endif /* CONFIG_PROTECT */
4701
4702 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
4703 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
4704 return (ENOMEM);
4705 }
4706
4707 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
4708
4709 while (offset < copysize) {
4710 iosize = MIN(copysize - offset, iosize);
4711
4712 uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
4713 uio_addiov(auio, (uintptr_t)bufp, iosize);
4714
4715 error = cluster_read(vp, auio, copysize, IO_NOCACHE);
4716 if (error) {
4717 printf("hfs_clonefile: cluster_read failed - %d\n", error);
4718 break;
4719 }
4720 if (uio_resid(auio) != 0) {
4721 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
4722 error = EIO;
4723 break;
4724 }
4725
4726 uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
4727 uio_addiov(auio, (uintptr_t)bufp, iosize);
4728
4729 error = cluster_write(vp, auio, writebase + offset,
4730 writebase + offset + iosize,
4731 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
4732 if (error) {
4733 printf("hfs_clonefile: cluster_write failed - %d\n", error);
4734 break;
4735 }
4736 if (uio_resid(auio) != 0) {
4737 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
4738 error = EIO;
4739 break;
4740 }
4741 offset += iosize;
4742 }
4743 uio_free(auio);
4744
4745 if ((blksize & PAGE_MASK)) {
4746 /*
4747 * since the copy may not have started on a PAGE
4748 * boundary (or may not have ended on one), we
4749 * may have pages left in the cache since NOCACHE
4750 * will let partially written pages linger...
4751 * let's just flush the entire range to make sure
4752 * we don't have any pages left that are beyond
4753 * (or intersect) the real LEOF of this file
4754 */
4755 ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
4756 } else {
4757 /*
4758 * No need to call ubc_sync_range or hfs_invalbuf
4759 * since the file was copied using IO_NOCACHE and
4760 * the copy was done starting and ending on a page
4761 * boundary in the file.
4762 */
4763 }
4764 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
4765
4766 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
4767 return (error);
4768 }
4769
4770 /*
4771 * Clone a system (metadata) file.
4772 *
4773 */
4774 static int
4775 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
4776 kauth_cred_t cred, struct proc *p)
4777 {
4778 caddr_t bufp;
4779 char * offset;
4780 size_t bufsize;
4781 size_t iosize;
4782 struct buf *bp = NULL;
4783 daddr64_t blkno;
4784 daddr64_t blk;
4785 daddr64_t start_blk;
4786 daddr64_t last_blk;
4787 int breadcnt;
4788 int i;
4789 int error = 0;
4790
4791
4792 iosize = GetLogicalBlockSize(vp);
4793 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
4794 breadcnt = bufsize / iosize;
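	/*
	 * Transfer in chunks of up to 1 MB, trimmed down to a multiple of the
	 * logical block size; breadcnt is the number of logical blocks moved
	 * per chunk.
	 */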
4795
4796 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
4797 return (ENOMEM);
4798 }
4799 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
4800 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
4801 blkno = 0;
4802
4803 while (blkno < last_blk) {
4804 /*
4805 * Read up to a megabyte
4806 */
4807 offset = bufp;
4808 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
4809 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
4810 if (error) {
4811 printf("hfs_clonesysfile: meta_bread error %d\n", error);
4812 goto out;
4813 }
4814 if (buf_count(bp) != iosize) {
4815 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
4816 goto out;
4817 }
4818 bcopy((char *)buf_dataptr(bp), offset, iosize);
4819
4820 buf_markinvalid(bp);
4821 buf_brelse(bp);
4822 bp = NULL;
4823
4824 offset += iosize;
4825 }
4826
4827 /*
4828 * Write up to a megabyte
4829 */
4830 offset = bufp;
4831 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
4832 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
4833 if (bp == NULL) {
4834 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
4835 error = EIO;
4836 goto out;
4837 }
4838 bcopy(offset, (char *)buf_dataptr(bp), iosize);
4839 error = (int)buf_bwrite(bp);
4840 bp = NULL;
4841 if (error)
4842 goto out;
4843 offset += iosize;
4844 }
4845 }
4846 out:
4847 if (bp) {
4848 buf_brelse(bp);
4849 }
4850
4851 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
4852
4853 error = hfs_fsync(vp, MNT_WAIT, 0, p);
4854
4855 return (error);
4856 }