apple/xnu.git: bsd/hfs/hfs_readwrite.c (blame listing, release xnu-2782.30.5)
1c79356b 1/*
04b8595b 2 * Copyright (c) 2000-2015 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28/* @(#)hfs_readwrite.c 1.0
29 *
9bccf70c 30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
1c79356b 31 *
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
33 *
34 */
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/resourcevar.h>
39#include <sys/kernel.h>
40#include <sys/fcntl.h>
55e303ae 41#include <sys/filedesc.h>
42#include <sys/stat.h>
43#include <sys/buf.h>
316670eb 44#include <sys/buf_internal.h>
1c79356b 45#include <sys/proc.h>
91447636 46#include <sys/kauth.h>
1c79356b 47#include <sys/vnode.h>
2d21ac55 48#include <sys/vnode_internal.h>
1c79356b 49#include <sys/uio.h>
91447636 50#include <sys/vfs_context.h>
51#include <sys/fsevents.h>
52#include <kern/kalloc.h>
53#include <sys/disk.h>
54#include <sys/sysctl.h>
b0d623f7 55#include <sys/fsctl.h>
316670eb 56#include <sys/mount_internal.h>
22ba694c 57#include <sys/file_internal.h>
58
59#include <miscfs/specfs/specdev.h>
60
1c79356b 61#include <sys/ubc.h>
62#include <sys/ubc_internal.h>
63
1c79356b 64#include <vm/vm_pageout.h>
91447636 65#include <vm/vm_kern.h>
1c79356b 66
67#include <sys/kdebug.h>
68
69#include "hfs.h"
2d21ac55 70#include "hfs_attrlist.h"
1c79356b 71#include "hfs_endian.h"
2d21ac55 72#include "hfs_fsctl.h"
9bccf70c 73#include "hfs_quota.h"
74#include "hfscommon/headers/FileMgrInternal.h"
75#include "hfscommon/headers/BTreesInternal.h"
76#include "hfs_cnode.h"
77#include "hfs_dbg.h"
1c79356b 78
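/*
 * A file is a candidate for cluster I/O when its size is a multiple of
 * 4 KB and no larger than half of MAXPHYSIO.
 */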
79#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
80
81enum {
82 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
83};
84
935ed37a 85/* from bsd/hfs/hfs_vfsops.c */
b0d623f7 86extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
91447636 87
88static int hfs_clonefile(struct vnode *, int, int, int);
89static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
90static int hfs_minorupdate(struct vnode *vp);
91static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
92
93/* from bsd/hfs/hfs_vnops.c */
94extern decmpfs_cnode* hfs_lazy_init_decmpfs_cnode (struct cnode *cp);
95
96
55e303ae 97
8f6c56a5 98int flush_cache_on_write = 0;
6d2010ae 99SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW | CTLFLAG_LOCKED, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
8f6c56a5 100
101/*
102 * Read data from a file.
103 */
1c79356b 104int
91447636 105hfs_vnop_read(struct vnop_read_args *ap)
1c79356b 106{
107 /*
108 struct vnop_read_args {
109 struct vnodeop_desc *a_desc;
110 vnode_t a_vp;
111 struct uio *a_uio;
112 int a_ioflag;
113 vfs_context_t a_context;
114 };
115 */
116
117 uio_t uio = ap->a_uio;
118 struct vnode *vp = ap->a_vp;
119 struct cnode *cp;
120 struct filefork *fp;
121 struct hfsmount *hfsmp;
122 off_t filesize;
123 off_t filebytes;
124 off_t start_resid = uio_resid(uio);
125 off_t offset = uio_offset(uio);
9bccf70c 126 int retval = 0;
6d2010ae 127 int took_truncate_lock = 0;
316670eb 128 int io_throttle = 0;
fe8ab488 129 int throttled_count = 0;
55e303ae 130
9bccf70c 131 /* Preflight checks */
132 if (!vnode_isreg(vp)) {
133 /* can only read regular files */
134 if (vnode_isdir(vp))
135 return (EISDIR);
136 else
137 return (EPERM);
138 }
139 if (start_resid == 0)
9bccf70c 140 return (0); /* Nothing left to do */
91447636 141 if (offset < 0)
9bccf70c 142 return (EINVAL); /* can't read from a negative offset */
143
144 if ((ap->a_ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
145 (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
146 /* Don't allow unencrypted io request from user space */
147 return EPERM;
148 }
149
150
39236c6e 151
152#if HFS_COMPRESSION
153 if (VNODE_IS_RSRC(vp)) {
154 if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
155 return 0;
156 }
157 /* otherwise read the resource fork normally */
158 } else {
159 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
160 if (compressed) {
161 retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
162 if (compressed) {
163 if (retval == 0) {
164 /* successful read, update the access time */
165 VTOC(vp)->c_touch_acctime = TRUE;
166
167 /* compressed files are not hot file candidates */
168 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
169 VTOF(vp)->ff_bytesread = 0;
170 }
171 }
172 return retval;
173 }
174 /* otherwise the file was converted back to a regular file while we were reading it */
175 retval = 0;
316670eb 176 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
177 int error;
178
179 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
180 if (error) {
181 return error;
182 }
183
184 }
185 }
186#endif /* HFS_COMPRESSION */
187
188 cp = VTOC(vp);
189 fp = VTOF(vp);
190 hfsmp = VTOHFS(vp);
191
6d2010ae 192#if CONFIG_PROTECT
316670eb 193 if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {
194 goto exit;
195 }
196#endif
197
198 /*
199 * If this read request originated from a syscall (as opposed to
200 * an in-kernel page fault or something), then set it up for
39236c6e 201 * throttle checks
202 */
203 if (ap->a_ioflag & IO_SYSCALL_DISPATCH) {
204 io_throttle = IO_RETURN_ON_THROTTLE;
205 }
206
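	/*
	 * Restart point: we come back here when cluster_read() returns EAGAIN
	 * because the I/O was throttled and must be retried.
	 */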
207read_again:
208
91447636 209 /* Protect against a size change. */
39236c6e 210 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
6d2010ae 211 took_truncate_lock = 1;
91447636 212
9bccf70c 213 filesize = fp->ff_size;
91447636 214 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
215
216 /*
217 * Check the file size. Note that per POSIX spec, we return 0 at
218 * file EOF, so attempting a read at an offset that is too big
219 * should just return 0 on HFS+. Since the return value was initialized
220 * to 0 above, we just jump to exit. HFS Standard has its own behavior.
221 */
222 if (offset > filesize) {
223 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
224 (offset > (off_t)MAXHFSFILESIZE)) {
225 retval = EFBIG;
226 }
227 goto exit;
9bccf70c 228 }
1c79356b 229
fe8ab488 230 KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_START,
91447636 231 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
1c79356b 232
39236c6e 233 retval = cluster_read(vp, uio, filesize, ap->a_ioflag |io_throttle);
1c79356b 234
91447636 235 cp->c_touch_acctime = TRUE;
1c79356b 236
fe8ab488 237 KERNEL_DEBUG(HFSDBG_READ | DBG_FUNC_END,
91447636 238 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
1c79356b 239
240 /*
241 * Keep track of blocks read
242 */
2d21ac55 243 if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
244 int took_cnode_lock = 0;
245 off_t bytesread;
246
247 bytesread = start_resid - uio_resid(uio);
248
249 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
250 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
39236c6e 251 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
252 took_cnode_lock = 1;
253 }
254 /*
255 * If this file hasn't been seen since the start of
256 * the current sampling period then start over.
257 */
2d21ac55 258 if (cp->c_atime < hfsmp->hfc_timebase) {
259 struct timeval tv;
260
261 fp->ff_bytesread = bytesread;
262 microtime(&tv);
263 cp->c_atime = tv.tv_sec;
55e303ae 264 } else {
91447636 265 fp->ff_bytesread += bytesread;
55e303ae 266 }
267 if (took_cnode_lock)
268 hfs_unlock(cp);
55e303ae 269 }
91447636 270exit:
6d2010ae 271 if (took_truncate_lock) {
39236c6e 272 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
6d2010ae 273 }
274 if (retval == EAGAIN) {
275 throttle_lowpri_io(1);
fe8ab488 276 throttled_count++;
6d2010ae 277
278 retval = 0;
279 goto read_again;
280 }
281 if (throttled_count) {
282 throttle_info_reset_window((uthread_t)get_bsdthread_info(current_thread()));
283 }
9bccf70c 284 return (retval);
285}
286
287/*
288 * Write data to a file.
289 */
1c79356b 290int
91447636 291hfs_vnop_write(struct vnop_write_args *ap)
1c79356b 292{
91447636 293 uio_t uio = ap->a_uio;
9bccf70c 294 struct vnode *vp = ap->a_vp;
295 struct cnode *cp;
296 struct filefork *fp;
297 struct hfsmount *hfsmp;
298 kauth_cred_t cred = NULL;
299 off_t origFileSize;
300 off_t writelimit;
2d21ac55 301 off_t bytesToAdd = 0;
55e303ae 302 off_t actualBytesAdded;
9bccf70c 303 off_t filebytes;
91447636 304 off_t offset;
b0d623f7 305 ssize_t resid;
306 int eflags;
307 int ioflag = ap->a_ioflag;
308 int retval = 0;
309 int lockflags;
310 int cnode_locked = 0;
2d21ac55 311 int partialwrite = 0;
312 int do_snapshot = 1;
313 time_t orig_ctime=VTOC(vp)->c_ctime;
314 int took_truncate_lock = 0;
316670eb 315 int io_return_on_throttle = 0;
fe8ab488 316 int throttled_count = 0;
7ddcb079 317 struct rl_entry *invalid_range;
1c79356b 318
319#if HFS_COMPRESSION
320 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
321 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
322 switch(state) {
323 case FILE_IS_COMPRESSED:
324 return EACCES;
325 case FILE_IS_CONVERTING:
326 /* if FILE_IS_CONVERTING, we allow writes but do not
327 bother with snapshots or else we will deadlock.
328 */
329 do_snapshot = 0;
330 break;
331 default:
332 printf("invalid state %d for compressed file\n", state);
333 /* fall through */
334 }
316670eb 335 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
336 int error;
337
338 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
339 if (error != 0) {
340 return error;
341 }
b0d623f7 342 }
343
344 if (do_snapshot) {
345 check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);
346 }
347
348#endif
349
350 if ((ioflag & (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) ==
351 (IO_SKIP_ENCRYPTION|IO_SYSCALL_DISPATCH)) {
352 /* Don't allow unencrypted io request from user space */
353 return EPERM;
354 }
355
356
357 resid = uio_resid(uio);
358 offset = uio_offset(uio);
1c79356b 359
91447636 360 if (offset < 0)
9bccf70c 361 return (EINVAL);
91447636 362 if (resid == 0)
9bccf70c 363 return (E_NONE);
364 if (!vnode_isreg(vp))
365 return (EPERM); /* Can only write regular files */
366
367 cp = VTOC(vp);
368 fp = VTOF(vp);
91447636 369 hfsmp = VTOHFS(vp);
b4c24cb9 370
6d2010ae 371#if CONFIG_PROTECT
316670eb 372 if ((retval = cp_handle_vnop (vp, CP_WRITE_ACCESS, 0)) != 0) {
6d2010ae
A
373 goto exit;
374 }
375#endif
376
9bccf70c 377 eflags = kEFDeferMask; /* defer file block allocations */
6d2010ae 378#if HFS_SPARSE_DEV
379 /*
380 * When the underlying device is sparse and space
381 * is low (< 8MB), stop doing delayed allocations
382 * and begin doing synchronous I/O.
383 */
384 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
385 (hfs_freeblks(hfsmp, 0) < 2048)) {
386 eflags &= ~kEFDeferMask;
387 ioflag |= IO_SYNC;
388 }
389#endif /* HFS_SPARSE_DEV */
390
391 if ((ioflag & (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) ==
392 (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) {
393 io_return_on_throttle = IO_RETURN_ON_THROTTLE;
394 }
39236c6e 395
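	/*
	 * Restart point: we come back here if we need to re-take the truncate
	 * lock exclusively, or after a throttled (EAGAIN) write is retried.
	 */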
2d21ac55 396again:
397 /*
398 * Protect against a size change.
399 *
400 * Note: If took_truncate_lock is true, then we previously got the lock shared
401 * but needed to upgrade to exclusive. So try getting it exclusive from the
402 * start.
403 */
404 if (ioflag & IO_APPEND || took_truncate_lock) {
39236c6e 405 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
406 }
407 else {
39236c6e 408 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
409 }
410 took_truncate_lock = 1;
91447636 411
6d2010ae 412 /* Update UIO */
413 if (ioflag & IO_APPEND) {
414 uio_setoffset(uio, fp->ff_size);
415 offset = fp->ff_size;
416 }
316670eb 417 if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
418 retval = EPERM;
419 goto exit;
420 }
91447636 421
2d21ac55 422 origFileSize = fp->ff_size;
91447636 423 writelimit = offset + resid;
424 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
425
426 /*
427 * We may need an exclusive truncate lock for several reasons, all
428 * of which are because we may be writing to a (portion of a) block
429 * for the first time, and we need to make sure no readers see the
430 * prior, uninitialized contents of the block. The cases are:
431 *
432 * 1. We have unallocated (delayed allocation) blocks. We may be
433 * allocating new blocks to the file and writing to them.
434 * (A more precise check would be whether the range we're writing
435 * to contains delayed allocation blocks.)
436 * 2. We need to extend the file. The bytes between the old EOF
437 * and the new EOF are not yet initialized. This is important
438 * even if we're not allocating new blocks to the file. If the
439 * old EOF and new EOF are in the same block, we still need to
440 * protect that range of bytes until they are written for the
441 * first time.
442 * 3. The write overlaps some invalid ranges (delayed zero fill; that
443 * part of the file has been allocated, but not yet written).
444 *
445 * If we had a shared lock with the above cases, we need to try to upgrade
446 * to an exclusive lock. If the upgrade fails, we will lose the shared
447 * lock, and will need to take the truncate lock again; the took_truncate_lock
448 * flag will still be set, causing us to try for an exclusive lock next time.
449 *
450 * NOTE: Testing for #3 (delayed zero fill) needs to be done while the cnode
451 * lock is held, since it protects the range lists.
2d21ac55 452 */
6d2010ae 453 if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
454 ((fp->ff_unallocblocks != 0) ||
455 (writelimit > origFileSize))) {
2d21ac55 456 if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
457 /*
458 * Lock upgrade failed and we lost our shared lock, try again.
459 * Note: we do not set took_truncate_lock=0 here. Leaving it
460 * set to 1 will cause us to try to get the lock exclusive.
461 */
462 goto again;
463 }
464 else {
465 /* Store the owner in the c_truncatelockowner field if we successfully upgrade */
466 cp->c_truncatelockowner = current_thread();
467 }
468 }
469
39236c6e 470 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
471 goto exit;
472 }
473 cnode_locked = 1;
474
475 /*
476 * Now that we have the cnode lock, see if there are delayed zero fill ranges
477 * overlapping our write. If so, we need the truncate lock exclusive (see above).
478 */
479 if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
480 (rl_scan(&fp->ff_invalidranges, offset, writelimit-1, &invalid_range) != RL_NOOVERLAP)) {
481 /*
482 * When testing, it appeared that calling lck_rw_lock_shared_to_exclusive() causes
483 * a deadlock, rather than simply returning failure. (That is, it apparently does
484 * not behave like a "try_lock"). Since this condition is rare, just drop the
485 * cnode lock and try again. Since took_truncate_lock is set, we will
486 * automatically take the truncate lock exclusive.
487 */
488 hfs_unlock(cp);
489 cnode_locked = 0;
39236c6e 490 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
7ddcb079 491 goto again;
2d21ac55 492 }
7ddcb079 493
fe8ab488 494 KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_START,
495 (int)offset, uio_resid(uio), (int)fp->ff_size,
496 (int)filebytes, 0);
497
498 /* Check if we do not need to extend the file */
499 if (writelimit <= filebytes) {
91447636 500 goto sizeok;
2d21ac55 501 }
502
503 cred = vfs_context_ucred(ap->a_context);
91447636 504 bytesToAdd = writelimit - filebytes;
505
506#if QUOTA
507 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
508 cred, 0);
509 if (retval)
510 goto exit;
511#endif /* QUOTA */
512
513 if (hfs_start_transaction(hfsmp) != 0) {
514 retval = EINVAL;
515 goto exit;
516 }
517
9bccf70c 518 while (writelimit > filebytes) {
9bccf70c 519 bytesToAdd = writelimit - filebytes;
91447636 520 if (cred && suser(cred, NULL) != 0)
521 eflags |= kEFReserveMask;
522
523 /* Protect extents b-tree and allocation bitmap */
524 lockflags = SFL_BITMAP;
525 if (overflow_extents(fp))
526 lockflags |= SFL_EXTENTS;
527 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
528
529 /* Files that are changing size are not hot file candidates. */
530 if (hfsmp->hfc_stage == HFC_RECORDING) {
531 fp->ff_bytesread = 0;
532 }
91447636 533 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
534 0, eflags, &actualBytesAdded));
535
536 hfs_systemfile_unlock(hfsmp, lockflags);
537
538 if ((actualBytesAdded == 0) && (retval == E_NONE))
539 retval = ENOSPC;
540 if (retval != E_NONE)
541 break;
91447636 542 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
fe8ab488 543 KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_NONE,
91447636 544 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
b4c24cb9 545 }
546 (void) hfs_update(vp, TRUE);
547 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
548 (void) hfs_end_transaction(hfsmp);
b4c24cb9 549
550 /*
551 * If we didn't grow the file enough try a partial write.
552 * POSIX expects this behavior.
553 */
554 if ((retval == ENOSPC) && (filebytes > offset)) {
555 retval = 0;
556 partialwrite = 1;
557 uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
558 resid -= bytesToAdd;
559 writelimit = filebytes;
560 }
91447636 561sizeok:
55e303ae 562 if (retval == E_NONE) {
563 off_t filesize;
564 off_t zero_off;
565 off_t tail_off;
566 off_t inval_start;
567 off_t inval_end;
91447636 568 off_t io_start;
0b4e3aa0 569 int lflag;
0b4e3aa0 570
9bccf70c 571 if (writelimit > fp->ff_size)
572 filesize = writelimit;
573 else
9bccf70c 574 filesize = fp->ff_size;
1c79356b 575
2d21ac55 576 lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
1c79356b 577
578 if (offset <= fp->ff_size) {
579 zero_off = offset & ~PAGE_MASK_64;
580
581 /* Check to see whether the area between the zero_offset and the start
582 of the transfer is invalid and should be zero-filled
583 as part of the transfer:
584 */
585 if (offset > zero_off) {
586 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
587 lflag |= IO_HEADZEROFILL;
588 }
0b4e3aa0 589 } else {
9bccf70c 590 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
0b4e3aa0 591
9bccf70c 592 /* The bytes between fp->ff_size and uio->uio_offset must never be
593 read without being zeroed. The current last block is filled with zeroes
594 if it holds valid data but in all cases merely do a little bookkeeping
595 to track the area from the end of the current last page to the start of
596 the area actually written. For the same reason only the bytes up to the
597 start of the page where this write will start is invalidated; any remainder
598 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
599
600 Note that inval_start, the start of the page after the current EOF,
601 may be past the start of the write, in which case the zeroing
602 will be handled by the cluster_write of the actual data.
603 */
9bccf70c 604 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
91447636 605 inval_end = offset & ~PAGE_MASK_64;
9bccf70c 606 zero_off = fp->ff_size;
0b4e3aa0 607
608 if ((fp->ff_size & PAGE_MASK_64) &&
609 (rl_scan(&fp->ff_invalidranges,
0b4e3aa0 610 eof_page_base,
9bccf70c 611 fp->ff_size - 1,
612 &invalid_range) != RL_NOOVERLAP)) {
613 /* The page containing the EOF is not valid, so the
614 entire page must be made inaccessible now. If the write
615 starts on a page beyond the page containing the eof
616 (inval_end > eof_page_base), add the
617 whole page to the range to be invalidated. Otherwise
618 (i.e. if the write starts on the same page), zero-fill
619 the entire page explicitly now:
620 */
621 if (inval_end > eof_page_base) {
622 inval_start = eof_page_base;
623 } else {
624 zero_off = eof_page_base;
625 };
626 };
627
628 if (inval_start < inval_end) {
91447636 629 struct timeval tv;
630 /* There's some range of data that's going to be marked invalid */
631
632 if (zero_off < inval_start) {
633 /* The pages between inval_start and inval_end are going to be invalidated,
634 and the actual write will start on a page past inval_end. Now's the last
635 chance to zero-fill the page containing the EOF:
636 */
637 hfs_unlock(cp);
638 cnode_locked = 0;
639 retval = cluster_write(vp, (uio_t) 0,
9bccf70c 640 fp->ff_size, inval_start,
91447636 641 zero_off, (off_t)0,
9bccf70c 642 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
39236c6e 643 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
91447636 644 cnode_locked = 1;
0b4e3aa0 645 if (retval) goto ioerr_exit;
91447636 646 offset = uio_offset(uio);
647 };
648
649 /* Mark the remaining area of the newly allocated space as invalid: */
9bccf70c 650 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
651 microuptime(&tv);
652 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
9bccf70c 653 zero_off = fp->ff_size = inval_end;
654 };
655
91447636 656 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
0b4e3aa0 657 };
1c79356b 658
659 /* Check to see whether the area between the end of the write and the end of
660 the page it falls in is invalid and should be zero-filled as part of the transfer:
661 */
662 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
663 if (tail_off > filesize) tail_off = filesize;
664 if (tail_off > writelimit) {
9bccf70c 665 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
666 lflag |= IO_TAILZEROFILL;
667 };
668 };
669
670 /*
671 * if the write starts beyond the current EOF (possibly advanced in the
672 * zeroing of the last block, above), then we'll zero fill from the current EOF
673 * to where the write begins:
674 *
675 * NOTE: If (and ONLY if) the portion of the file about to be written is
676 * before the current EOF it might be marked as invalid now and must be
677 * made readable (removed from the invalid ranges) before cluster_write
678 * tries to write it:
679 */
91447636 680 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
9bccf70c 681 if (io_start < fp->ff_size) {
682 off_t io_end;
683
684 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
9bccf70c 685 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
0b4e3aa0 686 };
687
688 hfs_unlock(cp);
689 cnode_locked = 0;
690
691 /*
692 * We need to tell UBC the fork's new size BEFORE calling
693 * cluster_write, in case any of the new pages need to be
694 * paged out before cluster_write completes (which does happen
695 * in embedded systems due to extreme memory pressure).
696 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
697 * will be, so that it can pass that on to cluster_pageout, and
698 * allow those pageouts.
699 *
700 * We don't update ff_size yet since we don't want pageins to
701 * be able to see uninitialized data between the old and new
702 * EOF, until cluster_write has completed and initialized that
703 * part of the file.
704 *
705 * The vnode pager relies on the file size last given to UBC via
706 * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
707 * ff_size (whichever is larger). NOTE: ff_new_size is always
708 * zero, unless we are extending the file via write.
709 */
710 if (filesize > fp->ff_size) {
711 fp->ff_new_size = filesize;
712 ubc_setsize(vp, filesize);
713 }
9bccf70c 714 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
316670eb 715 tail_off, lflag | IO_NOZERODIRTY | io_return_on_throttle);
2d21ac55 716 if (retval) {
593a1d5f 717 fp->ff_new_size = 0; /* no longer extending; use ff_size */
718
719 if (retval == EAGAIN) {
720 /*
721 * EAGAIN indicates that we still have I/O to do, but
722 * that we now need to be throttled
723 */
724 if (resid != uio_resid(uio)) {
725 /*
726 * did manage to do some I/O before returning EAGAIN
727 */
728 resid = uio_resid(uio);
729 offset = uio_offset(uio);
730
731 cp->c_touch_chgtime = TRUE;
732 cp->c_touch_modtime = TRUE;
fe8ab488 733 hfs_incr_gencount(cp);
734 }
735 if (filesize > fp->ff_size) {
736 /*
737 * we called ubc_setsize before the call to
738 * cluster_write... since we only partially
739 * completed the I/O, we need to
740 * re-adjust our idea of the filesize based
741 * on our interim EOF
742 */
743 ubc_setsize(vp, offset);
744
745 fp->ff_size = offset;
746 }
747 goto exit;
748 }
749 if (filesize > origFileSize) {
750 ubc_setsize(vp, origFileSize);
751 }
752 goto ioerr_exit;
753 }
754
755 if (filesize > origFileSize) {
756 fp->ff_size = filesize;
757
91447636 758 /* Files that are changing size are not hot file candidates. */
593a1d5f 759 if (hfsmp->hfc_stage == HFC_RECORDING) {
91447636 760 fp->ff_bytesread = 0;
593a1d5f 761 }
91447636 762 }
fe8ab488 763 fp->ff_new_size = 0; /* ff_size now has the correct size */
9bccf70c 764 }
765 if (partialwrite) {
766 uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
767 resid += bytesToAdd;
768 }
8f6c56a5 769
2d21ac55 770 // XXXdbg - see radar 4871353 for more info
771 {
772 if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
773 VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
774 }
775 }
55e303ae 776
0b4e3aa0 777ioerr_exit:
778 if (resid > uio_resid(uio)) {
779 if (!cnode_locked) {
780 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
781 cnode_locked = 1;
782 }
783
784 cp->c_touch_chgtime = TRUE;
785 cp->c_touch_modtime = TRUE;
786 hfs_incr_gencount(cp);
787
788 /*
789 * If we successfully wrote any data, and we are not the superuser
790 * we clear the setuid and setgid bits as a precaution against
791 * tampering.
792 */
793 if (cp->c_mode & (S_ISUID | S_ISGID)) {
794 cred = vfs_context_ucred(ap->a_context);
795 if (cred && suser(cred, NULL)) {
796 cp->c_mode &= ~(S_ISUID | S_ISGID);
91447636 797 }
798 }
799 }
800 if (retval) {
801 if (ioflag & IO_UNIT) {
91447636 802 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
fe8ab488 803 0, ap->a_context);
804 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
805 uio_setresid(uio, resid);
806 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
807 }
fe8ab488 808 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio)))
91447636 809 retval = hfs_update(vp, TRUE);
fe8ab488 810
811 /* Updating vcbWrCnt doesn't need to be atomic. */
812 hfsmp->vcbWrCnt++;
1c79356b 813
fe8ab488 814 KERNEL_DEBUG(HFSDBG_WRITE | DBG_FUNC_END,
815 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
816exit:
817 if (cnode_locked)
818 hfs_unlock(cp);
819
820 if (took_truncate_lock) {
39236c6e 821 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
6d2010ae 822 }
823 if (retval == EAGAIN) {
824 throttle_lowpri_io(1);
fe8ab488 825 throttled_count++;
826
827 retval = 0;
828 goto again;
829 }
830 if (throttled_count) {
831 throttle_info_reset_window((uthread_t)get_bsdthread_info(current_thread()));
832 }
9bccf70c 833 return (retval);
834}
835
91447636 836/* support for the "bulk-access" fcntl */
1c79356b 837
91447636 838#define CACHE_LEVELS 16
2d21ac55 839#define NUM_CACHE_ENTRIES (64*16)
840#define PARENT_IDS_FLAG 0x100
841
842struct access_cache {
843 int numcached;
844 int cachehits; /* these two for statistics gathering */
845 int lookups;
846 unsigned int *acache;
2d21ac55 847 unsigned char *haveaccess;
848};
849
850struct access_t {
851 uid_t uid; /* IN: effective user id */
852 short flags; /* IN: access requested (i.e. R_OK) */
853 short num_groups; /* IN: number of groups user belongs to */
854 int num_files; /* IN: number of files to process */
855 int *file_ids; /* IN: array of file ids */
856 gid_t *groups; /* IN: array of groups */
857 short *access; /* OUT: access info for each file (0 for 'has access') */
858} __attribute__((unavailable)); // this structure is for reference purposes only
859
860struct user32_access_t {
861 uid_t uid; /* IN: effective user id */
862 short flags; /* IN: access requested (i.e. R_OK) */
863 short num_groups; /* IN: number of groups user belongs to */
864 int num_files; /* IN: number of files to process */
865 user32_addr_t file_ids; /* IN: array of file ids */
866 user32_addr_t groups; /* IN: array of groups */
867 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
91447636 868};
55e303ae 869
b0d623f7 870struct user64_access_t {
871 uid_t uid; /* IN: effective user id */
872 short flags; /* IN: access requested (i.e. R_OK) */
873 short num_groups; /* IN: number of groups user belongs to */
2d21ac55 874 int num_files; /* IN: number of files to process */
875 user64_addr_t file_ids; /* IN: array of file ids */
876 user64_addr_t groups; /* IN: array of groups */
877 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
91447636 878};
55e303ae 879
880
881// these are the "extended" versions of the above structures
882 // note that it is crucial that they be sized differently than
883 // the regular versions
884struct ext_access_t {
885 uint32_t flags; /* IN: access requested (i.e. R_OK) */
886 uint32_t num_files; /* IN: number of files to process */
887 uint32_t map_size; /* IN: size of the bit map */
888 uint32_t *file_ids; /* IN: Array of file ids */
889 char *bitmap; /* OUT: hash-bitmap of interesting directory ids */
890 short *access; /* OUT: access info for each file (0 for 'has access') */
891 uint32_t num_parents; /* future use */
892 cnid_t *parents; /* future use */
893} __attribute__((unavailable)); // this structure is for reference purposes only
894
895struct user32_ext_access_t {
896 uint32_t flags; /* IN: access requested (i.e. R_OK) */
897 uint32_t num_files; /* IN: number of files to process */
898 uint32_t map_size; /* IN: size of the bit map */
899 user32_addr_t file_ids; /* IN: Array of file ids */
900 user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
901 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
902 uint32_t num_parents; /* future use */
903 user32_addr_t parents; /* future use */
904};
905
b0d623f7 906struct user64_ext_access_t {
907 uint32_t flags; /* IN: access requested (i.e. R_OK) */
908 uint32_t num_files; /* IN: number of files to process */
909 uint32_t map_size; /* IN: size of the bit map */
910 user64_addr_t file_ids; /* IN: array of file ids */
911 user64_addr_t bitmap; /* IN: array of groups */
912 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
2d21ac55 913 uint32_t num_parents;/* future use */
b0d623f7 914 user64_addr_t parents;/* future use */
915};
916
917
918/*
919 * Perform a binary search for the given parent_id. Return value is
920 * the index if there is a match. If no_match_indexp is non-NULL it
921 * will be assigned with the index to insert the item (even if it was
922 * not found).
91447636 923 */
2d21ac55 924static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
91447636 925{
926 int index=-1;
927 unsigned int lo=0;
91447636 928
929 do {
930 unsigned int mid = ((hi - lo)/2) + lo;
931 unsigned int this_id = array[mid];
932
933 if (parent_id == this_id) {
934 hi = mid;
935 break;
91447636 936 }
937
938 if (parent_id < this_id) {
939 hi = mid;
940 continue;
91447636 941 }
942
943 if (parent_id > this_id) {
944 lo = mid + 1;
945 continue;
946 }
947 } while(lo < hi);
948
949 /* check if lo and hi converged on the match */
950 if (parent_id == array[hi]) {
951 index = hi;
952 }
91447636 953
954 if (no_match_indexp) {
955 *no_match_indexp = hi;
956 }
957
958 return index;
959}
960
961
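/*
 * Look up parent_id in the sorted access cache.  Returns 1 and sets
 * *indexp to the matching slot on a hit; returns 0 and sets *indexp to
 * the insertion point on a miss.
 */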
962static int
963lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
964{
965 unsigned int hi;
966 int matches = 0;
967 int index, no_match_index;
91447636 968
969 if (cache->numcached == 0) {
970 *indexp = 0;
971 return 0; // table is empty, so insert at index=0 and report no match
972 }
91447636 973
2d21ac55 974 if (cache->numcached > NUM_CACHE_ENTRIES) {
975 cache->numcached = NUM_CACHE_ENTRIES;
976 }
91447636 977
2d21ac55 978 hi = cache->numcached - 1;
91447636 979
980 index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);
981
982 /* if no existing entry found, find index for new one */
983 if (index == -1) {
984 index = no_match_index;
985 matches = 0;
986 } else {
987 matches = 1;
988 }
989
990 *indexp = index;
991 return matches;
992}
993
994/*
995 * Add a node to the access_cache at the given index (or do a lookup first
996 * to find the index if -1 is passed in). We currently do a replace rather
997 * than an insert if the cache is full.
998 */
999static void
1000add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
1001{
1002 int lookup_index = -1;
1003
1004 /* need to do a lookup first if -1 passed for index */
1005 if (index == -1) {
1006 if (lookup_bucket(cache, &lookup_index, nodeID)) {
1007 if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
1008 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
1009 cache->haveaccess[lookup_index] = access;
1010 }
1011
1012 /* mission accomplished */
1013 return;
1014 } else {
1015 index = lookup_index;
1016 }
1017
1018 }
1019
1020 /* if the cache is full, do a replace rather than an insert */
1021 if (cache->numcached >= NUM_CACHE_ENTRIES) {
1022 cache->numcached = NUM_CACHE_ENTRIES-1;
1023
1024 if (index > cache->numcached) {
1025 index = cache->numcached;
1026 }
1027 }
1028
1029 if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
1030 index++;
1031 }
1032
1033 if (index >= 0 && index < cache->numcached) {
1034 /* only do bcopy if we're inserting */
1035 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
1036 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
1037 }
1038
1039 cache->acache[index] = nodeID;
1040 cache->haveaccess[index] = access;
1041 cache->numcached++;
1042}
1043
1044
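/* Subset of cnode attributes captured by snoop_callback below. */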
1045struct cinfo {
1046 uid_t uid;
1047 gid_t gid;
1048 mode_t mode;
1049 cnid_t parentcnid;
1050 u_int16_t recflags;
1051};
1052
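/*
 * hfs_chash_snoop callback: copy the ownership, mode and parent
 * information of an in-core cnode into a struct cinfo.
 */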
1053static int
fe8ab488 1054snoop_callback(const cnode_t *cp, void *arg)
91447636 1055{
fe8ab488 1056 struct cinfo *cip = arg;
91447636 1057
1058 cip->uid = cp->c_uid;
1059 cip->gid = cp->c_gid;
1060 cip->mode = cp->c_mode;
1061 cip->parentcnid = cp->c_parentcnid;
1062 cip->recflags = cp->c_attr.ca_recflags;
91447636 1063
2d21ac55 1064 return (0);
1065}
1066
1067/*
1068 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
1069 * isn't incore, then go to the catalog.
1070 */
1071static int
b0d623f7 1072do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
2d21ac55 1073 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
91447636 1074{
1075 int error = 0;
1076
1077 /* if this id matches the one the fsctl was called with, skip the lookup */
1078 if (cnid == skip_cp->c_cnid) {
1079 cnattrp->ca_uid = skip_cp->c_uid;
1080 cnattrp->ca_gid = skip_cp->c_gid;
1081 cnattrp->ca_mode = skip_cp->c_mode;
1082 cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
1083 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
2d21ac55 1084 } else {
1085 struct cinfo c_info;
1086
1087 /* otherwise, check the cnode hash in case the file/dir is incore */
1088 error = hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info);
1089
1090 if (error == EACCES) {
1091 // File is deleted
1092 return ENOENT;
1093 } else if (!error) {
1094 cnattrp->ca_uid = c_info.uid;
1095 cnattrp->ca_gid = c_info.gid;
1096 cnattrp->ca_mode = c_info.mode;
1097 cnattrp->ca_recflags = c_info.recflags;
1098 keyp->hfsPlus.parentID = c_info.parentcnid;
1099 } else {
1100 int lockflags;
1101
1102 if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
1103 throttle_lowpri_io(1);
316670eb 1104
fe8ab488 1105 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
316670eb 1106
1107 /* lookup this cnid in the catalog */
1108 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
91447636 1109
fe8ab488 1110 hfs_systemfile_unlock(hfsmp, lockflags);
91447636 1111
1112 cache->lookups++;
1113 }
2d21ac55 1114 }
91447636 1115
2d21ac55 1116 return (error);
91447636 1117}
55e303ae 1118
2d21ac55 1119
1c79356b 1120/*
1121 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
1122 * up to CACHE_LEVELS as we progress towards the root.
1123 */
1124static int
1125do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
b0d623f7 1126 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
1127 struct vfs_context *my_context,
1128 char *bitmap,
1129 uint32_t map_size,
1130 cnid_t* parents,
1131 uint32_t num_parents)
91447636 1132{
1133 int myErr = 0;
1134 int myResult;
1135 HFSCatalogNodeID thisNodeID;
1136 unsigned int myPerms;
1137 struct cat_attr cnattr;
1138 int cache_index = -1, scope_index = -1, scope_idx_start = -1;
1139 CatalogKey catkey;
1140
1141 int i = 0, ids_to_cache = 0;
1142 int parent_ids[CACHE_LEVELS];
1143
1144 thisNodeID = nodeID;
1145 while (thisNodeID >= kRootDirID) {
1146 myResult = 0; /* default to "no access" */
91447636 1147
1148 /* check the cache before resorting to hitting the catalog */
1149
1150 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
1151 * to look any further after hitting cached dir */
1152
1153 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
1154 cache->cachehits++;
1155 myErr = cache->haveaccess[cache_index];
1156 if (scope_index != -1) {
1157 if (myErr == ESRCH) {
1158 myErr = 0;
1159 }
1160 } else {
1161 scope_index = 0; // so we'll just use the cache result
1162 scope_idx_start = ids_to_cache;
1163 }
1164 myResult = (myErr == 0) ? 1 : 0;
1165 goto ExitThisRoutine;
1166 }
1167
1168
1169 if (parents) {
1170 int tmp;
1171 tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
1172 if (scope_index == -1)
1173 scope_index = tmp;
1174 if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
1175 scope_idx_start = ids_to_cache;
1176 }
1177 }
1178
1179 /* remember which parents we want to cache */
1180 if (ids_to_cache < CACHE_LEVELS) {
1181 parent_ids[ids_to_cache] = thisNodeID;
1182 ids_to_cache++;
1183 }
1184 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
1185 if (bitmap && map_size) {
1186 bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
1187 }
1188
1189
1190 /* do the lookup (checks the cnode hash, then the catalog) */
b0d623f7 1191 myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
1192 if (myErr) {
1193 goto ExitThisRoutine; /* no access */
1194 }
1195
1196 /* Root always gets access. */
1197 if (suser(myp_ucred, NULL) == 0) {
1198 thisNodeID = catkey.hfsPlus.parentID;
1199 myResult = 1;
1200 continue;
1201 }
1202
1203 // if the thing has acl's, do the full permission check
1204 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1205 struct vnode *vp;
1206
1207 /* get the vnode for this cnid */
6d2010ae 1208 myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
1209 if ( myErr ) {
1210 myResult = 0;
1211 goto ExitThisRoutine;
1212 }
1213
1214 thisNodeID = VTOC(vp)->c_parentcnid;
1215
1216 hfs_unlock(VTOC(vp));
1217
1218 if (vnode_vtype(vp) == VDIR) {
1219 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
1220 } else {
1221 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
1222 }
1223
1224 vnode_put(vp);
1225 if (myErr) {
1226 myResult = 0;
1227 goto ExitThisRoutine;
1228 }
1229 } else {
1230 unsigned int flags;
1231 int mode = cnattr.ca_mode & S_IFMT;
1232 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp,myp_ucred, theProcPtr);
2d21ac55 1233
1234 if (mode == S_IFDIR) {
1235 flags = R_OK | X_OK;
1236 } else {
1237 flags = R_OK;
1238 }
1239 if ( (myPerms & flags) != flags) {
1240 myResult = 0;
1241 myErr = EACCES;
1242 goto ExitThisRoutine; /* no access */
1243 }
1244
1245 /* up the hierarchy we go */
1246 thisNodeID = catkey.hfsPlus.parentID;
1247 }
1248 }
1249
1250 /* if here, we have access to this node */
1251 myResult = 1;
1252
1253 ExitThisRoutine:
1254 if (parents && myErr == 0 && scope_index == -1) {
1255 myErr = ESRCH;
1256 }
1257
1258 if (myErr) {
1259 myResult = 0;
1260 }
1261 *err = myErr;
1262
1263 /* cache the parent directory(ies) */
1264 for (i = 0; i < ids_to_cache; i++) {
1265 if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
1266 add_node(cache, -1, parent_ids[i], ESRCH);
1267 } else {
1268 add_node(cache, -1, parent_ids[i], myErr);
1269 }
1270 }
1271
1272 return (myResult);
91447636 1273}
1c79356b 1274
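/*
 * Back end of the bulk-access fsctl: check access to each of the passed-in
 * file (or parent directory) IDs and store a per-entry result, 0 or an
 * errno value, in the user-supplied access array.
 */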
1275static int
1276do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
1277 struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
1278{
1279 boolean_t is64bit;
1280
1281 /*
316670eb 1282 * NOTE: on entry, the vnode has an io_ref. In case this vnode
1283 * happens to be in our list of file_ids, we'll note it to
1284 * avoid calling hfs_chashget_nowait() on that id as that
1285 * will cause a "locking against myself" panic.
1286 */
1287 Boolean check_leaf = true;
1288
1289 struct user64_ext_access_t *user_access_structp;
1290 struct user64_ext_access_t tmp_user_access;
1291 struct access_cache cache;
1292
b0d623f7 1293 int error = 0, prev_parent_check_ok=1;
1294 unsigned int i;
1295
1296 short flags;
1297 unsigned int num_files = 0;
1298 int map_size = 0;
1299 int num_parents = 0;
1300 int *file_ids=NULL;
1301 short *access=NULL;
1302 char *bitmap=NULL;
1303 cnid_t *parents=NULL;
1304 int leaf_index;
1305
1306 cnid_t cnid;
1307 cnid_t prevParent_cnid = 0;
1308 unsigned int myPerms;
1309 short myaccess = 0;
1310 struct cat_attr cnattr;
1311 CatalogKey catkey;
1312 struct cnode *skip_cp = VTOC(vp);
1313 kauth_cred_t cred = vfs_context_ucred(context);
1314 proc_t p = vfs_context_proc(context);
1315
1316 is64bit = proc_is64bit(p);
1317
1318 /* initialize the local cache and buffers */
1319 cache.numcached = 0;
1320 cache.cachehits = 0;
1321 cache.lookups = 0;
1322 cache.acache = NULL;
1323 cache.haveaccess = NULL;
1324
1325 /* struct copyin done during dispatch... need to copy file_id array separately */
1326 if (ap->a_data == NULL) {
1327 error = EINVAL;
1328 goto err_exit_bulk_access;
1329 }
1330
1331 if (is64bit) {
b0d623f7 1332 if (arg_size != sizeof(struct user64_ext_access_t)) {
1333 error = EINVAL;
1334 goto err_exit_bulk_access;
1335 }
1336
b0d623f7 1337 user_access_structp = (struct user64_ext_access_t *)ap->a_data;
2d21ac55 1338
1339 } else if (arg_size == sizeof(struct user32_access_t)) {
1340 struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;
1341
1342 // convert an old style bulk-access struct to the new style
1343 tmp_user_access.flags = accessp->flags;
1344 tmp_user_access.num_files = accessp->num_files;
1345 tmp_user_access.map_size = 0;
1346 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
cf7d32b8 1347 tmp_user_access.bitmap = USER_ADDR_NULL;
1348 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1349 tmp_user_access.num_parents = 0;
1350 user_access_structp = &tmp_user_access;
1351
1352 } else if (arg_size == sizeof(struct user32_ext_access_t)) {
1353 struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;
1354
1355 // up-cast from a 32-bit version of the struct
1356 tmp_user_access.flags = accessp->flags;
1357 tmp_user_access.num_files = accessp->num_files;
1358 tmp_user_access.map_size = accessp->map_size;
1359 tmp_user_access.num_parents = accessp->num_parents;
1360
1361 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1362 tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
1363 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1364 tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);
1365
1366 user_access_structp = &tmp_user_access;
1367 } else {
1368 error = EINVAL;
1369 goto err_exit_bulk_access;
1370 }
1371
1372 map_size = user_access_structp->map_size;
1373
1374 num_files = user_access_structp->num_files;
1375
1376 num_parents= user_access_structp->num_parents;
1377
1378 if (num_files < 1) {
1379 goto err_exit_bulk_access;
1380 }
1381 if (num_files > 1024) {
1382 error = EINVAL;
1383 goto err_exit_bulk_access;
1384 }
1385
1386 if (num_parents > 1024) {
1387 error = EINVAL;
1388 goto err_exit_bulk_access;
1389 }
1390
1391 file_ids = (int *) kalloc(sizeof(int) * num_files);
1392 access = (short *) kalloc(sizeof(short) * num_files);
1393 if (map_size) {
1394 bitmap = (char *) kalloc(sizeof(char) * map_size);
1395 }
1396
1397 if (num_parents) {
1398 parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
1399 }
1400
1401 cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
1402 cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1403
1404 if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
1405 if (file_ids) {
1406 kfree(file_ids, sizeof(int) * num_files);
1407 }
1408 if (bitmap) {
1409 kfree(bitmap, sizeof(char) * map_size);
1410 }
1411 if (access) {
1412 kfree(access, sizeof(short) * num_files);
1413 }
1414 if (cache.acache) {
1415 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1416 }
1417 if (cache.haveaccess) {
1418 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1419 }
1420 if (parents) {
1421 kfree(parents, sizeof(cnid_t) * num_parents);
1422 }
1423 return ENOMEM;
1424 }
1425
1426 // make sure the bitmap is zero'ed out...
1427 if (bitmap) {
1428 bzero(bitmap, (sizeof(char) * map_size));
1429 }
1430
1431 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1432 num_files * sizeof(int)))) {
1433 goto err_exit_bulk_access;
1434 }
1435
1436 if (num_parents) {
1437 if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
1438 num_parents * sizeof(cnid_t)))) {
1439 goto err_exit_bulk_access;
1440 }
1441 }
1442
1443 flags = user_access_structp->flags;
1444 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1445 flags = R_OK;
1446 }
1447
1448 /* check if we've been passed leaf node ids or parent ids */
1449 if (flags & PARENT_IDS_FLAG) {
1450 check_leaf = false;
1451 }
1452
1453 /* Check access to each file_id passed in */
1454 for (i = 0; i < num_files; i++) {
1455 leaf_index=-1;
1456 cnid = (cnid_t) file_ids[i];
1457
1458 /* root always has access */
1459 if ((!parents) && (!suser(cred, NULL))) {
1460 access[i] = 0;
1461 continue;
1462 }
1463
1464 if (check_leaf) {
1465 /* do the lookup (checks the cnode hash, then the catalog) */
b0d623f7 1466 error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
1467 if (error) {
1468 access[i] = (short) error;
1469 continue;
1470 }
1471
1472 if (parents) {
1473 // Check if the leaf matches one of the parent scopes
1474 leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
1475 if (leaf_index >= 0 && parents[leaf_index] == cnid)
1476 prev_parent_check_ok = 0;
1477 else if (leaf_index >= 0)
1478 prev_parent_check_ok = 1;
1479 }
1480
1481 // if the thing has acl's, do the full permission check
1482 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1483 struct vnode *cvp;
1484 int myErr = 0;
1485 /* get the vnode for this cnid */
6d2010ae 1486 myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);
1487 if ( myErr ) {
1488 access[i] = myErr;
1489 continue;
1490 }
1491
1492 hfs_unlock(VTOC(cvp));
1493
1494 if (vnode_vtype(cvp) == VDIR) {
1495 myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
1496 } else {
1497 myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
1498 }
1499
1500 vnode_put(cvp);
1501 if (myErr) {
1502 access[i] = myErr;
1503 continue;
1504 }
1505 } else {
1506 /* before calling CheckAccess(), check the target file for read access */
1507 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1508 cnattr.ca_mode, hfsmp->hfs_mp, cred, p);
1509
1510 /* fail fast if no access */
1511 if ((myPerms & flags) == 0) {
1512 access[i] = EACCES;
1513 continue;
1514 }
1515 }
1516 } else {
1517 /* we were passed an array of parent ids */
1518 catkey.hfsPlus.parentID = cnid;
1519 }
1520
1521 /* if the last guy had the same parent and had access, we're done */
b0d623f7 1522 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
1523 cache.cachehits++;
1524 access[i] = 0;
1525 continue;
1526 }
316670eb 1527
2d21ac55 1528 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
b0d623f7 1529 skip_cp, p, cred, context,bitmap, map_size, parents, num_parents);
1530
1531 if (myaccess || (error == ESRCH && leaf_index != -1)) {
1532 access[i] = 0; // have access.. no errors to report
1533 } else {
1534 access[i] = (error != 0 ? (short) error : EACCES);
1535 }
1536
1537 prevParent_cnid = catkey.hfsPlus.parentID;
1538 }
1539
1540 /* copyout the access array */
1541 if ((error = copyout((caddr_t)access, user_access_structp->access,
1542 num_files * sizeof (short)))) {
1543 goto err_exit_bulk_access;
1544 }
1545 if (map_size && bitmap) {
1546 if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
1547 map_size * sizeof (char)))) {
1548 goto err_exit_bulk_access;
1549 }
1550 }
1551
1552
1553 err_exit_bulk_access:
1554
1555 if (file_ids)
1556 kfree(file_ids, sizeof(int) * num_files);
1557 if (parents)
1558 kfree(parents, sizeof(cnid_t) * num_parents);
1559 if (bitmap)
1560 kfree(bitmap, sizeof(char) * map_size);
1561 if (access)
1562 kfree(access, sizeof(short) * num_files);
1563 if (cache.acache)
1564 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1565 if (cache.haveaccess)
1566 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1567
1568 return (error);
1569}
1570
1571
1572/* end "bulk-access" support */
1c79356b 1573
1c79356b 1574
1575/*
1576 * Control filesystem operating characteristics.
1577 */
1c79356b 1578int
1579hfs_vnop_ioctl( struct vnop_ioctl_args /* {
1580 vnode_t a_vp;
04b8595b 1581 long a_command;
1582 caddr_t a_data;
1583 int a_fflag;
1584 vfs_context_t a_context;
1585 } */ *ap)
1c79356b 1586{
1587 struct vnode * vp = ap->a_vp;
1588 struct hfsmount *hfsmp = VTOHFS(vp);
1589 vfs_context_t context = ap->a_context;
1590 kauth_cred_t cred = vfs_context_ucred(context);
1591 proc_t p = vfs_context_proc(context);
1592 struct vfsstatfs *vfsp;
1593 boolean_t is64bit;
1594 off_t jnl_start, jnl_size;
1595 struct hfs_journal_info *jip;
1596#if HFS_COMPRESSION
1597 int compressed = 0;
1598 off_t uncompressed_size = -1;
1599 int decmpfs_error = 0;
1600
1601 if (ap->a_command == F_RDADVISE) {
1602 /* we need to inspect the decmpfs state of the file as early as possible */
1603 compressed = hfs_file_is_compressed(VTOC(vp), 0);
1604 if (compressed) {
1605 if (VNODE_IS_RSRC(vp)) {
1606 /* if this is the resource fork, treat it as if it were empty */
1607 uncompressed_size = 0;
1608 } else {
1609 decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
1610 if (decmpfs_error != 0) {
1611 /* failed to get the uncompressed size, we'll check for this later */
1612 uncompressed_size = -1;
1613 }
1614 }
1615 }
1616 }
1617#endif /* HFS_COMPRESSION */
1618
1619 is64bit = proc_is64bit(p);
1620
1621#if CONFIG_PROTECT
1622 {
1623 int error = 0;
316670eb 1624 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
1625 return error;
1626 }
1627 }
1628#endif /* CONFIG_PROTECT */
1629
9bccf70c 1630 switch (ap->a_command) {
55e303ae 1631
1632 case HFS_GETPATH:
1633 {
1634 struct vnode *file_vp;
1635 cnid_t cnid;
1636 int outlen;
1637 char *bufptr;
1638 int error;
39236c6e 1639 int flags = 0;
1640
1641 /* Caller must be owner of file system. */
1642 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1643 if (suser(cred, NULL) &&
1644 kauth_cred_getuid(cred) != vfsp->f_owner) {
1645 return (EACCES);
1646 }
1647 /* Target vnode must be file system's root. */
1648 if (!vnode_isvroot(vp)) {
1649 return (EINVAL);
1650 }
1651 bufptr = (char *)ap->a_data;
1652 cnid = strtoul(bufptr, NULL, 10);
1653 if (ap->a_fflag & HFS_GETPATH_VOLUME_RELATIVE) {
1654 flags |= BUILDPATH_VOLUME_RELATIVE;
1655 }
2d21ac55 1656
1657 /* We need to call hfs_vfs_vget to leverage the code that will
1658 * fix the origin list for us if needed, as opposed to calling
1659 * hfs_vget, since we will need the parent for the build_path call.
935ed37a 1660 */
b0d623f7 1661
935ed37a 1662 if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
1663 return (error);
1664 }
39236c6e 1665 error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, flags, context);
1666 vnode_put(file_vp);
1667
1668 return (error);
1669 }
1670
1671 case HFS_TRANSFER_DOCUMENT_ID:
1672 {
1673 struct cnode *cp = NULL;
1674 int error;
1675 u_int32_t to_fd = *(u_int32_t *)ap->a_data;
1676 struct fileproc *to_fp;
1677 struct vnode *to_vp;
1678 struct cnode *to_cp;
1679
1680 cp = VTOC(vp);
1681
1682 if ((error = fp_getfvp(p, to_fd, &to_fp, &to_vp)) != 0) {
1683 //printf("could not get the vnode for fd %d (err %d)\n", to_fd, error);
1684 return error;
1685 }
1686 if ( (error = vnode_getwithref(to_vp)) ) {
1687 file_drop(to_fd);
1688 return error;
1689 }
1690
1691 if (VTOHFS(to_vp) != hfsmp) {
1692 error = EXDEV;
1693 goto transfer_cleanup;
1694 }
1695
1696 int need_unlock = 1;
1697 to_cp = VTOC(to_vp);
1698 error = hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
1699 if (error != 0) {
1700 //printf("could not lock the pair of cnodes (error %d)\n", error);
1701 goto transfer_cleanup;
1702 }
1703
1704 if (!(cp->c_bsdflags & UF_TRACKED)) {
1705 error = EINVAL;
1706 } else if (to_cp->c_bsdflags & UF_TRACKED) {
1707 //
1708 // if the destination is already tracked, return an error
1709 // as otherwise it's a silent deletion of the target's
1710 // document-id
1711 //
1712 error = EEXIST;
1713 } else if (S_ISDIR(cp->c_attr.ca_mode) || S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
1714 //
1715 // we can use the FndrExtendedFileInfo because the doc-id is the first
1716 // thing in both it and the ExtendedDirInfo struct, which is fixed in
1717 // format and cannot change layout
1718 //
1719 struct FndrExtendedFileInfo *f_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)cp->c_finderinfo + 16);
1720 struct FndrExtendedFileInfo *to_extinfo = (struct FndrExtendedFileInfo *)((u_int8_t*)to_cp->c_finderinfo + 16);
1721
1722 if (f_extinfo->document_id == 0) {
1723 uint32_t new_id;
1724
1725 hfs_unlockpair(cp, to_cp); // have to unlock to be able to get a new-id
1726
1727 if ((error = hfs_generate_document_id(hfsmp, &new_id)) == 0) {
1728 //
1729 // re-lock the pair now that we have the document-id
1730 //
1731 hfs_lockpair(cp, to_cp, HFS_EXCLUSIVE_LOCK);
1732 f_extinfo->document_id = new_id;
1733 } else {
1734 goto transfer_cleanup;
1735 }
1736 }
1737
1738 to_extinfo->document_id = f_extinfo->document_id;
1739 f_extinfo->document_id = 0;
1740 //printf("TRANSFERRING: doc-id %d from ino %d to ino %d\n", to_extinfo->document_id, cp->c_fileid, to_cp->c_fileid);
1741
1742 // make sure the destination is also UF_TRACKED
1743 to_cp->c_bsdflags |= UF_TRACKED;
1744 cp->c_bsdflags &= ~UF_TRACKED;
1745
1746 // mark the cnodes dirty
1747 cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
1748 to_cp->c_flag |= C_MODIFIED | C_FORCEUPDATE;
1749
1750 int lockflags;
1751 if ((error = hfs_start_transaction(hfsmp)) == 0) {
1752
1753 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_EXCLUSIVE_LOCK);
1754
1755 (void) cat_update(hfsmp, &cp->c_desc, &cp->c_attr, NULL, NULL);
1756 (void) cat_update(hfsmp, &to_cp->c_desc, &to_cp->c_attr, NULL, NULL);
1757
1758 hfs_systemfile_unlock (hfsmp, lockflags);
1759 (void) hfs_end_transaction(hfsmp);
1760 }
1761
1762#if CONFIG_FSE
1763 add_fsevent(FSE_DOCID_CHANGED, context,
1764 FSE_ARG_DEV, hfsmp->hfs_raw_dev,
1765 FSE_ARG_INO, (ino64_t)cp->c_fileid, // src inode #
1766 FSE_ARG_INO, (ino64_t)to_cp->c_fileid, // dst inode #
1767 FSE_ARG_INT32, to_extinfo->document_id,
1768 FSE_ARG_DONE);
1769
1770 hfs_unlockpair(cp, to_cp); // unlock this so we can send the fsevents
1771 need_unlock = 0;
1772
1773 if (need_fsevent(FSE_STAT_CHANGED, vp)) {
1774 add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, vp, FSE_ARG_DONE);
1775 }
1776 if (need_fsevent(FSE_STAT_CHANGED, to_vp)) {
1777 add_fsevent(FSE_STAT_CHANGED, context, FSE_ARG_VNODE, to_vp, FSE_ARG_DONE);
1778 }
1779#else
1780 hfs_unlockpair(cp, to_cp); // unlock this so we can send the fsevents
1781 need_unlock = 0;
1782#endif
1783 }
1784
1785 if (need_unlock) {
1786 hfs_unlockpair(cp, to_cp);
1787 }
1788
1789 transfer_cleanup:
1790 vnode_put(to_vp);
1791 file_drop(to_fd);
1792
1793 return error;
1794 }
1795
fe8ab488
A
1796
1797
2d21ac55
A
1798 case HFS_PREV_LINK:
1799 case HFS_NEXT_LINK:
1800 {
1801 cnid_t linkfileid;
1802 cnid_t nextlinkid;
1803 cnid_t prevlinkid;
1804 int error;
1805
1806 /* Caller must be owner of file system. */
1807 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1808 if (suser(cred, NULL) &&
1809 kauth_cred_getuid(cred) != vfsp->f_owner) {
1810 return (EACCES);
1811 }
1812 /* Target vnode must be file system's root. */
1813 if (!vnode_isvroot(vp)) {
1814 return (EINVAL);
1815 }
1816 linkfileid = *(cnid_t *)ap->a_data;
1817 if (linkfileid < kHFSFirstUserCatalogNodeID) {
1818 return (EINVAL);
1819 }
6d2010ae 1820 if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
2d21ac55
A
1821 return (error);
1822 }
1823 if (ap->a_command == HFS_NEXT_LINK) {
1824 *(cnid_t *)ap->a_data = nextlinkid;
1825 } else {
1826 *(cnid_t *)ap->a_data = prevlinkid;
1827 }
1828 return (0);
1829 }
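	/*
	 * A minimal userspace sketch of querying a hardlink sibling with
	 * HFS_NEXT_LINK (hedged: the request macro is assumed to come from the
	 * private hfs_fsctl.h header). The fsctl is issued against the volume
	 * root with a known link's file ID in the buffer, and the next
	 * sibling's file ID is returned in place:
	 *
	 *     cnid_t id = link_fileid;                  // a known hardlink's file ID
	 *     if (fsctl("/Volumes/MyVol", HFS_NEXT_LINK, &id, 0) == 0)
	 *         printf("next sibling link: %u\n", id);
	 */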
1830
0c530ab8
A
1831 case HFS_RESIZE_PROGRESS: {
1832
1833 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1834 if (suser(cred, NULL) &&
1835 kauth_cred_getuid(cred) != vfsp->f_owner) {
1836 return (EACCES); /* must be owner of file system */
1837 }
1838 if (!vnode_isvroot(vp)) {
1839 return (EINVAL);
1840 }
b0d623f7
A
1841 /* file system must not be mounted read-only */
1842 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1843 return (EROFS);
1844 }
1845
0c530ab8
A
1846 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
1847 }
2d21ac55 1848
91447636
A
1849 case HFS_RESIZE_VOLUME: {
1850 u_int64_t newsize;
1851 u_int64_t cursize;
1852
1853 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1854 if (suser(cred, NULL) &&
1855 kauth_cred_getuid(cred) != vfsp->f_owner) {
1856 return (EACCES); /* must be owner of file system */
1857 }
1858 if (!vnode_isvroot(vp)) {
1859 return (EINVAL);
1860 }
b0d623f7
A
1861
1862 /* filesystem must not be mounted read only */
1863 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1864 return (EROFS);
1865 }
91447636
A
1866 newsize = *(u_int64_t *)ap->a_data;
1867 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
1868
1869 if (newsize > cursize) {
1870 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
1871 } else if (newsize < cursize) {
1872 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
1873 } else {
1874 return (0);
1875 }
1876 }
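	/*
	 * A minimal userspace sketch of the resize request handled above
	 * (hedged: HFS_RESIZE_VOLUME is a private fsctl, normally driven by
	 * higher-level tools rather than called directly). The new size is
	 * passed in bytes against the volume root; the kernel chooses extend
	 * versus truncate by comparing it with totalBlocks * blockSize:
	 *
	 *     u_int64_t newsize = 250ULL * 1024 * 1024 * 1024;   // 250 GiB
	 *     if (fsctl("/Volumes/MyVol", HFS_RESIZE_VOLUME, &newsize, 0) == -1)
	 *         perror("HFS_RESIZE_VOLUME");
	 */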
1877 case HFS_CHANGE_NEXT_ALLOCATION: {
2d21ac55 1878 int error = 0; /* Assume success */
91447636
A
1879 u_int32_t location;
1880
1881 if (vnode_vfsisrdonly(vp)) {
1882 return (EROFS);
1883 }
1884 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1885 if (suser(cred, NULL) &&
1886 kauth_cred_getuid(cred) != vfsp->f_owner) {
1887 return (EACCES); /* must be owner of file system */
1888 }
1889 if (!vnode_isvroot(vp)) {
1890 return (EINVAL);
1891 }
39236c6e 1892 hfs_lock_mount(hfsmp);
91447636 1893 location = *(u_int32_t *)ap->a_data;
2d21ac55
A
1894 if ((location >= hfsmp->allocLimit) &&
1895 (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
1896 error = EINVAL;
1897 goto fail_change_next_allocation;
91447636
A
1898 }
1899 /* Return previous value. */
1900 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
2d21ac55
A
1901 if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
1902 /* On magic value for location, set nextAllocation to next block
1903 * after metadata zone and set flag in mount structure to indicate
1904 * that nextAllocation should not be updated again.
1905 */
b0d623f7
A
1906 if (hfsmp->hfs_metazone_end != 0) {
1907 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
1908 }
2d21ac55
A
1909 hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1910 } else {
1911 hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1912 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
1913 }
1914 MarkVCBDirty(hfsmp);
1915fail_change_next_allocation:
39236c6e 1916 hfs_unlock_mount(hfsmp);
2d21ac55 1917 return (error);
91447636
A
1918 }
1919
6d2010ae 1920#if HFS_SPARSE_DEV
55e303ae 1921 case HFS_SETBACKINGSTOREINFO: {
55e303ae
A
1922 struct vnode * bsfs_rootvp;
1923 struct vnode * di_vp;
55e303ae
A
1924 struct hfs_backingstoreinfo *bsdata;
1925 int error = 0;
1926
b0d623f7
A
1927 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1928 return (EROFS);
1929 }
55e303ae
A
1930 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
1931 return (EALREADY);
1932 }
91447636
A
1933 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1934 if (suser(cred, NULL) &&
1935 kauth_cred_getuid(cred) != vfsp->f_owner) {
55e303ae
A
1936 return (EACCES); /* must be owner of file system */
1937 }
1938 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
1939 if (bsdata == NULL) {
1940 return (EINVAL);
1941 }
91447636 1942 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
55e303ae
A
1943 return (error);
1944 }
91447636
A
1945 if ((error = vnode_getwithref(di_vp))) {
1946 file_drop(bsdata->backingfd);
1947 return(error);
55e303ae 1948 }
91447636
A
1949
1950 if (vnode_mount(vp) == vnode_mount(di_vp)) {
1951 (void)vnode_put(di_vp);
1952 file_drop(bsdata->backingfd);
55e303ae
A
1953 return (EINVAL);
1954 }
1955
1956 /*
1957 * Obtain the backing fs root vnode and keep a reference
1958 * on it. This reference will be dropped in hfs_unmount.
1959 */
91447636 1960 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
55e303ae 1961 if (error) {
91447636
A
1962 (void)vnode_put(di_vp);
1963 file_drop(bsdata->backingfd);
55e303ae
A
1964 return (error);
1965 }
91447636
A
1966 vnode_ref(bsfs_rootvp);
1967 vnode_put(bsfs_rootvp);
55e303ae 1968
fe8ab488 1969 hfs_lock_mount(hfsmp);
55e303ae
A
1970 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
1971 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
fe8ab488
A
1972 hfsmp->hfs_sparsebandblks = bsdata->bandsize / hfsmp->blockSize * 4;
1973 hfs_unlock_mount(hfsmp);
55e303ae 1974
39236c6e 1975 /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */
2d21ac55 1976
b0d623f7
A
1977 /*
1978 * If the sparse image is on a sparse image file (as opposed to a sparse
1979 * bundle), then we may need to limit the free space to the maximum size
1980 * of a file on that volume. So we query (using pathconf), and if we get
1981 * a meaningful result, we cache the number of blocks for later use in
1982 * hfs_freeblks().
1983 */
1984 hfsmp->hfs_backingfs_maxblocks = 0;
1985 if (vnode_vtype(di_vp) == VREG) {
1986 int terr;
1987 int hostbits;
1988 terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
1989 if (terr == 0 && hostbits != 0 && hostbits < 64) {
1990 u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;
1991
1992 hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
1993 }
1994 }
1995
fe8ab488
A
1996 /* The free extent cache is managed differently for sparse devices.
1997 * There is a window between when the volume is mounted and when the
1998 * device is marked as sparse, so the free extent cache for this
1999 * volume is currently initialized as for a normal volume (sorted by block
2000 * count). Reset the cache so that it will be rebuilt
2001 * for a sparse device (sorted by start block).
2002 */
2003 ResetVCBFreeExtCache(hfsmp);
2004
91447636
A
2005 (void)vnode_put(di_vp);
2006 file_drop(bsdata->backingfd);
55e303ae
A
2007 return (0);
2008 }
2009 case HFS_CLRBACKINGSTOREINFO: {
55e303ae
A
2010 struct vnode * tmpvp;
2011
91447636
A
2012 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
2013 if (suser(cred, NULL) &&
2014 kauth_cred_getuid(cred) != vfsp->f_owner) {
55e303ae
A
2015 return (EACCES); /* must be owner of file system */
2016 }
b0d623f7
A
2017 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2018 return (EROFS);
2019 }
2020
55e303ae
A
2021 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
2022 hfsmp->hfs_backingfs_rootvp) {
2023
fe8ab488 2024 hfs_lock_mount(hfsmp);
55e303ae
A
2025 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
2026 tmpvp = hfsmp->hfs_backingfs_rootvp;
2027 hfsmp->hfs_backingfs_rootvp = NULLVP;
2028 hfsmp->hfs_sparsebandblks = 0;
fe8ab488
A
2029 hfs_unlock_mount(hfsmp);
2030
91447636 2031 vnode_rele(tmpvp);
55e303ae
A
2032 }
2033 return (0);
2034 }
2035#endif /* HFS_SPARSE_DEV */
2036
316670eb
A
2037 /* Change the next CNID stored in the VH */
2038 case HFS_CHANGE_NEXTCNID: {
2039 int error = 0; /* Assume success */
2040 u_int32_t fileid;
2041 int wraparound = 0;
2042 int lockflags = 0;
2043
2044 if (vnode_vfsisrdonly(vp)) {
2045 return (EROFS);
2046 }
2047 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
2048 if (suser(cred, NULL) &&
2049 kauth_cred_getuid(cred) != vfsp->f_owner) {
2050 return (EACCES); /* must be owner of file system */
2051 }
2052
2053 fileid = *(u_int32_t *)ap->a_data;
2054
2055 /* Must have catalog lock excl. to advance the CNID pointer */
2056 lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG , HFS_EXCLUSIVE_LOCK);
2057
39236c6e
A
2058 hfs_lock_mount(hfsmp);
2059
316670eb
A
2060 /* If it is less than the current next CNID, force the wraparound bit to be set */
2061 if (fileid < hfsmp->vcbNxtCNID) {
2062 wraparound=1;
2063 }
2064
2065 /* Return previous value. */
2066 *(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID;
2067
2068 hfsmp->vcbNxtCNID = fileid;
2069
2070 if (wraparound) {
2071 hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask;
2072 }
2073
2074 MarkVCBDirty(hfsmp);
39236c6e 2075 hfs_unlock_mount(hfsmp);
316670eb
A
2076 hfs_systemfile_unlock (hfsmp, lockflags);
2077
2078 return (error);
2079 }
2080
91447636
A
2081 case F_FREEZE_FS: {
2082 struct mount *mp;
91447636 2083
91447636
A
2084 mp = vnode_mount(vp);
2085 hfsmp = VFSTOHFS(mp);
2086
2087 if (!(hfsmp->jnl))
2088 return (ENOTSUP);
3a60a9f5 2089
b0d623f7
A
2090 vfsp = vfs_statfs(mp);
2091
2092 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
2093 !kauth_cred_issuser(cred))
2094 return (EACCES);
2095
fe8ab488 2096 return hfs_freeze(hfsmp);
91447636
A
2097 }
2098
2099 case F_THAW_FS: {
b0d623f7
A
2100 vfsp = vfs_statfs(vnode_mount(vp));
2101 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
2102 !kauth_cred_issuser(cred))
91447636
A
2103 return (EACCES);
2104
fe8ab488 2105 return hfs_thaw(hfsmp, current_proc());
91447636
A
2106 }
2107
2d21ac55
A
2108 case HFS_BULKACCESS_FSCTL: {
2109 int size;
2110
2111 if (hfsmp->hfs_flags & HFS_STANDARD) {
2112 return EINVAL;
2113 }
91447636 2114
2d21ac55 2115 if (is64bit) {
b0d623f7 2116 size = sizeof(struct user64_access_t);
2d21ac55 2117 } else {
b0d623f7 2118 size = sizeof(struct user32_access_t);
2d21ac55
A
2119 }
2120
2121 return do_bulk_access_check(hfsmp, vp, ap, size, context);
2122 }
91447636 2123
2d21ac55
A
2124 case HFS_EXT_BULKACCESS_FSCTL: {
2125 int size;
2126
2127 if (hfsmp->hfs_flags & HFS_STANDARD) {
2128 return EINVAL;
2129 }
91447636 2130
2d21ac55 2131 if (is64bit) {
b0d623f7 2132 size = sizeof(struct user64_ext_access_t);
2d21ac55 2133 } else {
b0d623f7 2134 size = sizeof(struct user32_ext_access_t);
2d21ac55
A
2135 }
2136
2137 return do_bulk_access_check(hfsmp, vp, ap, size, context);
2138 }
91447636 2139
2d21ac55
A
2140 case HFS_SET_XATTREXTENTS_STATE: {
2141 int state;
2142
2143 if (ap->a_data == NULL) {
2144 return (EINVAL);
2145 }
2146
2147 state = *(int *)ap->a_data;
b0d623f7
A
2148
2149 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2150 return (EROFS);
2151 }
2d21ac55
A
2152
2153 /* Super-user can enable or disable extent-based extended
2154 * attribute support on a volume
6d2010ae
A
2155 * Note: Starting with Mac OS X 10.7, extent-based extended attributes
2156 * are enabled by default, so any change will be transient and will
2157 * last only until the volume is remounted.
2d21ac55 2158 */
39236c6e 2159 if (!kauth_cred_issuser(kauth_cred_get())) {
2d21ac55
A
2160 return (EPERM);
2161 }
2162 if (state == 0 || state == 1)
2163 return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
91447636
A
2164 else
2165 return (EINVAL);
2166 }
2167
316670eb
A
2168 case F_SETSTATICCONTENT: {
2169 int error;
2170 int enable_static = 0;
2171 struct cnode *cp = NULL;
2172 /*
2173 * lock the cnode, decorate the cnode flag, and bail out.
2174 * VFS should have already authenticated the caller for us.
2175 */
2176
2177 if (ap->a_data) {
2178 /*
2179 * Note that even though ap->a_data is of type caddr_t,
2180 * the fcntl layer at the syscall handler will pass in NULL
2181 * or 1 depending on what the argument supplied to the fcntl
2182 * was. So it is in fact correct to check the ap->a_data
2183 * argument for zero or non-zero value when deciding whether or not
2184 * to enable the static bit in the cnode.
2185 */
2186 enable_static = 1;
2187 }
2188 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2189 return EROFS;
2190 }
2191 cp = VTOC(vp);
2192
39236c6e 2193 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
316670eb
A
2194 if (error == 0) {
2195 if (enable_static) {
2196 cp->c_flag |= C_SSD_STATIC;
2197 }
2198 else {
2199 cp->c_flag &= ~C_SSD_STATIC;
2200 }
2201 hfs_unlock (cp);
2202 }
2203 return error;
2204 }
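	/*
	 * A minimal userspace sketch of the argument convention described in
	 * the case above (hedged: F_SETSTATICCONTENT is a private fcntl; a
	 * non-zero argument sets the hint and zero clears it):
	 *
	 *     fcntl(fd, F_SETSTATICCONTENT, 1);   // mark the file's data as static
	 *     fcntl(fd, F_SETSTATICCONTENT, 0);   // clear the hint
	 */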
2205
39236c6e
A
2206 case F_SET_GREEDY_MODE: {
2207 int error;
2208 int enable_greedy_mode = 0;
2209 struct cnode *cp = NULL;
2210 /*
2211 * lock the cnode, decorate the cnode flag, and bail out.
2212 * VFS should have already authenticated the caller for us.
2213 */
2214
2215 if (ap->a_data) {
2216 /*
2217 * Note that even though ap->a_data is of type caddr_t,
2218 * the fcntl layer at the syscall handler will pass in NULL
2219 * or 1 depending on what the argument supplied to the fcntl
2220 * was. So it is in fact correct to check the ap->a_data
2221 * argument for zero or non-zero value when deciding whether or not
2222 * to enable the greedy mode bit in the cnode.
2223 */
2224 enable_greedy_mode = 1;
2225 }
2226 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2227 return EROFS;
2228 }
2229 cp = VTOC(vp);
2230
2231 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2232 if (error == 0) {
2233 if (enable_greedy_mode) {
2234 cp->c_flag |= C_SSD_GREEDY_MODE;
2235 }
2236 else {
2237 cp->c_flag &= ~C_SSD_GREEDY_MODE;
2238 }
2239 hfs_unlock (cp);
2240 }
2241 return error;
2242 }
2243
fe8ab488
A
2244 case F_SETIOTYPE: {
2245 int error;
2246 uint32_t iotypeflag = 0;
2247
2248 struct cnode *cp = NULL;
2249 /*
2250 * lock the cnode, decorate the cnode flag, and bail out.
2251 * VFS should have already authenticated the caller for us.
2252 */
2253
2254 if (ap->a_data == NULL) {
2255 return EINVAL;
2256 }
2257
2258 /*
2259 * Note that even though ap->a_data is of type caddr_t, we
2260 * can only use 32 bits of flag values.
2261 */
2262 iotypeflag = (uint32_t) ap->a_data;
2263 switch (iotypeflag) {
2264 case F_IOTYPE_ISOCHRONOUS:
2265 break;
2266 default:
2267 return EINVAL;
2268 }
2269
2270
2271 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2272 return EROFS;
2273 }
2274 cp = VTOC(vp);
2275
2276 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2277 if (error == 0) {
2278 switch (iotypeflag) {
2279 case F_IOTYPE_ISOCHRONOUS:
2280 cp->c_flag |= C_IO_ISOCHRONOUS;
2281 break;
2282 default:
2283 break;
2284 }
2285 hfs_unlock (cp);
2286 }
2287 return error;
2288 }
2289
39236c6e
A
2290 case F_MAKECOMPRESSED: {
2291 int error = 0;
2292 uint32_t gen_counter;
2293 struct cnode *cp = NULL;
2294 int reset_decmp = 0;
2295
2296 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2297 return EROFS;
2298 }
2299
2300 /*
2301 * acquire & lock the cnode.
2302 * VFS should have already authenticated the caller for us.
2303 */
2304
2305 if (ap->a_data) {
2306 /*
2307 * Cast the pointer into a uint32_t so we can extract the
2308 * supplied generation counter.
2309 */
2310 gen_counter = *((uint32_t*)ap->a_data);
2311 }
2312 else {
2313 return EINVAL;
2314 }
2315
2316#if HFS_COMPRESSION
2317 cp = VTOC(vp);
2318 /* Grab truncate lock first; we may truncate the file */
2319 hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2320
2321 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2322 if (error) {
2323 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2324 return error;
2325 }
fe8ab488 2326
39236c6e
A
2327 /* Are there any other usecounts/FDs? */
2328 if (vnode_isinuse(vp, 1)) {
2329 hfs_unlock(cp);
2330 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2331 return EBUSY;
2332 }
2333
39236c6e
A
2334 /* now we have the cnode locked down; Validate arguments */
2335 if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) {
2336 /* EINVAL if you are trying to manipulate an IMMUTABLE file */
2337 hfs_unlock(cp);
2338 hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
2339 return EINVAL;
2340 }
2341
2342 if ((hfs_get_gencount (cp)) == gen_counter) {
2343 /*
2344 * OK, the gen_counter matched. Go for it:
2345 * Toggle state bits, truncate file, and suppress mtime update
2346 */
2347 reset_decmp = 1;
2348 cp->c_bsdflags |= UF_COMPRESSED;
fe8ab488
A
2349
2350 error = hfs_truncate(vp, 0, IO_NDELAY, HFS_TRUNCATE_SKIPTIMES,
2351 ap->a_context);
39236c6e
A
2352 }
2353 else {
2354 error = ESTALE;
2355 }
2356
2357 /* Unlock cnode before executing decmpfs; it may need to get an EA */
2358 hfs_unlock(cp);
2359
2360 /*
2361 * Reset the decmp state while still holding the truncate lock. We need to
2362 * serialize here against a listxattr on this node which may occur at any
2363 * time.
2364 *
2365 * Even if '0/skiplock' is passed in the 2nd argument to hfs_file_is_compressed,
2366 * that will still potentially require getting the com.apple.decmpfs EA. If the
2367 * EA is required, then we can't hold the cnode lock, because the getxattr call is
2368 * generic (through VFS), and can't pass along any info telling it that we're already
2369 * holding it (the lock). If we don't serialize, then we risk listxattr stopping
2370 * and trying to fill in the hfs_file_is_compressed info during the callback
2371 * operation, which will result in deadlock against the b-tree node.
2372 *
2373 * So, to serialize against listxattr (which will grab buf_t meta references on
2374 * the b-tree blocks), we hold the truncate lock as we're manipulating the
2375 * decmpfs payload.
2376 */
2377 if ((reset_decmp) && (error == 0)) {
2378 decmpfs_cnode *dp = VTOCMP (vp);
2379 if (dp != NULL) {
2380 decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
2381 }
2382
2383 /* Initialize the decmpfs node as needed */
2384 (void) hfs_file_is_compressed (cp, 0); /* ok to take lock */
2385 }
2386
2387 hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
2388
2389#endif
2390 return error;
2391 }
2392
316670eb
A
2393 case F_SETBACKINGSTORE: {
2394
2395 int error = 0;
2396
2397 /*
2398 * See comment in F_SETSTATICCONTENT re: using
2399 * a null check for a_data
2400 */
2401 if (ap->a_data) {
2402 error = hfs_set_backingstore (vp, 1);
2403 }
2404 else {
2405 error = hfs_set_backingstore (vp, 0);
2406 }
2407
2408 return error;
2409 }
2410
2411 case F_GETPATH_MTMINFO: {
2412 int error = 0;
2413
2414 int *data = (int*) ap->a_data;
2415
2416 /* Ask if this is a backingstore vnode */
2417 error = hfs_is_backingstore (vp, data);
2418
2419 return error;
2420 }
2421
91447636 2422 case F_FULLFSYNC: {
55e303ae 2423 int error;
b0d623f7
A
2424
2425 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2426 return (EROFS);
2427 }
39236c6e 2428 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
91447636 2429 if (error == 0) {
2d21ac55 2430 error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
91447636
A
2431 hfs_unlock(VTOC(vp));
2432 }
55e303ae
A
2433
2434 return error;
2435 }
91447636
A
2436
2437 case F_CHKCLEAN: {
9bccf70c 2438 register struct cnode *cp;
55e303ae
A
2439 int error;
2440
91447636 2441 if (!vnode_isreg(vp))
55e303ae
A
2442 return EINVAL;
2443
39236c6e 2444 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
91447636
A
2445 if (error == 0) {
2446 cp = VTOC(vp);
2447 /*
2448 * used by regression test to determine if
2449 * all the dirty pages (via write) have been cleaned
2450 * after a call to 'fsync'.
2451 */
2452 error = is_file_clean(vp, VTOF(vp)->ff_size);
2453 hfs_unlock(cp);
2454 }
55e303ae
A
2455 return (error);
2456 }
2457
91447636 2458 case F_RDADVISE: {
9bccf70c
A
2459 register struct radvisory *ra;
2460 struct filefork *fp;
9bccf70c
A
2461 int error;
2462
91447636 2463 if (!vnode_isreg(vp))
9bccf70c
A
2464 return EINVAL;
2465
9bccf70c 2466 ra = (struct radvisory *)(ap->a_data);
9bccf70c
A
2467 fp = VTOF(vp);
2468
91447636 2469 /* Protect against a size change. */
39236c6e 2470 hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
91447636 2471
b0d623f7
A
2472#if HFS_COMPRESSION
2473 if (compressed && (uncompressed_size == -1)) {
2474 /* fetching the uncompressed size failed above, so return the error */
2475 error = decmpfs_error;
2476 } else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
2477 (!compressed && (ra->ra_offset >= fp->ff_size))) {
2478 error = EFBIG;
2479 }
2480#else /* HFS_COMPRESSION */
9bccf70c 2481 if (ra->ra_offset >= fp->ff_size) {
91447636 2482 error = EFBIG;
b0d623f7
A
2483 }
2484#endif /* HFS_COMPRESSION */
2485 else {
91447636 2486 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
9bccf70c 2487 }
1c79356b 2488
39236c6e 2489 hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
9bccf70c 2490 return (error);
1c79356b 2491 }
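	/*
	 * A minimal userspace sketch of issuing the read advisory handled
	 * above (struct radvisory and F_RDADVISE are the fcntl(2) interface;
	 * the fd and region shown are illustrative):
	 *
	 *     struct radvisory ra;
	 *     ra.ra_offset = 0;             // start of the region to pre-read
	 *     ra.ra_count  = 128 * 1024;    // number of bytes to advise
	 *     if (fcntl(fd, F_RDADVISE, &ra) == -1)
	 *         perror("F_RDADVISE");
	 */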
1c79356b 2492
91447636
A
2493 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
2494 {
2495 if (is64bit) {
2496 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
2497 }
2498 else {
b0d623f7 2499 *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
91447636
A
2500 }
2501 return 0;
2502 }
2503
b0d623f7
A
2504 case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
2505 *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
2506 break;
2507
2508 case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
2509 *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
2510 break;
2511
316670eb
A
2512 case HFS_FSCTL_GET_VERY_LOW_DISK:
2513 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
2514 break;
2515
b0d623f7
A
2516 case HFS_FSCTL_SET_VERY_LOW_DISK:
2517 if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
2518 return EINVAL;
e2fac8b1 2519 }
91447636 2520
b0d623f7
A
2521 hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
2522 break;
2523
316670eb
A
2524 case HFS_FSCTL_GET_LOW_DISK:
2525 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
2526 break;
2527
b0d623f7
A
2528 case HFS_FSCTL_SET_LOW_DISK:
2529 if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
2530 || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
2531
2532 return EINVAL;
e2fac8b1 2533 }
b0d623f7
A
2534
2535 hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
2536 break;
2537
316670eb
A
2538 case HFS_FSCTL_GET_DESIRED_DISK:
2539 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
2540 break;
2541
b0d623f7
A
2542 case HFS_FSCTL_SET_DESIRED_DISK:
2543 if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
2544 return EINVAL;
2545 }
2546
2547 hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
2548 break;
2549
2550 case HFS_VOLUME_STATUS:
2551 *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
2552 break;
91447636
A
2553
2554 case HFS_SET_BOOT_INFO:
2555 if (!vnode_isvroot(vp))
2556 return(EINVAL);
2557 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
2558 return(EACCES); /* must be superuser or owner of filesystem */
b0d623f7
A
2559 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2560 return (EROFS);
2561 }
39236c6e 2562 hfs_lock_mount (hfsmp);
91447636 2563 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
39236c6e 2564 hfs_unlock_mount (hfsmp);
91447636
A
2565 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
2566 break;
2567
2568 case HFS_GET_BOOT_INFO:
2569 if (!vnode_isvroot(vp))
2570 return(EINVAL);
39236c6e 2571 hfs_lock_mount (hfsmp);
91447636 2572 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
39236c6e 2573 hfs_unlock_mount(hfsmp);
91447636
A
2574 break;
2575
2d21ac55
A
2576 case HFS_MARK_BOOT_CORRUPT:
2577 /* Mark the boot volume corrupt by setting
2578 * kHFSVolumeInconsistentBit in the volume header. This will
2579 * force fsck_hfs on next mount.
2580 */
39236c6e 2581 if (!kauth_cred_issuser(kauth_cred_get())) {
2d21ac55
A
2582 return EACCES;
2583 }
b0d623f7 2584
2d21ac55
A
2585 /* Allowed only on the root vnode of the boot volume */
2586 if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
2587 !vnode_isvroot(vp)) {
2588 return EINVAL;
2589 }
b0d623f7
A
2590 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2591 return (EROFS);
2592 }
2d21ac55 2593 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
fe8ab488 2594 hfs_mark_inconsistent(hfsmp, HFS_FSCK_FORCED);
2d21ac55
A
2595 break;
2596
b0d623f7
A
2597 case HFS_FSCTL_GET_JOURNAL_INFO:
2598 jip = (struct hfs_journal_info*)ap->a_data;
2599
2600 if (vp == NULLVP)
2601 return EINVAL;
2602
2603 if (hfsmp->jnl == NULL) {
2604 jnl_start = 0;
2605 jnl_size = 0;
2606 } else {
2607 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
2608 jnl_size = (off_t)hfsmp->jnl_size;
2609 }
2610
2611 jip->jstart = jnl_start;
2612 jip->jsize = jnl_size;
2613 break;
2614
2615 case HFS_SET_ALWAYS_ZEROFILL: {
2616 struct cnode *cp = VTOC(vp);
2617
2618 if (*(int *)ap->a_data) {
2619 cp->c_flag |= C_ALWAYS_ZEROFILL;
2620 } else {
2621 cp->c_flag &= ~C_ALWAYS_ZEROFILL;
2622 }
2623 break;
2624 }
2625
6d2010ae
A
2626 case HFS_DISABLE_METAZONE: {
2627 /* Only root can disable metadata zone */
39236c6e 2628 if (!kauth_cred_issuser(kauth_cred_get())) {
6d2010ae
A
2629 return EACCES;
2630 }
2631 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2632 return (EROFS);
2633 }
2634
2635 /* Disable metadata zone now */
2636 (void) hfs_metadatazone_init(hfsmp, true);
2637 printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
2638 break;
2639 }
fe8ab488
A
2640
2641
2642 case HFS_FSINFO_METADATA_BLOCKS: {
2643 int error;
2644 struct hfsinfo_metadata *hinfo;
2645
2646 hinfo = (struct hfsinfo_metadata *)ap->a_data;
2647
2648 /* Get information about number of metadata blocks */
2649 error = hfs_getinfo_metadata_blocks(hfsmp, hinfo);
2650 if (error) {
2651 return error;
2652 }
2653
2654 break;
2655 }
2656
04b8595b
A
2657 case HFS_GET_FSINFO: {
2658 hfs_fsinfo *fsinfo = (hfs_fsinfo *)ap->a_data;
2659
2660 /* Only root is allowed to get fsinfo */
2661 if (!kauth_cred_issuser(kauth_cred_get())) {
2662 return EACCES;
2663 }
2664
2665 /*
2666 * Make sure that the caller's version number matches with
2667 * the kernel's version number. This will make sure that
2668 * if the structures being read/written into are changed
2669 * by the kernel, the caller will not read incorrect data.
2670 *
2671 * The first three fields --- request_type, version and
2672 * flags are the same for all the hfs_fsinfo structures, so
2673 * we can access the version number by assuming any
2674 * structure for now.
2675 */
2676 if (fsinfo->header.version != HFS_FSINFO_VERSION) {
2677 return ENOTSUP;
2678 }
2679
2680 /* Make sure that the current file system is not marked inconsistent */
2681 if (hfsmp->vcbAtrb & kHFSVolumeInconsistentMask) {
2682 return EIO;
2683 }
2684
2685 return hfs_get_fsinfo(hfsmp, ap->a_data);
2686 }
2687
fe8ab488
A
2688 case HFS_CS_FREESPACE_TRIM: {
2689 int error = 0;
2690 int lockflags = 0;
2691
2692 /* Only root allowed */
2693 if (!kauth_cred_issuser(kauth_cred_get())) {
2694 return EACCES;
2695 }
2696
2697 /*
2698 * This core functionality is similar to hfs_scan_blocks().
2699 * The main difference is that hfs_scan_blocks() is called
2700 * as part of mount where we are assured that the journal is
2701 * empty to start with. This fcntl() can be called on a
2702 * mounted volume, therefore it has to flush the content of
2703 * the journal as well as ensure the state of summary table.
2704 *
2705 * This fcntl scans over the entire allocation bitmap,
2706 * creates a list of all the free blocks, and issues TRIM
2707 * down to the underlying device. This can take a long time
2708 * as it can generate up to 512MB of read I/O.
2709 */
2710
2711 if ((hfsmp->hfs_flags & HFS_SUMMARY_TABLE) == 0) {
2712 error = hfs_init_summary(hfsmp);
2713 if (error) {
2714 printf("hfs: fsctl() could not initialize summary table for %s\n", hfsmp->vcbVN);
2715 return error;
2716 }
2717 }
2718
2719 /*
2720 * The journal maintains list of recently deallocated blocks to
2721 * issue DKIOCUNMAPs when the corresponding journal transaction is
2722 * flushed to the disk. To avoid any race conditions, we only
2723 * want one active trim list and only one thread issuing DKIOCUNMAPs.
2724 * Therefore we make sure that the journal trim list is sync'ed,
2725 * empty, and not modifiable for the duration of our scan.
2726 *
2727 * Take the journal lock before flushing the journal to the disk.
2728 * We will keep on holding the journal lock till we don't get the
2729 * bitmap lock to make sure that no new journal transactions can
2730 * start. This will make sure that the journal trim list is not
2731 * modified after the journal flush and before getting bitmap lock.
2732 * We can release the journal lock after we acquire the bitmap
2733 * lock as it will prevent any further block deallocations.
2734 */
2735 hfs_journal_lock(hfsmp);
2736
2737 /* Flush the journal and wait for all I/Os to finish up */
2738 error = hfs_journal_flush(hfsmp, TRUE);
2739 if (error) {
2740 hfs_journal_unlock(hfsmp);
2741 return error;
2742 }
2743
2744 /* Take bitmap lock to ensure it is not being modified */
2745 lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
2746
2747 /* Release the journal lock */
2748 hfs_journal_unlock(hfsmp);
2749
2750 /*
2751 * ScanUnmapBlocks reads the bitmap in a large block size
2752 * (up to 1MB), unlike the runtime, which reads the bitmap
2753 * in a 4K block size. This can cause buf_t collisions
2754 * and potential data corruption. To avoid this, we
2755 * invalidate all the existing buffers associated with
2756 * the bitmap vnode before scanning it.
2757 *
2758 * Note: ScanUnmapBlocks() cleans up all the buffers
2759 * after itself, so there won't be any large buffers left
2760 * for us to clean up after it returns.
2761 */
2762 error = buf_invalidateblks(hfsmp->hfs_allocation_vp, 0, 0, 0);
2763 if (error) {
2764 hfs_systemfile_unlock(hfsmp, lockflags);
2765 return error;
2766 }
2767
2768 /* Traverse bitmap and issue DKIOCUNMAPs */
2769 error = ScanUnmapBlocks(hfsmp);
2770 hfs_systemfile_unlock(hfsmp, lockflags);
2771 if (error) {
2772 return error;
2773 }
2774
2775 break;
2776 }
2777
91447636
A
2778 default:
2779 return (ENOTTY);
2780 }
1c79356b 2781
0b4e3aa0 2782 return 0;
1c79356b
A
2783}
2784
91447636
A
2785/*
2786 * select
2787 */
1c79356b 2788int
91447636
A
2789hfs_vnop_select(__unused struct vnop_select_args *ap)
2790/*
2791 struct vnop_select_args {
2792 vnode_t a_vp;
9bccf70c
A
2793 int a_which;
2794 int a_fflags;
9bccf70c 2795 void *a_wql;
91447636
A
2796 vfs_context_t a_context;
2797 };
2798*/
1c79356b 2799{
9bccf70c
A
2800 /*
2801 * We should really check to see if I/O is possible.
2802 */
2803 return (1);
1c79356b
A
2804}
2805
1c79356b
A
2806/*
2807 * Converts a logical block number to a physical block, and optionally returns
2808 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
2809 * The physical block number is based on the device block size, currently it's 512.
2810 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
2811 */
1c79356b 2812int
2d21ac55 2813hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
1c79356b 2814{
9bccf70c
A
2815 struct filefork *fp = VTOF(vp);
2816 struct hfsmount *hfsmp = VTOHFS(vp);
91447636 2817 int retval = E_NONE;
2d21ac55 2818 u_int32_t logBlockSize;
91447636
A
2819 size_t bytesContAvail = 0;
2820 off_t blockposition;
2821 int lockExtBtree;
2822 int lockflags = 0;
1c79356b 2823
9bccf70c
A
2824 /*
2825 * Check for underlying vnode requests and ensure that logical
2826 * to physical mapping is requested.
2827 */
91447636 2828 if (vpp != NULL)
2d21ac55 2829 *vpp = hfsmp->hfs_devvp;
91447636 2830 if (bnp == NULL)
9bccf70c
A
2831 return (0);
2832
9bccf70c 2833 logBlockSize = GetLogicalBlockSize(vp);
2d21ac55 2834 blockposition = (off_t)bn * logBlockSize;
9bccf70c
A
2835
2836 lockExtBtree = overflow_extents(fp);
91447636
A
2837
2838 if (lockExtBtree)
2d21ac55 2839 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
1c79356b 2840
9bccf70c 2841 retval = MacToVFSError(
0b4e3aa0 2842 MapFileBlockC (HFSTOVCB(hfsmp),
9bccf70c 2843 (FCB*)fp,
0b4e3aa0
A
2844 MAXPHYSIO,
2845 blockposition,
91447636 2846 bnp,
0b4e3aa0 2847 &bytesContAvail));
1c79356b 2848
91447636
A
2849 if (lockExtBtree)
2850 hfs_systemfile_unlock(hfsmp, lockflags);
1c79356b 2851
91447636
A
2852 if (retval == E_NONE) {
2853 /* Figure out how many read ahead blocks there are */
2854 if (runp != NULL) {
2855 if (can_cluster(logBlockSize)) {
2856 /* Make sure this result never goes negative: */
2857 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
2858 } else {
2859 *runp = 0;
2860 }
2861 }
2862 }
2863 return (retval);
2864}
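/*
 * A worked example of the run calculation above (a sketch, not part of the
 * original source): with logBlockSize = 4096 and MapFileBlockC reporting
 * bytesContAvail = 32768, *runp = (32768 / 4096) - 1 = 7, i.e. seven more
 * logical blocks beyond 'bn' map contiguously; if bytesContAvail were smaller
 * than one logical block, *runp would be 0.
 */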
1c79356b 2865
91447636
A
2866/*
2867 * Convert logical block number to file offset.
2868 */
1c79356b 2869int
91447636
A
2870hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
2871/*
2872 struct vnop_blktooff_args {
2873 vnode_t a_vp;
2874 daddr64_t a_lblkno;
9bccf70c 2875 off_t *a_offset;
91447636
A
2876 };
2877*/
1c79356b
A
2878{
2879 if (ap->a_vp == NULL)
2880 return (EINVAL);
91447636 2881 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1c79356b
A
2882
2883 return(0);
2884}
2885
91447636
A
2886/*
2887 * Convert file offset to logical block number.
2888 */
1c79356b 2889int
91447636
A
2890hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
2891/*
2892 struct vnop_offtoblk_args {
2893 vnode_t a_vp;
9bccf70c 2894 off_t a_offset;
91447636
A
2895 daddr64_t *a_lblkno;
2896 };
2897*/
1c79356b 2898{
1c79356b
A
2899 if (ap->a_vp == NULL)
2900 return (EINVAL);
91447636 2901 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1c79356b
A
2902
2903 return(0);
2904}
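/*
 * A worked example of the two conversions above (a sketch, not part of the
 * original source): with GetLogicalBlockSize() returning 4096,
 * hfs_vnop_blktooff maps logical block 3 to offset 3 * 4096 = 12288, and
 * hfs_vnop_offtoblk maps offset 20000 back to block 20000 / 4096 = 4
 * (integer division), i.e. the logical block containing that offset.
 */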
2905
91447636
A
2906/*
2907 * Map file offset to physical block number.
2908 *
2d21ac55
A
2909 * If this function is called for write operation, and if the file
2910 * had virtual blocks allocated (delayed allocation), real blocks
2911 * are allocated by calling ExtendFileC().
2912 *
2913 * If this function is called for read operation, and if the file
2914 * had virtual blocks allocated (delayed allocation), no change
2915 * to the size of file is done, and if required, rangelist is
2916 * searched for mapping.
2917 *
91447636
A
2918 * System file cnodes are expected to be locked (shared or exclusive).
2919 */
1c79356b 2920int
91447636
A
2921hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
2922/*
2923 struct vnop_blockmap_args {
2924 vnode_t a_vp;
9bccf70c
A
2925 off_t a_foffset;
2926 size_t a_size;
91447636 2927 daddr64_t *a_bpn;
9bccf70c
A
2928 size_t *a_run;
2929 void *a_poff;
91447636
A
2930 int a_flags;
2931 vfs_context_t a_context;
2932 };
2933*/
1c79356b 2934{
91447636
A
2935 struct vnode *vp = ap->a_vp;
2936 struct cnode *cp;
2937 struct filefork *fp;
2938 struct hfsmount *hfsmp;
2939 size_t bytesContAvail = 0;
2940 int retval = E_NONE;
2941 int syslocks = 0;
2942 int lockflags = 0;
2943 struct rl_entry *invalid_range;
2944 enum rl_overlaptype overlaptype;
2945 int started_tr = 0;
2946 int tooklock = 0;
1c79356b 2947
b0d623f7
A
2948#if HFS_COMPRESSION
2949 if (VNODE_IS_RSRC(vp)) {
2950 /* allow blockmaps to the resource fork */
2951 } else {
2952 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
2953 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
2954 switch(state) {
2955 case FILE_IS_COMPRESSED:
2956 return ENOTSUP;
2957 case FILE_IS_CONVERTING:
2958 /* if FILE_IS_CONVERTING, we allow blockmap */
2959 break;
2960 default:
2961 printf("invalid state %d for compressed file\n", state);
2962 /* fall through */
2963 }
2964 }
2965 }
2966#endif /* HFS_COMPRESSION */
2967
3a60a9f5
A
2968 /* Do not allow blockmap operation on a directory */
2969 if (vnode_isdir(vp)) {
2970 return (ENOTSUP);
2971 }
2972
9bccf70c
A
2973 /*
2974 * Check for underlying vnode requests and ensure that logical
2975 * to physical mapping is requested.
2976 */
2977 if (ap->a_bpn == NULL)
2978 return (0);
2979
2d21ac55 2980 if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
91447636 2981 if (VTOC(vp)->c_lockowner != current_thread()) {
39236c6e 2982 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
91447636 2983 tooklock = 1;
91447636
A
2984 }
2985 }
2986 hfsmp = VTOHFS(vp);
2987 cp = VTOC(vp);
2988 fp = VTOF(vp);
55e303ae 2989
91447636 2990retry:
2d21ac55
A
2991 /* Check virtual blocks only when performing write operation */
2992 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
91447636
A
2993 if (hfs_start_transaction(hfsmp) != 0) {
2994 retval = EINVAL;
2995 goto exit;
2996 } else {
2997 started_tr = 1;
b4c24cb9 2998 }
91447636
A
2999 syslocks = SFL_EXTENTS | SFL_BITMAP;
3000
b4c24cb9 3001 } else if (overflow_extents(fp)) {
91447636 3002 syslocks = SFL_EXTENTS;
9bccf70c 3003 }
91447636
A
3004
3005 if (syslocks)
3006 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1c79356b 3007
9bccf70c
A
3008 /*
3009 * Check for any delayed allocations.
3010 */
2d21ac55
A
3011 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
3012 int64_t actbytes;
91447636 3013 u_int32_t loanedBlocks;
1c79356b 3014
55e303ae 3015 //
d12e1678
A
3016 // Make sure we have a transaction. It's possible
3017 // that we came in and fp->ff_unallocblocks was zero
3018 // but during the time we blocked acquiring the extents
3019 // btree, ff_unallocblocks became non-zero and so we
3020 // will need to start a transaction.
3021 //
91447636
A
3022 if (started_tr == 0) {
3023 if (syslocks) {
3024 hfs_systemfile_unlock(hfsmp, lockflags);
3025 syslocks = 0;
3026 }
3027 goto retry;
d12e1678
A
3028 }
3029
9bccf70c 3030 /*
91447636
A
3031 * Note: ExtendFileC will release any blocks on loan and
3032 * acquire real blocks. So we ask to extend by zero bytes
3033 * since ExtendFileC will account for the virtual blocks.
9bccf70c 3034 */
9bccf70c 3035
91447636
A
3036 loanedBlocks = fp->ff_unallocblocks;
3037 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
3038 kEFAllMask | kEFNoClumpMask, &actbytes);
3039
3040 if (retval) {
3041 fp->ff_unallocblocks = loanedBlocks;
3042 cp->c_blocks += loanedBlocks;
3043 fp->ff_blocks += loanedBlocks;
3044
39236c6e 3045 hfs_lock_mount (hfsmp);
91447636 3046 hfsmp->loanedBlocks += loanedBlocks;
39236c6e 3047 hfs_unlock_mount (hfsmp);
1c79356b 3048
91447636
A
3049 hfs_systemfile_unlock(hfsmp, lockflags);
3050 cp->c_flag |= C_MODIFIED;
b4c24cb9 3051 if (started_tr) {
91447636
A
3052 (void) hfs_update(vp, TRUE);
3053 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
55e303ae 3054
91447636 3055 hfs_end_transaction(hfsmp);
2d21ac55 3056 started_tr = 0;
b4c24cb9 3057 }
91447636 3058 goto exit;
b4c24cb9 3059 }
9bccf70c
A
3060 }
3061
91447636
A
3062 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
3063 ap->a_bpn, &bytesContAvail);
3064 if (syslocks) {
3065 hfs_systemfile_unlock(hfsmp, lockflags);
3066 syslocks = 0;
3067 }
1c79356b 3068
b4c24cb9 3069 if (started_tr) {
91447636
A
3070 (void) hfs_update(vp, TRUE);
3071 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3072 hfs_end_transaction(hfsmp);
b4c24cb9 3073 started_tr = 0;
91447636
A
3074 }
3075 if (retval) {
2d21ac55
A
3076 /* On write, always return error because virtual blocks, if any,
3077 * should have been allocated in ExtendFileC(). We do not
3078 * allocate virtual blocks on read, therefore return error
3079 * only if no virtual blocks are allocated. Otherwise we search
3080 * rangelist for zero-fills
3081 */
3082 if ((MacToVFSError(retval) != ERANGE) ||
3083 (ap->a_flags & VNODE_WRITE) ||
3084 ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
3085 goto exit;
3086 }
3087
3088 /* Validate if the start offset is within logical file size */
316670eb 3089 if (ap->a_foffset >= fp->ff_size) {
39236c6e 3090 goto exit;
2d21ac55
A
3091 }
3092
316670eb
A
3093 /*
3094 * At this point, we have encountered a failure during
3095 * MapFileBlockC that resulted in ERANGE, and we are not servicing
3096 * a write, and there are borrowed blocks.
3097 *
3098 * However, the cluster layer will not call blockmap for
3099 * blocks that are borrowed and in-cache. We have to assume that
3100 * because we observed ERANGE being emitted from MapFileBlockC, this
3101 * extent range is not valid on-disk. So we treat this as a
3102 * mapping that needs to be zero-filled prior to reading.
3103 *
3104 * Note that under certain circumstances (such as non-contiguous
3105 * userland VM mappings in the calling process), cluster_io
3106 * may be forced to split a large I/O driven by hfs_vnop_write
3107 * into multiple sub-I/Os that necessitate a RMW cycle. If this is
3108 * the case here, then we have already removed the invalid range list
3109 * mapping prior to getting to this blockmap call, so we should not
3110 * search the invalid rangelist for this byte range.
2d21ac55 3111 */
316670eb
A
3112
3113 bytesContAvail = fp->ff_size - ap->a_foffset;
3114 /*
3115 * Clip the contiguous available bytes to, at most, the allowable
3116 * maximum or the amount requested.
3117 */
3118
3119 if (bytesContAvail > ap->a_size) {
3120 bytesContAvail = ap->a_size;
2d21ac55 3121 }
316670eb
A
3122
3123 *ap->a_bpn = (daddr64_t) -1;
3124 retval = 0;
3125
91447636
A
3126 goto exit;
3127 }
1c79356b 3128
2d21ac55
A
3129 /* MapFileBlockC() found a valid extent in the filefork. Search the
3130 * mapping information further for invalid file ranges
3131 */
91447636
A
3132 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
3133 ap->a_foffset + (off_t)bytesContAvail - 1,
3134 &invalid_range);
3135 if (overlaptype != RL_NOOVERLAP) {
3136 switch(overlaptype) {
3137 case RL_MATCHINGOVERLAP:
3138 case RL_OVERLAPCONTAINSRANGE:
3139 case RL_OVERLAPSTARTSBEFORE:
2d21ac55 3140 /* There's no valid block for this byte offset */
91447636
A
3141 *ap->a_bpn = (daddr64_t)-1;
3142 /* There's no point limiting the amount to be returned
3143 * if the invalid range that was hit extends all the way
3144 * to the EOF (i.e. there's no valid bytes between the
3145 * end of this range and the file's EOF):
3146 */
3147 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
b0d623f7 3148 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
91447636
A
3149 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
3150 }
3151 break;
9bccf70c 3152
91447636
A
3153 case RL_OVERLAPISCONTAINED:
3154 case RL_OVERLAPENDSAFTER:
3155 /* The range of interest hits an invalid block before the end: */
3156 if (invalid_range->rl_start == ap->a_foffset) {
3157 /* There's actually no valid information to be had starting here: */
3158 *ap->a_bpn = (daddr64_t)-1;
3159 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
b0d623f7 3160 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
91447636
A
3161 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
3162 }
3163 } else {
3164 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
3165 }
9bccf70c 3166 break;
1c79356b 3167
91447636 3168 case RL_NOOVERLAP:
9bccf70c 3169 break;
91447636
A
3170 } /* end switch */
3171 if (bytesContAvail > ap->a_size)
3172 bytesContAvail = ap->a_size;
2d21ac55
A
3173 }
3174
3175exit:
3176 if (retval == 0) {
3177 if (ap->a_run)
3178 *ap->a_run = bytesContAvail;
3179
3180 if (ap->a_poff)
3181 *(int *)ap->a_poff = 0;
9bccf70c 3182 }
91447636 3183
91447636
A
3184 if (tooklock)
3185 hfs_unlock(cp);
3186
3187 return (MacToVFSError(retval));
1c79356b
A
3188}
3189
3190/*
91447636
A
3191 * prepare and issue the I/O
3192 * buf_strategy knows how to deal
3193 * with requests that require
3194 * fragmented I/Os
3195 */
1c79356b 3196int
91447636 3197hfs_vnop_strategy(struct vnop_strategy_args *ap)
1c79356b 3198{
91447636
A
3199 buf_t bp = ap->a_bp;
3200 vnode_t vp = buf_vnode(bp);
6d2010ae
A
3201 int error = 0;
3202
316670eb
A
3203 /* Mark buffer as containing static data if cnode flag set */
3204 if (VTOC(vp)->c_flag & C_SSD_STATIC) {
3205 buf_markstatic(bp);
3206 }
3207
39236c6e
A
3208 /* Mark buffer for greedy-mode writes if cnode flag set */
3209 if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) {
fe8ab488
A
3210 bufattr_markgreedymode(&bp->b_attr);
3211 }
3212
3213 /* mark buffer as containing burst mode data if cnode flag set */
3214 if (VTOC(vp)->c_flag & C_IO_ISOCHRONOUS) {
3215 bufattr_markisochronous(&bp->b_attr);
39236c6e
A
3216 }
3217
6d2010ae
A
3218#if CONFIG_PROTECT
3219 cnode_t *cp = NULL;
3220
fe8ab488
A
3221 if ((!bufattr_rawencrypted(&bp->b_attr)) &&
3222 ((cp = cp_get_protected_cnode(vp)) != NULL)) {
316670eb
A
3223 /*
3224 * We rely upon the truncate lock to protect the
3225 * CP cache key from getting tossed prior to our IO finishing here.
3226 * Nearly all cluster io calls to manipulate file payload from HFS
3227 * take the truncate lock before calling into the cluster
3228 * layer to ensure the file size does not change, or that they
3229 * have exclusive right to change the EOF of the file.
3230 * That same guarantee protects us here since the code that
3231 * deals with CP lock events must now take the truncate lock
3232 * before doing anything.
3233 *
3234 * There is one exception here:
3235 * 1) The VM swapfile IO, because HFS will
3236 * funnel the VNOP_PAGEOUT directly into a cluster_pageout call for the
3237 * swapfile code only without holding the truncate lock. This is because
3238 * individual swapfiles are maintained at fixed-length sizes by the VM code.
3239 * In non-swapfile IO we use PAGEOUT_V2 semantics which allow us to
3240 * create our own UPL and thus take the truncate lock before calling
3241 * into the cluster layer. In that case, however, we are not concerned
3242 * with the CP blob being wiped out in the middle of the IO
3243 * because there isn't anything to toss; the VM swapfile key stays
3244 * in-core as long as the file is open.
fe8ab488
A
3245 */
3246
3247
3248 /*
3249 * Last chance: If this data protected I/O does not have unwrapped keys
3250 * present, then try to get them. We already know that it should, by this point.
3251 */
3252 if (cp->c_cpentry->cp_flags & (CP_KEY_FLUSHED | CP_NEEDS_KEYS)) {
3253 int io_op = ( (buf_flags(bp) & B_READ) ? CP_READ_ACCESS : CP_WRITE_ACCESS);
3254 if ((error = cp_handle_vnop(vp, io_op, 0)) != 0) {
3255 /*
3256 * We have to be careful here. By this point in the I/O path, VM or the cluster
3257 * engine has prepared a buf_t with the proper file offsets and all the rest,
3258 * so simply erroring out will result in us leaking this particular buf_t.
3259 * We need to properly decorate the buf_t just as buf_strategy would so as
3260 * to make it appear that the I/O errored out with the particular error code.
3261 */
3262 buf_seterror (bp, error);
3263 buf_biodone(bp);
3264 return error;
3265 }
3266 }
3267
3268 /*
3269 *NB:
316670eb
A
3270 * For filesystem resize, we may not have access to the underlying
3271 * file's cache key for whatever reason (device may be locked). However,
3272 * we do not need it since we are going to use the temporary HFS-wide resize key
3273 * which is generated once we start relocating file content. If this file's I/O
3274 * should be done using the resize key, it will have been supplied already, so
3275 * do not attach the file's cp blob to the buffer.
6d2010ae 3276 */
316670eb
A
3277 if ((cp->c_cpentry->cp_flags & CP_RELOCATION_INFLIGHT) == 0) {
3278 buf_setcpaddr(bp, cp->c_cpentry);
3279 }
6d2010ae
A
3280 }
3281#endif /* CONFIG_PROTECT */
3282
3283 error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
6d2010ae
A
3284
3285 return error;
1c79356b
A
3286}
3287
b0d623f7
A
3288static int
3289hfs_minorupdate(struct vnode *vp) {
3290 struct cnode *cp = VTOC(vp);
3291 cp->c_flag &= ~C_MODIFIED;
3292 cp->c_touch_acctime = 0;
3293 cp->c_touch_chgtime = 0;
3294 cp->c_touch_modtime = 0;
3295
3296 return 0;
3297}
1c79356b 3298
6d2010ae 3299int
39236c6e 3300do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vfs_context_t context)
1c79356b 3301{
9bccf70c
A
3302 register struct cnode *cp = VTOC(vp);
3303 struct filefork *fp = VTOF(vp);
91447636 3304 kauth_cred_t cred = vfs_context_ucred(context);
9bccf70c
A
3305 int retval;
3306 off_t bytesToAdd;
3307 off_t actualBytesAdded;
3308 off_t filebytes;
b0d623f7 3309 u_int32_t fileblocks;
9bccf70c 3310 int blksize;
b4c24cb9 3311 struct hfsmount *hfsmp;
91447636 3312 int lockflags;
39236c6e
A
3313 int skipupdate = (truncateflags & HFS_TRUNCATE_SKIPUPDATE);
3314 int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES);
fe8ab488 3315
9bccf70c
A
3316 blksize = VTOVCB(vp)->blockSize;
3317 fileblocks = fp->ff_blocks;
3318 filebytes = (off_t)fileblocks * (off_t)blksize;
3319
fe8ab488 3320 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_START,
9bccf70c
A
3321 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
3322
3323 if (length < 0)
3324 return (EINVAL);
1c79356b 3325
8f6c56a5
A
3326 /* This should only happen with a corrupt filesystem */
3327 if ((off_t)fp->ff_size < 0)
3328 return (EINVAL);
3329
9bccf70c
A
3330 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
3331 return (EFBIG);
1c79356b 3332
b4c24cb9 3333 hfsmp = VTOHFS(vp);
1c79356b 3334
9bccf70c 3335 retval = E_NONE;
1c79356b 3336
55e303ae
A
3337 /* Files that are changing size are not hot file candidates. */
3338 if (hfsmp->hfc_stage == HFC_RECORDING) {
3339 fp->ff_bytesread = 0;
3340 }
3341
9bccf70c
A
3342 /*
3343 * We cannot just check if fp->ff_size == length (as an optimization)
3344 * since there may be extra physical blocks that also need truncation.
3345 */
3346#if QUOTA
91447636 3347 if ((retval = hfs_getinoquota(cp)))
9bccf70c
A
3348 return(retval);
3349#endif /* QUOTA */
1c79356b 3350
9bccf70c
A
3351 /*
3352 * Lengthen the size of the file. We must ensure that the
3353 * last byte of the file is allocated. Since the smallest
3354 * value of ff_size is 0, length will be at least 1.
3355 */
91447636 3356 if (length > (off_t)fp->ff_size) {
9bccf70c 3357#if QUOTA
b4c24cb9 3358 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
91447636 3359 cred, 0);
9bccf70c
A
3360 if (retval)
3361 goto Err_Exit;
3362#endif /* QUOTA */
3363 /*
3364 * If we don't have enough physical space then
3365 * we need to extend the physical size.
3366 */
3367 if (length > filebytes) {
3368 int eflags;
b0d623f7 3369 u_int32_t blockHint = 0;
1c79356b 3370
9bccf70c
A
3371 /* All or nothing and don't round up to clumpsize. */
3372 eflags = kEFAllMask | kEFNoClumpMask;
1c79356b 3373
fe8ab488 3374 if (cred && (suser(cred, NULL) != 0)) {
9bccf70c 3375 eflags |= kEFReserveMask; /* keep a reserve */
fe8ab488 3376 }
1c79356b 3377
55e303ae
A
3378 /*
3379 * Allocate Journal and Quota files in metadata zone.
3380 */
3381 if (filebytes == 0 &&
3382 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
3383 hfs_virtualmetafile(cp)) {
3384 eflags |= kEFMetadataMask;
3385 blockHint = hfsmp->hfs_metazone_start;
3386 }
91447636
A
3387 if (hfs_start_transaction(hfsmp) != 0) {
3388 retval = EINVAL;
3389 goto Err_Exit;
b4c24cb9
A
3390 }
3391
91447636
A
3392 /* Protect extents b-tree and allocation bitmap */
3393 lockflags = SFL_BITMAP;
3394 if (overflow_extents(fp))
3395 lockflags |= SFL_EXTENTS;
3396 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1c79356b 3397
fe8ab488
A
3398 /*
3399 * Keep growing the file as long as the current EOF is
3400 * less than the desired value.
3401 */
9bccf70c
A
3402 while ((length > filebytes) && (retval == E_NONE)) {
3403 bytesToAdd = length - filebytes;
3404 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
3405 (FCB*)fp,
1c79356b 3406 bytesToAdd,
55e303ae 3407 blockHint,
9bccf70c 3408 eflags,
1c79356b
A
3409 &actualBytesAdded));
3410
9bccf70c
A
3411 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
3412 if (actualBytesAdded == 0 && retval == E_NONE) {
3413 if (length > filebytes)
3414 length = filebytes;
3415 break;
3416 }
3417 } /* endwhile */
b4c24cb9 3418
91447636 3419 hfs_systemfile_unlock(hfsmp, lockflags);
b4c24cb9 3420
b4c24cb9 3421 if (hfsmp->jnl) {
b0d623f7
A
3422 if (skipupdate) {
3423 (void) hfs_minorupdate(vp);
3424 }
39236c6e 3425 else {
b0d623f7
A
3426 (void) hfs_update(vp, TRUE);
3427 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3428 }
91447636 3429 }
55e303ae 3430
91447636 3431 hfs_end_transaction(hfsmp);
b4c24cb9 3432
9bccf70c
A
3433 if (retval)
3434 goto Err_Exit;
3435
fe8ab488 3436 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE,
9bccf70c 3437 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1c79356b 3438 }
1c79356b 3439
fe8ab488
A
3440 if (ISSET(flags, IO_NOZEROFILL)) {
3441 // An optimisation for the hibernation file
3442 if (vnode_isswap(vp))
3443 rl_remove_all(&fp->ff_invalidranges);
3444 } else {
2d21ac55 3445 if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
9bccf70c 3446 struct rl_entry *invalid_range;
9bccf70c 3447 off_t zero_limit;
0b4e3aa0 3448
9bccf70c
A
3449 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
3450 if (length < zero_limit) zero_limit = length;
3451
91447636
A
3452 if (length > (off_t)fp->ff_size) {
3453 struct timeval tv;
3454
9bccf70c
A
3455 /* Extending the file: time to fill out the current last page w. zeroes? */
3456 if ((fp->ff_size & PAGE_MASK_64) &&
3457 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
3458 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
0b4e3aa0
A
3459
3460 /* There's some valid data at the start of the (current) last page
3461 of the file, so zero out the remainder of that page to ensure the
3462 entire page contains valid data. Since there is no invalid range
3463 possible past the (current) eof, there's no need to remove anything
91447636
A
3464 from the invalid range list before calling cluster_write(): */
3465 hfs_unlock(cp);
9bccf70c 3466 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
91447636
A
3467 fp->ff_size, (off_t)0,
3468 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
39236c6e 3469 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
0b4e3aa0
A
3470 if (retval) goto Err_Exit;
3471
3472 /* Merely invalidate the remaining area, if necessary: */
9bccf70c 3473 if (length > zero_limit) {
91447636 3474 microuptime(&tv);
9bccf70c 3475 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
91447636 3476 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
9bccf70c
A
3477 }
3478 } else {
0b4e3aa0
A
3479 /* The page containing the (current) eof is invalid: just add the
3480 remainder of the page to the invalid list, along with the area
3481 being newly allocated:
3482 */
91447636 3483 microuptime(&tv);
9bccf70c 3484 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
91447636 3485 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
9bccf70c
A
3486 };
3487 }
3488 } else {
3489 panic("hfs_truncate: invoked on non-UBC object?!");
3490 };
3491 }
39236c6e
A
3492 if (suppress_times == 0) {
3493 cp->c_touch_modtime = TRUE;
3494 }
9bccf70c 3495 fp->ff_size = length;
0b4e3aa0 3496
9bccf70c 3497 } else { /* Shorten the size of the file */
0b4e3aa0 3498
fe8ab488
A
3499 // An optimisation for the hibernation file
3500 if (ISSET(flags, IO_NOZEROFILL) && vnode_isswap(vp)) {
3501 rl_remove_all(&fp->ff_invalidranges);
3502 } else if ((off_t)fp->ff_size > length) {
9bccf70c
A
3503 /* Any space previously marked as invalid is now irrelevant: */
3504 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
3505 }
1c79356b 3506
9bccf70c
A
3507 /*
3508 * Account for any unmapped blocks. Note that the new
3509 * file length can still end up with unmapped blocks.
3510 */
3511 if (fp->ff_unallocblocks > 0) {
3512 u_int32_t finalblks;
91447636 3513 u_int32_t loanedBlocks;
1c79356b 3514
39236c6e 3515 hfs_lock_mount(hfsmp);
91447636
A
3516 loanedBlocks = fp->ff_unallocblocks;
3517 cp->c_blocks -= loanedBlocks;
3518 fp->ff_blocks -= loanedBlocks;
3519 fp->ff_unallocblocks = 0;
1c79356b 3520
91447636 3521 hfsmp->loanedBlocks -= loanedBlocks;
9bccf70c
A
3522
3523 finalblks = (length + blksize - 1) / blksize;
3524 if (finalblks > fp->ff_blocks) {
3525 /* calculate required unmapped blocks */
91447636
A
3526 loanedBlocks = finalblks - fp->ff_blocks;
3527 hfsmp->loanedBlocks += loanedBlocks;
3528
3529 fp->ff_unallocblocks = loanedBlocks;
3530 cp->c_blocks += loanedBlocks;
3531 fp->ff_blocks += loanedBlocks;
9bccf70c 3532 }
39236c6e 3533 hfs_unlock_mount (hfsmp);
9bccf70c 3534 }
1c79356b 3535
9bccf70c 3536#if QUOTA
fe8ab488 3537 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
9bccf70c 3538#endif /* QUOTA */
fe8ab488
A
3539 if (hfs_start_transaction(hfsmp) != 0) {
3540 retval = EINVAL;
3541 goto Err_Exit;
3542 }
91447636 3543
fe8ab488
A
3544 if (fp->ff_unallocblocks == 0) {
3545 /* Protect extents b-tree and allocation bitmap */
3546 lockflags = SFL_BITMAP;
3547 if (overflow_extents(fp))
3548 lockflags |= SFL_EXTENTS;
3549 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
b4c24cb9 3550
fe8ab488
A
3551 retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0,
3552 FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));
1c79356b 3553
fe8ab488
A
3554 hfs_systemfile_unlock(hfsmp, lockflags);
3555 }
3556 if (hfsmp->jnl) {
3557 if (retval == 0) {
3558 fp->ff_size = length;
91447636 3559 }
fe8ab488
A
3560 if (skipupdate) {
3561 (void) hfs_minorupdate(vp);
b4c24cb9 3562 }
fe8ab488
A
3563 else {
3564 (void) hfs_update(vp, TRUE);
3565 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3566 }
3567 }
3568 hfs_end_transaction(hfsmp);
b4c24cb9 3569
fe8ab488
A
3570 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
3571 if (retval)
3572 goto Err_Exit;
9bccf70c 3573#if QUOTA
fe8ab488
A
3574 /* These are bytesreleased */
3575 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
9bccf70c 3576#endif /* QUOTA */
fe8ab488 3577
39236c6e
A
3578 /*
3579 * Only set update flag if the logical length changes & we aren't
3580 * suppressing modtime updates.
3581 */
3582 if (((off_t)fp->ff_size != length) && (suppress_times == 0)) {
91447636 3583 cp->c_touch_modtime = TRUE;
39236c6e 3584 }
9bccf70c 3585 fp->ff_size = length;
1c79356b 3586 }
b0d623f7
A
3587 if (cp->c_mode & (S_ISUID | S_ISGID)) {
3588 if (!vfs_context_issuser(context)) {
3589 cp->c_mode &= ~(S_ISUID | S_ISGID);
3590 skipupdate = 0;
3591 }
3592 }
3593 if (skipupdate) {
3594 retval = hfs_minorupdate(vp);
3595 }
3596 else {
3597 cp->c_touch_chgtime = TRUE; /* status changed */
39236c6e
A
3598 if (suppress_times == 0) {
3599 cp->c_touch_modtime = TRUE; /* file data was modified */
3600
3601 /*
3602 * If we are not suppressing the modtime update, then
3603 * update the gen count as well.
3604 */
3605 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) {
3606 hfs_incr_gencount(cp);
3607 }
3608 }
3609
b0d623f7
A
3610 retval = hfs_update(vp, MNT_WAIT);
3611 }
9bccf70c 3612 if (retval) {
fe8ab488 3613 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_NONE,
1c79356b 3614 -1, -1, -1, retval, 0);
9bccf70c 3615 }
1c79356b 3616
9bccf70c 3617Err_Exit:
1c79356b 3618
fe8ab488 3619 KERNEL_DEBUG(HFSDBG_TRUNCATE | DBG_FUNC_END,
9bccf70c 3620 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
1c79356b 3621
9bccf70c 3622 return (retval);
1c79356b
A
3623}
3624
6d2010ae
A
3625/*
3626 * Preparation which must be done prior to deleting the catalog record
3627 * of a file or directory. In order to make the on-disk state as safe as possible,
3628 * we remove the catalog entry before releasing the bitmap blocks and the
3629 * overflow extent records. However, some work must be done prior to deleting
3630 * the catalog record.
3631 *
3632 * When calling this function, the cnode must exist both in memory and on-disk.
3633 * If there are both resource fork and data fork vnodes, this function should
3634 * be called on both.
3635 */
3636
3637int
3638hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {
3639
3640 struct filefork *fp = VTOF(vp);
3641 struct cnode *cp = VTOC(vp);
316670eb 3642#if QUOTA
6d2010ae 3643 int retval = 0;
316670eb 3644#endif /* QUOTA */
6d2010ae
A
3645
3646 /* Cannot truncate an HFS directory! */
3647 if (vnode_isdir(vp)) {
3648 return (EISDIR);
3649 }
3650
3651 /*
3652 * See the comment below in hfs_truncate for why we need to call
3653 * setsize here. Essentially we want to avoid pending IO if we
3654 * already know that the blocks are going to be released here.
3655 * This function is only called when removing all storage for a file, so
3656 * we can take a shortcut and immediately call ubc_setsize(vp, 0).
3657 */
3658 ubc_setsize(vp, 0);
3659
3660 /* This should only happen with a corrupt filesystem */
3661 if ((off_t)fp->ff_size < 0)
3662 return (EINVAL);
3663
3664 /*
3665 * We cannot just check if fp->ff_size == length (as an optimization)
3666 * since there may be extra physical blocks that also need truncation.
3667 */
3668#if QUOTA
3669 if ((retval = hfs_getinoquota(cp))) {
3670 return(retval);
3671 }
3672#endif /* QUOTA */
3673
3674 /* Wipe out any invalid ranges which have yet to be backed by disk */
3675 rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);
3676
3677 /*
3678 * Account for any unmapped blocks. Since we're deleting the
3679 * entire file, we don't have to worry about just shrinking
3680 * to a smaller number of borrowed blocks.
3681 */
3682 if (fp->ff_unallocblocks > 0) {
3683 u_int32_t loanedBlocks;
3684
39236c6e 3685 hfs_lock_mount (hfsmp);
6d2010ae
A
3686 loanedBlocks = fp->ff_unallocblocks;
3687 cp->c_blocks -= loanedBlocks;
3688 fp->ff_blocks -= loanedBlocks;
3689 fp->ff_unallocblocks = 0;
3690
3691 hfsmp->loanedBlocks -= loanedBlocks;
3692
39236c6e 3693 hfs_unlock_mount (hfsmp);
6d2010ae
A
3694 }
3695
3696 return 0;
3697}
3698
3699
3700/*
3701 * Special wrapper around calling TruncateFileC. This function is usable
3702 * even when the catalog record does not exist any longer, making it ideal
3703 * for use when deleting a file. The simplification here is that we know
3704 * that we are releasing all blocks.
3705 *
316670eb
A
3706 * Note that this function may be called when there is no vnode backing
3707 * the file fork in question. We may call this from hfs_vnop_inactive
3708 * to clear out resource fork data (and may not want to clear out the data
3709 * fork yet). As a result, we pointer-check both sets of inputs before
3710 * doing anything with them.
3711 *
6d2010ae
A
3712 * The caller is responsible for saving off a copy of the filefork(s)
3713 * embedded within the cnode prior to calling this function. The pointers
3714 * supplied as arguments must be valid even if the cnode is no longer valid.
3715 */
3716
3717int
3718hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
3719 struct filefork *rsrcfork, u_int32_t fileid) {
3720
3721 off_t filebytes;
3722 u_int32_t fileblocks;
3723 int blksize = 0;
3724 int error = 0;
3725 int lockflags;
3726
3727 blksize = hfsmp->blockSize;
3728
3729 /* Data Fork */
fe8ab488
A
3730 if (datafork) {
3731 datafork->ff_size = 0;
3732
6d2010ae
A
3733 fileblocks = datafork->ff_blocks;
3734 filebytes = (off_t)fileblocks * (off_t)blksize;
3735
3736 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3737
3738 while (filebytes > 0) {
fe8ab488 3739 if (filebytes > HFS_BIGFILE_SIZE) {
6d2010ae
A
3740 filebytes -= HFS_BIGFILE_SIZE;
3741 } else {
3742 filebytes = 0;
3743 }
3744
3745 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3746 if (hfs_start_transaction(hfsmp) != 0) {
3747 error = EINVAL;
3748 break;
3749 }
3750
3751 if (datafork->ff_unallocblocks == 0) {
3752 /* Protect extents b-tree and allocation bitmap */
3753 lockflags = SFL_BITMAP;
3754 if (overflow_extents(datafork))
3755 lockflags |= SFL_EXTENTS;
3756 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3757
3758 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false));
3759
3760 hfs_systemfile_unlock(hfsmp, lockflags);
3761 }
6d2010ae
A
3762 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3763
3764 /* Finish the transaction and start over if necessary */
3765 hfs_end_transaction(hfsmp);
3766
3767 if (error) {
3768 break;
3769 }
3770 }
3771 }
3772
3773 /* Resource fork */
fe8ab488
A
3774 if (error == 0 && rsrcfork) {
3775 rsrcfork->ff_size = 0;
3776
6d2010ae
A
3777 fileblocks = rsrcfork->ff_blocks;
3778 filebytes = (off_t)fileblocks * (off_t)blksize;
3779
3780 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3781
3782 while (filebytes > 0) {
fe8ab488 3783 if (filebytes > HFS_BIGFILE_SIZE) {
6d2010ae
A
3784 filebytes -= HFS_BIGFILE_SIZE;
3785 } else {
3786 filebytes = 0;
3787 }
3788
3789 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3790 if (hfs_start_transaction(hfsmp) != 0) {
3791 error = EINVAL;
3792 break;
3793 }
3794
3795 if (rsrcfork->ff_unallocblocks == 0) {
3796 /* Protect extents b-tree and allocation bitmap */
3797 lockflags = SFL_BITMAP;
3798 if (overflow_extents(rsrcfork))
3799 lockflags |= SFL_EXTENTS;
3800 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3801
3802 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false));
3803
3804 hfs_systemfile_unlock(hfsmp, lockflags);
3805 }
6d2010ae
A
3806 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3807
3808 /* Finish the transaction and start over if necessary */
3809 hfs_end_transaction(hfsmp);
3810
3811 if (error) {
3812 break;
3813 }
3814 }
3815 }
3816
3817 return error;
3818}
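/*
 * Illustrative sketch only -- not code reached by this file. It shows the
 * calling sequence the two helpers above are written for, per the comments
 * on hfs_prepare_release_storage() and hfs_release_storage(): prepare each
 * fork while its vnode is still valid, save copies of the fileforks, let the
 * real caller remove the catalog record, then release the blocks. The helper
 * name and the catalog-deletion placeholder are assumptions for illustration.
 */
#if 0
static int
example_release_all_storage(struct hfsmount *hfsmp, struct vnode *dvp, struct vnode *rvp)
{
	struct filefork dfork_copy, rfork_copy;
	u_int32_t fileid = VTOC(dvp)->c_fileid;
	int error;

	/* Step 1: flush UBC state and loaned blocks while the vnodes exist. */
	if ((error = hfs_prepare_release_storage(hfsmp, dvp)))
		return error;
	if (rvp && (error = hfs_prepare_release_storage(hfsmp, rvp)))
		return error;

	/*
	 * Step 2: save fork copies; the pointers handed to hfs_release_storage()
	 * must stay valid even after the cnode is no longer valid.
	 */
	dfork_copy = *VTOF(dvp);
	if (rvp)
		rfork_copy = *VTOF(rvp);

	/* Step 3: the real caller deletes the catalog record here ... */

	/* Step 4: release bitmap blocks and overflow extents, in chunked transactions. */
	return hfs_release_storage(hfsmp, &dfork_copy, rvp ? &rfork_copy : NULL, fileid);
}
#endif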
1c79356b 3819
fe8ab488
A
3820errno_t hfs_ubc_setsize(vnode_t vp, off_t len, bool have_cnode_lock)
3821{
3822 errno_t error;
3823
3824 /*
3825 * Call ubc_setsize to give the VM subsystem a chance to do
3826 * whatever it needs to with existing pages before we delete
3827 * blocks. Note that symlinks don't use the UBC so we'll
3828 * get back ENOENT in that case.
3829 */
3830 if (have_cnode_lock) {
3831 error = ubc_setsize_ex(vp, len, UBC_SETSIZE_NO_FS_REENTRY);
3832 if (error == EAGAIN) {
3833 cnode_t *cp = VTOC(vp);
3834
3835 if (cp->c_truncatelockowner != current_thread()) {
3836#if DEVELOPMENT || DEBUG
3837 panic("hfs: hfs_ubc_setsize called without exclusive truncate lock!");
3838#else
3839 printf("hfs: hfs_ubc_setsize called without exclusive truncate lock!\n");
3840#endif
3841 }
3842
3843 hfs_unlock(cp);
3844 error = ubc_setsize_ex(vp, len, 0);
3845 hfs_lock_always(cp, HFS_EXCLUSIVE_LOCK);
3846 }
3847 } else
3848 error = ubc_setsize_ex(vp, len, 0);
3849
3850 return error == ENOENT ? 0 : error;
3851}
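/*
 * A minimal usage sketch, mirroring what hfs_truncate() below does (shown
 * for illustration only): call hfs_ubc_setsize() before touching the on-disk
 * allocation so the VM subsystem can act on existing pages, then call it
 * again with fp->ff_size afterwards so UBC stays in sync even if the
 * truncate only partially succeeded.
 */
#if 0
	error = hfs_ubc_setsize(vp, new_length, caller_has_cnode_lock);
	if (error)
		return error;

	/* ... shrink or grow the fork under the cnode lock ... */

	/* Resync UBC with whatever size we actually ended up with. */
	errno_t err2 = hfs_ubc_setsize(vp, VTOF(vp)->ff_size, caller_has_cnode_lock);
	if (!error)
		error = err2;
#endif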
91447636 3852
55e303ae 3853/*
55e303ae
A
3854 * Truncate a cnode to at most length size, freeing (or adding) the
3855 * disk blocks.
3856 */
91447636 3857int
fe8ab488
A
3858hfs_truncate(struct vnode *vp, off_t length, int flags,
3859 int truncateflags, vfs_context_t context)
55e303ae 3860{
fe8ab488 3861 struct filefork *fp = VTOF(vp);
55e303ae 3862 off_t filebytes;
b0d623f7 3863 u_int32_t fileblocks;
fe8ab488
A
3864 int blksize;
3865 errno_t error = 0;
3a60a9f5 3866 struct cnode *cp = VTOC(vp);
55e303ae 3867
2d21ac55
A
3868 /* Cannot truncate an HFS directory! */
3869 if (vnode_isdir(vp)) {
3870 return (EISDIR);
3871 }
3872 /* A swap file cannot change size. */
fe8ab488 3873 if (vnode_isswap(vp) && length && !ISSET(flags, IO_NOAUTH)) {
2d21ac55
A
3874 return (EPERM);
3875 }
55e303ae 3876
55e303ae
A
3877 blksize = VTOVCB(vp)->blockSize;
3878 fileblocks = fp->ff_blocks;
3879 filebytes = (off_t)fileblocks * (off_t)blksize;
3880
fe8ab488
A
3881 bool caller_has_cnode_lock = (cp->c_lockowner == current_thread());
3882
3883 error = hfs_ubc_setsize(vp, length, caller_has_cnode_lock);
3884 if (error)
3885 return error;
3886
3887 if (!caller_has_cnode_lock) {
3888 error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
3889 if (error)
3890 return error;
3891 }
2d21ac55 3892
55e303ae
A
3893 // have to loop truncating or growing files that are
3894 // really big because otherwise transactions can get
3895 // enormous and consume too many kernel resources.
91447636
A
3896
3897 if (length < filebytes) {
3898 while (filebytes > length) {
fe8ab488 3899 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
91447636
A
3900 filebytes -= HFS_BIGFILE_SIZE;
3901 } else {
3902 filebytes = length;
3903 }
3a60a9f5 3904 cp->c_flag |= C_FORCEUPDATE;
39236c6e 3905 error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
91447636
A
3906 if (error)
3907 break;
3908 }
3909 } else if (length > filebytes) {
3910 while (filebytes < length) {
fe8ab488 3911 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
91447636
A
3912 filebytes += HFS_BIGFILE_SIZE;
3913 } else {
3914 filebytes = length;
3915 }
3a60a9f5 3916 cp->c_flag |= C_FORCEUPDATE;
39236c6e 3917 error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
91447636
A
3918 if (error)
3919 break;
55e303ae 3920 }
91447636 3921 } else /* Same logical size */ {
55e303ae 3922
39236c6e 3923 error = do_hfs_truncate(vp, length, flags, truncateflags, context);
91447636
A
3924 }
3925 /* Files that are changing size are not hot file candidates. */
3926 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
3927 fp->ff_bytesread = 0;
55e303ae
A
3928 }
3929
fe8ab488
A
3930 if (!caller_has_cnode_lock)
3931 hfs_unlock(cp);
55e303ae 3932
fe8ab488
A
3933 // Make sure UBC's size matches up (in case we didn't completely succeed)
3934 errno_t err2 = hfs_ubc_setsize(vp, fp->ff_size, caller_has_cnode_lock);
3935 if (!error)
3936 error = err2;
3937
3938 return error;
3939}
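/*
 * Illustrative caller sketch, not taken from this file: hfs_truncate()
 * expects the truncate lock to be held exclusively around the size change
 * (see hfs_vnop_allocate() below, which follows this pattern). The names
 * new_size and ctx are assumptions for illustration.
 */
#if 0
static int
example_set_file_size(struct vnode *vp, off_t new_size, vfs_context_t ctx)
{
	struct cnode *cp = VTOC(vp);
	int error;

	hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT)) == 0) {
		error = hfs_truncate(vp, new_size, 0, 0, ctx);
		hfs_unlock(cp);
	}
	hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
	return error;
}
#endif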
55e303ae 3940
1c79356b
A
3941
3942/*
91447636 3943 * Preallocate file storage space.
1c79356b 3944 */
91447636
A
3945int
3946hfs_vnop_allocate(struct vnop_allocate_args /* {
3947 vnode_t a_vp;
9bccf70c
A
3948 off_t a_length;
3949 u_int32_t a_flags;
3950 off_t *a_bytesallocated;
3951 off_t a_offset;
91447636
A
3952 vfs_context_t a_context;
3953 } */ *ap)
1c79356b 3954{
9bccf70c 3955 struct vnode *vp = ap->a_vp;
91447636
A
3956 struct cnode *cp;
3957 struct filefork *fp;
3958 ExtendedVCB *vcb;
9bccf70c
A
3959 off_t length = ap->a_length;
3960 off_t startingPEOF;
3961 off_t moreBytesRequested;
3962 off_t actualBytesAdded;
3963 off_t filebytes;
b0d623f7 3964 u_int32_t fileblocks;
9bccf70c 3965 int retval, retval2;
2d21ac55
A
3966 u_int32_t blockHint;
3967 u_int32_t extendFlags; /* For call to ExtendFileC */
b4c24cb9 3968 struct hfsmount *hfsmp;
91447636
A
3969 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
3970 int lockflags;
6d2010ae 3971 time_t orig_ctime;
91447636
A
3972
3973 *(ap->a_bytesallocated) = 0;
3974
3975 if (!vnode_isreg(vp))
3976 return (EISDIR);
3977 if (length < (off_t)0)
3978 return (EINVAL);
2d21ac55 3979
91447636 3980 cp = VTOC(vp);
2d21ac55 3981
6d2010ae
A
3982 orig_ctime = VTOC(vp)->c_ctime;
3983
3984 check_for_tracked_file(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);
3985
39236c6e 3986 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2d21ac55 3987
39236c6e 3988 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
2d21ac55
A
3989 goto Err_Exit;
3990 }
3991
91447636 3992 fp = VTOF(vp);
b4c24cb9 3993 hfsmp = VTOHFS(vp);
91447636 3994 vcb = VTOVCB(vp);
9bccf70c 3995
9bccf70c 3996 fileblocks = fp->ff_blocks;
55e303ae 3997 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
9bccf70c 3998
91447636
A
3999 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
4000 retval = EINVAL;
4001 goto Err_Exit;
4002 }
0b4e3aa0 4003
9bccf70c 4004 /* Fill in the flags word for the call to Extend the file */
1c79356b 4005
55e303ae 4006 extendFlags = kEFNoClumpMask;
9bccf70c 4007 if (ap->a_flags & ALLOCATECONTIG)
1c79356b 4008 extendFlags |= kEFContigMask;
9bccf70c 4009 if (ap->a_flags & ALLOCATEALL)
1c79356b 4010 extendFlags |= kEFAllMask;
91447636 4011 if (cred && suser(cred, NULL) != 0)
9bccf70c 4012 extendFlags |= kEFReserveMask;
b0d623f7
A
4013 if (hfs_virtualmetafile(cp))
4014 extendFlags |= kEFMetadataMask;
1c79356b 4015
9bccf70c
A
4016 retval = E_NONE;
4017 blockHint = 0;
4018 startingPEOF = filebytes;
1c79356b 4019
9bccf70c
A
4020 if (ap->a_flags & ALLOCATEFROMPEOF)
4021 length += filebytes;
4022 else if (ap->a_flags & ALLOCATEFROMVOL)
4023 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
1c79356b 4024
9bccf70c
A
4025 /* If no changes are necessary, then we're done */
4026 if (filebytes == length)
4027 goto Std_Exit;
1c79356b 4028
9bccf70c
A
4029 /*
4030 * Lengthen the size of the file. We must ensure that the
4031 * last byte of the file is allocated. Since the smallest
4032 * value of filebytes is 0, length will be at least 1.
4033 */
4034 if (length > filebytes) {
2d21ac55
A
4035 off_t total_bytes_added = 0, orig_request_size;
4036
4037 orig_request_size = moreBytesRequested = length - filebytes;
1c79356b 4038
9bccf70c 4039#if QUOTA
b4c24cb9 4040 retval = hfs_chkdq(cp,
55e303ae 4041 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
91447636 4042 cred, 0);
9bccf70c 4043 if (retval)
91447636 4044 goto Err_Exit;
9bccf70c
A
4045
4046#endif /* QUOTA */
55e303ae
A
4047 /*
4048 * Metadata zone checks.
4049 */
4050 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
4051 /*
4052 * Allocate Journal and Quota files in metadata zone.
4053 */
4054 if (hfs_virtualmetafile(cp)) {
55e303ae
A
4055 blockHint = hfsmp->hfs_metazone_start;
4056 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
4057 (blockHint <= hfsmp->hfs_metazone_end)) {
4058 /*
4059 * Move blockHint outside metadata zone.
4060 */
4061 blockHint = hfsmp->hfs_metazone_end + 1;
4062 }
4063 }
4064
b4c24cb9 4065
2d21ac55
A
4066 while ((length > filebytes) && (retval == E_NONE)) {
4067 off_t bytesRequested;
4068
4069 if (hfs_start_transaction(hfsmp) != 0) {
4070 retval = EINVAL;
4071 goto Err_Exit;
4072 }
4073
4074 /* Protect extents b-tree and allocation bitmap */
4075 lockflags = SFL_BITMAP;
4076 if (overflow_extents(fp))
fe8ab488 4077 lockflags |= SFL_EXTENTS;
2d21ac55
A
4078 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4079
4080 if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
fe8ab488 4081 bytesRequested = HFS_BIGFILE_SIZE;
2d21ac55 4082 } else {
fe8ab488 4083 bytesRequested = moreBytesRequested;
2d21ac55 4084 }
1c79356b 4085
b0d623f7
A
4086 if (extendFlags & kEFContigMask) {
4087 // if we're on a sparse device, this will force it to do a
4088 // full scan to find the space needed.
4089 hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
4090 }
4091
2d21ac55 4092 retval = MacToVFSError(ExtendFileC(vcb,
9bccf70c 4093 (FCB*)fp,
2d21ac55 4094 bytesRequested,
9bccf70c
A
4095 blockHint,
4096 extendFlags,
4097 &actualBytesAdded));
1c79356b 4098
2d21ac55
A
4099 if (retval == E_NONE) {
4100 *(ap->a_bytesallocated) += actualBytesAdded;
4101 total_bytes_added += actualBytesAdded;
4102 moreBytesRequested -= actualBytesAdded;
4103 if (blockHint != 0) {
4104 blockHint += actualBytesAdded / vcb->blockSize;
4105 }
4106 }
4107 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
4108
4109 hfs_systemfile_unlock(hfsmp, lockflags);
1c79356b 4110
2d21ac55 4111 if (hfsmp->jnl) {
91447636
A
4112 (void) hfs_update(vp, TRUE);
4113 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2d21ac55
A
4114 }
4115
4116 hfs_end_transaction(hfsmp);
b4c24cb9 4117 }
91447636 4118
b4c24cb9 4119
1c79356b
A
4120 /*
4121 * if we get an error and no changes were made then exit;
91447636 4122 * otherwise we must do the hfs_update to reflect the changes
1c79356b 4123 */
9bccf70c
A
4124 if (retval && (startingPEOF == filebytes))
4125 goto Err_Exit;
1c79356b 4126
9bccf70c
A
4127 /*
4128 * Adjust actualBytesAdded to be allocation block aligned, not
4129 * clump size aligned.
4130 * NOTE: What we report here does not affect reality
4131 * until the file is closed, when we truncate the file to allocation
4132 * block size.
4133 */
2d21ac55 4134 if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
0b4e3aa0 4135 *(ap->a_bytesallocated) =
2d21ac55 4136 roundup(orig_request_size, (off_t)vcb->blockSize);
1c79356b 4137
9bccf70c 4138 } else { /* Shorten the size of the file */
1c79356b 4139
fe8ab488
A
4140 /*
4141 * N.B. At present, this code is never called. If and when we
4142 * do start using it, it looks like there might be slightly
4143 * strange semantics with the file size: it's possible for the
4144 * file size to *increase* e.g. if current file size is 5,
4145 * length is 1024 and filebytes is 4096, the file size will
4146 * end up being 1024 bytes. This isn't necessarily a problem
4147 * but it's not consistent with the code above which doesn't
4148 * change the file size.
4149 */
1c79356b 4150
fe8ab488 4151 retval = hfs_truncate(vp, length, 0, 0, ap->a_context);
55e303ae 4152 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
b4c24cb9 4153
1c79356b
A
4154 /*
4155 * if we get an error and no changes were made then exit;
91447636 4156 * otherwise we must do the hfs_update to reflect the changes
1c79356b 4157 */
9bccf70c
A
4158 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
4159#if QUOTA
4160 /* These are bytesreleased */
4161 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
4162#endif /* QUOTA */
1c79356b 4163
9bccf70c
A
4164 if (fp->ff_size > filebytes) {
4165 fp->ff_size = filebytes;
1c79356b 4166
fe8ab488 4167 hfs_ubc_setsize(vp, fp->ff_size, true);
9bccf70c
A
4168 }
4169 }
1c79356b
A
4170
4171Std_Exit:
91447636
A
4172 cp->c_touch_chgtime = TRUE;
4173 cp->c_touch_modtime = TRUE;
4174 retval2 = hfs_update(vp, MNT_WAIT);
1c79356b 4175
9bccf70c
A
4176 if (retval == 0)
4177 retval = retval2;
1c79356b 4178Err_Exit:
39236c6e 4179 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
91447636 4180 hfs_unlock(cp);
9bccf70c 4181 return (retval);
1c79356b
A
4182}
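/*
 * For reference, a user-space sketch of how this vnop is typically reached:
 * fcntl(F_PREALLOCATE) with an fstore_t request. The exact mapping of
 * F_ALLOCATECONTIG / F_ALLOCATEALL / F_PEOFPOSMODE onto the ALLOCATE* flags
 * handled above is assumed here for illustration and is not taken from this
 * file.
 */
#if 0
#include <fcntl.h>
#include <sys/types.h>

static int
preallocate_fd(int fd, off_t length)
{
	fstore_t fst = {
		.fst_flags      = F_ALLOCATECONTIG | F_ALLOCATEALL,
		.fst_posmode    = F_PEOFPOSMODE,   /* allocate starting at the physical EOF */
		.fst_offset     = 0,
		.fst_length     = length,
		.fst_bytesalloc = 0,
	};

	if (fcntl(fd, F_PREALLOCATE, &fst) == -1) {
		/* Retry without the contiguity requirement. */
		fst.fst_flags = F_ALLOCATEALL;
		if (fcntl(fd, F_PREALLOCATE, &fst) == -1)
			return -1;
	}
	return 0;
}
#endif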
4183
4184
9bccf70c 4185/*
91447636 4186 * Pagein for HFS filesystem
9bccf70c 4187 */
1c79356b 4188int
91447636
A
4189hfs_vnop_pagein(struct vnop_pagein_args *ap)
4190/*
4191 struct vnop_pagein_args {
4192 vnode_t a_vp,
1c79356b
A
4193 upl_t a_pl,
4194 vm_offset_t a_pl_offset,
4195 off_t a_f_offset,
4196 size_t a_size,
1c79356b 4197 int a_flags
91447636
A
4198 vfs_context_t a_context;
4199 };
4200*/
1c79356b 4201{
6d2010ae
A
4202 vnode_t vp;
4203 struct cnode *cp;
4204 struct filefork *fp;
4205 int error = 0;
4206 upl_t upl;
4207 upl_page_info_t *pl;
4208 off_t f_offset;
fe8ab488 4209 off_t page_needed_f_offset;
6d2010ae
A
4210 int offset;
4211 int isize;
fe8ab488 4212 int upl_size;
6d2010ae
A
4213 int pg_index;
4214 boolean_t truncate_lock_held = FALSE;
4215 boolean_t file_converted = FALSE;
4216 kern_return_t kret;
4217
4218 vp = ap->a_vp;
4219 cp = VTOC(vp);
4220 fp = VTOF(vp);
4221
4222#if CONFIG_PROTECT
316670eb 4223 if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) {
39236c6e
A
4224 /*
4225 * If we errored here, then this means that one of two things occurred:
4226 * 1. there was a problem with the decryption of the key.
4227 * 2. the device is locked and we are not allowed to access this particular file.
4228 *
4229 * Either way, this means that we need to shut down this upl now. If
4230 * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves),
4231 * then we create a UPL and immediately abort it.
4232 */
4233 if (ap->a_pl == NULL) {
4234 /* create the upl */
4235 ubc_create_upl (vp, ap->a_f_offset, ap->a_size, &upl, &pl,
4236 UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
4237 /* mark the range as needed so it doesn't immediately get discarded upon abort */
4238 ubc_upl_range_needed (upl, ap->a_pl_offset / PAGE_SIZE, 1);
4239
4240 /* Abort the range */
4241 ubc_upl_abort_range (upl, 0, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
4242 }
4243
4244
6d2010ae
A
4245 return error;
4246 }
4247#endif /* CONFIG_PROTECT */
4248
4249 if (ap->a_pl != NULL) {
4250 /*
4251 * this can only happen for swap files now that
4252 * we're asking for V2 paging behavior...
4253 * so don't need to worry about decompression, or
4254 * keeping track of blocks read or taking the truncate lock
4255 */
4256 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
4257 ap->a_size, (off_t)fp->ff_size, ap->a_flags);
4258 goto pagein_done;
4259 }
4260
fe8ab488
A
4261 page_needed_f_offset = ap->a_f_offset + ap->a_pl_offset;
4262
6d2010ae
A
4263retry_pagein:
4264 /*
4265 * take truncate lock (shared/recursive) to guard against
4266 * zero-fill thru fsync interfering, but only for v2
4267 *
4268 * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
4269 * lock shared and we are allowed to recurse 1 level if this thread already
4270 * owns the lock exclusively... this can legally occur
4271 * if we are doing a shrinking ftruncate against a file
4272 * that is mapped private, and the pages being truncated
4273 * do not currently exist in the cache... in that case
4274 * we will have to page-in the missing pages in order
4275 * to provide them to the private mapping... we must
4276 * also call hfs_unlock_truncate with a positive been_recursed
4277 * arg to indicate that if we have recursed, there is no need to drop
4278 * the lock. Allowing this simple recursion is necessary
4279 * in order to avoid a certain deadlock... since the ftruncate
4280 * already holds the truncate lock exclusively, if we try
4281 * to acquire it shared to protect the pagein path, we will
4282 * hang this thread
4283 *
4284 * NOTE: The if () block below is a workaround in order to prevent a
4285 * VM deadlock. See rdar://7853471.
4286 *
4287 * If we are in a forced unmount, then launchd will still have the
4288 * dyld_shared_cache file mapped as it is trying to reboot. If we
4289 * take the truncate lock here to service a page fault, then our
4290 * thread could deadlock with the forced-unmount. The forced unmount
4291 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
4292 * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount
4293 * thread will think it needs to copy all of the data out of the file
4294 * and into a VM copy object. If we hold the cnode lock here, then that
4295 * VM operation will not be able to proceed, because we'll set a busy page
4296 * before attempting to grab the lock. Note that this isn't as simple as "don't
4297 * call ubc_setsize" because doing that would just shift the problem to the
4298 * ubc_msync done before the vnode is reclaimed.
4299 *
4300 * So, if a forced unmount on this volume is in flight AND the cnode is
4301 * marked C_DELETED, then just go ahead and do the page in without taking
4302 * the lock (thus suspending pagein_v2 semantics temporarily). Since it's on a file
4303 * that is not going to be available on the next mount, this seems like an
4304 * OK solution from a correctness point of view, even though it is hacky.
4305 */
4306 if (vfs_isforce(vp->v_mount)) {
4307 if (cp->c_flag & C_DELETED) {
4308 /* If we don't get it, then just go ahead and operate without the lock */
39236c6e 4309 truncate_lock_held = hfs_try_trunclock(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
6d2010ae
A
4310 }
4311 }
4312 else {
39236c6e 4313 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
6d2010ae
A
4314 truncate_lock_held = TRUE;
4315 }
4316
4317 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
4318
4319 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
4320 error = EINVAL;
4321 goto pagein_done;
4322 }
316670eb
A
4323 ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);
4324
fe8ab488 4325 upl_size = isize = ap->a_size;
6d2010ae 4326
fe8ab488 4327 /*
6d2010ae
A
4328 * Scan from the back to find the last page in the UPL, so that we
4329 * aren't looking at a UPL that may have already been freed by the
4330 * preceding aborts/completions.
4331 */
4332 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
4333 if (upl_page_present(pl, --pg_index))
4334 break;
4335 if (pg_index == 0) {
4336 /*
4337 * no absent pages were found in the range specified
4338 * just abort the UPL to get rid of it and then we're done
4339 */
4340 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
4341 goto pagein_done;
4342 }
4343 }
4344 /*
4345 * initialize the offset variables before we touch the UPL.
4346 * f_offset is the position into the file, in bytes
4347 * offset is the position into the UPL, in bytes
4348 * pg_index is the pg# of the UPL we're operating on
4349 * isize is the offset into the UPL of the last page that is present.
4350 */
4351 isize = ((pg_index + 1) * PAGE_SIZE);
4352 pg_index = 0;
4353 offset = 0;
4354 f_offset = ap->a_f_offset;
4355
4356 while (isize) {
4357 int xsize;
4358 int num_of_pages;
4359
4360 if ( !upl_page_present(pl, pg_index)) {
4361 /*
4362 * we asked for RET_ONLY_ABSENT, so it's possible
4363 * to get back empty slots in the UPL.
4364 * just skip over them
4365 */
4366 f_offset += PAGE_SIZE;
4367 offset += PAGE_SIZE;
4368 isize -= PAGE_SIZE;
4369 pg_index++;
4370
4371 continue;
4372 }
4373 /*
4374 * We know that we have at least one absent page.
4375 * Now checking to see how many in a row we have
4376 */
4377 num_of_pages = 1;
4378 xsize = isize - PAGE_SIZE;
4379
4380 while (xsize) {
4381 if ( !upl_page_present(pl, pg_index + num_of_pages))
4382 break;
4383 num_of_pages++;
4384 xsize -= PAGE_SIZE;
4385 }
4386 xsize = num_of_pages * PAGE_SIZE;
1c79356b 4387
b0d623f7 4388#if HFS_COMPRESSION
6d2010ae
A
4389 if (VNODE_IS_RSRC(vp)) {
4390 /* allow pageins of the resource fork */
4391 } else {
4392 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
4393
b0d623f7 4394 if (compressed) {
fe8ab488 4395
6d2010ae
A
4396 if (truncate_lock_held) {
4397 /*
4398 * can't hold the truncate lock when calling into the decmpfs layer
4399 * since it calls back into this layer... even though we're only
4400 * holding the lock in shared mode, and the re-entrant path only
4401 * takes the lock shared, we can deadlock if some other thread
4402 * tries to grab the lock exclusively in between.
4403 */
39236c6e 4404 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
6d2010ae
A
4405 truncate_lock_held = FALSE;
4406 }
4407 ap->a_pl = upl;
4408 ap->a_pl_offset = offset;
4409 ap->a_f_offset = f_offset;
4410 ap->a_size = xsize;
4411
4412 error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
4413 /*
4414 * note that decpfs_pagein_compressed can change the state of
4415 * 'compressed'... it will set it to 0 if the file is no longer
4416 * compressed once the compression lock is successfully taken
4417 * i.e. we would block on that lock while the file is being inflated
4418 */
4419 if (compressed) {
4420 if (error == 0) {
4421 /* successful page-in, update the access time */
4422 VTOC(vp)->c_touch_acctime = TRUE;
b0d623f7 4423
6d2010ae
A
4424 /* compressed files are not hot file candidates */
4425 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
4426 fp->ff_bytesread = 0;
4427 }
4428 } else if (error == EAGAIN) {
4429 /*
4430 * EAGAIN indicates someone else already holds the compression lock...
4431 * to avoid deadlocking, we'll abort this range of pages with an
4432 * indication that the pagein needs to be redriven
4433 */
4434 ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
fe8ab488
A
4435 } else if (error == ENOSPC) {
4436
4437 if (upl_size == PAGE_SIZE)
4438 panic("decmpfs_pagein_compressed: couldn't ubc_upl_map a single page\n");
4439
4440 ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
4441
4442 ap->a_size = PAGE_SIZE;
4443 ap->a_pl = NULL;
4444 ap->a_pl_offset = 0;
4445 ap->a_f_offset = page_needed_f_offset;
4446
4447 goto retry_pagein;
b0d623f7 4448 }
6d2010ae
A
4449 goto pagein_next_range;
4450 }
4451 else {
4452 /*
4453 * Set file_converted only if the file became decompressed while we were
4454 * paging in. If it were still compressed, we would re-start the loop using the goto
4455 * in the above block. This avoids overloading truncate_lock_held as our retry_pagein
4456 * condition below, since we could have avoided taking the truncate lock to prevent
4457 * a deadlock in the force unmount case.
4458 */
4459 file_converted = TRUE;
b0d623f7 4460 }
b0d623f7 4461 }
6d2010ae
A
4462 if (file_converted == TRUE) {
4463 /*
4464 * the file was converted back to a regular file after we first saw it as compressed
4465 * we need to abort the upl, retake the truncate lock, recreate the UPL and start over
4466 * reset a_size so that we consider what remains of the original request
4467 * and null out a_upl and a_pl_offset.
4468 *
4469 * We should only be able to get into this block if the decmpfs_pagein_compressed
4470 * successfully decompressed the range in question for this file.
4471 */
4472 ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
4473
4474 ap->a_size = isize;
4475 ap->a_pl = NULL;
4476 ap->a_pl_offset = 0;
4477
4478 /* Reset file_converted back to false so that we don't infinite-loop. */
4479 file_converted = FALSE;
4480 goto retry_pagein;
4481 }
b0d623f7 4482 }
b0d623f7 4483#endif
6d2010ae 4484 error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);
b0d623f7 4485
6d2010ae
A
4486 /*
4487 * Keep track of blocks read.
4488 */
4489 if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
4490 int bytesread;
4491 int took_cnode_lock = 0;
55e303ae 4492
6d2010ae
A
4493 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
4494 bytesread = fp->ff_size;
4495 else
4496 bytesread = xsize;
91447636 4497
6d2010ae
A
4498 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
4499 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
39236c6e 4500 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
6d2010ae
A
4501 took_cnode_lock = 1;
4502 }
4503 /*
4504 * If this file hasn't been seen since the start of
4505 * the current sampling period then start over.
4506 */
4507 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
4508 struct timeval tv;
91447636 4509
6d2010ae
A
4510 fp->ff_bytesread = bytesread;
4511 microtime(&tv);
4512 cp->c_atime = tv.tv_sec;
4513 } else {
4514 fp->ff_bytesread += bytesread;
4515 }
4516 cp->c_touch_acctime = TRUE;
4517 if (took_cnode_lock)
4518 hfs_unlock(cp);
91447636 4519 }
6d2010ae
A
4520pagein_next_range:
4521 f_offset += xsize;
4522 offset += xsize;
4523 isize -= xsize;
4524 pg_index += num_of_pages;
55e303ae 4525
6d2010ae 4526 error = 0;
55e303ae 4527 }
6d2010ae
A
4528
4529pagein_done:
4530 if (truncate_lock_held == TRUE) {
4531 /* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */
39236c6e 4532 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
6d2010ae
A
4533 }
4534
9bccf70c 4535 return (error);
1c79356b
A
4536}
4537
4538/*
91447636 4539 * Pageout for HFS filesystem.
1c79356b
A
4540 */
4541int
91447636
A
4542hfs_vnop_pageout(struct vnop_pageout_args *ap)
4543/*
4544 struct vnop_pageout_args {
4545 vnode_t a_vp,
1c79356b
A
4546 upl_t a_pl,
4547 vm_offset_t a_pl_offset,
4548 off_t a_f_offset,
4549 size_t a_size,
1c79356b 4550 int a_flags
91447636
A
4551 vfs_context_t a_context;
4552 };
4553*/
1c79356b 4554{
91447636
A
4555 vnode_t vp = ap->a_vp;
4556 struct cnode *cp;
4557 struct filefork *fp;
b0d623f7 4558 int retval = 0;
9bccf70c 4559 off_t filesize;
b0d623f7
A
4560 upl_t upl;
4561 upl_page_info_t* pl;
4562 vm_offset_t a_pl_offset;
4563 int a_flags;
4564 int is_pageoutv2 = 0;
b7266188 4565 kern_return_t kret;
1c79356b 4566
91447636 4567 cp = VTOC(vp);
91447636 4568 fp = VTOF(vp);
2d21ac55 4569
593a1d5f
A
4570 /*
4571 * Figure out where the file ends, for pageout purposes. If
4572 * ff_new_size > ff_size, then we're in the middle of extending the
4573 * file via a write, so it is safe (and necessary) that we be able
4574 * to pageout up to that point.
4575 */
4576 filesize = fp->ff_size;
4577 if (fp->ff_new_size > filesize)
4578 filesize = fp->ff_new_size;
b0d623f7
A
4579
4580 a_flags = ap->a_flags;
4581 a_pl_offset = ap->a_pl_offset;
4582
4583 /*
4584 * we can tell if we're getting the new or old behavior from the UPL
4585 */
4586 if ((upl = ap->a_pl) == NULL) {
4587 int request_flags;
4588
4589 is_pageoutv2 = 1;
4590 /*
4591 * we're in control of any UPL we commit
4592 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
4593 */
4594 a_flags &= ~UPL_NOCOMMIT;
4595 a_pl_offset = 0;
4596
4597 /*
316670eb
A
4598 * For V2 semantics, we want to take the cnode truncate lock
4599 * shared to guard against the file size changing via zero-filling.
4600 *
4601 * However, we have to be careful because we may be invoked
4602 * via the ubc_msync path to write out dirty mmap'd pages
4603 * in response to a lock event on a content-protected
4604 * filesystem (e.g. to write out class A files).
4605 * As a result, we want to take the truncate lock 'SHARED' with
4606 * the mini-recursion locktype so that we don't deadlock/panic
4607 * because we may be already holding the truncate lock exclusive to force any other
4608 * IOs to have blocked behind us.
b0d623f7 4609 */
39236c6e 4610 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
b0d623f7
A
4611
4612 if (a_flags & UPL_MSYNC) {
4613 request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
4614 }
4615 else {
4616 request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
4617 }
6d2010ae 4618
b7266188 4619 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);
b0d623f7 4620
b7266188 4621 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
b0d623f7
A
4622 retval = EINVAL;
4623 goto pageout_done;
4624 }
4625 }
4626 /*
4627 * from this point forward upl points at the UPL we're working with
4628 * it was either passed in or we successfully created it
4629 */
4630
4631 /*
4632 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
4633 * UPL instead of relying on the UPL passed into us. We go ahead and do that here,
4634 * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for
4635 * N dirty ranges in the UPL. Note that this is almost a direct copy of the
4636 * logic in vnode_pageout except that we need to do it after grabbing the truncate
4637 * lock in HFS so that we don't lock invert ourselves.
4638 *
4639 * Note that we can still get into this function on behalf of the default pager with
4640 * non-V2 behavior (swapfiles). However in that case, we did not grab locks above
4641 * since fsync and other writing threads will grab the locks, then mark the
4642 * relevant pages as busy. But the pageout codepath marks the pages as busy,
4643 * and THEN would attempt to grab the truncate lock, which would result in deadlock. So
4644 * we do not try to grab anything for the pre-V2 case, which should only be accessed
4645 * by the paging/VM system.
4646 */
4647
4648 if (is_pageoutv2) {
4649 off_t f_offset;
4650 int offset;
4651 int isize;
4652 int pg_index;
4653 int error;
4654 int error_ret = 0;
4655
4656 isize = ap->a_size;
4657 f_offset = ap->a_f_offset;
4658
4659 /*
4660 * Scan from the back to find the last page in the UPL, so that we
4661 * aren't looking at a UPL that may have already been freed by the
4662 * preceding aborts/completions.
4663 */
4664 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
4665 if (upl_page_present(pl, --pg_index))
4666 break;
4667 if (pg_index == 0) {
4668 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
4669 goto pageout_done;
2d21ac55 4670 }
2d21ac55 4671 }
b0d623f7
A
4672
4673 /*
4674 * initialize the offset variables before we touch the UPL.
4675 * a_f_offset is the position into the file, in bytes
4676 * offset is the position into the UPL, in bytes
4677 * pg_index is the pg# of the UPL we're operating on.
4678 * isize is the offset into the UPL of the last non-clean page.
4679 */
4680 isize = ((pg_index + 1) * PAGE_SIZE);
4681
4682 offset = 0;
4683 pg_index = 0;
4684
4685 while (isize) {
4686 int xsize;
4687 int num_of_pages;
4688
4689 if ( !upl_page_present(pl, pg_index)) {
4690 /*
4691 * we asked for RET_ONLY_DIRTY, so it's possible
4692 * to get back empty slots in the UPL.
4693 * just skip over them
4694 */
4695 f_offset += PAGE_SIZE;
4696 offset += PAGE_SIZE;
4697 isize -= PAGE_SIZE;
4698 pg_index++;
4699
4700 continue;
4701 }
4702 if ( !upl_dirty_page(pl, pg_index)) {
4703 panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
4704 }
4705
4706 /*
4707 * We know that we have at least one dirty page.
4708 * Now checking to see how many in a row we have
4709 */
4710 num_of_pages = 1;
4711 xsize = isize - PAGE_SIZE;
4712
4713 while (xsize) {
4714 if ( !upl_dirty_page(pl, pg_index + num_of_pages))
4715 break;
4716 num_of_pages++;
4717 xsize -= PAGE_SIZE;
4718 }
4719 xsize = num_of_pages * PAGE_SIZE;
4720
4721 if (!vnode_isswap(vp)) {
4722 off_t end_of_range;
4723 int tooklock;
4724
4725 tooklock = 0;
4726
4727 if (cp->c_lockowner != current_thread()) {
39236c6e 4728 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
b0d623f7
A
4729 /*
4730 * we're in the v2 path, so we are the
4731 * owner of the UPL... we may have already
4732 * processed some of the UPL, so abort it
4733 * from the current working offset to the
4734 * end of the UPL
4735 */
4736 ubc_upl_abort_range(upl,
4737 offset,
4738 ap->a_size - offset,
4739 UPL_ABORT_FREE_ON_EMPTY);
4740 goto pageout_done;
4741 }
4742 tooklock = 1;
4743 }
4744 end_of_range = f_offset + xsize - 1;
2d21ac55 4745
b0d623f7
A
4746 if (end_of_range >= filesize) {
4747 end_of_range = (off_t)(filesize - 1);
4748 }
4749 if (f_offset < filesize) {
4750 rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
4751 cp->c_flag |= C_MODIFIED; /* leof is dirty */
4752 }
4753 if (tooklock) {
4754 hfs_unlock(cp);
4755 }
4756 }
4757 if ((error = cluster_pageout(vp, upl, offset, f_offset,
4758 xsize, filesize, a_flags))) {
4759 if (error_ret == 0)
4760 error_ret = error;
4761 }
4762 f_offset += xsize;
4763 offset += xsize;
4764 isize -= xsize;
4765 pg_index += num_of_pages;
4766 }
4767 /* capture errnos bubbled out of cluster_pageout if they occurred */
4768 if (error_ret != 0) {
4769 retval = error_ret;
4770 }
4771 } /* end block for v2 pageout behavior */
4772 else {
4773 if (!vnode_isswap(vp)) {
4774 off_t end_of_range;
4775 int tooklock = 0;
4776
4777 if (cp->c_lockowner != current_thread()) {
39236c6e 4778 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
b0d623f7
A
4779 if (!(a_flags & UPL_NOCOMMIT)) {
4780 ubc_upl_abort_range(upl,
4781 a_pl_offset,
4782 ap->a_size,
4783 UPL_ABORT_FREE_ON_EMPTY);
4784 }
4785 goto pageout_done;
4786 }
4787 tooklock = 1;
4788 }
4789 end_of_range = ap->a_f_offset + ap->a_size - 1;
2d21ac55 4790
b0d623f7
A
4791 if (end_of_range >= filesize) {
4792 end_of_range = (off_t)(filesize - 1);
4793 }
4794 if (ap->a_f_offset < filesize) {
4795 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
4796 cp->c_flag |= C_MODIFIED; /* leof is dirty */
4797 }
1c79356b 4798
b0d623f7
A
4799 if (tooklock) {
4800 hfs_unlock(cp);
4801 }
2d21ac55 4802 }
b0d623f7
A
4803 /*
4804 * just call cluster_pageout for old pre-v2 behavior
4805 */
4806 retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
4807 ap->a_size, filesize, a_flags);
55e303ae 4808 }
0b4e3aa0 4809
1c79356b 4810 /*
fe8ab488
A
4811 * If data was written, update the modification time of the file
4812 * but only if it's mapped writable; we will have touched the
4813 * modification time for direct writes.
1c79356b 4814 */
fe8ab488
A
4815 if (retval == 0 && (ubc_is_mapped_writable(vp)
4816 || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING))) {
4817 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
4818
4819 // Check again with lock
4820 bool mapped_writable = ubc_is_mapped_writable(vp);
4821 if (mapped_writable
4822 || ISSET(cp->c_flag, C_MIGHT_BE_DIRTY_FROM_MAPPING)) {
4823 cp->c_touch_modtime = TRUE;
4824 cp->c_touch_chgtime = TRUE;
4825
4826 /*
4827 * We only need to increment the generation counter if
4828 * it's currently mapped writable because we incremented
4829 * the counter in hfs_vnop_mnomap.
4830 */
4831 if (mapped_writable)
4832 hfs_incr_gencount(VTOC(vp));
4833
4834 /*
4835 * If setuid or setgid bits are set and this process is
4836 * not the superuser then clear the setuid and setgid bits
4837 * as a precaution against tampering.
4838 */
4839 if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
4840 (vfs_context_suser(ap->a_context) != 0)) {
4841 cp->c_mode &= ~(S_ISUID | S_ISGID);
4842 }
b0d623f7 4843 }
fe8ab488
A
4844
4845 hfs_unlock(cp);
b0d623f7
A
4846 }
4847
4848pageout_done:
4849 if (is_pageoutv2) {
316670eb
A
4850 /*
4851 * Release the truncate lock. Note that because
4852 * we may have taken the lock recursively by
4853 * being invoked via ubc_msync due to lockdown,
4854 * we should release it recursively, too.
4855 */
39236c6e 4856 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
91447636 4857 }
1c79356b
A
4858 return (retval);
4859}
4860
4861/*
4862 * Intercept B-Tree node writes to unswap them if necessary.
1c79356b
A
4863 */
4864int
91447636 4865hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
1c79356b 4866{
9bccf70c 4867 int retval = 0;
9bccf70c 4868 register struct buf *bp = ap->a_bp;
91447636 4869 register struct vnode *vp = buf_vnode(bp);
9bccf70c
A
4870 BlockDescriptor block;
4871
4872 /* Trap B-Tree writes */
4873 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
91447636 4874 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
0c530ab8
A
4875 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
4876 (vp == VTOHFS(vp)->hfc_filevp)) {
9bccf70c 4877
3a60a9f5
A
4878 /*
4879 * Swap and validate the node if it is in native byte order.
4880 * This is always true on big endian, so we always validate
4881 * before writing here. On little endian, the node typically has
2d21ac55 4882 * been swapped and validated when it was written to the journal,
3a60a9f5
A
4883 * so we won't do anything here.
4884 */
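	/*
	 * About the 0x000e check below: the last two bytes of a B-tree node
	 * hold the offset of record 0, which is always sizeof(BTNodeDescriptor)
	 * == 14 (0x000e). If that value reads as 0x000e in host byte order, the
	 * node is still in host (native) order and must be swapped to big
	 * endian before it goes to disk; otherwise it has already been swapped,
	 * for example when it was written to the journal.
	 */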
2d21ac55 4885 if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
9bccf70c
A
4886 /* Prepare the block pointer */
4887 block.blockHeader = bp;
91447636 4888 block.buffer = (char *)buf_dataptr(bp);
3a60a9f5 4889 block.blockNum = buf_lblkno(bp);
9bccf70c 4890 /* not found in cache ==> came from disk */
91447636
A
4891 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
4892 block.blockSize = buf_count(bp);
1c79356b 4893
9bccf70c 4894 /* Endian un-swap B-Tree node */
935ed37a 4895 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
3a60a9f5
A
4896 if (retval)
4897 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
9bccf70c 4898 }
9bccf70c 4899 }
3a60a9f5 4900
9bccf70c 4901 /* This buffer shouldn't be locked anymore but if it is clear it */
91447636
A
4902 if ((buf_flags(bp) & B_LOCKED)) {
4903 // XXXdbg
4904 if (VTOHFS(vp)->jnl) {
2d21ac55 4905 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
91447636
A
4906 }
4907 buf_clearflags(bp, B_LOCKED);
9bccf70c
A
4908 }
4909 retval = vn_bwrite (ap);
1c79356b 4910
9bccf70c 4911 return (retval);
1c79356b 4912}
55e303ae
A
4913
4914/*
4915 * Relocate a file to a new location on disk
4916 * cnode must be locked on entry
4917 *
4918 * Relocation occurs by cloning the file's data from its
4919 * current set of blocks to a new set of blocks. During
4920 * the relocation all of the blocks (old and new) are
4921 * owned by the file.
4922 *
4923 * -----------------
4924 * |///////////////|
4925 * -----------------
4926 * 0 N (file offset)
4927 *
4928 * ----------------- -----------------
2d21ac55 4929 * |///////////////| | | STEP 1 (acquire new blocks)
55e303ae
A
4930 * ----------------- -----------------
4931 * 0 N N+1 2N
4932 *
4933 * ----------------- -----------------
4934 * |///////////////| |///////////////| STEP 2 (clone data)
4935 * ----------------- -----------------
4936 * 0 N N+1 2N
4937 *
4938 * -----------------
4939 * |///////////////| STEP 3 (head truncate blocks)
4940 * -----------------
4941 * 0 N
4942 *
4943 * During steps 2 and 3 page-outs to file offsets less
4944 * than or equal to N are suspended.
4945 *
2d21ac55 4946 * During step 3 page-ins to the file get suspended.
55e303ae 4947 */
55e303ae 4948int
91447636
A
4949hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
4950 struct proc *p)
55e303ae 4951{
91447636 4952 struct cnode *cp;
55e303ae
A
4953 struct filefork *fp;
4954 struct hfsmount *hfsmp;
55e303ae
A
4955 u_int32_t headblks;
4956 u_int32_t datablks;
4957 u_int32_t blksize;
55e303ae
A
4958 u_int32_t growsize;
4959 u_int32_t nextallocsave;
91447636 4960 daddr64_t sector_a, sector_b;
55e303ae 4961 int eflags;
55e303ae 4962 off_t newbytes;
91447636
A
4963 int retval;
4964 int lockflags = 0;
4965 int took_trunc_lock = 0;
4966 int started_tr = 0;
4967 enum vtype vnodetype;
4968
4969 vnodetype = vnode_vtype(vp);
bd504ef0 4970 if (vnodetype != VREG) {
39236c6e 4971 /* Only regular files may be relocated. */
55e303ae
A
4972 return (EPERM);
4973 }
4974
4975 hfsmp = VTOHFS(vp);
4976 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
4977 return (ENOSPC);
4978 }
4979
91447636 4980 cp = VTOC(vp);
55e303ae
A
4981 fp = VTOF(vp);
4982 if (fp->ff_unallocblocks)
4983 return (EINVAL);
6d2010ae
A
4984
4985#if CONFIG_PROTECT
4986 /*
4987 * <rdar://problem/9118426>
4988 * Disable HFS file relocation on content-protected filesystems
4989 */
4990 if (cp_fs_protected (hfsmp->hfs_mp)) {
4991 return EINVAL;
4992 }
4993#endif
6d2010ae
A
4994 /* If it's an SSD, also disable HFS relocation */
4995 if (hfsmp->hfs_flags & HFS_SSD) {
4996 return EINVAL;
4997 }
4998
316670eb 4999
91447636 5000 blksize = hfsmp->blockSize;
55e303ae 5001 if (blockHint == 0)
91447636 5002 blockHint = hfsmp->nextAllocation;
55e303ae 5003
39236c6e 5004 if (fp->ff_size > 0x7fffffff) {
55e303ae
A
5005 return (EFBIG);
5006 }
5007
91447636
A
5008 //
5009 // We do not believe that this call to hfs_fsync() is
5010 // necessary and it causes a journal transaction
5011 // deadlock so we are removing it.
5012 //
5013 //if (vnodetype == VREG && !vnode_issystem(vp)) {
5014 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
5015 // if (retval)
5016 // return (retval);
5017 //}
5018
5019 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
5020 hfs_unlock(cp);
39236c6e 5021 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2d21ac55 5022 /* Force lock since callers expects lock to be held. */
39236c6e
A
5023 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) {
5024 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
91447636
A
5025 return (retval);
5026 }
2d21ac55
A
5027 /* No need to continue if file was removed. */
5028 if (cp->c_flag & C_NOEXISTS) {
39236c6e 5029 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2d21ac55
A
5030 return (ENOENT);
5031 }
91447636
A
5032 took_trunc_lock = 1;
5033 }
55e303ae
A
5034 headblks = fp->ff_blocks;
5035 datablks = howmany(fp->ff_size, blksize);
5036 growsize = datablks * blksize;
55e303ae
A
5037 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
5038 if (blockHint >= hfsmp->hfs_metazone_start &&
5039 blockHint <= hfsmp->hfs_metazone_end)
5040 eflags |= kEFMetadataMask;
5041
91447636
A
5042 if (hfs_start_transaction(hfsmp) != 0) {
5043 if (took_trunc_lock)
39236c6e 5044 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
91447636 5045 return (EINVAL);
55e303ae 5046 }
91447636
A
5047 started_tr = 1;
5048 /*
5049 * Protect the extents b-tree and the allocation bitmap
5050 * during MapFileBlockC and ExtendFileC operations.
5051 */
5052 lockflags = SFL_BITMAP;
5053 if (overflow_extents(fp))
5054 lockflags |= SFL_EXTENTS;
5055 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
55e303ae 5056
91447636 5057 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
55e303ae
A
5058 if (retval) {
5059 retval = MacToVFSError(retval);
5060 goto out;
5061 }
5062
5063 /*
2d21ac55 5064 * STEP 1 - acquire new allocation blocks.
55e303ae 5065 */
91447636
A
5066 nextallocsave = hfsmp->nextAllocation;
5067 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
5068 if (eflags & kEFMetadataMask) {
39236c6e 5069 hfs_lock_mount(hfsmp);
2d21ac55
A
5070 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
5071 MarkVCBDirty(hfsmp);
39236c6e 5072 hfs_unlock_mount(hfsmp);
91447636 5073 }
55e303ae
A
5074
5075 retval = MacToVFSError(retval);
5076 if (retval == 0) {
91447636 5077 cp->c_flag |= C_MODIFIED;
55e303ae
A
5078 if (newbytes < growsize) {
5079 retval = ENOSPC;
5080 goto restore;
5081 } else if (fp->ff_blocks < (headblks + datablks)) {
39236c6e 5082 printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN);
55e303ae
A
5083 retval = ENOSPC;
5084 goto restore;
5085 }
5086
91447636 5087 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
55e303ae
A
5088 if (retval) {
5089 retval = MacToVFSError(retval);
5090 } else if ((sector_a + 1) == sector_b) {
5091 retval = ENOSPC;
5092 goto restore;
5093 } else if ((eflags & kEFMetadataMask) &&
593a1d5f 5094 ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
55e303ae 5095 hfsmp->hfs_metazone_end)) {
b0d623f7 5096#if 0
2d21ac55
A
5097 const char * filestr;
5098 char emptystr = '\0';
5099
5100 if (cp->c_desc.cd_nameptr != NULL) {
5101 filestr = (const char *)&cp->c_desc.cd_nameptr[0];
5102 } else if (vnode_name(vp) != NULL) {
5103 filestr = vnode_name(vp);
5104 } else {
5105 filestr = &emptystr;
5106 }
b0d623f7 5107#endif
55e303ae
A
5108 retval = ENOSPC;
5109 goto restore;
5110 }
5111 }
91447636
A
5112 /* Done with system locks and journal for now. */
5113 hfs_systemfile_unlock(hfsmp, lockflags);
5114 lockflags = 0;
5115 hfs_end_transaction(hfsmp);
5116 started_tr = 0;
5117
55e303ae
A
5118 if (retval) {
5119 /*
5120 * Check to see if failure is due to excessive fragmentation.
5121 */
91447636
A
5122 if ((retval == ENOSPC) &&
5123 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
55e303ae
A
5124 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
5125 }
5126 goto out;
5127 }
55e303ae 5128 /*
91447636 5129 * STEP 2 - clone file data into the new allocation blocks.
5130 */
5131
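	/*
	 * Symlinks are never relocated; system (metadata) files are copied
	 * through the buffer cache by hfs_clonesysfile(); everything else
	 * is copied with cluster I/O by hfs_clonefile().
	 */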
91447636 5132 if (vnodetype == VLNK)
39236c6e 5133 retval = EPERM;
91447636 5134 else if (vnode_issystem(vp))
5135 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
5136 else
91447636 5137 retval = hfs_clonefile(vp, headblks, datablks, blksize);
ccc36f2f 5138
5139 /* Start transaction for step 3 or for a restore. */
5140 if (hfs_start_transaction(hfsmp) != 0) {
5141 retval = EINVAL;
5142 goto out;
5143 }
5144 started_tr = 1;
5145 if (retval)
5146 goto restore;
5147
5148 /*
91447636 5149 * STEP 3 - switch to cloned data and remove old blocks.
55e303ae 5150 */
5151 lockflags = SFL_BITMAP;
5152 if (overflow_extents(fp))
5153 lockflags |= SFL_EXTENTS;
5154 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
55e303ae 5155
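	/*
	 * Drop the original blocks from the front of the fork; its data
	 * now begins at the freshly cloned, contiguous allocation.
	 */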
91447636 5156 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
55e303ae 5157
5158 hfs_systemfile_unlock(hfsmp, lockflags);
5159 lockflags = 0;
5160 if (retval)
5161 goto restore;
55e303ae 5162out:
91447636 5163 if (took_trunc_lock)
39236c6e 5164 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
55e303ae 5165
5166 if (lockflags) {
5167 hfs_systemfile_unlock(hfsmp, lockflags);
5168 lockflags = 0;
5169 }
5170
5171 /* Push cnode's new extent data to disk. */
5172 if (retval == 0) {
5173 (void) hfs_update(vp, MNT_WAIT);
5174 }
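	/*
	 * System files (cnid below kHFSFirstUserCatalogNodeID) keep their
	 * extents in the volume header, so flush it (and the alternate
	 * header) synchronously; for user files a lazy header flush is
	 * sufficient.
	 */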
55e303ae 5175 if (hfsmp->jnl) {
91447636 5176 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
5177 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
5178 else
5179 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
55e303ae 5180 }
91447636 5181exit:
5182 if (started_tr)
5183 hfs_end_transaction(hfsmp);
5184
5185 return (retval);
5186
5187restore:
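	/*
	 * Error recovery: if ExtendFileC never added any blocks
	 * (ff_blocks is still headblks) there is nothing to undo;
	 * otherwise truncate the fork back to ff_size to release the
	 * newly allocated clone blocks.
	 */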
5188 if (fp->ff_blocks == headblks) {
5189 if (took_trunc_lock)
39236c6e 5190 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
91447636 5191 goto exit;
2d21ac55 5192 }
5193 /*
5194 * Give back any newly allocated space.
5195 */
5196 if (lockflags == 0) {
5197 lockflags = SFL_BITMAP;
5198 if (overflow_extents(fp))
5199 lockflags |= SFL_EXTENTS;
5200 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
5201 }
5202
5203 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
5204 FTOC(fp)->c_fileid, false);
5205
5206 hfs_systemfile_unlock(hfsmp, lockflags);
5207 lockflags = 0;
5208
5209 if (took_trunc_lock)
39236c6e 5210 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
91447636 5211 goto exit;
5212}
5213
5214
5215/*
5216 * Clone a file's data within the file.
5217 *
5218 */
5219static int
91447636 5220hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
5221{
5222 caddr_t bufp;
5223 size_t bufsize;
5224 size_t copysize;
5225 size_t iosize;
55e303ae 5226 size_t offset;
b0d623f7 5227 off_t writebase;
5228 uio_t auio;
5229 int error = 0;
55e303ae 5230
5231 writebase = blkstart * blksize;
5232 copysize = blkcnt * blksize;
0c530ab8 5233 iosize = bufsize = MIN(copysize, 128 * 1024);
5234 offset = 0;
5235
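	/*
	 * Drop the cnode lock for the duration of the copy; it is
	 * re-acquired (allowing C_NOEXISTS) before every return below.
	 */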
5236 hfs_unlock(VTOC(vp));
5237
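	/*
	 * With content protection enabled, verify that writing this file
	 * is currently permitted before staging any data through the copy.
	 */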
5238#if CONFIG_PROTECT
316670eb 5239 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
39236c6e 5240 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5241 return (error);
5242 }
5243#endif /* CONFIG_PROTECT */
5244
55e303ae 5245 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
39236c6e 5246 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
55e303ae 5247 return (ENOMEM);
6d2010ae 5248 }
55e303ae 5249
b0d623f7 5250 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
5251
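	/*
	 * Copy loop: read up to bufsize bytes of the old data at 'offset'
	 * and rewrite them at 'writebase + offset', i.e. into the newly
	 * allocated blocks that now follow the original extents.
	 * IO_NOCACHE keeps the staging copy out of the page cache, and
	 * IO_SYNC forces each chunk to disk before moving on, so the clone
	 * is stable by the time the caller releases the old blocks.
	 */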
5252 while (offset < copysize) {
5253 iosize = MIN(copysize - offset, iosize);
5254
b0d623f7 5255 uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
91447636 5256 uio_addiov(auio, (uintptr_t)bufp, iosize);
55e303ae 5257
2d21ac55 5258 error = cluster_read(vp, auio, copysize, IO_NOCACHE);
5259 if (error) {
5260 printf("hfs_clonefile: cluster_read failed - %d\n", error);
5261 break;
5262 }
91447636 5263 if (uio_resid(auio) != 0) {
316670eb 5264 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
5265 error = EIO;
5266 break;
5267 }
5268
b0d623f7 5269 uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
91447636 5270 uio_addiov(auio, (uintptr_t)bufp, iosize);
55e303ae 5271
5272 error = cluster_write(vp, auio, writebase + offset,
5273 writebase + offset + iosize,
91447636 5274 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
5275 if (error) {
5276 printf("hfs_clonefile: cluster_write failed - %d\n", error);
5277 break;
5278 }
91447636 5279 if (uio_resid(auio) != 0) {
5280 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
5281 error = EIO;
5282 break;
5283 }
5284 offset += iosize;
5285 }
5286 uio_free(auio);
5287
5288 if ((blksize & PAGE_MASK)) {
 5289 /*
 5290 * Since the copy may not have started on a PAGE
 5291 * boundary (or may not have ended on one), we
 5292 * may have pages left in the cache, because NOCACHE
 5293 * lets partially written pages linger. Flush the
 5294 * entire range to make sure we don't leave any pages
 5295 * behind that are beyond (or intersect) the real
 5296 * LEOF of this file.
 5297 */
5298 ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
5299 } else {
5300 /*
fe8ab488 5301 * No need to call ubc_msync or hfs_invalbuf
5302 * since the file was copied using IO_NOCACHE and
5303 * the copy was done starting and ending on a page
5304 * boundary in the file.
5305 */
5306 }
55e303ae 5307 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
91447636 5308
39236c6e 5309 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
5310 return (error);
5311}
5312
5313/*
5314 * Clone a system (metadata) file.
5315 *
5316 */
5317static int
5318hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
91447636 5319 kauth_cred_t cred, struct proc *p)
5320{
5321 caddr_t bufp;
5322 char * offset;
5323 size_t bufsize;
5324 size_t iosize;
5325 struct buf *bp = NULL;
5326 daddr64_t blkno;
5327 daddr64_t blk;
5328 daddr64_t start_blk;
5329 daddr64_t last_blk;
5330 int breadcnt;
5331 int i;
5332 int error = 0;
5333
5334
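	/*
	 * Stage the copy through a buffer of at most 1 MB, rounded down to
	 * a multiple of the vnode's logical block size; breadcnt is how
	 * many logical blocks fit in one pass.
	 */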
5335 iosize = GetLogicalBlockSize(vp);
5336 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
5337 breadcnt = bufsize / iosize;
5338
5339 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
5340 return (ENOMEM);
5341 }
5342 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
5343 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
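	/*
	 * start_blk and last_blk are in logical-block units: the read loop
	 * walks blocks [0, last_blk) of the fork, and the write loop lays
	 * the copy down starting at start_blk, just past the original data.
	 */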
5344 blkno = 0;
5345
91447636 5346 while (blkno < last_blk) {
5347 /*
5348 * Read up to a megabyte
5349 */
5350 offset = bufp;
5351 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
5352 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
5353 if (error) {
5354 printf("hfs_clonesysfile: meta_bread error %d\n", error);
5355 goto out;
5356 }
5357 if (buf_count(bp) != iosize) {
5358 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
5359 goto out;
5360 }
5361 bcopy((char *)buf_dataptr(bp), offset, iosize);
5362
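	/*
	 * The source buffer is about to become stale (these blocks will
	 * be truncated away by the caller), so toss it from the buffer
	 * cache instead of letting it linger.
	 */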
5363 buf_markinvalid(bp);
5364 buf_brelse(bp);
55e303ae 5365 bp = NULL;
91447636 5366
5367 offset += iosize;
5368 }
5369
5370 /*
5371 * Write up to a megabyte
5372 */
5373 offset = bufp;
5374 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
5375 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
55e303ae 5376 if (bp == NULL) {
91447636 5377 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
5378 error = EIO;
5379 goto out;
5380 }
5381 bcopy(offset, (char *)buf_dataptr(bp), iosize);
5382 error = (int)buf_bwrite(bp);
5383 bp = NULL;
5384 if (error)
5385 goto out;
5386 offset += iosize;
5387 }
5388 }
5389out:
5390 if (bp) {
91447636 5391 buf_brelse(bp);
5392 }
5393
5394 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
5395
91447636 5396 error = hfs_fsync(vp, MNT_WAIT, 0, p);
5397
5398 return (error);
5399}