]> git.saurik.com Git - apple/xnu.git/blame - bsd/hfs/hfs_readwrite.c
xnu-2422.90.20.tar.gz
[apple/xnu.git] / bsd / hfs / hfs_readwrite.c
CommitLineData
1c79356b 1/*
bd504ef0 2 * Copyright (c) 2000-2013 Apple Inc. All rights reserved.
5d5c5d0d 3 *
2d21ac55 4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
1c79356b 5 *
2d21ac55
A
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
8f6c56a5 14 *
2d21ac55
A
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
8f6c56a5
A
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
2d21ac55
A
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
8f6c56a5 25 *
2d21ac55 26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
1c79356b
A
27 */
28/* @(#)hfs_readwrite.c 1.0
29 *
9bccf70c 30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
1c79356b 31 *
1c79356b
A
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
33 *
1c79356b
A
34 */
35
36#include <sys/param.h>
37#include <sys/systm.h>
38#include <sys/resourcevar.h>
39#include <sys/kernel.h>
40#include <sys/fcntl.h>
55e303ae 41#include <sys/filedesc.h>
1c79356b
A
42#include <sys/stat.h>
43#include <sys/buf.h>
316670eb 44#include <sys/buf_internal.h>
1c79356b 45#include <sys/proc.h>
91447636 46#include <sys/kauth.h>
1c79356b 47#include <sys/vnode.h>
2d21ac55 48#include <sys/vnode_internal.h>
1c79356b 49#include <sys/uio.h>
91447636 50#include <sys/vfs_context.h>
2d21ac55
A
51#include <sys/fsevents.h>
52#include <kern/kalloc.h>
8f6c56a5
A
53#include <sys/disk.h>
54#include <sys/sysctl.h>
b0d623f7 55#include <sys/fsctl.h>
316670eb 56#include <sys/mount_internal.h>
1c79356b
A
57
58#include <miscfs/specfs/specdev.h>
59
1c79356b 60#include <sys/ubc.h>
2d21ac55
A
61#include <sys/ubc_internal.h>
62
1c79356b 63#include <vm/vm_pageout.h>
91447636 64#include <vm/vm_kern.h>
1c79356b 65
1c79356b
A
66#include <sys/kdebug.h>
67
68#include "hfs.h"
2d21ac55 69#include "hfs_attrlist.h"
1c79356b 70#include "hfs_endian.h"
2d21ac55 71#include "hfs_fsctl.h"
9bccf70c 72#include "hfs_quota.h"
1c79356b
A
73#include "hfscommon/headers/FileMgrInternal.h"
74#include "hfscommon/headers/BTreesInternal.h"
9bccf70c
A
75#include "hfs_cnode.h"
76#include "hfs_dbg.h"
1c79356b 77
1c79356b
A
78#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
79
80enum {
81 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
82};
83
935ed37a 84/* from bsd/hfs/hfs_vfsops.c */
b0d623f7 85extern int hfs_vfs_vget (struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
91447636 86
91447636
A
87static int hfs_clonefile(struct vnode *, int, int, int);
88static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
b0d623f7
A
89static int hfs_minorupdate(struct vnode *vp);
90static int do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skip, vfs_context_t context);
91
39236c6e
A
92/* from bsd/hfs/hfs_vnops.c */
93extern decmpfs_cnode* hfs_lazy_init_decmpfs_cnode (struct cnode *cp);
94
95
55e303ae 96
8f6c56a5 97int flush_cache_on_write = 0;
6d2010ae 98SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW | CTLFLAG_LOCKED, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
8f6c56a5 99
91447636
A
100/*
101 * Read data from a file.
102 */
1c79356b 103int
91447636 104hfs_vnop_read(struct vnop_read_args *ap)
1c79356b 105{
316670eb
A
106 /*
107 struct vnop_read_args {
108 struct vnodeop_desc *a_desc;
109 vnode_t a_vp;
110 struct uio *a_uio;
111 int a_ioflag;
112 vfs_context_t a_context;
113 };
114 */
115
91447636
A
116 uio_t uio = ap->a_uio;
117 struct vnode *vp = ap->a_vp;
9bccf70c
A
118 struct cnode *cp;
119 struct filefork *fp;
91447636
A
120 struct hfsmount *hfsmp;
121 off_t filesize;
122 off_t filebytes;
123 off_t start_resid = uio_resid(uio);
124 off_t offset = uio_offset(uio);
9bccf70c 125 int retval = 0;
6d2010ae 126 int took_truncate_lock = 0;
316670eb 127 int io_throttle = 0;
55e303ae 128
9bccf70c 129 /* Preflight checks */
91447636
A
130 if (!vnode_isreg(vp)) {
131 /* can only read regular files */
132 if (vnode_isdir(vp))
133 return (EISDIR);
134 else
135 return (EPERM);
136 }
137 if (start_resid == 0)
9bccf70c 138 return (0); /* Nothing left to do */
91447636 139 if (offset < 0)
9bccf70c 140 return (EINVAL); /* cant read from a negative offset */
b0d623f7 141
39236c6e
A
142
143
b0d623f7
A
144#if HFS_COMPRESSION
145 if (VNODE_IS_RSRC(vp)) {
146 if (hfs_hides_rsrc(ap->a_context, VTOC(vp), 1)) { /* 1 == don't take the cnode lock */
147 return 0;
148 }
149 /* otherwise read the resource fork normally */
150 } else {
151 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
152 if (compressed) {
153 retval = decmpfs_read_compressed(ap, &compressed, VTOCMP(vp));
154 if (compressed) {
155 if (retval == 0) {
156 /* successful read, update the access time */
157 VTOC(vp)->c_touch_acctime = TRUE;
158
159 /* compressed files are not hot file candidates */
160 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
161 VTOF(vp)->ff_bytesread = 0;
162 }
163 }
164 return retval;
165 }
166 /* otherwise the file was converted back to a regular file while we were reading it */
167 retval = 0;
316670eb 168 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
6d2010ae
A
169 int error;
170
171 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_READ_OP);
172 if (error) {
173 return error;
174 }
175
b0d623f7
A
176 }
177 }
178#endif /* HFS_COMPRESSION */
9bccf70c
A
179
180 cp = VTOC(vp);
181 fp = VTOF(vp);
91447636
A
182 hfsmp = VTOHFS(vp);
183
6d2010ae 184#if CONFIG_PROTECT
316670eb 185 if ((retval = cp_handle_vnop (vp, CP_READ_ACCESS, ap->a_ioflag)) != 0) {
6d2010ae
A
186 goto exit;
187 }
188#endif
189
316670eb
A
190 /*
191 * If this read request originated from a syscall (as opposed to
192 * an in-kernel page fault or something), then set it up for
39236c6e 193 * throttle checks
316670eb
A
194 */
195 if (ap->a_ioflag & IO_SYSCALL_DISPATCH) {
196 io_throttle = IO_RETURN_ON_THROTTLE;
197 }
198
199read_again:
200
91447636 201 /* Protect against a size change. */
39236c6e 202 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
6d2010ae 203 took_truncate_lock = 1;
91447636 204
9bccf70c 205 filesize = fp->ff_size;
91447636
A
206 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
207 if (offset > filesize) {
208 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
209 (offset > (off_t)MAXHFSFILESIZE)) {
210 retval = EFBIG;
211 }
212 goto exit;
9bccf70c 213 }
1c79356b 214
9bccf70c 215 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
91447636 216 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
1c79356b 217
39236c6e 218 retval = cluster_read(vp, uio, filesize, ap->a_ioflag |io_throttle);
1c79356b 219
91447636 220 cp->c_touch_acctime = TRUE;
1c79356b 221
9bccf70c 222 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
91447636 223 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
1c79356b 224
55e303ae
A
225 /*
226 * Keep track blocks read
227 */
2d21ac55 228 if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
91447636
A
229 int took_cnode_lock = 0;
230 off_t bytesread;
231
232 bytesread = start_resid - uio_resid(uio);
233
234 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
235 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
39236c6e 236 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
91447636
A
237 took_cnode_lock = 1;
238 }
55e303ae
A
239 /*
240 * If this file hasn't been seen since the start of
241 * the current sampling period then start over.
242 */
2d21ac55 243 if (cp->c_atime < hfsmp->hfc_timebase) {
91447636
A
244 struct timeval tv;
245
246 fp->ff_bytesread = bytesread;
247 microtime(&tv);
248 cp->c_atime = tv.tv_sec;
55e303ae 249 } else {
91447636 250 fp->ff_bytesread += bytesread;
55e303ae 251 }
91447636
A
252 if (took_cnode_lock)
253 hfs_unlock(cp);
55e303ae 254 }
91447636 255exit:
6d2010ae 256 if (took_truncate_lock) {
39236c6e 257 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
6d2010ae 258 }
316670eb
A
259 if (retval == EAGAIN) {
260 throttle_lowpri_io(1);
6d2010ae 261
316670eb
A
262 retval = 0;
263 goto read_again;
264 }
9bccf70c 265 return (retval);
1c79356b
A
266}
267
268/*
91447636
A
269 * Write data to a file.
270 */
1c79356b 271int
91447636 272hfs_vnop_write(struct vnop_write_args *ap)
1c79356b 273{
91447636 274 uio_t uio = ap->a_uio;
9bccf70c 275 struct vnode *vp = ap->a_vp;
9bccf70c
A
276 struct cnode *cp;
277 struct filefork *fp;
91447636
A
278 struct hfsmount *hfsmp;
279 kauth_cred_t cred = NULL;
280 off_t origFileSize;
281 off_t writelimit;
2d21ac55 282 off_t bytesToAdd = 0;
55e303ae 283 off_t actualBytesAdded;
9bccf70c 284 off_t filebytes;
91447636 285 off_t offset;
b0d623f7 286 ssize_t resid;
91447636
A
287 int eflags;
288 int ioflag = ap->a_ioflag;
289 int retval = 0;
290 int lockflags;
291 int cnode_locked = 0;
2d21ac55 292 int partialwrite = 0;
6d2010ae
A
293 int do_snapshot = 1;
294 time_t orig_ctime=VTOC(vp)->c_ctime;
295 int took_truncate_lock = 0;
316670eb 296 int io_return_on_throttle = 0;
7ddcb079 297 struct rl_entry *invalid_range;
1c79356b 298
b0d623f7
A
299#if HFS_COMPRESSION
300 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
301 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
302 switch(state) {
303 case FILE_IS_COMPRESSED:
304 return EACCES;
305 case FILE_IS_CONVERTING:
6d2010ae
A
306 /* if FILE_IS_CONVERTING, we allow writes but do not
307 bother with snapshots or else we will deadlock.
308 */
309 do_snapshot = 0;
b0d623f7
A
310 break;
311 default:
312 printf("invalid state %d for compressed file\n", state);
313 /* fall through */
314 }
316670eb 315 } else if ((VTOC(vp)->c_bsdflags & UF_COMPRESSED)) {
6d2010ae
A
316 int error;
317
318 error = check_for_dataless_file(vp, NAMESPACE_HANDLER_WRITE_OP);
319 if (error != 0) {
320 return error;
321 }
b0d623f7 322 }
6d2010ae
A
323
324 if (do_snapshot) {
325 check_for_tracked_file(vp, orig_ctime, NAMESPACE_HANDLER_WRITE_OP, uio);
326 }
327
b0d623f7
A
328#endif
329
91447636
A
330 resid = uio_resid(uio);
331 offset = uio_offset(uio);
1c79356b 332
91447636 333 if (offset < 0)
9bccf70c 334 return (EINVAL);
91447636 335 if (resid == 0)
9bccf70c 336 return (E_NONE);
91447636
A
337 if (!vnode_isreg(vp))
338 return (EPERM); /* Can only write regular files */
339
9bccf70c
A
340 cp = VTOC(vp);
341 fp = VTOF(vp);
91447636 342 hfsmp = VTOHFS(vp);
b4c24cb9 343
6d2010ae 344#if CONFIG_PROTECT
316670eb 345 if ((retval = cp_handle_vnop (vp, CP_WRITE_ACCESS, 0)) != 0) {
6d2010ae
A
346 goto exit;
347 }
348#endif
349
9bccf70c 350 eflags = kEFDeferMask; /* defer file block allocations */
6d2010ae 351#if HFS_SPARSE_DEV
55e303ae
A
352 /*
353 * When the underlying device is sparse and space
354 * is low (< 8MB), stop doing delayed allocations
355 * and begin doing synchronous I/O.
356 */
357 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
358 (hfs_freeblks(hfsmp, 0) < 2048)) {
359 eflags &= ~kEFDeferMask;
360 ioflag |= IO_SYNC;
361 }
362#endif /* HFS_SPARSE_DEV */
363
39236c6e
A
364 if ((ioflag & (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) ==
365 (IO_SINGLE_WRITER | IO_SYSCALL_DISPATCH)) {
316670eb
A
366 io_return_on_throttle = IO_RETURN_ON_THROTTLE;
367 }
39236c6e 368
2d21ac55
A
369again:
370 /* Protect against a size change. */
7ddcb079
A
371 /*
372 * Protect against a size change.
373 *
374 * Note: If took_truncate_lock is true, then we previously got the lock shared
375 * but needed to upgrade to exclusive. So try getting it exclusive from the
376 * start.
377 */
378 if (ioflag & IO_APPEND || took_truncate_lock) {
39236c6e 379 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
6d2010ae
A
380 }
381 else {
39236c6e 382 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_DEFAULT);
6d2010ae
A
383 }
384 took_truncate_lock = 1;
91447636 385
6d2010ae 386 /* Update UIO */
2d21ac55
A
387 if (ioflag & IO_APPEND) {
388 uio_setoffset(uio, fp->ff_size);
389 offset = fp->ff_size;
390 }
316670eb 391 if ((cp->c_bsdflags & APPEND) && offset != fp->ff_size) {
2d21ac55
A
392 retval = EPERM;
393 goto exit;
394 }
91447636 395
2d21ac55 396 origFileSize = fp->ff_size;
91447636 397 writelimit = offset + resid;
2d21ac55
A
398 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
399
7ddcb079
A
400 /*
401 * We may need an exclusive truncate lock for several reasons, all
402 * of which are because we may be writing to a (portion of a) block
403 * for the first time, and we need to make sure no readers see the
404 * prior, uninitialized contents of the block. The cases are:
405 *
406 * 1. We have unallocated (delayed allocation) blocks. We may be
407 * allocating new blocks to the file and writing to them.
408 * (A more precise check would be whether the range we're writing
409 * to contains delayed allocation blocks.)
410 * 2. We need to extend the file. The bytes between the old EOF
411 * and the new EOF are not yet initialized. This is important
412 * even if we're not allocating new blocks to the file. If the
413 * old EOF and new EOF are in the same block, we still need to
414 * protect that range of bytes until they are written for the
415 * first time.
416 * 3. The write overlaps some invalid ranges (delayed zero fill; that
417 * part of the file has been allocated, but not yet written).
418 *
419 * If we had a shared lock with the above cases, we need to try to upgrade
420 * to an exclusive lock. If the upgrade fails, we will lose the shared
421 * lock, and will need to take the truncate lock again; the took_truncate_lock
422 * flag will still be set, causing us to try for an exclusive lock next time.
423 *
424 * NOTE: Testing for #3 (delayed zero fill) needs to be done while the cnode
425 * lock is held, since it protects the range lists.
2d21ac55 426 */
6d2010ae 427 if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
7ddcb079
A
428 ((fp->ff_unallocblocks != 0) ||
429 (writelimit > origFileSize))) {
2d21ac55 430 if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
7ddcb079
A
431 /*
432 * Lock upgrade failed and we lost our shared lock, try again.
433 * Note: we do not set took_truncate_lock=0 here. Leaving it
434 * set to 1 will cause us to try to get the lock exclusive.
435 */
2d21ac55
A
436 goto again;
437 }
6d2010ae
A
438 else {
439 /* Store the owner in the c_truncatelockowner field if we successfully upgrade */
440 cp->c_truncatelockowner = current_thread();
441 }
2d21ac55
A
442 }
443
39236c6e 444 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
2d21ac55
A
445 goto exit;
446 }
447 cnode_locked = 1;
448
39236c6e
A
449 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
450 hfs_incr_gencount (cp);
451 }
452
7ddcb079
A
453 /*
454 * Now that we have the cnode lock, see if there are delayed zero fill ranges
455 * overlapping our write. If so, we need the truncate lock exclusive (see above).
456 */
457 if ((cp->c_truncatelockowner == HFS_SHARED_OWNER) &&
458 (rl_scan(&fp->ff_invalidranges, offset, writelimit-1, &invalid_range) != RL_NOOVERLAP)) {
459 /*
460 * When testing, it appeared that calling lck_rw_lock_shared_to_exclusive() causes
461 * a deadlock, rather than simply returning failure. (That is, it apparently does
462 * not behave like a "try_lock"). Since this condition is rare, just drop the
463 * cnode lock and try again. Since took_truncate_lock is set, we will
464 * automatically take the truncate lock exclusive.
465 */
466 hfs_unlock(cp);
467 cnode_locked = 0;
39236c6e 468 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
7ddcb079 469 goto again;
2d21ac55 470 }
7ddcb079
A
471
472 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
473 (int)offset, uio_resid(uio), (int)fp->ff_size,
474 (int)filebytes, 0);
2d21ac55
A
475
476 /* Check if we do not need to extend the file */
477 if (writelimit <= filebytes) {
91447636 478 goto sizeok;
2d21ac55 479 }
91447636
A
480
481 cred = vfs_context_ucred(ap->a_context);
91447636 482 bytesToAdd = writelimit - filebytes;
2d21ac55
A
483
484#if QUOTA
91447636
A
485 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
486 cred, 0);
487 if (retval)
488 goto exit;
489#endif /* QUOTA */
490
491 if (hfs_start_transaction(hfsmp) != 0) {
492 retval = EINVAL;
493 goto exit;
b4c24cb9
A
494 }
495
9bccf70c 496 while (writelimit > filebytes) {
9bccf70c 497 bytesToAdd = writelimit - filebytes;
91447636 498 if (cred && suser(cred, NULL) != 0)
9bccf70c
A
499 eflags |= kEFReserveMask;
500
91447636
A
501 /* Protect extents b-tree and allocation bitmap */
502 lockflags = SFL_BITMAP;
503 if (overflow_extents(fp))
504 lockflags |= SFL_EXTENTS;
505 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
55e303ae
A
506
507 /* Files that are changing size are not hot file candidates. */
508 if (hfsmp->hfc_stage == HFC_RECORDING) {
509 fp->ff_bytesread = 0;
510 }
91447636 511 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
9bccf70c
A
512 0, eflags, &actualBytesAdded));
513
91447636
A
514 hfs_systemfile_unlock(hfsmp, lockflags);
515
9bccf70c
A
516 if ((actualBytesAdded == 0) && (retval == E_NONE))
517 retval = ENOSPC;
518 if (retval != E_NONE)
519 break;
91447636 520 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
9bccf70c 521 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
91447636 522 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
b4c24cb9 523 }
91447636
A
524 (void) hfs_update(vp, TRUE);
525 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
526 (void) hfs_end_transaction(hfsmp);
b4c24cb9 527
2d21ac55
A
528 /*
529 * If we didn't grow the file enough try a partial write.
530 * POSIX expects this behavior.
531 */
532 if ((retval == ENOSPC) && (filebytes > offset)) {
533 retval = 0;
534 partialwrite = 1;
535 uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
536 resid -= bytesToAdd;
537 writelimit = filebytes;
538 }
91447636 539sizeok:
55e303ae 540 if (retval == E_NONE) {
0b4e3aa0
A
541 off_t filesize;
542 off_t zero_off;
543 off_t tail_off;
544 off_t inval_start;
545 off_t inval_end;
91447636 546 off_t io_start;
0b4e3aa0 547 int lflag;
0b4e3aa0 548
9bccf70c 549 if (writelimit > fp->ff_size)
0b4e3aa0
A
550 filesize = writelimit;
551 else
9bccf70c 552 filesize = fp->ff_size;
1c79356b 553
2d21ac55 554 lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
1c79356b 555
91447636
A
556 if (offset <= fp->ff_size) {
557 zero_off = offset & ~PAGE_MASK_64;
0b4e3aa0
A
558
559 /* Check to see whether the area between the zero_offset and the start
560 of the transfer to see whether is invalid and should be zero-filled
561 as part of the transfer:
562 */
91447636
A
563 if (offset > zero_off) {
564 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
55e303ae
A
565 lflag |= IO_HEADZEROFILL;
566 }
0b4e3aa0 567 } else {
9bccf70c 568 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
0b4e3aa0 569
9bccf70c 570 /* The bytes between fp->ff_size and uio->uio_offset must never be
0b4e3aa0
A
571 read without being zeroed. The current last block is filled with zeroes
572 if it holds valid data but in all cases merely do a little bookkeeping
573 to track the area from the end of the current last page to the start of
574 the area actually written. For the same reason only the bytes up to the
575 start of the page where this write will start is invalidated; any remainder
576 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
577
578 Note that inval_start, the start of the page after the current EOF,
579 may be past the start of the write, in which case the zeroing
580 will be handled by the cluser_write of the actual data.
581 */
9bccf70c 582 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
91447636 583 inval_end = offset & ~PAGE_MASK_64;
9bccf70c 584 zero_off = fp->ff_size;
0b4e3aa0 585
9bccf70c
A
586 if ((fp->ff_size & PAGE_MASK_64) &&
587 (rl_scan(&fp->ff_invalidranges,
0b4e3aa0 588 eof_page_base,
9bccf70c 589 fp->ff_size - 1,
0b4e3aa0
A
590 &invalid_range) != RL_NOOVERLAP)) {
591 /* The page containing the EOF is not valid, so the
592 entire page must be made inaccessible now. If the write
593 starts on a page beyond the page containing the eof
594 (inval_end > eof_page_base), add the
595 whole page to the range to be invalidated. Otherwise
596 (i.e. if the write starts on the same page), zero-fill
597 the entire page explicitly now:
598 */
599 if (inval_end > eof_page_base) {
600 inval_start = eof_page_base;
601 } else {
602 zero_off = eof_page_base;
603 };
604 };
605
606 if (inval_start < inval_end) {
91447636 607 struct timeval tv;
0b4e3aa0
A
608 /* There's some range of data that's going to be marked invalid */
609
610 if (zero_off < inval_start) {
611 /* The pages between inval_start and inval_end are going to be invalidated,
612 and the actual write will start on a page past inval_end. Now's the last
613 chance to zero-fill the page containing the EOF:
614 */
91447636
A
615 hfs_unlock(cp);
616 cnode_locked = 0;
617 retval = cluster_write(vp, (uio_t) 0,
9bccf70c 618 fp->ff_size, inval_start,
91447636 619 zero_off, (off_t)0,
9bccf70c 620 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
39236c6e 621 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
91447636 622 cnode_locked = 1;
0b4e3aa0 623 if (retval) goto ioerr_exit;
91447636 624 offset = uio_offset(uio);
0b4e3aa0
A
625 };
626
627 /* Mark the remaining area of the newly allocated space as invalid: */
9bccf70c 628 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
91447636
A
629 microuptime(&tv);
630 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
9bccf70c 631 zero_off = fp->ff_size = inval_end;
0b4e3aa0
A
632 };
633
91447636 634 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
0b4e3aa0 635 };
1c79356b 636
0b4e3aa0
A
637 /* Check to see whether the area between the end of the write and the end of
638 the page it falls in is invalid and should be zero-filled as part of the transfer:
639 */
640 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
641 if (tail_off > filesize) tail_off = filesize;
642 if (tail_off > writelimit) {
9bccf70c 643 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
0b4e3aa0
A
644 lflag |= IO_TAILZEROFILL;
645 };
646 };
647
648 /*
649 * if the write starts beyond the current EOF (possibly advanced in the
650 * zeroing of the last block, above), then we'll zero fill from the current EOF
651 * to where the write begins:
652 *
653 * NOTE: If (and ONLY if) the portion of the file about to be written is
654 * before the current EOF it might be marked as invalid now and must be
655 * made readable (removed from the invalid ranges) before cluster_write
656 * tries to write it:
657 */
91447636 658 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
9bccf70c 659 if (io_start < fp->ff_size) {
91447636
A
660 off_t io_end;
661
662 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
9bccf70c 663 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
0b4e3aa0 664 };
91447636
A
665
666 hfs_unlock(cp);
667 cnode_locked = 0;
593a1d5f
A
668
669 /*
670 * We need to tell UBC the fork's new size BEFORE calling
671 * cluster_write, in case any of the new pages need to be
672 * paged out before cluster_write completes (which does happen
673 * in embedded systems due to extreme memory pressure).
674 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
675 * will be, so that it can pass that on to cluster_pageout, and
676 * allow those pageouts.
677 *
678 * We don't update ff_size yet since we don't want pageins to
679 * be able to see uninitialized data between the old and new
680 * EOF, until cluster_write has completed and initialized that
681 * part of the file.
682 *
683 * The vnode pager relies on the file size last given to UBC via
684 * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
685 * ff_size (whichever is larger). NOTE: ff_new_size is always
686 * zero, unless we are extending the file via write.
687 */
688 if (filesize > fp->ff_size) {
689 fp->ff_new_size = filesize;
690 ubc_setsize(vp, filesize);
691 }
9bccf70c 692 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
316670eb 693 tail_off, lflag | IO_NOZERODIRTY | io_return_on_throttle);
2d21ac55 694 if (retval) {
593a1d5f 695 fp->ff_new_size = 0; /* no longer extending; use ff_size */
316670eb
A
696
697 if (retval == EAGAIN) {
698 /*
699 * EAGAIN indicates that we still have I/O to do, but
700 * that we now need to be throttled
701 */
702 if (resid != uio_resid(uio)) {
703 /*
704 * did manage to do some I/O before returning EAGAIN
705 */
706 resid = uio_resid(uio);
707 offset = uio_offset(uio);
708
709 cp->c_touch_chgtime = TRUE;
710 cp->c_touch_modtime = TRUE;
711 }
712 if (filesize > fp->ff_size) {
713 /*
714 * we called ubc_setsize before the call to
715 * cluster_write... since we only partially
716 * completed the I/O, we need to
717 * re-adjust our idea of the filesize based
718 * on our interim EOF
719 */
720 ubc_setsize(vp, offset);
721
722 fp->ff_size = offset;
723 }
724 goto exit;
725 }
593a1d5f
A
726 if (filesize > origFileSize) {
727 ubc_setsize(vp, origFileSize);
728 }
2d21ac55
A
729 goto ioerr_exit;
730 }
593a1d5f
A
731
732 if (filesize > origFileSize) {
733 fp->ff_size = filesize;
734
91447636 735 /* Files that are changing size are not hot file candidates. */
593a1d5f 736 if (hfsmp->hfc_stage == HFC_RECORDING) {
91447636 737 fp->ff_bytesread = 0;
593a1d5f 738 }
91447636 739 }
593a1d5f
A
740 fp->ff_new_size = 0; /* ff_size now has the correct size */
741
742 /* If we wrote some bytes, then touch the change and mod times */
91447636
A
743 if (resid > uio_resid(uio)) {
744 cp->c_touch_chgtime = TRUE;
745 cp->c_touch_modtime = TRUE;
0b4e3aa0 746 }
9bccf70c 747 }
2d21ac55
A
748 if (partialwrite) {
749 uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
750 resid += bytesToAdd;
751 }
8f6c56a5 752
2d21ac55 753 // XXXdbg - see radar 4871353 for more info
8f6c56a5
A
754 {
755 if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
756 VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
757 }
758 }
55e303ae 759
0b4e3aa0 760ioerr_exit:
9bccf70c 761 /*
0b4e3aa0 762 * If we successfully wrote any data, and we are not the superuser
9bccf70c
A
763 * we clear the setuid and setgid bits as a precaution against
764 * tampering.
765 */
91447636
A
766 if (cp->c_mode & (S_ISUID | S_ISGID)) {
767 cred = vfs_context_ucred(ap->a_context);
768 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
769 if (!cnode_locked) {
39236c6e 770 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
91447636
A
771 cnode_locked = 1;
772 }
773 cp->c_mode &= ~(S_ISUID | S_ISGID);
774 }
775 }
9bccf70c
A
776 if (retval) {
777 if (ioflag & IO_UNIT) {
91447636 778 if (!cnode_locked) {
39236c6e 779 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
91447636
A
780 cnode_locked = 1;
781 }
782 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
b0d623f7 783 0, 0, ap->a_context);
91447636
A
784 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
785 uio_setresid(uio, resid);
786 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
787 }
788 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
789 if (!cnode_locked) {
39236c6e 790 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
91447636 791 cnode_locked = 1;
9bccf70c 792 }
91447636 793 retval = hfs_update(vp, TRUE);
9bccf70c 794 }
91447636
A
795 /* Updating vcbWrCnt doesn't need to be atomic. */
796 hfsmp->vcbWrCnt++;
1c79356b 797
9bccf70c 798 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
91447636
A
799 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
800exit:
801 if (cnode_locked)
802 hfs_unlock(cp);
6d2010ae
A
803
804 if (took_truncate_lock) {
39236c6e 805 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
6d2010ae 806 }
316670eb
A
807 if (retval == EAGAIN) {
808 throttle_lowpri_io(1);
809
810 retval = 0;
811 goto again;
812 }
9bccf70c 813 return (retval);
1c79356b
A
814}
815
91447636 816/* support for the "bulk-access" fcntl */
1c79356b 817
91447636 818#define CACHE_LEVELS 16
2d21ac55 819#define NUM_CACHE_ENTRIES (64*16)
91447636
A
820#define PARENT_IDS_FLAG 0x100
821
91447636
A
822struct access_cache {
823 int numcached;
824 int cachehits; /* these two for statistics gathering */
825 int lookups;
826 unsigned int *acache;
2d21ac55 827 unsigned char *haveaccess;
55e303ae
A
828};
829
91447636
A
830struct access_t {
831 uid_t uid; /* IN: effective user id */
832 short flags; /* IN: access requested (i.e. R_OK) */
833 short num_groups; /* IN: number of groups user belongs to */
834 int num_files; /* IN: number of files to process */
835 int *file_ids; /* IN: array of file ids */
836 gid_t *groups; /* IN: array of groups */
837 short *access; /* OUT: access info for each file (0 for 'has access') */
b0d623f7
A
838} __attribute__((unavailable)); // this structure is for reference purposes only
839
840struct user32_access_t {
841 uid_t uid; /* IN: effective user id */
842 short flags; /* IN: access requested (i.e. R_OK) */
843 short num_groups; /* IN: number of groups user belongs to */
844 int num_files; /* IN: number of files to process */
845 user32_addr_t file_ids; /* IN: array of file ids */
846 user32_addr_t groups; /* IN: array of groups */
847 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
91447636 848};
55e303ae 849
b0d623f7 850struct user64_access_t {
91447636
A
851 uid_t uid; /* IN: effective user id */
852 short flags; /* IN: access requested (i.e. R_OK) */
853 short num_groups; /* IN: number of groups user belongs to */
2d21ac55 854 int num_files; /* IN: number of files to process */
b0d623f7
A
855 user64_addr_t file_ids; /* IN: array of file ids */
856 user64_addr_t groups; /* IN: array of groups */
857 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
91447636 858};
55e303ae 859
2d21ac55
A
860
861// these are the "extended" versions of the above structures
862// note that it is crucial that they be different sized than
863// the regular version
864struct ext_access_t {
865 uint32_t flags; /* IN: access requested (i.e. R_OK) */
866 uint32_t num_files; /* IN: number of files to process */
867 uint32_t map_size; /* IN: size of the bit map */
868 uint32_t *file_ids; /* IN: Array of file ids */
869 char *bitmap; /* OUT: hash-bitmap of interesting directory ids */
870 short *access; /* OUT: access info for each file (0 for 'has access') */
871 uint32_t num_parents; /* future use */
872 cnid_t *parents; /* future use */
b0d623f7
A
873} __attribute__((unavailable)); // this structure is for reference purposes only
874
875struct user32_ext_access_t {
876 uint32_t flags; /* IN: access requested (i.e. R_OK) */
877 uint32_t num_files; /* IN: number of files to process */
878 uint32_t map_size; /* IN: size of the bit map */
879 user32_addr_t file_ids; /* IN: Array of file ids */
880 user32_addr_t bitmap; /* OUT: hash-bitmap of interesting directory ids */
881 user32_addr_t access; /* OUT: access info for each file (0 for 'has access') */
882 uint32_t num_parents; /* future use */
883 user32_addr_t parents; /* future use */
2d21ac55
A
884};
885
b0d623f7 886struct user64_ext_access_t {
2d21ac55
A
887 uint32_t flags; /* IN: access requested (i.e. R_OK) */
888 uint32_t num_files; /* IN: number of files to process */
889 uint32_t map_size; /* IN: size of the bit map */
b0d623f7
A
890 user64_addr_t file_ids; /* IN: array of file ids */
891 user64_addr_t bitmap; /* IN: array of groups */
892 user64_addr_t access; /* OUT: access info for each file (0 for 'has access') */
2d21ac55 893 uint32_t num_parents;/* future use */
b0d623f7 894 user64_addr_t parents;/* future use */
2d21ac55
A
895};
896
897
91447636
A
898/*
899 * Perform a binary search for the given parent_id. Return value is
2d21ac55
A
900 * the index if there is a match. If no_match_indexp is non-NULL it
901 * will be assigned with the index to insert the item (even if it was
902 * not found).
91447636 903 */
2d21ac55 904static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
91447636 905{
2d21ac55
A
906 int index=-1;
907 unsigned int lo=0;
91447636 908
2d21ac55
A
909 do {
910 unsigned int mid = ((hi - lo)/2) + lo;
911 unsigned int this_id = array[mid];
912
913 if (parent_id == this_id) {
914 hi = mid;
915 break;
91447636 916 }
2d21ac55
A
917
918 if (parent_id < this_id) {
919 hi = mid;
920 continue;
91447636 921 }
2d21ac55
A
922
923 if (parent_id > this_id) {
924 lo = mid + 1;
925 continue;
926 }
927 } while(lo < hi);
928
929 /* check if lo and hi converged on the match */
930 if (parent_id == array[hi]) {
931 index = hi;
932 }
91447636 933
2d21ac55
A
934 if (no_match_indexp) {
935 *no_match_indexp = hi;
936 }
937
938 return index;
939}
940
941
942static int
943lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
944{
945 unsigned int hi;
946 int matches = 0;
947 int index, no_match_index;
91447636 948
2d21ac55
A
949 if (cache->numcached == 0) {
950 *indexp = 0;
951 return 0; // table is empty, so insert at index=0 and report no match
952 }
91447636 953
2d21ac55 954 if (cache->numcached > NUM_CACHE_ENTRIES) {
2d21ac55
A
955 cache->numcached = NUM_CACHE_ENTRIES;
956 }
91447636 957
2d21ac55 958 hi = cache->numcached - 1;
91447636 959
2d21ac55
A
960 index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);
961
962 /* if no existing entry found, find index for new one */
963 if (index == -1) {
964 index = no_match_index;
965 matches = 0;
966 } else {
967 matches = 1;
968 }
969
970 *indexp = index;
971 return matches;
91447636
A
972}
973
974/*
975 * Add a node to the access_cache at the given index (or do a lookup first
976 * to find the index if -1 is passed in). We currently do a replace rather
977 * than an insert if the cache is full.
978 */
979static void
980add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
981{
2d21ac55
A
982 int lookup_index = -1;
983
984 /* need to do a lookup first if -1 passed for index */
985 if (index == -1) {
986 if (lookup_bucket(cache, &lookup_index, nodeID)) {
987 if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
988 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
989 cache->haveaccess[lookup_index] = access;
990 }
991
992 /* mission accomplished */
993 return;
994 } else {
995 index = lookup_index;
996 }
997
998 }
999
1000 /* if the cache is full, do a replace rather than an insert */
1001 if (cache->numcached >= NUM_CACHE_ENTRIES) {
2d21ac55
A
1002 cache->numcached = NUM_CACHE_ENTRIES-1;
1003
1004 if (index > cache->numcached) {
2d21ac55
A
1005 index = cache->numcached;
1006 }
1007 }
1008
1009 if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
1010 index++;
1011 }
1012
1013 if (index >= 0 && index < cache->numcached) {
1014 /* only do bcopy if we're inserting */
1015 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
1016 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
1017 }
1018
1019 cache->acache[index] = nodeID;
1020 cache->haveaccess[index] = access;
1021 cache->numcached++;
91447636
A
1022}
1023
1024
1025struct cinfo {
2d21ac55
A
1026 uid_t uid;
1027 gid_t gid;
1028 mode_t mode;
1029 cnid_t parentcnid;
1030 u_int16_t recflags;
91447636
A
1031};
1032
1033static int
1034snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
1035{
2d21ac55 1036 struct cinfo *cip = (struct cinfo *)arg;
91447636 1037
2d21ac55
A
1038 cip->uid = attrp->ca_uid;
1039 cip->gid = attrp->ca_gid;
1040 cip->mode = attrp->ca_mode;
1041 cip->parentcnid = descp->cd_parentcnid;
1042 cip->recflags = attrp->ca_recflags;
91447636 1043
2d21ac55 1044 return (0);
91447636
A
1045}
1046
1047/*
1048 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
1049 * isn't incore, then go to the catalog.
1050 */
1051static int
b0d623f7 1052do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, cnid_t cnid,
2d21ac55 1053 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
91447636 1054{
2d21ac55
A
1055 int error = 0;
1056
1057 /* if this id matches the one the fsctl was called with, skip the lookup */
1058 if (cnid == skip_cp->c_cnid) {
1059 cnattrp->ca_uid = skip_cp->c_uid;
1060 cnattrp->ca_gid = skip_cp->c_gid;
1061 cnattrp->ca_mode = skip_cp->c_mode;
b0d623f7 1062 cnattrp->ca_recflags = skip_cp->c_attr.ca_recflags;
2d21ac55
A
1063 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
1064 } else {
1065 struct cinfo c_info;
1066
1067 /* otherwise, check the cnode hash incase the file/dir is incore */
ebb1b9f4 1068 if (hfs_chash_snoop(hfsmp, cnid, 0, snoop_callback, &c_info) == 0) {
2d21ac55
A
1069 cnattrp->ca_uid = c_info.uid;
1070 cnattrp->ca_gid = c_info.gid;
1071 cnattrp->ca_mode = c_info.mode;
1072 cnattrp->ca_recflags = c_info.recflags;
1073 keyp->hfsPlus.parentID = c_info.parentcnid;
91447636 1074 } else {
2d21ac55 1075 int lockflags;
91447636 1076
316670eb
A
1077 if (throttle_io_will_be_throttled(-1, HFSTOVFS(hfsmp)))
1078 throttle_lowpri_io(1);
1079
2d21ac55 1080 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
316670eb 1081
2d21ac55
A
1082 /* lookup this cnid in the catalog */
1083 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
91447636 1084
2d21ac55 1085 hfs_systemfile_unlock(hfsmp, lockflags);
91447636 1086
2d21ac55 1087 cache->lookups++;
91447636 1088 }
2d21ac55 1089 }
91447636 1090
2d21ac55 1091 return (error);
91447636 1092}
55e303ae 1093
2d21ac55 1094
1c79356b 1095/*
91447636
A
1096 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
1097 * up to CACHE_LEVELS as we progress towards the root.
1098 */
1099static int
1100do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
b0d623f7 1101 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred,
2d21ac55
A
1102 struct vfs_context *my_context,
1103 char *bitmap,
1104 uint32_t map_size,
1105 cnid_t* parents,
1106 uint32_t num_parents)
91447636 1107{
2d21ac55
A
1108 int myErr = 0;
1109 int myResult;
1110 HFSCatalogNodeID thisNodeID;
1111 unsigned int myPerms;
1112 struct cat_attr cnattr;
1113 int cache_index = -1, scope_index = -1, scope_idx_start = -1;
1114 CatalogKey catkey;
1115
1116 int i = 0, ids_to_cache = 0;
1117 int parent_ids[CACHE_LEVELS];
1118
1119 thisNodeID = nodeID;
1120 while (thisNodeID >= kRootDirID) {
1121 myResult = 0; /* default to "no access" */
91447636 1122
2d21ac55
A
1123 /* check the cache before resorting to hitting the catalog */
1124
1125 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
1126 * to look any further after hitting cached dir */
1127
1128 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
1129 cache->cachehits++;
1130 myErr = cache->haveaccess[cache_index];
1131 if (scope_index != -1) {
1132 if (myErr == ESRCH) {
1133 myErr = 0;
1134 }
1135 } else {
1136 scope_index = 0; // so we'll just use the cache result
1137 scope_idx_start = ids_to_cache;
1138 }
1139 myResult = (myErr == 0) ? 1 : 0;
1140 goto ExitThisRoutine;
1141 }
1142
1143
1144 if (parents) {
1145 int tmp;
1146 tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
1147 if (scope_index == -1)
1148 scope_index = tmp;
1149 if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
1150 scope_idx_start = ids_to_cache;
1151 }
1152 }
1153
1154 /* remember which parents we want to cache */
1155 if (ids_to_cache < CACHE_LEVELS) {
1156 parent_ids[ids_to_cache] = thisNodeID;
1157 ids_to_cache++;
1158 }
1159 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
1160 if (bitmap && map_size) {
1161 bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
1162 }
1163
1164
1165 /* do the lookup (checks the cnode hash, then the catalog) */
b0d623f7 1166 myErr = do_attr_lookup(hfsmp, cache, thisNodeID, skip_cp, &catkey, &cnattr);
2d21ac55
A
1167 if (myErr) {
1168 goto ExitThisRoutine; /* no access */
1169 }
1170
1171 /* Root always gets access. */
1172 if (suser(myp_ucred, NULL) == 0) {
1173 thisNodeID = catkey.hfsPlus.parentID;
1174 myResult = 1;
1175 continue;
1176 }
1177
1178 // if the thing has acl's, do the full permission check
1179 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1180 struct vnode *vp;
1181
1182 /* get the vnode for this cnid */
6d2010ae 1183 myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0, 0);
2d21ac55
A
1184 if ( myErr ) {
1185 myResult = 0;
1186 goto ExitThisRoutine;
1187 }
1188
1189 thisNodeID = VTOC(vp)->c_parentcnid;
1190
1191 hfs_unlock(VTOC(vp));
1192
1193 if (vnode_vtype(vp) == VDIR) {
1194 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
1195 } else {
1196 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
1197 }
1198
1199 vnode_put(vp);
1200 if (myErr) {
1201 myResult = 0;
1202 goto ExitThisRoutine;
1203 }
1204 } else {
1205 unsigned int flags;
6d2010ae
A
1206 int mode = cnattr.ca_mode & S_IFMT;
1207 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid, cnattr.ca_mode, hfsmp->hfs_mp,myp_ucred, theProcPtr);
2d21ac55 1208
6d2010ae
A
1209 if (mode == S_IFDIR) {
1210 flags = R_OK | X_OK;
1211 } else {
1212 flags = R_OK;
1213 }
1214 if ( (myPerms & flags) != flags) {
1215 myResult = 0;
1216 myErr = EACCES;
1217 goto ExitThisRoutine; /* no access */
1218 }
2d21ac55
A
1219
1220 /* up the hierarchy we go */
1221 thisNodeID = catkey.hfsPlus.parentID;
1222 }
1223 }
1224
1225 /* if here, we have access to this node */
1226 myResult = 1;
1227
1228 ExitThisRoutine:
1229 if (parents && myErr == 0 && scope_index == -1) {
1230 myErr = ESRCH;
1231 }
1232
1233 if (myErr) {
1234 myResult = 0;
1235 }
1236 *err = myErr;
1237
1238 /* cache the parent directory(ies) */
1239 for (i = 0; i < ids_to_cache; i++) {
1240 if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
1241 add_node(cache, -1, parent_ids[i], ESRCH);
1242 } else {
1243 add_node(cache, -1, parent_ids[i], myErr);
1244 }
1245 }
1246
1247 return (myResult);
91447636 1248}
1c79356b 1249
2d21ac55
A
1250static int
1251do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
1252 struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
1253{
1254 boolean_t is64bit;
1255
1256 /*
316670eb 1257 * NOTE: on entry, the vnode has an io_ref. In case this vnode
2d21ac55
A
1258 * happens to be in our list of file_ids, we'll note it
1259 * avoid calling hfs_chashget_nowait() on that id as that
1260 * will cause a "locking against myself" panic.
1261 */
1262 Boolean check_leaf = true;
1263
b0d623f7
A
1264 struct user64_ext_access_t *user_access_structp;
1265 struct user64_ext_access_t tmp_user_access;
2d21ac55
A
1266 struct access_cache cache;
1267
b0d623f7 1268 int error = 0, prev_parent_check_ok=1;
2d21ac55
A
1269 unsigned int i;
1270
2d21ac55
A
1271 short flags;
1272 unsigned int num_files = 0;
1273 int map_size = 0;
1274 int num_parents = 0;
1275 int *file_ids=NULL;
1276 short *access=NULL;
1277 char *bitmap=NULL;
1278 cnid_t *parents=NULL;
1279 int leaf_index;
1280
1281 cnid_t cnid;
1282 cnid_t prevParent_cnid = 0;
1283 unsigned int myPerms;
1284 short myaccess = 0;
1285 struct cat_attr cnattr;
1286 CatalogKey catkey;
1287 struct cnode *skip_cp = VTOC(vp);
1288 kauth_cred_t cred = vfs_context_ucred(context);
1289 proc_t p = vfs_context_proc(context);
1290
1291 is64bit = proc_is64bit(p);
1292
1293 /* initialize the local cache and buffers */
1294 cache.numcached = 0;
1295 cache.cachehits = 0;
1296 cache.lookups = 0;
1297 cache.acache = NULL;
1298 cache.haveaccess = NULL;
1299
1300 /* struct copyin done during dispatch... need to copy file_id array separately */
1301 if (ap->a_data == NULL) {
1302 error = EINVAL;
1303 goto err_exit_bulk_access;
1304 }
1305
1306 if (is64bit) {
b0d623f7 1307 if (arg_size != sizeof(struct user64_ext_access_t)) {
2d21ac55
A
1308 error = EINVAL;
1309 goto err_exit_bulk_access;
1310 }
1311
b0d623f7 1312 user_access_structp = (struct user64_ext_access_t *)ap->a_data;
2d21ac55 1313
b0d623f7
A
1314 } else if (arg_size == sizeof(struct user32_access_t)) {
1315 struct user32_access_t *accessp = (struct user32_access_t *)ap->a_data;
2d21ac55
A
1316
1317 // convert an old style bulk-access struct to the new style
1318 tmp_user_access.flags = accessp->flags;
1319 tmp_user_access.num_files = accessp->num_files;
1320 tmp_user_access.map_size = 0;
1321 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
cf7d32b8 1322 tmp_user_access.bitmap = USER_ADDR_NULL;
2d21ac55
A
1323 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1324 tmp_user_access.num_parents = 0;
1325 user_access_structp = &tmp_user_access;
1326
b0d623f7
A
1327 } else if (arg_size == sizeof(struct user32_ext_access_t)) {
1328 struct user32_ext_access_t *accessp = (struct user32_ext_access_t *)ap->a_data;
2d21ac55
A
1329
1330 // up-cast from a 32-bit version of the struct
1331 tmp_user_access.flags = accessp->flags;
1332 tmp_user_access.num_files = accessp->num_files;
1333 tmp_user_access.map_size = accessp->map_size;
1334 tmp_user_access.num_parents = accessp->num_parents;
1335
1336 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1337 tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
1338 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1339 tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);
1340
1341 user_access_structp = &tmp_user_access;
1342 } else {
1343 error = EINVAL;
1344 goto err_exit_bulk_access;
1345 }
1346
1347 map_size = user_access_structp->map_size;
1348
1349 num_files = user_access_structp->num_files;
1350
1351 num_parents= user_access_structp->num_parents;
1352
1353 if (num_files < 1) {
1354 goto err_exit_bulk_access;
1355 }
1356 if (num_files > 1024) {
1357 error = EINVAL;
1358 goto err_exit_bulk_access;
1359 }
1360
1361 if (num_parents > 1024) {
1362 error = EINVAL;
1363 goto err_exit_bulk_access;
1364 }
1365
1366 file_ids = (int *) kalloc(sizeof(int) * num_files);
1367 access = (short *) kalloc(sizeof(short) * num_files);
1368 if (map_size) {
1369 bitmap = (char *) kalloc(sizeof(char) * map_size);
1370 }
1371
1372 if (num_parents) {
1373 parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
1374 }
1375
1376 cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
1377 cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1378
1379 if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
1380 if (file_ids) {
1381 kfree(file_ids, sizeof(int) * num_files);
1382 }
1383 if (bitmap) {
1384 kfree(bitmap, sizeof(char) * map_size);
1385 }
1386 if (access) {
1387 kfree(access, sizeof(short) * num_files);
1388 }
1389 if (cache.acache) {
1390 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1391 }
1392 if (cache.haveaccess) {
1393 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1394 }
1395 if (parents) {
1396 kfree(parents, sizeof(cnid_t) * num_parents);
1397 }
1398 return ENOMEM;
1399 }
1400
1401 // make sure the bitmap is zero'ed out...
1402 if (bitmap) {
1403 bzero(bitmap, (sizeof(char) * map_size));
1404 }
1405
1406 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1407 num_files * sizeof(int)))) {
1408 goto err_exit_bulk_access;
1409 }
1410
1411 if (num_parents) {
1412 if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
1413 num_parents * sizeof(cnid_t)))) {
1414 goto err_exit_bulk_access;
1415 }
1416 }
1417
1418 flags = user_access_structp->flags;
1419 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1420 flags = R_OK;
1421 }
1422
1423 /* check if we've been passed leaf node ids or parent ids */
1424 if (flags & PARENT_IDS_FLAG) {
1425 check_leaf = false;
1426 }
1427
1428 /* Check access to each file_id passed in */
1429 for (i = 0; i < num_files; i++) {
1430 leaf_index=-1;
1431 cnid = (cnid_t) file_ids[i];
1432
1433 /* root always has access */
1434 if ((!parents) && (!suser(cred, NULL))) {
1435 access[i] = 0;
1436 continue;
1437 }
1438
1439 if (check_leaf) {
1440 /* do the lookup (checks the cnode hash, then the catalog) */
b0d623f7 1441 error = do_attr_lookup(hfsmp, &cache, cnid, skip_cp, &catkey, &cnattr);
2d21ac55
A
1442 if (error) {
1443 access[i] = (short) error;
1444 continue;
1445 }
1446
1447 if (parents) {
1448 // Check if the leaf matches one of the parent scopes
1449 leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
b0d623f7
A
1450 if (leaf_index >= 0 && parents[leaf_index] == cnid)
1451 prev_parent_check_ok = 0;
1452 else if (leaf_index >= 0)
1453 prev_parent_check_ok = 1;
2d21ac55
A
1454 }
1455
1456 // if the thing has acl's, do the full permission check
1457 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1458 struct vnode *cvp;
1459 int myErr = 0;
1460 /* get the vnode for this cnid */
6d2010ae 1461 myErr = hfs_vget(hfsmp, cnid, &cvp, 0, 0);
2d21ac55
A
1462 if ( myErr ) {
1463 access[i] = myErr;
1464 continue;
1465 }
1466
1467 hfs_unlock(VTOC(cvp));
1468
1469 if (vnode_vtype(cvp) == VDIR) {
1470 myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
1471 } else {
1472 myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
1473 }
1474
1475 vnode_put(cvp);
1476 if (myErr) {
1477 access[i] = myErr;
1478 continue;
1479 }
1480 } else {
1481 /* before calling CheckAccess(), check the target file for read access */
1482 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1483 cnattr.ca_mode, hfsmp->hfs_mp, cred, p);
1484
1485 /* fail fast if no access */
1486 if ((myPerms & flags) == 0) {
1487 access[i] = EACCES;
1488 continue;
1489 }
1490 }
1491 } else {
1492 /* we were passed an array of parent ids */
1493 catkey.hfsPlus.parentID = cnid;
1494 }
1495
1496 /* if the last guy had the same parent and had access, we're done */
b0d623f7 1497 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0 && prev_parent_check_ok) {
2d21ac55
A
1498 cache.cachehits++;
1499 access[i] = 0;
1500 continue;
1501 }
316670eb 1502
2d21ac55 1503 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
b0d623f7 1504 skip_cp, p, cred, context,bitmap, map_size, parents, num_parents);
2d21ac55
A
1505
1506 if (myaccess || (error == ESRCH && leaf_index != -1)) {
1507 access[i] = 0; // have access.. no errors to report
1508 } else {
1509 access[i] = (error != 0 ? (short) error : EACCES);
1510 }
1511
1512 prevParent_cnid = catkey.hfsPlus.parentID;
1513 }
1514
1515 /* copyout the access array */
1516 if ((error = copyout((caddr_t)access, user_access_structp->access,
1517 num_files * sizeof (short)))) {
1518 goto err_exit_bulk_access;
1519 }
1520 if (map_size && bitmap) {
1521 if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
1522 map_size * sizeof (char)))) {
1523 goto err_exit_bulk_access;
1524 }
1525 }
1526
1527
1528 err_exit_bulk_access:
1529
2d21ac55
A
1530 if (file_ids)
1531 kfree(file_ids, sizeof(int) * num_files);
1532 if (parents)
1533 kfree(parents, sizeof(cnid_t) * num_parents);
1534 if (bitmap)
1535 kfree(bitmap, sizeof(char) * map_size);
1536 if (access)
1537 kfree(access, sizeof(short) * num_files);
1538 if (cache.acache)
1539 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1540 if (cache.haveaccess)
1541 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1542
1543 return (error);
1544}
1545
1546
1547/* end "bulk-access" support */
1c79356b 1548
1c79356b 1549
91447636
A
1550/*
1551 * Callback for use with freeze ioctl.
1552 */
1553static int
2d21ac55 1554hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
91447636
A
1555{
1556 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
1557
1558 return 0;
1559}
1c79356b 1560
91447636
A
1561/*
1562 * Control filesystem operating characteristics.
1563 */
1c79356b 1564int
91447636
A
1565hfs_vnop_ioctl( struct vnop_ioctl_args /* {
1566 vnode_t a_vp;
9bccf70c
A
1567 int a_command;
1568 caddr_t a_data;
1569 int a_fflag;
91447636
A
1570 vfs_context_t a_context;
1571 } */ *ap)
1c79356b 1572{
91447636
A
1573 struct vnode * vp = ap->a_vp;
1574 struct hfsmount *hfsmp = VTOHFS(vp);
1575 vfs_context_t context = ap->a_context;
1576 kauth_cred_t cred = vfs_context_ucred(context);
1577 proc_t p = vfs_context_proc(context);
1578 struct vfsstatfs *vfsp;
1579 boolean_t is64bit;
b0d623f7
A
1580 off_t jnl_start, jnl_size;
1581 struct hfs_journal_info *jip;
1582#if HFS_COMPRESSION
1583 int compressed = 0;
1584 off_t uncompressed_size = -1;
1585 int decmpfs_error = 0;
1586
1587 if (ap->a_command == F_RDADVISE) {
1588 /* we need to inspect the decmpfs state of the file as early as possible */
1589 compressed = hfs_file_is_compressed(VTOC(vp), 0);
1590 if (compressed) {
1591 if (VNODE_IS_RSRC(vp)) {
1592 /* if this is the resource fork, treat it as if it were empty */
1593 uncompressed_size = 0;
1594 } else {
1595 decmpfs_error = hfs_uncompressed_size_of_compressed_file(NULL, vp, 0, &uncompressed_size, 0);
1596 if (decmpfs_error != 0) {
1597 /* failed to get the uncompressed size, we'll check for this later */
1598 uncompressed_size = -1;
1599 }
1600 }
1601 }
1602 }
1603#endif /* HFS_COMPRESSION */
91447636
A
1604
1605 is64bit = proc_is64bit(p);
1606
6d2010ae
A
1607#if CONFIG_PROTECT
1608 {
1609 int error = 0;
316670eb 1610 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
6d2010ae
A
1611 return error;
1612 }
1613 }
1614#endif /* CONFIG_PROTECT */
1615
9bccf70c 1616 switch (ap->a_command) {
55e303ae 1617
2d21ac55
A
1618 case HFS_GETPATH:
1619 {
1620 struct vnode *file_vp;
1621 cnid_t cnid;
1622 int outlen;
1623 char *bufptr;
1624 int error;
39236c6e 1625 int flags = 0;
2d21ac55
A
1626
1627 /* Caller must be owner of file system. */
1628 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1629 if (suser(cred, NULL) &&
1630 kauth_cred_getuid(cred) != vfsp->f_owner) {
1631 return (EACCES);
1632 }
1633 /* Target vnode must be file system's root. */
1634 if (!vnode_isvroot(vp)) {
1635 return (EINVAL);
1636 }
1637 bufptr = (char *)ap->a_data;
1638 cnid = strtoul(bufptr, NULL, 10);
39236c6e
A
1639 if (ap->a_fflag & HFS_GETPATH_VOLUME_RELATIVE) {
1640 flags |= BUILDPATH_VOLUME_RELATIVE;
1641 }
2d21ac55 1642
b0d623f7
A
1643 /* We need to call hfs_vfs_vget to leverage the code that will
1644 * fix the origin list for us if needed, as opposed to calling
1645 * hfs_vget, since we will need the parent for build_path call.
935ed37a 1646 */
b0d623f7 1647
935ed37a 1648 if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
2d21ac55
A
1649 return (error);
1650 }
39236c6e 1651 error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, flags, context);
2d21ac55
A
1652 vnode_put(file_vp);
1653
1654 return (error);
1655 }
1656
39236c6e
A
1657 case HFS_GET_WRITE_GEN_COUNTER:
1658 {
1659 struct cnode *cp = NULL;
1660 int error;
1661 u_int32_t *counter = (u_int32_t *)ap->a_data;
1662
1663 cp = VTOC(vp);
1664
1665 if (vnode_isdir (vp)) {
1666 error = EISDIR;
1667 *counter = 0;
1668 return error;
1669 }
1670
1671 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
1672 if (error == 0) {
1673 struct ubc_info *uip;
1674 int is_mapped = 0;
1675
1676 if (UBCINFOEXISTS(vp)) {
1677 uip = vp->v_ubcinfo;
1678 if (uip->ui_flags & UI_ISMAPPED) {
1679 is_mapped = 1;
1680 }
1681 }
1682
1683
1684 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
1685 uint32_t gcount = hfs_get_gencount(cp);
1686 //
1687 // Even though we return EBUSY for files that are mmap'ed
1688 // we also want to bump the value so that the write-gen
1689 // counter will always be different once the file is unmapped
1690 // (since the file may be unmapped but the pageouts have not
1691 // yet happened).
1692 //
1693 if (is_mapped) {
1694 hfs_incr_gencount (cp);
1695 gcount = hfs_get_gencount(cp);
1696 }
1697
1698 *counter = gcount;
1699
1700 }
1701 else {
1702 /* not a file or dir? silently return */
1703 *counter = 0;
1704 }
1705 hfs_unlock (cp);
1706
1707 if (is_mapped) {
1708 error = EBUSY;
1709 }
1710 }
1711
1712 return error;
1713 }
1714
2d21ac55
A
1715 case HFS_PREV_LINK:
1716 case HFS_NEXT_LINK:
1717 {
1718 cnid_t linkfileid;
1719 cnid_t nextlinkid;
1720 cnid_t prevlinkid;
1721 int error;
1722
1723 /* Caller must be owner of file system. */
1724 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1725 if (suser(cred, NULL) &&
1726 kauth_cred_getuid(cred) != vfsp->f_owner) {
1727 return (EACCES);
1728 }
1729 /* Target vnode must be file system's root. */
1730 if (!vnode_isvroot(vp)) {
1731 return (EINVAL);
1732 }
1733 linkfileid = *(cnid_t *)ap->a_data;
1734 if (linkfileid < kHFSFirstUserCatalogNodeID) {
1735 return (EINVAL);
1736 }
6d2010ae 1737 if ((error = hfs_lookup_siblinglinks(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
2d21ac55
A
1738 return (error);
1739 }
1740 if (ap->a_command == HFS_NEXT_LINK) {
1741 *(cnid_t *)ap->a_data = nextlinkid;
1742 } else {
1743 *(cnid_t *)ap->a_data = prevlinkid;
1744 }
1745 return (0);
1746 }
1747
0c530ab8
A
1748 case HFS_RESIZE_PROGRESS: {
1749
1750 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1751 if (suser(cred, NULL) &&
1752 kauth_cred_getuid(cred) != vfsp->f_owner) {
1753 return (EACCES); /* must be owner of file system */
1754 }
1755 if (!vnode_isvroot(vp)) {
1756 return (EINVAL);
1757 }
b0d623f7
A
1758 /* file system must not be mounted read-only */
1759 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1760 return (EROFS);
1761 }
1762
0c530ab8
A
1763 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
1764 }
2d21ac55 1765
91447636
A
1766 case HFS_RESIZE_VOLUME: {
1767 u_int64_t newsize;
1768 u_int64_t cursize;
1769
1770 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1771 if (suser(cred, NULL) &&
1772 kauth_cred_getuid(cred) != vfsp->f_owner) {
1773 return (EACCES); /* must be owner of file system */
1774 }
1775 if (!vnode_isvroot(vp)) {
1776 return (EINVAL);
1777 }
b0d623f7
A
1778
1779 /* filesystem must not be mounted read only */
1780 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1781 return (EROFS);
1782 }
91447636
A
1783 newsize = *(u_int64_t *)ap->a_data;
1784 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
1785
1786 if (newsize > cursize) {
1787 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
1788 } else if (newsize < cursize) {
1789 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
1790 } else {
1791 return (0);
1792 }
1793 }
1794 case HFS_CHANGE_NEXT_ALLOCATION: {
2d21ac55 1795 int error = 0; /* Assume success */
91447636
A
1796 u_int32_t location;
1797
1798 if (vnode_vfsisrdonly(vp)) {
1799 return (EROFS);
1800 }
1801 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1802 if (suser(cred, NULL) &&
1803 kauth_cred_getuid(cred) != vfsp->f_owner) {
1804 return (EACCES); /* must be owner of file system */
1805 }
1806 if (!vnode_isvroot(vp)) {
1807 return (EINVAL);
1808 }
39236c6e 1809 hfs_lock_mount(hfsmp);
91447636 1810 location = *(u_int32_t *)ap->a_data;
2d21ac55
A
1811 if ((location >= hfsmp->allocLimit) &&
1812 (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
1813 error = EINVAL;
1814 goto fail_change_next_allocation;
91447636
A
1815 }
1816 /* Return previous value. */
1817 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
2d21ac55
A
1818 if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
1819 /* On magic value for location, set nextAllocation to next block
1820 * after metadata zone and set flag in mount structure to indicate
1821 * that nextAllocation should not be updated again.
1822 */
b0d623f7
A
1823 if (hfsmp->hfs_metazone_end != 0) {
1824 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
1825 }
2d21ac55
A
1826 hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1827 } else {
1828 hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1829 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
1830 }
1831 MarkVCBDirty(hfsmp);
1832fail_change_next_allocation:
39236c6e 1833 hfs_unlock_mount(hfsmp);
2d21ac55 1834 return (error);
91447636
A
1835 }
1836
6d2010ae 1837#if HFS_SPARSE_DEV
55e303ae 1838 case HFS_SETBACKINGSTOREINFO: {
55e303ae
A
1839 struct vnode * bsfs_rootvp;
1840 struct vnode * di_vp;
55e303ae
A
1841 struct hfs_backingstoreinfo *bsdata;
1842 int error = 0;
1843
b0d623f7
A
1844 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1845 return (EROFS);
1846 }
55e303ae
A
1847 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
1848 return (EALREADY);
1849 }
91447636
A
1850 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1851 if (suser(cred, NULL) &&
1852 kauth_cred_getuid(cred) != vfsp->f_owner) {
55e303ae
A
1853 return (EACCES); /* must be owner of file system */
1854 }
1855 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
1856 if (bsdata == NULL) {
1857 return (EINVAL);
1858 }
91447636 1859 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
55e303ae
A
1860 return (error);
1861 }
91447636
A
1862 if ((error = vnode_getwithref(di_vp))) {
1863 file_drop(bsdata->backingfd);
1864 return(error);
55e303ae 1865 }
91447636
A
1866
1867 if (vnode_mount(vp) == vnode_mount(di_vp)) {
1868 (void)vnode_put(di_vp);
1869 file_drop(bsdata->backingfd);
55e303ae
A
1870 return (EINVAL);
1871 }
1872
1873 /*
1874 * Obtain the backing fs root vnode and keep a reference
1875 * on it. This reference will be dropped in hfs_unmount.
1876 */
91447636 1877 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
55e303ae 1878 if (error) {
91447636
A
1879 (void)vnode_put(di_vp);
1880 file_drop(bsdata->backingfd);
55e303ae
A
1881 return (error);
1882 }
91447636
A
1883 vnode_ref(bsfs_rootvp);
1884 vnode_put(bsfs_rootvp);
55e303ae
A
1885
1886 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
6d2010ae 1887
55e303ae 1888 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
6d2010ae
A
1889 /* The free extent cache is managed differently for sparse devices.
1890 * There is a window between which the volume is mounted and the
1891 * device is marked as sparse, so the free extent cache for this
1892 * volume is currently initialized as normal volume (sorted by block
1893 * count). Reset the cache so that it will be rebuilt again
1894 * for sparse device (sorted by start block).
1895 */
1896 ResetVCBFreeExtCache(hfsmp);
1897
55e303ae
A
1898 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
1899 hfsmp->hfs_sparsebandblks *= 4;
1900
39236c6e 1901 /* We check the MNTK_VIRTUALDEV bit instead of marking the dependent process */
2d21ac55 1902
b0d623f7
A
1903 /*
1904 * If the sparse image is on a sparse image file (as opposed to a sparse
1905 * bundle), then we may need to limit the free space to the maximum size
1906 * of a file on that volume. So we query (using pathconf), and if we get
1907 * a meaningful result, we cache the number of blocks for later use in
1908 * hfs_freeblks().
1909 */
1910 hfsmp->hfs_backingfs_maxblocks = 0;
1911 if (vnode_vtype(di_vp) == VREG) {
1912 int terr;
1913 int hostbits;
1914 terr = vn_pathconf(di_vp, _PC_FILESIZEBITS, &hostbits, context);
1915 if (terr == 0 && hostbits != 0 && hostbits < 64) {
1916 u_int64_t hostfilesizemax = ((u_int64_t)1) << hostbits;
1917
1918 hfsmp->hfs_backingfs_maxblocks = hostfilesizemax / hfsmp->blockSize;
1919 }
1920 }
1921
91447636
A
1922 (void)vnode_put(di_vp);
1923 file_drop(bsdata->backingfd);
55e303ae
A
1924 return (0);
1925 }
1926 case HFS_CLRBACKINGSTOREINFO: {
55e303ae
A
1927 struct vnode * tmpvp;
1928
91447636
A
1929 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1930 if (suser(cred, NULL) &&
1931 kauth_cred_getuid(cred) != vfsp->f_owner) {
55e303ae
A
1932 return (EACCES); /* must be owner of file system */
1933 }
b0d623f7
A
1934 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
1935 return (EROFS);
1936 }
1937
55e303ae
A
1938 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
1939 hfsmp->hfs_backingfs_rootvp) {
1940
1941 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
1942 tmpvp = hfsmp->hfs_backingfs_rootvp;
1943 hfsmp->hfs_backingfs_rootvp = NULLVP;
1944 hfsmp->hfs_sparsebandblks = 0;
91447636 1945 vnode_rele(tmpvp);
55e303ae
A
1946 }
1947 return (0);
1948 }
1949#endif /* HFS_SPARSE_DEV */
1950
316670eb
A
1951 /* Change the next CNID stored in the VH */
1952 case HFS_CHANGE_NEXTCNID: {
1953 int error = 0; /* Assume success */
1954 u_int32_t fileid;
1955 int wraparound = 0;
1956 int lockflags = 0;
1957
1958 if (vnode_vfsisrdonly(vp)) {
1959 return (EROFS);
1960 }
1961 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1962 if (suser(cred, NULL) &&
1963 kauth_cred_getuid(cred) != vfsp->f_owner) {
1964 return (EACCES); /* must be owner of file system */
1965 }
1966
1967 fileid = *(u_int32_t *)ap->a_data;
1968
1969 /* Must have catalog lock excl. to advance the CNID pointer */
1970 lockflags = hfs_systemfile_lock (hfsmp, SFL_CATALOG , HFS_EXCLUSIVE_LOCK);
1971
39236c6e
A
1972 hfs_lock_mount(hfsmp);
1973
316670eb
A
1974 /* If it is less than the current next CNID, force the wraparound bit to be set */
1975 if (fileid < hfsmp->vcbNxtCNID) {
1976 wraparound=1;
1977 }
1978
1979 /* Return previous value. */
1980 *(u_int32_t *)ap->a_data = hfsmp->vcbNxtCNID;
1981
1982 hfsmp->vcbNxtCNID = fileid;
1983
1984 if (wraparound) {
1985 hfsmp->vcbAtrb |= kHFSCatalogNodeIDsReusedMask;
1986 }
1987
1988 MarkVCBDirty(hfsmp);
39236c6e 1989 hfs_unlock_mount(hfsmp);
316670eb
A
1990 hfs_systemfile_unlock (hfsmp, lockflags);
1991
1992 return (error);
1993 }
1994
91447636
A
1995 case F_FREEZE_FS: {
1996 struct mount *mp;
91447636 1997
91447636
A
1998 mp = vnode_mount(vp);
1999 hfsmp = VFSTOHFS(mp);
2000
2001 if (!(hfsmp->jnl))
2002 return (ENOTSUP);
3a60a9f5 2003
b0d623f7
A
2004 vfsp = vfs_statfs(mp);
2005
2006 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
2007 !kauth_cred_issuser(cred))
2008 return (EACCES);
2009
3a60a9f5 2010 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
91447636 2011
91447636
A
2012 // flush things before we get started to try and prevent
2013 // dirty data from being paged out while we're frozen.
2014 // note: can't do this after taking the lock as it will
2015 // deadlock against ourselves.
2016 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
6d2010ae 2017 hfs_lock_global (hfsmp, HFS_EXCLUSIVE_LOCK);
b0d623f7
A
2018
2019 // DO NOT call hfs_journal_flush() because that takes a
2020 // shared lock on the global exclusive lock!
6d2010ae 2021 journal_flush(hfsmp->jnl, TRUE);
3a60a9f5 2022
91447636
A
2023 // don't need to iterate on all vnodes, we just need to
2024 // wait for writes to the system files and the device vnode
6d2010ae
A
2025 //
2026 // Now that journal flush waits for all metadata blocks to
2027 // be written out, waiting for btree writes is probably no
2028 // longer required.
91447636
A
2029 if (HFSTOVCB(hfsmp)->extentsRefNum)
2030 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
2031 if (HFSTOVCB(hfsmp)->catalogRefNum)
2032 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
2033 if (HFSTOVCB(hfsmp)->allocationsRefNum)
2034 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
2035 if (hfsmp->hfs_attribute_vp)
2036 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
2037 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
2038
2039 hfsmp->hfs_freezing_proc = current_proc();
2040
2041 return (0);
2042 }
2043
2044 case F_THAW_FS: {
b0d623f7
A
2045 vfsp = vfs_statfs(vnode_mount(vp));
2046 if (kauth_cred_getuid(cred) != vfsp->f_owner &&
2047 !kauth_cred_issuser(cred))
91447636
A
2048 return (EACCES);
2049
2050 // if we're not the one who froze the fs then we
2051 // can't thaw it.
2052 if (hfsmp->hfs_freezing_proc != current_proc()) {
3a60a9f5 2053 return EPERM;
91447636
A
2054 }
2055
2056 // NOTE: if you add code here, also go check the
2057 // code that "thaws" the fs in hfs_vnop_close()
2058 //
2059 hfsmp->hfs_freezing_proc = NULL;
6d2010ae 2060 hfs_unlock_global (hfsmp);
3a60a9f5 2061 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
91447636
A
2062
2063 return (0);
2064 }
2065
2d21ac55
A
2066 case HFS_BULKACCESS_FSCTL: {
2067 int size;
2068
2069 if (hfsmp->hfs_flags & HFS_STANDARD) {
2070 return EINVAL;
2071 }
91447636 2072
2d21ac55 2073 if (is64bit) {
b0d623f7 2074 size = sizeof(struct user64_access_t);
2d21ac55 2075 } else {
b0d623f7 2076 size = sizeof(struct user32_access_t);
2d21ac55
A
2077 }
2078
2079 return do_bulk_access_check(hfsmp, vp, ap, size, context);
2080 }
91447636 2081
2d21ac55
A
2082 case HFS_EXT_BULKACCESS_FSCTL: {
2083 int size;
2084
2085 if (hfsmp->hfs_flags & HFS_STANDARD) {
2086 return EINVAL;
2087 }
91447636 2088
2d21ac55 2089 if (is64bit) {
b0d623f7 2090 size = sizeof(struct user64_ext_access_t);
2d21ac55 2091 } else {
b0d623f7 2092 size = sizeof(struct user32_ext_access_t);
2d21ac55
A
2093 }
2094
2095 return do_bulk_access_check(hfsmp, vp, ap, size, context);
2096 }
91447636 2097
2d21ac55
A
2098 case HFS_SET_XATTREXTENTS_STATE: {
2099 int state;
2100
2101 if (ap->a_data == NULL) {
2102 return (EINVAL);
2103 }
2104
2105 state = *(int *)ap->a_data;
b0d623f7
A
2106
2107 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2108 return (EROFS);
2109 }
2d21ac55
A
2110
2111 /* Super-user can enable or disable extent-based extended
2112 * attribute support on a volume
6d2010ae
A
2113 * Note: Starting Mac OS X 10.7, extent-based extended attributes
2114 * are enabled by default, so any change will be transient only
2115 * till the volume is remounted.
2d21ac55 2116 */
39236c6e 2117 if (!kauth_cred_issuser(kauth_cred_get())) {
2d21ac55
A
2118 return (EPERM);
2119 }
2120 if (state == 0 || state == 1)
2121 return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
91447636
A
2122 else
2123 return (EINVAL);
2124 }
2125
316670eb
A
2126 case F_SETSTATICCONTENT: {
2127 int error;
2128 int enable_static = 0;
2129 struct cnode *cp = NULL;
2130 /*
2131 * lock the cnode, decorate the cnode flag, and bail out.
2132 * VFS should have already authenticated the caller for us.
2133 */
2134
2135 if (ap->a_data) {
2136 /*
2137 * Note that even though ap->a_data is of type caddr_t,
2138 * the fcntl layer at the syscall handler will pass in NULL
2139 * or 1 depending on what the argument supplied to the fcntl
2140 * was. So it is in fact correct to check the ap->a_data
2141 * argument for zero or non-zero value when deciding whether or not
2142 * to enable the static bit in the cnode.
2143 */
2144 enable_static = 1;
2145 }
2146 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2147 return EROFS;
2148 }
2149 cp = VTOC(vp);
2150
39236c6e 2151 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
316670eb
A
2152 if (error == 0) {
2153 if (enable_static) {
2154 cp->c_flag |= C_SSD_STATIC;
2155 }
2156 else {
2157 cp->c_flag &= ~C_SSD_STATIC;
2158 }
2159 hfs_unlock (cp);
2160 }
2161 return error;
2162 }
2163
39236c6e
A
2164 case F_SET_GREEDY_MODE: {
2165 int error;
2166 int enable_greedy_mode = 0;
2167 struct cnode *cp = NULL;
2168 /*
2169 * lock the cnode, decorate the cnode flag, and bail out.
2170 * VFS should have already authenticated the caller for us.
2171 */
2172
2173 if (ap->a_data) {
2174 /*
2175 * Note that even though ap->a_data is of type caddr_t,
2176 * the fcntl layer at the syscall handler will pass in NULL
2177 * or 1 depending on what the argument supplied to the fcntl
2178 * was. So it is in fact correct to check the ap->a_data
2179 * argument for zero or non-zero value when deciding whether or not
2180 * to enable the greedy mode bit in the cnode.
2181 */
2182 enable_greedy_mode = 1;
2183 }
2184 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2185 return EROFS;
2186 }
2187 cp = VTOC(vp);
2188
2189 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2190 if (error == 0) {
2191 if (enable_greedy_mode) {
2192 cp->c_flag |= C_SSD_GREEDY_MODE;
2193 }
2194 else {
2195 cp->c_flag &= ~C_SSD_GREEDY_MODE;
2196 }
2197 hfs_unlock (cp);
2198 }
2199 return error;
2200 }
2201
2202 case F_MAKECOMPRESSED: {
2203 int error = 0;
2204 uint32_t gen_counter;
2205 struct cnode *cp = NULL;
2206 int reset_decmp = 0;
2207
2208 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2209 return EROFS;
2210 }
2211
2212 /*
2213 * acquire & lock the cnode.
2214 * VFS should have already authenticated the caller for us.
2215 */
2216
2217 if (ap->a_data) {
2218 /*
2219 * Cast the pointer into a uint32_t so we can extract the
2220 * supplied generation counter.
2221 */
2222 gen_counter = *((uint32_t*)ap->a_data);
2223 }
2224 else {
2225 return EINVAL;
2226 }
2227
2228#if HFS_COMPRESSION
2229 cp = VTOC(vp);
2230 /* Grab truncate lock first; we may truncate the file */
2231 hfs_lock_truncate (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2232
2233 error = hfs_lock (cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2234 if (error) {
2235 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2236 return error;
2237 }
2238
2239 /* Are there any other usecounts/FDs? */
2240 if (vnode_isinuse(vp, 1)) {
2241 hfs_unlock(cp);
2242 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2243 return EBUSY;
2244 }
2245
2246
2247 /* now we have the cnode locked down; Validate arguments */
2248 if (cp->c_attr.ca_flags & (UF_IMMUTABLE | UF_COMPRESSED)) {
2249 /* EINVAL if you are trying to manipulate an IMMUTABLE file */
2250 hfs_unlock(cp);
2251 hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
2252 return EINVAL;
2253 }
2254
2255 if ((hfs_get_gencount (cp)) == gen_counter) {
2256 /*
2257 * OK, the gen_counter matched. Go for it:
2258 * Toggle state bits, truncate file, and suppress mtime update
2259 */
2260 reset_decmp = 1;
2261 cp->c_bsdflags |= UF_COMPRESSED;
2262
2263 error = hfs_truncate(vp, 0, IO_NDELAY, 0, (HFS_TRUNCATE_SKIPTIMES), ap->a_context);
2264 }
2265 else {
2266 error = ESTALE;
2267 }
2268
2269 /* Unlock cnode before executing decmpfs ; they may need to get an EA */
2270 hfs_unlock(cp);
2271
2272 /*
2273 * Reset the decmp state while still holding the truncate lock. We need to
2274 * serialize here against a listxattr on this node which may occur at any
2275 * time.
2276 *
2277 * Even if '0/skiplock' is passed in 2nd argument to hfs_file_is_compressed,
2278 * that will still potentially require getting the com.apple.decmpfs EA. If the
2279 * EA is required, then we can't hold the cnode lock, because the getxattr call is
2280 * generic(through VFS), and can't pass along any info telling it that we're already
2281 * holding it (the lock). If we don't serialize, then we risk listxattr stopping
2282 * and trying to fill in the hfs_file_is_compressed info during the callback
2283 * operation, which will result in deadlock against the b-tree node.
2284 *
2285 * So, to serialize against listxattr (which will grab buf_t meta references on
2286 * the b-tree blocks), we hold the truncate lock as we're manipulating the
2287 * decmpfs payload.
2288 */
2289 if ((reset_decmp) && (error == 0)) {
2290 decmpfs_cnode *dp = VTOCMP (vp);
2291 if (dp != NULL) {
2292 decmpfs_cnode_set_vnode_state(dp, FILE_TYPE_UNKNOWN, 0);
2293 }
2294
2295 /* Initialize the decmpfs node as needed */
2296 (void) hfs_file_is_compressed (cp, 0); /* ok to take lock */
2297 }
2298
2299 hfs_unlock_truncate (cp, HFS_LOCK_DEFAULT);
2300
2301#endif
2302 return error;
2303 }
2304
316670eb
A
2305 case F_SETBACKINGSTORE: {
2306
2307 int error = 0;
2308
2309 /*
2310 * See comment in F_SETSTATICCONTENT re: using
2311 * a null check for a_data
2312 */
2313 if (ap->a_data) {
2314 error = hfs_set_backingstore (vp, 1);
2315 }
2316 else {
2317 error = hfs_set_backingstore (vp, 0);
2318 }
2319
2320 return error;
2321 }
2322
2323 case F_GETPATH_MTMINFO: {
2324 int error = 0;
2325
2326 int *data = (int*) ap->a_data;
2327
2328 /* Ask if this is a backingstore vnode */
2329 error = hfs_is_backingstore (vp, data);
2330
2331 return error;
2332 }
2333
91447636 2334 case F_FULLFSYNC: {
55e303ae 2335 int error;
b0d623f7
A
2336
2337 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2338 return (EROFS);
2339 }
39236c6e 2340 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
91447636 2341 if (error == 0) {
2d21ac55 2342 error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
91447636
A
2343 hfs_unlock(VTOC(vp));
2344 }
55e303ae
A
2345
2346 return error;
2347 }
91447636
A
2348
2349 case F_CHKCLEAN: {
9bccf70c 2350 register struct cnode *cp;
55e303ae
A
2351 int error;
2352
91447636 2353 if (!vnode_isreg(vp))
55e303ae
A
2354 return EINVAL;
2355
39236c6e 2356 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
91447636
A
2357 if (error == 0) {
2358 cp = VTOC(vp);
2359 /*
2360 * used by regression test to determine if
2361 * all the dirty pages (via write) have been cleaned
2362 * after a call to 'fsysnc'.
2363 */
2364 error = is_file_clean(vp, VTOF(vp)->ff_size);
2365 hfs_unlock(cp);
2366 }
55e303ae
A
2367 return (error);
2368 }
2369
91447636 2370 case F_RDADVISE: {
9bccf70c
A
2371 register struct radvisory *ra;
2372 struct filefork *fp;
9bccf70c
A
2373 int error;
2374
91447636 2375 if (!vnode_isreg(vp))
9bccf70c
A
2376 return EINVAL;
2377
9bccf70c 2378 ra = (struct radvisory *)(ap->a_data);
9bccf70c
A
2379 fp = VTOF(vp);
2380
91447636 2381 /* Protect against a size change. */
39236c6e 2382 hfs_lock_truncate(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
91447636 2383
b0d623f7
A
2384#if HFS_COMPRESSION
2385 if (compressed && (uncompressed_size == -1)) {
2386 /* fetching the uncompressed size failed above, so return the error */
2387 error = decmpfs_error;
2388 } else if ((compressed && (ra->ra_offset >= uncompressed_size)) ||
2389 (!compressed && (ra->ra_offset >= fp->ff_size))) {
2390 error = EFBIG;
2391 }
2392#else /* HFS_COMPRESSION */
9bccf70c 2393 if (ra->ra_offset >= fp->ff_size) {
91447636 2394 error = EFBIG;
b0d623f7
A
2395 }
2396#endif /* HFS_COMPRESSION */
2397 else {
91447636 2398 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
9bccf70c 2399 }
1c79356b 2400
39236c6e 2401 hfs_unlock_truncate(VTOC(vp), HFS_LOCK_DEFAULT);
9bccf70c 2402 return (error);
1c79356b 2403 }
1c79356b 2404
91447636
A
2405 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
2406 {
2407 if (is64bit) {
2408 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
2409 }
2410 else {
b0d623f7 2411 *(user32_time_t *)(ap->a_data) = (user32_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
91447636
A
2412 }
2413 return 0;
2414 }
2415
b0d623f7
A
2416 case SPOTLIGHT_FSCTL_GET_MOUNT_TIME:
2417 *(uint32_t *)ap->a_data = hfsmp->hfs_mount_time;
2418 break;
2419
2420 case SPOTLIGHT_FSCTL_GET_LAST_MTIME:
2421 *(uint32_t *)ap->a_data = hfsmp->hfs_last_mounted_mtime;
2422 break;
2423
316670eb
A
2424 case HFS_FSCTL_GET_VERY_LOW_DISK:
2425 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_dangerlimit;
2426 break;
2427
b0d623f7
A
2428 case HFS_FSCTL_SET_VERY_LOW_DISK:
2429 if (*(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_warninglimit) {
2430 return EINVAL;
e2fac8b1 2431 }
91447636 2432
b0d623f7
A
2433 hfsmp->hfs_freespace_notify_dangerlimit = *(uint32_t *)ap->a_data;
2434 break;
2435
316670eb
A
2436 case HFS_FSCTL_GET_LOW_DISK:
2437 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_warninglimit;
2438 break;
2439
b0d623f7
A
2440 case HFS_FSCTL_SET_LOW_DISK:
2441 if ( *(uint32_t *)ap->a_data >= hfsmp->hfs_freespace_notify_desiredlevel
2442 || *(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_dangerlimit) {
2443
2444 return EINVAL;
e2fac8b1 2445 }
b0d623f7
A
2446
2447 hfsmp->hfs_freespace_notify_warninglimit = *(uint32_t *)ap->a_data;
2448 break;
2449
316670eb
A
2450 case HFS_FSCTL_GET_DESIRED_DISK:
2451 *(uint32_t*)ap->a_data = hfsmp->hfs_freespace_notify_desiredlevel;
2452 break;
2453
b0d623f7
A
2454 case HFS_FSCTL_SET_DESIRED_DISK:
2455 if (*(uint32_t *)ap->a_data <= hfsmp->hfs_freespace_notify_warninglimit) {
2456 return EINVAL;
2457 }
2458
2459 hfsmp->hfs_freespace_notify_desiredlevel = *(uint32_t *)ap->a_data;
2460 break;
2461
2462 case HFS_VOLUME_STATUS:
2463 *(uint32_t *)ap->a_data = hfsmp->hfs_notification_conditions;
2464 break;
91447636
A
2465
2466 case HFS_SET_BOOT_INFO:
2467 if (!vnode_isvroot(vp))
2468 return(EINVAL);
2469 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
2470 return(EACCES); /* must be superuser or owner of filesystem */
b0d623f7
A
2471 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2472 return (EROFS);
2473 }
39236c6e 2474 hfs_lock_mount (hfsmp);
91447636 2475 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
39236c6e 2476 hfs_unlock_mount (hfsmp);
91447636
A
2477 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
2478 break;
2479
2480 case HFS_GET_BOOT_INFO:
2481 if (!vnode_isvroot(vp))
2482 return(EINVAL);
39236c6e 2483 hfs_lock_mount (hfsmp);
91447636 2484 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
39236c6e 2485 hfs_unlock_mount(hfsmp);
91447636
A
2486 break;
2487
2d21ac55
A
2488 case HFS_MARK_BOOT_CORRUPT:
2489 /* Mark the boot volume corrupt by setting
2490 * kHFSVolumeInconsistentBit in the volume header. This will
2491 * force fsck_hfs on next mount.
2492 */
39236c6e 2493 if (!kauth_cred_issuser(kauth_cred_get())) {
2d21ac55
A
2494 return EACCES;
2495 }
b0d623f7 2496
2d21ac55
A
2497 /* Allowed only on the root vnode of the boot volume */
2498 if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
2499 !vnode_isvroot(vp)) {
2500 return EINVAL;
2501 }
b0d623f7
A
2502 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2503 return (EROFS);
2504 }
2d21ac55
A
2505 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
2506 hfs_mark_volume_inconsistent(hfsmp);
2507 break;
2508
b0d623f7
A
2509 case HFS_FSCTL_GET_JOURNAL_INFO:
2510 jip = (struct hfs_journal_info*)ap->a_data;
2511
2512 if (vp == NULLVP)
2513 return EINVAL;
2514
2515 if (hfsmp->jnl == NULL) {
2516 jnl_start = 0;
2517 jnl_size = 0;
2518 } else {
2519 jnl_start = (off_t)(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset;
2520 jnl_size = (off_t)hfsmp->jnl_size;
2521 }
2522
2523 jip->jstart = jnl_start;
2524 jip->jsize = jnl_size;
2525 break;
2526
2527 case HFS_SET_ALWAYS_ZEROFILL: {
2528 struct cnode *cp = VTOC(vp);
2529
2530 if (*(int *)ap->a_data) {
2531 cp->c_flag |= C_ALWAYS_ZEROFILL;
2532 } else {
2533 cp->c_flag &= ~C_ALWAYS_ZEROFILL;
2534 }
2535 break;
2536 }
2537
6d2010ae
A
2538 case HFS_DISABLE_METAZONE: {
2539 /* Only root can disable metadata zone */
39236c6e 2540 if (!kauth_cred_issuser(kauth_cred_get())) {
6d2010ae
A
2541 return EACCES;
2542 }
2543 if (hfsmp->hfs_flags & HFS_READ_ONLY) {
2544 return (EROFS);
2545 }
2546
2547 /* Disable metadata zone now */
2548 (void) hfs_metadatazone_init(hfsmp, true);
2549 printf ("hfs: Disabling metadata zone on %s\n", hfsmp->vcbVN);
2550 break;
2551 }
2552
91447636
A
2553 default:
2554 return (ENOTTY);
2555 }
1c79356b 2556
0b4e3aa0 2557 return 0;
1c79356b
A
2558}
2559
91447636
A
2560/*
2561 * select
2562 */
1c79356b 2563int
91447636
A
2564hfs_vnop_select(__unused struct vnop_select_args *ap)
2565/*
2566 struct vnop_select_args {
2567 vnode_t a_vp;
9bccf70c
A
2568 int a_which;
2569 int a_fflags;
9bccf70c 2570 void *a_wql;
91447636
A
2571 vfs_context_t a_context;
2572 };
2573*/
1c79356b 2574{
9bccf70c
A
2575 /*
2576 * We should really check to see if I/O is possible.
2577 */
2578 return (1);
1c79356b
A
2579}
2580
1c79356b
A
2581/*
2582 * Converts a logical block number to a physical block, and optionally returns
2583 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
2584 * The physical block number is based on the device block size, currently its 512.
2585 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
2586 */
1c79356b 2587int
2d21ac55 2588hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
1c79356b 2589{
9bccf70c
A
2590 struct filefork *fp = VTOF(vp);
2591 struct hfsmount *hfsmp = VTOHFS(vp);
91447636 2592 int retval = E_NONE;
2d21ac55 2593 u_int32_t logBlockSize;
91447636
A
2594 size_t bytesContAvail = 0;
2595 off_t blockposition;
2596 int lockExtBtree;
2597 int lockflags = 0;
1c79356b 2598
9bccf70c
A
2599 /*
2600 * Check for underlying vnode requests and ensure that logical
2601 * to physical mapping is requested.
2602 */
91447636 2603 if (vpp != NULL)
2d21ac55 2604 *vpp = hfsmp->hfs_devvp;
91447636 2605 if (bnp == NULL)
9bccf70c
A
2606 return (0);
2607
9bccf70c 2608 logBlockSize = GetLogicalBlockSize(vp);
2d21ac55 2609 blockposition = (off_t)bn * logBlockSize;
9bccf70c
A
2610
2611 lockExtBtree = overflow_extents(fp);
91447636
A
2612
2613 if (lockExtBtree)
2d21ac55 2614 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
1c79356b 2615
9bccf70c 2616 retval = MacToVFSError(
0b4e3aa0 2617 MapFileBlockC (HFSTOVCB(hfsmp),
9bccf70c 2618 (FCB*)fp,
0b4e3aa0
A
2619 MAXPHYSIO,
2620 blockposition,
91447636 2621 bnp,
0b4e3aa0 2622 &bytesContAvail));
1c79356b 2623
91447636
A
2624 if (lockExtBtree)
2625 hfs_systemfile_unlock(hfsmp, lockflags);
1c79356b 2626
91447636
A
2627 if (retval == E_NONE) {
2628 /* Figure out how many read ahead blocks there are */
2629 if (runp != NULL) {
2630 if (can_cluster(logBlockSize)) {
2631 /* Make sure this result never goes negative: */
2632 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
2633 } else {
2634 *runp = 0;
2635 }
2636 }
2637 }
2638 return (retval);
2639}
1c79356b 2640
91447636
A
2641/*
2642 * Convert logical block number to file offset.
2643 */
1c79356b 2644int
91447636
A
2645hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
2646/*
2647 struct vnop_blktooff_args {
2648 vnode_t a_vp;
2649 daddr64_t a_lblkno;
9bccf70c 2650 off_t *a_offset;
91447636
A
2651 };
2652*/
1c79356b
A
2653{
2654 if (ap->a_vp == NULL)
2655 return (EINVAL);
91447636 2656 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1c79356b
A
2657
2658 return(0);
2659}
2660
91447636
A
2661/*
2662 * Convert file offset to logical block number.
2663 */
1c79356b 2664int
91447636
A
2665hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
2666/*
2667 struct vnop_offtoblk_args {
2668 vnode_t a_vp;
9bccf70c 2669 off_t a_offset;
91447636
A
2670 daddr64_t *a_lblkno;
2671 };
2672*/
1c79356b 2673{
1c79356b
A
2674 if (ap->a_vp == NULL)
2675 return (EINVAL);
91447636 2676 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1c79356b
A
2677
2678 return(0);
2679}
2680
91447636
A
2681/*
2682 * Map file offset to physical block number.
2683 *
2d21ac55
A
2684 * If this function is called for write operation, and if the file
2685 * had virtual blocks allocated (delayed allocation), real blocks
2686 * are allocated by calling ExtendFileC().
2687 *
2688 * If this function is called for read operation, and if the file
2689 * had virtual blocks allocated (delayed allocation), no change
2690 * to the size of file is done, and if required, rangelist is
2691 * searched for mapping.
2692 *
91447636
A
2693 * System file cnodes are expected to be locked (shared or exclusive).
2694 */
1c79356b 2695int
91447636
A
2696hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
2697/*
2698 struct vnop_blockmap_args {
2699 vnode_t a_vp;
9bccf70c
A
2700 off_t a_foffset;
2701 size_t a_size;
91447636 2702 daddr64_t *a_bpn;
9bccf70c
A
2703 size_t *a_run;
2704 void *a_poff;
91447636
A
2705 int a_flags;
2706 vfs_context_t a_context;
2707 };
2708*/
1c79356b 2709{
91447636
A
2710 struct vnode *vp = ap->a_vp;
2711 struct cnode *cp;
2712 struct filefork *fp;
2713 struct hfsmount *hfsmp;
2714 size_t bytesContAvail = 0;
2715 int retval = E_NONE;
2716 int syslocks = 0;
2717 int lockflags = 0;
2718 struct rl_entry *invalid_range;
2719 enum rl_overlaptype overlaptype;
2720 int started_tr = 0;
2721 int tooklock = 0;
1c79356b 2722
b0d623f7
A
2723#if HFS_COMPRESSION
2724 if (VNODE_IS_RSRC(vp)) {
2725 /* allow blockmaps to the resource fork */
2726 } else {
2727 if ( hfs_file_is_compressed(VTOC(vp), 1) ) { /* 1 == don't take the cnode lock */
2728 int state = decmpfs_cnode_get_vnode_state(VTOCMP(vp));
2729 switch(state) {
2730 case FILE_IS_COMPRESSED:
2731 return ENOTSUP;
2732 case FILE_IS_CONVERTING:
2733 /* if FILE_IS_CONVERTING, we allow blockmap */
2734 break;
2735 default:
2736 printf("invalid state %d for compressed file\n", state);
2737 /* fall through */
2738 }
2739 }
2740 }
2741#endif /* HFS_COMPRESSION */
2742
3a60a9f5
A
2743 /* Do not allow blockmap operation on a directory */
2744 if (vnode_isdir(vp)) {
2745 return (ENOTSUP);
2746 }
2747
9bccf70c
A
2748 /*
2749 * Check for underlying vnode requests and ensure that logical
2750 * to physical mapping is requested.
2751 */
2752 if (ap->a_bpn == NULL)
2753 return (0);
2754
2d21ac55 2755 if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
91447636 2756 if (VTOC(vp)->c_lockowner != current_thread()) {
39236c6e 2757 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
91447636 2758 tooklock = 1;
91447636
A
2759 }
2760 }
2761 hfsmp = VTOHFS(vp);
2762 cp = VTOC(vp);
2763 fp = VTOF(vp);
55e303ae 2764
91447636 2765retry:
2d21ac55
A
2766 /* Check virtual blocks only when performing write operation */
2767 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
91447636
A
2768 if (hfs_start_transaction(hfsmp) != 0) {
2769 retval = EINVAL;
2770 goto exit;
2771 } else {
2772 started_tr = 1;
b4c24cb9 2773 }
91447636
A
2774 syslocks = SFL_EXTENTS | SFL_BITMAP;
2775
b4c24cb9 2776 } else if (overflow_extents(fp)) {
91447636 2777 syslocks = SFL_EXTENTS;
9bccf70c 2778 }
91447636
A
2779
2780 if (syslocks)
2781 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1c79356b 2782
9bccf70c
A
2783 /*
2784 * Check for any delayed allocations.
2785 */
2d21ac55
A
2786 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
2787 int64_t actbytes;
91447636 2788 u_int32_t loanedBlocks;
1c79356b 2789
55e303ae 2790 //
d12e1678
A
2791 // Make sure we have a transaction. It's possible
2792 // that we came in and fp->ff_unallocblocks was zero
2793 // but during the time we blocked acquiring the extents
2794 // btree, ff_unallocblocks became non-zero and so we
2795 // will need to start a transaction.
2796 //
91447636
A
2797 if (started_tr == 0) {
2798 if (syslocks) {
2799 hfs_systemfile_unlock(hfsmp, lockflags);
2800 syslocks = 0;
2801 }
2802 goto retry;
d12e1678
A
2803 }
2804
9bccf70c 2805 /*
91447636
A
2806 * Note: ExtendFileC will Release any blocks on loan and
2807 * aquire real blocks. So we ask to extend by zero bytes
2808 * since ExtendFileC will account for the virtual blocks.
9bccf70c 2809 */
9bccf70c 2810
91447636
A
2811 loanedBlocks = fp->ff_unallocblocks;
2812 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
2813 kEFAllMask | kEFNoClumpMask, &actbytes);
2814
2815 if (retval) {
2816 fp->ff_unallocblocks = loanedBlocks;
2817 cp->c_blocks += loanedBlocks;
2818 fp->ff_blocks += loanedBlocks;
2819
39236c6e 2820 hfs_lock_mount (hfsmp);
91447636 2821 hfsmp->loanedBlocks += loanedBlocks;
39236c6e 2822 hfs_unlock_mount (hfsmp);
1c79356b 2823
91447636
A
2824 hfs_systemfile_unlock(hfsmp, lockflags);
2825 cp->c_flag |= C_MODIFIED;
b4c24cb9 2826 if (started_tr) {
91447636
A
2827 (void) hfs_update(vp, TRUE);
2828 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
55e303ae 2829
91447636 2830 hfs_end_transaction(hfsmp);
2d21ac55 2831 started_tr = 0;
b4c24cb9 2832 }
91447636 2833 goto exit;
b4c24cb9 2834 }
9bccf70c
A
2835 }
2836
91447636
A
2837 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
2838 ap->a_bpn, &bytesContAvail);
2839 if (syslocks) {
2840 hfs_systemfile_unlock(hfsmp, lockflags);
2841 syslocks = 0;
2842 }
1c79356b 2843
b4c24cb9 2844 if (started_tr) {
91447636
A
2845 (void) hfs_update(vp, TRUE);
2846 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2847 hfs_end_transaction(hfsmp);
b4c24cb9 2848 started_tr = 0;
91447636
A
2849 }
2850 if (retval) {
2d21ac55
A
2851 /* On write, always return error because virtual blocks, if any,
2852 * should have been allocated in ExtendFileC(). We do not
2853 * allocate virtual blocks on read, therefore return error
2854 * only if no virtual blocks are allocated. Otherwise we search
2855 * rangelist for zero-fills
2856 */
2857 if ((MacToVFSError(retval) != ERANGE) ||
2858 (ap->a_flags & VNODE_WRITE) ||
2859 ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
2860 goto exit;
2861 }
2862
2863 /* Validate if the start offset is within logical file size */
316670eb 2864 if (ap->a_foffset >= fp->ff_size) {
39236c6e 2865 goto exit;
2d21ac55
A
2866 }
2867
316670eb
A
2868 /*
2869 * At this point, we have encountered a failure during
2870 * MapFileBlockC that resulted in ERANGE, and we are not servicing
2871 * a write, and there are borrowed blocks.
2872 *
2873 * However, the cluster layer will not call blockmap for
2874 * blocks that are borrowed and in-cache. We have to assume that
2875 * because we observed ERANGE being emitted from MapFileBlockC, this
2876 * extent range is not valid on-disk. So we treat this as a
2877 * mapping that needs to be zero-filled prior to reading.
2878 *
2879 * Note that under certain circumstances (such as non-contiguous
2880 * userland VM mappings in the calling process), cluster_io
2881 * may be forced to split a large I/O driven by hfs_vnop_write
2882 * into multiple sub-I/Os that necessitate a RMW cycle. If this is
2883 * the case here, then we have already removed the invalid range list
2884 * mapping prior to getting to this blockmap call, so we should not
2885 * search the invalid rangelist for this byte range.
2d21ac55 2886 */
316670eb
A
2887
2888 bytesContAvail = fp->ff_size - ap->a_foffset;
2889 /*
2890 * Clip the contiguous available bytes to, at most, the allowable
2891 * maximum or the amount requested.
2892 */
2893
2894 if (bytesContAvail > ap->a_size) {
2895 bytesContAvail = ap->a_size;
2d21ac55 2896 }
316670eb
A
2897
2898 *ap->a_bpn = (daddr64_t) -1;
2899 retval = 0;
2900
91447636
A
2901 goto exit;
2902 }
1c79356b 2903
2d21ac55
A
2904 /* MapFileC() found a valid extent in the filefork. Search the
2905 * mapping information further for invalid file ranges
2906 */
91447636
A
2907 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
2908 ap->a_foffset + (off_t)bytesContAvail - 1,
2909 &invalid_range);
2910 if (overlaptype != RL_NOOVERLAP) {
2911 switch(overlaptype) {
2912 case RL_MATCHINGOVERLAP:
2913 case RL_OVERLAPCONTAINSRANGE:
2914 case RL_OVERLAPSTARTSBEFORE:
2d21ac55 2915 /* There's no valid block for this byte offset */
91447636
A
2916 *ap->a_bpn = (daddr64_t)-1;
2917 /* There's no point limiting the amount to be returned
2918 * if the invalid range that was hit extends all the way
2919 * to the EOF (i.e. there's no valid bytes between the
2920 * end of this range and the file's EOF):
2921 */
2922 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
b0d623f7 2923 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
91447636
A
2924 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
2925 }
2926 break;
9bccf70c 2927
91447636
A
2928 case RL_OVERLAPISCONTAINED:
2929 case RL_OVERLAPENDSAFTER:
2930 /* The range of interest hits an invalid block before the end: */
2931 if (invalid_range->rl_start == ap->a_foffset) {
2932 /* There's actually no valid information to be had starting here: */
2933 *ap->a_bpn = (daddr64_t)-1;
2934 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
b0d623f7 2935 ((size_t)(invalid_range->rl_end + 1 - ap->a_foffset) < bytesContAvail)) {
91447636
A
2936 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
2937 }
2938 } else {
2939 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
2940 }
9bccf70c 2941 break;
1c79356b 2942
91447636 2943 case RL_NOOVERLAP:
9bccf70c 2944 break;
91447636
A
2945 } /* end switch */
2946 if (bytesContAvail > ap->a_size)
2947 bytesContAvail = ap->a_size;
2d21ac55
A
2948 }
2949
2950exit:
2951 if (retval == 0) {
2952 if (ap->a_run)
2953 *ap->a_run = bytesContAvail;
2954
2955 if (ap->a_poff)
2956 *(int *)ap->a_poff = 0;
9bccf70c 2957 }
91447636 2958
91447636
A
2959 if (tooklock)
2960 hfs_unlock(cp);
2961
2962 return (MacToVFSError(retval));
1c79356b
A
2963}
2964
2965/*
91447636
A
2966 * prepare and issue the I/O
2967 * buf_strategy knows how to deal
2968 * with requests that require
2969 * fragmented I/Os
2970 */
1c79356b 2971int
91447636 2972hfs_vnop_strategy(struct vnop_strategy_args *ap)
1c79356b 2973{
91447636
A
2974 buf_t bp = ap->a_bp;
2975 vnode_t vp = buf_vnode(bp);
6d2010ae
A
2976 int error = 0;
2977
316670eb
A
2978 /* Mark buffer as containing static data if cnode flag set */
2979 if (VTOC(vp)->c_flag & C_SSD_STATIC) {
2980 buf_markstatic(bp);
2981 }
2982
39236c6e
A
2983 /* Mark buffer as containing static data if cnode flag set */
2984 if (VTOC(vp)->c_flag & C_SSD_GREEDY_MODE) {
2985 bufattr_markgreedymode((bufattr_t)(&bp->b_attr));
2986 }
2987
6d2010ae
A
2988#if CONFIG_PROTECT
2989 cnode_t *cp = NULL;
2990
2991 if ((cp = cp_get_protected_cnode(vp)) != NULL) {
316670eb
A
2992 /*
2993 * We rely upon the truncate lock to protect the
2994 * CP cache key from getting tossed prior to our IO finishing here.
2995 * Nearly all cluster io calls to manipulate file payload from HFS
2996 * take the truncate lock before calling into the cluster
2997 * layer to ensure the file size does not change, or that they
2998 * have exclusive right to change the EOF of the file.
2999 * That same guarantee protects us here since the code that
3000 * deals with CP lock events must now take the truncate lock
3001 * before doing anything.
3002 *
3003 * There is 1 exception here:
3004 * 1) One exception should be the VM swapfile IO, because HFS will
3005 * funnel the VNOP_PAGEOUT directly into a cluster_pageout call for the
3006 * swapfile code only without holding the truncate lock. This is because
3007 * individual swapfiles are maintained at fixed-length sizes by the VM code.
3008 * In non-swapfile IO we use PAGEOUT_V2 semantics which allow us to
3009 * create our own UPL and thus take the truncate lock before calling
3010 * into the cluster layer. In that case, however, we are not concerned
3011 * with the CP blob being wiped out in the middle of the IO
3012 * because there isn't anything to toss; the VM swapfile key stays
3013 * in-core as long as the file is open.
3014 *
3015 * NB:
3016 * For filesystem resize, we may not have access to the underlying
3017 * file's cache key for whatever reason (device may be locked). However,
3018 * we do not need it since we are going to use the temporary HFS-wide resize key
3019 * which is generated once we start relocating file content. If this file's I/O
3020 * should be done using the resize key, it will have been supplied already, so
3021 * do not attach the file's cp blob to the buffer.
6d2010ae 3022 */
316670eb
A
3023 if ((cp->c_cpentry->cp_flags & CP_RELOCATION_INFLIGHT) == 0) {
3024 buf_setcpaddr(bp, cp->c_cpentry);
3025 }
6d2010ae
A
3026 }
3027#endif /* CONFIG_PROTECT */
3028
3029 error = buf_strategy(VTOHFS(vp)->hfs_devvp, ap);
6d2010ae
A
3030
3031 return error;
1c79356b
A
3032}
3033
b0d623f7
A
3034static int
3035hfs_minorupdate(struct vnode *vp) {
3036 struct cnode *cp = VTOC(vp);
3037 cp->c_flag &= ~C_MODIFIED;
3038 cp->c_touch_acctime = 0;
3039 cp->c_touch_chgtime = 0;
3040 cp->c_touch_modtime = 0;
3041
3042 return 0;
3043}
1c79356b 3044
6d2010ae 3045int
39236c6e 3046do_hfs_truncate(struct vnode *vp, off_t length, int flags, int truncateflags, vfs_context_t context)
1c79356b 3047{
9bccf70c
A
3048 register struct cnode *cp = VTOC(vp);
3049 struct filefork *fp = VTOF(vp);
91447636
A
3050 struct proc *p = vfs_context_proc(context);;
3051 kauth_cred_t cred = vfs_context_ucred(context);
9bccf70c
A
3052 int retval;
3053 off_t bytesToAdd;
3054 off_t actualBytesAdded;
3055 off_t filebytes;
b0d623f7 3056 u_int32_t fileblocks;
9bccf70c 3057 int blksize;
b4c24cb9 3058 struct hfsmount *hfsmp;
91447636 3059 int lockflags;
39236c6e
A
3060 int skipupdate = (truncateflags & HFS_TRUNCATE_SKIPUPDATE);
3061 int suppress_times = (truncateflags & HFS_TRUNCATE_SKIPTIMES);
3062
9bccf70c
A
3063 blksize = VTOVCB(vp)->blockSize;
3064 fileblocks = fp->ff_blocks;
3065 filebytes = (off_t)fileblocks * (off_t)blksize;
3066
3067 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
3068 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
3069
3070 if (length < 0)
3071 return (EINVAL);
1c79356b 3072
8f6c56a5
A
3073 /* This should only happen with a corrupt filesystem */
3074 if ((off_t)fp->ff_size < 0)
3075 return (EINVAL);
3076
9bccf70c
A
3077 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
3078 return (EFBIG);
1c79356b 3079
b4c24cb9 3080 hfsmp = VTOHFS(vp);
1c79356b 3081
9bccf70c 3082 retval = E_NONE;
1c79356b 3083
55e303ae
A
3084 /* Files that are changing size are not hot file candidates. */
3085 if (hfsmp->hfc_stage == HFC_RECORDING) {
3086 fp->ff_bytesread = 0;
3087 }
3088
9bccf70c
A
3089 /*
3090 * We cannot just check if fp->ff_size == length (as an optimization)
3091 * since there may be extra physical blocks that also need truncation.
3092 */
3093#if QUOTA
91447636 3094 if ((retval = hfs_getinoquota(cp)))
9bccf70c
A
3095 return(retval);
3096#endif /* QUOTA */
1c79356b 3097
9bccf70c
A
3098 /*
3099 * Lengthen the size of the file. We must ensure that the
3100 * last byte of the file is allocated. Since the smallest
3101 * value of ff_size is 0, length will be at least 1.
3102 */
91447636 3103 if (length > (off_t)fp->ff_size) {
9bccf70c 3104#if QUOTA
b4c24cb9 3105 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
91447636 3106 cred, 0);
9bccf70c
A
3107 if (retval)
3108 goto Err_Exit;
3109#endif /* QUOTA */
3110 /*
3111 * If we don't have enough physical space then
3112 * we need to extend the physical size.
3113 */
3114 if (length > filebytes) {
3115 int eflags;
b0d623f7 3116 u_int32_t blockHint = 0;
1c79356b 3117
9bccf70c
A
3118 /* All or nothing and don't round up to clumpsize. */
3119 eflags = kEFAllMask | kEFNoClumpMask;
1c79356b 3120
91447636 3121 if (cred && suser(cred, NULL) != 0)
9bccf70c 3122 eflags |= kEFReserveMask; /* keep a reserve */
1c79356b 3123
55e303ae
A
3124 /*
3125 * Allocate Journal and Quota files in metadata zone.
3126 */
3127 if (filebytes == 0 &&
3128 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
3129 hfs_virtualmetafile(cp)) {
3130 eflags |= kEFMetadataMask;
3131 blockHint = hfsmp->hfs_metazone_start;
3132 }
91447636
A
3133 if (hfs_start_transaction(hfsmp) != 0) {
3134 retval = EINVAL;
3135 goto Err_Exit;
b4c24cb9
A
3136 }
3137
91447636
A
3138 /* Protect extents b-tree and allocation bitmap */
3139 lockflags = SFL_BITMAP;
3140 if (overflow_extents(fp))
3141 lockflags |= SFL_EXTENTS;
3142 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1c79356b 3143
9bccf70c
A
3144 while ((length > filebytes) && (retval == E_NONE)) {
3145 bytesToAdd = length - filebytes;
3146 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
3147 (FCB*)fp,
1c79356b 3148 bytesToAdd,
55e303ae 3149 blockHint,
9bccf70c 3150 eflags,
1c79356b
A
3151 &actualBytesAdded));
3152
9bccf70c
A
3153 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
3154 if (actualBytesAdded == 0 && retval == E_NONE) {
3155 if (length > filebytes)
3156 length = filebytes;
3157 break;
3158 }
3159 } /* endwhile */
b4c24cb9 3160
91447636 3161 hfs_systemfile_unlock(hfsmp, lockflags);
b4c24cb9 3162
b4c24cb9 3163 if (hfsmp->jnl) {
b0d623f7
A
3164 if (skipupdate) {
3165 (void) hfs_minorupdate(vp);
3166 }
39236c6e 3167 else {
b0d623f7
A
3168 (void) hfs_update(vp, TRUE);
3169 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3170 }
91447636 3171 }
55e303ae 3172
91447636 3173 hfs_end_transaction(hfsmp);
b4c24cb9 3174
9bccf70c
A
3175 if (retval)
3176 goto Err_Exit;
3177
3178 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
3179 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1c79356b 3180 }
1c79356b 3181
91447636 3182 if (!(flags & IO_NOZEROFILL)) {
2d21ac55 3183 if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
9bccf70c 3184 struct rl_entry *invalid_range;
9bccf70c 3185 off_t zero_limit;
0b4e3aa0 3186
9bccf70c
A
3187 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
3188 if (length < zero_limit) zero_limit = length;
3189
91447636
A
3190 if (length > (off_t)fp->ff_size) {
3191 struct timeval tv;
3192
9bccf70c
A
3193 /* Extending the file: time to fill out the current last page w. zeroes? */
3194 if ((fp->ff_size & PAGE_MASK_64) &&
3195 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
3196 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
0b4e3aa0
A
3197
3198 /* There's some valid data at the start of the (current) last page
3199 of the file, so zero out the remainder of that page to ensure the
3200 entire page contains valid data. Since there is no invalid range
3201 possible past the (current) eof, there's no need to remove anything
91447636
A
3202 from the invalid range list before calling cluster_write(): */
3203 hfs_unlock(cp);
9bccf70c 3204 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
91447636
A
3205 fp->ff_size, (off_t)0,
3206 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
39236c6e 3207 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
0b4e3aa0
A
3208 if (retval) goto Err_Exit;
3209
3210 /* Merely invalidate the remaining area, if necessary: */
9bccf70c 3211 if (length > zero_limit) {
91447636 3212 microuptime(&tv);
9bccf70c 3213 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
91447636 3214 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
9bccf70c
A
3215 }
3216 } else {
0b4e3aa0
A
3217 /* The page containing the (current) eof is invalid: just add the
3218 remainder of the page to the invalid list, along with the area
3219 being newly allocated:
3220 */
91447636 3221 microuptime(&tv);
9bccf70c 3222 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
91447636 3223 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
9bccf70c
A
3224 };
3225 }
3226 } else {
3227 panic("hfs_truncate: invoked on non-UBC object?!");
3228 };
3229 }
39236c6e
A
3230 if (suppress_times == 0) {
3231 cp->c_touch_modtime = TRUE;
3232 }
9bccf70c 3233 fp->ff_size = length;
0b4e3aa0 3234
9bccf70c 3235 } else { /* Shorten the size of the file */
0b4e3aa0 3236
91447636 3237 if ((off_t)fp->ff_size > length) {
9bccf70c
A
3238 /* Any space previously marked as invalid is now irrelevant: */
3239 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
3240 }
1c79356b 3241
9bccf70c
A
3242 /*
3243 * Account for any unmapped blocks. Note that the new
3244 * file length can still end up with unmapped blocks.
3245 */
3246 if (fp->ff_unallocblocks > 0) {
3247 u_int32_t finalblks;
91447636 3248 u_int32_t loanedBlocks;
1c79356b 3249
39236c6e 3250 hfs_lock_mount(hfsmp);
91447636
A
3251 loanedBlocks = fp->ff_unallocblocks;
3252 cp->c_blocks -= loanedBlocks;
3253 fp->ff_blocks -= loanedBlocks;
3254 fp->ff_unallocblocks = 0;
1c79356b 3255
91447636 3256 hfsmp->loanedBlocks -= loanedBlocks;
9bccf70c
A
3257
3258 finalblks = (length + blksize - 1) / blksize;
3259 if (finalblks > fp->ff_blocks) {
3260 /* calculate required unmapped blocks */
91447636
A
3261 loanedBlocks = finalblks - fp->ff_blocks;
3262 hfsmp->loanedBlocks += loanedBlocks;
3263
3264 fp->ff_unallocblocks = loanedBlocks;
3265 cp->c_blocks += loanedBlocks;
3266 fp->ff_blocks += loanedBlocks;
9bccf70c 3267 }
39236c6e 3268 hfs_unlock_mount (hfsmp);
9bccf70c 3269 }
1c79356b 3270
9bccf70c
A
3271 /*
3272 * For a TBE process the deallocation of the file blocks is
3273 * delayed until the file is closed. And hfs_close calls
3274 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
3275 * isn't set, we make sure this isn't a TBE process.
3276 */
91447636 3277 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
9bccf70c
A
3278#if QUOTA
3279 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
3280#endif /* QUOTA */
91447636
A
3281 if (hfs_start_transaction(hfsmp) != 0) {
3282 retval = EINVAL;
3283 goto Err_Exit;
3284 }
3285
3286 if (fp->ff_unallocblocks == 0) {
3287 /* Protect extents b-tree and allocation bitmap */
3288 lockflags = SFL_BITMAP;
3289 if (overflow_extents(fp))
3290 lockflags |= SFL_EXTENTS;
3291 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
b4c24cb9 3292
6d2010ae
A
3293 retval = MacToVFSError(TruncateFileC(VTOVCB(vp), (FCB*)fp, length, 0,
3294 FORK_IS_RSRC (fp), FTOC(fp)->c_fileid, false));
1c79356b 3295
91447636
A
3296 hfs_systemfile_unlock(hfsmp, lockflags);
3297 }
b4c24cb9 3298 if (hfsmp->jnl) {
ff6e181a
A
3299 if (retval == 0) {
3300 fp->ff_size = length;
3301 }
b0d623f7
A
3302 if (skipupdate) {
3303 (void) hfs_minorupdate(vp);
3304 }
3305 else {
3306 (void) hfs_update(vp, TRUE);
3307 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3308 }
b4c24cb9 3309 }
91447636 3310 hfs_end_transaction(hfsmp);
b4c24cb9 3311
9bccf70c
A
3312 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
3313 if (retval)
3314 goto Err_Exit;
3315#if QUOTA
3316 /* These are bytesreleased */
3317 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
3318#endif /* QUOTA */
3319 }
39236c6e
A
3320 /*
3321 * Only set update flag if the logical length changes & we aren't
3322 * suppressing modtime updates.
3323 */
3324 if (((off_t)fp->ff_size != length) && (suppress_times == 0)) {
91447636 3325 cp->c_touch_modtime = TRUE;
39236c6e 3326 }
9bccf70c 3327 fp->ff_size = length;
1c79356b 3328 }
b0d623f7
A
3329 if (cp->c_mode & (S_ISUID | S_ISGID)) {
3330 if (!vfs_context_issuser(context)) {
3331 cp->c_mode &= ~(S_ISUID | S_ISGID);
3332 skipupdate = 0;
3333 }
3334 }
3335 if (skipupdate) {
3336 retval = hfs_minorupdate(vp);
3337 }
3338 else {
3339 cp->c_touch_chgtime = TRUE; /* status changed */
39236c6e
A
3340 if (suppress_times == 0) {
3341 cp->c_touch_modtime = TRUE; /* file data was modified */
3342
3343 /*
3344 * If we are not suppressing the modtime update, then
3345 * update the gen count as well.
3346 */
3347 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK (cp->c_attr.ca_mode)) {
3348 hfs_incr_gencount(cp);
3349 }
3350 }
3351
b0d623f7
A
3352 retval = hfs_update(vp, MNT_WAIT);
3353 }
9bccf70c 3354 if (retval) {
0b4e3aa0 3355 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1c79356b 3356 -1, -1, -1, retval, 0);
9bccf70c 3357 }
1c79356b 3358
9bccf70c 3359Err_Exit:
1c79356b 3360
9bccf70c
A
3361 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
3362 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
1c79356b 3363
9bccf70c 3364 return (retval);
1c79356b
A
3365}
3366
6d2010ae
A
3367/*
3368 * Preparation which must be done prior to deleting the catalog record
3369 * of a file or directory. In order to make the on-disk as safe as possible,
3370 * we remove the catalog entry before releasing the bitmap blocks and the
3371 * overflow extent records. However, some work must be done prior to deleting
3372 * the catalog record.
3373 *
3374 * When calling this function, the cnode must exist both in memory and on-disk.
3375 * If there are both resource fork and data fork vnodes, this function should
3376 * be called on both.
3377 */
3378
3379int
3380hfs_prepare_release_storage (struct hfsmount *hfsmp, struct vnode *vp) {
3381
3382 struct filefork *fp = VTOF(vp);
3383 struct cnode *cp = VTOC(vp);
316670eb 3384#if QUOTA
6d2010ae 3385 int retval = 0;
316670eb 3386#endif /* QUOTA */
6d2010ae
A
3387
3388 /* Cannot truncate an HFS directory! */
3389 if (vnode_isdir(vp)) {
3390 return (EISDIR);
3391 }
3392
3393 /*
3394 * See the comment below in hfs_truncate for why we need to call
3395 * setsize here. Essentially we want to avoid pending IO if we
3396 * already know that the blocks are going to be released here.
3397 * This function is only called when totally removing all storage for a file, so
3398 * we can take a shortcut and immediately setsize (0);
3399 */
3400 ubc_setsize(vp, 0);
3401
3402 /* This should only happen with a corrupt filesystem */
3403 if ((off_t)fp->ff_size < 0)
3404 return (EINVAL);
3405
3406 /*
3407 * We cannot just check if fp->ff_size == length (as an optimization)
3408 * since there may be extra physical blocks that also need truncation.
3409 */
3410#if QUOTA
3411 if ((retval = hfs_getinoquota(cp))) {
3412 return(retval);
3413 }
3414#endif /* QUOTA */
3415
3416 /* Wipe out any invalid ranges which have yet to be backed by disk */
3417 rl_remove(0, fp->ff_size - 1, &fp->ff_invalidranges);
3418
3419 /*
3420 * Account for any unmapped blocks. Since we're deleting the
3421 * entire file, we don't have to worry about just shrinking
3422 * to a smaller number of borrowed blocks.
3423 */
3424 if (fp->ff_unallocblocks > 0) {
3425 u_int32_t loanedBlocks;
3426
39236c6e 3427 hfs_lock_mount (hfsmp);
6d2010ae
A
3428 loanedBlocks = fp->ff_unallocblocks;
3429 cp->c_blocks -= loanedBlocks;
3430 fp->ff_blocks -= loanedBlocks;
3431 fp->ff_unallocblocks = 0;
3432
3433 hfsmp->loanedBlocks -= loanedBlocks;
3434
39236c6e 3435 hfs_unlock_mount (hfsmp);
6d2010ae
A
3436 }
3437
3438 return 0;
3439}
3440
3441
3442/*
3443 * Special wrapper around calling TruncateFileC. This function is useable
3444 * even when the catalog record does not exist any longer, making it ideal
3445 * for use when deleting a file. The simplification here is that we know
3446 * that we are releasing all blocks.
3447 *
316670eb
A
3448 * Note that this function may be called when there is no vnode backing
3449 * the file fork in question. We may call this from hfs_vnop_inactive
3450 * to clear out resource fork data (and may not want to clear out the data
3451 * fork yet). As a result, we pointer-check both sets of inputs before
3452 * doing anything with them.
3453 *
6d2010ae
A
3454 * The caller is responsible for saving off a copy of the filefork(s)
3455 * embedded within the cnode prior to calling this function. The pointers
3456 * supplied as arguments must be valid even if the cnode is no longer valid.
3457 */
3458
3459int
3460hfs_release_storage (struct hfsmount *hfsmp, struct filefork *datafork,
3461 struct filefork *rsrcfork, u_int32_t fileid) {
3462
3463 off_t filebytes;
3464 u_int32_t fileblocks;
3465 int blksize = 0;
3466 int error = 0;
3467 int lockflags;
3468
3469 blksize = hfsmp->blockSize;
3470
3471 /* Data Fork */
316670eb 3472 if ((datafork != NULL) && (datafork->ff_blocks > 0)) {
6d2010ae
A
3473 fileblocks = datafork->ff_blocks;
3474 filebytes = (off_t)fileblocks * (off_t)blksize;
3475
3476 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3477
3478 while (filebytes > 0) {
3479 if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(datafork)) {
3480 filebytes -= HFS_BIGFILE_SIZE;
3481 } else {
3482 filebytes = 0;
3483 }
3484
3485 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3486 if (hfs_start_transaction(hfsmp) != 0) {
3487 error = EINVAL;
3488 break;
3489 }
3490
3491 if (datafork->ff_unallocblocks == 0) {
3492 /* Protect extents b-tree and allocation bitmap */
3493 lockflags = SFL_BITMAP;
3494 if (overflow_extents(datafork))
3495 lockflags |= SFL_EXTENTS;
3496 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3497
3498 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), datafork, filebytes, 1, 0, fileid, false));
3499
3500 hfs_systemfile_unlock(hfsmp, lockflags);
3501 }
3502 if (error == 0) {
3503 datafork->ff_size = filebytes;
3504 }
3505 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3506
3507 /* Finish the transaction and start over if necessary */
3508 hfs_end_transaction(hfsmp);
3509
3510 if (error) {
3511 break;
3512 }
3513 }
3514 }
3515
3516 /* Resource fork */
3517 if (error == 0 && (rsrcfork != NULL) && rsrcfork->ff_blocks > 0) {
3518 fileblocks = rsrcfork->ff_blocks;
3519 filebytes = (off_t)fileblocks * (off_t)blksize;
3520
3521 /* We killed invalid ranges and loaned blocks before we removed the catalog entry */
3522
3523 while (filebytes > 0) {
3524 if (filebytes > HFS_BIGFILE_SIZE && overflow_extents(rsrcfork)) {
3525 filebytes -= HFS_BIGFILE_SIZE;
3526 } else {
3527 filebytes = 0;
3528 }
3529
3530 /* Start a transaction, and wipe out as many blocks as we can in this iteration */
3531 if (hfs_start_transaction(hfsmp) != 0) {
3532 error = EINVAL;
3533 break;
3534 }
3535
3536 if (rsrcfork->ff_unallocblocks == 0) {
3537 /* Protect extents b-tree and allocation bitmap */
3538 lockflags = SFL_BITMAP;
3539 if (overflow_extents(rsrcfork))
3540 lockflags |= SFL_EXTENTS;
3541 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3542
3543 error = MacToVFSError(TruncateFileC(HFSTOVCB(hfsmp), rsrcfork, filebytes, 1, 1, fileid, false));
3544
3545 hfs_systemfile_unlock(hfsmp, lockflags);
3546 }
3547 if (error == 0) {
3548 rsrcfork->ff_size = filebytes;
3549 }
3550 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
3551
3552 /* Finish the transaction and start over if necessary */
3553 hfs_end_transaction(hfsmp);
3554
3555 if (error) {
3556 break;
3557 }
3558 }
3559 }
3560
3561 return error;
3562}
1c79356b 3563
91447636 3564
55e303ae 3565/*
55e303ae
A
3566 * Truncate a cnode to at most length size, freeing (or adding) the
3567 * disk blocks.
3568 */
91447636
A
3569int
3570hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
39236c6e 3571 int truncateflags, vfs_context_t context)
55e303ae 3572{
55e303ae 3573 struct filefork *fp = VTOF(vp);
55e303ae 3574 off_t filebytes;
b0d623f7 3575 u_int32_t fileblocks;
91447636 3576 int blksize, error = 0;
3a60a9f5 3577 struct cnode *cp = VTOC(vp);
55e303ae 3578
2d21ac55
A
3579 /* Cannot truncate an HFS directory! */
3580 if (vnode_isdir(vp)) {
3581 return (EISDIR);
3582 }
3583 /* A swap file cannot change size. */
3584 if (vnode_isswap(vp) && (length != 0)) {
3585 return (EPERM);
3586 }
55e303ae 3587
55e303ae
A
3588 blksize = VTOVCB(vp)->blockSize;
3589 fileblocks = fp->ff_blocks;
3590 filebytes = (off_t)fileblocks * (off_t)blksize;
3591
2d21ac55
A
3592 //
3593 // Have to do this here so that we don't wind up with
3594 // i/o pending for blocks that are about to be released
3595 // if we truncate the file.
3596 //
3597 // If skipsetsize is set, then the caller is responsible
3598 // for the ubc_setsize.
3599 //
b0d623f7
A
3600 // Even if skipsetsize is set, if the length is zero we
3601 // want to call ubc_setsize() because as of SnowLeopard
3602 // it will no longer cause any page-ins and it will drop
3603 // any dirty pages so that we don't do any i/o that we
3604 // don't have to. This also prevents a race where i/o
3605 // for truncated blocks may overwrite later data if the
3606 // blocks get reallocated to a different file.
3607 //
3608 if (!skipsetsize || length == 0)
2d21ac55
A
3609 ubc_setsize(vp, length);
3610
55e303ae
A
3611 // have to loop truncating or growing files that are
3612 // really big because otherwise transactions can get
3613 // enormous and consume too many kernel resources.
91447636
A
3614
3615 if (length < filebytes) {
3616 while (filebytes > length) {
0c530ab8 3617 if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
91447636
A
3618 filebytes -= HFS_BIGFILE_SIZE;
3619 } else {
3620 filebytes = length;
3621 }
3a60a9f5 3622 cp->c_flag |= C_FORCEUPDATE;
39236c6e 3623 error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
91447636
A
3624 if (error)
3625 break;
3626 }
3627 } else if (length > filebytes) {
3628 while (filebytes < length) {
0c530ab8 3629 if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
91447636
A
3630 filebytes += HFS_BIGFILE_SIZE;
3631 } else {
3632 filebytes = length;
3633 }
3a60a9f5 3634 cp->c_flag |= C_FORCEUPDATE;
39236c6e 3635 error = do_hfs_truncate(vp, filebytes, flags, truncateflags, context);
91447636
A
3636 if (error)
3637 break;
55e303ae 3638 }
91447636 3639 } else /* Same logical size */ {
55e303ae 3640
39236c6e 3641 error = do_hfs_truncate(vp, length, flags, truncateflags, context);
91447636
A
3642 }
3643 /* Files that are changing size are not hot file candidates. */
3644 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
3645 fp->ff_bytesread = 0;
55e303ae
A
3646 }
3647
91447636 3648 return (error);
55e303ae
A
3649}
3650
3651
1c79356b
A
3652
3653/*
91447636 3654 * Preallocate file storage space.
1c79356b 3655 */
91447636
A
3656int
3657hfs_vnop_allocate(struct vnop_allocate_args /* {
3658 vnode_t a_vp;
9bccf70c
A
3659 off_t a_length;
3660 u_int32_t a_flags;
3661 off_t *a_bytesallocated;
3662 off_t a_offset;
91447636
A
3663 vfs_context_t a_context;
3664 } */ *ap)
1c79356b 3665{
9bccf70c 3666 struct vnode *vp = ap->a_vp;
91447636
A
3667 struct cnode *cp;
3668 struct filefork *fp;
3669 ExtendedVCB *vcb;
9bccf70c
A
3670 off_t length = ap->a_length;
3671 off_t startingPEOF;
3672 off_t moreBytesRequested;
3673 off_t actualBytesAdded;
3674 off_t filebytes;
b0d623f7 3675 u_int32_t fileblocks;
9bccf70c 3676 int retval, retval2;
2d21ac55
A
3677 u_int32_t blockHint;
3678 u_int32_t extendFlags; /* For call to ExtendFileC */
b4c24cb9 3679 struct hfsmount *hfsmp;
91447636
A
3680 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
3681 int lockflags;
6d2010ae 3682 time_t orig_ctime;
91447636
A
3683
3684 *(ap->a_bytesallocated) = 0;
3685
3686 if (!vnode_isreg(vp))
3687 return (EISDIR);
3688 if (length < (off_t)0)
3689 return (EINVAL);
2d21ac55 3690
91447636 3691 cp = VTOC(vp);
2d21ac55 3692
6d2010ae
A
3693 orig_ctime = VTOC(vp)->c_ctime;
3694
3695 check_for_tracked_file(vp, orig_ctime, ap->a_length == 0 ? NAMESPACE_HANDLER_TRUNCATE_OP|NAMESPACE_HANDLER_DELETE_OP : NAMESPACE_HANDLER_TRUNCATE_OP, NULL);
3696
39236c6e 3697 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2d21ac55 3698
39236c6e 3699 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
2d21ac55
A
3700 goto Err_Exit;
3701 }
3702
91447636 3703 fp = VTOF(vp);
b4c24cb9 3704 hfsmp = VTOHFS(vp);
91447636 3705 vcb = VTOVCB(vp);
9bccf70c 3706
9bccf70c 3707 fileblocks = fp->ff_blocks;
55e303ae 3708 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
9bccf70c 3709
91447636
A
3710 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
3711 retval = EINVAL;
3712 goto Err_Exit;
3713 }
0b4e3aa0 3714
9bccf70c 3715 /* Fill in the flags word for the call to Extend the file */
1c79356b 3716
55e303ae 3717 extendFlags = kEFNoClumpMask;
9bccf70c 3718 if (ap->a_flags & ALLOCATECONTIG)
1c79356b 3719 extendFlags |= kEFContigMask;
9bccf70c 3720 if (ap->a_flags & ALLOCATEALL)
1c79356b 3721 extendFlags |= kEFAllMask;
91447636 3722 if (cred && suser(cred, NULL) != 0)
9bccf70c 3723 extendFlags |= kEFReserveMask;
b0d623f7
A
3724 if (hfs_virtualmetafile(cp))
3725 extendFlags |= kEFMetadataMask;
1c79356b 3726
9bccf70c
A
3727 retval = E_NONE;
3728 blockHint = 0;
3729 startingPEOF = filebytes;
1c79356b 3730
9bccf70c
A
3731 if (ap->a_flags & ALLOCATEFROMPEOF)
3732 length += filebytes;
3733 else if (ap->a_flags & ALLOCATEFROMVOL)
3734 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
1c79356b 3735
9bccf70c
A
3736 /* If no changes are necesary, then we're done */
3737 if (filebytes == length)
3738 goto Std_Exit;
1c79356b 3739
9bccf70c
A
3740 /*
3741 * Lengthen the size of the file. We must ensure that the
3742 * last byte of the file is allocated. Since the smallest
3743 * value of filebytes is 0, length will be at least 1.
3744 */
3745 if (length > filebytes) {
2d21ac55
A
3746 off_t total_bytes_added = 0, orig_request_size;
3747
3748 orig_request_size = moreBytesRequested = length - filebytes;
1c79356b 3749
9bccf70c 3750#if QUOTA
b4c24cb9 3751 retval = hfs_chkdq(cp,
55e303ae 3752 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
91447636 3753 cred, 0);
9bccf70c 3754 if (retval)
91447636 3755 goto Err_Exit;
9bccf70c
A
3756
3757#endif /* QUOTA */
55e303ae
A
3758 /*
3759 * Metadata zone checks.
3760 */
3761 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
3762 /*
3763 * Allocate Journal and Quota files in metadata zone.
3764 */
3765 if (hfs_virtualmetafile(cp)) {
55e303ae
A
3766 blockHint = hfsmp->hfs_metazone_start;
3767 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
3768 (blockHint <= hfsmp->hfs_metazone_end)) {
3769 /*
3770 * Move blockHint outside metadata zone.
3771 */
3772 blockHint = hfsmp->hfs_metazone_end + 1;
3773 }
3774 }
3775
b4c24cb9 3776
2d21ac55
A
3777 while ((length > filebytes) && (retval == E_NONE)) {
3778 off_t bytesRequested;
3779
3780 if (hfs_start_transaction(hfsmp) != 0) {
3781 retval = EINVAL;
3782 goto Err_Exit;
3783 }
3784
3785 /* Protect extents b-tree and allocation bitmap */
3786 lockflags = SFL_BITMAP;
3787 if (overflow_extents(fp))
91447636 3788 lockflags |= SFL_EXTENTS;
2d21ac55
A
3789 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3790
3791 if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
3792 bytesRequested = HFS_BIGFILE_SIZE;
3793 } else {
3794 bytesRequested = moreBytesRequested;
3795 }
1c79356b 3796
b0d623f7
A
3797 if (extendFlags & kEFContigMask) {
3798 // if we're on a sparse device, this will force it to do a
3799 // full scan to find the space needed.
3800 hfsmp->hfs_flags &= ~HFS_DID_CONTIG_SCAN;
3801 }
3802
2d21ac55 3803 retval = MacToVFSError(ExtendFileC(vcb,
9bccf70c 3804 (FCB*)fp,
2d21ac55 3805 bytesRequested,
9bccf70c
A
3806 blockHint,
3807 extendFlags,
3808 &actualBytesAdded));
1c79356b 3809
2d21ac55
A
3810 if (retval == E_NONE) {
3811 *(ap->a_bytesallocated) += actualBytesAdded;
3812 total_bytes_added += actualBytesAdded;
3813 moreBytesRequested -= actualBytesAdded;
3814 if (blockHint != 0) {
3815 blockHint += actualBytesAdded / vcb->blockSize;
3816 }
3817 }
3818 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
3819
3820 hfs_systemfile_unlock(hfsmp, lockflags);
1c79356b 3821
2d21ac55 3822 if (hfsmp->jnl) {
91447636
A
3823 (void) hfs_update(vp, TRUE);
3824 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2d21ac55
A
3825 }
3826
3827 hfs_end_transaction(hfsmp);
b4c24cb9 3828 }
91447636 3829
b4c24cb9 3830
1c79356b
A
3831 /*
3832 * if we get an error and no changes were made then exit
91447636 3833 * otherwise we must do the hfs_update to reflect the changes
1c79356b 3834 */
9bccf70c
A
3835 if (retval && (startingPEOF == filebytes))
3836 goto Err_Exit;
1c79356b 3837
9bccf70c
A
3838 /*
3839 * Adjust actualBytesAdded to be allocation block aligned, not
3840 * clump size aligned.
3841 * NOTE: So what we are reporting does not affect reality
3842 * until the file is closed, when we truncate the file to allocation
3843 * block size.
3844 */
2d21ac55 3845 if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
0b4e3aa0 3846 *(ap->a_bytesallocated) =
2d21ac55 3847 roundup(orig_request_size, (off_t)vcb->blockSize);
1c79356b 3848
9bccf70c 3849 } else { /* Shorten the size of the file */
1c79356b 3850
9bccf70c 3851 if (fp->ff_size > length) {
1c79356b
A
3852 /*
3853 * Any buffers that are past the truncation point need to be
91447636 3854 * invalidated (to maintain buffer cache consistency).
1c79356b 3855 */
1c79356b
A
3856 }
3857
b0d623f7 3858 retval = hfs_truncate(vp, length, 0, 0, 0, ap->a_context);
55e303ae 3859 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
b4c24cb9 3860
1c79356b
A
3861 /*
3862 * if we get an error and no changes were made then exit
91447636 3863 * otherwise we must do the hfs_update to reflect the changes
1c79356b 3864 */
9bccf70c
A
3865 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
3866#if QUOTA
3867 /* These are bytesreleased */
3868 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
3869#endif /* QUOTA */
1c79356b 3870
9bccf70c
A
3871 if (fp->ff_size > filebytes) {
3872 fp->ff_size = filebytes;
1c79356b 3873
91447636
A
3874 hfs_unlock(cp);
3875 ubc_setsize(vp, fp->ff_size);
39236c6e 3876 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
9bccf70c
A
3877 }
3878 }
1c79356b
A
3879
3880Std_Exit:
91447636
A
3881 cp->c_touch_chgtime = TRUE;
3882 cp->c_touch_modtime = TRUE;
3883 retval2 = hfs_update(vp, MNT_WAIT);
1c79356b 3884
9bccf70c
A
3885 if (retval == 0)
3886 retval = retval2;
1c79356b 3887Err_Exit:
39236c6e 3888 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
91447636 3889 hfs_unlock(cp);
9bccf70c 3890 return (retval);
1c79356b
A
3891}
3892
3893
9bccf70c 3894/*
91447636 3895 * Pagein for HFS filesystem
9bccf70c 3896 */
1c79356b 3897int
91447636
A
3898hfs_vnop_pagein(struct vnop_pagein_args *ap)
3899/*
3900 struct vnop_pagein_args {
3901 vnode_t a_vp,
1c79356b
A
3902 upl_t a_pl,
3903 vm_offset_t a_pl_offset,
3904 off_t a_f_offset,
3905 size_t a_size,
1c79356b 3906 int a_flags
91447636
A
3907 vfs_context_t a_context;
3908 };
3909*/
1c79356b 3910{
6d2010ae
A
3911 vnode_t vp;
3912 struct cnode *cp;
3913 struct filefork *fp;
3914 int error = 0;
3915 upl_t upl;
3916 upl_page_info_t *pl;
3917 off_t f_offset;
3918 int offset;
3919 int isize;
3920 int pg_index;
3921 boolean_t truncate_lock_held = FALSE;
3922 boolean_t file_converted = FALSE;
3923 kern_return_t kret;
3924
3925 vp = ap->a_vp;
3926 cp = VTOC(vp);
3927 fp = VTOF(vp);
3928
3929#if CONFIG_PROTECT
316670eb 3930 if ((error = cp_handle_vnop(vp, CP_READ_ACCESS | CP_WRITE_ACCESS, 0)) != 0) {
39236c6e
A
3931 /*
3932 * If we errored here, then this means that one of two things occurred:
3933 * 1. there was a problem with the decryption of the key.
3934 * 2. the device is locked and we are not allowed to access this particular file.
3935 *
3936 * Either way, this means that we need to shut down this upl now. As long as
3937 * the pl pointer is NULL (meaning that we're supposed to create the UPL ourselves)
3938 * then we create a upl and immediately abort it.
3939 */
3940 if (ap->a_pl == NULL) {
3941 /* create the upl */
3942 ubc_create_upl (vp, ap->a_f_offset, ap->a_size, &upl, &pl,
3943 UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
3944 /* mark the range as needed so it doesn't immediately get discarded upon abort */
3945 ubc_upl_range_needed (upl, ap->a_pl_offset / PAGE_SIZE, 1);
3946
3947 /* Abort the range */
3948 ubc_upl_abort_range (upl, 0, ap->a_size, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_ERROR);
3949 }
3950
3951
6d2010ae
A
3952 return error;
3953 }
3954#endif /* CONFIG_PROTECT */
3955
3956 if (ap->a_pl != NULL) {
3957 /*
3958 * this can only happen for swap files now that
3959 * we're asking for V2 paging behavior...
3960 * so don't need to worry about decompression, or
3961 * keeping track of blocks read or taking the truncate lock
3962 */
3963 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
3964 ap->a_size, (off_t)fp->ff_size, ap->a_flags);
3965 goto pagein_done;
3966 }
3967
3968retry_pagein:
3969 /*
3970 * take truncate lock (shared/recursive) to guard against
3971 * zero-fill thru fsync interfering, but only for v2
3972 *
3973 * the HFS_RECURSE_TRUNCLOCK arg indicates that we want the
3974 * lock shared and we are allowed to recurse 1 level if this thread already
3975 * owns the lock exclusively... this can legally occur
3976 * if we are doing a shrinking ftruncate against a file
3977 * that is mapped private, and the pages being truncated
3978 * do not currently exist in the cache... in that case
3979 * we will have to page-in the missing pages in order
3980 * to provide them to the private mapping... we must
3981 * also call hfs_unlock_truncate with a postive been_recursed
3982 * arg to indicate that if we have recursed, there is no need to drop
3983 * the lock. Allowing this simple recursion is necessary
3984 * in order to avoid a certain deadlock... since the ftruncate
3985 * already holds the truncate lock exclusively, if we try
3986 * to acquire it shared to protect the pagein path, we will
3987 * hang this thread
3988 *
3989 * NOTE: The if () block below is a workaround in order to prevent a
3990 * VM deadlock. See rdar://7853471.
3991 *
3992 * If we are in a forced unmount, then launchd will still have the
3993 * dyld_shared_cache file mapped as it is trying to reboot. If we
3994 * take the truncate lock here to service a page fault, then our
3995 * thread could deadlock with the forced-unmount. The forced unmount
3996 * thread will try to reclaim the dyld_shared_cache vnode, but since it's
3997 * marked C_DELETED, it will call ubc_setsize(0). As a result, the unmount
3998 * thread will think it needs to copy all of the data out of the file
3999 * and into a VM copy object. If we hold the cnode lock here, then that
4000 * VM operation will not be able to proceed, because we'll set a busy page
4001 * before attempting to grab the lock. Note that this isn't as simple as "don't
4002 * call ubc_setsize" because doing that would just shift the problem to the
4003 * ubc_msync done before the vnode is reclaimed.
4004 *
4005 * So, if a forced unmount on this volume is in flight AND the cnode is
4006 * marked C_DELETED, then just go ahead and do the page in without taking
4007 * the lock (thus suspending pagein_v2 semantics temporarily). Since it's on a file
4008 * that is not going to be available on the next mount, this seems like a
4009 * OK solution from a correctness point of view, even though it is hacky.
4010 */
4011 if (vfs_isforce(vp->v_mount)) {
4012 if (cp->c_flag & C_DELETED) {
4013 /* If we don't get it, then just go ahead and operate without the lock */
39236c6e 4014 truncate_lock_held = hfs_try_trunclock(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
6d2010ae
A
4015 }
4016 }
4017 else {
39236c6e 4018 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
6d2010ae
A
4019 truncate_lock_held = TRUE;
4020 }
4021
4022 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, UPL_UBC_PAGEIN | UPL_RET_ONLY_ABSENT);
4023
4024 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
4025 error = EINVAL;
4026 goto pagein_done;
4027 }
316670eb
A
4028 ubc_upl_range_needed(upl, ap->a_pl_offset / PAGE_SIZE, 1);
4029
6d2010ae
A
4030 isize = ap->a_size;
4031
4032 /*
4033 * Scan from the back to find the last page in the UPL, so that we
4034 * aren't looking at a UPL that may have already been freed by the
4035 * preceding aborts/completions.
4036 */
4037 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
4038 if (upl_page_present(pl, --pg_index))
4039 break;
4040 if (pg_index == 0) {
4041 /*
4042 * no absent pages were found in the range specified
4043 * just abort the UPL to get rid of it and then we're done
4044 */
4045 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
4046 goto pagein_done;
4047 }
4048 }
4049 /*
4050 * initialize the offset variables before we touch the UPL.
4051 * f_offset is the position into the file, in bytes
4052 * offset is the position into the UPL, in bytes
4053 * pg_index is the pg# of the UPL we're operating on
4054 * isize is the offset into the UPL of the last page that is present.
4055 */
4056 isize = ((pg_index + 1) * PAGE_SIZE);
4057 pg_index = 0;
4058 offset = 0;
4059 f_offset = ap->a_f_offset;
4060
4061 while (isize) {
4062 int xsize;
4063 int num_of_pages;
4064
4065 if ( !upl_page_present(pl, pg_index)) {
4066 /*
4067 * we asked for RET_ONLY_ABSENT, so it's possible
4068 * to get back empty slots in the UPL.
4069 * just skip over them
4070 */
4071 f_offset += PAGE_SIZE;
4072 offset += PAGE_SIZE;
4073 isize -= PAGE_SIZE;
4074 pg_index++;
4075
4076 continue;
4077 }
4078 /*
4079 * We know that we have at least one absent page.
4080 * Now checking to see how many in a row we have
4081 */
4082 num_of_pages = 1;
4083 xsize = isize - PAGE_SIZE;
4084
4085 while (xsize) {
4086 if ( !upl_page_present(pl, pg_index + num_of_pages))
4087 break;
4088 num_of_pages++;
4089 xsize -= PAGE_SIZE;
4090 }
4091 xsize = num_of_pages * PAGE_SIZE;
1c79356b 4092
b0d623f7 4093#if HFS_COMPRESSION
6d2010ae
A
4094 if (VNODE_IS_RSRC(vp)) {
4095 /* allow pageins of the resource fork */
4096 } else {
4097 int compressed = hfs_file_is_compressed(VTOC(vp), 1); /* 1 == don't take the cnode lock */
4098
b0d623f7 4099 if (compressed) {
6d2010ae
A
4100 if (truncate_lock_held) {
4101 /*
4102 * can't hold the truncate lock when calling into the decmpfs layer
4103 * since it calls back into this layer... even though we're only
4104 * holding the lock in shared mode, and the re-entrant path only
4105 * takes the lock shared, we can deadlock if some other thread
4106 * tries to grab the lock exclusively in between.
4107 */
39236c6e 4108 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
6d2010ae
A
4109 truncate_lock_held = FALSE;
4110 }
4111 ap->a_pl = upl;
4112 ap->a_pl_offset = offset;
4113 ap->a_f_offset = f_offset;
4114 ap->a_size = xsize;
4115
4116 error = decmpfs_pagein_compressed(ap, &compressed, VTOCMP(vp));
4117 /*
4118 * note that decpfs_pagein_compressed can change the state of
4119 * 'compressed'... it will set it to 0 if the file is no longer
4120 * compressed once the compression lock is successfully taken
4121 * i.e. we would block on that lock while the file is being inflated
4122 */
4123 if (compressed) {
4124 if (error == 0) {
4125 /* successful page-in, update the access time */
4126 VTOC(vp)->c_touch_acctime = TRUE;
b0d623f7 4127
6d2010ae
A
4128 /* compressed files are not hot file candidates */
4129 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
4130 fp->ff_bytesread = 0;
4131 }
4132 } else if (error == EAGAIN) {
4133 /*
4134 * EAGAIN indicates someone else already holds the compression lock...
4135 * to avoid deadlocking, we'll abort this range of pages with an
4136 * indication that the pagein needs to be redriven
4137 */
4138 ubc_upl_abort_range(upl, (upl_offset_t) offset, xsize, UPL_ABORT_FREE_ON_EMPTY | UPL_ABORT_RESTART);
b0d623f7 4139 }
6d2010ae
A
4140 goto pagein_next_range;
4141 }
4142 else {
4143 /*
4144 * Set file_converted only if the file became decompressed while we were
4145 * paging in. If it were still compressed, we would re-start the loop using the goto
4146 * in the above block. This avoid us overloading truncate_lock_held as our retry_pagein
4147 * condition below, since we could have avoided taking the truncate lock to prevent
4148 * a deadlock in the force unmount case.
4149 */
4150 file_converted = TRUE;
b0d623f7 4151 }
b0d623f7 4152 }
6d2010ae
A
4153 if (file_converted == TRUE) {
4154 /*
4155 * the file was converted back to a regular file after we first saw it as compressed
4156 * we need to abort the upl, retake the truncate lock, recreate the UPL and start over
4157 * reset a_size so that we consider what remains of the original request
4158 * and null out a_upl and a_pl_offset.
4159 *
4160 * We should only be able to get into this block if the decmpfs_pagein_compressed
4161 * successfully decompressed the range in question for this file.
4162 */
4163 ubc_upl_abort_range(upl, (upl_offset_t) offset, isize, UPL_ABORT_FREE_ON_EMPTY);
4164
4165 ap->a_size = isize;
4166 ap->a_pl = NULL;
4167 ap->a_pl_offset = 0;
4168
4169 /* Reset file_converted back to false so that we don't infinite-loop. */
4170 file_converted = FALSE;
4171 goto retry_pagein;
4172 }
b0d623f7 4173 }
b0d623f7 4174#endif
6d2010ae 4175 error = cluster_pagein(vp, upl, offset, f_offset, xsize, (off_t)fp->ff_size, ap->a_flags);
b0d623f7 4176
6d2010ae
A
4177 /*
4178 * Keep track of blocks read.
4179 */
4180 if ( !vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
4181 int bytesread;
4182 int took_cnode_lock = 0;
55e303ae 4183
6d2010ae
A
4184 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
4185 bytesread = fp->ff_size;
4186 else
4187 bytesread = xsize;
91447636 4188
6d2010ae
A
4189 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
4190 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
39236c6e 4191 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
6d2010ae
A
4192 took_cnode_lock = 1;
4193 }
4194 /*
4195 * If this file hasn't been seen since the start of
4196 * the current sampling period then start over.
4197 */
4198 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
4199 struct timeval tv;
91447636 4200
6d2010ae
A
4201 fp->ff_bytesread = bytesread;
4202 microtime(&tv);
4203 cp->c_atime = tv.tv_sec;
4204 } else {
4205 fp->ff_bytesread += bytesread;
4206 }
4207 cp->c_touch_acctime = TRUE;
4208 if (took_cnode_lock)
4209 hfs_unlock(cp);
91447636 4210 }
6d2010ae
A
4211pagein_next_range:
4212 f_offset += xsize;
4213 offset += xsize;
4214 isize -= xsize;
4215 pg_index += num_of_pages;
55e303ae 4216
6d2010ae 4217 error = 0;
55e303ae 4218 }
6d2010ae
A
4219
4220pagein_done:
4221 if (truncate_lock_held == TRUE) {
4222 /* Note 1 is passed to hfs_unlock_truncate in been_recursed argument */
39236c6e 4223 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
6d2010ae
A
4224 }
4225
9bccf70c 4226 return (error);
1c79356b
A
4227}
4228
4229/*
91447636 4230 * Pageout for HFS filesystem.
1c79356b
A
4231 */
4232int
91447636
A
4233hfs_vnop_pageout(struct vnop_pageout_args *ap)
4234/*
4235 struct vnop_pageout_args {
4236 vnode_t a_vp,
1c79356b
A
4237 upl_t a_pl,
4238 vm_offset_t a_pl_offset,
4239 off_t a_f_offset,
4240 size_t a_size,
1c79356b 4241 int a_flags
91447636
A
4242 vfs_context_t a_context;
4243 };
4244*/
1c79356b 4245{
91447636
A
4246 vnode_t vp = ap->a_vp;
4247 struct cnode *cp;
4248 struct filefork *fp;
b0d623f7 4249 int retval = 0;
9bccf70c 4250 off_t filesize;
b0d623f7
A
4251 upl_t upl;
4252 upl_page_info_t* pl;
4253 vm_offset_t a_pl_offset;
4254 int a_flags;
4255 int is_pageoutv2 = 0;
b7266188 4256 kern_return_t kret;
1c79356b 4257
91447636 4258 cp = VTOC(vp);
91447636 4259 fp = VTOF(vp);
2d21ac55 4260
593a1d5f
A
4261 /*
4262 * Figure out where the file ends, for pageout purposes. If
4263 * ff_new_size > ff_size, then we're in the middle of extending the
4264 * file via a write, so it is safe (and necessary) that we be able
4265 * to pageout up to that point.
4266 */
4267 filesize = fp->ff_size;
4268 if (fp->ff_new_size > filesize)
4269 filesize = fp->ff_new_size;
b0d623f7
A
4270
4271 a_flags = ap->a_flags;
4272 a_pl_offset = ap->a_pl_offset;
4273
39236c6e
A
4274 if (S_ISREG(cp->c_attr.ca_mode) || S_ISLNK(cp->c_attr.ca_mode)) {
4275 hfs_incr_gencount (cp);
4276 }
4277
b0d623f7
A
4278 /*
4279 * we can tell if we're getting the new or old behavior from the UPL
4280 */
4281 if ((upl = ap->a_pl) == NULL) {
4282 int request_flags;
4283
4284 is_pageoutv2 = 1;
4285 /*
4286 * we're in control of any UPL we commit
4287 * make sure someone hasn't accidentally passed in UPL_NOCOMMIT
4288 */
4289 a_flags &= ~UPL_NOCOMMIT;
4290 a_pl_offset = 0;
4291
4292 /*
316670eb
A
4293 * For V2 semantics, we want to take the cnode truncate lock
4294 * shared to guard against the file size changing via zero-filling.
4295 *
4296 * However, we have to be careful because we may be invoked
4297 * via the ubc_msync path to write out dirty mmap'd pages
4298 * in response to a lock event on a content-protected
4299 * filesystem (e.g. to write out class A files).
4300 * As a result, we want to take the truncate lock 'SHARED' with
4301 * the mini-recursion locktype so that we don't deadlock/panic
4302 * because we may be already holding the truncate lock exclusive to force any other
4303 * IOs to have blocked behind us.
b0d623f7 4304 */
39236c6e 4305 hfs_lock_truncate(cp, HFS_SHARED_LOCK, HFS_LOCK_SKIP_IF_EXCLUSIVE);
b0d623f7
A
4306
4307 if (a_flags & UPL_MSYNC) {
4308 request_flags = UPL_UBC_MSYNC | UPL_RET_ONLY_DIRTY;
4309 }
4310 else {
4311 request_flags = UPL_UBC_PAGEOUT | UPL_RET_ONLY_DIRTY;
4312 }
6d2010ae 4313
b7266188 4314 kret = ubc_create_upl(vp, ap->a_f_offset, ap->a_size, &upl, &pl, request_flags);
b0d623f7 4315
b7266188 4316 if ((kret != KERN_SUCCESS) || (upl == (upl_t) NULL)) {
b0d623f7
A
4317 retval = EINVAL;
4318 goto pageout_done;
4319 }
4320 }
4321 /*
4322 * from this point forward upl points at the UPL we're working with
4323 * it was either passed in or we succesfully created it
4324 */
4325
4326 /*
4327 * Now that HFS is opting into VFC_VFSVNOP_PAGEOUTV2, we may need to operate on our own
4328 * UPL instead of relying on the UPL passed into us. We go ahead and do that here,
4329 * scanning for dirty ranges. We'll issue our own N cluster_pageout calls, for
4330 * N dirty ranges in the UPL. Note that this is almost a direct copy of the
4331 * logic in vnode_pageout except that we need to do it after grabbing the truncate
4332 * lock in HFS so that we don't lock invert ourselves.
4333 *
4334 * Note that we can still get into this function on behalf of the default pager with
4335 * non-V2 behavior (swapfiles). However in that case, we did not grab locks above
4336 * since fsync and other writing threads will grab the locks, then mark the
4337 * relevant pages as busy. But the pageout codepath marks the pages as busy,
4338 * and THEN would attempt to grab the truncate lock, which would result in deadlock. So
4339 * we do not try to grab anything for the pre-V2 case, which should only be accessed
4340 * by the paging/VM system.
4341 */
4342
4343 if (is_pageoutv2) {
4344 off_t f_offset;
4345 int offset;
4346 int isize;
4347 int pg_index;
4348 int error;
4349 int error_ret = 0;
4350
4351 isize = ap->a_size;
4352 f_offset = ap->a_f_offset;
4353
4354 /*
4355 * Scan from the back to find the last page in the UPL, so that we
4356 * aren't looking at a UPL that may have already been freed by the
4357 * preceding aborts/completions.
4358 */
4359 for (pg_index = ((isize) / PAGE_SIZE); pg_index > 0;) {
4360 if (upl_page_present(pl, --pg_index))
4361 break;
4362 if (pg_index == 0) {
4363 ubc_upl_abort_range(upl, 0, isize, UPL_ABORT_FREE_ON_EMPTY);
4364 goto pageout_done;
2d21ac55 4365 }
2d21ac55 4366 }
b0d623f7
A
4367
4368 /*
4369 * initialize the offset variables before we touch the UPL.
4370 * a_f_offset is the position into the file, in bytes
4371 * offset is the position into the UPL, in bytes
4372 * pg_index is the pg# of the UPL we're operating on.
4373 * isize is the offset into the UPL of the last non-clean page.
4374 */
4375 isize = ((pg_index + 1) * PAGE_SIZE);
4376
4377 offset = 0;
4378 pg_index = 0;
4379
4380 while (isize) {
4381 int xsize;
4382 int num_of_pages;
4383
4384 if ( !upl_page_present(pl, pg_index)) {
4385 /*
4386 * we asked for RET_ONLY_DIRTY, so it's possible
4387 * to get back empty slots in the UPL.
4388 * just skip over them
4389 */
4390 f_offset += PAGE_SIZE;
4391 offset += PAGE_SIZE;
4392 isize -= PAGE_SIZE;
4393 pg_index++;
4394
4395 continue;
4396 }
4397 if ( !upl_dirty_page(pl, pg_index)) {
4398 panic ("hfs_vnop_pageout: unforeseen clean page @ index %d for UPL %p\n", pg_index, upl);
4399 }
4400
4401 /*
4402 * We know that we have at least one dirty page.
4403 * Now checking to see how many in a row we have
4404 */
4405 num_of_pages = 1;
4406 xsize = isize - PAGE_SIZE;
4407
4408 while (xsize) {
4409 if ( !upl_dirty_page(pl, pg_index + num_of_pages))
4410 break;
4411 num_of_pages++;
4412 xsize -= PAGE_SIZE;
4413 }
4414 xsize = num_of_pages * PAGE_SIZE;
4415
4416 if (!vnode_isswap(vp)) {
4417 off_t end_of_range;
4418 int tooklock;
4419
4420 tooklock = 0;
4421
4422 if (cp->c_lockowner != current_thread()) {
39236c6e 4423 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
b0d623f7
A
4424 /*
4425 * we're in the v2 path, so we are the
4426 * owner of the UPL... we may have already
4427 * processed some of the UPL, so abort it
4428 * from the current working offset to the
4429 * end of the UPL
4430 */
4431 ubc_upl_abort_range(upl,
4432 offset,
4433 ap->a_size - offset,
4434 UPL_ABORT_FREE_ON_EMPTY);
4435 goto pageout_done;
4436 }
4437 tooklock = 1;
4438 }
4439 end_of_range = f_offset + xsize - 1;
2d21ac55 4440
b0d623f7
A
4441 if (end_of_range >= filesize) {
4442 end_of_range = (off_t)(filesize - 1);
4443 }
4444 if (f_offset < filesize) {
4445 rl_remove(f_offset, end_of_range, &fp->ff_invalidranges);
4446 cp->c_flag |= C_MODIFIED; /* leof is dirty */
4447 }
4448 if (tooklock) {
4449 hfs_unlock(cp);
4450 }
4451 }
4452 if ((error = cluster_pageout(vp, upl, offset, f_offset,
4453 xsize, filesize, a_flags))) {
4454 if (error_ret == 0)
4455 error_ret = error;
4456 }
4457 f_offset += xsize;
4458 offset += xsize;
4459 isize -= xsize;
4460 pg_index += num_of_pages;
4461 }
4462 /* capture errnos bubbled out of cluster_pageout if they occurred */
4463 if (error_ret != 0) {
4464 retval = error_ret;
4465 }
4466 } /* end block for v2 pageout behavior */
4467 else {
4468 if (!vnode_isswap(vp)) {
4469 off_t end_of_range;
4470 int tooklock = 0;
4471
4472 if (cp->c_lockowner != current_thread()) {
39236c6e 4473 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT))) {
b0d623f7
A
4474 if (!(a_flags & UPL_NOCOMMIT)) {
4475 ubc_upl_abort_range(upl,
4476 a_pl_offset,
4477 ap->a_size,
4478 UPL_ABORT_FREE_ON_EMPTY);
4479 }
4480 goto pageout_done;
4481 }
4482 tooklock = 1;
4483 }
4484 end_of_range = ap->a_f_offset + ap->a_size - 1;
2d21ac55 4485
b0d623f7
A
4486 if (end_of_range >= filesize) {
4487 end_of_range = (off_t)(filesize - 1);
4488 }
4489 if (ap->a_f_offset < filesize) {
4490 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
4491 cp->c_flag |= C_MODIFIED; /* leof is dirty */
4492 }
1c79356b 4493
b0d623f7
A
4494 if (tooklock) {
4495 hfs_unlock(cp);
4496 }
2d21ac55 4497 }
b0d623f7
A
4498 /*
4499 * just call cluster_pageout for old pre-v2 behavior
4500 */
4501 retval = cluster_pageout(vp, upl, a_pl_offset, ap->a_f_offset,
4502 ap->a_size, filesize, a_flags);
55e303ae 4503 }
0b4e3aa0 4504
1c79356b 4505 /*
b0d623f7
A
4506 * If data was written, update the modification time of the file.
4507 * If setuid or setgid bits are set and this process is not the
4508 * superuser then clear the setuid and setgid bits as a precaution
4509 * against tampering.
1c79356b 4510 */
b0d623f7
A
4511 if (retval == 0) {
4512 cp->c_touch_modtime = TRUE;
91447636 4513 cp->c_touch_chgtime = TRUE;
b0d623f7
A
4514 if ((cp->c_mode & (S_ISUID | S_ISGID)) &&
4515 (vfs_context_suser(ap->a_context) != 0)) {
39236c6e 4516 hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
b0d623f7
A
4517 cp->c_mode &= ~(S_ISUID | S_ISGID);
4518 hfs_unlock(cp);
4519 }
4520 }
4521
4522pageout_done:
4523 if (is_pageoutv2) {
316670eb
A
4524 /*
4525 * Release the truncate lock. Note that because
4526 * we may have taken the lock recursively by
4527 * being invoked via ubc_msync due to lockdown,
4528 * we should release it recursively, too.
4529 */
39236c6e 4530 hfs_unlock_truncate(cp, HFS_LOCK_SKIP_IF_EXCLUSIVE);
91447636 4531 }
1c79356b
A
4532 return (retval);
4533}
4534
4535/*
4536 * Intercept B-Tree node writes to unswap them if necessary.
1c79356b
A
4537 */
4538int
91447636 4539hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
1c79356b 4540{
9bccf70c 4541 int retval = 0;
9bccf70c 4542 register struct buf *bp = ap->a_bp;
91447636 4543 register struct vnode *vp = buf_vnode(bp);
9bccf70c
A
4544 BlockDescriptor block;
4545
4546 /* Trap B-Tree writes */
4547 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
91447636 4548 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
0c530ab8
A
4549 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
4550 (vp == VTOHFS(vp)->hfc_filevp)) {
9bccf70c 4551
3a60a9f5
A
4552 /*
4553 * Swap and validate the node if it is in native byte order.
4554 * This is always be true on big endian, so we always validate
4555 * before writing here. On little endian, the node typically has
2d21ac55 4556 * been swapped and validated when it was written to the journal,
3a60a9f5
A
4557 * so we won't do anything here.
4558 */
2d21ac55 4559 if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
9bccf70c
A
4560 /* Prepare the block pointer */
4561 block.blockHeader = bp;
91447636 4562 block.buffer = (char *)buf_dataptr(bp);
3a60a9f5 4563 block.blockNum = buf_lblkno(bp);
9bccf70c 4564 /* not found in cache ==> came from disk */
91447636
A
4565 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
4566 block.blockSize = buf_count(bp);
1c79356b 4567
9bccf70c 4568 /* Endian un-swap B-Tree node */
935ed37a 4569 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
3a60a9f5
A
4570 if (retval)
4571 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
9bccf70c 4572 }
9bccf70c 4573 }
3a60a9f5 4574
9bccf70c 4575 /* This buffer shouldn't be locked anymore but if it is clear it */
91447636
A
4576 if ((buf_flags(bp) & B_LOCKED)) {
4577 // XXXdbg
4578 if (VTOHFS(vp)->jnl) {
2d21ac55 4579 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
91447636
A
4580 }
4581 buf_clearflags(bp, B_LOCKED);
9bccf70c
A
4582 }
4583 retval = vn_bwrite (ap);
1c79356b 4584
9bccf70c 4585 return (retval);
1c79356b 4586}
55e303ae
A
4587
4588/*
4589 * Relocate a file to a new location on disk
4590 * cnode must be locked on entry
4591 *
4592 * Relocation occurs by cloning the file's data from its
4593 * current set of blocks to a new set of blocks. During
4594 * the relocation all of the blocks (old and new) are
4595 * owned by the file.
4596 *
4597 * -----------------
4598 * |///////////////|
4599 * -----------------
4600 * 0 N (file offset)
4601 *
4602 * ----------------- -----------------
2d21ac55 4603 * |///////////////| | | STEP 1 (acquire new blocks)
55e303ae
A
4604 * ----------------- -----------------
4605 * 0 N N+1 2N
4606 *
4607 * ----------------- -----------------
4608 * |///////////////| |///////////////| STEP 2 (clone data)
4609 * ----------------- -----------------
4610 * 0 N N+1 2N
4611 *
4612 * -----------------
4613 * |///////////////| STEP 3 (head truncate blocks)
4614 * -----------------
4615 * 0 N
4616 *
4617 * During steps 2 and 3 page-outs to file offsets less
4618 * than or equal to N are suspended.
4619 *
2d21ac55 4620 * During step 3 page-ins to the file get suspended.
55e303ae 4621 */
55e303ae 4622int
91447636
A
4623hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
4624 struct proc *p)
55e303ae 4625{
91447636 4626 struct cnode *cp;
55e303ae
A
4627 struct filefork *fp;
4628 struct hfsmount *hfsmp;
55e303ae
A
4629 u_int32_t headblks;
4630 u_int32_t datablks;
4631 u_int32_t blksize;
55e303ae
A
4632 u_int32_t growsize;
4633 u_int32_t nextallocsave;
91447636 4634 daddr64_t sector_a, sector_b;
55e303ae 4635 int eflags;
55e303ae 4636 off_t newbytes;
91447636
A
4637 int retval;
4638 int lockflags = 0;
4639 int took_trunc_lock = 0;
4640 int started_tr = 0;
4641 enum vtype vnodetype;
4642
4643 vnodetype = vnode_vtype(vp);
bd504ef0 4644 if (vnodetype != VREG) {
39236c6e 4645 /* Not allowed to move symlinks. */
55e303ae
A
4646 return (EPERM);
4647 }
4648
4649 hfsmp = VTOHFS(vp);
4650 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
4651 return (ENOSPC);
4652 }
4653
91447636 4654 cp = VTOC(vp);
55e303ae
A
4655 fp = VTOF(vp);
4656 if (fp->ff_unallocblocks)
4657 return (EINVAL);
6d2010ae
A
4658
4659#if CONFIG_PROTECT
4660 /*
4661 * <rdar://problem/9118426>
4662 * Disable HFS file relocation on content-protected filesystems
4663 */
4664 if (cp_fs_protected (hfsmp->hfs_mp)) {
4665 return EINVAL;
4666 }
4667#endif
6d2010ae
A
4668 /* If it's an SSD, also disable HFS relocation */
4669 if (hfsmp->hfs_flags & HFS_SSD) {
4670 return EINVAL;
4671 }
4672
316670eb 4673
91447636 4674 blksize = hfsmp->blockSize;
55e303ae 4675 if (blockHint == 0)
91447636 4676 blockHint = hfsmp->nextAllocation;
55e303ae 4677
39236c6e 4678 if (fp->ff_size > 0x7fffffff) {
55e303ae
A
4679 return (EFBIG);
4680 }
4681
91447636
A
4682 //
4683 // We do not believe that this call to hfs_fsync() is
4684 // necessary and it causes a journal transaction
4685 // deadlock so we are removing it.
4686 //
4687 //if (vnodetype == VREG && !vnode_issystem(vp)) {
4688 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
4689 // if (retval)
4690 // return (retval);
4691 //}
4692
4693 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
4694 hfs_unlock(cp);
39236c6e 4695 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2d21ac55 4696 /* Force lock since callers expects lock to be held. */
39236c6e
A
4697 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS))) {
4698 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
91447636
A
4699 return (retval);
4700 }
2d21ac55
A
4701 /* No need to continue if file was removed. */
4702 if (cp->c_flag & C_NOEXISTS) {
39236c6e 4703 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2d21ac55
A
4704 return (ENOENT);
4705 }
91447636
A
4706 took_trunc_lock = 1;
4707 }
55e303ae
A
4708 headblks = fp->ff_blocks;
4709 datablks = howmany(fp->ff_size, blksize);
4710 growsize = datablks * blksize;
55e303ae
A
4711 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
4712 if (blockHint >= hfsmp->hfs_metazone_start &&
4713 blockHint <= hfsmp->hfs_metazone_end)
4714 eflags |= kEFMetadataMask;
4715
91447636
A
4716 if (hfs_start_transaction(hfsmp) != 0) {
4717 if (took_trunc_lock)
39236c6e 4718 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
91447636 4719 return (EINVAL);
55e303ae 4720 }
91447636
A
4721 started_tr = 1;
4722 /*
4723 * Protect the extents b-tree and the allocation bitmap
4724 * during MapFileBlockC and ExtendFileC operations.
4725 */
4726 lockflags = SFL_BITMAP;
4727 if (overflow_extents(fp))
4728 lockflags |= SFL_EXTENTS;
4729 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
55e303ae 4730
91447636 4731 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
55e303ae
A
4732 if (retval) {
4733 retval = MacToVFSError(retval);
4734 goto out;
4735 }
4736
4737 /*
2d21ac55 4738 * STEP 1 - acquire new allocation blocks.
55e303ae 4739 */
91447636
A
4740 nextallocsave = hfsmp->nextAllocation;
4741 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
4742 if (eflags & kEFMetadataMask) {
39236c6e 4743 hfs_lock_mount(hfsmp);
2d21ac55
A
4744 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
4745 MarkVCBDirty(hfsmp);
39236c6e 4746 hfs_unlock_mount(hfsmp);
91447636 4747 }
55e303ae
A
4748
4749 retval = MacToVFSError(retval);
4750 if (retval == 0) {
91447636 4751 cp->c_flag |= C_MODIFIED;
55e303ae
A
4752 if (newbytes < growsize) {
4753 retval = ENOSPC;
4754 goto restore;
4755 } else if (fp->ff_blocks < (headblks + datablks)) {
39236c6e 4756 printf("hfs_relocate: allocation failed id=%u, vol=%s\n", cp->c_cnid, hfsmp->vcbVN);
55e303ae
A
4757 retval = ENOSPC;
4758 goto restore;
4759 }
4760
91447636 4761 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
55e303ae
A
4762 if (retval) {
4763 retval = MacToVFSError(retval);
4764 } else if ((sector_a + 1) == sector_b) {
4765 retval = ENOSPC;
4766 goto restore;
4767 } else if ((eflags & kEFMetadataMask) &&
593a1d5f 4768 ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
55e303ae 4769 hfsmp->hfs_metazone_end)) {
b0d623f7 4770#if 0
2d21ac55
A
4771 const char * filestr;
4772 char emptystr = '\0';
4773
4774 if (cp->c_desc.cd_nameptr != NULL) {
4775 filestr = (const char *)&cp->c_desc.cd_nameptr[0];
4776 } else if (vnode_name(vp) != NULL) {
4777 filestr = vnode_name(vp);
4778 } else {
4779 filestr = &emptystr;
4780 }
b0d623f7 4781#endif
55e303ae
A
4782 retval = ENOSPC;
4783 goto restore;
4784 }
4785 }
91447636
A
4786 /* Done with system locks and journal for now. */
4787 hfs_systemfile_unlock(hfsmp, lockflags);
4788 lockflags = 0;
4789 hfs_end_transaction(hfsmp);
4790 started_tr = 0;
4791
55e303ae
A
4792 if (retval) {
4793 /*
4794 * Check to see if failure is due to excessive fragmentation.
4795 */
91447636
A
4796 if ((retval == ENOSPC) &&
4797 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
55e303ae
A
4798 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
4799 }
4800 goto out;
4801 }
55e303ae 4802 /*
91447636 4803 * STEP 2 - clone file data into the new allocation blocks.
55e303ae
A
4804 */
4805
91447636 4806 if (vnodetype == VLNK)
39236c6e 4807 retval = EPERM;
91447636 4808 else if (vnode_issystem(vp))
55e303ae
A
4809 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
4810 else
91447636 4811 retval = hfs_clonefile(vp, headblks, datablks, blksize);
ccc36f2f 4812
91447636
A
4813 /* Start transaction for step 3 or for a restore. */
4814 if (hfs_start_transaction(hfsmp) != 0) {
4815 retval = EINVAL;
4816 goto out;
4817 }
4818 started_tr = 1;
55e303ae
A
4819 if (retval)
4820 goto restore;
55e303ae
A
4821
4822 /*
91447636 4823 * STEP 3 - switch to cloned data and remove old blocks.
55e303ae 4824 */
91447636
A
4825 lockflags = SFL_BITMAP;
4826 if (overflow_extents(fp))
4827 lockflags |= SFL_EXTENTS;
4828 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
55e303ae 4829
91447636 4830 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
55e303ae 4831
91447636
A
4832 hfs_systemfile_unlock(hfsmp, lockflags);
4833 lockflags = 0;
55e303ae
A
4834 if (retval)
4835 goto restore;
55e303ae 4836out:
91447636 4837 if (took_trunc_lock)
39236c6e 4838 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
55e303ae 4839
91447636
A
4840 if (lockflags) {
4841 hfs_systemfile_unlock(hfsmp, lockflags);
4842 lockflags = 0;
ccc36f2f
A
4843 }
4844
0c530ab8
A
4845 /* Push cnode's new extent data to disk. */
4846 if (retval == 0) {
4847 (void) hfs_update(vp, MNT_WAIT);
4848 }
55e303ae 4849 if (hfsmp->jnl) {
91447636 4850 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
55e303ae
A
4851 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
4852 else
4853 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
55e303ae 4854 }
91447636 4855exit:
91447636
A
4856 if (started_tr)
4857 hfs_end_transaction(hfsmp);
55e303ae
A
4858
4859 return (retval);
4860
4861restore:
2d21ac55
A
4862 if (fp->ff_blocks == headblks) {
4863 if (took_trunc_lock)
39236c6e 4864 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
91447636 4865 goto exit;
2d21ac55 4866 }
55e303ae
A
4867 /*
4868 * Give back any newly allocated space.
4869 */
91447636
A
4870 if (lockflags == 0) {
4871 lockflags = SFL_BITMAP;
4872 if (overflow_extents(fp))
4873 lockflags |= SFL_EXTENTS;
4874 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
4875 }
4876
6d2010ae
A
4877 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, 0, FORK_IS_RSRC(fp),
4878 FTOC(fp)->c_fileid, false);
91447636
A
4879
4880 hfs_systemfile_unlock(hfsmp, lockflags);
4881 lockflags = 0;
4882
4883 if (took_trunc_lock)
39236c6e 4884 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
91447636 4885 goto exit;
55e303ae
A
4886}
4887
4888
55e303ae
A
4889/*
4890 * Clone a file's data within the file.
4891 *
4892 */
4893static int
91447636 4894hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
55e303ae
A
4895{
4896 caddr_t bufp;
55e303ae
A
4897 size_t bufsize;
4898 size_t copysize;
4899 size_t iosize;
55e303ae 4900 size_t offset;
b0d623f7 4901 off_t writebase;
91447636
A
4902 uio_t auio;
4903 int error = 0;
55e303ae 4904
55e303ae
A
4905 writebase = blkstart * blksize;
4906 copysize = blkcnt * blksize;
0c530ab8 4907 iosize = bufsize = MIN(copysize, 128 * 1024);
55e303ae
A
4908 offset = 0;
4909
6d2010ae
A
4910 hfs_unlock(VTOC(vp));
4911
4912#if CONFIG_PROTECT
316670eb 4913 if ((error = cp_handle_vnop(vp, CP_WRITE_ACCESS, 0)) != 0) {
39236c6e 4914 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
6d2010ae
A
4915 return (error);
4916 }
4917#endif /* CONFIG_PROTECT */
4918
55e303ae 4919 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
39236c6e 4920 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
55e303ae 4921 return (ENOMEM);
6d2010ae 4922 }
55e303ae 4923
b0d623f7 4924 auio = uio_create(1, 0, UIO_SYSSPACE, UIO_READ);
55e303ae
A
4925
4926 while (offset < copysize) {
4927 iosize = MIN(copysize - offset, iosize);
4928
b0d623f7 4929 uio_reset(auio, offset, UIO_SYSSPACE, UIO_READ);
91447636 4930 uio_addiov(auio, (uintptr_t)bufp, iosize);
55e303ae 4931
2d21ac55 4932 error = cluster_read(vp, auio, copysize, IO_NOCACHE);
55e303ae
A
4933 if (error) {
4934 printf("hfs_clonefile: cluster_read failed - %d\n", error);
4935 break;
4936 }
91447636 4937 if (uio_resid(auio) != 0) {
316670eb 4938 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", (int64_t)uio_resid(auio));
55e303ae
A
4939 error = EIO;
4940 break;
4941 }
4942
b0d623f7 4943 uio_reset(auio, writebase + offset, UIO_SYSSPACE, UIO_WRITE);
91447636 4944 uio_addiov(auio, (uintptr_t)bufp, iosize);
55e303ae 4945
b0d623f7
A
4946 error = cluster_write(vp, auio, writebase + offset,
4947 writebase + offset + iosize,
91447636 4948 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
55e303ae
A
4949 if (error) {
4950 printf("hfs_clonefile: cluster_write failed - %d\n", error);
4951 break;
4952 }
91447636 4953 if (uio_resid(auio) != 0) {
55e303ae
A
4954 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
4955 error = EIO;
4956 break;
4957 }
4958 offset += iosize;
4959 }
91447636
A
4960 uio_free(auio);
4961
b0d623f7
A
4962 if ((blksize & PAGE_MASK)) {
4963 /*
4964 * since the copy may not have started on a PAGE
4965 * boundary (or may not have ended on one), we
4966 * may have pages left in the cache since NOCACHE
4967 * will let partially written pages linger...
4968 * lets just flush the entire range to make sure
4969 * we don't have any pages left that are beyond
4970 * (or intersect) the real LEOF of this file
4971 */
4972 ubc_msync(vp, writebase, writebase + offset, NULL, UBC_INVALIDATE | UBC_PUSHDIRTY);
4973 } else {
4974 /*
4975 * No need to call ubc_sync_range or hfs_invalbuf
4976 * since the file was copied using IO_NOCACHE and
4977 * the copy was done starting and ending on a page
4978 * boundary in the file.
4979 */
4980 }
55e303ae 4981 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
91447636 4982
39236c6e 4983 hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
55e303ae
A
4984 return (error);
4985}
4986
4987/*
4988 * Clone a system (metadata) file.
4989 *
4990 */
4991static int
4992hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
91447636 4993 kauth_cred_t cred, struct proc *p)
55e303ae
A
4994{
4995 caddr_t bufp;
4996 char * offset;
4997 size_t bufsize;
4998 size_t iosize;
4999 struct buf *bp = NULL;
91447636
A
5000 daddr64_t blkno;
5001 daddr64_t blk;
5002 daddr64_t start_blk;
5003 daddr64_t last_blk;
55e303ae
A
5004 int breadcnt;
5005 int i;
5006 int error = 0;
5007
5008
5009 iosize = GetLogicalBlockSize(vp);
5010 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
5011 breadcnt = bufsize / iosize;
5012
5013 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
5014 return (ENOMEM);
5015 }
91447636
A
5016 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
5017 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
55e303ae
A
5018 blkno = 0;
5019
91447636 5020 while (blkno < last_blk) {
55e303ae
A
5021 /*
5022 * Read up to a megabyte
5023 */
5024 offset = bufp;
91447636
A
5025 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
5026 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
55e303ae
A
5027 if (error) {
5028 printf("hfs_clonesysfile: meta_bread error %d\n", error);
5029 goto out;
5030 }
91447636
A
5031 if (buf_count(bp) != iosize) {
5032 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
55e303ae
A
5033 goto out;
5034 }
91447636
A
5035 bcopy((char *)buf_dataptr(bp), offset, iosize);
5036
5037 buf_markinvalid(bp);
5038 buf_brelse(bp);
55e303ae 5039 bp = NULL;
91447636 5040
55e303ae
A
5041 offset += iosize;
5042 }
5043
5044 /*
5045 * Write up to a megabyte
5046 */
5047 offset = bufp;
91447636
A
5048 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
5049 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
55e303ae 5050 if (bp == NULL) {
91447636 5051 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
55e303ae
A
5052 error = EIO;
5053 goto out;
5054 }
91447636
A
5055 bcopy(offset, (char *)buf_dataptr(bp), iosize);
5056 error = (int)buf_bwrite(bp);
55e303ae
A
5057 bp = NULL;
5058 if (error)
5059 goto out;
5060 offset += iosize;
5061 }
5062 }
5063out:
5064 if (bp) {
91447636 5065 buf_brelse(bp);
55e303ae
A
5066 }
5067
5068 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
5069
91447636 5070 error = hfs_fsync(vp, MNT_WAIT, 0, p);
55e303ae
A
5071
5072 return (error);
5073}