1 /*
2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* @(#)hfs_readwrite.c 1.0
29 *
30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
31 *
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
33 *
34 */
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/resourcevar.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/filedesc.h>
42 #include <sys/stat.h>
43 #include <sys/buf.h>
44 #include <sys/proc.h>
45 #include <sys/kauth.h>
46 #include <sys/vnode.h>
47 #include <sys/vnode_internal.h>
48 #include <sys/uio.h>
49 #include <sys/vfs_context.h>
50 #include <sys/fsevents.h>
51 #include <kern/kalloc.h>
52 #include <sys/disk.h>
53 #include <sys/sysctl.h>
54
55 #include <miscfs/specfs/specdev.h>
56
57 #include <sys/ubc.h>
58 #include <sys/ubc_internal.h>
59
60 #include <vm/vm_pageout.h>
61 #include <vm/vm_kern.h>
62
63 #include <sys/kdebug.h>
64
65 #include "hfs.h"
66 #include "hfs_attrlist.h"
67 #include "hfs_endian.h"
68 #include "hfs_fsctl.h"
69 #include "hfs_quota.h"
70 #include "hfscommon/headers/FileMgrInternal.h"
71 #include "hfscommon/headers/BTreesInternal.h"
72 #include "hfs_cnode.h"
73 #include "hfs_dbg.h"
74
75 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
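/*
 * For example, can_cluster(4096) is true as long as 4096 <= MAXPHYSIO/2,
 * while can_cluster(6000) is always false because 6000 is not a multiple
 * of 4096.
 */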
76
77 enum {
78 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
79 };
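/*
 * 0x7FFFFFFF is 2 GiB - 1 byte: the per-file size ceiling enforced in
 * hfs_vnop_read() below for HFS Standard volumes.
 */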
80
81 /* from bsd/vfs/vfs_cluster.c */
82 extern int is_file_clean(vnode_t vp, off_t filesize);
83 /* from bsd/hfs/hfs_vfsops.c */
84 extern int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
85
86 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
87 static int hfs_clonefile(struct vnode *, int, int, int);
88 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
89
90 int flush_cache_on_write = 0;
91 SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
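/*
 * Usage sketch (assuming the standard sysctl(8) tool, and that the knob is
 * published as "kern.flush_cache_on_write" as the SYSCTL_INT declaration
 * above implies):
 *
 *     sysctl -w kern.flush_cache_on_write=1
 *
 * With the knob set, hfs_vnop_write() issues a DKIOCSYNCHRONIZECACHE after
 * writes to uncached files (see the flush_cache_on_write check below).
 */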
92
93
94 /*
95 * Read data from a file.
96 */
97 int
98 hfs_vnop_read(struct vnop_read_args *ap)
99 {
100 uio_t uio = ap->a_uio;
101 struct vnode *vp = ap->a_vp;
102 struct cnode *cp;
103 struct filefork *fp;
104 struct hfsmount *hfsmp;
105 off_t filesize;
106 off_t filebytes;
107 off_t start_resid = uio_resid(uio);
108 off_t offset = uio_offset(uio);
109 int retval = 0;
110
111
112 /* Preflight checks */
113 if (!vnode_isreg(vp)) {
114 /* can only read regular files */
115 if (vnode_isdir(vp))
116 return (EISDIR);
117 else
118 return (EPERM);
119 }
120 if (start_resid == 0)
121 return (0); /* Nothing left to do */
122 if (offset < 0)
123 return (EINVAL); /* can't read from a negative offset */
124
125 cp = VTOC(vp);
126 fp = VTOF(vp);
127 hfsmp = VTOHFS(vp);
128
129 /* Protect against a size change. */
130 hfs_lock_truncate(cp, 0);
131
132 filesize = fp->ff_size;
133 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
134 if (offset > filesize) {
135 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
136 (offset > (off_t)MAXHFSFILESIZE)) {
137 retval = EFBIG;
138 }
139 goto exit;
140 }
141
142 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
143 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
144
145 retval = cluster_read(vp, uio, filesize, ap->a_ioflag);
146
147 cp->c_touch_acctime = TRUE;
148
149 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
150 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
151
152 /*
153 * Keep track of blocks read
154 */
155 if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
156 int took_cnode_lock = 0;
157 off_t bytesread;
158
159 bytesread = start_resid - uio_resid(uio);
160
161 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
162 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
163 hfs_lock(cp, HFS_FORCE_LOCK);
164 took_cnode_lock = 1;
165 }
166 /*
167 * If this file hasn't been seen since the start of
168 * the current sampling period then start over.
169 */
170 if (cp->c_atime < hfsmp->hfc_timebase) {
171 struct timeval tv;
172
173 fp->ff_bytesread = bytesread;
174 microtime(&tv);
175 cp->c_atime = tv.tv_sec;
176 } else {
177 fp->ff_bytesread += bytesread;
178 }
179 if (took_cnode_lock)
180 hfs_unlock(cp);
181 }
182 exit:
183 hfs_unlock_truncate(cp, 0);
184 return (retval);
185 }
186
187 /*
188 * Write data to a file.
189 */
190 int
191 hfs_vnop_write(struct vnop_write_args *ap)
192 {
193 uio_t uio = ap->a_uio;
194 struct vnode *vp = ap->a_vp;
195 struct cnode *cp;
196 struct filefork *fp;
197 struct hfsmount *hfsmp;
198 kauth_cred_t cred = NULL;
199 off_t origFileSize;
200 off_t writelimit;
201 off_t bytesToAdd = 0;
202 off_t actualBytesAdded;
203 off_t filebytes;
204 off_t offset;
205 size_t resid;
206 int eflags;
207 int ioflag = ap->a_ioflag;
208 int retval = 0;
209 int lockflags;
210 int cnode_locked = 0;
211 int partialwrite = 0;
212 int exclusive_lock = 0;
213
214 // LP64todo - fix this! uio_resid may be a 64-bit value
215 resid = uio_resid(uio);
216 offset = uio_offset(uio);
217
218 if (ioflag & IO_APPEND) {
219 exclusive_lock = 1;
220 }
221
222 if (offset < 0)
223 return (EINVAL);
224 if (resid == 0)
225 return (E_NONE);
226 if (!vnode_isreg(vp))
227 return (EPERM); /* Can only write regular files */
228
229 cp = VTOC(vp);
230 fp = VTOF(vp);
231 hfsmp = VTOHFS(vp);
232
233 eflags = kEFDeferMask; /* defer file block allocations */
234 #ifdef HFS_SPARSE_DEV
235 /*
236 * When the underlying device is sparse and space
237 * is low (< 8MB), stop doing delayed allocations
238 * and begin doing synchronous I/O.
239 */
240 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
241 (hfs_freeblks(hfsmp, 0) < 2048)) {
242 eflags &= ~kEFDeferMask;
243 ioflag |= IO_SYNC;
244 }
245 #endif /* HFS_SPARSE_DEV */
246
247 again:
248 /* Protect against a size change. */
249 hfs_lock_truncate(cp, exclusive_lock);
250
251 if (ioflag & IO_APPEND) {
252 uio_setoffset(uio, fp->ff_size);
253 offset = fp->ff_size;
254 }
255 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
256 retval = EPERM;
257 goto exit;
258 }
259
260 origFileSize = fp->ff_size;
261 writelimit = offset + resid;
262 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
263
264 /* If the truncate lock is shared, and if we either have virtual
265 * blocks or will need to extend the file, upgrade the truncate
266 * lock to exclusive. If the upgrade fails, we lose the lock and
267 * have to take the exclusive lock again.
268 */
269 if ((exclusive_lock == 0) &&
270 ((fp->ff_unallocblocks != 0) || (writelimit > filebytes))) {
271 exclusive_lock = 1;
272 /* Lock upgrade failed and we lost our shared lock, try again */
273 if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
274 goto again;
275 }
276 }
277
278 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
279 goto exit;
280 }
281 cnode_locked = 1;
282
283 if (!exclusive_lock) {
284 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
285 (int)offset, uio_resid(uio), (int)fp->ff_size,
286 (int)filebytes, 0);
287 }
288
289 /* Check if we do not need to extend the file */
290 if (writelimit <= filebytes) {
291 goto sizeok;
292 }
293
294 cred = vfs_context_ucred(ap->a_context);
295 bytesToAdd = writelimit - filebytes;
296
297 #if QUOTA
298 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
299 cred, 0);
300 if (retval)
301 goto exit;
302 #endif /* QUOTA */
303
304 if (hfs_start_transaction(hfsmp) != 0) {
305 retval = EINVAL;
306 goto exit;
307 }
308
309 while (writelimit > filebytes) {
310 bytesToAdd = writelimit - filebytes;
311 if (cred && suser(cred, NULL) != 0)
312 eflags |= kEFReserveMask;
313
314 /* Protect extents b-tree and allocation bitmap */
315 lockflags = SFL_BITMAP;
316 if (overflow_extents(fp))
317 lockflags |= SFL_EXTENTS;
318 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
319
320 /* Files that are changing size are not hot file candidates. */
321 if (hfsmp->hfc_stage == HFC_RECORDING) {
322 fp->ff_bytesread = 0;
323 }
324 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
325 0, eflags, &actualBytesAdded));
326
327 hfs_systemfile_unlock(hfsmp, lockflags);
328
329 if ((actualBytesAdded == 0) && (retval == E_NONE))
330 retval = ENOSPC;
331 if (retval != E_NONE)
332 break;
333 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
334 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
335 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
336 }
337 (void) hfs_update(vp, TRUE);
338 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
339 (void) hfs_end_transaction(hfsmp);
340
341 /*
342 * If we didn't grow the file enough, try a partial write.
343 * POSIX expects this behavior.
344 */
345 if ((retval == ENOSPC) && (filebytes > offset)) {
346 retval = 0;
347 partialwrite = 1;
348 uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
349 resid -= bytesToAdd;
350 writelimit = filebytes;
351 }
352 sizeok:
353 if (retval == E_NONE) {
354 off_t filesize;
355 off_t zero_off;
356 off_t tail_off;
357 off_t inval_start;
358 off_t inval_end;
359 off_t io_start;
360 int lflag;
361 struct rl_entry *invalid_range;
362
363 if (writelimit > fp->ff_size)
364 filesize = writelimit;
365 else
366 filesize = fp->ff_size;
367
368 lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
369
370 if (offset <= fp->ff_size) {
371 zero_off = offset & ~PAGE_MASK_64;
372
373 /* Check whether the area between the zero offset and the start
374 of the transfer is invalid and should be zero-filled
375 as part of the transfer:
376 */
377 if (offset > zero_off) {
378 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
379 lflag |= IO_HEADZEROFILL;
380 }
381 } else {
382 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
383
384 /* The bytes between fp->ff_size and uio->uio_offset must never be
385 read without being zeroed. The current last block is filled with zeroes
386 if it holds valid data, but in all cases we merely do a little bookkeeping
387 to track the area from the end of the current last page to the start of
388 the area actually written. For the same reason only the bytes up to the
389 start of the page where this write will start are invalidated; any remainder
390 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
391
392 Note that inval_start, the start of the page after the current EOF,
393 may be past the start of the write, in which case the zeroing
394 will be handled by the cluster_write of the actual data.
395 */
396 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
397 inval_end = offset & ~PAGE_MASK_64;
398 zero_off = fp->ff_size;
399
400 if ((fp->ff_size & PAGE_MASK_64) &&
401 (rl_scan(&fp->ff_invalidranges,
402 eof_page_base,
403 fp->ff_size - 1,
404 &invalid_range) != RL_NOOVERLAP)) {
405 /* The page containing the EOF is not valid, so the
406 entire page must be made inaccessible now. If the write
407 starts on a page beyond the page containing the eof
408 (inval_end > eof_page_base), add the
409 whole page to the range to be invalidated. Otherwise
410 (i.e. if the write starts on the same page), zero-fill
411 the entire page explicitly now:
412 */
413 if (inval_end > eof_page_base) {
414 inval_start = eof_page_base;
415 } else {
416 zero_off = eof_page_base;
417 };
418 };
419
420 if (inval_start < inval_end) {
421 struct timeval tv;
422 /* There's some range of data that's going to be marked invalid */
423
424 if (zero_off < inval_start) {
425 /* The pages between inval_start and inval_end are going to be invalidated,
426 and the actual write will start on a page past inval_end. Now's the last
427 chance to zero-fill the page containing the EOF:
428 */
429 hfs_unlock(cp);
430 cnode_locked = 0;
431 retval = cluster_write(vp, (uio_t) 0,
432 fp->ff_size, inval_start,
433 zero_off, (off_t)0,
434 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
435 hfs_lock(cp, HFS_FORCE_LOCK);
436 cnode_locked = 1;
437 if (retval) goto ioerr_exit;
438 offset = uio_offset(uio);
439 };
440
441 /* Mark the remaining area of the newly allocated space as invalid: */
442 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
443 microuptime(&tv);
444 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
445 zero_off = fp->ff_size = inval_end;
446 };
447
448 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
449 };
450
451 /* Check to see whether the area between the end of the write and the end of
452 the page it falls in is invalid and should be zero-filled as part of the transfer:
453 */
454 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
455 if (tail_off > filesize) tail_off = filesize;
456 if (tail_off > writelimit) {
457 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
458 lflag |= IO_TAILZEROFILL;
459 };
460 };
461
462 /*
463 * if the write starts beyond the current EOF (possibly advanced in the
464 * zeroing of the last block, above), then we'll zero fill from the current EOF
465 * to where the write begins:
466 *
467 * NOTE: If (and ONLY if) the portion of the file about to be written is
468 * before the current EOF it might be marked as invalid now and must be
469 * made readable (removed from the invalid ranges) before cluster_write
470 * tries to write it:
471 */
472 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
473 if (io_start < fp->ff_size) {
474 off_t io_end;
475
476 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
477 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
478 };
479
480 hfs_unlock(cp);
481 cnode_locked = 0;
482
483 /*
484 * We need to tell UBC the fork's new size BEFORE calling
485 * cluster_write, in case any of the new pages need to be
486 * paged out before cluster_write completes (which does happen
487 * in embedded systems due to extreme memory pressure).
488 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
489 * will be, so that it can pass that on to cluster_pageout, and
490 * allow those pageouts.
491 *
492 * We don't update ff_size yet since we don't want pageins to
493 * be able to see uninitialized data between the old and new
494 * EOF, until cluster_write has completed and initialized that
495 * part of the file.
496 *
497 * The vnode pager relies on the file size last given to UBC via
498 * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
499 * ff_size (whichever is larger). NOTE: ff_new_size is always
500 * zero, unless we are extending the file via write.
501 */
502 if (filesize > fp->ff_size) {
503 fp->ff_new_size = filesize;
504 ubc_setsize(vp, filesize);
505 }
506 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
507 tail_off, lflag | IO_NOZERODIRTY);
508 if (retval) {
509 fp->ff_new_size = 0; /* no longer extending; use ff_size */
510 if (filesize > origFileSize) {
511 ubc_setsize(vp, origFileSize);
512 }
513 goto ioerr_exit;
514 }
515
516 if (filesize > origFileSize) {
517 fp->ff_size = filesize;
518
519 /* Files that are changing size are not hot file candidates. */
520 if (hfsmp->hfc_stage == HFC_RECORDING) {
521 fp->ff_bytesread = 0;
522 }
523 }
524 fp->ff_new_size = 0; /* ff_size now has the correct size */
525
526 /* If we wrote some bytes, then touch the change and mod times */
527 if (resid > uio_resid(uio)) {
528 cp->c_touch_chgtime = TRUE;
529 cp->c_touch_modtime = TRUE;
530 }
531 }
532 if (partialwrite) {
533 uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
534 resid += bytesToAdd;
535 }
536
537 // XXXdbg - see radar 4871353 for more info
538 {
539 if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
540 VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
541 }
542 }
543 HFS_KNOTE(vp, NOTE_WRITE);
544
545 ioerr_exit:
546 /*
547 * If we successfully wrote any data, and we are not the superuser,
548 * we clear the setuid and setgid bits as a precaution against
549 * tampering.
550 */
551 if (cp->c_mode & (S_ISUID | S_ISGID)) {
552 cred = vfs_context_ucred(ap->a_context);
553 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
554 if (!cnode_locked) {
555 hfs_lock(cp, HFS_FORCE_LOCK);
556 cnode_locked = 1;
557 }
558 cp->c_mode &= ~(S_ISUID | S_ISGID);
559 }
560 }
561 if (retval) {
562 if (ioflag & IO_UNIT) {
563 if (!cnode_locked) {
564 hfs_lock(cp, HFS_FORCE_LOCK);
565 cnode_locked = 1;
566 }
567 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
568 0, ap->a_context);
569 // LP64todo - fix this! resid needs to be user_ssize_t
570 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
571 uio_setresid(uio, resid);
572 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
573 }
574 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
575 if (!cnode_locked) {
576 hfs_lock(cp, HFS_FORCE_LOCK);
577 cnode_locked = 1;
578 }
579 retval = hfs_update(vp, TRUE);
580 }
581 /* Updating vcbWrCnt doesn't need to be atomic. */
582 hfsmp->vcbWrCnt++;
583
584 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
585 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
586 exit:
587 if (cnode_locked)
588 hfs_unlock(cp);
589 hfs_unlock_truncate(cp, exclusive_lock);
590 return (retval);
591 }
592
593 /* support for the "bulk-access" fcntl */
594
595 #define CACHE_LEVELS 16
596 #define NUM_CACHE_ENTRIES (64*16)
597 #define PARENT_IDS_FLAG 0x100
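/*
 * NUM_CACHE_ENTRIES works out to 1024 slots in the per-call access cache
 * below. PARENT_IDS_FLAG is the bit callers set in the request flags to
 * indicate that the file_ids array holds parent directory ids rather than
 * leaf ids (see the PARENT_IDS_FLAG check in do_bulk_access_check).
 */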
598
599 struct access_cache {
600 int numcached;
601 int cachehits; /* these two for statistics gathering */
602 int lookups;
603 unsigned int *acache;
604 unsigned char *haveaccess;
605 };
606
607 struct access_t {
608 uid_t uid; /* IN: effective user id */
609 short flags; /* IN: access requested (i.e. R_OK) */
610 short num_groups; /* IN: number of groups user belongs to */
611 int num_files; /* IN: number of files to process */
612 int *file_ids; /* IN: array of file ids */
613 gid_t *groups; /* IN: array of groups */
614 short *access; /* OUT: access info for each file (0 for 'has access') */
615 };
616
617 struct user_access_t {
618 uid_t uid; /* IN: effective user id */
619 short flags; /* IN: access requested (i.e. R_OK) */
620 short num_groups; /* IN: number of groups user belongs to */
621 int num_files; /* IN: number of files to process */
622 user_addr_t file_ids; /* IN: array of file ids */
623 user_addr_t groups; /* IN: array of groups */
624 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
625 };
626
627
628 // these are the "extended" versions of the above structures
629 // note that it is crucial that they be a different size than
630 // the regular versions
631 struct ext_access_t {
632 uint32_t flags; /* IN: access requested (i.e. R_OK) */
633 uint32_t num_files; /* IN: number of files to process */
634 uint32_t map_size; /* IN: size of the bit map */
635 uint32_t *file_ids; /* IN: Array of file ids */
636 char *bitmap; /* OUT: hash-bitmap of interesting directory ids */
637 short *access; /* OUT: access info for each file (0 for 'has access') */
638 uint32_t num_parents; /* future use */
639 cnid_t *parents; /* future use */
640 };
641
642 struct ext_user_access_t {
643 uint32_t flags; /* IN: access requested (i.e. R_OK) */
644 uint32_t num_files; /* IN: number of files to process */
645 uint32_t map_size; /* IN: size of the bit map */
646 user_addr_t file_ids; /* IN: array of file ids */
647 user_addr_t bitmap; /* IN: array of groups */
648 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
649 uint32_t num_parents;/* future use */
650 user_addr_t parents;/* future use */
651 };
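/*
 * A rough user-space sketch of how these structures are meant to be used
 * (the fsctl(2) call and the exact HFS_BULKACCESS selector name are
 * assumptions taken from hfs_fsctl.h, not something this file defines):
 *
 *     struct access_t a = { 0 };
 *     int ids[2] = { 1234, 5678 };        // hypothetical file IDs
 *     short result[2];
 *     a.uid = geteuid();
 *     a.flags = R_OK;
 *     a.num_files = 2;
 *     a.file_ids = ids;
 *     a.access = result;
 *     fsctl("/Volumes/SomeHFSVolume", HFS_BULKACCESS, &a, 0);
 *     // result[i] == 0 means the caller has access to ids[i]
 */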
652
653
654 /*
655 * Perform a binary search for the given parent_id. Return value is
656 * the index if there is a match. If no_match_indexp is non-NULL it
657 * will be assigned the index at which to insert the item (even if it was
658 * not found).
659 */
660 static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
661 {
662 int index=-1;
663 unsigned int lo=0;
664
665 do {
666 unsigned int mid = ((hi - lo)/2) + lo;
667 unsigned int this_id = array[mid];
668
669 if (parent_id == this_id) {
670 hi = mid;
671 break;
672 }
673
674 if (parent_id < this_id) {
675 hi = mid;
676 continue;
677 }
678
679 if (parent_id > this_id) {
680 lo = mid + 1;
681 continue;
682 }
683 } while(lo < hi);
684
685 /* check if lo and hi converged on the match */
686 if (parent_id == array[hi]) {
687 index = hi;
688 }
689
690 if (no_match_indexp) {
691 *no_match_indexp = hi;
692 }
693
694 return index;
695 }
696
697
698 static int
699 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
700 {
701 unsigned int hi;
702 int matches = 0;
703 int index, no_match_index;
704
705 if (cache->numcached == 0) {
706 *indexp = 0;
707 return 0; // table is empty, so insert at index=0 and report no match
708 }
709
710 if (cache->numcached > NUM_CACHE_ENTRIES) {
711 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
712 cache->numcached, NUM_CACHE_ENTRIES);*/
713 cache->numcached = NUM_CACHE_ENTRIES;
714 }
715
716 hi = cache->numcached - 1;
717
718 index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);
719
720 /* if no existing entry found, find index for new one */
721 if (index == -1) {
722 index = no_match_index;
723 matches = 0;
724 } else {
725 matches = 1;
726 }
727
728 *indexp = index;
729 return matches;
730 }
731
732 /*
733 * Add a node to the access_cache at the given index (or do a lookup first
734 * to find the index if -1 is passed in). We currently do a replace rather
735 * than an insert if the cache is full.
736 */
737 static void
738 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
739 {
740 int lookup_index = -1;
741
742 /* need to do a lookup first if -1 passed for index */
743 if (index == -1) {
744 if (lookup_bucket(cache, &lookup_index, nodeID)) {
745 if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
746 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
747 cache->haveaccess[lookup_index] = access;
748 }
749
750 /* mission accomplished */
751 return;
752 } else {
753 index = lookup_index;
754 }
755
756 }
757
758 /* if the cache is full, do a replace rather than an insert */
759 if (cache->numcached >= NUM_CACHE_ENTRIES) {
760 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
761 cache->numcached = NUM_CACHE_ENTRIES-1;
762
763 if (index > cache->numcached) {
764 // printf("index %d pinned to %d\n", index, cache->numcached);
765 index = cache->numcached;
766 }
767 }
768
769 if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
770 index++;
771 }
772
773 if (index >= 0 && index < cache->numcached) {
774 /* only do bcopy if we're inserting */
775 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
776 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
777 }
778
779 cache->acache[index] = nodeID;
780 cache->haveaccess[index] = access;
781 cache->numcached++;
782 }
783
784
785 struct cinfo {
786 uid_t uid;
787 gid_t gid;
788 mode_t mode;
789 cnid_t parentcnid;
790 u_int16_t recflags;
791 };
792
793 static int
794 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
795 {
796 struct cinfo *cip = (struct cinfo *)arg;
797
798 cip->uid = attrp->ca_uid;
799 cip->gid = attrp->ca_gid;
800 cip->mode = attrp->ca_mode;
801 cip->parentcnid = descp->cd_parentcnid;
802 cip->recflags = attrp->ca_recflags;
803
804 return (0);
805 }
806
807 /*
808 * Look up the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
809 * isn't in core, then go to the catalog.
810 */
811 static int
812 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
813 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
814 {
815 int error = 0;
816
817 /* if this id matches the one the fsctl was called with, skip the lookup */
818 if (cnid == skip_cp->c_cnid) {
819 cnattrp->ca_uid = skip_cp->c_uid;
820 cnattrp->ca_gid = skip_cp->c_gid;
821 cnattrp->ca_mode = skip_cp->c_mode;
822 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
823 } else {
824 struct cinfo c_info;
825
826 /* otherwise, check the cnode hash in case the file/dir is in core */
827 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
828 cnattrp->ca_uid = c_info.uid;
829 cnattrp->ca_gid = c_info.gid;
830 cnattrp->ca_mode = c_info.mode;
831 cnattrp->ca_recflags = c_info.recflags;
832 keyp->hfsPlus.parentID = c_info.parentcnid;
833 } else {
834 int lockflags;
835
836 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
837
838 /* lookup this cnid in the catalog */
839 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
840
841 hfs_systemfile_unlock(hfsmp, lockflags);
842
843 cache->lookups++;
844 }
845 }
846
847 return (error);
848 }
849
850
851 /*
852 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
853 * up to CACHE_LEVELS as we progress towards the root.
854 */
855 static int
856 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
857 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev,
858 struct vfs_context *my_context,
859 char *bitmap,
860 uint32_t map_size,
861 cnid_t* parents,
862 uint32_t num_parents)
863 {
864 int myErr = 0;
865 int myResult;
866 HFSCatalogNodeID thisNodeID;
867 unsigned int myPerms;
868 struct cat_attr cnattr;
869 int cache_index = -1, scope_index = -1, scope_idx_start = -1;
870 CatalogKey catkey;
871
872 int i = 0, ids_to_cache = 0;
873 int parent_ids[CACHE_LEVELS];
874
875 thisNodeID = nodeID;
876 while (thisNodeID >= kRootDirID) {
877 myResult = 0; /* default to "no access" */
878
879 /* check the cache before resorting to hitting the catalog */
880
881 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
882 * to look any further after hitting cached dir */
883
884 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
885 cache->cachehits++;
886 myErr = cache->haveaccess[cache_index];
887 if (scope_index != -1) {
888 if (myErr == ESRCH) {
889 myErr = 0;
890 }
891 } else {
892 scope_index = 0; // so we'll just use the cache result
893 scope_idx_start = ids_to_cache;
894 }
895 myResult = (myErr == 0) ? 1 : 0;
896 goto ExitThisRoutine;
897 }
898
899
900 if (parents) {
901 int tmp;
902 tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
903 if (scope_index == -1)
904 scope_index = tmp;
905 if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
906 scope_idx_start = ids_to_cache;
907 }
908 }
909
910 /* remember which parents we want to cache */
911 if (ids_to_cache < CACHE_LEVELS) {
912 parent_ids[ids_to_cache] = thisNodeID;
913 ids_to_cache++;
914 }
915 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
916 if (bitmap && map_size) {
917 bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
918 }
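/*
 * Worked example: for thisNodeID == 100 the line above sets bit
 * (100 & 7) == 4 in bitmap byte (100 / 8) % map_size == 12 % map_size.
 */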
919
920
921 /* do the lookup (checks the cnode hash, then the catalog) */
922 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr);
923 if (myErr) {
924 goto ExitThisRoutine; /* no access */
925 }
926
927 /* Root always gets access. */
928 if (suser(myp_ucred, NULL) == 0) {
929 thisNodeID = catkey.hfsPlus.parentID;
930 myResult = 1;
931 continue;
932 }
933
934 // if the thing has acl's, do the full permission check
935 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
936 struct vnode *vp;
937
938 /* get the vnode for this cnid */
939 myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0);
940 if ( myErr ) {
941 myResult = 0;
942 goto ExitThisRoutine;
943 }
944
945 thisNodeID = VTOC(vp)->c_parentcnid;
946
947 hfs_unlock(VTOC(vp));
948
949 if (vnode_vtype(vp) == VDIR) {
950 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
951 } else {
952 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
953 }
954
955 vnode_put(vp);
956 if (myErr) {
957 myResult = 0;
958 goto ExitThisRoutine;
959 }
960 } else {
961 unsigned int flags;
962
963 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
964 cnattr.ca_mode, hfsmp->hfs_mp,
965 myp_ucred, theProcPtr);
966
967 if (cnattr.ca_mode & S_IFDIR) {
968 flags = R_OK | X_OK;
969 } else {
970 flags = R_OK;
971 }
972 if ( (myPerms & flags) != flags) {
973 myResult = 0;
974 myErr = EACCES;
975 goto ExitThisRoutine; /* no access */
976 }
977
978 /* up the hierarchy we go */
979 thisNodeID = catkey.hfsPlus.parentID;
980 }
981 }
982
983 /* if here, we have access to this node */
984 myResult = 1;
985
986 ExitThisRoutine:
987 if (parents && myErr == 0 && scope_index == -1) {
988 myErr = ESRCH;
989 }
990
991 if (myErr) {
992 myResult = 0;
993 }
994 *err = myErr;
995
996 /* cache the parent directory(ies) */
997 for (i = 0; i < ids_to_cache; i++) {
998 if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
999 add_node(cache, -1, parent_ids[i], ESRCH);
1000 } else {
1001 add_node(cache, -1, parent_ids[i], myErr);
1002 }
1003 }
1004
1005 return (myResult);
1006 }
1007
1008 static int
1009 do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
1010 struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
1011 {
1012 boolean_t is64bit;
1013
1014 /*
1015 * NOTE: on entry, the vnode is locked. In case this vnode
1016 * happens to be in our list of file_ids, we note it so we
1017 * avoid calling hfs_chashget_nowait() on that id, as that
1018 * would cause a "locking against myself" panic.
1019 */
1020 Boolean check_leaf = true;
1021
1022 struct ext_user_access_t *user_access_structp;
1023 struct ext_user_access_t tmp_user_access;
1024 struct access_cache cache;
1025
1026 int error = 0;
1027 unsigned int i;
1028
1029 dev_t dev = VTOC(vp)->c_dev;
1030
1031 short flags;
1032 unsigned int num_files = 0;
1033 int map_size = 0;
1034 int num_parents = 0;
1035 int *file_ids=NULL;
1036 short *access=NULL;
1037 char *bitmap=NULL;
1038 cnid_t *parents=NULL;
1039 int leaf_index;
1040
1041 cnid_t cnid;
1042 cnid_t prevParent_cnid = 0;
1043 unsigned int myPerms;
1044 short myaccess = 0;
1045 struct cat_attr cnattr;
1046 CatalogKey catkey;
1047 struct cnode *skip_cp = VTOC(vp);
1048 kauth_cred_t cred = vfs_context_ucred(context);
1049 proc_t p = vfs_context_proc(context);
1050
1051 is64bit = proc_is64bit(p);
1052
1053 /* initialize the local cache and buffers */
1054 cache.numcached = 0;
1055 cache.cachehits = 0;
1056 cache.lookups = 0;
1057 cache.acache = NULL;
1058 cache.haveaccess = NULL;
1059
1060 /* struct copyin done during dispatch... need to copy file_id array separately */
1061 if (ap->a_data == NULL) {
1062 error = EINVAL;
1063 goto err_exit_bulk_access;
1064 }
1065
1066 if (is64bit) {
1067 if (arg_size != sizeof(struct ext_user_access_t)) {
1068 error = EINVAL;
1069 goto err_exit_bulk_access;
1070 }
1071
1072 user_access_structp = (struct ext_user_access_t *)ap->a_data;
1073
1074 } else if (arg_size == sizeof(struct access_t)) {
1075 struct access_t *accessp = (struct access_t *)ap->a_data;
1076
1077 // convert an old style bulk-access struct to the new style
1078 tmp_user_access.flags = accessp->flags;
1079 tmp_user_access.num_files = accessp->num_files;
1080 tmp_user_access.map_size = 0;
1081 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1082 tmp_user_access.bitmap = USER_ADDR_NULL;
1083 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1084 tmp_user_access.num_parents = 0;
1085 user_access_structp = &tmp_user_access;
1086
1087 } else if (arg_size == sizeof(struct ext_access_t)) {
1088 struct ext_access_t *accessp = (struct ext_access_t *)ap->a_data;
1089
1090 // up-cast from a 32-bit version of the struct
1091 tmp_user_access.flags = accessp->flags;
1092 tmp_user_access.num_files = accessp->num_files;
1093 tmp_user_access.map_size = accessp->map_size;
1094 tmp_user_access.num_parents = accessp->num_parents;
1095
1096 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1097 tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
1098 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1099 tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);
1100
1101 user_access_structp = &tmp_user_access;
1102 } else {
1103 error = EINVAL;
1104 goto err_exit_bulk_access;
1105 }
1106
1107 map_size = user_access_structp->map_size;
1108
1109 num_files = user_access_structp->num_files;
1110
1111 num_parents= user_access_structp->num_parents;
1112
1113 if (num_files < 1) {
1114 goto err_exit_bulk_access;
1115 }
1116 if (num_files > 1024) {
1117 error = EINVAL;
1118 goto err_exit_bulk_access;
1119 }
1120
1121 if (num_parents > 1024) {
1122 error = EINVAL;
1123 goto err_exit_bulk_access;
1124 }
1125
1126 file_ids = (int *) kalloc(sizeof(int) * num_files);
1127 access = (short *) kalloc(sizeof(short) * num_files);
1128 if (map_size) {
1129 bitmap = (char *) kalloc(sizeof(char) * map_size);
1130 }
1131
1132 if (num_parents) {
1133 parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
1134 }
1135
1136 cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
1137 cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1138
1139 if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
1140 if (file_ids) {
1141 kfree(file_ids, sizeof(int) * num_files);
1142 }
1143 if (bitmap) {
1144 kfree(bitmap, sizeof(char) * map_size);
1145 }
1146 if (access) {
1147 kfree(access, sizeof(short) * num_files);
1148 }
1149 if (cache.acache) {
1150 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1151 }
1152 if (cache.haveaccess) {
1153 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1154 }
1155 if (parents) {
1156 kfree(parents, sizeof(cnid_t) * num_parents);
1157 }
1158 return ENOMEM;
1159 }
1160
1161 // make sure the bitmap is zeroed out...
1162 if (bitmap) {
1163 bzero(bitmap, (sizeof(char) * map_size));
1164 }
1165
1166 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1167 num_files * sizeof(int)))) {
1168 goto err_exit_bulk_access;
1169 }
1170
1171 if (num_parents) {
1172 if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
1173 num_parents * sizeof(cnid_t)))) {
1174 goto err_exit_bulk_access;
1175 }
1176 }
1177
1178 flags = user_access_structp->flags;
1179 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1180 flags = R_OK;
1181 }
1182
1183 /* check if we've been passed leaf node ids or parent ids */
1184 if (flags & PARENT_IDS_FLAG) {
1185 check_leaf = false;
1186 }
1187
1188 /* Check access to each file_id passed in */
1189 for (i = 0; i < num_files; i++) {
1190 leaf_index=-1;
1191 cnid = (cnid_t) file_ids[i];
1192
1193 /* root always has access */
1194 if ((!parents) && (!suser(cred, NULL))) {
1195 access[i] = 0;
1196 continue;
1197 }
1198
1199 if (check_leaf) {
1200 /* do the lookup (checks the cnode hash, then the catalog) */
1201 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr);
1202 if (error) {
1203 access[i] = (short) error;
1204 continue;
1205 }
1206
1207 if (parents) {
1208 // Check if the leaf matches one of the parent scopes
1209 leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
1210 }
1211
1212 // if the thing has acl's, do the full permission check
1213 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1214 struct vnode *cvp;
1215 int myErr = 0;
1216 /* get the vnode for this cnid */
1217 myErr = hfs_vget(hfsmp, cnid, &cvp, 0);
1218 if ( myErr ) {
1219 access[i] = myErr;
1220 continue;
1221 }
1222
1223 hfs_unlock(VTOC(cvp));
1224
1225 if (vnode_vtype(cvp) == VDIR) {
1226 myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
1227 } else {
1228 myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
1229 }
1230
1231 vnode_put(cvp);
1232 if (myErr) {
1233 access[i] = myErr;
1234 continue;
1235 }
1236 } else {
1237 /* before calling CheckAccess(), check the target file for read access */
1238 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1239 cnattr.ca_mode, hfsmp->hfs_mp, cred, p);
1240
1241 /* fail fast if no access */
1242 if ((myPerms & flags) == 0) {
1243 access[i] = EACCES;
1244 continue;
1245 }
1246 }
1247 } else {
1248 /* we were passed an array of parent ids */
1249 catkey.hfsPlus.parentID = cnid;
1250 }
1251
1252 /* if the last guy had the same parent and had access, we're done */
1253 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1254 cache.cachehits++;
1255 access[i] = 0;
1256 continue;
1257 }
1258
1259 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1260 skip_cp, p, cred, dev, context,bitmap, map_size, parents, num_parents);
1261
1262 if (myaccess || (error == ESRCH && leaf_index != -1)) {
1263 access[i] = 0; // have access.. no errors to report
1264 } else {
1265 access[i] = (error != 0 ? (short) error : EACCES);
1266 }
1267
1268 prevParent_cnid = catkey.hfsPlus.parentID;
1269 }
1270
1271 /* copyout the access array */
1272 if ((error = copyout((caddr_t)access, user_access_structp->access,
1273 num_files * sizeof (short)))) {
1274 goto err_exit_bulk_access;
1275 }
1276 if (map_size && bitmap) {
1277 if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
1278 map_size * sizeof (char)))) {
1279 goto err_exit_bulk_access;
1280 }
1281 }
1282
1283
1284 err_exit_bulk_access:
1285
1286 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1287
1288 if (file_ids)
1289 kfree(file_ids, sizeof(int) * num_files);
1290 if (parents)
1291 kfree(parents, sizeof(cnid_t) * num_parents);
1292 if (bitmap)
1293 kfree(bitmap, sizeof(char) * map_size);
1294 if (access)
1295 kfree(access, sizeof(short) * num_files);
1296 if (cache.acache)
1297 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1298 if (cache.haveaccess)
1299 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1300
1301 return (error);
1302 }
1303
1304
1305 /* end "bulk-access" support */
1306
1307
1308 /*
1309 * Callback for use with freeze ioctl.
1310 */
1311 static int
1312 hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
1313 {
1314 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
1315
1316 return 0;
1317 }
1318
1319 /*
1320 * Control filesystem operating characteristics.
1321 */
1322 int
1323 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
1324 vnode_t a_vp;
1325 int a_command;
1326 caddr_t a_data;
1327 int a_fflag;
1328 vfs_context_t a_context;
1329 } */ *ap)
1330 {
1331 struct vnode * vp = ap->a_vp;
1332 struct hfsmount *hfsmp = VTOHFS(vp);
1333 vfs_context_t context = ap->a_context;
1334 kauth_cred_t cred = vfs_context_ucred(context);
1335 proc_t p = vfs_context_proc(context);
1336 struct vfsstatfs *vfsp;
1337 boolean_t is64bit;
1338
1339 is64bit = proc_is64bit(p);
1340
1341 switch (ap->a_command) {
1342
1343 case HFS_GETPATH:
1344 {
1345 struct vnode *file_vp;
1346 cnid_t cnid;
1347 int outlen;
1348 char *bufptr;
1349 int error;
1350
1351 /* Caller must be owner of file system. */
1352 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1353 if (suser(cred, NULL) &&
1354 kauth_cred_getuid(cred) != vfsp->f_owner) {
1355 return (EACCES);
1356 }
1357 /* Target vnode must be file system's root. */
1358 if (!vnode_isvroot(vp)) {
1359 return (EINVAL);
1360 }
1361 bufptr = (char *)ap->a_data;
1362 cnid = strtoul(bufptr, NULL, 10);
1363
1364 /* We need to call hfs_vfs_vget to leverage the code that will fix the
1365 * origin list for us if needed, as opposed to calling hfs_vget, since
1366 * we will need it for the subsequent build_path call.
1367 */
1368 if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
1369 return (error);
1370 }
1371 error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);
1372 vnode_put(file_vp);
1373
1374 return (error);
1375 }
1376
1377 case HFS_PREV_LINK:
1378 case HFS_NEXT_LINK:
1379 {
1380 cnid_t linkfileid;
1381 cnid_t nextlinkid;
1382 cnid_t prevlinkid;
1383 int error;
1384
1385 /* Caller must be owner of file system. */
1386 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1387 if (suser(cred, NULL) &&
1388 kauth_cred_getuid(cred) != vfsp->f_owner) {
1389 return (EACCES);
1390 }
1391 /* Target vnode must be file system's root. */
1392 if (!vnode_isvroot(vp)) {
1393 return (EINVAL);
1394 }
1395 linkfileid = *(cnid_t *)ap->a_data;
1396 if (linkfileid < kHFSFirstUserCatalogNodeID) {
1397 return (EINVAL);
1398 }
1399 if ((error = hfs_lookuplink(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
1400 return (error);
1401 }
1402 if (ap->a_command == HFS_NEXT_LINK) {
1403 *(cnid_t *)ap->a_data = nextlinkid;
1404 } else {
1405 *(cnid_t *)ap->a_data = prevlinkid;
1406 }
1407 return (0);
1408 }
1409
1410 case HFS_RESIZE_PROGRESS: {
1411
1412 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1413 if (suser(cred, NULL) &&
1414 kauth_cred_getuid(cred) != vfsp->f_owner) {
1415 return (EACCES); /* must be owner of file system */
1416 }
1417 if (!vnode_isvroot(vp)) {
1418 return (EINVAL);
1419 }
1420 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
1421 }
1422
1423 case HFS_RESIZE_VOLUME: {
1424 u_int64_t newsize;
1425 u_int64_t cursize;
1426
1427 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1428 if (suser(cred, NULL) &&
1429 kauth_cred_getuid(cred) != vfsp->f_owner) {
1430 return (EACCES); /* must be owner of file system */
1431 }
1432 if (!vnode_isvroot(vp)) {
1433 return (EINVAL);
1434 }
1435 newsize = *(u_int64_t *)ap->a_data;
1436 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
1437
1438 if (newsize > cursize) {
1439 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
1440 } else if (newsize < cursize) {
1441 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
1442 } else {
1443 return (0);
1444 }
1445 }
1446 case HFS_CHANGE_NEXT_ALLOCATION: {
1447 int error = 0; /* Assume success */
1448 u_int32_t location;
1449
1450 if (vnode_vfsisrdonly(vp)) {
1451 return (EROFS);
1452 }
1453 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1454 if (suser(cred, NULL) &&
1455 kauth_cred_getuid(cred) != vfsp->f_owner) {
1456 return (EACCES); /* must be owner of file system */
1457 }
1458 if (!vnode_isvroot(vp)) {
1459 return (EINVAL);
1460 }
1461 HFS_MOUNT_LOCK(hfsmp, TRUE);
1462 location = *(u_int32_t *)ap->a_data;
1463 if ((location >= hfsmp->allocLimit) &&
1464 (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
1465 error = EINVAL;
1466 goto fail_change_next_allocation;
1467 }
1468 /* Return previous value. */
1469 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
1470 if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
1471 /* For the magic location value, set nextAllocation to the next block
1472 * after the metadata zone and set a flag in the mount structure to indicate
1473 * that nextAllocation should not be updated again.
1474 */
1475 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
1476 hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1477 } else {
1478 hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1479 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
1480 }
1481 MarkVCBDirty(hfsmp);
1482 fail_change_next_allocation:
1483 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1484 return (error);
1485 }
1486
1487 #ifdef HFS_SPARSE_DEV
1488 case HFS_SETBACKINGSTOREINFO: {
1489 struct vnode * bsfs_rootvp;
1490 struct vnode * di_vp;
1491 struct hfs_backingstoreinfo *bsdata;
1492 int error = 0;
1493
1494 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
1495 return (EALREADY);
1496 }
1497 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1498 if (suser(cred, NULL) &&
1499 kauth_cred_getuid(cred) != vfsp->f_owner) {
1500 return (EACCES); /* must be owner of file system */
1501 }
1502 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
1503 if (bsdata == NULL) {
1504 return (EINVAL);
1505 }
1506 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
1507 return (error);
1508 }
1509 if ((error = vnode_getwithref(di_vp))) {
1510 file_drop(bsdata->backingfd);
1511 return(error);
1512 }
1513
1514 if (vnode_mount(vp) == vnode_mount(di_vp)) {
1515 (void)vnode_put(di_vp);
1516 file_drop(bsdata->backingfd);
1517 return (EINVAL);
1518 }
1519
1520 /*
1521 * Obtain the backing fs root vnode and keep a reference
1522 * on it. This reference will be dropped in hfs_unmount.
1523 */
1524 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
1525 if (error) {
1526 (void)vnode_put(di_vp);
1527 file_drop(bsdata->backingfd);
1528 return (error);
1529 }
1530 vnode_ref(bsfs_rootvp);
1531 vnode_put(bsfs_rootvp);
1532
1533 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
1534 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
1535 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
1536 hfsmp->hfs_sparsebandblks *= 4;
1537
1538 vfs_markdependency(hfsmp->hfs_mp);
1539
1540 (void)vnode_put(di_vp);
1541 file_drop(bsdata->backingfd);
1542 return (0);
1543 }
1544 case HFS_CLRBACKINGSTOREINFO: {
1545 struct vnode * tmpvp;
1546
1547 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1548 if (suser(cred, NULL) &&
1549 kauth_cred_getuid(cred) != vfsp->f_owner) {
1550 return (EACCES); /* must be owner of file system */
1551 }
1552 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
1553 hfsmp->hfs_backingfs_rootvp) {
1554
1555 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
1556 tmpvp = hfsmp->hfs_backingfs_rootvp;
1557 hfsmp->hfs_backingfs_rootvp = NULLVP;
1558 hfsmp->hfs_sparsebandblks = 0;
1559 vnode_rele(tmpvp);
1560 }
1561 return (0);
1562 }
1563 #endif /* HFS_SPARSE_DEV */
1564
1565 case F_FREEZE_FS: {
1566 struct mount *mp;
1567
1568 if (!is_suser())
1569 return (EACCES);
1570
1571 mp = vnode_mount(vp);
1572 hfsmp = VFSTOHFS(mp);
1573
1574 if (!(hfsmp->jnl))
1575 return (ENOTSUP);
1576
1577 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
1578
1579 // flush things before we get started to try and prevent
1580 // dirty data from being paged out while we're frozen.
1581 // note: can't do this after taking the lock as it will
1582 // deadlock against ourselves.
1583 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1584 hfs_global_exclusive_lock_acquire(hfsmp);
1585 journal_flush(hfsmp->jnl);
1586
1587 // don't need to iterate on all vnodes, we just need to
1588 // wait for writes to the system files and the device vnode
1589 if (HFSTOVCB(hfsmp)->extentsRefNum)
1590 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1591 if (HFSTOVCB(hfsmp)->catalogRefNum)
1592 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1593 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1594 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1595 if (hfsmp->hfs_attribute_vp)
1596 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1597 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1598
1599 hfsmp->hfs_freezing_proc = current_proc();
1600
1601 return (0);
1602 }
1603
1604 case F_THAW_FS: {
1605 if (!is_suser())
1606 return (EACCES);
1607
1608 // if we're not the one who froze the fs then we
1609 // can't thaw it.
1610 if (hfsmp->hfs_freezing_proc != current_proc()) {
1611 return EPERM;
1612 }
1613
1614 // NOTE: if you add code here, also go check the
1615 // code that "thaws" the fs in hfs_vnop_close()
1616 //
1617 hfsmp->hfs_freezing_proc = NULL;
1618 hfs_global_exclusive_lock_release(hfsmp);
1619 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1620
1621 return (0);
1622 }
1623
1624 case HFS_BULKACCESS_FSCTL: {
1625 int size;
1626
1627 if (hfsmp->hfs_flags & HFS_STANDARD) {
1628 return EINVAL;
1629 }
1630
1631 if (is64bit) {
1632 size = sizeof(struct user_access_t);
1633 } else {
1634 size = sizeof(struct access_t);
1635 }
1636
1637 return do_bulk_access_check(hfsmp, vp, ap, size, context);
1638 }
1639
1640 case HFS_EXT_BULKACCESS_FSCTL: {
1641 int size;
1642
1643 if (hfsmp->hfs_flags & HFS_STANDARD) {
1644 return EINVAL;
1645 }
1646
1647 if (is64bit) {
1648 size = sizeof(struct ext_user_access_t);
1649 } else {
1650 size = sizeof(struct ext_access_t);
1651 }
1652
1653 return do_bulk_access_check(hfsmp, vp, ap, size, context);
1654 }
1655
1656 case HFS_SETACLSTATE: {
1657 int state;
1658
1659 if (ap->a_data == NULL) {
1660 return (EINVAL);
1661 }
1662
1663 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1664 state = *(int *)ap->a_data;
1665
1666 // super-user can enable or disable acl's on a volume.
1667 // the volume owner can only enable acl's
1668 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1669 return (EPERM);
1670 }
1671 if (state == 0 || state == 1)
1672 return hfs_set_volxattr(hfsmp, HFS_SETACLSTATE, state);
1673 else
1674 return (EINVAL);
1675 }
1676
1677 case HFS_SET_XATTREXTENTS_STATE: {
1678 int state;
1679
1680 if (ap->a_data == NULL) {
1681 return (EINVAL);
1682 }
1683
1684 state = *(int *)ap->a_data;
1685
1686 /* Super-user can enable or disable extent-based extended
1687 * attribute support on a volume
1688 */
1689 if (!is_suser()) {
1690 return (EPERM);
1691 }
1692 if (state == 0 || state == 1)
1693 return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
1694 else
1695 return (EINVAL);
1696 }
1697
1698 case F_FULLFSYNC: {
1699 int error;
1700
1701 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1702 if (error == 0) {
1703 error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
1704 hfs_unlock(VTOC(vp));
1705 }
1706
1707 return error;
1708 }
1709
1710 case F_CHKCLEAN: {
1711 register struct cnode *cp;
1712 int error;
1713
1714 if (!vnode_isreg(vp))
1715 return EINVAL;
1716
1717 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1718 if (error == 0) {
1719 cp = VTOC(vp);
1720 /*
1721 * used by regression tests to determine if
1722 * all the dirty pages (via write) have been cleaned
1723 * after a call to 'fsync'.
1724 */
1725 error = is_file_clean(vp, VTOF(vp)->ff_size);
1726 hfs_unlock(cp);
1727 }
1728 return (error);
1729 }
1730
1731 case F_RDADVISE: {
1732 register struct radvisory *ra;
1733 struct filefork *fp;
1734 int error;
1735
1736 if (!vnode_isreg(vp))
1737 return EINVAL;
1738
1739 ra = (struct radvisory *)(ap->a_data);
1740 fp = VTOF(vp);
1741
1742 /* Protect against a size change. */
1743 hfs_lock_truncate(VTOC(vp), TRUE);
1744
1745 if (ra->ra_offset >= fp->ff_size) {
1746 error = EFBIG;
1747 } else {
1748 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1749 }
1750
1751 hfs_unlock_truncate(VTOC(vp), TRUE);
1752 return (error);
1753 }
1754
1755 case F_READBOOTSTRAP:
1756 case F_WRITEBOOTSTRAP:
1757 {
1758 struct vnode *devvp = NULL;
1759 user_fbootstraptransfer_t *user_bootstrapp;
1760 int devBlockSize;
1761 int error;
1762 uio_t auio;
1763 daddr64_t blockNumber;
1764 u_long blockOffset;
1765 u_long xfersize;
1766 struct buf *bp;
1767 user_fbootstraptransfer_t user_bootstrap;
1768
1769 if (!vnode_isvroot(vp))
1770 return (EINVAL);
1771 /* LP64 - when the caller is a 64-bit process we are passed a pointer
1772 * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
1773 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1774 */
1775 if (is64bit) {
1776 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1777 }
1778 else {
1779 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1780 user_bootstrapp = &user_bootstrap;
1781 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1782 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1783 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1784 }
1785 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1786 return EINVAL;
1787
1788 devvp = VTOHFS(vp)->hfs_devvp;
1789 auio = uio_create(1, user_bootstrapp->fbt_offset,
1790 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1791 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1792 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1793
1794 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1795
1796 while (uio_resid(auio) > 0) {
1797 blockNumber = uio_offset(auio) / devBlockSize;
1798 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1799 if (error) {
1800 if (bp) buf_brelse(bp);
1801 uio_free(auio);
1802 return error;
1803 };
1804
1805 blockOffset = uio_offset(auio) % devBlockSize;
1806 xfersize = devBlockSize - blockOffset;
1807 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1808 if (error) {
1809 buf_brelse(bp);
1810 uio_free(auio);
1811 return error;
1812 };
1813 if (uio_rw(auio) == UIO_WRITE) {
1814 error = VNOP_BWRITE(bp);
1815 if (error) {
1816 uio_free(auio);
1817 return error;
1818 }
1819 } else {
1820 buf_brelse(bp);
1821 };
1822 };
1823 uio_free(auio);
1824 };
1825 return 0;
1826
1827 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1828 {
1829 if (is64bit) {
1830 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1831 }
1832 else {
1833 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1834 }
1835 return 0;
1836 }
1837
1838 case HFS_GET_MOUNT_TIME:
1839 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1840 break;
1841
1842 case HFS_GET_LAST_MTIME:
1843 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1844 break;
1845
1846 case HFS_SET_BOOT_INFO:
1847 if (!vnode_isvroot(vp))
1848 return(EINVAL);
1849 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1850 return(EACCES); /* must be superuser or owner of filesystem */
1851 HFS_MOUNT_LOCK(hfsmp, TRUE);
1852 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1853 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1854 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1855 break;
1856
1857 case HFS_GET_BOOT_INFO:
1858 if (!vnode_isvroot(vp))
1859 return(EINVAL);
1860 HFS_MOUNT_LOCK(hfsmp, TRUE);
1861 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1862 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1863 break;
1864
1865 case HFS_MARK_BOOT_CORRUPT:
1866 /* Mark the boot volume corrupt by setting
1867 * kHFSVolumeInconsistentBit in the volume header. This will
1868 * force fsck_hfs on next mount.
1869 */
1870 if (!is_suser()) {
1871 return EACCES;
1872 }
1873
1874 /* Allowed only on the root vnode of the boot volume */
1875 if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
1876 !vnode_isvroot(vp)) {
1877 return EINVAL;
1878 }
1879
1880 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
1881 hfs_mark_volume_inconsistent(hfsmp);
1882 break;
1883
1884 default:
1885 return (ENOTTY);
1886 }
1887
1888 /* Should never get here */
1889 return 0;
1890 }
1891
1892 /*
1893 * select
1894 */
1895 int
1896 hfs_vnop_select(__unused struct vnop_select_args *ap)
1897 /*
1898 struct vnop_select_args {
1899 vnode_t a_vp;
1900 int a_which;
1901 int a_fflags;
1902 void *a_wql;
1903 vfs_context_t a_context;
1904 };
1905 */
1906 {
1907 /*
1908 * We should really check to see if I/O is possible.
1909 */
1910 return (1);
1911 }
1912
1913 /*
1914 * Converts a logical block number to a physical block, and optionally returns
1915 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1916 * The physical block number is based on the device block size, currently it is 512 bytes.
1917 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
1918 */
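/*
 * Illustrative example (assumed sizes): with a 4096-byte logical block size
 * and MapFileBlockC() reporting 65536 contiguous bytes, the run calculation
 * below gives *runp = (65536 / 4096) - 1 = 15, i.e. fifteen more logical
 * blocks can be read after the block being mapped.
 */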
1919 int
1920 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
1921 {
1922 struct filefork *fp = VTOF(vp);
1923 struct hfsmount *hfsmp = VTOHFS(vp);
1924 int retval = E_NONE;
1925 u_int32_t logBlockSize;
1926 size_t bytesContAvail = 0;
1927 off_t blockposition;
1928 int lockExtBtree;
1929 int lockflags = 0;
1930
1931 /*
1932 * Check for underlying vnode requests and ensure that logical
1933 * to physical mapping is requested.
1934 */
1935 if (vpp != NULL)
1936 *vpp = hfsmp->hfs_devvp;
1937 if (bnp == NULL)
1938 return (0);
1939
1940 logBlockSize = GetLogicalBlockSize(vp);
1941 blockposition = (off_t)bn * logBlockSize;
1942
1943 lockExtBtree = overflow_extents(fp);
1944
1945 if (lockExtBtree)
1946 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
1947
1948 retval = MacToVFSError(
1949 MapFileBlockC (HFSTOVCB(hfsmp),
1950 (FCB*)fp,
1951 MAXPHYSIO,
1952 blockposition,
1953 bnp,
1954 &bytesContAvail));
1955
1956 if (lockExtBtree)
1957 hfs_systemfile_unlock(hfsmp, lockflags);
1958
1959 if (retval == E_NONE) {
1960 /* Figure out how many read ahead blocks there are */
1961 if (runp != NULL) {
1962 if (can_cluster(logBlockSize)) {
1963 /* Make sure this result never goes negative: */
1964 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1965 } else {
1966 *runp = 0;
1967 }
1968 }
1969 }
1970 return (retval);
1971 }
1972
1973 /*
1974 * Convert logical block number to file offset.
1975 */
1976 int
1977 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1978 /*
1979 struct vnop_blktooff_args {
1980 vnode_t a_vp;
1981 daddr64_t a_lblkno;
1982 off_t *a_offset;
1983 };
1984 */
1985 {
1986 if (ap->a_vp == NULL)
1987 return (EINVAL);
1988 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1989
1990 return(0);
1991 }
1992
1993 /*
1994 * Convert file offset to logical block number.
1995 */
1996 int
1997 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1998 /*
1999 struct vnop_offtoblk_args {
2000 vnode_t a_vp;
2001 off_t a_offset;
2002 daddr64_t *a_lblkno;
2003 };
2004 */
2005 {
2006 if (ap->a_vp == NULL)
2007 return (EINVAL);
2008 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
2009
2010 return(0);
2011 }
2012
2013 /*
2014 * Map file offset to physical block number.
2015 *
2016 * If this function is called for write operation, and if the file
2017 * had virtual blocks allocated (delayed allocation), real blocks
2018 * are allocated by calling ExtendFileC().
2019 *
2020 * If this function is called for read operation, and if the file
2021 * had virtual blocks allocated (delayed allocation), no change
2022 * to the size of file is done, and if required, rangelist is
2023 * searched for mapping.
2024 *
2025 * System file cnodes are expected to be locked (shared or exclusive).
2026 */
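/*
 * Return convention used below: when a read maps into an uncommitted
 * (invalid) range with no physical backing, *a_bpn is set to -1 and *a_run
 * is trimmed to that range, signalling the caller to treat the span as a
 * hole to be zero-filled instead of issuing device I/O.
 */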
2027 int
2028 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
2029 /*
2030 struct vnop_blockmap_args {
2031 vnode_t a_vp;
2032 off_t a_foffset;
2033 size_t a_size;
2034 daddr64_t *a_bpn;
2035 size_t *a_run;
2036 void *a_poff;
2037 int a_flags;
2038 vfs_context_t a_context;
2039 };
2040 */
2041 {
2042 struct vnode *vp = ap->a_vp;
2043 struct cnode *cp;
2044 struct filefork *fp;
2045 struct hfsmount *hfsmp;
2046 size_t bytesContAvail = 0;
2047 int retval = E_NONE;
2048 int syslocks = 0;
2049 int lockflags = 0;
2050 struct rl_entry *invalid_range;
2051 enum rl_overlaptype overlaptype;
2052 int started_tr = 0;
2053 int tooklock = 0;
2054
2055 /* Do not allow blockmap operation on a directory */
2056 if (vnode_isdir(vp)) {
2057 return (ENOTSUP);
2058 }
2059
2060 /*
2061 * Check for underlying vnode requests and ensure that logical
2062 * to physical mapping is requested.
2063 */
2064 if (ap->a_bpn == NULL)
2065 return (0);
2066
2067 if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
2068 if (VTOC(vp)->c_lockowner != current_thread()) {
2069 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2070 tooklock = 1;
2071 }
2072 }
2073 hfsmp = VTOHFS(vp);
2074 cp = VTOC(vp);
2075 fp = VTOF(vp);
2076
2077 retry:
2078 /* Check virtual blocks only when performing write operation */
2079 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
2080 if (hfs_start_transaction(hfsmp) != 0) {
2081 retval = EINVAL;
2082 goto exit;
2083 } else {
2084 started_tr = 1;
2085 }
2086 syslocks = SFL_EXTENTS | SFL_BITMAP;
2087
2088 } else if (overflow_extents(fp)) {
2089 syslocks = SFL_EXTENTS;
2090 }
2091
2092 if (syslocks)
2093 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
2094
2095 /*
2096 * Check for any delayed allocations.
2097 */
2098 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
2099 int64_t actbytes;
2100 u_int32_t loanedBlocks;
2101
2102 //
2103 // Make sure we have a transaction. It's possible
2104 // that we came in and fp->ff_unallocblocks was zero
2105 // but during the time we blocked acquiring the extents
2106 // btree, ff_unallocblocks became non-zero and so we
2107 // will need to start a transaction.
2108 //
2109 if (started_tr == 0) {
2110 if (syslocks) {
2111 hfs_systemfile_unlock(hfsmp, lockflags);
2112 syslocks = 0;
2113 }
2114 goto retry;
2115 }
2116
2117 /*
2118 * Note: ExtendFileC will Release any blocks on loan and
2119 * aquire real blocks. So we ask to extend by zero bytes
2120 * since ExtendFileC will account for the virtual blocks.
2121 */
2122
2123 loanedBlocks = fp->ff_unallocblocks;
2124 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
2125 kEFAllMask | kEFNoClumpMask, &actbytes);
2126
2127 if (retval) {
2128 fp->ff_unallocblocks = loanedBlocks;
2129 cp->c_blocks += loanedBlocks;
2130 fp->ff_blocks += loanedBlocks;
2131
2132 HFS_MOUNT_LOCK(hfsmp, TRUE);
2133 hfsmp->loanedBlocks += loanedBlocks;
2134 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2135
2136 hfs_systemfile_unlock(hfsmp, lockflags);
2137 cp->c_flag |= C_MODIFIED;
2138 if (started_tr) {
2139 (void) hfs_update(vp, TRUE);
2140 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2141
2142 hfs_end_transaction(hfsmp);
2143 started_tr = 0;
2144 }
2145 goto exit;
2146 }
2147 }
2148
2149 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
2150 ap->a_bpn, &bytesContAvail);
2151 if (syslocks) {
2152 hfs_systemfile_unlock(hfsmp, lockflags);
2153 syslocks = 0;
2154 }
2155
2156 if (started_tr) {
2157 (void) hfs_update(vp, TRUE);
2158 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2159 hfs_end_transaction(hfsmp);
2160 started_tr = 0;
2161 }
2162 if (retval) {
2163 /* On write, always return error because virtual blocks, if any,
2164 * should have been allocated in ExtendFileC(). We do not
2165 * allocate virtual blocks on read, therefore return error
2166 * only if no virtual blocks are allocated. Otherwise we search
2167 * rangelist for zero-fills
2168 */
2169 if ((MacToVFSError(retval) != ERANGE) ||
2170 (ap->a_flags & VNODE_WRITE) ||
2171 ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
2172 goto exit;
2173 }
2174
2175 /* Validate if the start offset is within logical file size */
2176 if (ap->a_foffset > fp->ff_size) {
2177 goto exit;
2178 }
2179
2180 /* Searching file extents has failed for read operation, therefore
2181 * search rangelist for any uncommitted holes in the file.
2182 */
2183 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
2184 ap->a_foffset + (off_t)(ap->a_size - 1),
2185 &invalid_range);
2186 switch(overlaptype) {
2187 case RL_OVERLAPISCONTAINED:
2188 /* start_offset <= rl_start, end_offset >= rl_end */
2189 if (ap->a_foffset != invalid_range->rl_start) {
2190 break;
2191 }
2192 case RL_MATCHINGOVERLAP:
2193 /* start_offset = rl_start, end_offset = rl_end */
2194 case RL_OVERLAPCONTAINSRANGE:
2195 /* start_offset >= rl_start, end_offset <= rl_end */
2196 case RL_OVERLAPSTARTSBEFORE:
2197 /* start_offset > rl_start, end_offset >= rl_start */
2198 if ((off_t)fp->ff_size > (invalid_range->rl_end + 1)) {
2199 bytesContAvail = (invalid_range->rl_end + 1) - ap->a_foffset;
2200 } else {
2201 bytesContAvail = fp->ff_size - ap->a_foffset;
2202 }
2203 if (bytesContAvail > ap->a_size) {
2204 bytesContAvail = ap->a_size;
2205 }
2206 *ap->a_bpn = (daddr64_t)-1;
2207 retval = 0;
2208 break;
2209 case RL_OVERLAPENDSAFTER:
2210 /* start_offset < rl_start, end_offset < rl_end */
2211 case RL_NOOVERLAP:
2212 break;
2213 }
2214 goto exit;
2215 }
2216
2217 /* MapFileBlockC() found a valid extent in the filefork. Search the
2218 * mapping information further for invalid file ranges
2219 */
2220 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
2221 ap->a_foffset + (off_t)bytesContAvail - 1,
2222 &invalid_range);
2223 if (overlaptype != RL_NOOVERLAP) {
2224 switch(overlaptype) {
2225 case RL_MATCHINGOVERLAP:
2226 case RL_OVERLAPCONTAINSRANGE:
2227 case RL_OVERLAPSTARTSBEFORE:
2228 /* There's no valid block for this byte offset */
2229 *ap->a_bpn = (daddr64_t)-1;
2230 /* There's no point limiting the amount to be returned
2231 * if the invalid range that was hit extends all the way
2232 * to the EOF (i.e. there's no valid bytes between the
2233 * end of this range and the file's EOF):
2234 */
2235 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
2236 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
2237 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
2238 }
2239 break;
2240
2241 case RL_OVERLAPISCONTAINED:
2242 case RL_OVERLAPENDSAFTER:
2243 /* The range of interest hits an invalid block before the end: */
2244 if (invalid_range->rl_start == ap->a_foffset) {
2245 /* There's actually no valid information to be had starting here: */
2246 *ap->a_bpn = (daddr64_t)-1;
2247 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
2248 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
2249 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
2250 }
2251 } else {
2252 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
2253 }
2254 break;
2255
2256 case RL_NOOVERLAP:
2257 break;
2258 } /* end switch */
2259 if (bytesContAvail > ap->a_size)
2260 bytesContAvail = ap->a_size;
2261 }
2262
2263 exit:
2264 if (retval == 0) {
2265 if (ap->a_run)
2266 *ap->a_run = bytesContAvail;
2267
2268 if (ap->a_poff)
2269 *(int *)ap->a_poff = 0;
2270 }
2271
2272 if (tooklock)
2273 hfs_unlock(cp);
2274
2275 return (MacToVFSError(retval));
2276 }
2277
2278
2279 /*
2280 * prepare and issue the I/O
2281 * buf_strategy knows how to deal
2282 * with requests that require
2283 * fragmented I/Os
2284 */
2285 int
2286 hfs_vnop_strategy(struct vnop_strategy_args *ap)
2287 {
2288 buf_t bp = ap->a_bp;
2289 vnode_t vp = buf_vnode(bp);
2290
2291 return (buf_strategy(VTOHFS(vp)->hfs_devvp, ap));
2292 }
2293
2294
2295 static int
2296 do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context)
2297 {
2298 register struct cnode *cp = VTOC(vp);
2299 struct filefork *fp = VTOF(vp);
2300 struct proc *p = vfs_context_proc(context);
2301 kauth_cred_t cred = vfs_context_ucred(context);
2302 int retval;
2303 off_t bytesToAdd;
2304 off_t actualBytesAdded;
2305 off_t filebytes;
2306 u_long fileblocks;
2307 int blksize;
2308 struct hfsmount *hfsmp;
2309 int lockflags;
2310
2311 blksize = VTOVCB(vp)->blockSize;
2312 fileblocks = fp->ff_blocks;
2313 filebytes = (off_t)fileblocks * (off_t)blksize;
2314
2315 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
2316 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
2317
2318 if (length < 0)
2319 return (EINVAL);
2320
2321 /* This should only happen with a corrupt filesystem */
2322 if ((off_t)fp->ff_size < 0)
2323 return (EINVAL);
2324
2325 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
2326 return (EFBIG);
2327
2328 hfsmp = VTOHFS(vp);
2329
2330 retval = E_NONE;
2331
2332 /* Files that are changing size are not hot file candidates. */
2333 if (hfsmp->hfc_stage == HFC_RECORDING) {
2334 fp->ff_bytesread = 0;
2335 }
2336
2337 /*
2338 * We cannot just check if fp->ff_size == length (as an optimization)
2339 * since there may be extra physical blocks that also need truncation.
2340 */
2341 #if QUOTA
2342 if ((retval = hfs_getinoquota(cp)))
2343 return(retval);
2344 #endif /* QUOTA */
2345
2346 /*
2347 * Lengthen the size of the file. We must ensure that the
2348 * last byte of the file is allocated. Since the smallest
2349 * value of ff_size is 0, length will be at least 1.
2350 */
2351 if (length > (off_t)fp->ff_size) {
2352 #if QUOTA
2353 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
2354 cred, 0);
2355 if (retval)
2356 goto Err_Exit;
2357 #endif /* QUOTA */
2358 /*
2359 * If we don't have enough physical space then
2360 * we need to extend the physical size.
2361 */
2362 if (length > filebytes) {
2363 int eflags;
2364 u_long blockHint = 0;
2365
2366 /* All or nothing and don't round up to clumpsize. */
2367 eflags = kEFAllMask | kEFNoClumpMask;
2368
2369 if (cred && suser(cred, NULL) != 0)
2370 eflags |= kEFReserveMask; /* keep a reserve */
2371
2372 /*
2373 * Allocate Journal and Quota files in metadata zone.
2374 */
2375 if (filebytes == 0 &&
2376 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
2377 hfs_virtualmetafile(cp)) {
2378 eflags |= kEFMetadataMask;
2379 blockHint = hfsmp->hfs_metazone_start;
2380 }
2381 if (hfs_start_transaction(hfsmp) != 0) {
2382 retval = EINVAL;
2383 goto Err_Exit;
2384 }
2385
2386 /* Protect extents b-tree and allocation bitmap */
2387 lockflags = SFL_BITMAP;
2388 if (overflow_extents(fp))
2389 lockflags |= SFL_EXTENTS;
2390 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2391
2392 while ((length > filebytes) && (retval == E_NONE)) {
2393 bytesToAdd = length - filebytes;
2394 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
2395 (FCB*)fp,
2396 bytesToAdd,
2397 blockHint,
2398 eflags,
2399 &actualBytesAdded));
2400
2401 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2402 if (actualBytesAdded == 0 && retval == E_NONE) {
2403 if (length > filebytes)
2404 length = filebytes;
2405 break;
2406 }
2407 } /* endwhile */
2408
2409 hfs_systemfile_unlock(hfsmp, lockflags);
2410
2411 if (hfsmp->jnl) {
2412 (void) hfs_update(vp, TRUE);
2413 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2414 }
2415
2416 hfs_end_transaction(hfsmp);
2417
2418 if (retval)
2419 goto Err_Exit;
2420
2421 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2422 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
2423 }
2424
2425 if (!(flags & IO_NOZEROFILL)) {
2426 if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
2427 struct rl_entry *invalid_range;
2428 off_t zero_limit;
2429
2430 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
2431 if (length < zero_limit) zero_limit = length;
2432
2433 if (length > (off_t)fp->ff_size) {
2434 struct timeval tv;
2435
2436 /* Extending the file: time to fill out the current last page w. zeroes? */
2437 if ((fp->ff_size & PAGE_MASK_64) &&
2438 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
2439 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
2440
2441 /* There's some valid data at the start of the (current) last page
2442 of the file, so zero out the remainder of that page to ensure the
2443 entire page contains valid data. Since there is no invalid range
2444 possible past the (current) eof, there's no need to remove anything
2445 from the invalid range list before calling cluster_write(): */
2446 hfs_unlock(cp);
2447 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
2448 fp->ff_size, (off_t)0,
2449 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
2450 hfs_lock(cp, HFS_FORCE_LOCK);
2451 if (retval) goto Err_Exit;
2452
2453 /* Merely invalidate the remaining area, if necessary: */
2454 if (length > zero_limit) {
2455 microuptime(&tv);
2456 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
2457 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
2458 }
2459 } else {
2460 /* The page containing the (current) eof is invalid: just add the
2461 remainder of the page to the invalid list, along with the area
2462 being newly allocated:
2463 */
2464 microuptime(&tv);
2465 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
2466 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
2467 };
2468 }
2469 } else {
2470 panic("hfs_truncate: invoked on non-UBC object?!");
2471 };
2472 }
2473 cp->c_touch_modtime = TRUE;
2474 fp->ff_size = length;
2475
2476 } else { /* Shorten the size of the file */
2477
2478 if ((off_t)fp->ff_size > length) {
2479 /* Any space previously marked as invalid is now irrelevant: */
2480 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2481 }
2482
2483 /*
2484 * Account for any unmapped blocks. Note that the new
2485 * file length can still end up with unmapped blocks.
2486 */
2487 if (fp->ff_unallocblocks > 0) {
2488 u_int32_t finalblks;
2489 u_int32_t loanedBlocks;
2490
2491 HFS_MOUNT_LOCK(hfsmp, TRUE);
2492
2493 loanedBlocks = fp->ff_unallocblocks;
2494 cp->c_blocks -= loanedBlocks;
2495 fp->ff_blocks -= loanedBlocks;
2496 fp->ff_unallocblocks = 0;
2497
2498 hfsmp->loanedBlocks -= loanedBlocks;
2499
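/*
 * Round the new logical length up to whole allocation blocks (e.g. with a
 * 4096-byte block, a 5000-byte length still needs 2 blocks; numbers are
 * illustrative only). If that count exceeds the blocks actually allocated,
 * the difference is kept on loan as unallocated (virtual) blocks.
 */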
2500 finalblks = (length + blksize - 1) / blksize;
2501 if (finalblks > fp->ff_blocks) {
2502 /* calculate required unmapped blocks */
2503 loanedBlocks = finalblks - fp->ff_blocks;
2504 hfsmp->loanedBlocks += loanedBlocks;
2505
2506 fp->ff_unallocblocks = loanedBlocks;
2507 cp->c_blocks += loanedBlocks;
2508 fp->ff_blocks += loanedBlocks;
2509 }
2510 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2511 }
2512
2513 /*
2514 * For a TBE process the deallocation of the file blocks is
2515 * delayed until the file is closed. And hfs_close calls
2516 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2517 * isn't set, we make sure this isn't a TBE process.
2518 */
2519 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2520 #if QUOTA
2521 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2522 #endif /* QUOTA */
2523 if (hfs_start_transaction(hfsmp) != 0) {
2524 retval = EINVAL;
2525 goto Err_Exit;
2526 }
2527
2528 if (fp->ff_unallocblocks == 0) {
2529 /* Protect extents b-tree and allocation bitmap */
2530 lockflags = SFL_BITMAP;
2531 if (overflow_extents(fp))
2532 lockflags |= SFL_EXTENTS;
2533 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2534
2535 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2536 (FCB*)fp, length, false));
2537
2538 hfs_systemfile_unlock(hfsmp, lockflags);
2539 }
2540 if (hfsmp->jnl) {
2541 if (retval == 0) {
2542 fp->ff_size = length;
2543 }
2544 (void) hfs_update(vp, TRUE);
2545 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2546 }
2547
2548 hfs_end_transaction(hfsmp);
2549
2550 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2551 if (retval)
2552 goto Err_Exit;
2553 #if QUOTA
2554 /* These are bytesreleased */
2555 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2556 #endif /* QUOTA */
2557 }
2558 /* Only set update flag if the logical length changes */
2559 if ((off_t)fp->ff_size != length)
2560 cp->c_touch_modtime = TRUE;
2561 fp->ff_size = length;
2562 }
2563 cp->c_touch_chgtime = TRUE; /* status changed */
2564 cp->c_touch_modtime = TRUE; /* file data was modified */
2565 retval = hfs_update(vp, MNT_WAIT);
2566 if (retval) {
2567 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2568 -1, -1, -1, retval, 0);
2569 }
2570
2571 Err_Exit:
2572
2573 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2574 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2575
2576 return (retval);
2577 }
2578
2579
2580
2581 /*
2582 * Truncate a cnode to at most length size, freeing (or adding) the
2583 * disk blocks.
2584 */
2585 __private_extern__
2586 int
2587 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2588 vfs_context_t context)
2589 {
2590 struct filefork *fp = VTOF(vp);
2591 off_t filebytes;
2592 u_long fileblocks;
2593 int blksize, error = 0;
2594 struct cnode *cp = VTOC(vp);
2595
2596 /* Cannot truncate an HFS directory! */
2597 if (vnode_isdir(vp)) {
2598 return (EISDIR);
2599 }
2600 /* A swap file cannot change size. */
2601 if (vnode_isswap(vp) && (length != 0)) {
2602 return (EPERM);
2603 }
2604
2605 blksize = VTOVCB(vp)->blockSize;
2606 fileblocks = fp->ff_blocks;
2607 filebytes = (off_t)fileblocks * (off_t)blksize;
2608
2609 //
2610 // Have to do this here so that we don't wind up with
2611 // i/o pending for blocks that are about to be released
2612 // if we truncate the file.
2613 //
2614 // If skipsetsize is set, then the caller is responsible
2615 // for the ubc_setsize.
2616 //
2617 if (!skipsetsize)
2618 ubc_setsize(vp, length);
2619
2620 // have to loop truncating or growing files that are
2621 // really big because otherwise transactions can get
2622 // enormous and consume too many kernel resources.
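// Each pass below moves the physical EOF by at most HFS_BIGFILE_SIZE when
// the file has overflow extents, so each do_hfs_truncate() call, and the
// journal transaction it starts, stays bounded in size.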
2623
2624 if (length < filebytes) {
2625 while (filebytes > length) {
2626 if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
2627 filebytes -= HFS_BIGFILE_SIZE;
2628 } else {
2629 filebytes = length;
2630 }
2631 cp->c_flag |= C_FORCEUPDATE;
2632 error = do_hfs_truncate(vp, filebytes, flags, context);
2633 if (error)
2634 break;
2635 }
2636 } else if (length > filebytes) {
2637 while (filebytes < length) {
2638 if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
2639 filebytes += HFS_BIGFILE_SIZE;
2640 } else {
2641 filebytes = length;
2642 }
2643 cp->c_flag |= C_FORCEUPDATE;
2644 error = do_hfs_truncate(vp, filebytes, flags, context);
2645 if (error)
2646 break;
2647 }
2648 } else /* Same logical size */ {
2649
2650 error = do_hfs_truncate(vp, length, flags, context);
2651 }
2652 /* Files that are changing size are not hot file candidates. */
2653 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2654 fp->ff_bytesread = 0;
2655 }
2656
2657 return (error);
2658 }
2659
2660
2661
2662 /*
2663 * Preallocate file storage space.
2664 */
2665 int
2666 hfs_vnop_allocate(struct vnop_allocate_args /* {
2667 vnode_t a_vp;
2668 off_t a_length;
2669 u_int32_t a_flags;
2670 off_t *a_bytesallocated;
2671 off_t a_offset;
2672 vfs_context_t a_context;
2673 } */ *ap)
2674 {
2675 struct vnode *vp = ap->a_vp;
2676 struct cnode *cp;
2677 struct filefork *fp;
2678 ExtendedVCB *vcb;
2679 off_t length = ap->a_length;
2680 off_t startingPEOF;
2681 off_t moreBytesRequested;
2682 off_t actualBytesAdded;
2683 off_t filebytes;
2684 u_long fileblocks;
2685 int retval, retval2;
2686 u_int32_t blockHint;
2687 u_int32_t extendFlags; /* For call to ExtendFileC */
2688 struct hfsmount *hfsmp;
2689 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2690 int lockflags;
2691
2692 *(ap->a_bytesallocated) = 0;
2693
2694 if (!vnode_isreg(vp))
2695 return (EISDIR);
2696 if (length < (off_t)0)
2697 return (EINVAL);
2698
2699 cp = VTOC(vp);
2700
2701 hfs_lock_truncate(cp, TRUE);
2702
2703 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2704 goto Err_Exit;
2705 }
2706
2707 fp = VTOF(vp);
2708 hfsmp = VTOHFS(vp);
2709 vcb = VTOVCB(vp);
2710
2711 fileblocks = fp->ff_blocks;
2712 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2713
2714 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2715 retval = EINVAL;
2716 goto Err_Exit;
2717 }
2718
2719 /* Fill in the flags word for the call to Extend the file */
2720
2721 extendFlags = kEFNoClumpMask;
2722 if (ap->a_flags & ALLOCATECONTIG)
2723 extendFlags |= kEFContigMask;
2724 if (ap->a_flags & ALLOCATEALL)
2725 extendFlags |= kEFAllMask;
2726 if (cred && suser(cred, NULL) != 0)
2727 extendFlags |= kEFReserveMask;
2728
2729 retval = E_NONE;
2730 blockHint = 0;
2731 startingPEOF = filebytes;
2732
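/*
 * ALLOCATEFROMPEOF means a_length is relative to the current physical EOF,
 * so convert it to an absolute size here; ALLOCATEFROMVOL supplies a volume
 * offset that is only used to seed the allocator's block hint.
 */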
2733 if (ap->a_flags & ALLOCATEFROMPEOF)
2734 length += filebytes;
2735 else if (ap->a_flags & ALLOCATEFROMVOL)
2736 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2737
2738 /* If no changes are necessary, then we're done */
2739 if (filebytes == length)
2740 goto Std_Exit;
2741
2742 /*
2743 * Lengthen the size of the file. We must ensure that the
2744 * last byte of the file is allocated. Since the smallest
2745 * value of filebytes is 0, length will be at least 1.
2746 */
2747 if (length > filebytes) {
2748 off_t total_bytes_added = 0, orig_request_size;
2749
2750 orig_request_size = moreBytesRequested = length - filebytes;
2751
2752 #if QUOTA
2753 retval = hfs_chkdq(cp,
2754 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2755 cred, 0);
2756 if (retval)
2757 goto Err_Exit;
2758
2759 #endif /* QUOTA */
2760 /*
2761 * Metadata zone checks.
2762 */
2763 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2764 /*
2765 * Allocate Journal and Quota files in metadata zone.
2766 */
2767 if (hfs_virtualmetafile(cp)) {
2768 extendFlags |= kEFMetadataMask;
2769 blockHint = hfsmp->hfs_metazone_start;
2770 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2771 (blockHint <= hfsmp->hfs_metazone_end)) {
2772 /*
2773 * Move blockHint outside metadata zone.
2774 */
2775 blockHint = hfsmp->hfs_metazone_end + 1;
2776 }
2777 }
2778
2779
2780 while ((length > filebytes) && (retval == E_NONE)) {
2781 off_t bytesRequested;
2782
2783 if (hfs_start_transaction(hfsmp) != 0) {
2784 retval = EINVAL;
2785 goto Err_Exit;
2786 }
2787
2788 /* Protect extents b-tree and allocation bitmap */
2789 lockflags = SFL_BITMAP;
2790 if (overflow_extents(fp))
2791 lockflags |= SFL_EXTENTS;
2792 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2793
2794 if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
2795 bytesRequested = HFS_BIGFILE_SIZE;
2796 } else {
2797 bytesRequested = moreBytesRequested;
2798 }
2799
2800 retval = MacToVFSError(ExtendFileC(vcb,
2801 (FCB*)fp,
2802 bytesRequested,
2803 blockHint,
2804 extendFlags,
2805 &actualBytesAdded));
2806
2807 if (retval == E_NONE) {
2808 *(ap->a_bytesallocated) += actualBytesAdded;
2809 total_bytes_added += actualBytesAdded;
2810 moreBytesRequested -= actualBytesAdded;
2811 if (blockHint != 0) {
2812 blockHint += actualBytesAdded / vcb->blockSize;
2813 }
2814 }
2815 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2816
2817 hfs_systemfile_unlock(hfsmp, lockflags);
2818
2819 if (hfsmp->jnl) {
2820 (void) hfs_update(vp, TRUE);
2821 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2822 }
2823
2824 hfs_end_transaction(hfsmp);
2825 }
2826
2827
2828 /*
2829 * if we get an error and no changes were made then exit
2830 * otherwise we must do the hfs_update to reflect the changes
2831 */
2832 if (retval && (startingPEOF == filebytes))
2833 goto Err_Exit;
2834
2835 /*
2836 * Adjust actualBytesAdded to be allocation block aligned, not
2837 * clump size aligned.
2838 * NOTE: So what we are reporting does not affect reality
2839 * until the file is closed, when we truncate the file to allocation
2840 * block size.
2841 */
2842 if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
2843 *(ap->a_bytesallocated) =
2844 roundup(orig_request_size, (off_t)vcb->blockSize);
2845
2846 } else { /* Shorten the size of the file */
2847
2848 if (fp->ff_size > length) {
2849 /*
2850 * Any buffers that are past the truncation point need to be
2851 * invalidated (to maintain buffer cache consistency).
2852 */
2853 }
2854
2855 retval = hfs_truncate(vp, length, 0, 0, ap->a_context);
2856 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2857
2858 /*
2859 * if we get an error and no changes were made then exit
2860 * otherwise we must do the hfs_update to reflect the changes
2861 */
2862 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2863 #if QUOTA
2864 /* These are bytesreleased */
2865 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2866 #endif /* QUOTA */
2867
2868 if (fp->ff_size > filebytes) {
2869 fp->ff_size = filebytes;
2870
2871 hfs_unlock(cp);
2872 ubc_setsize(vp, fp->ff_size);
2873 hfs_lock(cp, HFS_FORCE_LOCK);
2874 }
2875 }
2876
2877 Std_Exit:
2878 cp->c_touch_chgtime = TRUE;
2879 cp->c_touch_modtime = TRUE;
2880 retval2 = hfs_update(vp, MNT_WAIT);
2881
2882 if (retval == 0)
2883 retval = retval2;
2884 Err_Exit:
2885 hfs_unlock_truncate(cp, TRUE);
2886 hfs_unlock(cp);
2887 return (retval);
2888 }
2889
2890
2891 /*
2892 * Pagein for HFS filesystem
2893 */
2894 int
2895 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2896 /*
2897 struct vnop_pagein_args {
2898 vnode_t a_vp,
2899 upl_t a_pl,
2900 vm_offset_t a_pl_offset,
2901 off_t a_f_offset,
2902 size_t a_size,
2903 int a_flags
2904 vfs_context_t a_context;
2905 };
2906 */
2907 {
2908 vnode_t vp = ap->a_vp;
2909 int error;
2910
2911 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2912 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2913 /*
2914 * Keep track of blocks read.
2915 */
2916 if (!vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2917 struct cnode *cp;
2918 struct filefork *fp;
2919 int bytesread;
2920 int took_cnode_lock = 0;
2921
2922 cp = VTOC(vp);
2923 fp = VTOF(vp);
2924
2925 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2926 bytesread = fp->ff_size;
2927 else
2928 bytesread = ap->a_size;
2929
2930 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2931 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
2932 hfs_lock(cp, HFS_FORCE_LOCK);
2933 took_cnode_lock = 1;
2934 }
2935 /*
2936 * If this file hasn't been seen since the start of
2937 * the current sampling period then start over.
2938 */
2939 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2940 struct timeval tv;
2941
2942 fp->ff_bytesread = bytesread;
2943 microtime(&tv);
2944 cp->c_atime = tv.tv_sec;
2945 } else {
2946 fp->ff_bytesread += bytesread;
2947 }
2948 cp->c_touch_acctime = TRUE;
2949 if (took_cnode_lock)
2950 hfs_unlock(cp);
2951 }
2952 return (error);
2953 }
2954
2955 /*
2956 * Pageout for HFS filesystem.
2957 */
2958 int
2959 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2960 /*
2961 struct vnop_pageout_args {
2962 vnode_t a_vp,
2963 upl_t a_pl,
2964 vm_offset_t a_pl_offset,
2965 off_t a_f_offset,
2966 size_t a_size,
2967 int a_flags
2968 vfs_context_t a_context;
2969 };
2970 */
2971 {
2972 vnode_t vp = ap->a_vp;
2973 struct cnode *cp;
2974 struct filefork *fp;
2975 int retval;
2976 off_t filesize;
2977
2978 cp = VTOC(vp);
2979 fp = VTOF(vp);
2980
2981 /*
2982 * Figure out where the file ends, for pageout purposes. If
2983 * ff_new_size > ff_size, then we're in the middle of extending the
2984 * file via a write, so it is safe (and necessary) that we be able
2985 * to pageout up to that point.
2986 */
2987 filesize = fp->ff_size;
2988 if (fp->ff_new_size > filesize)
2989 filesize = fp->ff_new_size;
2990
2991 if (!vnode_isswap(vp)) {
2992 off_t end_of_range;
2993 int tooklock = 0;
2994
2995 if (cp->c_lockowner != current_thread()) {
2996 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2997 if (!(ap->a_flags & UPL_NOCOMMIT)) {
2998 ubc_upl_abort_range(ap->a_pl,
2999 ap->a_pl_offset,
3000 ap->a_size,
3001 UPL_ABORT_FREE_ON_EMPTY);
3002 }
3003 return (retval);
3004 }
3005 tooklock = 1;
3006 }
3007
3008 end_of_range = ap->a_f_offset + ap->a_size - 1;
3009
3010 if (end_of_range >= filesize) {
3011 end_of_range = (off_t)(filesize - 1);
3012 }
3013 if (ap->a_f_offset < filesize) {
3014 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
3015 cp->c_flag |= C_MODIFIED; /* leof is dirty */
3016 }
3017
3018 if (tooklock) {
3019 hfs_unlock(cp);
3020 }
3021 }
3022
3023 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
3024 ap->a_size, filesize, ap->a_flags);
3025
3026 /*
3027 * If data was written, and setuid or setgid bits are set and
3028 * this process is not the superuser then clear the setuid and
3029 * setgid bits as a precaution against tampering.
3030 */
3031 if ((retval == 0) &&
3032 (cp->c_mode & (S_ISUID | S_ISGID)) &&
3033 (vfs_context_suser(ap->a_context) != 0)) {
3034 hfs_lock(cp, HFS_FORCE_LOCK);
3035 cp->c_mode &= ~(S_ISUID | S_ISGID);
3036 cp->c_touch_chgtime = TRUE;
3037 hfs_unlock(cp);
3038 }
3039 return (retval);
3040 }
3041
3042 /*
3043 * Intercept B-Tree node writes to unswap them if necessary.
3044 */
3045 int
3046 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
3047 {
3048 int retval = 0;
3049 register struct buf *bp = ap->a_bp;
3050 register struct vnode *vp = buf_vnode(bp);
3051 BlockDescriptor block;
3052
3053 /* Trap B-Tree writes */
3054 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
3055 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
3056 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
3057 (vp == VTOHFS(vp)->hfc_filevp)) {
3058
3059 /*
3060 * Swap and validate the node if it is in native byte order.
3061 * This is always true on big endian, so we always validate
3062 * before writing here. On little endian, the node typically has
3063 * been swapped and validated when it was written to the journal,
3064 * so we won't do anything here.
3065 */
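/*
 * The last two bytes of a B-tree node hold the offset of its first record,
 * which is always 14 (sizeof(BTNodeDescriptor)). Seeing 0x000e there in
 * host byte order means the node is still in native order and must be
 * swapped to big endian before it reaches the disk.
 */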
3066 if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
3067 /* Prepare the block pointer */
3068 block.blockHeader = bp;
3069 block.buffer = (char *)buf_dataptr(bp);
3070 block.blockNum = buf_lblkno(bp);
3071 /* not found in cache ==> came from disk */
3072 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
3073 block.blockSize = buf_count(bp);
3074
3075 /* Endian un-swap B-Tree node */
3076 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
3077 if (retval)
3078 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
3079 }
3080 }
3081
3082 /* This buffer shouldn't be locked anymore, but if it is, clear it */
3083 if ((buf_flags(bp) & B_LOCKED)) {
3084 // XXXdbg
3085 if (VTOHFS(vp)->jnl) {
3086 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
3087 }
3088 buf_clearflags(bp, B_LOCKED);
3089 }
3090 retval = vn_bwrite (ap);
3091
3092 return (retval);
3093 }
3094
3095 /*
3096 * Relocate a file to a new location on disk
3097 * cnode must be locked on entry
3098 *
3099 * Relocation occurs by cloning the file's data from its
3100 * current set of blocks to a new set of blocks. During
3101 * the relocation all of the blocks (old and new) are
3102 * owned by the file.
3103 *
3104 * -----------------
3105 * |///////////////|
3106 * -----------------
3107 * 0 N (file offset)
3108 *
3109 * ----------------- -----------------
3110 * |///////////////| | | STEP 1 (acquire new blocks)
3111 * ----------------- -----------------
3112 * 0 N N+1 2N
3113 *
3114 * ----------------- -----------------
3115 * |///////////////| |///////////////| STEP 2 (clone data)
3116 * ----------------- -----------------
3117 * 0 N N+1 2N
3118 *
3119 * -----------------
3120 * |///////////////| STEP 3 (head truncate blocks)
3121 * -----------------
3122 * 0 N
3123 *
3124 * During steps 2 and 3 page-outs to file offsets less
3125 * than or equal to N are suspended.
3126 *
3127 * During step 3 page-ins to the file get suspended.
3128 */
3129 __private_extern__
3130 int
3131 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
3132 struct proc *p)
3133 {
3134 struct cnode *cp;
3135 struct filefork *fp;
3136 struct hfsmount *hfsmp;
3137 u_int32_t headblks;
3138 u_int32_t datablks;
3139 u_int32_t blksize;
3140 u_int32_t growsize;
3141 u_int32_t nextallocsave;
3142 daddr64_t sector_a, sector_b;
3143 int eflags;
3144 off_t newbytes;
3145 int retval;
3146 int lockflags = 0;
3147 int took_trunc_lock = 0;
3148 int started_tr = 0;
3149 enum vtype vnodetype;
3150
3151 vnodetype = vnode_vtype(vp);
3152 if (vnodetype != VREG && vnodetype != VLNK) {
3153 return (EPERM);
3154 }
3155
3156 hfsmp = VTOHFS(vp);
3157 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
3158 return (ENOSPC);
3159 }
3160
3161 cp = VTOC(vp);
3162 fp = VTOF(vp);
3163 if (fp->ff_unallocblocks)
3164 return (EINVAL);
3165 blksize = hfsmp->blockSize;
3166 if (blockHint == 0)
3167 blockHint = hfsmp->nextAllocation;
3168
3169 if ((fp->ff_size > 0x7fffffff) ||
3170 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
3171 return (EFBIG);
3172 }
3173
3174 //
3175 // We do not believe that this call to hfs_fsync() is
3176 // necessary and it causes a journal transaction
3177 // deadlock so we are removing it.
3178 //
3179 //if (vnodetype == VREG && !vnode_issystem(vp)) {
3180 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
3181 // if (retval)
3182 // return (retval);
3183 //}
3184
3185 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
3186 hfs_unlock(cp);
3187 hfs_lock_truncate(cp, TRUE);
3188 /* Force lock since callers expect lock to be held. */
3189 if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
3190 hfs_unlock_truncate(cp, TRUE);
3191 return (retval);
3192 }
3193 /* No need to continue if file was removed. */
3194 if (cp->c_flag & C_NOEXISTS) {
3195 hfs_unlock_truncate(cp, TRUE);
3196 return (ENOENT);
3197 }
3198 took_trunc_lock = 1;
3199 }
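/*
 * headblks is the file's current block count (the original blocks that get
 * head-truncated in step 3); datablks is how many blocks the live data
 * occupies, so growsize is the size of the clone region that step 1 appends
 * after the existing allocation.
 */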
3200 headblks = fp->ff_blocks;
3201 datablks = howmany(fp->ff_size, blksize);
3202 growsize = datablks * blksize;
3203 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
3204 if (blockHint >= hfsmp->hfs_metazone_start &&
3205 blockHint <= hfsmp->hfs_metazone_end)
3206 eflags |= kEFMetadataMask;
3207
3208 if (hfs_start_transaction(hfsmp) != 0) {
3209 if (took_trunc_lock)
3210 hfs_unlock_truncate(cp, TRUE);
3211 return (EINVAL);
3212 }
3213 started_tr = 1;
3214 /*
3215 * Protect the extents b-tree and the allocation bitmap
3216 * during MapFileBlockC and ExtendFileC operations.
3217 */
3218 lockflags = SFL_BITMAP;
3219 if (overflow_extents(fp))
3220 lockflags |= SFL_EXTENTS;
3221 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3222
3223 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
3224 if (retval) {
3225 retval = MacToVFSError(retval);
3226 goto out;
3227 }
3228
3229 /*
3230 * STEP 1 - acquire new allocation blocks.
3231 */
3232 nextallocsave = hfsmp->nextAllocation;
3233 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
3234 if (eflags & kEFMetadataMask) {
3235 HFS_MOUNT_LOCK(hfsmp, TRUE);
3236 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
3237 MarkVCBDirty(hfsmp);
3238 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3239 }
3240
3241 retval = MacToVFSError(retval);
3242 if (retval == 0) {
3243 cp->c_flag |= C_MODIFIED;
3244 if (newbytes < growsize) {
3245 retval = ENOSPC;
3246 goto restore;
3247 } else if (fp->ff_blocks < (headblks + datablks)) {
3248 printf("hfs_relocate: allocation failed");
3249 retval = ENOSPC;
3250 goto restore;
3251 }
3252
3253 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
3254 if (retval) {
3255 retval = MacToVFSError(retval);
3256 } else if ((sector_a + 1) == sector_b) {
3257 retval = ENOSPC;
3258 goto restore;
3259 } else if ((eflags & kEFMetadataMask) &&
3260 ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
3261 hfsmp->hfs_metazone_end)) {
3262 const char * filestr;
3263 char emptystr = '\0';
3264
3265 if (cp->c_desc.cd_nameptr != NULL) {
3266 filestr = (const char *)&cp->c_desc.cd_nameptr[0];
3267 } else if (vnode_name(vp) != NULL) {
3268 filestr = vnode_name(vp);
3269 } else {
3270 filestr = &emptystr;
3271 }
3272 printf("hfs_relocate: %s didn't move into MDZ (%d blks)\n", filestr, fp->ff_blocks);
3273 retval = ENOSPC;
3274 goto restore;
3275 }
3276 }
3277 /* Done with system locks and journal for now. */
3278 hfs_systemfile_unlock(hfsmp, lockflags);
3279 lockflags = 0;
3280 hfs_end_transaction(hfsmp);
3281 started_tr = 0;
3282
3283 if (retval) {
3284 /*
3285 * Check to see if failure is due to excessive fragmentation.
3286 */
3287 if ((retval == ENOSPC) &&
3288 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
3289 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
3290 }
3291 goto out;
3292 }
3293 /*
3294 * STEP 2 - clone file data into the new allocation blocks.
3295 */
3296
3297 if (vnodetype == VLNK)
3298 retval = hfs_clonelink(vp, blksize, cred, p);
3299 else if (vnode_issystem(vp))
3300 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
3301 else
3302 retval = hfs_clonefile(vp, headblks, datablks, blksize);
3303
3304 /* Start transaction for step 3 or for a restore. */
3305 if (hfs_start_transaction(hfsmp) != 0) {
3306 retval = EINVAL;
3307 goto out;
3308 }
3309 started_tr = 1;
3310 if (retval)
3311 goto restore;
3312
3313 /*
3314 * STEP 3 - switch to cloned data and remove old blocks.
3315 */
3316 lockflags = SFL_BITMAP;
3317 if (overflow_extents(fp))
3318 lockflags |= SFL_EXTENTS;
3319 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3320
3321 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
3322
3323 hfs_systemfile_unlock(hfsmp, lockflags);
3324 lockflags = 0;
3325 if (retval)
3326 goto restore;
3327 out:
3328 if (took_trunc_lock)
3329 hfs_unlock_truncate(cp, TRUE);
3330
3331 if (lockflags) {
3332 hfs_systemfile_unlock(hfsmp, lockflags);
3333 lockflags = 0;
3334 }
3335
3336 /* Push cnode's new extent data to disk. */
3337 if (retval == 0) {
3338 (void) hfs_update(vp, MNT_WAIT);
3339 }
3340 if (hfsmp->jnl) {
3341 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
3342 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
3343 else
3344 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
3345 }
3346 exit:
3347 if (started_tr)
3348 hfs_end_transaction(hfsmp);
3349
3350 return (retval);
3351
3352 restore:
3353 if (fp->ff_blocks == headblks) {
3354 if (took_trunc_lock)
3355 hfs_unlock_truncate(cp, TRUE);
3356 goto exit;
3357 }
3358 /*
3359 * Give back any newly allocated space.
3360 */
3361 if (lockflags == 0) {
3362 lockflags = SFL_BITMAP;
3363 if (overflow_extents(fp))
3364 lockflags |= SFL_EXTENTS;
3365 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3366 }
3367
3368 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
3369
3370 hfs_systemfile_unlock(hfsmp, lockflags);
3371 lockflags = 0;
3372
3373 if (took_trunc_lock)
3374 hfs_unlock_truncate(cp, TRUE);
3375 goto exit;
3376 }
3377
3378
3379 /*
3380 * Clone a symlink.
3381 *
3382 */
3383 static int
3384 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
3385 {
3386 struct buf *head_bp = NULL;
3387 struct buf *tail_bp = NULL;
3388 int error;
3389
3390
3391 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
3392 if (error)
3393 goto out;
3394
3395 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
3396 if (tail_bp == NULL) {
3397 error = EIO;
3398 goto out;
3399 }
3400 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
3401 error = (int)buf_bwrite(tail_bp);
3402 out:
3403 if (head_bp) {
3404 buf_markinvalid(head_bp);
3405 buf_brelse(head_bp);
3406 }
3407 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
3408
3409 return (error);
3410 }
3411
3412 /*
3413 * Clone a file's data within the file.
3414 *
3415 */
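/*
 * Rough outline of the copy below: the live data is read with
 * cluster_read(..., IO_NOCACHE) in chunks of up to 128 KB and rewritten
 * starting at the old physical EOF (writebase = blkstart * blksize) with
 * cluster_write(..., IO_NOCACHE | IO_SYNC), so no stale copies linger in
 * the unified buffer cache.
 */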
3416 static int
3417 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
3418 {
3419 caddr_t bufp;
3420 size_t writebase;
3421 size_t bufsize;
3422 size_t copysize;
3423 size_t iosize;
3424 off_t filesize;
3425 size_t offset;
3426 uio_t auio;
3427 int error = 0;
3428
3429 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
3430 writebase = blkstart * blksize;
3431 copysize = blkcnt * blksize;
3432 iosize = bufsize = MIN(copysize, 128 * 1024);
3433 offset = 0;
3434
3435 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3436 return (ENOMEM);
3437 }
3438 hfs_unlock(VTOC(vp));
3439
3440 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
3441
3442 while (offset < copysize) {
3443 iosize = MIN(copysize - offset, iosize);
3444
3445 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
3446 uio_addiov(auio, (uintptr_t)bufp, iosize);
3447
3448 error = cluster_read(vp, auio, copysize, IO_NOCACHE);
3449 if (error) {
3450 printf("hfs_clonefile: cluster_read failed - %d\n", error);
3451 break;
3452 }
3453 if (uio_resid(auio) != 0) {
3454 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
3455 error = EIO;
3456 break;
3457 }
3458
3459 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
3460 uio_addiov(auio, (uintptr_t)bufp, iosize);
3461
3462 error = cluster_write(vp, auio, filesize + offset,
3463 filesize + offset + iosize,
3464 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
3465 if (error) {
3466 printf("hfs_clonefile: cluster_write failed - %d\n", error);
3467 break;
3468 }
3469 if (uio_resid(auio) != 0) {
3470 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
3471 error = EIO;
3472 break;
3473 }
3474 offset += iosize;
3475 }
3476 uio_free(auio);
3477
3478 /*
3479 * No need to call ubc_sync_range or hfs_invalbuf
3480 * since the file was copied using IO_NOCACHE.
3481 */
3482
3483 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3484
3485 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
3486 return (error);
3487 }
3488
3489 /*
3490 * Clone a system (metadata) file.
3491 *
3492 */
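/*
 * Unlike hfs_clonefile(), this path goes through the buffer cache: it reads
 * up to a megabyte of metadata blocks with buf_meta_bread(), stages them in
 * a temporary buffer, rewrites them at start_blk with buf_getblk() and
 * buf_bwrite(), and finishes with hfs_fsync().
 */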
3493 static int
3494 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
3495 kauth_cred_t cred, struct proc *p)
3496 {
3497 caddr_t bufp;
3498 char * offset;
3499 size_t bufsize;
3500 size_t iosize;
3501 struct buf *bp = NULL;
3502 daddr64_t blkno;
3503 daddr64_t blk;
3504 daddr64_t start_blk;
3505 daddr64_t last_blk;
3506 int breadcnt;
3507 int i;
3508 int error = 0;
3509
3510
3511 iosize = GetLogicalBlockSize(vp);
3512 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
3513 breadcnt = bufsize / iosize;
3514
3515 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3516 return (ENOMEM);
3517 }
3518 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3519 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
3520 blkno = 0;
3521
3522 while (blkno < last_blk) {
3523 /*
3524 * Read up to a megabyte
3525 */
3526 offset = bufp;
3527 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3528 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3529 if (error) {
3530 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3531 goto out;
3532 }
3533 if (buf_count(bp) != iosize) {
3534 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3535 goto out;
3536 }
3537 bcopy((char *)buf_dataptr(bp), offset, iosize);
3538
3539 buf_markinvalid(bp);
3540 buf_brelse(bp);
3541 bp = NULL;
3542
3543 offset += iosize;
3544 }
3545
3546 /*
3547 * Write up to a megabyte
3548 */
3549 offset = bufp;
3550 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3551 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3552 if (bp == NULL) {
3553 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3554 error = EIO;
3555 goto out;
3556 }
3557 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3558 error = (int)buf_bwrite(bp);
3559 bp = NULL;
3560 if (error)
3561 goto out;
3562 offset += iosize;
3563 }
3564 }
3565 out:
3566 if (bp) {
3567 buf_brelse(bp);
3568 }
3569
3570 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3571
3572 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3573
3574 return (error);
3575 }