1 /*
2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* @(#)hfs_readwrite.c 1.0
29 *
30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
31 *
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
33 *
34 */
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/resourcevar.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/filedesc.h>
42 #include <sys/stat.h>
43 #include <sys/buf.h>
44 #include <sys/proc.h>
45 #include <sys/kauth.h>
46 #include <sys/vnode.h>
47 #include <sys/vnode_internal.h>
48 #include <sys/uio.h>
49 #include <sys/vfs_context.h>
50 #include <sys/fsevents.h>
51 #include <kern/kalloc.h>
52 #include <sys/disk.h>
53 #include <sys/sysctl.h>
54
55 #include <miscfs/specfs/specdev.h>
56
57 #include <sys/ubc.h>
58 #include <sys/ubc_internal.h>
59
60 #include <vm/vm_pageout.h>
61 #include <vm/vm_kern.h>
62
63 #include <sys/kdebug.h>
64
65 #include "hfs.h"
66 #include "hfs_attrlist.h"
67 #include "hfs_endian.h"
68 #include "hfs_fsctl.h"
69 #include "hfs_quota.h"
70 #include "hfscommon/headers/FileMgrInternal.h"
71 #include "hfscommon/headers/BTreesInternal.h"
72 #include "hfs_cnode.h"
73 #include "hfs_dbg.h"
74
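/* A transfer can use the cluster layer only when its size is a multiple of
 * 4 KB and no larger than half of MAXPHYSIO. */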
75 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
76
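/* 0x7FFFFFFF is 2 GB - 1, the file size limit on HFS standard volumes
 * (see the EFBIG check in hfs_vnop_read below). */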
77 enum {
78 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
79 };
80
81 /* from bsd/vfs/vfs_cluster.c */
82 extern int is_file_clean(vnode_t vp, off_t filesize);
83 /* from bsd/hfs/hfs_vfsops.c */
84 extern int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
85
86 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
87 static int hfs_clonefile(struct vnode *, int, int, int);
88 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
89
90 int flush_cache_on_write = 0;
91 SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
92
93
94 /*
95 * Read data from a file.
96 */
97 int
98 hfs_vnop_read(struct vnop_read_args *ap)
99 {
100 uio_t uio = ap->a_uio;
101 struct vnode *vp = ap->a_vp;
102 struct cnode *cp;
103 struct filefork *fp;
104 struct hfsmount *hfsmp;
105 off_t filesize;
106 off_t filebytes;
107 off_t start_resid = uio_resid(uio);
108 off_t offset = uio_offset(uio);
109 int retval = 0;
110
111
112 /* Preflight checks */
113 if (!vnode_isreg(vp)) {
114 /* can only read regular files */
115 if (vnode_isdir(vp))
116 return (EISDIR);
117 else
118 return (EPERM);
119 }
120 if (start_resid == 0)
121 return (0); /* Nothing left to do */
122 if (offset < 0)
123 return (EINVAL); /* can't read from a negative offset */
124
125 cp = VTOC(vp);
126 fp = VTOF(vp);
127 hfsmp = VTOHFS(vp);
128
129 /* Protect against a size change. */
130 hfs_lock_truncate(cp, 0);
131
132 filesize = fp->ff_size;
133 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
134 if (offset > filesize) {
135 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
136 (offset > (off_t)MAXHFSFILESIZE)) {
137 retval = EFBIG;
138 }
139 goto exit;
140 }
141
142 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
143 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
144
145 retval = cluster_read(vp, uio, filesize, ap->a_ioflag);
146
147 cp->c_touch_acctime = TRUE;
148
149 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
150 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
151
152 /*
153 * Keep track of blocks read
154 */
155 if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
156 int took_cnode_lock = 0;
157 off_t bytesread;
158
159 bytesread = start_resid - uio_resid(uio);
160
161 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
162 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
163 hfs_lock(cp, HFS_FORCE_LOCK);
164 took_cnode_lock = 1;
165 }
166 /*
167 * If this file hasn't been seen since the start of
168 * the current sampling period then start over.
169 */
170 if (cp->c_atime < hfsmp->hfc_timebase) {
171 struct timeval tv;
172
173 fp->ff_bytesread = bytesread;
174 microtime(&tv);
175 cp->c_atime = tv.tv_sec;
176 } else {
177 fp->ff_bytesread += bytesread;
178 }
179 if (took_cnode_lock)
180 hfs_unlock(cp);
181 }
182 exit:
183 hfs_unlock_truncate(cp, 0);
184 return (retval);
185 }
186
187 /*
188 * Write data to a file.
189 */
190 int
191 hfs_vnop_write(struct vnop_write_args *ap)
192 {
193 uio_t uio = ap->a_uio;
194 struct vnode *vp = ap->a_vp;
195 struct cnode *cp;
196 struct filefork *fp;
197 struct hfsmount *hfsmp;
198 kauth_cred_t cred = NULL;
199 off_t origFileSize;
200 off_t writelimit;
201 off_t bytesToAdd = 0;
202 off_t actualBytesAdded;
203 off_t filebytes;
204 off_t offset;
205 size_t resid;
206 int eflags;
207 int ioflag = ap->a_ioflag;
208 int retval = 0;
209 int lockflags;
210 int cnode_locked = 0;
211 int partialwrite = 0;
212 int exclusive_lock = 0;
213
214 // LP64todo - fix this! uio_resid may be a 64-bit value
215 resid = uio_resid(uio);
216 offset = uio_offset(uio);
217
218 if (ioflag & IO_APPEND) {
219 exclusive_lock = 1;
220 }
221
222 if (offset < 0)
223 return (EINVAL);
224 if (resid == 0)
225 return (E_NONE);
226 if (!vnode_isreg(vp))
227 return (EPERM); /* Can only write regular files */
228
229 cp = VTOC(vp);
230 fp = VTOF(vp);
231 hfsmp = VTOHFS(vp);
232
233 eflags = kEFDeferMask; /* defer file block allocations */
234 #ifdef HFS_SPARSE_DEV
235 /*
236 * When the underlying device is sparse and space
237 * is low (< 8MB), stop doing delayed allocations
238 * and begin doing synchronous I/O.
239 */
240 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
241 (hfs_freeblks(hfsmp, 0) < 2048)) {
242 eflags &= ~kEFDeferMask;
243 ioflag |= IO_SYNC;
244 }
245 #endif /* HFS_SPARSE_DEV */
246
247 again:
248 /* Protect against a size change. */
249 hfs_lock_truncate(cp, exclusive_lock);
250
251 if (ioflag & IO_APPEND) {
252 uio_setoffset(uio, fp->ff_size);
253 offset = fp->ff_size;
254 }
255 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
256 retval = EPERM;
257 goto exit;
258 }
259
260 origFileSize = fp->ff_size;
261 writelimit = offset + resid;
262 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
263
264 /* If the truncate lock is shared, and if we either have virtual
265 * blocks or will need to extend the file, upgrade the truncate
266 * lock to exclusive. If the upgrade fails, we lose the lock and
267 * have to acquire the exclusive lock again.
268 */
269 if ((exclusive_lock == 0) &&
270 ((fp->ff_unallocblocks != 0) || (writelimit > filebytes))) {
271 exclusive_lock = 1;
272 /* Lock upgrade failed and we lost our shared lock, try again */
273 if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
274 goto again;
275 }
276 }
277
278 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
279 goto exit;
280 }
281 cnode_locked = 1;
282
283 if (!exclusive_lock) {
284 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
285 (int)offset, uio_resid(uio), (int)fp->ff_size,
286 (int)filebytes, 0);
287 }
288
289 /* Check if we do not need to extend the file */
290 if (writelimit <= filebytes) {
291 goto sizeok;
292 }
293
294 cred = vfs_context_ucred(ap->a_context);
295 bytesToAdd = writelimit - filebytes;
296
297 #if QUOTA
298 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
299 cred, 0);
300 if (retval)
301 goto exit;
302 #endif /* QUOTA */
303
304 if (hfs_start_transaction(hfsmp) != 0) {
305 retval = EINVAL;
306 goto exit;
307 }
308
309 while (writelimit > filebytes) {
310 bytesToAdd = writelimit - filebytes;
311 if (cred && suser(cred, NULL) != 0)
312 eflags |= kEFReserveMask;
313
314 /* Protect extents b-tree and allocation bitmap */
315 lockflags = SFL_BITMAP;
316 if (overflow_extents(fp))
317 lockflags |= SFL_EXTENTS;
318 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
319
320 /* Files that are changing size are not hot file candidates. */
321 if (hfsmp->hfc_stage == HFC_RECORDING) {
322 fp->ff_bytesread = 0;
323 }
324 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
325 0, eflags, &actualBytesAdded));
326
327 hfs_systemfile_unlock(hfsmp, lockflags);
328
329 if ((actualBytesAdded == 0) && (retval == E_NONE))
330 retval = ENOSPC;
331 if (retval != E_NONE)
332 break;
333 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
334 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
335 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
336 }
337 (void) hfs_update(vp, TRUE);
338 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
339 (void) hfs_end_transaction(hfsmp);
340
341 /*
342 * If we didn't grow the file enough, try a partial write.
343 * POSIX expects this behavior.
344 */
345 if ((retval == ENOSPC) && (filebytes > offset)) {
346 retval = 0;
347 partialwrite = 1;
348 uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
349 resid -= bytesToAdd;
350 writelimit = filebytes;
351 }
352 sizeok:
353 if (retval == E_NONE) {
354 off_t filesize;
355 off_t zero_off;
356 off_t tail_off;
357 off_t inval_start;
358 off_t inval_end;
359 off_t io_start;
360 int lflag;
361 struct rl_entry *invalid_range;
362
363 if (writelimit > fp->ff_size)
364 filesize = writelimit;
365 else
366 filesize = fp->ff_size;
367
368 lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
369
370 if (offset <= fp->ff_size) {
371 zero_off = offset & ~PAGE_MASK_64;
372
373 /* Check whether the area between zero_off and the start
374 of the transfer is invalid and should be zero-filled
375 as part of the transfer:
376 */
377 if (offset > zero_off) {
378 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
379 lflag |= IO_HEADZEROFILL;
380 }
381 } else {
382 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
383
384 /* The bytes between fp->ff_size and uio->uio_offset must never be
385 read without being zeroed. The current last block is filled with zeroes
386 if it holds valid data but in all cases merely do a little bookkeeping
387 to track the area from the end of the current last page to the start of
388 the area actually written. For the same reason only the bytes up to the
389 start of the page where this write will start are invalidated; any remainder
390 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
391
392 Note that inval_start, the start of the page after the current EOF,
393 may be past the start of the write, in which case the zeroing
394 will be handled by the cluster_write of the actual data.
395 */
396 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
397 inval_end = offset & ~PAGE_MASK_64;
398 zero_off = fp->ff_size;
399
400 if ((fp->ff_size & PAGE_MASK_64) &&
401 (rl_scan(&fp->ff_invalidranges,
402 eof_page_base,
403 fp->ff_size - 1,
404 &invalid_range) != RL_NOOVERLAP)) {
405 /* The page containing the EOF is not valid, so the
406 entire page must be made inaccessible now. If the write
407 starts on a page beyond the page containing the eof
408 (inval_end > eof_page_base), add the
409 whole page to the range to be invalidated. Otherwise
410 (i.e. if the write starts on the same page), zero-fill
411 the entire page explicitly now:
412 */
413 if (inval_end > eof_page_base) {
414 inval_start = eof_page_base;
415 } else {
416 zero_off = eof_page_base;
417 };
418 };
419
420 if (inval_start < inval_end) {
421 struct timeval tv;
422 /* There's some range of data that's going to be marked invalid */
423
424 if (zero_off < inval_start) {
425 /* The pages between inval_start and inval_end are going to be invalidated,
426 and the actual write will start on a page past inval_end. Now's the last
427 chance to zero-fill the page containing the EOF:
428 */
429 hfs_unlock(cp);
430 cnode_locked = 0;
431 retval = cluster_write(vp, (uio_t) 0,
432 fp->ff_size, inval_start,
433 zero_off, (off_t)0,
434 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
435 hfs_lock(cp, HFS_FORCE_LOCK);
436 cnode_locked = 1;
437 if (retval) goto ioerr_exit;
438 offset = uio_offset(uio);
439 };
440
441 /* Mark the remaining area of the newly allocated space as invalid: */
442 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
443 microuptime(&tv);
444 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
445 zero_off = fp->ff_size = inval_end;
446 };
447
448 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
449 };
450
451 /* Check to see whether the area between the end of the write and the end of
452 the page it falls in is invalid and should be zero-filled as part of the transfer:
453 */
454 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
455 if (tail_off > filesize) tail_off = filesize;
456 if (tail_off > writelimit) {
457 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
458 lflag |= IO_TAILZEROFILL;
459 };
460 };
461
462 /*
463 * if the write starts beyond the current EOF (possibly advanced in the
464 * zeroing of the last block, above), then we'll zero fill from the current EOF
465 * to where the write begins:
466 *
467 * NOTE: If (and ONLY if) the portion of the file about to be written is
468 * before the current EOF it might be marked as invalid now and must be
469 * made readable (removed from the invalid ranges) before cluster_write
470 * tries to write it:
471 */
472 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
473 if (io_start < fp->ff_size) {
474 off_t io_end;
475
476 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
477 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
478 };
479
480 hfs_unlock(cp);
481 cnode_locked = 0;
482
483 /*
484 * We need to tell UBC the fork's new size BEFORE calling
485 * cluster_write, in case any of the new pages need to be
486 * paged out before cluster_write completes (which does happen
487 * in embedded systems due to extreme memory pressure).
488 * Similarly, we need to tell hfs_vnop_pageout what the new EOF
489 * will be, so that it can pass that on to cluster_pageout, and
490 * allow those pageouts.
491 *
492 * We don't update ff_size yet since we don't want pageins to
493 * be able to see uninitialized data between the old and new
494 * EOF, until cluster_write has completed and initialized that
495 * part of the file.
496 *
497 * The vnode pager relies on the file size last given to UBC via
498 * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
499 * ff_size (whichever is larger). NOTE: ff_new_size is always
500 * zero, unless we are extending the file via write.
501 */
502 if (filesize > fp->ff_size) {
503 fp->ff_new_size = filesize;
504 ubc_setsize(vp, filesize);
505 }
506 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
507 tail_off, lflag | IO_NOZERODIRTY);
508 if (retval) {
509 fp->ff_new_size = 0; /* no longer extending; use ff_size */
510 if (filesize > origFileSize) {
511 ubc_setsize(vp, origFileSize);
512 }
513 goto ioerr_exit;
514 }
515
516 if (filesize > origFileSize) {
517 fp->ff_size = filesize;
518
519 /* Files that are changing size are not hot file candidates. */
520 if (hfsmp->hfc_stage == HFC_RECORDING) {
521 fp->ff_bytesread = 0;
522 }
523 }
524 fp->ff_new_size = 0; /* ff_size now has the correct size */
525
526 /* If we wrote some bytes, then touch the change and mod times */
527 if (resid > uio_resid(uio)) {
528 cp->c_touch_chgtime = TRUE;
529 cp->c_touch_modtime = TRUE;
530 }
531 }
532 if (partialwrite) {
533 uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
534 resid += bytesToAdd;
535 }
536
537 // XXXdbg - see radar 4871353 for more info
538 {
539 if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
540 VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
541 }
542 }
543 HFS_KNOTE(vp, NOTE_WRITE);
544
545 ioerr_exit:
546 /*
547 * If we successfully wrote any data, and we are not the superuser
548 * we clear the setuid and setgid bits as a precaution against
549 * tampering.
550 */
551 if (cp->c_mode & (S_ISUID | S_ISGID)) {
552 cred = vfs_context_ucred(ap->a_context);
553 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
554 if (!cnode_locked) {
555 hfs_lock(cp, HFS_FORCE_LOCK);
556 cnode_locked = 1;
557 }
558 cp->c_mode &= ~(S_ISUID | S_ISGID);
559 }
560 }
561 if (retval) {
562 if (ioflag & IO_UNIT) {
563 if (!cnode_locked) {
564 hfs_lock(cp, HFS_FORCE_LOCK);
565 cnode_locked = 1;
566 }
567 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
568 0, ap->a_context);
569 // LP64todo - fix this! resid needs to be user_ssize_t
570 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
571 uio_setresid(uio, resid);
572 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
573 }
574 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
575 if (!cnode_locked) {
576 hfs_lock(cp, HFS_FORCE_LOCK);
577 cnode_locked = 1;
578 }
579 retval = hfs_update(vp, TRUE);
580 }
581 /* Updating vcbWrCnt doesn't need to be atomic. */
582 hfsmp->vcbWrCnt++;
583
584 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
585 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
586 exit:
587 if (cnode_locked)
588 hfs_unlock(cp);
589 hfs_unlock_truncate(cp, exclusive_lock);
590 return (retval);
591 }
592
593 /* support for the "bulk-access" fcntl */
594
595 #define CACHE_LEVELS 16
596 #define NUM_CACHE_ENTRIES (64*16)
597 #define PARENT_IDS_FLAG 0x100
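/*
 * CACHE_LEVELS bounds how many ancestor directories one access check will
 * remember per chain, NUM_CACHE_ENTRIES sizes the per-call access cache,
 * and PARENT_IDS_FLAG, when set in the request's flags word, means the
 * file_ids array holds parent directory ids rather than leaf ids (see the
 * check_leaf handling in do_bulk_access_check below).
 */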
598
599 struct access_cache {
600 int numcached;
601 int cachehits; /* these two for statistics gathering */
602 int lookups;
603 unsigned int *acache;
604 unsigned char *haveaccess;
605 };
606
607 struct access_t {
608 uid_t uid; /* IN: effective user id */
609 short flags; /* IN: access requested (i.e. R_OK) */
610 short num_groups; /* IN: number of groups user belongs to */
611 int num_files; /* IN: number of files to process */
612 int *file_ids; /* IN: array of file ids */
613 gid_t *groups; /* IN: array of groups */
614 short *access; /* OUT: access info for each file (0 for 'has access') */
615 };
616
617 struct user_access_t {
618 uid_t uid; /* IN: effective user id */
619 short flags; /* IN: access requested (i.e. R_OK) */
620 short num_groups; /* IN: number of groups user belongs to */
621 int num_files; /* IN: number of files to process */
622 user_addr_t file_ids; /* IN: array of file ids */
623 user_addr_t groups; /* IN: array of groups */
624 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
625 };
626
627
628 // these are the "extended" versions of the above structures
629 // note that it is crucial that they be a different size than
630 // the regular version
631 struct ext_access_t {
632 uint32_t flags; /* IN: access requested (i.e. R_OK) */
633 uint32_t num_files; /* IN: number of files to process */
634 uint32_t map_size; /* IN: size of the bit map */
635 uint32_t *file_ids; /* IN: Array of file ids */
636 char *bitmap; /* OUT: hash-bitmap of interesting directory ids */
637 short *access; /* OUT: access info for each file (0 for 'has access') */
638 uint32_t num_parents; /* future use */
639 cnid_t *parents; /* future use */
640 };
641
642 struct ext_user_access_t {
643 uint32_t flags; /* IN: access requested (i.e. R_OK) */
644 uint32_t num_files; /* IN: number of files to process */
645 uint32_t map_size; /* IN: size of the bit map */
646 user_addr_t file_ids; /* IN: array of file ids */
647 user_addr_t bitmap; /* IN: array of groups */
648 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
649 uint32_t num_parents;/* future use */
650 user_addr_t parents;/* future use */
651 };
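
/*
 * Illustrative sketch (not part of the original file): a user-space caller
 * would fill one of the structures above and hand it to the volume through
 * fsctl(2), which dispatches to do_bulk_access_check() via the
 * HFS_BULKACCESS_FSCTL / HFS_EXT_BULKACCESS_FSCTL cases in hfs_vnop_ioctl().
 * The volume path and catalog node ids below are hypothetical, and the exact
 * user-space selector/header is an assumption; consult hfs_fsctl.h.
 *
 *     struct access_t args;
 *     int ids[2] = { 1234, 5678 };      // hypothetical catalog node ids
 *     short results[2];
 *
 *     memset(&args, 0, sizeof(args));
 *     args.flags = R_OK;
 *     args.num_files = 2;
 *     args.file_ids = ids;
 *     args.access = results;
 *     if (fsctl("/Volumes/MyHFS", HFS_BULKACCESS_FSCTL, &args, 0) == 0) {
 *         // results[i] == 0 means access to file_ids[i] was granted
 *     }
 */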
652
653
654 /*
655 * Perform a binary search for the given parent_id. Return value is
656 * the index if there is a match. If no_match_indexp is non-NULL it
657 * will be assigned the index at which to insert the item (even if it
658 * was not found).
659 */
660 static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
661 {
662 int index=-1;
663 unsigned int lo=0;
664
665 do {
666 unsigned int mid = ((hi - lo)/2) + lo;
667 unsigned int this_id = array[mid];
668
669 if (parent_id == this_id) {
670 hi = mid;
671 break;
672 }
673
674 if (parent_id < this_id) {
675 hi = mid;
676 continue;
677 }
678
679 if (parent_id > this_id) {
680 lo = mid + 1;
681 continue;
682 }
683 } while(lo < hi);
684
685 /* check if lo and hi converged on the match */
686 if (parent_id == array[hi]) {
687 index = hi;
688 }
689
690 if (no_match_indexp) {
691 *no_match_indexp = hi;
692 }
693
694 return index;
695 }
696
697
698 static int
699 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
700 {
701 unsigned int hi;
702 int matches = 0;
703 int index, no_match_index;
704
705 if (cache->numcached == 0) {
706 *indexp = 0;
707 return 0; // table is empty, so insert at index=0 and report no match
708 }
709
710 if (cache->numcached > NUM_CACHE_ENTRIES) {
711 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
712 cache->numcached, NUM_CACHE_ENTRIES);*/
713 cache->numcached = NUM_CACHE_ENTRIES;
714 }
715
716 hi = cache->numcached - 1;
717
718 index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);
719
720 /* if no existing entry found, find index for new one */
721 if (index == -1) {
722 index = no_match_index;
723 matches = 0;
724 } else {
725 matches = 1;
726 }
727
728 *indexp = index;
729 return matches;
730 }
731
732 /*
733 * Add a node to the access_cache at the given index (or do a lookup first
734 * to find the index if -1 is passed in). We currently do a replace rather
735 * than an insert if the cache is full.
736 */
737 static void
738 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
739 {
740 int lookup_index = -1;
741
742 /* need to do a lookup first if -1 passed for index */
743 if (index == -1) {
744 if (lookup_bucket(cache, &lookup_index, nodeID)) {
745 if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
746 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
747 cache->haveaccess[lookup_index] = access;
748 }
749
750 /* mission accomplished */
751 return;
752 } else {
753 index = lookup_index;
754 }
755
756 }
757
758 /* if the cache is full, do a replace rather than an insert */
759 if (cache->numcached >= NUM_CACHE_ENTRIES) {
760 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
761 cache->numcached = NUM_CACHE_ENTRIES-1;
762
763 if (index > cache->numcached) {
764 // printf("index %d pinned to %d\n", index, cache->numcached);
765 index = cache->numcached;
766 }
767 }
768
769 if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
770 index++;
771 }
772
773 if (index >= 0 && index < cache->numcached) {
774 /* only do bcopy if we're inserting */
775 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
776 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
777 }
778
779 cache->acache[index] = nodeID;
780 cache->haveaccess[index] = access;
781 cache->numcached++;
782 }
783
784
785 struct cinfo {
786 uid_t uid;
787 gid_t gid;
788 mode_t mode;
789 cnid_t parentcnid;
790 u_int16_t recflags;
791 };
792
793 static int
794 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
795 {
796 struct cinfo *cip = (struct cinfo *)arg;
797
798 cip->uid = attrp->ca_uid;
799 cip->gid = attrp->ca_gid;
800 cip->mode = attrp->ca_mode;
801 cip->parentcnid = descp->cd_parentcnid;
802 cip->recflags = attrp->ca_recflags;
803
804 return (0);
805 }
806
807 /*
808 * Look up the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
809 * isn't incore, then go to the catalog.
810 */
811 static int
812 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
813 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
814 {
815 int error = 0;
816
817 /* if this id matches the one the fsctl was called with, skip the lookup */
818 if (cnid == skip_cp->c_cnid) {
819 cnattrp->ca_uid = skip_cp->c_uid;
820 cnattrp->ca_gid = skip_cp->c_gid;
821 cnattrp->ca_mode = skip_cp->c_mode;
822 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
823 } else {
824 struct cinfo c_info;
825
826 /* otherwise, check the cnode hash in case the file/dir is incore */
827 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
828 cnattrp->ca_uid = c_info.uid;
829 cnattrp->ca_gid = c_info.gid;
830 cnattrp->ca_mode = c_info.mode;
831 cnattrp->ca_recflags = c_info.recflags;
832 keyp->hfsPlus.parentID = c_info.parentcnid;
833 } else {
834 int lockflags;
835
836 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
837
838 /* lookup this cnid in the catalog */
839 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
840
841 hfs_systemfile_unlock(hfsmp, lockflags);
842
843 cache->lookups++;
844 }
845 }
846
847 return (error);
848 }
849
850
851 /*
852 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
853 * up to CACHE_LEVELS as we progress towards the root.
854 */
855 static int
856 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
857 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev,
858 struct vfs_context *my_context,
859 char *bitmap,
860 uint32_t map_size,
861 cnid_t* parents,
862 uint32_t num_parents)
863 {
864 int myErr = 0;
865 int myResult;
866 HFSCatalogNodeID thisNodeID;
867 unsigned int myPerms;
868 struct cat_attr cnattr;
869 int cache_index = -1, scope_index = -1, scope_idx_start = -1;
870 CatalogKey catkey;
871
872 int i = 0, ids_to_cache = 0;
873 int parent_ids[CACHE_LEVELS];
874
875 thisNodeID = nodeID;
876 while (thisNodeID >= kRootDirID) {
877 myResult = 0; /* default to "no access" */
878
879 /* check the cache before resorting to hitting the catalog */
880
881 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
882 * to look any further after hitting cached dir */
883
884 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
885 cache->cachehits++;
886 myErr = cache->haveaccess[cache_index];
887 if (scope_index != -1) {
888 if (myErr == ESRCH) {
889 myErr = 0;
890 }
891 } else {
892 scope_index = 0; // so we'll just use the cache result
893 scope_idx_start = ids_to_cache;
894 }
895 myResult = (myErr == 0) ? 1 : 0;
896 goto ExitThisRoutine;
897 }
898
899
900 if (parents) {
901 int tmp;
902 tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
903 if (scope_index == -1)
904 scope_index = tmp;
905 if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
906 scope_idx_start = ids_to_cache;
907 }
908 }
909
910 /* remember which parents we want to cache */
911 if (ids_to_cache < CACHE_LEVELS) {
912 parent_ids[ids_to_cache] = thisNodeID;
913 ids_to_cache++;
914 }
915 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
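// i.e. directory id N sets bit (N & 7) in byte ((N / 8) % map_size) of the caller-supplied bitmap.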
916 if (bitmap && map_size) {
917 bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
918 }
919
920
921 /* do the lookup (checks the cnode hash, then the catalog) */
922 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr);
923 if (myErr) {
924 goto ExitThisRoutine; /* no access */
925 }
926
927 /* Root always gets access. */
928 if (suser(myp_ucred, NULL) == 0) {
929 thisNodeID = catkey.hfsPlus.parentID;
930 myResult = 1;
931 continue;
932 }
933
934 // if the thing has acl's, do the full permission check
935 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
936 struct vnode *vp;
937
938 /* get the vnode for this cnid */
939 myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0);
940 if ( myErr ) {
941 myResult = 0;
942 goto ExitThisRoutine;
943 }
944
945 thisNodeID = VTOC(vp)->c_parentcnid;
946
947 hfs_unlock(VTOC(vp));
948
949 if (vnode_vtype(vp) == VDIR) {
950 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
951 } else {
952 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
953 }
954
955 vnode_put(vp);
956 if (myErr) {
957 myResult = 0;
958 goto ExitThisRoutine;
959 }
960 } else {
961 unsigned int flags;
962
963 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
964 cnattr.ca_mode, hfsmp->hfs_mp,
965 myp_ucred, theProcPtr);
966
967 if (cnattr.ca_mode & S_IFDIR) {
968 flags = R_OK | X_OK;
969 } else {
970 flags = R_OK;
971 }
972 if ( (myPerms & flags) != flags) {
973 myResult = 0;
974 myErr = EACCES;
975 goto ExitThisRoutine; /* no access */
976 }
977
978 /* up the hierarchy we go */
979 thisNodeID = catkey.hfsPlus.parentID;
980 }
981 }
982
983 /* if here, we have access to this node */
984 myResult = 1;
985
986 ExitThisRoutine:
987 if (parents && myErr == 0 && scope_index == -1) {
988 myErr = ESRCH;
989 }
990
991 if (myErr) {
992 myResult = 0;
993 }
994 *err = myErr;
995
996 /* cache the parent directory(ies) */
997 for (i = 0; i < ids_to_cache; i++) {
998 if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
999 add_node(cache, -1, parent_ids[i], ESRCH);
1000 } else {
1001 add_node(cache, -1, parent_ids[i], myErr);
1002 }
1003 }
1004
1005 return (myResult);
1006 }
1007
1008 static int
1009 do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
1010 struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
1011 {
1012 boolean_t is64bit;
1013
1014 /*
1015 * NOTE: on entry, the vnode is locked. In case this vnode
1016 * happens to be in our list of file_ids, we'll note it so we
1017 * avoid calling hfs_chashget_nowait() on that id as that
1018 * will cause a "locking against myself" panic.
1019 */
1020 Boolean check_leaf = true;
1021
1022 struct ext_user_access_t *user_access_structp;
1023 struct ext_user_access_t tmp_user_access;
1024 struct access_cache cache;
1025
1026 int error = 0;
1027 unsigned int i;
1028
1029 dev_t dev = VTOC(vp)->c_dev;
1030
1031 short flags;
1032 unsigned int num_files = 0;
1033 int map_size = 0;
1034 int num_parents = 0;
1035 int *file_ids=NULL;
1036 short *access=NULL;
1037 char *bitmap=NULL;
1038 cnid_t *parents=NULL;
1039 int leaf_index;
1040
1041 cnid_t cnid;
1042 cnid_t prevParent_cnid = 0;
1043 unsigned int myPerms;
1044 short myaccess = 0;
1045 struct cat_attr cnattr;
1046 CatalogKey catkey;
1047 struct cnode *skip_cp = VTOC(vp);
1048 kauth_cred_t cred = vfs_context_ucred(context);
1049 proc_t p = vfs_context_proc(context);
1050
1051 is64bit = proc_is64bit(p);
1052
1053 /* initialize the local cache and buffers */
1054 cache.numcached = 0;
1055 cache.cachehits = 0;
1056 cache.lookups = 0;
1057 cache.acache = NULL;
1058 cache.haveaccess = NULL;
1059
1060 /* struct copyin done during dispatch... need to copy file_id array separately */
1061 if (ap->a_data == NULL) {
1062 error = EINVAL;
1063 goto err_exit_bulk_access;
1064 }
1065
1066 if (is64bit) {
1067 if (arg_size != sizeof(struct ext_user_access_t)) {
1068 error = EINVAL;
1069 goto err_exit_bulk_access;
1070 }
1071
1072 user_access_structp = (struct ext_user_access_t *)ap->a_data;
1073
1074 } else if (arg_size == sizeof(struct access_t)) {
1075 struct access_t *accessp = (struct access_t *)ap->a_data;
1076
1077 // convert an old style bulk-access struct to the new style
1078 tmp_user_access.flags = accessp->flags;
1079 tmp_user_access.num_files = accessp->num_files;
1080 tmp_user_access.map_size = 0;
1081 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1082 tmp_user_access.bitmap = USER_ADDR_NULL;
1083 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1084 tmp_user_access.num_parents = 0;
1085 user_access_structp = &tmp_user_access;
1086
1087 } else if (arg_size == sizeof(struct ext_access_t)) {
1088 struct ext_access_t *accessp = (struct ext_access_t *)ap->a_data;
1089
1090 // up-cast from a 32-bit version of the struct
1091 tmp_user_access.flags = accessp->flags;
1092 tmp_user_access.num_files = accessp->num_files;
1093 tmp_user_access.map_size = accessp->map_size;
1094 tmp_user_access.num_parents = accessp->num_parents;
1095
1096 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1097 tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
1098 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1099 tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);
1100
1101 user_access_structp = &tmp_user_access;
1102 } else {
1103 error = EINVAL;
1104 goto err_exit_bulk_access;
1105 }
1106
1107 map_size = user_access_structp->map_size;
1108
1109 num_files = user_access_structp->num_files;
1110
1111 num_parents= user_access_structp->num_parents;
1112
1113 if (num_files < 1) {
1114 goto err_exit_bulk_access;
1115 }
1116 if (num_files > 1024) {
1117 error = EINVAL;
1118 goto err_exit_bulk_access;
1119 }
1120
1121 if (num_parents > 1024) {
1122 error = EINVAL;
1123 goto err_exit_bulk_access;
1124 }
1125
1126 file_ids = (int *) kalloc(sizeof(int) * num_files);
1127 access = (short *) kalloc(sizeof(short) * num_files);
1128 if (map_size) {
1129 bitmap = (char *) kalloc(sizeof(char) * map_size);
1130 }
1131
1132 if (num_parents) {
1133 parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
1134 }
1135
1136 cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
1137 cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1138
1139 if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
1140 if (file_ids) {
1141 kfree(file_ids, sizeof(int) * num_files);
1142 }
1143 if (bitmap) {
1144 kfree(bitmap, sizeof(char) * map_size);
1145 }
1146 if (access) {
1147 kfree(access, sizeof(short) * num_files);
1148 }
1149 if (cache.acache) {
1150 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1151 }
1152 if (cache.haveaccess) {
1153 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1154 }
1155 if (parents) {
1156 kfree(parents, sizeof(cnid_t) * num_parents);
1157 }
1158 return ENOMEM;
1159 }
1160
1161 // make sure the bitmap is zeroed out...
1162 if (bitmap) {
1163 bzero(bitmap, (sizeof(char) * map_size));
1164 }
1165
1166 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1167 num_files * sizeof(int)))) {
1168 goto err_exit_bulk_access;
1169 }
1170
1171 if (num_parents) {
1172 if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
1173 num_parents * sizeof(cnid_t)))) {
1174 goto err_exit_bulk_access;
1175 }
1176 }
1177
1178 flags = user_access_structp->flags;
1179 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1180 flags = R_OK;
1181 }
1182
1183 /* check if we've been passed leaf node ids or parent ids */
1184 if (flags & PARENT_IDS_FLAG) {
1185 check_leaf = false;
1186 }
1187
1188 /* Check access to each file_id passed in */
1189 for (i = 0; i < num_files; i++) {
1190 leaf_index=-1;
1191 cnid = (cnid_t) file_ids[i];
1192
1193 /* root always has access */
1194 if ((!parents) && (!suser(cred, NULL))) {
1195 access[i] = 0;
1196 continue;
1197 }
1198
1199 if (check_leaf) {
1200 /* do the lookup (checks the cnode hash, then the catalog) */
1201 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr);
1202 if (error) {
1203 access[i] = (short) error;
1204 continue;
1205 }
1206
1207 if (parents) {
1208 // Check if the leaf matches one of the parent scopes
1209 leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
1210 }
1211
1212 // if the thing has acl's, do the full permission check
1213 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1214 struct vnode *cvp;
1215 int myErr = 0;
1216 /* get the vnode for this cnid */
1217 myErr = hfs_vget(hfsmp, cnid, &cvp, 0);
1218 if ( myErr ) {
1219 access[i] = myErr;
1220 continue;
1221 }
1222
1223 hfs_unlock(VTOC(cvp));
1224
1225 if (vnode_vtype(cvp) == VDIR) {
1226 myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
1227 } else {
1228 myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
1229 }
1230
1231 vnode_put(cvp);
1232 if (myErr) {
1233 access[i] = myErr;
1234 continue;
1235 }
1236 } else {
1237 /* before calling CheckAccess(), check the target file for read access */
1238 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1239 cnattr.ca_mode, hfsmp->hfs_mp, cred, p);
1240
1241 /* fail fast if no access */
1242 if ((myPerms & flags) == 0) {
1243 access[i] = EACCES;
1244 continue;
1245 }
1246 }
1247 } else {
1248 /* we were passed an array of parent ids */
1249 catkey.hfsPlus.parentID = cnid;
1250 }
1251
1252 /* if the last guy had the same parent and had access, we're done */
1253 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1254 cache.cachehits++;
1255 access[i] = 0;
1256 continue;
1257 }
1258
1259 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1260 skip_cp, p, cred, dev, context,bitmap, map_size, parents, num_parents);
1261
1262 if (myaccess || (error == ESRCH && leaf_index != -1)) {
1263 access[i] = 0; // have access.. no errors to report
1264 } else {
1265 access[i] = (error != 0 ? (short) error : EACCES);
1266 }
1267
1268 prevParent_cnid = catkey.hfsPlus.parentID;
1269 }
1270
1271 /* copyout the access array */
1272 if ((error = copyout((caddr_t)access, user_access_structp->access,
1273 num_files * sizeof (short)))) {
1274 goto err_exit_bulk_access;
1275 }
1276 if (map_size && bitmap) {
1277 if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
1278 map_size * sizeof (char)))) {
1279 goto err_exit_bulk_access;
1280 }
1281 }
1282
1283
1284 err_exit_bulk_access:
1285
1286 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1287
1288 if (file_ids)
1289 kfree(file_ids, sizeof(int) * num_files);
1290 if (parents)
1291 kfree(parents, sizeof(cnid_t) * num_parents);
1292 if (bitmap)
1293 kfree(bitmap, sizeof(char) * map_size);
1294 if (access)
1295 kfree(access, sizeof(short) * num_files);
1296 if (cache.acache)
1297 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1298 if (cache.haveaccess)
1299 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1300
1301 return (error);
1302 }
1303
1304
1305 /* end "bulk-access" support */
1306
1307
1308 /*
1309 * Callback for use with freeze ioctl.
1310 */
1311 static int
1312 hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
1313 {
1314 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
1315
1316 return 0;
1317 }
1318
1319 /*
1320 * Control filesystem operating characteristics.
1321 */
1322 int
1323 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
1324 vnode_t a_vp;
1325 int a_command;
1326 caddr_t a_data;
1327 int a_fflag;
1328 vfs_context_t a_context;
1329 } */ *ap)
1330 {
1331 struct vnode * vp = ap->a_vp;
1332 struct hfsmount *hfsmp = VTOHFS(vp);
1333 vfs_context_t context = ap->a_context;
1334 kauth_cred_t cred = vfs_context_ucred(context);
1335 proc_t p = vfs_context_proc(context);
1336 struct vfsstatfs *vfsp;
1337 boolean_t is64bit;
1338
1339 is64bit = proc_is64bit(p);
1340
1341 switch (ap->a_command) {
1342
1343 case HFS_GETPATH:
1344 {
1345 struct vnode *file_vp;
1346 cnid_t cnid;
1347 int outlen;
1348 char *bufptr;
1349 int error;
1350
1351 /* Caller must be owner of file system. */
1352 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1353 if (suser(cred, NULL) &&
1354 kauth_cred_getuid(cred) != vfsp->f_owner) {
1355 return (EACCES);
1356 }
1357 /* Target vnode must be file system's root. */
1358 if (!vnode_isvroot(vp)) {
1359 return (EINVAL);
1360 }
1361 bufptr = (char *)ap->a_data;
1362 cnid = strtoul(bufptr, NULL, 10);
1363
1364 /* We need to call hfs_vfs_vget to leverage the code that will fix the
1365 * origin list for us if needed, as opposed to calling hfs_vget, since
1366 * we will need it for the subsequent build_path call.
1367 */
1368 if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
1369 return (error);
1370 }
1371 error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);
1372 vnode_put(file_vp);
1373
1374 return (error);
1375 }
1376
1377 case HFS_PREV_LINK:
1378 case HFS_NEXT_LINK:
1379 {
1380 cnid_t linkfileid;
1381 cnid_t nextlinkid;
1382 cnid_t prevlinkid;
1383 int error;
1384
1385 /* Caller must be owner of file system. */
1386 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1387 if (suser(cred, NULL) &&
1388 kauth_cred_getuid(cred) != vfsp->f_owner) {
1389 return (EACCES);
1390 }
1391 /* Target vnode must be file system's root. */
1392 if (!vnode_isvroot(vp)) {
1393 return (EINVAL);
1394 }
1395 linkfileid = *(cnid_t *)ap->a_data;
1396 if (linkfileid < kHFSFirstUserCatalogNodeID) {
1397 return (EINVAL);
1398 }
1399 if ((error = hfs_lookuplink(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
1400 return (error);
1401 }
1402 if (ap->a_command == HFS_NEXT_LINK) {
1403 *(cnid_t *)ap->a_data = nextlinkid;
1404 } else {
1405 *(cnid_t *)ap->a_data = prevlinkid;
1406 }
1407 return (0);
1408 }
1409
1410 case HFS_RESIZE_PROGRESS: {
1411
1412 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1413 if (suser(cred, NULL) &&
1414 kauth_cred_getuid(cred) != vfsp->f_owner) {
1415 return (EACCES); /* must be owner of file system */
1416 }
1417 if (!vnode_isvroot(vp)) {
1418 return (EINVAL);
1419 }
1420 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
1421 }
1422
1423 case HFS_RESIZE_VOLUME: {
1424 u_int64_t newsize;
1425 u_int64_t cursize;
1426
1427 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1428 if (suser(cred, NULL) &&
1429 kauth_cred_getuid(cred) != vfsp->f_owner) {
1430 return (EACCES); /* must be owner of file system */
1431 }
1432 if (!vnode_isvroot(vp)) {
1433 return (EINVAL);
1434 }
1435 newsize = *(u_int64_t *)ap->a_data;
1436 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
1437
1438 if (newsize > cursize) {
1439 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
1440 } else if (newsize < cursize) {
1441 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
1442 } else {
1443 return (0);
1444 }
1445 }
1446 case HFS_CHANGE_NEXT_ALLOCATION: {
1447 int error = 0; /* Assume success */
1448 u_int32_t location;
1449
1450 if (vnode_vfsisrdonly(vp)) {
1451 return (EROFS);
1452 }
1453 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1454 if (suser(cred, NULL) &&
1455 kauth_cred_getuid(cred) != vfsp->f_owner) {
1456 return (EACCES); /* must be owner of file system */
1457 }
1458 if (!vnode_isvroot(vp)) {
1459 return (EINVAL);
1460 }
1461 HFS_MOUNT_LOCK(hfsmp, TRUE);
1462 location = *(u_int32_t *)ap->a_data;
1463 if ((location >= hfsmp->allocLimit) &&
1464 (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
1465 error = EINVAL;
1466 goto fail_change_next_allocation;
1467 }
1468 /* Return previous value. */
1469 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
1470 if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
1471 /* On magic value for location, set nextAllocation to next block
1472 * after metadata zone and set flag in mount structure to indicate
1473 * that nextAllocation should not be updated again.
1474 */
1475 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
1476 hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1477 } else {
1478 hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1479 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
1480 }
1481 MarkVCBDirty(hfsmp);
1482 fail_change_next_allocation:
1483 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1484 return (error);
1485 }
1486
1487 #ifdef HFS_SPARSE_DEV
1488 case HFS_SETBACKINGSTOREINFO: {
1489 struct vnode * bsfs_rootvp;
1490 struct vnode * di_vp;
1491 struct hfs_backingstoreinfo *bsdata;
1492 int error = 0;
1493
1494 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
1495 return (EALREADY);
1496 }
1497 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1498 if (suser(cred, NULL) &&
1499 kauth_cred_getuid(cred) != vfsp->f_owner) {
1500 return (EACCES); /* must be owner of file system */
1501 }
1502 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
1503 if (bsdata == NULL) {
1504 return (EINVAL);
1505 }
1506 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
1507 return (error);
1508 }
1509 if ((error = vnode_getwithref(di_vp))) {
1510 file_drop(bsdata->backingfd);
1511 return(error);
1512 }
1513
1514 if (vnode_mount(vp) == vnode_mount(di_vp)) {
1515 (void)vnode_put(di_vp);
1516 file_drop(bsdata->backingfd);
1517 return (EINVAL);
1518 }
1519
1520 /*
1521 * Obtain the backing fs root vnode and keep a reference
1522 * on it. This reference will be dropped in hfs_unmount.
1523 */
1524 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
1525 if (error) {
1526 (void)vnode_put(di_vp);
1527 file_drop(bsdata->backingfd);
1528 return (error);
1529 }
1530 vnode_ref(bsfs_rootvp);
1531 vnode_put(bsfs_rootvp);
1532
1533 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
1534 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
1535 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
1536 hfsmp->hfs_sparsebandblks *= 4;
1537
1538 vfs_markdependency(hfsmp->hfs_mp);
1539
1540 (void)vnode_put(di_vp);
1541 file_drop(bsdata->backingfd);
1542 return (0);
1543 }
1544 case HFS_CLRBACKINGSTOREINFO: {
1545 struct vnode * tmpvp;
1546
1547 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1548 if (suser(cred, NULL) &&
1549 kauth_cred_getuid(cred) != vfsp->f_owner) {
1550 return (EACCES); /* must be owner of file system */
1551 }
1552 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
1553 hfsmp->hfs_backingfs_rootvp) {
1554
1555 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
1556 tmpvp = hfsmp->hfs_backingfs_rootvp;
1557 hfsmp->hfs_backingfs_rootvp = NULLVP;
1558 hfsmp->hfs_sparsebandblks = 0;
1559 vnode_rele(tmpvp);
1560 }
1561 return (0);
1562 }
1563 #endif /* HFS_SPARSE_DEV */
1564
1565 case F_FREEZE_FS: {
1566 struct mount *mp;
1567
1568 if (!is_suser())
1569 return (EACCES);
1570
1571 mp = vnode_mount(vp);
1572 hfsmp = VFSTOHFS(mp);
1573
1574 if (!(hfsmp->jnl))
1575 return (ENOTSUP);
1576
1577 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
1578
1579 // flush things before we get started to try and prevent
1580 // dirty data from being paged out while we're frozen.
1581 // note: can't do this after taking the lock as it will
1582 // deadlock against ourselves.
1583 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1584 hfs_global_exclusive_lock_acquire(hfsmp);
1585 journal_flush(hfsmp->jnl);
1586
1587 // don't need to iterate on all vnodes, we just need to
1588 // wait for writes to the system files and the device vnode
1589 if (HFSTOVCB(hfsmp)->extentsRefNum)
1590 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1591 if (HFSTOVCB(hfsmp)->catalogRefNum)
1592 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1593 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1594 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1595 if (hfsmp->hfs_attribute_vp)
1596 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1597 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1598
1599 hfsmp->hfs_freezing_proc = current_proc();
1600
1601 return (0);
1602 }
1603
1604 case F_THAW_FS: {
1605 if (!is_suser())
1606 return (EACCES);
1607
1608 // if we're not the one who froze the fs then we
1609 // can't thaw it.
1610 if (hfsmp->hfs_freezing_proc != current_proc()) {
1611 return EPERM;
1612 }
1613
1614 // NOTE: if you add code here, also go check the
1615 // code that "thaws" the fs in hfs_vnop_close()
1616 //
1617 hfsmp->hfs_freezing_proc = NULL;
1618 hfs_global_exclusive_lock_release(hfsmp);
1619 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1620
1621 return (0);
1622 }
1623
1624 case HFS_BULKACCESS_FSCTL: {
1625 int size;
1626
1627 if (hfsmp->hfs_flags & HFS_STANDARD) {
1628 return EINVAL;
1629 }
1630
1631 if (is64bit) {
1632 size = sizeof(struct user_access_t);
1633 } else {
1634 size = sizeof(struct access_t);
1635 }
1636
1637 return do_bulk_access_check(hfsmp, vp, ap, size, context);
1638 }
1639
1640 case HFS_EXT_BULKACCESS_FSCTL: {
1641 int size;
1642
1643 if (hfsmp->hfs_flags & HFS_STANDARD) {
1644 return EINVAL;
1645 }
1646
1647 if (is64bit) {
1648 size = sizeof(struct ext_user_access_t);
1649 } else {
1650 size = sizeof(struct ext_access_t);
1651 }
1652
1653 return do_bulk_access_check(hfsmp, vp, ap, size, context);
1654 }
1655
1656 case HFS_SETACLSTATE: {
1657 int state;
1658
1659 if (ap->a_data == NULL) {
1660 return (EINVAL);
1661 }
1662
1663 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1664 state = *(int *)ap->a_data;
1665
1666 // super-user can enable or disable acl's on a volume.
1667 // the volume owner can only enable acl's
1668 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1669 return (EPERM);
1670 }
1671 if (state == 0 || state == 1)
1672 return hfs_set_volxattr(hfsmp, HFS_SETACLSTATE, state);
1673 else
1674 return (EINVAL);
1675 }
1676
1677 case HFS_SET_XATTREXTENTS_STATE: {
1678 int state;
1679
1680 if (ap->a_data == NULL) {
1681 return (EINVAL);
1682 }
1683
1684 state = *(int *)ap->a_data;
1685
1686 /* Super-user can enable or disable extent-based extended
1687 * attribute support on a volume
1688 */
1689 if (!is_suser()) {
1690 return (EPERM);
1691 }
1692 if (state == 0 || state == 1)
1693 return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
1694 else
1695 return (EINVAL);
1696 }
1697
1698 case F_FULLFSYNC: {
1699 int error;
1700
1701 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1702 if (error == 0) {
1703 error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
1704 hfs_unlock(VTOC(vp));
1705 }
1706
1707 return error;
1708 }
1709
1710 case F_CHKCLEAN: {
1711 register struct cnode *cp;
1712 int error;
1713
1714 if (!vnode_isreg(vp))
1715 return EINVAL;
1716
1717 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1718 if (error == 0) {
1719 cp = VTOC(vp);
1720 /*
1721 * used by a regression test to determine if
1722 * all the dirty pages (via write) have been cleaned
1723 * after a call to 'fsync'.
1724 */
1725 error = is_file_clean(vp, VTOF(vp)->ff_size);
1726 hfs_unlock(cp);
1727 }
1728 return (error);
1729 }
1730
1731 case F_RDADVISE: {
1732 register struct radvisory *ra;
1733 struct filefork *fp;
1734 int error;
1735
1736 if (!vnode_isreg(vp))
1737 return EINVAL;
1738
1739 ra = (struct radvisory *)(ap->a_data);
1740 fp = VTOF(vp);
1741
1742 /* Protect against a size change. */
1743 hfs_lock_truncate(VTOC(vp), TRUE);
1744
1745 if (ra->ra_offset >= fp->ff_size) {
1746 error = EFBIG;
1747 } else {
1748 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1749 }
1750
1751 hfs_unlock_truncate(VTOC(vp), TRUE);
1752 return (error);
1753 }
1754
1755 case F_READBOOTSTRAP:
1756 case F_WRITEBOOTSTRAP:
1757 {
1758 struct vnode *devvp = NULL;
1759 user_fbootstraptransfer_t *user_bootstrapp;
1760 int devBlockSize;
1761 int error;
1762 uio_t auio;
1763 daddr64_t blockNumber;
1764 u_long blockOffset;
1765 u_long xfersize;
1766 struct buf *bp;
1767 user_fbootstraptransfer_t user_bootstrap;
1768
1769 if (!vnode_isvroot(vp))
1770 return (EINVAL);
1771 /* LP64 - when the caller is a 64-bit process we are passed a pointer
1772 * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
1773 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1774 */
1775 if (is64bit) {
1776 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1777 }
1778 else {
1779 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1780 user_bootstrapp = &user_bootstrap;
1781 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1782 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1783 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1784 }
1785 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1786 return EINVAL;
1787
1788 devvp = VTOHFS(vp)->hfs_devvp;
1789 auio = uio_create(1, user_bootstrapp->fbt_offset,
1790 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1791 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1792 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1793
1794 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1795
1796 while (uio_resid(auio) > 0) {
1797 blockNumber = uio_offset(auio) / devBlockSize;
1798 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1799 if (error) {
1800 if (bp) buf_brelse(bp);
1801 uio_free(auio);
1802 return error;
1803 };
1804
1805 blockOffset = uio_offset(auio) % devBlockSize;
1806 xfersize = devBlockSize - blockOffset;
1807 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1808 if (error) {
1809 buf_brelse(bp);
1810 uio_free(auio);
1811 return error;
1812 };
1813 if (uio_rw(auio) == UIO_WRITE) {
1814 error = VNOP_BWRITE(bp);
1815 if (error) {
1816 uio_free(auio);
1817 return error;
1818 }
1819 } else {
1820 buf_brelse(bp);
1821 };
1822 };
1823 uio_free(auio);
1824 };
1825 return 0;
1826
1827 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1828 {
1829 if (is64bit) {
1830 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1831 }
1832 else {
1833 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1834 }
1835 return 0;
1836 }
1837
1838 case HFS_GET_MOUNT_TIME:
1839 if (is64bit) {
1840 *(user_time_t *)(ap->a_data) = (user_time_t) hfsmp->hfs_mount_time;
1841 } else {
1842 *(time_t *)(ap->a_data) = (time_t) hfsmp->hfs_mount_time;
1843 }
1844 return 0;
1845
1846 case HFS_GET_LAST_MTIME:
1847 if (is64bit) {
1848 *(user_time_t *)(ap->a_data) = (user_time_t) hfsmp->hfs_last_mounted_mtime;
1849 } else {
1850 *(time_t *)(ap->a_data) = (time_t) hfsmp->hfs_last_mounted_mtime;
1851 }
1852 return 0;
1853
1854 case HFS_SET_BOOT_INFO:
1855 if (!vnode_isvroot(vp))
1856 return(EINVAL);
1857 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1858 return(EACCES); /* must be superuser or owner of filesystem */
1859 HFS_MOUNT_LOCK(hfsmp, TRUE);
1860 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1861 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1862 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1863 break;
1864
1865 case HFS_GET_BOOT_INFO:
1866 if (!vnode_isvroot(vp))
1867 return(EINVAL);
1868 HFS_MOUNT_LOCK(hfsmp, TRUE);
1869 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1870 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1871 break;
1872
1873 case HFS_MARK_BOOT_CORRUPT:
1874 /* Mark the boot volume corrupt by setting
1875 * kHFSVolumeInconsistentBit in the volume header. This will
1876 * force fsck_hfs on next mount.
1877 */
1878 if (!is_suser()) {
1879 return EACCES;
1880 }
1881
1882 /* Allowed only on the root vnode of the boot volume */
1883 if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
1884 !vnode_isvroot(vp)) {
1885 return EINVAL;
1886 }
1887
1888 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
1889 hfs_mark_volume_inconsistent(hfsmp);
1890 break;
1891
1892 default:
1893 return (ENOTTY);
1894 }
1895
1896 /* Should never get here */
1897 return 0;
1898 }
1899
1900 /*
1901 * select
1902 */
1903 int
1904 hfs_vnop_select(__unused struct vnop_select_args *ap)
1905 /*
1906 struct vnop_select_args {
1907 vnode_t a_vp;
1908 int a_which;
1909 int a_fflags;
1910 void *a_wql;
1911 vfs_context_t a_context;
1912 };
1913 */
1914 {
1915 /*
1916 * We should really check to see if I/O is possible.
1917 */
1918 return (1);
1919 }
1920
1921 /*
1922 * Converts a logical block number to a physical block, and optionally returns
1923 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1924 * The physical block number is based on the device block size, currently 512 bytes.
1925 * The block run is returned in logical blocks, and is the REMAINING number of blocks in the run.
1926 */
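/*
 * For example, assuming a 4 KiB logical block size: logical block 10 is
 * byte offset 40960, and if MapFileBlockC() reports 32768 contiguous
 * bytes available there, *runp is set to (32768 / 4096) - 1 = 7, the
 * number of additional logical blocks following the one just mapped.
 */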
1927 int
1928 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
1929 {
1930 struct filefork *fp = VTOF(vp);
1931 struct hfsmount *hfsmp = VTOHFS(vp);
1932 int retval = E_NONE;
1933 u_int32_t logBlockSize;
1934 size_t bytesContAvail = 0;
1935 off_t blockposition;
1936 int lockExtBtree;
1937 int lockflags = 0;
1938
1939 /*
1940 * Check for underlying vnode requests and ensure that logical
1941 * to physical mapping is requested.
1942 */
1943 if (vpp != NULL)
1944 *vpp = hfsmp->hfs_devvp;
1945 if (bnp == NULL)
1946 return (0);
1947
1948 logBlockSize = GetLogicalBlockSize(vp);
1949 blockposition = (off_t)bn * logBlockSize;
1950
1951 lockExtBtree = overflow_extents(fp);
1952
1953 if (lockExtBtree)
1954 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
1955
1956 retval = MacToVFSError(
1957 MapFileBlockC (HFSTOVCB(hfsmp),
1958 (FCB*)fp,
1959 MAXPHYSIO,
1960 blockposition,
1961 bnp,
1962 &bytesContAvail));
1963
1964 if (lockExtBtree)
1965 hfs_systemfile_unlock(hfsmp, lockflags);
1966
1967 if (retval == E_NONE) {
1968 /* Figure out how many read ahead blocks there are */
1969 if (runp != NULL) {
1970 if (can_cluster(logBlockSize)) {
1971 /* Make sure this result never goes negative: */
1972 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1973 } else {
1974 *runp = 0;
1975 }
1976 }
1977 }
1978 return (retval);
1979 }
1980
1981 /*
1982 * Convert logical block number to file offset.
1983 */
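/*
 * For example, with a 4 KiB logical block size, logical block 3 maps to
 * file offset 3 * 4096 = 12288; hfs_vnop_offtoblk() below performs the
 * inverse division.
 */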
1984 int
1985 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1986 /*
1987 struct vnop_blktooff_args {
1988 vnode_t a_vp;
1989 daddr64_t a_lblkno;
1990 off_t *a_offset;
1991 };
1992 */
1993 {
1994 if (ap->a_vp == NULL)
1995 return (EINVAL);
1996 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1997
1998 return(0);
1999 }
2000
2001 /*
2002 * Convert file offset to logical block number.
2003 */
2004 int
2005 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
2006 /*
2007 struct vnop_offtoblk_args {
2008 vnode_t a_vp;
2009 off_t a_offset;
2010 daddr64_t *a_lblkno;
2011 };
2012 */
2013 {
2014 if (ap->a_vp == NULL)
2015 return (EINVAL);
2016 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
2017
2018 return(0);
2019 }
2020
2021 /*
2022 * Map file offset to physical block number.
2023 *
2024 * If this function is called for write operation, and if the file
2025 * had virtual blocks allocated (delayed allocation), real blocks
2026 * are allocated by calling ExtendFileC().
2027 *
2028 * If this function is called for read operation, and if the file
2029 * had virtual blocks allocated (delayed allocation), no change
2030 * to the size of file is done, and if required, rangelist is
2031 * searched for mapping.
2032 *
2033 * System file cnodes are expected to be locked (shared or exclusive).
2034 */
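/*
 * Note that when a read maps into an invalid (not yet zero-filled)
 * range, *a_bpn is set to -1 and *a_run is trimmed so that the caller
 * zero-fills the pages rather than issuing device I/O for them.
 */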
2035 int
2036 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
2037 /*
2038 struct vnop_blockmap_args {
2039 vnode_t a_vp;
2040 off_t a_foffset;
2041 size_t a_size;
2042 daddr64_t *a_bpn;
2043 size_t *a_run;
2044 void *a_poff;
2045 int a_flags;
2046 vfs_context_t a_context;
2047 };
2048 */
2049 {
2050 struct vnode *vp = ap->a_vp;
2051 struct cnode *cp;
2052 struct filefork *fp;
2053 struct hfsmount *hfsmp;
2054 size_t bytesContAvail = 0;
2055 int retval = E_NONE;
2056 int syslocks = 0;
2057 int lockflags = 0;
2058 struct rl_entry *invalid_range;
2059 enum rl_overlaptype overlaptype;
2060 int started_tr = 0;
2061 int tooklock = 0;
2062
2063 /* Do not allow blockmap operation on a directory */
2064 if (vnode_isdir(vp)) {
2065 return (ENOTSUP);
2066 }
2067
2068 /*
2069 * Check for underlying vnode requests and ensure that logical
2070 * to physical mapping is requested.
2071 */
2072 if (ap->a_bpn == NULL)
2073 return (0);
2074
2075 if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
2076 if (VTOC(vp)->c_lockowner != current_thread()) {
2077 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2078 tooklock = 1;
2079 }
2080 }
2081 hfsmp = VTOHFS(vp);
2082 cp = VTOC(vp);
2083 fp = VTOF(vp);
2084
2085 retry:
2086 /* Check virtual blocks only when performing write operation */
2087 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
2088 if (hfs_start_transaction(hfsmp) != 0) {
2089 retval = EINVAL;
2090 goto exit;
2091 } else {
2092 started_tr = 1;
2093 }
2094 syslocks = SFL_EXTENTS | SFL_BITMAP;
2095
2096 } else if (overflow_extents(fp)) {
2097 syslocks = SFL_EXTENTS;
2098 }
2099
2100 if (syslocks)
2101 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
2102
2103 /*
2104 * Check for any delayed allocations.
2105 */
2106 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
2107 int64_t actbytes;
2108 u_int32_t loanedBlocks;
2109
2110 //
2111 // Make sure we have a transaction. It's possible
2112 // that we came in and fp->ff_unallocblocks was zero
2113 // but during the time we blocked acquiring the extents
2114 // btree, ff_unallocblocks became non-zero and so we
2115 // will need to start a transaction.
2116 //
2117 if (started_tr == 0) {
2118 if (syslocks) {
2119 hfs_systemfile_unlock(hfsmp, lockflags);
2120 syslocks = 0;
2121 }
2122 goto retry;
2123 }
2124
2125 /*
2126 * Note: ExtendFileC will release any blocks on loan and
2127 * acquire real blocks. So we ask to extend by zero bytes
2128 * since ExtendFileC will account for the virtual blocks.
2129 */
2130
2131 loanedBlocks = fp->ff_unallocblocks;
2132 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
2133 kEFAllMask | kEFNoClumpMask, &actbytes);
2134
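/*
 * If the conversion to real blocks failed, restore the loaned-block
 * accounting on the fork, the cnode and the mount, push out the
 * updated state, and bail with the error.
 */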
2135 if (retval) {
2136 fp->ff_unallocblocks = loanedBlocks;
2137 cp->c_blocks += loanedBlocks;
2138 fp->ff_blocks += loanedBlocks;
2139
2140 HFS_MOUNT_LOCK(hfsmp, TRUE);
2141 hfsmp->loanedBlocks += loanedBlocks;
2142 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2143
2144 hfs_systemfile_unlock(hfsmp, lockflags);
2145 cp->c_flag |= C_MODIFIED;
2146 if (started_tr) {
2147 (void) hfs_update(vp, TRUE);
2148 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2149
2150 hfs_end_transaction(hfsmp);
2151 started_tr = 0;
2152 }
2153 goto exit;
2154 }
2155 }
2156
2157 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
2158 ap->a_bpn, &bytesContAvail);
2159 if (syslocks) {
2160 hfs_systemfile_unlock(hfsmp, lockflags);
2161 syslocks = 0;
2162 }
2163
2164 if (started_tr) {
2165 (void) hfs_update(vp, TRUE);
2166 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2167 hfs_end_transaction(hfsmp);
2168 started_tr = 0;
2169 }
2170 if (retval) {
2171 /* On a write, always return the error because any virtual blocks
2172 * should already have been converted to real blocks by ExtendFileC().
2173 * We do not allocate virtual blocks on a read, so return the error
2174 * only if no virtual blocks are allocated. Otherwise, search the
2175 * rangelist for zero-fill (invalid) ranges.
2176 */
2177 if ((MacToVFSError(retval) != ERANGE) ||
2178 (ap->a_flags & VNODE_WRITE) ||
2179 ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
2180 goto exit;
2181 }
2182
2183 /* Validate if the start offset is within logical file size */
2184 if (ap->a_foffset > fp->ff_size) {
2185 goto exit;
2186 }
2187
2188 /* Searching file extents has failed for read operation, therefore
2189 * search rangelist for any uncommitted holes in the file.
2190 */
2191 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
2192 ap->a_foffset + (off_t)(ap->a_size - 1),
2193 &invalid_range);
2194 switch(overlaptype) {
2195 case RL_OVERLAPISCONTAINED:
2196 /* start_offset <= rl_start, end_offset >= rl_end */
2197 if (ap->a_foffset != invalid_range->rl_start) {
2198 break;
2199 }
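/* else fall through: the invalid range starts exactly at a_foffset */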
2200 case RL_MATCHINGOVERLAP:
2201 /* start_offset = rl_start, end_offset = rl_end */
2202 case RL_OVERLAPCONTAINSRANGE:
2203 /* start_offset >= rl_start, end_offset <= rl_end */
2204 case RL_OVERLAPSTARTSBEFORE:
2205 /* start_offset > rl_start, end_offset >= rl_start */
2206 if ((off_t)fp->ff_size > (invalid_range->rl_end + 1)) {
2207 bytesContAvail = (invalid_range->rl_end + 1) - ap->a_foffset;
2208 } else {
2209 bytesContAvail = fp->ff_size - ap->a_foffset;
2210 }
2211 if (bytesContAvail > ap->a_size) {
2212 bytesContAvail = ap->a_size;
2213 }
2214 *ap->a_bpn = (daddr64_t)-1;
2215 retval = 0;
2216 break;
2217 case RL_OVERLAPENDSAFTER:
2218 /* start_offset < rl_start, end_offset < rl_end */
2219 case RL_NOOVERLAP:
2220 break;
2221 }
2222 goto exit;
2223 }
2224
2225 /* MapFileBlockC() found a valid extent in the filefork. Check that
2226 * mapping against the file's invalid (not yet zero-filled) ranges.
2227 */
2228 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
2229 ap->a_foffset + (off_t)bytesContAvail - 1,
2230 &invalid_range);
2231 if (overlaptype != RL_NOOVERLAP) {
2232 switch(overlaptype) {
2233 case RL_MATCHINGOVERLAP:
2234 case RL_OVERLAPCONTAINSRANGE:
2235 case RL_OVERLAPSTARTSBEFORE:
2236 /* There's no valid block for this byte offset */
2237 *ap->a_bpn = (daddr64_t)-1;
2238 /* There's no point limiting the amount to be returned
2239 * if the invalid range that was hit extends all the way
2240 * to the EOF (i.e. there's no valid bytes between the
2241 * end of this range and the file's EOF):
2242 */
2243 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
2244 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
2245 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
2246 }
2247 break;
2248
2249 case RL_OVERLAPISCONTAINED:
2250 case RL_OVERLAPENDSAFTER:
2251 /* The range of interest hits an invalid block before the end: */
2252 if (invalid_range->rl_start == ap->a_foffset) {
2253 /* There's actually no valid information to be had starting here: */
2254 *ap->a_bpn = (daddr64_t)-1;
2255 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
2256 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
2257 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
2258 }
2259 } else {
2260 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
2261 }
2262 break;
2263
2264 case RL_NOOVERLAP:
2265 break;
2266 } /* end switch */
2267 if (bytesContAvail > ap->a_size)
2268 bytesContAvail = ap->a_size;
2269 }
2270
2271 exit:
2272 if (retval == 0) {
2273 if (ap->a_run)
2274 *ap->a_run = bytesContAvail;
2275
2276 if (ap->a_poff)
2277 *(int *)ap->a_poff = 0;
2278 }
2279
2280 if (tooklock)
2281 hfs_unlock(cp);
2282
2283 return (MacToVFSError(retval));
2284 }
2285
2286
2287 /*
2288 * prepare and issue the I/O
2289 * buf_strategy knows how to deal
2290 * with requests that require
2291 * fragmented I/Os
2292 */
2293 int
2294 hfs_vnop_strategy(struct vnop_strategy_args *ap)
2295 {
2296 buf_t bp = ap->a_bp;
2297 vnode_t vp = buf_vnode(bp);
2298
2299 return (buf_strategy(VTOHFS(vp)->hfs_devvp, ap));
2300 }
2301
2302
2303 static int
2304 do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context)
2305 {
2306 register struct cnode *cp = VTOC(vp);
2307 struct filefork *fp = VTOF(vp);
2308 struct proc *p = vfs_context_proc(context);
2309 kauth_cred_t cred = vfs_context_ucred(context);
2310 int retval;
2311 off_t bytesToAdd;
2312 off_t actualBytesAdded;
2313 off_t filebytes;
2314 u_long fileblocks;
2315 int blksize;
2316 struct hfsmount *hfsmp;
2317 int lockflags;
2318
2319 blksize = VTOVCB(vp)->blockSize;
2320 fileblocks = fp->ff_blocks;
2321 filebytes = (off_t)fileblocks * (off_t)blksize;
2322
2323 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
2324 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
2325
2326 if (length < 0)
2327 return (EINVAL);
2328
2329 /* This should only happen with a corrupt filesystem */
2330 if ((off_t)fp->ff_size < 0)
2331 return (EINVAL);
2332
2333 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
2334 return (EFBIG);
2335
2336 hfsmp = VTOHFS(vp);
2337
2338 retval = E_NONE;
2339
2340 /* Files that are changing size are not hot file candidates. */
2341 if (hfsmp->hfc_stage == HFC_RECORDING) {
2342 fp->ff_bytesread = 0;
2343 }
2344
2345 /*
2346 * We cannot just check if fp->ff_size == length (as an optimization)
2347 * since there may be extra physical blocks that also need truncation.
2348 */
2349 #if QUOTA
2350 if ((retval = hfs_getinoquota(cp)))
2351 return(retval);
2352 #endif /* QUOTA */
2353
2354 /*
2355 * Lengthen the size of the file. We must ensure that the
2356 * last byte of the file is allocated. Since the smallest
2357 * value of ff_size is 0, length will be at least 1.
2358 */
2359 if (length > (off_t)fp->ff_size) {
2360 #if QUOTA
2361 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
2362 cred, 0);
2363 if (retval)
2364 goto Err_Exit;
2365 #endif /* QUOTA */
2366 /*
2367 * If we don't have enough physical space then
2368 * we need to extend the physical size.
2369 */
2370 if (length > filebytes) {
2371 int eflags;
2372 u_long blockHint = 0;
2373
2374 /* All or nothing and don't round up to clumpsize. */
2375 eflags = kEFAllMask | kEFNoClumpMask;
2376
2377 if (cred && suser(cred, NULL) != 0)
2378 eflags |= kEFReserveMask; /* keep a reserve */
2379
2380 /*
2381 * Allocate Journal and Quota files in metadata zone.
2382 */
2383 if (filebytes == 0 &&
2384 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
2385 hfs_virtualmetafile(cp)) {
2386 eflags |= kEFMetadataMask;
2387 blockHint = hfsmp->hfs_metazone_start;
2388 }
2389 if (hfs_start_transaction(hfsmp) != 0) {
2390 retval = EINVAL;
2391 goto Err_Exit;
2392 }
2393
2394 /* Protect extents b-tree and allocation bitmap */
2395 lockflags = SFL_BITMAP;
2396 if (overflow_extents(fp))
2397 lockflags |= SFL_EXTENTS;
2398 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2399
2400 while ((length > filebytes) && (retval == E_NONE)) {
2401 bytesToAdd = length - filebytes;
2402 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
2403 (FCB*)fp,
2404 bytesToAdd,
2405 blockHint,
2406 eflags,
2407 &actualBytesAdded));
2408
2409 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2410 if (actualBytesAdded == 0 && retval == E_NONE) {
2411 if (length > filebytes)
2412 length = filebytes;
2413 break;
2414 }
2415 } /* endwhile */
2416
2417 hfs_systemfile_unlock(hfsmp, lockflags);
2418
2419 if (hfsmp->jnl) {
2420 (void) hfs_update(vp, TRUE);
2421 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2422 }
2423
2424 hfs_end_transaction(hfsmp);
2425
2426 if (retval)
2427 goto Err_Exit;
2428
2429 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2430 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
2431 }
2432
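/*
 * Zero-fill bookkeeping for the newly exposed bytes. For example, with
 * 4 KiB pages, growing a file from ff_size 5000 to length 20000:
 * zero_limit rounds 5000 up to 8192, bytes 5000-8191 (the rest of the
 * current last page) are zeroed through cluster_write(), and bytes
 * 8192-19999 are recorded as an invalid range to be zeroed later.
 */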
2433 if (!(flags & IO_NOZEROFILL)) {
2434 if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
2435 struct rl_entry *invalid_range;
2436 off_t zero_limit;
2437
2438 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
2439 if (length < zero_limit) zero_limit = length;
2440
2441 if (length > (off_t)fp->ff_size) {
2442 struct timeval tv;
2443
2444 /* Extending the file: time to fill out the current last page w. zeroes? */
2445 if ((fp->ff_size & PAGE_MASK_64) &&
2446 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
2447 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
2448
2449 /* There's some valid data at the start of the (current) last page
2450 of the file, so zero out the remainder of that page to ensure the
2451 entire page contains valid data. Since there is no invalid range
2452 possible past the (current) eof, there's no need to remove anything
2453 from the invalid range list before calling cluster_write(): */
2454 hfs_unlock(cp);
2455 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
2456 fp->ff_size, (off_t)0,
2457 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
2458 hfs_lock(cp, HFS_FORCE_LOCK);
2459 if (retval) goto Err_Exit;
2460
2461 /* Merely invalidate the remaining area, if necessary: */
2462 if (length > zero_limit) {
2463 microuptime(&tv);
2464 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
2465 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
2466 }
2467 } else {
2468 /* The page containing the (current) eof is invalid: just add the
2469 remainder of the page to the invalid list, along with the area
2470 being newly allocated:
2471 */
2472 microuptime(&tv);
2473 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
2474 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
2475 };
2476 }
2477 } else {
2478 panic("hfs_truncate: invoked on non-UBC object?!");
2479 };
2480 }
2481 cp->c_touch_modtime = TRUE;
2482 fp->ff_size = length;
2483
2484 } else { /* Shorten the size of the file */
2485
2486 if ((off_t)fp->ff_size > length) {
2487 /* Any space previously marked as invalid is now irrelevant: */
2488 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2489 }
2490
2491 /*
2492 * Account for any unmapped blocks. Note that the new
2493 * file length can still end up with unmapped blocks.
2494 */
2495 if (fp->ff_unallocblocks > 0) {
2496 u_int32_t finalblks;
2497 u_int32_t loanedBlocks;
2498
2499 HFS_MOUNT_LOCK(hfsmp, TRUE);
2500
2501 loanedBlocks = fp->ff_unallocblocks;
2502 cp->c_blocks -= loanedBlocks;
2503 fp->ff_blocks -= loanedBlocks;
2504 fp->ff_unallocblocks = 0;
2505
2506 hfsmp->loanedBlocks -= loanedBlocks;
2507
2508 finalblks = (length + blksize - 1) / blksize;
2509 if (finalblks > fp->ff_blocks) {
2510 /* calculate required unmapped blocks */
2511 loanedBlocks = finalblks - fp->ff_blocks;
2512 hfsmp->loanedBlocks += loanedBlocks;
2513
2514 fp->ff_unallocblocks = loanedBlocks;
2515 cp->c_blocks += loanedBlocks;
2516 fp->ff_blocks += loanedBlocks;
2517 }
2518 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2519 }
2520
2521 /*
2522 * For a TBE process the deallocation of the file blocks is
2523 * delayed until the file is closed. And hfs_close calls
2524 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2525 * isn't set, we make sure this isn't a TBE process.
2526 */
2527 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2528 #if QUOTA
2529 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2530 #endif /* QUOTA */
2531 if (hfs_start_transaction(hfsmp) != 0) {
2532 retval = EINVAL;
2533 goto Err_Exit;
2534 }
2535
2536 if (fp->ff_unallocblocks == 0) {
2537 /* Protect extents b-tree and allocation bitmap */
2538 lockflags = SFL_BITMAP;
2539 if (overflow_extents(fp))
2540 lockflags |= SFL_EXTENTS;
2541 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2542
2543 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2544 (FCB*)fp, length, false));
2545
2546 hfs_systemfile_unlock(hfsmp, lockflags);
2547 }
2548 if (hfsmp->jnl) {
2549 if (retval == 0) {
2550 fp->ff_size = length;
2551 }
2552 (void) hfs_update(vp, TRUE);
2553 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2554 }
2555
2556 hfs_end_transaction(hfsmp);
2557
2558 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2559 if (retval)
2560 goto Err_Exit;
2561 #if QUOTA
2562 /* These are bytesreleased */
2563 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2564 #endif /* QUOTA */
2565 }
2566 /* Only set update flag if the logical length changes */
2567 if ((off_t)fp->ff_size != length)
2568 cp->c_touch_modtime = TRUE;
2569 fp->ff_size = length;
2570 }
2571 cp->c_touch_chgtime = TRUE; /* status changed */
2572 cp->c_touch_modtime = TRUE; /* file data was modified */
2573 retval = hfs_update(vp, MNT_WAIT);
2574 if (retval) {
2575 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2576 -1, -1, -1, retval, 0);
2577 }
2578
2579 Err_Exit:
2580
2581 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2582 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2583
2584 return (retval);
2585 }
2586
2587
2588
2589 /*
2590 * Truncate a cnode to at most length size, freeing (or adding) the
2591 * disk blocks.
2592 */
2593 __private_extern__
2594 int
2595 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2596 vfs_context_t context)
2597 {
2598 struct filefork *fp = VTOF(vp);
2599 off_t filebytes;
2600 u_long fileblocks;
2601 int blksize, error = 0;
2602 struct cnode *cp = VTOC(vp);
2603
2604 /* Cannot truncate an HFS directory! */
2605 if (vnode_isdir(vp)) {
2606 return (EISDIR);
2607 }
2608 /* A swap file cannot change size. */
2609 if (vnode_isswap(vp) && (length != 0)) {
2610 return (EPERM);
2611 }
2612
2613 blksize = VTOVCB(vp)->blockSize;
2614 fileblocks = fp->ff_blocks;
2615 filebytes = (off_t)fileblocks * (off_t)blksize;
2616
2617 //
2618 // Have to do this here so that we don't wind up with
2619 // i/o pending for blocks that are about to be released
2620 // if we truncate the file.
2621 //
2622 // If skipsetsize is set, then the caller is responsible
2623 // for the ubc_setsize.
2624 //
2625 if (!skipsetsize)
2626 ubc_setsize(vp, length);
2627
2628 // have to loop truncating or growing files that are
2629 // really big because otherwise transactions can get
2630 // enormous and consume too many kernel resources.
2631
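// For example, shrinking a huge file with overflow extents proceeds in
// chunks of at most HFS_BIGFILE_SIZE bytes, each chunk handled by its
// own do_hfs_truncate() call (and thus its own transaction), until
// filebytes reaches the requested length.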
2632 if (length < filebytes) {
2633 while (filebytes > length) {
2634 if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
2635 filebytes -= HFS_BIGFILE_SIZE;
2636 } else {
2637 filebytes = length;
2638 }
2639 cp->c_flag |= C_FORCEUPDATE;
2640 error = do_hfs_truncate(vp, filebytes, flags, context);
2641 if (error)
2642 break;
2643 }
2644 } else if (length > filebytes) {
2645 while (filebytes < length) {
2646 if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
2647 filebytes += HFS_BIGFILE_SIZE;
2648 } else {
2649 filebytes = length;
2650 }
2651 cp->c_flag |= C_FORCEUPDATE;
2652 error = do_hfs_truncate(vp, filebytes, flags, context);
2653 if (error)
2654 break;
2655 }
2656 } else /* Same logical size */ {
2657
2658 error = do_hfs_truncate(vp, length, flags, context);
2659 }
2660 /* Files that are changing size are not hot file candidates. */
2661 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2662 fp->ff_bytesread = 0;
2663 }
2664
2665 return (error);
2666 }
2667
2668
2669
2670 /*
2671 * Preallocate file storage space.
2672 */
2673 int
2674 hfs_vnop_allocate(struct vnop_allocate_args /* {
2675 vnode_t a_vp;
2676 off_t a_length;
2677 u_int32_t a_flags;
2678 off_t *a_bytesallocated;
2679 off_t a_offset;
2680 vfs_context_t a_context;
2681 } */ *ap)
2682 {
2683 struct vnode *vp = ap->a_vp;
2684 struct cnode *cp;
2685 struct filefork *fp;
2686 ExtendedVCB *vcb;
2687 off_t length = ap->a_length;
2688 off_t startingPEOF;
2689 off_t moreBytesRequested;
2690 off_t actualBytesAdded;
2691 off_t filebytes;
2692 u_long fileblocks;
2693 int retval, retval2;
2694 u_int32_t blockHint;
2695 u_int32_t extendFlags; /* For call to ExtendFileC */
2696 struct hfsmount *hfsmp;
2697 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2698 int lockflags;
2699
2700 *(ap->a_bytesallocated) = 0;
2701
2702 if (!vnode_isreg(vp))
2703 return (EISDIR);
2704 if (length < (off_t)0)
2705 return (EINVAL);
2706
2707 cp = VTOC(vp);
2708
2709 hfs_lock_truncate(cp, TRUE);
2710
2711 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2712 goto Err_Exit;
2713 }
2714
2715 fp = VTOF(vp);
2716 hfsmp = VTOHFS(vp);
2717 vcb = VTOVCB(vp);
2718
2719 fileblocks = fp->ff_blocks;
2720 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2721
2722 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2723 retval = EINVAL;
2724 goto Err_Exit;
2725 }
2726
2727 /* Fill in the flags word for the call to Extend the file */
2728
2729 extendFlags = kEFNoClumpMask;
2730 if (ap->a_flags & ALLOCATECONTIG)
2731 extendFlags |= kEFContigMask;
2732 if (ap->a_flags & ALLOCATEALL)
2733 extendFlags |= kEFAllMask;
2734 if (cred && suser(cred, NULL) != 0)
2735 extendFlags |= kEFReserveMask;
2736
2737 retval = E_NONE;
2738 blockHint = 0;
2739 startingPEOF = filebytes;
2740
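/*
 * ALLOCATEFROMPEOF makes a_length relative to the current physical EOF,
 * so it is converted to an absolute length here, while ALLOCATEFROMVOL
 * turns a_offset into an allocation-block hint. For example, requesting
 * ALLOCATEFROMPEOF with a_length of 1 MiB on a file that already holds
 * 4 MiB of blocks asks for a total physical size of 5 MiB.
 */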
2741 if (ap->a_flags & ALLOCATEFROMPEOF)
2742 length += filebytes;
2743 else if (ap->a_flags & ALLOCATEFROMVOL)
2744 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2745
2746 /* If no changes are necessary, then we're done */
2747 if (filebytes == length)
2748 goto Std_Exit;
2749
2750 /*
2751 * Lengthen the size of the file. We must ensure that the
2752 * last byte of the file is allocated. Since the smallest
2753 * value of filebytes is 0, length will be at least 1.
2754 */
2755 if (length > filebytes) {
2756 off_t total_bytes_added = 0, orig_request_size;
2757
2758 orig_request_size = moreBytesRequested = length - filebytes;
2759
2760 #if QUOTA
2761 retval = hfs_chkdq(cp,
2762 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2763 cred, 0);
2764 if (retval)
2765 goto Err_Exit;
2766
2767 #endif /* QUOTA */
2768 /*
2769 * Metadata zone checks.
2770 */
2771 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2772 /*
2773 * Allocate Journal and Quota files in metadata zone.
2774 */
2775 if (hfs_virtualmetafile(cp)) {
2776 extendFlags |= kEFMetadataMask;
2777 blockHint = hfsmp->hfs_metazone_start;
2778 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2779 (blockHint <= hfsmp->hfs_metazone_end)) {
2780 /*
2781 * Move blockHint outside metadata zone.
2782 */
2783 blockHint = hfsmp->hfs_metazone_end + 1;
2784 }
2785 }
2786
2787
2788 while ((length > filebytes) && (retval == E_NONE)) {
2789 off_t bytesRequested;
2790
2791 if (hfs_start_transaction(hfsmp) != 0) {
2792 retval = EINVAL;
2793 goto Err_Exit;
2794 }
2795
2796 /* Protect extents b-tree and allocation bitmap */
2797 lockflags = SFL_BITMAP;
2798 if (overflow_extents(fp))
2799 lockflags |= SFL_EXTENTS;
2800 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2801
2802 if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
2803 bytesRequested = HFS_BIGFILE_SIZE;
2804 } else {
2805 bytesRequested = moreBytesRequested;
2806 }
2807
2808 retval = MacToVFSError(ExtendFileC(vcb,
2809 (FCB*)fp,
2810 bytesRequested,
2811 blockHint,
2812 extendFlags,
2813 &actualBytesAdded));
2814
2815 if (retval == E_NONE) {
2816 *(ap->a_bytesallocated) += actualBytesAdded;
2817 total_bytes_added += actualBytesAdded;
2818 moreBytesRequested -= actualBytesAdded;
2819 if (blockHint != 0) {
2820 blockHint += actualBytesAdded / vcb->blockSize;
2821 }
2822 }
2823 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2824
2825 hfs_systemfile_unlock(hfsmp, lockflags);
2826
2827 if (hfsmp->jnl) {
2828 (void) hfs_update(vp, TRUE);
2829 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2830 }
2831
2832 hfs_end_transaction(hfsmp);
2833 }
2834
2835
2836 /*
2837 * if we get an error and no changes were made then exit
2838 * otherwise we must do the hfs_update to reflect the changes
2839 */
2840 if (retval && (startingPEOF == filebytes))
2841 goto Err_Exit;
2842
2843 /*
2844 * Adjust the byte count we report so it is allocation block aligned,
2845 * not clump size aligned.
2846 * NOTE: What we report here does not match what is actually allocated
2847 * until the file is closed, when the file is truncated back down to
2848 * allocation block size.
2849 */
2850 if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
2851 *(ap->a_bytesallocated) =
2852 roundup(orig_request_size, (off_t)vcb->blockSize);
2853
2854 } else { /* Shorten the size of the file */
2855
2856 if (fp->ff_size > length) {
2857 /*
2858 * Any buffers that are past the truncation point need to be
2859 * invalidated (to maintain buffer cache consistency).
2860 */
2861 }
2862
2863 retval = hfs_truncate(vp, length, 0, 0, ap->a_context);
2864 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2865
2866 /*
2867 * if we get an error and no changes were made then exit
2868 * otherwise we must do the hfs_update to reflect the changes
2869 */
2870 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2871 #if QUOTA
2872 /* These are bytesreleased */
2873 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2874 #endif /* QUOTA */
2875
2876 if (fp->ff_size > filebytes) {
2877 fp->ff_size = filebytes;
2878
2879 hfs_unlock(cp);
2880 ubc_setsize(vp, fp->ff_size);
2881 hfs_lock(cp, HFS_FORCE_LOCK);
2882 }
2883 }
2884
2885 Std_Exit:
2886 cp->c_touch_chgtime = TRUE;
2887 cp->c_touch_modtime = TRUE;
2888 retval2 = hfs_update(vp, MNT_WAIT);
2889
2890 if (retval == 0)
2891 retval = retval2;
2892 Err_Exit:
2893 hfs_unlock_truncate(cp, TRUE);
2894 hfs_unlock(cp);
2895 return (retval);
2896 }
2897
2898
2899 /*
2900 * Pagein for HFS filesystem
2901 */
2902 int
2903 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2904 /*
2905 struct vnop_pagein_args {
2906 vnode_t a_vp,
2907 upl_t a_pl,
2908 vm_offset_t a_pl_offset,
2909 off_t a_f_offset,
2910 size_t a_size,
2911 int a_flags
2912 vfs_context_t a_context;
2913 };
2914 */
2915 {
2916 vnode_t vp = ap->a_vp;
2917 int error;
2918
2919 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2920 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2921 /*
2922 * Keep track of blocks read.
2923 */
2924 if (!vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2925 struct cnode *cp;
2926 struct filefork *fp;
2927 int bytesread;
2928 int took_cnode_lock = 0;
2929
2930 cp = VTOC(vp);
2931 fp = VTOF(vp);
2932
2933 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2934 bytesread = fp->ff_size;
2935 else
2936 bytesread = ap->a_size;
2937
2938 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2939 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
2940 hfs_lock(cp, HFS_FORCE_LOCK);
2941 took_cnode_lock = 1;
2942 }
2943 /*
2944 * If this file hasn't been seen since the start of
2945 * the current sampling period then start over.
2946 */
2947 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2948 struct timeval tv;
2949
2950 fp->ff_bytesread = bytesread;
2951 microtime(&tv);
2952 cp->c_atime = tv.tv_sec;
2953 } else {
2954 fp->ff_bytesread += bytesread;
2955 }
2956 cp->c_touch_acctime = TRUE;
2957 if (took_cnode_lock)
2958 hfs_unlock(cp);
2959 }
2960 return (error);
2961 }
2962
2963 /*
2964 * Pageout for HFS filesystem.
2965 */
2966 int
2967 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2968 /*
2969 struct vnop_pageout_args {
2970 vnode_t a_vp,
2971 upl_t a_pl,
2972 vm_offset_t a_pl_offset,
2973 off_t a_f_offset,
2974 size_t a_size,
2975 int a_flags
2976 vfs_context_t a_context;
2977 };
2978 */
2979 {
2980 vnode_t vp = ap->a_vp;
2981 struct cnode *cp;
2982 struct filefork *fp;
2983 int retval;
2984 off_t filesize;
2985
2986 cp = VTOC(vp);
2987 fp = VTOF(vp);
2988
2989 /*
2990 * Figure out where the file ends, for pageout purposes. If
2991 * ff_new_size > ff_size, then we're in the middle of extending the
2992 * file via a write, so it is safe (and necessary) that we be able
2993 * to pageout up to that point.
2994 */
2995 filesize = fp->ff_size;
2996 if (fp->ff_new_size > filesize)
2997 filesize = fp->ff_new_size;
2998
2999 if (!vnode_isswap(vp)) {
3000 off_t end_of_range;
3001 int tooklock = 0;
3002
3003 if (cp->c_lockowner != current_thread()) {
3004 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
3005 if (!(ap->a_flags & UPL_NOCOMMIT)) {
3006 ubc_upl_abort_range(ap->a_pl,
3007 ap->a_pl_offset,
3008 ap->a_size,
3009 UPL_ABORT_FREE_ON_EMPTY);
3010 }
3011 return (retval);
3012 }
3013 tooklock = 1;
3014 }
3015
3016 end_of_range = ap->a_f_offset + ap->a_size - 1;
3017
3018 if (end_of_range >= filesize) {
3019 end_of_range = (off_t)(filesize - 1);
3020 }
3021 if (ap->a_f_offset < filesize) {
3022 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
3023 cp->c_flag |= C_MODIFIED; /* leof is dirty */
3024 }
3025
3026 if (tooklock) {
3027 hfs_unlock(cp);
3028 }
3029 }
3030
3031 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
3032 ap->a_size, filesize, ap->a_flags);
3033
3034 /*
3035 * If data was written, and setuid or setgid bits are set and
3036 * this process is not the superuser then clear the setuid and
3037 * setgid bits as a precaution against tampering.
3038 */
3039 if ((retval == 0) &&
3040 (cp->c_mode & (S_ISUID | S_ISGID)) &&
3041 (vfs_context_suser(ap->a_context) != 0)) {
3042 hfs_lock(cp, HFS_FORCE_LOCK);
3043 cp->c_mode &= ~(S_ISUID | S_ISGID);
3044 cp->c_touch_chgtime = TRUE;
3045 hfs_unlock(cp);
3046 }
3047 return (retval);
3048 }
3049
3050 /*
3051 * Intercept B-Tree node writes to unswap them if necessary.
3052 */
3053 int
3054 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
3055 {
3056 int retval = 0;
3057 register struct buf *bp = ap->a_bp;
3058 register struct vnode *vp = buf_vnode(bp);
3059 BlockDescriptor block;
3060
3061 /* Trap B-Tree writes */
3062 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
3063 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
3064 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
3065 (vp == VTOHFS(vp)->hfc_filevp)) {
3066
3067 /*
3068 * Swap and validate the node if it is in native byte order.
3069 * This is always true on big endian, so we always validate
3070 * before writing here. On little endian, the node typically has
3071 * been swapped and validated when it was written to the journal,
3072 * so we won't do anything here.
3073 */
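/*
 * The last u_int16_t of a B-tree node holds the offset of record 0,
 * which is always sizeof(BTNodeDescriptor), i.e. 14 (0x000e). On a
 * little-endian host that field only reads back as 0x000e while the
 * node is still in host byte order, which is what this test detects.
 */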
3074 if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
3075 /* Prepare the block pointer */
3076 block.blockHeader = bp;
3077 block.buffer = (char *)buf_dataptr(bp);
3078 block.blockNum = buf_lblkno(bp);
3079 /* not found in cache ==> came from disk */
3080 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
3081 block.blockSize = buf_count(bp);
3082
3083 /* Endian un-swap B-Tree node */
3084 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
3085 if (retval)
3086 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
3087 }
3088 }
3089
3090 /* This buffer shouldn't be locked anymore but if it is clear it */
3091 if ((buf_flags(bp) & B_LOCKED)) {
3092 // XXXdbg
3093 if (VTOHFS(vp)->jnl) {
3094 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
3095 }
3096 buf_clearflags(bp, B_LOCKED);
3097 }
3098 retval = vn_bwrite (ap);
3099
3100 return (retval);
3101 }
3102
3103 /*
3104 * Relocate a file to a new location on disk
3105 * cnode must be locked on entry
3106 *
3107 * Relocation occurs by cloning the file's data from its
3108 * current set of blocks to a new set of blocks. During
3109 * the relocation all of the blocks (old and new) are
3110 * owned by the file.
3111 *
3112 *  -----------------
3113 *  |///////////////|
3114 *  -----------------
3115 *  0               N (file offset)
3116 *
3117 *  -----------------     -----------------
3118 *  |///////////////|     |               |  STEP 1 (acquire new blocks)
3119 *  -----------------     -----------------
3120 *  0               N     N+1             2N
3121 *
3122 *  -----------------     -----------------
3123 *  |///////////////|     |///////////////|  STEP 2 (clone data)
3124 *  -----------------     -----------------
3125 *  0               N     N+1             2N
3126 *
3127 *  -----------------
3128 *  |///////////////|  STEP 3 (head truncate blocks)
3129 *  -----------------
3130 *  0               N
3131 *
3132 * During steps 2 and 3 page-outs to file offsets less
3133 * than or equal to N are suspended.
3134 *
3135 * During step 3 page-ins to the file get suspended.
3136 */
3137 __private_extern__
3138 int
3139 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
3140 struct proc *p)
3141 {
3142 struct cnode *cp;
3143 struct filefork *fp;
3144 struct hfsmount *hfsmp;
3145 u_int32_t headblks;
3146 u_int32_t datablks;
3147 u_int32_t blksize;
3148 u_int32_t growsize;
3149 u_int32_t nextallocsave;
3150 daddr64_t sector_a, sector_b;
3151 int eflags;
3152 off_t newbytes;
3153 int retval;
3154 int lockflags = 0;
3155 int took_trunc_lock = 0;
3156 int started_tr = 0;
3157 enum vtype vnodetype;
3158
3159 vnodetype = vnode_vtype(vp);
3160 if (vnodetype != VREG && vnodetype != VLNK) {
3161 return (EPERM);
3162 }
3163
3164 hfsmp = VTOHFS(vp);
3165 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
3166 return (ENOSPC);
3167 }
3168
3169 cp = VTOC(vp);
3170 fp = VTOF(vp);
3171 if (fp->ff_unallocblocks)
3172 return (EINVAL);
3173 blksize = hfsmp->blockSize;
3174 if (blockHint == 0)
3175 blockHint = hfsmp->nextAllocation;
3176
3177 if ((fp->ff_size > 0x7fffffff) ||
3178 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
3179 return (EFBIG);
3180 }
3181
3182 //
3183 // We do not believe that this call to hfs_fsync() is
3184 // necessary and it causes a journal transaction
3185 // deadlock so we are removing it.
3186 //
3187 //if (vnodetype == VREG && !vnode_issystem(vp)) {
3188 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
3189 // if (retval)
3190 // return (retval);
3191 //}
3192
3193 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
3194 hfs_unlock(cp);
3195 hfs_lock_truncate(cp, TRUE);
3196 /* Force the lock since callers expect the cnode lock to be held. */
3197 if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
3198 hfs_unlock_truncate(cp, TRUE);
3199 return (retval);
3200 }
3201 /* No need to continue if file was removed. */
3202 if (cp->c_flag & C_NOEXISTS) {
3203 hfs_unlock_truncate(cp, TRUE);
3204 return (ENOENT);
3205 }
3206 took_trunc_lock = 1;
3207 }
3208 headblks = fp->ff_blocks;
3209 datablks = howmany(fp->ff_size, blksize);
3210 growsize = datablks * blksize;
3211 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
3212 if (blockHint >= hfsmp->hfs_metazone_start &&
3213 blockHint <= hfsmp->hfs_metazone_end)
3214 eflags |= kEFMetadataMask;
3215
3216 if (hfs_start_transaction(hfsmp) != 0) {
3217 if (took_trunc_lock)
3218 hfs_unlock_truncate(cp, TRUE);
3219 return (EINVAL);
3220 }
3221 started_tr = 1;
3222 /*
3223 * Protect the extents b-tree and the allocation bitmap
3224 * during MapFileBlockC and ExtendFileC operations.
3225 */
3226 lockflags = SFL_BITMAP;
3227 if (overflow_extents(fp))
3228 lockflags |= SFL_EXTENTS;
3229 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3230
3231 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
3232 if (retval) {
3233 retval = MacToVFSError(retval);
3234 goto out;
3235 }
3236
3237 /*
3238 * STEP 1 - acquire new allocation blocks.
3239 */
3240 nextallocsave = hfsmp->nextAllocation;
3241 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
3242 if (eflags & kEFMetadataMask) {
3243 HFS_MOUNT_LOCK(hfsmp, TRUE);
3244 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
3245 MarkVCBDirty(hfsmp);
3246 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3247 }
3248
3249 retval = MacToVFSError(retval);
3250 if (retval == 0) {
3251 cp->c_flag |= C_MODIFIED;
3252 if (newbytes < growsize) {
3253 retval = ENOSPC;
3254 goto restore;
3255 } else if (fp->ff_blocks < (headblks + datablks)) {
3256 printf("hfs_relocate: allocation failed\n");
3257 retval = ENOSPC;
3258 goto restore;
3259 }
3260
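/*
 * sector_a holds the device sector of the last byte of the original
 * allocation and sector_b the sector of the first newly allocated byte;
 * if they are adjacent, the "new" space merely extends the old extent,
 * the file would not actually move, and ENOSPC is returned.
 */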
3261 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
3262 if (retval) {
3263 retval = MacToVFSError(retval);
3264 } else if ((sector_a + 1) == sector_b) {
3265 retval = ENOSPC;
3266 goto restore;
3267 } else if ((eflags & kEFMetadataMask) &&
3268 ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
3269 hfsmp->hfs_metazone_end)) {
3270 const char * filestr;
3271 char emptystr = '\0';
3272
3273 if (cp->c_desc.cd_nameptr != NULL) {
3274 filestr = (const char *)&cp->c_desc.cd_nameptr[0];
3275 } else if (vnode_name(vp) != NULL) {
3276 filestr = vnode_name(vp);
3277 } else {
3278 filestr = &emptystr;
3279 }
3280 printf("hfs_relocate: %s didn't move into MDZ (%d blks)\n", filestr, fp->ff_blocks);
3281 retval = ENOSPC;
3282 goto restore;
3283 }
3284 }
3285 /* Done with system locks and journal for now. */
3286 hfs_systemfile_unlock(hfsmp, lockflags);
3287 lockflags = 0;
3288 hfs_end_transaction(hfsmp);
3289 started_tr = 0;
3290
3291 if (retval) {
3292 /*
3293 * Check to see if failure is due to excessive fragmentation.
3294 */
3295 if ((retval == ENOSPC) &&
3296 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
3297 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
3298 }
3299 goto out;
3300 }
3301 /*
3302 * STEP 2 - clone file data into the new allocation blocks.
3303 */
3304
3305 if (vnodetype == VLNK)
3306 retval = hfs_clonelink(vp, blksize, cred, p);
3307 else if (vnode_issystem(vp))
3308 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
3309 else
3310 retval = hfs_clonefile(vp, headblks, datablks, blksize);
3311
3312 /* Start transaction for step 3 or for a restore. */
3313 if (hfs_start_transaction(hfsmp) != 0) {
3314 retval = EINVAL;
3315 goto out;
3316 }
3317 started_tr = 1;
3318 if (retval)
3319 goto restore;
3320
3321 /*
3322 * STEP 3 - switch to cloned data and remove old blocks.
3323 */
3324 lockflags = SFL_BITMAP;
3325 if (overflow_extents(fp))
3326 lockflags |= SFL_EXTENTS;
3327 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3328
3329 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
3330
3331 hfs_systemfile_unlock(hfsmp, lockflags);
3332 lockflags = 0;
3333 if (retval)
3334 goto restore;
3335 out:
3336 if (took_trunc_lock)
3337 hfs_unlock_truncate(cp, TRUE);
3338
3339 if (lockflags) {
3340 hfs_systemfile_unlock(hfsmp, lockflags);
3341 lockflags = 0;
3342 }
3343
3344 /* Push cnode's new extent data to disk. */
3345 if (retval == 0) {
3346 (void) hfs_update(vp, MNT_WAIT);
3347 }
3348 if (hfsmp->jnl) {
3349 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
3350 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
3351 else
3352 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
3353 }
3354 exit:
3355 if (started_tr)
3356 hfs_end_transaction(hfsmp);
3357
3358 return (retval);
3359
3360 restore:
3361 if (fp->ff_blocks == headblks) {
3362 if (took_trunc_lock)
3363 hfs_unlock_truncate(cp, TRUE);
3364 goto exit;
3365 }
3366 /*
3367 * Give back any newly allocated space.
3368 */
3369 if (lockflags == 0) {
3370 lockflags = SFL_BITMAP;
3371 if (overflow_extents(fp))
3372 lockflags |= SFL_EXTENTS;
3373 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3374 }
3375
3376 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
3377
3378 hfs_systemfile_unlock(hfsmp, lockflags);
3379 lockflags = 0;
3380
3381 if (took_trunc_lock)
3382 hfs_unlock_truncate(cp, TRUE);
3383 goto exit;
3384 }
3385
3386
3387 /*
3388 * Clone a symlink.
3389 *
3390 */
3391 static int
3392 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
3393 {
3394 struct buf *head_bp = NULL;
3395 struct buf *tail_bp = NULL;
3396 int error;
3397
3398
3399 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
3400 if (error)
3401 goto out;
3402
3403 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
3404 if (tail_bp == NULL) {
3405 error = EIO;
3406 goto out;
3407 }
3408 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
3409 error = (int)buf_bwrite(tail_bp);
3410 out:
3411 if (head_bp) {
3412 buf_markinvalid(head_bp);
3413 buf_brelse(head_bp);
3414 }
3415 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
3416
3417 return (error);
3418 }
3419
3420 /*
3421 * Clone a file's data within the file.
3422 *
3423 */
3424 static int
3425 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
3426 {
3427 caddr_t bufp;
3428 size_t writebase;
3429 size_t bufsize;
3430 size_t copysize;
3431 size_t iosize;
3432 off_t filesize;
3433 size_t offset;
3434 uio_t auio;
3435 int error = 0;
3436
3437 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
3438 writebase = blkstart * blksize;
3439 copysize = blkcnt * blksize;
3440 iosize = bufsize = MIN(copysize, 128 * 1024);
3441 offset = 0;
3442
3443 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3444 return (ENOMEM);
3445 }
3446 hfs_unlock(VTOC(vp));
3447
3448 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
3449
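/*
 * Copy the data in chunks of up to 128 KiB: each pass reads iosize
 * bytes from logical offset 'offset' (the original blocks) and rewrites
 * them at 'writebase + offset' (the newly allocated blocks), using
 * IO_NOCACHE so the copy does not pollute the buffer cache.
 */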
3450 while (offset < copysize) {
3451 iosize = MIN(copysize - offset, iosize);
3452
3453 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
3454 uio_addiov(auio, (uintptr_t)bufp, iosize);
3455
3456 error = cluster_read(vp, auio, copysize, IO_NOCACHE);
3457 if (error) {
3458 printf("hfs_clonefile: cluster_read failed - %d\n", error);
3459 break;
3460 }
3461 if (uio_resid(auio) != 0) {
3462 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
3463 error = EIO;
3464 break;
3465 }
3466
3467 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
3468 uio_addiov(auio, (uintptr_t)bufp, iosize);
3469
3470 error = cluster_write(vp, auio, filesize + offset,
3471 filesize + offset + iosize,
3472 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
3473 if (error) {
3474 printf("hfs_clonefile: cluster_write failed - %d\n", error);
3475 break;
3476 }
3477 if (uio_resid(auio) != 0) {
3478 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
3479 error = EIO;
3480 break;
3481 }
3482 offset += iosize;
3483 }
3484 uio_free(auio);
3485
3486 /*
3487 * No need to call ubc_sync_range or hfs_invalbuf
3488 * since the file was copied using IO_NOCACHE.
3489 */
3490
3491 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3492
3493 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
3494 return (error);
3495 }
3496
3497 /*
3498 * Clone a system (metadata) file.
3499 *
3500 */
3501 static int
3502 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
3503 kauth_cred_t cred, struct proc *p)
3504 {
3505 caddr_t bufp;
3506 char * offset;
3507 size_t bufsize;
3508 size_t iosize;
3509 struct buf *bp = NULL;
3510 daddr64_t blkno;
3511 daddr64_t blk;
3512 daddr64_t start_blk;
3513 daddr64_t last_blk;
3514 int breadcnt;
3515 int i;
3516 int error = 0;
3517
3518
3519 iosize = GetLogicalBlockSize(vp);
3520 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
3521 breadcnt = bufsize / iosize;
3522
3523 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3524 return (ENOMEM);
3525 }
3526 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3527 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
3528 blkno = 0;
3529
3530 while (blkno < last_blk) {
3531 /*
3532 * Read up to a megabyte
3533 */
3534 offset = bufp;
3535 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3536 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3537 if (error) {
3538 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3539 goto out;
3540 }
3541 if (buf_count(bp) != iosize) {
3542 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3543 goto out;
3544 }
3545 bcopy((char *)buf_dataptr(bp), offset, iosize);
3546
3547 buf_markinvalid(bp);
3548 buf_brelse(bp);
3549 bp = NULL;
3550
3551 offset += iosize;
3552 }
3553
3554 /*
3555 * Write up to a megabyte
3556 */
3557 offset = bufp;
3558 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3559 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3560 if (bp == NULL) {
3561 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3562 error = EIO;
3563 goto out;
3564 }
3565 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3566 error = (int)buf_bwrite(bp);
3567 bp = NULL;
3568 if (error)
3569 goto out;
3570 offset += iosize;
3571 }
3572 }
3573 out:
3574 if (bp) {
3575 buf_brelse(bp);
3576 }
3577
3578 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3579
3580 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3581
3582 return (error);
3583 }