[apple/xnu.git] / bsd / hfs / hfs_readwrite.c
1 /*
2 * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* @(#)hfs_readwrite.c 1.0
29 *
30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
31 *
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
33 *
34 */
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/resourcevar.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/filedesc.h>
42 #include <sys/stat.h>
43 #include <sys/buf.h>
44 #include <sys/proc.h>
45 #include <sys/kauth.h>
46 #include <sys/vnode.h>
47 #include <sys/vnode_internal.h>
48 #include <sys/uio.h>
49 #include <sys/vfs_context.h>
50 #include <sys/fsevents.h>
51 #include <kern/kalloc.h>
52 #include <sys/disk.h>
53 #include <sys/sysctl.h>
54
55 #include <miscfs/specfs/specdev.h>
56
57 #include <sys/ubc.h>
58 #include <sys/ubc_internal.h>
59
60 #include <vm/vm_pageout.h>
61 #include <vm/vm_kern.h>
62
63 #include <sys/kdebug.h>
64
65 #include "hfs.h"
66 #include "hfs_attrlist.h"
67 #include "hfs_endian.h"
68 #include "hfs_fsctl.h"
69 #include "hfs_quota.h"
70 #include "hfscommon/headers/FileMgrInternal.h"
71 #include "hfscommon/headers/BTreesInternal.h"
72 #include "hfs_cnode.h"
73 #include "hfs_dbg.h"
74
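/*
 * can_cluster: the cluster I/O path is used only when the block size is a
 * multiple of 4K and no larger than half of MAXPHYSIO (see the macro below).
 */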
75 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
76
77 enum {
78 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
79 };
80
81 /* from bsd/vfs/vfs_cluster.c */
82 extern int is_file_clean(vnode_t vp, off_t filesize);
83
84 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
85 static int hfs_clonefile(struct vnode *, int, int, int);
86 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
87
88 int flush_cache_on_write = 0;
89 SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
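/*
 * The knob above lives under the _kern sysctl namespace, so it can be
 * toggled at run time, e.g. (illustrative): sysctl -w kern.flush_cache_on_write=1
 */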
90
91
92 /*
93 * Read data from a file.
94 */
95 int
96 hfs_vnop_read(struct vnop_read_args *ap)
97 {
98 uio_t uio = ap->a_uio;
99 struct vnode *vp = ap->a_vp;
100 struct cnode *cp;
101 struct filefork *fp;
102 struct hfsmount *hfsmp;
103 off_t filesize;
104 off_t filebytes;
105 off_t start_resid = uio_resid(uio);
106 off_t offset = uio_offset(uio);
107 int retval = 0;
108
109
110 /* Preflight checks */
111 if (!vnode_isreg(vp)) {
112 /* can only read regular files */
113 if (vnode_isdir(vp))
114 return (EISDIR);
115 else
116 return (EPERM);
117 }
118 if (start_resid == 0)
119 return (0); /* Nothing left to do */
120 if (offset < 0)
121 return (EINVAL); /* can't read from a negative offset */
122
123 cp = VTOC(vp);
124 fp = VTOF(vp);
125 hfsmp = VTOHFS(vp);
126
127 /* Protect against a size change. */
128 hfs_lock_truncate(cp, 0);
129
130 filesize = fp->ff_size;
131 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
132 if (offset > filesize) {
133 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
134 (offset > (off_t)MAXHFSFILESIZE)) {
135 retval = EFBIG;
136 }
137 goto exit;
138 }
139
140 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
141 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
142
143 retval = cluster_read(vp, uio, filesize, ap->a_ioflag);
144
145 cp->c_touch_acctime = TRUE;
146
147 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
148 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
149
150 /*
151 * Keep track of blocks read
152 */
153 if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
154 int took_cnode_lock = 0;
155 off_t bytesread;
156
157 bytesread = start_resid - uio_resid(uio);
158
159 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
160 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
161 hfs_lock(cp, HFS_FORCE_LOCK);
162 took_cnode_lock = 1;
163 }
164 /*
165 * If this file hasn't been seen since the start of
166 * the current sampling period then start over.
167 */
168 if (cp->c_atime < hfsmp->hfc_timebase) {
169 struct timeval tv;
170
171 fp->ff_bytesread = bytesread;
172 microtime(&tv);
173 cp->c_atime = tv.tv_sec;
174 } else {
175 fp->ff_bytesread += bytesread;
176 }
177 if (took_cnode_lock)
178 hfs_unlock(cp);
179 }
180 exit:
181 hfs_unlock_truncate(cp, 0);
182 return (retval);
183 }
184
185 /*
186 * Write data to a file.
187 */
188 int
189 hfs_vnop_write(struct vnop_write_args *ap)
190 {
191 uio_t uio = ap->a_uio;
192 struct vnode *vp = ap->a_vp;
193 struct cnode *cp;
194 struct filefork *fp;
195 struct hfsmount *hfsmp;
196 kauth_cred_t cred = NULL;
197 off_t origFileSize;
198 off_t writelimit;
199 off_t bytesToAdd = 0;
200 off_t actualBytesAdded;
201 off_t filebytes;
202 off_t offset;
203 size_t resid;
204 int eflags;
205 int ioflag = ap->a_ioflag;
206 int retval = 0;
207 int lockflags;
208 int cnode_locked = 0;
209 int partialwrite = 0;
210 int exclusive_lock = 0;
211
212 // LP64todo - fix this! uio_resid may be a 64-bit value
213 resid = uio_resid(uio);
214 offset = uio_offset(uio);
215
216 if (ioflag & IO_APPEND) {
217 exclusive_lock = 1;
218 }
219
220 if (offset < 0)
221 return (EINVAL);
222 if (resid == 0)
223 return (E_NONE);
224 if (!vnode_isreg(vp))
225 return (EPERM); /* Can only write regular files */
226
227 cp = VTOC(vp);
228 fp = VTOF(vp);
229 hfsmp = VTOHFS(vp);
230
231 eflags = kEFDeferMask; /* defer file block allocations */
232 #ifdef HFS_SPARSE_DEV
233 /*
234 * When the underlying device is sparse and space
235 * is low (< 8MB), stop doing delayed allocations
236 * and begin doing synchronous I/O.
237 */
238 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
239 (hfs_freeblks(hfsmp, 0) < 2048)) {
240 eflags &= ~kEFDeferMask;
241 ioflag |= IO_SYNC;
242 }
243 #endif /* HFS_SPARSE_DEV */
244
245 again:
246 /* Protect against a size change. */
247 hfs_lock_truncate(cp, exclusive_lock);
248
249 if (ioflag & IO_APPEND) {
250 uio_setoffset(uio, fp->ff_size);
251 offset = fp->ff_size;
252 }
253 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
254 retval = EPERM;
255 goto exit;
256 }
257
258 origFileSize = fp->ff_size;
259 writelimit = offset + resid;
260 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
261
262 /* If the truncate lock is shared, and if we either have virtual
263 * blocks or will need to extend the file, upgrade the truncate
264 * lock to exclusive. If the upgrade fails, we lose the shared lock
265 * and have to acquire the exclusive lock again.
266 */
267 if ((exclusive_lock == 0) &&
268 ((fp->ff_unallocblocks != 0) || (writelimit > filebytes))) {
269 exclusive_lock = 1;
270 /* Lock upgrade failed and we lost our shared lock, try again */
271 if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
272 goto again;
273 }
274 }
275
276 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
277 goto exit;
278 }
279 cnode_locked = 1;
280
281 if (!exclusive_lock) {
282 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
283 (int)offset, uio_resid(uio), (int)fp->ff_size,
284 (int)filebytes, 0);
285 }
286
287 /* Check if we do not need to extend the file */
288 if (writelimit <= filebytes) {
289 goto sizeok;
290 }
291
292 cred = vfs_context_ucred(ap->a_context);
293 bytesToAdd = writelimit - filebytes;
294
295 #if QUOTA
296 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
297 cred, 0);
298 if (retval)
299 goto exit;
300 #endif /* QUOTA */
301
302 if (hfs_start_transaction(hfsmp) != 0) {
303 retval = EINVAL;
304 goto exit;
305 }
306
307 while (writelimit > filebytes) {
308 bytesToAdd = writelimit - filebytes;
309 if (cred && suser(cred, NULL) != 0)
310 eflags |= kEFReserveMask;
311
312 /* Protect extents b-tree and allocation bitmap */
313 lockflags = SFL_BITMAP;
314 if (overflow_extents(fp))
315 lockflags |= SFL_EXTENTS;
316 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
317
318 /* Files that are changing size are not hot file candidates. */
319 if (hfsmp->hfc_stage == HFC_RECORDING) {
320 fp->ff_bytesread = 0;
321 }
322 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
323 0, eflags, &actualBytesAdded));
324
325 hfs_systemfile_unlock(hfsmp, lockflags);
326
327 if ((actualBytesAdded == 0) && (retval == E_NONE))
328 retval = ENOSPC;
329 if (retval != E_NONE)
330 break;
331 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
332 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
333 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
334 }
335 (void) hfs_update(vp, TRUE);
336 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
337 (void) hfs_end_transaction(hfsmp);
338
339 /*
340 * If we didn't grow the file enough, try a partial write.
341 * POSIX expects this behavior.
342 */
343 if ((retval == ENOSPC) && (filebytes > offset)) {
344 retval = 0;
345 partialwrite = 1;
346 uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
347 resid -= bytesToAdd;
348 writelimit = filebytes;
349 }
350 sizeok:
351 if (retval == E_NONE) {
352 off_t filesize;
353 off_t zero_off;
354 off_t tail_off;
355 off_t inval_start;
356 off_t inval_end;
357 off_t io_start;
358 int lflag;
359 struct rl_entry *invalid_range;
360
361 if (writelimit > fp->ff_size)
362 filesize = writelimit;
363 else
364 filesize = fp->ff_size;
365
366 lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
367
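/*
 * Decide whether the head of the transfer (bytes between the old EOF or
 * the enclosing page boundary and the start of the write) and, further
 * below, the tail of the last page need to be zero-filled as part of this
 * transfer, so stale or invalid data is never exposed through the cache.
 */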
368 if (offset <= fp->ff_size) {
369 zero_off = offset & ~PAGE_MASK_64;
370
371 /* Check whether the area between zero_off and the start of the
372 transfer is invalid and should be zero-filled as part of the
373 transfer:
374 */
375 if (offset > zero_off) {
376 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
377 lflag |= IO_HEADZEROFILL;
378 }
379 } else {
380 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
381
382 /* The bytes between fp->ff_size and uio->uio_offset must never be
383 read without being zeroed. The current last block is filled with zeroes
384 if it holds valid data, but in all cases merely do a little bookkeeping
385 to track the area from the end of the current last page to the start of
386 the area actually written. For the same reason only the bytes up to the
387 start of the page where this write will start are invalidated; any remainder
388 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
389 
390 Note that inval_start, the start of the page after the current EOF,
391 may be past the start of the write, in which case the zeroing
392 will be handled by the cluster_write of the actual data.
393 */
394 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
395 inval_end = offset & ~PAGE_MASK_64;
396 zero_off = fp->ff_size;
397
398 if ((fp->ff_size & PAGE_MASK_64) &&
399 (rl_scan(&fp->ff_invalidranges,
400 eof_page_base,
401 fp->ff_size - 1,
402 &invalid_range) != RL_NOOVERLAP)) {
403 /* The page containing the EOF is not valid, so the
404 entire page must be made inaccessible now. If the write
405 starts on a page beyond the page containing the eof
406 (inval_end > eof_page_base), add the
407 whole page to the range to be invalidated. Otherwise
408 (i.e. if the write starts on the same page), zero-fill
409 the entire page explicitly now:
410 */
411 if (inval_end > eof_page_base) {
412 inval_start = eof_page_base;
413 } else {
414 zero_off = eof_page_base;
415 };
416 };
417
418 if (inval_start < inval_end) {
419 struct timeval tv;
420 /* There's some range of data that's going to be marked invalid */
421
422 if (zero_off < inval_start) {
423 /* The pages between inval_start and inval_end are going to be invalidated,
424 and the actual write will start on a page past inval_end. Now's the last
425 chance to zero-fill the page containing the EOF:
426 */
427 hfs_unlock(cp);
428 cnode_locked = 0;
429 retval = cluster_write(vp, (uio_t) 0,
430 fp->ff_size, inval_start,
431 zero_off, (off_t)0,
432 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
433 hfs_lock(cp, HFS_FORCE_LOCK);
434 cnode_locked = 1;
435 if (retval) goto ioerr_exit;
436 offset = uio_offset(uio);
437 };
438
439 /* Mark the remaining area of the newly allocated space as invalid: */
440 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
441 microuptime(&tv);
442 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
443 zero_off = fp->ff_size = inval_end;
444 };
445
446 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
447 };
448
449 /* Check to see whether the area between the end of the write and the end of
450 the page it falls in is invalid and should be zero-filled as part of the transfer:
451 */
452 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
453 if (tail_off > filesize) tail_off = filesize;
454 if (tail_off > writelimit) {
455 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
456 lflag |= IO_TAILZEROFILL;
457 };
458 };
459
460 /*
461 * if the write starts beyond the current EOF (possibly advanced in the
462 * zeroing of the last block, above), then we'll zero fill from the current EOF
463 * to where the write begins:
464 *
465 * NOTE: If (and ONLY if) the portion of the file about to be written is
466 * before the current EOF it might be marked as invalid now and must be
467 * made readable (removed from the invalid ranges) before cluster_write
468 * tries to write it:
469 */
470 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
471 if (io_start < fp->ff_size) {
472 off_t io_end;
473
474 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
475 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
476 };
477
478 hfs_unlock(cp);
479 cnode_locked = 0;
480 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
481 tail_off, lflag | IO_NOZERODIRTY);
482 if (retval) {
483 goto ioerr_exit;
484 }
485 offset = uio_offset(uio);
486 if (offset > fp->ff_size) {
487 fp->ff_size = offset;
488
489 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
490 /* Files that are changing size are not hot file candidates. */
491 if (hfsmp->hfc_stage == HFC_RECORDING)
492 fp->ff_bytesread = 0;
493 }
494 if (resid > uio_resid(uio)) {
495 cp->c_touch_chgtime = TRUE;
496 cp->c_touch_modtime = TRUE;
497 }
498 }
499 if (partialwrite) {
500 uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
501 resid += bytesToAdd;
502 }
503
504 // XXXdbg - see radar 4871353 for more info
505 {
506 if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
507 VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
508 }
509 }
510 HFS_KNOTE(vp, NOTE_WRITE);
511
512 ioerr_exit:
513 /*
514 * If we successfully wrote any data and we are not the superuser,
515 * we clear the setuid and setgid bits as a precaution against
516 * tampering.
517 */
518 if (cp->c_mode & (S_ISUID | S_ISGID)) {
519 cred = vfs_context_ucred(ap->a_context);
520 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
521 if (!cnode_locked) {
522 hfs_lock(cp, HFS_FORCE_LOCK);
523 cnode_locked = 1;
524 }
525 cp->c_mode &= ~(S_ISUID | S_ISGID);
526 }
527 }
528 if (retval) {
529 if (ioflag & IO_UNIT) {
530 if (!cnode_locked) {
531 hfs_lock(cp, HFS_FORCE_LOCK);
532 cnode_locked = 1;
533 }
534 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
535 0, ap->a_context);
536 // LP64todo - fix this! resid needs to be user_ssize_t
537 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
538 uio_setresid(uio, resid);
539 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
540 }
541 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
542 if (!cnode_locked) {
543 hfs_lock(cp, HFS_FORCE_LOCK);
544 cnode_locked = 1;
545 }
546 retval = hfs_update(vp, TRUE);
547 }
548 /* Updating vcbWrCnt doesn't need to be atomic. */
549 hfsmp->vcbWrCnt++;
550
551 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
552 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
553 exit:
554 if (cnode_locked)
555 hfs_unlock(cp);
556 hfs_unlock_truncate(cp, exclusive_lock);
557 return (retval);
558 }
559
560 /* support for the "bulk-access" fcntl */
561
562 #define CACHE_LEVELS 16
563 #define NUM_CACHE_ENTRIES (64*16)
564 #define PARENT_IDS_FLAG 0x100
565
566 struct access_cache {
567 int numcached;
568 int cachehits; /* these two for statistics gathering */
569 int lookups;
570 unsigned int *acache;
571 unsigned char *haveaccess;
572 };
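/*
 * The acache array above is kept sorted by cnid so lookup_bucket() can
 * binary-search it; haveaccess[] holds the cached result for the cnid at
 * the same index (0 means access was granted, otherwise an errno value).
 */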
573
574 struct access_t {
575 uid_t uid; /* IN: effective user id */
576 short flags; /* IN: access requested (i.e. R_OK) */
577 short num_groups; /* IN: number of groups user belongs to */
578 int num_files; /* IN: number of files to process */
579 int *file_ids; /* IN: array of file ids */
580 gid_t *groups; /* IN: array of groups */
581 short *access; /* OUT: access info for each file (0 for 'has access') */
582 };
583
584 struct user_access_t {
585 uid_t uid; /* IN: effective user id */
586 short flags; /* IN: access requested (i.e. R_OK) */
587 short num_groups; /* IN: number of groups user belongs to */
588 int num_files; /* IN: number of files to process */
589 user_addr_t file_ids; /* IN: array of file ids */
590 user_addr_t groups; /* IN: array of groups */
591 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
592 };
593
594
595 // these are the "extended" versions of the above structures
596 // note that it is crucial that they be a different size than
597 // the regular versions
598 struct ext_access_t {
599 uint32_t flags; /* IN: access requested (i.e. R_OK) */
600 uint32_t num_files; /* IN: number of files to process */
601 uint32_t map_size; /* IN: size of the bit map */
602 uint32_t *file_ids; /* IN: Array of file ids */
603 char *bitmap; /* OUT: hash-bitmap of interesting directory ids */
604 short *access; /* OUT: access info for each file (0 for 'has access') */
605 uint32_t num_parents; /* future use */
606 cnid_t *parents; /* future use */
607 };
608
609 struct ext_user_access_t {
610 uint32_t flags; /* IN: access requested (i.e. R_OK) */
611 uint32_t num_files; /* IN: number of files to process */
612 uint32_t map_size; /* IN: size of the bit map */
613 user_addr_t file_ids; /* IN: array of file ids */
614 user_addr_t bitmap; /* IN: array of groups */
615 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
616 uint32_t num_parents;/* future use */
617 user_addr_t parents;/* future use */
618 };
619
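/*
 * Rough usage sketch (not part of this file; details hedged): a user-space
 * process fills in one of the structures above -- file_ids/num_files plus,
 * for the extended form, an optional bitmap and parents array -- and issues
 * the corresponding fsctl(2) against the volume. The request arrives in
 * hfs_vnop_ioctl() below as HFS_BULKACCESS_FSCTL or HFS_EXT_BULKACCESS_FSCTL,
 * and per-file results come back through the access array (0 == has access).
 */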
620
621 /*
622 * Perform a binary search for the given parent_id. The return value is
623 * the index if there is a match. If no_match_indexp is non-NULL it
624 * will be assigned the index at which to insert the item (even if the
625 * item was not found).
626 */
627 static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
628 {
629 int index=-1;
630 unsigned int lo=0;
631
632 do {
633 unsigned int mid = ((hi - lo)/2) + lo;
634 unsigned int this_id = array[mid];
635
636 if (parent_id == this_id) {
637 hi = mid;
638 break;
639 }
640
641 if (parent_id < this_id) {
642 hi = mid;
643 continue;
644 }
645
646 if (parent_id > this_id) {
647 lo = mid + 1;
648 continue;
649 }
650 } while(lo < hi);
651
652 /* check if lo and hi converged on the match */
653 if (parent_id == array[hi]) {
654 index = hi;
655 }
656
657 if (no_match_indexp) {
658 *no_match_indexp = hi;
659 }
660
661 return index;
662 }
663
664
665 static int
666 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
667 {
668 unsigned int hi;
669 int matches = 0;
670 int index, no_match_index;
671
672 if (cache->numcached == 0) {
673 *indexp = 0;
674 return 0; // table is empty, so insert at index=0 and report no match
675 }
676
677 if (cache->numcached > NUM_CACHE_ENTRIES) {
678 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
679 cache->numcached, NUM_CACHE_ENTRIES);*/
680 cache->numcached = NUM_CACHE_ENTRIES;
681 }
682
683 hi = cache->numcached - 1;
684
685 index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);
686
687 /* if no existing entry found, find index for new one */
688 if (index == -1) {
689 index = no_match_index;
690 matches = 0;
691 } else {
692 matches = 1;
693 }
694
695 *indexp = index;
696 return matches;
697 }
698
699 /*
700 * Add a node to the access_cache at the given index (or do a lookup first
701 * to find the index if -1 is passed in). We currently do a replace rather
702 * than an insert if the cache is full.
703 */
704 static void
705 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
706 {
707 int lookup_index = -1;
708
709 /* need to do a lookup first if -1 passed for index */
710 if (index == -1) {
711 if (lookup_bucket(cache, &lookup_index, nodeID)) {
712 if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
713 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
714 cache->haveaccess[lookup_index] = access;
715 }
716
717 /* mission accomplished */
718 return;
719 } else {
720 index = lookup_index;
721 }
722
723 }
724
725 /* if the cache is full, do a replace rather than an insert */
726 if (cache->numcached >= NUM_CACHE_ENTRIES) {
727 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
728 cache->numcached = NUM_CACHE_ENTRIES-1;
729
730 if (index > cache->numcached) {
731 // printf("index %d pinned to %d\n", index, cache->numcached);
732 index = cache->numcached;
733 }
734 }
735
736 if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
737 index++;
738 }
739
740 if (index >= 0 && index < cache->numcached) {
741 /* only do bcopy if we're inserting */
742 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
743 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
744 }
745
746 cache->acache[index] = nodeID;
747 cache->haveaccess[index] = access;
748 cache->numcached++;
749 }
750
751
752 struct cinfo {
753 uid_t uid;
754 gid_t gid;
755 mode_t mode;
756 cnid_t parentcnid;
757 u_int16_t recflags;
758 };
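/*
 * snoop_callback() below copies just these fields out of an in-core cnode,
 * letting do_attr_lookup() skip the catalog when the item is already in memory.
 */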
759
760 static int
761 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
762 {
763 struct cinfo *cip = (struct cinfo *)arg;
764
765 cip->uid = attrp->ca_uid;
766 cip->gid = attrp->ca_gid;
767 cip->mode = attrp->ca_mode;
768 cip->parentcnid = descp->cd_parentcnid;
769 cip->recflags = attrp->ca_recflags;
770
771 return (0);
772 }
773
774 /*
775 * Look up the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
776 * isn't in-core, then go to the catalog.
777 */
778 static int
779 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
780 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
781 {
782 int error = 0;
783
784 /* if this id matches the one the fsctl was called with, skip the lookup */
785 if (cnid == skip_cp->c_cnid) {
786 cnattrp->ca_uid = skip_cp->c_uid;
787 cnattrp->ca_gid = skip_cp->c_gid;
788 cnattrp->ca_mode = skip_cp->c_mode;
789 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
790 } else {
791 struct cinfo c_info;
792
793 /* otherwise, check the cnode hash in case the file/dir is in-core */
794 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
795 cnattrp->ca_uid = c_info.uid;
796 cnattrp->ca_gid = c_info.gid;
797 cnattrp->ca_mode = c_info.mode;
798 cnattrp->ca_recflags = c_info.recflags;
799 keyp->hfsPlus.parentID = c_info.parentcnid;
800 } else {
801 int lockflags;
802
803 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
804
805 /* lookup this cnid in the catalog */
806 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
807
808 hfs_systemfile_unlock(hfsmp, lockflags);
809
810 cache->lookups++;
811 }
812 }
813
814 return (error);
815 }
816
817
818 /*
819 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
820 * up to CACHE_LEVELS as we progress towards the root.
821 */
822 static int
823 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
824 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev,
825 struct vfs_context *my_context,
826 char *bitmap,
827 uint32_t map_size,
828 cnid_t* parents,
829 uint32_t num_parents)
830 {
831 int myErr = 0;
832 int myResult;
833 HFSCatalogNodeID thisNodeID;
834 unsigned int myPerms;
835 struct cat_attr cnattr;
836 int cache_index = -1, scope_index = -1, scope_idx_start = -1;
837 CatalogKey catkey;
838
839 int i = 0, ids_to_cache = 0;
840 int parent_ids[CACHE_LEVELS];
841
842 thisNodeID = nodeID;
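/* Walk from nodeID up toward the root, checking (and caching) access at each level. */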
843 while (thisNodeID >= kRootDirID) {
844 myResult = 0; /* default to "no access" */
845
846 /* check the cache before resorting to hitting the catalog */
847
848 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
849 * to look any further after hitting cached dir */
850
851 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
852 cache->cachehits++;
853 myErr = cache->haveaccess[cache_index];
854 if (scope_index != -1) {
855 if (myErr == ESRCH) {
856 myErr = 0;
857 }
858 } else {
859 scope_index = 0; // so we'll just use the cache result
860 scope_idx_start = ids_to_cache;
861 }
862 myResult = (myErr == 0) ? 1 : 0;
863 goto ExitThisRoutine;
864 }
865
866
867 if (parents) {
868 int tmp;
869 tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
870 if (scope_index == -1)
871 scope_index = tmp;
872 if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
873 scope_idx_start = ids_to_cache;
874 }
875 }
876
877 /* remember which parents we want to cache */
878 if (ids_to_cache < CACHE_LEVELS) {
879 parent_ids[ids_to_cache] = thisNodeID;
880 ids_to_cache++;
881 }
882 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
883 if (bitmap && map_size) {
884 bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
885 }
886
887
888 /* do the lookup (checks the cnode hash, then the catalog) */
889 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr);
890 if (myErr) {
891 goto ExitThisRoutine; /* no access */
892 }
893
894 /* Root always gets access. */
895 if (suser(myp_ucred, NULL) == 0) {
896 thisNodeID = catkey.hfsPlus.parentID;
897 myResult = 1;
898 continue;
899 }
900
901 // if the thing has acl's, do the full permission check
902 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
903 struct vnode *vp;
904
905 /* get the vnode for this cnid */
906 myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0);
907 if ( myErr ) {
908 myResult = 0;
909 goto ExitThisRoutine;
910 }
911
912 thisNodeID = VTOC(vp)->c_parentcnid;
913
914 hfs_unlock(VTOC(vp));
915
916 if (vnode_vtype(vp) == VDIR) {
917 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
918 } else {
919 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
920 }
921
922 vnode_put(vp);
923 if (myErr) {
924 myResult = 0;
925 goto ExitThisRoutine;
926 }
927 } else {
928 unsigned int flags;
929
930 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
931 cnattr.ca_mode, hfsmp->hfs_mp,
932 myp_ucred, theProcPtr);
933
934 if (cnattr.ca_mode & S_IFDIR) {
935 flags = R_OK | X_OK;
936 } else {
937 flags = R_OK;
938 }
939 if ( (myPerms & flags) != flags) {
940 myResult = 0;
941 myErr = EACCES;
942 goto ExitThisRoutine; /* no access */
943 }
944
945 /* up the hierarchy we go */
946 thisNodeID = catkey.hfsPlus.parentID;
947 }
948 }
949
950 /* if here, we have access to this node */
951 myResult = 1;
952
953 ExitThisRoutine:
954 if (parents && myErr == 0 && scope_index == -1) {
955 myErr = ESRCH;
956 }
957
958 if (myErr) {
959 myResult = 0;
960 }
961 *err = myErr;
962
963 /* cache the parent directory(ies) */
964 for (i = 0; i < ids_to_cache; i++) {
965 if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
966 add_node(cache, -1, parent_ids[i], ESRCH);
967 } else {
968 add_node(cache, -1, parent_ids[i], myErr);
969 }
970 }
971
972 return (myResult);
973 }
974
975 static int
976 do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
977 struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
978 {
979 boolean_t is64bit;
980
981 /*
982 * NOTE: on entry, the vnode is locked. In case this vnode
983 * happens to be in our list of file_ids, we note it and
984 * avoid calling hfs_chashget_nowait() on that id, as that
985 * will cause a "locking against myself" panic.
986 */
987 Boolean check_leaf = true;
988
989 struct ext_user_access_t *user_access_structp;
990 struct ext_user_access_t tmp_user_access;
991 struct access_cache cache;
992
993 int error = 0;
994 unsigned int i;
995
996 dev_t dev = VTOC(vp)->c_dev;
997
998 short flags;
999 unsigned int num_files = 0;
1000 int map_size = 0;
1001 int num_parents = 0;
1002 int *file_ids=NULL;
1003 short *access=NULL;
1004 char *bitmap=NULL;
1005 cnid_t *parents=NULL;
1006 int leaf_index;
1007
1008 cnid_t cnid;
1009 cnid_t prevParent_cnid = 0;
1010 unsigned int myPerms;
1011 short myaccess = 0;
1012 struct cat_attr cnattr;
1013 CatalogKey catkey;
1014 struct cnode *skip_cp = VTOC(vp);
1015 kauth_cred_t cred = vfs_context_ucred(context);
1016 proc_t p = vfs_context_proc(context);
1017
1018 is64bit = proc_is64bit(p);
1019
1020 /* initialize the local cache and buffers */
1021 cache.numcached = 0;
1022 cache.cachehits = 0;
1023 cache.lookups = 0;
1024 cache.acache = NULL;
1025 cache.haveaccess = NULL;
1026
1027 /* struct copyin done during dispatch... need to copy file_id array separately */
1028 if (ap->a_data == NULL) {
1029 error = EINVAL;
1030 goto err_exit_bulk_access;
1031 }
1032
1033 if (is64bit) {
1034 if (arg_size != sizeof(struct ext_user_access_t)) {
1035 error = EINVAL;
1036 goto err_exit_bulk_access;
1037 }
1038
1039 user_access_structp = (struct ext_user_access_t *)ap->a_data;
1040
1041 } else if (arg_size == sizeof(struct access_t)) {
1042 struct access_t *accessp = (struct access_t *)ap->a_data;
1043
1044 // convert an old style bulk-access struct to the new style
1045 tmp_user_access.flags = accessp->flags;
1046 tmp_user_access.num_files = accessp->num_files;
1047 tmp_user_access.map_size = 0;
1048 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1049 tmp_user_access.bitmap = (user_addr_t)NULL;
1050 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1051 tmp_user_access.num_parents = 0;
1052 user_access_structp = &tmp_user_access;
1053
1054 } else if (arg_size == sizeof(struct ext_access_t)) {
1055 struct ext_access_t *accessp = (struct ext_access_t *)ap->a_data;
1056
1057 // up-cast from a 32-bit version of the struct
1058 tmp_user_access.flags = accessp->flags;
1059 tmp_user_access.num_files = accessp->num_files;
1060 tmp_user_access.map_size = accessp->map_size;
1061 tmp_user_access.num_parents = accessp->num_parents;
1062
1063 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1064 tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
1065 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1066 tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);
1067
1068 user_access_structp = &tmp_user_access;
1069 } else {
1070 error = EINVAL;
1071 goto err_exit_bulk_access;
1072 }
1073
1074 map_size = user_access_structp->map_size;
1075
1076 num_files = user_access_structp->num_files;
1077
1078 num_parents= user_access_structp->num_parents;
1079
1080 if (num_files < 1) {
1081 goto err_exit_bulk_access;
1082 }
1083 if (num_files > 1024) {
1084 error = EINVAL;
1085 goto err_exit_bulk_access;
1086 }
1087
1088 if (num_parents > 1024) {
1089 error = EINVAL;
1090 goto err_exit_bulk_access;
1091 }
1092
1093 file_ids = (int *) kalloc(sizeof(int) * num_files);
1094 access = (short *) kalloc(sizeof(short) * num_files);
1095 if (map_size) {
1096 bitmap = (char *) kalloc(sizeof(char) * map_size);
1097 }
1098
1099 if (num_parents) {
1100 parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
1101 }
1102
1103 cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
1104 cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1105
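/*
 * kalloc() can fail; if any of the buffers above came back NULL, free
 * whatever did get allocated and bail out with ENOMEM.
 */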
1106 if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
1107 if (file_ids) {
1108 kfree(file_ids, sizeof(int) * num_files);
1109 }
1110 if (bitmap) {
1111 kfree(bitmap, sizeof(char) * map_size);
1112 }
1113 if (access) {
1114 kfree(access, sizeof(short) * num_files);
1115 }
1116 if (cache.acache) {
1117 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1118 }
1119 if (cache.haveaccess) {
1120 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1121 }
1122 if (parents) {
1123 kfree(parents, sizeof(cnid_t) * num_parents);
1124 }
1125 return ENOMEM;
1126 }
1127
1128 // make sure the bitmap is zeroed out...
1129 if (bitmap) {
1130 bzero(bitmap, (sizeof(char) * map_size));
1131 }
1132
1133 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1134 num_files * sizeof(int)))) {
1135 goto err_exit_bulk_access;
1136 }
1137
1138 if (num_parents) {
1139 if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
1140 num_parents * sizeof(cnid_t)))) {
1141 goto err_exit_bulk_access;
1142 }
1143 }
1144
1145 flags = user_access_structp->flags;
1146 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1147 flags = R_OK;
1148 }
1149
1150 /* check if we've been passed leaf node ids or parent ids */
1151 if (flags & PARENT_IDS_FLAG) {
1152 check_leaf = false;
1153 }
1154
1155 /* Check access to each file_id passed in */
1156 for (i = 0; i < num_files; i++) {
1157 leaf_index=-1;
1158 cnid = (cnid_t) file_ids[i];
1159
1160 /* root always has access */
1161 if ((!parents) && (!suser(cred, NULL))) {
1162 access[i] = 0;
1163 continue;
1164 }
1165
1166 if (check_leaf) {
1167 /* do the lookup (checks the cnode hash, then the catalog) */
1168 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr);
1169 if (error) {
1170 access[i] = (short) error;
1171 continue;
1172 }
1173
1174 if (parents) {
1175 // Check if the leaf matches one of the parent scopes
1176 leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
1177 }
1178
1179 // if the thing has acl's, do the full permission check
1180 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1181 struct vnode *cvp;
1182 int myErr = 0;
1183 /* get the vnode for this cnid */
1184 myErr = hfs_vget(hfsmp, cnid, &cvp, 0);
1185 if ( myErr ) {
1186 access[i] = myErr;
1187 continue;
1188 }
1189
1190 hfs_unlock(VTOC(cvp));
1191
1192 if (vnode_vtype(cvp) == VDIR) {
1193 myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
1194 } else {
1195 myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
1196 }
1197
1198 vnode_put(cvp);
1199 if (myErr) {
1200 access[i] = myErr;
1201 continue;
1202 }
1203 } else {
1204 /* before calling CheckAccess(), check the target file for read access */
1205 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1206 cnattr.ca_mode, hfsmp->hfs_mp, cred, p);
1207
1208 /* fail fast if no access */
1209 if ((myPerms & flags) == 0) {
1210 access[i] = EACCES;
1211 continue;
1212 }
1213 }
1214 } else {
1215 /* we were passed an array of parent ids */
1216 catkey.hfsPlus.parentID = cnid;
1217 }
1218
1219 /* if the last guy had the same parent and had access, we're done */
1220 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1221 cache.cachehits++;
1222 access[i] = 0;
1223 continue;
1224 }
1225
1226 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1227 skip_cp, p, cred, dev, context,bitmap, map_size, parents, num_parents);
1228
1229 if (myaccess || (error == ESRCH && leaf_index != -1)) {
1230 access[i] = 0; // have access.. no errors to report
1231 } else {
1232 access[i] = (error != 0 ? (short) error : EACCES);
1233 }
1234
1235 prevParent_cnid = catkey.hfsPlus.parentID;
1236 }
1237
1238 /* copyout the access array */
1239 if ((error = copyout((caddr_t)access, user_access_structp->access,
1240 num_files * sizeof (short)))) {
1241 goto err_exit_bulk_access;
1242 }
1243 if (map_size && bitmap) {
1244 if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
1245 map_size * sizeof (char)))) {
1246 goto err_exit_bulk_access;
1247 }
1248 }
1249
1250
1251 err_exit_bulk_access:
1252
1253 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1254
1255 if (file_ids)
1256 kfree(file_ids, sizeof(int) * num_files);
1257 if (parents)
1258 kfree(parents, sizeof(cnid_t) * num_parents);
1259 if (bitmap)
1260 kfree(bitmap, sizeof(char) * map_size);
1261 if (access)
1262 kfree(access, sizeof(short) * num_files);
1263 if (cache.acache)
1264 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1265 if (cache.haveaccess)
1266 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1267
1268 return (error);
1269 }
1270
1271
1272 /* end "bulk-access" support */
1273
1274
1275 /*
1276 * Callback for use with freeze ioctl.
1277 */
1278 static int
1279 hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
1280 {
1281 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
1282
1283 return 0;
1284 }
1285
1286 /*
1287 * Control filesystem operating characteristics.
1288 */
1289 int
1290 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
1291 vnode_t a_vp;
1292 int a_command;
1293 caddr_t a_data;
1294 int a_fflag;
1295 vfs_context_t a_context;
1296 } */ *ap)
1297 {
1298 struct vnode * vp = ap->a_vp;
1299 struct hfsmount *hfsmp = VTOHFS(vp);
1300 vfs_context_t context = ap->a_context;
1301 kauth_cred_t cred = vfs_context_ucred(context);
1302 proc_t p = vfs_context_proc(context);
1303 struct vfsstatfs *vfsp;
1304 boolean_t is64bit;
1305
1306 is64bit = proc_is64bit(p);
1307
1308 switch (ap->a_command) {
1309
1310 case HFS_GETPATH:
1311 {
1312 struct vnode *file_vp;
1313 cnid_t cnid;
1314 int outlen;
1315 char *bufptr;
1316 int error;
1317
1318 /* Caller must be owner of file system. */
1319 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1320 if (suser(cred, NULL) &&
1321 kauth_cred_getuid(cred) != vfsp->f_owner) {
1322 return (EACCES);
1323 }
1324 /* Target vnode must be file system's root. */
1325 if (!vnode_isvroot(vp)) {
1326 return (EINVAL);
1327 }
1328 bufptr = (char *)ap->a_data;
1329 cnid = strtoul(bufptr, NULL, 10);
1330
1331 if ((error = hfs_vget(hfsmp, cnid, &file_vp, 1))) {
1332 return (error);
1333 }
1334 error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);
1335 vnode_put(file_vp);
1336
1337 return (error);
1338 }
1339
1340 case HFS_PREV_LINK:
1341 case HFS_NEXT_LINK:
1342 {
1343 cnid_t linkfileid;
1344 cnid_t nextlinkid;
1345 cnid_t prevlinkid;
1346 int error;
1347
1348 /* Caller must be owner of file system. */
1349 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1350 if (suser(cred, NULL) &&
1351 kauth_cred_getuid(cred) != vfsp->f_owner) {
1352 return (EACCES);
1353 }
1354 /* Target vnode must be file system's root. */
1355 if (!vnode_isvroot(vp)) {
1356 return (EINVAL);
1357 }
1358 linkfileid = *(cnid_t *)ap->a_data;
1359 if (linkfileid < kHFSFirstUserCatalogNodeID) {
1360 return (EINVAL);
1361 }
1362 if ((error = hfs_lookuplink(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
1363 return (error);
1364 }
1365 if (ap->a_command == HFS_NEXT_LINK) {
1366 *(cnid_t *)ap->a_data = nextlinkid;
1367 } else {
1368 *(cnid_t *)ap->a_data = prevlinkid;
1369 }
1370 return (0);
1371 }
1372
1373 case HFS_RESIZE_PROGRESS: {
1374
1375 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1376 if (suser(cred, NULL) &&
1377 kauth_cred_getuid(cred) != vfsp->f_owner) {
1378 return (EACCES); /* must be owner of file system */
1379 }
1380 if (!vnode_isvroot(vp)) {
1381 return (EINVAL);
1382 }
1383 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
1384 }
1385
1386 case HFS_RESIZE_VOLUME: {
1387 u_int64_t newsize;
1388 u_int64_t cursize;
1389
1390 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1391 if (suser(cred, NULL) &&
1392 kauth_cred_getuid(cred) != vfsp->f_owner) {
1393 return (EACCES); /* must be owner of file system */
1394 }
1395 if (!vnode_isvroot(vp)) {
1396 return (EINVAL);
1397 }
1398 newsize = *(u_int64_t *)ap->a_data;
1399 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
1400
1401 if (newsize > cursize) {
1402 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
1403 } else if (newsize < cursize) {
1404 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
1405 } else {
1406 return (0);
1407 }
1408 }
1409 case HFS_CHANGE_NEXT_ALLOCATION: {
1410 int error = 0; /* Assume success */
1411 u_int32_t location;
1412
1413 if (vnode_vfsisrdonly(vp)) {
1414 return (EROFS);
1415 }
1416 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1417 if (suser(cred, NULL) &&
1418 kauth_cred_getuid(cred) != vfsp->f_owner) {
1419 return (EACCES); /* must be owner of file system */
1420 }
1421 if (!vnode_isvroot(vp)) {
1422 return (EINVAL);
1423 }
1424 HFS_MOUNT_LOCK(hfsmp, TRUE);
1425 location = *(u_int32_t *)ap->a_data;
1426 if ((location >= hfsmp->allocLimit) &&
1427 (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
1428 error = EINVAL;
1429 goto fail_change_next_allocation;
1430 }
1431 /* Return previous value. */
1432 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
1433 if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
1434 /* On magic value for location, set nextAllocation to next block
1435 * after metadata zone and set flag in mount structure to indicate
1436 * that nextAllocation should not be updated again.
1437 */
1438 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
1439 hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1440 } else {
1441 hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1442 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
1443 }
1444 MarkVCBDirty(hfsmp);
1445 fail_change_next_allocation:
1446 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1447 return (error);
1448 }
1449
1450 #ifdef HFS_SPARSE_DEV
1451 case HFS_SETBACKINGSTOREINFO: {
1452 struct vnode * bsfs_rootvp;
1453 struct vnode * di_vp;
1454 struct hfs_backingstoreinfo *bsdata;
1455 int error = 0;
1456
1457 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
1458 return (EALREADY);
1459 }
1460 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1461 if (suser(cred, NULL) &&
1462 kauth_cred_getuid(cred) != vfsp->f_owner) {
1463 return (EACCES); /* must be owner of file system */
1464 }
1465 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
1466 if (bsdata == NULL) {
1467 return (EINVAL);
1468 }
1469 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
1470 return (error);
1471 }
1472 if ((error = vnode_getwithref(di_vp))) {
1473 file_drop(bsdata->backingfd);
1474 return(error);
1475 }
1476
1477 if (vnode_mount(vp) == vnode_mount(di_vp)) {
1478 (void)vnode_put(di_vp);
1479 file_drop(bsdata->backingfd);
1480 return (EINVAL);
1481 }
1482
1483 /*
1484 * Obtain the backing fs root vnode and keep a reference
1485 * on it. This reference will be dropped in hfs_unmount.
1486 */
1487 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
1488 if (error) {
1489 (void)vnode_put(di_vp);
1490 file_drop(bsdata->backingfd);
1491 return (error);
1492 }
1493 vnode_ref(bsfs_rootvp);
1494 vnode_put(bsfs_rootvp);
1495
1496 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
1497 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
1498 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
1499 hfsmp->hfs_sparsebandblks *= 4;
1500
1501 vfs_markdependency(hfsmp->hfs_mp);
1502
1503 (void)vnode_put(di_vp);
1504 file_drop(bsdata->backingfd);
1505 return (0);
1506 }
1507 case HFS_CLRBACKINGSTOREINFO: {
1508 struct vnode * tmpvp;
1509
1510 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1511 if (suser(cred, NULL) &&
1512 kauth_cred_getuid(cred) != vfsp->f_owner) {
1513 return (EACCES); /* must be owner of file system */
1514 }
1515 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
1516 hfsmp->hfs_backingfs_rootvp) {
1517
1518 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
1519 tmpvp = hfsmp->hfs_backingfs_rootvp;
1520 hfsmp->hfs_backingfs_rootvp = NULLVP;
1521 hfsmp->hfs_sparsebandblks = 0;
1522 vnode_rele(tmpvp);
1523 }
1524 return (0);
1525 }
1526 #endif /* HFS_SPARSE_DEV */
1527
1528 case F_FREEZE_FS: {
1529 struct mount *mp;
1530
1531 if (!is_suser())
1532 return (EACCES);
1533
1534 mp = vnode_mount(vp);
1535 hfsmp = VFSTOHFS(mp);
1536
1537 if (!(hfsmp->jnl))
1538 return (ENOTSUP);
1539
1540 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
1541
1542 // flush things before we get started to try and prevent
1543 // dirty data from being paged out while we're frozen.
1544 // note: can't do this after taking the lock as it will
1545 // deadlock against ourselves.
1546 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1547 hfs_global_exclusive_lock_acquire(hfsmp);
1548 journal_flush(hfsmp->jnl);
1549
1550 // don't need to iterate on all vnodes, we just need to
1551 // wait for writes to the system files and the device vnode
1552 if (HFSTOVCB(hfsmp)->extentsRefNum)
1553 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1554 if (HFSTOVCB(hfsmp)->catalogRefNum)
1555 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1556 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1557 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1558 if (hfsmp->hfs_attribute_vp)
1559 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1560 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1561
1562 hfsmp->hfs_freezing_proc = current_proc();
1563
1564 return (0);
1565 }
1566
1567 case F_THAW_FS: {
1568 if (!is_suser())
1569 return (EACCES);
1570
1571 // if we're not the one who froze the fs then we
1572 // can't thaw it.
1573 if (hfsmp->hfs_freezing_proc != current_proc()) {
1574 return EPERM;
1575 }
1576
1577 // NOTE: if you add code here, also go check the
1578 // code that "thaws" the fs in hfs_vnop_close()
1579 //
1580 hfsmp->hfs_freezing_proc = NULL;
1581 hfs_global_exclusive_lock_release(hfsmp);
1582 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1583
1584 return (0);
1585 }
1586
1587 case HFS_BULKACCESS_FSCTL: {
1588 int size;
1589
1590 if (hfsmp->hfs_flags & HFS_STANDARD) {
1591 return EINVAL;
1592 }
1593
1594 if (is64bit) {
1595 size = sizeof(struct user_access_t);
1596 } else {
1597 size = sizeof(struct access_t);
1598 }
1599
1600 return do_bulk_access_check(hfsmp, vp, ap, size, context);
1601 }
1602
1603 case HFS_EXT_BULKACCESS_FSCTL: {
1604 int size;
1605
1606 if (hfsmp->hfs_flags & HFS_STANDARD) {
1607 return EINVAL;
1608 }
1609
1610 if (is64bit) {
1611 size = sizeof(struct ext_user_access_t);
1612 } else {
1613 size = sizeof(struct ext_access_t);
1614 }
1615
1616 return do_bulk_access_check(hfsmp, vp, ap, size, context);
1617 }
1618
1619 case HFS_SETACLSTATE: {
1620 int state;
1621
1622 if (ap->a_data == NULL) {
1623 return (EINVAL);
1624 }
1625
1626 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1627 state = *(int *)ap->a_data;
1628
1629 // super-user can enable or disable acl's on a volume.
1630 // the volume owner can only enable acl's
1631 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1632 return (EPERM);
1633 }
1634 if (state == 0 || state == 1)
1635 return hfs_set_volxattr(hfsmp, HFS_SETACLSTATE, state);
1636 else
1637 return (EINVAL);
1638 }
1639
1640 case HFS_SET_XATTREXTENTS_STATE: {
1641 int state;
1642
1643 if (ap->a_data == NULL) {
1644 return (EINVAL);
1645 }
1646
1647 state = *(int *)ap->a_data;
1648
1649 /* Super-user can enable or disable extent-based extended
1650 * attribute support on a volume
1651 */
1652 if (!is_suser()) {
1653 return (EPERM);
1654 }
1655 if (state == 0 || state == 1)
1656 return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
1657 else
1658 return (EINVAL);
1659 }
1660
1661 case F_FULLFSYNC: {
1662 int error;
1663
1664 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1665 if (error == 0) {
1666 error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
1667 hfs_unlock(VTOC(vp));
1668 }
1669
1670 return error;
1671 }
1672
1673 case F_CHKCLEAN: {
1674 register struct cnode *cp;
1675 int error;
1676
1677 if (!vnode_isreg(vp))
1678 return EINVAL;
1679
1680 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1681 if (error == 0) {
1682 cp = VTOC(vp);
1683 /*
1684 * used by regression tests to determine if
1685 * all the dirty pages (via write) have been cleaned
1686 * after a call to 'fsync'.
1687 */
1688 error = is_file_clean(vp, VTOF(vp)->ff_size);
1689 hfs_unlock(cp);
1690 }
1691 return (error);
1692 }
1693
1694 case F_RDADVISE: {
1695 register struct radvisory *ra;
1696 struct filefork *fp;
1697 int error;
1698
1699 if (!vnode_isreg(vp))
1700 return EINVAL;
1701
1702 ra = (struct radvisory *)(ap->a_data);
1703 fp = VTOF(vp);
1704
1705 /* Protect against a size change. */
1706 hfs_lock_truncate(VTOC(vp), TRUE);
1707
1708 if (ra->ra_offset >= fp->ff_size) {
1709 error = EFBIG;
1710 } else {
1711 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1712 }
1713
1714 hfs_unlock_truncate(VTOC(vp), TRUE);
1715 return (error);
1716 }
1717
1718 case F_READBOOTSTRAP:
1719 case F_WRITEBOOTSTRAP:
1720 {
1721 struct vnode *devvp = NULL;
1722 user_fbootstraptransfer_t *user_bootstrapp;
1723 int devBlockSize;
1724 int error;
1725 uio_t auio;
1726 daddr64_t blockNumber;
1727 u_long blockOffset;
1728 u_long xfersize;
1729 struct buf *bp;
1730 user_fbootstraptransfer_t user_bootstrap;
1731
1732 if (!vnode_isvroot(vp))
1733 return (EINVAL);
1734 /* LP64 - when caller is a 64 bit process then we are passed a pointer
1735 * to a user_fbootstraptransfer_t else we get a pointer to a
1736 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1737 */
1738 if (is64bit) {
1739 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1740 }
1741 else {
1742 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1743 user_bootstrapp = &user_bootstrap;
1744 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1745 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1746 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1747 }
1748 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1749 return EINVAL;
1750
1751 devvp = VTOHFS(vp)->hfs_devvp;
1752 auio = uio_create(1, user_bootstrapp->fbt_offset,
1753 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1754 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1755 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1756
1757 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1758
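/*
 * Copy the bootstrap area one device block at a time through the buffer
 * cache: each block is read with buf_bread(), the relevant bytes are moved
 * to or from user space with uiomove(), and for writes the block is pushed
 * back out via VNOP_BWRITE().
 */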
1759 while (uio_resid(auio) > 0) {
1760 blockNumber = uio_offset(auio) / devBlockSize;
1761 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1762 if (error) {
1763 if (bp) buf_brelse(bp);
1764 uio_free(auio);
1765 return error;
1766 };
1767
1768 blockOffset = uio_offset(auio) % devBlockSize;
1769 xfersize = devBlockSize - blockOffset;
1770 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1771 if (error) {
1772 buf_brelse(bp);
1773 uio_free(auio);
1774 return error;
1775 };
1776 if (uio_rw(auio) == UIO_WRITE) {
1777 error = VNOP_BWRITE(bp);
1778 if (error) {
1779 uio_free(auio);
1780 return error;
1781 }
1782 } else {
1783 buf_brelse(bp);
1784 };
1785 };
1786 uio_free(auio);
1787 };
1788 return 0;
1789
1790 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1791 {
1792 if (is64bit) {
1793 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1794 }
1795 else {
1796 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1797 }
1798 return 0;
1799 }
1800
1801 case HFS_GET_MOUNT_TIME:
1802 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1803 break;
1804
1805 case HFS_GET_LAST_MTIME:
1806 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1807 break;
1808
1809 case HFS_SET_BOOT_INFO:
1810 if (!vnode_isvroot(vp))
1811 return(EINVAL);
1812 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1813 return(EACCES); /* must be superuser or owner of filesystem */
1814 HFS_MOUNT_LOCK(hfsmp, TRUE);
1815 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1816 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1817 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1818 break;
1819
1820 case HFS_GET_BOOT_INFO:
1821 if (!vnode_isvroot(vp))
1822 return(EINVAL);
1823 HFS_MOUNT_LOCK(hfsmp, TRUE);
1824 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1825 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1826 break;
1827
1828 case HFS_MARK_BOOT_CORRUPT:
1829 /* Mark the boot volume corrupt by setting
1830 * kHFSVolumeInconsistentBit in the volume header. This will
1831 * force fsck_hfs on next mount.
1832 */
1833 if (!is_suser()) {
1834 return EACCES;
1835 }
1836
1837 /* Allowed only on the root vnode of the boot volume */
1838 if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
1839 !vnode_isvroot(vp)) {
1840 return EINVAL;
1841 }
1842
1843 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
1844 hfs_mark_volume_inconsistent(hfsmp);
1845 break;
1846
1847 default:
1848 return (ENOTTY);
1849 }
1850
1851 /* Should never get here */
1852 return 0;
1853 }
1854
1855 /*
1856 * select
1857 */
1858 int
1859 hfs_vnop_select(__unused struct vnop_select_args *ap)
1860 /*
1861 struct vnop_select_args {
1862 vnode_t a_vp;
1863 int a_which;
1864 int a_fflags;
1865 void *a_wql;
1866 vfs_context_t a_context;
1867 };
1868 */
1869 {
1870 /*
1871 * We should really check to see if I/O is possible.
1872 */
1873 return (1);
1874 }
1875
1876 /*
1877 * Converts a logical block number to a physical block, and optionally returns
1878 * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1879 * The physical block number is based on the device block size, currently 512.
1880 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
1881 */
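/*
 * Illustrative example (assumed values, not from this file): with a logical
 * block size of 4096 and 512-byte device blocks, logical block 8 is byte
 * offset 32768; MapFileBlockC() translates that offset into a device block
 * number for *bnp and reports the contiguous bytes available, from which
 * *runp (remaining logical blocks in the run, excluding the current one)
 * is derived.
 */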
1882 int
1883 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
1884 {
1885 struct filefork *fp = VTOF(vp);
1886 struct hfsmount *hfsmp = VTOHFS(vp);
1887 int retval = E_NONE;
1888 u_int32_t logBlockSize;
1889 size_t bytesContAvail = 0;
1890 off_t blockposition;
1891 int lockExtBtree;
1892 int lockflags = 0;
1893
1894 /*
1895 * Check for underlying vnode requests and ensure that logical
1896 * to physical mapping is requested.
1897 */
1898 if (vpp != NULL)
1899 *vpp = hfsmp->hfs_devvp;
1900 if (bnp == NULL)
1901 return (0);
1902
1903 logBlockSize = GetLogicalBlockSize(vp);
1904 blockposition = (off_t)bn * logBlockSize;
1905
1906 lockExtBtree = overflow_extents(fp);
1907
1908 if (lockExtBtree)
1909 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
1910
1911 retval = MacToVFSError(
1912 MapFileBlockC (HFSTOVCB(hfsmp),
1913 (FCB*)fp,
1914 MAXPHYSIO,
1915 blockposition,
1916 bnp,
1917 &bytesContAvail));
1918
1919 if (lockExtBtree)
1920 hfs_systemfile_unlock(hfsmp, lockflags);
1921
1922 if (retval == E_NONE) {
1923 /* Figure out how many read ahead blocks there are */
1924 if (runp != NULL) {
1925 if (can_cluster(logBlockSize)) {
1926 /* Make sure this result never goes negative: */
1927 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1928 } else {
1929 *runp = 0;
1930 }
1931 }
1932 }
1933 return (retval);
1934 }
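/*
 * Illustrative sketch (not part of the original source): the run-count math
 * in hfs_bmap() above, isolated into a hypothetical helper.  With a 4 KB
 * logical block size and 32 KB of contiguous bytes reported by
 * MapFileBlockC, the remaining run is 32768/4096 - 1 == 7 logical blocks.
 */
#if 0	/* example only, not compiled */
static unsigned int
example_remaining_run(size_t bytesContAvail, u_int32_t logBlockSize)
{
	/* Mirrors the clamp above so the result never goes negative. */
	if (bytesContAvail < logBlockSize)
		return (0);
	return ((bytesContAvail / logBlockSize) - 1);
}
#endif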
1935
1936 /*
1937 * Convert logical block number to file offset.
1938 */
1939 int
1940 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1941 /*
1942 struct vnop_blktooff_args {
1943 vnode_t a_vp;
1944 daddr64_t a_lblkno;
1945 off_t *a_offset;
1946 };
1947 */
1948 {
1949 if (ap->a_vp == NULL)
1950 return (EINVAL);
1951 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1952
1953 return(0);
1954 }
1955
1956 /*
1957 * Convert file offset to logical block number.
1958 */
1959 int
1960 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1961 /*
1962 struct vnop_offtoblk_args {
1963 vnode_t a_vp;
1964 off_t a_offset;
1965 daddr64_t *a_lblkno;
1966 };
1967 */
1968 {
1969 if (ap->a_vp == NULL)
1970 return (EINVAL);
1971 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1972
1973 return(0);
1974 }
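/*
 * Illustrative sketch (not part of the original source): the relationship
 * between the two conversions above.  With a 4 KB logical block size,
 * block 3 maps to offset 12288, and any offset in [12288, 16383] maps back
 * to block 3 because the division truncates.
 */
#if 0	/* example only, not compiled */
static daddr64_t
example_blk_offset_roundtrip(daddr64_t lblkno, u_int32_t logBlockSize)
{
	off_t offset = (off_t)lblkno * (off_t)logBlockSize;	/* hfs_vnop_blktooff */
	return ((daddr64_t)(offset / (off_t)logBlockSize));	/* hfs_vnop_offtoblk: == lblkno */
}
#endif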
1975
1976 /*
1977 * Map file offset to physical block number.
1978 *
1979 * If this function is called for write operation, and if the file
1980 * had virtual blocks allocated (delayed allocation), real blocks
1981 * are allocated by calling ExtendFileC().
1982 *
1983 * If this function is called for read operation, and if the file
1984 * had virtual blocks allocated (delayed allocation), no change
1985 * to the size of file is done, and if required, rangelist is
1986 * searched for mapping.
1987 *
1988 * System file cnodes are expected to be locked (shared or exclusive).
1989 */
1990 int
1991 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1992 /*
1993 struct vnop_blockmap_args {
1994 vnode_t a_vp;
1995 off_t a_foffset;
1996 size_t a_size;
1997 daddr64_t *a_bpn;
1998 size_t *a_run;
1999 void *a_poff;
2000 int a_flags;
2001 vfs_context_t a_context;
2002 };
2003 */
2004 {
2005 struct vnode *vp = ap->a_vp;
2006 struct cnode *cp;
2007 struct filefork *fp;
2008 struct hfsmount *hfsmp;
2009 size_t bytesContAvail = 0;
2010 int retval = E_NONE;
2011 int syslocks = 0;
2012 int lockflags = 0;
2013 struct rl_entry *invalid_range;
2014 enum rl_overlaptype overlaptype;
2015 int started_tr = 0;
2016 int tooklock = 0;
2017
2018 /* Do not allow blockmap operation on a directory */
2019 if (vnode_isdir(vp)) {
2020 return (ENOTSUP);
2021 }
2022
2023 /*
2024 * Check for underlying vnode requests and ensure that logical
2025 * to physical mapping is requested.
2026 */
2027 if (ap->a_bpn == NULL)
2028 return (0);
2029
2030 if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
2031 if (VTOC(vp)->c_lockowner != current_thread()) {
2032 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2033 tooklock = 1;
2034 }
2035 }
2036 hfsmp = VTOHFS(vp);
2037 cp = VTOC(vp);
2038 fp = VTOF(vp);
2039
2040 retry:
2041 /* Check virtual blocks only when performing a write operation */
2042 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
2043 if (hfs_start_transaction(hfsmp) != 0) {
2044 retval = EINVAL;
2045 goto exit;
2046 } else {
2047 started_tr = 1;
2048 }
2049 syslocks = SFL_EXTENTS | SFL_BITMAP;
2050
2051 } else if (overflow_extents(fp)) {
2052 syslocks = SFL_EXTENTS;
2053 }
2054
2055 if (syslocks)
2056 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
2057
2058 /*
2059 * Check for any delayed allocations.
2060 */
2061 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
2062 int64_t actbytes;
2063 u_int32_t loanedBlocks;
2064
2065 //
2066 // Make sure we have a transaction. It's possible
2067 // that we came in and fp->ff_unallocblocks was zero
2068 // but during the time we blocked acquiring the extents
2069 // btree, ff_unallocblocks became non-zero and so we
2070 // will need to start a transaction.
2071 //
2072 if (started_tr == 0) {
2073 if (syslocks) {
2074 hfs_systemfile_unlock(hfsmp, lockflags);
2075 syslocks = 0;
2076 }
2077 goto retry;
2078 }
2079
2080 /*
2081 * Note: ExtendFileC will release any blocks on loan and
2082 * acquire real blocks. So we ask to extend by zero bytes
2083 * since ExtendFileC will account for the virtual blocks.
2084 */
2085
2086 loanedBlocks = fp->ff_unallocblocks;
2087 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
2088 kEFAllMask | kEFNoClumpMask, &actbytes);
2089
2090 if (retval) {
2091 fp->ff_unallocblocks = loanedBlocks;
2092 cp->c_blocks += loanedBlocks;
2093 fp->ff_blocks += loanedBlocks;
2094
2095 HFS_MOUNT_LOCK(hfsmp, TRUE);
2096 hfsmp->loanedBlocks += loanedBlocks;
2097 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2098
2099 hfs_systemfile_unlock(hfsmp, lockflags);
2100 cp->c_flag |= C_MODIFIED;
2101 if (started_tr) {
2102 (void) hfs_update(vp, TRUE);
2103 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2104
2105 hfs_end_transaction(hfsmp);
2106 started_tr = 0;
2107 }
2108 goto exit;
2109 }
2110 }
2111
2112 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
2113 ap->a_bpn, &bytesContAvail);
2114 if (syslocks) {
2115 hfs_systemfile_unlock(hfsmp, lockflags);
2116 syslocks = 0;
2117 }
2118
2119 if (started_tr) {
2120 (void) hfs_update(vp, TRUE);
2121 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2122 hfs_end_transaction(hfsmp);
2123 started_tr = 0;
2124 }
2125 if (retval) {
2126 /* On write, always return error because virtual blocks, if any,
2127 * should have been allocated in ExtendFileC(). We do not
2128 * allocate virtual blocks on read, therefore return an error
2129 * only if no virtual blocks are allocated. Otherwise we search
2130 * the rangelist for zero-fill ranges.
2131 */
2132 if ((MacToVFSError(retval) != ERANGE) ||
2133 (ap->a_flags & VNODE_WRITE) ||
2134 ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
2135 goto exit;
2136 }
2137
2138 /* Validate if the start offset is within logical file size */
2139 if (ap->a_foffset > fp->ff_size) {
2140 goto exit;
2141 }
2142
2143 /* Searching the file extents has failed for the read operation, therefore
2144 * search the rangelist for any uncommitted holes in the file.
2145 */
2146 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
2147 ap->a_foffset + (off_t)(ap->a_size - 1),
2148 &invalid_range);
2149 switch(overlaptype) {
2150 case RL_OVERLAPISCONTAINED:
2151 /* start_offset <= rl_start, end_offset >= rl_end */
2152 if (ap->a_foffset != invalid_range->rl_start) {
2153 break;
2154 }
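/* FALLTHROUGH: when a_foffset == rl_start this case is handled like a matching overlap */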
2155 case RL_MATCHINGOVERLAP:
2156 /* start_offset = rl_start, end_offset = rl_end */
2157 case RL_OVERLAPCONTAINSRANGE:
2158 /* start_offset >= rl_start, end_offset <= rl_end */
2159 case RL_OVERLAPSTARTSBEFORE:
2160 /* start_offset > rl_start, end_offset >= rl_start */
2161 if ((off_t)fp->ff_size > (invalid_range->rl_end + 1)) {
2162 bytesContAvail = (invalid_range->rl_end + 1) - ap->a_foffset;
2163 } else {
2164 bytesContAvail = fp->ff_size - ap->a_foffset;
2165 }
2166 if (bytesContAvail > ap->a_size) {
2167 bytesContAvail = ap->a_size;
2168 }
2169 *ap->a_bpn = (daddr64_t)-1;
2170 retval = 0;
2171 break;
2172 case RL_OVERLAPENDSAFTER:
2173 /* start_offset < rl_start, end_offset < rl_end */
2174 case RL_NOOVERLAP:
2175 break;
2176 }
2177 goto exit;
2178 }
2179
2180 /* MapFileBlockC() found a valid extent in the filefork. Search the
2181 * mapping information further for invalid file ranges.
2182 */
2183 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
2184 ap->a_foffset + (off_t)bytesContAvail - 1,
2185 &invalid_range);
2186 if (overlaptype != RL_NOOVERLAP) {
2187 switch(overlaptype) {
2188 case RL_MATCHINGOVERLAP:
2189 case RL_OVERLAPCONTAINSRANGE:
2190 case RL_OVERLAPSTARTSBEFORE:
2191 /* There's no valid block for this byte offset */
2192 *ap->a_bpn = (daddr64_t)-1;
2193 /* There's no point limiting the amount to be returned
2194 * if the invalid range that was hit extends all the way
2195 * to the EOF (i.e. there are no valid bytes between the
2196 * end of this range and the file's EOF):
2197 */
2198 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
2199 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
2200 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
2201 }
2202 break;
2203
2204 case RL_OVERLAPISCONTAINED:
2205 case RL_OVERLAPENDSAFTER:
2206 /* The range of interest hits an invalid block before the end: */
2207 if (invalid_range->rl_start == ap->a_foffset) {
2208 /* There's actually no valid information to be had starting here: */
2209 *ap->a_bpn = (daddr64_t)-1;
2210 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
2211 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
2212 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
2213 }
2214 } else {
2215 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
2216 }
2217 break;
2218
2219 case RL_NOOVERLAP:
2220 break;
2221 } /* end switch */
2222 if (bytesContAvail > ap->a_size)
2223 bytesContAvail = ap->a_size;
2224 }
2225
2226 exit:
2227 if (retval == 0) {
2228 if (ap->a_run)
2229 *ap->a_run = bytesContAvail;
2230
2231 if (ap->a_poff)
2232 *(int *)ap->a_poff = 0;
2233 }
2234
2235 if (tooklock)
2236 hfs_unlock(cp);
2237
2238 return (MacToVFSError(retval));
2239 }
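/*
 * Illustrative sketch (not part of the original source): how a hypothetical
 * caller might interpret the values filled in above.  A physical block
 * number of -1 means the byte range has no valid on-disk backing yet (it
 * overlaps an invalid/zero-fill range), and a_run bounds how many
 * contiguous bytes the answer covers.
 */
#if 0	/* example only, not compiled */
static void
example_use_blockmap_result(daddr64_t bpn, size_t run, char *buf)
{
	if (bpn == (daddr64_t)-1) {
		/* Hole or uncommitted range: a reader should zero-fill it. */
		bzero(buf, run);
	} else {
		/* Otherwise `run` bytes are readable starting at device block `bpn`. */
	}
}
#endif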
2240
2241
2242 /*
2243 * prepare and issue the I/O
2244 * buf_strategy knows how to deal
2245 * with requests that require
2246 * fragmented I/Os
2247 */
2248 int
2249 hfs_vnop_strategy(struct vnop_strategy_args *ap)
2250 {
2251 buf_t bp = ap->a_bp;
2252 vnode_t vp = buf_vnode(bp);
2253
2254 return (buf_strategy(VTOHFS(vp)->hfs_devvp, ap));
2255 }
2256
2257
2258 static int
2259 do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context)
2260 {
2261 register struct cnode *cp = VTOC(vp);
2262 struct filefork *fp = VTOF(vp);
2263 struct proc *p = vfs_context_proc(context);
2264 kauth_cred_t cred = vfs_context_ucred(context);
2265 int retval;
2266 off_t bytesToAdd;
2267 off_t actualBytesAdded;
2268 off_t filebytes;
2269 u_long fileblocks;
2270 int blksize;
2271 struct hfsmount *hfsmp;
2272 int lockflags;
2273
2274 blksize = VTOVCB(vp)->blockSize;
2275 fileblocks = fp->ff_blocks;
2276 filebytes = (off_t)fileblocks * (off_t)blksize;
2277
2278 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
2279 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
2280
2281 if (length < 0)
2282 return (EINVAL);
2283
2284 /* This should only happen with a corrupt filesystem */
2285 if ((off_t)fp->ff_size < 0)
2286 return (EINVAL);
2287
2288 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
2289 return (EFBIG);
2290
2291 hfsmp = VTOHFS(vp);
2292
2293 retval = E_NONE;
2294
2295 /* Files that are changing size are not hot file candidates. */
2296 if (hfsmp->hfc_stage == HFC_RECORDING) {
2297 fp->ff_bytesread = 0;
2298 }
2299
2300 /*
2301 * We cannot just check if fp->ff_size == length (as an optimization)
2302 * since there may be extra physical blocks that also need truncation.
2303 */
2304 #if QUOTA
2305 if ((retval = hfs_getinoquota(cp)))
2306 return(retval);
2307 #endif /* QUOTA */
2308
2309 /*
2310 * Lengthen the size of the file. We must ensure that the
2311 * last byte of the file is allocated. Since the smallest
2312 * value of ff_size is 0, length will be at least 1.
2313 */
2314 if (length > (off_t)fp->ff_size) {
2315 #if QUOTA
2316 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
2317 cred, 0);
2318 if (retval)
2319 goto Err_Exit;
2320 #endif /* QUOTA */
2321 /*
2322 * If we don't have enough physical space then
2323 * we need to extend the physical size.
2324 */
2325 if (length > filebytes) {
2326 int eflags;
2327 u_long blockHint = 0;
2328
2329 /* All or nothing and don't round up to clumpsize. */
2330 eflags = kEFAllMask | kEFNoClumpMask;
2331
2332 if (cred && suser(cred, NULL) != 0)
2333 eflags |= kEFReserveMask; /* keep a reserve */
2334
2335 /*
2336 * Allocate Journal and Quota files in metadata zone.
2337 */
2338 if (filebytes == 0 &&
2339 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
2340 hfs_virtualmetafile(cp)) {
2341 eflags |= kEFMetadataMask;
2342 blockHint = hfsmp->hfs_metazone_start;
2343 }
2344 if (hfs_start_transaction(hfsmp) != 0) {
2345 retval = EINVAL;
2346 goto Err_Exit;
2347 }
2348
2349 /* Protect extents b-tree and allocation bitmap */
2350 lockflags = SFL_BITMAP;
2351 if (overflow_extents(fp))
2352 lockflags |= SFL_EXTENTS;
2353 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2354
2355 while ((length > filebytes) && (retval == E_NONE)) {
2356 bytesToAdd = length - filebytes;
2357 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
2358 (FCB*)fp,
2359 bytesToAdd,
2360 blockHint,
2361 eflags,
2362 &actualBytesAdded));
2363
2364 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2365 if (actualBytesAdded == 0 && retval == E_NONE) {
2366 if (length > filebytes)
2367 length = filebytes;
2368 break;
2369 }
2370 } /* endwhile */
2371
2372 hfs_systemfile_unlock(hfsmp, lockflags);
2373
2374 if (hfsmp->jnl) {
2375 (void) hfs_update(vp, TRUE);
2376 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2377 }
2378
2379 hfs_end_transaction(hfsmp);
2380
2381 if (retval)
2382 goto Err_Exit;
2383
2384 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2385 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
2386 }
2387
2388 if (!(flags & IO_NOZEROFILL)) {
2389 if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
2390 struct rl_entry *invalid_range;
2391 off_t zero_limit;
2392
2393 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
2394 if (length < zero_limit) zero_limit = length;
2395
2396 if (length > (off_t)fp->ff_size) {
2397 struct timeval tv;
2398
2399 /* Extending the file: time to fill out the current last page w. zeroes? */
2400 if ((fp->ff_size & PAGE_MASK_64) &&
2401 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
2402 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
2403
2404 /* There's some valid data at the start of the (current) last page
2405 of the file, so zero out the remainder of that page to ensure the
2406 entire page contains valid data. Since there is no invalid range
2407 possible past the (current) eof, there's no need to remove anything
2408 from the invalid range list before calling cluster_write(): */
2409 hfs_unlock(cp);
2410 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
2411 fp->ff_size, (off_t)0,
2412 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
2413 hfs_lock(cp, HFS_FORCE_LOCK);
2414 if (retval) goto Err_Exit;
2415
2416 /* Merely invalidate the remaining area, if necessary: */
2417 if (length > zero_limit) {
2418 microuptime(&tv);
2419 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
2420 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
2421 }
2422 } else {
2423 /* The page containing the (current) eof is invalid: just add the
2424 remainder of the page to the invalid list, along with the area
2425 being newly allocated:
2426 */
2427 microuptime(&tv);
2428 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
2429 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
2430 }
2431 }
2432 } else {
2433 panic("hfs_truncate: invoked on non-UBC object?!");
2434 }
2435 }
2436 cp->c_touch_modtime = TRUE;
2437 fp->ff_size = length;
2438
2439 } else { /* Shorten the size of the file */
2440
2441 if ((off_t)fp->ff_size > length) {
2442 /* Any space previously marked as invalid is now irrelevant: */
2443 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2444 }
2445
2446 /*
2447 * Account for any unmapped blocks. Note that the new
2448 * file length can still end up with unmapped blocks.
2449 */
2450 if (fp->ff_unallocblocks > 0) {
2451 u_int32_t finalblks;
2452 u_int32_t loanedBlocks;
2453
2454 HFS_MOUNT_LOCK(hfsmp, TRUE);
2455
2456 loanedBlocks = fp->ff_unallocblocks;
2457 cp->c_blocks -= loanedBlocks;
2458 fp->ff_blocks -= loanedBlocks;
2459 fp->ff_unallocblocks = 0;
2460
2461 hfsmp->loanedBlocks -= loanedBlocks;
2462
2463 finalblks = (length + blksize - 1) / blksize;
2464 if (finalblks > fp->ff_blocks) {
2465 /* calculate required unmapped blocks */
2466 loanedBlocks = finalblks - fp->ff_blocks;
2467 hfsmp->loanedBlocks += loanedBlocks;
2468
2469 fp->ff_unallocblocks = loanedBlocks;
2470 cp->c_blocks += loanedBlocks;
2471 fp->ff_blocks += loanedBlocks;
2472 }
2473 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2474 }
2475
2476 /*
2477 * For a TBE process the deallocation of the file blocks is
2478 * delayed until the file is closed. And hfs_close calls
2479 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2480 * isn't set, we make sure this isn't a TBE process.
2481 */
2482 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2483 #if QUOTA
2484 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2485 #endif /* QUOTA */
2486 if (hfs_start_transaction(hfsmp) != 0) {
2487 retval = EINVAL;
2488 goto Err_Exit;
2489 }
2490
2491 if (fp->ff_unallocblocks == 0) {
2492 /* Protect extents b-tree and allocation bitmap */
2493 lockflags = SFL_BITMAP;
2494 if (overflow_extents(fp))
2495 lockflags |= SFL_EXTENTS;
2496 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2497
2498 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2499 (FCB*)fp, length, false));
2500
2501 hfs_systemfile_unlock(hfsmp, lockflags);
2502 }
2503 if (hfsmp->jnl) {
2504 if (retval == 0) {
2505 fp->ff_size = length;
2506 }
2507 (void) hfs_update(vp, TRUE);
2508 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2509 }
2510
2511 hfs_end_transaction(hfsmp);
2512
2513 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2514 if (retval)
2515 goto Err_Exit;
2516 #if QUOTA
2517 /* These are bytesreleased */
2518 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2519 #endif /* QUOTA */
2520 }
2521 /* Only set update flag if the logical length changes */
2522 if ((off_t)fp->ff_size != length)
2523 cp->c_touch_modtime = TRUE;
2524 fp->ff_size = length;
2525 }
2526 cp->c_touch_chgtime = TRUE; /* status changed */
2527 cp->c_touch_modtime = TRUE; /* file data was modified */
2528 retval = hfs_update(vp, MNT_WAIT);
2529 if (retval) {
2530 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2531 -1, -1, -1, retval, 0);
2532 }
2533
2534 Err_Exit:
2535
2536 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2537 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2538
2539 return (retval);
2540 }
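/*
 * Illustrative worked example (not part of the original source): the loaned
 * block accounting in the shortening path of do_hfs_truncate() above.  With
 * a 4 KB allocation block size, truncating to length 10000 gives
 * finalblks = (10000 + 4095) / 4096 == 3; if only 2 real blocks back the
 * fork once the loan is dropped, 1 block is put back on loan so the
 * shortened file remains fully accounted for.
 */
#if 0	/* example only, not compiled */
static u_int32_t
example_final_block_count(off_t length, int blksize)
{
	/* Same rounding as above: e.g. (10000 + 4095) / 4096 == 3 allocation blocks */
	return ((length + blksize - 1) / blksize);
}
#endif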
2541
2542
2543
2544 /*
2545 * Truncate a cnode to at most length size, freeing (or adding) the
2546 * disk blocks.
2547 */
2548 __private_extern__
2549 int
2550 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2551 vfs_context_t context)
2552 {
2553 struct filefork *fp = VTOF(vp);
2554 off_t filebytes;
2555 u_long fileblocks;
2556 int blksize, error = 0;
2557 struct cnode *cp = VTOC(vp);
2558
2559 /* Cannot truncate an HFS directory! */
2560 if (vnode_isdir(vp)) {
2561 return (EISDIR);
2562 }
2563 /* A swap file cannot change size. */
2564 if (vnode_isswap(vp) && (length != 0)) {
2565 return (EPERM);
2566 }
2567
2568 blksize = VTOVCB(vp)->blockSize;
2569 fileblocks = fp->ff_blocks;
2570 filebytes = (off_t)fileblocks * (off_t)blksize;
2571
2572 //
2573 // Have to do this here so that we don't wind up with
2574 // i/o pending for blocks that are about to be released
2575 // if we truncate the file.
2576 //
2577 // If skipsetsize is set, then the caller is responsible
2578 // for the ubc_setsize.
2579 //
2580 if (!skipsetsize)
2581 ubc_setsize(vp, length);
2582
2583 // have to loop truncating or growing files that are
2584 // really big because otherwise transactions can get
2585 // enormous and consume too many kernel resources.
2586
2587 if (length < filebytes) {
2588 while (filebytes > length) {
2589 if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
2590 filebytes -= HFS_BIGFILE_SIZE;
2591 } else {
2592 filebytes = length;
2593 }
2594 cp->c_flag |= C_FORCEUPDATE;
2595 error = do_hfs_truncate(vp, filebytes, flags, context);
2596 if (error)
2597 break;
2598 }
2599 } else if (length > filebytes) {
2600 while (filebytes < length) {
2601 if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
2602 filebytes += HFS_BIGFILE_SIZE;
2603 } else {
2604 filebytes = length;
2605 }
2606 cp->c_flag |= C_FORCEUPDATE;
2607 error = do_hfs_truncate(vp, filebytes, flags, context);
2608 if (error)
2609 break;
2610 }
2611 } else /* Same logical size */ {
2612
2613 error = do_hfs_truncate(vp, length, flags, context);
2614 }
2615 /* Files that are changing size are not hot file candidates. */
2616 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2617 fp->ff_bytesread = 0;
2618 }
2619
2620 return (error);
2621 }
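/*
 * Illustrative sketch (not part of the original source): the chunking
 * pattern used by hfs_truncate() above, isolated.  Shrinking from
 * `filebytes` to `length` proceeds in steps of at most HFS_BIGFILE_SIZE
 * (when the file has overflow extents) so that each do_hfs_truncate() call
 * stays within a bounded journal transaction.
 */
#if 0	/* example only, not compiled */
static void
example_truncate_in_chunks(off_t filebytes, off_t length, off_t chunk)
{
	while (filebytes > length) {
		if ((filebytes - length) > chunk)
			filebytes -= chunk;
		else
			filebytes = length;
		/* do_hfs_truncate(vp, filebytes, flags, context) would run here */
	}
}
#endif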
2622
2623
2624
2625 /*
2626 * Preallocate file storage space.
2627 */
2628 int
2629 hfs_vnop_allocate(struct vnop_allocate_args /* {
2630 vnode_t a_vp;
2631 off_t a_length;
2632 u_int32_t a_flags;
2633 off_t *a_bytesallocated;
2634 off_t a_offset;
2635 vfs_context_t a_context;
2636 } */ *ap)
2637 {
2638 struct vnode *vp = ap->a_vp;
2639 struct cnode *cp;
2640 struct filefork *fp;
2641 ExtendedVCB *vcb;
2642 off_t length = ap->a_length;
2643 off_t startingPEOF;
2644 off_t moreBytesRequested;
2645 off_t actualBytesAdded;
2646 off_t filebytes;
2647 u_long fileblocks;
2648 int retval, retval2;
2649 u_int32_t blockHint;
2650 u_int32_t extendFlags; /* For call to ExtendFileC */
2651 struct hfsmount *hfsmp;
2652 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2653 int lockflags;
2654
2655 *(ap->a_bytesallocated) = 0;
2656
2657 if (!vnode_isreg(vp))
2658 return (EISDIR);
2659 if (length < (off_t)0)
2660 return (EINVAL);
2661
2662 cp = VTOC(vp);
2663
2664 hfs_lock_truncate(cp, TRUE);
2665
2666 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2667 goto Err_Exit;
2668 }
2669
2670 fp = VTOF(vp);
2671 hfsmp = VTOHFS(vp);
2672 vcb = VTOVCB(vp);
2673
2674 fileblocks = fp->ff_blocks;
2675 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2676
2677 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2678 retval = EINVAL;
2679 goto Err_Exit;
2680 }
2681
2682 /* Fill in the flags word for the call to Extend the file */
2683
2684 extendFlags = kEFNoClumpMask;
2685 if (ap->a_flags & ALLOCATECONTIG)
2686 extendFlags |= kEFContigMask;
2687 if (ap->a_flags & ALLOCATEALL)
2688 extendFlags |= kEFAllMask;
2689 if (cred && suser(cred, NULL) != 0)
2690 extendFlags |= kEFReserveMask;
2691
2692 retval = E_NONE;
2693 blockHint = 0;
2694 startingPEOF = filebytes;
2695
2696 if (ap->a_flags & ALLOCATEFROMPEOF)
2697 length += filebytes;
2698 else if (ap->a_flags & ALLOCATEFROMVOL)
2699 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2700
2701 /* If no changes are necessary, then we're done */
2702 if (filebytes == length)
2703 goto Std_Exit;
2704
2705 /*
2706 * Lengthen the size of the file. We must ensure that the
2707 * last byte of the file is allocated. Since the smallest
2708 * value of filebytes is 0, length will be at least 1.
2709 */
2710 if (length > filebytes) {
2711 off_t total_bytes_added = 0, orig_request_size;
2712
2713 orig_request_size = moreBytesRequested = length - filebytes;
2714
2715 #if QUOTA
2716 retval = hfs_chkdq(cp,
2717 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2718 cred, 0);
2719 if (retval)
2720 goto Err_Exit;
2721
2722 #endif /* QUOTA */
2723 /*
2724 * Metadata zone checks.
2725 */
2726 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2727 /*
2728 * Allocate Journal and Quota files in metadata zone.
2729 */
2730 if (hfs_virtualmetafile(cp)) {
2731 extendFlags |= kEFMetadataMask;
2732 blockHint = hfsmp->hfs_metazone_start;
2733 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2734 (blockHint <= hfsmp->hfs_metazone_end)) {
2735 /*
2736 * Move blockHint outside metadata zone.
2737 */
2738 blockHint = hfsmp->hfs_metazone_end + 1;
2739 }
2740 }
2741
2742
2743 while ((length > filebytes) && (retval == E_NONE)) {
2744 off_t bytesRequested;
2745
2746 if (hfs_start_transaction(hfsmp) != 0) {
2747 retval = EINVAL;
2748 goto Err_Exit;
2749 }
2750
2751 /* Protect extents b-tree and allocation bitmap */
2752 lockflags = SFL_BITMAP;
2753 if (overflow_extents(fp))
2754 lockflags |= SFL_EXTENTS;
2755 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2756
2757 if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
2758 bytesRequested = HFS_BIGFILE_SIZE;
2759 } else {
2760 bytesRequested = moreBytesRequested;
2761 }
2762
2763 retval = MacToVFSError(ExtendFileC(vcb,
2764 (FCB*)fp,
2765 bytesRequested,
2766 blockHint,
2767 extendFlags,
2768 &actualBytesAdded));
2769
2770 if (retval == E_NONE) {
2771 *(ap->a_bytesallocated) += actualBytesAdded;
2772 total_bytes_added += actualBytesAdded;
2773 moreBytesRequested -= actualBytesAdded;
2774 if (blockHint != 0) {
2775 blockHint += actualBytesAdded / vcb->blockSize;
2776 }
2777 }
2778 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2779
2780 hfs_systemfile_unlock(hfsmp, lockflags);
2781
2782 if (hfsmp->jnl) {
2783 (void) hfs_update(vp, TRUE);
2784 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2785 }
2786
2787 hfs_end_transaction(hfsmp);
2788 }
2789
2790
2791 /*
2792 * if we get an error and no changes were made then exit
2793 * otherwise we must do the hfs_update to reflect the changes
2794 */
2795 if (retval && (startingPEOF == filebytes))
2796 goto Err_Exit;
2797
2798 /*
2799 * Adjust actualBytesAdded to be allocation block aligned, not
2800 * clump size aligned.
2801 * NOTE: What we report here does not affect the on-disk allocation
2802 * until the file is closed, when we truncate the file back to
2803 * allocation block size.
2804 */
2805 if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
2806 *(ap->a_bytesallocated) =
2807 roundup(orig_request_size, (off_t)vcb->blockSize);
2808
2809 } else { /* Shorten the size of the file */
2810
2811 if (fp->ff_size > length) {
2812 /*
2813 * Any buffers that are past the truncation point need to be
2814 * invalidated (to maintain buffer cache consistency).
2815 */
2816 }
2817
2818 retval = hfs_truncate(vp, length, 0, 0, ap->a_context);
2819 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2820
2821 /*
2822 * if we get an error and no changes were made then exit
2823 * otherwise we must do the hfs_update to reflect the changes
2824 */
2825 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2826 #if QUOTA
2827 /* These are bytesreleased */
2828 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2829 #endif /* QUOTA */
2830
2831 if (fp->ff_size > filebytes) {
2832 fp->ff_size = filebytes;
2833
2834 hfs_unlock(cp);
2835 ubc_setsize(vp, fp->ff_size);
2836 hfs_lock(cp, HFS_FORCE_LOCK);
2837 }
2838 }
2839
2840 Std_Exit:
2841 cp->c_touch_chgtime = TRUE;
2842 cp->c_touch_modtime = TRUE;
2843 retval2 = hfs_update(vp, MNT_WAIT);
2844
2845 if (retval == 0)
2846 retval = retval2;
2847 Err_Exit:
2848 hfs_unlock_truncate(cp, TRUE);
2849 hfs_unlock(cp);
2850 return (retval);
2851 }
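/*
 * Illustrative worked example (not part of the original source): the
 * rounding performed above when the allocator over-delivers.  Requesting
 * 10000 bytes on a volume with a 4 KB allocation block size may add 16384
 * bytes in one ExtendFileC() call, but the amount reported back to the
 * caller is roundup(10000, 4096) == 12288, since the excess is trimmed
 * back to allocation block size when the file is closed.
 */
#if 0	/* example only, not compiled */
static off_t
example_reported_allocation(off_t orig_request_size, off_t blockSize)
{
	/* Same rounding as above: report allocation-block alignment, not clump alignment. */
	return (roundup(orig_request_size, blockSize));
}
#endif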
2852
2853
2854 /*
2855 * Pagein for HFS filesystem
2856 */
2857 int
2858 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2859 /*
2860 struct vnop_pagein_args {
2861 vnode_t a_vp,
2862 upl_t a_pl,
2863 vm_offset_t a_pl_offset,
2864 off_t a_f_offset,
2865 size_t a_size,
2866 int a_flags
2867 vfs_context_t a_context;
2868 };
2869 */
2870 {
2871 vnode_t vp = ap->a_vp;
2872 int error;
2873
2874 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2875 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2876 /*
2877 * Keep track of blocks read.
2878 */
2879 if (!vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2880 struct cnode *cp;
2881 struct filefork *fp;
2882 int bytesread;
2883 int took_cnode_lock = 0;
2884
2885 cp = VTOC(vp);
2886 fp = VTOF(vp);
2887
2888 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2889 bytesread = fp->ff_size;
2890 else
2891 bytesread = ap->a_size;
2892
2893 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2894 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
2895 hfs_lock(cp, HFS_FORCE_LOCK);
2896 took_cnode_lock = 1;
2897 }
2898 /*
2899 * If this file hasn't been seen since the start of
2900 * the current sampling period then start over.
2901 */
2902 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2903 struct timeval tv;
2904
2905 fp->ff_bytesread = bytesread;
2906 microtime(&tv);
2907 cp->c_atime = tv.tv_sec;
2908 } else {
2909 fp->ff_bytesread += bytesread;
2910 }
2911 cp->c_touch_acctime = TRUE;
2912 if (took_cnode_lock)
2913 hfs_unlock(cp);
2914 }
2915 return (error);
2916 }
2917
2918 /*
2919 * Pageout for HFS filesystem.
2920 */
2921 int
2922 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2923 /*
2924 struct vnop_pageout_args {
2925 vnode_t a_vp,
2926 upl_t a_pl,
2927 vm_offset_t a_pl_offset,
2928 off_t a_f_offset,
2929 size_t a_size,
2930 int a_flags
2931 vfs_context_t a_context;
2932 };
2933 */
2934 {
2935 vnode_t vp = ap->a_vp;
2936 struct cnode *cp;
2937 struct filefork *fp;
2938 int retval;
2939 off_t filesize;
2940
2941 cp = VTOC(vp);
2942 fp = VTOF(vp);
2943
2944 if (vnode_isswap(vp)) {
2945 filesize = fp->ff_size;
2946 } else {
2947 off_t end_of_range;
2948 int tooklock = 0;
2949
2950 if (cp->c_lockowner != current_thread()) {
2951 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2952 if (!(ap->a_flags & UPL_NOCOMMIT)) {
2953 ubc_upl_abort_range(ap->a_pl,
2954 ap->a_pl_offset,
2955 ap->a_size,
2956 UPL_ABORT_FREE_ON_EMPTY);
2957 }
2958 return (retval);
2959 }
2960 tooklock = 1;
2961 }
2962
2963 filesize = fp->ff_size;
2964 end_of_range = ap->a_f_offset + ap->a_size - 1;
2965
2966 if (end_of_range >= filesize) {
2967 end_of_range = (off_t)(filesize - 1);
2968 }
2969 if (ap->a_f_offset < filesize) {
2970 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2971 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2972 }
2973
2974 if (tooklock) {
2975 hfs_unlock(cp);
2976 }
2977 }
2978
2979 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2980 ap->a_size, filesize, ap->a_flags);
2981
2982 /*
2983 * If data was written, and setuid or setgid bits are set and
2984 * this process is not the superuser then clear the setuid and
2985 * setgid bits as a precaution against tampering.
2986 */
2987 if ((retval == 0) &&
2988 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2989 (vfs_context_suser(ap->a_context) != 0)) {
2990 hfs_lock(cp, HFS_FORCE_LOCK);
2991 cp->c_mode &= ~(S_ISUID | S_ISGID);
2992 cp->c_touch_chgtime = TRUE;
2993 hfs_unlock(cp);
2994 }
2995 return (retval);
2996 }
2997
2998 /*
2999 * Intercept B-Tree node writes to unswap them if necessary.
3000 */
3001 int
3002 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
3003 {
3004 int retval = 0;
3005 register struct buf *bp = ap->a_bp;
3006 register struct vnode *vp = buf_vnode(bp);
3007 BlockDescriptor block;
3008
3009 /* Trap B-Tree writes */
3010 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
3011 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
3012 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
3013 (vp == VTOHFS(vp)->hfc_filevp)) {
3014
3015 /*
3016 * Swap and validate the node if it is in native byte order.
3017 * This is always true on big endian, so we always validate
3018 * before writing here. On little endian, the node typically has
3019 * been swapped and validated when it was written to the journal,
3020 * so we won't do anything here.
3021 */
3022 if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
3023 /* Prepare the block pointer */
3024 block.blockHeader = bp;
3025 block.buffer = (char *)buf_dataptr(bp);
3026 block.blockNum = buf_lblkno(bp);
3027 /* not found in cache ==> came from disk */
3028 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
3029 block.blockSize = buf_count(bp);
3030
3031 /* Endian un-swap B-Tree node */
3032 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
3033 if (retval)
3034 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
3035 }
3036 }
3037
3038 /* This buffer shouldn't be locked anymore but if it is, clear it */
3039 if ((buf_flags(bp) & B_LOCKED)) {
3040 // XXXdbg
3041 if (VTOHFS(vp)->jnl) {
3042 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
3043 }
3044 buf_clearflags(bp, B_LOCKED);
3045 }
3046 retval = vn_bwrite (ap);
3047
3048 return (retval);
3049 }
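/*
 * Illustrative sketch (not part of the original source): why the 0x000e
 * test above detects a node still in host byte order.  The last u_int16_t
 * of a B-tree node is the offset-table entry for record 0, which is always
 * 14 (the size of the node descriptor); once the node has been swapped to
 * big-endian on a little-endian host, those two bytes read back as 0x0e00
 * instead.
 */
#if 0	/* example only, not compiled */
static int
example_node_in_host_order(const void *node, size_t nodesize)
{
	u_int16_t rec0_offset = ((const u_int16_t *)((const char *)node + nodesize - 2))[0];
	return (rec0_offset == 0x000e);
}
#endif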
3050
3051 /*
3052 * Relocate a file to a new location on disk
3053 * cnode must be locked on entry
3054 *
3055 * Relocation occurs by cloning the file's data from its
3056 * current set of blocks to a new set of blocks. During
3057 * the relocation all of the blocks (old and new) are
3058 * owned by the file.
3059 *
3060 * -----------------
3061 * |///////////////|
3062 * -----------------
3063 * 0 N (file offset)
3064 *
3065 * ----------------- -----------------
3066 * |///////////////| | | STEP 1 (acquire new blocks)
3067 * ----------------- -----------------
3068 * 0 N N+1 2N
3069 *
3070 * ----------------- -----------------
3071 * |///////////////| |///////////////| STEP 2 (clone data)
3072 * ----------------- -----------------
3073 * 0 N N+1 2N
3074 *
3075 * -----------------
3076 * |///////////////| STEP 3 (head truncate blocks)
3077 * -----------------
3078 * 0 N
3079 *
3080 * During steps 2 and 3 page-outs to file offsets less
3081 * than or equal to N are suspended.
3082 *
3083 * During step 3 page-ins to the file get suspended.
3084 */
3085 __private_extern__
3086 int
3087 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
3088 struct proc *p)
3089 {
3090 struct cnode *cp;
3091 struct filefork *fp;
3092 struct hfsmount *hfsmp;
3093 u_int32_t headblks;
3094 u_int32_t datablks;
3095 u_int32_t blksize;
3096 u_int32_t growsize;
3097 u_int32_t nextallocsave;
3098 daddr64_t sector_a, sector_b;
3099 int eflags;
3100 off_t newbytes;
3101 int retval;
3102 int lockflags = 0;
3103 int took_trunc_lock = 0;
3104 int started_tr = 0;
3105 enum vtype vnodetype;
3106
3107 vnodetype = vnode_vtype(vp);
3108 if (vnodetype != VREG && vnodetype != VLNK) {
3109 return (EPERM);
3110 }
3111
3112 hfsmp = VTOHFS(vp);
3113 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
3114 return (ENOSPC);
3115 }
3116
3117 cp = VTOC(vp);
3118 fp = VTOF(vp);
3119 if (fp->ff_unallocblocks)
3120 return (EINVAL);
3121 blksize = hfsmp->blockSize;
3122 if (blockHint == 0)
3123 blockHint = hfsmp->nextAllocation;
3124
3125 if ((fp->ff_size > 0x7fffffff) ||
3126 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
3127 return (EFBIG);
3128 }
3129
3130 //
3131 // We do not believe that this call to hfs_fsync() is
3132 // necessary and it causes a journal transaction
3133 // deadlock so we are removing it.
3134 //
3135 //if (vnodetype == VREG && !vnode_issystem(vp)) {
3136 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
3137 // if (retval)
3138 // return (retval);
3139 //}
3140
3141 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
3142 hfs_unlock(cp);
3143 hfs_lock_truncate(cp, TRUE);
3144 /* Force lock since callers expect the lock to be held. */
3145 if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
3146 hfs_unlock_truncate(cp, TRUE);
3147 return (retval);
3148 }
3149 /* No need to continue if file was removed. */
3150 if (cp->c_flag & C_NOEXISTS) {
3151 hfs_unlock_truncate(cp, TRUE);
3152 return (ENOENT);
3153 }
3154 took_trunc_lock = 1;
3155 }
3156 headblks = fp->ff_blocks;
3157 datablks = howmany(fp->ff_size, blksize);
3158 growsize = datablks * blksize;
3159 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
3160 if (blockHint >= hfsmp->hfs_metazone_start &&
3161 blockHint <= hfsmp->hfs_metazone_end)
3162 eflags |= kEFMetadataMask;
3163
3164 if (hfs_start_transaction(hfsmp) != 0) {
3165 if (took_trunc_lock)
3166 hfs_unlock_truncate(cp, TRUE);
3167 return (EINVAL);
3168 }
3169 started_tr = 1;
3170 /*
3171 * Protect the extents b-tree and the allocation bitmap
3172 * during MapFileBlockC and ExtendFileC operations.
3173 */
3174 lockflags = SFL_BITMAP;
3175 if (overflow_extents(fp))
3176 lockflags |= SFL_EXTENTS;
3177 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3178
3179 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
3180 if (retval) {
3181 retval = MacToVFSError(retval);
3182 goto out;
3183 }
3184
3185 /*
3186 * STEP 1 - acquire new allocation blocks.
3187 */
3188 nextallocsave = hfsmp->nextAllocation;
3189 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
3190 if (eflags & kEFMetadataMask) {
3191 HFS_MOUNT_LOCK(hfsmp, TRUE);
3192 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
3193 MarkVCBDirty(hfsmp);
3194 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3195 }
3196
3197 retval = MacToVFSError(retval);
3198 if (retval == 0) {
3199 cp->c_flag |= C_MODIFIED;
3200 if (newbytes < growsize) {
3201 retval = ENOSPC;
3202 goto restore;
3203 } else if (fp->ff_blocks < (headblks + datablks)) {
3204 printf("hfs_relocate: allocation failed");
3205 retval = ENOSPC;
3206 goto restore;
3207 }
3208
3209 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
3210 if (retval) {
3211 retval = MacToVFSError(retval);
3212 } else if ((sector_a + 1) == sector_b) {
3213 retval = ENOSPC;
3214 goto restore;
3215 } else if ((eflags & kEFMetadataMask) &&
3216 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
3217 hfsmp->hfs_metazone_end)) {
3218 const char * filestr;
3219 char emptystr = '\0';
3220
3221 if (cp->c_desc.cd_nameptr != NULL) {
3222 filestr = (const char *)&cp->c_desc.cd_nameptr[0];
3223 } else if (vnode_name(vp) != NULL) {
3224 filestr = vnode_name(vp);
3225 } else {
3226 filestr = &emptystr;
3227 }
3228 printf("hfs_relocate: %s didn't move into MDZ (%d blks)\n", filestr, fp->ff_blocks);
3229 retval = ENOSPC;
3230 goto restore;
3231 }
3232 }
3233 /* Done with system locks and journal for now. */
3234 hfs_systemfile_unlock(hfsmp, lockflags);
3235 lockflags = 0;
3236 hfs_end_transaction(hfsmp);
3237 started_tr = 0;
3238
3239 if (retval) {
3240 /*
3241 * Check to see if failure is due to excessive fragmentation.
3242 */
3243 if ((retval == ENOSPC) &&
3244 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
3245 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
3246 }
3247 goto out;
3248 }
3249 /*
3250 * STEP 2 - clone file data into the new allocation blocks.
3251 */
3252
3253 if (vnodetype == VLNK)
3254 retval = hfs_clonelink(vp, blksize, cred, p);
3255 else if (vnode_issystem(vp))
3256 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
3257 else
3258 retval = hfs_clonefile(vp, headblks, datablks, blksize);
3259
3260 /* Start transaction for step 3 or for a restore. */
3261 if (hfs_start_transaction(hfsmp) != 0) {
3262 retval = EINVAL;
3263 goto out;
3264 }
3265 started_tr = 1;
3266 if (retval)
3267 goto restore;
3268
3269 /*
3270 * STEP 3 - switch to cloned data and remove old blocks.
3271 */
3272 lockflags = SFL_BITMAP;
3273 if (overflow_extents(fp))
3274 lockflags |= SFL_EXTENTS;
3275 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3276
3277 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
3278
3279 hfs_systemfile_unlock(hfsmp, lockflags);
3280 lockflags = 0;
3281 if (retval)
3282 goto restore;
3283 out:
3284 if (took_trunc_lock)
3285 hfs_unlock_truncate(cp, TRUE);
3286
3287 if (lockflags) {
3288 hfs_systemfile_unlock(hfsmp, lockflags);
3289 lockflags = 0;
3290 }
3291
3292 /* Push cnode's new extent data to disk. */
3293 if (retval == 0) {
3294 (void) hfs_update(vp, MNT_WAIT);
3295 }
3296 if (hfsmp->jnl) {
3297 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
3298 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
3299 else
3300 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
3301 }
3302 exit:
3303 if (started_tr)
3304 hfs_end_transaction(hfsmp);
3305
3306 return (retval);
3307
3308 restore:
3309 if (fp->ff_blocks == headblks) {
3310 if (took_trunc_lock)
3311 hfs_unlock_truncate(cp, TRUE);
3312 goto exit;
3313 }
3314 /*
3315 * Give back any newly allocated space.
3316 */
3317 if (lockflags == 0) {
3318 lockflags = SFL_BITMAP;
3319 if (overflow_extents(fp))
3320 lockflags |= SFL_EXTENTS;
3321 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3322 }
3323
3324 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
3325
3326 hfs_systemfile_unlock(hfsmp, lockflags);
3327 lockflags = 0;
3328
3329 if (took_trunc_lock)
3330 hfs_unlock_truncate(cp, TRUE);
3331 goto exit;
3332 }
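/*
 * Illustrative worked example (not part of the original source): the
 * sector-to-allocation-block conversion used in the metadata zone check
 * inside hfs_relocate() above.  With 512-byte device blocks and 4 KB
 * allocation blocks, device sector 81920 corresponds to allocation block
 * (81920 * 512) / 4096 == 10240; a metadata relocation is rejected when
 * that index lands past hfs_metazone_end.
 */
#if 0	/* example only, not compiled */
static u_int64_t
example_sector_to_allocation_block(u_int64_t sector, u_int32_t phys_block_size,
	u_int32_t alloc_block_size)
{
	return ((sector * phys_block_size) / alloc_block_size);
}
#endif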
3333
3334
3335 /*
3336 * Clone a symlink.
3337 *
3338 */
3339 static int
3340 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
3341 {
3342 struct buf *head_bp = NULL;
3343 struct buf *tail_bp = NULL;
3344 int error;
3345
3346
3347 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
3348 if (error)
3349 goto out;
3350
3351 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
3352 if (tail_bp == NULL) {
3353 error = EIO;
3354 goto out;
3355 }
3356 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
3357 error = (int)buf_bwrite(tail_bp);
3358 out:
3359 if (head_bp) {
3360 buf_markinvalid(head_bp);
3361 buf_brelse(head_bp);
3362 }
3363 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
3364
3365 return (error);
3366 }
3367
3368 /*
3369 * Clone a file's data within the file.
3370 *
3371 */
3372 static int
3373 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
3374 {
3375 caddr_t bufp;
3376 size_t writebase;
3377 size_t bufsize;
3378 size_t copysize;
3379 size_t iosize;
3380 off_t filesize;
3381 size_t offset;
3382 uio_t auio;
3383 int error = 0;
3384
3385 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
3386 writebase = blkstart * blksize;
3387 copysize = blkcnt * blksize;
3388 iosize = bufsize = MIN(copysize, 128 * 1024);
3389 offset = 0;
3390
3391 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3392 return (ENOMEM);
3393 }
3394 hfs_unlock(VTOC(vp));
3395
3396 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
3397
3398 while (offset < copysize) {
3399 iosize = MIN(copysize - offset, iosize);
3400
3401 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
3402 uio_addiov(auio, (uintptr_t)bufp, iosize);
3403
3404 error = cluster_read(vp, auio, copysize, IO_NOCACHE);
3405 if (error) {
3406 printf("hfs_clonefile: cluster_read failed - %d\n", error);
3407 break;
3408 }
3409 if (uio_resid(auio) != 0) {
3410 printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
3411 error = EIO;
3412 break;
3413 }
3414
3415 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
3416 uio_addiov(auio, (uintptr_t)bufp, iosize);
3417
3418 error = cluster_write(vp, auio, filesize + offset,
3419 filesize + offset + iosize,
3420 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
3421 if (error) {
3422 printf("hfs_clonefile: cluster_write failed - %d\n", error);
3423 break;
3424 }
3425 if (uio_resid(auio) != 0) {
3426 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
3427 error = EIO;
3428 break;
3429 }
3430 offset += iosize;
3431 }
3432 uio_free(auio);
3433
3434 /*
3435 * No need to call ubc_sync_range or hfs_invalbuf
3436 * since the file was copied using IO_NOCACHE.
3437 */
3438
3439 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3440
3441 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
3442 return (error);
3443 }
3444
3445 /*
3446 * Clone a system (metadata) file.
3447 *
3448 */
3449 static int
3450 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
3451 kauth_cred_t cred, struct proc *p)
3452 {
3453 caddr_t bufp;
3454 char * offset;
3455 size_t bufsize;
3456 size_t iosize;
3457 struct buf *bp = NULL;
3458 daddr64_t blkno;
3459 daddr64_t blk;
3460 daddr64_t start_blk;
3461 daddr64_t last_blk;
3462 int breadcnt;
3463 int i;
3464 int error = 0;
3465
3466
3467 iosize = GetLogicalBlockSize(vp);
3468 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
3469 breadcnt = bufsize / iosize;
3470
3471 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3472 return (ENOMEM);
3473 }
3474 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3475 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
3476 blkno = 0;
3477
3478 while (blkno < last_blk) {
3479 /*
3480 * Read up to a megabyte
3481 */
3482 offset = bufp;
3483 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3484 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3485 if (error) {
3486 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3487 goto out;
3488 }
3489 if (buf_count(bp) != iosize) {
3490 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3491 goto out;
3492 }
3493 bcopy((char *)buf_dataptr(bp), offset, iosize);
3494
3495 buf_markinvalid(bp);
3496 buf_brelse(bp);
3497 bp = NULL;
3498
3499 offset += iosize;
3500 }
3501
3502 /*
3503 * Write up to a megabyte
3504 */
3505 offset = bufp;
3506 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3507 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3508 if (bp == NULL) {
3509 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3510 error = EIO;
3511 goto out;
3512 }
3513 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3514 error = (int)buf_bwrite(bp);
3515 bp = NULL;
3516 if (error)
3517 goto out;
3518 offset += iosize;
3519 }
3520 }
3521 out:
3522 if (bp) {
3523 buf_brelse(bp);
3524 }
3525
3526 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3527
3528 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3529
3530 return (error);
3531 }