1 /*
2 * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* @(#)hfs_readwrite.c 1.0
29 *
30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
31 *
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
33 *
34 */
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/resourcevar.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/filedesc.h>
42 #include <sys/stat.h>
43 #include <sys/buf.h>
44 #include <sys/proc.h>
45 #include <sys/kauth.h>
46 #include <sys/vnode.h>
47 #include <sys/vnode_internal.h>
48 #include <sys/uio.h>
49 #include <sys/vfs_context.h>
50 #include <sys/fsevents.h>
51 #include <kern/kalloc.h>
52 #include <sys/disk.h>
53 #include <sys/sysctl.h>
54
55 #include <miscfs/specfs/specdev.h>
56
57 #include <sys/ubc.h>
58 #include <sys/ubc_internal.h>
59
60 #include <vm/vm_pageout.h>
61 #include <vm/vm_kern.h>
62
63 #include <sys/kdebug.h>
64
65 #include "hfs.h"
66 #include "hfs_attrlist.h"
67 #include "hfs_endian.h"
68 #include "hfs_fsctl.h"
69 #include "hfs_quota.h"
70 #include "hfscommon/headers/FileMgrInternal.h"
71 #include "hfscommon/headers/BTreesInternal.h"
72 #include "hfs_cnode.h"
73 #include "hfs_dbg.h"
74
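/*
 * A transfer qualifies for the cluster layer when its size is a multiple of
 * 4 KiB and no larger than half of MAXPHYSIO.
 */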
75 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
76
77 enum {
78 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
79 };
80
81 /* from bsd/vfs/vfs_cluster.c */
82 extern int is_file_clean(vnode_t vp, off_t filesize);
83 /* from bsd/hfs/hfs_vfsops.c */
84 extern int hfs_vfs_vget(struct mount *mp, ino64_t ino, struct vnode **vpp, vfs_context_t context);
85
86 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
87 static int hfs_clonefile(struct vnode *, int, int, int);
88 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
89
90 int flush_cache_on_write = 0;
91 SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
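/*
 * The knob above is exported as kern.flush_cache_on_write; for example,
 * `sysctl -w kern.flush_cache_on_write=1` makes hfs_vnop_write (below) issue a
 * DKIOCSYNCHRONIZECACHE to the device after writes to uncached files.
 */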
92
93
94 /*
95 * Read data from a file.
96 */
97 int
98 hfs_vnop_read(struct vnop_read_args *ap)
99 {
100 uio_t uio = ap->a_uio;
101 struct vnode *vp = ap->a_vp;
102 struct cnode *cp;
103 struct filefork *fp;
104 struct hfsmount *hfsmp;
105 off_t filesize;
106 off_t filebytes;
107 off_t start_resid = uio_resid(uio);
108 off_t offset = uio_offset(uio);
109 int retval = 0;
110
111
112 /* Preflight checks */
113 if (!vnode_isreg(vp)) {
114 /* can only read regular files */
115 if (vnode_isdir(vp))
116 return (EISDIR);
117 else
118 return (EPERM);
119 }
120 if (start_resid == 0)
121 return (0); /* Nothing left to do */
122 if (offset < 0)
123 return (EINVAL); /* can't read from a negative offset */
124
125 cp = VTOC(vp);
126 fp = VTOF(vp);
127 hfsmp = VTOHFS(vp);
128
129 /* Protect against a size change. */
130 hfs_lock_truncate(cp, 0);
131
132 filesize = fp->ff_size;
133 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
134 if (offset > filesize) {
135 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
136 (offset > (off_t)MAXHFSFILESIZE)) {
137 retval = EFBIG;
138 }
139 goto exit;
140 }
141
142 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
143 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
144
145 retval = cluster_read(vp, uio, filesize, ap->a_ioflag);
146
147 cp->c_touch_acctime = TRUE;
148
149 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
150 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
151
152 /*
153 * Keep track of blocks read
154 */
155 if (hfsmp->hfc_stage == HFC_RECORDING && retval == 0) {
156 int took_cnode_lock = 0;
157 off_t bytesread;
158
159 bytesread = start_resid - uio_resid(uio);
160
161 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
162 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
163 hfs_lock(cp, HFS_FORCE_LOCK);
164 took_cnode_lock = 1;
165 }
166 /*
167 * If this file hasn't been seen since the start of
168 * the current sampling period then start over.
169 */
170 if (cp->c_atime < hfsmp->hfc_timebase) {
171 struct timeval tv;
172
173 fp->ff_bytesread = bytesread;
174 microtime(&tv);
175 cp->c_atime = tv.tv_sec;
176 } else {
177 fp->ff_bytesread += bytesread;
178 }
179 if (took_cnode_lock)
180 hfs_unlock(cp);
181 }
182 exit:
183 hfs_unlock_truncate(cp, 0);
184 return (retval);
185 }
186
187 /*
188 * Write data to a file.
189 */
190 int
191 hfs_vnop_write(struct vnop_write_args *ap)
192 {
193 uio_t uio = ap->a_uio;
194 struct vnode *vp = ap->a_vp;
195 struct cnode *cp;
196 struct filefork *fp;
197 struct hfsmount *hfsmp;
198 kauth_cred_t cred = NULL;
199 off_t origFileSize;
200 off_t writelimit;
201 off_t bytesToAdd = 0;
202 off_t actualBytesAdded;
203 off_t filebytes;
204 off_t offset;
205 size_t resid;
206 int eflags;
207 int ioflag = ap->a_ioflag;
208 int retval = 0;
209 int lockflags;
210 int cnode_locked = 0;
211 int partialwrite = 0;
212 int exclusive_lock = 0;
213
214 // LP64todo - fix this! uio_resid may be 64-bit value
215 resid = uio_resid(uio);
216 offset = uio_offset(uio);
217
218 if (ioflag & IO_APPEND) {
219 exclusive_lock = 1;
220 }
221
222 if (offset < 0)
223 return (EINVAL);
224 if (resid == 0)
225 return (E_NONE);
226 if (!vnode_isreg(vp))
227 return (EPERM); /* Can only write regular files */
228
229 cp = VTOC(vp);
230 fp = VTOF(vp);
231 hfsmp = VTOHFS(vp);
232
233 eflags = kEFDeferMask; /* defer file block allocations */
234 #ifdef HFS_SPARSE_DEV
235 /*
236 * When the underlying device is sparse and space
237 * is low (< 8MB), stop doing delayed allocations
238 * and begin doing synchronous I/O.
239 */
240 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
241 (hfs_freeblks(hfsmp, 0) < 2048)) {
242 eflags &= ~kEFDeferMask;
243 ioflag |= IO_SYNC;
244 }
245 #endif /* HFS_SPARSE_DEV */
246
247 again:
248 /* Protect against a size change. */
249 hfs_lock_truncate(cp, exclusive_lock);
250
251 if (ioflag & IO_APPEND) {
252 uio_setoffset(uio, fp->ff_size);
253 offset = fp->ff_size;
254 }
255 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
256 retval = EPERM;
257 goto exit;
258 }
259
260 origFileSize = fp->ff_size;
261 writelimit = offset + resid;
262 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
263
264 /* If the truncate lock is shared, and if we either have virtual
265 * blocks or will need to extend the file, upgrade the truncate
266 * lock to exclusive. If the upgrade fails, we lose the lock and
267 * have to acquire the exclusive lock again.
268 */
269 if ((exclusive_lock == 0) &&
270 ((fp->ff_unallocblocks != 0) || (writelimit > filebytes))) {
271 exclusive_lock = 1;
272 /* Lock upgrade failed and we lost our shared lock, try again */
273 if (lck_rw_lock_shared_to_exclusive(&cp->c_truncatelock) == FALSE) {
274 goto again;
275 }
276 }
277
278 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
279 goto exit;
280 }
281 cnode_locked = 1;
282
283 if (!exclusive_lock) {
284 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
285 (int)offset, uio_resid(uio), (int)fp->ff_size,
286 (int)filebytes, 0);
287 }
288
289 /* If the file does not need to be extended, skip the allocation below. */
290 if (writelimit <= filebytes) {
291 goto sizeok;
292 }
293
294 cred = vfs_context_ucred(ap->a_context);
295 bytesToAdd = writelimit - filebytes;
296
297 #if QUOTA
298 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
299 cred, 0);
300 if (retval)
301 goto exit;
302 #endif /* QUOTA */
303
304 if (hfs_start_transaction(hfsmp) != 0) {
305 retval = EINVAL;
306 goto exit;
307 }
308
309 while (writelimit > filebytes) {
310 bytesToAdd = writelimit - filebytes;
311 if (cred && suser(cred, NULL) != 0)
312 eflags |= kEFReserveMask;
313
314 /* Protect extents b-tree and allocation bitmap */
315 lockflags = SFL_BITMAP;
316 if (overflow_extents(fp))
317 lockflags |= SFL_EXTENTS;
318 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
319
320 /* Files that are changing size are not hot file candidates. */
321 if (hfsmp->hfc_stage == HFC_RECORDING) {
322 fp->ff_bytesread = 0;
323 }
324 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
325 0, eflags, &actualBytesAdded));
326
327 hfs_systemfile_unlock(hfsmp, lockflags);
328
329 if ((actualBytesAdded == 0) && (retval == E_NONE))
330 retval = ENOSPC;
331 if (retval != E_NONE)
332 break;
333 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
334 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
335 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
336 }
337 (void) hfs_update(vp, TRUE);
338 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
339 (void) hfs_end_transaction(hfsmp);
340
341 /*
342 * If we didn't grow the file enough, try a partial write.
343 * POSIX expects this behavior.
344 */
345 if ((retval == ENOSPC) && (filebytes > offset)) {
346 retval = 0;
347 partialwrite = 1;
348 uio_setresid(uio, (uio_resid(uio) - bytesToAdd));
349 resid -= bytesToAdd;
350 writelimit = filebytes;
351 }
352 sizeok:
353 if (retval == E_NONE) {
354 off_t filesize;
355 off_t zero_off;
356 off_t tail_off;
357 off_t inval_start;
358 off_t inval_end;
359 off_t io_start;
360 int lflag;
361 struct rl_entry *invalid_range;
362
363 if (writelimit > fp->ff_size)
364 filesize = writelimit;
365 else
366 filesize = fp->ff_size;
367
368 lflag = ioflag & ~(IO_TAILZEROFILL | IO_HEADZEROFILL | IO_NOZEROVALID | IO_NOZERODIRTY);
369
370 if (offset <= fp->ff_size) {
371 zero_off = offset & ~PAGE_MASK_64;
372
373 /* Check whether the area between zero_off and the start
374 of the transfer is invalid and should be zero-filled
375 as part of the transfer:
376 */
377 if (offset > zero_off) {
378 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
379 lflag |= IO_HEADZEROFILL;
380 }
381 } else {
382 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
383
384 /* The bytes between fp->ff_size and uio->uio_offset must never be
385 read without being zeroed. The current last block is filled with zeroes
386 if it holds valid data; in all cases we merely do a little bookkeeping
387 to track the area from the end of the current last page to the start of
388 the area actually written. For the same reason only the bytes up to the
389 start of the page where this write will start are invalidated; any remainder
390 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
391
392 Note that inval_start, the start of the page after the current EOF,
393 may be past the start of the write, in which case the zeroing
394 will be handled by the cluster_write of the actual data.
395 */
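/* A worked example of the arithmetic above, assuming 4 KiB pages: with
   ff_size == 0x1800 and a write starting at offset == 0x5200, inval_start
   rounds up to 0x2000, inval_end rounds down to 0x5000 and zero_off starts
   at 0x1800. If the EOF page is valid, bytes 0x1800-0x1FFF are zeroed
   explicitly, 0x2000-0x4FFF are only marked invalid, and the head zero-fill
   of the main cluster_write covers 0x5000-0x51FF. */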
396 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
397 inval_end = offset & ~PAGE_MASK_64;
398 zero_off = fp->ff_size;
399
400 if ((fp->ff_size & PAGE_MASK_64) &&
401 (rl_scan(&fp->ff_invalidranges,
402 eof_page_base,
403 fp->ff_size - 1,
404 &invalid_range) != RL_NOOVERLAP)) {
405 /* The page containing the EOF is not valid, so the
406 entire page must be made inaccessible now. If the write
407 starts on a page beyond the page containing the eof
408 (inval_end > eof_page_base), add the
409 whole page to the range to be invalidated. Otherwise
410 (i.e. if the write starts on the same page), zero-fill
411 the entire page explicitly now:
412 */
413 if (inval_end > eof_page_base) {
414 inval_start = eof_page_base;
415 } else {
416 zero_off = eof_page_base;
417 };
418 };
419
420 if (inval_start < inval_end) {
421 struct timeval tv;
422 /* There's some range of data that's going to be marked invalid */
423
424 if (zero_off < inval_start) {
425 /* The pages between inval_start and inval_end are going to be invalidated,
426 and the actual write will start on a page past inval_end. Now's the last
427 chance to zero-fill the page containing the EOF:
428 */
429 hfs_unlock(cp);
430 cnode_locked = 0;
431 retval = cluster_write(vp, (uio_t) 0,
432 fp->ff_size, inval_start,
433 zero_off, (off_t)0,
434 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
435 hfs_lock(cp, HFS_FORCE_LOCK);
436 cnode_locked = 1;
437 if (retval) goto ioerr_exit;
438 offset = uio_offset(uio);
439 };
440
441 /* Mark the remaining area of the newly allocated space as invalid: */
442 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
443 microuptime(&tv);
444 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
445 zero_off = fp->ff_size = inval_end;
446 };
447
448 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
449 };
450
451 /* Check to see whether the area between the end of the write and the end of
452 the page it falls in is invalid and should be zero-filled as part of the transfer:
453 */
454 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
455 if (tail_off > filesize) tail_off = filesize;
456 if (tail_off > writelimit) {
457 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
458 lflag |= IO_TAILZEROFILL;
459 };
460 };
461
462 /*
463 * if the write starts beyond the current EOF (possibly advanced in the
464 * zeroing of the last block, above), then we'll zero fill from the current EOF
465 * to where the write begins:
466 *
467 * NOTE: If (and ONLY if) the portion of the file about to be written is
468 * before the current EOF it might be marked as invalid now and must be
469 * made readable (removed from the invalid ranges) before cluster_write
470 * tries to write it:
471 */
472 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
473 if (io_start < fp->ff_size) {
474 off_t io_end;
475
476 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
477 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
478 };
479
480 hfs_unlock(cp);
481 cnode_locked = 0;
482 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
483 tail_off, lflag | IO_NOZERODIRTY);
484 if (retval) {
485 goto ioerr_exit;
486 }
487 offset = uio_offset(uio);
488 if (offset > fp->ff_size) {
489 fp->ff_size = offset;
490
491 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
492 /* Files that are changing size are not hot file candidates. */
493 if (hfsmp->hfc_stage == HFC_RECORDING)
494 fp->ff_bytesread = 0;
495 }
496 if (resid > uio_resid(uio)) {
497 cp->c_touch_chgtime = TRUE;
498 cp->c_touch_modtime = TRUE;
499 }
500 }
501 if (partialwrite) {
502 uio_setresid(uio, (uio_resid(uio) + bytesToAdd));
503 resid += bytesToAdd;
504 }
505
506 // XXXdbg - see radar 4871353 for more info
507 {
508 if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
509 VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
510 }
511 }
512 HFS_KNOTE(vp, NOTE_WRITE);
513
514 ioerr_exit:
515 /*
516 * If we successfully wrote any data and we are not the superuser,
517 * we clear the setuid and setgid bits as a precaution against
518 * tampering.
519 */
520 if (cp->c_mode & (S_ISUID | S_ISGID)) {
521 cred = vfs_context_ucred(ap->a_context);
522 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
523 if (!cnode_locked) {
524 hfs_lock(cp, HFS_FORCE_LOCK);
525 cnode_locked = 1;
526 }
527 cp->c_mode &= ~(S_ISUID | S_ISGID);
528 }
529 }
530 if (retval) {
531 if (ioflag & IO_UNIT) {
532 if (!cnode_locked) {
533 hfs_lock(cp, HFS_FORCE_LOCK);
534 cnode_locked = 1;
535 }
536 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
537 0, ap->a_context);
538 // LP64todo - fix this! resid needs to be user_ssize_t
539 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
540 uio_setresid(uio, resid);
541 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
542 }
543 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
544 if (!cnode_locked) {
545 hfs_lock(cp, HFS_FORCE_LOCK);
546 cnode_locked = 1;
547 }
548 retval = hfs_update(vp, TRUE);
549 }
550 /* Updating vcbWrCnt doesn't need to be atomic. */
551 hfsmp->vcbWrCnt++;
552
553 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
554 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
555 exit:
556 if (cnode_locked)
557 hfs_unlock(cp);
558 hfs_unlock_truncate(cp, exclusive_lock);
559 return (retval);
560 }
561
562 /* support for the "bulk-access" fcntl */
563
564 #define CACHE_LEVELS 16
565 #define NUM_CACHE_ENTRIES (64*16)
566 #define PARENT_IDS_FLAG 0x100
567
568 struct access_cache {
569 int numcached;
570 int cachehits; /* these two for statistics gathering */
571 int lookups;
572 unsigned int *acache;
573 unsigned char *haveaccess;
574 };
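/*
 * acache[] holds the cached directory ids in ascending order so that
 * cache_binSearch() below can binary-search it; haveaccess[] is a parallel
 * array holding the cached result for each id (0 for access granted, an errno
 * such as EACCES otherwise, or ESRCH for a scope-only entry).
 */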
575
576 struct access_t {
577 uid_t uid; /* IN: effective user id */
578 short flags; /* IN: access requested (i.e. R_OK) */
579 short num_groups; /* IN: number of groups user belongs to */
580 int num_files; /* IN: number of files to process */
581 int *file_ids; /* IN: array of file ids */
582 gid_t *groups; /* IN: array of groups */
583 short *access; /* OUT: access info for each file (0 for 'has access') */
584 };
585
586 struct user_access_t {
587 uid_t uid; /* IN: effective user id */
588 short flags; /* IN: access requested (i.e. R_OK) */
589 short num_groups; /* IN: number of groups user belongs to */
590 int num_files; /* IN: number of files to process */
591 user_addr_t file_ids; /* IN: array of file ids */
592 user_addr_t groups; /* IN: array of groups */
593 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
594 };
595
596
597 // these are the "extended" versions of the above structures
598 // note that it is crucial that they be a different size than
599 // the regular versions
600 struct ext_access_t {
601 uint32_t flags; /* IN: access requested (i.e. R_OK) */
602 uint32_t num_files; /* IN: number of files to process */
603 uint32_t map_size; /* IN: size of the bit map */
604 uint32_t *file_ids; /* IN: Array of file ids */
605 char *bitmap; /* OUT: hash-bitmap of interesting directory ids */
606 short *access; /* OUT: access info for each file (0 for 'has access') */
607 uint32_t num_parents; /* future use */
608 cnid_t *parents; /* future use */
609 };
610
611 struct ext_user_access_t {
612 uint32_t flags; /* IN: access requested (i.e. R_OK) */
613 uint32_t num_files; /* IN: number of files to process */
614 uint32_t map_size; /* IN: size of the bit map */
615 user_addr_t file_ids; /* IN: array of file ids */
616 user_addr_t bitmap; /* IN: array of groups */
617 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
618 uint32_t num_parents;/* future use */
619 user_addr_t parents;/* future use */
620 };
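/*
 * hfs_vnop_ioctl() passes the sizeof() of the structure it was handed as the
 * arg_size parameter of do_bulk_access_check(), which is how the old and
 * extended layouts are told apart -- hence the requirement above that the two
 * flavors never share a size.
 */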
621
622
623 /*
624 * Perform a binary search for the given parent_id. Return value is
625 * the index if there is a match. If no_match_indexp is non-NULL it
626 * will be assigned with the index to insert the item (even if it was
627 * not found).
628 */
629 static int cache_binSearch(cnid_t *array, unsigned int hi, cnid_t parent_id, int *no_match_indexp)
630 {
631 int index=-1;
632 unsigned int lo=0;
633
634 do {
635 unsigned int mid = ((hi - lo)/2) + lo;
636 unsigned int this_id = array[mid];
637
638 if (parent_id == this_id) {
639 hi = mid;
640 break;
641 }
642
643 if (parent_id < this_id) {
644 hi = mid;
645 continue;
646 }
647
648 if (parent_id > this_id) {
649 lo = mid + 1;
650 continue;
651 }
652 } while(lo < hi);
653
654 /* check if lo and hi converged on the match */
655 if (parent_id == array[hi]) {
656 index = hi;
657 }
658
659 if (no_match_indexp) {
660 *no_match_indexp = hi;
661 }
662
663 return index;
664 }
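/*
 * Worked example: with array == { 10, 25, 60 } and hi == 2, searching for 25
 * returns index 1; searching for 30 returns -1 and sets *no_match_indexp to 2,
 * the slot where 30 would have to be inserted to keep the array sorted.
 */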
665
666
667 static int
668 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
669 {
670 unsigned int hi;
671 int matches = 0;
672 int index, no_match_index;
673
674 if (cache->numcached == 0) {
675 *indexp = 0;
676 return 0; // table is empty, so insert at index=0 and report no match
677 }
678
679 if (cache->numcached > NUM_CACHE_ENTRIES) {
680 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
681 cache->numcached, NUM_CACHE_ENTRIES);*/
682 cache->numcached = NUM_CACHE_ENTRIES;
683 }
684
685 hi = cache->numcached - 1;
686
687 index = cache_binSearch(cache->acache, hi, parent_id, &no_match_index);
688
689 /* if no existing entry found, find index for new one */
690 if (index == -1) {
691 index = no_match_index;
692 matches = 0;
693 } else {
694 matches = 1;
695 }
696
697 *indexp = index;
698 return matches;
699 }
700
701 /*
702 * Add a node to the access_cache at the given index (or do a lookup first
703 * to find the index if -1 is passed in). We currently do a replace rather
704 * than an insert if the cache is full.
705 */
706 static void
707 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
708 {
709 int lookup_index = -1;
710
711 /* need to do a lookup first if -1 passed for index */
712 if (index == -1) {
713 if (lookup_bucket(cache, &lookup_index, nodeID)) {
714 if (cache->haveaccess[lookup_index] != access && cache->haveaccess[lookup_index] == ESRCH) {
715 // only update an entry if the previous access was ESRCH (i.e. a scope checking error)
716 cache->haveaccess[lookup_index] = access;
717 }
718
719 /* mission accomplished */
720 return;
721 } else {
722 index = lookup_index;
723 }
724
725 }
726
727 /* if the cache is full, do a replace rather than an insert */
728 if (cache->numcached >= NUM_CACHE_ENTRIES) {
729 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
730 cache->numcached = NUM_CACHE_ENTRIES-1;
731
732 if (index > cache->numcached) {
733 // printf("index %d pinned to %d\n", index, cache->numcached);
734 index = cache->numcached;
735 }
736 }
737
738 if (index < cache->numcached && index < NUM_CACHE_ENTRIES && nodeID > cache->acache[index]) {
739 index++;
740 }
741
742 if (index >= 0 && index < cache->numcached) {
743 /* only do bcopy if we're inserting */
744 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
745 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(unsigned char) );
746 }
747
748 cache->acache[index] = nodeID;
749 cache->haveaccess[index] = access;
750 cache->numcached++;
751 }
752
753
754 struct cinfo {
755 uid_t uid;
756 gid_t gid;
757 mode_t mode;
758 cnid_t parentcnid;
759 u_int16_t recflags;
760 };
761
762 static int
763 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
764 {
765 struct cinfo *cip = (struct cinfo *)arg;
766
767 cip->uid = attrp->ca_uid;
768 cip->gid = attrp->ca_gid;
769 cip->mode = attrp->ca_mode;
770 cip->parentcnid = descp->cd_parentcnid;
771 cip->recflags = attrp->ca_recflags;
772
773 return (0);
774 }
775
776 /*
777 * Look up the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
778 * isn't in-core, then go to the catalog.
779 */
780 static int
781 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
782 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp)
783 {
784 int error = 0;
785
786 /* if this id matches the one the fsctl was called with, skip the lookup */
787 if (cnid == skip_cp->c_cnid) {
788 cnattrp->ca_uid = skip_cp->c_uid;
789 cnattrp->ca_gid = skip_cp->c_gid;
790 cnattrp->ca_mode = skip_cp->c_mode;
791 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
792 } else {
793 struct cinfo c_info;
794
795 /* otherwise, check the cnode hash in case the file/dir is in-core */
796 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
797 cnattrp->ca_uid = c_info.uid;
798 cnattrp->ca_gid = c_info.gid;
799 cnattrp->ca_mode = c_info.mode;
800 cnattrp->ca_recflags = c_info.recflags;
801 keyp->hfsPlus.parentID = c_info.parentcnid;
802 } else {
803 int lockflags;
804
805 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
806
807 /* lookup this cnid in the catalog */
808 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
809
810 hfs_systemfile_unlock(hfsmp, lockflags);
811
812 cache->lookups++;
813 }
814 }
815
816 return (error);
817 }
818
819
820 /*
821 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
822 * up to CACHE_LEVELS as we progress towards the root.
823 */
824 static int
825 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
826 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev,
827 struct vfs_context *my_context,
828 char *bitmap,
829 uint32_t map_size,
830 cnid_t* parents,
831 uint32_t num_parents)
832 {
833 int myErr = 0;
834 int myResult;
835 HFSCatalogNodeID thisNodeID;
836 unsigned int myPerms;
837 struct cat_attr cnattr;
838 int cache_index = -1, scope_index = -1, scope_idx_start = -1;
839 CatalogKey catkey;
840
841 int i = 0, ids_to_cache = 0;
842 int parent_ids[CACHE_LEVELS];
843
844 thisNodeID = nodeID;
845 while (thisNodeID >= kRootDirID) {
846 myResult = 0; /* default to "no access" */
847
848 /* check the cache before resorting to hitting the catalog */
849
850 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
851 * to look any further after hitting cached dir */
852
853 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
854 cache->cachehits++;
855 myErr = cache->haveaccess[cache_index];
856 if (scope_index != -1) {
857 if (myErr == ESRCH) {
858 myErr = 0;
859 }
860 } else {
861 scope_index = 0; // so we'll just use the cache result
862 scope_idx_start = ids_to_cache;
863 }
864 myResult = (myErr == 0) ? 1 : 0;
865 goto ExitThisRoutine;
866 }
867
868
869 if (parents) {
870 int tmp;
871 tmp = cache_binSearch(parents, num_parents-1, thisNodeID, NULL);
872 if (scope_index == -1)
873 scope_index = tmp;
874 if (tmp != -1 && scope_idx_start == -1 && ids_to_cache < CACHE_LEVELS) {
875 scope_idx_start = ids_to_cache;
876 }
877 }
878
879 /* remember which parents we want to cache */
880 if (ids_to_cache < CACHE_LEVELS) {
881 parent_ids[ids_to_cache] = thisNodeID;
882 ids_to_cache++;
883 }
884 // Inefficient (using modulo) and we might want to use a hash function, not rely on the node id to be "nice"...
885 if (bitmap && map_size) {
886 bitmap[(thisNodeID/8)%(map_size)]|=(1<<(thisNodeID&7));
887 }
888
889
890 /* do the lookup (checks the cnode hash, then the catalog) */
891 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr);
892 if (myErr) {
893 goto ExitThisRoutine; /* no access */
894 }
895
896 /* Root always gets access. */
897 if (suser(myp_ucred, NULL) == 0) {
898 thisNodeID = catkey.hfsPlus.parentID;
899 myResult = 1;
900 continue;
901 }
902
903 // if the thing has acl's, do the full permission check
904 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
905 struct vnode *vp;
906
907 /* get the vnode for this cnid */
908 myErr = hfs_vget(hfsmp, thisNodeID, &vp, 0);
909 if ( myErr ) {
910 myResult = 0;
911 goto ExitThisRoutine;
912 }
913
914 thisNodeID = VTOC(vp)->c_parentcnid;
915
916 hfs_unlock(VTOC(vp));
917
918 if (vnode_vtype(vp) == VDIR) {
919 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), my_context);
920 } else {
921 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, my_context);
922 }
923
924 vnode_put(vp);
925 if (myErr) {
926 myResult = 0;
927 goto ExitThisRoutine;
928 }
929 } else {
930 unsigned int flags;
931
932 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
933 cnattr.ca_mode, hfsmp->hfs_mp,
934 myp_ucred, theProcPtr);
935
936 if (cnattr.ca_mode & S_IFDIR) {
937 flags = R_OK | X_OK;
938 } else {
939 flags = R_OK;
940 }
941 if ( (myPerms & flags) != flags) {
942 myResult = 0;
943 myErr = EACCES;
944 goto ExitThisRoutine; /* no access */
945 }
946
947 /* up the hierarchy we go */
948 thisNodeID = catkey.hfsPlus.parentID;
949 }
950 }
951
952 /* if here, we have access to this node */
953 myResult = 1;
954
955 ExitThisRoutine:
956 if (parents && myErr == 0 && scope_index == -1) {
957 myErr = ESRCH;
958 }
959
960 if (myErr) {
961 myResult = 0;
962 }
963 *err = myErr;
964
965 /* cache the parent directory(ies) */
966 for (i = 0; i < ids_to_cache; i++) {
967 if (myErr == 0 && parents && (scope_idx_start == -1 || i > scope_idx_start)) {
968 add_node(cache, -1, parent_ids[i], ESRCH);
969 } else {
970 add_node(cache, -1, parent_ids[i], myErr);
971 }
972 }
973
974 return (myResult);
975 }
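/*
 * Illustrative sketch (not part of this file's logic): a userspace caller that
 * supplied a bitmap through the extended bulk-access structure can test
 * whether a directory id was visited using the same hash the loop above uses:
 *
 *     seen = (bitmap[(dir_id / 8) % map_size] & (1 << (dir_id & 7))) != 0;
 *
 * Since multiple ids can map to the same bit, a set bit only means "possibly
 * visited"; a clear bit means the id was definitely not walked.
 */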
976
977 static int
978 do_bulk_access_check(struct hfsmount *hfsmp, struct vnode *vp,
979 struct vnop_ioctl_args *ap, int arg_size, vfs_context_t context)
980 {
981 boolean_t is64bit;
982
983 /*
984 * NOTE: on entry, the vnode is locked. In case this vnode
985 * happens to be in our list of file_ids, we note it so we
986 * avoid calling hfs_chashget_nowait() on that id, as that
987 * will cause a "locking against myself" panic.
988 */
989 Boolean check_leaf = true;
990
991 struct ext_user_access_t *user_access_structp;
992 struct ext_user_access_t tmp_user_access;
993 struct access_cache cache;
994
995 int error = 0;
996 unsigned int i;
997
998 dev_t dev = VTOC(vp)->c_dev;
999
1000 short flags;
1001 unsigned int num_files = 0;
1002 int map_size = 0;
1003 int num_parents = 0;
1004 int *file_ids=NULL;
1005 short *access=NULL;
1006 char *bitmap=NULL;
1007 cnid_t *parents=NULL;
1008 int leaf_index;
1009
1010 cnid_t cnid;
1011 cnid_t prevParent_cnid = 0;
1012 unsigned int myPerms;
1013 short myaccess = 0;
1014 struct cat_attr cnattr;
1015 CatalogKey catkey;
1016 struct cnode *skip_cp = VTOC(vp);
1017 kauth_cred_t cred = vfs_context_ucred(context);
1018 proc_t p = vfs_context_proc(context);
1019
1020 is64bit = proc_is64bit(p);
1021
1022 /* initialize the local cache and buffers */
1023 cache.numcached = 0;
1024 cache.cachehits = 0;
1025 cache.lookups = 0;
1026 cache.acache = NULL;
1027 cache.haveaccess = NULL;
1028
1029 /* struct copyin done during dispatch... need to copy file_id array separately */
1030 if (ap->a_data == NULL) {
1031 error = EINVAL;
1032 goto err_exit_bulk_access;
1033 }
1034
1035 if (is64bit) {
1036 if (arg_size != sizeof(struct ext_user_access_t)) {
1037 error = EINVAL;
1038 goto err_exit_bulk_access;
1039 }
1040
1041 user_access_structp = (struct ext_user_access_t *)ap->a_data;
1042
1043 } else if (arg_size == sizeof(struct access_t)) {
1044 struct access_t *accessp = (struct access_t *)ap->a_data;
1045
1046 // convert an old style bulk-access struct to the new style
1047 tmp_user_access.flags = accessp->flags;
1048 tmp_user_access.num_files = accessp->num_files;
1049 tmp_user_access.map_size = 0;
1050 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1051 tmp_user_access.bitmap = USER_ADDR_NULL;
1052 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1053 tmp_user_access.num_parents = 0;
1054 user_access_structp = &tmp_user_access;
1055
1056 } else if (arg_size == sizeof(struct ext_access_t)) {
1057 struct ext_access_t *accessp = (struct ext_access_t *)ap->a_data;
1058
1059 // up-cast from a 32-bit version of the struct
1060 tmp_user_access.flags = accessp->flags;
1061 tmp_user_access.num_files = accessp->num_files;
1062 tmp_user_access.map_size = accessp->map_size;
1063 tmp_user_access.num_parents = accessp->num_parents;
1064
1065 tmp_user_access.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1066 tmp_user_access.bitmap = CAST_USER_ADDR_T(accessp->bitmap);
1067 tmp_user_access.access = CAST_USER_ADDR_T(accessp->access);
1068 tmp_user_access.parents = CAST_USER_ADDR_T(accessp->parents);
1069
1070 user_access_structp = &tmp_user_access;
1071 } else {
1072 error = EINVAL;
1073 goto err_exit_bulk_access;
1074 }
1075
1076 map_size = user_access_structp->map_size;
1077
1078 num_files = user_access_structp->num_files;
1079
1080 num_parents= user_access_structp->num_parents;
1081
1082 if (num_files < 1) {
1083 goto err_exit_bulk_access;
1084 }
1085 if (num_files > 1024) {
1086 error = EINVAL;
1087 goto err_exit_bulk_access;
1088 }
1089
1090 if (num_parents > 1024) {
1091 error = EINVAL;
1092 goto err_exit_bulk_access;
1093 }
1094
1095 file_ids = (int *) kalloc(sizeof(int) * num_files);
1096 access = (short *) kalloc(sizeof(short) * num_files);
1097 if (map_size) {
1098 bitmap = (char *) kalloc(sizeof(char) * map_size);
1099 }
1100
1101 if (num_parents) {
1102 parents = (cnid_t *) kalloc(sizeof(cnid_t) * num_parents);
1103 }
1104
1105 cache.acache = (unsigned int *) kalloc(sizeof(int) * NUM_CACHE_ENTRIES);
1106 cache.haveaccess = (unsigned char *) kalloc(sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1107
1108 if (file_ids == NULL || access == NULL || (map_size != 0 && bitmap == NULL) || cache.acache == NULL || cache.haveaccess == NULL) {
1109 if (file_ids) {
1110 kfree(file_ids, sizeof(int) * num_files);
1111 }
1112 if (bitmap) {
1113 kfree(bitmap, sizeof(char) * map_size);
1114 }
1115 if (access) {
1116 kfree(access, sizeof(short) * num_files);
1117 }
1118 if (cache.acache) {
1119 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1120 }
1121 if (cache.haveaccess) {
1122 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1123 }
1124 if (parents) {
1125 kfree(parents, sizeof(cnid_t) * num_parents);
1126 }
1127 return ENOMEM;
1128 }
1129
1130 // make sure the bitmap is zeroed out...
1131 if (bitmap) {
1132 bzero(bitmap, (sizeof(char) * map_size));
1133 }
1134
1135 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1136 num_files * sizeof(int)))) {
1137 goto err_exit_bulk_access;
1138 }
1139
1140 if (num_parents) {
1141 if ((error = copyin(user_access_structp->parents, (caddr_t)parents,
1142 num_parents * sizeof(cnid_t)))) {
1143 goto err_exit_bulk_access;
1144 }
1145 }
1146
1147 flags = user_access_structp->flags;
1148 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1149 flags = R_OK;
1150 }
1151
1152 /* check if we've been passed leaf node ids or parent ids */
1153 if (flags & PARENT_IDS_FLAG) {
1154 check_leaf = false;
1155 }
1156
1157 /* Check access to each file_id passed in */
1158 for (i = 0; i < num_files; i++) {
1159 leaf_index=-1;
1160 cnid = (cnid_t) file_ids[i];
1161
1162 /* root always has access */
1163 if ((!parents) && (!suser(cred, NULL))) {
1164 access[i] = 0;
1165 continue;
1166 }
1167
1168 if (check_leaf) {
1169 /* do the lookup (checks the cnode hash, then the catalog) */
1170 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr);
1171 if (error) {
1172 access[i] = (short) error;
1173 continue;
1174 }
1175
1176 if (parents) {
1177 // Check if the leaf matches one of the parent scopes
1178 leaf_index = cache_binSearch(parents, num_parents-1, cnid, NULL);
1179 }
1180
1181 // if the thing has acl's, do the full permission check
1182 if ((cnattr.ca_recflags & kHFSHasSecurityMask) != 0) {
1183 struct vnode *cvp;
1184 int myErr = 0;
1185 /* get the vnode for this cnid */
1186 myErr = hfs_vget(hfsmp, cnid, &cvp, 0);
1187 if ( myErr ) {
1188 access[i] = myErr;
1189 continue;
1190 }
1191
1192 hfs_unlock(VTOC(cvp));
1193
1194 if (vnode_vtype(cvp) == VDIR) {
1195 myErr = vnode_authorize(cvp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), context);
1196 } else {
1197 myErr = vnode_authorize(cvp, NULL, KAUTH_VNODE_READ_DATA, context);
1198 }
1199
1200 vnode_put(cvp);
1201 if (myErr) {
1202 access[i] = myErr;
1203 continue;
1204 }
1205 } else {
1206 /* before calling do_access_check(), check the target file for read access */
1207 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1208 cnattr.ca_mode, hfsmp->hfs_mp, cred, p);
1209
1210 /* fail fast if no access */
1211 if ((myPerms & flags) == 0) {
1212 access[i] = EACCES;
1213 continue;
1214 }
1215 }
1216 } else {
1217 /* we were passed an array of parent ids */
1218 catkey.hfsPlus.parentID = cnid;
1219 }
1220
1221 /* if the last guy had the same parent and had access, we're done */
1222 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1223 cache.cachehits++;
1224 access[i] = 0;
1225 continue;
1226 }
1227
1228 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1229 skip_cp, p, cred, dev, context,bitmap, map_size, parents, num_parents);
1230
1231 if (myaccess || (error == ESRCH && leaf_index != -1)) {
1232 access[i] = 0; // have access.. no errors to report
1233 } else {
1234 access[i] = (error != 0 ? (short) error : EACCES);
1235 }
1236
1237 prevParent_cnid = catkey.hfsPlus.parentID;
1238 }
1239
1240 /* copyout the access array */
1241 if ((error = copyout((caddr_t)access, user_access_structp->access,
1242 num_files * sizeof (short)))) {
1243 goto err_exit_bulk_access;
1244 }
1245 if (map_size && bitmap) {
1246 if ((error = copyout((caddr_t)bitmap, user_access_structp->bitmap,
1247 map_size * sizeof (char)))) {
1248 goto err_exit_bulk_access;
1249 }
1250 }
1251
1252
1253 err_exit_bulk_access:
1254
1255 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1256
1257 if (file_ids)
1258 kfree(file_ids, sizeof(int) * num_files);
1259 if (parents)
1260 kfree(parents, sizeof(cnid_t) * num_parents);
1261 if (bitmap)
1262 kfree(bitmap, sizeof(char) * map_size);
1263 if (access)
1264 kfree(access, sizeof(short) * num_files);
1265 if (cache.acache)
1266 kfree(cache.acache, sizeof(int) * NUM_CACHE_ENTRIES);
1267 if (cache.haveaccess)
1268 kfree(cache.haveaccess, sizeof(unsigned char) * NUM_CACHE_ENTRIES);
1269
1270 return (error);
1271 }
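/*
 * A minimal userspace sketch of driving the bulk-access check above. The
 * struct access_t layout mirrors the one declared earlier in this file; the
 * fsctl(2) command macro spelled HFS_BULKACCESS below is an assumption --
 * consult hfs_fsctl.h for the constant actually exported to user programs.
 */
#if 0 /* illustrative only, never compiled into the kernel */
#include <sys/fsctl.h>
#include <unistd.h>

static int
check_read_access(const char *volpath, int *file_ids, int nfiles, short *results)
{
	struct access_t args;

	args.uid = geteuid();        /* IN: effective user id, per the declaration above */
	args.flags = R_OK;           /* IN: access being requested                       */
	args.num_groups = 0;         /* 0 => use the caller's own group list             */
	args.groups = NULL;
	args.num_files = nfiles;     /* at most 1024 per call (see the check above)      */
	args.file_ids = file_ids;    /* catalog node ids to test                         */
	args.access = results;       /* OUT: 0 == has access, otherwise an errno value   */

	/* command macro is assumed; see hfs_fsctl.h */
	return fsctl(volpath, HFS_BULKACCESS, &args, 0);
}
#endif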
1272
1273
1274 /* end "bulk-access" support */
1275
1276
1277 /*
1278 * Callback for use with freeze ioctl.
1279 */
1280 static int
1281 hfs_freezewrite_callback(struct vnode *vp, __unused void *cargs)
1282 {
1283 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
1284
1285 return 0;
1286 }
1287
1288 /*
1289 * Control filesystem operating characteristics.
1290 */
1291 int
1292 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
1293 vnode_t a_vp;
1294 int a_command;
1295 caddr_t a_data;
1296 int a_fflag;
1297 vfs_context_t a_context;
1298 } */ *ap)
1299 {
1300 struct vnode * vp = ap->a_vp;
1301 struct hfsmount *hfsmp = VTOHFS(vp);
1302 vfs_context_t context = ap->a_context;
1303 kauth_cred_t cred = vfs_context_ucred(context);
1304 proc_t p = vfs_context_proc(context);
1305 struct vfsstatfs *vfsp;
1306 boolean_t is64bit;
1307
1308 is64bit = proc_is64bit(p);
1309
1310 switch (ap->a_command) {
1311
1312 case HFS_GETPATH:
1313 {
1314 struct vnode *file_vp;
1315 cnid_t cnid;
1316 int outlen;
1317 char *bufptr;
1318 int error;
1319
1320 /* Caller must be owner of file system. */
1321 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1322 if (suser(cred, NULL) &&
1323 kauth_cred_getuid(cred) != vfsp->f_owner) {
1324 return (EACCES);
1325 }
1326 /* Target vnode must be file system's root. */
1327 if (!vnode_isvroot(vp)) {
1328 return (EINVAL);
1329 }
1330 bufptr = (char *)ap->a_data;
1331 cnid = strtoul(bufptr, NULL, 10);
1332
1333 /* We need to call hfs_vfs_vget to leverage the code that will fix the
1334 * origin list for us if needed, as opposed to calling hfs_vget, since
1335 * we will need it for the subsequent build_path call.
1336 */
1337 if ((error = hfs_vfs_vget(HFSTOVFS(hfsmp), cnid, &file_vp, context))) {
1338 return (error);
1339 }
1340 error = build_path(file_vp, bufptr, sizeof(pathname_t), &outlen, 0, context);
1341 vnode_put(file_vp);
1342
1343 return (error);
1344 }
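/* Illustrative use from userspace: write the decimal cnid into a
 * MAXPATHLEN-sized buffer and call fsctl(volume_root_path, HFS_GETPATH, buf, 0)
 * against the volume root; on success the same buffer comes back holding the
 * item's full path. The exact command constant exported to user programs
 * lives in hfs_fsctl.h.
 */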
1345
1346 case HFS_PREV_LINK:
1347 case HFS_NEXT_LINK:
1348 {
1349 cnid_t linkfileid;
1350 cnid_t nextlinkid;
1351 cnid_t prevlinkid;
1352 int error;
1353
1354 /* Caller must be owner of file system. */
1355 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1356 if (suser(cred, NULL) &&
1357 kauth_cred_getuid(cred) != vfsp->f_owner) {
1358 return (EACCES);
1359 }
1360 /* Target vnode must be file system's root. */
1361 if (!vnode_isvroot(vp)) {
1362 return (EINVAL);
1363 }
1364 linkfileid = *(cnid_t *)ap->a_data;
1365 if (linkfileid < kHFSFirstUserCatalogNodeID) {
1366 return (EINVAL);
1367 }
1368 if ((error = hfs_lookuplink(hfsmp, linkfileid, &prevlinkid, &nextlinkid))) {
1369 return (error);
1370 }
1371 if (ap->a_command == HFS_NEXT_LINK) {
1372 *(cnid_t *)ap->a_data = nextlinkid;
1373 } else {
1374 *(cnid_t *)ap->a_data = prevlinkid;
1375 }
1376 return (0);
1377 }
1378
1379 case HFS_RESIZE_PROGRESS: {
1380
1381 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1382 if (suser(cred, NULL) &&
1383 kauth_cred_getuid(cred) != vfsp->f_owner) {
1384 return (EACCES); /* must be owner of file system */
1385 }
1386 if (!vnode_isvroot(vp)) {
1387 return (EINVAL);
1388 }
1389 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
1390 }
1391
1392 case HFS_RESIZE_VOLUME: {
1393 u_int64_t newsize;
1394 u_int64_t cursize;
1395
1396 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1397 if (suser(cred, NULL) &&
1398 kauth_cred_getuid(cred) != vfsp->f_owner) {
1399 return (EACCES); /* must be owner of file system */
1400 }
1401 if (!vnode_isvroot(vp)) {
1402 return (EINVAL);
1403 }
1404 newsize = *(u_int64_t *)ap->a_data;
1405 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
1406
1407 if (newsize > cursize) {
1408 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
1409 } else if (newsize < cursize) {
1410 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
1411 } else {
1412 return (0);
1413 }
1414 }
1415 case HFS_CHANGE_NEXT_ALLOCATION: {
1416 int error = 0; /* Assume success */
1417 u_int32_t location;
1418
1419 if (vnode_vfsisrdonly(vp)) {
1420 return (EROFS);
1421 }
1422 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1423 if (suser(cred, NULL) &&
1424 kauth_cred_getuid(cred) != vfsp->f_owner) {
1425 return (EACCES); /* must be owner of file system */
1426 }
1427 if (!vnode_isvroot(vp)) {
1428 return (EINVAL);
1429 }
1430 HFS_MOUNT_LOCK(hfsmp, TRUE);
1431 location = *(u_int32_t *)ap->a_data;
1432 if ((location >= hfsmp->allocLimit) &&
1433 (location != HFS_NO_UPDATE_NEXT_ALLOCATION)) {
1434 error = EINVAL;
1435 goto fail_change_next_allocation;
1436 }
1437 /* Return previous value. */
1438 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
1439 if (location == HFS_NO_UPDATE_NEXT_ALLOCATION) {
1440 /* On magic value for location, set nextAllocation to next block
1441 * after metadata zone and set flag in mount structure to indicate
1442 * that nextAllocation should not be updated again.
1443 */
1444 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, hfsmp->hfs_metazone_end + 1);
1445 hfsmp->hfs_flags |= HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1446 } else {
1447 hfsmp->hfs_flags &= ~HFS_SKIP_UPDATE_NEXT_ALLOCATION;
1448 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, location);
1449 }
1450 MarkVCBDirty(hfsmp);
1451 fail_change_next_allocation:
1452 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1453 return (error);
1454 }
1455
1456 #ifdef HFS_SPARSE_DEV
1457 case HFS_SETBACKINGSTOREINFO: {
1458 struct vnode * bsfs_rootvp;
1459 struct vnode * di_vp;
1460 struct hfs_backingstoreinfo *bsdata;
1461 int error = 0;
1462
1463 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
1464 return (EALREADY);
1465 }
1466 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1467 if (suser(cred, NULL) &&
1468 kauth_cred_getuid(cred) != vfsp->f_owner) {
1469 return (EACCES); /* must be owner of file system */
1470 }
1471 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
1472 if (bsdata == NULL) {
1473 return (EINVAL);
1474 }
1475 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
1476 return (error);
1477 }
1478 if ((error = vnode_getwithref(di_vp))) {
1479 file_drop(bsdata->backingfd);
1480 return(error);
1481 }
1482
1483 if (vnode_mount(vp) == vnode_mount(di_vp)) {
1484 (void)vnode_put(di_vp);
1485 file_drop(bsdata->backingfd);
1486 return (EINVAL);
1487 }
1488
1489 /*
1490 * Obtain the backing fs root vnode and keep a reference
1491 * on it. This reference will be dropped in hfs_unmount.
1492 */
1493 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
1494 if (error) {
1495 (void)vnode_put(di_vp);
1496 file_drop(bsdata->backingfd);
1497 return (error);
1498 }
1499 vnode_ref(bsfs_rootvp);
1500 vnode_put(bsfs_rootvp);
1501
1502 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
1503 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
1504 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
1505 hfsmp->hfs_sparsebandblks *= 4;
1506
1507 vfs_markdependency(hfsmp->hfs_mp);
1508
1509 (void)vnode_put(di_vp);
1510 file_drop(bsdata->backingfd);
1511 return (0);
1512 }
1513 case HFS_CLRBACKINGSTOREINFO: {
1514 struct vnode * tmpvp;
1515
1516 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1517 if (suser(cred, NULL) &&
1518 kauth_cred_getuid(cred) != vfsp->f_owner) {
1519 return (EACCES); /* must be owner of file system */
1520 }
1521 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
1522 hfsmp->hfs_backingfs_rootvp) {
1523
1524 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
1525 tmpvp = hfsmp->hfs_backingfs_rootvp;
1526 hfsmp->hfs_backingfs_rootvp = NULLVP;
1527 hfsmp->hfs_sparsebandblks = 0;
1528 vnode_rele(tmpvp);
1529 }
1530 return (0);
1531 }
1532 #endif /* HFS_SPARSE_DEV */
1533
1534 case F_FREEZE_FS: {
1535 struct mount *mp;
1536
1537 if (!is_suser())
1538 return (EACCES);
1539
1540 mp = vnode_mount(vp);
1541 hfsmp = VFSTOHFS(mp);
1542
1543 if (!(hfsmp->jnl))
1544 return (ENOTSUP);
1545
1546 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
1547
1548 // flush things before we get started to try and prevent
1549 // dirty data from being paged out while we're frozen.
1550 // note: can't do this after taking the lock as it will
1551 // deadlock against ourselves.
1552 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1553 hfs_global_exclusive_lock_acquire(hfsmp);
1554 journal_flush(hfsmp->jnl);
1555
1556 // don't need to iterate on all vnodes, we just need to
1557 // wait for writes to the system files and the device vnode
1558 if (HFSTOVCB(hfsmp)->extentsRefNum)
1559 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1560 if (HFSTOVCB(hfsmp)->catalogRefNum)
1561 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1562 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1563 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1564 if (hfsmp->hfs_attribute_vp)
1565 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1566 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1567
1568 hfsmp->hfs_freezing_proc = current_proc();
1569
1570 return (0);
1571 }
1572
1573 case F_THAW_FS: {
1574 if (!is_suser())
1575 return (EACCES);
1576
1577 // if we're not the one who froze the fs then we
1578 // can't thaw it.
1579 if (hfsmp->hfs_freezing_proc != current_proc()) {
1580 return EPERM;
1581 }
1582
1583 // NOTE: if you add code here, also go check the
1584 // code that "thaws" the fs in hfs_vnop_close()
1585 //
1586 hfsmp->hfs_freezing_proc = NULL;
1587 hfs_global_exclusive_lock_release(hfsmp);
1588 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1589
1590 return (0);
1591 }
1592
1593 case HFS_BULKACCESS_FSCTL: {
1594 int size;
1595
1596 if (hfsmp->hfs_flags & HFS_STANDARD) {
1597 return EINVAL;
1598 }
1599
1600 if (is64bit) {
1601 size = sizeof(struct user_access_t);
1602 } else {
1603 size = sizeof(struct access_t);
1604 }
1605
1606 return do_bulk_access_check(hfsmp, vp, ap, size, context);
1607 }
1608
1609 case HFS_EXT_BULKACCESS_FSCTL: {
1610 int size;
1611
1612 if (hfsmp->hfs_flags & HFS_STANDARD) {
1613 return EINVAL;
1614 }
1615
1616 if (is64bit) {
1617 size = sizeof(struct ext_user_access_t);
1618 } else {
1619 size = sizeof(struct ext_access_t);
1620 }
1621
1622 return do_bulk_access_check(hfsmp, vp, ap, size, context);
1623 }
1624
1625 case HFS_SETACLSTATE: {
1626 int state;
1627
1628 if (ap->a_data == NULL) {
1629 return (EINVAL);
1630 }
1631
1632 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1633 state = *(int *)ap->a_data;
1634
1635 // super-user can enable or disable acl's on a volume.
1636 // the volume owner can only enable acl's
1637 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1638 return (EPERM);
1639 }
1640 if (state == 0 || state == 1)
1641 return hfs_set_volxattr(hfsmp, HFS_SETACLSTATE, state);
1642 else
1643 return (EINVAL);
1644 }
1645
1646 case HFS_SET_XATTREXTENTS_STATE: {
1647 int state;
1648
1649 if (ap->a_data == NULL) {
1650 return (EINVAL);
1651 }
1652
1653 state = *(int *)ap->a_data;
1654
1655 /* Super-user can enable or disable extent-based extended
1656 * attribute support on a volume
1657 */
1658 if (!is_suser()) {
1659 return (EPERM);
1660 }
1661 if (state == 0 || state == 1)
1662 return hfs_set_volxattr(hfsmp, HFS_SET_XATTREXTENTS_STATE, state);
1663 else
1664 return (EINVAL);
1665 }
1666
1667 case F_FULLFSYNC: {
1668 int error;
1669
1670 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1671 if (error == 0) {
1672 error = hfs_fsync(vp, MNT_WAIT, TRUE, p);
1673 hfs_unlock(VTOC(vp));
1674 }
1675
1676 return error;
1677 }
1678
1679 case F_CHKCLEAN: {
1680 register struct cnode *cp;
1681 int error;
1682
1683 if (!vnode_isreg(vp))
1684 return EINVAL;
1685
1686 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1687 if (error == 0) {
1688 cp = VTOC(vp);
1689 /*
1690 * used by a regression test to determine if
1691 * all the dirty pages (via write) have been cleaned
1692 * after a call to 'fsync'.
1693 */
1694 error = is_file_clean(vp, VTOF(vp)->ff_size);
1695 hfs_unlock(cp);
1696 }
1697 return (error);
1698 }
1699
1700 case F_RDADVISE: {
1701 register struct radvisory *ra;
1702 struct filefork *fp;
1703 int error;
1704
1705 if (!vnode_isreg(vp))
1706 return EINVAL;
1707
1708 ra = (struct radvisory *)(ap->a_data);
1709 fp = VTOF(vp);
1710
1711 /* Protect against a size change. */
1712 hfs_lock_truncate(VTOC(vp), TRUE);
1713
1714 if (ra->ra_offset >= fp->ff_size) {
1715 error = EFBIG;
1716 } else {
1717 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1718 }
1719
1720 hfs_unlock_truncate(VTOC(vp), TRUE);
1721 return (error);
1722 }
1723
1724 case F_READBOOTSTRAP:
1725 case F_WRITEBOOTSTRAP:
1726 {
1727 struct vnode *devvp = NULL;
1728 user_fbootstraptransfer_t *user_bootstrapp;
1729 int devBlockSize;
1730 int error;
1731 uio_t auio;
1732 daddr64_t blockNumber;
1733 u_long blockOffset;
1734 u_long xfersize;
1735 struct buf *bp;
1736 user_fbootstraptransfer_t user_bootstrap;
1737
1738 if (!vnode_isvroot(vp))
1739 return (EINVAL);
1740 /* LP64 - when the caller is a 64-bit process we are passed a pointer
1741 * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
1742 * fbootstraptransfer_t, which we munge into a user_fbootstraptransfer_t
1743 */
1744 if (is64bit) {
1745 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1746 }
1747 else {
1748 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1749 user_bootstrapp = &user_bootstrap;
1750 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1751 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1752 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1753 }
1754 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1755 return EINVAL;
1756
1757 devvp = VTOHFS(vp)->hfs_devvp;
1758 auio = uio_create(1, user_bootstrapp->fbt_offset,
1759 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1760 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1761 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1762
1763 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1764
1765 while (uio_resid(auio) > 0) {
1766 blockNumber = uio_offset(auio) / devBlockSize;
1767 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1768 if (error) {
1769 if (bp) buf_brelse(bp);
1770 uio_free(auio);
1771 return error;
1772 };
1773
1774 blockOffset = uio_offset(auio) % devBlockSize;
1775 xfersize = devBlockSize - blockOffset;
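/* e.g. with devBlockSize 512 and uio_offset(auio) 700: blockNumber is 1,
   blockOffset is 188 and xfersize is 324, so only the tail of block 1 is
   copied on this pass. */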
1776 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1777 if (error) {
1778 buf_brelse(bp);
1779 uio_free(auio);
1780 return error;
1781 };
1782 if (uio_rw(auio) == UIO_WRITE) {
1783 error = VNOP_BWRITE(bp);
1784 if (error) {
1785 uio_free(auio);
1786 return error;
1787 }
1788 } else {
1789 buf_brelse(bp);
1790 };
1791 };
1792 uio_free(auio);
1793 };
1794 return 0;
1795
1796 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1797 {
1798 if (is64bit) {
1799 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1800 }
1801 else {
1802 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1803 }
1804 return 0;
1805 }
1806
1807 case HFS_GET_MOUNT_TIME:
1808 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1809 break;
1810
1811 case HFS_GET_LAST_MTIME:
1812 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1813 break;
1814
1815 case HFS_SET_BOOT_INFO:
1816 if (!vnode_isvroot(vp))
1817 return(EINVAL);
1818 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1819 return(EACCES); /* must be superuser or owner of filesystem */
1820 HFS_MOUNT_LOCK(hfsmp, TRUE);
1821 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1822 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1823 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1824 break;
1825
1826 case HFS_GET_BOOT_INFO:
1827 if (!vnode_isvroot(vp))
1828 return(EINVAL);
1829 HFS_MOUNT_LOCK(hfsmp, TRUE);
1830 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1831 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1832 break;
1833
1834 case HFS_MARK_BOOT_CORRUPT:
1835 /* Mark the boot volume corrupt by setting
1836 * kHFSVolumeInconsistentBit in the volume header. This will
1837 * force fsck_hfs on next mount.
1838 */
1839 if (!is_suser()) {
1840 return EACCES;
1841 }
1842
1843 /* Allowed only on the root vnode of the boot volume */
1844 if (!(vfs_flags(HFSTOVFS(hfsmp)) & MNT_ROOTFS) ||
1845 !vnode_isvroot(vp)) {
1846 return EINVAL;
1847 }
1848
1849 printf ("hfs_vnop_ioctl: Marking the boot volume corrupt.\n");
1850 hfs_mark_volume_inconsistent(hfsmp);
1851 break;
1852
1853 default:
1854 return (ENOTTY);
1855 }
1856
1857 /* Reached by the cases above that break rather than return */
1858 return 0;
1859 }
1860
1861 /*
1862 * select
1863 */
1864 int
1865 hfs_vnop_select(__unused struct vnop_select_args *ap)
1866 /*
1867 struct vnop_select_args {
1868 vnode_t a_vp;
1869 int a_which;
1870 int a_fflags;
1871 void *a_wql;
1872 vfs_context_t a_context;
1873 };
1874 */
1875 {
1876 /*
1877 * We should really check to see if I/O is possible.
1878 */
1879 return (1);
1880 }
1881
1882 /*
1883 * Converts a logical block number to a physical block, and optionally returns
1884 * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1885 * The physical block number is based on the device block size, currently 512 bytes.
1886 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
1887 */
1888 int
1889 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, unsigned int *runp)
1890 {
1891 struct filefork *fp = VTOF(vp);
1892 struct hfsmount *hfsmp = VTOHFS(vp);
1893 int retval = E_NONE;
1894 u_int32_t logBlockSize;
1895 size_t bytesContAvail = 0;
1896 off_t blockposition;
1897 int lockExtBtree;
1898 int lockflags = 0;
1899
1900 /*
1901 * Check for underlying vnode requests and ensure that logical
1902 * to physical mapping is requested.
1903 */
1904 if (vpp != NULL)
1905 *vpp = hfsmp->hfs_devvp;
1906 if (bnp == NULL)
1907 return (0);
1908
1909 logBlockSize = GetLogicalBlockSize(vp);
1910 blockposition = (off_t)bn * logBlockSize;
1911
1912 lockExtBtree = overflow_extents(fp);
1913
1914 if (lockExtBtree)
1915 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_EXCLUSIVE_LOCK);
1916
1917 retval = MacToVFSError(
1918 MapFileBlockC (HFSTOVCB(hfsmp),
1919 (FCB*)fp,
1920 MAXPHYSIO,
1921 blockposition,
1922 bnp,
1923 &bytesContAvail));
1924
1925 if (lockExtBtree)
1926 hfs_systemfile_unlock(hfsmp, lockflags);
1927
1928 if (retval == E_NONE) {
1929 /* Figure out how many read ahead blocks there are */
1930 if (runp != NULL) {
1931 if (can_cluster(logBlockSize)) {
1932 /* Make sure this result never goes negative: */
1933 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1934 } else {
1935 *runp = 0;
1936 }
1937 }
1938 }
1939 return (retval);
1940 }
1941
1942 /*
1943 * Convert logical block number to file offset.
1944 */
1945 int
1946 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1947 /*
1948 struct vnop_blktooff_args {
1949 vnode_t a_vp;
1950 daddr64_t a_lblkno;
1951 off_t *a_offset;
1952 };
1953 */
1954 {
1955 if (ap->a_vp == NULL)
1956 return (EINVAL);
1957 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1958
1959 return(0);
1960 }
1961
1962 /*
1963 * Convert file offset to logical block number.
1964 */
1965 int
1966 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1967 /*
1968 struct vnop_offtoblk_args {
1969 vnode_t a_vp;
1970 off_t a_offset;
1971 daddr64_t *a_lblkno;
1972 };
1973 */
1974 {
1975 if (ap->a_vp == NULL)
1976 return (EINVAL);
1977 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1978
1979 return(0);
1980 }
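/*
 * hfs_vnop_blktooff and hfs_vnop_offtoblk are inverses up to truncation:
 * for example, with a 4 KB logical block size, offset 10000 maps to
 * logical block 2, and logical block 2 maps back to offset 8192.
 */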
1981
1982 /*
1983 * Map file offset to physical block number.
1984 *
1985 * If this function is called for write operation, and if the file
1986 * had virtual blocks allocated (delayed allocation), real blocks
1987 * are allocated by calling ExtendFileC().
1988 *
1989 * If this function is called for read operation, and if the file
1990 * had virtual blocks allocated (delayed allocation), no change
1991 * to the size of file is done, and if required, rangelist is
1992 * searched for mapping.
1993 *
1994 * System file cnodes are expected to be locked (shared or exclusive).
1995 */
1996 int
1997 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1998 /*
1999 struct vnop_blockmap_args {
2000 vnode_t a_vp;
2001 off_t a_foffset;
2002 size_t a_size;
2003 daddr64_t *a_bpn;
2004 size_t *a_run;
2005 void *a_poff;
2006 int a_flags;
2007 vfs_context_t a_context;
2008 };
2009 */
2010 {
2011 struct vnode *vp = ap->a_vp;
2012 struct cnode *cp;
2013 struct filefork *fp;
2014 struct hfsmount *hfsmp;
2015 size_t bytesContAvail = 0;
2016 int retval = E_NONE;
2017 int syslocks = 0;
2018 int lockflags = 0;
2019 struct rl_entry *invalid_range;
2020 enum rl_overlaptype overlaptype;
2021 int started_tr = 0;
2022 int tooklock = 0;
2023
2024 /* Do not allow blockmap operation on a directory */
2025 if (vnode_isdir(vp)) {
2026 return (ENOTSUP);
2027 }
2028
2029 /*
2030 * Check for underlying vnode requests and ensure that logical
2031 * to physical mapping is requested.
2032 */
2033 if (ap->a_bpn == NULL)
2034 return (0);
2035
2036 if ( !vnode_issystem(vp) && !vnode_islnk(vp) && !vnode_isswap(vp)) {
2037 if (VTOC(vp)->c_lockowner != current_thread()) {
2038 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2039 tooklock = 1;
2040 }
2041 }
2042 hfsmp = VTOHFS(vp);
2043 cp = VTOC(vp);
2044 fp = VTOF(vp);
2045
2046 retry:
2047 /* Check virtual blocks only when performing write operation */
2048 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
2049 if (hfs_start_transaction(hfsmp) != 0) {
2050 retval = EINVAL;
2051 goto exit;
2052 } else {
2053 started_tr = 1;
2054 }
2055 syslocks = SFL_EXTENTS | SFL_BITMAP;
2056
2057 } else if (overflow_extents(fp)) {
2058 syslocks = SFL_EXTENTS;
2059 }
2060
2061 if (syslocks)
2062 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
2063
2064 /*
2065 * Check for any delayed allocations.
2066 */
2067 if ((ap->a_flags & VNODE_WRITE) && (fp->ff_unallocblocks != 0)) {
2068 int64_t actbytes;
2069 u_int32_t loanedBlocks;
2070
2071 //
2072 // Make sure we have a transaction. It's possible
2073 // that we came in and fp->ff_unallocblocks was zero
2074 // but during the time we blocked acquiring the extents
2075 // btree, ff_unallocblocks became non-zero and so we
2076 // will need to start a transaction.
2077 //
2078 if (started_tr == 0) {
2079 if (syslocks) {
2080 hfs_systemfile_unlock(hfsmp, lockflags);
2081 syslocks = 0;
2082 }
2083 goto retry;
2084 }
2085
2086 /*
2087 * Note: ExtendFileC will release any blocks on loan and
2088 * acquire real blocks. So we ask to extend by zero bytes
2089 * since ExtendFileC will account for the virtual blocks.
2090 */
2091
2092 loanedBlocks = fp->ff_unallocblocks;
2093 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
2094 kEFAllMask | kEFNoClumpMask, &actbytes);
2095
2096 if (retval) {
2097 fp->ff_unallocblocks = loanedBlocks;
2098 cp->c_blocks += loanedBlocks;
2099 fp->ff_blocks += loanedBlocks;
2100
2101 HFS_MOUNT_LOCK(hfsmp, TRUE);
2102 hfsmp->loanedBlocks += loanedBlocks;
2103 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2104
2105 hfs_systemfile_unlock(hfsmp, lockflags);
2106 cp->c_flag |= C_MODIFIED;
2107 if (started_tr) {
2108 (void) hfs_update(vp, TRUE);
2109 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2110
2111 hfs_end_transaction(hfsmp);
2112 started_tr = 0;
2113 }
2114 goto exit;
2115 }
2116 }
2117
2118 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
2119 ap->a_bpn, &bytesContAvail);
2120 if (syslocks) {
2121 hfs_systemfile_unlock(hfsmp, lockflags);
2122 syslocks = 0;
2123 }
2124
2125 if (started_tr) {
2126 (void) hfs_update(vp, TRUE);
2127 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2128 hfs_end_transaction(hfsmp);
2129 started_tr = 0;
2130 }
2131 if (retval) {
2132 /* On write, always return error because virtual blocks, if any,
2133 * should have been allocated in ExtendFileC(). We do not
2134 * allocate virtual blocks on read, therefore return error
2135 * only if no virtual blocks are allocated. Otherwise we search
2136 * rangelist for zero-fills
2137 */
2138 if ((MacToVFSError(retval) != ERANGE) ||
2139 (ap->a_flags & VNODE_WRITE) ||
2140 ((ap->a_flags & VNODE_READ) && (fp->ff_unallocblocks == 0))) {
2141 goto exit;
2142 }
2143
2144 /* Validate if the start offset is within logical file size */
2145 if (ap->a_foffset > fp->ff_size) {
2146 goto exit;
2147 }
2148
2149 /* Searching file extents has failed for read operation, therefore
2150 * search rangelist for any uncommitted holes in the file.
2151 */
2152 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
2153 ap->a_foffset + (off_t)(ap->a_size - 1),
2154 &invalid_range);
2155 switch(overlaptype) {
2156 case RL_OVERLAPISCONTAINED:
2157 /* start_offset <= rl_start, end_offset >= rl_end */
2158 if (ap->a_foffset != invalid_range->rl_start) {
2159 break;
2160 }
2161 case RL_MATCHINGOVERLAP:
2162 /* start_offset = rl_start, end_offset = rl_end */
2163 case RL_OVERLAPCONTAINSRANGE:
2164 /* start_offset >= rl_start, end_offset <= rl_end */
2165 case RL_OVERLAPSTARTSBEFORE:
2166 /* start_offset > rl_start, end_offset >= rl_start */
2167 if ((off_t)fp->ff_size > (invalid_range->rl_end + 1)) {
2168 bytesContAvail = (invalid_range->rl_end + 1) - ap->a_foffset;
2169 } else {
2170 bytesContAvail = fp->ff_size - ap->a_foffset;
2171 }
2172 if (bytesContAvail > ap->a_size) {
2173 bytesContAvail = ap->a_size;
2174 }
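/* Returning a physical block of -1 tells the caller (the cluster layer)
 * that this range has no backing storage and should read back as zeroes.
 */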
2175 *ap->a_bpn = (daddr64_t)-1;
2176 retval = 0;
2177 break;
2178 case RL_OVERLAPENDSAFTER:
2179 /* start_offset < rl_start, end_offset < rl_end */
2180 case RL_NOOVERLAP:
2181 break;
2182 }
2183 goto exit;
2184 }
2185
2186 /* MapFileBlockC() found a valid extent in the filefork. Search the
2187 * mapping information further for invalid file ranges
2188 */
2189 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
2190 ap->a_foffset + (off_t)bytesContAvail - 1,
2191 &invalid_range);
2192 if (overlaptype != RL_NOOVERLAP) {
2193 switch(overlaptype) {
2194 case RL_MATCHINGOVERLAP:
2195 case RL_OVERLAPCONTAINSRANGE:
2196 case RL_OVERLAPSTARTSBEFORE:
2197 /* There's no valid block for this byte offset */
2198 *ap->a_bpn = (daddr64_t)-1;
2199 /* There's no point limiting the amount to be returned
2200 * if the invalid range that was hit extends all the way
2201 * to the EOF (i.e. there are no valid bytes between the
2202 * end of this range and the file's EOF):
2203 */
2204 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
2205 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
2206 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
2207 }
2208 break;
2209
2210 case RL_OVERLAPISCONTAINED:
2211 case RL_OVERLAPENDSAFTER:
2212 /* The range of interest hits an invalid block before the end: */
2213 if (invalid_range->rl_start == ap->a_foffset) {
2214 /* There's actually no valid information to be had starting here: */
2215 *ap->a_bpn = (daddr64_t)-1;
2216 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
2217 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
2218 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
2219 }
2220 } else {
2221 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
2222 }
2223 break;
2224
2225 case RL_NOOVERLAP:
2226 break;
2227 } /* end switch */
2228 if (bytesContAvail > ap->a_size)
2229 bytesContAvail = ap->a_size;
2230 }
2231
2232 exit:
2233 if (retval == 0) {
2234 if (ap->a_run)
2235 *ap->a_run = bytesContAvail;
2236
2237 if (ap->a_poff)
2238 *(int *)ap->a_poff = 0;
2239 }
2240
2241 if (tooklock)
2242 hfs_unlock(cp);
2243
2244 return (MacToVFSError(retval));
2245 }
2246
2247
2248 /*
2249 * Prepare and issue the I/O.
2250 * buf_strategy() knows how to deal
2251 * with requests that require
2252 * fragmented I/Os.
2253 */
2254 int
2255 hfs_vnop_strategy(struct vnop_strategy_args *ap)
2256 {
2257 buf_t bp = ap->a_bp;
2258 vnode_t vp = buf_vnode(bp);
2259
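/*
 * buf_strategy() resolves any remaining logical-to-physical mapping
 * (calling back into hfs_vnop_blockmap as needed), splits requests that
 * span discontiguous extents, and issues the I/O against the device vnode.
 */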
2260 return (buf_strategy(VTOHFS(vp)->hfs_devvp, ap));
2261 }
2262
2263
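/*
 * Grow or shrink a fork to 'length' bytes: growing allocates any missing
 * physical blocks (recording not-yet-zeroed areas in the invalid-range
 * list); shrinking releases blocks via TruncateFileC() and updates quotas.
 * Callers hold the cnode lock; it is dropped briefly around cluster_write()
 * below.
 */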
2264 static int
2265 do_hfs_truncate(struct vnode *vp, off_t length, int flags, vfs_context_t context)
2266 {
2267 register struct cnode *cp = VTOC(vp);
2268 struct filefork *fp = VTOF(vp);
2269 struct proc *p = vfs_context_proc(context);
2270 kauth_cred_t cred = vfs_context_ucred(context);
2271 int retval;
2272 off_t bytesToAdd;
2273 off_t actualBytesAdded;
2274 off_t filebytes;
2275 u_long fileblocks;
2276 int blksize;
2277 struct hfsmount *hfsmp;
2278 int lockflags;
2279
2280 blksize = VTOVCB(vp)->blockSize;
2281 fileblocks = fp->ff_blocks;
2282 filebytes = (off_t)fileblocks * (off_t)blksize;
2283
2284 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
2285 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
2286
2287 if (length < 0)
2288 return (EINVAL);
2289
2290 /* This should only happen with a corrupt filesystem */
2291 if ((off_t)fp->ff_size < 0)
2292 return (EINVAL);
2293
2294 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
2295 return (EFBIG);
2296
2297 hfsmp = VTOHFS(vp);
2298
2299 retval = E_NONE;
2300
2301 /* Files that are changing size are not hot file candidates. */
2302 if (hfsmp->hfc_stage == HFC_RECORDING) {
2303 fp->ff_bytesread = 0;
2304 }
2305
2306 /*
2307 * We cannot just check if fp->ff_size == length (as an optimization)
2308 * since there may be extra physical blocks that also need truncation.
2309 */
2310 #if QUOTA
2311 if ((retval = hfs_getinoquota(cp)))
2312 return(retval);
2313 #endif /* QUOTA */
2314
2315 /*
2316 * Lengthen the size of the file. We must ensure that the
2317 * last byte of the file is allocated. Since the smallest
2318 * value of ff_size is 0, length will be at least 1.
2319 */
2320 if (length > (off_t)fp->ff_size) {
2321 #if QUOTA
2322 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
2323 cred, 0);
2324 if (retval)
2325 goto Err_Exit;
2326 #endif /* QUOTA */
2327 /*
2328 * If we don't have enough physical space then
2329 * we need to extend the physical size.
2330 */
2331 if (length > filebytes) {
2332 int eflags;
2333 u_long blockHint = 0;
2334
2335 /* All or nothing and don't round up to clumpsize. */
2336 eflags = kEFAllMask | kEFNoClumpMask;
2337
2338 if (cred && suser(cred, NULL) != 0)
2339 eflags |= kEFReserveMask; /* keep a reserve */
2340
2341 /*
2342 * Allocate Journal and Quota files in metadata zone.
2343 */
2344 if (filebytes == 0 &&
2345 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
2346 hfs_virtualmetafile(cp)) {
2347 eflags |= kEFMetadataMask;
2348 blockHint = hfsmp->hfs_metazone_start;
2349 }
2350 if (hfs_start_transaction(hfsmp) != 0) {
2351 retval = EINVAL;
2352 goto Err_Exit;
2353 }
2354
2355 /* Protect extents b-tree and allocation bitmap */
2356 lockflags = SFL_BITMAP;
2357 if (overflow_extents(fp))
2358 lockflags |= SFL_EXTENTS;
2359 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2360
2361 while ((length > filebytes) && (retval == E_NONE)) {
2362 bytesToAdd = length - filebytes;
2363 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
2364 (FCB*)fp,
2365 bytesToAdd,
2366 blockHint,
2367 eflags,
2368 &actualBytesAdded));
2369
2370 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2371 if (actualBytesAdded == 0 && retval == E_NONE) {
2372 if (length > filebytes)
2373 length = filebytes;
2374 break;
2375 }
2376 } /* endwhile */
2377
2378 hfs_systemfile_unlock(hfsmp, lockflags);
2379
2380 if (hfsmp->jnl) {
2381 (void) hfs_update(vp, TRUE);
2382 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2383 }
2384
2385 hfs_end_transaction(hfsmp);
2386
2387 if (retval)
2388 goto Err_Exit;
2389
2390 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2391 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
2392 }
2393
2394 if (!(flags & IO_NOZEROFILL)) {
2395 if (UBCINFOEXISTS(vp) && (vnode_issystem(vp) == 0) && retval == E_NONE) {
2396 struct rl_entry *invalid_range;
2397 off_t zero_limit;
2398
2399 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
2400 if (length < zero_limit) zero_limit = length;
2401
2402 if (length > (off_t)fp->ff_size) {
2403 struct timeval tv;
2404
2405 /* Extending the file: time to fill out the current last page w. zeroes? */
2406 if ((fp->ff_size & PAGE_MASK_64) &&
2407 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
2408 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
2409
2410 /* There's some valid data at the start of the (current) last page
2411 of the file, so zero out the remainder of that page to ensure the
2412 entire page contains valid data. Since there is no invalid range
2413 possible past the (current) eof, there's no need to remove anything
2414 from the invalid range list before calling cluster_write(): */
2415 hfs_unlock(cp);
2416 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
2417 fp->ff_size, (off_t)0,
2418 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
2419 hfs_lock(cp, HFS_FORCE_LOCK);
2420 if (retval) goto Err_Exit;
2421
2422 /* Merely invalidate the remaining area, if necessary: */
2423 if (length > zero_limit) {
2424 microuptime(&tv);
2425 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
2426 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
2427 }
2428 } else {
2429 /* The page containing the (current) eof is invalid: just add the
2430 remainder of the page to the invalid list, along with the area
2431 being newly allocated:
2432 */
2433 microuptime(&tv);
2434 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
2435 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
2436 };
2437 }
2438 } else {
2439 panic("hfs_truncate: invoked on non-UBC object?!");
2440 };
2441 }
2442 cp->c_touch_modtime = TRUE;
2443 fp->ff_size = length;
2444
2445 } else { /* Shorten the size of the file */
2446
2447 if ((off_t)fp->ff_size > length) {
2448 /* Any space previously marked as invalid is now irrelevant: */
2449 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2450 }
2451
2452 /*
2453 * Account for any unmapped blocks. Note that the new
2454 * file length can still end up with unmapped blocks.
2455 */
2456 if (fp->ff_unallocblocks > 0) {
2457 u_int32_t finalblks;
2458 u_int32_t loanedBlocks;
2459
2460 HFS_MOUNT_LOCK(hfsmp, TRUE);
2461
2462 loanedBlocks = fp->ff_unallocblocks;
2463 cp->c_blocks -= loanedBlocks;
2464 fp->ff_blocks -= loanedBlocks;
2465 fp->ff_unallocblocks = 0;
2466
2467 hfsmp->loanedBlocks -= loanedBlocks;
2468
2469 finalblks = (length + blksize - 1) / blksize;
2470 if (finalblks > fp->ff_blocks) {
2471 /* calculate required unmapped blocks */
2472 loanedBlocks = finalblks - fp->ff_blocks;
2473 hfsmp->loanedBlocks += loanedBlocks;
2474
2475 fp->ff_unallocblocks = loanedBlocks;
2476 cp->c_blocks += loanedBlocks;
2477 fp->ff_blocks += loanedBlocks;
2478 }
2479 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2480 }
2481
2482 /*
2483 * For a TBE process the deallocation of the file blocks is
2484 * delayed until the file is closed. And hfs_close calls
2485 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2486 * isn't set, we make sure this isn't a TBE process.
2487 */
2488 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2489 #if QUOTA
2490 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2491 #endif /* QUOTA */
2492 if (hfs_start_transaction(hfsmp) != 0) {
2493 retval = EINVAL;
2494 goto Err_Exit;
2495 }
2496
2497 if (fp->ff_unallocblocks == 0) {
2498 /* Protect extents b-tree and allocation bitmap */
2499 lockflags = SFL_BITMAP;
2500 if (overflow_extents(fp))
2501 lockflags |= SFL_EXTENTS;
2502 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2503
2504 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2505 (FCB*)fp, length, false));
2506
2507 hfs_systemfile_unlock(hfsmp, lockflags);
2508 }
2509 if (hfsmp->jnl) {
2510 if (retval == 0) {
2511 fp->ff_size = length;
2512 }
2513 (void) hfs_update(vp, TRUE);
2514 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2515 }
2516
2517 hfs_end_transaction(hfsmp);
2518
2519 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2520 if (retval)
2521 goto Err_Exit;
2522 #if QUOTA
2523 /* These are bytesreleased */
2524 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2525 #endif /* QUOTA */
2526 }
2527 /* Only set update flag if the logical length changes */
2528 if ((off_t)fp->ff_size != length)
2529 cp->c_touch_modtime = TRUE;
2530 fp->ff_size = length;
2531 }
2532 cp->c_touch_chgtime = TRUE; /* status changed */
2533 cp->c_touch_modtime = TRUE; /* file data was modified */
2534 retval = hfs_update(vp, MNT_WAIT);
2535 if (retval) {
2536 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2537 -1, -1, -1, retval, 0);
2538 }
2539
2540 Err_Exit:
2541
2542 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2543 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2544
2545 return (retval);
2546 }
2547
2548
2549
2550 /*
2551 * Truncate a cnode to at most length size, freeing (or adding) the
2552 * disk blocks.
2553 */
2554 __private_extern__
2555 int
2556 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2557 vfs_context_t context)
2558 {
2559 struct filefork *fp = VTOF(vp);
2560 off_t filebytes;
2561 u_long fileblocks;
2562 int blksize, error = 0;
2563 struct cnode *cp = VTOC(vp);
2564
2565 /* Cannot truncate an HFS directory! */
2566 if (vnode_isdir(vp)) {
2567 return (EISDIR);
2568 }
2569 /* A swap file cannot change size. */
2570 if (vnode_isswap(vp) && (length != 0)) {
2571 return (EPERM);
2572 }
2573
2574 blksize = VTOVCB(vp)->blockSize;
2575 fileblocks = fp->ff_blocks;
2576 filebytes = (off_t)fileblocks * (off_t)blksize;
2577
2578 //
2579 // Have to do this here so that we don't wind up with
2580 // i/o pending for blocks that are about to be released
2581 // if we truncate the file.
2582 //
2583 // If skipsetsize is set, then the caller is responsible
2584 // for the ubc_setsize.
2585 //
2586 if (!skipsetsize)
2587 ubc_setsize(vp, length);
2588
2589 // have to loop truncating or growing files that are
2590 // really big because otherwise transactions can get
2591 // enormous and consume too many kernel resources.
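// For example, shrinking a file with overflow extents by many gigabytes is
// done in HFS_BIGFILE_SIZE steps, each in its own do_hfs_truncate() call
// and journal transaction, rather than as one enormous transaction.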
2592
2593 if (length < filebytes) {
2594 while (filebytes > length) {
2595 if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
2596 filebytes -= HFS_BIGFILE_SIZE;
2597 } else {
2598 filebytes = length;
2599 }
2600 cp->c_flag |= C_FORCEUPDATE;
2601 error = do_hfs_truncate(vp, filebytes, flags, context);
2602 if (error)
2603 break;
2604 }
2605 } else if (length > filebytes) {
2606 while (filebytes < length) {
2607 if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
2608 filebytes += HFS_BIGFILE_SIZE;
2609 } else {
2610 filebytes = length;
2611 }
2612 cp->c_flag |= C_FORCEUPDATE;
2613 error = do_hfs_truncate(vp, filebytes, flags, context);
2614 if (error)
2615 break;
2616 }
2617 } else /* Same logical size */ {
2618
2619 error = do_hfs_truncate(vp, length, flags, context);
2620 }
2621 /* Files that are changing size are not hot file candidates. */
2622 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2623 fp->ff_bytesread = 0;
2624 }
2625
2626 return (error);
2627 }
2628
2629
2630
2631 /*
2632 * Preallocate file storage space.
2633 */
2634 int
2635 hfs_vnop_allocate(struct vnop_allocate_args /* {
2636 vnode_t a_vp;
2637 off_t a_length;
2638 u_int32_t a_flags;
2639 off_t *a_bytesallocated;
2640 off_t a_offset;
2641 vfs_context_t a_context;
2642 } */ *ap)
2643 {
2644 struct vnode *vp = ap->a_vp;
2645 struct cnode *cp;
2646 struct filefork *fp;
2647 ExtendedVCB *vcb;
2648 off_t length = ap->a_length;
2649 off_t startingPEOF;
2650 off_t moreBytesRequested;
2651 off_t actualBytesAdded;
2652 off_t filebytes;
2653 u_long fileblocks;
2654 int retval, retval2;
2655 u_int32_t blockHint;
2656 u_int32_t extendFlags; /* For call to ExtendFileC */
2657 struct hfsmount *hfsmp;
2658 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2659 int lockflags;
2660
2661 *(ap->a_bytesallocated) = 0;
2662
2663 if (!vnode_isreg(vp))
2664 return (EISDIR);
2665 if (length < (off_t)0)
2666 return (EINVAL);
2667
2668 cp = VTOC(vp);
2669
2670 hfs_lock_truncate(cp, TRUE);
2671
2672 if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2673 goto Err_Exit;
2674 }
2675
2676 fp = VTOF(vp);
2677 hfsmp = VTOHFS(vp);
2678 vcb = VTOVCB(vp);
2679
2680 fileblocks = fp->ff_blocks;
2681 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2682
2683 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2684 retval = EINVAL;
2685 goto Err_Exit;
2686 }
2687
2688 /* Fill in the flags word for the call to Extend the file */
2689
2690 extendFlags = kEFNoClumpMask;
2691 if (ap->a_flags & ALLOCATECONTIG)
2692 extendFlags |= kEFContigMask;
2693 if (ap->a_flags & ALLOCATEALL)
2694 extendFlags |= kEFAllMask;
2695 if (cred && suser(cred, NULL) != 0)
2696 extendFlags |= kEFReserveMask;
2697
2698 retval = E_NONE;
2699 blockHint = 0;
2700 startingPEOF = filebytes;
2701
2702 if (ap->a_flags & ALLOCATEFROMPEOF)
2703 length += filebytes;
2704 else if (ap->a_flags & ALLOCATEFROMVOL)
2705 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2706
2707 /* If no changes are necessary, then we're done */
2708 if (filebytes == length)
2709 goto Std_Exit;
2710
2711 /*
2712 * Lengthen the size of the file. We must ensure that the
2713 * last byte of the file is allocated. Since the smallest
2714 * value of filebytes is 0, length will be at least 1.
2715 */
2716 if (length > filebytes) {
2717 off_t total_bytes_added = 0, orig_request_size;
2718
2719 orig_request_size = moreBytesRequested = length - filebytes;
2720
2721 #if QUOTA
2722 retval = hfs_chkdq(cp,
2723 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2724 cred, 0);
2725 if (retval)
2726 goto Err_Exit;
2727
2728 #endif /* QUOTA */
2729 /*
2730 * Metadata zone checks.
2731 */
2732 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2733 /*
2734 * Allocate Journal and Quota files in metadata zone.
2735 */
2736 if (hfs_virtualmetafile(cp)) {
2737 extendFlags |= kEFMetadataMask;
2738 blockHint = hfsmp->hfs_metazone_start;
2739 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2740 (blockHint <= hfsmp->hfs_metazone_end)) {
2741 /*
2742 * Move blockHint outside metadata zone.
2743 */
2744 blockHint = hfsmp->hfs_metazone_end + 1;
2745 }
2746 }
2747
2748
2749 while ((length > filebytes) && (retval == E_NONE)) {
2750 off_t bytesRequested;
2751
2752 if (hfs_start_transaction(hfsmp) != 0) {
2753 retval = EINVAL;
2754 goto Err_Exit;
2755 }
2756
2757 /* Protect extents b-tree and allocation bitmap */
2758 lockflags = SFL_BITMAP;
2759 if (overflow_extents(fp))
2760 lockflags |= SFL_EXTENTS;
2761 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2762
2763 if (moreBytesRequested >= HFS_BIGFILE_SIZE) {
2764 bytesRequested = HFS_BIGFILE_SIZE;
2765 } else {
2766 bytesRequested = moreBytesRequested;
2767 }
2768
2769 retval = MacToVFSError(ExtendFileC(vcb,
2770 (FCB*)fp,
2771 bytesRequested,
2772 blockHint,
2773 extendFlags,
2774 &actualBytesAdded));
2775
2776 if (retval == E_NONE) {
2777 *(ap->a_bytesallocated) += actualBytesAdded;
2778 total_bytes_added += actualBytesAdded;
2779 moreBytesRequested -= actualBytesAdded;
2780 if (blockHint != 0) {
2781 blockHint += actualBytesAdded / vcb->blockSize;
2782 }
2783 }
2784 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2785
2786 hfs_systemfile_unlock(hfsmp, lockflags);
2787
2788 if (hfsmp->jnl) {
2789 (void) hfs_update(vp, TRUE);
2790 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2791 }
2792
2793 hfs_end_transaction(hfsmp);
2794 }
2795
2796
2797 /*
2798 * if we get an error and no changes were made then exit
2799 * otherwise we must do the hfs_update to reflect the changes
2800 */
2801 if (retval && (startingPEOF == filebytes))
2802 goto Err_Exit;
2803
2804 /*
2805 * Adjust the bytes-allocated value reported to the caller to be allocation
2806 * block aligned, not clump size aligned.
2807 * NOTE: So what we are reporting does not affect reality
2808 * until the file is closed, when we truncate the file to allocation
2809 * block size.
2810 */
2811 if (total_bytes_added != 0 && orig_request_size < total_bytes_added)
2812 *(ap->a_bytesallocated) =
2813 roundup(orig_request_size, (off_t)vcb->blockSize);
2814
2815 } else { /* Shorten the size of the file */
2816
2817 if (fp->ff_size > length) {
2818 /*
2819 * Any buffers that are past the truncation point need to be
2820 * invalidated (to maintain buffer cache consistency).
2821 */
2822 }
2823
2824 retval = hfs_truncate(vp, length, 0, 0, ap->a_context);
2825 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2826
2827 /*
2828 * if we get an error and no changes were made then exit
2829 * otherwise we must do the hfs_update to reflect the changes
2830 */
2831 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2832 #if QUOTA
2833 /* These are bytesreleased */
2834 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2835 #endif /* QUOTA */
2836
2837 if (fp->ff_size > filebytes) {
2838 fp->ff_size = filebytes;
2839
2840 hfs_unlock(cp);
2841 ubc_setsize(vp, fp->ff_size);
2842 hfs_lock(cp, HFS_FORCE_LOCK);
2843 }
2844 }
2845
2846 Std_Exit:
2847 cp->c_touch_chgtime = TRUE;
2848 cp->c_touch_modtime = TRUE;
2849 retval2 = hfs_update(vp, MNT_WAIT);
2850
2851 if (retval == 0)
2852 retval = retval2;
2853 Err_Exit:
2854 hfs_unlock_truncate(cp, TRUE);
2855 hfs_unlock(cp);
2856 return (retval);
2857 }
2858
2859
2860 /*
2861 * Pagein for HFS filesystem
2862 */
2863 int
2864 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2865 /*
2866 struct vnop_pagein_args {
2867 vnode_t a_vp,
2868 upl_t a_pl,
2869 vm_offset_t a_pl_offset,
2870 off_t a_f_offset,
2871 size_t a_size,
2872 int a_flags
2873 vfs_context_t a_context;
2874 };
2875 */
2876 {
2877 vnode_t vp = ap->a_vp;
2878 int error;
2879
2880 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2881 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2882 /*
2883 * Keep track of blocks read.
2884 */
2885 if (!vnode_isswap(vp) && VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2886 struct cnode *cp;
2887 struct filefork *fp;
2888 int bytesread;
2889 int took_cnode_lock = 0;
2890
2891 cp = VTOC(vp);
2892 fp = VTOF(vp);
2893
2894 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2895 bytesread = fp->ff_size;
2896 else
2897 bytesread = ap->a_size;
2898
2899 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2900 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff && cp->c_lockowner != current_thread()) {
2901 hfs_lock(cp, HFS_FORCE_LOCK);
2902 took_cnode_lock = 1;
2903 }
2904 /*
2905 * If this file hasn't been seen since the start of
2906 * the current sampling period then start over.
2907 */
2908 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2909 struct timeval tv;
2910
2911 fp->ff_bytesread = bytesread;
2912 microtime(&tv);
2913 cp->c_atime = tv.tv_sec;
2914 } else {
2915 fp->ff_bytesread += bytesread;
2916 }
2917 cp->c_touch_acctime = TRUE;
2918 if (took_cnode_lock)
2919 hfs_unlock(cp);
2920 }
2921 return (error);
2922 }
2923
2924 /*
2925 * Pageout for HFS filesystem.
2926 */
2927 int
2928 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2929 /*
2930 struct vnop_pageout_args {
2931 vnode_t a_vp,
2932 upl_t a_pl,
2933 vm_offset_t a_pl_offset,
2934 off_t a_f_offset,
2935 size_t a_size,
2936 int a_flags
2937 vfs_context_t a_context;
2938 };
2939 */
2940 {
2941 vnode_t vp = ap->a_vp;
2942 struct cnode *cp;
2943 struct filefork *fp;
2944 int retval;
2945 off_t filesize;
2946
2947 cp = VTOC(vp);
2948 fp = VTOF(vp);
2949
2950 if (vnode_isswap(vp)) {
2951 filesize = fp->ff_size;
2952 } else {
2953 off_t end_of_range;
2954 int tooklock = 0;
2955
2956 if (cp->c_lockowner != current_thread()) {
2957 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2958 if (!(ap->a_flags & UPL_NOCOMMIT)) {
2959 ubc_upl_abort_range(ap->a_pl,
2960 ap->a_pl_offset,
2961 ap->a_size,
2962 UPL_ABORT_FREE_ON_EMPTY);
2963 }
2964 return (retval);
2965 }
2966 tooklock = 1;
2967 }
2968
2969 filesize = fp->ff_size;
2970 end_of_range = ap->a_f_offset + ap->a_size - 1;
2971
2972 if (end_of_range >= filesize) {
2973 end_of_range = (off_t)(filesize - 1);
2974 }
2975 if (ap->a_f_offset < filesize) {
2976 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2977 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2978 }
2979
2980 if (tooklock) {
2981 hfs_unlock(cp);
2982 }
2983 }
2984
2985 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2986 ap->a_size, filesize, ap->a_flags);
2987
2988 /*
2989 * If data was written, and setuid or setgid bits are set and
2990 * this process is not the superuser then clear the setuid and
2991 * setgid bits as a precaution against tampering.
2992 */
2993 if ((retval == 0) &&
2994 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2995 (vfs_context_suser(ap->a_context) != 0)) {
2996 hfs_lock(cp, HFS_FORCE_LOCK);
2997 cp->c_mode &= ~(S_ISUID | S_ISGID);
2998 cp->c_touch_chgtime = TRUE;
2999 hfs_unlock(cp);
3000 }
3001 return (retval);
3002 }
3003
3004 /*
3005 * Intercept B-Tree node writes to unswap them if necessary.
3006 */
3007 int
3008 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
3009 {
3010 int retval = 0;
3011 register struct buf *bp = ap->a_bp;
3012 register struct vnode *vp = buf_vnode(bp);
3013 BlockDescriptor block;
3014
3015 /* Trap B-Tree writes */
3016 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
3017 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
3018 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
3019 (vp == VTOHFS(vp)->hfc_filevp)) {
3020
3021 /*
3022 * Swap and validate the node if it is in native byte order.
3023 * This is always true on big endian, so we always validate
3024 * before writing here. On little endian, the node typically has
3025 * been swapped and validated when it was written to the journal,
3026 * so we won't do anything here.
3027 */
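/*
 * The last u_int16_t of a B-tree node holds the offset of its first
 * record, which is always sizeof(BTNodeDescriptor) == 14 (0x000e), so
 * finding that value in host byte order means the node has not yet been
 * swapped to big-endian (disk) order.
 */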
3028 if (((u_int16_t *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
3029 /* Prepare the block pointer */
3030 block.blockHeader = bp;
3031 block.buffer = (char *)buf_dataptr(bp);
3032 block.blockNum = buf_lblkno(bp);
3033 /* not found in cache ==> came from disk */
3034 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
3035 block.blockSize = buf_count(bp);
3036
3037 /* Endian un-swap B-Tree node */
3038 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig, false);
3039 if (retval)
3040 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
3041 }
3042 }
3043
3044 /* This buffer shouldn't be locked anymore but if it is clear it */
3045 if ((buf_flags(bp) & B_LOCKED)) {
3046 // XXXdbg
3047 if (VTOHFS(vp)->jnl) {
3048 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
3049 }
3050 buf_clearflags(bp, B_LOCKED);
3051 }
3052 retval = vn_bwrite (ap);
3053
3054 return (retval);
3055 }
3056
3057 /*
3058 * Relocate a file to a new location on disk
3059 * cnode must be locked on entry
3060 *
3061 * Relocation occurs by cloning the file's data from its
3062 * current set of blocks to a new set of blocks. During
3063 * the relocation all of the blocks (old and new) are
3064 * owned by the file.
3065 *
3066 * -----------------
3067 * |///////////////|
3068 * -----------------
3069 * 0 N (file offset)
3070 *
3071 * ----------------- -----------------
3072 * |///////////////| | | STEP 1 (acquire new blocks)
3073 * ----------------- -----------------
3074 * 0 N N+1 2N
3075 *
3076 * ----------------- -----------------
3077 * |///////////////| |///////////////| STEP 2 (clone data)
3078 * ----------------- -----------------
3079 * 0 N N+1 2N
3080 *
3081 * -----------------
3082 * |///////////////| STEP 3 (head truncate blocks)
3083 * -----------------
3084 * 0 N
3085 *
3086 * During steps 2 and 3 page-outs to file offsets less
3087 * than or equal to N are suspended.
3088 *
3089 * During step 3 page-ins to the file get suspended.
3090 */
3091 __private_extern__
3092 int
3093 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
3094 struct proc *p)
3095 {
3096 struct cnode *cp;
3097 struct filefork *fp;
3098 struct hfsmount *hfsmp;
3099 u_int32_t headblks;
3100 u_int32_t datablks;
3101 u_int32_t blksize;
3102 u_int32_t growsize;
3103 u_int32_t nextallocsave;
3104 daddr64_t sector_a, sector_b;
3105 int eflags;
3106 off_t newbytes;
3107 int retval;
3108 int lockflags = 0;
3109 int took_trunc_lock = 0;
3110 int started_tr = 0;
3111 enum vtype vnodetype;
3112
3113 vnodetype = vnode_vtype(vp);
3114 if (vnodetype != VREG && vnodetype != VLNK) {
3115 return (EPERM);
3116 }
3117
3118 hfsmp = VTOHFS(vp);
3119 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
3120 return (ENOSPC);
3121 }
3122
3123 cp = VTOC(vp);
3124 fp = VTOF(vp);
3125 if (fp->ff_unallocblocks)
3126 return (EINVAL);
3127 blksize = hfsmp->blockSize;
3128 if (blockHint == 0)
3129 blockHint = hfsmp->nextAllocation;
3130
3131 if ((fp->ff_size > 0x7fffffff) ||
3132 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
3133 return (EFBIG);
3134 }
3135
3136 //
3137 // We do not believe that this call to hfs_fsync() is
3138 // necessary and it causes a journal transaction
3139 // deadlock so we are removing it.
3140 //
3141 //if (vnodetype == VREG && !vnode_issystem(vp)) {
3142 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
3143 // if (retval)
3144 // return (retval);
3145 //}
3146
3147 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
3148 hfs_unlock(cp);
3149 hfs_lock_truncate(cp, TRUE);
3150 /* Force lock since callers expect the lock to be held. */
3151 if ((retval = hfs_lock(cp, HFS_FORCE_LOCK))) {
3152 hfs_unlock_truncate(cp, TRUE);
3153 return (retval);
3154 }
3155 /* No need to continue if file was removed. */
3156 if (cp->c_flag & C_NOEXISTS) {
3157 hfs_unlock_truncate(cp, TRUE);
3158 return (ENOENT);
3159 }
3160 took_trunc_lock = 1;
3161 }
3162 headblks = fp->ff_blocks;
3163 datablks = howmany(fp->ff_size, blksize);
3164 growsize = datablks * blksize;
3165 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
3166 if (blockHint >= hfsmp->hfs_metazone_start &&
3167 blockHint <= hfsmp->hfs_metazone_end)
3168 eflags |= kEFMetadataMask;
3169
3170 if (hfs_start_transaction(hfsmp) != 0) {
3171 if (took_trunc_lock)
3172 hfs_unlock_truncate(cp, TRUE);
3173 return (EINVAL);
3174 }
3175 started_tr = 1;
3176 /*
3177 * Protect the extents b-tree and the allocation bitmap
3178 * during MapFileBlockC and ExtendFileC operations.
3179 */
3180 lockflags = SFL_BITMAP;
3181 if (overflow_extents(fp))
3182 lockflags |= SFL_EXTENTS;
3183 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3184
3185 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
3186 if (retval) {
3187 retval = MacToVFSError(retval);
3188 goto out;
3189 }
3190
3191 /*
3192 * STEP 1 - acquire new allocation blocks.
3193 */
3194 nextallocsave = hfsmp->nextAllocation;
3195 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
3196 if (eflags & kEFMetadataMask) {
3197 HFS_MOUNT_LOCK(hfsmp, TRUE);
3198 HFS_UPDATE_NEXT_ALLOCATION(hfsmp, nextallocsave);
3199 MarkVCBDirty(hfsmp);
3200 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
3201 }
3202
3203 retval = MacToVFSError(retval);
3204 if (retval == 0) {
3205 cp->c_flag |= C_MODIFIED;
3206 if (newbytes < growsize) {
3207 retval = ENOSPC;
3208 goto restore;
3209 } else if (fp->ff_blocks < (headblks + datablks)) {
3210 printf("hfs_relocate: allocation failed\n");
3211 retval = ENOSPC;
3212 goto restore;
3213 }
3214
3215 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
3216 if (retval) {
3217 retval = MacToVFSError(retval);
3218 } else if ((sector_a + 1) == sector_b) {
3219 retval = ENOSPC;
3220 goto restore;
3221 } else if ((eflags & kEFMetadataMask) &&
3222 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
3223 hfsmp->hfs_metazone_end)) {
3224 const char * filestr;
3225 char emptystr = '\0';
3226
3227 if (cp->c_desc.cd_nameptr != NULL) {
3228 filestr = (const char *)&cp->c_desc.cd_nameptr[0];
3229 } else if (vnode_name(vp) != NULL) {
3230 filestr = vnode_name(vp);
3231 } else {
3232 filestr = &emptystr;
3233 }
3234 printf("hfs_relocate: %s didn't move into MDZ (%d blks)\n", filestr, fp->ff_blocks);
3235 retval = ENOSPC;
3236 goto restore;
3237 }
3238 }
3239 /* Done with system locks and journal for now. */
3240 hfs_systemfile_unlock(hfsmp, lockflags);
3241 lockflags = 0;
3242 hfs_end_transaction(hfsmp);
3243 started_tr = 0;
3244
3245 if (retval) {
3246 /*
3247 * Check to see if failure is due to excessive fragmentation.
3248 */
3249 if ((retval == ENOSPC) &&
3250 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
3251 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
3252 }
3253 goto out;
3254 }
3255 /*
3256 * STEP 2 - clone file data into the new allocation blocks.
3257 */
3258
3259 if (vnodetype == VLNK)
3260 retval = hfs_clonelink(vp, blksize, cred, p);
3261 else if (vnode_issystem(vp))
3262 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
3263 else
3264 retval = hfs_clonefile(vp, headblks, datablks, blksize);
3265
3266 /* Start transaction for step 3 or for a restore. */
3267 if (hfs_start_transaction(hfsmp) != 0) {
3268 retval = EINVAL;
3269 goto out;
3270 }
3271 started_tr = 1;
3272 if (retval)
3273 goto restore;
3274
3275 /*
3276 * STEP 3 - switch to cloned data and remove old blocks.
3277 */
3278 lockflags = SFL_BITMAP;
3279 if (overflow_extents(fp))
3280 lockflags |= SFL_EXTENTS;
3281 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3282
3283 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
3284
3285 hfs_systemfile_unlock(hfsmp, lockflags);
3286 lockflags = 0;
3287 if (retval)
3288 goto restore;
3289 out:
3290 if (took_trunc_lock)
3291 hfs_unlock_truncate(cp, TRUE);
3292
3293 if (lockflags) {
3294 hfs_systemfile_unlock(hfsmp, lockflags);
3295 lockflags = 0;
3296 }
3297
3298 /* Push cnode's new extent data to disk. */
3299 if (retval == 0) {
3300 (void) hfs_update(vp, MNT_WAIT);
3301 }
3302 if (hfsmp->jnl) {
3303 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
3304 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
3305 else
3306 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
3307 }
3308 exit:
3309 if (started_tr)
3310 hfs_end_transaction(hfsmp);
3311
3312 return (retval);
3313
3314 restore:
3315 if (fp->ff_blocks == headblks) {
3316 if (took_trunc_lock)
3317 hfs_unlock_truncate(cp, TRUE);
3318 goto exit;
3319 }
3320 /*
3321 * Give back any newly allocated space.
3322 */
3323 if (lockflags == 0) {
3324 lockflags = SFL_BITMAP;
3325 if (overflow_extents(fp))
3326 lockflags |= SFL_EXTENTS;
3327 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
3328 }
3329
3330 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
3331
3332 hfs_systemfile_unlock(hfsmp, lockflags);
3333 lockflags = 0;
3334
3335 if (took_trunc_lock)
3336 hfs_unlock_truncate(cp, TRUE);
3337 goto exit;
3338 }
3339
3340
3341 /*
3342 * Clone a symlink.
3343 *
3344 */
3345 static int
3346 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, __unused struct proc *p)
3347 {
3348 struct buf *head_bp = NULL;
3349 struct buf *tail_bp = NULL;
3350 int error;
3351
3352
3353 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
3354 if (error)
3355 goto out;
3356
3357 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
3358 if (tail_bp == NULL) {
3359 error = EIO;
3360 goto out;
3361 }
3362 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
3363 error = (int)buf_bwrite(tail_bp);
3364 out:
3365 if (head_bp) {
3366 buf_markinvalid(head_bp);
3367 buf_brelse(head_bp);
3368 }
3369 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
3370
3371 return (error);
3372 }
3373
3374 /*
3375 * Clone a file's data within the file.
3376 *
3377 */
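/*
 * The data is read (IO_NOCACHE) from file offsets [0, blkcnt * blksize)
 * and rewritten at writebase = blkstart * blksize, i.e. into the newly
 * allocated N..2N region shown in the hfs_relocate() diagram above.
 */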
3378 static int
3379 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
3380 {
3381 caddr_t bufp;
3382 size_t writebase;
3383 size_t bufsize;
3384 size_t copysize;
3385 size_t iosize;
3386 off_t filesize;
3387 size_t offset;
3388 uio_t auio;
3389 int error = 0;
3390
3391 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
3392 writebase = blkstart * blksize;
3393 copysize = blkcnt * blksize;
3394 iosize = bufsize = MIN(copysize, 128 * 1024);
3395 offset = 0;
3396
3397 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3398 return (ENOMEM);
3399 }
3400 hfs_unlock(VTOC(vp));
3401
3402 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
3403
3404 while (offset < copysize) {
3405 iosize = MIN(copysize - offset, iosize);
3406
3407 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
3408 uio_addiov(auio, (uintptr_t)bufp, iosize);
3409
3410 error = cluster_read(vp, auio, copysize, IO_NOCACHE);
3411 if (error) {
3412 printf("hfs_clonefile: cluster_read failed - %d\n", error);
3413 break;
3414 }
3415 if (uio_resid(auio) != 0) {
3416 printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
3417 error = EIO;
3418 break;
3419 }
3420
3421 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
3422 uio_addiov(auio, (uintptr_t)bufp, iosize);
3423
3424 error = cluster_write(vp, auio, filesize + offset,
3425 filesize + offset + iosize,
3426 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
3427 if (error) {
3428 printf("hfs_clonefile: cluster_write failed - %d\n", error);
3429 break;
3430 }
3431 if (uio_resid(auio) != 0) {
3432 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
3433 error = EIO;
3434 break;
3435 }
3436 offset += iosize;
3437 }
3438 uio_free(auio);
3439
3440 /*
3441 * No need to call ubc_sync_range or hfs_invalbuf
3442 * since the file was copied using IO_NOCACHE.
3443 */
3444
3445 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3446
3447 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
3448 return (error);
3449 }
3450
3451 /*
3452 * Clone a system (metadata) file.
3453 *
3454 */
3455 static int
3456 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
3457 kauth_cred_t cred, struct proc *p)
3458 {
3459 caddr_t bufp;
3460 char * offset;
3461 size_t bufsize;
3462 size_t iosize;
3463 struct buf *bp = NULL;
3464 daddr64_t blkno;
3465 daddr64_t blk;
3466 daddr64_t start_blk;
3467 daddr64_t last_blk;
3468 int breadcnt;
3469 int i;
3470 int error = 0;
3471
3472
3473 iosize = GetLogicalBlockSize(vp);
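/* Stage up to 1 MB per pass, rounded down to a multiple of the device's
 * logical block size. */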
3474 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
3475 breadcnt = bufsize / iosize;
3476
3477 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3478 return (ENOMEM);
3479 }
3480 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3481 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
3482 blkno = 0;
3483
3484 while (blkno < last_blk) {
3485 /*
3486 * Read up to a megabyte
3487 */
3488 offset = bufp;
3489 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3490 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3491 if (error) {
3492 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3493 goto out;
3494 }
3495 if (buf_count(bp) != iosize) {
3496 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3497 goto out;
3498 }
3499 bcopy((char *)buf_dataptr(bp), offset, iosize);
3500
3501 buf_markinvalid(bp);
3502 buf_brelse(bp);
3503 bp = NULL;
3504
3505 offset += iosize;
3506 }
3507
3508 /*
3509 * Write up to a megabyte
3510 */
3511 offset = bufp;
3512 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3513 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3514 if (bp == NULL) {
3515 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3516 error = EIO;
3517 goto out;
3518 }
3519 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3520 error = (int)buf_bwrite(bp);
3521 bp = NULL;
3522 if (error)
3523 goto out;
3524 offset += iosize;
3525 }
3526 }
3527 out:
3528 if (bp) {
3529 buf_brelse(bp);
3530 }
3531
3532 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3533
3534 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3535
3536 return (error);
3537 }