/*
 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* @(#)hfs_readwrite.c 1.0
 *
 * (c) 1998-2001 Apple Computer, Inc.  All Rights Reserved
 *
 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/uio.h>

#include <miscfs/specfs/specdev.h>

#include <sys/ubc.h>
#include <vm/vm_pageout.h>

#include <sys/kdebug.h>

#include "hfs.h"
#include "hfs_endian.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
#include "hfs_dbg.h"

extern int overflow_extents(struct filefork *fp);

#define can_cluster(size)  ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
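/*
 * A worked example for the macro above (illustrative values; MAXPHYSIO is
 * platform-defined): an 8192-byte block size is a whole multiple of 4096
 * and, on typical configurations, no larger than MAXPHYSIO/2, so it can go
 * through the cluster layer; a 6000-byte size fails the 4096-multiple test
 * and cannot.
 */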

enum {
    MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
};

extern u_int32_t GetLogicalBlockSize(struct vnode *vp);

static int  hfs_clonelink(struct vnode *, int, struct ucred *, struct proc *);
static int  hfs_clonefile(struct vnode *, int, int, int, struct ucred *, struct proc *);
static int  hfs_clonesysfile(struct vnode *, int, int, int, struct ucred *, struct proc *);


/*****************************************************************************
*
*   Operations on vnodes
*
*****************************************************************************/

/*
#% read     vp  L L L
#
 vop_read {
     IN struct vnode *vp;
     INOUT struct uio *uio;
     IN int ioflag;
     IN struct ucred *cred;

     */

int
hfs_read(ap)
    struct vop_read_args /* {
        struct vnode *a_vp;
        struct uio *a_uio;
        int a_ioflag;
        struct ucred *a_cred;
    } */ *ap;
{
    register struct uio *uio = ap->a_uio;
    register struct vnode *vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    int devBlockSize = 0;
    int retval = 0;
    off_t filesize;
    off_t filebytes;
    off_t start_resid = uio->uio_resid;


    /* Preflight checks */
    if ((vp->v_type != VREG) || !UBCINFOEXISTS(vp))
        return (EPERM);     /* can only read regular files */
    if (uio->uio_resid == 0)
        return (0);         /* Nothing left to do */
    if (uio->uio_offset < 0)
        return (EINVAL);    /* can't read from a negative offset */

    cp = VTOC(vp);
    fp = VTOF(vp);
    filesize = fp->ff_size;
    filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize;
    if (uio->uio_offset > filesize) {
        if ((!ISHFSPLUS(VTOVCB(vp))) && (uio->uio_offset > (off_t)MAXHFSFILESIZE))
            return (EFBIG);
        else
            return (0);
    }

    VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
        (int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0);

    retval = cluster_read(vp, uio, filesize, devBlockSize, 0);

    cp->c_flag |= C_ACCESS;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
        (int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0);

    /*
     * Keep track of blocks read.
     */
    if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
        /*
         * If this file hasn't been seen since the start of
         * the current sampling period then start over.
         */
        if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
            fp->ff_bytesread = start_resid - uio->uio_resid;
            cp->c_atime = time.tv_sec;
        } else {
            fp->ff_bytesread += start_resid - uio->uio_resid;
        }
    }

    return (retval);
}

/*
 * Write data to a file or directory.
#% write    vp  L L L
#
 vop_write {
     IN struct vnode *vp;
     INOUT struct uio *uio;
     IN int ioflag;
     IN struct ucred *cred;

     */
int
hfs_write(ap)
    struct vop_write_args /* {
        struct vnode *a_vp;
        struct uio *a_uio;
        int a_ioflag;
        struct ucred *a_cred;
    } */ *ap;
{
    struct vnode *vp = ap->a_vp;
    struct uio *uio = ap->a_uio;
    struct cnode *cp;
    struct filefork *fp;
    struct proc *p;
    struct timeval tv;
    ExtendedVCB *vcb;
    int devBlockSize = 0;
    off_t origFileSize, writelimit, bytesToAdd;
    off_t actualBytesAdded;
    u_long resid;
    int eflags, ioflag;
    int retval;
    off_t filebytes;
    struct hfsmount *hfsmp;
    int started_tr = 0, grabbed_lock = 0;


    if (uio->uio_offset < 0)
        return (EINVAL);
    if (uio->uio_resid == 0)
        return (E_NONE);
    if ((vp->v_type != VREG) || !UBCINFOEXISTS(vp))
        return (EPERM);     /* Can only write regular files */

    ioflag = ap->a_ioflag;
    cp = VTOC(vp);
    fp = VTOF(vp);
    vcb = VTOVCB(vp);
    filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

    if (ioflag & IO_APPEND)
        uio->uio_offset = fp->ff_size;
    if ((cp->c_flags & APPEND) && uio->uio_offset != fp->ff_size)
        return (EPERM);

    // XXXdbg - don't allow modification of the journal or journal_info_block
    if (VTOHFS(vp)->jnl && cp->c_datafork) {
        struct HFSPlusExtentDescriptor *extd;

        extd = &cp->c_datafork->ff_extents[0];
        if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock || extd->startBlock == VTOHFS(vp)->jnl_start) {
            return EPERM;
        }
    }

    writelimit = uio->uio_offset + uio->uio_resid;

    /*
     * Maybe this should be above the vnode op call, but so long as
     * file servers have no limits, I don't think it matters.
     */
    p = uio->uio_procp;
    if (vp->v_type == VREG && p &&
        writelimit > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
        psignal(p, SIGXFSZ);
        return (EFBIG);
    }
    p = current_proc();

    VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);

    resid = uio->uio_resid;
    origFileSize = fp->ff_size;
    eflags = kEFDeferMask;  /* defer file block allocations */
    filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
        (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);
    retval = 0;

    /* Now test if we need to extend the file */
    /* Doing so will adjust the filebytes for us */

#if QUOTA
    if (writelimit > filebytes) {
        bytesToAdd = writelimit - filebytes;

        retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, vcb->blockSize)),
                   ap->a_cred, 0);
        if (retval)
            return (retval);
    }
#endif /* QUOTA */

    hfsmp = VTOHFS(vp);

#ifdef HFS_SPARSE_DEV
    /*
     * When the underlying device is sparse and space
     * is low (< 8MB), stop doing delayed allocations
     * and begin doing synchronous I/O.
     */
    if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
        (hfs_freeblks(hfsmp, 0) < 2048)) {
        eflags &= ~kEFDeferMask;
        ioflag |= IO_SYNC;
    }
#endif /* HFS_SPARSE_DEV */

    if (writelimit > filebytes) {
        hfs_global_shared_lock_acquire(hfsmp);
        grabbed_lock = 1;
    }
    if (hfsmp->jnl && (writelimit > filebytes)) {
        if (journal_start_transaction(hfsmp->jnl) != 0) {
            hfs_global_shared_lock_release(hfsmp);
            return EINVAL;
        }
        started_tr = 1;
    }

    while (writelimit > filebytes) {
        bytesToAdd = writelimit - filebytes;
        if (ap->a_cred && suser(ap->a_cred, NULL) != 0)
            eflags |= kEFReserveMask;

        /* lock extents b-tree (also protects volume bitmap) */
        retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, current_proc());
        if (retval != E_NONE)
            break;

        /* Files that are changing size are not hot file candidates. */
        if (hfsmp->hfc_stage == HFC_RECORDING) {
            fp->ff_bytesread = 0;
        }
        retval = MacToVFSError(ExtendFileC (vcb, (FCB*)fp, bytesToAdd,
                0, eflags, &actualBytesAdded));

        (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);
        if ((actualBytesAdded == 0) && (retval == E_NONE))
            retval = ENOSPC;
        if (retval != E_NONE)
            break;
        filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
            (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);
    }

    // XXXdbg
    if (started_tr) {
        tv = time;
        VOP_UPDATE(vp, &tv, &tv, 1);

        hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
        journal_end_transaction(hfsmp->jnl);
        started_tr = 0;
    }
    if (grabbed_lock) {
        hfs_global_shared_lock_release(hfsmp);
        grabbed_lock = 0;
    }

    if (retval == E_NONE) {
        off_t filesize;
        off_t zero_off;
        off_t tail_off;
        off_t inval_start;
        off_t inval_end;
        off_t io_start, io_end;
        int lflag;
        struct rl_entry *invalid_range;

        if (writelimit > fp->ff_size)
            filesize = writelimit;
        else
            filesize = fp->ff_size;

        lflag = (ioflag & IO_SYNC);

        if (uio->uio_offset <= fp->ff_size) {
            zero_off = uio->uio_offset & ~PAGE_MASK_64;
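            /* A worked example of the rounding above, assuming 4 KB pages:
               a write offset of 0x1A2B4 & ~PAGE_MASK_64 yields 0x1A000, the
               start of the page containing the write. */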

            /* Check whether the area between zero_off and the start of the
               transfer is invalid and should be zero-filled as part of the
               transfer:
             */
            if (uio->uio_offset > zero_off) {
                if (rl_scan(&fp->ff_invalidranges, zero_off, uio->uio_offset - 1, &invalid_range) != RL_NOOVERLAP)
                    lflag |= IO_HEADZEROFILL;
            }
        } else {
            off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;

            /* The bytes between fp->ff_size and uio->uio_offset must never be
               read without being zeroed.  The current last block is filled with zeroes
               if it holds valid data, but in all cases merely do a little bookkeeping
               to track the area from the end of the current last page to the start of
               the area actually written.  For the same reason only the bytes up to the
               start of the page where this write will start are invalidated; any remainder
               before uio->uio_offset is explicitly zeroed as part of the cluster_write.

               Note that inval_start, the start of the page after the current EOF,
               may be past the start of the write, in which case the zeroing
               will be handled by the cluster_write of the actual data.
             */
            inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
            inval_end = uio->uio_offset & ~PAGE_MASK_64;
            zero_off = fp->ff_size;
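            /* For instance (assuming 4 KB pages): with ff_size = 0x1234 and a
               write starting at uio_offset = 0x5100, inval_start rounds up to
               0x2000 and inval_end rounds down to 0x5000, so the pages covering
               0x2000-0x4FFF are merely tracked as invalid rather than zeroed
               eagerly. */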

            if ((fp->ff_size & PAGE_MASK_64) &&
                (rl_scan(&fp->ff_invalidranges,
                            eof_page_base,
                            fp->ff_size - 1,
                            &invalid_range) != RL_NOOVERLAP)) {
                /* The page containing the EOF is not valid, so the
                   entire page must be made inaccessible now.  If the write
                   starts on a page beyond the page containing the eof
                   (inval_end > eof_page_base), add the
                   whole page to the range to be invalidated.  Otherwise
                   (i.e. if the write starts on the same page), zero-fill
                   the entire page explicitly now:
                 */
                if (inval_end > eof_page_base) {
                    inval_start = eof_page_base;
                } else {
                    zero_off = eof_page_base;
                };
            };

            if (inval_start < inval_end) {
                /* There's some range of data that's going to be marked invalid */

                if (zero_off < inval_start) {
                    /* The pages between inval_start and inval_end are going to be invalidated,
                       and the actual write will start on a page past inval_end.  Now's the last
                       chance to zero-fill the page containing the EOF:
                     */
                    retval = cluster_write(vp, (struct uio *) 0,
                            fp->ff_size, inval_start,
                            zero_off, (off_t)0, devBlockSize,
                            lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
                    if (retval) goto ioerr_exit;
                };

                /* Mark the remaining area of the newly allocated space as invalid: */
                rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
                cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
                zero_off = fp->ff_size = inval_end;
            };

            if (uio->uio_offset > zero_off) lflag |= IO_HEADZEROFILL;
        };

        /* Check to see whether the area between the end of the write and the end of
           the page it falls in is invalid and should be zero-filled as part of the transfer:
         */
        tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
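        /* e.g. (4 KB pages) a write ending at writelimit = 0x5234 gives
           tail_off = 0x6000, subject to the clip against filesize below. */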
        if (tail_off > filesize) tail_off = filesize;
        if (tail_off > writelimit) {
            if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
                lflag |= IO_TAILZEROFILL;
            };
        };

        /*
         * if the write starts beyond the current EOF (possibly advanced in the
         * zeroing of the last block, above), then we'll zero fill from the current EOF
         * to where the write begins:
         *
         * NOTE: If (and ONLY if) the portion of the file about to be written is
         * before the current EOF it might be marked as invalid now and must be
         * made readable (removed from the invalid ranges) before cluster_write
         * tries to write it:
         */
        io_start = (lflag & IO_HEADZEROFILL) ? zero_off : uio->uio_offset;
        io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
        if (io_start < fp->ff_size) {
            rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
        };
        retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
                tail_off, devBlockSize, lflag | IO_NOZERODIRTY);

        if (uio->uio_offset > fp->ff_size) {
            fp->ff_size = uio->uio_offset;

            ubc_setsize(vp, fp->ff_size);       /* XXX check errors */
        }
        if (resid > uio->uio_resid)
            cp->c_flag |= C_CHANGE | C_UPDATE;
    }

    HFS_KNOTE(vp, NOTE_WRITE);

ioerr_exit:
    /*
     * If we successfully wrote any data, and we are not the superuser,
     * we clear the setuid and setgid bits as a precaution against
     * tampering.
     */
    if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
        cp->c_mode &= ~(S_ISUID | S_ISGID);

    if (retval) {
        if (ioflag & IO_UNIT) {
            (void)VOP_TRUNCATE(vp, origFileSize,
                ioflag & IO_SYNC, ap->a_cred, uio->uio_procp);
            uio->uio_offset -= resid - uio->uio_resid;
            uio->uio_resid = resid;
            filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
        }
    } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
        tv = time;
        retval = VOP_UPDATE(vp, &tv, &tv, 1);
    }
    vcb->vcbWrCnt++;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
        (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);

    return (retval);
}


#ifdef HFS_SPARSE_DEV
struct hfs_backingstoreinfo {
    int  signature;   /* == 3419115 */
    int  version;     /* version of this struct (1) */
    int  backingfd;   /* disk image file (on backing fs) */
    int  bandsize;    /* sparse disk image band size */
};

#define HFSIOC_SETBACKINGSTOREINFO  _IOW('h', 7, struct hfs_backingstoreinfo)
#define HFSIOC_CLRBACKINGSTOREINFO  _IO('h', 8)

#define HFS_SETBACKINGSTOREINFO  IOCBASECMD(HFSIOC_SETBACKINGSTOREINFO)
#define HFS_CLRBACKINGSTOREINFO  IOCBASECMD(HFSIOC_CLRBACKINGSTOREINFO)
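
/*
 * A hypothetical user-space sketch of how a disk-image helper might hand
 * the backing-store file descriptor to a sparse HFS volume.  The fsctl()
 * path and the literal values here are illustrative assumptions, not a
 * documented API:
 *
 *      struct hfs_backingstoreinfo bsi;
 *
 *      bsi.signature = 3419115;
 *      bsi.version   = 1;
 *      bsi.backingfd = imagefd;            // open fd for the disk image
 *      bsi.bandsize  = 8 * 1024 * 1024;    // sparse image band size
 *      fsctl(mntpath, HFS_SETBACKINGSTOREINFO, &bsi, 0);
 */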

#endif /* HFS_SPARSE_DEV */

/*

#% ioctl    vp  U U U
#
 vop_ioctl {
     IN struct vnode *vp;
     IN u_long command;
     IN caddr_t data;
     IN int fflag;
     IN struct ucred *cred;
     IN struct proc *p;

     */


/* ARGSUSED */
int
hfs_ioctl(ap)
    struct vop_ioctl_args /* {
        struct vnode *a_vp;
        int  a_command;
        caddr_t  a_data;
        int  a_fflag;
        struct ucred *a_cred;
        struct proc *a_p;
    } */ *ap;
{
    switch (ap->a_command) {

#ifdef HFS_SPARSE_DEV
    case HFS_SETBACKINGSTOREINFO: {
        struct hfsmount * hfsmp;
        struct vnode * bsfs_rootvp;
        struct vnode * di_vp;
        struct file * di_fp;
        struct hfs_backingstoreinfo *bsdata;
        int error = 0;

        hfsmp = VTOHFS(ap->a_vp);
        if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
            return (EALREADY);
        }
        if (ap->a_p->p_ucred->cr_uid != 0 &&
            ap->a_p->p_ucred->cr_uid != (HFSTOVFS(hfsmp))->mnt_stat.f_owner) {
            return (EACCES);  /* must be owner of file system */
        }
        bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
        if (bsdata == NULL) {
            return (EINVAL);
        }
        if ((error = fdgetf(ap->a_p, bsdata->backingfd, &di_fp))) {
            return (error);
        }
        if (fref(di_fp) == -1) {
            return (EBADF);
        }
        if (di_fp->f_type != DTYPE_VNODE) {
            frele(di_fp);
            return (EINVAL);
        }
        di_vp = (struct vnode *)di_fp->f_data;
        if (ap->a_vp->v_mount == di_vp->v_mount) {
            frele(di_fp);
            return (EINVAL);
        }

        /*
         * Obtain the backing fs root vnode and keep a reference
         * on it.  This reference will be dropped in hfs_unmount.
         */
        error = VFS_ROOT(di_vp->v_mount, &bsfs_rootvp);
        if (error) {
            frele(di_fp);
            return (error);
        }
        VOP_UNLOCK(bsfs_rootvp, 0, ap->a_p);  /* Hold on to the reference */

        hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
        hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
        hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
        hfsmp->hfs_sparsebandblks *= 4;
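        /* Presumably this keeps four bands' worth of allocation blocks as
           low-space headroom for the sparse image -- an inference from the
           free-block check in hfs_write, not a documented invariant. */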

        frele(di_fp);
        return (0);
    }
    case HFS_CLRBACKINGSTOREINFO: {
        struct hfsmount * hfsmp;
        struct vnode * tmpvp;

        hfsmp = VTOHFS(ap->a_vp);
        if (ap->a_p->p_ucred->cr_uid != 0 &&
            ap->a_p->p_ucred->cr_uid != (HFSTOVFS(hfsmp))->mnt_stat.f_owner) {
            return (EACCES);  /* must be owner of file system */
        }
        if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
            hfsmp->hfs_backingfs_rootvp) {

            hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
            tmpvp = hfsmp->hfs_backingfs_rootvp;
            hfsmp->hfs_backingfs_rootvp = NULLVP;
            hfsmp->hfs_sparsebandblks = 0;
            vrele(tmpvp);
        }
        return (0);
    }
#endif /* HFS_SPARSE_DEV */

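    /* Command 6 is reached via the F_FULLFSYNC fcntl in kernels of this
       vintage (an assumption; the magic number is not named here). */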
    case 6: {
        int error;

        ap->a_vp->v_flag |= VFULLFSYNC;
        error = VOP_FSYNC(ap->a_vp, ap->a_cred, MNT_NOWAIT, ap->a_p);
        ap->a_vp->v_flag &= ~VFULLFSYNC;

        return error;
    }
    case 5: {
        register struct vnode *vp;
        register struct cnode *cp;
        struct filefork *fp;
        int error;

        vp = ap->a_vp;
        cp = VTOC(vp);
        fp = VTOF(vp);

        if (vp->v_type != VREG)
            return EINVAL;

        VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ);
        error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
        if (error)
            return (error);

        /*
         * Used by a regression test to determine if
         * all the dirty pages (via write) have been cleaned
         * after a call to 'fsync'.
         */
        error = is_file_clean(vp, fp->ff_size);
        VOP_UNLOCK(vp, 0, ap->a_p);

        return (error);
    }

    case 1: {
        register struct vnode *vp;
        register struct radvisory *ra;
        register struct cnode *cp;
        struct filefork *fp;
        int devBlockSize = 0;
        int error;

        vp = ap->a_vp;

        if (vp->v_type != VREG)
            return EINVAL;

        VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ);
        error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
        if (error)
            return (error);

        ra = (struct radvisory *)(ap->a_data);
        cp = VTOC(vp);
        fp = VTOF(vp);

        if (ra->ra_offset >= fp->ff_size) {
            VOP_UNLOCK(vp, 0, ap->a_p);
            return (EFBIG);
        }
        VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);

        error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count, devBlockSize);
        VOP_UNLOCK(vp, 0, ap->a_p);

        return (error);
    }
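    /* Command 1 above serves the F_RDADVISE fcntl.  A user-space sketch with
       illustrative values (read-ahead hint for the first 128 KB of a file):

            struct radvisory ra;

            ra.ra_offset = 0;
            ra.ra_count  = 128 * 1024;
            fcntl(fd, F_RDADVISE, &ra);
     */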

    case 2: /* F_READBOOTBLOCKS */
    case 3: /* F_WRITEBOOTBLOCKS */
    {
        struct vnode *vp = ap->a_vp;
        struct vnode *devvp = NULL;
        struct fbootstraptransfer *btd = (struct fbootstraptransfer *)ap->a_data;
        int devBlockSize;
        int error;
        struct iovec aiov;
        struct uio auio;
        u_long blockNumber;
        u_long blockOffset;
        u_long xfersize;
        struct buf *bp;

        if ((vp->v_flag & VROOT) == 0) return EINVAL;
        if (btd->fbt_offset + btd->fbt_length > 1024) return EINVAL;

        devvp = VTOHFS(vp)->hfs_devvp;
        aiov.iov_base = btd->fbt_buffer;
        aiov.iov_len = btd->fbt_length;

        auio.uio_iov = &aiov;
        auio.uio_iovcnt = 1;
        auio.uio_offset = btd->fbt_offset;
        auio.uio_resid = btd->fbt_length;
        auio.uio_segflg = UIO_USERSPACE;
        auio.uio_rw = (ap->a_command == 3) ? UIO_WRITE : UIO_READ;  /* F_WRITEBOOTSTRAP / F_READBOOTSTRAP */
        auio.uio_procp = ap->a_p;

        VOP_DEVBLOCKSIZE(devvp, &devBlockSize);

        while (auio.uio_resid > 0) {
            blockNumber = auio.uio_offset / devBlockSize;
            error = bread(devvp, blockNumber, devBlockSize, ap->a_cred, &bp);
            if (error) {
                if (bp) brelse(bp);
                return error;
            };

            blockOffset = auio.uio_offset % devBlockSize;
            xfersize = devBlockSize - blockOffset;
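            /* e.g. with devBlockSize = 512 and uio_offset = 700: blockNumber
               is 1, blockOffset is 188, and xfersize is 324 -- the transfer
               is clipped to the remainder of the current device block. */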
            error = uiomove((caddr_t)bp->b_data + blockOffset, (int)xfersize, &auio);
            if (error) {
                brelse(bp);
                return error;
            };
            if (auio.uio_rw == UIO_WRITE) {
                error = VOP_BWRITE(bp);
                if (error) return error;
            } else {
                brelse(bp);
            };
        };
    };
    return 0;

    case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
    {
        *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(ap->a_vp)->localCreateDate);
        return 0;
    }

    default:
        return (ENOTTY);
    }

    /* Should never get here */
    return 0;
}

/* ARGSUSED */
int
hfs_select(ap)
    struct vop_select_args /* {
        struct vnode *a_vp;
        int  a_which;
        int  a_fflags;
        struct ucred *a_cred;
        void *a_wql;
        struct proc *a_p;
    } */ *ap;
{
    /*
     * We should really check to see if I/O is possible.
     */
    return (1);
}

/*
 * Bmap converts the logical block number of a file to its physical block
 * number on the disk.
 */

/*
 * vp  - address of the vnode of the file in question
 * bn  - which logical block to convert to a physical block number.
 * vpp - returns the vnode for the block special file holding the filesystem
 *       containing the file of interest
 * bnp - address of where to return the filesystem physical block number
#% bmap     vp  L L L
#% bmap     vpp - U -
#
 vop_bmap {
     IN struct vnode *vp;
     IN daddr_t bn;
     OUT struct vnode **vpp;
     IN daddr_t *bnp;
     OUT int *runp;
*/
/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run.  The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently it's 512.
 * The block run is returned in logical blocks, and is the REMAINING number of blocks in the run.
 */
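/*
 * A worked example (illustrative numbers): with a 4096-byte logical block
 * size, a_bn = 10 corresponds to byte offset 40960, which MapFileBlockC
 * below translates into a 512-byte device block number plus the length of
 * the contiguous run that follows it.
 */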

int
hfs_bmap(ap)
    struct vop_bmap_args /* {
        struct vnode *a_vp;
        daddr_t a_bn;
        struct vnode **a_vpp;
        daddr_t *a_bnp;
        int *a_runp;
    } */ *ap;
{
    struct vnode *vp = ap->a_vp;
    struct cnode *cp = VTOC(vp);
    struct filefork *fp = VTOF(vp);
    struct hfsmount *hfsmp = VTOHFS(vp);
    int retval = E_NONE;
    daddr_t logBlockSize;
    size_t bytesContAvail = 0;
    off_t blockposition;
    struct proc *p = NULL;
    int lockExtBtree;
    struct rl_entry *invalid_range;
    enum rl_overlaptype overlaptype;

    /*
     * Check for underlying vnode requests and ensure that logical
     * to physical mapping is requested.
     */
    if (ap->a_vpp != NULL)
        *ap->a_vpp = cp->c_devvp;
    if (ap->a_bnp == NULL)
        return (0);

    /* Only clustered I/O should have delayed allocations. */
    DBG_ASSERT(fp->ff_unallocblocks == 0);

    logBlockSize = GetLogicalBlockSize(vp);
    blockposition = (off_t)ap->a_bn * (off_t)logBlockSize;

    lockExtBtree = overflow_extents(fp);
    if (lockExtBtree) {
        p = current_proc();
        retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID,
                LK_EXCLUSIVE | LK_CANRECURSE, p);
        if (retval)
            return (retval);
    }

    retval = MacToVFSError(
            MapFileBlockC (HFSTOVCB(hfsmp),
                (FCB*)fp,
                MAXPHYSIO,
                blockposition,
                ap->a_bnp,
                &bytesContAvail));

    if (lockExtBtree) (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);

    if (retval == E_NONE) {
        /* Adjust the mapping information for invalid file ranges: */
        overlaptype = rl_scan(&fp->ff_invalidranges,
                blockposition,
                blockposition + MAXPHYSIO - 1,
                &invalid_range);
        if (overlaptype != RL_NOOVERLAP) {
            switch(overlaptype) {
            case RL_MATCHINGOVERLAP:
            case RL_OVERLAPCONTAINSRANGE:
            case RL_OVERLAPSTARTSBEFORE:
                /* There's no valid block for this byte offset: */
                *ap->a_bnp = (daddr_t)-1;
                bytesContAvail = invalid_range->rl_end + 1 - blockposition;
                break;

            case RL_OVERLAPISCONTAINED:
            case RL_OVERLAPENDSAFTER:
                /* The range of interest hits an invalid block before the end: */
                if (invalid_range->rl_start == blockposition) {
                    /* There's actually no valid information to be had starting here: */
                    *ap->a_bnp = (daddr_t)-1;
                    if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
                        (invalid_range->rl_end + 1 - blockposition < bytesContAvail)) {
                        bytesContAvail = invalid_range->rl_end + 1 - blockposition;
                    };
                } else {
                    bytesContAvail = invalid_range->rl_start - blockposition;
                };
                break;
            };
            if (bytesContAvail > MAXPHYSIO) bytesContAvail = MAXPHYSIO;
        };

        /* Figure out how many read ahead blocks there are */
        if (ap->a_runp != NULL) {
            if (can_cluster(logBlockSize)) {
                /* Make sure this result never goes negative: */
                *ap->a_runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
            } else {
                *ap->a_runp = 0;
            };
        };
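        /* e.g. bytesContAvail = 32768 with a 4096-byte logical block yields
           *ap->a_runp = 7: seven more contiguous blocks follow this one. */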
    };

    return (retval);
}

/* blktooff converts logical block number to file offset */

int
hfs_blktooff(ap)
    struct vop_blktooff_args /* {
        struct vnode *a_vp;
        daddr_t a_lblkno;
        off_t *a_offset;
    } */ *ap;
{
    if (ap->a_vp == NULL)
        return (EINVAL);
    *ap->a_offset = (off_t)ap->a_lblkno * PAGE_SIZE_64;

    return(0);
}

int
hfs_offtoblk(ap)
    struct vop_offtoblk_args /* {
        struct vnode *a_vp;
        off_t a_offset;
        daddr_t *a_lblkno;
    } */ *ap;
{
    if (ap->a_vp == NULL)
        return (EINVAL);
    *ap->a_lblkno = ap->a_offset / PAGE_SIZE_64;

    return(0);
}
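
/* These two conversions are inverses in units of PAGE_SIZE.  For example
   (assuming 4 KB pages): logical block 3 maps to file offset 12288, and any
   offset in the range 12288-16383 maps back to logical block 3. */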

int
hfs_cmap(ap)
    struct vop_cmap_args /* {
        struct vnode *a_vp;
        off_t a_foffset;
        size_t a_size;
        daddr_t *a_bpn;
        size_t *a_run;
        void *a_poff;
    } */ *ap;
{
    struct hfsmount *hfsmp = VTOHFS(ap->a_vp);
    struct filefork *fp = VTOF(ap->a_vp);
    size_t bytesContAvail = 0;
    int retval = E_NONE;
    int lockExtBtree = 0;
    struct proc *p = NULL;
    struct rl_entry *invalid_range;
    enum rl_overlaptype overlaptype;
    int started_tr = 0, grabbed_lock = 0;
    struct timeval tv;

    /*
     * Check for underlying vnode requests and ensure that logical
     * to physical mapping is requested.
     */
    if (ap->a_bpn == NULL)
        return (0);

    p = current_proc();

    if (ISSET(VTOC(ap->a_vp)->c_flag, C_NOBLKMAP)) {
        /*
         * File blocks are getting remapped.  Wait until it's finished.
         */
        SET(VTOC(ap->a_vp)->c_flag, C_WBLKMAP);
        (void) tsleep((caddr_t)VTOC(ap->a_vp), PINOD, "hfs_cmap", 0);
        if (ISSET(VTOC(ap->a_vp)->c_flag, C_NOBLKMAP))
            panic("hfs_cmap: no mappable blocks");
    }

retry:
    if (fp->ff_unallocblocks) {
        lockExtBtree = 1;

        // XXXdbg
        hfs_global_shared_lock_acquire(hfsmp);
        grabbed_lock = 1;

        if (hfsmp->jnl) {
            if (journal_start_transaction(hfsmp->jnl) != 0) {
                hfs_global_shared_lock_release(hfsmp);
                return EINVAL;
            } else {
                started_tr = 1;
            }
        }

        if ((retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p))) {
            if (started_tr) {
                journal_end_transaction(hfsmp->jnl);
            }
            if (grabbed_lock) {
                hfs_global_shared_lock_release(hfsmp);
            }
            return (retval);
        }
    } else if (overflow_extents(fp)) {
        lockExtBtree = 1;
        if ((retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p))) {
            return retval;
        }
    }

    /*
     * Check for any delayed allocations.
     */
    if (fp->ff_unallocblocks) {
        SInt64 reqbytes, actbytes;

        //
        // Make sure we have a transaction.  It's possible
        // that we came in and fp->ff_unallocblocks was zero
        // but during the time we blocked acquiring the extents
        // btree, ff_unallocblocks became non-zero and so we
        // will need to start a transaction.
        //
        if (hfsmp->jnl && started_tr == 0) {
            if (lockExtBtree) {
                (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
                lockExtBtree = 0;
            }

            goto retry;
        }

        reqbytes = (SInt64)fp->ff_unallocblocks *
                   (SInt64)HFSTOVCB(hfsmp)->blockSize;
        /*
         * Release the blocks on loan and acquire some real ones.
         * Note that we can race someone else for these blocks
         * (and lose) so cmap needs to handle a failure here.
         * Currently this race can't occur because all allocations
         * are protected by an exclusive lock on the Extents
         * Overflow file.
         */
        HFSTOVCB(hfsmp)->loanedBlocks -= fp->ff_unallocblocks;
        FTOC(fp)->c_blocks -= fp->ff_unallocblocks;
        fp->ff_blocks -= fp->ff_unallocblocks;
        fp->ff_unallocblocks = 0;

        /* Files that are changing size are not hot file candidates. */
        if (hfsmp->hfc_stage == HFC_RECORDING) {
            fp->ff_bytesread = 0;
        }
        while (retval == 0 && reqbytes > 0) {
            retval = MacToVFSError(ExtendFileC(HFSTOVCB(hfsmp),
                    (FCB*)fp, reqbytes, 0,
                    kEFAllMask | kEFNoClumpMask, &actbytes));
            if (retval == 0 && actbytes == 0)
                retval = ENOSPC;

            if (retval) {
                fp->ff_unallocblocks =
                    reqbytes / HFSTOVCB(hfsmp)->blockSize;
                HFSTOVCB(hfsmp)->loanedBlocks += fp->ff_unallocblocks;
                FTOC(fp)->c_blocks += fp->ff_unallocblocks;
                fp->ff_blocks += fp->ff_unallocblocks;
            }
            reqbytes -= actbytes;
        }

        if (retval) {
            (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
            VTOC(ap->a_vp)->c_flag |= C_MODIFIED;
            if (started_tr) {
                tv = time;
                VOP_UPDATE(ap->a_vp, &tv, &tv, 1);

                hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
                journal_end_transaction(hfsmp->jnl);
            }
            if (grabbed_lock) {
                hfs_global_shared_lock_release(hfsmp);
            }
            return (retval);
        }
    }

    retval = MacToVFSError(
            MapFileBlockC (HFSTOVCB(hfsmp),
                (FCB *)fp,
                ap->a_size,
                ap->a_foffset,
                ap->a_bpn,
                &bytesContAvail));

    if (lockExtBtree)
        (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);

    // XXXdbg
    if (started_tr) {
        tv = time;
        retval = VOP_UPDATE(ap->a_vp, &tv, &tv, 1);

        hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
        journal_end_transaction(hfsmp->jnl);
        started_tr = 0;
    }
    if (grabbed_lock) {
        hfs_global_shared_lock_release(hfsmp);
        grabbed_lock = 0;
    }

    if (retval == E_NONE) {
        /* Adjust the mapping information for invalid file ranges: */
        overlaptype = rl_scan(&fp->ff_invalidranges,
                ap->a_foffset,
                ap->a_foffset + (off_t)bytesContAvail - 1,
                &invalid_range);
        if (overlaptype != RL_NOOVERLAP) {
            switch(overlaptype) {
            case RL_MATCHINGOVERLAP:
            case RL_OVERLAPCONTAINSRANGE:
            case RL_OVERLAPSTARTSBEFORE:
                /* There's no valid block for this byte offset: */
                *ap->a_bpn = (daddr_t)-1;

                /* There's no point limiting the amount to be returned if the
                   invalid range that was hit extends all the way to the EOF
                   (i.e. there's no valid bytes between the end of this range
                   and the file's EOF):
                 */
                if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
                    (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
                    bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
                };
                break;

            case RL_OVERLAPISCONTAINED:
            case RL_OVERLAPENDSAFTER:
                /* The range of interest hits an invalid block before the end: */
                if (invalid_range->rl_start == ap->a_foffset) {
                    /* There's actually no valid information to be had starting here: */
                    *ap->a_bpn = (daddr_t)-1;
                    if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
                        (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
                        bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
                    };
                } else {
                    bytesContAvail = invalid_range->rl_start - ap->a_foffset;
                };
                break;
            };
            if (bytesContAvail > ap->a_size) bytesContAvail = ap->a_size;
        };

        if (ap->a_run) *ap->a_run = bytesContAvail;
    };

    if (ap->a_poff)
        *(int *)ap->a_poff = 0;

    return (retval);
}


/*
 * Read or write a buffer that is not contiguous on disk.  We loop over
 * each device block, copying to or from the caller's buffer.
 *
 * We could be a bit more efficient by transferring as much data as is
 * contiguous.  But since this routine should rarely be called, and doing
 * so would be more complicated, it's best to keep it simple.
 */
static int
hfs_strategy_fragmented(struct buf *bp)
{
    register struct vnode *vp = bp->b_vp;
    register struct cnode *cp = VTOC(vp);
    register struct vnode *devvp = cp->c_devvp;
    caddr_t ioaddr;             /* Address of fragment within bp */
    struct buf *frag = NULL;    /* For reading or writing a single block */
    int retval = 0;
    long remaining;             /* Bytes (in bp) left to transfer */
    off_t offset;               /* Logical offset of current fragment in vp */
    u_long block_size;          /* Size of one device block (and one I/O) */

    /* Make sure we redo this mapping for the next I/O */
    bp->b_blkno = bp->b_lblkno;

    /* Set up the logical position and number of bytes to read/write */
    offset = (off_t) bp->b_lblkno * (off_t) GetLogicalBlockSize(vp);
    block_size = VTOHFS(vp)->hfs_phys_block_size;

    /* Get an empty buffer to do the deblocking */
    frag = geteblk(block_size);
    if (ISSET(bp->b_flags, B_READ))
        SET(frag->b_flags, B_READ);

    for (ioaddr = bp->b_data, remaining = bp->b_bcount; remaining != 0;
        ioaddr += block_size, offset += block_size,
        remaining -= block_size) {
        frag->b_resid = frag->b_bcount;
        CLR(frag->b_flags, B_DONE);

        /* Map the current position to a physical block number */
        retval = VOP_CMAP(vp, offset, block_size, &frag->b_lblkno,
                NULL, NULL);
        if (retval != 0)
            break;

        /*
         * Did we try to read a hole?
         * (Should never happen for metadata!)
         */
        if ((long)frag->b_lblkno == -1) {
            bzero(ioaddr, block_size);
            continue;
        }

        /* If writing, copy before I/O */
        if (!ISSET(bp->b_flags, B_READ))
            bcopy(ioaddr, frag->b_data, block_size);

        /* Call the device to do the I/O and wait for it */
        frag->b_blkno = frag->b_lblkno;
        frag->b_vp = devvp;  /* Used to dispatch via VOP_STRATEGY */
        frag->b_dev = devvp->v_rdev;
        retval = VOP_STRATEGY(frag);
        frag->b_vp = NULL;
        if (retval != 0)
            break;
        retval = biowait(frag);
        if (retval != 0)
            break;

        /* If reading, copy after the I/O */
        if (ISSET(bp->b_flags, B_READ))
            bcopy(frag->b_data, ioaddr, block_size);
    }

    frag->b_vp = NULL;
    //
    // XXXdbg - in the case that this is a meta-data block, it won't affect
    //          the journal because this bp is for a physical disk block,
    //          not a logical block that is part of the catalog or extents
    //          files.
    SET(frag->b_flags, B_INVAL);
    brelse(frag);

    if ((bp->b_error = retval) != 0)
        SET(bp->b_flags, B_ERROR);

    biodone(bp);    /* This I/O is now complete */
    return retval;
}


/*
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
#
#vop_strategy {
#   IN struct buf *bp;
 */
int
hfs_strategy(ap)
    struct vop_strategy_args /* {
        struct buf *a_bp;
    } */ *ap;
{
    register struct buf *bp = ap->a_bp;
    register struct vnode *vp = bp->b_vp;
    register struct cnode *cp = VTOC(vp);
    int retval = 0;
    off_t offset;
    size_t bytes_contig;

    if ( !(bp->b_flags & B_VECTORLIST)) {
        if (vp->v_type == VBLK || vp->v_type == VCHR)
            panic("hfs_strategy: device vnode passed!");

        if (bp->b_flags & B_PAGELIST) {
            /*
             * If we have a page list associated with this bp,
             * then go through cluster_bp since it knows how to
             * deal with a page request that might span non-
             * contiguous physical blocks on the disk...
             */
            retval = cluster_bp(bp);
            vp = cp->c_devvp;
            bp->b_dev = vp->v_rdev;

            return (retval);
        }

        /*
         * If we don't already know the filesystem relative block
         * number then get it using VOP_BMAP().  If VOP_BMAP()
         * returns the block number as -1 then we've got a hole in
         * the file.  Although HFS filesystems don't create files with
         * holes, invalidating of subranges of the file (lazy zero
         * filling) may create such a situation.
         */
        if (bp->b_blkno == bp->b_lblkno) {
            offset = (off_t) bp->b_lblkno *
                (off_t) GetLogicalBlockSize(vp);

            if ((retval = VOP_CMAP(vp, offset, bp->b_bcount,
                    &bp->b_blkno, &bytes_contig, NULL))) {
                bp->b_error = retval;
                bp->b_flags |= B_ERROR;
                biodone(bp);
                return (retval);
            }
            if (bytes_contig < bp->b_bcount)
            {
                /*
                 * We were asked to read a block that wasn't
                 * contiguous, so we have to read each of the
                 * pieces and copy them into the buffer.
                 * Since ordinary file I/O goes through
                 * cluster_io (which won't ask us for
                 * discontiguous data), this is probably an
                 * attempt to read or write metadata.
                 */
                return hfs_strategy_fragmented(bp);
            }
            if ((long)bp->b_blkno == -1)
                clrbuf(bp);
        }
        if ((long)bp->b_blkno == -1) {
            biodone(bp);
            return (0);
        }
        if (bp->b_validend == 0) {
            /*
             * Record the exact size of the I/O transfer about to
             * be made:
             */
            bp->b_validend = bp->b_bcount;
        }
    }
    vp = cp->c_devvp;
    bp->b_dev = vp->v_rdev;

    return VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
}


static int do_hfs_truncate(ap)
    struct vop_truncate_args /* {
        struct vnode *a_vp;
        off_t a_length;
        int a_flags;
        struct ucred *a_cred;
        struct proc *a_p;
    } */ *ap;
{
    register struct vnode *vp = ap->a_vp;
    register struct cnode *cp = VTOC(vp);
    struct filefork *fp = VTOF(vp);
    off_t length;
    long vflags;
    struct timeval tv;
    int retval;
    off_t bytesToAdd;
    off_t actualBytesAdded;
    off_t filebytes;
    u_long fileblocks;
    int blksize;
    struct hfsmount *hfsmp;

    if (vp->v_type != VREG && vp->v_type != VLNK)
        return (EISDIR);    /* cannot truncate an HFS directory! */

    length = ap->a_length;
    blksize = VTOVCB(vp)->blockSize;
    fileblocks = fp->ff_blocks;
    filebytes = (off_t)fileblocks * (off_t)blksize;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
         (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

    if (length < 0)
        return (EINVAL);

    if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
        return (EFBIG);

    hfsmp = VTOHFS(vp);

    tv = time;
    retval = E_NONE;

    /* Files that are changing size are not hot file candidates. */
    if (hfsmp->hfc_stage == HFC_RECORDING) {
        fp->ff_bytesread = 0;
    }

    /*
     * We cannot just check if fp->ff_size == length (as an optimization)
     * since there may be extra physical blocks that also need truncation.
     */
#if QUOTA
    if ((retval = hfs_getinoquota(cp)))
        return(retval);
#endif /* QUOTA */

    /*
     * Lengthen the size of the file.  We must ensure that the
     * last byte of the file is allocated.  Since the smallest
     * value of ff_size is 0, length will be at least 1.
     */
    if (length > fp->ff_size) {
#if QUOTA
        retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
                   ap->a_cred, 0);
        if (retval)
            goto Err_Exit;
#endif /* QUOTA */
        /*
         * If we don't have enough physical space then
         * we need to extend the physical size.
         */
        if (length > filebytes) {
            int eflags;
            u_long blockHint = 0;

            /* All or nothing and don't round up to clumpsize. */
            eflags = kEFAllMask | kEFNoClumpMask;

            if (ap->a_cred && suser(ap->a_cred, NULL) != 0)
                eflags |= kEFReserveMask;  /* keep a reserve */

            /*
             * Allocate Journal and Quota files in metadata zone.
             */
            if (filebytes == 0 &&
                hfsmp->hfs_flags & HFS_METADATA_ZONE &&
                hfs_virtualmetafile(cp)) {
                eflags |= kEFMetadataMask;
                blockHint = hfsmp->hfs_metazone_start;
            }
            // XXXdbg
            hfs_global_shared_lock_acquire(hfsmp);
            if (hfsmp->jnl) {
                if (journal_start_transaction(hfsmp->jnl) != 0) {
                    retval = EINVAL;
                    goto Err_Exit;
                }
            }

            /* lock extents b-tree (also protects volume bitmap) */
            retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
            if (retval) {
                if (hfsmp->jnl) {
                    journal_end_transaction(hfsmp->jnl);
                }
                hfs_global_shared_lock_release(hfsmp);

                goto Err_Exit;
            }

            while ((length > filebytes) && (retval == E_NONE)) {
                bytesToAdd = length - filebytes;
                retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
                        (FCB*)fp,
                        bytesToAdd,
                        blockHint,
                        eflags,
                        &actualBytesAdded));

                filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
                if (actualBytesAdded == 0 && retval == E_NONE) {
                    if (length > filebytes)
                        length = filebytes;
                    break;
                }
            } /* endwhile */

            (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);

            // XXXdbg
            if (hfsmp->jnl) {
                tv = time;
                VOP_UPDATE(vp, &tv, &tv, 1);

                hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
                journal_end_transaction(hfsmp->jnl);
            }
            hfs_global_shared_lock_release(hfsmp);

            if (retval)
                goto Err_Exit;

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
                (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
        }

        if (!(ap->a_flags & IO_NOZEROFILL)) {
            if (UBCINFOEXISTS(vp) && retval == E_NONE) {
                struct rl_entry *invalid_range;
                int devBlockSize;
                off_t zero_limit;

                zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
                if (length < zero_limit) zero_limit = length;

                if (length > fp->ff_size) {
                    /* Extending the file: time to fill out the current last page with zeroes? */
                    if ((fp->ff_size & PAGE_MASK_64) &&
                        (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
                        fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {

                        /* There's some valid data at the start of the (current) last page
                           of the file, so zero out the remainder of that page to ensure the
                           entire page contains valid data.  Since there is no invalid range
                           possible past the (current) eof, there's no need to remove anything
                           from the invalid range list before calling cluster_write(): */
                        VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
                        retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
                                fp->ff_size, (off_t)0, devBlockSize,
                                (ap->a_flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
                        if (retval) goto Err_Exit;

                        /* Merely invalidate the remaining area, if necessary: */
                        if (length > zero_limit) {
                            rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
                            cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
                        }
                    } else {
                        /* The page containing the (current) eof is invalid: just add the
                           remainder of the page to the invalid list, along with the area
                           being newly allocated:
                         */
                        rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
                        cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
                    };
                }
            } else {
                panic("hfs_truncate: invoked on non-UBC object?!");
            };
        }
        cp->c_flag |= C_UPDATE;
        fp->ff_size = length;

        if (UBCISVALID(vp))
            ubc_setsize(vp, fp->ff_size);  /* XXX check errors */

    } else { /* Shorten the size of the file */

        if (fp->ff_size > length) {
            /*
             * Any buffers that are past the truncation point need to be
             * invalidated (to maintain buffer cache consistency).  For
             * simplicity, we invalidate all the buffers by calling vinvalbuf.
             */
            if (UBCISVALID(vp))
                ubc_setsize(vp, length);  /* XXX check errors */

            vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
            retval = vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);

            /* Any space previously marked as invalid is now irrelevant: */
            rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
        }

        /*
         * Account for any unmapped blocks.  Note that the new
         * file length can still end up with unmapped blocks.
         */
        if (fp->ff_unallocblocks > 0) {
            u_int32_t finalblks;

            /* lock extents b-tree */
            retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID,
                    LK_EXCLUSIVE, ap->a_p);
            if (retval)
                goto Err_Exit;

            VTOVCB(vp)->loanedBlocks -= fp->ff_unallocblocks;
            cp->c_blocks -= fp->ff_unallocblocks;
            fp->ff_blocks -= fp->ff_unallocblocks;
            fp->ff_unallocblocks = 0;

            finalblks = (length + blksize - 1) / blksize;
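            /* Round-up division: e.g. length = 10000 with blksize = 4096
               yields finalblks = 3, the number of allocation blocks needed
               to cover the new length. */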
            if (finalblks > fp->ff_blocks) {
                /* calculate required unmapped blocks */
                fp->ff_unallocblocks = finalblks - fp->ff_blocks;
                VTOVCB(vp)->loanedBlocks += fp->ff_unallocblocks;
                cp->c_blocks += fp->ff_unallocblocks;
                fp->ff_blocks += fp->ff_unallocblocks;
            }
            (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID,
                    LK_RELEASE, ap->a_p);
        }

        /*
         * For a TBE process the deallocation of the file blocks is
         * delayed until the file is closed.  And hfs_close calls
         * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
         * isn't set, we make sure this isn't a TBE process.
         */
        if ((ap->a_flags & IO_NDELAY) || (!ISSET(ap->a_p->p_flag, P_TBE))) {
#if QUOTA
            off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
#endif /* QUOTA */
            // XXXdbg
            hfs_global_shared_lock_acquire(hfsmp);
            if (hfsmp->jnl) {
                if (journal_start_transaction(hfsmp->jnl) != 0) {
                    retval = EINVAL;
                    goto Err_Exit;
                }
            }

            /* lock extents b-tree (also protects volume bitmap) */
            retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
            if (retval) {
                if (hfsmp->jnl) {
                    journal_end_transaction(hfsmp->jnl);
                }
                hfs_global_shared_lock_release(hfsmp);
                goto Err_Exit;
            }

            if (fp->ff_unallocblocks == 0)
                retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
                        (FCB*)fp, length, false));

            (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);

            // XXXdbg
            if (hfsmp->jnl) {
                tv = time;
                VOP_UPDATE(vp, &tv, &tv, 1);

                hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
                journal_end_transaction(hfsmp->jnl);
            }
            hfs_global_shared_lock_release(hfsmp);

            filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
            if (retval)
                goto Err_Exit;
#if QUOTA
            /* These are bytesreleased */
            (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
#endif /* QUOTA */
        }
        /* Only set update flag if the logical length changes */
        if (fp->ff_size != length)
            cp->c_flag |= C_UPDATE;
        fp->ff_size = length;
    }
    cp->c_flag |= C_CHANGE;
    retval = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
    if (retval) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
             -1, -1, -1, retval, 0);
    }

Err_Exit:

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
         (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);

    return (retval);
}


/*
#
#% truncate vp  L L L
#
vop_truncate {
    IN struct vnode *vp;
    IN off_t length;
    IN int flags;   (IO_SYNC)
    IN struct ucred *cred;
    IN struct proc *p;
};
 * Truncate a cnode to at most length size, freeing (or adding) the
 * disk blocks.
 */
int hfs_truncate(ap)
    struct vop_truncate_args /* {
        struct vnode *a_vp;
        off_t a_length;
        int a_flags;
        struct ucred *a_cred;
        struct proc *a_p;
    } */ *ap;
{
    register struct vnode *vp = ap->a_vp;
    register struct cnode *cp = VTOC(vp);
    struct filefork *fp = VTOF(vp);
    off_t length;
    off_t filebytes;
    u_long fileblocks;
    int blksize, error;
    u_int64_t nsize;

    if (vp->v_type != VREG && vp->v_type != VLNK)
        return (EISDIR);    /* cannot truncate an HFS directory! */

    length = ap->a_length;
    blksize = VTOVCB(vp)->blockSize;
    fileblocks = fp->ff_blocks;
    filebytes = (off_t)fileblocks * (off_t)blksize;

    // have to loop truncating or growing files that are
    // really big because otherwise transactions can get
    // enormous and consume too many kernel resources.
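    //
    // In other words, each do_hfs_truncate() pass below moves filebytes by
    // at most HFS_BIGFILE_SIZE, so shrinking or growing a very large fork
    // becomes a series of bounded journal transactions rather than one
    // huge one.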
    if (length < filebytes && (filebytes - length) > HFS_BIGFILE_SIZE) {
        while (filebytes > length) {
            if ((filebytes - length) > HFS_BIGFILE_SIZE) {
                filebytes -= HFS_BIGFILE_SIZE;
            } else {
                filebytes = length;
            }

            ap->a_length = filebytes;
            error = do_hfs_truncate(ap);
            if (error)
                break;
        }
    } else if (length > filebytes && (length - filebytes) > HFS_BIGFILE_SIZE) {
        while (filebytes < length) {
            if ((length - filebytes) > HFS_BIGFILE_SIZE) {
                filebytes += HFS_BIGFILE_SIZE;
            } else {
                filebytes = length;
            }

            ap->a_length = filebytes;
            error = do_hfs_truncate(ap);
            if (error)
                break;
        }
    } else {
        error = do_hfs_truncate(ap);
    }

    return error;
}


/*
#
#% allocate vp  L L L
#
vop_allocate {
    IN struct vnode *vp;
    IN off_t length;
    IN int flags;
    OUT off_t *bytesallocated;
    IN off_t offset;
    IN struct ucred *cred;
    IN struct proc *p;
};
 * allocate a cnode to at most length size
 */
int hfs_allocate(ap)
    struct vop_allocate_args /* {
        struct vnode *a_vp;
        off_t a_length;
        u_int32_t  a_flags;
        off_t *a_bytesallocated;
        off_t a_offset;
        struct ucred *a_cred;
        struct proc *a_p;
    } */ *ap;
{
    struct vnode *vp = ap->a_vp;
    struct cnode *cp = VTOC(vp);
    struct filefork *fp = VTOF(vp);
    ExtendedVCB *vcb = VTOVCB(vp);
    off_t length = ap->a_length;
    off_t startingPEOF;
    off_t moreBytesRequested;
    off_t actualBytesAdded;
    off_t filebytes;
    u_long fileblocks;
    long vflags;
    struct timeval tv;
    int retval, retval2;
    UInt32 blockHint;
    UInt32 extendFlags;   /* For call to ExtendFileC */
    struct hfsmount *hfsmp;

    hfsmp = VTOHFS(vp);

    *(ap->a_bytesallocated) = 0;
    fileblocks = fp->ff_blocks;
    filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

    if (length < (off_t)0)
        return (EINVAL);
    if (vp->v_type != VREG)
        return (EISDIR);
    if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes))
        return (EINVAL);

    /* Fill in the flags word for the call to Extend the file */

    extendFlags = kEFNoClumpMask;
    if (ap->a_flags & ALLOCATECONTIG)
        extendFlags |= kEFContigMask;
    if (ap->a_flags & ALLOCATEALL)
        extendFlags |= kEFAllMask;
    if (ap->a_cred && suser(ap->a_cred, NULL) != 0)
        extendFlags |= kEFReserveMask;

    tv = time;
    retval = E_NONE;
    blockHint = 0;
    startingPEOF = filebytes;

    if (ap->a_flags & ALLOCATEFROMPEOF)
        length += filebytes;
    else if (ap->a_flags & ALLOCATEFROMVOL)
        blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
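    /* e.g. ALLOCATEFROMPEOF with a_length = 1 MB on a fork that already has
       4 MB allocated asks for a total allocation of 5 MB (illustrative
       numbers). */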

    /* If no changes are necessary, then we're done */
    if (filebytes == length)
        goto Std_Exit;

    /*
     * Lengthen the size of the file.  We must ensure that the
     * last byte of the file is allocated.  Since the smallest
     * value of filebytes is 0, length will be at least 1.
     */
    if (length > filebytes) {
        moreBytesRequested = length - filebytes;

#if QUOTA
        retval = hfs_chkdq(cp,
                (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
                ap->a_cred, 0);
        if (retval)
            return (retval);

#endif /* QUOTA */
        /*
         * Metadata zone checks.
         */
        if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
            /*
             * Allocate Journal and Quota files in metadata zone.
             */
            if (hfs_virtualmetafile(cp)) {
                extendFlags |= kEFMetadataMask;
                blockHint = hfsmp->hfs_metazone_start;
            } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
                   (blockHint <= hfsmp->hfs_metazone_end)) {
                /*
                 * Move blockHint outside metadata zone.
                 */
                blockHint = hfsmp->hfs_metazone_end + 1;
            }
        }

        // XXXdbg
        hfs_global_shared_lock_acquire(hfsmp);
        if (hfsmp->jnl) {
            if (journal_start_transaction(hfsmp->jnl) != 0) {
                retval = EINVAL;
                goto Err_Exit;
            }
        }

        /* lock extents b-tree (also protects volume bitmap) */
        retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
        if (retval) {
            if (hfsmp->jnl) {
                journal_end_transaction(hfsmp->jnl);
            }
            hfs_global_shared_lock_release(hfsmp);
            goto Err_Exit;
        }

        retval = MacToVFSError(ExtendFileC(vcb,
                (FCB*)fp,
                moreBytesRequested,
                blockHint,
                extendFlags,
                &actualBytesAdded));

        *(ap->a_bytesallocated) = actualBytesAdded;
        filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

        (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);

        // XXXdbg
        if (hfsmp->jnl) {
            tv = time;
            VOP_UPDATE(vp, &tv, &tv, 1);

            hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
            journal_end_transaction(hfsmp->jnl);
        }
        hfs_global_shared_lock_release(hfsmp);

        /*
         * If we get an error and no changes were made, then exit;
         * otherwise we must do the VOP_UPDATE to reflect the changes.
         */
        if (retval && (startingPEOF == filebytes))
            goto Err_Exit;

        /*
         * Adjust actualBytesAdded to be allocation block aligned, not
         * clump size aligned.
         * NOTE: So what we are reporting does not affect reality
         * until the file is closed, when we truncate the file to allocation
         * block size.
         */
        if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
            *(ap->a_bytesallocated) =
                roundup(moreBytesRequested, (off_t)vcb->blockSize);

    } else { /* Shorten the size of the file */

        if (fp->ff_size > length) {
            /*
             * Any buffers that are past the truncation point need to be
             * invalidated (to maintain buffer cache consistency).  For
             * simplicity, we invalidate all the buffers by calling vinvalbuf.
             */
            vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
            (void) vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
        }

        // XXXdbg
        hfs_global_shared_lock_acquire(hfsmp);
        if (hfsmp->jnl) {
            if (journal_start_transaction(hfsmp->jnl) != 0) {
                retval = EINVAL;
                goto Err_Exit;
            }
        }

        /* lock extents b-tree (also protects volume bitmap) */
        retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
        if (retval) {
            if (hfsmp->jnl) {
                journal_end_transaction(hfsmp->jnl);
            }
            hfs_global_shared_lock_release(hfsmp);

            goto Err_Exit;
        }

        retval = MacToVFSError(
                TruncateFileC(
                    vcb,
                    (FCB*)fp,
                    length,
                    false));
        (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
        filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

        if (hfsmp->jnl) {
            tv = time;
            VOP_UPDATE(vp, &tv, &tv, 1);

            hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
            journal_end_transaction(hfsmp->jnl);
        }
        hfs_global_shared_lock_release(hfsmp);


1981 /*
1982 * If we get an error and no changes were made then exit;
1983 * otherwise we must do the VOP_UPDATE to reflect the changes.
1984 */
1985 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
1986 #if QUOTA
1987 /* Credit the quota with the bytes released (startingPEOF - filebytes) */
1988 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
1989 #endif /* QUOTA */
1990
1991 if (fp->ff_size > filebytes) {
1992 fp->ff_size = filebytes;
1993
1994 if (UBCISVALID(vp))
1995 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
1996 }
1997 }
1998
1999 Std_Exit:
2000 cp->c_flag |= C_CHANGE | C_UPDATE;
 tv = time; /* XXX tv is only assigned on the journaled paths above; set it before the update */
2001 retval2 = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
2002
2003 if (retval == 0)
2004 retval = retval2;
2005 Err_Exit:
2006 return (retval);
2007 }
2008
2009
2010 /*
2011 * pagein for HFS filesystem
2012 */
2013 int
2014 hfs_pagein(ap)
2015 struct vop_pagein_args /* {
2016 struct vnode *a_vp,
2017 upl_t a_pl,
2018 vm_offset_t a_pl_offset,
2019 off_t a_f_offset,
2020 size_t a_size,
2021 struct ucred *a_cred,
2022 int a_flags
2023 } */ *ap;
2024 {
2025 register struct vnode *vp = ap->a_vp;
2026 int devBlockSize = 0;
2027 int error;
2028
2029 if (vp->v_type != VREG)
2030 panic("hfs_pagein: vp not UBC type\n");
2031
2032 VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devBlockSize);
2033
2034 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2035 ap->a_size, (off_t)VTOF(vp)->ff_size, devBlockSize,
2036 ap->a_flags);
2037 /*
2038 * Keep track of the bytes read.
2039 */
2040 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
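 /*
  * During the hot-file recording stage, per-file read totals
  * accumulate in ff_bytesread; these presumably feed the ranking
  * that decides which files are moved into the hot file area of
  * the volume.
  */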
2041 struct cnode *cp;
2042
2043 cp = VTOC(vp);
2044 /*
2045 * If this file hasn't been seen since the start of
2046 * the current sampling period then start over.
2047 */
2048 if (cp->c_atime < VTOHFS(vp)->hfc_timebase)
2049 VTOF(vp)->ff_bytesread = ap->a_size;
2050 else
2051 VTOF(vp)->ff_bytesread += ap->a_size;
2052
2053 cp->c_flag |= C_ACCESS;
2054 }
2055
2056 return (error);
2057 }
2058
2059 /*
2060 * pageout for HFS filesystem.
2061 */
2062 int
2063 hfs_pageout(ap)
2064 struct vop_pageout_args /* {
2065 struct vnode *a_vp,
2066 upl_t a_pl,
2067 vm_offset_t a_pl_offset,
2068 off_t a_f_offset,
2069 size_t a_size,
2070 struct ucred *a_cred,
2071 int a_flags
2072 } */ *ap;
2073 {
2074 struct vnode *vp = ap->a_vp;
2075 struct cnode *cp = VTOC(vp);
2076 struct filefork *fp = VTOF(vp);
2077 int retval;
2078 int devBlockSize = 0;
2079 off_t end_of_range;
2080 off_t filesize;
2081
2082 if (UBCINVALID(vp))
2083 panic("hfs_pageout: Not a VREG: vp=%x", vp);
2084
2085 VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
2086 filesize = fp->ff_size;
2087 end_of_range = ap->a_f_offset + ap->a_size - 1;
2088
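 /*
  * While a file is being relocated (see hfs_relocate below) its size
  * is temporarily doubled: the original blocks occupy the first half
  * and the clone the second.  Refusing page-outs to the first half
  * with EBUSY keeps that range stable until the old head blocks are
  * truncated away.
  */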
2089 if (cp->c_flag & C_RELOCATING) {
2090 if (end_of_range < (filesize / 2)) {
2091 return (EBUSY);
2092 }
2093 }
2094
2095 if (end_of_range >= filesize)
2096 end_of_range = (off_t)(filesize - 1);
2097 if (ap->a_f_offset < filesize) {
2098 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2099 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2100 }
2101
2102 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size,
2103 filesize, devBlockSize, ap->a_flags);
2104
2105 /*
2106 * If we successfully wrote any data and we are not the superuser,
2107 * we clear the setuid and setgid bits as a precaution against
2108 * tampering.
2109 */
2110 if (retval == 0 && ap->a_cred && ap->a_cred->cr_uid != 0)
2111 cp->c_mode &= ~(S_ISUID | S_ISGID);
2112
2113 return (retval);
2114 }
2115
2116 /*
2117 * Intercept B-Tree node writes to unswap them if necessary.
2118 #
2119 #vop_bwrite {
2120 # IN struct buf *bp;
2121 */
2122 int
2123 hfs_bwrite(ap)
2124 struct vop_bwrite_args /* {
2125 struct buf *a_bp;
2126 } */ *ap;
2127 {
2128 int retval = 0;
2129 register struct buf *bp = ap->a_bp;
2130 register struct vnode *vp = bp->b_vp;
2131 #if BYTE_ORDER == LITTLE_ENDIAN
2132 BlockDescriptor block;
2133
2134 /* Trap B-Tree writes */
2135 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2136 (VTOC(vp)->c_fileid == kHFSCatalogFileID)) {
2137
2138 /* Swap if the B-Tree node is in native byte order */
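 /*
  * The last two bytes of a node hold the offset of record 0, which is
  * always sizeof(BTNodeDescriptor) == 14 (0x000e).  If a native
  * little-endian load sees 0x000e there, the node is still in host
  * byte order and must be swapped back to big-endian before it
  * reaches disk.
  */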
2139 if (((UInt16 *)((char *)bp->b_data + bp->b_bcount - 2))[0] == 0x000e) {
2140 /* Prepare the block pointer */
2141 block.blockHeader = bp;
2142 block.buffer = bp->b_data;
2143 /* not found in cache ==> came from disk */
2144 block.blockReadFromDisk = (bp->b_flags & B_CACHE) == 0;
2145 block.blockSize = bp->b_bcount;
2146
2147 /* Endian un-swap B-Tree node */
2148 SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1);
2149 }
2150
2151 /* We don't check to make sure that it's 0x0e00 because it could be all zeros */
2152 }
2153 #endif
2154 /* This buffer shouldn't be locked anymore but if it is clear it */
2155 if (ISSET(bp->b_flags, B_LOCKED)) {
2156 // XXXdbg
2157 if (VTOHFS(vp)->jnl) {
2158 panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
2159 }
2160 CLR(bp->b_flags, B_LOCKED);
2161 printf("hfs_bwrite: called with lock bit set\n");
2162 }
2163 retval = vn_bwrite (ap);
2164
2165 return (retval);
2166 }
2167
2168 /*
2169 * Relocate a file to a new location on disk
2170 * cnode must be locked on entry
2171 *
2172 * Relocation occurs by cloning the file's data from its
2173 * current set of blocks to a new set of blocks. During
2174 * the relocation all of the blocks (old and new) are
2175 * owned by the file.
2176 *
2177 * -----------------
2178 * |///////////////|
2179 * -----------------
2180 * 0 N (file offset)
2181 *
2182 * ----------------- -----------------
2183 * |///////////////| | | STEP 1 (acquire new blocks)
2184 * ----------------- -----------------
2185 * 0 N N+1 2N
2186 *
2187 * ----------------- -----------------
2188 * |///////////////| |///////////////| STEP 2 (clone data)
2189 * ----------------- -----------------
2190 * 0 N N+1 2N
2191 *
2192 * -----------------
2193 * |///////////////| STEP 3 (head truncate blocks)
2194 * -----------------
2195 * 0 N
2196 *
2197 * During steps 2 and 3 page-outs to file offsets less
2198 * than or equal to N are suspended.
2199 *
2200 * During step 3 page-ins to the file get suspended.
2201 */
2202 __private_extern__
2203 int
2204 hfs_relocate(vp, blockHint, cred, p)
2205 struct vnode *vp;
2206 u_int32_t blockHint;
2207 struct ucred *cred;
2208 struct proc *p;
2209 {
2210 struct filefork *fp;
2211 struct hfsmount *hfsmp;
2212 ExtendedVCB *vcb;
2213
2214 u_int32_t headblks;
2215 u_int32_t datablks;
2216 u_int32_t blksize;
2217 u_int32_t realsize;
2218 u_int32_t growsize;
2219 u_int32_t nextallocsave;
2220 u_int32_t sector_a;
2221 u_int32_t sector_b;
2222 int eflags;
2223 u_int32_t oldstart; /* debug only */
2224 off_t newbytes;
2225 int retval;
2226
2227 if (vp->v_type != VREG && vp->v_type != VLNK) {
2228 return (EPERM);
2229 }
2230
2231 hfsmp = VTOHFS(vp);
2232 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2233 return (ENOSPC);
2234 }
2235
2236 fp = VTOF(vp);
2237 if (fp->ff_unallocblocks)
2238 return (EINVAL);
2239 vcb = VTOVCB(vp);
2240 blksize = vcb->blockSize;
2241 if (blockHint == 0)
2242 blockHint = vcb->nextAllocation;
2243
2244 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2245 (vp->v_type == VLNK && fp->ff_size > blksize)) {
2246 return (EFBIG);
2247 }
2248
2249 headblks = fp->ff_blocks;
2250 datablks = howmany(fp->ff_size, blksize);
2251 growsize = datablks * blksize;
2252 realsize = fp->ff_size;
2253 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2254 if (blockHint >= hfsmp->hfs_metazone_start &&
2255 blockHint <= hfsmp->hfs_metazone_end)
2256 eflags |= kEFMetadataMask;
2257
2258 hfs_global_shared_lock_acquire(hfsmp);
2259 if (hfsmp->jnl) {
2260 if (journal_start_transaction(hfsmp->jnl) != 0) {
2261 return (EINVAL);
2262 }
2263 }
2264
2265 /* Lock extents b-tree (also protects volume bitmap) */
2266 retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE, p);
2267 if (retval)
2268 goto out2;
2269
2270 retval = MapFileBlockC(vcb, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2271 if (retval) {
2272 retval = MacToVFSError(retval);
2273 goto out;
2274 }
2275
2276 /*
2277 * STEP 1 - acquire new allocation blocks.
2278 */
2279 nextallocsave = vcb->nextAllocation;
2280 retval = ExtendFileC(vcb, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
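 /*
  * If this was a metadata-zone allocation, restore the roving
  * allocation pointer so that subsequent ordinary allocations don't
  * start scanning from inside the zone.
  */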
2281 if (eflags & kEFMetadataMask)
2282 vcb->nextAllocation = nextallocsave;
2283
2284 retval = MacToVFSError(retval);
2285 if (retval == 0) {
2286 VTOC(vp)->c_flag |= C_MODIFIED;
2287 if (newbytes < growsize) {
2288 retval = ENOSPC;
2289 goto restore;
2290 } else if (fp->ff_blocks < (headblks + datablks)) {
2291 printf("hfs_relocate: allocation failed\n");
2292 retval = ENOSPC;
2293 goto restore;
2294 }
2295
2296 retval = MapFileBlockC(vcb, (FCB *)fp, 1, growsize, &sector_b, NULL);
2297 if (retval) {
2298 retval = MacToVFSError(retval);
2299 } else if ((sector_a + 1) == sector_b) {
2300 retval = ENOSPC;
2301 goto restore;
2302 } else if ((eflags & kEFMetadataMask) &&
2303 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2304 hfsmp->hfs_metazone_end)) {
2305 printf("hfs_relocate: didn't move into metadata zone\n");
2306 retval = ENOSPC;
2307 goto restore;
2308 }
2309 }
2310 if (retval) {
2311 /*
2312 * Check to see if failure is due to excessive fragmentation.
2313 */
2314 if (retval == ENOSPC &&
2315 hfs_freeblks(hfsmp, 0) > (datablks * 2)) {
2316 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2317 }
2318 goto out;
2319 }
2320
2321 fp->ff_size = fp->ff_blocks * blksize;
2322 if (UBCISVALID(vp))
2323 (void) ubc_setsize(vp, fp->ff_size);
2324
2325 /*
2326 * STEP 2 - clone data into the new allocation blocks.
2327 */
2328
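 /*
  * Three cloning strategies: a symlink's data fits in a single
  * metadata block, system files must go through the buffer cache,
  * and regular files are copied via the cluster I/O layer.
  */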
2329 if (vp->v_type == VLNK)
2330 retval = hfs_clonelink(vp, blksize, cred, p);
2331 else if (vp->v_flag & VSYSTEM)
2332 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2333 else
2334 retval = hfs_clonefile(vp, headblks, datablks, blksize, cred, p);
2335
2336 if (retval)
2337 goto restore;
2338
2339 oldstart = fp->ff_extents[0].startBlock;
2340
2341 /*
2342 * STEP 3 - switch to clone and remove old blocks.
2343 */
2344 SET(VTOC(vp)->c_flag, C_NOBLKMAP); /* suspend page-ins */
2345
2346 retval = HeadTruncateFile(vcb, (FCB*)fp, headblks);
2347
2348 CLR(VTOC(vp)->c_flag, C_NOBLKMAP); /* resume page-ins */
2349 if (ISSET(VTOC(vp)->c_flag, C_WBLKMAP))
2350 wakeup(VTOC(vp));
2351 if (retval)
2352 goto restore;
2353
2354 fp->ff_size = realsize;
2355 if (UBCISVALID(vp)) {
2356 (void) ubc_setsize(vp, realsize);
2357 (void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
2358 }
2359
2360 CLR(VTOC(vp)->c_flag, C_RELOCATING); /* Resume page-outs for this file. */
2361 out:
2362 (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);
2363
2364 retval = VOP_FSYNC(vp, cred, MNT_WAIT, p);
2365 out2:
2366 if (hfsmp->jnl) {
2367 if (VTOC(vp)->c_cnid < kHFSFirstUserCatalogNodeID)
2368 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2369 else
2370 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2371 journal_end_transaction(hfsmp->jnl);
2372 }
2373 hfs_global_shared_lock_release(hfsmp);
2374
2375 return (retval);
2376
2377 restore:
2378 /*
2379 * Give back any newly allocated space.
2380 */
2381 if (fp->ff_size != realsize)
2382 fp->ff_size = realsize;
2383 (void) TruncateFileC(vcb, (FCB*)fp, fp->ff_size, false);
2384 if (UBCISVALID(vp))
2385 (void) ubc_setsize(vp, fp->ff_size);
2386 CLR(VTOC(vp)->c_flag, C_RELOCATING);
2387 goto out;
2388 }
2389
2390
2391 /*
2392 * Clone a symlink.
2393 *
2394 */
2395 static int
2396 hfs_clonelink(struct vnode *vp, int blksize, struct ucred *cred, struct proc *p)
2397 {
2398 struct buf *head_bp = NULL;
2399 struct buf *tail_bp = NULL;
2400 int error;
2401
2402
2403 error = meta_bread(vp, 0, blksize, cred, &head_bp);
2404 if (error)
2405 goto out;
2406
2407 tail_bp = getblk(vp, 1, blksize, 0, 0, BLK_META);
2408 if (tail_bp == NULL) {
2409 error = EIO;
2410 goto out;
2411 }
2412 bcopy(head_bp->b_data, tail_bp->b_data, blksize);
2413 error = bwrite(tail_bp);
2414 out:
2415 if (head_bp) {
2416 head_bp->b_flags |= B_INVAL;
2417 brelse(head_bp);
2418 }
2419 (void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
2420
2421 return (error);
2422 }
2423
2424 /*
2425 * Clone a file's data within the file.
2426 *
2427 */
2428 static int
2429 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2430 struct ucred *cred, struct proc *p)
2431 {
2432 caddr_t bufp;
2433 size_t writebase;
2434 size_t bufsize;
2435 size_t copysize;
2436 size_t iosize;
2437 size_t filesize;
2438 size_t offset;
2439 struct uio auio;
2440 struct iovec aiov;
2441 int devblocksize;
2442 int didhold;
2443 int error;
2444
2445
2446 if ((error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0))) {
2447 printf("hfs_clonefile: vinvalbuf failed - %d\n", error);
2448 return (error);
2449 }
2450
2451 if (!ubc_clean(vp, 1)) {
2452 printf("hfs_clonefile: not ubc_clean\n");
2453 return (EIO); /* XXX error code */
2454 }
2455
2456 /*
2457 * Suspend page-outs for this file.
2458 */
2459 SET(VTOC(vp)->c_flag, C_RELOCATING);
2460
2461 filesize = VTOF(vp)->ff_size;
2462 writebase = blkstart * blksize;
2463 copysize = blkcnt * blksize;
2464 iosize = bufsize = MIN(copysize, 4096 * 16);
2465 offset = 0;
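 /*
  * Copy in chunks of up to 64K (4096 * 16): each pass reads
  * [offset, offset + iosize) from the original blocks at the head of
  * the file and writes it to the same relative position in the new
  * blocks beginning at writebase.
  */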
2466
2467 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2468 return (ENOMEM);
2469 }
2470
2471 VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devblocksize);
2472
2473 auio.uio_iov = &aiov;
2474 auio.uio_iovcnt = 1;
2475 auio.uio_segflg = UIO_SYSSPACE;
2476 auio.uio_procp = p;
2477
2478 while (offset < copysize) {
2479 iosize = MIN(copysize - offset, iosize);
2480
2481 aiov.iov_base = bufp;
2482 aiov.iov_len = iosize;
2483 auio.uio_resid = iosize;
2484 auio.uio_offset = offset;
2485 auio.uio_rw = UIO_READ;
2486
2487 error = cluster_read(vp, &auio, copysize, devblocksize, 0);
2488 if (error) {
2489 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2490 break;
2491 }
2492 if (auio.uio_resid != 0) {
2493 printf("hfs_clonefile: cluster_read: uio_resid = %d\n", (int)auio.uio_resid);
2494 error = EIO;
2495 break;
2496 }
2497
2498
2499 aiov.iov_base = bufp;
2500 aiov.iov_len = iosize;
2501 auio.uio_resid = iosize;
2502 auio.uio_offset = writebase + offset;
2503 auio.uio_rw = UIO_WRITE;
2504
2505 error = cluster_write(vp, &auio, filesize + offset,
2506 filesize + offset + iosize,
2507 auio.uio_offset, 0, devblocksize, 0);
2508 if (error) {
2509 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2510 break;
2511 }
2512 if (auio.uio_resid != 0) {
2513 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2514 error = EIO;
2515 break;
2516 }
2517 offset += iosize;
2518 }
2519 if (error == 0) {
2520 /* Clean the pages in VM. */
2521 didhold = ubc_hold(vp);
2522 if (didhold)
2523 (void) ubc_clean(vp, 1);
2524
2525 /*
2526 * Clean out all associated buffers.
2527 */
2528 (void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
2529
2530 if (didhold)
2531 ubc_rele(vp);
2532 }
2533 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2534
2535 return (error);
2536 }
2537
2538 /*
2539 * Clone a system (metadata) file.
2540 *
2541 */
2542 static int
2543 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2544 struct ucred *cred, struct proc *p)
2545 {
2546 caddr_t bufp;
2547 char * offset;
2548 size_t bufsize;
2549 size_t iosize;
2550 struct buf *bp = NULL;
2551 daddr_t blkno;
2552 daddr_t blk;
2553 int breadcnt;
2554 int i;
2555 int error = 0;
2556
2557
2558 iosize = GetLogicalBlockSize(vp);
2559 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
2560 breadcnt = bufsize / iosize;
2561
2562 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2563 return (ENOMEM);
2564 }
2565 blkstart = (blkstart * blksize) / iosize;
2566 blkcnt = (blkcnt * blksize) / iosize;
2567 blkno = 0;
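 /*
  * System (metadata) files are copied through the buffer cache rather
  * than the cluster/UBC layer: stage up to a megabyte of logical
  * blocks into bufp with meta_bread, then write them out at blkstart
  * via getblk/bwrite.
  */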
2568
2569 while (blkno < blkcnt) {
2570 /*
2571 * Read up to a megabyte
2572 */
2573 offset = bufp;
2574 for (i = 0, blk = blkno; (i < breadcnt) && (blk < blkcnt); ++i, ++blk) {
2575 error = meta_bread(vp, blk, iosize, cred, &bp);
2576 if (error) {
2577 printf("hfs_clonesysfile: meta_bread error %d\n", error);
2578 goto out;
2579 }
2580 if (bp->b_bcount != iosize) {
2581 printf("hfs_clonesysfile: b_bcount is only %d\n", bp->b_bcount);
 error = EIO; /* a short read is an error; without this the goto would report success */
2582 goto out;
2583 }
2584
2585 bcopy(bp->b_data, offset, iosize);
2586 bp->b_flags |= B_INVAL;
2587 brelse(bp);
2588 bp = NULL;
2589 offset += iosize;
2590 }
2591
2592 /*
2593 * Write up to a megabyte
2594 */
2595 offset = bufp;
2596 for (i = 0; (i < breadcnt) && (blkno < blkcnt); ++i, ++blkno) {
2597 bp = getblk(vp, blkstart + blkno, iosize, 0, 0, BLK_META);
2598 if (bp == NULL) {
2599 printf("hfs_clonesysfile: getblk failed on blk %d\n", blkstart + blkno);
2600 error = EIO;
2601 goto out;
2602 }
2603 bcopy(offset, bp->b_data, iosize);
2604 error = bwrite(bp);
2605 bp = NULL;
2606 if (error)
2607 goto out;
2608 offset += iosize;
2609 }
2610 }
2611 out:
2612 if (bp) {
2613 brelse(bp);
2614 }
2615
2616 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2617
 /* Flush our work, but don't let a successful fsync mask an earlier copy error. */
 if (error == 0)
2618 error = VOP_FSYNC(vp, cred, MNT_WAIT, p);
 else
 (void) VOP_FSYNC(vp, cred, MNT_WAIT, p);
2619
2620 return (error);
2621 }
2622