/*
 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* @(#)hfs_readwrite.c 1.0
 *
 * (c) 1998-2001 Apple Computer, Inc.  All Rights Reserved
 *
 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/uio.h>

#include <miscfs/specfs/specdev.h>

#include <sys/ubc.h>
#include <vm/vm_pageout.h>

#include <sys/kdebug.h>

#include "hfs.h"
#include "hfs_endian.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
#include "hfs_dbg.h"

extern int overflow_extents(struct filefork *fp);

#define can_cluster(size)  ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
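/*
 * A worked example for the macro above (illustrative values; MAXPHYSIO is
 * platform-defined): an 8192-byte block size is a whole multiple of 4096
 * and, on typical configurations, no larger than MAXPHYSIO/2, so it can go
 * through the cluster layer; a 6000-byte size fails the 4096-multiple test
 * and cannot.
 */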

enum {
    MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
};

extern u_int32_t GetLogicalBlockSize(struct vnode *vp);

static int  hfs_clonelink(struct vnode *, int, struct ucred *, struct proc *);
static int  hfs_clonefile(struct vnode *, int, int, int, struct ucred *, struct proc *);
static int  hfs_clonesysfile(struct vnode *, int, int, int, struct ucred *, struct proc *);


/*****************************************************************************
*
*   Operations on vnodes
*
*****************************************************************************/

/*
#% read     vp  L L L
#
 vop_read {
     IN struct vnode *vp;
     INOUT struct uio *uio;
     IN int ioflag;
     IN struct ucred *cred;

     */

int
hfs_read(ap)
    struct vop_read_args /* {
        struct vnode *a_vp;
        struct uio *a_uio;
        int a_ioflag;
        struct ucred *a_cred;
    } */ *ap;
{
    register struct uio *uio = ap->a_uio;
    register struct vnode *vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    int devBlockSize = 0;
    int retval = 0;
    off_t filesize;
    off_t filebytes;
    off_t start_resid = uio->uio_resid;


    /* Preflight checks */
    if ((vp->v_type != VREG) || !UBCINFOEXISTS(vp))
        return (EPERM);     /* can only read regular files */
    if (uio->uio_resid == 0)
        return (0);         /* Nothing left to do */
    if (uio->uio_offset < 0)
        return (EINVAL);    /* can't read from a negative offset */

    cp = VTOC(vp);
    fp = VTOF(vp);
    filesize = fp->ff_size;
    filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize;
    if (uio->uio_offset > filesize) {
        if ((!ISHFSPLUS(VTOVCB(vp))) && (uio->uio_offset > (off_t)MAXHFSFILESIZE))
            return (EFBIG);
        else
            return (0);
    }

    VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
        (int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0);

    retval = cluster_read(vp, uio, filesize, devBlockSize, 0);

    cp->c_flag |= C_ACCESS;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
        (int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0);

    /*
     * Keep track of blocks read.
     */
    if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
        /*
         * If this file hasn't been seen since the start of
         * the current sampling period then start over.
         */
        if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
            fp->ff_bytesread = start_resid - uio->uio_resid;
            cp->c_atime = time.tv_sec;
        } else {
            fp->ff_bytesread += start_resid - uio->uio_resid;
        }
    }

    return (retval);
}

/*
 * Write data to a file or directory.
#% write    vp  L L L
#
 vop_write {
     IN struct vnode *vp;
     INOUT struct uio *uio;
     IN int ioflag;
     IN struct ucred *cred;

     */
int
hfs_write(ap)
    struct vop_write_args /* {
        struct vnode *a_vp;
        struct uio *a_uio;
        int a_ioflag;
        struct ucred *a_cred;
    } */ *ap;
{
    struct vnode *vp = ap->a_vp;
    struct uio *uio = ap->a_uio;
    struct cnode *cp;
    struct filefork *fp;
    struct proc *p;
    struct timeval tv;
    ExtendedVCB *vcb;
    int devBlockSize = 0;
    off_t origFileSize, writelimit, bytesToAdd;
    off_t actualBytesAdded;
    u_long resid;
    int eflags, ioflag;
    int retval;
    off_t filebytes;
    struct hfsmount *hfsmp;
    int started_tr = 0, grabbed_lock = 0;


    if (uio->uio_offset < 0)
        return (EINVAL);
    if (uio->uio_resid == 0)
        return (E_NONE);
    if ((vp->v_type != VREG) || !UBCINFOEXISTS(vp))
        return (EPERM);     /* Can only write regular files */

    ioflag = ap->a_ioflag;
    cp = VTOC(vp);
    fp = VTOF(vp);
    vcb = VTOVCB(vp);
    filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

    if (ioflag & IO_APPEND)
        uio->uio_offset = fp->ff_size;
    if ((cp->c_flags & APPEND) && uio->uio_offset != fp->ff_size)
        return (EPERM);

    // XXXdbg - don't allow modification of the journal or journal_info_block
    if (VTOHFS(vp)->jnl && cp->c_datafork) {
        struct HFSPlusExtentDescriptor *extd;

        extd = &cp->c_datafork->ff_extents[0];
        if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock || extd->startBlock == VTOHFS(vp)->jnl_start) {
            return EPERM;
        }
    }

    writelimit = uio->uio_offset + uio->uio_resid;

    /*
     * Maybe this should be above the vnode op call, but so long as
     * file servers have no limits, I don't think it matters.
     */
    p = uio->uio_procp;
    if (vp->v_type == VREG && p &&
        writelimit > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
        psignal(p, SIGXFSZ);
        return (EFBIG);
    }
    p = current_proc();

    VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);

    resid = uio->uio_resid;
    origFileSize = fp->ff_size;
    eflags = kEFDeferMask;  /* defer file block allocations */
    filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
        (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);
    retval = 0;

    /* Now test if we need to extend the file */
    /* Doing so will adjust the filebytes for us */

#if QUOTA
    if (writelimit > filebytes) {
        bytesToAdd = writelimit - filebytes;

        retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, vcb->blockSize)),
                   ap->a_cred, 0);
        if (retval)
            return (retval);
    }
#endif /* QUOTA */

    hfsmp = VTOHFS(vp);

#ifdef HFS_SPARSE_DEV
    /*
     * When the underlying device is sparse and space
     * is low (< 8MB), stop doing delayed allocations
     * and begin doing synchronous I/O.
     */
    if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
        (hfs_freeblks(hfsmp, 0) < 2048)) {
        eflags &= ~kEFDeferMask;
        ioflag |= IO_SYNC;
    }
#endif /* HFS_SPARSE_DEV */

    if (writelimit > filebytes) {
        hfs_global_shared_lock_acquire(hfsmp);
        grabbed_lock = 1;
    }
    if (hfsmp->jnl && (writelimit > filebytes)) {
        if (journal_start_transaction(hfsmp->jnl) != 0) {
            hfs_global_shared_lock_release(hfsmp);
            return EINVAL;
        }
        started_tr = 1;
    }

    while (writelimit > filebytes) {
        bytesToAdd = writelimit - filebytes;
        if (ap->a_cred && suser(ap->a_cred, NULL) != 0)
            eflags |= kEFReserveMask;

        /* lock extents b-tree (also protects volume bitmap) */
        retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, current_proc());
        if (retval != E_NONE)
            break;

        /* Files that are changing size are not hot file candidates. */
        if (hfsmp->hfc_stage == HFC_RECORDING) {
            fp->ff_bytesread = 0;
        }
        retval = MacToVFSError(ExtendFileC (vcb, (FCB*)fp, bytesToAdd,
                0, eflags, &actualBytesAdded));

        (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);
        if ((actualBytesAdded == 0) && (retval == E_NONE))
            retval = ENOSPC;
        if (retval != E_NONE)
            break;
        filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
            (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);
    }

    // XXXdbg
    if (started_tr) {
        tv = time;
        VOP_UPDATE(vp, &tv, &tv, 1);

        hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
        journal_end_transaction(hfsmp->jnl);
        started_tr = 0;
    }
    if (grabbed_lock) {
        hfs_global_shared_lock_release(hfsmp);
        grabbed_lock = 0;
    }

    if (retval == E_NONE) {
        off_t filesize;
        off_t zero_off;
        off_t tail_off;
        off_t inval_start;
        off_t inval_end;
        off_t io_start, io_end;
        int lflag;
        struct rl_entry *invalid_range;

        if (writelimit > fp->ff_size)
            filesize = writelimit;
        else
            filesize = fp->ff_size;

        lflag = (ioflag & IO_SYNC);

        if (uio->uio_offset <= fp->ff_size) {
            zero_off = uio->uio_offset & ~PAGE_MASK_64;
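            /* A worked example of the rounding above, assuming 4 KB pages:
               a write offset of 0x1A2B4 & ~PAGE_MASK_64 yields 0x1A000, the
               start of the page containing the write. */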

            /* Check whether the area between zero_off and the start of the
               transfer is invalid and should be zero-filled as part of the
               transfer:
             */
            if (uio->uio_offset > zero_off) {
                if (rl_scan(&fp->ff_invalidranges, zero_off, uio->uio_offset - 1, &invalid_range) != RL_NOOVERLAP)
                    lflag |= IO_HEADZEROFILL;
            }
        } else {
            off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;

            /* The bytes between fp->ff_size and uio->uio_offset must never be
               read without being zeroed.  The current last block is filled with zeroes
               if it holds valid data, but in all cases merely do a little bookkeeping
               to track the area from the end of the current last page to the start of
               the area actually written.  For the same reason only the bytes up to the
               start of the page where this write will start are invalidated; any remainder
               before uio->uio_offset is explicitly zeroed as part of the cluster_write.

               Note that inval_start, the start of the page after the current EOF,
               may be past the start of the write, in which case the zeroing
               will be handled by the cluster_write of the actual data.
             */
            inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
            inval_end = uio->uio_offset & ~PAGE_MASK_64;
            zero_off = fp->ff_size;
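            /* For instance (assuming 4 KB pages): with ff_size = 0x1234 and a
               write starting at uio_offset = 0x5100, inval_start rounds up to
               0x2000 and inval_end rounds down to 0x5000, so the pages covering
               0x2000-0x4FFF are merely tracked as invalid rather than zeroed
               eagerly. */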

            if ((fp->ff_size & PAGE_MASK_64) &&
                (rl_scan(&fp->ff_invalidranges,
                            eof_page_base,
                            fp->ff_size - 1,
                            &invalid_range) != RL_NOOVERLAP)) {
                /* The page containing the EOF is not valid, so the
                   entire page must be made inaccessible now.  If the write
                   starts on a page beyond the page containing the eof
                   (inval_end > eof_page_base), add the
                   whole page to the range to be invalidated.  Otherwise
                   (i.e. if the write starts on the same page), zero-fill
                   the entire page explicitly now:
                 */
                if (inval_end > eof_page_base) {
                    inval_start = eof_page_base;
                } else {
                    zero_off = eof_page_base;
                };
            };

            if (inval_start < inval_end) {
                /* There's some range of data that's going to be marked invalid */

                if (zero_off < inval_start) {
                    /* The pages between inval_start and inval_end are going to be invalidated,
                       and the actual write will start on a page past inval_end.  Now's the last
                       chance to zero-fill the page containing the EOF:
                     */
                    retval = cluster_write(vp, (struct uio *) 0,
                            fp->ff_size, inval_start,
                            zero_off, (off_t)0, devBlockSize,
                            lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
                    if (retval) goto ioerr_exit;
                };

                /* Mark the remaining area of the newly allocated space as invalid: */
                rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
                cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
                zero_off = fp->ff_size = inval_end;
            };

            if (uio->uio_offset > zero_off) lflag |= IO_HEADZEROFILL;
        };

        /* Check to see whether the area between the end of the write and the end of
           the page it falls in is invalid and should be zero-filled as part of the transfer:
         */
        tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
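        /* e.g. (4 KB pages) a write ending at writelimit = 0x5234 gives
           tail_off = 0x6000, subject to the clip against filesize below. */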
        if (tail_off > filesize) tail_off = filesize;
        if (tail_off > writelimit) {
            if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
                lflag |= IO_TAILZEROFILL;
            };
        };

        /*
         * if the write starts beyond the current EOF (possibly advanced in the
         * zeroing of the last block, above), then we'll zero fill from the current EOF
         * to where the write begins:
         *
         * NOTE: If (and ONLY if) the portion of the file about to be written is
         * before the current EOF it might be marked as invalid now and must be
         * made readable (removed from the invalid ranges) before cluster_write
         * tries to write it:
         */
        io_start = (lflag & IO_HEADZEROFILL) ? zero_off : uio->uio_offset;
        io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
        if (io_start < fp->ff_size) {
            rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
        };
        retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
                tail_off, devBlockSize, lflag | IO_NOZERODIRTY);

        if (uio->uio_offset > fp->ff_size) {
            fp->ff_size = uio->uio_offset;

            ubc_setsize(vp, fp->ff_size);       /* XXX check errors */
        }
        if (resid > uio->uio_resid)
            cp->c_flag |= C_CHANGE | C_UPDATE;
    }

    HFS_KNOTE(vp, NOTE_WRITE);

ioerr_exit:
    /*
     * If we successfully wrote any data, and we are not the superuser,
     * we clear the setuid and setgid bits as a precaution against
     * tampering.
     */
    if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
        cp->c_mode &= ~(S_ISUID | S_ISGID);

    if (retval) {
        if (ioflag & IO_UNIT) {
            (void)VOP_TRUNCATE(vp, origFileSize,
                ioflag & IO_SYNC, ap->a_cred, uio->uio_procp);
            uio->uio_offset -= resid - uio->uio_resid;
            uio->uio_resid = resid;
            filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
        }
    } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
        tv = time;
        retval = VOP_UPDATE(vp, &tv, &tv, 1);
    }
    vcb->vcbWrCnt++;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
        (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);

    return (retval);
}


#ifdef HFS_SPARSE_DEV
struct hfs_backingstoreinfo {
    int  signature;   /* == 3419115 */
    int  version;     /* version of this struct (1) */
    int  backingfd;   /* disk image file (on backing fs) */
    int  bandsize;    /* sparse disk image band size */
};

#define HFSIOC_SETBACKINGSTOREINFO  _IOW('h', 7, struct hfs_backingstoreinfo)
#define HFSIOC_CLRBACKINGSTOREINFO  _IO('h', 8)

#define HFS_SETBACKINGSTOREINFO  IOCBASECMD(HFSIOC_SETBACKINGSTOREINFO)
#define HFS_CLRBACKINGSTOREINFO  IOCBASECMD(HFSIOC_CLRBACKINGSTOREINFO)
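
/*
 * A hypothetical user-space sketch of how a disk-image helper might hand
 * the backing-store file descriptor to a sparse HFS volume.  The fsctl()
 * path and the literal values here are illustrative assumptions, not a
 * documented API:
 *
 *      struct hfs_backingstoreinfo bsi;
 *
 *      bsi.signature = 3419115;
 *      bsi.version   = 1;
 *      bsi.backingfd = imagefd;            // open fd for the disk image
 *      bsi.bandsize  = 8 * 1024 * 1024;    // sparse image band size
 *      fsctl(mntpath, HFS_SETBACKINGSTOREINFO, &bsi, 0);
 */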

#endif /* HFS_SPARSE_DEV */

/*

#% ioctl    vp  U U U
#
 vop_ioctl {
     IN struct vnode *vp;
     IN u_long command;
     IN caddr_t data;
     IN int fflag;
     IN struct ucred *cred;
     IN struct proc *p;

     */


/* ARGSUSED */
int
hfs_ioctl(ap)
    struct vop_ioctl_args /* {
        struct vnode *a_vp;
        int  a_command;
        caddr_t  a_data;
        int  a_fflag;
        struct ucred *a_cred;
        struct proc *a_p;
    } */ *ap;
{
    switch (ap->a_command) {

#ifdef HFS_SPARSE_DEV
    case HFS_SETBACKINGSTOREINFO: {
        struct hfsmount * hfsmp;
        struct vnode * bsfs_rootvp;
        struct vnode * di_vp;
        struct file * di_fp;
        struct hfs_backingstoreinfo *bsdata;
        int error = 0;

        hfsmp = VTOHFS(ap->a_vp);
        if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
            return (EALREADY);
        }
        if (ap->a_p->p_ucred->cr_uid != 0 &&
            ap->a_p->p_ucred->cr_uid != (HFSTOVFS(hfsmp))->mnt_stat.f_owner) {
            return (EACCES);  /* must be owner of file system */
        }
        bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
        if (bsdata == NULL) {
            return (EINVAL);
        }
        if ((error = fdgetf(ap->a_p, bsdata->backingfd, &di_fp))) {
            return (error);
        }
        if (fref(di_fp) == -1) {
            return (EBADF);
        }
        if (di_fp->f_type != DTYPE_VNODE) {
            frele(di_fp);
            return (EINVAL);
        }
        di_vp = (struct vnode *)di_fp->f_data;
        if (ap->a_vp->v_mount == di_vp->v_mount) {
            frele(di_fp);
            return (EINVAL);
        }

        /*
         * Obtain the backing fs root vnode and keep a reference
         * on it.  This reference will be dropped in hfs_unmount.
         */
        error = VFS_ROOT(di_vp->v_mount, &bsfs_rootvp);
        if (error) {
            frele(di_fp);
            return (error);
        }
        VOP_UNLOCK(bsfs_rootvp, 0, ap->a_p);  /* Hold on to the reference */

        hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
        hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
        hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
        hfsmp->hfs_sparsebandblks *= 4;
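        /* Presumably this keeps four bands' worth of allocation blocks as
           low-space headroom for the sparse image -- an inference from the
           free-block check in hfs_write, not a documented invariant. */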

        frele(di_fp);
        return (0);
    }
    case HFS_CLRBACKINGSTOREINFO: {
        struct hfsmount * hfsmp;
        struct vnode * tmpvp;

        hfsmp = VTOHFS(ap->a_vp);
        if (ap->a_p->p_ucred->cr_uid != 0 &&
            ap->a_p->p_ucred->cr_uid != (HFSTOVFS(hfsmp))->mnt_stat.f_owner) {
            return (EACCES);  /* must be owner of file system */
        }
        if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
            hfsmp->hfs_backingfs_rootvp) {

            hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
            tmpvp = hfsmp->hfs_backingfs_rootvp;
            hfsmp->hfs_backingfs_rootvp = NULLVP;
            hfsmp->hfs_sparsebandblks = 0;
            vrele(tmpvp);
        }
        return (0);
    }
#endif /* HFS_SPARSE_DEV */

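    /* Command 6 is reached via the F_FULLFSYNC fcntl in kernels of this
       vintage (an assumption; the magic number is not named here). */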
    case 6: {
        int error;

        ap->a_vp->v_flag |= VFULLFSYNC;
        error = VOP_FSYNC(ap->a_vp, ap->a_cred, MNT_NOWAIT, ap->a_p);
        ap->a_vp->v_flag &= ~VFULLFSYNC;

        return error;
    }
    case 5: {
        register struct vnode *vp;
        register struct cnode *cp;
        struct filefork *fp;
        int error;

        vp = ap->a_vp;
        cp = VTOC(vp);
        fp = VTOF(vp);

        if (vp->v_type != VREG)
            return EINVAL;

        VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ);
        error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
        if (error)
            return (error);

        /*
         * Used by a regression test to determine if
         * all the dirty pages (via write) have been cleaned
         * after a call to 'fsync'.
         */
        error = is_file_clean(vp, fp->ff_size);
        VOP_UNLOCK(vp, 0, ap->a_p);

        return (error);
    }

    case 1: {
        register struct vnode *vp;
        register struct radvisory *ra;
        register struct cnode *cp;
        struct filefork *fp;
        int devBlockSize = 0;
        int error;

        vp = ap->a_vp;

        if (vp->v_type != VREG)
            return EINVAL;

        VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ);
        error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
        if (error)
            return (error);

        ra = (struct radvisory *)(ap->a_data);
        cp = VTOC(vp);
        fp = VTOF(vp);

        if (ra->ra_offset >= fp->ff_size) {
            VOP_UNLOCK(vp, 0, ap->a_p);
            return (EFBIG);
        }
        VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);

        error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count, devBlockSize);
        VOP_UNLOCK(vp, 0, ap->a_p);

        return (error);
    }
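    /* Command 1 above serves the F_RDADVISE fcntl.  A user-space sketch with
       illustrative values (read-ahead hint for the first 128 KB of a file):

            struct radvisory ra;

            ra.ra_offset = 0;
            ra.ra_count  = 128 * 1024;
            fcntl(fd, F_RDADVISE, &ra);
     */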

    case 2: /* F_READBOOTBLOCKS */
    case 3: /* F_WRITEBOOTBLOCKS */
    {
        struct vnode *vp = ap->a_vp;
        struct vnode *devvp = NULL;
        struct fbootstraptransfer *btd = (struct fbootstraptransfer *)ap->a_data;
        int devBlockSize;
        int error;
        struct iovec aiov;
        struct uio auio;
        u_long blockNumber;
        u_long blockOffset;
        u_long xfersize;
        struct buf *bp;

        if ((vp->v_flag & VROOT) == 0) return EINVAL;
        if (btd->fbt_offset + btd->fbt_length > 1024) return EINVAL;

        devvp = VTOHFS(vp)->hfs_devvp;
        aiov.iov_base = btd->fbt_buffer;
        aiov.iov_len = btd->fbt_length;

        auio.uio_iov = &aiov;
        auio.uio_iovcnt = 1;
        auio.uio_offset = btd->fbt_offset;
        auio.uio_resid = btd->fbt_length;
        auio.uio_segflg = UIO_USERSPACE;
        auio.uio_rw = (ap->a_command == 3) ? UIO_WRITE : UIO_READ;  /* F_WRITEBOOTSTRAP / F_READBOOTSTRAP */
        auio.uio_procp = ap->a_p;

        VOP_DEVBLOCKSIZE(devvp, &devBlockSize);

        while (auio.uio_resid > 0) {
            blockNumber = auio.uio_offset / devBlockSize;
            error = bread(devvp, blockNumber, devBlockSize, ap->a_cred, &bp);
            if (error) {
                if (bp) brelse(bp);
                return error;
            };

            blockOffset = auio.uio_offset % devBlockSize;
            xfersize = devBlockSize - blockOffset;
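            /* e.g. with devBlockSize = 512 and uio_offset = 700: blockNumber
               is 1, blockOffset is 188, and xfersize is 324 -- the transfer
               is clipped to the remainder of the current device block. */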
            error = uiomove((caddr_t)bp->b_data + blockOffset, (int)xfersize, &auio);
            if (error) {
                brelse(bp);
                return error;
            };
            if (auio.uio_rw == UIO_WRITE) {
                error = VOP_BWRITE(bp);
                if (error) return error;
            } else {
                brelse(bp);
            };
        };
    };
    return 0;

    case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
    {
        *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(ap->a_vp)->localCreateDate);
        return 0;
    }

    default:
        return (ENOTTY);
    }

    /* Should never get here */
    return 0;
}

/* ARGSUSED */
int
hfs_select(ap)
    struct vop_select_args /* {
        struct vnode *a_vp;
        int  a_which;
        int  a_fflags;
        struct ucred *a_cred;
        void *a_wql;
        struct proc *a_p;
    } */ *ap;
{
    /*
     * We should really check to see if I/O is possible.
     */
    return (1);
}

/*
 * Bmap converts the logical block number of a file to its physical block
 * number on the disk.
 */

/*
 * vp  - address of the vnode of the file in question
 * bn  - which logical block to convert to a physical block number.
 * vpp - returns the vnode for the block special file holding the filesystem
 *       containing the file of interest
 * bnp - address of where to return the filesystem physical block number
#% bmap     vp  L L L
#% bmap     vpp - U -
#
 vop_bmap {
     IN struct vnode *vp;
     IN daddr_t bn;
     OUT struct vnode **vpp;
     IN daddr_t *bnp;
     OUT int *runp;
*/
/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run.  The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently it's 512.
 * The block run is returned in logical blocks, and is the REMAINING number of blocks in the run.
 */
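/*
 * A worked example (illustrative numbers): with a 4096-byte logical block
 * size, a_bn = 10 corresponds to byte offset 40960, which MapFileBlockC
 * below translates into a 512-byte device block number plus the length of
 * the contiguous run that follows it.
 */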

int
hfs_bmap(ap)
    struct vop_bmap_args /* {
        struct vnode *a_vp;
        daddr_t a_bn;
        struct vnode **a_vpp;
        daddr_t *a_bnp;
        int *a_runp;
    } */ *ap;
{
    struct vnode *vp = ap->a_vp;
    struct cnode *cp = VTOC(vp);
    struct filefork *fp = VTOF(vp);
    struct hfsmount *hfsmp = VTOHFS(vp);
    int retval = E_NONE;
    daddr_t logBlockSize;
    size_t bytesContAvail = 0;
    off_t blockposition;
    struct proc *p = NULL;
    int lockExtBtree;
    struct rl_entry *invalid_range;
    enum rl_overlaptype overlaptype;

    /*
     * Check for underlying vnode requests and ensure that logical
     * to physical mapping is requested.
     */
    if (ap->a_vpp != NULL)
        *ap->a_vpp = cp->c_devvp;
    if (ap->a_bnp == NULL)
        return (0);

    /* Only clustered I/O should have delayed allocations. */
    DBG_ASSERT(fp->ff_unallocblocks == 0);

    logBlockSize = GetLogicalBlockSize(vp);
    blockposition = (off_t)ap->a_bn * (off_t)logBlockSize;

    lockExtBtree = overflow_extents(fp);
    if (lockExtBtree) {
        p = current_proc();
        retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID,
                LK_EXCLUSIVE | LK_CANRECURSE, p);
        if (retval)
            return (retval);
    }

    retval = MacToVFSError(
            MapFileBlockC (HFSTOVCB(hfsmp),
                (FCB*)fp,
                MAXPHYSIO,
                blockposition,
                ap->a_bnp,
                &bytesContAvail));

    if (lockExtBtree) (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);

    if (retval == E_NONE) {
        /* Adjust the mapping information for invalid file ranges: */
        overlaptype = rl_scan(&fp->ff_invalidranges,
                blockposition,
                blockposition + MAXPHYSIO - 1,
                &invalid_range);
        if (overlaptype != RL_NOOVERLAP) {
            switch(overlaptype) {
            case RL_MATCHINGOVERLAP:
            case RL_OVERLAPCONTAINSRANGE:
            case RL_OVERLAPSTARTSBEFORE:
                /* There's no valid block for this byte offset: */
                *ap->a_bnp = (daddr_t)-1;
                bytesContAvail = invalid_range->rl_end + 1 - blockposition;
                break;

            case RL_OVERLAPISCONTAINED:
            case RL_OVERLAPENDSAFTER:
                /* The range of interest hits an invalid block before the end: */
                if (invalid_range->rl_start == blockposition) {
                    /* There's actually no valid information to be had starting here: */
                    *ap->a_bnp = (daddr_t)-1;
                    if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
                        (invalid_range->rl_end + 1 - blockposition < bytesContAvail)) {
                        bytesContAvail = invalid_range->rl_end + 1 - blockposition;
                    };
                } else {
                    bytesContAvail = invalid_range->rl_start - blockposition;
                };
                break;
            };
            if (bytesContAvail > MAXPHYSIO) bytesContAvail = MAXPHYSIO;
        };

        /* Figure out how many read ahead blocks there are */
        if (ap->a_runp != NULL) {
            if (can_cluster(logBlockSize)) {
                /* Make sure this result never goes negative: */
                *ap->a_runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
            } else {
                *ap->a_runp = 0;
            };
        };
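        /* e.g. bytesContAvail = 32768 with a 4096-byte logical block yields
           *ap->a_runp = 7: seven more contiguous blocks follow this one. */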
    };

    return (retval);
}

/* blktooff converts logical block number to file offset */

int
hfs_blktooff(ap)
    struct vop_blktooff_args /* {
        struct vnode *a_vp;
        daddr_t a_lblkno;
        off_t *a_offset;
    } */ *ap;
{
    if (ap->a_vp == NULL)
        return (EINVAL);
    *ap->a_offset = (off_t)ap->a_lblkno * PAGE_SIZE_64;

    return(0);
}

int
hfs_offtoblk(ap)
    struct vop_offtoblk_args /* {
        struct vnode *a_vp;
        off_t a_offset;
        daddr_t *a_lblkno;
    } */ *ap;
{
    if (ap->a_vp == NULL)
        return (EINVAL);
    *ap->a_lblkno = ap->a_offset / PAGE_SIZE_64;

    return(0);
}
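
/* These two conversions are inverses in units of PAGE_SIZE.  For example
   (assuming 4 KB pages): logical block 3 maps to file offset 12288, and any
   offset in the range 12288-16383 maps back to logical block 3. */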

int
hfs_cmap(ap)
    struct vop_cmap_args /* {
        struct vnode *a_vp;
        off_t a_foffset;
        size_t a_size;
        daddr_t *a_bpn;
        size_t *a_run;
        void *a_poff;
    } */ *ap;
{
    struct hfsmount *hfsmp = VTOHFS(ap->a_vp);
    struct filefork *fp = VTOF(ap->a_vp);
    size_t bytesContAvail = 0;
    int retval = E_NONE;
    int lockExtBtree = 0;
    struct proc *p = NULL;
    struct rl_entry *invalid_range;
    enum rl_overlaptype overlaptype;
    int started_tr = 0, grabbed_lock = 0;
    struct timeval tv;

    /*
     * Check for underlying vnode requests and ensure that logical
     * to physical mapping is requested.
     */
    if (ap->a_bpn == NULL)
        return (0);

    p = current_proc();

    if (ISSET(VTOC(ap->a_vp)->c_flag, C_NOBLKMAP)) {
        /*
         * File blocks are getting remapped.  Wait until it's finished.
         */
        SET(VTOC(ap->a_vp)->c_flag, C_WBLKMAP);
        (void) tsleep((caddr_t)VTOC(ap->a_vp), PINOD, "hfs_cmap", 0);
        if (ISSET(VTOC(ap->a_vp)->c_flag, C_NOBLKMAP))
            panic("hfs_cmap: no mappable blocks");
    }

retry:
    if (fp->ff_unallocblocks) {
        lockExtBtree = 1;

        // XXXdbg
        hfs_global_shared_lock_acquire(hfsmp);
        grabbed_lock = 1;

        if (hfsmp->jnl) {
            if (journal_start_transaction(hfsmp->jnl) != 0) {
                hfs_global_shared_lock_release(hfsmp);
                return EINVAL;
            } else {
                started_tr = 1;
            }
        }

        if ((retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p))) {
            if (started_tr) {
                journal_end_transaction(hfsmp->jnl);
            }
            if (grabbed_lock) {
                hfs_global_shared_lock_release(hfsmp);
            }
            return (retval);
        }
    } else if (overflow_extents(fp)) {
        lockExtBtree = 1;
        if ((retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p))) {
            return retval;
        }
    }

    /*
     * Check for any delayed allocations.
     */
    if (fp->ff_unallocblocks) {
        SInt64 reqbytes, actbytes;

        //
        // Make sure we have a transaction.  It's possible
        // that we came in and fp->ff_unallocblocks was zero
        // but during the time we blocked acquiring the extents
        // btree, ff_unallocblocks became non-zero and so we
        // will need to start a transaction.
        //
        if (hfsmp->jnl && started_tr == 0) {
            if (lockExtBtree) {
                (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
                lockExtBtree = 0;
            }

            goto retry;
        }

        reqbytes = (SInt64)fp->ff_unallocblocks *
                   (SInt64)HFSTOVCB(hfsmp)->blockSize;
        /*
         * Release the blocks on loan and acquire some real ones.
         * Note that we can race someone else for these blocks
         * (and lose) so cmap needs to handle a failure here.
         * Currently this race can't occur because all allocations
         * are protected by an exclusive lock on the Extents
         * Overflow file.
         */
        HFSTOVCB(hfsmp)->loanedBlocks -= fp->ff_unallocblocks;
        FTOC(fp)->c_blocks -= fp->ff_unallocblocks;
        fp->ff_blocks -= fp->ff_unallocblocks;
        fp->ff_unallocblocks = 0;

        /* Files that are changing size are not hot file candidates. */
        if (hfsmp->hfc_stage == HFC_RECORDING) {
            fp->ff_bytesread = 0;
        }
        while (retval == 0 && reqbytes > 0) {
            retval = MacToVFSError(ExtendFileC(HFSTOVCB(hfsmp),
                    (FCB*)fp, reqbytes, 0,
                    kEFAllMask | kEFNoClumpMask, &actbytes));
            if (retval == 0 && actbytes == 0)
                retval = ENOSPC;

            if (retval) {
                fp->ff_unallocblocks =
                    reqbytes / HFSTOVCB(hfsmp)->blockSize;
                HFSTOVCB(hfsmp)->loanedBlocks += fp->ff_unallocblocks;
                FTOC(fp)->c_blocks += fp->ff_unallocblocks;
                fp->ff_blocks += fp->ff_unallocblocks;
            }
            reqbytes -= actbytes;
        }

        if (retval) {
            (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
            VTOC(ap->a_vp)->c_flag |= C_MODIFIED;
            if (started_tr) {
                tv = time;
                VOP_UPDATE(ap->a_vp, &tv, &tv, 1);

                hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
                journal_end_transaction(hfsmp->jnl);
            }
            if (grabbed_lock) {
                hfs_global_shared_lock_release(hfsmp);
            }
            return (retval);
        }
    }

    retval = MacToVFSError(
            MapFileBlockC (HFSTOVCB(hfsmp),
                (FCB *)fp,
                ap->a_size,
                ap->a_foffset,
                ap->a_bpn,
                &bytesContAvail));

    if (lockExtBtree)
        (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);

    // XXXdbg
    if (started_tr) {
        tv = time;
        retval = VOP_UPDATE(ap->a_vp, &tv, &tv, 1);

        hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
        journal_end_transaction(hfsmp->jnl);
        started_tr = 0;
    }
    if (grabbed_lock) {
        hfs_global_shared_lock_release(hfsmp);
        grabbed_lock = 0;
    }

    if (retval == E_NONE) {
        /* Adjust the mapping information for invalid file ranges: */
        overlaptype = rl_scan(&fp->ff_invalidranges,
                ap->a_foffset,
                ap->a_foffset + (off_t)bytesContAvail - 1,
                &invalid_range);
        if (overlaptype != RL_NOOVERLAP) {
            switch(overlaptype) {
            case RL_MATCHINGOVERLAP:
            case RL_OVERLAPCONTAINSRANGE:
            case RL_OVERLAPSTARTSBEFORE:
                /* There's no valid block for this byte offset: */
                *ap->a_bpn = (daddr_t)-1;

                /* There's no point limiting the amount to be returned if the
                   invalid range that was hit extends all the way to the EOF
                   (i.e. there's no valid bytes between the end of this range
                   and the file's EOF):
                 */
                if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
                    (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
                    bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
                };
                break;

            case RL_OVERLAPISCONTAINED:
            case RL_OVERLAPENDSAFTER:
                /* The range of interest hits an invalid block before the end: */
                if (invalid_range->rl_start == ap->a_foffset) {
                    /* There's actually no valid information to be had starting here: */
                    *ap->a_bpn = (daddr_t)-1;
                    if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
                        (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
                        bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
                    };
                } else {
                    bytesContAvail = invalid_range->rl_start - ap->a_foffset;
                };
                break;
            };
            if (bytesContAvail > ap->a_size) bytesContAvail = ap->a_size;
        };

        if (ap->a_run) *ap->a_run = bytesContAvail;
    };

    if (ap->a_poff)
        *(int *)ap->a_poff = 0;

    return (retval);
}


/*
 * Read or write a buffer that is not contiguous on disk.  We loop over
 * each device block, copying to or from the caller's buffer.
 *
 * We could be a bit more efficient by transferring as much data as is
 * contiguous.  But since this routine should rarely be called, and doing
 * so would be more complicated, it's best to keep it simple.
 */
static int
hfs_strategy_fragmented(struct buf *bp)
{
    register struct vnode *vp = bp->b_vp;
    register struct cnode *cp = VTOC(vp);
    register struct vnode *devvp = cp->c_devvp;
    caddr_t ioaddr;             /* Address of fragment within bp */
    struct buf *frag = NULL;    /* For reading or writing a single block */
    int retval = 0;
    long remaining;             /* Bytes (in bp) left to transfer */
    off_t offset;               /* Logical offset of current fragment in vp */
    u_long block_size;          /* Size of one device block (and one I/O) */

    /* Make sure we redo this mapping for the next I/O */
    bp->b_blkno = bp->b_lblkno;

    /* Set up the logical position and number of bytes to read/write */
    offset = (off_t) bp->b_lblkno * (off_t) GetLogicalBlockSize(vp);
    block_size = VTOHFS(vp)->hfs_phys_block_size;

    /* Get an empty buffer to do the deblocking */
    frag = geteblk(block_size);
    if (ISSET(bp->b_flags, B_READ))
        SET(frag->b_flags, B_READ);

    for (ioaddr = bp->b_data, remaining = bp->b_bcount; remaining != 0;
        ioaddr += block_size, offset += block_size,
        remaining -= block_size) {
        frag->b_resid = frag->b_bcount;
        CLR(frag->b_flags, B_DONE);

        /* Map the current position to a physical block number */
        retval = VOP_CMAP(vp, offset, block_size, &frag->b_lblkno,
                NULL, NULL);
        if (retval != 0)
            break;

        /*
         * Did we try to read a hole?
         * (Should never happen for metadata!)
         */
        if ((long)frag->b_lblkno == -1) {
            bzero(ioaddr, block_size);
            continue;
        }

        /* If writing, copy before I/O */
        if (!ISSET(bp->b_flags, B_READ))
            bcopy(ioaddr, frag->b_data, block_size);

        /* Call the device to do the I/O and wait for it */
        frag->b_blkno = frag->b_lblkno;
        frag->b_vp = devvp;  /* Used to dispatch via VOP_STRATEGY */
        frag->b_dev = devvp->v_rdev;
        retval = VOP_STRATEGY(frag);
        frag->b_vp = NULL;
        if (retval != 0)
            break;
        retval = biowait(frag);
        if (retval != 0)
            break;

        /* If reading, copy after the I/O */
        if (ISSET(bp->b_flags, B_READ))
            bcopy(frag->b_data, ioaddr, block_size);
    }

    frag->b_vp = NULL;
    //
    // XXXdbg - in the case that this is a meta-data block, it won't affect
    //          the journal because this bp is for a physical disk block,
    //          not a logical block that is part of the catalog or extents
    //          files.
    SET(frag->b_flags, B_INVAL);
    brelse(frag);

    if ((bp->b_error = retval) != 0)
        SET(bp->b_flags, B_ERROR);

    biodone(bp);    /* This I/O is now complete */
    return retval;
}


/*
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
#
#vop_strategy {
#   IN struct buf *bp;
 */
int
hfs_strategy(ap)
    struct vop_strategy_args /* {
        struct buf *a_bp;
    } */ *ap;
{
    register struct buf *bp = ap->a_bp;
    register struct vnode *vp = bp->b_vp;
    register struct cnode *cp = VTOC(vp);
    int retval = 0;
    off_t offset;
    size_t bytes_contig;

    if ( !(bp->b_flags & B_VECTORLIST)) {
        if (vp->v_type == VBLK || vp->v_type == VCHR)
            panic("hfs_strategy: device vnode passed!");

        if (bp->b_flags & B_PAGELIST) {
            /*
             * If we have a page list associated with this bp,
             * then go through cluster_bp since it knows how to
             * deal with a page request that might span non-
             * contiguous physical blocks on the disk...
             */
            retval = cluster_bp(bp);
            vp = cp->c_devvp;
            bp->b_dev = vp->v_rdev;

            return (retval);
        }

        /*
         * If we don't already know the filesystem relative block
         * number then get it using VOP_BMAP().  If VOP_BMAP()
         * returns the block number as -1 then we've got a hole in
         * the file.  Although HFS filesystems don't create files with
         * holes, invalidating of subranges of the file (lazy zero
         * filling) may create such a situation.
         */
        if (bp->b_blkno == bp->b_lblkno) {
            offset = (off_t) bp->b_lblkno *
                (off_t) GetLogicalBlockSize(vp);

            if ((retval = VOP_CMAP(vp, offset, bp->b_bcount,
                    &bp->b_blkno, &bytes_contig, NULL))) {
                bp->b_error = retval;
                bp->b_flags |= B_ERROR;
                biodone(bp);
                return (retval);
            }
            if (bytes_contig < bp->b_bcount)
            {
                /*
                 * We were asked to read a block that wasn't
                 * contiguous, so we have to read each of the
                 * pieces and copy them into the buffer.
                 * Since ordinary file I/O goes through
                 * cluster_io (which won't ask us for
                 * discontiguous data), this is probably an
                 * attempt to read or write metadata.
                 */
                return hfs_strategy_fragmented(bp);
            }
            if ((long)bp->b_blkno == -1)
                clrbuf(bp);
        }
        if ((long)bp->b_blkno == -1) {
            biodone(bp);
            return (0);
        }
        if (bp->b_validend == 0) {
            /*
             * Record the exact size of the I/O transfer about to
             * be made:
             */
            bp->b_validend = bp->b_bcount;
        }
    }
    vp = cp->c_devvp;
    bp->b_dev = vp->v_rdev;

    return VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
}


static int do_hfs_truncate(ap)
    struct vop_truncate_args /* {
        struct vnode *a_vp;
        off_t a_length;
        int a_flags;
        struct ucred *a_cred;
        struct proc *a_p;
    } */ *ap;
{
    register struct vnode *vp = ap->a_vp;
    register struct cnode *cp = VTOC(vp);
    struct filefork *fp = VTOF(vp);
    off_t length;
    long vflags;
    struct timeval tv;
    int retval;
    off_t bytesToAdd;
    off_t actualBytesAdded;
    off_t filebytes;
    u_long fileblocks;
    int blksize;
    struct hfsmount *hfsmp;

    if (vp->v_type != VREG && vp->v_type != VLNK)
        return (EISDIR);    /* cannot truncate an HFS directory! */

    length = ap->a_length;
    blksize = VTOVCB(vp)->blockSize;
    fileblocks = fp->ff_blocks;
    filebytes = (off_t)fileblocks * (off_t)blksize;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
         (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

    if (length < 0)
        return (EINVAL);

    if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
        return (EFBIG);

    hfsmp = VTOHFS(vp);

    tv = time;
    retval = E_NONE;

    /* Files that are changing size are not hot file candidates. */
    if (hfsmp->hfc_stage == HFC_RECORDING) {
        fp->ff_bytesread = 0;
    }

    /*
     * We cannot just check if fp->ff_size == length (as an optimization)
     * since there may be extra physical blocks that also need truncation.
     */
#if QUOTA
    if ((retval = hfs_getinoquota(cp)))
        return(retval);
#endif /* QUOTA */

    /*
     * Lengthen the size of the file.  We must ensure that the
     * last byte of the file is allocated.  Since the smallest
     * value of ff_size is 0, length will be at least 1.
     */
    if (length > fp->ff_size) {
#if QUOTA
        retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
                   ap->a_cred, 0);
        if (retval)
            goto Err_Exit;
#endif /* QUOTA */
        /*
         * If we don't have enough physical space then
         * we need to extend the physical size.
         */
        if (length > filebytes) {
            int eflags;
            u_long blockHint = 0;

            /* All or nothing and don't round up to clumpsize. */
            eflags = kEFAllMask | kEFNoClumpMask;

            if (ap->a_cred && suser(ap->a_cred, NULL) != 0)
                eflags |= kEFReserveMask;  /* keep a reserve */

            /*
             * Allocate Journal and Quota files in metadata zone.
             */
            if (filebytes == 0 &&
                hfsmp->hfs_flags & HFS_METADATA_ZONE &&
                hfs_virtualmetafile(cp)) {
                eflags |= kEFMetadataMask;
                blockHint = hfsmp->hfs_metazone_start;
            }
            // XXXdbg
            hfs_global_shared_lock_acquire(hfsmp);
            if (hfsmp->jnl) {
                if (journal_start_transaction(hfsmp->jnl) != 0) {
                    retval = EINVAL;
                    goto Err_Exit;
                }
            }

            /* lock extents b-tree (also protects volume bitmap) */
            retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
            if (retval) {
                if (hfsmp->jnl) {
                    journal_end_transaction(hfsmp->jnl);
                }
                hfs_global_shared_lock_release(hfsmp);

                goto Err_Exit;
            }

            while ((length > filebytes) && (retval == E_NONE)) {
                bytesToAdd = length - filebytes;
                retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
                        (FCB*)fp,
                        bytesToAdd,
                        blockHint,
                        eflags,
                        &actualBytesAdded));

                filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
                if (actualBytesAdded == 0 && retval == E_NONE) {
                    if (length > filebytes)
                        length = filebytes;
                    break;
                }
            } /* endwhile */

            (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);

            // XXXdbg
            if (hfsmp->jnl) {
                tv = time;
                VOP_UPDATE(vp, &tv, &tv, 1);

                hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
                journal_end_transaction(hfsmp->jnl);
            }
            hfs_global_shared_lock_release(hfsmp);

            if (retval)
                goto Err_Exit;

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
                (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
        }

        if (!(ap->a_flags & IO_NOZEROFILL)) {
            if (UBCINFOEXISTS(vp) && retval == E_NONE) {
                struct rl_entry *invalid_range;
                int devBlockSize;
                off_t zero_limit;

                zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
                if (length < zero_limit) zero_limit = length;

                if (length > fp->ff_size) {
                    /* Extending the file: time to fill out the current last page with zeroes? */
                    if ((fp->ff_size & PAGE_MASK_64) &&
                        (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
                        fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {

                        /* There's some valid data at the start of the (current) last page
                           of the file, so zero out the remainder of that page to ensure the
                           entire page contains valid data.  Since there is no invalid range
                           possible past the (current) eof, there's no need to remove anything
                           from the invalid range list before calling cluster_write(): */
                        VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
                        retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
                                fp->ff_size, (off_t)0, devBlockSize,
                                (ap->a_flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
                        if (retval) goto Err_Exit;

                        /* Merely invalidate the remaining area, if necessary: */
                        if (length > zero_limit) {
                            rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
                            cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
                        }
                    } else {
                        /* The page containing the (current) eof is invalid: just add the
                           remainder of the page to the invalid list, along with the area
                           being newly allocated:
                         */
                        rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
                        cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
                    };
                }
            } else {
                panic("hfs_truncate: invoked on non-UBC object?!");
            };
        }
        cp->c_flag |= C_UPDATE;
        fp->ff_size = length;

        if (UBCISVALID(vp))
            ubc_setsize(vp, fp->ff_size);  /* XXX check errors */

    } else { /* Shorten the size of the file */

        if (fp->ff_size > length) {
            /*
             * Any buffers that are past the truncation point need to be
             * invalidated (to maintain buffer cache consistency).  For
             * simplicity, we invalidate all the buffers by calling vinvalbuf.
             */
            if (UBCISVALID(vp))
                ubc_setsize(vp, length);  /* XXX check errors */

            vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
            retval = vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);

            /* Any space previously marked as invalid is now irrelevant: */
            rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
        }

        /*
         * Account for any unmapped blocks.  Note that the new
         * file length can still end up with unmapped blocks.
         */
        if (fp->ff_unallocblocks > 0) {
            u_int32_t finalblks;

            /* lock extents b-tree */
            retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID,
                    LK_EXCLUSIVE, ap->a_p);
            if (retval)
                goto Err_Exit;

            VTOVCB(vp)->loanedBlocks -= fp->ff_unallocblocks;
            cp->c_blocks -= fp->ff_unallocblocks;
            fp->ff_blocks -= fp->ff_unallocblocks;
            fp->ff_unallocblocks = 0;

            finalblks = (length + blksize - 1) / blksize;
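            /* Round-up division: e.g. length = 10000 with blksize = 4096
               yields finalblks = 3, the number of allocation blocks needed
               to cover the new length. */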
            if (finalblks > fp->ff_blocks) {
                /* calculate required unmapped blocks */
                fp->ff_unallocblocks = finalblks - fp->ff_blocks;
                VTOVCB(vp)->loanedBlocks += fp->ff_unallocblocks;
                cp->c_blocks += fp->ff_unallocblocks;
                fp->ff_blocks += fp->ff_unallocblocks;
            }
            (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID,
                    LK_RELEASE, ap->a_p);
        }

        /*
         * For a TBE process the deallocation of the file blocks is
         * delayed until the file is closed.  And hfs_close calls
         * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
         * isn't set, we make sure this isn't a TBE process.
         */
        if ((ap->a_flags & IO_NDELAY) || (!ISSET(ap->a_p->p_flag, P_TBE))) {
#if QUOTA
            off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
#endif /* QUOTA */
            // XXXdbg
            hfs_global_shared_lock_acquire(hfsmp);
            if (hfsmp->jnl) {
                if (journal_start_transaction(hfsmp->jnl) != 0) {
                    retval = EINVAL;
                    goto Err_Exit;
                }
            }

            /* lock extents b-tree (also protects volume bitmap) */
            retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
            if (retval) {
                if (hfsmp->jnl) {
                    journal_end_transaction(hfsmp->jnl);
                }
                hfs_global_shared_lock_release(hfsmp);
                goto Err_Exit;
            }

            if (fp->ff_unallocblocks == 0)
                retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
                        (FCB*)fp, length, false));

            (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);

            // XXXdbg
            if (hfsmp->jnl) {
                tv = time;
                VOP_UPDATE(vp, &tv, &tv, 1);

                hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
                journal_end_transaction(hfsmp->jnl);
            }
            hfs_global_shared_lock_release(hfsmp);

            filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
            if (retval)
                goto Err_Exit;
#if QUOTA
            /* These are bytesreleased */
            (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
#endif /* QUOTA */
        }
        /* Only set update flag if the logical length changes */
        if (fp->ff_size != length)
            cp->c_flag |= C_UPDATE;
        fp->ff_size = length;
    }
    cp->c_flag |= C_CHANGE;
    retval = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
    if (retval) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
             -1, -1, -1, retval, 0);
    }

Err_Exit:

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
         (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);

    return (retval);
}


/*
#
#% truncate vp  L L L
#
vop_truncate {
    IN struct vnode *vp;
    IN off_t length;
    IN int flags;   (IO_SYNC)
    IN struct ucred *cred;
    IN struct proc *p;
};
 * Truncate a cnode to at most length size, freeing (or adding) the
 * disk blocks.
 */
int hfs_truncate(ap)
    struct vop_truncate_args /* {
        struct vnode *a_vp;
        off_t a_length;
        int a_flags;
        struct ucred *a_cred;
        struct proc *a_p;
    } */ *ap;
{
    register struct vnode *vp = ap->a_vp;
    register struct cnode *cp = VTOC(vp);
    struct filefork *fp = VTOF(vp);
    off_t length;
    off_t filebytes;
    u_long fileblocks;
    int blksize, error;
    u_int64_t nsize;

    if (vp->v_type != VREG && vp->v_type != VLNK)
        return (EISDIR);    /* cannot truncate an HFS directory! */

    length = ap->a_length;
    blksize = VTOVCB(vp)->blockSize;
    fileblocks = fp->ff_blocks;
    filebytes = (off_t)fileblocks * (off_t)blksize;

    // have to loop truncating or growing files that are
    // really big because otherwise transactions can get
    // enormous and consume too many kernel resources.
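    //
    // In other words, each do_hfs_truncate() pass below moves filebytes by
    // at most HFS_BIGFILE_SIZE, so shrinking or growing a very large fork
    // becomes a series of bounded journal transactions rather than one
    // huge one.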
    if (length < filebytes && (filebytes - length) > HFS_BIGFILE_SIZE) {
        while (filebytes > length) {
            if ((filebytes - length) > HFS_BIGFILE_SIZE) {
                filebytes -= HFS_BIGFILE_SIZE;
            } else {
                filebytes = length;
            }

            ap->a_length = filebytes;
            error = do_hfs_truncate(ap);
            if (error)
                break;
        }
    } else if (length > filebytes && (length - filebytes) > HFS_BIGFILE_SIZE) {
        while (filebytes < length) {
            if ((length - filebytes) > HFS_BIGFILE_SIZE) {
                filebytes += HFS_BIGFILE_SIZE;
            } else {
                filebytes = length;
            }

            ap->a_length = filebytes;
            error = do_hfs_truncate(ap);
            if (error)
                break;
        }
    } else {
        error = do_hfs_truncate(ap);
    }

    return error;
}


/*
#
#% allocate vp  L L L
#
vop_allocate {
    IN struct vnode *vp;
    IN off_t length;
    IN int flags;
    OUT off_t *bytesallocated;
    IN off_t offset;
    IN struct ucred *cred;
    IN struct proc *p;
};
 * allocate a cnode to at most length size
 */
int hfs_allocate(ap)
    struct vop_allocate_args /* {
        struct vnode *a_vp;
        off_t a_length;
        u_int32_t  a_flags;
        off_t *a_bytesallocated;
        off_t a_offset;
        struct ucred *a_cred;
        struct proc *a_p;
    } */ *ap;
{
    struct vnode *vp = ap->a_vp;
    struct cnode *cp = VTOC(vp);
    struct filefork *fp = VTOF(vp);
    ExtendedVCB *vcb = VTOVCB(vp);
    off_t length = ap->a_length;
    off_t startingPEOF;
    off_t moreBytesRequested;
    off_t actualBytesAdded;
    off_t filebytes;
    u_long fileblocks;
    long vflags;
    struct timeval tv;
    int retval, retval2;
    UInt32 blockHint;
    UInt32 extendFlags;   /* For call to ExtendFileC */
    struct hfsmount *hfsmp;

    hfsmp = VTOHFS(vp);

    *(ap->a_bytesallocated) = 0;
    fileblocks = fp->ff_blocks;
    filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

    if (length < (off_t)0)
        return (EINVAL);
    if (vp->v_type != VREG)
        return (EISDIR);
    if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes))
        return (EINVAL);

    /* Fill in the flags word for the call to Extend the file */

    extendFlags = kEFNoClumpMask;
    if (ap->a_flags & ALLOCATECONTIG)
        extendFlags |= kEFContigMask;
    if (ap->a_flags & ALLOCATEALL)
        extendFlags |= kEFAllMask;
    if (ap->a_cred && suser(ap->a_cred, NULL) != 0)
        extendFlags |= kEFReserveMask;

    tv = time;
    retval = E_NONE;
    blockHint = 0;
    startingPEOF = filebytes;

    if (ap->a_flags & ALLOCATEFROMPEOF)
        length += filebytes;
    else if (ap->a_flags & ALLOCATEFROMVOL)
        blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
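    /* e.g. ALLOCATEFROMPEOF with a_length = 1 MB on a fork that already has
       4 MB allocated asks for a total allocation of 5 MB (illustrative
       numbers). */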

    /* If no changes are necessary, then we're done */
    if (filebytes == length)
        goto Std_Exit;

    /*
     * Lengthen the size of the file.  We must ensure that the
     * last byte of the file is allocated.  Since the smallest
     * value of filebytes is 0, length will be at least 1.
     */
    if (length > filebytes) {
        moreBytesRequested = length - filebytes;

#if QUOTA
        retval = hfs_chkdq(cp,
                (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
                ap->a_cred, 0);
        if (retval)
            return (retval);

#endif /* QUOTA */
        /*
         * Metadata zone checks.
         */
        if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
            /*
             * Allocate Journal and Quota files in metadata zone.
             */
            if (hfs_virtualmetafile(cp)) {
                extendFlags |= kEFMetadataMask;
                blockHint = hfsmp->hfs_metazone_start;
            } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
                   (blockHint <= hfsmp->hfs_metazone_end)) {
                /*
                 * Move blockHint outside metadata zone.
                 */
                blockHint = hfsmp->hfs_metazone_end + 1;
            }
        }

        // XXXdbg
        hfs_global_shared_lock_acquire(hfsmp);
        if (hfsmp->jnl) {
            if (journal_start_transaction(hfsmp->jnl) != 0) {
                retval = EINVAL;
                goto Err_Exit;
            }
        }

        /* lock extents b-tree (also protects volume bitmap) */
        retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
        if (retval) {
            if (hfsmp->jnl) {
                journal_end_transaction(hfsmp->jnl);
            }
            hfs_global_shared_lock_release(hfsmp);
            goto Err_Exit;
        }

        retval = MacToVFSError(ExtendFileC(vcb,
                (FCB*)fp,
                moreBytesRequested,
                blockHint,
                extendFlags,
                &actualBytesAdded));

        *(ap->a_bytesallocated) = actualBytesAdded;
        filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

        (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);

        // XXXdbg
        if (hfsmp->jnl) {
            tv = time;
            VOP_UPDATE(vp, &tv, &tv, 1);

            hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
            journal_end_transaction(hfsmp->jnl);
        }
        hfs_global_shared_lock_release(hfsmp);

        /*
         * If we get an error and no changes were made, then exit;
         * otherwise we must do the VOP_UPDATE to reflect the changes.
         */
        if (retval && (startingPEOF == filebytes))
            goto Err_Exit;

        /*
         * Adjust actualBytesAdded to be allocation block aligned, not
         * clump size aligned.
         * NOTE: So what we are reporting does not affect reality
         * until the file is closed, when we truncate the file to allocation
         * block size.
         */
        if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
            *(ap->a_bytesallocated) =
                roundup(moreBytesRequested, (off_t)vcb->blockSize);

    } else { /* Shorten the size of the file */

        if (fp->ff_size > length) {
            /*
             * Any buffers that are past the truncation point need to be
             * invalidated (to maintain buffer cache consistency).  For
             * simplicity, we invalidate all the buffers by calling vinvalbuf.
             */
            vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
            (void) vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
        }

        // XXXdbg
        hfs_global_shared_lock_acquire(hfsmp);
        if (hfsmp->jnl) {
            if (journal_start_transaction(hfsmp->jnl) != 0) {
                retval = EINVAL;
                goto Err_Exit;
            }
        }

        /* lock extents b-tree (also protects volume bitmap) */
        retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
        if (retval) {
            if (hfsmp->jnl) {
                journal_end_transaction(hfsmp->jnl);
            }
            hfs_global_shared_lock_release(hfsmp);

            goto Err_Exit;
        }

        retval = MacToVFSError(
                TruncateFileC(
                    vcb,
                    (FCB*)fp,
                    length,
                    false));
        (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
        filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

        if (hfsmp->jnl) {
            tv = time;
            VOP_UPDATE(vp, &tv, &tv, 1);

            hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
            journal_end_transaction(hfsmp->jnl);
        }
        hfs_global_shared_lock_release(hfsmp);


1981 /*
1982 * If we get an error and no changes were made then exit;
1983 * otherwise we must do the VOP_UPDATE to reflect the changes.
1984 */
1985 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
1986 #if QUOTA
1987 /* Credit the quota with the bytes released (startingPEOF - filebytes) */
1988 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
1989 #endif /* QUOTA */
1990
1991 if (fp->ff_size > filebytes) {
1992 fp->ff_size = filebytes;
1993
1994 if (UBCISVALID(vp))
1995 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
1996 }
1997 }
1998
1999 Std_Exit:
2000 cp->c_flag |= C_CHANGE | C_UPDATE;
 tv = time; /* XXX tv is only assigned on the journaled paths above; set it before the update */
2001 retval2 = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
2002
2003 if (retval == 0)
2004 retval = retval2;
2005 Err_Exit:
2006 return (retval);
2007 }
2008
2009
2010 /*
2011 * pagein for HFS filesystem
2012 */
2013 int
2014 hfs_pagein(ap)
2015 struct vop_pagein_args /* {
2016 struct vnode *a_vp,
2017 upl_t a_pl,
2018 vm_offset_t a_pl_offset,
2019 off_t a_f_offset,
2020 size_t a_size,
2021 struct ucred *a_cred,
2022 int a_flags
2023 } */ *ap;
2024 {
2025 register struct vnode *vp = ap->a_vp;
2026 int devBlockSize = 0;
2027 int error;
2028
2029 if (vp->v_type != VREG)
2030 panic("hfs_pagein: vp not UBC type\n");
2031
2032 VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devBlockSize);
2033
2034 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2035 ap->a_size, (off_t)VTOF(vp)->ff_size, devBlockSize,
2036 ap->a_flags);
2037 /*
2038 * Keep track of the bytes read.
2039 */
2040 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
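 /*
  * During the hot-file recording stage, per-file read totals
  * accumulate in ff_bytesread; these presumably feed the ranking
  * that decides which files are moved into the hot file area of
  * the volume.
  */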
2041 struct cnode *cp;
2042
2043 cp = VTOC(vp);
2044 /*
2045 * If this file hasn't been seen since the start of
2046 * the current sampling period then start over.
2047 */
2048 if (cp->c_atime < VTOHFS(vp)->hfc_timebase)
2049 VTOF(vp)->ff_bytesread = ap->a_size;
2050 else
2051 VTOF(vp)->ff_bytesread += ap->a_size;
2052
2053 cp->c_flag |= C_ACCESS;
2054 }
2055
2056 return (error);
2057 }
2058
2059 /*
2060 * pageout for HFS filesystem.
2061 */
2062 int
2063 hfs_pageout(ap)
2064 struct vop_pageout_args /* {
2065 struct vnode *a_vp,
2066 upl_t a_pl,
2067 vm_offset_t a_pl_offset,
2068 off_t a_f_offset,
2069 size_t a_size,
2070 struct ucred *a_cred,
2071 int a_flags
2072 } */ *ap;
2073 {
2074 struct vnode *vp = ap->a_vp;
2075 struct cnode *cp = VTOC(vp);
2076 struct filefork *fp = VTOF(vp);
2077 int retval;
2078 int devBlockSize = 0;
2079 off_t end_of_range;
2080 off_t filesize;
2081
2082 if (UBCINVALID(vp))
2083 panic("hfs_pageout: Not a VREG: vp=%x", vp);
2084
2085 VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
2086 filesize = fp->ff_size;
2087 end_of_range = ap->a_f_offset + ap->a_size - 1;
2088
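 /*
  * While a file is being relocated (see hfs_relocate below) its size
  * is temporarily doubled: the original blocks occupy the first half
  * and the clone the second.  Refusing page-outs to the first half
  * with EBUSY keeps that range stable until the old head blocks are
  * truncated away.
  */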
2089 if (cp->c_flag & C_RELOCATING) {
2090 if (end_of_range < (filesize / 2)) {
2091 return (EBUSY);
2092 }
2093 }
2094
2095 if (end_of_range >= filesize)
2096 end_of_range = (off_t)(filesize - 1);
2097 if (ap->a_f_offset < filesize) {
2098 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2099 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2100 }
2101
2102 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size,
2103 filesize, devBlockSize, ap->a_flags);
2104
2105 /*
2106 * If we successfully wrote any data and we are not the superuser,
2107 * we clear the setuid and setgid bits as a precaution against
2108 * tampering.
2109 */
2110 if (retval == 0 && ap->a_cred && ap->a_cred->cr_uid != 0)
2111 cp->c_mode &= ~(S_ISUID | S_ISGID);
2112
2113 return (retval);
2114 }
2115
2116 /*
2117 * Intercept B-Tree node writes to unswap them if necessary.
2118 #
2119 #vop_bwrite {
2120 # IN struct buf *bp;
2121 */
2122 int
2123 hfs_bwrite(ap)
2124 struct vop_bwrite_args /* {
2125 struct buf *a_bp;
2126 } */ *ap;
2127 {
2128 int retval = 0;
2129 register struct buf *bp = ap->a_bp;
2130 register struct vnode *vp = bp->b_vp;
2131 #if BYTE_ORDER == LITTLE_ENDIAN
2132 BlockDescriptor block;
2133
2134 /* Trap B-Tree writes */
2135 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2136 (VTOC(vp)->c_fileid == kHFSCatalogFileID)) {
2137
2138 /* Swap if the B-Tree node is in native byte order */
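 /*
  * The last two bytes of a node hold the offset of record 0, which is
  * always sizeof(BTNodeDescriptor) == 14 (0x000e).  If a native
  * little-endian load sees 0x000e there, the node is still in host
  * byte order and must be swapped back to big-endian before it
  * reaches disk.
  */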
2139 if (((UInt16 *)((char *)bp->b_data + bp->b_bcount - 2))[0] == 0x000e) {
2140 /* Prepare the block pointer */
2141 block.blockHeader = bp;
2142 block.buffer = bp->b_data;
2143 /* not found in cache ==> came from disk */
2144 block.blockReadFromDisk = (bp->b_flags & B_CACHE) == 0;
2145 block.blockSize = bp->b_bcount;
2146
2147 /* Endian un-swap B-Tree node */
2148 SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1);
2149 }
2150
2151 /* We don't check to make sure that it's 0x0e00 because it could be all zeros */
2152 }
2153 #endif
2154 /* This buffer shouldn't be locked anymore but if it is clear it */
2155 if (ISSET(bp->b_flags, B_LOCKED)) {
2156 // XXXdbg
2157 if (VTOHFS(vp)->jnl) {
2158 panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
2159 }
2160 CLR(bp->b_flags, B_LOCKED);
2161 printf("hfs_bwrite: called with lock bit set\n");
2162 }
2163 retval = vn_bwrite (ap);
2164
2165 return (retval);
2166 }
2167
2168 /*
2169 * Relocate a file to a new location on disk
2170 * cnode must be locked on entry
2171 *
2172 * Relocation occurs by cloning the file's data from its
2173 * current set of blocks to a new set of blocks. During
2174 * the relocation all of the blocks (old and new) are
2175 * owned by the file.
2176 *
2177 * -----------------
2178 * |///////////////|
2179 * -----------------
2180 * 0 N (file offset)
2181 *
2182 * ----------------- -----------------
2183 * |///////////////| | | STEP 1 (acquire new blocks)
2184 * ----------------- -----------------
2185 * 0 N N+1 2N
2186 *
2187 * ----------------- -----------------
2188 * |///////////////| |///////////////| STEP 2 (clone data)
2189 * ----------------- -----------------
2190 * 0 N N+1 2N
2191 *
2192 * -----------------
2193 * |///////////////| STEP 3 (head truncate blocks)
2194 * -----------------
2195 * 0 N
2196 *
2197 * During steps 2 and 3 page-outs to file offsets less
2198 * than or equal to N are suspended.
2199 *
2200 * During step 3 page-ins to the file get suspended.
2201 */
2202 __private_extern__
2203 int
2204 hfs_relocate(vp, blockHint, cred, p)
2205 struct vnode *vp;
2206 u_int32_t blockHint;
2207 struct ucred *cred;
2208 struct proc *p;
2209 {
2210 struct filefork *fp;
2211 struct hfsmount *hfsmp;
2212 ExtendedVCB *vcb;
2213
2214 u_int32_t headblks;
2215 u_int32_t datablks;
2216 u_int32_t blksize;
2217 u_int32_t realsize;
2218 u_int32_t growsize;
2219 u_int32_t nextallocsave;
2220 u_int32_t sector_a;
2221 u_int32_t sector_b;
2222 int eflags;
2223 u_int32_t oldstart; /* debug only */
2224 off_t newbytes;
2225 int retval;
2226
2227 if (vp->v_type != VREG && vp->v_type != VLNK) {
2228 return (EPERM);
2229 }
2230
2231 hfsmp = VTOHFS(vp);
2232 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2233 return (ENOSPC);
2234 }
2235
2236 fp = VTOF(vp);
2237 if (fp->ff_unallocblocks)
2238 return (EINVAL);
2239 vcb = VTOVCB(vp);
2240 blksize = vcb->blockSize;
2241 if (blockHint == 0)
2242 blockHint = vcb->nextAllocation;
2243
2244 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2245 (vp->v_type == VLNK && fp->ff_size > blksize)) {
2246 return (EFBIG);
2247 }
2248
2249 headblks = fp->ff_blocks;
2250 datablks = howmany(fp->ff_size, blksize);
2251 growsize = datablks * blksize;
2252 realsize = fp->ff_size;
2253 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2254 if (blockHint >= hfsmp->hfs_metazone_start &&
2255 blockHint <= hfsmp->hfs_metazone_end)
2256 eflags |= kEFMetadataMask;
2257
2258 hfs_global_shared_lock_acquire(hfsmp);
2259 if (hfsmp->jnl) {
2260 if (journal_start_transaction(hfsmp->jnl) != 0) {
2261 return (EINVAL);
2262 }
2263 }
2264
2265 /* Lock extents b-tree (also protects volume bitmap) */
2266 retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE, p);
2267 if (retval)
2268 goto out2;
2269
2270 retval = MapFileBlockC(vcb, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2271 if (retval) {
2272 retval = MacToVFSError(retval);
2273 goto out;
2274 }
2275
2276 /*
2277 * STEP 1 - acquire new allocation blocks.
2278 */
2279 nextallocsave = vcb->nextAllocation;
2280 retval = ExtendFileC(vcb, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
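 /*
  * If this was a metadata-zone allocation, restore the roving
  * allocation pointer so that subsequent ordinary allocations don't
  * start scanning from inside the zone.
  */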
2281 if (eflags & kEFMetadataMask)
2282 vcb->nextAllocation = nextallocsave;
2283
2284 retval = MacToVFSError(retval);
2285 if (retval == 0) {
2286 VTOC(vp)->c_flag |= C_MODIFIED;
2287 if (newbytes < growsize) {
2288 retval = ENOSPC;
2289 goto restore;
2290 } else if (fp->ff_blocks < (headblks + datablks)) {
2291 printf("hfs_relocate: allocation failed\n");
2292 retval = ENOSPC;
2293 goto restore;
2294 }
2295
2296 retval = MapFileBlockC(vcb, (FCB *)fp, 1, growsize, &sector_b, NULL);
2297 if (retval) {
2298 retval = MacToVFSError(retval);
2299 } else if ((sector_a + 1) == sector_b) {
2300 retval = ENOSPC;
2301 goto restore;
2302 } else if ((eflags & kEFMetadataMask) &&
2303 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2304 hfsmp->hfs_metazone_end)) {
2305 printf("hfs_relocate: didn't move into metadata zone\n");
2306 retval = ENOSPC;
2307 goto restore;
2308 }
2309 }
2310 if (retval) {
2311 /*
2312 * Check to see if failure is due to excessive fragmentation.
2313 */
2314 if (retval == ENOSPC &&
2315 hfs_freeblks(hfsmp, 0) > (datablks * 2)) {
2316 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2317 }
2318 goto out;
2319 }
2320
2321 fp->ff_size = fp->ff_blocks * blksize;
2322 if (UBCISVALID(vp))
2323 (void) ubc_setsize(vp, fp->ff_size);
2324
2325 /*
2326 * STEP 2 - clone data into the new allocation blocks.
2327 */
2328
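 /*
  * Three cloning strategies: a symlink's data fits in a single
  * metadata block, system files must go through the buffer cache,
  * and regular files are copied via the cluster I/O layer.
  */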
2329 if (vp->v_type == VLNK)
2330 retval = hfs_clonelink(vp, blksize, cred, p);
2331 else if (vp->v_flag & VSYSTEM)
2332 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2333 else
2334 retval = hfs_clonefile(vp, headblks, datablks, blksize, cred, p);
2335
2336 if (retval)
2337 goto restore;
2338
2339 oldstart = fp->ff_extents[0].startBlock;
2340
2341 /*
2342 * STEP 3 - switch to clone and remove old blocks.
2343 */
2344 SET(VTOC(vp)->c_flag, C_NOBLKMAP); /* suspend page-ins */
2345
2346 retval = HeadTruncateFile(vcb, (FCB*)fp, headblks);
2347
2348 CLR(VTOC(vp)->c_flag, C_NOBLKMAP); /* resume page-ins */
2349 if (ISSET(VTOC(vp)->c_flag, C_WBLKMAP))
2350 wakeup(VTOC(vp));
2351 if (retval)
2352 goto restore;
2353
2354 fp->ff_size = realsize;
2355 if (UBCISVALID(vp)) {
2356 (void) ubc_setsize(vp, realsize);
2357 (void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
2358 }
2359
2360 CLR(VTOC(vp)->c_flag, C_RELOCATING); /* Resume page-outs for this file. */
2361 out:
2362 (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);
2363
2364 retval = VOP_FSYNC(vp, cred, MNT_WAIT, p);
2365 out2:
2366 if (hfsmp->jnl) {
2367 if (VTOC(vp)->c_cnid < kHFSFirstUserCatalogNodeID)
2368 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2369 else
2370 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2371 journal_end_transaction(hfsmp->jnl);
2372 }
2373 hfs_global_shared_lock_release(hfsmp);
2374
2375 return (retval);
2376
2377 restore:
2378 /*
2379 * Give back any newly allocated space.
2380 */
2381 if (fp->ff_size != realsize)
2382 fp->ff_size = realsize;
2383 (void) TruncateFileC(vcb, (FCB*)fp, fp->ff_size, false);
2384 if (UBCISVALID(vp))
2385 (void) ubc_setsize(vp, fp->ff_size);
2386 CLR(VTOC(vp)->c_flag, C_RELOCATING);
2387 goto out;
2388 }
2389
2390
2391 /*
2392 * Clone a symlink.
2393 *
2394 */
2395 static int
2396 hfs_clonelink(struct vnode *vp, int blksize, struct ucred *cred, struct proc *p)
2397 {
2398 struct buf *head_bp = NULL;
2399 struct buf *tail_bp = NULL;
2400 int error;
2401
2402
2403 error = meta_bread(vp, 0, blksize, cred, &head_bp);
2404 if (error)
2405 goto out;
2406
2407 tail_bp = getblk(vp, 1, blksize, 0, 0, BLK_META);
2408 if (tail_bp == NULL) {
2409 error = EIO;
2410 goto out;
2411 }
2412 bcopy(head_bp->b_data, tail_bp->b_data, blksize);
2413 error = bwrite(tail_bp);
2414 out:
2415 if (head_bp) {
2416 head_bp->b_flags |= B_INVAL;
2417 brelse(head_bp);
2418 }
2419 (void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
2420
2421 return (error);
2422 }
2423
2424 /*
2425 * Clone a file's data within the file.
2426 *
2427 */
2428 static int
2429 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2430 struct ucred *cred, struct proc *p)
2431 {
2432 caddr_t bufp;
2433 size_t writebase;
2434 size_t bufsize;
2435 size_t copysize;
2436 size_t iosize;
2437 size_t filesize;
2438 size_t offset;
2439 struct uio auio;
2440 struct iovec aiov;
2441 int devblocksize;
2442 int didhold;
2443 int error;
2444
2445
2446 if ((error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0))) {
2447 printf("hfs_clonefile: vinvalbuf failed - %d\n", error);
2448 return (error);
2449 }
2450
2451 if (!ubc_clean(vp, 1)) {
2452 printf("hfs_clonefile: not ubc_clean\n");
2453 return (EIO); /* XXX error code */
2454 }
2455
2456 /*
2457 * Suspend page-outs for this file.
2458 */
2459 SET(VTOC(vp)->c_flag, C_RELOCATING);
2460
2461 filesize = VTOF(vp)->ff_size;
2462 writebase = blkstart * blksize;
2463 copysize = blkcnt * blksize;
2464 iosize = bufsize = MIN(copysize, 4096 * 16);
2465 offset = 0;
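 /*
  * Copy in chunks of up to 64K (4096 * 16): each pass reads
  * [offset, offset + iosize) from the original blocks at the head of
  * the file and writes it to the same relative position in the new
  * blocks beginning at writebase.
  */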
2466
2467 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2468 return (ENOMEM);
2469 }
2470
2471 VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devblocksize);
2472
2473 auio.uio_iov = &aiov;
2474 auio.uio_iovcnt = 1;
2475 auio.uio_segflg = UIO_SYSSPACE;
2476 auio.uio_procp = p;
2477
2478 while (offset < copysize) {
2479 iosize = MIN(copysize - offset, iosize);
2480
2481 aiov.iov_base = bufp;
2482 aiov.iov_len = iosize;
2483 auio.uio_resid = iosize;
2484 auio.uio_offset = offset;
2485 auio.uio_rw = UIO_READ;
2486
2487 error = cluster_read(vp, &auio, copysize, devblocksize, 0);
2488 if (error) {
2489 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2490 break;
2491 }
2492 if (auio.uio_resid != 0) {
2493 printf("hfs_clonefile: cluster_read: uio_resid = %d\n", (int)auio.uio_resid);
2494 error = EIO;
2495 break;
2496 }
2497
2498
2499 aiov.iov_base = bufp;
2500 aiov.iov_len = iosize;
2501 auio.uio_resid = iosize;
2502 auio.uio_offset = writebase + offset;
2503 auio.uio_rw = UIO_WRITE;
2504
2505 error = cluster_write(vp, &auio, filesize + offset,
2506 filesize + offset + iosize,
2507 auio.uio_offset, 0, devblocksize, 0);
2508 if (error) {
2509 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2510 break;
2511 }
2512 if (auio.uio_resid != 0) {
2513 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2514 error = EIO;
2515 break;
2516 }
2517 offset += iosize;
2518 }
2519 if (error == 0) {
2520 /* Clean the pages in VM. */
2521 didhold = ubc_hold(vp);
2522 if (didhold)
2523 (void) ubc_clean(vp, 1);
2524
2525 /*
2526 * Clean out all associated buffers.
2527 */
2528 (void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
2529
2530 if (didhold)
2531 ubc_rele(vp);
2532 }
2533 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2534
2535 return (error);
2536 }
2537
2538 /*
2539 * Clone a system (metadata) file.
2540 *
2541 */
2542 static int
2543 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2544 struct ucred *cred, struct proc *p)
2545 {
2546 caddr_t bufp;
2547 char * offset;
2548 size_t bufsize;
2549 size_t iosize;
2550 struct buf *bp = NULL;
2551 daddr_t blkno;
2552 daddr_t blk;
2553 int breadcnt;
2554 int i;
2555 int error = 0;
2556
2557
2558 iosize = GetLogicalBlockSize(vp);
2559 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
2560 breadcnt = bufsize / iosize;
2561
2562 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2563 return (ENOMEM);
2564 }
2565 blkstart = (blkstart * blksize) / iosize;
2566 blkcnt = (blkcnt * blksize) / iosize;
2567 blkno = 0;
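 /*
  * System (metadata) files are copied through the buffer cache rather
  * than the cluster/UBC layer: stage up to a megabyte of logical
  * blocks into bufp with meta_bread, then write them out at blkstart
  * via getblk/bwrite.
  */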
2568
2569 while (blkno < blkcnt) {
2570 /*
2571 * Read up to a megabyte
2572 */
2573 offset = bufp;
2574 for (i = 0, blk = blkno; (i < breadcnt) && (blk < blkcnt); ++i, ++blk) {
2575 error = meta_bread(vp, blk, iosize, cred, &bp);
2576 if (error) {
2577 printf("hfs_clonesysfile: meta_bread error %d\n", error);
2578 goto out;
2579 }
2580 if (bp->b_bcount != iosize) {
2581 printf("hfs_clonesysfile: b_bcount is only %d\n", bp->b_bcount);
 error = EIO; /* a short read is an error; without this the goto would report success */
2582 goto out;
2583 }
2584
2585 bcopy(bp->b_data, offset, iosize);
2586 bp->b_flags |= B_INVAL;
2587 brelse(bp);
2588 bp = NULL;
2589 offset += iosize;
2590 }
2591
2592 /*
2593 * Write up to a megabyte
2594 */
2595 offset = bufp;
2596 for (i = 0; (i < breadcnt) && (blkno < blkcnt); ++i, ++blkno) {
2597 bp = getblk(vp, blkstart + blkno, iosize, 0, 0, BLK_META);
2598 if (bp == NULL) {
2599 printf("hfs_clonesysfile: getblk failed on blk %d\n", blkstart + blkno);
2600 error = EIO;
2601 goto out;
2602 }
2603 bcopy(offset, bp->b_data, iosize);
2604 error = bwrite(bp);
2605 bp = NULL;
2606 if (error)
2607 goto out;
2608 offset += iosize;
2609 }
2610 }
2611 out:
2612 if (bp) {
2613 brelse(bp);
2614 }
2615
2616 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2617
 /* Flush our work, but don't let a successful fsync mask an earlier copy error. */
 if (error == 0)
2618 error = VOP_FSYNC(vp, cred, MNT_WAIT, p);
 else
 (void) VOP_FSYNC(vp, cred, MNT_WAIT, p);
2619
2620 return (error);
2621 }
2622