/*
 * Copyright (c) 2000-2003 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License").  You may not use this file except in compliance with the
 * License.  Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*	@(#)hfs_readwrite.c	1.0
 *
 *	(c) 1998-2001 Apple Computer, Inc.  All Rights Reserved
 *
 *	hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/filedesc.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/uio.h>

#include <miscfs/specfs/specdev.h>

#include <sys/ubc.h>
#include <vm/vm_pageout.h>

#include <sys/kdebug.h>

#include "hfs.h"
#include "hfs_endian.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
#include "hfs_dbg.h"
extern int  overflow_extents(struct filefork *fp);

#define can_cluster(size)  ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))

enum {
	MAXHFSFILESIZE = 0x7FFFFFFF	/* this needs to go in the mount structure */
};

extern u_int32_t GetLogicalBlockSize(struct vnode *vp);

static int  hfs_clonelink(struct vnode *, int, struct ucred *, struct proc *);
static int  hfs_clonefile(struct vnode *, int, int, int, struct ucred *, struct proc *);
static int  hfs_clonesysfile(struct vnode *, int, int, int, struct ucred *, struct proc *);
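
/*
 * Illustrative sketch (not part of the original source): how the
 * can_cluster() predicate behaves.  A logical block size qualifies for
 * the clustered I/O path only when it is a multiple of 4 KB and no
 * larger than half of MAXPHYSIO.  The MAXPHYSIO value below is an
 * assumed stand-in for the demonstration; the real value comes from
 * <sys/param.h>.
 */
#if 0	/* demonstration only */
#include <stdio.h>
#define MAXPHYSIO (128 * 1024)	/* assumed value for the sketch */
#define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))

int
main(void)
{
	int sizes[] = { 512, 4096, 8192, 65536, 131072 };
	int i;

	for (i = 0; i < 5; i++)
		printf("%6d -> %s\n", sizes[i],
		    can_cluster(sizes[i]) ? "cluster" : "no cluster");
	return (0);
}
#endif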
/*****************************************************************************
*
*	Operations on vnodes
*
*****************************************************************************/

/*
#% read		vp	L L L
#
 vop_read {
     IN struct vnode *vp;
     INOUT struct uio *uio;
     IN int ioflag;
     IN struct ucred *cred;

     */

static int
hfs_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register struct uio *uio = ap->a_uio;
	register struct vnode *vp = ap->a_vp;
	register struct cnode *cp;
	register struct filefork *fp;
	int devBlockSize = 0;
	int retval = 0;
	off_t filesize;
	off_t filebytes;
	off_t start_resid = uio->uio_resid;


	/* Preflight checks */
	if ((vp->v_type != VREG) || !UBCINFOEXISTS(vp))
		return (EPERM);		/* can only read regular files */
	if (uio->uio_resid == 0)
		return (0);		/* Nothing left to do */
	if (uio->uio_offset < 0)
		return (EINVAL);	/* can't read from a negative offset */

	cp = VTOC(vp);
	fp = VTOF(vp);
	filesize = fp->ff_size;
	filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize;
	if (uio->uio_offset > filesize) {
		if ((!ISHFSPLUS(VTOVCB(vp))) && (uio->uio_offset > (off_t)MAXHFSFILESIZE))
			return (EFBIG);
		else
			return (0);
	}

	VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
		(int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0);

	retval = cluster_read(vp, uio, filesize, devBlockSize, 0);

	cp->c_flag |= C_ACCESS;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
		(int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0);

	/*
	 * Keep track of blocks read.
	 */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
			fp->ff_bytesread = start_resid - uio->uio_resid;
			cp->c_atime = time.tv_sec;
		} else {
			fp->ff_bytesread += start_resid - uio->uio_resid;
		}
	}

	return (retval);
}
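
/*
 * Illustrative sketch (not part of the original source): the hot-file
 * byte accounting above, reduced to a pure function.  If the file was
 * last touched before the current sampling period began, the counter is
 * restarted; otherwise the bytes just read are accumulated.  The field
 * names are hypothetical stand-ins for the cnode/filefork fields.
 */
#if 0	/* demonstration only */
#include <stdio.h>
#include <time.h>

struct sample_state {
	time_t	atime;		/* last access time (c_atime) */
	long	bytesread;	/* bytes read this period (ff_bytesread) */
};

static void
account_read(struct sample_state *s, time_t timebase, long bytes, time_t now)
{
	if (s->atime < timebase) {	/* not seen this period: start over */
		s->bytesread = bytes;
		s->atime = now;
	} else {			/* same period: accumulate */
		s->bytesread += bytes;
	}
}

int
main(void)
{
	struct sample_state s = { 0, 0 };

	account_read(&s, 100, 4096, 200);	/* restarts the sample */
	account_read(&s, 100, 4096, 201);	/* accumulates */
	printf("bytesread = %ld\n", s.bytesread);
	return (0);
}
#endif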
/*
 * Write data to a file or directory.
#% write	vp	L L L
#
 vop_write {
     IN struct vnode *vp;
     INOUT struct uio *uio;
     IN int ioflag;
     IN struct ucred *cred;

     */
static int
hfs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct cnode *cp;
	struct filefork *fp;
	struct proc *p;
	struct timeval tv;
	ExtendedVCB *vcb;
	int devBlockSize = 0;
	off_t origFileSize, writelimit, bytesToAdd;
	off_t actualBytesAdded;
	off_t filebytes;
	int resid;
	int eflags, ioflag;
	int retval = E_NONE;
	struct hfsmount *hfsmp;
	int started_tr = 0, grabbed_lock = 0;

	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (E_NONE);
	if ((vp->v_type != VREG) || !UBCINFOEXISTS(vp))
		return (EPERM);		/* Can only write regular files */

	ioflag = ap->a_ioflag;
	cp = VTOC(vp);
	fp = VTOF(vp);
	vcb = VTOVCB(vp);
	filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

	if (ioflag & IO_APPEND)
		uio->uio_offset = fp->ff_size;
	if ((cp->c_flags & APPEND) && uio->uio_offset != fp->ff_size)
		return (EPERM);

	// XXXdbg - don't allow modification of the journal or journal_info_block
	if (VTOHFS(vp)->jnl && cp->c_datafork) {
		struct HFSPlusExtentDescriptor *extd;

		extd = &cp->c_datafork->ff_extents[0];
		if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock ||
		    extd->startBlock == VTOHFS(vp)->jnl_start) {
			return (EPERM);
		}
	}

	writelimit = uio->uio_offset + uio->uio_resid;

	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	p = uio->uio_procp;
	if (vp->v_type == VREG && p &&
	    writelimit > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}

	VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);

	resid = uio->uio_resid;
	origFileSize = fp->ff_size;
	eflags = kEFDeferMask;	/* defer file block allocations */
	filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
		(int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);

	/* Now test if we need to extend the file */
	/* Doing so will adjust the filebytes for us */

#if QUOTA
	if (writelimit > filebytes) {
		bytesToAdd = writelimit - filebytes;

		retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, vcb->blockSize)),
				   ap->a_cred, 0);
		if (retval)
			return (retval);
	}
#endif /* QUOTA */

	hfsmp = VTOHFS(vp);

#ifdef HFS_SPARSE_DEV
	/* 
	 * When the underlying device is sparse and space
	 * is low (< 8MB), stop doing delayed allocations
	 * and begin doing synchronous I/O.
	 */
	if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
	    (hfs_freeblks(hfsmp, 0) < 2048)) {
		eflags &= ~kEFDeferMask;
		ioflag |= IO_SYNC;
	}
#endif /* HFS_SPARSE_DEV */

	if (writelimit > filebytes) {
		hfs_global_shared_lock_acquire(hfsmp);
		grabbed_lock = 1;
	}
	if (hfsmp->jnl && (writelimit > filebytes)) {
		if (journal_start_transaction(hfsmp->jnl) != 0) {
			hfs_global_shared_lock_release(hfsmp);
			return (EINVAL);
		}
		started_tr = 1;
	}

	while (writelimit > filebytes) {
		bytesToAdd = writelimit - filebytes;
		if (ap->a_cred && suser(ap->a_cred, NULL) != 0)
			eflags |= kEFReserveMask;

		/* lock extents b-tree (also protects volume bitmap) */
		retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, current_proc());
		if (retval != E_NONE)
			break;

		/* Files that are changing size are not hot file candidates. */
		if (hfsmp->hfc_stage == HFC_RECORDING) {
			fp->ff_bytesread = 0;
		}
		retval = MacToVFSError(ExtendFileC (vcb, (FCB*)fp, bytesToAdd,
				0, eflags, &actualBytesAdded));

		(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);
		if ((actualBytesAdded == 0) && (retval == E_NONE))
			retval = ENOSPC;
		if (retval != E_NONE)
			break;
		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
			(int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);
	}

	// XXXdbg
	if (started_tr) {
		tv = time;
		VOP_UPDATE(vp, &tv, &tv, 1);

		hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
		journal_end_transaction(hfsmp->jnl);
		started_tr = 0;
	}
	if (grabbed_lock) {
		hfs_global_shared_lock_release(hfsmp);
		grabbed_lock = 0;
	}

	if (retval == E_NONE) {
		off_t filesize;
		off_t zero_off;
		off_t tail_off;
		off_t inval_start;
		off_t inval_end;
		off_t io_start, io_end;
		int lflag;
		struct rl_entry *invalid_range;

		if (writelimit > fp->ff_size)
			filesize = writelimit;
		else
			filesize = fp->ff_size;

		lflag = (ioflag & IO_SYNC);

		if (uio->uio_offset <= fp->ff_size) {
			zero_off = uio->uio_offset & ~PAGE_MASK_64;

			/* Check whether the area between zero_off and the start
			   of the transfer is invalid and should be zero-filled
			   as part of the transfer:
			 */
			if (uio->uio_offset > zero_off) {
				if (rl_scan(&fp->ff_invalidranges, zero_off, uio->uio_offset - 1, &invalid_range) != RL_NOOVERLAP)
					lflag |= IO_HEADZEROFILL;
			}
		} else {
			off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;

			/* The bytes between fp->ff_size and uio->uio_offset must never be
			   read without being zeroed.  The current last block is filled with zeroes
			   if it holds valid data but in all cases merely do a little bookkeeping
			   to track the area from the end of the current last page to the start of
			   the area actually written.  For the same reason only the bytes up to the
			   start of the page where this write will start is invalidated; any remainder
			   before uio->uio_offset is explicitly zeroed as part of the cluster_write.

			   Note that inval_start, the start of the page after the current EOF,
			   may be past the start of the write, in which case the zeroing
			   will be handled by the cluster_write of the actual data.
			 */
			inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
			inval_end = uio->uio_offset & ~PAGE_MASK_64;
			zero_off = fp->ff_size;

			if ((fp->ff_size & PAGE_MASK_64) &&
				(rl_scan(&fp->ff_invalidranges,
							eof_page_base,
							fp->ff_size - 1,
							&invalid_range) != RL_NOOVERLAP)) {
				/* The page containing the EOF is not valid, so the
				   entire page must be made inaccessible now.  If the write
				   starts on a page beyond the page containing the eof
				   (inval_end > eof_page_base), add the
				   whole page to the range to be invalidated.  Otherwise
				   (i.e. if the write starts on the same page), zero-fill
				   the entire page explicitly now:
				 */
				if (inval_end > eof_page_base) {
					inval_start = eof_page_base;
				} else {
					zero_off = eof_page_base;
				};
			};

			if (inval_start < inval_end) {
				/* There's some range of data that's going to be marked invalid */

				if (zero_off < inval_start) {
					/* The pages between inval_start and inval_end are going to be invalidated,
					   and the actual write will start on a page past inval_end.  Now's the last
					   chance to zero-fill the page containing the EOF:
					 */
					retval = cluster_write(vp, (struct uio *) 0,
							fp->ff_size, inval_start,
							zero_off, (off_t)0, devBlockSize,
							lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
					if (retval) goto ioerr_exit;
				};

				/* Mark the remaining area of the newly allocated space as invalid: */
				rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
				cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
				zero_off = fp->ff_size = inval_end;
			};

			if (uio->uio_offset > zero_off) lflag |= IO_HEADZEROFILL;
		};
		/* Check to see whether the area between the end of the write and the end of
		   the page it falls in is invalid and should be zero-filled as part of the transfer:
		 */
		tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
		if (tail_off > filesize) tail_off = filesize;
		if (tail_off > writelimit) {
			if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
				lflag |= IO_TAILZEROFILL;
			};
		};

		/*
		 * if the write starts beyond the current EOF (possibly advanced in the
		 * zeroing of the last block, above), then we'll zero fill from the current EOF
		 * to where the write begins:
		 *
		 * NOTE: If (and ONLY if) the portion of the file about to be written is
		 *       before the current EOF it might be marked as invalid now and must be
		 *       made readable (removed from the invalid ranges) before cluster_write
		 *       tries to write it:
		 */
		io_start = (lflag & IO_HEADZEROFILL) ? zero_off : uio->uio_offset;
		io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
		if (io_start < fp->ff_size) {
			rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
		};
		retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
				tail_off, devBlockSize, lflag | IO_NOZERODIRTY);

		if (uio->uio_offset > fp->ff_size) {
			fp->ff_size = uio->uio_offset;

			ubc_setsize(vp, fp->ff_size);	/* XXX check errors */
		}
		if (resid > uio->uio_resid)
			cp->c_flag |= C_CHANGE | C_UPDATE;
	}

	HFS_KNOTE(vp, NOTE_WRITE);

ioerr_exit:
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
		cp->c_mode &= ~(S_ISUID | S_ISGID);

	if (retval) {
		if (ioflag & IO_UNIT) {
			(void)VOP_TRUNCATE(vp, origFileSize,
				ioflag & IO_SYNC, ap->a_cred, uio->uio_procp);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
			filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
		tv = time;
		retval = VOP_UPDATE(vp, &tv, &tv, 1);
	}

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
		(int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);

	return (retval);
}
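
/*
 * Illustrative sketch (not part of the original source): the page-mask
 * arithmetic used by the zero-fill logic above, in isolation.  Given the
 * current EOF and the write offset, it derives the page-rounded
 * invalidation window exactly as hfs_write() does.  A 4 KB page size is
 * assumed for the demonstration.
 */
#if 0	/* demonstration only */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE_64	((uint64_t)4096)
#define PAGE_MASK_64	(PAGE_SIZE_64 - 1)

int
main(void)
{
	uint64_t ff_size = 10000;	/* current EOF */
	uint64_t uio_offset = 20000;	/* where the write begins */

	/* first page boundary at or after the current EOF */
	uint64_t inval_start = (ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
	/* page boundary at or before the write offset */
	uint64_t inval_end = uio_offset & ~PAGE_MASK_64;

	printf("invalidate [%llu, %llu), zero-fill from %llu\n",
	    (unsigned long long)inval_start,	/* 12288 */
	    (unsigned long long)inval_end,	/* 16384 */
	    (unsigned long long)ff_size);
	return (0);
}
#endif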
#ifdef HFS_SPARSE_DEV
struct hfs_backingstoreinfo {
	int  signature;   /* == 3419115 */
	int  version;     /* version of this struct (1) */
	int  backingfd;   /* disk image file (on backing fs) */
	int  bandsize;    /* sparse disk image band size */
};

#define HFSIOC_SETBACKINGSTOREINFO  _IOW('h', 7, struct hfs_backingstoreinfo)
#define HFSIOC_CLRBACKINGSTOREINFO  _IO('h', 8)

#define HFS_SETBACKINGSTOREINFO  IOCBASECMD(HFSIOC_SETBACKINGSTOREINFO)
#define HFS_CLRBACKINGSTOREINFO  IOCBASECMD(HFSIOC_CLRBACKINGSTOREINFO)

#endif /* HFS_SPARSE_DEV */
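
/*
 * Illustrative sketch (not part of the original source): how a
 * user-space tool might hand a mounted HFS volume its sparse-image
 * backing store.  This assumes the request is issued through fsctl(2)
 * with the HFS_SETBACKINGSTOREINFO command; treat the exact entry point
 * and header as assumptions, not a documented interface.
 */
#if 0	/* demonstration only */
#include <fcntl.h>
#include <unistd.h>
#include <sys/fsctl.h>

static int
set_backing_store(const char *mntpoint, const char *imagepath, int bandsize)
{
	struct hfs_backingstoreinfo bsi;
	int fd;

	fd = open(imagepath, O_RDONLY);	/* disk image on the backing fs */
	if (fd < 0)
		return (-1);

	bsi.signature = 3419115;	/* magic expected by the handler */
	bsi.version   = 1;
	bsi.backingfd = fd;
	bsi.bandsize  = bandsize;	/* sparse image band size, in bytes */

	return (fsctl(mntpoint, HFS_SETBACKINGSTOREINFO, &bsi, 0));
}
#endif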
/*
#% ioctl	vp	U U U
#
 vop_ioctl {
     IN struct vnode *vp;
     IN u_long command;
     IN caddr_t data;
     IN int fflag;
     IN struct ucred *cred;
     IN struct proc *p;

     */


/* ARGSUSED */
int
hfs_ioctl(ap)
	struct vop_ioctl_args /* {
		struct vnode *a_vp;
		int  a_command;
		caddr_t  a_data;
		int  a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	switch (ap->a_command) {

#ifdef HFS_SPARSE_DEV
	case HFS_SETBACKINGSTOREINFO: {
		struct hfsmount * hfsmp;
		struct vnode * bsfs_rootvp;
		struct vnode * di_vp;
		struct file * di_fp;
		struct hfs_backingstoreinfo *bsdata;
		int error = 0;

		hfsmp = VTOHFS(ap->a_vp);
		if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
			return (EALREADY);
		}
		if (ap->a_p->p_ucred->cr_uid != 0 &&
			ap->a_p->p_ucred->cr_uid != (HFSTOVFS(hfsmp))->mnt_stat.f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
		if (bsdata == NULL) {
			return (EINVAL);
		}
		if (error = fdgetf(ap->a_p, bsdata->backingfd, &di_fp)) {
			return (error);
		}
		if (fref(di_fp) == -1) {
			return (EBADF);
		}
		if (di_fp->f_type != DTYPE_VNODE) {
			frele(di_fp);
			return (EINVAL);
		}
		di_vp = (struct vnode *)di_fp->f_data;
		if (ap->a_vp->v_mount == di_vp->v_mount) {
			frele(di_fp);
			return (EINVAL);
		}

		/*
		 * Obtain the backing fs root vnode and keep a reference
		 * on it.  This reference will be dropped in hfs_unmount.
		 */
		error = VFS_ROOT(di_vp->v_mount, &bsfs_rootvp);
		if (error) {
			frele(di_fp);
			return (error);
		}
		VOP_UNLOCK(bsfs_rootvp, 0, ap->a_p);  /* Hold on to the reference */

		hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
		hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
		hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
		hfsmp->hfs_sparsebandblks *= 4;

		frele(di_fp);
		return (0);
	}
	case HFS_CLRBACKINGSTOREINFO: {
		struct hfsmount * hfsmp;
		struct vnode * tmpvp;

		hfsmp = VTOHFS(ap->a_vp);
		if (ap->a_p->p_ucred->cr_uid != 0 &&
			ap->a_p->p_ucred->cr_uid != (HFSTOVFS(hfsmp))->mnt_stat.f_owner) {
			return (EACCES); /* must be owner of file system */
		}
		if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
		    hfsmp->hfs_backingfs_rootvp) {

			hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
			tmpvp = hfsmp->hfs_backingfs_rootvp;
			hfsmp->hfs_backingfs_rootvp = NULLVP;
			hfsmp->hfs_sparsebandblks = 0;
			vrele(tmpvp);
		}
		return (0);
	}
#endif /* HFS_SPARSE_DEV */

	case 6: {
		int error;

		ap->a_vp->v_flag |= VFULLFSYNC;
		error = VOP_FSYNC(ap->a_vp, ap->a_cred, MNT_NOWAIT, ap->a_p);
		ap->a_vp->v_flag &= ~VFULLFSYNC;

		return error;
	}
	case 5: {
		register struct vnode *vp;
		register struct cnode *cp;
		struct filefork *fp;
		int error;

		vp = ap->a_vp;
		cp = VTOC(vp);
		fp = VTOF(vp);

		if (vp->v_type != VREG)
			return EINVAL;

		VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ);
		error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
		if (error)
			return (error);

		/*
		 * used by regression test to determine if
		 * all the dirty pages (via write) have been cleaned
		 * after a call to 'fsync'.
		 */
		error = is_file_clean(vp, fp->ff_size);
		VOP_UNLOCK(vp, 0, ap->a_p);

		return (error);
	}

	case 1: {
		register struct vnode *vp;
		register struct radvisory *ra;
		register struct cnode *cp;
		struct filefork *fp;
		int devBlockSize = 0;
		int error;

		vp = ap->a_vp;

		if (vp->v_type != VREG)
			return EINVAL;

		VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ);
		error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
		if (error)
			return (error);

		ra = (struct radvisory *)(ap->a_data);
		cp = VTOC(vp);
		fp = VTOF(vp);

		if (ra->ra_offset >= fp->ff_size) {
			VOP_UNLOCK(vp, 0, ap->a_p);
			return (EFBIG);
		}
		VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);

		error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count, devBlockSize);
		VOP_UNLOCK(vp, 0, ap->a_p);

		return (error);
	}

	case 2: /* F_READBOOTBLOCKS */
	case 3: /* F_WRITEBOOTBLOCKS */
	{
	    struct vnode *vp = ap->a_vp;
	    struct vnode *devvp = NULL;
	    struct fbootstraptransfer *btd = (struct fbootstraptransfer *)ap->a_data;
	    int devBlockSize;
	    int error;
	    struct iovec aiov;
	    struct uio auio;
	    u_long blockNumber;
	    u_long blockOffset;
	    u_long xfersize;
	    struct buf *bp;

	    if ((vp->v_flag & VROOT) == 0) return EINVAL;
	    if (btd->fbt_offset + btd->fbt_length > 1024) return EINVAL;

	    devvp = VTOHFS(vp)->hfs_devvp;
	    aiov.iov_base = btd->fbt_buffer;
	    aiov.iov_len = btd->fbt_length;

	    auio.uio_iov = &aiov;
	    auio.uio_iovcnt = 1;
	    auio.uio_offset = btd->fbt_offset;
	    auio.uio_resid = btd->fbt_length;
	    auio.uio_segflg = UIO_USERSPACE;
	    auio.uio_rw = (ap->a_command == 3) ? UIO_WRITE : UIO_READ; /* F_WRITEBOOTSTRAP / F_READBOOTSTRAP */
	    auio.uio_procp = ap->a_p;

	    VOP_DEVBLOCKSIZE(devvp, &devBlockSize);

	    while (auio.uio_resid > 0) {
		blockNumber = auio.uio_offset / devBlockSize;
		error = bread(devvp, blockNumber, devBlockSize, ap->a_cred, &bp);
		if (error) {
			if (bp) brelse(bp);
			return error;
		};

		blockOffset = auio.uio_offset % devBlockSize;
		xfersize = devBlockSize - blockOffset;
		error = uiomove((caddr_t)bp->b_data + blockOffset, (int)xfersize, &auio);
		if (error) {
			brelse(bp);
			return error;
		};
		if (auio.uio_rw == UIO_WRITE) {
			error = VOP_BWRITE(bp);
			if (error) return error;
		} else {
			brelse(bp);
		};
	    };
	};
	return 0;

	case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
	{
		*(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(ap->a_vp)->localCreateDate);
		return 0;
	}

	default:
		return (ENOTTY);
	}

	/* Should never get here */
	return 0;
}
/*
 * Select.
 */
int
hfs_select(ap)
	struct vop_select_args /* {
		struct vnode *a_vp;
		int  a_which;
		int  a_fflags;
		struct ucred *a_cred;
		void *a_wql;
		struct proc *a_p;
	} */ *ap;
{
	/*
	 * We should really check to see if I/O is possible.
	 */
	return (1);
}
/*
 * Bmap converts the logical block number of a file to its physical block
 * number on the disk.
 */

/*
 * vp  - address of the vnode for the file
 * bn  - which logical block to convert to a physical block number.
 * vpp - returns the vnode for the block special file holding the filesystem
 *	 containing the file of interest
 * bnp - address of where to return the filesystem physical block number
#% bmap		vp	L L L
#
 vop_bmap {
     IN struct vnode *vp;
     IN daddr_t bn;
     OUT struct vnode **vpp;
     IN daddr_t *bnp;
     OUT int *runp;
     */
/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run.  The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size; currently it is 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks.
 */
int
hfs_bmap(ap)
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t a_bn;
		struct vnode **a_vpp;
		daddr_t *a_bnp;
		int *a_runp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	int retval = E_NONE;
	daddr_t logBlockSize;
	size_t bytesContAvail = 0;
	off_t blockposition;
	struct proc *p = NULL;
	int lockExtBtree;
	struct rl_entry *invalid_range;
	enum rl_overlaptype overlaptype;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (ap->a_vpp != NULL)
		*ap->a_vpp = cp->c_devvp;
	if (ap->a_bnp == NULL)
		return (0);

	/* Only clustered I/O should have delayed allocations. */
	DBG_ASSERT(fp->ff_unallocblocks == 0);

	logBlockSize = GetLogicalBlockSize(vp);
	blockposition = (off_t)ap->a_bn * (off_t)logBlockSize;

	lockExtBtree = overflow_extents(fp);
	if (lockExtBtree) {
		p = current_proc();
		retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID,
				LK_EXCLUSIVE | LK_CANRECURSE, p);
		if (retval)
			return (retval);
	}

	retval = MacToVFSError(
			MapFileBlockC (HFSTOVCB(hfsmp),
				(FCB*)fp,
				MAXPHYSIO,
				blockposition,
				ap->a_bnp,
				&bytesContAvail));

	if (lockExtBtree) (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);

	if (retval == E_NONE) {
		/* Adjust the mapping information for invalid file ranges: */
		overlaptype = rl_scan(&fp->ff_invalidranges,
				blockposition,
				blockposition + MAXPHYSIO - 1,
				&invalid_range);
		if (overlaptype != RL_NOOVERLAP) {
			switch(overlaptype) {
			case RL_MATCHINGOVERLAP:
			case RL_OVERLAPCONTAINSRANGE:
			case RL_OVERLAPSTARTSBEFORE:
				/* There's no valid block for this byte offset: */
				*ap->a_bnp = (daddr_t)-1;
				bytesContAvail = invalid_range->rl_end + 1 - blockposition;
				break;

			case RL_OVERLAPISCONTAINED:
			case RL_OVERLAPENDSAFTER:
				/* The range of interest hits an invalid block before the end: */
				if (invalid_range->rl_start == blockposition) {
					/* There's actually no valid information to be had starting here: */
					*ap->a_bnp = (daddr_t)-1;
					if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
						(invalid_range->rl_end + 1 - blockposition < bytesContAvail)) {
						bytesContAvail = invalid_range->rl_end + 1 - blockposition;
					}
				} else {
					bytesContAvail = invalid_range->rl_start - blockposition;
				}
				break;
			}
			if (bytesContAvail > MAXPHYSIO) bytesContAvail = MAXPHYSIO;
		}

		/* Figure out how many read ahead blocks there are */
		if (ap->a_runp != NULL) {
			if (can_cluster(logBlockSize)) {
				/* Make sure this result never goes negative: */
				*ap->a_runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
			} else {
				*ap->a_runp = 0;
			}
		}
	}

	return (retval);
}
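
/*
 * Illustrative sketch (not part of the original source): the read-ahead
 * run calculation above.  Given the contiguous bytes available and the
 * logical block size, the run is the number of ADDITIONAL whole blocks
 * past the one being mapped, clamped so the result never goes negative.
 */
#if 0	/* demonstration only */
#include <stdio.h>
#include <stddef.h>

static int
run_blocks(size_t bytesContAvail, size_t logBlockSize)
{
	if (bytesContAvail < logBlockSize)
		return (0);			/* less than one full block */
	return ((int)(bytesContAvail / logBlockSize) - 1);
}

int
main(void)
{
	/* 64 KB contiguous, 4 KB blocks -> 15 additional blocks */
	printf("%d\n", run_blocks(65536, 4096));
	return (0);
}
#endif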
/* blktooff converts logical block number to file offset */
int
hfs_blktooff(ap)
	struct vop_blktooff_args /* {
		struct vnode *a_vp;
		daddr_t a_lblkno;
		off_t *a_offset;
	} */ *ap;
{
	if (ap->a_vp == NULL)
		return (EINVAL);
	*ap->a_offset = (off_t)ap->a_lblkno * PAGE_SIZE_64;

	return(0);
}

/* offtoblk converts a file offset to a logical block number */
int
hfs_offtoblk(ap)
	struct vop_offtoblk_args /* {
		struct vnode *a_vp;
		off_t a_offset;
		daddr_t *a_lblkno;
	} */ *ap;
{
	if (ap->a_vp == NULL)
		return (EINVAL);
	*ap->a_lblkno = ap->a_offset / PAGE_SIZE_64;

	return(0);
}
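
/*
 * Illustrative sketch (not part of the original source): the two
 * conversions above are exact inverses only on page boundaries, since
 * offtoblk truncates.  A 4 KB page size is assumed for the demonstration.
 */
#if 0	/* demonstration only */
#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE_64 ((int64_t)4096)

int
main(void)
{
	int64_t lblkno = 3;
	int64_t offset = lblkno * PAGE_SIZE_64;	/* blktooff: 12288 */

	printf("block %lld -> offset %lld -> block %lld\n",
	    (long long)lblkno, (long long)offset,
	    (long long)(offset / PAGE_SIZE_64));	/* offtoblk */
	return (0);
}
#endif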
/*
 * Map file offset to physical block number.
 */
int
hfs_cmap(ap)
	struct vop_cmap_args /* {
		struct vnode *a_vp;
		off_t a_foffset;
		size_t a_size;
		daddr_t *a_bpn;
		size_t *a_run;
		void *a_poff;
	} */ *ap;
{
	struct hfsmount *hfsmp = VTOHFS(ap->a_vp);
	struct filefork *fp = VTOF(ap->a_vp);
	size_t bytesContAvail = 0;
	int retval = E_NONE;
	int lockExtBtree = 0;
	struct proc *p = NULL;
	struct rl_entry *invalid_range;
	enum rl_overlaptype overlaptype;
	int started_tr = 0, grabbed_lock = 0;
	struct timeval tv;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (ap->a_bpn == NULL)
		return (0);

	p = current_proc();

	if (ISSET(VTOC(ap->a_vp)->c_flag, C_NOBLKMAP)) {
		/*
		 * File blocks are getting remapped. Wait until it's finished.
		 */
		SET(VTOC(ap->a_vp)->c_flag, C_WBLKMAP);
		(void) tsleep((caddr_t)VTOC(ap->a_vp), PINOD, "hfs_cmap", 0);
		if (ISSET(VTOC(ap->a_vp)->c_flag, C_NOBLKMAP))
			panic("hfs_cmap: no mappable blocks");
	}

  retry:
	if (fp->ff_unallocblocks) {
		lockExtBtree = 1;

		// XXXdbg
		hfs_global_shared_lock_acquire(hfsmp);
		grabbed_lock = 1;

		if (hfsmp->jnl) {
			if (journal_start_transaction(hfsmp->jnl) != 0) {
				hfs_global_shared_lock_release(hfsmp);
				return EINVAL;
			} else {
				started_tr = 1;
			}
		}

		if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p)) {
			if (started_tr) {
				journal_end_transaction(hfsmp->jnl);
			}
			if (grabbed_lock) {
				hfs_global_shared_lock_release(hfsmp);
			}
			return (retval);
		}
	} else if (overflow_extents(fp)) {
		lockExtBtree = 1;
		if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p)) {
			return retval;
		}
	}

	/*
	 * Check for any delayed allocations.
	 */
	if (fp->ff_unallocblocks) {
		SInt64 reqbytes, actbytes;

		//
		// Make sure we have a transaction.  It's possible
		// that we came in and fp->ff_unallocblocks was zero
		// but during the time we blocked acquiring the extents
		// btree, ff_unallocblocks became non-zero and so we
		// will need to start a transaction.
		//
		if (hfsmp->jnl && started_tr == 0) {
			if (lockExtBtree) {
				(void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
				lockExtBtree = 0;
			}

			goto retry;
		}

		reqbytes = (SInt64)fp->ff_unallocblocks *
			     (SInt64)HFSTOVCB(hfsmp)->blockSize;
		/*
		 * Release the blocks on loan and acquire some real ones.
		 * Note that we can race someone else for these blocks
		 * (and lose) so cmap needs to handle a failure here.
		 * Currently this race can't occur because all allocations
		 * are protected by an exclusive lock on the Extents
		 * Overflow file.
		 */
		HFSTOVCB(hfsmp)->loanedBlocks -= fp->ff_unallocblocks;
		FTOC(fp)->c_blocks            -= fp->ff_unallocblocks;
		fp->ff_blocks                 -= fp->ff_unallocblocks;
		fp->ff_unallocblocks           = 0;

		/* Files that are changing size are not hot file candidates. */
		if (hfsmp->hfc_stage == HFC_RECORDING) {
			fp->ff_bytesread = 0;
		}
		while (retval == 0 && reqbytes > 0) {
			retval = MacToVFSError(ExtendFileC(HFSTOVCB(hfsmp),
					(FCB*)fp, reqbytes, 0,
					kEFAllMask | kEFNoClumpMask, &actbytes));
			if (retval == 0 && actbytes == 0)
				retval = ENOSPC;

			if (retval) {
				fp->ff_unallocblocks =
					reqbytes / HFSTOVCB(hfsmp)->blockSize;
				HFSTOVCB(hfsmp)->loanedBlocks += fp->ff_unallocblocks;
				FTOC(fp)->c_blocks            += fp->ff_unallocblocks;
				fp->ff_blocks                 += fp->ff_unallocblocks;
			}
			reqbytes -= actbytes;
		}

		if (retval) {
			(void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
			VTOC(ap->a_vp)->c_flag |= C_MODIFIED;
			if (started_tr) {
				tv = time;
				VOP_UPDATE(ap->a_vp, &tv, &tv, 1);

				hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
				journal_end_transaction(hfsmp->jnl);
			}
			if (grabbed_lock) {
				hfs_global_shared_lock_release(hfsmp);
			}
			return (retval);
		}
	}

	retval = MacToVFSError(
			MapFileBlockC (HFSTOVCB(hfsmp),
				(FCB *)fp,
				ap->a_size,
				ap->a_foffset,
				ap->a_bpn,
				&bytesContAvail));

	if (lockExtBtree)
		(void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);

	// XXXdbg
	if (started_tr) {
		tv = time;
		retval = VOP_UPDATE(ap->a_vp, &tv, &tv, 1);

		hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
		journal_end_transaction(hfsmp->jnl);
		started_tr = 0;
	}
	if (grabbed_lock) {
		hfs_global_shared_lock_release(hfsmp);
		grabbed_lock = 0;
	}

	if (retval == E_NONE) {
		/* Adjust the mapping information for invalid file ranges: */
		overlaptype = rl_scan(&fp->ff_invalidranges,
				ap->a_foffset,
				ap->a_foffset + (off_t)bytesContAvail - 1,
				&invalid_range);
		if (overlaptype != RL_NOOVERLAP) {
			switch(overlaptype) {
			case RL_MATCHINGOVERLAP:
			case RL_OVERLAPCONTAINSRANGE:
			case RL_OVERLAPSTARTSBEFORE:
				/* There's no valid block for this byte offset: */
				*ap->a_bpn = (daddr_t)-1;

				/* There's no point limiting the amount to be returned if the
				   invalid range that was hit extends all the way to the EOF
				   (i.e. there's no valid bytes between the end of this range
				   and the file's EOF):
				 */
				if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
					(invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
					bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
				}
				break;

			case RL_OVERLAPISCONTAINED:
			case RL_OVERLAPENDSAFTER:
				/* The range of interest hits an invalid block before the end: */
				if (invalid_range->rl_start == ap->a_foffset) {
					/* There's actually no valid information to be had starting here: */
					*ap->a_bpn = (daddr_t)-1;
					if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
						(invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
						bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
					}
				} else {
					bytesContAvail = invalid_range->rl_start - ap->a_foffset;
				}
				break;
			}
			if (bytesContAvail > ap->a_size) bytesContAvail = ap->a_size;
		}

		if (ap->a_run) *ap->a_run = bytesContAvail;
	}

	if (ap->a_poff)
		*(int *)ap->a_poff = 0;

	return (retval);
}
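
/*
 * Illustrative sketch (not part of the original source): the delayed-
 * allocation bookkeeping above in miniature.  Blocks "on loan" from the
 * volume are returned before real blocks are allocated; on failure the
 * remainder goes back on loan so the counters stay balanced.  The names
 * are hypothetical stand-ins for the vcb/cnode/filefork fields.
 */
#if 0	/* demonstration only */
struct vol  { long loanedBlocks; };
struct fork { long blocks, unallocblocks; };

static void
return_loan(struct vol *v, struct fork *f)
{
	v->loanedBlocks -= f->unallocblocks;	/* give the loan back */
	f->blocks       -= f->unallocblocks;
	f->unallocblocks = 0;
}

static void
reborrow(struct vol *v, struct fork *f, long blocks_still_needed)
{
	f->unallocblocks = blocks_still_needed;	/* allocation failed: */
	v->loanedBlocks += f->unallocblocks;	/* borrow again */
	f->blocks       += f->unallocblocks;
}
#endif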
/*
 * Read or write a buffer that is not contiguous on disk.  We loop over
 * each device block, copying to or from caller's buffer.
 *
 * We could be a bit more efficient by transferring as much data as is
 * contiguous.  But since this routine should rarely be called, and that
 * would be more complicated; best to keep it simple.
 */
static int
hfs_strategy_fragmented(struct buf *bp)
{
	register struct vnode *vp = bp->b_vp;
	register struct cnode *cp = VTOC(vp);
	register struct vnode *devvp = cp->c_devvp;
	caddr_t ioaddr;		/* Address of fragment within bp */
	struct buf *frag = NULL; /* For reading or writing a single block */
	int retval = 0;
	long remaining;		/* Bytes (in bp) left to transfer */
	off_t offset;		/* Logical offset of current fragment in vp */
	u_long block_size;	/* Size of one device block (and one I/O) */

	/* Make sure we redo this mapping for the next I/O */
	bp->b_blkno = bp->b_lblkno;

	/* Set up the logical position and number of bytes to read/write */
	offset = (off_t) bp->b_lblkno * (off_t) GetLogicalBlockSize(vp);
	block_size = VTOHFS(vp)->hfs_phys_block_size;

	/* Get an empty buffer to do the deblocking */
	frag = geteblk(block_size);
	if (ISSET(bp->b_flags, B_READ))
		SET(frag->b_flags, B_READ);

	for (ioaddr = bp->b_data, remaining = bp->b_bcount; remaining != 0;
	    ioaddr += block_size, offset += block_size,
	    remaining -= block_size) {
		frag->b_resid = frag->b_bcount;
		CLR(frag->b_flags, B_DONE);

		/* Map the current position to a physical block number */
		retval = VOP_CMAP(vp, offset, block_size, &frag->b_lblkno,
				  NULL, NULL);
		if (retval != 0)
			break;

		/*
		 * Did we try to read a hole?
		 * (Should never happen for metadata!)
		 */
		if ((long)frag->b_lblkno == -1) {
			bzero(ioaddr, block_size);
			continue;
		}

		/* If writing, copy before I/O */
		if (!ISSET(bp->b_flags, B_READ))
			bcopy(ioaddr, frag->b_data, block_size);

		/* Call the device to do the I/O and wait for it */
		frag->b_blkno = frag->b_lblkno;
		frag->b_vp = devvp;  /* Used to dispatch via VOP_STRATEGY */
		frag->b_dev = devvp->v_rdev;
		retval = VOP_STRATEGY(frag);
		frag->b_vp = NULL;
		if (retval != 0)
			break;
		retval = biowait(frag);
		if (retval != 0)
			break;

		/* If reading, copy after the I/O */
		if (ISSET(bp->b_flags, B_READ))
			bcopy(frag->b_data, ioaddr, block_size);
	}

	frag->b_vp = NULL;
	//
	// XXXdbg - in the case that this is a meta-data block, it won't affect
	//          the journal because this bp is for a physical disk block,
	//          not a logical block that is part of the catalog or extents
	//          b-trees.
	//
	SET(frag->b_flags, B_INVAL);
	brelse(frag);

	if ((bp->b_error = retval) != 0)
		SET(bp->b_flags, B_ERROR);

	biodone(bp);	/* This I/O is now complete */
	return (retval);
}
/*
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
#
#vop_strategy {
#	IN struct buf *bp;
 */
int
hfs_strategy(ap)
	struct vop_strategy_args /* {
		struct buf *a_bp;
	} */ *ap;
{
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = bp->b_vp;
	register struct cnode *cp = VTOC(vp);
	int retval = 0;
	off_t offset;
	size_t bytes_contig;

	if ( !(bp->b_flags & B_VECTORLIST)) {
		if (vp->v_type == VBLK || vp->v_type == VCHR)
			panic("hfs_strategy: device vnode passed!");

		if (bp->b_flags & B_PAGELIST) {
			/*
			 * If we have a page list associated with this bp,
			 * then go through cluster_bp since it knows how to
			 * deal with a page request that might span non-
			 * contiguous physical blocks on the disk...
			 */
			retval = cluster_bp(bp);
			vp = cp->c_devvp;
			bp->b_dev = vp->v_rdev;

			return (retval);
		}

		/*
		 * If we don't already know the filesystem relative block
		 * number then get it using VOP_BMAP().  If VOP_BMAP()
		 * returns the block number as -1 then we've got a hole in
		 * the file.  Although HFS filesystems don't create files with
		 * holes, invalidating of subranges of the file (lazy zero
		 * filling) may create such a situation.
		 */
		if (bp->b_blkno == bp->b_lblkno) {
			offset = (off_t) bp->b_lblkno *
				(off_t) GetLogicalBlockSize(vp);

			if ((retval = VOP_CMAP(vp, offset, bp->b_bcount,
					&bp->b_blkno, &bytes_contig, NULL))) {
				bp->b_error = retval;
				bp->b_flags |= B_ERROR;
				biodone(bp);
				return (retval);
			}
			if (bytes_contig < bp->b_bcount)
			{
				/*
				 * We were asked to read a block that wasn't
				 * contiguous, so we have to read each of the
				 * pieces and copy them into the buffer.
				 * Since ordinary file I/O goes through
				 * cluster_io (which won't ask us for
				 * discontiguous data), this is probably an
				 * attempt to read or write metadata.
				 */
				return hfs_strategy_fragmented(bp);
			}
			if ((long)bp->b_blkno == -1)
				clrbuf(bp);
		}
		if ((long)bp->b_blkno == -1) {
			biodone(bp);
			return (0);
		}
		if (bp->b_validend == 0) {
			/*
			 * Record the exact size of the I/O transfer about to
			 * be made:
			 */
			bp->b_validend = bp->b_bcount;
		}
	}
	vp = cp->c_devvp;
	bp->b_dev = vp->v_rdev;

	return VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
}
static int do_hfs_truncate(ap)
	struct vop_truncate_args /* {
		struct vnode *a_vp;
		off_t a_length;
		int a_flags;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	off_t length;
	long vflags;
	struct timeval tv;
	int retval;
	off_t bytesToAdd;
	off_t actualBytesAdded;
	off_t filebytes;
	u_long fileblocks;
	int blksize;
	struct hfsmount *hfsmp;

	if (vp->v_type != VREG && vp->v_type != VLNK)
		return (EISDIR);	/* cannot truncate an HFS directory! */

	length = ap->a_length;
	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
		 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

	if (length < 0)
		return (EINVAL);

	if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
		return (EFBIG);

	hfsmp = VTOHFS(vp);

	tv = time;
	retval = E_NONE;

	/* Files that are changing size are not hot file candidates. */
	if (hfsmp->hfc_stage == HFC_RECORDING) {
		fp->ff_bytesread = 0;
	}

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
#if QUOTA
	if (retval = hfs_getinoquota(cp))
		return(retval);
#endif /* QUOTA */

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of ff_size is 0, length will be at least 1.
	 */
	if (length > fp->ff_size) {
#if QUOTA
		retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
				   ap->a_cred, 0);
		if (retval)
			goto Err_Exit;
#endif /* QUOTA */
		/*
		 * If we don't have enough physical space then
		 * we need to extend the physical size.
		 */
		if (length > filebytes) {
			int eflags;
			u_long blockHint = 0;

			/* All or nothing and don't round up to clumpsize. */
			eflags = kEFAllMask | kEFNoClumpMask;

			if (ap->a_cred && suser(ap->a_cred, NULL) != 0)
				eflags |= kEFReserveMask;  /* keep a reserve */

			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (filebytes == 0 &&
			    hfsmp->hfs_flags & HFS_METADATA_ZONE &&
			    hfs_virtualmetafile(cp)) {
				eflags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;
			}
			// XXXdbg
			hfs_global_shared_lock_acquire(hfsmp);
			if (hfsmp->jnl) {
				if (journal_start_transaction(hfsmp->jnl) != 0) {
					retval = EINVAL;
					goto Err_Exit;
				}
			}

			/* lock extents b-tree (also protects volume bitmap) */
			retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
			if (retval) {
				if (hfsmp->jnl) {
					journal_end_transaction(hfsmp->jnl);
				}
				hfs_global_shared_lock_release(hfsmp);

				goto Err_Exit;
			}

			while ((length > filebytes) && (retval == E_NONE)) {
				bytesToAdd = length - filebytes;
				retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
                                                    (FCB*)fp,
                                                    bytesToAdd,
                                                    blockHint,
                                                    eflags,
                                                    &actualBytesAdded));

				filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
				if (actualBytesAdded == 0 && retval == E_NONE) {
					if (length > filebytes)
						length = filebytes;
					break;
				}
			} /* endwhile */

			(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);

			// XXXdbg
			if (hfsmp->jnl) {
				tv = time;
				VOP_UPDATE(vp, &tv, &tv, 1);

				hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
				journal_end_transaction(hfsmp->jnl);
			}
			hfs_global_shared_lock_release(hfsmp);

			if (retval)
				goto Err_Exit;

			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
				(int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
		}

		if (!(ap->a_flags & IO_NOZEROFILL)) {
			if (UBCINFOEXISTS(vp) && retval == E_NONE) {
				struct rl_entry *invalid_range;
				int devBlockSize;
				off_t zero_limit;

				zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
				if (length < zero_limit) zero_limit = length;

				if (length > fp->ff_size) {
					/* Extending the file: time to fill out the current last page w. zeroes? */
					if ((fp->ff_size & PAGE_MASK_64) &&
					    (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
					    fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {

						/* There's some valid data at the start of the (current) last page
						   of the file, so zero out the remainder of that page to ensure the
						   entire page contains valid data.  Since there is no invalid range
						   possible past the (current) eof, there's no need to remove anything
						   from the invalid range list before calling cluster_write():	*/
						VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
						retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
								fp->ff_size, (off_t)0, devBlockSize,
								(ap->a_flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
						if (retval) goto Err_Exit;

						/* Merely invalidate the remaining area, if necessary: */
						if (length > zero_limit) {
							rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
							cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
						}
					} else {
					/* The page containing the (current) eof is invalid: just add the
					   remainder of the page to the invalid list, along with the area
					   being newly allocated:
					 */
					rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
					cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
					};
				}
			} else {
					panic("hfs_truncate: invoked on non-UBC object?!");
			};
		}
		cp->c_flag |= C_UPDATE;
		fp->ff_size = length;

		if (UBCISVALID(vp))
			ubc_setsize(vp, fp->ff_size);	/* XXX check errors */

	} else { /* Shorten the size of the file */

		if (fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).  For
			 * simplicity, we invalidate all the buffers by calling vinvalbuf.
			 */
			if (UBCISVALID(vp))
				ubc_setsize(vp, length); /* XXX check errors */

			vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
			retval = vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);

			/* Any space previously marked as invalid is now irrelevant: */
			rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
		}

		/*
		 * Account for any unmapped blocks. Note that the new
		 * file length can still end up with unmapped blocks.
		 */
		if (fp->ff_unallocblocks > 0) {
			u_int32_t finalblks;

			/* lock extents b-tree */
			retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID,
					LK_EXCLUSIVE, ap->a_p);
			if (retval)
				goto Err_Exit;

			VTOVCB(vp)->loanedBlocks -= fp->ff_unallocblocks;
			cp->c_blocks -= fp->ff_unallocblocks;
			fp->ff_blocks -= fp->ff_unallocblocks;
			fp->ff_unallocblocks = 0;

			finalblks = (length + blksize - 1) / blksize;
			if (finalblks > fp->ff_blocks) {
				/* calculate required unmapped blocks */
				fp->ff_unallocblocks = finalblks - fp->ff_blocks;
				VTOVCB(vp)->loanedBlocks += fp->ff_unallocblocks;
				cp->c_blocks += fp->ff_unallocblocks;
				fp->ff_blocks += fp->ff_unallocblocks;
			}
			(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID,
					LK_RELEASE, ap->a_p);
		}

		/*
		 * For a TBE process the deallocation of the file blocks is
		 * delayed until the file is closed.  And hfs_close calls
		 * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
		 * isn't set, we make sure this isn't a TBE process.
		 */
		if ((ap->a_flags & IO_NDELAY) || (!ISSET(ap->a_p->p_flag, P_TBE))) {
#if QUOTA
			off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
#endif /* QUOTA */
			// XXXdbg
			hfs_global_shared_lock_acquire(hfsmp);
			if (hfsmp->jnl) {
				if (journal_start_transaction(hfsmp->jnl) != 0) {
					retval = EINVAL;
					goto Err_Exit;
				}
			}

			/* lock extents b-tree (also protects volume bitmap) */
			retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
			if (retval) {
				if (hfsmp->jnl) {
					journal_end_transaction(hfsmp->jnl);
				}
				hfs_global_shared_lock_release(hfsmp);
				goto Err_Exit;
			}

			if (fp->ff_unallocblocks == 0)
				retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
						(FCB*)fp, length, false));

			(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);

			// XXXdbg
			if (hfsmp->jnl) {
				tv = time;
				VOP_UPDATE(vp, &tv, &tv, 1);

				hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
				journal_end_transaction(hfsmp->jnl);
			}
			hfs_global_shared_lock_release(hfsmp);

			filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
			if (retval)
				goto Err_Exit;
#if QUOTA
			/* These are bytesreleased */
			(void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
#endif /* QUOTA */
		}
		/* Only set update flag if the logical length changes */
		if (fp->ff_size != length)
			cp->c_flag |= C_UPDATE;
		fp->ff_size = length;
	}
	cp->c_flag |= C_CHANGE;
	retval = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
	if (retval) {
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
			-1, -1, -1, retval, 0);
	}

Err_Exit:

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
		 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);

	return (retval);
}
/*
#
#% truncate	vp	L L L
#
vop_truncate {
	IN struct vnode *vp;
	IN off_t length;
	IN int flags;	(IO_SYNC)
	IN struct ucred *cred;
	IN struct proc *p;
};
 * Truncate a cnode to at most length size, freeing (or adding) the
 * disk blocks.
 */
int hfs_truncate(ap)
	struct vop_truncate_args /* {
		struct vnode *a_vp;
		off_t a_length;
		int a_flags;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	off_t length;
	off_t filebytes;
	u_long fileblocks;
	int blksize;
	int error = 0;

	if (vp->v_type != VREG && vp->v_type != VLNK)
		return (EISDIR);	/* cannot truncate an HFS directory! */

	length = ap->a_length;
	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;

	// have to loop truncating or growing files that are
	// really big because otherwise transactions can get
	// enormous and consume too many kernel resources.
	if (length < filebytes && (filebytes - length) > HFS_BIGFILE_SIZE) {
		while (filebytes > length) {
			if ((filebytes - length) > HFS_BIGFILE_SIZE) {
				filebytes -= HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}

			ap->a_length = filebytes;
			error = do_hfs_truncate(ap);
			if (error)
				break;
		}
	} else if (length > filebytes && (length - filebytes) > HFS_BIGFILE_SIZE) {
		while (filebytes < length) {
			if ((length - filebytes) > HFS_BIGFILE_SIZE) {
				filebytes += HFS_BIGFILE_SIZE;
			} else {
				filebytes = length;
			}

			ap->a_length = filebytes;
			error = do_hfs_truncate(ap);
			if (error)
				break;
		}
	} else {
		error = do_hfs_truncate(ap);
	}

	return error;
}
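
/*
 * Illustrative sketch (not part of the original source): the chunking
 * schedule used above.  A large shrink is broken into HFS_BIGFILE_SIZE
 * steps so no single journal transaction grows too large; the step size
 * below is an assumed value for the demonstration.
 */
#if 0	/* demonstration only */
#include <stdio.h>
#include <stdint.h>

#define HFS_BIGFILE_SIZE ((int64_t)0x40000000)	/* assumed 1 GB step */

int
main(void)
{
	int64_t filebytes = (int64_t)5 * 0x40000000 + 123;	/* ~5 GB */
	int64_t length = 100;					/* target size */

	while (filebytes > length) {
		if ((filebytes - length) > HFS_BIGFILE_SIZE)
			filebytes -= HFS_BIGFILE_SIZE;
		else
			filebytes = length;
		printf("truncate to %lld\n", (long long)filebytes);
	}
	return (0);
}
#endif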
/*
#
#% allocate	vp	L L L
#
vop_allocate {
	IN struct vnode *vp;
	IN off_t length;
	IN int flags;
	OUT off_t *bytesallocated;
	IN off_t offset;
	IN struct ucred *cred;
	IN struct proc *p;
};
 * allocate a cnode to at most length size
 */
int hfs_allocate(ap)
	struct vop_allocate_args /* {
		struct vnode *a_vp;
		off_t a_length;
		u_int32_t  a_flags;
		off_t *a_bytesallocated;
		off_t a_offset;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	ExtendedVCB *vcb = VTOVCB(vp);
	off_t length = ap->a_length;
	off_t startingPEOF;
	off_t moreBytesRequested;
	off_t actualBytesAdded;
	off_t filebytes;
	u_long fileblocks;
	long vflags;
	struct timeval tv;
	int retval, retval2;
	UInt32 blockHint;
	UInt32 extendFlags;   /* For call to ExtendFileC */
	struct hfsmount *hfsmp;

	hfsmp = VTOHFS(vp);

	*(ap->a_bytesallocated) = 0;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

	if (length < (off_t)0)
		return (EINVAL);
	if (vp->v_type != VREG)
		return (EISDIR);
	if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes))
		return (EINVAL);

	/* Fill in the flags word for the call to Extend the file */

	extendFlags = kEFNoClumpMask;
	if (ap->a_flags & ALLOCATECONTIG)
		extendFlags |= kEFContigMask;
	if (ap->a_flags & ALLOCATEALL)
		extendFlags |= kEFAllMask;
	if (ap->a_cred && suser(ap->a_cred, NULL) != 0)
		extendFlags |= kEFReserveMask;

	tv = time;
	retval = E_NONE;
	blockHint = 0;
	startingPEOF = filebytes;

	if (ap->a_flags & ALLOCATEFROMPEOF)
		length += filebytes;
	else if (ap->a_flags & ALLOCATEFROMVOL)
		blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

	/* If no changes are necessary, then we're done */
	if (filebytes == length)
		goto Std_Exit;

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of filebytes is 0, length will be at least 1.
	 */
	if (length > filebytes) {
		moreBytesRequested = length - filebytes;

#if QUOTA
		retval = hfs_chkdq(cp,
				(int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
				ap->a_cred, 0);
		if (retval)
			return (retval);

#endif /* QUOTA */
		/*
		 * Metadata zone checks.
		 */
		if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
			/*
			 * Allocate Journal and Quota files in metadata zone.
			 */
			if (hfs_virtualmetafile(cp)) {
				extendFlags |= kEFMetadataMask;
				blockHint = hfsmp->hfs_metazone_start;
			} else if ((blockHint >= hfsmp->hfs_metazone_start) &&
				   (blockHint <= hfsmp->hfs_metazone_end)) {
				/*
				 * Move blockHint outside metadata zone.
				 */
				blockHint = hfsmp->hfs_metazone_end + 1;
			}
		}

		// XXXdbg
		hfs_global_shared_lock_acquire(hfsmp);
		if (hfsmp->jnl) {
			if (journal_start_transaction(hfsmp->jnl) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}
		}

		/* lock extents b-tree (also protects volume bitmap) */
		retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
		if (retval) {
			if (hfsmp->jnl) {
				journal_end_transaction(hfsmp->jnl);
			}
			hfs_global_shared_lock_release(hfsmp);
			goto Err_Exit;
		}

		retval = MacToVFSError(ExtendFileC(vcb,
						(FCB*)fp,
						moreBytesRequested,
						blockHint,
						extendFlags,
						&actualBytesAdded));

		*(ap->a_bytesallocated) = actualBytesAdded;
		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

		(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);

		// XXXdbg
		if (hfsmp->jnl) {
			tv = time;
			VOP_UPDATE(vp, &tv, &tv, 1);

			hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
			journal_end_transaction(hfsmp->jnl);
		}
		hfs_global_shared_lock_release(hfsmp);

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the VOP_UPDATE to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes))
			goto Err_Exit;

		/*
		 * Adjust actualBytesAdded to be allocation block aligned, not
		 * clump size aligned.
		 * NOTE: So what we are reporting does not affect reality
		 * until the file is closed, when we truncate the file to allocation
		 * block size.
		 */
		if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
			*(ap->a_bytesallocated) =
				roundup(moreBytesRequested, (off_t)vcb->blockSize);

	} else { /* Shorten the size of the file */

		if (fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).  For
			 * simplicity, we invalidate all the buffers by calling vinvalbuf.
			 */
			vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
			(void) vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
		}

		// XXXdbg
		hfs_global_shared_lock_acquire(hfsmp);
		if (hfsmp->jnl) {
			if (journal_start_transaction(hfsmp->jnl) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}
		}

		/* lock extents b-tree (also protects volume bitmap) */
		retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
		if (retval) {
			if (hfsmp->jnl) {
				journal_end_transaction(hfsmp->jnl);
			}
			hfs_global_shared_lock_release(hfsmp);

			goto Err_Exit;
		}

		retval = MacToVFSError(
                            TruncateFileC(
                                            vcb,
                                            (FCB*)fp,
                                            length,
                                            false));
		(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

		if (hfsmp->jnl) {
			tv = time;
			VOP_UPDATE(vp, &tv, &tv, 1);

			hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
			journal_end_transaction(hfsmp->jnl);
		}
		hfs_global_shared_lock_release(hfsmp);


		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the VOP_UPDATE to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes)) goto Err_Exit;
#if QUOTA
		/* These are bytesreleased */
		(void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
#endif /* QUOTA */

		if (fp->ff_size > filebytes) {
			fp->ff_size = filebytes;

			if (UBCISVALID(vp))
				ubc_setsize(vp, fp->ff_size); /* XXX check errors */
		}
	}

Std_Exit:
	cp->c_flag |= C_CHANGE | C_UPDATE;
	retval2 = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);

	if (retval == 0)
		retval = retval2;
Err_Exit:
	return (retval);
}
/*
 * pagein for HFS filesystem
 */
int
hfs_pagein(ap)
	struct vop_pagein_args /* {
	   	struct vnode *a_vp,
	   	upl_t 	      a_pl,
		vm_offset_t   a_pl_offset,
		off_t         a_f_offset,
		size_t        a_size,
		struct ucred *a_cred,
		int           a_flags
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	int devBlockSize = 0;
	int error;

	if (vp->v_type != VREG)
		panic("hfs_pagein: vp not UBC type\n");

	VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devBlockSize);

	error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
	                       ap->a_size, (off_t)VTOF(vp)->ff_size, devBlockSize,
	                       ap->a_flags);
	/*
	 * Keep track of blocks read.
	 */
	if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
		struct cnode *cp;

		cp = VTOC(vp);
		/*
		 * If this file hasn't been seen since the start of
		 * the current sampling period then start over.
		 */
		if (cp->c_atime < VTOHFS(vp)->hfc_timebase)
			VTOF(vp)->ff_bytesread = ap->a_size;
		else
			VTOF(vp)->ff_bytesread += ap->a_size;

		cp->c_flag |= C_ACCESS;
	}
	return (error);
}
/*
 * pageout for HFS filesystem.
 */
int
hfs_pageout(ap)
	struct vop_pageout_args /* {
	   struct vnode *a_vp,
	   upl_t         a_pl,
	   vm_offset_t   a_pl_offset,
	   off_t         a_f_offset,
	   size_t        a_size,
	   struct ucred *a_cred,
	   int           a_flags
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	int retval;
	int devBlockSize = 0;
	off_t end_of_range;
	off_t filesize;

	if (UBCINVALID(vp))
		panic("hfs_pageout: Not a VREG: vp=%x", vp);

	VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
	filesize = fp->ff_size;
	end_of_range = ap->a_f_offset + ap->a_size - 1;

	if (cp->c_flag & C_RELOCATING) {
		if (end_of_range < (filesize / 2)) {
			return (EBUSY);
		}
	}

	if (end_of_range >= filesize)
		end_of_range = (off_t)(filesize - 1);
	if (ap->a_f_offset < filesize) {
		rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
		cp->c_flag |= C_MODIFIED;  /* leof is dirty */
	}

	retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size,
				 filesize, devBlockSize, ap->a_flags);

	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (retval == 0 && ap->a_cred && ap->a_cred->cr_uid != 0)
		cp->c_mode &= ~(S_ISUID | S_ISGID);

	return (retval);
}
/*
 * Intercept B-Tree node writes to unswap them if necessary.
#
#vop_bwrite {
#	IN struct buf *bp;
 */
int
hfs_bwrite(ap)
	struct vop_bwrite_args /* {
		struct buf *a_bp;
	} */ *ap;
{
	int retval = 0;
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = bp->b_vp;
#if BYTE_ORDER == LITTLE_ENDIAN
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID)) {

		/* Swap if the B-Tree node is in native byte order */
		if (((UInt16 *)((char *)bp->b_data + bp->b_bcount - 2))[0] == 0x000e) {
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = bp->b_data;
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (bp->b_flags & B_CACHE) == 0;
			block.blockSize = bp->b_bcount;

			/* Endian un-swap B-Tree node */
			SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1);
		}

		/* We don't check to make sure that it's 0x0e00 because it could be all zeros */
	}
#endif
	/* This buffer shouldn't be locked anymore but if it is clear it */
	if (ISSET(bp->b_flags, B_LOCKED)) {
		// XXXdbg
		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
		}
		CLR(bp->b_flags, B_LOCKED);
		printf("hfs_bwrite: called with lock bit set\n");
	}
	retval = vn_bwrite (ap);

	return (retval);
}
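
/*
 * Illustrative sketch (not part of the original source): why the test
 * above reads the last two bytes of the node.  The trailing UInt16 of a
 * B-tree node is the offset of the first record, which immediately
 * follows the 14-byte node descriptor; seeing 0x000e there means the
 * node is still in the host's native order on a little-endian machine
 * and must be swapped before it reaches the disk.
 */
#if 0	/* demonstration only */
#include <stdio.h>
#include <stdint.h>
#include <string.h>

int
main(void)
{
	unsigned char node[512] = { 0 };
	uint16_t first_record_off = 0x000e;	/* sizeof(BTNodeDescriptor) */
	uint16_t trailer;

	/* store natively, the way in-memory (unswapped) nodes appear */
	memcpy(node + sizeof(node) - 2, &first_record_off, 2);

	memcpy(&trailer, node + sizeof(node) - 2, 2);
	printf(trailer == 0x000e ? "native order: swap before write\n"
	                         : "already big-endian on disk\n");
	return (0);
}
#endif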
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------
 * |///////////////|
 * -----------------
 * 0               N (file offset)
 *
 * -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 * -----------------     -----------------
 * 0               N     N+1              2N
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 * -----------------     -----------------
 * 0               N     N+1              2N
 *
 *                       -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                       -----------------
 *                       0               N (file offset)
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
__private_extern__
int
hfs_relocate(vp, blockHint, cred, p)
	struct vnode *vp;
	u_int32_t  blockHint;
	struct ucred *cred;
	struct proc *p;
{
	struct filefork *fp;
	struct hfsmount *hfsmp;
	ExtendedVCB *vcb;

	u_int32_t headblks;
	u_int32_t datablks;
	u_int32_t blksize;
	u_int32_t realsize;
	u_int32_t growsize;
	u_int32_t nextallocsave;
	u_int32_t sector_a;
	u_int32_t sector_b;
	int eflags;
	u_int32_t oldstart;  /* debug only */
	off_t newbytes;
	int retval, need_vinval=0;

	if (vp->v_type != VREG && vp->v_type != VLNK) {
		return (EPERM);
	}

	hfsmp = VTOHFS(vp);
	if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
		return (ENOSPC);
	}

	fp = VTOF(vp);
	if (fp->ff_unallocblocks)
		return (EINVAL);
	vcb = VTOVCB(vp);
	blksize = vcb->blockSize;
	if (blockHint == 0)
		blockHint = vcb->nextAllocation;

	if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
	    (vp->v_type == VLNK && fp->ff_size > blksize)) {
		return (EFBIG);
	}

	headblks = fp->ff_blocks;
	datablks = howmany(fp->ff_size, blksize);
	growsize = datablks * blksize;
	realsize = fp->ff_size;
	eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
	if (blockHint >= hfsmp->hfs_metazone_start &&
	    blockHint <= hfsmp->hfs_metazone_end)
		eflags |= kEFMetadataMask;

	hfs_global_shared_lock_acquire(hfsmp);
	if (hfsmp->jnl) {
		if (journal_start_transaction(hfsmp->jnl) != 0) {
			retval = EINVAL;
			goto out2;
		}
	}

	/* Lock extents b-tree (also protects volume bitmap) */
	retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE, p);
	if (retval)
		goto out2;

	retval = MapFileBlockC(vcb, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
	if (retval) {
		retval = MacToVFSError(retval);
		goto out;
	}

	/*
	 * STEP 1 - acquire new allocation blocks.
	 */
	nextallocsave = vcb->nextAllocation;
	retval = ExtendFileC(vcb, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
	if (eflags & kEFMetadataMask)
		vcb->nextAllocation = nextallocsave;

	retval = MacToVFSError(retval);
	if (retval == 0) {
		VTOC(vp)->c_flag |= C_MODIFIED;
		if (newbytes < growsize) {
			retval = ENOSPC;
			goto restore;
		} else if (fp->ff_blocks < (headblks + datablks)) {
			printf("hfs_relocate: allocation failed");
			retval = ENOSPC;
			goto restore;
		}

		retval = MapFileBlockC(vcb, (FCB *)fp, 1, growsize, &sector_b, NULL);
		if (retval) {
			retval = MacToVFSError(retval);
		} else if ((sector_a + 1) == sector_b) {
			retval = ENOSPC;
			goto restore;
		} else if ((eflags & kEFMetadataMask) &&
		           ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
		              hfsmp->hfs_metazone_end)) {
			printf("hfs_relocate: didn't move into metadata zone\n");
			retval = ENOSPC;
			goto restore;
		}
	}
	if (retval) {
		/*
		 * Check to see if failure is due to excessive fragmentation.
		 */
		if (retval == ENOSPC &&
		    hfs_freeblks(hfsmp, 0) > (datablks * 2)) {
			hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
		}
		goto out;
	}

	fp->ff_size = fp->ff_blocks * blksize;
	if (UBCISVALID(vp))
		(void) ubc_setsize(vp, fp->ff_size);

	/*
	 * STEP 2 - clone data into the new allocation blocks.
	 */

	// XXXdbg - unlock the extents overflow file because hfs_clonefile()
	//          calls vinvalbuf() which calls hfs_fsync() which can
	//          call hfs_metasync() which may need to lock the catalog
	//          file -- but the catalog file may be locked and blocked
	//          waiting for the extents overflow file if we're unlucky.
	//          see radar 3742973 for more details.
	(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);

	if (vp->v_type == VLNK)
		retval = hfs_clonelink(vp, blksize, cred, p);
	else if (vp->v_flag & VSYSTEM)
		retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
	else
		retval = hfs_clonefile(vp, headblks, datablks, blksize, cred, p);

	// XXXdbg - relock the extents overflow file
	(void)hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE, p);

	if (retval)
		goto restore;

	oldstart = fp->ff_extents[0].startBlock;

	/*
	 * STEP 3 - switch to clone and remove old blocks.
	 */
	SET(VTOC(vp)->c_flag, C_NOBLKMAP);   /* suspend page-ins */

	retval = HeadTruncateFile(vcb, (FCB*)fp, headblks);

	CLR(VTOC(vp)->c_flag, C_NOBLKMAP);   /* resume page-ins */
	if (ISSET(VTOC(vp)->c_flag, C_WBLKMAP))
		wakeup(VTOC(vp));
	if (retval)
		goto restore;

	fp->ff_size = realsize;
	if (UBCISVALID(vp)) {
		(void) ubc_setsize(vp, realsize);
		need_vinval = 1;
	}

	CLR(VTOC(vp)->c_flag, C_RELOCATING);  /* Resume page-outs for this file. */
out:
	(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);

	// XXXdbg - do this after unlocking the extents-overflow
	// file to avoid deadlocks (see comment above by STEP 2)
	if (need_vinval) {
		(void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);
	}

	retval = VOP_FSYNC(vp, cred, MNT_WAIT, p);
out2:
	if (hfsmp->jnl) {
		if (VTOC(vp)->c_cnid < kHFSFirstUserCatalogNodeID)
			(void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
		else
			(void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
		journal_end_transaction(hfsmp->jnl);
	}
	hfs_global_shared_lock_release(hfsmp);

	return (retval);

restore:
	/*
	 * Give back any newly allocated space.
	 */
	if (fp->ff_size != realsize)
		fp->ff_size = realsize;
	(void) TruncateFileC(vcb, (FCB*)fp, fp->ff_size, false);
	if (UBCISVALID(vp))
		(void) ubc_setsize(vp, fp->ff_size);
	CLR(VTOC(vp)->c_flag, C_RELOCATING);
	goto out;
}
/*
 * Clone a symlink.
 */
static int
hfs_clonelink(struct vnode *vp, int blksize, struct ucred *cred, struct proc *p)
{
	struct buf *head_bp = NULL;
	struct buf *tail_bp = NULL;
	int error;


	error = meta_bread(vp, 0, blksize, cred, &head_bp);
	if (error)
		goto out;

	tail_bp = getblk(vp, 1, blksize, 0, 0, BLK_META);
	if (tail_bp == NULL) {
		error = EIO;
		goto out;
	}
	bcopy(head_bp->b_data, tail_bp->b_data, blksize);
	error = bwrite(tail_bp);
out:
	if (head_bp) {
		head_bp->b_flags |= B_INVAL;
		brelse(head_bp);
	}
	(void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);

	return (error);
}
/*
 * Clone a file's data within the file.
 *
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
              struct ucred *cred, struct proc *p)
{
	caddr_t  bufp;
	size_t   writebase;
	size_t   bufsize;
	size_t   copysize;
	size_t   iosize;
	size_t   filesize;
	size_t   offset;
	struct uio auio;
	struct iovec aiov;
	int  devblocksize;
	int  didhold;
	int  error;


	if ((error = vinvalbuf(vp, V_SAVE, cred, p, 0, 0))) {
		printf("hfs_clonefile: vinvalbuf failed - %d\n", error);
		return (error);
	}

	if (!ubc_clean(vp, 1)) {
		printf("hfs_clonefile: not ubc_clean\n");
		return (EIO);  /* XXX error code */
	}

	/*
	 * Suspend page-outs for this file.
	 */
	SET(VTOC(vp)->c_flag, C_RELOCATING);

	filesize = VTOF(vp)->ff_size;
	writebase = blkstart * blksize;
	copysize = blkcnt * blksize;
	iosize = bufsize = MIN(copysize, 4096 * 16);
	offset = 0;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}

	VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devblocksize);

	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_segflg = UIO_SYSSPACE;
	auio.uio_procp = p;

	while (offset < copysize) {
		iosize = MIN(copysize - offset, iosize);

		aiov.iov_base = bufp;
		aiov.iov_len = iosize;
		auio.uio_resid = iosize;
		auio.uio_offset = offset;
		auio.uio_rw = UIO_READ;

		error = cluster_read(vp, &auio, copysize, devblocksize, 0);
		if (error) {
			printf("hfs_clonefile: cluster_read failed - %d\n", error);
			break;
		}
		if (auio.uio_resid != 0) {
			printf("clonedata: cluster_read: uio_resid = %d\n", (int)auio.uio_resid);
			error = EIO;
			break;
		}

		aiov.iov_base = bufp;
		aiov.iov_len = iosize;
		auio.uio_resid = iosize;
		auio.uio_offset = writebase + offset;
		auio.uio_rw = UIO_WRITE;

		error = cluster_write(vp, &auio, filesize + offset,
		                      filesize + offset + iosize,
		                      auio.uio_offset, 0, devblocksize, 0);
		if (error) {
			printf("hfs_clonefile: cluster_write failed - %d\n", error);
			break;
		}
		if (auio.uio_resid != 0) {
			printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
			error = EIO;
			break;
		}
		offset += iosize;
	}

	/* Clean the pages in VM. */
	didhold = ubc_hold(vp);
	if (didhold)
		(void) ubc_clean(vp, 1);

	/*
	 * Clean out all associated buffers.
	 */
	(void) vinvalbuf(vp, V_SAVE, cred, p, 0, 0);

	if (didhold)
		ubc_rele(vp);

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	return (error);
}
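
/*
 * Illustrative sketch (not part of the original source): the same
 * bounded-buffer copy pattern as hfs_clonefile(), expressed with
 * user-space pread/pwrite.  Data moves in chunks of at most 64 KB so
 * the staging buffer stays small regardless of file size.
 */
#if 0	/* demonstration only */
#include <stdlib.h>
#include <unistd.h>

#define CHUNK (4096 * 16)	/* 64 KB staging buffer, as above */

static int
clone_range(int fd, off_t readbase, off_t writebase, size_t copysize)
{
	char *bufp = malloc(CHUNK);
	size_t offset = 0;
	int error = 0;

	if (bufp == NULL)
		return (-1);
	while (offset < copysize) {
		size_t iosize = copysize - offset < CHUNK ?
		    copysize - offset : CHUNK;

		if (pread(fd, bufp, iosize, readbase + offset) != (ssize_t)iosize ||
		    pwrite(fd, bufp, iosize, writebase + offset) != (ssize_t)iosize) {
			error = -1;	/* short transfer or I/O error */
			break;
		}
		offset += iosize;
	}
	free(bufp);
	return (error);
}
#endif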
/*
 * Clone a system (metadata) file.
 *
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 struct ucred *cred, struct proc *p)
{
	caddr_t  bufp;
	char * offset;
	size_t  bufsize;
	size_t  iosize;
	struct buf *bp = NULL;
	daddr_t  blkno;
	daddr_t  blk;
	int  breadcnt;
	int  i;
	int  error = 0;


	iosize = GetLogicalBlockSize(vp);
	bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
	breadcnt = bufsize / iosize;

	if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
		return (ENOMEM);
	}
	blkstart = (blkstart * blksize) / iosize;
	blkcnt = (blkcnt * blksize) / iosize;
	blkno = 0;

	while (blkno < blkcnt) {
		/*
		 * Read up to a megabyte
		 */
		offset = bufp;
		for (i = 0, blk = blkno; (i < breadcnt) && (blk < blkcnt); ++i, ++blk) {
			error = meta_bread(vp, blk, iosize, cred, &bp);
			if (error) {
				printf("hfs_clonesysfile: meta_bread error %d\n", error);
				goto out;
			}
			if (bp->b_bcount != iosize) {
				printf("hfs_clonesysfile: b_bcount is only %d\n", bp->b_bcount);
				goto out;
			}

			bcopy(bp->b_data, offset, iosize);
			bp->b_flags |= B_INVAL;
			brelse(bp);
			bp = NULL;
			offset += iosize;
		}

		/*
		 * Write up to a megabyte
		 */
		offset = bufp;
		for (i = 0; (i < breadcnt) && (blkno < blkcnt); ++i, ++blkno) {
			bp = getblk(vp, blkstart + blkno, iosize, 0, 0, BLK_META);
			if (bp == NULL) {
				printf("hfs_clonesysfile: getblk failed on blk %d\n", blkstart + blkno);
				error = EIO;
				goto out;
			}
			bcopy(offset, bp->b_data, iosize);
			error = bwrite(bp);
			bp = NULL;
			if (error)
				goto out;
			offset += iosize;
		}
	}
out:
	if (bp) {
		brelse(bp);
	}

	kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

	error = VOP_FSYNC(vp, cred, MNT_WAIT, p);

	return (error);
}