/*
 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this
 * file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/*	@(#)hfs_readwrite.c	1.0
 *
 *	(c) 1998-2001 Apple Computer, Inc.  All Rights Reserved
 *
 *	hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 *
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/vnode.h>

#include <miscfs/specfs/specdev.h>

#include <vm/vm_pageout.h>

#include <sys/kdebug.h>

#include "hfs_endian.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"

extern int overflow_extents(struct filefork *fp);

#define can_cluster(size)  ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
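
/*
 * can_cluster(size) is true when size is an exact multiple of 4K and no
 * larger than half of MAXPHYSIO; hfs_bmap uses it to decide whether a
 * read-ahead run can be reported for the given logical block size.
 */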

enum {
	MAXHFSFILESIZE = 0x7FFFFFFF		/* this needs to go in the mount structure */
};

extern u_int32_t GetLogicalBlockSize(struct vnode *vp);

/*****************************************************************************
*
*	Operations on vnodes
*
*****************************************************************************/

/*
 vop_read {
     INOUT struct uio *uio;
     IN struct ucred *cred;
     */

int
hfs_read(ap)
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	register struct uio *uio = ap->a_uio;
	register struct vnode *vp = ap->a_vp;
	struct cnode *cp;
	struct filefork *fp;
	struct buf *bp;
	daddr_t logBlockNo;
	u_long fragSize, moveSize, startOffset, ioxfersize;
	int devBlockSize = 0;
	off_t bytesRemaining;
	int retval = 0;
	off_t filesize;
	off_t filebytes;

	/* Preflight checks */
	if (vp->v_type != VREG && vp->v_type != VLNK)
		return (EISDIR);	/* HFS can only read files */
	if (uio->uio_resid == 0)
		return (0);		/* Nothing left to do */
	if (uio->uio_offset < 0)
		return (EINVAL);	/* cant read from a negative offset */

	cp = VTOC(vp);
	fp = VTOF(vp);
	filesize = fp->ff_size;
	filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize;
	if (uio->uio_offset > filesize) {
		if ((!ISHFSPLUS(VTOVCB(vp))) && (uio->uio_offset > (off_t)MAXHFSFILESIZE))
			return (EFBIG);
		else
			return (0);
	}

	VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
		(int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0);

	if (UBCISVALID(vp)) {
		retval = cluster_read(vp, uio, filesize, devBlockSize, 0);
	} else {
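		/*
		 * No UBC for this vnode: read through the buffer cache one
		 * page-sized logical block at a time, starting read-ahead of
		 * the next block when the access pattern looks sequential.
		 */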
		for (retval = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {

			if ((bytesRemaining = (filesize - uio->uio_offset)) <= 0)
				break;

			logBlockNo  = (daddr_t)(uio->uio_offset / PAGE_SIZE_64);
			startOffset = (u_long) (uio->uio_offset & PAGE_MASK_64);
			fragSize    = PAGE_SIZE;

			if (((logBlockNo * PAGE_SIZE) + fragSize) < filesize)
				ioxfersize = fragSize;
			else {
				ioxfersize = filesize - (logBlockNo * PAGE_SIZE);
				ioxfersize = (ioxfersize + (devBlockSize - 1)) & ~(devBlockSize - 1);
			}
			moveSize = ioxfersize;
			moveSize -= startOffset;

			if (bytesRemaining < moveSize)
				moveSize = bytesRemaining;

			if (uio->uio_resid < moveSize) {
				moveSize = uio->uio_resid;
			}

			if ((uio->uio_offset + fragSize) >= filesize) {
				retval = bread(vp, logBlockNo, ioxfersize, NOCRED, &bp);

			} else if (logBlockNo - 1 == vp->v_lastr && !(vp->v_flag & VRAOFF)) {
				daddr_t nextLogBlockNo = logBlockNo + 1;
				int nextsize;

				if (((nextLogBlockNo * PAGE_SIZE) +
				    (daddr_t)fragSize) < filesize)
					nextsize = fragSize;
				else {
					nextsize = filesize - (nextLogBlockNo * PAGE_SIZE);
					nextsize = (nextsize + (devBlockSize - 1)) & ~(devBlockSize - 1);
				}
				retval = breadn(vp, logBlockNo, ioxfersize, &nextLogBlockNo, &nextsize, 1, NOCRED, &bp);
			} else {
				retval = bread(vp, logBlockNo, ioxfersize, NOCRED, &bp);
			}

			if (retval != E_NONE) {
				break;
			}
			vp->v_lastr = logBlockNo;

			/*
			 * We should only get non-zero b_resid when an I/O error
			 * has occurred, which should cause us to break above.
			 * However, if the short read did not cause an error,
			 * then we want to ensure that we do not uiomove bad
			 * or uninitialized data.
			 */
			ioxfersize -= bp->b_resid;

			if (ioxfersize < moveSize) {	/* XXX PPD This should take the offset into account, too! */
				moveSize = ioxfersize;
			}
			if ((startOffset + moveSize) > bp->b_bcount)
				panic("hfs_read: bad startOffset or moveSize\n");

			if ((retval = uiomove((caddr_t)bp->b_data + startOffset, (int)moveSize, uio)))
				break;

			if (S_ISREG(cp->c_mode) &&
			    (((startOffset + moveSize) == fragSize) || (uio->uio_offset == filesize))) {
				bp->b_flags |= B_AGE;
			}

			/* Start of loop resets bp to NULL before reaching outside this block... */
		}
	}

	cp->c_flag |= C_ACCESS;

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
		(int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0);

	return (retval);
}

/*
 * Write data to a file or directory.
 *
 vop_write {
     INOUT struct uio *uio;
     IN struct ucred *cred;
     */

int
hfs_write(ap)
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct uio *uio = ap->a_uio;
	struct cnode *cp;
	struct filefork *fp;
	struct proc *p;
	struct timeval tv;
	ExtendedVCB *vcb;
	struct buf *bp;
	daddr_t logBlockNo;
	long fragSize;
	int devBlockSize = 0;
	off_t origFileSize, currOffset, writelimit, bytesToAdd;
	off_t actualBytesAdded;
	u_long blkoffset, resid, xfersize, clearSize;
	off_t filebytes;
	u_long fileblocks;
	int eflags, ioflag;
	int retval = 0;
	struct hfsmount *hfsmp;
	int started_tr = 0, grabbed_lock = 0;

	ioflag = ap->a_ioflag;

	if (uio->uio_offset < 0)
		return (EINVAL);
	if (uio->uio_resid == 0)
		return (E_NONE);
	if (vp->v_type != VREG && vp->v_type != VLNK)
		return (EISDIR);	/* Can only write files */

	cp = VTOC(vp);
	fp = VTOF(vp);
	vcb = VTOVCB(vp);
	p = uio->uio_procp;
	hfsmp = VTOHFS(vp);

	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

	if (ioflag & IO_APPEND)
		uio->uio_offset = fp->ff_size;
	if ((cp->c_flags & APPEND) && uio->uio_offset != fp->ff_size)
		return (EPERM);

	// XXXdbg - don't allow modification of the journal or journal_info_block
	if (VTOHFS(vp)->jnl && cp->c_datafork) {
		struct HFSPlusExtentDescriptor *extd;

		extd = &cp->c_datafork->ff_data.cf_extents[0];
		if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock
		    || extd->startBlock == VTOHFS(vp)->jnl_start) {
			return EPERM;
		}
	}

	writelimit = uio->uio_offset + uio->uio_resid;

	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	if (vp->v_type == VREG && p &&
	    writelimit > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}

	VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);

	resid = uio->uio_resid;
	origFileSize = fp->ff_size;
	eflags = kEFDeferMask;	/* defer file block allocations */
	filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

	/*
	 * NOTE: In the following loop there are two positions tracked:
	 * currOffset is the current I/O starting offset.  currOffset
	 * is never >LEOF; the LEOF is nudged along with currOffset as
	 * data is zeroed or written.  uio->uio_offset is the start of
	 * the current I/O operation.  It may be arbitrarily beyond
	 * currOffset.
	 *
	 * The following is true at all times:
	 *   currOffset <= LEOF <= uio->uio_offset <= writelimit
	 */
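	/*
	 * Illustration: appending 3000 bytes at offset 5000 to a file whose
	 * LEOF is currently 4100.  currOffset starts at MIN(5000, 4100) = 4100,
	 * writelimit is 8000, and the bytes from 4100 to 5000 are zero-filled
	 * (or marked invalid) before the caller's data is copied in, so the
	 * invariant above holds throughout.
	 */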
	currOffset = MIN(uio->uio_offset, fp->ff_size);

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
		(int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);

	/* Now test if we need to extend the file */
	/* Doing so will adjust the filebytes for us */

	if (writelimit > filebytes) {
		bytesToAdd = writelimit - filebytes;

		retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, vcb->blockSize)),
				   ap->a_cred, 0);
		if (retval)
			return (retval);
	}

	if (writelimit > filebytes) {
		hfs_global_shared_lock_acquire(hfsmp);
		grabbed_lock = 1;
	}
	if (hfsmp->jnl && (writelimit > filebytes)) {
		if (journal_start_transaction(hfsmp->jnl) != 0) {
			hfs_global_shared_lock_release(hfsmp);
			return EINVAL;
		}
		started_tr = 1;
	}

	while (writelimit > filebytes) {
		bytesToAdd = writelimit - filebytes;
		if (suser(ap->a_cred, NULL) != 0)
			eflags |= kEFReserveMask;

		/* lock extents b-tree (also protects volume bitmap) */
		retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, current_proc());
		if (retval != E_NONE)
			break;

		retval = MacToVFSError(ExtendFileC (vcb, (FCB*)fp, bytesToAdd,
				0, eflags, &actualBytesAdded));

		(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);
		if ((actualBytesAdded == 0) && (retval == E_NONE))
			retval = ENOSPC;
		if (retval != E_NONE)
			break;
		filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
			(int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);
	}

	if (started_tr) {
		hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
		journal_end_transaction(hfsmp->jnl);
		started_tr = 0;
	}
	if (grabbed_lock) {
		hfs_global_shared_lock_release(hfsmp);
		grabbed_lock = 0;
	}

	if (UBCISVALID(vp) && retval == E_NONE) {
		off_t filesize;
		off_t zero_off;
		off_t tail_off;
		off_t inval_start;
		off_t inval_end;
		off_t io_start, io_end;
		int lflag;
		struct rl_entry *invalid_range;

		if (writelimit > fp->ff_size)
			filesize = writelimit;
		else
			filesize = fp->ff_size;

		lflag = (ioflag & IO_SYNC);

		if (uio->uio_offset <= fp->ff_size) {
			zero_off = uio->uio_offset & ~PAGE_MASK_64;

			/* Check whether the area between zero_off and the start
			   of the transfer is invalid and should be zero-filled
			   as part of the transfer:
			 */
			if (rl_scan(&fp->ff_invalidranges, zero_off, uio->uio_offset - 1, &invalid_range) != RL_NOOVERLAP)
				lflag |= IO_HEADZEROFILL;
		} else {
			off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;

			/* The bytes between fp->ff_size and uio->uio_offset must never be
			   read without being zeroed.  The current last block is filled with zeroes
			   if it holds valid data but in all cases merely do a little bookkeeping
			   to track the area from the end of the current last page to the start of
			   the area actually written.  For the same reason only the bytes up to the
			   start of the page where this write will start are invalidated; any remainder
			   before uio->uio_offset is explicitly zeroed as part of the cluster_write.

			   Note that inval_start, the start of the page after the current EOF,
			   may be past the start of the write, in which case the zeroing
			   will be handled by the cluster_write of the actual data.
			 */
			inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
			inval_end = uio->uio_offset & ~PAGE_MASK_64;
			zero_off = fp->ff_size;

			if ((fp->ff_size & PAGE_MASK_64) &&
			    (rl_scan(&fp->ff_invalidranges,
					eof_page_base,
					fp->ff_size - 1,
					&invalid_range) != RL_NOOVERLAP)) {
				/* The page containing the EOF is not valid, so the
				   entire page must be made inaccessible now.  If the write
				   starts on a page beyond the page containing the eof
				   (inval_end > eof_page_base), add the
				   whole page to the range to be invalidated.  Otherwise
				   (i.e. if the write starts on the same page), zero-fill
				   the entire page explicitly now:
				 */
				if (inval_end > eof_page_base) {
					inval_start = eof_page_base;
				} else {
					zero_off = eof_page_base;
				}
			}

			if (inval_start < inval_end) {
				/* There's some range of data that's going to be marked invalid */

				if (zero_off < inval_start) {
					/* The pages between inval_start and inval_end are going to be invalidated,
					   and the actual write will start on a page past inval_end.  Now's the last
					   chance to zero-fill the page containing the EOF:
					 */
					retval = cluster_write(vp, (struct uio *) 0,
							fp->ff_size, inval_start,
							zero_off, (off_t)0, devBlockSize,
							lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
					if (retval) goto ioerr_exit;
				}

				/* Mark the remaining area of the newly allocated space as invalid: */
				rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
				cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
				zero_off = fp->ff_size = inval_end;
			}

			if (uio->uio_offset > zero_off) lflag |= IO_HEADZEROFILL;
		}

		/* Check to see whether the area between the end of the write and the end of
		   the page it falls in is invalid and should be zero-filled as part of the transfer:
		 */
		tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
		if (tail_off > filesize) tail_off = filesize;
		if (tail_off > writelimit) {
			if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
				lflag |= IO_TAILZEROFILL;
			}
		}

		/*
		 * if the write starts beyond the current EOF (possibly advanced in the
		 * zeroing of the last block, above), then we'll zero fill from the current EOF
		 * to where the write begins:
		 *
		 * NOTE: If (and ONLY if) the portion of the file about to be written is
		 * before the current EOF it might be marked as invalid now and must be
		 * made readable (removed from the invalid ranges) before cluster_write
		 * tries to write it:
		 */
		io_start = (lflag & IO_HEADZEROFILL) ? zero_off : uio->uio_offset;
		io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
		if (io_start < fp->ff_size) {
			rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
		}
		retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
				tail_off, devBlockSize, lflag | IO_NOZERODIRTY);

		if (uio->uio_offset > fp->ff_size) {
			fp->ff_size = uio->uio_offset;

			ubc_setsize(vp, fp->ff_size);	/* XXX check errors */
		}
		if (resid > uio->uio_resid)
			cp->c_flag |= C_CHANGE | C_UPDATE;
	} else {
		while (retval == E_NONE && uio->uio_resid > 0) {
			logBlockNo = currOffset / PAGE_SIZE;
			blkoffset  = currOffset & PAGE_MASK;

			if ((filebytes - currOffset) < PAGE_SIZE_64)
				fragSize = filebytes - ((off_t)logBlockNo * PAGE_SIZE_64);
			else
				fragSize = PAGE_SIZE;
			xfersize = fragSize - blkoffset;

			/* Make any adjustments for boundary conditions */
			if (currOffset + (off_t)xfersize > writelimit)
				xfersize = writelimit - currOffset;

			/*
			 * There is no need to read into bp if:
			 * We start on a block boundary and will overwrite the whole block
			 */
			if ((blkoffset == 0) && (xfersize >= fragSize)) {
				bp = getblk(vp, logBlockNo, fragSize, 0, 0, BLK_READ);
				retval = 0;

				if (bp->b_blkno == -1) {
					brelse(bp);
					retval = EIO;		/* XXX */
					break;
				}
			} else {

				if (currOffset == fp->ff_size && blkoffset == 0) {
					bp = getblk(vp, logBlockNo, fragSize, 0, 0, BLK_READ);
					retval = 0;

					if (bp->b_blkno == -1) {
						brelse(bp);
						retval = EIO;		/* XXX */
						break;
					}
				} else {
					/*
					 * This I/O transfer is not sufficiently aligned,
					 * so read the affected block into a buffer:
					 */
					retval = bread(vp, logBlockNo, fragSize, ap->a_cred, &bp);
					if (retval != E_NONE) {
						if (bp)
							brelse(bp);
						break;
					}
				}
			}

			/* See if we are starting to write within file boundaries:
			 * If not, then we need to present a "hole" for the area
			 * between the current EOF and the start of the current
			 * I/O operation:
			 *
			 * Note that currOffset is only less than uio_offset if
			 * uio_offset > LEOF...
			 */
			if (uio->uio_offset > currOffset) {
				clearSize = MIN(uio->uio_offset - currOffset, xfersize);
				bzero(bp->b_data + blkoffset, clearSize);
				currOffset += clearSize;
				blkoffset += clearSize;
				xfersize -= clearSize;
			}

			retval = uiomove((caddr_t)bp->b_data + blkoffset, (int)xfersize, uio);
			currOffset += xfersize;

			if (ioflag & IO_SYNC) {
				(void)VOP_BWRITE(bp);
			} else if ((xfersize + blkoffset) == fragSize) {
				bp->b_flags |= B_AGE;
				bawrite(bp);
			} else {
				bdwrite(bp);
			}

			/* Update the EOF if we just extended the file
			 * (the PEOF has already been moved out and the
			 * block mapping table has been updated):
			 */
			if (currOffset > fp->ff_size) {
				fp->ff_size = currOffset;
				if (UBCISVALID(vp))
					ubc_setsize(vp, fp->ff_size);	/* XXX check errors */
			}
			if (retval || (resid == 0))
				break;
			cp->c_flag |= C_CHANGE | C_UPDATE;
		}
	}

ioerr_exit:
	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
		cp->c_mode &= ~(S_ISUID | S_ISGID);

	if (retval) {
		if (ioflag & IO_UNIT) {
			(void)VOP_TRUNCATE(vp, origFileSize,
				ioflag & IO_SYNC, ap->a_cred, uio->uio_procp);
			uio->uio_offset -= resid - uio->uio_resid;
			uio->uio_resid = resid;
			filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
		}
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
		tv = time;
		retval = VOP_UPDATE(vp, &tv, &tv, 1);
	}

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
		(int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);

	return (retval);
}

/*
 vop_ioctl {
     IN struct ucred *cred;
     */

int
hfs_ioctl(ap)
	struct vop_ioctl_args /* {
		struct vnode *a_vp;
		int  a_command;
		caddr_t  a_data;
		int  a_fflag;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	switch (ap->a_command) {

	case 1: {				/* F_RDADVISE */
		register struct cnode *cp;
		register struct vnode *vp;
		register struct radvisory *ra;
		struct filefork *fp;
		int devBlockSize = 0;
		int error;

		vp = ap->a_vp;

		if (vp->v_type != VREG)
			return EINVAL;

		VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ);
		error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
		if (error)
			return (error);

		ra = (struct radvisory *)(ap->a_data);
		cp = VTOC(vp);
		fp = VTOF(vp);

		if (ra->ra_offset >= fp->ff_size) {
			VOP_UNLOCK(vp, 0, ap->a_p);
			return (EFBIG);
		}
		VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);

		error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count, devBlockSize);
		VOP_UNLOCK(vp, 0, ap->a_p);

		return (error);
	}

	case 2:	/* F_READBOOTBLOCKS */
	case 3:	/* F_WRITEBOOTBLOCKS */
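	    /*
	     * These calls transfer the traditional 1024-byte boot block area
	     * at the start of the volume: the request must lie entirely within
	     * the first 1024 bytes, and the copy is done through the device
	     * vnode one device block at a time.
	     */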
	    {
		struct vnode *vp = ap->a_vp;
		struct vnode *devvp = NULL;
		struct fbootstraptransfer *btd = (struct fbootstraptransfer *)ap->a_data;
		int devBlockSize;
		int error;
		struct iovec aiov;
		struct uio auio;
		u_long blockNumber;
		u_long blockOffset;
		u_long xfersize;
		struct buf *bp;

		if ((vp->v_flag & VROOT) == 0) return EINVAL;
		if (btd->fbt_offset + btd->fbt_length > 1024) return EINVAL;

		devvp = VTOHFS(vp)->hfs_devvp;
		aiov.iov_base = btd->fbt_buffer;
		aiov.iov_len = btd->fbt_length;

		auio.uio_iov = &aiov;
		auio.uio_iovcnt = 1;
		auio.uio_offset = btd->fbt_offset;
		auio.uio_resid = btd->fbt_length;
		auio.uio_segflg = UIO_USERSPACE;
		auio.uio_rw = (ap->a_command == 3) ? UIO_WRITE : UIO_READ; /* F_WRITEBOOTSTRAP / F_READBOOTSTRAP */
		auio.uio_procp = ap->a_p;

		VOP_DEVBLOCKSIZE(devvp, &devBlockSize);

		while (auio.uio_resid > 0) {
			blockNumber = auio.uio_offset / devBlockSize;
			error = bread(devvp, blockNumber, devBlockSize, ap->a_cred, &bp);
			if (error) {
				if (bp) brelse(bp);
				return error;
			}

			blockOffset = auio.uio_offset % devBlockSize;
			xfersize = devBlockSize - blockOffset;
			error = uiomove((caddr_t)bp->b_data + blockOffset, (int)xfersize, &auio);
			if (error) {
				brelse(bp);
				return error;
			}
			if (auio.uio_rw == UIO_WRITE) {
				error = VOP_BWRITE(bp);
				if (error) return error;
			} else {
				brelse(bp);
			}
		}
	    }
	    return 0;

	case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
		*(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(ap->a_vp)->localCreateDate);
		break;

	default:
		return (ENOTTY);
	}

	/* Should never get here */
	return 0;
}

int
hfs_select(ap)
	struct vop_select_args /* {
		struct ucred *a_cred;
	} */ *ap;
{
	/*
	 * We should really check to see if I/O is possible.
	 */
	return (1);
}

/*
 * Bmap converts the logical block number of a file to its physical block
 * number on the disk.
 *
 * vp   - address of the vnode for the file
 * bn   - which logical block to convert to a physical block number.
 * vpp  - returns the vnode for the block special file holding the filesystem
 *	  containing the file of interest
 * bnp  - address of where to return the filesystem physical block number
 *
 vop_bmap {
     OUT struct vnode **vpp;
     */
/*
 * Converts a logical block number to a physical block, and optionally returns
 * the amount of remaining blocks in a run.  The logical block is based on hfsNode.logBlockSize.
 * The physical block number is based on the device block size, currently it is 512.
 * The block run is returned in logical blocks, and is the REMAINING amount of blocks.
 */
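/*
 * Illustration (not from the original comments): with a 4K logical block size
 * and 512-byte device blocks, logical block 3 of a file whose first extent
 * starts at device block 1000 maps to device block 1000 + (3 * 8) = 1024,
 * and a_runp reports how many further logical blocks follow contiguously.
 */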

int
hfs_bmap(ap)
	struct vop_bmap_args /* {
		struct vnode *a_vp;
		daddr_t a_bn;
		struct vnode **a_vpp;
		daddr_t *a_bnp;
		int *a_runp;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	struct hfsmount *hfsmp = VTOHFS(vp);
	int retval = E_NONE;
	daddr_t logBlockSize;
	size_t bytesContAvail = 0;
	off_t blockposition;
	int lockExtBtree;
	struct proc *p = NULL;
	struct rl_entry *invalid_range;
	enum rl_overlaptype overlaptype;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (ap->a_vpp != NULL)
		*ap->a_vpp = cp->c_devvp;
	if (ap->a_bnp == NULL)
		return (0);

	/* Only clustered I/O should have delayed allocations. */
	DBG_ASSERT(fp->ff_unallocblocks == 0);

	logBlockSize = GetLogicalBlockSize(vp);
	blockposition = (off_t)ap->a_bn * (off_t)logBlockSize;

	lockExtBtree = overflow_extents(fp);
	if (lockExtBtree) {
		retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID,
				LK_EXCLUSIVE | LK_CANRECURSE, p);
		if (retval)
			return (retval);
	}

	retval = MacToVFSError(
			MapFileBlockC (HFSTOVCB(hfsmp),
					(FCB*)fp,
					MAXPHYSIO,
					blockposition,
					ap->a_bnp,
					&bytesContAvail));

	if (lockExtBtree) (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);

	if (retval == E_NONE) {
		/* Adjust the mapping information for invalid file ranges: */
		overlaptype = rl_scan(&fp->ff_invalidranges,
					blockposition,
					blockposition + MAXPHYSIO - 1,
					&invalid_range);
		if (overlaptype != RL_NOOVERLAP) {
			switch(overlaptype) {
			case RL_MATCHINGOVERLAP:
			case RL_OVERLAPCONTAINSRANGE:
			case RL_OVERLAPSTARTSBEFORE:
				/* There's no valid block for this byte offset: */
				*ap->a_bnp = (daddr_t)-1;
				bytesContAvail = invalid_range->rl_end + 1 - blockposition;
				break;

			case RL_OVERLAPISCONTAINED:
			case RL_OVERLAPENDSAFTER:
				/* The range of interest hits an invalid block before the end: */
				if (invalid_range->rl_start == blockposition) {
					/* There's actually no valid information to be had starting here: */
					*ap->a_bnp = (daddr_t)-1;
					if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
					    (invalid_range->rl_end + 1 - blockposition < bytesContAvail)) {
						bytesContAvail = invalid_range->rl_end + 1 - blockposition;
					}
				} else {
					bytesContAvail = invalid_range->rl_start - blockposition;
				}
				break;
			}
			if (bytesContAvail > MAXPHYSIO) bytesContAvail = MAXPHYSIO;
		}

		/* Figure out how many read ahead blocks there are */
		if (ap->a_runp != NULL) {
			if (can_cluster(logBlockSize)) {
				/* Make sure this result never goes negative: */
				*ap->a_runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
			} else {
				*ap->a_runp = 0;
			}
		}
	}

	return (retval);
}

/* blktooff converts logical block number to file offset */

int
hfs_blktooff(ap)
	struct vop_blktooff_args /* {
		struct vnode *a_vp;
		daddr_t a_lblkno;
		off_t *a_offset;
	} */ *ap;
{
	if (ap->a_vp == NULL)
		return (EINVAL);
	*ap->a_offset = (off_t)ap->a_lblkno * PAGE_SIZE_64;

	return(0);
}

int
hfs_offtoblk(ap)
	struct vop_offtoblk_args /* {
		struct vnode *a_vp;
		off_t a_offset;
		daddr_t *a_lblkno;
	} */ *ap;
{
	if (ap->a_vp == NULL)
		return (EINVAL);
	*ap->a_lblkno = ap->a_offset / PAGE_SIZE_64;

	return(0);
}

int
hfs_cmap(ap)
	struct vop_cmap_args /* {
		struct vnode *a_vp;
		off_t a_foffset;
		size_t a_size;
		daddr_t *a_bpn;
		size_t *a_run;
		void *a_poff;
	} */ *ap;
{
	struct hfsmount *hfsmp = VTOHFS(ap->a_vp);
	struct filefork *fp = VTOF(ap->a_vp);
	size_t bytesContAvail = 0;
	int retval = E_NONE;
	int lockExtBtree = 0;
	struct proc *p = NULL;
	struct rl_entry *invalid_range;
	enum rl_overlaptype overlaptype;
	int started_tr = 0, grabbed_lock = 0;

	/*
	 * Check for underlying vnode requests and ensure that logical
	 * to physical mapping is requested.
	 */
	if (ap->a_bpn == NULL)
		return (0);

retry:
	if (fp->ff_unallocblocks) {
		lockExtBtree = 1;

		hfs_global_shared_lock_acquire(hfsmp);
		grabbed_lock = 1;

		if (hfsmp->jnl) {
			if (journal_start_transaction(hfsmp->jnl) != 0) {
				hfs_global_shared_lock_release(hfsmp);
				return EINVAL;
			} else {
				started_tr = 1;
			}
		}

		if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p)) {
			if (started_tr) {
				journal_end_transaction(hfsmp->jnl);
			}
			if (grabbed_lock) {
				hfs_global_shared_lock_release(hfsmp);
			}
			return (retval);
		}
	} else if (overflow_extents(fp)) {
		lockExtBtree = 1;
		if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p)) {
			return retval;
		}
	}

	/*
	 * Check for any delayed allocations.
	 */
	if (fp->ff_unallocblocks) {
		SInt64 reqbytes, actbytes;

		//
		// Make sure we have a transaction.  It's possible
		// that we came in and fp->ff_unallocblocks was zero
		// but during the time we blocked acquiring the extents
		// btree, ff_unallocblocks became non-zero and so we
		// will need to start a transaction.
		//
		if (hfsmp->jnl && started_tr == 0) {
			if (lockExtBtree) {
				(void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
				lockExtBtree = 0;
			}
			goto retry;
		}

		reqbytes = (SInt64)fp->ff_unallocblocks *
				(SInt64)HFSTOVCB(hfsmp)->blockSize;
		/*
		 * Release the blocks on loan and acquire some real ones.
		 * Note that we can race someone else for these blocks
		 * (and lose) so cmap needs to handle a failure here.
		 * Currently this race can't occur because all allocations
		 * are protected by an exclusive lock on the Extents
		 * B-tree.
		 */
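		/*
		 * ff_unallocblocks counts blocks "borrowed" from the volume's
		 * free count by delayed (clustered) allocations; here that loan
		 * is paid back by turning it into real allocated extents via
		 * ExtendFileC.
		 */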
		HFSTOVCB(hfsmp)->loanedBlocks -= fp->ff_unallocblocks;
		FTOC(fp)->c_blocks            -= fp->ff_unallocblocks;
		fp->ff_blocks                 -= fp->ff_unallocblocks;
		fp->ff_unallocblocks           = 0;

		while (retval == 0 && reqbytes > 0) {
			retval = MacToVFSError(ExtendFileC(HFSTOVCB(hfsmp),
					(FCB*)fp, reqbytes, 0,
					kEFAllMask | kEFNoClumpMask, &actbytes));
			if (retval == 0 && actbytes == 0)
				retval = ENOSPC;

			if (retval) {
				fp->ff_unallocblocks =
					reqbytes / HFSTOVCB(hfsmp)->blockSize;
				HFSTOVCB(hfsmp)->loanedBlocks += fp->ff_unallocblocks;
				FTOC(fp)->c_blocks            += fp->ff_unallocblocks;
				fp->ff_blocks                 += fp->ff_unallocblocks;
			}
			reqbytes -= actbytes;
		}

		(void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
		if (started_tr) {
			hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
			journal_end_transaction(hfsmp->jnl);
			started_tr = 0;
		}
		if (grabbed_lock) {
			hfs_global_shared_lock_release(hfsmp);
			grabbed_lock = 0;
		}

		VTOC(ap->a_vp)->c_flag |= C_MODIFIED;
	}

	retval = MacToVFSError(
			MapFileBlockC (HFSTOVCB(hfsmp),
					(FCB *)fp,
					ap->a_size,
					ap->a_foffset,
					ap->a_bpn,
					&bytesContAvail));

	if (lockExtBtree)
		(void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);

	// XXXdbg
	if (started_tr) {
		hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
		journal_end_transaction(hfsmp->jnl);
		started_tr = 0;
	}
	if (grabbed_lock) {
		hfs_global_shared_lock_release(hfsmp);
		grabbed_lock = 0;
	}

	if (retval == E_NONE) {
		/* Adjust the mapping information for invalid file ranges: */
		overlaptype = rl_scan(&fp->ff_invalidranges,
					ap->a_foffset,
					ap->a_foffset + (off_t)bytesContAvail - 1,
					&invalid_range);
		if (overlaptype != RL_NOOVERLAP) {
			switch(overlaptype) {
			case RL_MATCHINGOVERLAP:
			case RL_OVERLAPCONTAINSRANGE:
			case RL_OVERLAPSTARTSBEFORE:
				/* There's no valid block for this byte offset: */
				*ap->a_bpn = (daddr_t)-1;

				/* There's no point limiting the amount to be returned if the
				   invalid range that was hit extends all the way to the EOF
				   (i.e. there's no valid bytes between the end of this range
				   and the file's EOF):
				 */
				if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
				    (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
					bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
				}
				break;

			case RL_OVERLAPISCONTAINED:
			case RL_OVERLAPENDSAFTER:
				/* The range of interest hits an invalid block before the end: */
				if (invalid_range->rl_start == ap->a_foffset) {
					/* There's actually no valid information to be had starting here: */
					*ap->a_bpn = (daddr_t)-1;
					if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
					    (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
						bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
					}
				} else {
					bytesContAvail = invalid_range->rl_start - ap->a_foffset;
				}
				break;
			}
			if (bytesContAvail > ap->a_size) bytesContAvail = ap->a_size;
		}
	}

	if (ap->a_run) *ap->a_run = bytesContAvail;

	if (ap->a_poff)
		*(int *)ap->a_poff = 0;

	return (retval);
}

/*
 * Read or write a buffer that is not contiguous on disk.  We loop over
 * each device block, copying to or from the caller's buffer.
 *
 * We could be a bit more efficient by transferring as much data as is
 * contiguous.  But since this routine should rarely be called, and that
 * would be more complicated; best to keep it simple.
 */
int
hfs_strategy_fragmented(struct buf *bp)
{
	register struct vnode *vp = bp->b_vp;
	register struct cnode *cp = VTOC(vp);
	register struct vnode *devvp = cp->c_devvp;
	caddr_t ioaddr;		/* Address of fragment within bp */
	struct buf *frag = NULL; /* For reading or writing a single block */
	int retval = 0;
	long remaining;		/* Bytes (in bp) left to transfer */
	off_t offset;		/* Logical offset of current fragment in vp */
	u_long block_size;	/* Size of one device block (and one I/O) */

	/* Make sure we redo this mapping for the next I/O */
	bp->b_blkno = bp->b_lblkno;

	/* Set up the logical position and number of bytes to read/write */
	offset = (off_t) bp->b_lblkno * (off_t) GetLogicalBlockSize(vp);
	block_size = VTOHFS(vp)->hfs_phys_block_size;

	/* Get an empty buffer to do the deblocking */
	frag = geteblk(block_size);
	if (ISSET(bp->b_flags, B_READ))
		SET(frag->b_flags, B_READ);

	for (ioaddr = bp->b_data, remaining = bp->b_bcount; remaining != 0;
	     ioaddr += block_size, offset += block_size,
	     remaining -= block_size) {
		frag->b_resid = frag->b_bcount;
		CLR(frag->b_flags, B_DONE);

		/* Map the current position to a physical block number */
		retval = VOP_CMAP(vp, offset, block_size, &frag->b_lblkno,
				NULL, NULL);
		if (retval != 0)
			break;

		/*
		 * Did we try to read a hole?
		 * (Should never happen for metadata!)
		 */
		if ((long)frag->b_lblkno == -1) {
			bzero(ioaddr, block_size);
			continue;
		}

		/* If writing, copy before I/O */
		if (!ISSET(bp->b_flags, B_READ))
			bcopy(ioaddr, frag->b_data, block_size);

		/* Call the device to do the I/O and wait for it */
		frag->b_blkno = frag->b_lblkno;
		frag->b_vp = devvp;	/* Used to dispatch via VOP_STRATEGY */
		frag->b_dev = devvp->v_rdev;
		retval = VOP_STRATEGY(frag);
		if (retval != 0)
			break;
		retval = biowait(frag);
		if (retval != 0)
			break;

		/* If reading, copy after the I/O */
		if (ISSET(bp->b_flags, B_READ))
			bcopy(frag->b_data, ioaddr, block_size);
	}

	//
	// XXXdbg - in the case that this is a meta-data block, it won't affect
	//          the journal because this bp is for a physical disk block,
	//          not a logical block that is part of the catalog or extents
	//          b-trees.
	//
	SET(frag->b_flags, B_INVAL);
	brelse(frag);

	if ((bp->b_error = retval) != 0)
		SET(bp->b_flags, B_ERROR);

	biodone(bp);	/* This I/O is now complete */
	return retval;
}

/*
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
 *
# IN struct buf *bp;
 */
int
hfs_strategy(ap)
	struct vop_strategy_args /* {
		struct buf *a_bp;
	} */ *ap;
{
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = bp->b_vp;
	register struct cnode *cp = VTOC(vp);
	int retval = 0;
	off_t offset;
	size_t bytes_contig;

	if ( !(bp->b_flags & B_VECTORLIST)) {
		if (vp->v_type == VBLK || vp->v_type == VCHR)
			panic("hfs_strategy: device vnode passed!");

		if (bp->b_flags & B_PAGELIST) {
			/*
			 * If we have a page list associated with this bp,
			 * then go through cluster_bp since it knows how to
			 * deal with a page request that might span non-
			 * contiguous physical blocks on the disk...
			 */
			retval = cluster_bp(bp);
			vp = cp->c_devvp;
			bp->b_dev = vp->v_rdev;

			return (retval);
		}

		/*
		 * If we don't already know the filesystem relative block
		 * number then get it using VOP_CMAP().  If VOP_CMAP()
		 * returns the block number as -1 then we've got a hole in
		 * the file.  Although HFS filesystems don't create files with
		 * holes, invalidating of subranges of the file (lazy zero
		 * filling) may create such a situation.
		 */
		if (bp->b_blkno == bp->b_lblkno) {
			offset = (off_t) bp->b_lblkno *
				(off_t) GetLogicalBlockSize(vp);

			if ((retval = VOP_CMAP(vp, offset, bp->b_bcount,
					&bp->b_blkno, &bytes_contig, NULL))) {
				bp->b_error = retval;
				bp->b_flags |= B_ERROR;
				biodone(bp);
				return (retval);
			}
			if (bytes_contig < bp->b_bcount) {
				/*
				 * We were asked to read a block that wasn't
				 * contiguous, so we have to read each of the
				 * pieces and copy them into the buffer.
				 * Since ordinary file I/O goes through
				 * cluster_io (which won't ask us for
				 * discontiguous data), this is probably an
				 * attempt to read or write metadata.
				 */
				return hfs_strategy_fragmented(bp);
			}
			if ((long)bp->b_blkno == -1)
				clrbuf(bp);
		}
		if ((long)bp->b_blkno == -1) {
			biodone(bp);
			return (0);
		}
		if (bp->b_validend == 0) {
			/*
			 * Record the exact size of the I/O transfer about to
			 * be made:
			 */
			bp->b_validend = bp->b_bcount;
		}
	}
	vp = cp->c_devvp;
	bp->b_dev = vp->v_rdev;

	return VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
}

/*
#
#% truncate	vp	L L L
#
 vop_truncate {
     IN struct vnode *vp;
     IN off_t length;
     IN int flags;	(IO_SYNC)
     IN struct ucred *cred;
     IN struct proc *p;
 */
/*
 * Truncate a cnode to at most length size, freeing (or adding) the
 * blocks.
 */
int hfs_truncate(ap)
	struct vop_truncate_args /* {
		struct vnode *a_vp;
		off_t a_length;
		int a_flags;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	register struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	off_t length;
	off_t bytesToAdd;
	off_t actualBytesAdded;
	off_t filebytes;
	u_long fileblocks;
	long vflags;
	struct timeval tv;
	int blksize;
	int devBlockSize;
	int retval;
	struct hfsmount *hfsmp;

	if (vp->v_type != VREG && vp->v_type != VLNK)
		return (EISDIR);	/* cannot truncate an HFS directory! */

	length = ap->a_length;
	blksize = VTOVCB(vp)->blockSize;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)blksize;
	hfsmp = VTOHFS(vp);

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
		 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

	if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
		return (EFBIG);

	/*
	 * We cannot just check if fp->ff_size == length (as an optimization)
	 * since there may be extra physical blocks that also need truncation.
	 */
	if (retval = hfs_getinoquota(cp))
		return(retval);

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of ff_size is 0, length will be at least 1.
	 */
	if (length > fp->ff_size) {
		retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
				   ap->a_cred, 0);
		if (retval)
			goto Err_Exit;

		/*
		 * If we don't have enough physical space then
		 * we need to extend the physical size.
		 */
		if (length > filebytes) {
			int eflags;

			/* All or nothing and don't round up to clumpsize. */
			eflags = kEFAllMask | kEFNoClumpMask;

			if (suser(ap->a_cred, NULL) != 0)
				eflags |= kEFReserveMask;  /* keep a reserve */

			// XXXdbg
			hfs_global_shared_lock_acquire(hfsmp);
			if (hfsmp->jnl) {
				if (journal_start_transaction(hfsmp->jnl) != 0) {
					retval = EINVAL;
					goto Err_Exit;
				}
			}

			/* lock extents b-tree (also protects volume bitmap) */
			retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
			if (retval) {
				if (hfsmp->jnl) {
					journal_end_transaction(hfsmp->jnl);
				}
				hfs_global_shared_lock_release(hfsmp);

				goto Err_Exit;
			}

			while ((length > filebytes) && (retval == E_NONE)) {
				bytesToAdd = length - filebytes;
				retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
						(FCB*)fp,
						bytesToAdd,
						0,
						eflags,
						&actualBytesAdded));

				filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
				if (actualBytesAdded == 0 && retval == E_NONE) {
					if (length > filebytes)
						length = filebytes;
					break;
				}
			}

			(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);

			// XXXdbg
			if (hfsmp->jnl) {
				hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
				journal_end_transaction(hfsmp->jnl);
			}
			hfs_global_shared_lock_release(hfsmp);

			KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
				(int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
		}

		if (!(ap->a_flags & IO_NOZEROFILL)) {
			if (UBCINFOEXISTS(vp) && retval == E_NONE) {
				struct rl_entry *invalid_range;
				off_t zero_limit;

				zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
				if (length < zero_limit) zero_limit = length;

				if (length > fp->ff_size) {
					/* Extending the file: time to fill out the current last page w. zeroes? */
					if ((fp->ff_size & PAGE_MASK_64) &&
					    (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
					    fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {

						/* There's some valid data at the start of the (current) last page
						   of the file, so zero out the remainder of that page to ensure the
						   entire page contains valid data.  Since there is no invalid range
						   possible past the (current) eof, there's no need to remove anything
						   from the invalid range list before calling cluster_write(): */
						VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
						retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
								fp->ff_size, (off_t)0, devBlockSize,
								(ap->a_flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
						if (retval) goto Err_Exit;

						/* Merely invalidate the remaining area, if necessary: */
						if (length > zero_limit) {
							rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
							cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
						}
					} else {
						/* The page containing the (current) eof is invalid: just add the
						   remainder of the page to the invalid list, along with the area
						   being newly allocated:
						 */
						rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
						cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
					}
				}
			} else {
				panic("hfs_truncate: invoked on non-UBC object?!");
			}
		}
		cp->c_flag |= C_UPDATE;
		fp->ff_size = length;

		if (UBCISVALID(vp))
			ubc_setsize(vp, fp->ff_size);	/* XXX check errors */

	} else { /* Shorten the size of the file */

		if (fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency).  For
			 * simplicity, we invalidate all the buffers by calling vinvalbuf.
			 */
			if (UBCISVALID(vp))
				ubc_setsize(vp, length); /* XXX check errors */

			vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
			retval = vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);

			/* Any space previously marked as invalid is now irrelevant: */
			rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
		}

		/*
		 * Account for any unmapped blocks. Note that the new
		 * file length can still end up with unmapped blocks.
		 */
		if (fp->ff_unallocblocks > 0) {
			u_int32_t finalblks;

			/* lock extents b-tree */
			retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID,
					LK_EXCLUSIVE, ap->a_p);
			if (retval)
				goto Err_Exit;

			VTOVCB(vp)->loanedBlocks -= fp->ff_unallocblocks;
			cp->c_blocks             -= fp->ff_unallocblocks;
			fp->ff_blocks            -= fp->ff_unallocblocks;
			fp->ff_unallocblocks      = 0;

			finalblks = (length + blksize - 1) / blksize;
			if (finalblks > fp->ff_blocks) {
				/* calculate required unmapped blocks */
				fp->ff_unallocblocks = finalblks - fp->ff_blocks;
				VTOVCB(vp)->loanedBlocks += fp->ff_unallocblocks;
				cp->c_blocks             += fp->ff_unallocblocks;
				fp->ff_blocks            += fp->ff_unallocblocks;
			}
			(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID,
					LK_RELEASE, ap->a_p);
		}

		/*
		 * For a TBE process the deallocation of the file blocks is
		 * delayed until the file is closed. And hfs_close calls
		 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
		 * isn't set, we make sure this isn't a TBE process.
		 */
		if ((ap->a_flags & IO_NDELAY) || (!ISSET(ap->a_p->p_flag, P_TBE))) {

			off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);

			// XXXdbg
			hfs_global_shared_lock_acquire(hfsmp);
			if (hfsmp->jnl) {
				if (journal_start_transaction(hfsmp->jnl) != 0) {
					retval = EINVAL;
					goto Err_Exit;
				}
			}

			/* lock extents b-tree (also protects volume bitmap) */
			retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
			if (retval) {
				if (hfsmp->jnl) {
					journal_end_transaction(hfsmp->jnl);
				}
				hfs_global_shared_lock_release(hfsmp);
				goto Err_Exit;
			}

			if (fp->ff_unallocblocks == 0)
				retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
						(FCB*)fp, length, false));

			(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);

			// XXXdbg
			if (hfsmp->jnl) {
				hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
				journal_end_transaction(hfsmp->jnl);
			}
			hfs_global_shared_lock_release(hfsmp);

			filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
			if (retval)
				goto Err_Exit;

			/* These are bytesreleased */
			(void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
		}

		/* Only set update flag if the logical length changes */
		if (fp->ff_size != length)
			cp->c_flag |= C_UPDATE;
		fp->ff_size = length;
	}
	cp->c_flag |= C_CHANGE;
	tv = time;
	retval = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
	if (retval) {
		KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
			-1, -1, -1, retval, 0);
	}

Err_Exit:

	KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
		 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);

	return (retval);
}

/*
#
#% allocate	vp	L L L
#
 vop_allocate {
     IN struct vnode *vp;
     IN off_t length;
     IN int flags;
     OUT off_t *bytesallocated;
     IN off_t offset;
     IN struct ucred *cred;
     IN struct proc *p;
 */
/*
 * allocate a cnode to at most length size
 */
int hfs_allocate(ap)
	struct vop_allocate_args /* {
		struct vnode *a_vp;
		off_t a_length;
		u_int32_t  a_flags;
		off_t *a_bytesallocated;
		off_t a_offset;
		struct ucred *a_cred;
		struct proc *a_p;
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	off_t length = ap->a_length;
	off_t startingPEOF;
	off_t moreBytesRequested;
	off_t actualBytesAdded;
	off_t filebytes;
	u_long fileblocks;
	long vflags;
	struct timeval tv;
	int retval, retval2;
	UInt32 blockHint;
	UInt32 extendFlags =0;   /* For call to ExtendFileC */
	struct hfsmount *hfsmp;

	hfsmp = VTOHFS(vp);

	*(ap->a_bytesallocated) = 0;
	fileblocks = fp->ff_blocks;
	filebytes = (off_t)fileblocks * (off_t)VTOVCB(vp)->blockSize;

	if (length < (off_t)0)
		return (EINVAL);
	if (vp->v_type != VREG && vp->v_type != VLNK)
		return (EISDIR);
	if ((ap->a_flags & ALLOCATEFROMVOL) && (length <= filebytes))
		return (EINVAL);

	/* Fill in the flags word for the call to Extend the file */

	if (ap->a_flags & ALLOCATECONTIG)
		extendFlags |= kEFContigMask;

	if (ap->a_flags & ALLOCATEALL)
		extendFlags |= kEFAllMask;

	if (suser(ap->a_cred, NULL) != 0)
		extendFlags |= kEFReserveMask;

	tv = time;
	retval = E_NONE;
	blockHint = 0;
	startingPEOF = filebytes;

	if (ap->a_flags & ALLOCATEFROMPEOF)
		length += filebytes;
	else if (ap->a_flags & ALLOCATEFROMVOL)
		blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

	/* If no changes are necessary, then we're done */
	if (filebytes == length)
		goto Std_Exit;

	/*
	 * Lengthen the size of the file. We must ensure that the
	 * last byte of the file is allocated. Since the smallest
	 * value of filebytes is 0, length will be at least 1.
	 */
	if (length > filebytes) {
		moreBytesRequested = length - filebytes;

		retval = hfs_chkdq(cp,
				(int64_t)(roundup(moreBytesRequested, VTOVCB(vp)->blockSize)),
				ap->a_cred, 0);
		if (retval)
			return (retval);

		// XXXdbg
		hfs_global_shared_lock_acquire(hfsmp);
		if (hfsmp->jnl) {
			if (journal_start_transaction(hfsmp->jnl) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}
		}

		/* lock extents b-tree (also protects volume bitmap) */
		retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
		if (retval) {
			if (hfsmp->jnl) {
				journal_end_transaction(hfsmp->jnl);
			}
			hfs_global_shared_lock_release(hfsmp);
			goto Err_Exit;
		}

		retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
				(FCB*)fp,
				moreBytesRequested,
				blockHint,
				extendFlags,
				&actualBytesAdded));

		*(ap->a_bytesallocated) = actualBytesAdded;
		filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize;

		(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);

		// XXXdbg
		if (hfsmp->jnl) {
			hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
			journal_end_transaction(hfsmp->jnl);
		}
		hfs_global_shared_lock_release(hfsmp);

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the VOP_UPDATE to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes))
			goto Err_Exit;

		/*
		 * Adjust actualBytesAdded to be allocation block aligned, not
		 * clump size aligned.
		 * NOTE: So what we are reporting does not affect reality
		 * until the file is closed, when we truncate the file to allocation
		 * block size.
		 */
		if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
			*(ap->a_bytesallocated) =
				roundup(moreBytesRequested, (off_t)VTOVCB(vp)->blockSize);

	} else { /* Shorten the size of the file */

		if (fp->ff_size > length) {
			/*
			 * Any buffers that are past the truncation point need to be
			 * invalidated (to maintain buffer cache consistency). For
			 * simplicity, we invalidate all the buffers by calling vinvalbuf.
			 */
			vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
			(void) vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
		}

		// XXXdbg
		hfs_global_shared_lock_acquire(hfsmp);
		if (hfsmp->jnl) {
			if (journal_start_transaction(hfsmp->jnl) != 0) {
				retval = EINVAL;
				goto Err_Exit;
			}
		}

		/* lock extents b-tree (also protects volume bitmap) */
		retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
		if (retval) {
			if (hfsmp->jnl) {
				journal_end_transaction(hfsmp->jnl);
			}
			hfs_global_shared_lock_release(hfsmp);
			goto Err_Exit;
		}

		retval = MacToVFSError(
				TruncateFileC(VTOVCB(vp), (FCB*)fp, length, false));

		(void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
		filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize;

		if (hfsmp->jnl) {
			hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
			journal_end_transaction(hfsmp->jnl);
		}
		hfs_global_shared_lock_release(hfsmp);

		/*
		 * if we get an error and no changes were made then exit
		 * otherwise we must do the VOP_UPDATE to reflect the changes
		 */
		if (retval && (startingPEOF == filebytes)) goto Err_Exit;

		/* These are bytesreleased */
		(void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);

		if (fp->ff_size > filebytes) {
			fp->ff_size = filebytes;

			if (UBCISVALID(vp))
				ubc_setsize(vp, fp->ff_size); /* XXX check errors */
		}
	}

Std_Exit:
	cp->c_flag |= C_CHANGE | C_UPDATE;
	retval2 = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);

	if (retval == 0)
		retval = retval2;
Err_Exit:
	return (retval);
}

/*
 * pagein for HFS filesystem
 */
int
hfs_pagein(ap)
	struct vop_pagein_args /* {
	   struct vnode *a_vp,
	   upl_t a_pl,
	   vm_offset_t a_pl_offset,
	   off_t a_f_offset,
	   size_t a_size,
	   struct ucred *a_cred,
	   int a_flags
	} */ *ap;
{
	register struct vnode *vp = ap->a_vp;
	int devBlockSize = 0;
	int error;

	if (vp->v_type != VREG && vp->v_type != VLNK)
		panic("hfs_pagein: vp not UBC type\n");

	VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devBlockSize);

	error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
				ap->a_size, (off_t)VTOF(vp)->ff_size, devBlockSize,
				ap->a_flags);
	return (error);
}

/*
 * pageout for HFS filesystem.
 */
int
hfs_pageout(ap)
	struct vop_pageout_args /* {
	   struct vnode *a_vp,
	   upl_t a_pl,
	   vm_offset_t a_pl_offset,
	   off_t a_f_offset,
	   size_t a_size,
	   struct ucred *a_cred,
	   int a_flags
	} */ *ap;
{
	struct vnode *vp = ap->a_vp;
	struct cnode *cp = VTOC(vp);
	struct filefork *fp = VTOF(vp);
	int retval;
	int devBlockSize = 0;
	off_t end_of_range;
	off_t filesize;

	if (vp->v_type != VREG)
		panic("hfs_pageout: Not a VREG: vp=%x", vp);

	VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
	filesize = fp->ff_size;
	end_of_range = ap->a_f_offset + ap->a_size - 1;

	if (end_of_range >= filesize)
		end_of_range = (off_t)(filesize - 1);
	if (ap->a_f_offset < filesize)
		rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);

	retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size,
				 filesize, devBlockSize, ap->a_flags);

	/*
	 * If we successfully wrote any data, and we are not the superuser
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
	if (retval == 0 && ap->a_cred && ap->a_cred->cr_uid != 0)
		cp->c_mode &= ~(S_ISUID | S_ISGID);

	return (retval);
}

/*
 * Intercept B-Tree node writes to unswap them if necessary.
 *
# IN struct buf *bp;
 */
int
hfs_bwrite(ap)
	struct vop_bwrite_args /* {
		struct buf *a_bp;
	} */ *ap;
{
	int retval = 0;
	register struct buf *bp = ap->a_bp;
	register struct vnode *vp = bp->b_vp;
#if BYTE_ORDER == LITTLE_ENDIAN
	BlockDescriptor block;

	/* Trap B-Tree writes */
	if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
	    (VTOC(vp)->c_fileid == kHFSCatalogFileID)) {

		/* Swap if the B-Tree node is in native byte order */
		if (((UInt16 *)((char *)bp->b_data + bp->b_bcount - 2))[0] == 0x000e) {
			/* Prepare the block pointer */
			block.blockHeader = bp;
			block.buffer = bp->b_data;
			/* not found in cache ==> came from disk */
			block.blockReadFromDisk = (bp->b_flags & B_CACHE) == 0;
			block.blockSize = bp->b_bcount;

			/* Endian un-swap B-Tree node */
			SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1);
		}
	}
	/* We don't check to make sure that it's 0x0e00 because it could be all zeros */
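	/*
	 * Note: 0x000e is the offset of the first record in a B-tree node
	 * (sizeof(BTNodeDescriptor)), stored in the node's last two bytes.
	 * Reading it in host byte order means the node has not yet been
	 * swapped to big-endian (disk) order, so it must be swapped here.
	 */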
#endif
	/* This buffer shouldn't be locked anymore but if it is clear it */
	if (ISSET(bp->b_flags, B_LOCKED)) {
		// XXXdbg
		if (VTOHFS(vp)->jnl) {
			panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
		}
		CLR(bp->b_flags, B_LOCKED);
		printf("hfs_bwrite: called with lock bit set\n");
	}
	retval = vn_bwrite (ap);

	return (retval);
}