/*
 * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
 *
 * @APPLE_LICENSE_HEADER_START@
 *
 * The contents of this file constitute Original Code as defined in and
 * are subject to the Apple Public Source License Version 1.1 (the
 * "License"). You may not use this file except in compliance with the
 * License. Please obtain a copy of the License at
 * http://www.apple.com/publicsource and read it before using this file.
 *
 * This Original Code and all software distributed under the License are
 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
 * License for the specific language governing rights and limitations
 * under the License.
 *
 * @APPLE_LICENSE_HEADER_END@
 */
/* @(#)hfs_readwrite.c 1.0
 *
 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
 *
 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
 *
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/resourcevar.h>
#include <sys/kernel.h>
#include <sys/fcntl.h>
#include <sys/stat.h>
#include <sys/buf.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/uio.h>

#include <miscfs/specfs/specdev.h>

#include <sys/ubc.h>
#include <vm/vm_pageout.h>

#include <sys/kdebug.h>

#include "hfs.h"
#include "hfs_endian.h"
#include "hfs_quota.h"
#include "hfscommon/headers/FileMgrInternal.h"
#include "hfscommon/headers/BTreesInternal.h"
#include "hfs_cnode.h"
#include "hfs_dbg.h"

extern int overflow_extents(struct filefork *fp);

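/*
 * A block size can be "clustered" only if it is an exact multiple of
 * 4096 bytes (the page size) and no larger than half of MAXPHYSIO.
 */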
#define can_cluster(size)  ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))

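/* Plain HFS (unlike HFS Plus) limits a file to 2^31 - 1 bytes. */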
enum {
    MAXHFSFILESIZE = 0x7FFFFFFF     /* this needs to go in the mount structure */
};

extern u_int32_t GetLogicalBlockSize(struct vnode *vp);


/*****************************************************************************
*
*  Operations on vnodes
*
*****************************************************************************/

/*
#% read        vp    L L L
#
 vop_read {
     IN struct vnode *vp;
     INOUT struct uio *uio;
     IN int ioflag;
     IN struct ucred *cred;

     */

int
hfs_read(ap)
    struct vop_read_args /* {
        struct vnode *a_vp;
        struct uio *a_uio;
        int a_ioflag;
        struct ucred *a_cred;
    } */ *ap;
{
    register struct uio *uio = ap->a_uio;
    register struct vnode *vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    struct buf *bp;
    daddr_t logBlockNo;
    u_long fragSize, moveSize, startOffset, ioxfersize;
    int devBlockSize = 0;
    off_t bytesRemaining;
    int retval = 0;
    off_t filesize;
    off_t filebytes;

    /* Preflight checks */
    if (vp->v_type != VREG && vp->v_type != VLNK)
        return (EISDIR);        /* HFS can only read files */
    if (uio->uio_resid == 0)
        return (0);             /* Nothing left to do */
    if (uio->uio_offset < 0)
        return (EINVAL);        /* can't read from a negative offset */

    cp = VTOC(vp);
    fp = VTOF(vp);
    filesize = fp->ff_size;
    filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize;
    if (uio->uio_offset > filesize) {
        if ((!ISHFSPLUS(VTOVCB(vp))) && (uio->uio_offset > (off_t)MAXHFSFILESIZE))
            return (EFBIG);
        else
            return (0);
    }

    VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
        (int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0);

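    /*
     * If this vnode is backed by the unified buffer cache, hand the
     * whole request to cluster_read().  Otherwise fall back to moving
     * page-sized chunks through the buffer cache, issuing one block of
     * read-ahead when the access pattern looks sequential.
     */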
    if (UBCISVALID(vp)) {
        retval = cluster_read(vp, uio, filesize, devBlockSize, 0);
    } else {

        for (retval = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {

            if ((bytesRemaining = (filesize - uio->uio_offset)) <= 0)
                break;

            logBlockNo = (daddr_t)(uio->uio_offset / PAGE_SIZE_64);
            startOffset = (u_long) (uio->uio_offset & PAGE_MASK_64);
            fragSize = PAGE_SIZE;

            if (((logBlockNo * PAGE_SIZE) + fragSize) < filesize)
                ioxfersize = fragSize;
            else {
                ioxfersize = filesize - (logBlockNo * PAGE_SIZE);
                ioxfersize = (ioxfersize + (devBlockSize - 1)) & ~(devBlockSize - 1);
            }
            moveSize = ioxfersize;
            moveSize -= startOffset;

            if (bytesRemaining < moveSize)
                moveSize = bytesRemaining;

            if (uio->uio_resid < moveSize) {
                moveSize = uio->uio_resid;
            };
            if (moveSize == 0) {
                break;
            };

            if ((uio->uio_offset + fragSize) >= filesize) {
                retval = bread(vp, logBlockNo, ioxfersize, NOCRED, &bp);

            } else if (logBlockNo - 1 == vp->v_lastr && !(vp->v_flag & VRAOFF)) {
                daddr_t nextLogBlockNo = logBlockNo + 1;
                int nextsize;

                if (((nextLogBlockNo * PAGE_SIZE) +
                    (daddr_t)fragSize) < filesize)
                    nextsize = fragSize;
                else {
                    nextsize = filesize - (nextLogBlockNo * PAGE_SIZE);
                    nextsize = (nextsize + (devBlockSize - 1)) & ~(devBlockSize - 1);
                }
                retval = breadn(vp, logBlockNo, ioxfersize, &nextLogBlockNo, &nextsize, 1, NOCRED, &bp);
            } else {
                retval = bread(vp, logBlockNo, ioxfersize, NOCRED, &bp);
            };

            if (retval != E_NONE) {
                if (bp) {
                    brelse(bp);
                    bp = NULL;
                }
                break;
            };
            vp->v_lastr = logBlockNo;

            /*
             * We should only get a non-zero b_resid when an I/O error
             * has occurred, which should cause us to break above.
             * However, if the short read did not cause an error, then
             * we want to ensure that we do not uiomove bad or
             * uninitialized data.
             */
            ioxfersize -= bp->b_resid;

            if (ioxfersize < moveSize) {    /* XXX PPD This should take the offset into account, too! */
                if (ioxfersize == 0)
                    break;
                moveSize = ioxfersize;
            }
            if ((startOffset + moveSize) > bp->b_bcount)
                panic("hfs_read: bad startOffset or moveSize\n");

            if ((retval = uiomove((caddr_t)bp->b_data + startOffset, (int)moveSize, uio)))
                break;

            if (S_ISREG(cp->c_mode) &&
                (((startOffset + moveSize) == fragSize) || (uio->uio_offset == filesize))) {
                bp->b_flags |= B_AGE;
            };

            brelse(bp);
            /* Start of loop resets bp to NULL before reaching outside this block... */
        }

        if (bp != NULL) {
            brelse(bp);
        }
    }

    cp->c_flag |= C_ACCESS;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
        (int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0);

    return (retval);
}

/*
 * Write data to a file or directory.
#% write       vp    L L L
#
 vop_write {
     IN struct vnode *vp;
     INOUT struct uio *uio;
     IN int ioflag;
     IN struct ucred *cred;

     */
int
hfs_write(ap)
    struct vop_write_args /* {
        struct vnode *a_vp;
        struct uio *a_uio;
        int a_ioflag;
        struct ucred *a_cred;
    } */ *ap;
{
    struct vnode *vp = ap->a_vp;
    struct uio *uio = ap->a_uio;
    struct cnode *cp;
    struct filefork *fp;
    struct buf *bp;
    struct proc *p;
    struct timeval tv;
    ExtendedVCB *vcb;
    int devBlockSize = 0;
    daddr_t logBlockNo;
    long fragSize;
    off_t origFileSize, currOffset, writelimit, bytesToAdd;
    off_t actualBytesAdded;
    u_long blkoffset, resid, xfersize, clearSize;
    int eflags, ioflag;
    int retval;
    off_t filebytes;
    u_long fileblocks;
    struct hfsmount *hfsmp;
    int started_tr = 0, grabbed_lock = 0;

    ioflag = ap->a_ioflag;

    if (uio->uio_offset < 0)
        return (EINVAL);
    if (uio->uio_resid == 0)
        return (E_NONE);
    if (vp->v_type != VREG && vp->v_type != VLNK)
        return (EISDIR);        /* Can only write files */

    cp = VTOC(vp);
    fp = VTOF(vp);
    vcb = VTOVCB(vp);
    fileblocks = fp->ff_blocks;
    filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

    if (ioflag & IO_APPEND)
        uio->uio_offset = fp->ff_size;
    if ((cp->c_flags & APPEND) && uio->uio_offset != fp->ff_size)
        return (EPERM);

    // XXXdbg - don't allow modification of the journal or journal_info_block
    if (VTOHFS(vp)->jnl && cp->c_datafork) {
        struct HFSPlusExtentDescriptor *extd;

        extd = &cp->c_datafork->ff_data.cf_extents[0];
        if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock || extd->startBlock == VTOHFS(vp)->jnl_start) {
            return EPERM;
        }
    }

    writelimit = uio->uio_offset + uio->uio_resid;

    /*
     * Maybe this should be above the vnode op call, but so long as
     * file servers have no limits, I don't think it matters.
     */
    p = uio->uio_procp;
    if (vp->v_type == VREG && p &&
        writelimit > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
        psignal(p, SIGXFSZ);
        return (EFBIG);
    }
    p = current_proc();

    VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);

    resid = uio->uio_resid;
    origFileSize = fp->ff_size;
    eflags = kEFDeferMask;      /* defer file block allocations */
    filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

    /*
     * NOTE: In the following loop there are two positions tracked:
     * currOffset is the current I/O starting offset.  currOffset
     * is never >LEOF; the LEOF is nudged along with currOffset as
     * data is zeroed or written.  uio->uio_offset is the start of
     * the current I/O operation.  It may be arbitrarily beyond
     * currOffset.
     *
     * The following is true at all times:
     *   currOffset <= LEOF <= uio->uio_offset <= writelimit
     */
    currOffset = MIN(uio->uio_offset, fp->ff_size);

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
        (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);
    retval = 0;

    /* Now test if we need to extend the file */
    /* Doing so will adjust the filebytes for us */

#if QUOTA
    if (writelimit > filebytes) {
        bytesToAdd = writelimit - filebytes;

        retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, vcb->blockSize)),
                           ap->a_cred, 0);
        if (retval)
            return (retval);
    }
#endif /* QUOTA */

    hfsmp = VTOHFS(vp);
    if (writelimit > filebytes) {
        hfs_global_shared_lock_acquire(hfsmp);
        grabbed_lock = 1;
    }
    if (hfsmp->jnl && (writelimit > filebytes)) {
        if (journal_start_transaction(hfsmp->jnl) != 0) {
            hfs_global_shared_lock_release(hfsmp);
            return EINVAL;
        }
        started_tr = 1;
    }

    while (writelimit > filebytes) {

        bytesToAdd = writelimit - filebytes;
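        /* Non-superusers must leave the volume's block reserve intact. */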
        if (suser(ap->a_cred, NULL) != 0)
            eflags |= kEFReserveMask;

        /* lock extents b-tree (also protects volume bitmap) */
        retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, current_proc());
        if (retval != E_NONE)
            break;

        retval = MacToVFSError(ExtendFileC (vcb, (FCB*)fp, bytesToAdd,
                0, eflags, &actualBytesAdded));

        (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);
        if ((actualBytesAdded == 0) && (retval == E_NONE))
            retval = ENOSPC;
        if (retval != E_NONE)
            break;
        filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
            (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);
    }

    // XXXdbg
    if (started_tr) {
        hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
        journal_end_transaction(hfsmp->jnl);
        started_tr = 0;
    }
    if (grabbed_lock) {
        hfs_global_shared_lock_release(hfsmp);
        grabbed_lock = 0;
    }

    if (UBCISVALID(vp) && retval == E_NONE) {
        off_t filesize;
        off_t zero_off;
        off_t tail_off;
        off_t inval_start;
        off_t inval_end;
        off_t io_start, io_end;
        int lflag;
        struct rl_entry *invalid_range;

        if (writelimit > fp->ff_size)
            filesize = writelimit;
        else
            filesize = fp->ff_size;

        lflag = (ioflag & IO_SYNC);

        if (uio->uio_offset <= fp->ff_size) {
            zero_off = uio->uio_offset & ~PAGE_MASK_64;

            /*
             * Check whether the area between zero_off and the start of
             * the transfer is invalid and should be zero-filled as
             * part of the transfer:
             */
            if (rl_scan(&fp->ff_invalidranges, zero_off, uio->uio_offset - 1, &invalid_range) != RL_NOOVERLAP)
                lflag |= IO_HEADZEROFILL;
        } else {
            off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;

            /*
             * The bytes between fp->ff_size and uio->uio_offset must
             * never be read without being zeroed.  The current last
             * block is zero-filled now if it holds valid data; in all
             * cases we merely do a little bookkeeping to track the
             * area from the end of the current last page to the start
             * of the area actually written.  For the same reason only
             * the bytes up to the start of the page where this write
             * will start are invalidated; any remainder before
             * uio->uio_offset is explicitly zeroed as part of the
             * cluster_write.
             *
             * Note that inval_start, the start of the page after the
             * current EOF, may be past the start of the write, in
             * which case the zeroing will be handled by the
             * cluster_write of the actual data.
             */
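            /*
             * Worked example (4K pages), assuming the EOF page holds
             * valid data: with ff_size = 0x1800 and a write starting
             * at uio_offset = 0x3200, inval_start = 0x2000, inval_end
             * = 0x3000 and zero_off = 0x1800.  Bytes 0x1800-0x1fff are
             * zeroed explicitly below, page 0x2000-0x2fff is marked
             * invalid, and 0x3000-0x31ff is zeroed later by
             * cluster_write via IO_HEADZEROFILL.
             */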
            inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
            inval_end = uio->uio_offset & ~PAGE_MASK_64;
            zero_off = fp->ff_size;

            if ((fp->ff_size & PAGE_MASK_64) &&
                (rl_scan(&fp->ff_invalidranges,
                         eof_page_base,
                         fp->ff_size - 1,
                         &invalid_range) != RL_NOOVERLAP)) {
                /*
                 * The page containing the EOF is not valid, so the
                 * entire page must be made inaccessible now.  If the
                 * write starts on a page beyond the page containing
                 * the EOF (inval_end > eof_page_base), add the whole
                 * page to the range to be invalidated.  Otherwise
                 * (i.e. if the write starts on the same page),
                 * zero-fill the entire page explicitly now:
                 */
                if (inval_end > eof_page_base) {
                    inval_start = eof_page_base;
                } else {
                    zero_off = eof_page_base;
                };
            };

            if (inval_start < inval_end) {
                /* There's some range of data that's going to be marked invalid */

                if (zero_off < inval_start) {
                    /*
                     * The pages between inval_start and inval_end are
                     * going to be invalidated, and the actual write
                     * will start on a page past inval_end.  Now's the
                     * last chance to zero-fill the page containing the
                     * EOF:
                     */
                    retval = cluster_write(vp, (struct uio *) 0,
                            fp->ff_size, inval_start,
                            zero_off, (off_t)0, devBlockSize,
                            lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
                    if (retval) goto ioerr_exit;
                };

                /* Mark the remaining area of the newly allocated space as invalid: */
                rl_add(inval_start, inval_end - 1, &fp->ff_invalidranges);
                cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
                zero_off = fp->ff_size = inval_end;
            };

            if (uio->uio_offset > zero_off) lflag |= IO_HEADZEROFILL;
        };

        /*
         * Check to see whether the area between the end of the write
         * and the end of the page it falls in is invalid and should be
         * zero-filled as part of the transfer:
         */
        tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
        if (tail_off > filesize) tail_off = filesize;
        if (tail_off > writelimit) {
            if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
                lflag |= IO_TAILZEROFILL;
            };
        };

        /*
         * if the write starts beyond the current EOF (possibly advanced in the
         * zeroing of the last block, above), then we'll zero fill from the current EOF
         * to where the write begins:
         *
         * NOTE: If (and ONLY if) the portion of the file about to be written is
         * before the current EOF it might be marked as invalid now and must be
         * made readable (removed from the invalid ranges) before cluster_write
         * tries to write it:
         */
        io_start = (lflag & IO_HEADZEROFILL) ? zero_off : uio->uio_offset;
        io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
        if (io_start < fp->ff_size) {
            rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
        };
        retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
                tail_off, devBlockSize, lflag | IO_NOZERODIRTY);

        if (uio->uio_offset > fp->ff_size) {
            fp->ff_size = uio->uio_offset;

            ubc_setsize(vp, fp->ff_size);       /* XXX check errors */
        }
        if (resid > uio->uio_resid)
            cp->c_flag |= C_CHANGE | C_UPDATE;
    } else {
        while (retval == E_NONE && uio->uio_resid > 0) {
            logBlockNo = currOffset / PAGE_SIZE;
            blkoffset = currOffset & PAGE_MASK;

            if ((filebytes - currOffset) < PAGE_SIZE_64)
                fragSize = filebytes - ((off_t)logBlockNo * PAGE_SIZE_64);
            else
                fragSize = PAGE_SIZE;
            xfersize = fragSize - blkoffset;

            /* Make any adjustments for boundary conditions */
            if (currOffset + (off_t)xfersize > writelimit)
                xfersize = writelimit - currOffset;

            /*
             * There is no need to read into bp if we start on a block
             * boundary and will overwrite the whole block, OR if we
             * are starting the write at the current EOF on a block
             * boundary (handled below):
             */
            if ((blkoffset == 0) && (xfersize >= fragSize)) {
                bp = getblk(vp, logBlockNo, fragSize, 0, 0, BLK_READ);
                retval = 0;

                if (bp->b_blkno == -1) {
                    brelse(bp);
                    retval = EIO;       /* XXX */
                    break;
                }
            } else {

                if (currOffset == fp->ff_size && blkoffset == 0) {
                    bp = getblk(vp, logBlockNo, fragSize, 0, 0, BLK_READ);
                    retval = 0;
                    if (bp->b_blkno == -1) {
                        brelse(bp);
                        retval = EIO;   /* XXX */
                        break;
                    }
                } else {
                    /*
                     * This I/O transfer is not sufficiently aligned,
                     * so read the affected block into a buffer:
                     */
                    retval = bread(vp, logBlockNo, fragSize, ap->a_cred, &bp);
                    if (retval != E_NONE) {
                        if (bp)
                            brelse(bp);
                        break;
                    }
                }
            }

            /* See if we are starting to write within file boundaries:
             * If not, then we need to present a "hole" for the area
             * between the current EOF and the start of the current
             * I/O operation:
             *
             * Note that currOffset is only less than uio_offset if
             * uio_offset > LEOF...
             */
            if (uio->uio_offset > currOffset) {
                clearSize = MIN(uio->uio_offset - currOffset, xfersize);
                bzero(bp->b_data + blkoffset, clearSize);
                currOffset += clearSize;
                blkoffset += clearSize;
                xfersize -= clearSize;
            }

            if (xfersize > 0) {
                retval = uiomove((caddr_t)bp->b_data + blkoffset, (int)xfersize, uio);
                currOffset += xfersize;
            }

            if (ioflag & IO_SYNC) {
                (void)VOP_BWRITE(bp);
            } else if ((xfersize + blkoffset) == fragSize) {
                bp->b_flags |= B_AGE;
                bawrite(bp);
            } else {
                bdwrite(bp);
            }

            /* Update the EOF if we just extended the file
             * (the PEOF has already been moved out and the
             * block mapping table has been updated):
             */
            if (currOffset > fp->ff_size) {
                fp->ff_size = currOffset;
                if (UBCISVALID(vp))
                    ubc_setsize(vp, fp->ff_size);       /* XXX check errors */
            }
            if (retval || (resid == 0))
                break;
            cp->c_flag |= C_CHANGE | C_UPDATE;
        } /* endwhile */
    }

ioerr_exit:
    /*
     * If we successfully wrote any data, and we are not the superuser
     * we clear the setuid and setgid bits as a precaution against
     * tampering.
     */
    if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
        cp->c_mode &= ~(S_ISUID | S_ISGID);

    if (retval) {
        if (ioflag & IO_UNIT) {
            (void)VOP_TRUNCATE(vp, origFileSize,
                ioflag & IO_SYNC, ap->a_cred, uio->uio_procp);
            uio->uio_offset -= resid - uio->uio_resid;
            uio->uio_resid = resid;
            filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
        }
    } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
        tv = time;
        retval = VOP_UPDATE(vp, &tv, &tv, 1);
    }

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
        (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);

    return (retval);
}


/*

#% ioctl       vp    U U U
#
 vop_ioctl {
     IN struct vnode *vp;
     IN u_long command;
     IN caddr_t data;
     IN int fflag;
     IN struct ucred *cred;
     IN struct proc *p;

     */


/* ARGSUSED */
int
hfs_ioctl(ap)
    struct vop_ioctl_args /* {
        struct vnode *a_vp;
        int a_command;
        caddr_t a_data;
        int a_fflag;
        struct ucred *a_cred;
        struct proc *a_p;
    } */ *ap;
{
    switch (ap->a_command) {
    case 1: {
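        /*
         * Advisory read-ahead for a range of the file, described by a
         * struct radvisory (cf. the F_RDADVISE fcntl).
         */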
        register struct cnode *cp;
        register struct vnode *vp;
        register struct radvisory *ra;
        struct filefork *fp;
        int devBlockSize = 0;
        int error;

        vp = ap->a_vp;

        if (vp->v_type != VREG)
            return EINVAL;

        VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ);
        error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
        if (error)
            return (error);

        ra = (struct radvisory *)(ap->a_data);
        cp = VTOC(vp);
        fp = VTOF(vp);

        if (ra->ra_offset >= fp->ff_size) {
            VOP_UNLOCK(vp, 0, ap->a_p);
            return (EFBIG);
        }
        VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);

        error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count, devBlockSize);
        VOP_UNLOCK(vp, 0, ap->a_p);

        return (error);
    }

    case 2: /* F_READBOOTBLOCKS */
    case 3: /* F_WRITEBOOTBLOCKS */
    {
        struct vnode *vp = ap->a_vp;
        struct vnode *devvp = NULL;
        struct fbootstraptransfer *btd = (struct fbootstraptransfer *)ap->a_data;
        int devBlockSize;
        int error;
        struct iovec aiov;
        struct uio auio;
        u_long blockNumber;
        u_long blockOffset;
        u_long xfersize;
        struct buf *bp;

        if ((vp->v_flag & VROOT) == 0) return EINVAL;
        if (btd->fbt_offset + btd->fbt_length > 1024) return EINVAL;

        devvp = VTOHFS(vp)->hfs_devvp;
        aiov.iov_base = btd->fbt_buffer;
        aiov.iov_len = btd->fbt_length;

        auio.uio_iov = &aiov;
        auio.uio_iovcnt = 1;
        auio.uio_offset = btd->fbt_offset;
        auio.uio_resid = btd->fbt_length;
        auio.uio_segflg = UIO_USERSPACE;
        auio.uio_rw = (ap->a_command == 3) ? UIO_WRITE : UIO_READ; /* F_WRITEBOOTSTRAP / F_READBOOTSTRAP */
        auio.uio_procp = ap->a_p;

        VOP_DEVBLOCKSIZE(devvp, &devBlockSize);

        while (auio.uio_resid > 0) {
            blockNumber = auio.uio_offset / devBlockSize;
            error = bread(devvp, blockNumber, devBlockSize, ap->a_cred, &bp);
            if (error) {
                if (bp) brelse(bp);
                return error;
            };

            blockOffset = auio.uio_offset % devBlockSize;
            xfersize = devBlockSize - blockOffset;
            error = uiomove((caddr_t)bp->b_data + blockOffset, (int)xfersize, &auio);
            if (error) {
                brelse(bp);
                return error;
            };
            if (auio.uio_rw == UIO_WRITE) {
                error = VOP_BWRITE(bp);
                if (error) return error;
            } else {
                brelse(bp);
            };
        };
    };
    return 0;

    case _IOC(IOC_OUT,'h', 4, 0):       /* Create date in local time */
    {
        *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(ap->a_vp)->localCreateDate);
        return 0;
    }

    default:
        return (ENOTTY);
    }

    /* Should never get here */
    return 0;
}

/* ARGSUSED */
int
hfs_select(ap)
    struct vop_select_args /* {
        struct vnode *a_vp;
        int a_which;
        int a_fflags;
        struct ucred *a_cred;
        void *a_wql;
        struct proc *a_p;
    } */ *ap;
{
    /*
     * We should really check to see if I/O is possible.
     */
    return (1);
}

/*
 * Bmap converts the logical block number of a file to its physical block
 * number on the disk.
 */

/*
 * vp  - vnode of the file
 * bn  - logical block number to convert to a physical block number
 * vpp - returns the vnode for the block special file holding the filesystem
 *       containing the file of interest
 * bnp - address of where to return the filesystem physical block number
#% bmap        vp    L L L
#% bmap        vpp   - U -
#
 vop_bmap {
     IN struct vnode *vp;
     IN daddr_t bn;
     OUT struct vnode **vpp;
     IN daddr_t *bnp;
     OUT int *runp;
*/
/*
 * Converts a logical block number to a physical block, and optionally
 * returns the number of remaining blocks in a run.  The logical block is
 * based on hfsNode.logBlockSize.  The physical block number is based on
 * the device block size, currently 512.  The block run is returned in
 * logical blocks, and is the REMAINING amount of blocks.
 */

int
hfs_bmap(ap)
    struct vop_bmap_args /* {
        struct vnode *a_vp;
        daddr_t a_bn;
        struct vnode **a_vpp;
        daddr_t *a_bnp;
        int *a_runp;
    } */ *ap;
{
    struct vnode *vp = ap->a_vp;
    struct cnode *cp = VTOC(vp);
    struct filefork *fp = VTOF(vp);
    struct hfsmount *hfsmp = VTOHFS(vp);
    int retval = E_NONE;
    daddr_t logBlockSize;
    size_t bytesContAvail = 0;
    off_t blockposition;
    struct proc *p = NULL;
    int lockExtBtree;
    struct rl_entry *invalid_range;
    enum rl_overlaptype overlaptype;

    /*
     * Check for underlying vnode requests and ensure that logical
     * to physical mapping is requested.
     */
    if (ap->a_vpp != NULL)
        *ap->a_vpp = cp->c_devvp;
    if (ap->a_bnp == NULL)
        return (0);

    /* Only clustered I/O should have delayed allocations. */
    DBG_ASSERT(fp->ff_unallocblocks == 0);

    logBlockSize = GetLogicalBlockSize(vp);
    blockposition = (off_t)ap->a_bn * (off_t)logBlockSize;

    lockExtBtree = overflow_extents(fp);
    if (lockExtBtree) {
        p = current_proc();
        retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID,
                LK_EXCLUSIVE | LK_CANRECURSE, p);
        if (retval)
            return (retval);
    }

    retval = MacToVFSError(
        MapFileBlockC (HFSTOVCB(hfsmp),
            (FCB*)fp,
            MAXPHYSIO,
            blockposition,
            ap->a_bnp,
            &bytesContAvail));

    if (lockExtBtree) (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);

    if (retval == E_NONE) {
        /* Adjust the mapping information for invalid file ranges: */
        overlaptype = rl_scan(&fp->ff_invalidranges,
                blockposition,
                blockposition + MAXPHYSIO - 1,
                &invalid_range);
        if (overlaptype != RL_NOOVERLAP) {
            switch(overlaptype) {
            case RL_MATCHINGOVERLAP:
            case RL_OVERLAPCONTAINSRANGE:
            case RL_OVERLAPSTARTSBEFORE:
                /* There's no valid block for this byte offset: */
                *ap->a_bnp = (daddr_t)-1;
                bytesContAvail = invalid_range->rl_end + 1 - blockposition;
                break;

            case RL_OVERLAPISCONTAINED:
            case RL_OVERLAPENDSAFTER:
                /* The range of interest hits an invalid block before the end: */
                if (invalid_range->rl_start == blockposition) {
                    /* There's actually no valid information to be had starting here: */
                    *ap->a_bnp = (daddr_t)-1;
                    if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
                        (invalid_range->rl_end + 1 - blockposition < bytesContAvail)) {
                        bytesContAvail = invalid_range->rl_end + 1 - blockposition;
                    };
                } else {
                    bytesContAvail = invalid_range->rl_start - blockposition;
                };
                break;
            };
            if (bytesContAvail > MAXPHYSIO) bytesContAvail = MAXPHYSIO;
        };

        /* Figure out how many read ahead blocks there are */
        if (ap->a_runp != NULL) {
            if (can_cluster(logBlockSize)) {
                /* Make sure this result never goes negative: */
                *ap->a_runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
            } else {
                *ap->a_runp = 0;
            };
        };
    };

    return (retval);
}

/* blktooff converts logical block number to file offset */

int
hfs_blktooff(ap)
    struct vop_blktooff_args /* {
        struct vnode *a_vp;
        daddr_t a_lblkno;
        off_t *a_offset;
    } */ *ap;
{
    if (ap->a_vp == NULL)
        return (EINVAL);
    *ap->a_offset = (off_t)ap->a_lblkno * PAGE_SIZE_64;

    return (0);
}

int
hfs_offtoblk(ap)
    struct vop_offtoblk_args /* {
        struct vnode *a_vp;
        off_t a_offset;
        daddr_t *a_lblkno;
    } */ *ap;
{
    if (ap->a_vp == NULL)
        return (EINVAL);
    *ap->a_lblkno = ap->a_offset / PAGE_SIZE_64;

    return (0);
}

int
hfs_cmap(ap)
    struct vop_cmap_args /* {
        struct vnode *a_vp;
        off_t a_foffset;
        size_t a_size;
        daddr_t *a_bpn;
        size_t *a_run;
        void *a_poff;
    } */ *ap;
{
    struct hfsmount *hfsmp = VTOHFS(ap->a_vp);
    struct filefork *fp = VTOF(ap->a_vp);
    size_t bytesContAvail = 0;
    int retval = E_NONE;
    int lockExtBtree = 0;
    struct proc *p = NULL;
    struct rl_entry *invalid_range;
    enum rl_overlaptype overlaptype;
    int started_tr = 0, grabbed_lock = 0;

    /*
     * Check for underlying vnode requests and ensure that logical
     * to physical mapping is requested.
     */
    if (ap->a_bpn == NULL)
        return (0);

    p = current_proc();
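
    /*
     * If this fork still has "loaned" (delayed-allocation) blocks,
     * they must be converted into real allocations, under the extents
     * B-tree lock and a journal transaction, before the requested
     * offset can be mapped to a physical block.
     */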
retry:
    if (fp->ff_unallocblocks) {
        lockExtBtree = 1;

        // XXXdbg
        hfs_global_shared_lock_acquire(hfsmp);
        grabbed_lock = 1;

        if (hfsmp->jnl) {
            if (journal_start_transaction(hfsmp->jnl) != 0) {
                hfs_global_shared_lock_release(hfsmp);
                return EINVAL;
            } else {
                started_tr = 1;
            }
        }

        if ((retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p))) {
            if (started_tr) {
                journal_end_transaction(hfsmp->jnl);
            }
            if (grabbed_lock) {
                hfs_global_shared_lock_release(hfsmp);
            }
            return (retval);
        }
    } else if (overflow_extents(fp)) {
        lockExtBtree = 1;
        if ((retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p))) {
            return retval;
        }
    }

    /*
     * Check for any delayed allocations.
     */
    if (fp->ff_unallocblocks) {
        SInt64 reqbytes, actbytes;

        //
        // Make sure we have a transaction.  It's possible
        // that we came in and fp->ff_unallocblocks was zero
        // but during the time we blocked acquiring the extents
        // btree, ff_unallocblocks became non-zero and so we
        // will need to start a transaction.
        //
        if (hfsmp->jnl && started_tr == 0) {
            if (lockExtBtree) {
                (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
                lockExtBtree = 0;
            }

            goto retry;
        }

        reqbytes = (SInt64)fp->ff_unallocblocks *
                   (SInt64)HFSTOVCB(hfsmp)->blockSize;
        /*
         * Release the blocks on loan and acquire some real ones.
         * Note that we can race someone else for these blocks
         * (and lose) so cmap needs to handle a failure here.
         * Currently this race can't occur because all allocations
         * are protected by an exclusive lock on the Extents
         * Overflow file.
         */
        HFSTOVCB(hfsmp)->loanedBlocks -= fp->ff_unallocblocks;
        FTOC(fp)->c_blocks -= fp->ff_unallocblocks;
        fp->ff_blocks -= fp->ff_unallocblocks;
        fp->ff_unallocblocks = 0;

        while (retval == 0 && reqbytes > 0) {
            retval = MacToVFSError(ExtendFileC(HFSTOVCB(hfsmp),
                    (FCB*)fp, reqbytes, 0,
                    kEFAllMask | kEFNoClumpMask, &actbytes));
            if (retval == 0 && actbytes == 0)
                retval = ENOSPC;

            if (retval) {
                fp->ff_unallocblocks =
                    reqbytes / HFSTOVCB(hfsmp)->blockSize;
                HFSTOVCB(hfsmp)->loanedBlocks += fp->ff_unallocblocks;
                FTOC(fp)->c_blocks += fp->ff_unallocblocks;
                fp->ff_blocks += fp->ff_unallocblocks;
            }
            reqbytes -= actbytes;
        }

        if (retval) {
            (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
            if (started_tr) {
                hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
                journal_end_transaction(hfsmp->jnl);
            }
            if (grabbed_lock) {
                hfs_global_shared_lock_release(hfsmp);
            }
            return (retval);
        }
        VTOC(ap->a_vp)->c_flag |= C_MODIFIED;
    }

    retval = MacToVFSError(
        MapFileBlockC (HFSTOVCB(hfsmp),
            (FCB *)fp,
            ap->a_size,
            ap->a_foffset,
            ap->a_bpn,
            &bytesContAvail));

    if (lockExtBtree)
        (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);

    // XXXdbg
    if (started_tr) {
        hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
        journal_end_transaction(hfsmp->jnl);
        started_tr = 0;
    }
    if (grabbed_lock) {
        hfs_global_shared_lock_release(hfsmp);
        grabbed_lock = 0;
    }

    if (retval == E_NONE) {
        /* Adjust the mapping information for invalid file ranges: */
        overlaptype = rl_scan(&fp->ff_invalidranges,
                ap->a_foffset,
                ap->a_foffset + (off_t)bytesContAvail - 1,
                &invalid_range);
        if (overlaptype != RL_NOOVERLAP) {
            switch(overlaptype) {
            case RL_MATCHINGOVERLAP:
            case RL_OVERLAPCONTAINSRANGE:
            case RL_OVERLAPSTARTSBEFORE:
                /* There's no valid block for this byte offset: */
                *ap->a_bpn = (daddr_t)-1;

                /*
                 * There's no point limiting the amount to be returned
                 * if the invalid range that was hit extends all the
                 * way to the EOF (i.e. there's no valid bytes between
                 * the end of this range and the file's EOF):
                 */
                if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
                    (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
                    bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
                };
                break;

            case RL_OVERLAPISCONTAINED:
            case RL_OVERLAPENDSAFTER:
                /* The range of interest hits an invalid block before the end: */
                if (invalid_range->rl_start == ap->a_foffset) {
                    /* There's actually no valid information to be had starting here: */
                    *ap->a_bpn = (daddr_t)-1;
                    if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
                        (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
                        bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
                    };
                } else {
                    bytesContAvail = invalid_range->rl_start - ap->a_foffset;
                };
                break;
            };
            if (bytesContAvail > ap->a_size) bytesContAvail = ap->a_size;
        };

        if (ap->a_run) *ap->a_run = bytesContAvail;
    };

    if (ap->a_poff)
        *(int *)ap->a_poff = 0;

    return (retval);
}


/*
 * Read or write a buffer that is not contiguous on disk.  We loop over
 * each device block, copying to or from the caller's buffer.
 *
 * We could be a bit more efficient by transferring as much data as is
 * contiguous, but since this routine should rarely be called, and doing
 * so would be more complicated, it's best to keep it simple.
 */
static int
hfs_strategy_fragmented(struct buf *bp)
{
    register struct vnode *vp = bp->b_vp;
    register struct cnode *cp = VTOC(vp);
    register struct vnode *devvp = cp->c_devvp;
    caddr_t ioaddr;             /* Address of fragment within bp */
    struct buf *frag = NULL;    /* For reading or writing a single block */
    int retval = 0;
    long remaining;             /* Bytes (in bp) left to transfer */
    off_t offset;               /* Logical offset of current fragment in vp */
    u_long block_size;          /* Size of one device block (and one I/O) */

    /* Make sure we redo this mapping for the next I/O */
    bp->b_blkno = bp->b_lblkno;

    /* Set up the logical position and number of bytes to read/write */
    offset = (off_t) bp->b_lblkno * (off_t) GetLogicalBlockSize(vp);
    block_size = VTOHFS(vp)->hfs_phys_block_size;

    /* Get an empty buffer to do the deblocking */
    frag = geteblk(block_size);
    if (ISSET(bp->b_flags, B_READ))
        SET(frag->b_flags, B_READ);

    for (ioaddr = bp->b_data, remaining = bp->b_bcount; remaining != 0;
        ioaddr += block_size, offset += block_size,
        remaining -= block_size) {
        frag->b_resid = frag->b_bcount;
        CLR(frag->b_flags, B_DONE);

        /* Map the current position to a physical block number */
        retval = VOP_CMAP(vp, offset, block_size, &frag->b_lblkno,
                NULL, NULL);
        if (retval != 0)
            break;

        /*
         * Did we try to read a hole?
         * (Should never happen for metadata!)
         */
        if ((long)frag->b_lblkno == -1) {
            bzero(ioaddr, block_size);
            continue;
        }

        /* If writing, copy before I/O */
        if (!ISSET(bp->b_flags, B_READ))
            bcopy(ioaddr, frag->b_data, block_size);

        /* Call the device to do the I/O and wait for it */
        frag->b_blkno = frag->b_lblkno;
        frag->b_vp = devvp;     /* Used to dispatch via VOP_STRATEGY */
        frag->b_dev = devvp->v_rdev;
        retval = VOP_STRATEGY(frag);
        frag->b_vp = NULL;
        if (retval != 0)
            break;
        retval = biowait(frag);
        if (retval != 0)
            break;

        /* If reading, copy after the I/O */
        if (ISSET(bp->b_flags, B_READ))
            bcopy(frag->b_data, ioaddr, block_size);
    }

    frag->b_vp = NULL;
    //
    // XXXdbg - in the case that this is a meta-data block, it won't affect
    //          the journal because this bp is for a physical disk block,
    //          not a logical block that is part of the catalog or extents
    //          files.
    SET(frag->b_flags, B_INVAL);
    brelse(frag);

    if ((bp->b_error = retval) != 0)
        SET(bp->b_flags, B_ERROR);

    biodone(bp);        /* This I/O is now complete */
    return retval;
}


/*
 * Calculate the logical to physical mapping if not done already,
 * then call the device strategy routine.
#
#vop_strategy {
#	IN struct buf *bp;
*/
int
hfs_strategy(ap)
    struct vop_strategy_args /* {
        struct buf *a_bp;
    } */ *ap;
{
    register struct buf *bp = ap->a_bp;
    register struct vnode *vp = bp->b_vp;
    register struct cnode *cp = VTOC(vp);
    int retval = 0;
    off_t offset;
    size_t bytes_contig;

    if ( !(bp->b_flags & B_VECTORLIST)) {
        if (vp->v_type == VBLK || vp->v_type == VCHR)
            panic("hfs_strategy: device vnode passed!");

        if (bp->b_flags & B_PAGELIST) {
            /*
             * If we have a page list associated with this bp,
             * then go through cluster_bp since it knows how to
             * deal with a page request that might span non-
             * contiguous physical blocks on the disk...
             */
            retval = cluster_bp(bp);
            vp = cp->c_devvp;
            bp->b_dev = vp->v_rdev;

            return (retval);
        }

        /*
         * If we don't already know the filesystem relative block
         * number then get it using VOP_CMAP().  If VOP_CMAP()
         * returns the block number as -1 then we've got a hole in
         * the file.  Although HFS filesystems don't create files with
         * holes, invalidating of subranges of the file (lazy zero
         * filling) may create such a situation.
         */
        if (bp->b_blkno == bp->b_lblkno) {
            offset = (off_t) bp->b_lblkno *
                     (off_t) GetLogicalBlockSize(vp);

            if ((retval = VOP_CMAP(vp, offset, bp->b_bcount,
                    &bp->b_blkno, &bytes_contig, NULL))) {
                bp->b_error = retval;
                bp->b_flags |= B_ERROR;
                biodone(bp);
                return (retval);
            }
            if (bytes_contig < bp->b_bcount) {
                /*
                 * We were asked to read a block that wasn't
                 * contiguous, so we have to read each of the
                 * pieces and copy them into the buffer.
                 * Since ordinary file I/O goes through
                 * cluster_io (which won't ask us for
                 * discontiguous data), this is probably an
                 * attempt to read or write metadata.
                 */
                return hfs_strategy_fragmented(bp);
            }
            if ((long)bp->b_blkno == -1)
                clrbuf(bp);
        }
        if ((long)bp->b_blkno == -1) {
            biodone(bp);
            return (0);
        }
        if (bp->b_validend == 0) {
            /*
             * Record the exact size of the I/O transfer about to
             * be made:
             */
            bp->b_validend = bp->b_bcount;
        }
    }
    vp = cp->c_devvp;
    bp->b_dev = vp->v_rdev;

    return VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
}


/*
#
#% truncate    vp    L L L
#
vop_truncate {
	IN struct vnode *vp;
	IN off_t length;
	IN int flags;	(IO_SYNC)
	IN struct ucred *cred;
	IN struct proc *p;
};
 * Truncate a cnode to at most length size, freeing (or adding) the
 * disk blocks.
 */
int hfs_truncate(ap)
    struct vop_truncate_args /* {
        struct vnode *a_vp;
        off_t a_length;
        int a_flags;
        struct ucred *a_cred;
        struct proc *a_p;
    } */ *ap;
{
    register struct vnode *vp = ap->a_vp;
    register struct cnode *cp = VTOC(vp);
    struct filefork *fp = VTOF(vp);
    off_t length;
    long vflags;
    struct timeval tv;
    int retval;
    off_t bytesToAdd;
    off_t actualBytesAdded;
    off_t filebytes;
    u_long fileblocks;
    int blksize;
    struct hfsmount *hfsmp;

    if (vp->v_type != VREG && vp->v_type != VLNK)
        return (EISDIR);        /* cannot truncate an HFS directory! */

    length = ap->a_length;
    blksize = VTOVCB(vp)->blockSize;
    fileblocks = fp->ff_blocks;
    filebytes = (off_t)fileblocks * (off_t)blksize;

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
        (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);

    if (length < 0)
        return (EINVAL);

    if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
        return (EFBIG);

    hfsmp = VTOHFS(vp);

    tv = time;
    retval = E_NONE;

    /*
     * We cannot just check if fp->ff_size == length (as an optimization)
     * since there may be extra physical blocks that also need truncation.
     */
#if QUOTA
    if ((retval = hfs_getinoquota(cp)))
        return (retval);
#endif /* QUOTA */

    /*
     * Lengthen the size of the file.  We must ensure that the
     * last byte of the file is allocated.  Since the smallest
     * value of ff_size is 0, length will be at least 1.
     */
    if (length > fp->ff_size) {
#if QUOTA
        retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
                           ap->a_cred, 0);
        if (retval)
            goto Err_Exit;
#endif /* QUOTA */
        /*
         * If we don't have enough physical space then
         * we need to extend the physical size.
         */
        if (length > filebytes) {
            int eflags;

            /* All or nothing and don't round up to clumpsize. */
            eflags = kEFAllMask | kEFNoClumpMask;

            if (suser(ap->a_cred, NULL) != 0)
                eflags |= kEFReserveMask;       /* keep a reserve */

            // XXXdbg
            hfs_global_shared_lock_acquire(hfsmp);
            if (hfsmp->jnl) {
                if (journal_start_transaction(hfsmp->jnl) != 0) {
                    retval = EINVAL;
                    goto Err_Exit;
                }
            }

            /* lock extents b-tree (also protects volume bitmap) */
            retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
            if (retval) {
                if (hfsmp->jnl) {
                    journal_end_transaction(hfsmp->jnl);
                }
                hfs_global_shared_lock_release(hfsmp);

                goto Err_Exit;
            }

            while ((length > filebytes) && (retval == E_NONE)) {
                bytesToAdd = length - filebytes;
                retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
                        (FCB*)fp,
                        bytesToAdd,
                        0,
                        eflags,
                        &actualBytesAdded));

                filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
                if (actualBytesAdded == 0 && retval == E_NONE) {
                    if (length > filebytes)
                        length = filebytes;
                    break;
                }
            } /* endwhile */

            (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);

            // XXXdbg
            if (hfsmp->jnl) {
                hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
                journal_end_transaction(hfsmp->jnl);
            }
            hfs_global_shared_lock_release(hfsmp);

            if (retval)
                goto Err_Exit;

            KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
                (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
        }

        if (!(ap->a_flags & IO_NOZEROFILL)) {
            if (UBCINFOEXISTS(vp) && retval == E_NONE) {
                struct rl_entry *invalid_range;
                int devBlockSize;
                off_t zero_limit;

                zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
                if (length < zero_limit) zero_limit = length;

                if (length > fp->ff_size) {
                    /* Extending the file: time to fill out the current last page with zeroes? */
                    if ((fp->ff_size & PAGE_MASK_64) &&
                        (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
                            fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {

                        /* There's some valid data at the start of the (current) last page
                           of the file, so zero out the remainder of that page to ensure the
                           entire page contains valid data.  Since there is no invalid range
                           possible past the (current) eof, there's no need to remove anything
                           from the invalid range list before calling cluster_write(): */
                        VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
                        retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
                                fp->ff_size, (off_t)0, devBlockSize,
                                (ap->a_flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
                        if (retval) goto Err_Exit;

                        /* Merely invalidate the remaining area, if necessary: */
                        if (length > zero_limit) {
                            rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
                            cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
                        }
                    } else {
                        /* The page containing the (current) eof is invalid: just add the
                           remainder of the page to the invalid list, along with the area
                           being newly allocated:
                         */
                        rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
                        cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
                    };
                }
            } else {
                panic("hfs_truncate: invoked on non-UBC object?!");
            };
        }
        cp->c_flag |= C_UPDATE;
        fp->ff_size = length;

        if (UBCISVALID(vp))
            ubc_setsize(vp, fp->ff_size);       /* XXX check errors */

    } else { /* Shorten the size of the file */

        if (fp->ff_size > length) {
            /*
             * Any buffers that are past the truncation point need to be
             * invalidated (to maintain buffer cache consistency).  For
             * simplicity, we invalidate all the buffers by calling vinvalbuf.
             */
            if (UBCISVALID(vp))
                ubc_setsize(vp, length);        /* XXX check errors */

            vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
            retval = vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);

            /* Any space previously marked as invalid is now irrelevant: */
            rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
        }

        /*
         * Account for any unmapped blocks.  Note that the new
         * file length can still end up with unmapped blocks.
         */
        if (fp->ff_unallocblocks > 0) {
            u_int32_t finalblks;

            /* lock extents b-tree */
            retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID,
                    LK_EXCLUSIVE, ap->a_p);
            if (retval)
                goto Err_Exit;

            VTOVCB(vp)->loanedBlocks -= fp->ff_unallocblocks;
            cp->c_blocks -= fp->ff_unallocblocks;
            fp->ff_blocks -= fp->ff_unallocblocks;
            fp->ff_unallocblocks = 0;

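            /* Allocation blocks needed to cover the new length, rounded up: */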
            finalblks = (length + blksize - 1) / blksize;
            if (finalblks > fp->ff_blocks) {
                /* calculate required unmapped blocks */
                fp->ff_unallocblocks = finalblks - fp->ff_blocks;
                VTOVCB(vp)->loanedBlocks += fp->ff_unallocblocks;
                cp->c_blocks += fp->ff_unallocblocks;
                fp->ff_blocks += fp->ff_unallocblocks;
            }
            (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID,
                    LK_RELEASE, ap->a_p);
        }

        /*
         * For a TBE process the deallocation of the file blocks is
         * delayed until the file is closed.  And hfs_close calls
         * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
         * isn't set, we make sure this isn't a TBE process.
         */
        if ((ap->a_flags & IO_NDELAY) || (!ISSET(ap->a_p->p_flag, P_TBE))) {
#if QUOTA
            off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
#endif /* QUOTA */
            // XXXdbg
            hfs_global_shared_lock_acquire(hfsmp);
            if (hfsmp->jnl) {
                if (journal_start_transaction(hfsmp->jnl) != 0) {
                    retval = EINVAL;
                    goto Err_Exit;
                }
            }

            /* lock extents b-tree (also protects volume bitmap) */
            retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
            if (retval) {
                if (hfsmp->jnl) {
                    journal_end_transaction(hfsmp->jnl);
                }
                hfs_global_shared_lock_release(hfsmp);
                goto Err_Exit;
            }

            if (fp->ff_unallocblocks == 0)
                retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
                        (FCB*)fp, length, false));

            (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);

            // XXXdbg
            if (hfsmp->jnl) {
                hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
                journal_end_transaction(hfsmp->jnl);
            }
            hfs_global_shared_lock_release(hfsmp);

            filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
            if (retval)
                goto Err_Exit;
#if QUOTA
            /* These are bytesreleased */
            (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
#endif /* QUOTA */
        }
        /* Only set update flag if the logical length changes */
        if (fp->ff_size != length)
            cp->c_flag |= C_UPDATE;
        fp->ff_size = length;
    }
    cp->c_flag |= C_CHANGE;
    retval = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
    if (retval) {
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
            -1, -1, -1, retval, 0);
    }

Err_Exit:

    KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
        (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);

    return (retval);
}


/*
#
#% allocate    vp    L L L
#
vop_allocate {
	IN struct vnode *vp;
	IN off_t length;
	IN int flags;
	OUT off_t *bytesallocated;
	IN off_t offset;
	IN struct ucred *cred;
	IN struct proc *p;
};
 * allocate a cnode to at most length size
 */
int hfs_allocate(ap)
    struct vop_allocate_args /* {
        struct vnode *a_vp;
        off_t a_length;
        u_int32_t a_flags;
        off_t *a_bytesallocated;
        off_t a_offset;
        struct ucred *a_cred;
        struct proc *a_p;
    } */ *ap;
{
    struct vnode *vp = ap->a_vp;
    struct cnode *cp = VTOC(vp);
    struct filefork *fp = VTOF(vp);
    off_t length = ap->a_length;
    off_t startingPEOF;
    off_t moreBytesRequested;
    off_t actualBytesAdded;
    off_t filebytes;
    u_long fileblocks;
    long vflags;
    struct timeval tv;
    int retval, retval2;
    UInt32 blockHint;
    UInt32 extendFlags = 0;     /* For call to ExtendFileC */
    struct hfsmount *hfsmp;

    hfsmp = VTOHFS(vp);

    *(ap->a_bytesallocated) = 0;
    fileblocks = fp->ff_blocks;
    filebytes = (off_t)fileblocks * (off_t)VTOVCB(vp)->blockSize;

    if (length < (off_t)0)
        return (EINVAL);
    if (vp->v_type != VREG && vp->v_type != VLNK)
        return (EISDIR);
    if ((ap->a_flags & ALLOCATEFROMVOL) && (length <= filebytes))
        return (EINVAL);

    /* Fill in the flags word for the call to Extend the file */

    if (ap->a_flags & ALLOCATECONTIG)
        extendFlags |= kEFContigMask;

    if (ap->a_flags & ALLOCATEALL)
        extendFlags |= kEFAllMask;

    if (suser(ap->a_cred, NULL) != 0)
        extendFlags |= kEFReserveMask;

    tv = time;
    retval = E_NONE;
    blockHint = 0;
    startingPEOF = filebytes;

    if (ap->a_flags & ALLOCATEFROMPEOF)
        length += filebytes;
    else if (ap->a_flags & ALLOCATEFROMVOL)
        blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

    /* If no changes are necessary, then we're done */
    if (filebytes == length)
        goto Std_Exit;

    /*
     * Lengthen the size of the file.  We must ensure that the
     * last byte of the file is allocated.  Since the smallest
     * value of filebytes is 0, length will be at least 1.
     */
    if (length > filebytes) {
        moreBytesRequested = length - filebytes;

#if QUOTA
        retval = hfs_chkdq(cp,
                (int64_t)(roundup(moreBytesRequested, VTOVCB(vp)->blockSize)),
                ap->a_cred, 0);
        if (retval)
            return (retval);

#endif /* QUOTA */
        // XXXdbg
        hfs_global_shared_lock_acquire(hfsmp);
        if (hfsmp->jnl) {
            if (journal_start_transaction(hfsmp->jnl) != 0) {
                retval = EINVAL;
                goto Err_Exit;
            }
        }

        /* lock extents b-tree (also protects volume bitmap) */
        retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
        if (retval) {
            if (hfsmp->jnl) {
                journal_end_transaction(hfsmp->jnl);
            }
            hfs_global_shared_lock_release(hfsmp);
            goto Err_Exit;
        }

        retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
                (FCB*)fp,
                moreBytesRequested,
                blockHint,
                extendFlags,
                &actualBytesAdded));

        *(ap->a_bytesallocated) = actualBytesAdded;
        filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize;

        (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);

        // XXXdbg
        if (hfsmp->jnl) {
            hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
            journal_end_transaction(hfsmp->jnl);
        }
        hfs_global_shared_lock_release(hfsmp);

        /*
         * If we got an error and no changes were made, exit early;
         * otherwise we must do the VOP_UPDATE to reflect the changes.
         */
        if (retval && (startingPEOF == filebytes))
            goto Err_Exit;

        /*
         * Adjust actualBytesAdded to be allocation block aligned, not
         * clump size aligned.
         * NOTE: So what we are reporting does not affect reality
         * until the file is closed, when we truncate the file to allocation
         * block size.
         */
        if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
            *(ap->a_bytesallocated) =
                roundup(moreBytesRequested, (off_t)VTOVCB(vp)->blockSize);

    } else { /* Shorten the size of the file */

        if (fp->ff_size > length) {
            /*
             * Any buffers that are past the truncation point need to be
             * invalidated (to maintain buffer cache consistency).  For
             * simplicity, we invalidate all the buffers by calling vinvalbuf.
             */
            vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
            (void) vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
        }

        // XXXdbg
        hfs_global_shared_lock_acquire(hfsmp);
        if (hfsmp->jnl) {
            if (journal_start_transaction(hfsmp->jnl) != 0) {
                retval = EINVAL;
                goto Err_Exit;
            }
        }

        /* lock extents b-tree (also protects volume bitmap) */
        retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
        if (retval) {
            if (hfsmp->jnl) {
                journal_end_transaction(hfsmp->jnl);
            }
            hfs_global_shared_lock_release(hfsmp);

            goto Err_Exit;
        }

        retval = MacToVFSError(
            TruncateFileC(
                VTOVCB(vp),
                (FCB*)fp,
                length,
                false));
        (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
        filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize;

        if (hfsmp->jnl) {
            hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
            journal_end_transaction(hfsmp->jnl);
        }
        hfs_global_shared_lock_release(hfsmp);

        /*
         * If we got an error and no changes were made, exit early;
         * otherwise we must do the VOP_UPDATE to reflect the changes.
         */
        if (retval && (startingPEOF == filebytes)) goto Err_Exit;
#if QUOTA
        /* These are bytesreleased */
        (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED, 0);
#endif /* QUOTA */

        if (fp->ff_size > filebytes) {
            fp->ff_size = filebytes;

            if (UBCISVALID(vp))
                ubc_setsize(vp, fp->ff_size);   /* XXX check errors */
        }
    }

Std_Exit:
    cp->c_flag |= C_CHANGE | C_UPDATE;
    retval2 = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);

    if (retval == 0)
        retval = retval2;
Err_Exit:
    return (retval);
}


/*
 * pagein for HFS filesystem
 */
int
hfs_pagein(ap)
    struct vop_pagein_args /* {
        struct vnode *a_vp,
        upl_t a_pl,
        vm_offset_t a_pl_offset,
        off_t a_f_offset,
        size_t a_size,
        struct ucred *a_cred,
        int a_flags
    } */ *ap;
{
    register struct vnode *vp = ap->a_vp;
    int devBlockSize = 0;
    int error;

    if (vp->v_type != VREG && vp->v_type != VLNK)
        panic("hfs_pagein: vp not UBC type\n");

    VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devBlockSize);

    error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
            ap->a_size, (off_t)VTOF(vp)->ff_size, devBlockSize,
            ap->a_flags);
    return (error);
}

/*
 * pageout for HFS filesystem.
 */
int
hfs_pageout(ap)
    struct vop_pageout_args /* {
        struct vnode *a_vp,
        upl_t a_pl,
        vm_offset_t a_pl_offset,
        off_t a_f_offset,
        size_t a_size,
        struct ucred *a_cred,
        int a_flags
    } */ *ap;
{
    struct vnode *vp = ap->a_vp;
    struct cnode *cp = VTOC(vp);
    struct filefork *fp = VTOF(vp);
    int retval;
    int devBlockSize = 0;
    off_t end_of_range;
    off_t filesize;

    if (UBCINVALID(vp))
        panic("hfs_pageout: Not a VREG: vp=%x", vp);

    VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
    filesize = fp->ff_size;
    end_of_range = ap->a_f_offset + ap->a_size - 1;

    if (end_of_range >= filesize)
        end_of_range = (off_t)(filesize - 1);
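    /*
     * The pages being flushed will hold valid on-disk data once this
     * pageout completes, so drop them from the fork's invalid-range
     * list beforehand.
     */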
    if (ap->a_f_offset < filesize)
        rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);

    retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size,
                             filesize, devBlockSize, ap->a_flags);

    /*
     * If we successfully wrote any data, and we are not the superuser
     * we clear the setuid and setgid bits as a precaution against
     * tampering.
     */
    if (retval == 0 && ap->a_cred && ap->a_cred->cr_uid != 0)
        cp->c_mode &= ~(S_ISUID | S_ISGID);

    return (retval);
}

/*
 * Intercept B-Tree node writes to unswap them if necessary.
#
#vop_bwrite {
#	IN struct buf *bp;
*/
int
hfs_bwrite(ap)
    struct vop_bwrite_args /* {
        struct buf *a_bp;
    } */ *ap;
{
    int retval = 0;
    register struct buf *bp = ap->a_bp;
    register struct vnode *vp = bp->b_vp;
#if BYTE_ORDER == LITTLE_ENDIAN
    BlockDescriptor block;

    /* Trap B-Tree writes */
    if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
        (VTOC(vp)->c_fileid == kHFSCatalogFileID)) {

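        /*
         * The last two bytes of a B-tree node hold the offset of the
         * first record, which always follows the 14-byte node
         * descriptor: they read 0x000e in native order, or 0x0e00 when
         * the node is still in big-endian (on-disk) order.
         */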
        /* Swap if the B-Tree node is in native byte order */
        if (((UInt16 *)((char *)bp->b_data + bp->b_bcount - 2))[0] == 0x000e) {
            /* Prepare the block pointer */
            block.blockHeader = bp;
            block.buffer = bp->b_data;
            /* not found in cache ==> came from disk */
            block.blockReadFromDisk = (bp->b_flags & B_CACHE) == 0;
            block.blockSize = bp->b_bcount;

            /* Endian un-swap B-Tree node */
            SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1);
        }

        /* We don't check to make sure that it's 0x0e00 because it could be all zeros */
    }
#endif
    /* This buffer shouldn't be locked anymore but if it is clear it */
    if (ISSET(bp->b_flags, B_LOCKED)) {
        // XXXdbg
        if (VTOHFS(vp)->jnl) {
            panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
        }
        CLR(bp->b_flags, B_LOCKED);
        printf("hfs_bwrite: called with lock bit set\n");
    }
    retval = vn_bwrite (ap);

    return (retval);
}