bsd/hfs/hfs_readwrite.c

   1 /*
   2  * Copyright (c) 2000-2002 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * The contents of this file constitute Original Code as defined in and
   7  * are subject to the Apple Public Source License Version 1.1 (the
   8  * "License").  You may not use this file except in compliance with the
   9  * License.  Please obtain a copy of the License at
  10  * http://www.apple.com/publicsource and read it before using this file.
  11  *
  12  * This Original Code and all software distributed under the License are
  13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17  * License for the specific language governing rights and limitations
  18  * under the License.
  19  *
  20  * @APPLE_LICENSE_HEADER_END@
  21  */
  22 /*      @(#)hfs_readwrite.c     1.0
  23  *
  24  *      (c) 1998-2001 Apple Computer, Inc.  All Rights Reserved
  25  *
  26  *      hfs_readwrite.c -- vnode operations to deal with reading and writing files.
  27  *
  28  */
  29
  30 #include <sys/param.h>
  31 #include <sys/systm.h>
  32 #include <sys/resourcevar.h>
  33 #include <sys/kernel.h>
  34 #include <sys/fcntl.h>
  35 #include <sys/stat.h>
  36 #include <sys/buf.h>
  37 #include <sys/proc.h>
  38 #include <sys/vnode.h>
  39 #include <sys/uio.h>
  40
  41 #include <miscfs/specfs/specdev.h>
  42
  43 #include <sys/ubc.h>
  44 #include <vm/vm_pageout.h>
  45
  46 #include <sys/kdebug.h>
  47
  48 #include        "hfs.h"
  49 #include        "hfs_endian.h"
  50 #include        "hfs_quota.h"
  51 #include        "hfscommon/headers/FileMgrInternal.h"
  52 #include        "hfscommon/headers/BTreesInternal.h"
  53 #include        "hfs_cnode.h"
  54 #include        "hfs_dbg.h"
  55
  56 extern int overflow_extents(struct filefork *fp);
  57
     /*
      * can_cluster(size): non-zero when `size` is a multiple of 4096 and is no
      * larger than MAXPHYSIO/2.
      *
      * The argument is parenthesized at each use so that an expression argument
      * (for example `a ^ b` or `cond ? x : y`) is tested as a whole rather than
      * being split by operator precedence.  Note that `size` is evaluated twice,
      * so arguments with side effects must be avoided.
      */
  58 #define can_cluster(size) ((((size) & (4096-1)) == 0) && ((size) <= (MAXPHYSIO/2)))
  59
     /*
      * MAXHFSFILESIZE is the offset limit hfs_read() applies on volumes that are
      * not HFS Plus: a read starting beyond it fails with EFBIG.
      */
  60 enum {
  61         MAXHFSFILESIZE = 0x7FFFFFFF             /* this needs to go in the mount structure */
  62 };
  63
  64 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
  65
  66
  67 /*****************************************************************************
  68 *
  69 *       Operations on vnodes
  70 *
  71 *****************************************************************************/
  72
  73 /*
      * hfs_read -- vnode read operation for HFS: copy file data into the
      * caller's uio.
      *
      * Only VREG and VLNK vnodes are accepted (EISDIR otherwise).  A zero-length
      * request returns 0 and a negative offset returns EINVAL.  A starting offset
      * beyond the fork's logical size (ff_size) returns 0, except on volumes that
      * are not HFS Plus when the offset also exceeds MAXHFSFILESIZE, in which
      * case EFBIG is returned.
      *
      * When UBCISVALID(vp) the transfer is handed to cluster_read(); otherwise
      * the data is read through the buffer cache in PAGE_SIZE logical blocks
      * with bread() or breadn() and copied out with uiomove().  Once past the
      * preflight checks, C_ACCESS is set on the cnode before returning.
      *
      * Returns 0 or the error reported by the read/copy routine that failed.
      *
  74 #% read         vp      L L L
  75 #
  76  vop_read {
  77      IN struct vnode *vp;
  78      INOUT struct uio *uio;
  79      IN int ioflag;
  80      IN struct ucred *cred;
  81
  82      */
  83
  84 int
  85 hfs_read(ap)
  86         struct vop_read_args /* {
  87                 struct vnode *a_vp;
  88                 struct uio *a_uio;
  89                 int a_ioflag;
  90                 struct ucred *a_cred;
  91         } */ *ap;
  92 {
  93         register struct uio *uio = ap->a_uio;
  94         register struct vnode *vp = ap->a_vp;
  95         struct cnode *cp;
  96         struct filefork *fp;
  97         struct buf *bp;
  98         daddr_t logBlockNo;
  99         u_long fragSize, moveSize, startOffset, ioxfersize;
 100         int devBlockSize = 0;
 101         off_t bytesRemaining;
 102         int retval = 0;
 103         off_t filesize;
 104         off_t filebytes;
 105
 106         /* Preflight checks */
 107         if (vp->v_type != VREG && vp->v_type != VLNK)
 108                 return (EISDIR);        /* HFS can only read files */
 109         if (uio->uio_resid == 0)
 110                 return (0);             /* Nothing left to do */
 111         if (uio->uio_offset < 0)
 112                 return (EINVAL);        /* can't read from a negative offset */
 113
 114         cp = VTOC(vp);
 115         fp = VTOF(vp);
             /* filesize is the logical EOF; filebytes is the allocated size (only used for tracing here). */
 116         filesize = fp->ff_size;
 117         filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize;
             /* Starting past EOF reads nothing; plain HFS additionally rejects offsets past its size limit. */
 118         if (uio->uio_offset > filesize) {
 119                 if ((!ISHFSPLUS(VTOVCB(vp))) && (uio->uio_offset > (off_t)MAXHFSFILESIZE))
 120                         return (EFBIG);
 121                 else
 122                         return (0);
 123         }
 124
 125         VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
 126
 127         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
 128                 (int)uio->uio_offset, uio->uio_resid, (int)filesize, (int)filebytes, 0);
 129
 130         if (UBCISVALID(vp)) {
 131                 retval = cluster_read(vp, uio, filesize, devBlockSize, 0);
 132         } else {
 133
                     /*
                      * Buffer-cache path: one PAGE_SIZE logical block per iteration.
                      * bp is NULL at the top of every iteration (set by the init and
                      * update expressions of the for statement).
                      */
 134                 for (retval = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
 135
                 /* Stop once the current offset has reached the logical EOF. */
 136             if ((bytesRemaining = (filesize - uio->uio_offset)) <= 0)
 137                 break;
 138
                 /* Logical block number (in PAGE_SIZE units) and byte offset inside that block. */
 139             logBlockNo  = (daddr_t)(uio->uio_offset / PAGE_SIZE_64);
 140             startOffset = (u_long) (uio->uio_offset & PAGE_MASK_64);
 141             fragSize    = PAGE_SIZE;
 142
                 /*
                  * Read a whole page unless this block reaches the EOF; in that case
                  * read only up to the EOF, rounded up to a devBlockSize boundary
                  * (the mask arithmetic assumes devBlockSize is a power of two).
                  */
 143             if (((logBlockNo * PAGE_SIZE) + fragSize) < filesize)
 144                 ioxfersize = fragSize;
 145             else {
 146                 ioxfersize = filesize - (logBlockNo * PAGE_SIZE);
 147                 ioxfersize = (ioxfersize + (devBlockSize - 1)) & ~(devBlockSize - 1);
 148             }
                 /* Bytes to copy out: rest of the block, clipped to the EOF and to the request. */
 149                 moveSize = ioxfersize;
 150                 moveSize -= startOffset;
 151
 152             if (bytesRemaining < moveSize)
 153                 moveSize = bytesRemaining;
 154
 155             if (uio->uio_resid < moveSize) {
 156                 moveSize = uio->uio_resid;
 157             };
 158             if (moveSize == 0) {
 159                 break;
 160             };
 161
                 /*
                  * Pick the read routine:
                  *  - within one page of the EOF: plain bread();
                  *  - previous logical block was the last one read (v_lastr) and
                  *    VRAOFF is clear: breadn() with one additional block, sized
                  *    the same way as ioxfersize above;
                  *  - otherwise: plain bread().
                  */
 162             if (( uio->uio_offset + fragSize) >= filesize) {
 163                 retval = bread(vp, logBlockNo, ioxfersize, NOCRED, &bp);
 164
 165             } else if (logBlockNo - 1 == vp->v_lastr && !(vp->v_flag & VRAOFF)) {
 166                 daddr_t nextLogBlockNo = logBlockNo + 1;
 167                 int nextsize;
 168
 169                 if (((nextLogBlockNo * PAGE_SIZE) +
 170                      (daddr_t)fragSize) < filesize)
 171                     nextsize = fragSize;
 172                 else {
 173                     nextsize = filesize - (nextLogBlockNo * PAGE_SIZE);
 174                     nextsize = (nextsize + (devBlockSize - 1)) & ~(devBlockSize - 1);
 175                 }
 176                 retval = breadn(vp, logBlockNo, ioxfersize, &nextLogBlockNo, &nextsize, 1, NOCRED, &bp);
 177             } else {
 178                 retval = bread(vp, logBlockNo, ioxfersize, NOCRED, &bp);
 179             };
 180
                 /* Read failed: release any buffer that was handed back and stop. */
 181             if (retval != E_NONE) {
 182                 if (bp) {
 183                     brelse(bp);
 184                     bp = NULL;
 185                 }
 186                 break;
 187             };
                 /* Remember this block so the next call can detect sequential access. */
 188             vp->v_lastr = logBlockNo;
 189
 190             /*
 191              * We should only get non-zero b_resid when an I/O error
 192              * has occurred, which should cause us to break above.
 193              * However, if the short read did not cause an error,
 194              * then we want to ensure that we do not uiomove bad
 195              * or uninitialized data.
 196              */
 197             ioxfersize -= bp->b_resid;
 198
 199             if (ioxfersize < moveSize) {                        /* XXX PPD This should take the offset into account, too! */
 200                 if (ioxfersize == 0)
 201                     break;
 202                 moveSize = ioxfersize;
 203             }
 204             if ((startOffset + moveSize) > bp->b_bcount)
 205                 panic("hfs_read: bad startOffset or moveSize\n");
 206
                 /* Copy to the caller; on failure leave the loop still holding bp (released below). */
 207             if ((retval = uiomove((caddr_t)bp->b_data + startOffset, (int)moveSize, uio)))
 208                 break;
 209
                 /*
                  * For regular files, flag the buffer B_AGE once it has been consumed
                  * to the end of the page or the read has reached the EOF.
                  */
 210             if (S_ISREG(cp->c_mode) &&
 211                 (((startOffset + moveSize) == fragSize) || (uio->uio_offset == filesize))) {
 212                 bp->b_flags |= B_AGE;
 213             };
 214
 215             brelse(bp);
 216             /* The for-loop update expression resets bp to NULL after each completed iteration. */
 217         }
 218
                     /*
                      * bp is non-NULL here only when the loop was left by a break
                      * taken while a buffer was still held (zero-length short read
                      * or uiomove failure); release it.
                      */
 219                 if (bp != NULL) {
 220                         brelse(bp);
 221                 }
 222         }
 223
 224         cp->c_flag |= C_ACCESS;
 225
 226         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
 227                 (int)uio->uio_offset, uio->uio_resid, (int)filesize,  (int)filebytes, 0);
 228
 229         return (retval);
 230 }
 231
 232 /*
 233  * Write data to a file (regular file or symlink; other vnode types get EISDIR).
      *
      * Summary of the flow:
      *   - Reject negative offsets (EINVAL), succeed immediately on a
      *     zero-length request, honor IO_APPEND, and refuse writes that do not
      *     start at EOF on cnodes flagged APPEND (EPERM).
      *   - On journaled volumes, refuse (EPERM) writes to a file whose first
      *     data extent starts at the journal info block or at the journal start.
      *   - Enforce the process RLIMIT_FSIZE for regular files (SIGXFSZ + EFBIG).
      *   - If the write ends past the currently allocated space, extend the
      *     fork with ExtendFileC() under the extents b-tree lock (inside a
      *     journal transaction when the volume is journaled).
      *   - Move the data with cluster_write() when UBCISVALID(vp), otherwise
      *     block by block through the buffer cache.
      *   - On error with IO_UNIT, truncate back to the original size and
      *     restore the uio; on success with IO_SYNC, call VOP_UPDATE().
      *
      * Returns 0 on success or an errno value.
      *
 234 #% write        vp      L L L
 235 #
 236  vop_write {
 237      IN struct vnode *vp;
 238      INOUT struct uio *uio;
 239      IN int ioflag;
 240      IN struct ucred *cred;
 241
 242      */
 243 int
 244 hfs_write(ap)
 245         struct vop_write_args /* {
 246                 struct vnode *a_vp;
 247                 struct uio *a_uio;
 248                 int a_ioflag;
 249                 struct ucred *a_cred;
 250         } */ *ap;
 251 {
 252         struct vnode *vp = ap->a_vp;
 253         struct uio *uio = ap->a_uio;
 254         struct cnode *cp;
 255         struct filefork *fp;
 256         struct buf *bp;
 257         struct proc *p;
 258         struct timeval tv;
 259         ExtendedVCB *vcb;
 260     int                                 devBlockSize = 0;
 261     daddr_t                     logBlockNo;
 262     long                                fragSize;
 263     off_t                               origFileSize, currOffset, writelimit, bytesToAdd;
 264     off_t                               actualBytesAdded;
 265     u_long                              blkoffset, resid, xfersize, clearSize;
 266     int                                 eflags, ioflag;
 267     int                                 retval;
 268         off_t filebytes;
 269         u_long fileblocks;
 270         struct hfsmount *hfsmp;
 271         int started_tr = 0, grabbed_lock = 0;
 272
 273         ioflag = ap->a_ioflag;
 274
             /* Preflight checks */
 275         if (uio->uio_offset < 0)
 276                 return (EINVAL);
 277         if (uio->uio_resid == 0)
 278                 return (E_NONE);
 279         if (vp->v_type != VREG && vp->v_type != VLNK)
 280                 return (EISDIR);        /* Can only write files */
 281
 282         cp = VTOC(vp);
 283         fp = VTOF(vp);
 284         vcb = VTOVCB(vp);
 285         fileblocks = fp->ff_blocks;
 286         filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
 287
             /* IO_APPEND forces the write to start at the current logical EOF. */
 288         if (ioflag & IO_APPEND)
 289                 uio->uio_offset = fp->ff_size;
             /* A cnode flagged APPEND only accepts writes that start exactly at the EOF. */
 290         if ((cp->c_flags & APPEND) && uio->uio_offset != fp->ff_size)
 291                 return (EPERM);
 292
 293         // XXXdbg - don't allow modification of the journal or journal_info_block
 294         if (VTOHFS(vp)->jnl && cp->c_datafork) {
 295                 struct HFSPlusExtentDescriptor *extd;
 296
                     /* The file is identified by the start block of its first data-fork extent. */
 297                 extd = &cp->c_datafork->ff_data.cf_extents[0];
 298                 if (extd->startBlock == VTOVCB(vp)->vcbJinfoBlock || extd->startBlock == VTOHFS(vp)->jnl_start) {
 299                         return EPERM;
 300                 }
 301         }
 302
             /* Offset of the first byte past the end of this write. */
 303         writelimit = uio->uio_offset + uio->uio_resid;
 304
 305         /*
 306          * Maybe this should be above the vnode op call, but so long as
 307          * file servers have no limits, I don't think it matters.
 308          */
 309         p = uio->uio_procp;
 310         if (vp->v_type == VREG && p &&
 311             writelimit > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
 312                 psignal(p, SIGXFSZ);
 313                 return (EFBIG);
 314         }
             /* From here on p is the calling process; it is passed to the metafile unlock below. */
 315         p = current_proc();
 316
 317         VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
 318
             /* Remember the request size and the starting EOF so a failed write can be undone. */
 319         resid = uio->uio_resid;
 320         origFileSize = fp->ff_size;
 321         eflags = kEFDeferMask;  /* defer file block allocations */
 322         filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
 323
 324         /*
 325          * NOTE: In the following loop there are two positions tracked:
 326          * currOffset is the current I/O starting offset.  currOffset
 327          * is never >LEOF; the LEOF is nudged along with currOffset as
 328          * data is zeroed or written. uio->uio_offset is the start of
 329          * the current I/O operation.  It may be arbitrarily beyond
 330          * currOffset.
 331          *
 332          * The following is true at all times:
 333          *   currOffset <= LEOF <= uio->uio_offset <= writelimit
 334          */
 335         currOffset = MIN(uio->uio_offset, fp->ff_size);
 336
 337         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
 338                 (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);
 339         retval = 0;
 340
 341         /* Now test if we need to extend the file */
 342         /* Doing so will adjust the filebytes for us */
 343
 344 #if QUOTA
             /*
              * Run the quota check (hfs_chkdq) for the additional bytes, rounded up
              * to a multiple of vcb->blockSize, and fail the write if it reports an
              * error.
              */
 345         if(writelimit > filebytes) {
 346                 bytesToAdd = writelimit - filebytes;
 347
 348                 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, vcb->blockSize)),
 349                                    ap->a_cred, 0);
 350                 if (retval)
 351                         return (retval);
 352         }
 353 #endif /* QUOTA */
 354
             /*
              * Only when the file has to grow: take the global shared lock and,
              * on a journaled volume, open a transaction around the allocation.
              * Both are released right after the allocation loop below.
              */
 355         hfsmp = VTOHFS(vp);
 356         if (writelimit > filebytes) {
 357                 hfs_global_shared_lock_acquire(hfsmp);
 358                 grabbed_lock = 1;
 359         }
 360         if (hfsmp->jnl && (writelimit > filebytes)) {
 361                 if (journal_start_transaction(hfsmp->jnl) != 0) {
 362                         hfs_global_shared_lock_release(hfsmp);
 363                         return EINVAL;
 364                 }
 365                 started_tr = 1;
 366         }
 367
             /*
              * Keep requesting space until the allocation covers writelimit.  A
              * call that succeeds but adds nothing is reported as ENOSPC.
              */
 368         while (writelimit > filebytes) {
 369
 370                 bytesToAdd = writelimit - filebytes;
                     /* When suser() returns non-zero for the credential, also pass kEFReserveMask. */
 371                 if (suser(ap->a_cred, NULL) != 0)
 372                         eflags |= kEFReserveMask;
 373
 374                 /* lock extents b-tree (also protects volume bitmap) */
 375                 retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, current_proc());
 376                 if (retval != E_NONE)
 377                         break;
 378
 379                 retval = MacToVFSError(ExtendFileC (vcb, (FCB*)fp, bytesToAdd,
 380                                 0, eflags, &actualBytesAdded));
 381
 382                 (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, p);
 383                 if ((actualBytesAdded == 0) && (retval == E_NONE))
 384                         retval = ENOSPC;
 385                 if (retval != E_NONE)
 386                         break;
                     /* Recompute the allocated size from the fork's updated block count. */
 387                 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
 388                 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
 389                         (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size,  (int)filebytes, 0);
 390         }
 391
 392         // XXXdbg
             /* Close the transaction opened for the extension, flushing the volume header first. */
 393         if (started_tr) {
 394                 hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
 395                 journal_end_transaction(hfsmp->jnl);
 396                 started_tr = 0;
 397         }
 398         if (grabbed_lock) {
 399                 hfs_global_shared_lock_release(hfsmp);
 400                 grabbed_lock = 0;
 401         }
 402
             /*
              * Data transfer.  With a valid UBC the write goes through
              * cluster_write(); otherwise (or if the allocation above failed, in
              * which case the loop in the else branch does not run) it goes
              * through the buffer cache.
              */
 403         if (UBCISVALID(vp) && retval == E_NONE) {
 404                 off_t filesize;
 405                 off_t zero_off;
 406                 off_t tail_off;
 407                 off_t inval_start;
 408                 off_t inval_end;
 409                 off_t io_start, io_end;
 410                 int lflag;
 411                 struct rl_entry *invalid_range;
 412
                     /* filesize: the logical size the file will have if the whole write succeeds. */
 413                 if (writelimit > fp->ff_size)
 414                         filesize = writelimit;
 415                 else
 416                         filesize = fp->ff_size;
 417
                     /* Carry over only the IO_SYNC bit; zero-fill flags are OR-ed in below as needed. */
 418                 lflag = (ioflag & IO_SYNC);
 419
 420                 if (uio->uio_offset <= fp->ff_size) {
                             /* Write starts at or before the EOF: zero_off is the start of its page. */
 421                         zero_off = uio->uio_offset & ~PAGE_MASK_64;
 422
 423                         /* Check whether the area between zero_off and the start
 424                            of the transfer is invalid and should be zero-filled
 425                            as part of the transfer:
 426                          */
 427                         if (rl_scan(&fp->ff_invalidranges, zero_off, uio->uio_offset - 1, &invalid_range) != RL_NOOVERLAP)
 428                                 lflag |= IO_HEADZEROFILL;
 429                 } else {
 430                         off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
 431
 432                         /* The bytes between fp->ff_size and uio->uio_offset must never be
 433                            read without being zeroed.  The current last block is filled with zeroes
 434                            if it holds valid data but in all cases merely do a little bookkeeping
 435                            to track the area from the end of the current last page to the start of
 436                            the area actually written.  For the same reason only the bytes up to the
 437                            start of the page where this write will start is invalidated; any remainder
 438                            before uio->uio_offset is explicitly zeroed as part of the cluster_write.
 439
 440                            Note that inval_start, the start of the page after the current EOF,
 441                            may be past the start of the write, in which case the zeroing
 442                            will be handled by the cluster_write of the actual data.
 443                          */
 444                         inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
 445                         inval_end = uio->uio_offset & ~PAGE_MASK_64;
 446                         zero_off = fp->ff_size;
 447
 448                         if ((fp->ff_size & PAGE_MASK_64) &&
 449                                 (rl_scan(&fp->ff_invalidranges,
 450                                                         eof_page_base,
 451                                                         fp->ff_size - 1,
 452                                                         &invalid_range) != RL_NOOVERLAP)) {
 453                                 /* The page containing the EOF is not valid, so the
 454                                    entire page must be made inaccessible now.  If the write
 455                                    starts on a page beyond the page containing the eof
 456                                    (inval_end > eof_page_base), add the
 457                                    whole page to the range to be invalidated.  Otherwise
 458                                    (i.e. if the write starts on the same page), zero-fill
 459                                    the entire page explicitly now:
 460                                  */
 461                                 if (inval_end > eof_page_base) {
 462                                         inval_start = eof_page_base;
 463                                 } else {
 464                                         zero_off = eof_page_base;
 465                                 };
 466                         };
 467
 468                         if (inval_start < inval_end) {
 469                                 /* There's some range of data that's going to be marked invalid */
 470
 471                                 if (zero_off < inval_start) {
 472                                         /* The pages between inval_start and inval_end are going to be invalidated,
 473                                            and the actual write will start on a page past inval_end.  Now's the last
 474                                            chance to zero-fill the page containing the EOF:
 475                                          */
 476                                         retval = cluster_write(vp, (struct uio *) 0,
 477                                                         fp->ff_size, inval_start,
 478                                                         zero_off, (off_t)0, devBlockSize,
 479                                                         lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
 480                                         if (retval) goto ioerr_exit;
 481                                 };
 482
 483                                 /* Mark the remaining area of the newly allocated space as invalid: */
 484                                 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
 485                                 cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
                                     /* The logical EOF now sits at the end of the invalidated range. */
 486                                 zero_off = fp->ff_size = inval_end;
 487                         };
 488
                             /* Any gap left between zero_off and the write start is zeroed by cluster_write. */
 489                         if (uio->uio_offset > zero_off) lflag |= IO_HEADZEROFILL;
 490                 };
 491
 492                 /* Check to see whether the area between the end of the write and the end of
 493                    the page it falls in is invalid and should be zero-filled as part of the transfer:
 494                  */
 495                 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
 496                 if (tail_off > filesize) tail_off = filesize;
 497                 if (tail_off > writelimit) {
 498                         if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
 499                                 lflag |= IO_TAILZEROFILL;
 500                         };
 501                 };
 502
 503                 /*
 504                  * if the write starts beyond the current EOF (possibly advanced in the
 505                  * zeroing of the last block, above), then we'll zero fill from the current EOF
 506                  * to where the write begins:
 507                  *
 508                  * NOTE: If (and ONLY if) the portion of the file about to be written is
 509                  *       before the current EOF it might be marked as invalid now and must be
 510                  *       made readable (removed from the invalid ranges) before cluster_write
 511                  *       tries to write it:
 512                  */
 513                 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : uio->uio_offset;
 514                 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
 515                 if (io_start < fp->ff_size) {
 516                         rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
 517                 };
 518                 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
 519                                 tail_off, devBlockSize, lflag | IO_NOZERODIRTY);
 520
                     /* If the uio now sits past the logical EOF, move the EOF there and tell UBC the new size. */
 521                 if (uio->uio_offset > fp->ff_size) {
 522                         fp->ff_size = uio->uio_offset;
 523
 524                         ubc_setsize(vp, fp->ff_size);       /* XXX check errors */
 525                 }
                     /* Some of the request was consumed: flag the cnode as changed/updated. */
 526                 if (resid > uio->uio_resid)
 527                         cp->c_flag |= C_CHANGE | C_UPDATE;
 528         } else {
                     /*
                      * Buffer-cache path: one logical block per iteration, starting at
                      * currOffset (which may trail uio_offset when the write begins
                      * past the EOF; the gap is zeroed below).
                      */
 529                 while (retval == E_NONE && uio->uio_resid > 0) {
                             /* Logical block (PAGE_SIZE units) and offset within it for the current position. */
 530                         logBlockNo = currOffset / PAGE_SIZE;
 531                         blkoffset  = currOffset & PAGE_MASK;
 532
                             /* The last block may be shorter than a page: it ends at the allocated size. */
 533                         if ((filebytes - currOffset) < PAGE_SIZE_64)
 534                                 fragSize = filebytes - ((off_t)logBlockNo * PAGE_SIZE_64);
 535                         else
 536                                 fragSize = PAGE_SIZE;
 537                         xfersize = fragSize - blkoffset;
 538
 539                         /* Make any adjustments for boundary conditions */
 540                         if (currOffset + (off_t)xfersize > writelimit)
 541                                 xfersize = writelimit - currOffset;
 542
 543                         /*
 544                          * There is no need to read into bp if:
 545                          * We start on a block boundary and will overwrite the whole block
 546                          *
 547                          *                                              OR
 548                          */
 549                         if ((blkoffset == 0) && (xfersize >= fragSize)) {
 550                                 bp = getblk(vp, logBlockNo, fragSize, 0, 0, BLK_READ);
 551                                 retval = 0;
 552
                                     /* A b_blkno of -1 on the returned buffer is treated as a failure. */
 553                                 if (bp->b_blkno == -1) {
 554                                         brelse(bp);
 555                                         retval = EIO;           /* XXX */
 556                                         break;
 557                                 }
 558                         } else {
 559
                                     /* ...the block starts exactly at the current logical EOF. */
 560                                 if (currOffset == fp->ff_size && blkoffset == 0) {
 561                                         bp = getblk(vp, logBlockNo, fragSize, 0, 0, BLK_READ);
 562                                         retval = 0;
 563                                         if (bp->b_blkno == -1) {
 564                                                 brelse(bp);
 565                                                 retval = EIO;           /* XXX */
 566                                                 break;
 567                                         }
 568                                 } else {
 569                                         /*
 570                                          * This I/O transfer is not sufficiently aligned,
 571                                          * so read the affected block into a buffer:
 572                                          */
 573                                         retval = bread(vp, logBlockNo, fragSize, ap->a_cred, &bp);
 574                                         if (retval != E_NONE) {
 575                                                 if (bp)
 576                                                 brelse(bp);
 577                                                 break;
 578                                         }
 579                                 }
 580                         }
 581
 582                         /* See if we are starting to write within file boundaries:
 583                          * If not, then we need to present a "hole" for the area
 584                          * between the current EOF and the start of the current
 585                          * I/O operation:
 586                          *
 587                          * Note that currOffset is only less than uio_offset if
 588                          * uio_offset > LEOF...
 589                          */
 590                         if (uio->uio_offset > currOffset) {
                                     /* Zero as much of the gap as fits in this block and shrink the copy accordingly. */
 591                                 clearSize = MIN(uio->uio_offset - currOffset, xfersize);
 592                                 bzero(bp->b_data + blkoffset, clearSize);
 593                                 currOffset += clearSize;
 594                                 blkoffset += clearSize;
 595                                 xfersize -= clearSize;
 596                         }
 597
                             /* Copy the caller's data into what is left of the block. */
 598                         if (xfersize > 0) {
 599                                 retval = uiomove((caddr_t)bp->b_data + blkoffset, (int)xfersize, uio);
 600                                 currOffset += xfersize;
 601                         }
 602
                             /*
                              * Hand the buffer back: VOP_BWRITE when IO_SYNC was requested,
                              * bawrite (with B_AGE set) when the block was filled to its end,
                              * bdwrite otherwise.
                              */
 603                         if (ioflag & IO_SYNC) {
 604                                 (void)VOP_BWRITE(bp);
 605                         } else if ((xfersize + blkoffset) == fragSize) {
 606                                 bp->b_flags |= B_AGE;
 607                                 bawrite(bp);
 608                         } else {
 609                                 bdwrite(bp);
 610                         }
 611
 612                         /* Update the EOF if we just extended the file
 613                          * (the PEOF has already been moved out and the
 614                          * block mapping table has been updated):
 615                          */
 616                         if (currOffset > fp->ff_size) {
 617                                 fp->ff_size = currOffset;
 618                                 if (UBCISVALID(vp))
 619                                         ubc_setsize(vp, fp->ff_size); /* XXX check errors */
 620                         }
                             /*
                              * NOTE(review): resid holds the original request size, which is
                              * non-zero past the preflight check and is never modified, so
                              * only a non-zero retval can take this break.
                              */
 621                         if (retval || (resid == 0))
 622                                 break;
 623                         cp->c_flag |= C_CHANGE | C_UPDATE;
 624                 } /* endwhile */
 625         }
 626
 627 ioerr_exit:
 628         /*
 629          * If we successfully wrote any data, and we are not the superuser
 630          * we clear the setuid and setgid bits as a precaution against
 631          * tampering.
 632          */
 633         if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
 634                 cp->c_mode &= ~(S_ISUID | S_ISGID);
 635
 636         if (retval) {
                     /*
                      * With IO_UNIT, undo a failed write: truncate the file back to
                      * its original size and restore the uio offset and residual
                      * count to their values on entry.
                      */
 637                 if (ioflag & IO_UNIT) {
 638                         (void)VOP_TRUNCATE(vp, origFileSize,
 639                                 ioflag & IO_SYNC, ap->a_cred, uio->uio_procp);
 640                         uio->uio_offset -= resid - uio->uio_resid;
 641                         uio->uio_resid = resid;
 642                         filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
 643                 }
 644         } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
                     /* IO_SYNC write that moved data: call VOP_UPDATE with the current time. */
 645                 tv = time;
 646                 retval = VOP_UPDATE(vp, &tv, &tv, 1);
 647         }
 648
 649         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
 650                 (int)uio->uio_offset, uio->uio_resid, (int)fp->ff_size, (int)filebytes, 0);
 651
 652         return (retval);
 653 }
 654
 655
 656 /*
 657
 658 #% ioctl        vp      U U U
 659 #
 660  vop_ioctl {
 661      IN struct vnode *vp;
 662      IN u_long command;
 663      IN caddr_t data;
 664      IN int fflag;
 665      IN struct ucred *cred;
 666      IN struct proc *p;
 667
 668      */
 669
 670
 671 /* ARGSUSED */
 672 int
 673 hfs_ioctl(ap)
 674         struct vop_ioctl_args /* {
 675                 struct vnode *a_vp;
 676                 int  a_command;
 677                 caddr_t  a_data;
 678                 int  a_fflag;
 679                 struct ucred *a_cred;
 680                 struct proc *a_p;
 681         } */ *ap;
 682 {
 683         switch (ap->a_command) {
 684         case 1: {
 685                 register struct cnode *cp;
 686                 register struct vnode *vp;
 687                 register struct radvisory *ra;
 688                 struct filefork *fp;
 689                 int devBlockSize = 0;
 690                 int error;
 691
 692                 vp = ap->a_vp;
 693
 694                 if (vp->v_type != VREG)
 695                         return EINVAL;
 696
 697                 VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ);
 698                 error = vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
 699                 if (error)
 700                         return (error);
 701
 702                 ra = (struct radvisory *)(ap->a_data);
 703                 cp = VTOC(vp);
 704                 fp = VTOF(vp);
 705
 706                 if (ra->ra_offset >= fp->ff_size) {
 707                         VOP_UNLOCK(vp, 0, ap->a_p);
 708                         return (EFBIG);
 709                 }
 710                 VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
 711
 712                 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count, devBlockSize);
 713                 VOP_UNLOCK(vp, 0, ap->a_p);
 714
 715                 return (error);
 716         }
 717
 718         case 2: /* F_READBOOTBLOCKS */
 719         case 3: /* F_WRITEBOOTBLOCKS */
 720             {
 721             struct vnode *vp = ap->a_vp;
 722             struct vnode *devvp = NULL;
 723             struct fbootstraptransfer *btd = (struct fbootstraptransfer *)ap->a_data;
 724             int devBlockSize;
 725             int error;
 726             struct iovec aiov;
 727             struct uio auio;
 728             u_long blockNumber;
 729             u_long blockOffset;
 730             u_long xfersize;
 731             struct buf *bp;
 732
 733             if ((vp->v_flag & VROOT) == 0) return EINVAL;
 734             if (btd->fbt_offset + btd->fbt_length > 1024) return EINVAL;
 735
 736             devvp = VTOHFS(vp)->hfs_devvp;
 737             aiov.iov_base = btd->fbt_buffer;
 738             aiov.iov_len = btd->fbt_length;
 739
 740             auio.uio_iov = &aiov;
 741             auio.uio_iovcnt = 1;
 742             auio.uio_offset = btd->fbt_offset;
 743             auio.uio_resid = btd->fbt_length;
 744             auio.uio_segflg = UIO_USERSPACE;
 745             auio.uio_rw = (ap->a_command == 3) ? UIO_WRITE : UIO_READ; /* F_WRITEBOOTSTRAP / F_READBOOTSTRAP */
 746             auio.uio_procp = ap->a_p;
 747
 748             VOP_DEVBLOCKSIZE(devvp, &devBlockSize);
 749
 750             while (auio.uio_resid > 0) {
 751               blockNumber = auio.uio_offset / devBlockSize;
 752               error = bread(devvp, blockNumber, devBlockSize, ap->a_cred, &bp);
 753               if (error) {
 754                   if (bp) brelse(bp);
 755                   return error;
 756                 };
 757
 758                 blockOffset = auio.uio_offset % devBlockSize;
 759               xfersize = devBlockSize - blockOffset;
 760               error = uiomove((caddr_t)bp->b_data + blockOffset, (int)xfersize, &auio);
 761                 if (error) {
 762                   brelse(bp);
 763                   return error;
 764                 };
 765                 if (auio.uio_rw == UIO_WRITE) {
 766                   error = VOP_BWRITE(bp);
 767                   if (error) return error;
 768                 } else {
 769                   brelse(bp);
 770                 };
 771             };
 772         };
 773         return 0;
 774
 775         case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
 776             {
 777             *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(ap->a_vp)->localCreateDate);
 778             return 0;
 779             }
 780
 781         default:
 782             return (ENOTTY);
 783     }
 784
 785     /* Should never get here */
 786         return 0;
 787 }
 788
 789 /* ARGSUSED */
 790 int
 791 hfs_select(ap)
 792         struct vop_select_args /* {
 793                 struct vnode *a_vp;
 794                 int  a_which;
 795                 int  a_fflags;
 796                 struct ucred *a_cred;
 797                 void *a_wql;
 798                 struct proc *a_p;
 799         } */ *ap;
 800 {
 801         /*
 802          * We should really check to see if I/O is possible.
 803          */
 804         return (1);
 805 }
 806
 807 /*
 808  * Bmap converts the logical block number of a file to its physical block
 809  * number on the disk.
 810  */
 811
 812 /*
 813  * vp  - address of the vnode for the file
 814  * bn  - which logical block to convert to a physical block number.
 815  * vpp - returns the vnode for the block special file holding the filesystem
 816  *       containing the file of interest
 817  * bnp - address of where to return the filesystem physical block number
 818 #% bmap         vp      L L L
 819 #% bmap         vpp     - U -
 820 #
 821  vop_bmap {
 822      IN struct vnode *vp;
 823      IN daddr_t bn;
 824      OUT struct vnode **vpp;
 825      IN daddr_t *bnp;
 826      OUT int *runp;
 827      */
 828 /*
 829  * Converts a logical block number to a physical block, and optionally returns
 830  * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
 831  * The physical block number is based on the device block size, currently it's 512.
 832  * The block run is returned in logical blocks, and is the REMAINING amount of blocks
 833  */
 834
 835 int
 836 hfs_bmap(ap)
 837         struct vop_bmap_args /* {
 838                 struct vnode *a_vp;
 839                 daddr_t a_bn;
 840                 struct vnode **a_vpp;
 841                 daddr_t *a_bnp;
 842                 int *a_runp;
 843         } */ *ap;
 844 {
 845         struct vnode *vp = ap->a_vp;
 846         struct cnode *cp = VTOC(vp);
 847         struct filefork *fp = VTOF(vp);
 848         struct hfsmount *hfsmp = VTOHFS(vp);
 849    int                                  retval = E_NONE;
 850     daddr_t                             logBlockSize;
 851     size_t                              bytesContAvail = 0;
 852     off_t blockposition;
 853     struct proc                 *p = NULL;
 854     int                                 lockExtBtree;
 855     struct rl_entry *invalid_range;
 856     enum rl_overlaptype overlaptype;
 857
 858         /*
 859          * Check for underlying vnode requests and ensure that logical
 860          * to physical mapping is requested.
 861          */
 862         if (ap->a_vpp != NULL)
 863                 *ap->a_vpp = cp->c_devvp;
 864         if (ap->a_bnp == NULL)
 865                 return (0);
 866
 867         /* Only clustered I/O should have delayed allocations. */
 868         DBG_ASSERT(fp->ff_unallocblocks == 0);
 869
 870         logBlockSize = GetLogicalBlockSize(vp);
 871         blockposition = (off_t)ap->a_bn * (off_t)logBlockSize;
 872
 873         lockExtBtree = overflow_extents(fp);
 874         if (lockExtBtree) {
 875                 p = current_proc();
 876                 retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID,
 877                                 LK_EXCLUSIVE | LK_CANRECURSE, p);
 878                 if (retval)
 879                         return (retval);
 880         }
 881
 882         retval = MacToVFSError(
 883                             MapFileBlockC (HFSTOVCB(hfsmp),
 884                                             (FCB*)fp,
 885                                             MAXPHYSIO,
 886                                             blockposition,
 887                                             ap->a_bnp,
 888                                             &bytesContAvail));
 889
 890     if (lockExtBtree) (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
 891
 892     if (retval == E_NONE) {
 893         /* Adjust the mapping information for invalid file ranges: */
 894         overlaptype = rl_scan(&fp->ff_invalidranges,
 895                             blockposition,
 896                             blockposition + MAXPHYSIO - 1,
 897                             &invalid_range);
 898         if (overlaptype != RL_NOOVERLAP) {
 899             switch(overlaptype) {
 900                 case RL_MATCHINGOVERLAP:
 901                 case RL_OVERLAPCONTAINSRANGE:
 902                 case RL_OVERLAPSTARTSBEFORE:
 903                     /* There's no valid block for this byte offset: */
 904                     *ap->a_bnp = (daddr_t)-1;
 905                     bytesContAvail = invalid_range->rl_end + 1 - blockposition;
 906                     break;
 907
 908                 case RL_OVERLAPISCONTAINED:
 909                 case RL_OVERLAPENDSAFTER:
 910                     /* The range of interest hits an invalid block before the end: */
 911                     if (invalid_range->rl_start == blockposition) {
 912                         /* There's actually no valid information to be had starting here: */
 913                         *ap->a_bnp = (daddr_t)-1;
 914                                                 if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
 915                                                         (invalid_range->rl_end + 1 - blockposition < bytesContAvail)) {
 916                                 bytesContAvail = invalid_range->rl_end + 1 - blockposition;
 917                         };
 918                     } else {
 919                         bytesContAvail = invalid_range->rl_start - blockposition;
 920                     };
 921                     break;
 922             };
 923                         if (bytesContAvail > MAXPHYSIO) bytesContAvail = MAXPHYSIO;
 924         };
 925
 926         /* Figure out how many read ahead blocks there are */
 927         if (ap->a_runp != NULL) {
 928             if (can_cluster(logBlockSize)) {
 929                 /* Make sure this result never goes negative: */
 930                 *ap->a_runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
 931             } else {
 932                 *ap->a_runp = 0;
 933             };
 934         };
 935     };
 936
 937     return (retval);
 938 }
 939
 940 /* blktooff converts logical block number to file offset */
 941
 942 int
 943 hfs_blktooff(ap)
 944         struct vop_blktooff_args /* {
 945                 struct vnode *a_vp;
 946                 daddr_t a_lblkno;
 947                 off_t *a_offset;
 948         } */ *ap;
 949 {
 950         if (ap->a_vp == NULL)
 951                 return (EINVAL);
 952         *ap->a_offset = (off_t)ap->a_lblkno * PAGE_SIZE_64;
 953
 954         return(0);
 955 }
 956
 957 int
 958 hfs_offtoblk(ap)
 959         struct vop_offtoblk_args /* {
 960                 struct vnode *a_vp;
 961                 off_t a_offset;
 962                 daddr_t *a_lblkno;
 963         } */ *ap;
 964 {
 965         if (ap->a_vp == NULL)
 966                 return (EINVAL);
 967         *ap->a_lblkno = ap->a_offset / PAGE_SIZE_64;
 968
 969         return(0);
 970 }
 971
 972 int
 973 hfs_cmap(ap)
 974         struct vop_cmap_args /* {
 975                 struct vnode *a_vp;
 976                 off_t a_foffset;
 977                 size_t a_size;
 978                 daddr_t *a_bpn;
 979                 size_t *a_run;
 980                 void *a_poff;
 981         } */ *ap;
 982 {
 983     struct hfsmount *hfsmp = VTOHFS(ap->a_vp);
 984     struct filefork *fp = VTOF(ap->a_vp);
 985     size_t                              bytesContAvail = 0;
 986     int                 retval = E_NONE;
 987     int lockExtBtree = 0;
 988     struct proc         *p = NULL;
 989     struct rl_entry *invalid_range;
 990     enum rl_overlaptype overlaptype;
 991     int started_tr = 0, grabbed_lock = 0;
 992
 993         /*
 994          * Check for underlying vnode requests and ensure that logical
 995          * to physical mapping is requested.
 996          */
 997         if (ap->a_bpn == NULL)
 998                 return (0);
 999
1000         p = current_proc();
1001         if (fp->ff_unallocblocks) {
1002                 lockExtBtree = 1;
1003
1004                 // XXXdbg
1005                 hfs_global_shared_lock_acquire(hfsmp);
1006                 grabbed_lock = 1;
1007
1008                 if (hfsmp->jnl) {
1009                         if (journal_start_transaction(hfsmp->jnl) != 0) {
1010                                 hfs_global_shared_lock_release(hfsmp);
1011                                 return EINVAL;
1012                         } else {
1013                                 started_tr = 1;
1014                         }
1015                 }
1016
1017                 if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p)) {
1018                         if (started_tr) {
1019                                 journal_end_transaction(hfsmp->jnl);
1020                         }
1021                         if (grabbed_lock) {
1022                                 hfs_global_shared_lock_release(hfsmp);
1023                         }
1024                         return (retval);
1025                 }
1026         } else if (overflow_extents(fp)) {
1027                 lockExtBtree = 1;
1028                 if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p)) {
1029                         return retval;
1030                 }
1031         }
1032
1033         /*
1034          * Check for any delayed allocations.
1035          */
1036         if (fp->ff_unallocblocks) {
1037                 SInt64 reqbytes, actbytes;
1038
1039                 reqbytes = (SInt64)fp->ff_unallocblocks *
1040                              (SInt64)HFSTOVCB(hfsmp)->blockSize;
1041                 /*
1042                  * Release the blocks on loan and aquire some real ones.
1043                  * Note that we can race someone else for these blocks
1044                  * (and lose) so cmap needs to handle a failure here.
1045                  * Currently this race can't occur because all allocations
1046                  * are protected by an exclusive lock on the  Extents
1047                  * Overflow file.
1048                  */
1049                 HFSTOVCB(hfsmp)->loanedBlocks -= fp->ff_unallocblocks;
1050                 FTOC(fp)->c_blocks            -= fp->ff_unallocblocks;
1051                 fp->ff_blocks                 -= fp->ff_unallocblocks;
1052                 fp->ff_unallocblocks           = 0;
1053
1054                 while (retval == 0 && reqbytes > 0) {
1055                         retval = MacToVFSError(ExtendFileC(HFSTOVCB(hfsmp),
1056                                         (FCB*)fp, reqbytes, 0,
1057                                         kEFAllMask | kEFNoClumpMask, &actbytes));
1058                         if (retval == 0 && actbytes == 0)
1059                                 retval = ENOSPC;
1060
1061                         if (retval) {
1062                                 fp->ff_unallocblocks =
1063                                         reqbytes / HFSTOVCB(hfsmp)->blockSize;
1064                                 HFSTOVCB(hfsmp)->loanedBlocks += fp->ff_unallocblocks;
1065                                 FTOC(fp)->c_blocks            += fp->ff_unallocblocks;
1066                                 fp->ff_blocks                 += fp->ff_unallocblocks;
1067                         }
1068                         reqbytes -= actbytes;
1069                 }
1070
1071                 if (retval) {
1072                         (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
1073                         if (started_tr) {
1074                                 hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
1075                                 journal_end_transaction(hfsmp->jnl);
1076                         }
1077                         if (grabbed_lock) {
1078                                 hfs_global_shared_lock_release(hfsmp);
1079                         }
1080                         return (retval);
1081                 }
1082                 VTOC(ap->a_vp)->c_flag |= C_MODIFIED;
1083         }
1084
1085         retval = MacToVFSError(
1086                            MapFileBlockC (HFSTOVCB(hfsmp),
1087                                           (FCB *)fp,
1088                                           ap->a_size,
1089                                           ap->a_foffset,
1090                                           ap->a_bpn,
1091                                           &bytesContAvail));
1092
1093         if (lockExtBtree)
1094                 (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
1095
1096         // XXXdbg
1097         if (started_tr) {
1098                 hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
1099                 journal_end_transaction(hfsmp->jnl);
1100                 started_tr = 0;
1101         }
1102         if (grabbed_lock) {
1103                 hfs_global_shared_lock_release(hfsmp);
1104                 grabbed_lock = 0;
1105         }
1106
1107     if (retval == E_NONE) {
1108         /* Adjust the mapping information for invalid file ranges: */
1109         overlaptype = rl_scan(&fp->ff_invalidranges,
1110                             ap->a_foffset,
1111                             ap->a_foffset + (off_t)bytesContAvail - 1,
1112                             &invalid_range);
1113         if (overlaptype != RL_NOOVERLAP) {
1114             switch(overlaptype) {
1115                 case RL_MATCHINGOVERLAP:
1116                 case RL_OVERLAPCONTAINSRANGE:
1117                 case RL_OVERLAPSTARTSBEFORE:
1118                     /* There's no valid block for this byte offset: */
1119                     *ap->a_bpn = (daddr_t)-1;
1120
1121                     /* There's no point limiting the amount to be returned if the
1122                        invalid range that was hit extends all the way to the EOF
1123                        (i.e. there's no valid bytes between the end of this range
1124                        and the file's EOF):
1125                      */
1126                     if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
1127                                         (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1128                         bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1129                     };
1130                     break;
1131
1132                 case RL_OVERLAPISCONTAINED:
1133                 case RL_OVERLAPENDSAFTER:
1134                     /* The range of interest hits an invalid block before the end: */
1135                     if (invalid_range->rl_start == ap->a_foffset) {
1136                         /* There's actually no valid information to be had starting here: */
1137                         *ap->a_bpn = (daddr_t)-1;
1138                                                 if ((fp->ff_size > (invalid_range->rl_end + 1)) &&
1139                                                         (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1140                                 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1141                         };
1142                     } else {
1143                         bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1144                     };
1145                     break;
1146             };
1147             if (bytesContAvail > ap->a_size) bytesContAvail = ap->a_size;
1148         };
1149
1150         if (ap->a_run) *ap->a_run = bytesContAvail;
1151     };
1152
1153         if (ap->a_poff)
1154                 *(int *)ap->a_poff = 0;
1155
1156         return (retval);
1157 }
1158
1159
1160 /*
1161  * Read or write a buffer that is not contiguous on disk.  We loop over
1162  * each device block, copying to or from caller's buffer.
1163  *
1164  * We could be a bit more efficient by transferring as much data as is
1165  * contiguous.  But since this routine should rarely be called, and that
1166  * would be more complicated; best to keep it simple.
1167  */
1168 static int
1169 hfs_strategy_fragmented(struct buf *bp)
1170 {
1171         register struct vnode *vp = bp->b_vp;
1172         register struct cnode *cp = VTOC(vp);
1173         register struct vnode *devvp = cp->c_devvp;
1174         caddr_t ioaddr;         /* Address of fragment within bp  */
1175         struct buf *frag = NULL; /* For reading or writing a single block */
1176         int retval = 0;
1177         long remaining;         /* Bytes (in bp) left to transfer */
1178         off_t offset;           /* Logical offset of current fragment in vp */
1179         u_long block_size;      /* Size of one device block (and one I/O) */
1180
1181         /* Make sure we redo this mapping for the next I/O */
1182         bp->b_blkno = bp->b_lblkno;
1183
1184         /* Set up the logical position and number of bytes to read/write */
1185         offset = (off_t) bp->b_lblkno * (off_t) GetLogicalBlockSize(vp);
1186         block_size = VTOHFS(vp)->hfs_phys_block_size;
1187
1188         /* Get an empty buffer to do the deblocking */
1189         frag = geteblk(block_size);
1190         if (ISSET(bp->b_flags, B_READ))
1191                 SET(frag->b_flags, B_READ);
1192
1193         for (ioaddr = bp->b_data, remaining = bp->b_bcount; remaining != 0;
1194             ioaddr += block_size, offset += block_size,
1195             remaining -= block_size) {
1196                 frag->b_resid = frag->b_bcount;
1197                 CLR(frag->b_flags, B_DONE);
1198
1199                 /* Map the current position to a physical block number */
1200                 retval = VOP_CMAP(vp, offset, block_size, &frag->b_lblkno,
1201                     NULL, NULL);
1202                 if (retval != 0)
1203                         break;
1204
1205                 /*
1206                  * Did we try to read a hole?
1207                  * (Should never happen for metadata!)
1208                  */
1209                 if ((long)frag->b_lblkno == -1) {
1210                         bzero(ioaddr, block_size);
1211                         continue;
1212                 }
1213
1214                 /* If writing, copy before I/O */
1215                 if (!ISSET(bp->b_flags, B_READ))
1216                         bcopy(ioaddr, frag->b_data, block_size);
1217
1218                 /* Call the device to do the I/O and wait for it */
1219                 frag->b_blkno = frag->b_lblkno;
1220                 frag->b_vp = devvp;  /* Used to dispatch via VOP_STRATEGY */
1221                 frag->b_dev = devvp->v_rdev;
1222                 retval = VOP_STRATEGY(frag);
1223                 frag->b_vp = NULL;
1224                 if (retval != 0)
1225                         break;
1226                 retval = biowait(frag);
1227                 if (retval != 0)
1228                         break;
1229
1230                 /* If reading, copy after the I/O */
1231                 if (ISSET(bp->b_flags, B_READ))
1232                         bcopy(frag->b_data, ioaddr, block_size);
1233         }
1234
1235         frag->b_vp = NULL;
1236         //
1237         // XXXdbg - in the case that this is a meta-data block, it won't affect
1238         //          the journal because this bp is for a physical disk block,
1239         //          not a logical block that is part of the catalog or extents
1240         //          files.
1241         SET(frag->b_flags, B_INVAL);
1242         brelse(frag);
1243
1244         if ((bp->b_error = retval) != 0)
1245                 SET(bp->b_flags, B_ERROR);
1246
1247         biodone(bp);    /* This I/O is now complete */
1248         return retval;
1249 }
1250
1251
1252 /*
1253  * Calculate the logical to physical mapping if not done already,
1254  * then call the device strategy routine.
1255 #
1256 #vop_strategy {
1257 #       IN struct buf *bp;
1258     */
1259 int
1260 hfs_strategy(ap)
1261         struct vop_strategy_args /* {
1262                 struct buf *a_bp;
1263         } */ *ap;
1264 {
1265         register struct buf *bp = ap->a_bp;
1266         register struct vnode *vp = bp->b_vp;
1267         register struct cnode *cp = VTOC(vp);
1268         int retval = 0;
1269         off_t offset;
1270         size_t bytes_contig;
1271
1272         if ( !(bp->b_flags & B_VECTORLIST)) {
1273                 if (vp->v_type == VBLK || vp->v_type == VCHR)
1274                         panic("hfs_strategy: device vnode passed!");
1275
1276                 if (bp->b_flags & B_PAGELIST) {
1277                         /*
1278                          * If we have a page list associated with this bp,
1279                          * then go through cluster_bp since it knows how to
1280                          * deal with a page request that might span non-
1281                          * contiguous physical blocks on the disk...
1282                          */
1283                         retval = cluster_bp(bp);
1284                         vp = cp->c_devvp;
1285                         bp->b_dev = vp->v_rdev;
1286
1287                         return (retval);
1288                 }
1289
1290                 /*
1291                  * If we don't already know the filesystem relative block
1292                  * number then get it using VOP_BMAP().  If VOP_BMAP()
1293                  * returns the block number as -1 then we've got a hole in
1294                  * the file.  Although HFS filesystems don't create files with
1295                  * holes, invalidating of subranges of the file (lazy zero
1296                  * filling) may create such a situation.
1297                  */
1298                 if (bp->b_blkno == bp->b_lblkno) {
1299                         offset = (off_t) bp->b_lblkno *
1300                             (off_t) GetLogicalBlockSize(vp);
1301
1302                         if ((retval = VOP_CMAP(vp, offset, bp->b_bcount,
1303                             &bp->b_blkno, &bytes_contig, NULL))) {
1304                                 bp->b_error = retval;
1305                                 bp->b_flags |= B_ERROR;
1306                                 biodone(bp);
1307                                 return (retval);
1308                         }
1309                         if (bytes_contig < bp->b_bcount)
1310                         {
1311                                 /*
1312                                  * We were asked to read a block that wasn't
1313                                  * contiguous, so we have to read each of the
1314                                  * pieces and copy them into the buffer.
1315                                  * Since ordinary file I/O goes through
1316                                  * cluster_io (which won't ask us for
1317                                  * discontiguous data), this is probably an
1318                                  * attempt to read or write metadata.
1319                                  */
1320                                 return hfs_strategy_fragmented(bp);
1321                         }
1322                         if ((long)bp->b_blkno == -1)
1323                                 clrbuf(bp);
1324                 }
1325                 if ((long)bp->b_blkno == -1) {
1326                         biodone(bp);
1327                         return (0);
1328                 }
1329                 if (bp->b_validend == 0) {
1330                         /*
1331                          * Record the exact size of the I/O transfer about to
1332                          * be made:
1333                          */
1334                         bp->b_validend = bp->b_bcount;
1335                 }
1336         }
1337         vp = cp->c_devvp;
1338         bp->b_dev = vp->v_rdev;
1339
1340         return VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
1341 }
1342
1343
1344 /*
1345 #
1346 #% truncate     vp      L L L
1347 #
1348 vop_truncate {
1349     IN struct vnode *vp;
1350     IN off_t length;
1351     IN int flags;       (IO_SYNC)
1352     IN struct ucred *cred;
1353     IN struct proc *p;
1354 };
1355  * Truncate a cnode to at most length size, freeing (or adding) the
1356  * disk blocks.
1357  */
1358 int hfs_truncate(ap)
1359         struct vop_truncate_args /* {
1360                 struct vnode *a_vp;
1361                 off_t a_length;
1362                 int a_flags;
1363                 struct ucred *a_cred;
1364                 struct proc *a_p;
1365         } */ *ap;
1366 {
1367         register struct vnode *vp = ap->a_vp;
1368         register struct cnode *cp = VTOC(vp);
1369         struct filefork *fp = VTOF(vp);
1370         off_t length;
1371         long vflags;
1372         struct timeval tv;
1373         int retval;
1374         off_t bytesToAdd;
1375         off_t actualBytesAdded;
1376         off_t filebytes;
1377         u_long fileblocks;
1378         int blksize;
1379         struct hfsmount *hfsmp;
1380
1381         if (vp->v_type != VREG && vp->v_type != VLNK)
1382                 return (EISDIR);        /* cannot truncate an HFS directory! */
1383
1384         length = ap->a_length;
1385         blksize = VTOVCB(vp)->blockSize;
1386         fileblocks = fp->ff_blocks;
1387         filebytes = (off_t)fileblocks * (off_t)blksize;
1388
1389         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1390                  (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1391
1392         if (length < 0)
1393                 return (EINVAL);
1394
1395         if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1396                 return (EFBIG);
1397
1398         hfsmp = VTOHFS(vp);
1399
1400         tv = time;
1401         retval = E_NONE;
1402
1403         /*
1404          * We cannot just check if fp->ff_size == length (as an optimization)
1405          * since there may be extra physical blocks that also need truncation.
1406          */
1407 #if QUOTA
1408         if (retval = hfs_getinoquota(cp))
1409                 return(retval);
1410 #endif /* QUOTA */
1411
1412         /*
1413          * Lengthen the size of the file. We must ensure that the
1414          * last byte of the file is allocated. Since the smallest
1415          * value of ff_size is 0, length will be at least 1.
1416          */
1417         if (length > fp->ff_size) {
1418 #if QUOTA
1419                 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1420                                 ap->a_cred, 0);
1421                 if (retval)
1422                         goto Err_Exit;
1423 #endif /* QUOTA */
1424                 /*
1425                  * If we don't have enough physical space then
1426                  * we need to extend the physical size.
1427                  */
1428                 if (length > filebytes) {
1429                         int eflags;
1430
1431                         /* All or nothing and don't round up to clumpsize. */
1432                         eflags = kEFAllMask | kEFNoClumpMask;
1433
1434                         if (suser(ap->a_cred, NULL) != 0)
1435                                 eflags |= kEFReserveMask;  /* keep a reserve */
1436
1437                         // XXXdbg
1438                         hfs_global_shared_lock_acquire(hfsmp);
1439                         if (hfsmp->jnl) {
1440                                 if (journal_start_transaction(hfsmp->jnl) != 0) {
1441                                         retval = EINVAL;
1442                                         goto Err_Exit;
1443                                 }
1444                         }
1445
1446                         /* lock extents b-tree (also protects volume bitmap) */
1447                         retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
1448                         if (retval) {
1449                                 if (hfsmp->jnl) {
1450                                         journal_end_transaction(hfsmp->jnl);
1451                                 }
1452                                 hfs_global_shared_lock_release(hfsmp);
1453
1454                                 goto Err_Exit;
1455                         }
1456
1457                         while ((length > filebytes) && (retval == E_NONE)) {
1458                                 bytesToAdd = length - filebytes;
1459                                 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1460                                                     (FCB*)fp,
1461                                                     bytesToAdd,
1462                                                     0,
1463                                                     eflags,
1464                                                     &actualBytesAdded));
1465
1466                                 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1467                                 if (actualBytesAdded == 0 && retval == E_NONE) {
1468                                         if (length > filebytes)
1469                                                 length = filebytes;
1470                                         break;
1471                                 }
1472                         } /* endwhile */
1473
1474                         (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
1475
1476                         // XXXdbg
1477                         if (hfsmp->jnl) {
1478                                 hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
1479                                 journal_end_transaction(hfsmp->jnl);
1480                         }
1481                         hfs_global_shared_lock_release(hfsmp);
1482
1483                         if (retval)
1484                                 goto Err_Exit;
1485
1486                         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1487                                 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1488                 }
1489
1490                 if (!(ap->a_flags & IO_NOZEROFILL)) {
1491                         if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1492                                 struct rl_entry *invalid_range;
1493                                 int devBlockSize;
1494                                 off_t zero_limit;
1495
1496                                 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1497                                 if (length < zero_limit) zero_limit = length;
1498
1499                                 if (length > fp->ff_size) {
1500                                         /* Extending the file: time to fill out the current last page w. zeroes? */
1501                                         if ((fp->ff_size & PAGE_MASK_64) &&
1502                                             (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1503                                             fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1504
1505                                                 /* There's some valid data at the start of the (current) last page
1506                                                    of the file, so zero out the remainder of that page to ensure the
1507                                                    entire page contains valid data.  Since there is no invalid range
1508                                                    possible past the (current) eof, there's no need to remove anything
1509                                                    from the invalid range list before calling cluster_write():                                           */
1510                                                 VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
1511                                                 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1512                                                                 fp->ff_size, (off_t)0, devBlockSize,
1513                                                                 (ap->a_flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1514                                                 if (retval) goto Err_Exit;
1515
1516                                                 /* Merely invalidate the remaining area, if necessary: */
1517                                                 if (length > zero_limit) {
1518                                                         rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1519                                                         cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
1520                                                 }
1521                                         } else {
1522                                         /* The page containing the (current) eof is invalid: just add the
1523                                            remainder of the page to the invalid list, along with the area
1524                                            being newly allocated:
1525                                          */
1526                                         rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1527                                         cp->c_zftimeout = time.tv_sec + ZFTIMELIMIT;
1528                                         };
1529                                 }
1530                         } else {
1531                                         panic("hfs_truncate: invoked on non-UBC object?!");
1532                         };
1533                 }
1534                 cp->c_flag |= C_UPDATE;
1535                 fp->ff_size = length;
1536
1537                 if (UBCISVALID(vp))
1538                         ubc_setsize(vp, fp->ff_size);   /* XXX check errors */
1539
1540         } else { /* Shorten the size of the file */
1541
1542                 if (fp->ff_size > length) {
1543                         /*
1544                          * Any buffers that are past the truncation point need to be
1545                          * invalidated (to maintain buffer cache consistency).  For
1546                          * simplicity, we invalidate all the buffers by calling vinvalbuf.
1547                          */
1548                         if (UBCISVALID(vp))
1549                                 ubc_setsize(vp, length); /* XXX check errors */
1550
1551                         vflags = ((length > 0) ? V_SAVE : 0)  | V_SAVEMETA;
1552                         retval = vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
1553
1554                         /* Any space previously marked as invalid is now irrelevant: */
1555                         rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
1556                 }
1557
1558                 /*
1559                  * Account for any unmapped blocks. Note that the new
1560                  * file length can still end up with unmapped blocks.
1561                  */
1562                 if (fp->ff_unallocblocks > 0) {
1563                         u_int32_t finalblks;
1564
1565                         /* lock extents b-tree */
1566                         retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID,
1567                                         LK_EXCLUSIVE, ap->a_p);
1568                         if (retval)
1569                                 goto Err_Exit;
1570
1571                         VTOVCB(vp)->loanedBlocks -= fp->ff_unallocblocks;
1572                         cp->c_blocks             -= fp->ff_unallocblocks;
1573                         fp->ff_blocks            -= fp->ff_unallocblocks;
1574                         fp->ff_unallocblocks      = 0;
1575
1576                         finalblks = (length + blksize - 1) / blksize;
1577                         if (finalblks > fp->ff_blocks) {
1578                                 /* calculate required unmapped blocks */
1579                                 fp->ff_unallocblocks      = finalblks - fp->ff_blocks;
1580                                 VTOVCB(vp)->loanedBlocks += fp->ff_unallocblocks;
1581                                 cp->c_blocks             += fp->ff_unallocblocks;
1582                                 fp->ff_blocks            += fp->ff_unallocblocks;
1583                         }
1584                         (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID,
1585                                         LK_RELEASE, ap->a_p);
1586                 }
1587
1588                 /*
1589                  * For a TBE process the deallocation of the file blocks is
1590                  * delayed until the file is closed.  And hfs_close calls
1591                  * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
1592                  * isn't set, we make sure this isn't a TBE process.
1593                  */
1594                 if ((ap->a_flags & IO_NDELAY) || (!ISSET(ap->a_p->p_flag, P_TBE))) {
1595 #if QUOTA
1596                   off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
1597 #endif /* QUOTA */
1598                   // XXXdbg
1599                   hfs_global_shared_lock_acquire(hfsmp);
1600                         if (hfsmp->jnl) {
1601                                 if (journal_start_transaction(hfsmp->jnl) != 0) {
1602                                         retval = EINVAL;
1603                                         goto Err_Exit;
1604                                 }
1605                         }
1606
1607                         /* lock extents b-tree (also protects volume bitmap) */
1608                         retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
1609                         if (retval) {
1610                                 if (hfsmp->jnl) {
1611                                         journal_end_transaction(hfsmp->jnl);
1612                                 }
1613                                 hfs_global_shared_lock_release(hfsmp);
1614                                 goto Err_Exit;
1615                         }
1616
1617                         if (fp->ff_unallocblocks == 0)
1618                                 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
1619                                                 (FCB*)fp, length, false));
1620
1621                         (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
1622
1623                         // XXXdbg
1624                         if (hfsmp->jnl) {
1625                                 hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
1626                                 journal_end_transaction(hfsmp->jnl);
1627                         }
1628                         hfs_global_shared_lock_release(hfsmp);
1629
1630                         filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1631                         if (retval)
1632                                 goto Err_Exit;
1633 #if QUOTA
1634                         /* These are bytesreleased */
1635                         (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
1636 #endif /* QUOTA */
1637                 }
1638                 /* Only set update flag if the logical length changes */
1639                 if (fp->ff_size != length)
1640                         cp->c_flag |= C_UPDATE;
1641                 fp->ff_size = length;
1642         }
1643         cp->c_flag |= C_CHANGE;
1644         retval = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
1645         if (retval) {
1646                 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1647                      -1, -1, -1, retval, 0);
1648         }
1649
1650 Err_Exit:
1651
1652         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
1653                  (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
1654
1655         return (retval);
1656 }
1657
1658
1659
1660 /*
1661 #
1662 #% allocate     vp      L L L
1663 #
1664 vop_allocate {
1665         IN struct vnode *vp;
1666         IN off_t length;
1667         IN int flags;
1668         OUT off_t *bytesallocated;
1669         IN off_t offset;
1670         IN struct ucred *cred;
1671         IN struct proc *p;
1672 };
1673  * allocate a cnode to at most length size
1674  */
1675 int hfs_allocate(ap)
1676         struct vop_allocate_args /* {
1677                 struct vnode *a_vp;
1678                 off_t a_length;
1679                 u_int32_t  a_flags;
1680                 off_t *a_bytesallocated;
1681                 off_t a_offset;
1682                 struct ucred *a_cred;
1683                 struct proc *a_p;
1684         } */ *ap;
1685 {
1686         struct vnode *vp = ap->a_vp;
1687         struct cnode *cp = VTOC(vp);
1688         struct filefork *fp = VTOF(vp);
1689         off_t length = ap->a_length;
1690         off_t startingPEOF;
1691         off_t moreBytesRequested;
1692         off_t actualBytesAdded;
1693         off_t filebytes;
1694         u_long fileblocks;
1695         long vflags;
1696         struct timeval tv;
1697         int retval, retval2;
1698         UInt32 blockHint;
1699         UInt32 extendFlags =0;   /* For call to ExtendFileC */
1700         struct hfsmount *hfsmp;
1701
1702         hfsmp = VTOHFS(vp);
1703
1704         *(ap->a_bytesallocated) = 0;
1705         fileblocks = fp->ff_blocks;
1706         filebytes = (off_t)fileblocks * (off_t)VTOVCB(vp)->blockSize;
1707
1708         if (length < (off_t)0)
1709                 return (EINVAL);
1710         if (vp->v_type != VREG && vp->v_type != VLNK)
1711                 return (EISDIR);
1712         if ((ap->a_flags & ALLOCATEFROMVOL) && (length <= filebytes))
1713                 return (EINVAL);
1714
1715         /* Fill in the flags word for the call to Extend the file */
1716
1717         if (ap->a_flags & ALLOCATECONTIG)
1718                 extendFlags |= kEFContigMask;
1719
1720         if (ap->a_flags & ALLOCATEALL)
1721                 extendFlags |= kEFAllMask;
1722
1723         if (suser(ap->a_cred, NULL) != 0)
1724                 extendFlags |= kEFReserveMask;
1725
1726         tv = time;
1727         retval = E_NONE;
1728         blockHint = 0;
1729         startingPEOF = filebytes;
1730
1731         if (ap->a_flags & ALLOCATEFROMPEOF)
1732                 length += filebytes;
1733         else if (ap->a_flags & ALLOCATEFROMVOL)
1734                 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
1735
1736         /* If no changes are necesary, then we're done */
1737         if (filebytes == length)
1738                 goto Std_Exit;
1739
1740         /*
1741          * Lengthen the size of the file. We must ensure that the
1742          * last byte of the file is allocated. Since the smallest
1743          * value of filebytes is 0, length will be at least 1.
1744          */
1745         if (length > filebytes) {
1746                 moreBytesRequested = length - filebytes;
1747
1748 #if QUOTA
1749                 retval = hfs_chkdq(cp,
1750                                 (int64_t)(roundup(moreBytesRequested, VTOVCB(vp)->blockSize)),
1751                                 ap->a_cred, 0);
1752                 if (retval)
1753                         return (retval);
1754
1755 #endif /* QUOTA */
1756                 // XXXdbg
1757                 hfs_global_shared_lock_acquire(hfsmp);
1758                 if (hfsmp->jnl) {
1759                         if (journal_start_transaction(hfsmp->jnl) != 0) {
1760                                 retval = EINVAL;
1761                                 goto Err_Exit;
1762                         }
1763                 }
1764
1765                 /* lock extents b-tree (also protects volume bitmap) */
1766                 retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
1767                 if (retval) {
1768                         if (hfsmp->jnl) {
1769                                 journal_end_transaction(hfsmp->jnl);
1770                         }
1771                         hfs_global_shared_lock_release(hfsmp);
1772                         goto Err_Exit;
1773                 }
1774
1775                 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1776                                                 (FCB*)fp,
1777                                                 moreBytesRequested,
1778                                                 blockHint,
1779                                                 extendFlags,
1780                                                 &actualBytesAdded));
1781
1782                 *(ap->a_bytesallocated) = actualBytesAdded;
1783                 filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize;
1784
1785                 (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
1786
1787                 // XXXdbg
1788                 if (hfsmp->jnl) {
1789                         hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
1790                         journal_end_transaction(hfsmp->jnl);
1791                 }
1792                 hfs_global_shared_lock_release(hfsmp);
1793
1794                 /*
1795                  * if we get an error and no changes were made then exit
1796                  * otherwise we must do the VOP_UPDATE to reflect the changes
1797                  */
1798                 if (retval && (startingPEOF == filebytes))
1799                         goto Err_Exit;
1800
1801                 /*
1802                  * Adjust actualBytesAdded to be allocation block aligned, not
1803                  * clump size aligned.
1804                  * NOTE: So what we are reporting does not affect reality
1805                  * until the file is closed, when we truncate the file to allocation
1806                  * block size.
1807                  */
1808                 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
1809                         *(ap->a_bytesallocated) =
1810                                 roundup(moreBytesRequested, (off_t)VTOVCB(vp)->blockSize);
1811
1812         } else { /* Shorten the size of the file */
1813
1814                 if (fp->ff_size > length) {
1815                         /*
1816                          * Any buffers that are past the truncation point need to be
1817                          * invalidated (to maintain buffer cache consistency).  For
1818                          * simplicity, we invalidate all the buffers by calling vinvalbuf.
1819                          */
1820                         vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
1821                         (void) vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
1822                 }
1823
1824                 // XXXdbg
1825                 hfs_global_shared_lock_acquire(hfsmp);
1826                 if (hfsmp->jnl) {
1827                         if (journal_start_transaction(hfsmp->jnl) != 0) {
1828                                 retval = EINVAL;
1829                                 goto Err_Exit;
1830                         }
1831                 }
1832
1833                 /* lock extents b-tree (also protects volume bitmap) */
1834                 retval = hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
1835                 if (retval) {
1836                         if (hfsmp->jnl) {
1837                                 journal_end_transaction(hfsmp->jnl);
1838                         }
1839                         hfs_global_shared_lock_release(hfsmp);
1840
1841                         goto Err_Exit;
1842                 }
1843
1844                 retval = MacToVFSError(
1845                             TruncateFileC(
1846                                             VTOVCB(vp),
1847                                             (FCB*)fp,
1848                                             length,
1849                                             false));
1850                 (void) hfs_metafilelocking(VTOHFS(vp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
1851                 filebytes = (off_t)fp->ff_blocks * (off_t)VTOVCB(vp)->blockSize;
1852
1853                 if (hfsmp->jnl) {
1854                         hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
1855                         journal_end_transaction(hfsmp->jnl);
1856                 }
1857                 hfs_global_shared_lock_release(hfsmp);
1858
1859
1860                 /*
1861                  * if we get an error and no changes were made then exit
1862                  * otherwise we must do the VOP_UPDATE to reflect the changes
1863                  */
1864                 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
1865 #if QUOTA
1866                 /* These are  bytesreleased */
1867                 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
1868 #endif /* QUOTA */
1869
1870                 if (fp->ff_size > filebytes) {
1871                         fp->ff_size = filebytes;
1872
1873                         if (UBCISVALID(vp))
1874                                 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
1875                 }
1876         }
1877
1878 Std_Exit:
1879         cp->c_flag |= C_CHANGE | C_UPDATE;
1880         retval2 = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
1881
1882         if (retval == 0)
1883                 retval = retval2;
1884 Err_Exit:
1885         return (retval);
1886 }
1887
1888
1889 /*
1890  * pagein for HFS filesystem
1891  */
1892 int
1893 hfs_pagein(ap)
1894         struct vop_pagein_args /* {
1895                 struct vnode *a_vp,
1896                 upl_t         a_pl,
1897                 vm_offset_t   a_pl_offset,
1898                 off_t         a_f_offset,
1899                 size_t        a_size,
1900                 struct ucred *a_cred,
1901                 int           a_flags
1902         } */ *ap;
1903 {
1904         register struct vnode *vp = ap->a_vp;
1905         int devBlockSize = 0;
1906         int error;
1907
1908         if (vp->v_type != VREG && vp->v_type != VLNK)
1909                 panic("hfs_pagein: vp not UBC type\n");
1910
1911         VOP_DEVBLOCKSIZE(VTOC(vp)->c_devvp, &devBlockSize);
1912
1913         error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
1914                                 ap->a_size, (off_t)VTOF(vp)->ff_size, devBlockSize,
1915                                 ap->a_flags);
1916         return (error);
1917 }
1918
1919 /*
1920  * pageout for HFS filesystem.
1921  */
1922 int
1923 hfs_pageout(ap)
1924         struct vop_pageout_args /* {
1925            struct vnode *a_vp,
1926            upl_t         a_pl,
1927            vm_offset_t   a_pl_offset,
1928            off_t         a_f_offset,
1929            size_t        a_size,
1930            struct ucred *a_cred,
1931            int           a_flags
1932         } */ *ap;
1933 {
1934         struct vnode *vp = ap->a_vp;
1935         struct cnode *cp = VTOC(vp);
1936         struct filefork *fp = VTOF(vp);
1937         int retval;
1938         int devBlockSize = 0;
1939         off_t end_of_range;
1940         off_t filesize;
1941
1942         if (UBCINVALID(vp))
1943                 panic("hfs_pageout: Not a  VREG: vp=%x", vp);
1944
1945         VOP_DEVBLOCKSIZE(cp->c_devvp, &devBlockSize);
1946         filesize = fp->ff_size;
1947         end_of_range = ap->a_f_offset + ap->a_size - 1;
1948
1949         if (end_of_range >= filesize)
1950                 end_of_range = (off_t)(filesize - 1);
1951         if (ap->a_f_offset < filesize)
1952                 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
1953
1954         retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size,
1955                                  filesize, devBlockSize, ap->a_flags);
1956
1957         /*
1958          * If we successfully wrote any data, and we are not the superuser
1959          * we clear the setuid and setgid bits as a precaution against
1960          * tampering.
1961          */
1962         if (retval == 0 && ap->a_cred && ap->a_cred->cr_uid != 0)
1963                 cp->c_mode &= ~(S_ISUID | S_ISGID);
1964
1965         return (retval);
1966 }
1967
1968 /*
1969  * Intercept B-Tree node writes to unswap them if necessary.
1970 #
1971 #vop_bwrite {
1972 #       IN struct buf *bp;
1973  */
1974 int
1975 hfs_bwrite(ap)
1976         struct vop_bwrite_args /* {
1977                 struct buf *a_bp;
1978         } */ *ap;
1979 {
1980         int retval = 0;
1981         register struct buf *bp = ap->a_bp;
1982         register struct vnode *vp = bp->b_vp;
1983 #if BYTE_ORDER == LITTLE_ENDIAN
1984         BlockDescriptor block;
1985
1986         /* Trap B-Tree writes */
1987         if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
1988             (VTOC(vp)->c_fileid == kHFSCatalogFileID)) {
1989
1990                 /* Swap if the B-Tree node is in native byte order */
1991                 if (((UInt16 *)((char *)bp->b_data + bp->b_bcount - 2))[0] == 0x000e) {
1992                         /* Prepare the block pointer */
1993                         block.blockHeader = bp;
1994                         block.buffer = bp->b_data;
1995                         /* not found in cache ==> came from disk */
1996                         block.blockReadFromDisk = (bp->b_flags & B_CACHE) == 0;
1997                         block.blockSize = bp->b_bcount;
1998
1999                         /* Endian un-swap B-Tree node */
2000                         SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1);
2001                 }
2002
2003                 /* We don't check to make sure that it's 0x0e00 because it could be all zeros */
2004         }
2005 #endif
2006         /* This buffer shouldn't be locked anymore but if it is clear it */
2007         if (ISSET(bp->b_flags, B_LOCKED)) {
2008             // XXXdbg
2009             if (VTOHFS(vp)->jnl) {
2010                         panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
2011             }
2012                 CLR(bp->b_flags, B_LOCKED);
2013                 printf("hfs_bwrite: called with lock bit set\n");
2014         }
2015         retval = vn_bwrite (ap);
2016
2017         return (retval);
2018 }