bsd/hfs/hfs_readwrite.c

   1 /*
   2  * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * The contents of this file constitute Original Code as defined in and
   7  * are subject to the Apple Public Source License Version 1.1 (the
   8  * "License").  You may not use this file except in compliance with the
   9  * License.  Please obtain a copy of the License at
  10  * http://www.apple.com/publicsource and read it before using this file.
  11  *
  12  * This Original Code and all software distributed under the License are
  13  * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  14  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  15  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  16  * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
  17  * License for the specific language governing rights and limitations
  18  * under the License.
  19  *
  20  * @APPLE_LICENSE_HEADER_END@
  21  */
  22 /*      @(#)hfs_readwrite.c     1.0
  23  *
  24  *      (c) 1990, 1992 NeXT Computer, Inc.  All Rights Reserved
  25  *      (c) 1998       Apple Computer, Inc.  All Rights Reserved
  26  *
  27  *
  28  *      hfs_readwrite.c -- vnode operations to deal with reading and writing files.
  29  *
  30  *      MODIFICATION HISTORY:
  31  *       9-Nov-1999     Scott Roberts   hfs_allocate now returns sizes based on allocation block boundaries (#2398794)
  32  *       3-Feb-1999     Pat Dirks               Merged in Joe's change to hfs_truncate to skip vinvalbuf if LEOF isn't changing (#2302796)
  33  *                                                              Removed superfluous (and potentially dangerous) second call to vinvalbuf() in hfs_truncate.
  34  *       2-Dec-1998     Pat Dirks               Added support for read/write bootstrap ioctls.
  35  *      10-Nov-1998     Pat Dirks               Changed read/write/truncate logic to optimize block sizes for first extents of a file.
  36  *                              Changed hfs_strategy to correct I/O sizes from cluser code I/O requests in light of
  37  *                              different block sizing.  Changed bexpand to handle RELEASE_BUFFER flag.
  38  *      22-Sep-1998     Don Brady               Changed truncate zero-fill to use bwrite after several bawrites have been queued.
  39  *      11-Sep-1998     Pat Dirks               Fixed buffering logic to not rely on B_CACHE, which is set for empty buffers that
  40  *                                                              have been pre-read by cluster_read (use b_validend > 0 instead).
  41  *  27-Aug-1998 Pat Dirks               Changed hfs_truncate to use cluster_write in place of bawrite where possible.
  42  *      25-Aug-1998     Pat Dirks               Changed hfs_write to do small device-block aligned writes into buffers without doing
  43  *                                                              read-ahead of the buffer.  Added bexpand to deal with incomplete [dirty] buffers.
  44  *                                                              Fixed can_cluster macro to use MAXPHYSIO instead of MAXBSIZE.
  45  *      19-Aug-1998     Don Brady               Remove optimization in hfs_truncate that prevented extra physical blocks from
  46  *                                                              being truncated (radar #2265750). Also set fcb->fcbEOF before calling vinvalbuf.
  47  *       7-Jul-1998     Pat Dirks               Added code to honor IO_NOZEROFILL in hfs_truncate.
  48  *      16-Jul-1998     Don Brady               In hfs_bmap use MAXPHYSIO instead of MAXBSIZE when calling MapFileBlockC (radar #2263753).
  49  *      16-Jul-1998     Don Brady               Fix error handling in hfs_allocate (radar #2252265).
  50  *      04-Jul-1998     chw                             Synchronized options in hfs_allocate with flags in call to ExtendFileC
  51  *      25-Jun-1998     Don Brady               Add missing blockNo incrementing to zero fill loop in hfs_truncate.
  52  *      22-Jun-1998     Don Brady               Add bp = NULL assignment after brelse in hfs_read.
  53  *       4-Jun-1998     Pat Dirks               Split off from hfs_vnodeops.c
  54  */
  55
  56 #include <sys/param.h>
  57 #include <sys/systm.h>
  58 #include <sys/resourcevar.h>
  59 #include <sys/kernel.h>
  60 #include <sys/fcntl.h>
  61 #include <sys/stat.h>
  62 #include <sys/buf.h>
  63 #include <sys/proc.h>
  64 //#include <mach/machine/vm_types.h>
  65 #include <sys/vnode.h>
  66 #include <sys/uio.h>
  67
  68 #include <miscfs/specfs/specdev.h>
  69
  70 #include <sys/ubc.h>
  71 #include <vm/vm_pageout.h>
  72
  73 #include <sys/kdebug.h>
  74
  75 #include        "hfs.h"
  76 #include        "hfs_dbg.h"
  77 #include        "hfs_endian.h"
  78 #include        "hfscommon/headers/FileMgrInternal.h"
  79 #include        "hfscommon/headers/BTreesInternal.h"
  80
  81
  82 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
  83
  84 enum {
  85         MAXHFSFILESIZE = 0x7FFFFFFF             /* this needs to go in the mount structure */
  86 };
  87
  88 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
  89
  90 #if DBG_VOP_TEST_LOCKS
  91 extern void DbgVopTest(int maxSlots, int retval, VopDbgStoreRec *VopDbgStore, char *funcname);
  92 #endif
  93
  94 #if HFS_DIAGNOSTIC
  95 void debug_check_blocksizes(struct vnode *vp);
  96 #endif
  97
  98 /*****************************************************************************
  99 *
 100 *       Operations on vnodes
 101 *
 102 *****************************************************************************/
 103
 104 /*
 105 #% read         vp      L L L
 106 #
 107  vop_read {
 108      IN struct vnode *vp;
 109      INOUT struct uio *uio;
 110      IN int ioflag;
 111      IN struct ucred *cred;
 112
 113      */
 114
 115 int
 116 hfs_read(ap)
 117 struct vop_read_args /* {
 118     struct vnode *a_vp;
 119     struct uio *a_uio;
 120     int a_ioflag;
 121     struct ucred *a_cred;
 122 } */ *ap;
 123 {
 124     register struct vnode       *vp;
 125     struct hfsnode                      *hp;
 126     register struct uio         *uio;
 127     struct buf                          *bp;
 128     daddr_t                             logBlockNo;
 129     u_long                                      fragSize, moveSize, startOffset, ioxfersize;
 130     int                                         devBlockSize = 0;
 131     off_t                                       bytesRemaining;
 132     int                                         retval;
 133     u_short                             mode;
 134     FCB                                         *fcb;
 135
 136     DBG_FUNC_NAME("hfs_read");
 137     DBG_VOP_LOCKS_DECL(1);
 138     DBG_VOP_PRINT_FUNCNAME();
 139     DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
 140     DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
 141
 142     vp = ap->a_vp;
 143     hp = VTOH(vp);
 144     fcb = HTOFCB(hp);
 145     mode = hp->h_meta->h_mode;
 146     uio = ap->a_uio;
 147
 148 #if HFS_DIAGNOSTIC
 149     if (uio->uio_rw != UIO_READ)
 150         panic("%s: mode", funcname);
 151 #endif
 152
 153     /* Can only read files */
 154     if (ap->a_vp->v_type != VREG && ap->a_vp->v_type != VLNK) {
 155         DBG_VOP_LOCKS_TEST(EISDIR);
 156         return (EISDIR);
 157     }
 158     DBG_RW(("\tfile size Ox%X\n", (u_int)fcb->fcbEOF));
 159     DBG_RW(("\tstarting at offset Ox%X of file, length Ox%X\n", (u_int)uio->uio_offset, (u_int)uio->uio_resid));
 160
 161 #if HFS_DIAGNOSTIC
 162     debug_check_blocksizes(vp);
 163 #endif
 164
 165     /*
 166      * If they didn't ask for any data, then we are done.
 167      */
 168     if (uio->uio_resid == 0) {
 169         DBG_VOP_LOCKS_TEST(E_NONE);
 170         return (E_NONE);
 171     }
 172
 173     /* cant read from a negative offset */
 174     if (uio->uio_offset < 0) {
 175         DBG_VOP_LOCKS_TEST(EINVAL);
 176         return (EINVAL);
 177     }
 178
 179     if (uio->uio_offset > fcb->fcbEOF) {
 180         if ( (!ISHFSPLUS(VTOVCB(vp))) && (uio->uio_offset > (off_t)MAXHFSFILESIZE))
 181             retval = EFBIG;
 182         else
 183             retval = E_NONE;
 184
 185         DBG_VOP_LOCKS_TEST(retval);
 186         return (retval);
 187     }
 188
 189     VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
 190
 191     KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
 192                  (int)uio->uio_offset, uio->uio_resid, (int)fcb->fcbEOF,  (int)fcb->fcbPLen, 0);
 193
 194     if (UBCISVALID(vp))
 195         retval = cluster_read(vp, uio, (off_t)fcb->fcbEOF, devBlockSize, 0);
 196     else {
 197
 198         for (retval = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
 199
 200             if ((bytesRemaining = (fcb->fcbEOF - uio->uio_offset)) <= 0)
 201                 break;
 202
 203             logBlockNo  = (daddr_t)(uio->uio_offset / PAGE_SIZE_64);
 204             startOffset = (u_long) (uio->uio_offset & PAGE_MASK_64);
 205             fragSize    = PAGE_SIZE;
 206
 207             if (((logBlockNo * PAGE_SIZE) + fragSize) < fcb->fcbEOF)
 208                 ioxfersize = fragSize;
 209             else {
 210                 ioxfersize = fcb->fcbEOF - (logBlockNo * PAGE_SIZE);
 211                 ioxfersize = (ioxfersize + (devBlockSize - 1)) & ~(devBlockSize - 1);
 212             }
 213             DBG_RW(("\tat logBlockNo Ox%X, with Ox%lX left to read\n", logBlockNo, (UInt32)uio->uio_resid));
 214             moveSize = ioxfersize;
 215             DBG_RW(("\tmoveSize = Ox%lX; ioxfersize = Ox%lX; startOffset = Ox%lX.\n",
 216                     moveSize, ioxfersize, startOffset));
 217             DBG_ASSERT(moveSize >= startOffset);
 218             moveSize -= startOffset;
 219
 220             if (bytesRemaining < moveSize)
 221                 moveSize = bytesRemaining;
 222
 223             if (uio->uio_resid < moveSize) {
 224                 moveSize = uio->uio_resid;
 225                 DBG_RW(("\treducing moveSize to Ox%lX (uio->uio_resid).\n", moveSize));
 226             };
 227             if (moveSize == 0) {
 228                 break;
 229             };
 230
 231             DBG_RW(("\tat logBlockNo Ox%X, extent of Ox%lX, xfer of Ox%lX; moveSize = Ox%lX\n", logBlockNo, fragSize, ioxfersize, moveSize));
 232
 233             if (( uio->uio_offset + fragSize) >= fcb->fcbEOF) {
 234                 retval = bread(vp, logBlockNo, ioxfersize, NOCRED, &bp);
 235
 236             } else if (logBlockNo - 1 == vp->v_lastr && !(vp->v_flag & VRAOFF)) {
 237                 daddr_t nextLogBlockNo = logBlockNo + 1;
 238                 int nextsize;
 239
 240                 if (((nextLogBlockNo * PAGE_SIZE) +
 241                      (daddr_t)fragSize) < fcb->fcbEOF)
 242                     nextsize = fragSize;
 243                 else {
 244                     nextsize = fcb->fcbEOF - (nextLogBlockNo * PAGE_SIZE);
 245                     nextsize = (nextsize + (devBlockSize - 1)) & ~(devBlockSize - 1);
 246                 }
 247                 retval = breadn(vp, logBlockNo, ioxfersize, &nextLogBlockNo, &nextsize, 1, NOCRED, &bp);
 248             } else {
 249                 retval = bread(vp, logBlockNo, ioxfersize, NOCRED, &bp);
 250             };
 251
 252             if (retval != E_NONE) {
 253                 if (bp) {
 254                     brelse(bp);
 255                     bp = NULL;
 256                 }
 257                 break;
 258             };
 259             vp->v_lastr = logBlockNo;
 260
 261             /*
 262              * We should only get non-zero b_resid when an I/O retval
 263              * has occurred, which should cause us to break above.
 264              * However, if the short read did not cause an retval,
 265              * then we want to ensure that we do not uiomove bad
 266              * or uninitialized data.
 267              */
 268             ioxfersize -= bp->b_resid;
 269
 270             if (ioxfersize < moveSize) {                        /* XXX PPD This should take the offset into account, too! */
 271                 if (ioxfersize == 0)
 272                     break;
 273                 moveSize = ioxfersize;
 274             }
 275             if ((startOffset + moveSize) > bp->b_bcount)
 276                 panic("hfs_read: bad startOffset or moveSize\n");
 277
 278             DBG_RW(("\tcopying Ox%lX bytes from %lX; resid = Ox%lX...\n", moveSize, (char *)bp->b_data + startOffset, bp->b_resid));
 279
 280             if ((retval = uiomove((caddr_t)bp->b_data + startOffset, (int)moveSize, uio)))
 281                 break;
 282
 283             if (S_ISREG(mode) &&
 284                 (((startOffset + moveSize) == fragSize) || (uio->uio_offset == fcb->fcbEOF))) {
 285                 bp->b_flags |= B_AGE;
 286             };
 287
 288             DBG_ASSERT(bp->b_bcount == bp->b_validend);
 289
 290             brelse(bp);
 291             /* Start of loop resets bp to NULL before reaching outside this block... */
 292         }
 293
 294         if (bp != NULL) {
 295             DBG_ASSERT(bp->b_bcount == bp->b_validend);
 296             brelse(bp);
 297         };
 298     }
 299
 300     if (HTOVCB(hp)->vcbSigWord == kHFSPlusSigWord)
 301         hp->h_nodeflags |= IN_ACCESS;
 302
 303     DBG_VOP_LOCKS_TEST(retval);
 304
 305     #if HFS_DIAGNOSTIC
 306         debug_check_blocksizes(vp);
 307     #endif
 308
 309     KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
 310                 (int)uio->uio_offset, uio->uio_resid, (int)fcb->fcbEOF,  (int)fcb->fcbPLen, 0);
 311
 312     return (retval);
 313 }
 314
 315 /*
 316  * Write data to a file or directory.
 317 #% write        vp      L L L
 318 #
 319  vop_write {
 320      IN struct vnode *vp;
 321      INOUT struct uio *uio;
 322      IN int ioflag;
 323      IN struct ucred *cred;
 324
 325      */
 326 int
 327 hfs_write(ap)
 328 struct vop_write_args /* {
 329     struct vnode *a_vp;
 330     struct uio *a_uio;
 331     int a_ioflag;
 332     struct ucred *a_cred;
 333 } */ *ap;
 334 {
 335     struct hfsnode              *hp = VTOH(ap->a_vp);
 336     struct uio                  *uio = ap->a_uio;
 337     struct vnode                *vp = ap->a_vp ;
 338     struct vnode                *dev;
 339     struct buf                  *bp;
 340     struct proc                 *p, *cp;
 341     struct timeval tv;
 342     FCB                                 *fcb = HTOFCB(hp);
 343     ExtendedVCB                 *vcb = HTOVCB(hp);
 344     int                                 devBlockSize = 0;
 345     daddr_t                     logBlockNo;
 346     long                                fragSize;
 347     off_t                               origFileSize, currOffset, writelimit, bytesToAdd;
 348     off_t                               actualBytesAdded;
 349     u_long                              blkoffset, resid, xfersize, clearSize;
 350     int                                 flags, ioflag;
 351     int                                 retval;
 352     DBG_FUNC_NAME("hfs_write");
 353     DBG_VOP_LOCKS_DECL(1);
 354     DBG_VOP_PRINT_FUNCNAME();
 355     DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
 356     DBG_RW(("\thfsnode 0x%x (%s)\n", (u_int)hp, H_NAME(hp)));
 357     DBG_RW(("\tstarting at offset Ox%lX of file, length Ox%lX\n", (UInt32)uio->uio_offset, (UInt32)uio->uio_resid));
 358
 359     DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
 360
 361     dev = hp->h_meta->h_devvp;
 362
 363 #if HFS_DIAGNOSTIC
 364     debug_check_blocksizes(vp);
 365 #endif
 366
 367     if (uio->uio_offset < 0) {
 368         DBG_VOP_LOCKS_TEST(EINVAL);
 369         return (EINVAL);
 370     }
 371
 372     if (uio->uio_resid == 0) {
 373         DBG_VOP_LOCKS_TEST(E_NONE);
 374         return (E_NONE);
 375     }
 376
 377     if (ap->a_vp->v_type != VREG && ap->a_vp->v_type != VLNK) {         /* Can only write files */
 378         DBG_VOP_LOCKS_TEST(EISDIR);
 379         return (EISDIR);
 380     };
 381
 382 #if HFS_DIAGNOSTIC
 383         if (uio->uio_rw != UIO_WRITE)
 384                 panic("%s: mode", funcname);
 385 #endif
 386
 387     ioflag = ap->a_ioflag;
 388     uio = ap->a_uio;
 389     vp = ap->a_vp;
 390
 391     if (ioflag & IO_APPEND) uio->uio_offset = fcb->fcbEOF;
 392     if ((hp->h_meta->h_pflags & APPEND) && uio->uio_offset != fcb->fcbEOF)
 393         return (EPERM);
 394
 395         writelimit = uio->uio_offset + uio->uio_resid;
 396
 397     /*
 398     * Maybe this should be above the vnode op call, but so long as
 399     * file servers have no limits, I don't think it matters.
 400     */
 401     p = uio->uio_procp;
 402     if (vp->v_type == VREG && p &&
 403         writelimit > p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
 404         psignal(p, SIGXFSZ);
 405         return (EFBIG);
 406     };
 407     VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
 408
 409     resid = uio->uio_resid;
 410     origFileSize = fcb->fcbEOF;
 411     flags = ioflag & IO_SYNC ? B_SYNC : 0;
 412
 413     DBG_RW(("\tLEOF is 0x%lX, PEOF is 0x%lX.\n", fcb->fcbEOF, fcb->fcbPLen));
 414
 415     /*
 416     NOTE:       In the following loop there are two positions tracked:
 417     currOffset is the current I/O starting offset.  currOffset is never >LEOF; the
 418     LEOF is nudged along with currOffset as data is zeroed or written.
 419     uio->uio_offset is the start of the current I/O operation.  It may be arbitrarily
 420     beyond currOffset.
 421
 422     The following is true at all times:
 423
 424     currOffset <= LEOF <= uio->uio_offset <= writelimit
 425     */
 426     currOffset = MIN(uio->uio_offset, fcb->fcbEOF);
 427
 428     DBG_RW(("\tstarting I/O loop at 0x%lX.\n", (u_long)currOffset));
 429
 430     cp = current_proc();
 431
 432     KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
 433                 (int)uio->uio_offset, uio->uio_resid, (int)fcb->fcbEOF,  (int)fcb->fcbPLen, 0);
 434     retval = 0;
 435
 436     /* Now test if we need to extend the file */
 437     /* Doing so will adjust the fcbPLen for us */
 438
 439     while (writelimit > (off_t)fcb->fcbPLen) {
 440
 441         bytesToAdd = writelimit - fcb->fcbPLen;
 442         DBG_RW(("\textending file by 0x%lX bytes; 0x%lX blocks free",
 443                 (unsigned long)bytesToAdd, (unsigned long)vcb->freeBlocks));
 444
 445         /* lock extents b-tree (also protects volume bitmap) */
 446         retval = hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_EXCLUSIVE, cp);
 447         if (retval != E_NONE)
 448             break;
 449
 450         retval = MacToVFSError(
 451                             ExtendFileC (vcb,
 452                                             fcb,
 453                                             bytesToAdd,
 454                                             0,
 455                                             kEFContigBit,
 456                                             &actualBytesAdded));
 457
 458         (void) hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_RELEASE, cp);
 459         DBG_VOP_CONT(("\tactual bytes added = 0x%lX bytes, retval = %d...\n", actualBytesAdded, retval));
 460         if ((actualBytesAdded == 0) && (retval == E_NONE)) retval = ENOSPC;
 461         if (retval != E_NONE) break;
 462
 463         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
 464                     (int)uio->uio_offset, uio->uio_resid, (int)fcb->fcbEOF,  (int)fcb->fcbPLen, 0);
 465     };
 466
 467         if (UBCISVALID(vp) && retval == E_NONE) {
 468                 off_t filesize;
 469                 off_t zero_off;
 470                 off_t tail_off;
 471                 off_t inval_start;
 472                 off_t inval_end;
 473                 off_t io_start, io_end;
 474                 int lflag;
 475                 struct rl_entry *invalid_range;
 476
 477                 if (writelimit > fcb->fcbEOF)
 478                         filesize = writelimit;
 479                 else
 480                         filesize = fcb->fcbEOF;
 481
 482                 lflag = (ioflag & IO_SYNC);
 483
 484                 if (uio->uio_offset <= fcb->fcbEOF) {
 485                         zero_off = uio->uio_offset & ~PAGE_MASK_64;
 486
 487                         /* Check to see whether the area between the zero_offset and the start
 488                            of the transfer to see whether is invalid and should be zero-filled
 489                            as part of the transfer:
 490                          */
 491                         if (rl_scan(&hp->h_invalidranges, zero_off, uio->uio_offset - 1, &invalid_range) != RL_NOOVERLAP) {
 492                                 lflag |= IO_HEADZEROFILL;
 493                         };
 494                 } else {
 495                         off_t eof_page_base = fcb->fcbEOF & ~PAGE_MASK_64;
 496
 497                         /* The bytes between fcb->fcbEOF and uio->uio_offset must never be
 498                            read without being zeroed.  The current last block is filled with zeroes
 499                            if it holds valid data but in all cases merely do a little bookkeeping
 500                            to track the area from the end of the current last page to the start of
 501                            the area actually written.  For the same reason only the bytes up to the
 502                            start of the page where this write will start is invalidated; any remainder
 503                            before uio->uio_offset is explicitly zeroed as part of the cluster_write.
 504
 505                            Note that inval_start, the start of the page after the current EOF,
 506                            may be past the start of the write, in which case the zeroing
 507                            will be handled by the cluser_write of the actual data.
 508                          */
 509                         inval_start = (fcb->fcbEOF + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
 510                         inval_end = uio->uio_offset & ~PAGE_MASK_64;
 511                         zero_off = fcb->fcbEOF;
 512
 513                         if ((fcb->fcbEOF & PAGE_MASK_64) &&
 514                                 (rl_scan(&hp->h_invalidranges,
 515                                                         eof_page_base,
 516                                                         fcb->fcbEOF - 1,
 517                                                         &invalid_range) != RL_NOOVERLAP)) {
 518                                 /* The page containing the EOF is not valid, so the
 519                                    entire page must be made inaccessible now.  If the write
 520                                    starts on a page beyond the page containing the eof
 521                                    (inval_end > eof_page_base), add the
 522                                    whole page to the range to be invalidated.  Otherwise
 523                                    (i.e. if the write starts on the same page), zero-fill
 524                                    the entire page explicitly now:
 525                                  */
 526                                 if (inval_end > eof_page_base) {
 527                                         inval_start = eof_page_base;
 528                                 } else {
 529                                         zero_off = eof_page_base;
 530                                 };
 531                         };
 532
 533                         if (inval_start < inval_end) {
 534                                 /* There's some range of data that's going to be marked invalid */
 535
 536                                 if (zero_off < inval_start) {
 537                                         /* The pages between inval_start and inval_end are going to be invalidated,
 538                                            and the actual write will start on a page past inval_end.  Now's the last
 539                                            chance to zero-fill the page containing the EOF:
 540                                          */
 541                                         retval = cluster_write(vp, (struct uio *) 0, fcb->fcbEOF, inval_start,
 542                                                                                         zero_off, (off_t)0, devBlockSize, lflag | IO_HEADZEROFILL);
 543                                         if (retval) goto ioerr_exit;
 544                                 };
 545
 546                                 /* Mark the remaining area of the newly allocated space as invalid: */
 547                                 rl_add(inval_start, inval_end - 1 , &hp->h_invalidranges);
 548                                 zero_off = fcb->fcbEOF = inval_end;
 549                         };
 550
 551                         if (uio->uio_offset > zero_off) lflag |= IO_HEADZEROFILL;
 552                 };
 553
 554                 /* Check to see whether the area between the end of the write and the end of
 555                    the page it falls in is invalid and should be zero-filled as part of the transfer:
 556                  */
 557                 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
 558                 if (tail_off > filesize) tail_off = filesize;
 559                 if (tail_off > writelimit) {
 560                         if (rl_scan(&hp->h_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
 561                                 lflag |= IO_TAILZEROFILL;
 562                         };
 563                 };
 564
 565                 /*
 566                  * if the write starts beyond the current EOF (possibly advanced in the
 567                  * zeroing of the last block, above), then we'll zero fill from the current EOF
 568                  * to where the write begins:
 569                  *
 570                  * NOTE: If (and ONLY if) the portion of the file about to be written is
 571                  *       before the current EOF it might be marked as invalid now and must be
 572                  *       made readable (removed from the invalid ranges) before cluster_write
 573                  *       tries to write it:
 574                  */
 575                 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : uio->uio_offset;
 576                 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
 577                 if (io_start < fcb->fcbEOF) {
 578                         rl_remove(io_start, io_end - 1, &hp->h_invalidranges);
 579                 };
 580                 retval = cluster_write(vp, uio, fcb->fcbEOF, filesize, zero_off, tail_off, devBlockSize, lflag);
 581
 582                 if (uio->uio_offset > fcb->fcbEOF) {
 583                         fcb->fcbEOF = uio->uio_offset;
 584
 585                         ubc_setsize(vp, (off_t)fcb->fcbEOF);       /* XXX check errors */
 586                 }
 587                 if (resid > uio->uio_resid) hp->h_nodeflags |= IN_CHANGE | IN_UPDATE;
 588
 589     } else {
 590
 591         while (retval == E_NONE && uio->uio_resid > 0) {
 592             logBlockNo = currOffset / PAGE_SIZE;
 593             blkoffset  = currOffset & PAGE_MASK;
 594
 595             if (((off_t)(fcb->fcbPLen) - currOffset) < PAGE_SIZE_64)
 596                 fragSize = (off_t)(fcb->fcbPLen) - ((off_t)logBlockNo * PAGE_SIZE_64);
 597             else
 598                 fragSize = PAGE_SIZE;
 599             xfersize = fragSize - blkoffset;
 600
 601             DBG_RW(("\tcurrOffset = Ox%lX, logBlockNo = Ox%X, blkoffset = Ox%lX, xfersize = Ox%lX, fragSize = Ox%lX.\n",
 602                     (unsigned long)currOffset, logBlockNo, blkoffset, xfersize, fragSize));
 603
 604             /* Make any adjustments for boundary conditions */
 605             if (currOffset + (off_t)xfersize > writelimit) {
 606                 xfersize = writelimit - currOffset;
 607                 DBG_RW(("\ttrimming xfersize to 0x%lX to match writelimit (uio_resid)...\n", xfersize));
 608             };
 609
 610             /*
 611             * There is no need to read into bp if:
 612             * We start on a block boundary and will overwrite the whole block
 613             *
 614             *                                           OR
 615             */
 616             if ((blkoffset == 0) && (xfersize >= fragSize)) {
 617                 DBG_RW(("\tRequesting %ld-byte block Ox%lX w/o read...\n", fragSize, (long)logBlockNo));
 618
 619                 bp = getblk(vp, logBlockNo, fragSize, 0, 0, BLK_READ);
 620                 retval = 0;
 621
 622                 if (bp->b_blkno == -1) {
 623                     brelse(bp);
 624                     retval = EIO;               /* XXX */
 625                     break;
 626                 }
 627             } else {
 628
 629                 if (currOffset == fcb->fcbEOF && blkoffset == 0) {
 630                     bp = getblk(vp, logBlockNo, fragSize, 0, 0, BLK_READ);
 631                     retval = 0;
 632
 633                     if (bp->b_blkno == -1) {
 634                         brelse(bp);
 635                         retval = EIO;           /* XXX */
 636                         break;
 637                     }
 638
 639                 } else {
 640                     /*
 641                     * This I/O transfer is not sufficiently aligned, so read the affected block into a buffer:
 642                     */
 643                     DBG_VOP(("\tRequesting block Ox%X, size = 0x%08lX...\n", logBlockNo, fragSize));
 644                     retval = bread(vp, logBlockNo, fragSize, ap->a_cred, &bp);
 645
 646                     if (retval != E_NONE) {
 647                         if (bp)
 648                             brelse(bp);
 649                         break;
 650                     }
 651                 }
 652             }
 653
 654             /* See if we are starting to write within file boundaries:
 655                 If not, then we need to present a "hole" for the area between
 656                 the current EOF and the start of the current I/O operation:
 657
 658                 Note that currOffset is only less than uio_offset if uio_offset > LEOF...
 659                 */
 660             if (uio->uio_offset > currOffset) {
 661                 clearSize = MIN(uio->uio_offset - currOffset, xfersize);
 662                 DBG_RW(("\tzeroing Ox%lX bytes Ox%lX bytes into block Ox%X...\n", clearSize, blkoffset, logBlockNo));
 663                 bzero(bp->b_data + blkoffset, clearSize);
 664                 currOffset += clearSize;
 665                 blkoffset += clearSize;
 666                 xfersize -= clearSize;
 667             };
 668
 669             if (xfersize > 0) {
 670                 DBG_RW(("\tCopying Ox%lX bytes Ox%lX bytes into block Ox%X... ioflag == 0x%X\n",
 671                         xfersize, blkoffset, logBlockNo, ioflag));
 672                 retval = uiomove((caddr_t)bp->b_data + blkoffset, (int)xfersize, uio);
 673                 currOffset += xfersize;
 674             };
 675             DBG_ASSERT((bp->b_bcount % devBlockSize) == 0);
 676
 677             if (ioflag & IO_SYNC) {
 678                 (void)VOP_BWRITE(bp);
 679                 //DBG_RW(("\tissuing bwrite\n"));
 680             } else if ((xfersize + blkoffset) == fragSize) {
 681                 //DBG_RW(("\tissuing bawrite\n"));
 682                 bp->b_flags |= B_AGE;
 683                 bawrite(bp);
 684             } else {
 685                 //DBG_RW(("\tissuing bdwrite\n"));
 686                 bdwrite(bp);
 687             };
 688
 689             /* Update the EOF if we just extended the file
 690                 (the PEOF has already been moved out and the block mapping table has been updated): */
 691             if (currOffset > fcb->fcbEOF) {
 692                 DBG_VOP(("\textending EOF to 0x%lX...\n", (UInt32)fcb->fcbEOF));
 693                 fcb->fcbEOF = currOffset;
 694
 695                 if (UBCISVALID(vp))
 696                     ubc_setsize(vp, (off_t)fcb->fcbEOF); /* XXX check errors */
 697             };
 698
 699             if (retval || (resid == 0))
 700                 break;
 701             hp->h_nodeflags |= IN_CHANGE | IN_UPDATE;
 702         };
 703     };
 704
 705 ioerr_exit:
 706     /*
 707          * If we successfully wrote any data, and we are not the superuser
 708      * we clear the setuid and setgid bits as a precaution against
 709      * tampering.
 710      */
 711     if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0)
 712     hp->h_meta->h_mode &= ~(ISUID | ISGID);
 713
 714     if (retval) {
 715         if (ioflag & IO_UNIT) {
 716             (void)VOP_TRUNCATE(vp, origFileSize,
 717                             ioflag & IO_SYNC, ap->a_cred, uio->uio_procp);
 718             uio->uio_offset -= resid - uio->uio_resid;
 719             uio->uio_resid = resid;
 720         }
 721     } else if (resid > uio->uio_resid && (ioflag & IO_SYNC)) {
 722         tv = time;
 723         retval = VOP_UPDATE(vp, &tv, &tv, 1);
 724     }
 725
 726     #if HFS_DIAGNOSTIC
 727     debug_check_blocksizes(vp);
 728     #endif
 729
 730     KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
 731                 (int)uio->uio_offset, uio->uio_resid, (int)fcb->fcbEOF, (int)fcb->fcbPLen, 0);
 732
 733     DBG_VOP_LOCKS_TEST(retval);
 734     return (retval);
 735 }
 736
 737
 738 /*
 739
 740 #% ioctl        vp      U U U
 741 #
 742  vop_ioctl {
 743      IN struct vnode *vp;
 744      IN u_long command;
 745      IN caddr_t data;
 746      IN int fflag;
 747      IN struct ucred *cred;
 748      IN struct proc *p;
 749
 750      */
 751
 752
 753 /* ARGSUSED */
 754 int
 755 hfs_ioctl(ap)
 756 struct vop_ioctl_args /* {
 757     struct vnode *a_vp;
 758     int  a_command;
 759     caddr_t  a_data;
 760     int  a_fflag;
 761     struct ucred *a_cred;
 762     struct proc *a_p;
 763 } */ *ap;
 764 {
 765     DBG_FUNC_NAME("hfs_ioctl");
 766     DBG_VOP_LOCKS_DECL(1);
 767     DBG_VOP_PRINT_FUNCNAME();
 768     DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
 769
 770     DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_UNLOCKED, VOPDBG_UNLOCKED, VOPDBG_UNLOCKED, VOPDBG_POS);
 771
 772     switch (ap->a_command) {
 773
 774         case 1:
 775     {   register struct hfsnode *hp;
 776             register struct vnode *vp;
 777         register struct radvisory *ra;
 778         FCB *fcb;
 779         int devBlockSize = 0;
 780         int error;
 781
 782         vp = ap->a_vp;
 783
 784         VOP_LEASE(vp, ap->a_p, ap->a_cred, LEASE_READ);
 785         vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, ap->a_p);
 786
 787         ra = (struct radvisory *)(ap->a_data);
 788         hp = VTOH(vp);
 789
 790         fcb = HTOFCB(hp);
 791
 792         if (ra->ra_offset >= fcb->fcbEOF) {
 793             VOP_UNLOCK(vp, 0, ap->a_p);
 794             DBG_VOP_LOCKS_TEST(EFBIG);
 795             return (EFBIG);
 796         }
 797         VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
 798
 799         error = advisory_read(vp, fcb->fcbEOF, ra->ra_offset, ra->ra_count, devBlockSize);
 800         VOP_UNLOCK(vp, 0, ap->a_p);
 801
 802         DBG_VOP_LOCKS_TEST(error);
 803         return (error);
 804             }
 805
 806         case 2: /* F_READBOOTBLOCKS */
 807         case 3: /* F_WRITEBOOTBLOCKS */
 808             {
 809             struct vnode *vp = ap->a_vp;
 810             struct hfsnode *hp = VTOH(vp);
 811             struct fbootstraptransfer *btd = (struct fbootstraptransfer *)ap->a_data;
 812             int devBlockSize;
 813             int error;
 814             struct iovec aiov;
 815             struct uio auio;
 816             u_long blockNumber;
 817             u_long blockOffset;
 818             u_long xfersize;
 819             struct buf *bp;
 820
 821             if ((vp->v_flag & VROOT) == 0) return EINVAL;
 822             if (btd->fbt_offset + btd->fbt_length > 1024) return EINVAL;
 823
 824             aiov.iov_base = btd->fbt_buffer;
 825             aiov.iov_len = btd->fbt_length;
 826
 827             auio.uio_iov = &aiov;
 828             auio.uio_iovcnt = 1;
 829             auio.uio_offset = btd->fbt_offset;
 830             auio.uio_resid = btd->fbt_length;
 831             auio.uio_segflg = UIO_USERSPACE;
 832             auio.uio_rw = (ap->a_command == 3) ? UIO_WRITE : UIO_READ; /* F_WRITEBOOTSTRAP / F_READBOOTSTRAP */
 833             auio.uio_procp = ap->a_p;
 834
 835             VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
 836
 837             while (auio.uio_resid > 0) {
 838               blockNumber = auio.uio_offset / devBlockSize;
 839               error = bread(hp->h_meta->h_devvp, blockNumber, devBlockSize, ap->a_cred, &bp);
 840               if (error) {
 841                   if (bp) brelse(bp);
 842                   return error;
 843                 };
 844
 845                 blockOffset = auio.uio_offset % devBlockSize;
 846               xfersize = devBlockSize - blockOffset;
 847               error = uiomove((caddr_t)bp->b_data + blockOffset, (int)xfersize, &auio);
 848                 if (error) {
 849                   brelse(bp);
 850                   return error;
 851                 };
 852                 if (auio.uio_rw == UIO_WRITE) {
 853                   error = VOP_BWRITE(bp);
 854                   if (error) return error;
 855                 } else {
 856                   brelse(bp);
 857                 };
 858             };
 859         };
 860         return 0;
 861
 862         case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
 863             {
 864             *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(ap->a_vp)->localCreateDate);
 865             return 0;
 866             }
 867
 868         default:
 869             DBG_VOP_LOCKS_TEST(ENOTTY);
 870             return (ENOTTY);
 871     }
 872
 873     /* Should never get here */
 874         return 0;
 875 }
 876
 877 /* ARGSUSED */
 878 int
 879 hfs_select(ap)
 880 struct vop_select_args /* {
 881     struct vnode *a_vp;
 882     int  a_which;
 883     int  a_fflags;
 884     struct ucred *a_cred;
 885         void *a_wql;
 886     struct proc *a_p;
 887 } */ *ap;
 888 {
 889     DBG_FUNC_NAME("hfs_select");
 890     DBG_VOP_LOCKS_DECL(1);
 891     DBG_VOP_PRINT_FUNCNAME();
 892     DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
 893
 894     DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_POS);
 895
 896     /*
 897      * We should really check to see if I/O is possible.
 898      */
 899     DBG_VOP_LOCKS_TEST(1);
 900     return (1);
 901 }
 902
 903
 904
 905 /*
 906  * Mmap a file
 907  *
 908  * NB Currently unsupported.
 909 # XXX - not used
 910 #
 911  vop_mmap {
 912      IN struct vnode *vp;
 913      IN int fflags;
 914      IN struct ucred *cred;
 915      IN struct proc *p;
 916
 917      */
 918
 919 /* ARGSUSED */
 920
 921 int
 922 hfs_mmap(ap)
 923 struct vop_mmap_args /* {
 924     struct vnode *a_vp;
 925     int  a_fflags;
 926     struct ucred *a_cred;
 927     struct proc *a_p;
 928 } */ *ap;
 929 {
 930     DBG_FUNC_NAME("hfs_mmap");
 931     DBG_VOP_LOCKS_DECL(1);
 932     DBG_VOP_PRINT_FUNCNAME();
 933     DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
 934
 935     DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_POS);
 936
 937     DBG_VOP_LOCKS_TEST(EINVAL);
 938     return (EINVAL);
 939 }
 940
 941
 942
 943 /*
 944  * Seek on a file
 945  *
 946  * Nothing to do, so just return.
 947 # XXX - not used
 948 # Needs work: Is newoff right?  What's it mean?
 949 #
 950  vop_seek {
 951      IN struct vnode *vp;
 952      IN off_t oldoff;
 953      IN off_t newoff;
 954      IN struct ucred *cred;
 955      */
 956 /* ARGSUSED */
 957 int
 958 hfs_seek(ap)
 959 struct vop_seek_args /* {
 960     struct vnode *a_vp;
 961     off_t  a_oldoff;
 962     off_t  a_newoff;
 963     struct ucred *a_cred;
 964 } */ *ap;
 965 {
 966     DBG_FUNC_NAME("hfs_seek");
 967     DBG_VOP_LOCKS_DECL(1);
 968     DBG_VOP_PRINT_FUNCNAME();
 969     DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
 970
 971     DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_POS);
 972
 973     DBG_VOP_LOCKS_TEST(E_NONE);
 974     return (E_NONE);
 975 }
 976
 977
 978 /*
 979  * Bmap converts a the logical block number of a file to its physical block
 980  * number on the disk.
 981  */
 982
 983 /*
 984  * vp  - address of vnode file the file
 985  * bn  - which logical block to convert to a physical block number.
 986  * vpp - returns the vnode for the block special file holding the filesystem
 987  *       containing the file of interest
 988  * bnp - address of where to return the filesystem physical block number
 989 #% bmap         vp      L L L
 990 #% bmap         vpp     - U -
 991 #
 992  vop_bmap {
 993      IN struct vnode *vp;
 994      IN daddr_t bn;
 995      OUT struct vnode **vpp;
 996      IN daddr_t *bnp;
 997      OUT int *runp;
 998      */
 999 /*
1000  * Converts a logical block number to a physical block, and optionally returns
1001  * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1002  * The physical block number is based on the device block size, currently its 512.
1003  * The block run is returned in logical blocks, and is the REMAINING amount of blocks
1004  */
1005
1006 int
1007 hfs_bmap(ap)
1008 struct vop_bmap_args /* {
1009     struct vnode *a_vp;
1010     daddr_t a_bn;
1011     struct vnode **a_vpp;
1012     daddr_t *a_bnp;
1013     int *a_runp;
1014 } */ *ap;
1015 {
1016     struct hfsnode              *hp = VTOH(ap->a_vp);
1017     struct hfsmount     *hfsmp = VTOHFS(ap->a_vp);
1018     int                                 retval = E_NONE;
1019     daddr_t                             logBlockSize;
1020     size_t                              bytesContAvail = 0;
1021     off_t blockposition;
1022     struct proc                 *p = NULL;
1023     int                                 lockExtBtree;
1024     struct rl_entry *invalid_range;
1025     enum rl_overlaptype overlaptype;
1026
1027 #define DEBUG_BMAP 0
1028 #if DEBUG_BMAP
1029     DBG_FUNC_NAME("hfs_bmap");
1030     DBG_VOP_LOCKS_DECL(2);
1031     DBG_VOP_PRINT_FUNCNAME();
1032     DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);
1033
1034     DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
1035     if (ap->a_vpp != NULL) {
1036         DBG_VOP_LOCKS_INIT(1,*ap->a_vpp, VOPDBG_IGNORE, VOPDBG_UNLOCKED, VOPDBG_IGNORE, VOPDBG_POS);
1037     } else {
1038         DBG_VOP_LOCKS_INIT(1,NULL, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_IGNORE, VOPDBG_POS);
1039         };
1040 #endif
1041
1042         DBG_IO(("\tMapped blk %d --> ", ap->a_bn));
1043     /*
1044      * Check for underlying vnode requests and ensure that logical
1045      * to physical mapping is requested.
1046      */
1047     if (ap->a_vpp != NULL)
1048         *ap->a_vpp = VTOH(ap->a_vp)->h_meta->h_devvp;
1049     if (ap->a_bnp == NULL)
1050         return (0);
1051
1052     logBlockSize = GetLogicalBlockSize(ap->a_vp);
1053     blockposition = (off_t)(ap->a_bn * logBlockSize);
1054
1055     lockExtBtree = hasOverflowExtents(hp);
1056     if (lockExtBtree)
1057     {
1058         p = current_proc();
1059         retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p);
1060         if (retval)
1061             return (retval);
1062     }
1063
1064     retval = MacToVFSError(
1065                             MapFileBlockC (HFSTOVCB(hfsmp),
1066                                             HTOFCB(hp),
1067                                             MAXPHYSIO,
1068                                             blockposition,
1069                                             ap->a_bnp,
1070                                             &bytesContAvail));
1071
1072     if (lockExtBtree) (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
1073
1074     if (retval == E_NONE) {
1075         /* Adjust the mapping information for invalid file ranges: */
1076         overlaptype = rl_scan(&hp->h_invalidranges,
1077                             blockposition,
1078                             blockposition + MAXPHYSIO - 1,
1079                             &invalid_range);
1080         if (overlaptype != RL_NOOVERLAP) {
1081             switch(overlaptype) {
1082                 case RL_MATCHINGOVERLAP:
1083                 case RL_OVERLAPCONTAINSRANGE:
1084                 case RL_OVERLAPSTARTSBEFORE:
1085                     /* There's no valid block for this byte offset: */
1086                     *ap->a_bnp = (daddr_t)-1;
1087                     bytesContAvail = invalid_range->rl_end + 1 - blockposition;
1088                     break;
1089
1090                 case RL_OVERLAPISCONTAINED:
1091                 case RL_OVERLAPENDSAFTER:
1092                     /* The range of interest hits an invalid block before the end: */
1093                     if (invalid_range->rl_start == blockposition) {
1094                         /* There's actually no valid information to be had starting here: */
1095                         *ap->a_bnp = (daddr_t)-1;
1096                                                 if ((HTOFCB(hp)->fcbEOF > (invalid_range->rl_end + 1)) &&
1097                                                         (invalid_range->rl_end + 1 - blockposition < bytesContAvail)) {
1098                                 bytesContAvail = invalid_range->rl_end + 1 - blockposition;
1099                         };
1100                     } else {
1101                         bytesContAvail = invalid_range->rl_start - blockposition;
1102                     };
1103                     break;
1104             };
1105                         if (bytesContAvail > MAXPHYSIO) bytesContAvail = MAXPHYSIO;
1106         };
1107
1108         /* Figure out how many read ahead blocks there are */
1109         if (ap->a_runp != NULL) {
1110             if (can_cluster(logBlockSize)) {
1111                 /* Make sure this result never goes negative: */
1112                 *ap->a_runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1113             } else {
1114                 *ap->a_runp = 0;
1115             };
1116         };
1117     };
1118
1119     DBG_IO(("%d:%d.\n", *ap->a_bnp, (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1));
1120
1121 #if DEBUG_BMAP
1122
1123     DBG_VOP_LOCKS_TEST(retval);
1124 #endif
1125
1126     if (ap->a_runp) {
1127         DBG_ASSERT((*ap->a_runp * logBlockSize) < bytesContAvail);                                                      /* At least *ap->a_runp blocks left and ... */
1128         if (can_cluster(logBlockSize)) {
1129             DBG_ASSERT(bytesContAvail - (*ap->a_runp * logBlockSize) < (2*logBlockSize));       /* ... at most 1 logical block accounted for by current block */
1130                                                                                             /* ... plus some sub-logical block sized piece */
1131         };
1132     };
1133
1134     return (retval);
1135 }
1136
1137 /* blktooff converts logical block number to file offset */
1138
1139 int
1140 hfs_blktooff(ap)
1141 struct vop_blktooff_args /* {
1142     struct vnode *a_vp;
1143     daddr_t a_lblkno;
1144     off_t *a_offset;
1145 } */ *ap;
1146 {
1147         if (ap->a_vp == NULL)
1148                 return (EINVAL);
1149         *ap->a_offset = (off_t)ap->a_lblkno * PAGE_SIZE_64;
1150
1151         return(0);
1152 }
1153
1154 int
1155 hfs_offtoblk(ap)
1156 struct vop_offtoblk_args /* {
1157         struct vnode *a_vp;
1158         off_t a_offset;
1159         daddr_t *a_lblkno;
1160 } */ *ap;
1161 {
1162         long lbsize, boff;
1163
1164         if (ap->a_vp == NULL)
1165                 return (EINVAL);
1166         *ap->a_lblkno = ap->a_offset / PAGE_SIZE_64;
1167
1168         return(0);
1169 }
1170
1171 int
1172 hfs_cmap(ap)
1173 struct vop_cmap_args /* {
1174         struct vnode *a_vp;
1175         off_t a_foffset;
1176         size_t a_size;
1177         daddr_t *a_bpn;
1178         size_t *a_run;
1179         void *a_poff;
1180 } */ *ap;
1181 {
1182     struct hfsnode      *hp = VTOH(ap->a_vp);
1183     struct hfsmount     *hfsmp = VTOHFS(ap->a_vp);
1184     FCB                                 *fcb = HTOFCB(hp);
1185     size_t                              bytesContAvail = 0;
1186     int                 retval = E_NONE;
1187     int                                 lockExtBtree;
1188     struct proc         *p = NULL;
1189     struct rl_entry *invalid_range;
1190     enum rl_overlaptype overlaptype;
1191     off_t limit;
1192
1193 #define DEBUG_CMAP 0
1194 #if DEBUG_CMAP
1195     DBG_FUNC_NAME("hfs_cmap");
1196     DBG_VOP_LOCKS_DECL(2);
1197     DBG_VOP_PRINT_FUNCNAME();
1198     DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);
1199
1200     DBG_VOP_LOCKS_INIT(0, ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
1201 #endif
1202
1203     DBG_IO(("\tMapped offset %qx --> ", ap->a_foffset));
1204     /*
1205      * Check for underlying vnode requests and ensure that logical
1206      * to physical mapping is requested.
1207      */
1208     if (ap->a_bpn == NULL) {
1209         return (0);
1210     };
1211
1212     if (lockExtBtree = hasOverflowExtents(hp))
1213     {
1214         p = current_proc();
1215         if (retval = hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_EXCLUSIVE | LK_CANRECURSE, p)) {
1216             return (retval);
1217         };
1218     }
1219     retval = MacToVFSError(
1220                            MapFileBlockC (HFSTOVCB(hfsmp),
1221                                           fcb,
1222                                           ap->a_size,
1223                                           ap->a_foffset,
1224                                           ap->a_bpn,
1225                                           &bytesContAvail));
1226
1227     if (lockExtBtree) (void) hfs_metafilelocking(hfsmp, kHFSExtentsFileID, LK_RELEASE, p);
1228
1229     if (retval == E_NONE) {
1230         /* Adjust the mapping information for invalid file ranges: */
1231         overlaptype = rl_scan(&hp->h_invalidranges,
1232                             ap->a_foffset,
1233                             ap->a_foffset + (off_t)bytesContAvail - 1,
1234                             &invalid_range);
1235         if (overlaptype != RL_NOOVERLAP) {
1236             switch(overlaptype) {
1237                 case RL_MATCHINGOVERLAP:
1238                 case RL_OVERLAPCONTAINSRANGE:
1239                 case RL_OVERLAPSTARTSBEFORE:
1240                     /* There's no valid block for this byte offset: */
1241                     *ap->a_bpn = (daddr_t)-1;
1242
1243                     /* There's no point limiting the amount to be returned if the
1244                        invalid range that was hit extends all the way to the EOF
1245                        (i.e. there's no valid bytes between the end of this range
1246                        and the file's EOF):
1247                      */
1248                     if ((fcb->fcbEOF > (invalid_range->rl_end + 1)) &&
1249                                         (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1250                         bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1251                     };
1252                     break;
1253
1254                 case RL_OVERLAPISCONTAINED:
1255                 case RL_OVERLAPENDSAFTER:
1256                     /* The range of interest hits an invalid block before the end: */
1257                     if (invalid_range->rl_start == ap->a_foffset) {
1258                         /* There's actually no valid information to be had starting here: */
1259                         *ap->a_bpn = (daddr_t)-1;
1260                                                 if ((fcb->fcbEOF > (invalid_range->rl_end + 1)) &&
1261                                                         (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1262                                 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1263                         };
1264                     } else {
1265                         bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1266                     };
1267                     break;
1268             };
1269             if (bytesContAvail > ap->a_size) bytesContAvail = ap->a_size;
1270         };
1271
1272         if (ap->a_run) *ap->a_run = bytesContAvail;
1273     };
1274
1275     if (ap->a_poff) *(int *)ap->a_poff = 0;
1276
1277     DBG_IO(("%d:%d.\n", *ap->a_bpn, bytesContAvail));
1278
1279 #if DEBUG_BMAP
1280
1281     DBG_VOP_LOCKS_TEST(retval);
1282 #endif
1283
1284     return (retval);
1285
1286 }
1287
1288 /*
1289  * Calculate the logical to physical mapping if not done already,
1290  * then call the device strategy routine.
1291 #
1292 #vop_strategy {
1293 #       IN struct buf *bp;
1294     */
1295 int
1296 hfs_strategy(ap)
1297 struct vop_strategy_args /* {
1298     struct buf *a_bp;
1299 } */ *ap;
1300 {
1301     register struct buf *bp = ap->a_bp;
1302     register struct vnode *vp = bp->b_vp;
1303     register struct hfsnode *hp;
1304     int retval = 0;
1305
1306         DBG_FUNC_NAME("hfs_strategy");
1307
1308 //      DBG_VOP_PRINT_FUNCNAME();DBG_VOP_CONT(("\n"));
1309
1310     hp = VTOH(vp);
1311
1312     if ( !(bp->b_flags & B_VECTORLIST)) {
1313
1314         if (vp->v_type == VBLK || vp->v_type == VCHR)
1315             panic("hfs_strategy: device vnode passed!");
1316
1317         if (bp->b_flags & B_PAGELIST) {
1318             /*
1319              * if we have a page list associated with this bp,
1320              * then go through cluster_bp since it knows how to
1321              * deal with a page request that might span non-contiguous
1322              * physical blocks on the disk...
1323              */
1324             retval = cluster_bp(bp);
1325             vp = hp->h_meta->h_devvp;
1326             bp->b_dev = vp->v_rdev;
1327
1328             return (retval);
1329         }
1330         /*
1331          * If we don't already know the filesystem relative block number
1332          * then get it using VOP_BMAP().  If VOP_BMAP() returns the block
1333          * number as -1 then we've got a hole in the file.  Although HFS
1334          * filesystems don't create files with holes, invalidating of
1335          * subranges of the file (lazy zero filling) may create such a
1336          * situation.
1337          */
1338         if (bp->b_blkno == bp->b_lblkno) {
1339             if ((retval = VOP_BMAP(vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL))) {
1340                 bp->b_error = retval;
1341                 bp->b_flags |= B_ERROR;
1342                 biodone(bp);
1343                 return (retval);
1344             }
1345             if ((long)bp->b_blkno == -1)
1346                 clrbuf(bp);
1347         }
1348         if ((long)bp->b_blkno == -1) {
1349             biodone(bp);
1350             return (0);
1351         }
1352         if (bp->b_validend == 0) {
1353             /* Record the exact size of the I/O transfer about to be made: */
1354             DBG_ASSERT(bp->b_validoff == 0);
1355             bp->b_validend = bp->b_bcount;
1356             DBG_ASSERT(bp->b_dirtyoff == 0);
1357         };
1358     }
1359     vp = hp->h_meta->h_devvp;
1360     bp->b_dev = vp->v_rdev;
1361     DBG_IO(("\t\t>>>%s: continuing w/ vp: 0x%x with logBlk Ox%X and phyBlk Ox%X\n", funcname, (u_int)vp, bp->b_lblkno, bp->b_blkno));
1362
1363     return VOCALL (vp->v_op, VOFFSET(vop_strategy), ap);
1364 }
1365
1366
1367 /*
1368 #% reallocblks  vp      L L L
1369 #
1370  vop_reallocblks {
1371      IN struct vnode *vp;
1372      IN struct cluster_save *buflist;
1373
1374      */
1375
1376 int
1377 hfs_reallocblks(ap)
1378 struct vop_reallocblks_args /* {
1379     struct vnode *a_vp;
1380     struct cluster_save *a_buflist;
1381 } */ *ap;
1382 {
1383     DBG_FUNC_NAME("hfs_reallocblks");
1384     DBG_VOP_LOCKS_DECL(1);
1385     DBG_VOP_PRINT_FUNCNAME();
1386     DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
1387
1388     DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
1389
1390     /* Currently no support for clustering */           /* XXX */
1391     DBG_VOP_LOCKS_TEST(ENOSPC);
1392     return (ENOSPC);
1393 }
1394
1395
1396
1397 /*
1398 #
1399 #% truncate     vp      L L L
1400 #
1401 vop_truncate {
1402     IN struct vnode *vp;
1403     IN off_t length;
1404     IN int flags;       (IO_SYNC)
1405     IN struct ucred *cred;
1406     IN struct proc *p;
1407 };
1408  * Truncate the hfsnode hp to at most length size, freeing (or adding) the
1409  * disk blocks.
1410  */
1411 int hfs_truncate(ap)
1412     struct vop_truncate_args /* {
1413         struct vnode *a_vp;
1414         off_t a_length;
1415         int a_flags;
1416         struct ucred *a_cred;
1417         struct proc *a_p;
1418     } */ *ap;
1419 {
1420     register struct vnode *vp = ap->a_vp;
1421     register struct hfsnode *hp = VTOH(vp);
1422     off_t length = ap->a_length;
1423     long vflags;
1424     struct timeval tv;
1425     int retval;
1426     FCB *fcb;
1427     off_t bytesToAdd;
1428     off_t actualBytesAdded;
1429     DBG_FUNC_NAME("hfs_truncate");
1430     DBG_VOP_LOCKS_DECL(1);
1431     DBG_VOP_PRINT_FUNCNAME();
1432     DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
1433     DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
1434
1435 #if HFS_DIAGNOSTIC
1436     debug_check_blocksizes(ap->a_vp);
1437 #endif
1438
1439     fcb = HTOFCB(hp);
1440
1441     KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1442                  (int)length, fcb->fcbEOF, fcb->fcbPLen, 0, 0);
1443
1444     if (length < 0) {
1445         DBG_VOP_LOCKS_TEST(EINVAL);
1446         return (EINVAL);
1447     }
1448
1449     if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE)) {
1450         DBG_VOP_LOCKS_TEST(EFBIG);
1451         return (EFBIG);
1452     }
1453
1454     if (vp->v_type != VREG && vp->v_type != VLNK) {
1455         DBG_VOP_LOCKS_TEST(EISDIR);
1456         return (EISDIR);                /* hfs doesn't support truncating of directories */
1457     }
1458
1459     tv = time;
1460     retval = E_NONE;
1461
1462     DBG_RW(("%s: truncate from Ox%lX to Ox%X bytes\n", funcname, fcb->fcbPLen, length));
1463
1464     /*
1465      * we cannot just check if fcb->fcbEOF == length (as an optimization)
1466      * since there may be extra physical blocks that also need truncation
1467      */
1468
1469     /*
1470      * Lengthen the size of the file. We must ensure that the
1471      * last byte of the file is allocated. Since the smallest
1472      * value of fcbEOF is 0, length will be at least 1.
1473      */
1474     if (length > fcb->fcbEOF) {
1475                 off_t filePosition;
1476                 daddr_t logBlockNo;
1477                 long logBlockSize;
1478                 long blkOffset;
1479                 off_t bytestoclear;
1480                 int blockZeroCount;
1481                 struct buf *bp=NULL;
1482
1483         /*
1484          * If we don't have enough physical space then
1485          * we need to extend the physical size.
1486          */
1487         if (length > fcb->fcbPLen) {
1488             /* lock extents b-tree (also protects volume bitmap) */
1489             retval = hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
1490             if (retval)
1491                 goto Err_Exit;
1492
1493             while ((length > fcb->fcbPLen) && (retval == E_NONE)) {
1494                 bytesToAdd = length - fcb->fcbPLen;
1495                 retval = MacToVFSError(
1496                                        ExtendFileC (HTOVCB(hp),
1497                                                     fcb,
1498                                                     bytesToAdd,
1499                                                     0,
1500                                                     kEFAllMask, /* allocate all requested bytes or none */
1501                                                     &actualBytesAdded));
1502
1503                 if (actualBytesAdded == 0 && retval == E_NONE) {
1504                     if (length > fcb->fcbPLen)
1505                         length = fcb->fcbPLen;
1506                     break;
1507                 }
1508             }
1509             (void) hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
1510             if (retval)
1511                 goto Err_Exit;
1512
1513             DBG_ASSERT(length <= fcb->fcbPLen);
1514             KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1515                          (int)length, fcb->fcbEOF, fcb->fcbPLen, 0, 0);
1516         }
1517
1518         if (! (ap->a_flags & IO_NOZEROFILL)) {
1519
1520             if (UBCISVALID(vp) && retval == E_NONE) {
1521                         struct rl_entry *invalid_range;
1522                 int devBlockSize;
1523                         off_t zero_limit;
1524
1525                         zero_limit = (fcb->fcbEOF + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1526                         if (length < zero_limit) zero_limit = length;
1527
1528                         if (length > fcb->fcbEOF) {
1529                                 /* Extending the file: time to fill out the current last page w. zeroes? */
1530                                 if ((fcb->fcbEOF & PAGE_MASK_64) &&
1531                                         (rl_scan(&hp->h_invalidranges,
1532                                                          fcb->fcbEOF & ~PAGE_MASK_64,
1533                                                          fcb->fcbEOF - 1,
1534                                                          &invalid_range) == RL_NOOVERLAP)) {
1535
1536                                                 /* There's some valid data at the start of the (current) last page
1537                                                    of the file, so zero out the remainder of that page to ensure the
1538                                                    entire page contains valid data.  Since there is no invalid range
1539                                                    possible past the (current) eof, there's no need to remove anything
1540                                                    from the invalid range list before calling cluster_write():                                           */
1541                                                 VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
1542                                                 retval = cluster_write(vp, (struct uio *) 0, fcb->fcbEOF, zero_limit,
1543                                                                                                 fcb->fcbEOF, (off_t)0, devBlockSize, (ap->a_flags & IO_SYNC) | IO_HEADZEROFILL);
1544                                                 if (retval) goto Err_Exit;
1545
1546                                                 /* Merely invalidate the remaining area, if necessary: */
1547                                                 if (length > zero_limit) rl_add(zero_limit, length - 1, &hp->h_invalidranges);
1548                                 } else {
1549                                         /* The page containing the (current) eof is invalid: just add the
1550                                            remainder of the page to the invalid list, along with the area
1551                                            being newly allocated:
1552                                          */
1553                                         rl_add(fcb->fcbEOF, length - 1, &hp->h_invalidranges);
1554                                 };
1555                         }
1556             } else {
1557
1558 #if 0
1559                     /*
1560                      * zero out any new logical space...
1561                      */
1562                     bytestoclear = length - fcb->fcbEOF;
1563                     filePosition = fcb->fcbEOF;
1564
1565                     while (bytestoclear > 0) {
1566                         logBlockNo   = (daddr_t)(filePosition / PAGE_SIZE_64);
1567                         blkOffset    = (long)(filePosition & PAGE_MASK_64);
1568
1569                         if (((off_t)(fcb->fcbPLen) - ((off_t)logBlockNo * (off_t)PAGE_SIZE)) < PAGE_SIZE_64)
1570                             logBlockSize = (off_t)(fcb->fcbPLen) - ((off_t)logBlockNo * PAGE_SIZE_64);
1571                         else
1572                             logBlockSize = PAGE_SIZE;
1573
1574                         if (logBlockSize < blkOffset)
1575                             panic("hfs_truncate: bad logBlockSize computed\n");
1576
1577                         blockZeroCount = MIN(bytestoclear, logBlockSize - blkOffset);
1578
1579                         if (blkOffset == 0 && ((bytestoclear >= logBlockSize) || filePosition >= fcb->fcbEOF)) {
1580                             bp = getblk(vp, logBlockNo, logBlockSize, 0, 0, BLK_WRITE);
1581                             retval = 0;
1582
1583                         } else {
1584                             retval = bread(vp, logBlockNo, logBlockSize, ap->a_cred, &bp);
1585                             if (retval) {
1586                                 brelse(bp);
1587                                 goto Err_Exit;
1588                             }
1589                         }
1590                         bzero((char *)bp->b_data + blkOffset, blockZeroCount);
1591
1592                         bp->b_flags |= B_DIRTY | B_AGE;
1593
1594                         if (ap->a_flags & IO_SYNC)
1595                             VOP_BWRITE(bp);
1596                         else if (logBlockNo % 32)
1597                             bawrite(bp);
1598                         else
1599                             VOP_BWRITE(bp);     /* wait after we issue 32 requests */
1600
1601                         bytestoclear -= blockZeroCount;
1602                         filePosition += blockZeroCount;
1603                     }
1604 #else
1605                         panic("hfs_truncate: invoked on non-UBC object?!");
1606 #endif
1607             };
1608         }
1609         fcb->fcbEOF = length;
1610
1611         if (UBCISVALID(vp))
1612                 ubc_setsize(vp, (off_t)fcb->fcbEOF); /* XXX check errors */
1613
1614     } else { /* Shorten the size of the file */
1615
1616         if (fcb->fcbEOF > length) {
1617             /*
1618              * Any buffers that are past the truncation point need to be
1619              * invalidated (to maintain buffer cache consistency).  For
1620              * simplicity, we invalidate all the buffers by calling vinvalbuf.
1621              */
1622             if (UBCISVALID(vp))
1623                 ubc_setsize(vp, (off_t)length); /* XXX check errors */
1624
1625             vflags = ((length > 0) ? V_SAVE : 0)  | V_SAVEMETA;
1626             retval = vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
1627
1628             /* Any space previously marked as invalid is now irrelevant: */
1629             rl_remove(length, fcb->fcbEOF - 1, &hp->h_invalidranges);
1630         }
1631
1632         /*
1633          * For a TBE process the deallocation of the file blocks is
1634          * delayed until the file is closed.  And hfs_close calls
1635          * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
1636          * isn't set, we make sure this isn't a TBE process.
1637          */
1638         if ((ap->a_flags & IO_NDELAY) || (!ISSET(ap->a_p->p_flag, P_TBE))) {
1639
1640             /* lock extents b-tree (also protects volume bitmap) */
1641             retval = hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
1642             if (retval)
1643                 goto Err_Exit;
1644             retval = MacToVFSError(
1645                                TruncateFileC(
1646                                              HTOVCB(hp),
1647                                              fcb,
1648                                              length,
1649                                              false));
1650             (void) hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
1651             if (retval)
1652                 goto Err_Exit;
1653         }
1654         fcb->fcbEOF = length;
1655
1656         if (fcb->fcbFlags & fcbModifiedMask)
1657             hp->h_nodeflags |= IN_MODIFIED;
1658     }
1659     hp->h_nodeflags |= IN_CHANGE | IN_UPDATE;
1660     retval = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
1661     if (retval) {
1662         DBG_ERR(("Could not update truncate"));
1663                 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1664                      -1, -1, -1, retval, 0);
1665     }
1666 Err_Exit:;
1667
1668 #if HFS_DIAGNOSTIC
1669     debug_check_blocksizes(ap->a_vp);
1670 #endif
1671
1672     KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
1673                  (int)length, fcb->fcbEOF, fcb->fcbPLen, retval, 0);
1674
1675     DBG_VOP_LOCKS_TEST(retval);
1676     return (retval);
1677 }
1678
1679
1680
1681 /*
1682 #
1683 #% allocate     vp      L L L
1684 #
1685 vop_allocate {
1686         IN struct vnode *vp;
1687         IN off_t length;
1688         IN int flags;
1689         OUT off_t *bytesallocated;
1690         IN off_t offset;
1691         IN struct ucred *cred;
1692         IN struct proc *p;
1693 };
1694  * allocate the hfsnode hp to at most length size
1695  */
1696 int hfs_allocate(ap)
1697     struct vop_allocate_args /* {
1698         struct vnode *a_vp;
1699         off_t a_length;
1700         u_int32_t  a_flags;
1701         off_t *a_bytesallocated;
1702         off_t a_offset;
1703         struct ucred *a_cred;
1704         struct proc *a_p;
1705     } */ *ap;
1706 {
1707     register struct vnode *vp = ap->a_vp;
1708     register struct hfsnode *hp = VTOH(vp);
1709     off_t       length = ap->a_length;
1710     off_t       startingPEOF;
1711     off_t       moreBytesRequested;
1712     off_t       actualBytesAdded;
1713     long vflags;
1714     struct timeval tv;
1715     int retval, retval2;
1716     FCB *fcb;
1717     UInt32 blockHint;
1718     UInt32 extendFlags =0;   /* For call to ExtendFileC */
1719     DBG_FUNC_NAME("hfs_allocate");
1720     DBG_VOP_LOCKS_DECL(1);
1721     DBG_VOP_PRINT_FUNCNAME();
1722     DBG_VOP_PRINT_VNODE_INFO(ap->a_vp);DBG_VOP_CONT(("\n"));
1723     DBG_VOP_LOCKS_INIT(0,ap->a_vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
1724
1725     /* Set the number of bytes allocated to 0 so that the caller will know that we
1726        did nothing.  ExtendFileC will fill this in for us if we actually allocate space */
1727
1728     *(ap->a_bytesallocated) = 0;
1729     fcb = HTOFCB(hp);
1730
1731     /* Now for some error checking */
1732
1733     if (length < (off_t)0) {
1734         DBG_VOP_LOCKS_TEST(EINVAL);
1735         return (EINVAL);
1736     }
1737
1738     if (vp->v_type != VREG && vp->v_type != VLNK) {
1739         DBG_VOP_LOCKS_TEST(EISDIR);
1740         return (EISDIR);        /* hfs doesn't support truncating of directories */
1741     }
1742
1743     if ((ap->a_flags & ALLOCATEFROMVOL) && (length <= fcb->fcbPLen))
1744         return (EINVAL);
1745
1746     /* Fill in the flags word for the call to Extend the file */
1747
1748         if (ap->a_flags & ALLOCATECONTIG) {
1749                 extendFlags |= kEFContigMask;
1750         }
1751
1752     if (ap->a_flags & ALLOCATEALL) {
1753                 extendFlags |= kEFAllMask;
1754         }
1755
1756     tv = time;
1757     retval = E_NONE;
1758     blockHint = 0;
1759     startingPEOF = fcb->fcbPLen;
1760
1761     if (ap->a_flags & ALLOCATEFROMPEOF) {
1762                 length += fcb->fcbPLen;
1763         }
1764
1765         if (ap->a_flags & ALLOCATEFROMVOL)
1766                 blockHint = ap->a_offset / HTOVCB(hp)->blockSize;
1767
1768     /* If no changes are necesary, then we're done */
1769     if (fcb->fcbPLen == length)
1770         goto Std_Exit;
1771
1772     /*
1773     * Lengthen the size of the file. We must ensure that the
1774     * last byte of the file is allocated. Since the smallest
1775     * value of fcbPLen is 0, length will be at least 1.
1776     */
1777     if (length > fcb->fcbPLen) {
1778                 moreBytesRequested = length - fcb->fcbPLen;
1779
1780                 /* lock extents b-tree (also protects volume bitmap) */
1781                 retval = hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
1782                 if (retval) goto Err_Exit;
1783
1784                 retval = MacToVFSError(
1785                                                                 ExtendFileC(HTOVCB(hp),
1786                                                                                         fcb,
1787                                                                                         moreBytesRequested,
1788                                                                                         blockHint,
1789                                                                                         extendFlags,
1790                                                                                         &actualBytesAdded));
1791
1792                 *(ap->a_bytesallocated) = actualBytesAdded;
1793
1794                 (void) hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
1795
1796                 DBG_ASSERT(length <= fcb->fcbPLen);
1797
1798                 /*
1799                  * if we get an error and no changes were made then exit
1800                  * otherwise we must do the VOP_UPDATE to reflect the changes
1801                  */
1802         if (retval && (startingPEOF == fcb->fcbPLen)) goto Err_Exit;
1803
1804         /*
1805          * Adjust actualBytesAdded to be allocation block aligned, not
1806          * clump size aligned.
1807          * NOTE: So what we are reporting does not affect reality
1808          * until the file is closed, when we truncate the file to allocation
1809          * block size.
1810          */
1811
1812                 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
1813                         *(ap->a_bytesallocated) =
1814                                 roundup(moreBytesRequested, (off_t)VTOVCB(vp)->blockSize);
1815
1816     } else { /* Shorten the size of the file */
1817
1818         if (fcb->fcbEOF > length) {
1819                         /*
1820                          * Any buffers that are past the truncation point need to be
1821                          * invalidated (to maintain buffer cache consistency).  For
1822                          * simplicity, we invalidate all the buffers by calling vinvalbuf.
1823                          */
1824                         vflags = ((length > 0) ? V_SAVE : 0) | V_SAVEMETA;
1825                         (void) vinvalbuf(vp, vflags, ap->a_cred, ap->a_p, 0, 0);
1826                 }
1827
1828        /* lock extents b-tree (also protects volume bitmap) */
1829         retval = hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_EXCLUSIVE, ap->a_p);
1830         if (retval) goto Err_Exit;
1831
1832         retval = MacToVFSError(
1833                             TruncateFileC(
1834                                             HTOVCB(hp),
1835                                             fcb,
1836                                             length,
1837                                             false));
1838         (void) hfs_metafilelocking(HTOHFS(hp), kHFSExtentsFileID, LK_RELEASE, ap->a_p);
1839
1840                 /*
1841                  * if we get an error and no changes were made then exit
1842                  * otherwise we must do the VOP_UPDATE to reflect the changes
1843                  */
1844                 if (retval && (startingPEOF == fcb->fcbPLen)) goto Err_Exit;
1845         if (fcb->fcbFlags & fcbModifiedMask)
1846            hp->h_nodeflags |= IN_MODIFIED;
1847
1848         DBG_ASSERT(length <= fcb->fcbPLen)  // DEBUG DEBUG DEBUG DEBUG DEBUG DEBUG DEBUG
1849
1850         if (fcb->fcbEOF > fcb->fcbPLen) {
1851                         fcb->fcbEOF = fcb->fcbPLen;
1852
1853                         if (UBCISVALID(vp))
1854                                 ubc_setsize(vp, (off_t)fcb->fcbEOF); /* XXX check errors */
1855         }
1856     }
1857
1858 Std_Exit:
1859     hp->h_nodeflags |= IN_CHANGE | IN_UPDATE;
1860         retval2 = VOP_UPDATE(vp, &tv, &tv, MNT_WAIT);
1861
1862     if (retval == 0) retval = retval2;
1863
1864 Err_Exit:
1865     DBG_VOP_LOCKS_TEST(retval);
1866     return (retval);
1867 }
1868
1869
1870
1871
1872 /* pagein for HFS filesystem, similar to hfs_read(), but without cluster_read() */
1873 int
1874 hfs_pagein(ap)
1875         struct vop_pagein_args /* {
1876                 struct vnode *a_vp,
1877                 upl_t         a_pl,
1878                 vm_offset_t   a_pl_offset,
1879                 off_t         a_f_offset,
1880                 size_t        a_size,
1881                 struct ucred *a_cred,
1882                 int           a_flags
1883         } */ *ap;
1884 {
1885     register struct vnode *vp;
1886     struct hfsnode        *hp;
1887     FCB                   *fcb;
1888     int                         devBlockSize = 0;
1889     int                    retval;
1890
1891     DBG_FUNC_NAME("hfs_pagein");
1892     DBG_VOP_LOCKS_DECL(1);
1893     DBG_VOP_PRINT_FUNCNAME();
1894     DBG_VOP_PRINT_VNODE_INFO(vp);DBG_VOP_CONT(("\n"));
1895     DBG_VOP_LOCKS_INIT(0,vp, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
1896
1897     vp  = ap->a_vp;
1898     hp  = VTOH(vp);
1899     fcb = HTOFCB(hp);
1900
1901     if (vp->v_type != VREG && vp->v_type != VLNK)
1902         panic("hfs_pagein: vp not UBC type\n");
1903
1904     DBG_VOP(("\tfile size Ox%X\n", (u_int)fcb->fcbEOF));
1905     DBG_VOP(("\tstarting at offset Ox%X of file, length Ox%X\n", (u_int)ap->a_f_offset, (u_int)ap->a_size));
1906
1907 #if HFS_DIAGNOSTIC
1908     debug_check_blocksizes(vp);
1909 #endif
1910
1911     VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
1912
1913     retval = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
1914                          ap->a_size, (off_t)fcb->fcbEOF, devBlockSize,
1915                          ap->a_flags);
1916
1917 #if HFS_DIAGNOSTIC
1918     debug_check_blocksizes(vp);
1919 #endif
1920     DBG_VOP_LOCKS_TEST(retval);
1921
1922     return (retval);
1923 }
1924
1925 /*
1926  * pageout for HFS filesystem.
1927  */
1928 int
1929 hfs_pageout(ap)
1930         struct vop_pageout_args /* {
1931            struct vnode *a_vp,
1932            upl_t         a_pl,
1933            vm_offset_t   a_pl_offset,
1934            off_t         a_f_offset,
1935            size_t        a_size,
1936            struct ucred *a_cred,
1937            int           a_flags
1938         } */ *ap;
1939 {
1940         struct vnode    *vp = ap->a_vp;
1941         struct hfsnode  *hp =  VTOH(vp);
1942         FCB             *fcb = HTOFCB(hp);
1943         int              retval;
1944         int              devBlockSize = 0;
1945         off_t            end_of_range;
1946
1947         DBG_FUNC_NAME("hfs_pageout");
1948         DBG_VOP_LOCKS_DECL(1);
1949         DBG_VOP_PRINT_FUNCNAME();
1950         DBG_VOP_PRINT_VNODE_INFO(vp);DBG_VOP_CONT(("\n"));
1951         DBG_VOP(("\thfsnode 0x%x (%s)\n", (u_int)hp, H_NAME(hp)));
1952         DBG_VOP(("\tstarting at offset Ox%lX of file, length Ox%lX\n",
1953                 (UInt32)ap->a_f_offset, (UInt32)ap->a_size));
1954
1955         DBG_VOP_LOCKS_INIT(0, vp, VOPDBG_LOCKED,
1956                 VOPDBG_LOCKED, VOPDBG_LOCKED, VOPDBG_POS);
1957
1958 #if HFS_DIAGNOSTIC
1959         debug_check_blocksizes(vp);
1960 #endif
1961
1962         if (UBCINVALID(vp))
1963                 panic("hfs_pageout: Not a  VREG: vp=%x", vp);
1964
1965         VOP_DEVBLOCKSIZE(hp->h_meta->h_devvp, &devBlockSize);
1966
1967         end_of_range = ap->a_f_offset + ap->a_size - 1;
1968
1969         if (end_of_range >= (off_t)fcb->fcbEOF)
1970                 end_of_range = (off_t)(fcb->fcbEOF - 1);
1971
1972         if (ap->a_f_offset < (off_t)fcb->fcbEOF)
1973                 rl_remove(ap->a_f_offset, end_of_range, &hp->h_invalidranges);
1974
1975         retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset, ap->a_size,
1976                                  (off_t)fcb->fcbEOF, devBlockSize, ap->a_flags);
1977
1978         /*
1979          * If we successfully wrote any data, and we are not the superuser
1980          * we clear the setuid and setgid bits as a precaution against
1981          * tampering.
1982          */
1983         if (retval == 0 && ap->a_cred && ap->a_cred->cr_uid != 0)
1984                 hp->h_meta->h_mode &= ~(ISUID | ISGID);
1985
1986 #if HFS_DIAGNOSTIC
1987         debug_check_blocksizes(vp);
1988 #endif
1989
1990         DBG_VOP_LOCKS_TEST(retval);
1991         return (retval);
1992 }
1993
1994 /*
1995  * Intercept B-Tree node writes to unswap them if necessary.
1996 #
1997 #vop_bwrite {
1998 #       IN struct buf *bp;
1999  */
2000 int
2001 hfs_bwrite(ap)
2002 struct vop_bwrite_args /* {
2003     struct buf *a_bp;
2004 } */ *ap;
2005 {
2006     register struct buf *bp = ap->a_bp;
2007     register struct vnode *vp = bp->b_vp;
2008     BlockDescriptor block;
2009     int retval = 0;
2010
2011         DBG_FUNC_NAME("hfs_bwrite");
2012
2013 #if BYTE_ORDER == LITTLE_ENDIAN
2014     /* Trap B-Tree writes */
2015     if ((H_FILEID(VTOH(vp)) == kHFSExtentsFileID) ||
2016         (H_FILEID(VTOH(vp)) == kHFSCatalogFileID)) {
2017
2018         /* Swap if the B-Tree node is in native byte order */
2019         if (((UInt16 *)((char *)bp->b_data + bp->b_bcount - 2))[0] == 0x000e) {
2020             /* Prepare the block pointer */
2021             block.blockHeader = bp;
2022             block.buffer = bp->b_data + IOBYTEOFFSETFORBLK(bp->b_blkno, VTOHFS(vp)->hfs_phys_block_size);
2023             block.blockReadFromDisk = (bp->b_flags & B_CACHE) == 0;     /* not found in cache ==> came from disk */
2024             block.blockSize = bp->b_bcount;
2025
2026             /* Endian un-swap B-Tree node */
2027             SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), H_FILEID(VTOH(vp)), 1);
2028         }
2029
2030         /* We don't check to make sure that it's 0x0e00 because it could be all zeros */
2031     }
2032 #endif
2033
2034     retval = vn_bwrite (ap);
2035
2036     return (retval);
2037 }