1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* @(#)hfs_readwrite.c 1.0
23 *
24 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
25 *
26 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
27 *
28 */
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/resourcevar.h>
33 #include <sys/kernel.h>
34 #include <sys/fcntl.h>
35 #include <sys/filedesc.h>
36 #include <sys/stat.h>
37 #include <sys/buf.h>
38 #include <sys/proc.h>
39 #include <sys/kauth.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/vfs_context.h>
43
44 #include <miscfs/specfs/specdev.h>
45
46 #include <sys/ubc.h>
47 #include <vm/vm_pageout.h>
48 #include <vm/vm_kern.h>
49
50 #include <sys/kdebug.h>
51
52 #include "hfs.h"
53 #include "hfs_endian.h"
54 #include "hfs_fsctl.h"
55 #include "hfs_quota.h"
56 #include "hfscommon/headers/FileMgrInternal.h"
57 #include "hfscommon/headers/BTreesInternal.h"
58 #include "hfs_cnode.h"
59 #include "hfs_dbg.h"
60
61 extern int overflow_extents(struct filefork *fp);
62
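/* can_cluster: a block size qualifies for cluster I/O only if it is a multiple of 4K and no larger than MAXPHYSIO/2. */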
63 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
64
65 enum {
66 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
67 };
68
69 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
70
71 extern int hfs_setextendedsecurity(struct hfsmount *, int);
72
73
74 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
75 static int hfs_clonefile(struct vnode *, int, int, int);
76 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
77
78
79 /*****************************************************************************
80 *
81 * I/O Operations on vnodes
82 *
83 *****************************************************************************/
84 int hfs_vnop_read(struct vnop_read_args *);
85 int hfs_vnop_write(struct vnop_write_args *);
86 int hfs_vnop_ioctl(struct vnop_ioctl_args *);
87 int hfs_vnop_select(struct vnop_select_args *);
88 int hfs_vnop_blktooff(struct vnop_blktooff_args *);
89 int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
90 int hfs_vnop_blockmap(struct vnop_blockmap_args *);
91 int hfs_vnop_strategy(struct vnop_strategy_args *);
92 int hfs_vnop_allocate(struct vnop_allocate_args *);
93 int hfs_vnop_pagein(struct vnop_pagein_args *);
94 int hfs_vnop_pageout(struct vnop_pageout_args *);
95 int hfs_vnop_bwrite(struct vnop_bwrite_args *);
96
97
98 /*
99 * Read data from a file.
100 */
101 int
102 hfs_vnop_read(struct vnop_read_args *ap)
103 {
104 uio_t uio = ap->a_uio;
105 struct vnode *vp = ap->a_vp;
106 struct cnode *cp;
107 struct filefork *fp;
108 struct hfsmount *hfsmp;
109 off_t filesize;
110 off_t filebytes;
111 off_t start_resid = uio_resid(uio);
112 off_t offset = uio_offset(uio);
113 int retval = 0;
114
115
116 /* Preflight checks */
117 if (!vnode_isreg(vp)) {
118 /* can only read regular files */
119 if (vnode_isdir(vp))
120 return (EISDIR);
121 else
122 return (EPERM);
123 }
124 if (start_resid == 0)
125 return (0); /* Nothing left to do */
126 if (offset < 0)
127 return (EINVAL); /* can't read from a negative offset */
128
129 cp = VTOC(vp);
130 fp = VTOF(vp);
131 hfsmp = VTOHFS(vp);
132
133 /* Protect against a size change. */
134 hfs_lock_truncate(cp, 0);
135
136 filesize = fp->ff_size;
137 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
138 if (offset > filesize) {
139 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
140 (offset > (off_t)MAXHFSFILESIZE)) {
141 retval = EFBIG;
142 }
143 goto exit;
144 }
145
146 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
147 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
148
149 retval = cluster_read(vp, uio, filesize, 0);
150
151 cp->c_touch_acctime = TRUE;
152
153 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
154 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
155
156 /*
157 * Keep track of blocks read
158 */
159 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
160 int took_cnode_lock = 0;
161 off_t bytesread;
162
163 bytesread = start_resid - uio_resid(uio);
164
165 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
166 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
167 hfs_lock(cp, HFS_FORCE_LOCK);
168 took_cnode_lock = 1;
169 }
170 /*
171 * If this file hasn't been seen since the start of
172 * the current sampling period then start over.
173 */
174 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
175 struct timeval tv;
176
177 fp->ff_bytesread = bytesread;
178 microtime(&tv);
179 cp->c_atime = tv.tv_sec;
180 } else {
181 fp->ff_bytesread += bytesread;
182 }
183 if (took_cnode_lock)
184 hfs_unlock(cp);
185 }
186 exit:
187 hfs_unlock_truncate(cp);
188 return (retval);
189 }
190
191 /*
192 * Write data to a file.
193 */
194 int
195 hfs_vnop_write(struct vnop_write_args *ap)
196 {
197 uio_t uio = ap->a_uio;
198 struct vnode *vp = ap->a_vp;
199 struct cnode *cp;
200 struct filefork *fp;
201 struct hfsmount *hfsmp;
202 kauth_cred_t cred = NULL;
203 off_t origFileSize;
204 off_t writelimit;
205 off_t bytesToAdd;
206 off_t actualBytesAdded;
207 off_t filebytes;
208 off_t offset;
209 size_t resid;
210 int eflags;
211 int ioflag = ap->a_ioflag;
212 int retval = 0;
213 int lockflags;
214 int cnode_locked = 0;
215
216 // LP64todo - fix this! uio_resid may be a 64-bit value
217 resid = uio_resid(uio);
218 offset = uio_offset(uio);
219
220 if (offset < 0)
221 return (EINVAL);
222 if (resid == 0)
223 return (E_NONE);
224 if (!vnode_isreg(vp))
225 return (EPERM); /* Can only write regular files */
226
227 /* Protect against a size change. */
228 hfs_lock_truncate(VTOC(vp), TRUE);
229
230 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
231 hfs_unlock_truncate(VTOC(vp));
232 return (retval);
233 }
234 cnode_locked = 1;
235 cp = VTOC(vp);
236 fp = VTOF(vp);
237 hfsmp = VTOHFS(vp);
238 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
239
240 if (ioflag & IO_APPEND) {
241 uio_setoffset(uio, fp->ff_size);
242 offset = fp->ff_size;
243 }
244 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
245 retval = EPERM;
246 goto exit;
247 }
248
249 origFileSize = fp->ff_size;
250 eflags = kEFDeferMask; /* defer file block allocations */
251
252 #ifdef HFS_SPARSE_DEV
253 /*
254 * When the underlying device is sparse and space
255 * is low (< 8MB), stop doing delayed allocations
256 * and begin doing synchronous I/O.
257 */
258 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
259 (hfs_freeblks(hfsmp, 0) < 2048)) {
260 eflags &= ~kEFDeferMask;
261 ioflag |= IO_SYNC;
262 }
263 #endif /* HFS_SPARSE_DEV */
264
265 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
266 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
267
268 /* Now test if we need to extend the file */
269 /* Doing so will adjust the filebytes for us */
270
271 writelimit = offset + resid;
272 if (writelimit <= filebytes)
273 goto sizeok;
274
275 cred = vfs_context_ucred(ap->a_context);
276 #if QUOTA
277 bytesToAdd = writelimit - filebytes;
278 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
279 cred, 0);
280 if (retval)
281 goto exit;
282 #endif /* QUOTA */
283
284 if (hfs_start_transaction(hfsmp) != 0) {
285 retval = EINVAL;
286 goto exit;
287 }
288
289 while (writelimit > filebytes) {
290 bytesToAdd = writelimit - filebytes;
291 if (cred && suser(cred, NULL) != 0)
292 eflags |= kEFReserveMask;
293
294 /* Protect extents b-tree and allocation bitmap */
295 lockflags = SFL_BITMAP;
296 if (overflow_extents(fp))
297 lockflags |= SFL_EXTENTS;
298 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
299
300 /* Files that are changing size are not hot file candidates. */
301 if (hfsmp->hfc_stage == HFC_RECORDING) {
302 fp->ff_bytesread = 0;
303 }
304 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
305 0, eflags, &actualBytesAdded));
306
307 hfs_systemfile_unlock(hfsmp, lockflags);
308
309 if ((actualBytesAdded == 0) && (retval == E_NONE))
310 retval = ENOSPC;
311 if (retval != E_NONE)
312 break;
313 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
314 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
315 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
316 }
317 (void) hfs_update(vp, TRUE);
318 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
319 (void) hfs_end_transaction(hfsmp);
320
321 sizeok:
322 if (retval == E_NONE) {
323 off_t filesize;
324 off_t zero_off;
325 off_t tail_off;
326 off_t inval_start;
327 off_t inval_end;
328 off_t io_start;
329 int lflag;
330 struct rl_entry *invalid_range;
331
332 if (writelimit > fp->ff_size)
333 filesize = writelimit;
334 else
335 filesize = fp->ff_size;
336
337 lflag = (ioflag & IO_SYNC);
338
339 if (offset <= fp->ff_size) {
340 zero_off = offset & ~PAGE_MASK_64;
341
342 /* Check whether the area between zero_off and the start
343 of the transfer is invalid and should be zero-filled
344 as part of the transfer:
345 */
346 if (offset > zero_off) {
347 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
348 lflag |= IO_HEADZEROFILL;
349 }
350 } else {
351 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
352
353 /* The bytes between fp->ff_size and uio->uio_offset must never be
354 read without being zeroed. The current last block is filled with zeroes
355 if it holds valid data, but in all cases we merely do a little bookkeeping
356 to track the area from the end of the current last page to the start of
357 the area actually written. For the same reason only the bytes up to the
358 start of the page where this write will start is invalidated; any remainder
359 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
360
361 Note that inval_start, the start of the page after the current EOF,
362 may be past the start of the write, in which case the zeroing
363 will be handled by the cluster_write of the actual data.
364 */
365 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
366 inval_end = offset & ~PAGE_MASK_64;
367 zero_off = fp->ff_size;
368
369 if ((fp->ff_size & PAGE_MASK_64) &&
370 (rl_scan(&fp->ff_invalidranges,
371 eof_page_base,
372 fp->ff_size - 1,
373 &invalid_range) != RL_NOOVERLAP)) {
374 /* The page containing the EOF is not valid, so the
375 entire page must be made inaccessible now. If the write
376 starts on a page beyond the page containing the eof
377 (inval_end > eof_page_base), add the
378 whole page to the range to be invalidated. Otherwise
379 (i.e. if the write starts on the same page), zero-fill
380 the entire page explicitly now:
381 */
382 if (inval_end > eof_page_base) {
383 inval_start = eof_page_base;
384 } else {
385 zero_off = eof_page_base;
386 };
387 };
388
389 if (inval_start < inval_end) {
390 struct timeval tv;
391 /* There's some range of data that's going to be marked invalid */
392
393 if (zero_off < inval_start) {
394 /* The pages between inval_start and inval_end are going to be invalidated,
395 and the actual write will start on a page past inval_end. Now's the last
396 chance to zero-fill the page containing the EOF:
397 */
398 hfs_unlock(cp);
399 cnode_locked = 0;
400 retval = cluster_write(vp, (uio_t) 0,
401 fp->ff_size, inval_start,
402 zero_off, (off_t)0,
403 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
404 hfs_lock(cp, HFS_FORCE_LOCK);
405 cnode_locked = 1;
406 if (retval) goto ioerr_exit;
407 offset = uio_offset(uio);
408 };
409
410 /* Mark the remaining area of the newly allocated space as invalid: */
411 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
412 microuptime(&tv);
413 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
414 zero_off = fp->ff_size = inval_end;
415 };
416
417 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
418 };
419
420 /* Check to see whether the area between the end of the write and the end of
421 the page it falls in is invalid and should be zero-filled as part of the transfer:
422 */
423 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
424 if (tail_off > filesize) tail_off = filesize;
425 if (tail_off > writelimit) {
426 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
427 lflag |= IO_TAILZEROFILL;
428 };
429 };
430
431 /*
432 * if the write starts beyond the current EOF (possibly advanced in the
433 * zeroing of the last block, above), then we'll zero fill from the current EOF
434 * to where the write begins:
435 *
436 * NOTE: If (and ONLY if) the portion of the file about to be written is
437 * before the current EOF it might be marked as invalid now and must be
438 * made readable (removed from the invalid ranges) before cluster_write
439 * tries to write it:
440 */
441 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
442 if (io_start < fp->ff_size) {
443 off_t io_end;
444
445 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
446 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
447 };
448
449 hfs_unlock(cp);
450 cnode_locked = 0;
451 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
452 tail_off, lflag | IO_NOZERODIRTY);
453 offset = uio_offset(uio);
454 if (offset > fp->ff_size) {
455 fp->ff_size = offset;
456
457 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
458 /* Files that are changing size are not hot file candidates. */
459 if (hfsmp->hfc_stage == HFC_RECORDING)
460 fp->ff_bytesread = 0;
461 }
462 if (resid > uio_resid(uio)) {
463 cp->c_touch_chgtime = TRUE;
464 cp->c_touch_modtime = TRUE;
465 }
466 }
467 HFS_KNOTE(vp, NOTE_WRITE);
468
469 ioerr_exit:
470 /*
471 * If we successfully wrote any data, and we are not the superuser
472 * we clear the setuid and setgid bits as a precaution against
473 * tampering.
474 */
475 if (cp->c_mode & (S_ISUID | S_ISGID)) {
476 cred = vfs_context_ucred(ap->a_context);
477 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
478 if (!cnode_locked) {
479 hfs_lock(cp, HFS_FORCE_LOCK);
480 cnode_locked = 1;
481 }
482 cp->c_mode &= ~(S_ISUID | S_ISGID);
483 }
484 }
485 if (retval) {
486 if (ioflag & IO_UNIT) {
487 if (!cnode_locked) {
488 hfs_lock(cp, HFS_FORCE_LOCK);
489 cnode_locked = 1;
490 }
491 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
492 0, ap->a_context);
493 // LP64todo - fix this! resid needs to be user_ssize_t
494 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
495 uio_setresid(uio, resid);
496 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
497 }
498 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
499 if (!cnode_locked) {
500 hfs_lock(cp, HFS_FORCE_LOCK);
501 cnode_locked = 1;
502 }
503 retval = hfs_update(vp, TRUE);
504 }
505 /* Updating vcbWrCnt doesn't need to be atomic. */
506 hfsmp->vcbWrCnt++;
507
508 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
509 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
510 exit:
511 if (cnode_locked)
512 hfs_unlock(cp);
513 hfs_unlock_truncate(cp);
514 return (retval);
515 }
516
517 /* support for the "bulk-access" fcntl */
518
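/*
 * CACHE_ELEMS is the capacity of the per-call parent-directory access cache,
 * CACHE_LEVELS is the maximum number of ancestors remembered per access check,
 * and PARENT_IDS_FLAG indicates the caller passed parent ids rather than leaf file ids.
 */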
519 #define CACHE_ELEMS 64
520 #define CACHE_LEVELS 16
521 #define PARENT_IDS_FLAG 0x100
522
523 /* from hfs_attrlist.c */
524 extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
525 mode_t obj_mode, struct mount *mp,
526 kauth_cred_t cred, struct proc *p);
527
528 /* from vfs/vfs_fsevents.c */
529 extern char *get_pathbuff(void);
530 extern void release_pathbuff(char *buff);
531
532 struct access_cache {
533 int numcached;
534 int cachehits; /* these two for statistics gathering */
535 int lookups;
536 unsigned int *acache;
537 Boolean *haveaccess;
538 };
539
540 struct access_t {
541 uid_t uid; /* IN: effective user id */
542 short flags; /* IN: access requested (i.e. R_OK) */
543 short num_groups; /* IN: number of groups user belongs to */
544 int num_files; /* IN: number of files to process */
545 int *file_ids; /* IN: array of file ids */
546 gid_t *groups; /* IN: array of groups */
547 short *access; /* OUT: access info for each file (0 for 'has access') */
548 };
549
550 struct user_access_t {
551 uid_t uid; /* IN: effective user id */
552 short flags; /* IN: access requested (i.e. R_OK) */
553 short num_groups; /* IN: number of groups user belongs to */
554 int num_files; /* IN: number of files to process */
555 user_addr_t file_ids; /* IN: array of file ids */
556 user_addr_t groups; /* IN: array of groups */
557 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
558 };
559
560 /*
561 * Perform a binary search for the given parent_id. Return value is
562 * found/not found boolean, and indexp will be the index of the item
563 * or the index at which to insert the item if it's not found.
564 */
565 static int
566 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
567 {
568 unsigned int lo, hi;
569 int index, matches = 0;
570
571 if (cache->numcached == 0) {
572 *indexp = 0;
573 return 0; // table is empty, so insert at index=0 and report no match
574 }
575
576 if (cache->numcached > CACHE_ELEMS) {
577 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
578 cache->numcached, CACHE_ELEMS);*/
579 cache->numcached = CACHE_ELEMS;
580 }
581
582 lo = 0;
583 hi = cache->numcached - 1;
584 index = -1;
585
586 /* perform binary search for parent_id */
587 do {
588 unsigned int mid = (hi - lo)/2 + lo;
589 unsigned int this_id = cache->acache[mid];
590
591 if (parent_id == this_id) {
592 index = mid;
593 break;
594 }
595
596 if (parent_id < this_id) {
597 hi = mid;
598 continue;
599 }
600
601 if (parent_id > this_id) {
602 lo = mid + 1;
603 continue;
604 }
605 } while(lo < hi);
606
607 /* check if lo and hi converged on the match */
608 if (parent_id == cache->acache[hi]) {
609 index = hi;
610 }
611
612 /* if no existing entry found, find index for new one */
613 if (index == -1) {
614 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
615 matches = 0;
616 } else {
617 matches = 1;
618 }
619
620 *indexp = index;
621 return matches;
622 }
623
624 /*
625 * Add a node to the access_cache at the given index (or do a lookup first
626 * to find the index if -1 is passed in). We currently do a replace rather
627 * than an insert if the cache is full.
628 */
629 static void
630 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
631 {
632 int lookup_index = -1;
633
634 /* need to do a lookup first if -1 passed for index */
635 if (index == -1) {
636 if (lookup_bucket(cache, &lookup_index, nodeID)) {
637 if (cache->haveaccess[lookup_index] != access) {
638 /* change access info for existing entry... should never happen */
639 cache->haveaccess[lookup_index] = access;
640 }
641
642 /* mission accomplished */
643 return;
644 } else {
645 index = lookup_index;
646 }
647
648 }
649
650 /* if the cache is full, do a replace rather than an insert */
651 if (cache->numcached >= CACHE_ELEMS) {
652 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
653 cache->numcached = CACHE_ELEMS-1;
654
655 if (index > cache->numcached) {
656 // printf("index %d pinned to %d\n", index, cache->numcached);
657 index = cache->numcached;
658 }
659 } else if (index >= 0 && index < cache->numcached) {
660 /* only do bcopy if we're inserting */
661 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
662 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
663 }
664
665 cache->acache[index] = nodeID;
666 cache->haveaccess[index] = access;
667 cache->numcached++;
668 }
669
670
671 struct cinfo {
672 uid_t uid;
673 gid_t gid;
674 mode_t mode;
675 cnid_t parentcnid;
676 };
677
678 static int
679 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
680 {
681 struct cinfo *cip = (struct cinfo *)arg;
682
683 cip->uid = attrp->ca_uid;
684 cip->gid = attrp->ca_gid;
685 cip->mode = attrp->ca_mode;
686 cip->parentcnid = descp->cd_parentcnid;
687
688 return (0);
689 }
690
691 /*
692 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
693 * isn't incore, then go to the catalog.
694 */
695 static int
696 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
697 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
698 {
699 int error = 0;
700
701 /* if this id matches the one the fsctl was called with, skip the lookup */
702 if (cnid == skip_cp->c_cnid) {
703 cnattrp->ca_uid = skip_cp->c_uid;
704 cnattrp->ca_gid = skip_cp->c_gid;
705 cnattrp->ca_mode = skip_cp->c_mode;
706 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
707 } else {
708 struct cinfo c_info;
709
710 /* otherwise, check the cnode hash in case the file/dir is incore */
711 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
712 cnattrp->ca_uid = c_info.uid;
713 cnattrp->ca_gid = c_info.gid;
714 cnattrp->ca_mode = c_info.mode;
715 keyp->hfsPlus.parentID = c_info.parentcnid;
716 } else {
717 int lockflags;
718
719 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
720
721 /* lookup this cnid in the catalog */
722 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
723
724 hfs_systemfile_unlock(hfsmp, lockflags);
725
726 cache->lookups++;
727 }
728 }
729
730 return (error);
731 }
732
733 /*
734 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
735 * up to CACHE_LEVELS as we progress towards the root.
736 */
737 static int
738 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
739 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
740 {
741 int myErr = 0;
742 int myResult;
743 HFSCatalogNodeID thisNodeID;
744 unsigned long myPerms;
745 struct cat_attr cnattr;
746 int cache_index = -1;
747 CatalogKey catkey;
748
749 int i = 0, ids_to_cache = 0;
750 int parent_ids[CACHE_LEVELS];
751
752 /* root always has access */
753 if (!suser(myp_ucred, NULL)) {
754 return (1);
755 }
756
757 thisNodeID = nodeID;
758 while (thisNodeID >= kRootDirID) {
759 myResult = 0; /* default to "no access" */
760
761 /* check the cache before resorting to hitting the catalog */
762
763 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
764 * to look any further after hitting cached dir */
765
766 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
767 cache->cachehits++;
768 myResult = cache->haveaccess[cache_index];
769 goto ExitThisRoutine;
770 }
771
772 /* remember which parents we want to cache */
773 if (ids_to_cache < CACHE_LEVELS) {
774 parent_ids[ids_to_cache] = thisNodeID;
775 ids_to_cache++;
776 }
777
778 /* do the lookup (checks the cnode hash, then the catalog) */
779 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
780 if (myErr) {
781 goto ExitThisRoutine; /* no access */
782 }
783
784 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
785 cnattr.ca_mode, hfsmp->hfs_mp,
786 myp_ucred, theProcPtr);
787
788 if ( (myPerms & X_OK) == 0 ) {
789 myResult = 0;
790 goto ExitThisRoutine; /* no access */
791 }
792
793 /* up the hierarchy we go */
794 thisNodeID = catkey.hfsPlus.parentID;
795 }
796
797 /* if here, we have access to this node */
798 myResult = 1;
799
800 ExitThisRoutine:
801 if (myErr) {
802 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
803 myResult = 0;
804 }
805 *err = myErr;
806
807 /* cache the parent directory(ies) */
808 for (i = 0; i < ids_to_cache; i++) {
809 /* small optimization: get rid of double-lookup for all these */
810 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
811 add_node(cache, -1, parent_ids[i], myResult);
812 }
813
814 return (myResult);
815 }
816 /* end "bulk-access" support */
817
818
819
820 /*
821 * Callback for use with freeze ioctl.
822 */
823 static int
824 hfs_freezewrite_callback(struct vnode *vp, void *cargs)
825 {
826 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
827
828 return 0;
829 }
830
831 /*
832 * Control filesystem operating characteristics.
833 */
834 int
835 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
836 vnode_t a_vp;
837 int a_command;
838 caddr_t a_data;
839 int a_fflag;
840 vfs_context_t a_context;
841 } */ *ap)
842 {
843 struct vnode * vp = ap->a_vp;
844 struct hfsmount *hfsmp = VTOHFS(vp);
845 vfs_context_t context = ap->a_context;
846 kauth_cred_t cred = vfs_context_ucred(context);
847 proc_t p = vfs_context_proc(context);
848 struct vfsstatfs *vfsp;
849 boolean_t is64bit;
850
851 is64bit = proc_is64bit(p);
852
853 switch (ap->a_command) {
854
855 case HFS_RESIZE_VOLUME: {
856 u_int64_t newsize;
857 u_int64_t cursize;
858
859 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
860 if (suser(cred, NULL) &&
861 kauth_cred_getuid(cred) != vfsp->f_owner) {
862 return (EACCES); /* must be owner of file system */
863 }
864 if (!vnode_isvroot(vp)) {
865 return (EINVAL);
866 }
867 newsize = *(u_int64_t *)ap->a_data;
868 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
869
870 if (newsize > cursize) {
871 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
872 } else if (newsize < cursize) {
873 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
874 } else {
875 return (0);
876 }
877 }
878 case HFS_CHANGE_NEXT_ALLOCATION: {
879 u_int32_t location;
880
881 if (vnode_vfsisrdonly(vp)) {
882 return (EROFS);
883 }
884 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
885 if (suser(cred, NULL) &&
886 kauth_cred_getuid(cred) != vfsp->f_owner) {
887 return (EACCES); /* must be owner of file system */
888 }
889 if (!vnode_isvroot(vp)) {
890 return (EINVAL);
891 }
892 location = *(u_int32_t *)ap->a_data;
893 if (location > hfsmp->totalBlocks - 1) {
894 return (EINVAL);
895 }
896 /* Return previous value. */
897 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
898 HFS_MOUNT_LOCK(hfsmp, TRUE);
899 hfsmp->nextAllocation = location;
900 hfsmp->vcbFlags |= 0xFF00;
901 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
902 return (0);
903 }
904
905 #ifdef HFS_SPARSE_DEV
906 case HFS_SETBACKINGSTOREINFO: {
907 struct vnode * bsfs_rootvp;
908 struct vnode * di_vp;
909 struct hfs_backingstoreinfo *bsdata;
910 int error = 0;
911
912 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
913 return (EALREADY);
914 }
915 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
916 if (suser(cred, NULL) &&
917 kauth_cred_getuid(cred) != vfsp->f_owner) {
918 return (EACCES); /* must be owner of file system */
919 }
920 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
921 if (bsdata == NULL) {
922 return (EINVAL);
923 }
924 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
925 return (error);
926 }
927 if ((error = vnode_getwithref(di_vp))) {
928 file_drop(bsdata->backingfd);
929 return(error);
930 }
931
932 if (vnode_mount(vp) == vnode_mount(di_vp)) {
933 (void)vnode_put(di_vp);
934 file_drop(bsdata->backingfd);
935 return (EINVAL);
936 }
937
938 /*
939 * Obtain the backing fs root vnode and keep a reference
940 * on it. This reference will be dropped in hfs_unmount.
941 */
942 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
943 if (error) {
944 (void)vnode_put(di_vp);
945 file_drop(bsdata->backingfd);
946 return (error);
947 }
948 vnode_ref(bsfs_rootvp);
949 vnode_put(bsfs_rootvp);
950
951 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
952 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
953 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
954 hfsmp->hfs_sparsebandblks *= 4;
955
956 (void)vnode_put(di_vp);
957 file_drop(bsdata->backingfd);
958 return (0);
959 }
960 case HFS_CLRBACKINGSTOREINFO: {
961 struct vnode * tmpvp;
962
963 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
964 if (suser(cred, NULL) &&
965 kauth_cred_getuid(cred) != vfsp->f_owner) {
966 return (EACCES); /* must be owner of file system */
967 }
968 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
969 hfsmp->hfs_backingfs_rootvp) {
970
971 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
972 tmpvp = hfsmp->hfs_backingfs_rootvp;
973 hfsmp->hfs_backingfs_rootvp = NULLVP;
974 hfsmp->hfs_sparsebandblks = 0;
975 vnode_rele(tmpvp);
976 }
977 return (0);
978 }
979 #endif /* HFS_SPARSE_DEV */
980
981 case F_FREEZE_FS: {
982 struct mount *mp;
983 task_t task;
984
985 if (!is_suser())
986 return (EACCES);
987
988 mp = vnode_mount(vp);
989 hfsmp = VFSTOHFS(mp);
990
991 if (!(hfsmp->jnl))
992 return (ENOTSUP);
993
994 task = current_task();
995 task_working_set_disable(task);
996
997 // flush things before we get started to try and prevent
998 // dirty data from being paged out while we're frozen.
999 // note: can't do this after taking the lock as it will
1000 // deadlock against ourselves.
1001 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1002 hfs_global_exclusive_lock_acquire(hfsmp);
1003 journal_flush(hfsmp->jnl);
1004 // don't need to iterate on all vnodes, we just need to
1005 // wait for writes to the system files and the device vnode
1006 // vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1007 if (HFSTOVCB(hfsmp)->extentsRefNum)
1008 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1009 if (HFSTOVCB(hfsmp)->catalogRefNum)
1010 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1011 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1012 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1013 if (hfsmp->hfs_attribute_vp)
1014 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1015 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1016
1017 hfsmp->hfs_freezing_proc = current_proc();
1018
1019 return (0);
1020 }
1021
1022 case F_THAW_FS: {
1023 if (!is_suser())
1024 return (EACCES);
1025
1026 // if we're not the one who froze the fs then we
1027 // can't thaw it.
1028 if (hfsmp->hfs_freezing_proc != current_proc()) {
1029 return EINVAL;
1030 }
1031
1032 // NOTE: if you add code here, also go check the
1033 // code that "thaws" the fs in hfs_vnop_close()
1034 //
1035 hfsmp->hfs_freezing_proc = NULL;
1036 hfs_global_exclusive_lock_release(hfsmp);
1037
1038 return (0);
1039 }
1040
1041 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1042 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1043
1044 case HFS_BULKACCESS_FSCTL:
1045 case HFS_BULKACCESS: {
1046 /*
1047 * NOTE: on entry, the vnode is locked. In case this vnode
1048 * happens to be in our list of file_ids, we'll note it to
1049 * avoid calling hfs_chashget_nowait() on that id as that
1050 * will cause a "locking against myself" panic.
1051 */
1052 Boolean check_leaf = true;
1053
1054 struct user_access_t *user_access_structp;
1055 struct user_access_t tmp_user_access_t;
1056 struct access_cache cache;
1057
1058 int error = 0, i;
1059
1060 dev_t dev = VTOC(vp)->c_dev;
1061
1062 short flags;
1063 struct ucred myucred; /* XXX ILLEGAL */
1064 int num_files;
1065 int *file_ids = NULL;
1066 short *access = NULL;
1067
1068 cnid_t cnid;
1069 cnid_t prevParent_cnid = 0;
1070 unsigned long myPerms;
1071 short myaccess = 0;
1072 struct cat_attr cnattr;
1073 CatalogKey catkey;
1074 struct cnode *skip_cp = VTOC(vp);
1075 struct vfs_context my_context;
1076
1077 /* first, return error if not run as root */
1078 if (cred->cr_ruid != 0) {
1079 return EPERM;
1080 }
1081
1082 /* initialize the local cache and buffers */
1083 cache.numcached = 0;
1084 cache.cachehits = 0;
1085 cache.lookups = 0;
1086
1087 file_ids = (int *) get_pathbuff();
1088 access = (short *) get_pathbuff();
1089 cache.acache = (int *) get_pathbuff();
1090 cache.haveaccess = (Boolean *) get_pathbuff();
1091
1092 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1093 release_pathbuff((char *) file_ids);
1094 release_pathbuff((char *) access);
1095 release_pathbuff((char *) cache.acache);
1096 release_pathbuff((char *) cache.haveaccess);
1097
1098 return ENOMEM;
1099 }
1100
1101 /* struct copyin done during dispatch... need to copy file_id array separately */
1102 if (ap->a_data == NULL) {
1103 error = EINVAL;
1104 goto err_exit_bulk_access;
1105 }
1106
1107 if (is64bit) {
1108 user_access_structp = (struct user_access_t *)ap->a_data;
1109 }
1110 else {
1111 struct access_t * accessp = (struct access_t *)ap->a_data;
1112 tmp_user_access_t.uid = accessp->uid;
1113 tmp_user_access_t.flags = accessp->flags;
1114 tmp_user_access_t.num_groups = accessp->num_groups;
1115 tmp_user_access_t.num_files = accessp->num_files;
1116 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1117 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1118 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1119 user_access_structp = &tmp_user_access_t;
1120 }
1121
1122 num_files = user_access_structp->num_files;
1123 if (num_files < 1) {
1124 goto err_exit_bulk_access;
1125 }
1126 if (num_files > 256) {
1127 error = EINVAL;
1128 goto err_exit_bulk_access;
1129 }
1130
1131 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1132 num_files * sizeof(int)))) {
1133 goto err_exit_bulk_access;
1134 }
1135
1136 /* fill in the ucred structure */
1137 flags = user_access_structp->flags;
1138 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1139 flags = R_OK;
1140 }
1141
1142 /* check if we've been passed leaf node ids or parent ids */
1143 if (flags & PARENT_IDS_FLAG) {
1144 check_leaf = false;
1145 }
1146
1147 memset(&myucred, 0, sizeof(myucred));
1148 myucred.cr_ref = 1;
1149 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1150 myucred.cr_ngroups = user_access_structp->num_groups;
1151 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1152 myucred.cr_ngroups = 0;
1153 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1154 myucred.cr_ngroups * sizeof(gid_t)))) {
1155 goto err_exit_bulk_access;
1156 }
1157 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1158
1159 my_context.vc_proc = p;
1160 my_context.vc_ucred = &myucred;
1161
1162 /* Check access to each file_id passed in */
1163 for (i = 0; i < num_files; i++) {
1164 #if 0
1165 cnid = (cnid_t) file_ids[i];
1166
1167 /* root always has access */
1168 if (!suser(&myucred, NULL)) {
1169 access[i] = 0;
1170 continue;
1171 }
1172
1173 if (check_leaf) {
1174
1175 /* do the lookup (checks the cnode hash, then the catalog) */
1176 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1177 if (error) {
1178 access[i] = (short) error;
1179 continue;
1180 }
1181
1182 /* before calling CheckAccess(), check the target file for read access */
1183 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1184 cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p );
1185
1186
1187 /* fail fast if no access */
1188 if ((myPerms & flags) == 0) {
1189 access[i] = EACCES;
1190 continue;
1191 }
1192 } else {
1193 /* we were passed an array of parent ids */
1194 catkey.hfsPlus.parentID = cnid;
1195 }
1196
1197 /* if the last guy had the same parent and had access, we're done */
1198 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1199 cache.cachehits++;
1200 access[i] = 0;
1201 continue;
1202 }
1203
1204 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1205 skip_cp, p, &myucred, dev);
1206
1207 if ( myaccess ) {
1208 access[i] = 0; // have access.. no errors to report
1209 } else {
1210 access[i] = (error != 0 ? (short) error : EACCES);
1211 }
1212
1213 prevParent_cnid = catkey.hfsPlus.parentID;
1214 #else
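/* Walk from this file id up toward the root: authorize read access on the file itself and search access (falling back to read) on each ancestor directory via vnode_authorize(). */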
1215 int myErr;
1216
1217 cnid = (cnid_t)file_ids[i];
1218
1219 while (cnid >= kRootDirID) {
1220 /* get the vnode for this cnid */
1221 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1222 if ( myErr ) {
1223 access[i] = EACCES;
1224 break;
1225 }
1226
1227 cnid = VTOC(vp)->c_parentcnid;
1228
1229 hfs_unlock(VTOC(vp));
1230 if (vnode_vtype(vp) == VDIR) {
1231 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_SEARCH, &my_context);
1232 if (myErr) {
1233 // try again with just read-access
1234 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1235 }
1236 } else {
1237 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1238 }
1239 vnode_put(vp);
1240 access[i] = myErr;
1241 if (myErr) {
1242 break;
1243 }
1244 }
1245 #endif
1246 }
1247
1248 /* copyout the access array */
1249 if ((error = copyout((caddr_t)access, user_access_structp->access,
1250 num_files * sizeof (short)))) {
1251 goto err_exit_bulk_access;
1252 }
1253
1254 err_exit_bulk_access:
1255
1256 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1257
1258 release_pathbuff((char *) cache.acache);
1259 release_pathbuff((char *) cache.haveaccess);
1260 release_pathbuff((char *) file_ids);
1261 release_pathbuff((char *) access);
1262
1263 return (error);
1264 } /* HFS_BULKACCESS */
1265
1266 case HFS_SETACLSTATE: {
1267 int state;
1268
1269 if (!is_suser()) {
1270 return (EPERM);
1271 }
1272 if (ap->a_data == NULL) {
1273 return (EINVAL);
1274 }
1275 state = *(int *)ap->a_data;
1276 if (state == 0 || state == 1)
1277 return hfs_setextendedsecurity(hfsmp, state);
1278 else
1279 return (EINVAL);
1280 }
1281
1282 case F_FULLFSYNC: {
1283 int error;
1284
1285 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1286 if (error == 0) {
1287 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1288 hfs_unlock(VTOC(vp));
1289 }
1290
1291 return error;
1292 }
1293
1294 case F_CHKCLEAN: {
1295 register struct cnode *cp;
1296 int error;
1297
1298 if (!vnode_isreg(vp))
1299 return EINVAL;
1300
1301 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1302 if (error == 0) {
1303 cp = VTOC(vp);
1304 /*
1305 * used by the regression test to determine if
1306 * all the dirty pages (via write) have been cleaned
1307 * after a call to 'fsync'.
1308 */
1309 error = is_file_clean(vp, VTOF(vp)->ff_size);
1310 hfs_unlock(cp);
1311 }
1312 return (error);
1313 }
1314
1315 case F_RDADVISE: {
1316 register struct radvisory *ra;
1317 struct filefork *fp;
1318 int error;
1319
1320 if (!vnode_isreg(vp))
1321 return EINVAL;
1322
1323 ra = (struct radvisory *)(ap->a_data);
1324 fp = VTOF(vp);
1325
1326 /* Protect against a size change. */
1327 hfs_lock_truncate(VTOC(vp), TRUE);
1328
1329 if (ra->ra_offset >= fp->ff_size) {
1330 error = EFBIG;
1331 } else {
1332 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1333 }
1334
1335 hfs_unlock_truncate(VTOC(vp));
1336 return (error);
1337 }
1338
1339 case F_READBOOTSTRAP:
1340 case F_WRITEBOOTSTRAP:
1341 {
1342 struct vnode *devvp = NULL;
1343 user_fbootstraptransfer_t *user_bootstrapp;
1344 int devBlockSize;
1345 int error;
1346 uio_t auio;
1347 daddr64_t blockNumber;
1348 u_long blockOffset;
1349 u_long xfersize;
1350 struct buf *bp;
1351 user_fbootstraptransfer_t user_bootstrap;
1352
1353 if (!vnode_isvroot(vp))
1354 return (EINVAL);
1355 /* LP64 - when the caller is a 64-bit process we are passed a pointer
1356 * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
1357 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1358 */
1359 if (is64bit) {
1360 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1361 }
1362 else {
1363 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1364 user_bootstrapp = &user_bootstrap;
1365 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1366 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1367 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1368 }
1369 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1370 return EINVAL;
1371
1372 devvp = VTOHFS(vp)->hfs_devvp;
1373 auio = uio_create(1, user_bootstrapp->fbt_offset,
1374 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1375 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1376 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1377
1378 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1379
1380 while (uio_resid(auio) > 0) {
1381 blockNumber = uio_offset(auio) / devBlockSize;
1382 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1383 if (error) {
1384 if (bp) buf_brelse(bp);
1385 uio_free(auio);
1386 return error;
1387 };
1388
1389 blockOffset = uio_offset(auio) % devBlockSize;
1390 xfersize = devBlockSize - blockOffset;
1391 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1392 if (error) {
1393 buf_brelse(bp);
1394 uio_free(auio);
1395 return error;
1396 };
1397 if (uio_rw(auio) == UIO_WRITE) {
1398 error = VNOP_BWRITE(bp);
1399 if (error) {
1400 uio_free(auio);
1401 return error;
1402 }
1403 } else {
1404 buf_brelse(bp);
1405 };
1406 };
1407 uio_free(auio);
1408 };
1409 return 0;
1410
1411 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1412 {
1413 if (is64bit) {
1414 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1415 }
1416 else {
1417 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1418 }
1419 return 0;
1420 }
1421
1422 case HFS_GET_MOUNT_TIME:
1423 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1424 break;
1425
1426 case HFS_GET_LAST_MTIME:
1427 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1428 break;
1429
1430 case HFS_SET_BOOT_INFO:
1431 if (!vnode_isvroot(vp))
1432 return(EINVAL);
1433 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1434 return(EACCES); /* must be superuser or owner of filesystem */
1435 HFS_MOUNT_LOCK(hfsmp, TRUE);
1436 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1437 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1438 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1439 break;
1440
1441 case HFS_GET_BOOT_INFO:
1442 if (!vnode_isvroot(vp))
1443 return(EINVAL);
1444 HFS_MOUNT_LOCK(hfsmp, TRUE);
1445 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1446 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1447 break;
1448
1449 default:
1450 return (ENOTTY);
1451 }
1452
1453 /* Should never get here */
1454 return 0;
1455 }
1456
1457 /*
1458 * select
1459 */
1460 int
1461 hfs_vnop_select(__unused struct vnop_select_args *ap)
1462 /*
1463 struct vnop_select_args {
1464 vnode_t a_vp;
1465 int a_which;
1466 int a_fflags;
1467 void *a_wql;
1468 vfs_context_t a_context;
1469 };
1470 */
1471 {
1472 /*
1473 * We should really check to see if I/O is possible.
1474 */
1475 return (1);
1476 }
1477
1478 /*
1479 * Converts a logical block number to a physical block, and optionally returns
1480 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1481 * The physical block number is based on the device block size, currently 512 bytes.
1482 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
1483 */
1484 int
1485 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1486 {
1487 struct cnode *cp = VTOC(vp);
1488 struct filefork *fp = VTOF(vp);
1489 struct hfsmount *hfsmp = VTOHFS(vp);
1490 int retval = E_NONE;
1491 daddr_t logBlockSize;
1492 size_t bytesContAvail = 0;
1493 off_t blockposition;
1494 int lockExtBtree;
1495 int lockflags = 0;
1496
1497 /*
1498 * Check for underlying vnode requests and ensure that logical
1499 * to physical mapping is requested.
1500 */
1501 if (vpp != NULL)
1502 *vpp = cp->c_devvp;
1503 if (bnp == NULL)
1504 return (0);
1505
1506 logBlockSize = GetLogicalBlockSize(vp);
1507 blockposition = (off_t)bn * (off_t)logBlockSize;
1508
1509 lockExtBtree = overflow_extents(fp);
1510
1511 if (lockExtBtree)
1512 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1513
1514 retval = MacToVFSError(
1515 MapFileBlockC (HFSTOVCB(hfsmp),
1516 (FCB*)fp,
1517 MAXPHYSIO,
1518 blockposition,
1519 bnp,
1520 &bytesContAvail));
1521
1522 if (lockExtBtree)
1523 hfs_systemfile_unlock(hfsmp, lockflags);
1524
1525 if (retval == E_NONE) {
1526 /* Figure out how many read ahead blocks there are */
1527 if (runp != NULL) {
1528 if (can_cluster(logBlockSize)) {
1529 /* Make sure this result never goes negative: */
1530 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1531 } else {
1532 *runp = 0;
1533 }
1534 }
1535 }
1536 return (retval);
1537 }
1538
1539 /*
1540 * Convert logical block number to file offset.
1541 */
1542 int
1543 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1544 /*
1545 struct vnop_blktooff_args {
1546 vnode_t a_vp;
1547 daddr64_t a_lblkno;
1548 off_t *a_offset;
1549 };
1550 */
1551 {
1552 if (ap->a_vp == NULL)
1553 return (EINVAL);
1554 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1555
1556 return(0);
1557 }
1558
1559 /*
1560 * Convert file offset to logical block number.
1561 */
1562 int
1563 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1564 /*
1565 struct vnop_offtoblk_args {
1566 vnode_t a_vp;
1567 off_t a_offset;
1568 daddr64_t *a_lblkno;
1569 };
1570 */
1571 {
1572 if (ap->a_vp == NULL)
1573 return (EINVAL);
1574 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1575
1576 return(0);
1577 }
1578
1579 /*
1580 * Map file offset to physical block number.
1581 *
1582 * System file cnodes are expected to be locked (shared or exclusive).
1583 */
1584 int
1585 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1586 /*
1587 struct vnop_blockmap_args {
1588 vnode_t a_vp;
1589 off_t a_foffset;
1590 size_t a_size;
1591 daddr64_t *a_bpn;
1592 size_t *a_run;
1593 void *a_poff;
1594 int a_flags;
1595 vfs_context_t a_context;
1596 };
1597 */
1598 {
1599 struct vnode *vp = ap->a_vp;
1600 struct cnode *cp;
1601 struct filefork *fp;
1602 struct hfsmount *hfsmp;
1603 size_t bytesContAvail = 0;
1604 int retval = E_NONE;
1605 int syslocks = 0;
1606 int lockflags = 0;
1607 struct rl_entry *invalid_range;
1608 enum rl_overlaptype overlaptype;
1609 int started_tr = 0;
1610 int tooklock = 0;
1611
1612 /*
1613 * Check for underlying vnode requests and ensure that logical
1614 * to physical mapping is requested.
1615 */
1616 if (ap->a_bpn == NULL)
1617 return (0);
1618
1619 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1620 if (VTOC(vp)->c_lockowner != current_thread()) {
1621 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1622 tooklock = 1;
1623 } else {
1624 cp = VTOC(vp);
1625 panic("blockmap: %s cnode lock already held!\n",
1626 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1627 }
1628 }
1629 hfsmp = VTOHFS(vp);
1630 cp = VTOC(vp);
1631 fp = VTOF(vp);
1632
1633 retry:
1634 if (fp->ff_unallocblocks) {
1635 if (hfs_start_transaction(hfsmp) != 0) {
1636 retval = EINVAL;
1637 goto exit;
1638 } else {
1639 started_tr = 1;
1640 }
1641 syslocks = SFL_EXTENTS | SFL_BITMAP;
1642
1643 } else if (overflow_extents(fp)) {
1644 syslocks = SFL_EXTENTS;
1645 }
1646
1647 if (syslocks)
1648 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1649
1650 /*
1651 * Check for any delayed allocations.
1652 */
1653 if (fp->ff_unallocblocks) {
1654 SInt64 actbytes;
1655 u_int32_t loanedBlocks;
1656
1657 //
1658 // Make sure we have a transaction. It's possible
1659 // that we came in and fp->ff_unallocblocks was zero
1660 // but during the time we blocked acquiring the extents
1661 // btree, ff_unallocblocks became non-zero and so we
1662 // will need to start a transaction.
1663 //
1664 if (started_tr == 0) {
1665 if (syslocks) {
1666 hfs_systemfile_unlock(hfsmp, lockflags);
1667 syslocks = 0;
1668 }
1669 goto retry;
1670 }
1671
1672 /*
1673 * Note: ExtendFileC will release any blocks on loan and
1674 * acquire real blocks. So we ask to extend by zero bytes
1675 * since ExtendFileC will account for the virtual blocks.
1676 */
1677
1678 loanedBlocks = fp->ff_unallocblocks;
1679 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1680 kEFAllMask | kEFNoClumpMask, &actbytes);
1681
1682 if (retval) {
1683 fp->ff_unallocblocks = loanedBlocks;
1684 cp->c_blocks += loanedBlocks;
1685 fp->ff_blocks += loanedBlocks;
1686
1687 HFS_MOUNT_LOCK(hfsmp, TRUE);
1688 hfsmp->loanedBlocks += loanedBlocks;
1689 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1690 }
1691
1692 if (retval) {
1693 hfs_systemfile_unlock(hfsmp, lockflags);
1694 cp->c_flag |= C_MODIFIED;
1695 if (started_tr) {
1696 (void) hfs_update(vp, TRUE);
1697 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1698
1699 hfs_end_transaction(hfsmp);
1700 }
1701 goto exit;
1702 }
1703 }
1704
1705 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1706 ap->a_bpn, &bytesContAvail);
1707 if (syslocks) {
1708 hfs_systemfile_unlock(hfsmp, lockflags);
1709 syslocks = 0;
1710 }
1711
1712 if (started_tr) {
1713 (void) hfs_update(vp, TRUE);
1714 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1715 hfs_end_transaction(hfsmp);
1716 started_tr = 0;
1717 }
1718 if (retval) {
1719 goto exit;
1720 }
1721
1722 /* Adjust the mapping information for invalid file ranges: */
1723 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1724 ap->a_foffset + (off_t)bytesContAvail - 1,
1725 &invalid_range);
1726 if (overlaptype != RL_NOOVERLAP) {
1727 switch(overlaptype) {
1728 case RL_MATCHINGOVERLAP:
1729 case RL_OVERLAPCONTAINSRANGE:
1730 case RL_OVERLAPSTARTSBEFORE:
1731 /* There's no valid block for this byte offset: */
1732 *ap->a_bpn = (daddr64_t)-1;
1733 /* There's no point limiting the amount to be returned
1734 * if the invalid range that was hit extends all the way
1735 * to the EOF (i.e. there's no valid bytes between the
1736 * end of this range and the file's EOF):
1737 */
1738 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1739 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1740 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1741 }
1742 break;
1743
1744 case RL_OVERLAPISCONTAINED:
1745 case RL_OVERLAPENDSAFTER:
1746 /* The range of interest hits an invalid block before the end: */
1747 if (invalid_range->rl_start == ap->a_foffset) {
1748 /* There's actually no valid information to be had starting here: */
1749 *ap->a_bpn = (daddr64_t)-1;
1750 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1751 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1752 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1753 }
1754 } else {
1755 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1756 }
1757 break;
1758
1759 case RL_NOOVERLAP:
1760 break;
1761 } /* end switch */
1762 if (bytesContAvail > ap->a_size)
1763 bytesContAvail = ap->a_size;
1764 }
1765 if (ap->a_run)
1766 *ap->a_run = bytesContAvail;
1767
1768 if (ap->a_poff)
1769 *(int *)ap->a_poff = 0;
1770 exit:
1771 if (tooklock)
1772 hfs_unlock(cp);
1773
1774 return (MacToVFSError(retval));
1775 }
1776
1777
1778 /*
1779 * prepare and issue the I/O
1780 * buf_strategy knows how to deal
1781 * with requests that require
1782 * fragmented I/Os
1783 */
1784 int
1785 hfs_vnop_strategy(struct vnop_strategy_args *ap)
1786 {
1787 buf_t bp = ap->a_bp;
1788 vnode_t vp = buf_vnode(bp);
1789 struct cnode *cp = VTOC(vp);
1790
1791 return (buf_strategy(cp->c_devvp, ap));
1792 }
1793
1794
1795 static int
1796 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1797 {
1798 register struct cnode *cp = VTOC(vp);
1799 struct filefork *fp = VTOF(vp);
1800 struct proc *p = vfs_context_proc(context);
1801 kauth_cred_t cred = vfs_context_ucred(context);
1802 int retval;
1803 off_t bytesToAdd;
1804 off_t actualBytesAdded;
1805 off_t filebytes;
1806 u_long fileblocks;
1807 int blksize;
1808 struct hfsmount *hfsmp;
1809 int lockflags;
1810
1811 blksize = VTOVCB(vp)->blockSize;
1812 fileblocks = fp->ff_blocks;
1813 filebytes = (off_t)fileblocks * (off_t)blksize;
1814
1815 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1816 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1817
1818 if (length < 0)
1819 return (EINVAL);
1820
1821 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1822 return (EFBIG);
1823
1824 hfsmp = VTOHFS(vp);
1825
1826 retval = E_NONE;
1827
1828 /* Files that are changing size are not hot file candidates. */
1829 if (hfsmp->hfc_stage == HFC_RECORDING) {
1830 fp->ff_bytesread = 0;
1831 }
1832
1833 /*
1834 * We cannot just check if fp->ff_size == length (as an optimization)
1835 * since there may be extra physical blocks that also need truncation.
1836 */
1837 #if QUOTA
1838 if ((retval = hfs_getinoquota(cp)))
1839 return(retval);
1840 #endif /* QUOTA */
1841
1842 /*
1843 * Lengthen the size of the file. We must ensure that the
1844 * last byte of the file is allocated. Since the smallest
1845 * value of ff_size is 0, length will be at least 1.
1846 */
1847 if (length > (off_t)fp->ff_size) {
1848 #if QUOTA
1849 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1850 cred, 0);
1851 if (retval)
1852 goto Err_Exit;
1853 #endif /* QUOTA */
1854 /*
1855 * If we don't have enough physical space then
1856 * we need to extend the physical size.
1857 */
1858 if (length > filebytes) {
1859 int eflags;
1860 u_long blockHint = 0;
1861
1862 /* All or nothing and don't round up to clumpsize. */
1863 eflags = kEFAllMask | kEFNoClumpMask;
1864
1865 if (cred && suser(cred, NULL) != 0)
1866 eflags |= kEFReserveMask; /* keep a reserve */
1867
1868 /*
1869 * Allocate Journal and Quota files in metadata zone.
1870 */
1871 if (filebytes == 0 &&
1872 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1873 hfs_virtualmetafile(cp)) {
1874 eflags |= kEFMetadataMask;
1875 blockHint = hfsmp->hfs_metazone_start;
1876 }
1877 if (hfs_start_transaction(hfsmp) != 0) {
1878 retval = EINVAL;
1879 goto Err_Exit;
1880 }
1881
1882 /* Protect extents b-tree and allocation bitmap */
1883 lockflags = SFL_BITMAP;
1884 if (overflow_extents(fp))
1885 lockflags |= SFL_EXTENTS;
1886 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1887
1888 while ((length > filebytes) && (retval == E_NONE)) {
1889 bytesToAdd = length - filebytes;
1890 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1891 (FCB*)fp,
1892 bytesToAdd,
1893 blockHint,
1894 eflags,
1895 &actualBytesAdded));
1896
1897 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1898 if (actualBytesAdded == 0 && retval == E_NONE) {
1899 if (length > filebytes)
1900 length = filebytes;
1901 break;
1902 }
1903 } /* endwhile */
1904
1905 hfs_systemfile_unlock(hfsmp, lockflags);
1906
1907 if (hfsmp->jnl) {
1908 (void) hfs_update(vp, TRUE);
1909 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1910 }
1911
1912 hfs_end_transaction(hfsmp);
1913
1914 if (retval)
1915 goto Err_Exit;
1916
1917 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1918 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1919 }
1920
1921 if (!(flags & IO_NOZEROFILL)) {
1922 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1923 struct rl_entry *invalid_range;
1924 off_t zero_limit;
1925
1926 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1927 if (length < zero_limit) zero_limit = length;
1928
1929 if (length > (off_t)fp->ff_size) {
1930 struct timeval tv;
1931
1932 /* Extending the file: time to fill out the current last page w. zeroes? */
1933 if ((fp->ff_size & PAGE_MASK_64) &&
1934 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1935 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1936
1937 /* There's some valid data at the start of the (current) last page
1938 of the file, so zero out the remainder of that page to ensure the
1939 entire page contains valid data. Since there is no invalid range
1940 possible past the (current) eof, there's no need to remove anything
1941 from the invalid range list before calling cluster_write(): */
1942 hfs_unlock(cp);
1943 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1944 fp->ff_size, (off_t)0,
1945 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1946 hfs_lock(cp, HFS_FORCE_LOCK);
1947 if (retval) goto Err_Exit;
1948
1949 /* Merely invalidate the remaining area, if necessary: */
1950 if (length > zero_limit) {
1951 microuptime(&tv);
1952 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1953 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1954 }
1955 } else {
1956 /* The page containing the (current) eof is invalid: just add the
1957 remainder of the page to the invalid list, along with the area
1958 being newly allocated:
1959 */
1960 microuptime(&tv);
1961 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1962 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1963 };
1964 }
1965 } else {
1966 panic("hfs_truncate: invoked on non-UBC object?!");
1967 };
1968 }
1969 cp->c_touch_modtime = TRUE;
1970 fp->ff_size = length;
1971
1972 /* Nested transactions will do their own ubc_setsize. */
1973 if (!skipsetsize) {
1974 /*
1975 * ubc_setsize can cause a pagein here
1976 * so we need to drop cnode lock.
1977 */
1978 hfs_unlock(cp);
1979 ubc_setsize(vp, length);
1980 hfs_lock(cp, HFS_FORCE_LOCK);
1981 }
1982
1983 } else { /* Shorten the size of the file */
1984
1985 if ((off_t)fp->ff_size > length) {
1986 /*
1987 * Any buffers that are past the truncation point need to be
1988 * invalidated (to maintain buffer cache consistency).
1989 */
1990
1991 /* Nested transactions will do their own ubc_setsize. */
1992 if (!skipsetsize) {
1993 /*
1994 * ubc_setsize can cause a pageout here
1995 * so we need to drop cnode lock.
1996 */
1997 hfs_unlock(cp);
1998 ubc_setsize(vp, length);
1999 hfs_lock(cp, HFS_FORCE_LOCK);
2000 }
2001
2002 /* Any space previously marked as invalid is now irrelevant: */
2003 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2004 }
2005
2006 /*
2007 * Account for any unmapped blocks. Note that the new
2008 * file length can still end up with unmapped blocks.
2009 */
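		/*
		 * Any delayed-allocation ("loaned") blocks are first returned to the
		 * volume, then re-borrowed for whatever the new length still needs
		 * beyond the blocks actually allocated on disk.
		 */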
2010 if (fp->ff_unallocblocks > 0) {
2011 u_int32_t finalblks;
2012 u_int32_t loanedBlocks;
2013
2014 HFS_MOUNT_LOCK(hfsmp, TRUE);
2015
2016 loanedBlocks = fp->ff_unallocblocks;
2017 cp->c_blocks -= loanedBlocks;
2018 fp->ff_blocks -= loanedBlocks;
2019 fp->ff_unallocblocks = 0;
2020
2021 hfsmp->loanedBlocks -= loanedBlocks;
2022
2023 finalblks = (length + blksize - 1) / blksize;
2024 if (finalblks > fp->ff_blocks) {
2025 /* calculate required unmapped blocks */
2026 loanedBlocks = finalblks - fp->ff_blocks;
2027 hfsmp->loanedBlocks += loanedBlocks;
2028
2029 fp->ff_unallocblocks = loanedBlocks;
2030 cp->c_blocks += loanedBlocks;
2031 fp->ff_blocks += loanedBlocks;
2032 }
2033 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2034 }
2035
2036 /*
2037 * For a TBE process the deallocation of the file blocks is
2038 * delayed until the file is closed. And hfs_close calls
2039 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2040 * isn't set, we make sure this isn't a TBE process.
2041 */
2042 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2043 #if QUOTA
2044 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2045 #endif /* QUOTA */
2046 if (hfs_start_transaction(hfsmp) != 0) {
2047 retval = EINVAL;
2048 goto Err_Exit;
2049 }
2050
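			/*
			 * If the file still has loaned (delayed-allocation) blocks at this
			 * point, its on-disk allocation is already within the new length,
			 * so there is nothing for TruncateFileC to free and the call is
			 * skipped.
			 */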
2051 if (fp->ff_unallocblocks == 0) {
2052 /* Protect extents b-tree and allocation bitmap */
2053 lockflags = SFL_BITMAP;
2054 if (overflow_extents(fp))
2055 lockflags |= SFL_EXTENTS;
2056 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2057
2058 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2059 (FCB*)fp, length, false));
2060
2061 hfs_systemfile_unlock(hfsmp, lockflags);
2062 }
2063 if (hfsmp->jnl) {
2064 (void) hfs_update(vp, TRUE);
2065 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2066 }
2067
2068 hfs_end_transaction(hfsmp);
2069
2070 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2071 if (retval)
2072 goto Err_Exit;
2073 #if QUOTA
2074 /* These are bytes released */
2075 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2076 #endif /* QUOTA */
2077 }
2078 /* Only set update flag if the logical length changes */
2079 if ((off_t)fp->ff_size != length)
2080 cp->c_touch_modtime = TRUE;
2081 fp->ff_size = length;
2082 }
2083 cp->c_touch_chgtime = TRUE;
2084 retval = hfs_update(vp, MNT_WAIT);
2085 if (retval) {
2086 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2087 -1, -1, -1, retval, 0);
2088 }
2089
2090 Err_Exit:
2091
2092 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2093 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2094
2095 return (retval);
2096 }
2097
2098
2099
2100 /*
2101 * Truncate a cnode to at most length size, freeing (or adding) the
2102 * Truncate (or extend) a cnode to the given length, freeing (or adding)
2103 * the disk blocks as needed.
2104 __private_extern__
2105 int
2106 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2107 vfs_context_t context)
2108 {
2109 struct filefork *fp = VTOF(vp);
2110 off_t filebytes;
2111 u_long fileblocks;
2112 int blksize, error = 0;
2113
2114 if (vnode_isdir(vp))
2115 return (EISDIR); /* cannot truncate an HFS directory! */
2116
2117 blksize = VTOVCB(vp)->blockSize;
2118 fileblocks = fp->ff_blocks;
2119 filebytes = (off_t)fileblocks * (off_t)blksize;
2120
2121 // have to loop truncating or growing files that are
2122 // really big because otherwise transactions can get
2123 // enormous and consume too many kernel resources.
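// For example, shrinking a multi-gigabyte file proceeds in steps of
// HFS_BIGFILE_SIZE, with one do_hfs_truncate() call per step, until
// the target length is reached.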
2124
2125 if (length < filebytes) {
2126 while (filebytes > length) {
2127 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2128 filebytes -= HFS_BIGFILE_SIZE;
2129 } else {
2130 filebytes = length;
2131 }
2132 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2133 if (error)
2134 break;
2135 }
2136 } else if (length > filebytes) {
2137 while (filebytes < length) {
2138 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2139 filebytes += HFS_BIGFILE_SIZE;
2140 } else {
2141 filebytes = length;
2142 }
2143 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2144 if (error)
2145 break;
2146 }
2147 } else /* Same logical size */ {
2148
2149 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2150 }
2151 /* Files that are changing size are not hot file candidates. */
2152 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2153 fp->ff_bytesread = 0;
2154 }
2155
2156 return (error);
2157 }
2158
2159
2160
2161 /*
2162 * Preallocate file storage space.
2163 */
2164 int
2165 hfs_vnop_allocate(struct vnop_allocate_args /* {
2166 vnode_t a_vp;
2167 off_t a_length;
2168 u_int32_t a_flags;
2169 off_t *a_bytesallocated;
2170 off_t a_offset;
2171 vfs_context_t a_context;
2172 } */ *ap)
2173 {
2174 struct vnode *vp = ap->a_vp;
2175 struct cnode *cp;
2176 struct filefork *fp;
2177 ExtendedVCB *vcb;
2178 off_t length = ap->a_length;
2179 off_t startingPEOF;
2180 off_t moreBytesRequested;
2181 off_t actualBytesAdded;
2182 off_t filebytes;
2183 u_long fileblocks;
2184 int retval, retval2;
2185 UInt32 blockHint;
2186 UInt32 extendFlags; /* For call to ExtendFileC */
2187 struct hfsmount *hfsmp;
2188 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2189 int lockflags;
2190
2191 *(ap->a_bytesallocated) = 0;
2192
2193 if (!vnode_isreg(vp))
2194 return (EISDIR);
2195 if (length < (off_t)0)
2196 return (EINVAL);
2197
2198 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2199 return (retval);
2200 cp = VTOC(vp);
2201 fp = VTOF(vp);
2202 hfsmp = VTOHFS(vp);
2203 vcb = VTOVCB(vp);
2204
2205 fileblocks = fp->ff_blocks;
2206 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2207
2208 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2209 retval = EINVAL;
2210 goto Err_Exit;
2211 }
2212
2213 /* Fill in the flags word for the call to Extend the file */
2214
2215 extendFlags = kEFNoClumpMask;
2216 if (ap->a_flags & ALLOCATECONTIG)
2217 extendFlags |= kEFContigMask;
2218 if (ap->a_flags & ALLOCATEALL)
2219 extendFlags |= kEFAllMask;
2220 if (cred && suser(cred, NULL) != 0)
2221 extendFlags |= kEFReserveMask;
2222
2223 retval = E_NONE;
2224 blockHint = 0;
2225 startingPEOF = filebytes;
2226
2227 if (ap->a_flags & ALLOCATEFROMPEOF)
2228 length += filebytes;
2229 else if (ap->a_flags & ALLOCATEFROMVOL)
2230 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2231
2232 /* If no changes are necessary, then we're done */
2233 if (filebytes == length)
2234 goto Std_Exit;
2235
2236 /*
2237 * Lengthen the size of the file. We must ensure that the
2238 * last byte of the file is allocated. Since the smallest
2239 * value of filebytes is 0, length will be at least 1.
2240 */
2241 if (length > filebytes) {
2242 moreBytesRequested = length - filebytes;
2243
2244 #if QUOTA
2245 retval = hfs_chkdq(cp,
2246 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2247 cred, 0);
2248 if (retval)
2249 goto Err_Exit;
2250
2251 #endif /* QUOTA */
2252 /*
2253 * Metadata zone checks.
2254 */
2255 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2256 /*
2257 * Allocate Journal and Quota files in metadata zone.
2258 */
2259 if (hfs_virtualmetafile(cp)) {
2260 extendFlags |= kEFMetadataMask;
2261 blockHint = hfsmp->hfs_metazone_start;
2262 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2263 (blockHint <= hfsmp->hfs_metazone_end)) {
2264 /*
2265 * Move blockHint outside metadata zone.
2266 */
2267 blockHint = hfsmp->hfs_metazone_end + 1;
2268 }
2269 }
2270
2271 if (hfs_start_transaction(hfsmp) != 0) {
2272 retval = EINVAL;
2273 goto Err_Exit;
2274 }
2275
2276 /* Protect extents b-tree and allocation bitmap */
2277 lockflags = SFL_BITMAP;
2278 if (overflow_extents(fp))
2279 lockflags |= SFL_EXTENTS;
2280 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2281
2282 retval = MacToVFSError(ExtendFileC(vcb,
2283 (FCB*)fp,
2284 moreBytesRequested,
2285 blockHint,
2286 extendFlags,
2287 &actualBytesAdded));
2288
2289 *(ap->a_bytesallocated) = actualBytesAdded;
2290 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2291
2292 hfs_systemfile_unlock(hfsmp, lockflags);
2293
2294 if (hfsmp->jnl) {
2295 (void) hfs_update(vp, TRUE);
2296 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2297 }
2298
2299 hfs_end_transaction(hfsmp);
2300
2301 /*
2302 * If we get an error and no changes were made then exit;
2303 * otherwise we must do the hfs_update to reflect the changes.
2304 */
2305 if (retval && (startingPEOF == filebytes))
2306 goto Err_Exit;
2307
2308 /*
2309 * Adjust actualBytesAdded to be allocation block aligned, not
2310 * clump size aligned.
2311 * NOTE: So what we are reporting does not affect reality
2312 * until the file is closed, when we truncate the file to allocation
2313 * block size.
2314 */
2315 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2316 *(ap->a_bytesallocated) =
2317 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2318
2319 } else { /* Shorten the size of the file */
2320
2321 if (fp->ff_size > length) {
2322 /*
2323 * Any buffers that are past the truncation point need to be
2324 * invalidated (to maintain buffer cache consistency).
2325 */
2326 }
2327
2328 if (hfs_start_transaction(hfsmp) != 0) {
2329 retval = EINVAL;
2330 goto Err_Exit;
2331 }
2332
2333 /* Protect extents b-tree and allocation bitmap */
2334 lockflags = SFL_BITMAP;
2335 if (overflow_extents(fp))
2336 lockflags |= SFL_EXTENTS;
2337 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2338
2339 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2340
2341 hfs_systemfile_unlock(hfsmp, lockflags);
2342
2343 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2344
2345 if (hfsmp->jnl) {
2346 (void) hfs_update(vp, TRUE);
2347 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2348 }
2349
2350 hfs_end_transaction(hfsmp);
2351
2352
2353 /*
2354 * If we get an error and no changes were made then exit;
2355 * otherwise we must do the hfs_update to reflect the changes.
2356 */
2357 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2358 #if QUOTA
2359 /* These are bytes released */
2360 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2361 #endif /* QUOTA */
2362
2363 if (fp->ff_size > filebytes) {
2364 fp->ff_size = filebytes;
2365
2366 hfs_unlock(cp);
2367 ubc_setsize(vp, fp->ff_size);
2368 hfs_lock(cp, HFS_FORCE_LOCK);
2369 }
2370 }
2371
2372 Std_Exit:
2373 cp->c_touch_chgtime = TRUE;
2374 cp->c_touch_modtime = TRUE;
2375 retval2 = hfs_update(vp, MNT_WAIT);
2376
2377 if (retval == 0)
2378 retval = retval2;
2379 Err_Exit:
2380 hfs_unlock(cp);
2381 return (retval);
2382 }
2383
2384
2385 /*
2386 * Pagein for HFS filesystem
2387 */
2388 int
2389 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2390 /*
2391 struct vnop_pagein_args {
2392 vnode_t a_vp,
2393 upl_t a_pl,
2394 vm_offset_t a_pl_offset,
2395 off_t a_f_offset,
2396 size_t a_size,
2397 int a_flags
2398 vfs_context_t a_context;
2399 };
2400 */
2401 {
2402 vnode_t vp = ap->a_vp;
2403 int error;
2404
2405 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2406 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2407 /*
2408 * Keep track of blocks read.
2409 */
2410 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2411 struct cnode *cp;
2412 struct filefork *fp;
2413 int bytesread;
2414 int took_cnode_lock = 0;
2415
2416 cp = VTOC(vp);
2417 fp = VTOF(vp);
2418
2419 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2420 bytesread = fp->ff_size;
2421 else
2422 bytesread = ap->a_size;
2423
2424 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2425 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2426 hfs_lock(cp, HFS_FORCE_LOCK);
2427 took_cnode_lock = 1;
2428 }
2429 /*
2430 * If this file hasn't been seen since the start of
2431 * the current sampling period then start over.
2432 */
2433 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2434 struct timeval tv;
2435
2436 fp->ff_bytesread = bytesread;
2437 microtime(&tv);
2438 cp->c_atime = tv.tv_sec;
2439 } else {
2440 fp->ff_bytesread += bytesread;
2441 }
2442 cp->c_touch_acctime = TRUE;
2443 if (took_cnode_lock)
2444 hfs_unlock(cp);
2445 }
2446 return (error);
2447 }
2448
2449 /*
2450 * Pageout for HFS filesystem.
2451 */
2452 int
2453 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2454 /*
2455 struct vnop_pageout_args {
2456 vnode_t a_vp,
2457 upl_t a_pl,
2458 vm_offset_t a_pl_offset,
2459 off_t a_f_offset,
2460 size_t a_size,
2461 int a_flags
2462 vfs_context_t a_context;
2463 };
2464 */
2465 {
2466 vnode_t vp = ap->a_vp;
2467 struct cnode *cp;
2468 struct filefork *fp;
2469 int retval;
2470 off_t end_of_range;
2471 off_t filesize;
2472
2473 cp = VTOC(vp);
2474 if (cp->c_lockowner == current_thread()) {
2475 panic("pageout: %s cnode lock already held!\n",
2476 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2477 }
2478 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2479 return (retval);
2480 }
2481 fp = VTOF(vp);
2482
2483 filesize = fp->ff_size;
2484 end_of_range = ap->a_f_offset + ap->a_size - 1;
2485
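	/*
	 * Clamp the range to the current EOF and drop any invalid ranges it
	 * covers: the pages being pushed out now contain valid data, and the
	 * logical EOF is marked dirty (C_MODIFIED) so it gets updated.
	 */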
2486 if (end_of_range >= filesize) {
2487 end_of_range = (off_t)(filesize - 1);
2488 }
2489 if (ap->a_f_offset < filesize) {
2490 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2491 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2492 }
2493 hfs_unlock(cp);
2494
2495 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2496 ap->a_size, filesize, ap->a_flags);
2497
2498 /*
2499 * If data was written, and setuid or setgid bits are set and
2500 * this process is not the superuser then clear the setuid and
2501 * setgid bits as a precaution against tampering.
2502 */
2503 if ((retval == 0) &&
2504 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2505 (vfs_context_suser(ap->a_context) != 0)) {
2506 hfs_lock(cp, HFS_FORCE_LOCK);
2507 cp->c_mode &= ~(S_ISUID | S_ISGID);
2508 cp->c_touch_chgtime = TRUE;
2509 hfs_unlock(cp);
2510 }
2511 return (retval);
2512 }
2513
2514 /*
2515 * Intercept B-Tree node writes to unswap them if necessary.
2516 */
2517 int
2518 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2519 {
2520 int retval = 0;
2521 register struct buf *bp = ap->a_bp;
2522 register struct vnode *vp = buf_vnode(bp);
2523 #if BYTE_ORDER == LITTLE_ENDIAN
2524 BlockDescriptor block;
2525
2526 /* Trap B-Tree writes */
2527 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2528 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2529 (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
2530
2531 /* Swap if the B-Tree node is in native byte order */
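		/*
		 * The last u_int16_t in a B-tree node is the offset of record 0,
		 * which is always 0x000e (sizeof(BTNodeDescriptor)).  If it reads
		 * as 0x000e in host order on this little-endian machine, the node
		 * is still in native order and must be swapped to big-endian
		 * before it goes to disk.
		 */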
2532 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2533 /* Prepare the block pointer */
2534 block.blockHeader = bp;
2535 block.buffer = (char *)buf_dataptr(bp);
2536 /* not found in cache ==> came from disk */
2537 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2538 block.blockSize = buf_count(bp);
2539
2540 /* Endian un-swap B-Tree node */
2541 SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1);
2542 }
2543
2544 /* We don't check to make sure that it's 0x0e00 because it could be all zeros */
2545 }
2546 #endif
2547 /* This buffer shouldn't be locked anymore but if it is clear it */
2548 if ((buf_flags(bp) & B_LOCKED)) {
2549 // XXXdbg
2550 if (VTOHFS(vp)->jnl) {
2551 panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
2552 }
2553 buf_clearflags(bp, B_LOCKED);
2554 }
2555 retval = vn_bwrite (ap);
2556
2557 return (retval);
2558 }
2559
2560 /*
2561 * Relocate a file to a new location on disk
2562 * cnode must be locked on entry
2563 *
2564 * Relocation occurs by cloning the file's data from its
2565 * current set of blocks to a new set of blocks. During
2566 * the relocation all of the blocks (old and new) are
2567 * owned by the file.
2568 *
2569 *  -----------------
2570 *  |///////////////|
2571 *  -----------------
2572 *  0               N (file offset)
2573 *
2574 *  -----------------     -----------------
2575 *  |///////////////|     |               |   STEP 1 (acquire new blocks)
2576 *  -----------------     -----------------
2577 *  0               N     N+1            2N
2578 *
2579 *  -----------------     -----------------
2580 *  |///////////////|     |///////////////|   STEP 2 (clone data)
2581 *  -----------------     -----------------
2582 *  0               N     N+1            2N
2583 *
2584 *  -----------------
2585 *  |///////////////|   STEP 3 (head truncate blocks)
2586 *  -----------------
2587 *  0               N
2588 *
2589 * During steps 2 and 3 page-outs to file offsets less
2590 * than or equal to N are suspended.
2591 *
2592 * During step 3 page-ins to the file get suspended.
2593 */
2594 __private_extern__
2595 int
2596 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2597 struct proc *p)
2598 {
2599 struct cnode *cp;
2600 struct filefork *fp;
2601 struct hfsmount *hfsmp;
2602 u_int32_t headblks;
2603 u_int32_t datablks;
2604 u_int32_t blksize;
2605 u_int32_t growsize;
2606 u_int32_t nextallocsave;
2607 daddr64_t sector_a, sector_b;
2608 int disabled_caching = 0;
2609 int eflags;
2610 off_t newbytes;
2611 int retval;
2612 int lockflags = 0;
2613 int took_trunc_lock = 0;
2614 int started_tr = 0;
2615 enum vtype vnodetype;
2616
2617 vnodetype = vnode_vtype(vp);
2618 if (vnodetype != VREG && vnodetype != VLNK) {
2619 return (EPERM);
2620 }
2621
2622 hfsmp = VTOHFS(vp);
2623 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2624 return (ENOSPC);
2625 }
2626
2627 cp = VTOC(vp);
2628 fp = VTOF(vp);
2629 if (fp->ff_unallocblocks)
2630 return (EINVAL);
2631 blksize = hfsmp->blockSize;
2632 if (blockHint == 0)
2633 blockHint = hfsmp->nextAllocation;
2634
2635 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2636 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2637 return (EFBIG);
2638 }
2639
2640 //
2641 // We do not believe that this call to hfs_fsync() is
2642 // necessary and it causes a journal transaction
2643 // deadlock so we are removing it.
2644 //
2645 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2646 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2647 // if (retval)
2648 // return (retval);
2649 //}
2650
2651 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2652 hfs_unlock(cp);
2653 hfs_lock_truncate(cp, TRUE);
2654 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2655 hfs_unlock_truncate(cp);
2656 return (retval);
2657 }
2658 took_trunc_lock = 1;
2659 }
2660 headblks = fp->ff_blocks;
2661 datablks = howmany(fp->ff_size, blksize);
2662 growsize = datablks * blksize;
2663 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2664 if (blockHint >= hfsmp->hfs_metazone_start &&
2665 blockHint <= hfsmp->hfs_metazone_end)
2666 eflags |= kEFMetadataMask;
2667
2668 if (hfs_start_transaction(hfsmp) != 0) {
2669 if (took_trunc_lock)
2670 hfs_unlock_truncate(cp);
2671 return (EINVAL);
2672 }
2673 started_tr = 1;
2674 /*
2675 * Protect the extents b-tree and the allocation bitmap
2676 * during MapFileBlockC and ExtendFileC operations.
2677 */
2678 lockflags = SFL_BITMAP;
2679 if (overflow_extents(fp))
2680 lockflags |= SFL_EXTENTS;
2681 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2682
2683 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2684 if (retval) {
2685 retval = MacToVFSError(retval);
2686 goto out;
2687 }
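	/*
	 * sector_a is the physical sector backing the last byte of the
	 * current allocation; it is checked against the start of the new
	 * allocation below so a merely contiguous extension is rejected.
	 */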
2688
2689 /*
2690 * STEP 1 - acquire new allocation blocks.
2691 */
2692 if (!vnode_isnocache(vp)) {
2693 vnode_setnocache(vp);
2694 disabled_caching = 1;
2695
2696 }
2697 nextallocsave = hfsmp->nextAllocation;
2698 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
2699 if (eflags & kEFMetadataMask) {
2700 HFS_MOUNT_LOCK(hfsmp, TRUE);
2701 hfsmp->nextAllocation = nextallocsave;
2702 hfsmp->vcbFlags |= 0xFF00;
2703 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2704 }
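	/*
	 * For a metadata-zone allocation, nextAllocation is put back to its
	 * saved value (and the VCB marked dirty), presumably so ordinary
	 * allocations are not steered into the metadata zone afterwards.
	 */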
2705
2706 retval = MacToVFSError(retval);
2707 if (retval == 0) {
2708 cp->c_flag |= C_MODIFIED;
2709 if (newbytes < growsize) {
2710 retval = ENOSPC;
2711 goto restore;
2712 } else if (fp->ff_blocks < (headblks + datablks)) {
2713 printf("hfs_relocate: allocation failed");
2714 retval = ENOSPC;
2715 goto restore;
2716 }
2717
2718 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2719 if (retval) {
2720 retval = MacToVFSError(retval);
2721 } else if ((sector_a + 1) == sector_b) {
2722 retval = ENOSPC;
2723 goto restore;
2724 } else if ((eflags & kEFMetadataMask) &&
2725 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2726 hfsmp->hfs_metazone_end)) {
2727 printf("hfs_relocate: didn't move into metadata zone\n");
2728 retval = ENOSPC;
2729 goto restore;
2730 }
2731 }
2732 /* Done with system locks and journal for now. */
2733 hfs_systemfile_unlock(hfsmp, lockflags);
2734 lockflags = 0;
2735 hfs_end_transaction(hfsmp);
2736 started_tr = 0;
2737
2738 if (retval) {
2739 /*
2740 * Check to see if failure is due to excessive fragmentation.
2741 */
2742 if ((retval == ENOSPC) &&
2743 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2744 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2745 }
2746 goto out;
2747 }
2748 /*
2749 * STEP 2 - clone file data into the new allocation blocks.
2750 */
2751
2752 if (vnodetype == VLNK)
2753 retval = hfs_clonelink(vp, blksize, cred, p);
2754 else if (vnode_issystem(vp))
2755 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2756 else
2757 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2758
2759 /* Start transaction for step 3 or for a restore. */
2760 if (hfs_start_transaction(hfsmp) != 0) {
2761 retval = EINVAL;
2762 goto out;
2763 }
2764 started_tr = 1;
2765 if (retval)
2766 goto restore;
2767
2768 /*
2769 * STEP 3 - switch to cloned data and remove old blocks.
2770 */
2771 lockflags = SFL_BITMAP;
2772 if (overflow_extents(fp))
2773 lockflags |= SFL_EXTENTS;
2774 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2775
2776 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2777
2778 hfs_systemfile_unlock(hfsmp, lockflags);
2779 lockflags = 0;
2780 if (retval)
2781 goto restore;
2782 out:
2783 if (took_trunc_lock)
2784 hfs_unlock_truncate(cp);
2785
2786 if (lockflags) {
2787 hfs_systemfile_unlock(hfsmp, lockflags);
2788 lockflags = 0;
2789 }
2790
2791 // See comment up above about calls to hfs_fsync()
2792 //
2793 //if (retval == 0)
2794 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2795
2796 if (hfsmp->jnl) {
2797 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2798 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2799 else
2800 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2801 }
2802 exit:
2803 if (disabled_caching) {
2804 vnode_clearnocache(vp);
2805 }
2806 if (started_tr)
2807 hfs_end_transaction(hfsmp);
2808
2809 return (retval);
2810
2811 restore:
2812 if (fp->ff_blocks == headblks)
2813 goto exit;
2814 /*
2815 * Give back any newly allocated space.
2816 */
2817 if (lockflags == 0) {
2818 lockflags = SFL_BITMAP;
2819 if (overflow_extents(fp))
2820 lockflags |= SFL_EXTENTS;
2821 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2822 }
2823
2824 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2825
2826 hfs_systemfile_unlock(hfsmp, lockflags);
2827 lockflags = 0;
2828
2829 if (took_trunc_lock)
2830 hfs_unlock_truncate(cp);
2831 goto exit;
2832 }
2833
2834
2835 /*
2836 * Clone a symlink.
2837 *
2838 */
2839 static int
2840 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2841 {
2842 struct buf *head_bp = NULL;
2843 struct buf *tail_bp = NULL;
2844 int error;
2845
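	/*
	 * The symlink's data fits in a single allocation block.  Read the
	 * original block (logical block 0), copy it into the newly allocated
	 * block (logical block 1), write it out, and invalidate the stale
	 * buffers; hfs_relocate later head-truncates the original block away.
	 */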
2846
2847 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2848 if (error)
2849 goto out;
2850
2851 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2852 if (tail_bp == NULL) {
2853 error = EIO;
2854 goto out;
2855 }
2856 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2857 error = (int)buf_bwrite(tail_bp);
2858 out:
2859 if (head_bp) {
2860 buf_markinvalid(head_bp);
2861 buf_brelse(head_bp);
2862 }
2863 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2864
2865 return (error);
2866 }
2867
2868 /*
2869 * Clone a file's data within the file.
2870 *
2871 */
2872 static int
2873 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2874 {
2875 caddr_t bufp;
2876 size_t writebase;
2877 size_t bufsize;
2878 size_t copysize;
2879 size_t iosize;
2880 off_t filesize;
2881 size_t offset;
2882 uio_t auio;
2883 int error = 0;
2884
2885 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2886 writebase = blkstart * blksize;
2887 copysize = blkcnt * blksize;
2888 iosize = bufsize = MIN(copysize, 4096 * 16);
2889 offset = 0;
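	/*
	 * Copy the data into the newly allocated blocks in chunks of up to
	 * 64KB: cluster_read from the start of the old blocks, cluster_write
	 * at writebase (the start of the new blocks) using IO_NOCACHE|IO_SYNC
	 * so no stale pages are left behind.  The cnode lock is dropped for
	 * the duration of the copy.
	 */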
2890
2891 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2892 return (ENOMEM);
2893 }
2894 hfs_unlock(VTOC(vp));
2895
2896 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2897
2898 while (offset < copysize) {
2899 iosize = MIN(copysize - offset, iosize);
2900
2901 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2902 uio_addiov(auio, (uintptr_t)bufp, iosize);
2903
2904 error = cluster_read(vp, auio, copysize, 0);
2905 if (error) {
2906 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2907 break;
2908 }
2909 if (uio_resid(auio) != 0) {
2910 printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2911 error = EIO;
2912 break;
2913 }
2914
2915 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2916 uio_addiov(auio, (uintptr_t)bufp, iosize);
2917
2918 error = cluster_write(vp, auio, filesize + offset,
2919 filesize + offset + iosize,
2920 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2921 if (error) {
2922 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2923 break;
2924 }
2925 if (uio_resid(auio) != 0) {
2926 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2927 error = EIO;
2928 break;
2929 }
2930 offset += iosize;
2931 }
2932 uio_free(auio);
2933
2934 /*
2935 * No need to call ubc_sync_range or hfs_invalbuf
2936 * since the file was copied using IO_NOCACHE.
2937 */
2938
2939 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2940
2941 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2942 return (error);
2943 }
2944
2945 /*
2946 * Clone a system (metadata) file.
2947 *
2948 */
2949 static int
2950 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2951 kauth_cred_t cred, struct proc *p)
2952 {
2953 caddr_t bufp;
2954 char * offset;
2955 size_t bufsize;
2956 size_t iosize;
2957 struct buf *bp = NULL;
2958 daddr64_t blkno;
2959 daddr64_t blk;
2960 daddr64_t start_blk;
2961 daddr64_t last_blk;
2962 int breadcnt;
2963 int i;
2964 int error = 0;
2965
2966
2967 iosize = GetLogicalBlockSize(vp);
2968 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
2969 breadcnt = bufsize / iosize;
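	/*
	 * Copy the system file in logical-block-size pieces, staging up to
	 * 1MB at a time in a kernel buffer: read blocks 0..last_blk of the
	 * original data, rewrite them starting at start_blk (the new
	 * allocation), then fsync the file.
	 */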
2970
2971 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2972 return (ENOMEM);
2973 }
2974 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
2975 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
2976 blkno = 0;
2977
2978 while (blkno < last_blk) {
2979 /*
2980 * Read up to a megabyte
2981 */
2982 offset = bufp;
2983 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
2984 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
2985 if (error) {
2986 printf("hfs_clonesysfile: meta_bread error %d\n", error);
2987 goto out;
2988 }
2989 if (buf_count(bp) != iosize) {
2990 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
2991 goto out;
2992 }
2993 bcopy((char *)buf_dataptr(bp), offset, iosize);
2994
2995 buf_markinvalid(bp);
2996 buf_brelse(bp);
2997 bp = NULL;
2998
2999 offset += iosize;
3000 }
3001
3002 /*
3003 * Write up to a megabyte
3004 */
3005 offset = bufp;
3006 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3007 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3008 if (bp == NULL) {
3009 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3010 error = EIO;
3011 goto out;
3012 }
3013 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3014 error = (int)buf_bwrite(bp);
3015 bp = NULL;
3016 if (error)
3017 goto out;
3018 offset += iosize;
3019 }
3020 }
3021 out:
3022 if (bp) {
3023 buf_brelse(bp);
3024 }
3025
3026 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3027
3028 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3029
3030 return (error);
3031 }