bsd/hfs/hfs_readwrite.c

   1 /*
   2  * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
   3  *
   4  * @APPLE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. Please obtain a copy of the License at
  10  * http://www.opensource.apple.com/apsl/ and read it before using this
  11  * file.
  12  *
  13  * The Original Code and all software distributed under the License are
  14  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  15  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  16  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  17  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  18  * Please see the License for the specific language governing rights and
  19  * limitations under the License.
  20  *
  21  * @APPLE_LICENSE_HEADER_END@
  22  */
  23 /*      @(#)hfs_readwrite.c     1.0
  24  *
  25  *      (c) 1998-2001 Apple Computer, Inc.  All Rights Reserved
  26  *
  27  *      hfs_readwrite.c -- vnode operations to deal with reading and writing files.
  28  *
  29  */
  30
  31 #include <sys/param.h>
  32 #include <sys/systm.h>
  33 #include <sys/resourcevar.h>
  34 #include <sys/kernel.h>
  35 #include <sys/fcntl.h>
  36 #include <sys/filedesc.h>
  37 #include <sys/stat.h>
  38 #include <sys/buf.h>
  39 #include <sys/proc.h>
  40 #include <sys/kauth.h>
  41 #include <sys/vnode.h>
  42 #include <sys/uio.h>
  43 #include <sys/vfs_context.h>
  44
  45 #include <miscfs/specfs/specdev.h>
  46
  47 #include <sys/ubc.h>
  48 #include <vm/vm_pageout.h>
  49 #include <vm/vm_kern.h>
  50
  51 #include <sys/kdebug.h>
  52
  53 #include        "hfs.h"
  54 #include        "hfs_endian.h"
  55 #include  "hfs_fsctl.h"
  56 #include        "hfs_quota.h"
  57 #include        "hfscommon/headers/FileMgrInternal.h"
  58 #include        "hfscommon/headers/BTreesInternal.h"
  59 #include        "hfs_cnode.h"
  60 #include        "hfs_dbg.h"
  61
  62 extern int overflow_extents(struct filefork *fp);
  63
  64 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
  65
  66 enum {
  67         MAXHFSFILESIZE = 0x7FFFFFFF             /* this needs to go in the mount structure */
  68 };
  69
  70 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
  71
  72 extern int  hfs_setextendedsecurity(struct hfsmount *, int);
  73
  74
  75 static int  hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
  76 static int  hfs_clonefile(struct vnode *, int, int, int);
  77 static int  hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
  78
  79
  80 /*****************************************************************************
  81 *
  82 *       I/O Operations on vnodes
  83 *
  84 *****************************************************************************/
  85 int  hfs_vnop_read(struct vnop_read_args *);
  86 int  hfs_vnop_write(struct vnop_write_args *);
  87 int  hfs_vnop_ioctl(struct vnop_ioctl_args *);
  88 int  hfs_vnop_select(struct vnop_select_args *);
  89 int  hfs_vnop_blktooff(struct vnop_blktooff_args *);
  90 int  hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
  91 int  hfs_vnop_blockmap(struct vnop_blockmap_args *);
  92 int  hfs_vnop_strategy(struct vnop_strategy_args *);
  93 int  hfs_vnop_allocate(struct vnop_allocate_args *);
  94 int  hfs_vnop_pagein(struct vnop_pagein_args *);
  95 int  hfs_vnop_pageout(struct vnop_pageout_args *);
  96 int  hfs_vnop_bwrite(struct vnop_bwrite_args *);
  97
  98
  99 /*
 100  * Read data from a file.
 101  */
 102 int
 103 hfs_vnop_read(struct vnop_read_args *ap)
 104 {
 105         uio_t uio = ap->a_uio;
 106         struct vnode *vp = ap->a_vp;
 107         struct cnode *cp;
 108         struct filefork *fp;
 109         struct hfsmount *hfsmp;
 110         off_t filesize;
 111         off_t filebytes;
 112         off_t start_resid = uio_resid(uio);
 113         off_t offset = uio_offset(uio);
 114         int retval = 0;
 115
 116
 117         /* Preflight checks */
 118         if (!vnode_isreg(vp)) {
 119                 /* can only read regular files */
 120                 if (vnode_isdir(vp))
 121                         return (EISDIR);
 122                 else
 123                         return (EPERM);
 124         }
 125         if (start_resid == 0)
 126                 return (0);             /* Nothing left to do */
 127         if (offset < 0)
 128                 return (EINVAL);        /* cant read from a negative offset */
 129
 130         cp = VTOC(vp);
 131         fp = VTOF(vp);
 132         hfsmp = VTOHFS(vp);
 133
 134         /* Protect against a size change. */
 135         hfs_lock_truncate(cp, 0);
 136
 137         filesize = fp->ff_size;
 138         filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
 139         if (offset > filesize) {
 140                 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
 141                     (offset > (off_t)MAXHFSFILESIZE)) {
 142                         retval = EFBIG;
 143                 }
 144                 goto exit;
 145         }
 146
 147         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
 148                 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
 149
 150         retval = cluster_read(vp, uio, filesize, 0);
 151
 152         cp->c_touch_acctime = TRUE;
 153
 154         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
 155                 (int)uio_offset(uio), uio_resid(uio), (int)filesize,  (int)filebytes, 0);
 156
 157         /*
 158          * Keep track blocks read
 159          */
 160         if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
 161                 int took_cnode_lock = 0;
 162                 off_t bytesread;
 163
 164                 bytesread = start_resid - uio_resid(uio);
 165
 166                 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
 167                 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
 168                         hfs_lock(cp, HFS_FORCE_LOCK);
 169                         took_cnode_lock = 1;
 170                 }
 171                 /*
 172                  * If this file hasn't been seen since the start of
 173                  * the current sampling period then start over.
 174                  */
 175                 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
 176                         struct timeval tv;
 177
 178                         fp->ff_bytesread = bytesread;
 179                         microtime(&tv);
 180                         cp->c_atime = tv.tv_sec;
 181                 } else {
 182                         fp->ff_bytesread += bytesread;
 183                 }
 184                 if (took_cnode_lock)
 185                         hfs_unlock(cp);
 186         }
 187 exit:
 188         hfs_unlock_truncate(cp);
 189         return (retval);
 190 }
 191
 192 /*
 193  * Write data to a file.
 194  */
 195 int
 196 hfs_vnop_write(struct vnop_write_args *ap)
 197 {
 198         uio_t uio = ap->a_uio;
 199         struct vnode *vp = ap->a_vp;
 200         struct cnode *cp;
 201         struct filefork *fp;
 202         struct hfsmount *hfsmp;
 203         kauth_cred_t cred = NULL;
 204         off_t origFileSize;
 205         off_t writelimit;
 206         off_t bytesToAdd;
 207         off_t actualBytesAdded;
 208         off_t filebytes;
 209         off_t offset;
 210         size_t resid;
 211         int eflags;
 212         int ioflag = ap->a_ioflag;
 213         int retval = 0;
 214         int lockflags;
 215         int cnode_locked = 0;
 216
 217         // LP64todo - fix this! uio_resid may be 64-bit value
 218         resid = uio_resid(uio);
 219         offset = uio_offset(uio);
 220
 221         if (offset < 0)
 222                 return (EINVAL);
 223         if (resid == 0)
 224                 return (E_NONE);
 225         if (!vnode_isreg(vp))
 226                 return (EPERM);  /* Can only write regular files */
 227
 228         /* Protect against a size change. */
 229         hfs_lock_truncate(VTOC(vp), TRUE);
 230
 231         if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
 232                 hfs_unlock_truncate(VTOC(vp));
 233                 return (retval);
 234         }
 235         cnode_locked = 1;
 236         cp = VTOC(vp);
 237         fp = VTOF(vp);
 238         hfsmp = VTOHFS(vp);
 239         filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
 240
 241         if (ioflag & IO_APPEND) {
 242                 uio_setoffset(uio, fp->ff_size);
 243                 offset = fp->ff_size;
 244         }
 245         if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
 246                 retval = EPERM;
 247                 goto exit;
 248         }
 249
 250         origFileSize = fp->ff_size;
 251         eflags = kEFDeferMask;  /* defer file block allocations */
 252
 253 #ifdef HFS_SPARSE_DEV
 254         /*
 255          * When the underlying device is sparse and space
 256          * is low (< 8MB), stop doing delayed allocations
 257          * and begin doing synchronous I/O.
 258          */
 259         if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
 260             (hfs_freeblks(hfsmp, 0) < 2048)) {
 261                 eflags &= ~kEFDeferMask;
 262                 ioflag |= IO_SYNC;
 263         }
 264 #endif /* HFS_SPARSE_DEV */
 265
 266         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
 267                 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
 268
 269         /* Now test if we need to extend the file */
 270         /* Doing so will adjust the filebytes for us */
 271
 272         writelimit = offset + resid;
 273         if (writelimit <= filebytes)
 274                 goto sizeok;
 275
 276         cred = vfs_context_ucred(ap->a_context);
 277 #if QUOTA
 278         bytesToAdd = writelimit - filebytes;
 279         retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
 280                            cred, 0);
 281         if (retval)
 282                 goto exit;
 283 #endif /* QUOTA */
 284
 285         if (hfs_start_transaction(hfsmp) != 0) {
 286                 retval = EINVAL;
 287                 goto exit;
 288         }
 289
 290         while (writelimit > filebytes) {
 291                 bytesToAdd = writelimit - filebytes;
 292                 if (cred && suser(cred, NULL) != 0)
 293                         eflags |= kEFReserveMask;
 294
 295                 /* Protect extents b-tree and allocation bitmap */
 296                 lockflags = SFL_BITMAP;
 297                 if (overflow_extents(fp))
 298                         lockflags |= SFL_EXTENTS;
 299                 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
 300
 301                 /* Files that are changing size are not hot file candidates. */
 302                 if (hfsmp->hfc_stage == HFC_RECORDING) {
 303                         fp->ff_bytesread = 0;
 304                 }
 305                 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
 306                                 0, eflags, &actualBytesAdded));
 307
 308                 hfs_systemfile_unlock(hfsmp, lockflags);
 309
 310                 if ((actualBytesAdded == 0) && (retval == E_NONE))
 311                         retval = ENOSPC;
 312                 if (retval != E_NONE)
 313                         break;
 314                 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
 315                 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
 316                         (int)offset, uio_resid(uio), (int)fp->ff_size,  (int)filebytes, 0);
 317         }
 318         (void) hfs_update(vp, TRUE);
 319         (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
 320         (void) hfs_end_transaction(hfsmp);
 321
 322 sizeok:
 323         if (retval == E_NONE) {
 324                 off_t filesize;
 325                 off_t zero_off;
 326                 off_t tail_off;
 327                 off_t inval_start;
 328                 off_t inval_end;
 329                 off_t io_start;
 330                 int lflag;
 331                 struct rl_entry *invalid_range;
 332
 333                 if (writelimit > fp->ff_size)
 334                         filesize = writelimit;
 335                 else
 336                         filesize = fp->ff_size;
 337
 338                 lflag = (ioflag & IO_SYNC);
 339
 340                 if (offset <= fp->ff_size) {
 341                         zero_off = offset & ~PAGE_MASK_64;
 342
 343                         /* Check to see whether the area between the zero_offset and the start
 344                            of the transfer to see whether is invalid and should be zero-filled
 345                            as part of the transfer:
 346                          */
 347                         if (offset > zero_off) {
 348                                 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
 349                                         lflag |= IO_HEADZEROFILL;
 350                         }
 351                 } else {
 352                         off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
 353
 354                         /* The bytes between fp->ff_size and uio->uio_offset must never be
 355                            read without being zeroed.  The current last block is filled with zeroes
 356                            if it holds valid data but in all cases merely do a little bookkeeping
 357                            to track the area from the end of the current last page to the start of
 358                            the area actually written.  For the same reason only the bytes up to the
 359                            start of the page where this write will start is invalidated; any remainder
 360                            before uio->uio_offset is explicitly zeroed as part of the cluster_write.
 361
 362                            Note that inval_start, the start of the page after the current EOF,
 363                            may be past the start of the write, in which case the zeroing
 364                            will be handled by the cluser_write of the actual data.
 365                          */
 366                         inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
 367                         inval_end = offset & ~PAGE_MASK_64;
 368                         zero_off = fp->ff_size;
 369
 370                         if ((fp->ff_size & PAGE_MASK_64) &&
 371                                 (rl_scan(&fp->ff_invalidranges,
 372                                                         eof_page_base,
 373                                                         fp->ff_size - 1,
 374                                                         &invalid_range) != RL_NOOVERLAP)) {
 375                                 /* The page containing the EOF is not valid, so the
 376                                    entire page must be made inaccessible now.  If the write
 377                                    starts on a page beyond the page containing the eof
 378                                    (inval_end > eof_page_base), add the
 379                                    whole page to the range to be invalidated.  Otherwise
 380                                    (i.e. if the write starts on the same page), zero-fill
 381                                    the entire page explicitly now:
 382                                  */
 383                                 if (inval_end > eof_page_base) {
 384                                         inval_start = eof_page_base;
 385                                 } else {
 386                                         zero_off = eof_page_base;
 387                                 };
 388                         };
 389
 390                         if (inval_start < inval_end) {
 391                                 struct timeval tv;
 392                                 /* There's some range of data that's going to be marked invalid */
 393
 394                                 if (zero_off < inval_start) {
 395                                         /* The pages between inval_start and inval_end are going to be invalidated,
 396                                            and the actual write will start on a page past inval_end.  Now's the last
 397                                            chance to zero-fill the page containing the EOF:
 398                                          */
 399                                         hfs_unlock(cp);
 400                                         cnode_locked = 0;
 401                                         retval = cluster_write(vp, (uio_t) 0,
 402                                                         fp->ff_size, inval_start,
 403                                                         zero_off, (off_t)0,
 404                                                         lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
 405                                         hfs_lock(cp, HFS_FORCE_LOCK);
 406                                         cnode_locked = 1;
 407                                         if (retval) goto ioerr_exit;
 408                                         offset = uio_offset(uio);
 409                                 };
 410
 411                                 /* Mark the remaining area of the newly allocated space as invalid: */
 412                                 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
 413                                 microuptime(&tv);
 414                                 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
 415                                 zero_off = fp->ff_size = inval_end;
 416                         };
 417
 418                         if (offset > zero_off) lflag |= IO_HEADZEROFILL;
 419                 };
 420
 421                 /* Check to see whether the area between the end of the write and the end of
 422                    the page it falls in is invalid and should be zero-filled as part of the transfer:
 423                  */
 424                 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
 425                 if (tail_off > filesize) tail_off = filesize;
 426                 if (tail_off > writelimit) {
 427                         if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
 428                                 lflag |= IO_TAILZEROFILL;
 429                         };
 430                 };
 431
 432                 /*
 433                  * if the write starts beyond the current EOF (possibly advanced in the
 434                  * zeroing of the last block, above), then we'll zero fill from the current EOF
 435                  * to where the write begins:
 436                  *
 437                  * NOTE: If (and ONLY if) the portion of the file about to be written is
 438                  *       before the current EOF it might be marked as invalid now and must be
 439                  *       made readable (removed from the invalid ranges) before cluster_write
 440                  *       tries to write it:
 441                  */
 442                 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
 443                 if (io_start < fp->ff_size) {
 444                         off_t io_end;
 445
 446                         io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
 447                         rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
 448                 };
 449
 450                 hfs_unlock(cp);
 451                 cnode_locked = 0;
 452                 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
 453                                 tail_off, lflag | IO_NOZERODIRTY);
 454                 offset = uio_offset(uio);
 455                 if (offset > fp->ff_size) {
 456                         fp->ff_size = offset;
 457
 458                         ubc_setsize(vp, fp->ff_size);       /* XXX check errors */
 459                         /* Files that are changing size are not hot file candidates. */
 460                         if (hfsmp->hfc_stage == HFC_RECORDING)
 461                                 fp->ff_bytesread = 0;
 462                 }
 463                 if (resid > uio_resid(uio)) {
 464                         cp->c_touch_chgtime = TRUE;
 465                         cp->c_touch_modtime = TRUE;
 466                 }
 467         }
 468         HFS_KNOTE(vp, NOTE_WRITE);
 469
 470 ioerr_exit:
 471         /*
 472          * If we successfully wrote any data, and we are not the superuser
 473          * we clear the setuid and setgid bits as a precaution against
 474          * tampering.
 475          */
 476         if (cp->c_mode & (S_ISUID | S_ISGID)) {
 477                 cred = vfs_context_ucred(ap->a_context);
 478                 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
 479                         if (!cnode_locked) {
 480                                 hfs_lock(cp, HFS_FORCE_LOCK);
 481                                 cnode_locked = 1;
 482                         }
 483                         cp->c_mode &= ~(S_ISUID | S_ISGID);
 484                 }
 485         }
 486         if (retval) {
 487                 if (ioflag & IO_UNIT) {
 488                         if (!cnode_locked) {
 489                                 hfs_lock(cp, HFS_FORCE_LOCK);
 490                                 cnode_locked = 1;
 491                         }
 492                         (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
 493                                            0, ap->a_context);
 494                         // LP64todo - fix this!  resid needs to by user_ssize_t
 495                         uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
 496                         uio_setresid(uio, resid);
 497                         filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
 498                 }
 499         } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
 500                 if (!cnode_locked) {
 501                         hfs_lock(cp, HFS_FORCE_LOCK);
 502                         cnode_locked = 1;
 503                 }
 504                 retval = hfs_update(vp, TRUE);
 505         }
 506         /* Updating vcbWrCnt doesn't need to be atomic. */
 507         hfsmp->vcbWrCnt++;
 508
 509         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
 510                 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
 511 exit:
 512         if (cnode_locked)
 513                 hfs_unlock(cp);
 514         hfs_unlock_truncate(cp);
 515         return (retval);
 516 }
 517
 518 /* support for the "bulk-access" fcntl */
 519
 520 #define CACHE_ELEMS 64
 521 #define CACHE_LEVELS 16
 522 #define PARENT_IDS_FLAG 0x100
 523
 524 /* from hfs_attrlist.c */
 525 extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
 526                         mode_t obj_mode, struct mount *mp,
 527                         kauth_cred_t cred, struct proc *p);
 528
 529 /* from vfs/vfs_fsevents.c */
 530 extern char *get_pathbuff(void);
 531 extern void release_pathbuff(char *buff);
 532
 533 struct access_cache {
 534        int numcached;
 535        int cachehits; /* these two for statistics gathering */
 536        int lookups;
 537        unsigned int *acache;
 538        Boolean *haveaccess;
 539 };
 540
 541 struct access_t {
 542         uid_t     uid;              /* IN: effective user id */
 543         short     flags;            /* IN: access requested (i.e. R_OK) */
 544         short     num_groups;       /* IN: number of groups user belongs to */
 545         int       num_files;        /* IN: number of files to process */
 546         int       *file_ids;        /* IN: array of file ids */
 547         gid_t     *groups;          /* IN: array of groups */
 548         short     *access;          /* OUT: access info for each file (0 for 'has access') */
 549 };
 550
 551 struct user_access_t {
 552         uid_t           uid;                    /* IN: effective user id */
 553         short           flags;                  /* IN: access requested (i.e. R_OK) */
 554         short           num_groups;             /* IN: number of groups user belongs to */
 555         int                     num_files;              /* IN: number of files to process */
 556         user_addr_t     file_ids;               /* IN: array of file ids */
 557         user_addr_t     groups;                 /* IN: array of groups */
 558         user_addr_t     access;                 /* OUT: access info for each file (0 for 'has access') */
 559 };
 560
 561 /*
 562  * Perform a binary search for the given parent_id. Return value is
 563  * found/not found boolean, and indexp will be the index of the item
 564  * or the index at which to insert the item if it's not found.
 565  */
 566 static int
 567 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
 568 {
 569         unsigned int lo, hi;
 570         int index, matches = 0;
 571
 572         if (cache->numcached == 0) {
 573                 *indexp = 0;
 574                 return 0; // table is empty, so insert at index=0 and report no match
 575         }
 576
 577         if (cache->numcached > CACHE_ELEMS) {
 578                 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
 579                   cache->numcached, CACHE_ELEMS);*/
 580                 cache->numcached = CACHE_ELEMS;
 581         }
 582
 583         lo = 0;
 584         hi = cache->numcached - 1;
 585         index = -1;
 586
 587         /* perform binary search for parent_id */
 588         do {
 589                 unsigned int mid = (hi - lo)/2 + lo;
 590                 unsigned int this_id = cache->acache[mid];
 591
 592                 if (parent_id == this_id) {
 593                         index = mid;
 594                         break;
 595                 }
 596
 597                 if (parent_id < this_id) {
 598                         hi = mid;
 599                         continue;
 600                 }
 601
 602                 if (parent_id > this_id) {
 603                         lo = mid + 1;
 604                         continue;
 605                 }
 606         } while(lo < hi);
 607
 608         /* check if lo and hi converged on the match */
 609         if (parent_id == cache->acache[hi]) {
 610                 index = hi;
 611         }
 612
 613         /* if no existing entry found, find index for new one */
 614         if (index == -1) {
 615                 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
 616                 matches = 0;
 617         } else {
 618                 matches = 1;
 619         }
 620
 621         *indexp = index;
 622         return matches;
 623 }
 624
 625 /*
 626  * Add a node to the access_cache at the given index (or do a lookup first
 627  * to find the index if -1 is passed in). We currently do a replace rather
 628  * than an insert if the cache is full.
 629  */
 630 static void
 631 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
 632 {
 633        int lookup_index = -1;
 634
 635        /* need to do a lookup first if -1 passed for index */
 636        if (index == -1) {
 637                if (lookup_bucket(cache, &lookup_index, nodeID)) {
 638                        if (cache->haveaccess[lookup_index] != access) {
 639                                /* change access info for existing entry... should never happen */
 640                                cache->haveaccess[lookup_index] = access;
 641                        }
 642
 643                        /* mission accomplished */
 644                        return;
 645                } else {
 646                        index = lookup_index;
 647                }
 648
 649        }
 650
 651        /* if the cache is full, do a replace rather than an insert */
 652        if (cache->numcached >= CACHE_ELEMS) {
 653                //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
 654                cache->numcached = CACHE_ELEMS-1;
 655
 656                if (index > cache->numcached) {
 657                  //    printf("index %d pinned to %d\n", index, cache->numcached);
 658                        index = cache->numcached;
 659                }
 660        } else if (index >= 0 && index < cache->numcached) {
 661                /* only do bcopy if we're inserting */
 662                bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
 663                bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
 664        }
 665
 666        cache->acache[index] = nodeID;
 667        cache->haveaccess[index] = access;
 668        cache->numcached++;
 669 }
 670
 671
 672 struct cinfo {
 673         uid_t   uid;
 674         gid_t   gid;
 675         mode_t  mode;
 676         cnid_t  parentcnid;
 677 };
 678
 679 static int
 680 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
 681 {
 682         struct cinfo *cip = (struct cinfo *)arg;
 683
 684         cip->uid = attrp->ca_uid;
 685         cip->gid = attrp->ca_gid;
 686         cip->mode = attrp->ca_mode;
 687         cip->parentcnid = descp->cd_parentcnid;
 688
 689         return (0);
 690 }
 691
 692 /*
 693  * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
 694  * isn't incore, then go to the catalog.
 695  */
 696 static int
 697 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
 698                struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
 699 {
 700         int error = 0;
 701
 702         /* if this id matches the one the fsctl was called with, skip the lookup */
 703         if (cnid == skip_cp->c_cnid) {
 704                 cnattrp->ca_uid = skip_cp->c_uid;
 705                 cnattrp->ca_gid = skip_cp->c_gid;
 706                 cnattrp->ca_mode = skip_cp->c_mode;
 707                 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
 708         } else {
 709                 struct cinfo c_info;
 710
 711                 /* otherwise, check the cnode hash incase the file/dir is incore */
 712                 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
 713                         cnattrp->ca_uid = c_info.uid;
 714                         cnattrp->ca_gid = c_info.gid;
 715                         cnattrp->ca_mode = c_info.mode;
 716                         keyp->hfsPlus.parentID = c_info.parentcnid;
 717                 } else {
 718                         int lockflags;
 719
 720                         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
 721
 722                         /* lookup this cnid in the catalog */
 723                         error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
 724
 725                         hfs_systemfile_unlock(hfsmp, lockflags);
 726
 727                         cache->lookups++;
 728                 }
 729         }
 730
 731         return (error);
 732 }
 733
 734 /*
 735  * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
 736  * up to CACHE_LEVELS as we progress towards the root.
 737  */
 738 static int
 739 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
 740                 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
 741 {
 742        int                     myErr = 0;
 743        int                     myResult;
 744        HFSCatalogNodeID        thisNodeID;
 745        unsigned long           myPerms;
 746        struct cat_attr         cnattr;
 747        int                     cache_index = -1;
 748        CatalogKey              catkey;
 749
 750        int i = 0, ids_to_cache = 0;
 751        int parent_ids[CACHE_LEVELS];
 752
 753        /* root always has access */
 754        if (!suser(myp_ucred, NULL)) {
 755                return (1);
 756        }
 757
 758        thisNodeID = nodeID;
 759        while (thisNodeID >=  kRootDirID) {
 760                myResult = 0;   /* default to "no access" */
 761
 762                /* check the cache before resorting to hitting the catalog */
 763
 764                /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
 765                 * to look any further after hitting cached dir */
 766
 767                if (lookup_bucket(cache, &cache_index, thisNodeID)) {
 768                        cache->cachehits++;
 769                        myResult = cache->haveaccess[cache_index];
 770                        goto ExitThisRoutine;
 771                }
 772
 773                /* remember which parents we want to cache */
 774                if (ids_to_cache < CACHE_LEVELS) {
 775                        parent_ids[ids_to_cache] = thisNodeID;
 776                        ids_to_cache++;
 777                }
 778
 779                /* do the lookup (checks the cnode hash, then the catalog) */
 780                myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
 781                if (myErr) {
 782                        goto ExitThisRoutine; /* no access */
 783                }
 784
 785                myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
 786                                                  cnattr.ca_mode, hfsmp->hfs_mp,
 787                                                  myp_ucred, theProcPtr);
 788
 789                if ( (myPerms & X_OK) == 0 ) {
 790                        myResult = 0;
 791                        goto ExitThisRoutine;   /* no access */
 792                }
 793
 794                /* up the hierarchy we go */
 795                thisNodeID = catkey.hfsPlus.parentID;
 796        }
 797
 798        /* if here, we have access to this node */
 799        myResult = 1;
 800
 801  ExitThisRoutine:
 802        if (myErr) {
 803                //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
 804                myResult = 0;
 805        }
 806        *err = myErr;
 807
 808        /* cache the parent directory(ies) */
 809        for (i = 0; i < ids_to_cache; i++) {
 810                /* small optimization: get rid of double-lookup for all these */
 811                // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
 812                add_node(cache, -1, parent_ids[i], myResult);
 813        }
 814
 815        return (myResult);
 816 }
 817 /* end "bulk-access" support */
 818
 819
 820
 821 /*
 822  * Callback for use with freeze ioctl.
 823  */
 824 static int
 825 hfs_freezewrite_callback(struct vnode *vp, void *cargs)
 826 {
 827         vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
 828
 829         return 0;
 830 }
 831
 832 /*
 833  * Control filesystem operating characteristics.
 834  */
 835 int
 836 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
 837                 vnode_t a_vp;
 838                 int  a_command;
 839                 caddr_t  a_data;
 840                 int  a_fflag;
 841                 vfs_context_t a_context;
 842         } */ *ap)
 843 {
 844         struct vnode * vp = ap->a_vp;
 845         struct hfsmount *hfsmp = VTOHFS(vp);
 846         vfs_context_t context = ap->a_context;
 847         kauth_cred_t cred = vfs_context_ucred(context);
 848         proc_t p = vfs_context_proc(context);
 849         struct vfsstatfs *vfsp;
 850         boolean_t is64bit;
 851
 852         is64bit = proc_is64bit(p);
 853
 854         switch (ap->a_command) {
 855
 856         case HFS_RESIZE_VOLUME: {
 857                 u_int64_t newsize;
 858                 u_int64_t cursize;
 859
 860                 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
 861                 if (suser(cred, NULL) &&
 862                         kauth_cred_getuid(cred) != vfsp->f_owner) {
 863                         return (EACCES); /* must be owner of file system */
 864                 }
 865                 if (!vnode_isvroot(vp)) {
 866                         return (EINVAL);
 867                 }
 868                 newsize = *(u_int64_t *)ap->a_data;
 869                 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
 870
 871                 if (newsize > cursize) {
 872                         return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
 873                 } else if (newsize < cursize) {
 874                         return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
 875                 } else {
 876                         return (0);
 877                 }
 878         }
 879         case HFS_CHANGE_NEXT_ALLOCATION: {
 880                 u_int32_t location;
 881
 882                 if (vnode_vfsisrdonly(vp)) {
 883                         return (EROFS);
 884                 }
 885                 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
 886                 if (suser(cred, NULL) &&
 887                         kauth_cred_getuid(cred) != vfsp->f_owner) {
 888                         return (EACCES); /* must be owner of file system */
 889                 }
 890                 if (!vnode_isvroot(vp)) {
 891                         return (EINVAL);
 892                 }
 893                 location = *(u_int32_t *)ap->a_data;
 894                 if (location > hfsmp->totalBlocks - 1) {
 895                         return (EINVAL);
 896                 }
 897                 /* Return previous value. */
 898                 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
 899                 HFS_MOUNT_LOCK(hfsmp, TRUE);
 900                 hfsmp->nextAllocation = location;
 901                 hfsmp->vcbFlags |= 0xFF00;
 902                 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
 903                 return (0);
 904         }
 905
 906 #ifdef HFS_SPARSE_DEV
 907         case HFS_SETBACKINGSTOREINFO: {
 908                 struct vnode * bsfs_rootvp;
 909                 struct vnode * di_vp;
 910                 struct hfs_backingstoreinfo *bsdata;
 911                 int error = 0;
 912
 913                 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
 914                         return (EALREADY);
 915                 }
 916                 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
 917                 if (suser(cred, NULL) &&
 918                         kauth_cred_getuid(cred) != vfsp->f_owner) {
 919                         return (EACCES); /* must be owner of file system */
 920                 }
 921                 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
 922                 if (bsdata == NULL) {
 923                         return (EINVAL);
 924                 }
 925                 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
 926                         return (error);
 927                 }
 928                 if ((error = vnode_getwithref(di_vp))) {
 929                         file_drop(bsdata->backingfd);
 930                         return(error);
 931                 }
 932
 933                 if (vnode_mount(vp) == vnode_mount(di_vp)) {
 934                         (void)vnode_put(di_vp);
 935                         file_drop(bsdata->backingfd);
 936                         return (EINVAL);
 937                 }
 938
 939                 /*
 940                  * Obtain the backing fs root vnode and keep a reference
 941                  * on it.  This reference will be dropped in hfs_unmount.
 942                  */
 943                 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
 944                 if (error) {
 945                         (void)vnode_put(di_vp);
 946                         file_drop(bsdata->backingfd);
 947                         return (error);
 948                 }
 949                 vnode_ref(bsfs_rootvp);
 950                 vnode_put(bsfs_rootvp);
 951
 952                 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
 953                 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
 954                 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
 955                 hfsmp->hfs_sparsebandblks *= 4;
 956
 957                 (void)vnode_put(di_vp);
 958                 file_drop(bsdata->backingfd);
 959                 return (0);
 960         }
 961         case HFS_CLRBACKINGSTOREINFO: {
 962                 struct vnode * tmpvp;
 963
 964                 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
 965                 if (suser(cred, NULL) &&
 966                         kauth_cred_getuid(cred) != vfsp->f_owner) {
 967                         return (EACCES); /* must be owner of file system */
 968                 }
 969                 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
 970                     hfsmp->hfs_backingfs_rootvp) {
 971
 972                         hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
 973                         tmpvp = hfsmp->hfs_backingfs_rootvp;
 974                         hfsmp->hfs_backingfs_rootvp = NULLVP;
 975                         hfsmp->hfs_sparsebandblks = 0;
 976                         vnode_rele(tmpvp);
 977                 }
 978                 return (0);
 979         }
 980 #endif /* HFS_SPARSE_DEV */
 981
 982         case F_FREEZE_FS: {
 983                 struct mount *mp;
 984                 task_t task;
 985
 986                 if (!is_suser())
 987                         return (EACCES);
 988
 989                 mp = vnode_mount(vp);
 990                 hfsmp = VFSTOHFS(mp);
 991
 992                 if (!(hfsmp->jnl))
 993                         return (ENOTSUP);
 994
 995                 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
 996
 997                 task = current_task();
 998                 task_working_set_disable(task);
 999
1000                 // flush things before we get started to try and prevent
1001                 // dirty data from being paged out while we're frozen.
1002                 // note: can't do this after taking the lock as it will
1003                 // deadlock against ourselves.
1004                 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1005                 hfs_global_exclusive_lock_acquire(hfsmp);
1006                 journal_flush(hfsmp->jnl);
1007
1008                 // don't need to iterate on all vnodes, we just need to
1009                 // wait for writes to the system files and the device vnode
1010                 if (HFSTOVCB(hfsmp)->extentsRefNum)
1011                     vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1012                 if (HFSTOVCB(hfsmp)->catalogRefNum)
1013                     vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1014                 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1015                     vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1016                 if (hfsmp->hfs_attribute_vp)
1017                     vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1018                 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1019
1020                 hfsmp->hfs_freezing_proc = current_proc();
1021
1022                 return (0);
1023         }
1024
1025         case F_THAW_FS: {
1026                 if (!is_suser())
1027                         return (EACCES);
1028
1029                 // if we're not the one who froze the fs then we
1030                 // can't thaw it.
1031                 if (hfsmp->hfs_freezing_proc != current_proc()) {
1032                     return EPERM;
1033                 }
1034
1035                 // NOTE: if you add code here, also go check the
1036                 //       code that "thaws" the fs in hfs_vnop_close()
1037                 //
1038                 hfsmp->hfs_freezing_proc = NULL;
1039                 hfs_global_exclusive_lock_release(hfsmp);
1040                 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1041
1042                 return (0);
1043         }
1044
1045 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1046 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1047
1048         case HFS_BULKACCESS_FSCTL:
1049         case HFS_BULKACCESS: {
1050                 /*
1051                  * NOTE: on entry, the vnode is locked. Incase this vnode
1052                  * happens to be in our list of file_ids, we'll note it
1053                  * avoid calling hfs_chashget_nowait() on that id as that
1054                  * will cause a "locking against myself" panic.
1055                  */
1056                 Boolean check_leaf = true;
1057
1058                 struct user_access_t *user_access_structp;
1059                 struct user_access_t tmp_user_access_t;
1060                 struct access_cache cache;
1061
1062                 int error = 0, i;
1063
1064                 dev_t dev = VTOC(vp)->c_dev;
1065
1066                 short flags;
1067                 struct ucred myucred;   /* XXX ILLEGAL */
1068                 int num_files;
1069                 int *file_ids = NULL;
1070                 short *access = NULL;
1071
1072                 cnid_t cnid;
1073                 cnid_t prevParent_cnid = 0;
1074                 unsigned long myPerms;
1075                 short myaccess = 0;
1076                 struct cat_attr cnattr;
1077                 CatalogKey catkey;
1078                 struct cnode *skip_cp = VTOC(vp);
1079                 struct vfs_context      my_context;
1080
1081                 /* first, return error if not run as root */
1082                 if (cred->cr_ruid != 0) {
1083                         return EPERM;
1084                 }
1085
1086                 /* initialize the local cache and buffers */
1087                 cache.numcached = 0;
1088                 cache.cachehits = 0;
1089                 cache.lookups = 0;
1090
1091                 file_ids = (int *) get_pathbuff();
1092                 access = (short *) get_pathbuff();
1093                 cache.acache = (int *) get_pathbuff();
1094                 cache.haveaccess = (Boolean *) get_pathbuff();
1095
1096                 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1097                         release_pathbuff((char *) file_ids);
1098                         release_pathbuff((char *) access);
1099                         release_pathbuff((char *) cache.acache);
1100                         release_pathbuff((char *) cache.haveaccess);
1101
1102                         return ENOMEM;
1103                 }
1104
1105                 /* struct copyin done during dispatch... need to copy file_id array separately */
1106                 if (ap->a_data == NULL) {
1107                         error = EINVAL;
1108                         goto err_exit_bulk_access;
1109                 }
1110
1111                 if (is64bit) {
1112                         user_access_structp = (struct user_access_t *)ap->a_data;
1113                 }
1114                 else {
1115                         struct access_t *       accessp = (struct access_t *)ap->a_data;
1116                         tmp_user_access_t.uid = accessp->uid;
1117                         tmp_user_access_t.flags = accessp->flags;
1118                         tmp_user_access_t.num_groups = accessp->num_groups;
1119                         tmp_user_access_t.num_files = accessp->num_files;
1120                         tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1121                         tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1122                         tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1123                         user_access_structp = &tmp_user_access_t;
1124                 }
1125
1126                 num_files = user_access_structp->num_files;
1127                 if (num_files < 1) {
1128                         goto err_exit_bulk_access;
1129                 }
1130                 if (num_files > 256) {
1131                         error = EINVAL;
1132                         goto err_exit_bulk_access;
1133                 }
1134
1135                 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1136                                                         num_files * sizeof(int)))) {
1137                         goto err_exit_bulk_access;
1138                 }
1139
1140                 /* fill in the ucred structure */
1141                 flags = user_access_structp->flags;
1142                 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1143                         flags = R_OK;
1144                 }
1145
1146                 /* check if we've been passed leaf node ids or parent ids */
1147                 if (flags & PARENT_IDS_FLAG) {
1148                         check_leaf = false;
1149                 }
1150
1151                 memset(&myucred, 0, sizeof(myucred));
1152                 myucred.cr_ref = 1;
1153                 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1154                 myucred.cr_ngroups = user_access_structp->num_groups;
1155                 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1156                         myucred.cr_ngroups = 0;
1157                 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1158                                           myucred.cr_ngroups * sizeof(gid_t)))) {
1159                         goto err_exit_bulk_access;
1160                 }
1161                 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1162
1163                 my_context.vc_proc = p;
1164                 my_context.vc_ucred = &myucred;
1165
1166                 /* Check access to each file_id passed in */
1167                 for (i = 0; i < num_files; i++) {
1168 #if 0
1169                         cnid = (cnid_t) file_ids[i];
1170
1171                         /* root always has access */
1172                         if (!suser(&myucred, NULL)) {
1173                                 access[i] = 0;
1174                                 continue;
1175                         }
1176
1177                         if (check_leaf) {
1178
1179                                 /* do the lookup (checks the cnode hash, then the catalog) */
1180                                 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1181                                 if (error) {
1182                                         access[i] = (short) error;
1183                                         continue;
1184                                 }
1185
1186                                 /* before calling CheckAccess(), check the target file for read access */
1187                                 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1188                                                                   cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p  );
1189
1190
1191                                 /* fail fast if no access */
1192                                 if ((myPerms & flags) == 0) {
1193                                         access[i] = EACCES;
1194                                         continue;
1195                                 }
1196                         } else {
1197                                 /* we were passed an array of parent ids */
1198                                 catkey.hfsPlus.parentID = cnid;
1199                         }
1200
1201                         /* if the last guy had the same parent and had access, we're done */
1202                         if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1203                                 cache.cachehits++;
1204                                 access[i] = 0;
1205                                 continue;
1206                         }
1207
1208                         myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1209                                                    skip_cp, p, &myucred, dev);
1210
1211                         if ( myaccess ) {
1212                                 access[i] = 0; // have access.. no errors to report
1213                         } else {
1214                                 access[i] = (error != 0 ? (short) error : EACCES);
1215                         }
1216
1217                         prevParent_cnid = catkey.hfsPlus.parentID;
1218 #else
1219                         int myErr;
1220
1221                         cnid = (cnid_t)file_ids[i];
1222
1223                         while (cnid >= kRootDirID) {
1224                             /* get the vnode for this cnid */
1225                             myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1226                             if ( myErr ) {
1227                                 access[i] = EACCES;
1228                                 break;
1229                             }
1230
1231                             cnid = VTOC(vp)->c_parentcnid;
1232
1233                             hfs_unlock(VTOC(vp));
1234                             if (vnode_vtype(vp) == VDIR) {
1235                                 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1236                             } else {
1237                                 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1238                             }
1239                             vnode_put(vp);
1240                             access[i] = myErr;
1241                             if (myErr) {
1242                                 break;
1243                             }
1244                         }
1245 #endif
1246                 }
1247
1248                 /* copyout the access array */
1249                 if ((error = copyout((caddr_t)access, user_access_structp->access,
1250                                      num_files * sizeof (short)))) {
1251                         goto err_exit_bulk_access;
1252                 }
1253
1254         err_exit_bulk_access:
1255
1256                 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1257
1258                 release_pathbuff((char *) cache.acache);
1259                 release_pathbuff((char *) cache.haveaccess);
1260                 release_pathbuff((char *) file_ids);
1261                 release_pathbuff((char *) access);
1262
1263                 return (error);
1264         } /* HFS_BULKACCESS */
1265
1266         case HFS_SETACLSTATE: {
1267                 int state;
1268
1269                 if (ap->a_data == NULL) {
1270                         return (EINVAL);
1271                 }
1272
1273                 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1274                 state = *(int *)ap->a_data;
1275
1276                 // super-user can enable or disable acl's on a volume.
1277                 // the volume owner can only enable acl's
1278                 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1279                         return (EPERM);
1280                 }
1281                 if (state == 0 || state == 1)
1282                         return hfs_setextendedsecurity(hfsmp, state);
1283                 else
1284                         return (EINVAL);
1285         }
1286
1287         case F_FULLFSYNC: {
1288                 int error;
1289
1290                 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1291                 if (error == 0) {
1292                         error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1293                         hfs_unlock(VTOC(vp));
1294                 }
1295
1296                 return error;
1297         }
1298
1299         case F_CHKCLEAN: {
1300                 register struct cnode *cp;
1301                 int error;
1302
1303                 if (!vnode_isreg(vp))
1304                         return EINVAL;
1305
1306                 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1307                 if (error == 0) {
1308                         cp = VTOC(vp);
1309                         /*
1310                          * used by regression test to determine if
1311                          * all the dirty pages (via write) have been cleaned
1312                          * after a call to 'fsysnc'.
1313                          */
1314                         error = is_file_clean(vp, VTOF(vp)->ff_size);
1315                         hfs_unlock(cp);
1316                 }
1317                 return (error);
1318         }
1319
1320         case F_RDADVISE: {
1321                 register struct radvisory *ra;
1322                 struct filefork *fp;
1323                 int error;
1324
1325                 if (!vnode_isreg(vp))
1326                         return EINVAL;
1327
1328                 ra = (struct radvisory *)(ap->a_data);
1329                 fp = VTOF(vp);
1330
1331                 /* Protect against a size change. */
1332                 hfs_lock_truncate(VTOC(vp), TRUE);
1333
1334                 if (ra->ra_offset >= fp->ff_size) {
1335                         error = EFBIG;
1336                 } else {
1337                         error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1338                 }
1339
1340                 hfs_unlock_truncate(VTOC(vp));
1341                 return (error);
1342         }
1343
1344         case F_READBOOTSTRAP:
1345         case F_WRITEBOOTSTRAP:
1346         {
1347             struct vnode *devvp = NULL;
1348             user_fbootstraptransfer_t *user_bootstrapp;
1349             int devBlockSize;
1350             int error;
1351             uio_t auio;
1352             daddr64_t blockNumber;
1353             u_long blockOffset;
1354             u_long xfersize;
1355             struct buf *bp;
1356             user_fbootstraptransfer_t user_bootstrap;
1357
1358                 if (!vnode_isvroot(vp))
1359                         return (EINVAL);
1360                 /* LP64 - when caller is a 64 bit process then we are passed a pointer
1361                  * to a user_fbootstraptransfer_t else we get a pointer to a
1362                  * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1363                  */
1364                 if (is64bit) {
1365                         user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1366                 }
1367                 else {
1368                 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1369                         user_bootstrapp = &user_bootstrap;
1370                         user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1371                         user_bootstrap.fbt_length = bootstrapp->fbt_length;
1372                         user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1373                 }
1374                 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1375                         return EINVAL;
1376
1377             devvp = VTOHFS(vp)->hfs_devvp;
1378                 auio = uio_create(1, user_bootstrapp->fbt_offset,
1379                                                   is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1380                                                   (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1381                 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1382
1383             devBlockSize = vfs_devblocksize(vnode_mount(vp));
1384
1385             while (uio_resid(auio) > 0) {
1386                         blockNumber = uio_offset(auio) / devBlockSize;
1387                         error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1388                         if (error) {
1389                                 if (bp) buf_brelse(bp);
1390                                 uio_free(auio);
1391                                 return error;
1392                         };
1393
1394                         blockOffset = uio_offset(auio) % devBlockSize;
1395                         xfersize = devBlockSize - blockOffset;
1396                         error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1397                         if (error) {
1398                                 buf_brelse(bp);
1399                                 uio_free(auio);
1400                                 return error;
1401                         };
1402                         if (uio_rw(auio) == UIO_WRITE) {
1403                                 error = VNOP_BWRITE(bp);
1404                                 if (error) {
1405                                         uio_free(auio);
1406                         return error;
1407                                 }
1408                         } else {
1409                                 buf_brelse(bp);
1410                         };
1411                 };
1412                 uio_free(auio);
1413         };
1414         return 0;
1415
1416         case _IOC(IOC_OUT,'h', 4, 0):     /* Create date in local time */
1417         {
1418                 if (is64bit) {
1419                         *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1420                 }
1421                 else {
1422                         *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1423                 }
1424                 return 0;
1425         }
1426
1427         case HFS_GET_MOUNT_TIME:
1428             return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1429             break;
1430
1431         case HFS_GET_LAST_MTIME:
1432             return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1433             break;
1434
1435         case HFS_SET_BOOT_INFO:
1436                 if (!vnode_isvroot(vp))
1437                         return(EINVAL);
1438                 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1439                         return(EACCES); /* must be superuser or owner of filesystem */
1440                 HFS_MOUNT_LOCK(hfsmp, TRUE);
1441                 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1442                 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1443                 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1444                 break;
1445
1446         case HFS_GET_BOOT_INFO:
1447                 if (!vnode_isvroot(vp))
1448                         return(EINVAL);
1449                 HFS_MOUNT_LOCK(hfsmp, TRUE);
1450                 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1451                 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1452                 break;
1453
1454         default:
1455                 return (ENOTTY);
1456         }
1457
1458     /* Should never get here */
1459         return 0;
1460 }
1461
1462 /*
1463  * select
1464  */
1465 int
1466 hfs_vnop_select(__unused struct vnop_select_args *ap)
1467 /*
1468         struct vnop_select_args {
1469                 vnode_t a_vp;
1470                 int  a_which;
1471                 int  a_fflags;
1472                 void *a_wql;
1473                 vfs_context_t a_context;
1474         };
1475 */
1476 {
1477         /*
1478          * We should really check to see if I/O is possible.
1479          */
1480         return (1);
1481 }
1482
1483 /*
1484  * Converts a logical block number to a physical block, and optionally returns
1485  * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1486  * The physical block number is based on the device block size, currently its 512.
1487  * The block run is returned in logical blocks, and is the REMAINING amount of blocks
1488  */
1489 int
1490 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1491 {
1492         struct cnode *cp = VTOC(vp);
1493         struct filefork *fp = VTOF(vp);
1494         struct hfsmount *hfsmp = VTOHFS(vp);
1495         int  retval = E_NONE;
1496         daddr_t  logBlockSize;
1497         size_t  bytesContAvail = 0;
1498         off_t  blockposition;
1499         int lockExtBtree;
1500         int lockflags = 0;
1501
1502         /*
1503          * Check for underlying vnode requests and ensure that logical
1504          * to physical mapping is requested.
1505          */
1506         if (vpp != NULL)
1507                 *vpp = cp->c_devvp;
1508         if (bnp == NULL)
1509                 return (0);
1510
1511         logBlockSize = GetLogicalBlockSize(vp);
1512         blockposition = (off_t)bn * (off_t)logBlockSize;
1513
1514         lockExtBtree = overflow_extents(fp);
1515
1516         if (lockExtBtree)
1517                 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1518
1519         retval = MacToVFSError(
1520                             MapFileBlockC (HFSTOVCB(hfsmp),
1521                                             (FCB*)fp,
1522                                             MAXPHYSIO,
1523                                             blockposition,
1524                                             bnp,
1525                                             &bytesContAvail));
1526
1527         if (lockExtBtree)
1528                 hfs_systemfile_unlock(hfsmp, lockflags);
1529
1530         if (retval == E_NONE) {
1531                 /* Figure out how many read ahead blocks there are */
1532                 if (runp != NULL) {
1533                         if (can_cluster(logBlockSize)) {
1534                                 /* Make sure this result never goes negative: */
1535                                 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1536                         } else {
1537                                 *runp = 0;
1538                         }
1539                 }
1540         }
1541         return (retval);
1542 }
1543
1544 /*
1545  * Convert logical block number to file offset.
1546  */
1547 int
1548 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1549 /*
1550         struct vnop_blktooff_args {
1551                 vnode_t a_vp;
1552                 daddr64_t a_lblkno;
1553                 off_t *a_offset;
1554         };
1555 */
1556 {
1557         if (ap->a_vp == NULL)
1558                 return (EINVAL);
1559         *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1560
1561         return(0);
1562 }
1563
1564 /*
1565  * Convert file offset to logical block number.
1566  */
1567 int
1568 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1569 /*
1570         struct vnop_offtoblk_args {
1571                 vnode_t a_vp;
1572                 off_t a_offset;
1573                 daddr64_t *a_lblkno;
1574         };
1575 */
1576 {
1577         if (ap->a_vp == NULL)
1578                 return (EINVAL);
1579         *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1580
1581         return(0);
1582 }
1583
1584 /*
1585  * Map file offset to physical block number.
1586  *
1587  * System file cnodes are expected to be locked (shared or exclusive).
1588  */
1589 int
1590 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1591 /*
1592         struct vnop_blockmap_args {
1593                 vnode_t a_vp;
1594                 off_t a_foffset;
1595                 size_t a_size;
1596                 daddr64_t *a_bpn;
1597                 size_t *a_run;
1598                 void *a_poff;
1599                 int a_flags;
1600                 vfs_context_t a_context;
1601         };
1602 */
1603 {
1604         struct vnode *vp = ap->a_vp;
1605         struct cnode *cp;
1606         struct filefork *fp;
1607         struct hfsmount *hfsmp;
1608         size_t bytesContAvail = 0;
1609         int retval = E_NONE;
1610         int syslocks = 0;
1611         int lockflags = 0;
1612         struct rl_entry *invalid_range;
1613         enum rl_overlaptype overlaptype;
1614         int started_tr = 0;
1615         int tooklock = 0;
1616
1617         /* Do not allow blockmap operation on a directory */
1618         if (vnode_isdir(vp)) {
1619                 return (ENOTSUP);
1620         }
1621
1622         /*
1623          * Check for underlying vnode requests and ensure that logical
1624          * to physical mapping is requested.
1625          */
1626         if (ap->a_bpn == NULL)
1627                 return (0);
1628
1629         if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1630                 if (VTOC(vp)->c_lockowner != current_thread()) {
1631                         hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1632                         tooklock = 1;
1633                 } else {
1634                         cp = VTOC(vp);
1635                         panic("blockmap: %s cnode lock already held!\n",
1636                                 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1637                 }
1638         }
1639         hfsmp = VTOHFS(vp);
1640         cp = VTOC(vp);
1641         fp = VTOF(vp);
1642
1643 retry:
1644         if (fp->ff_unallocblocks) {
1645                 if (hfs_start_transaction(hfsmp) != 0) {
1646                         retval = EINVAL;
1647                         goto exit;
1648                 } else {
1649                         started_tr = 1;
1650                 }
1651                 syslocks = SFL_EXTENTS | SFL_BITMAP;
1652
1653         } else if (overflow_extents(fp)) {
1654                 syslocks = SFL_EXTENTS;
1655         }
1656
1657         if (syslocks)
1658                 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1659
1660         /*
1661          * Check for any delayed allocations.
1662          */
1663         if (fp->ff_unallocblocks) {
1664                 SInt64 actbytes;
1665                 u_int32_t loanedBlocks;
1666
1667                 //
1668                 // Make sure we have a transaction.  It's possible
1669                 // that we came in and fp->ff_unallocblocks was zero
1670                 // but during the time we blocked acquiring the extents
1671                 // btree, ff_unallocblocks became non-zero and so we
1672                 // will need to start a transaction.
1673                 //
1674                 if (started_tr == 0) {
1675                         if (syslocks) {
1676                                 hfs_systemfile_unlock(hfsmp, lockflags);
1677                                 syslocks = 0;
1678                         }
1679                         goto retry;
1680                 }
1681
1682                 /*
1683                  * Note: ExtendFileC will Release any blocks on loan and
1684                  * aquire real blocks.  So we ask to extend by zero bytes
1685                  * since ExtendFileC will account for the virtual blocks.
1686                  */
1687
1688                 loanedBlocks = fp->ff_unallocblocks;
1689                 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1690                                      kEFAllMask | kEFNoClumpMask, &actbytes);
1691
1692                 if (retval) {
1693                         fp->ff_unallocblocks = loanedBlocks;
1694                         cp->c_blocks += loanedBlocks;
1695                         fp->ff_blocks += loanedBlocks;
1696
1697                         HFS_MOUNT_LOCK(hfsmp, TRUE);
1698                         hfsmp->loanedBlocks += loanedBlocks;
1699                         HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1700                 }
1701
1702                 if (retval) {
1703                         hfs_systemfile_unlock(hfsmp, lockflags);
1704                         cp->c_flag |= C_MODIFIED;
1705                         if (started_tr) {
1706                                 (void) hfs_update(vp, TRUE);
1707                                 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1708
1709                                 hfs_end_transaction(hfsmp);
1710                         }
1711                         goto exit;
1712                 }
1713         }
1714
1715         retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1716                                ap->a_bpn, &bytesContAvail);
1717         if (syslocks) {
1718                 hfs_systemfile_unlock(hfsmp, lockflags);
1719                 syslocks = 0;
1720         }
1721
1722         if (started_tr) {
1723                 (void) hfs_update(vp, TRUE);
1724                 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1725                 hfs_end_transaction(hfsmp);
1726                 started_tr = 0;
1727         }
1728         if (retval) {
1729                 goto exit;
1730         }
1731
1732         /* Adjust the mapping information for invalid file ranges: */
1733         overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1734                               ap->a_foffset + (off_t)bytesContAvail - 1,
1735                               &invalid_range);
1736         if (overlaptype != RL_NOOVERLAP) {
1737                 switch(overlaptype) {
1738                 case RL_MATCHINGOVERLAP:
1739                 case RL_OVERLAPCONTAINSRANGE:
1740                 case RL_OVERLAPSTARTSBEFORE:
1741                         /* There's no valid block for this byte offset: */
1742                         *ap->a_bpn = (daddr64_t)-1;
1743                         /* There's no point limiting the amount to be returned
1744                          * if the invalid range that was hit extends all the way
1745                          * to the EOF (i.e. there's no valid bytes between the
1746                          * end of this range and the file's EOF):
1747                          */
1748                         if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1749                             (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1750                                 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1751                         }
1752                         break;
1753
1754                 case RL_OVERLAPISCONTAINED:
1755                 case RL_OVERLAPENDSAFTER:
1756                         /* The range of interest hits an invalid block before the end: */
1757                         if (invalid_range->rl_start == ap->a_foffset) {
1758                                 /* There's actually no valid information to be had starting here: */
1759                                 *ap->a_bpn = (daddr64_t)-1;
1760                                 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1761                                     (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1762                                         bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1763                                 }
1764                         } else {
1765                                 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1766                         }
1767                         break;
1768
1769                 case RL_NOOVERLAP:
1770                         break;
1771                 } /* end switch */
1772                 if (bytesContAvail > ap->a_size)
1773                         bytesContAvail = ap->a_size;
1774         }
1775         if (ap->a_run)
1776                 *ap->a_run = bytesContAvail;
1777
1778         if (ap->a_poff)
1779                 *(int *)ap->a_poff = 0;
1780 exit:
1781         if (tooklock)
1782                 hfs_unlock(cp);
1783
1784         return (MacToVFSError(retval));
1785 }
1786
1787
1788 /*
1789  * prepare and issue the I/O
1790  * buf_strategy knows how to deal
1791  * with requests that require
1792  * fragmented I/Os
1793  */
1794 int
1795 hfs_vnop_strategy(struct vnop_strategy_args *ap)
1796 {
1797         buf_t   bp = ap->a_bp;
1798         vnode_t vp = buf_vnode(bp);
1799         struct cnode *cp = VTOC(vp);
1800
1801         return (buf_strategy(cp->c_devvp, ap));
1802 }
1803
1804
1805 static int
1806 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1807 {
1808         register struct cnode *cp = VTOC(vp);
1809         struct filefork *fp = VTOF(vp);
1810         struct proc *p = vfs_context_proc(context);;
1811         kauth_cred_t cred = vfs_context_ucred(context);
1812         int retval;
1813         off_t bytesToAdd;
1814         off_t actualBytesAdded;
1815         off_t filebytes;
1816         u_int64_t old_filesize;
1817         u_long fileblocks;
1818         int blksize;
1819         struct hfsmount *hfsmp;
1820         int lockflags;
1821
1822         blksize = VTOVCB(vp)->blockSize;
1823         fileblocks = fp->ff_blocks;
1824         filebytes = (off_t)fileblocks * (off_t)blksize;
1825         old_filesize = fp->ff_size;
1826
1827         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1828                  (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1829
1830         if (length < 0)
1831                 return (EINVAL);
1832
1833         if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1834                 return (EFBIG);
1835
1836         hfsmp = VTOHFS(vp);
1837
1838         retval = E_NONE;
1839
1840         /* Files that are changing size are not hot file candidates. */
1841         if (hfsmp->hfc_stage == HFC_RECORDING) {
1842                 fp->ff_bytesread = 0;
1843         }
1844
1845         /*
1846          * We cannot just check if fp->ff_size == length (as an optimization)
1847          * since there may be extra physical blocks that also need truncation.
1848          */
1849 #if QUOTA
1850         if ((retval = hfs_getinoquota(cp)))
1851                 return(retval);
1852 #endif /* QUOTA */
1853
1854         /*
1855          * Lengthen the size of the file. We must ensure that the
1856          * last byte of the file is allocated. Since the smallest
1857          * value of ff_size is 0, length will be at least 1.
1858          */
1859         if (length > (off_t)fp->ff_size) {
1860 #if QUOTA
1861                 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1862                                    cred, 0);
1863                 if (retval)
1864                         goto Err_Exit;
1865 #endif /* QUOTA */
1866                 /*
1867                  * If we don't have enough physical space then
1868                  * we need to extend the physical size.
1869                  */
1870                 if (length > filebytes) {
1871                         int eflags;
1872                         u_long blockHint = 0;
1873
1874                         /* All or nothing and don't round up to clumpsize. */
1875                         eflags = kEFAllMask | kEFNoClumpMask;
1876
1877                         if (cred && suser(cred, NULL) != 0)
1878                                 eflags |= kEFReserveMask;  /* keep a reserve */
1879
1880                         /*
1881                          * Allocate Journal and Quota files in metadata zone.
1882                          */
1883                         if (filebytes == 0 &&
1884                             hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1885                             hfs_virtualmetafile(cp)) {
1886                                 eflags |= kEFMetadataMask;
1887                                 blockHint = hfsmp->hfs_metazone_start;
1888                         }
1889                         if (hfs_start_transaction(hfsmp) != 0) {
1890                             retval = EINVAL;
1891                             goto Err_Exit;
1892                         }
1893
1894                         /* Protect extents b-tree and allocation bitmap */
1895                         lockflags = SFL_BITMAP;
1896                         if (overflow_extents(fp))
1897                                 lockflags |= SFL_EXTENTS;
1898                         lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1899
1900                         while ((length > filebytes) && (retval == E_NONE)) {
1901                                 bytesToAdd = length - filebytes;
1902                                 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1903                                                     (FCB*)fp,
1904                                                     bytesToAdd,
1905                                                     blockHint,
1906                                                     eflags,
1907                                                     &actualBytesAdded));
1908
1909                                 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1910                                 if (actualBytesAdded == 0 && retval == E_NONE) {
1911                                         if (length > filebytes)
1912                                                 length = filebytes;
1913                                         break;
1914                                 }
1915                         } /* endwhile */
1916
1917                         hfs_systemfile_unlock(hfsmp, lockflags);
1918
1919                         if (hfsmp->jnl) {
1920                             (void) hfs_update(vp, TRUE);
1921                             (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1922                         }
1923
1924                         hfs_end_transaction(hfsmp);
1925
1926                         if (retval)
1927                                 goto Err_Exit;
1928
1929                         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1930                                 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1931                 }
1932
1933                 if (!(flags & IO_NOZEROFILL)) {
1934                         if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1935                                 struct rl_entry *invalid_range;
1936                                 off_t zero_limit;
1937
1938                                 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1939                                 if (length < zero_limit) zero_limit = length;
1940
1941                                 if (length > (off_t)fp->ff_size) {
1942                                         struct timeval tv;
1943
1944                                         /* Extending the file: time to fill out the current last page w. zeroes? */
1945                                         if ((fp->ff_size & PAGE_MASK_64) &&
1946                                             (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1947                                             fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1948
1949                                                 /* There's some valid data at the start of the (current) last page
1950                                                    of the file, so zero out the remainder of that page to ensure the
1951                                                    entire page contains valid data.  Since there is no invalid range
1952                                                    possible past the (current) eof, there's no need to remove anything
1953                                                    from the invalid range list before calling cluster_write():  */
1954                                                 hfs_unlock(cp);
1955                                                 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1956                                                                 fp->ff_size, (off_t)0,
1957                                                                 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1958                                                 hfs_lock(cp, HFS_FORCE_LOCK);
1959                                                 if (retval) goto Err_Exit;
1960
1961                                                 /* Merely invalidate the remaining area, if necessary: */
1962                                                 if (length > zero_limit) {
1963                                                         microuptime(&tv);
1964                                                         rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1965                                                         cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1966                                                 }
1967                                         } else {
1968                                         /* The page containing the (current) eof is invalid: just add the
1969                                            remainder of the page to the invalid list, along with the area
1970                                            being newly allocated:
1971                                          */
1972                                         microuptime(&tv);
1973                                         rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1974                                         cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1975                                         };
1976                                 }
1977                         } else {
1978                                         panic("hfs_truncate: invoked on non-UBC object?!");
1979                         };
1980                 }
1981                 cp->c_touch_modtime = TRUE;
1982                 fp->ff_size = length;
1983
1984                 /* Nested transactions will do their own ubc_setsize. */
1985                 if (!skipsetsize) {
1986                         /*
1987                          * ubc_setsize can cause a pagein here
1988                          * so we need to drop cnode lock.
1989                          */
1990                         hfs_unlock(cp);
1991                         ubc_setsize(vp, length);
1992                         hfs_lock(cp, HFS_FORCE_LOCK);
1993                 }
1994
1995         } else { /* Shorten the size of the file */
1996
1997                 if ((off_t)fp->ff_size > length) {
1998                         /*
1999                          * Any buffers that are past the truncation point need to be
2000                          * invalidated (to maintain buffer cache consistency).
2001                          */
2002
2003                          /* Nested transactions will do their own ubc_setsize. */
2004                          if (!skipsetsize) {
2005                                 /*
2006                                  * ubc_setsize can cause a pageout here
2007                                  * so we need to drop cnode lock.
2008                                  */
2009                                 hfs_unlock(cp);
2010                                 ubc_setsize(vp, length);
2011                                 hfs_lock(cp, HFS_FORCE_LOCK);
2012                         }
2013
2014                         /* Any space previously marked as invalid is now irrelevant: */
2015                         rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2016                 }
2017
2018                 /*
2019                  * Account for any unmapped blocks. Note that the new
2020                  * file length can still end up with unmapped blocks.
2021                  */
2022                 if (fp->ff_unallocblocks > 0) {
2023                         u_int32_t finalblks;
2024                         u_int32_t loanedBlocks;
2025
2026                         HFS_MOUNT_LOCK(hfsmp, TRUE);
2027
2028                         loanedBlocks = fp->ff_unallocblocks;
2029                         cp->c_blocks -= loanedBlocks;
2030                         fp->ff_blocks -= loanedBlocks;
2031                         fp->ff_unallocblocks = 0;
2032
2033                         hfsmp->loanedBlocks -= loanedBlocks;
2034
2035                         finalblks = (length + blksize - 1) / blksize;
2036                         if (finalblks > fp->ff_blocks) {
2037                                 /* calculate required unmapped blocks */
2038                                 loanedBlocks = finalblks - fp->ff_blocks;
2039                                 hfsmp->loanedBlocks += loanedBlocks;
2040
2041                                 fp->ff_unallocblocks = loanedBlocks;
2042                                 cp->c_blocks += loanedBlocks;
2043                                 fp->ff_blocks += loanedBlocks;
2044                         }
2045                         HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2046                 }
2047
2048                 /*
2049                  * For a TBE process the deallocation of the file blocks is
2050                  * delayed until the file is closed.  And hfs_close calls
2051                  * truncate with the IO_NDELAY flag set.  So when IO_NDELAY
2052                  * isn't set, we make sure this isn't a TBE process.
2053                  */
2054                 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2055 #if QUOTA
2056                   off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2057 #endif /* QUOTA */
2058                   if (hfs_start_transaction(hfsmp) != 0) {
2059                       retval = EINVAL;
2060                       goto Err_Exit;
2061                   }
2062
2063                         if (fp->ff_unallocblocks == 0) {
2064                                 /* Protect extents b-tree and allocation bitmap */
2065                                 lockflags = SFL_BITMAP;
2066                                 if (overflow_extents(fp))
2067                                         lockflags |= SFL_EXTENTS;
2068                                 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2069
2070                                 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2071                                                 (FCB*)fp, length, false));
2072
2073                                 hfs_systemfile_unlock(hfsmp, lockflags);
2074                         }
2075                         if (hfsmp->jnl) {
2076                                 if (retval == 0) {
2077                                         fp->ff_size = length;
2078                                 }
2079                                 (void) hfs_update(vp, TRUE);
2080                                 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2081                         }
2082
2083                         hfs_end_transaction(hfsmp);
2084
2085                         filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2086                         if (retval)
2087                                 goto Err_Exit;
2088 #if QUOTA
2089                         /* These are bytesreleased */
2090                         (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2091 #endif /* QUOTA */
2092                 }
2093                 /* Only set update flag if the logical length changes */
2094                 if (old_filesize != length)
2095                         cp->c_touch_modtime = TRUE;
2096                 fp->ff_size = length;
2097         }
2098         cp->c_touch_chgtime = TRUE;
2099         retval = hfs_update(vp, MNT_WAIT);
2100         if (retval) {
2101                 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2102                      -1, -1, -1, retval, 0);
2103         }
2104
2105 Err_Exit:
2106
2107         KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2108                  (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2109
2110         return (retval);
2111 }
2112
2113
2114
2115 /*
2116  * Truncate a cnode to at most length size, freeing (or adding) the
2117  * disk blocks.
2118  */
2119 __private_extern__
2120 int
2121 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2122              vfs_context_t context)
2123 {
2124         struct filefork *fp = VTOF(vp);
2125         off_t filebytes;
2126         u_long fileblocks;
2127         int blksize, error = 0;
2128         struct cnode *cp = VTOC(vp);
2129
2130         if (vnode_isdir(vp))
2131                 return (EISDIR);        /* cannot truncate an HFS directory! */
2132
2133         blksize = VTOVCB(vp)->blockSize;
2134         fileblocks = fp->ff_blocks;
2135         filebytes = (off_t)fileblocks * (off_t)blksize;
2136
2137         // have to loop truncating or growing files that are
2138         // really big because otherwise transactions can get
2139         // enormous and consume too many kernel resources.
2140
2141         if (length < filebytes) {
2142                 while (filebytes > length) {
2143                         if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2144                                 filebytes -= HFS_BIGFILE_SIZE;
2145                         } else {
2146                                 filebytes = length;
2147                         }
2148                         cp->c_flag |= C_FORCEUPDATE;
2149                         error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2150                         if (error)
2151                                 break;
2152                 }
2153         } else if (length > filebytes) {
2154                 while (filebytes < length) {
2155                         if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2156                                 filebytes += HFS_BIGFILE_SIZE;
2157                         } else {
2158                                 filebytes = length;
2159                         }
2160                         cp->c_flag |= C_FORCEUPDATE;
2161                         error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2162                         if (error)
2163                                 break;
2164                 }
2165         } else /* Same logical size */ {
2166
2167                 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2168         }
2169         /* Files that are changing size are not hot file candidates. */
2170         if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2171                 fp->ff_bytesread = 0;
2172         }
2173
2174         return (error);
2175 }
2176
2177
2178
2179 /*
2180  * Preallocate file storage space.
2181  */
2182 int
2183 hfs_vnop_allocate(struct vnop_allocate_args /* {
2184                 vnode_t a_vp;
2185                 off_t a_length;
2186                 u_int32_t  a_flags;
2187                 off_t *a_bytesallocated;
2188                 off_t a_offset;
2189                 vfs_context_t a_context;
2190         } */ *ap)
2191 {
2192         struct vnode *vp = ap->a_vp;
2193         struct cnode *cp;
2194         struct filefork *fp;
2195         ExtendedVCB *vcb;
2196         off_t length = ap->a_length;
2197         off_t startingPEOF;
2198         off_t moreBytesRequested;
2199         off_t actualBytesAdded;
2200         off_t filebytes;
2201         u_long fileblocks;
2202         int retval, retval2;
2203         UInt32 blockHint;
2204         UInt32 extendFlags;   /* For call to ExtendFileC */
2205         struct hfsmount *hfsmp;
2206         kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2207         int lockflags;
2208
2209         *(ap->a_bytesallocated) = 0;
2210
2211         if (!vnode_isreg(vp))
2212                 return (EISDIR);
2213         if (length < (off_t)0)
2214                 return (EINVAL);
2215
2216         if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2217                 return (retval);
2218         cp = VTOC(vp);
2219         fp = VTOF(vp);
2220         hfsmp = VTOHFS(vp);
2221         vcb = VTOVCB(vp);
2222
2223         fileblocks = fp->ff_blocks;
2224         filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2225
2226         if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2227                 retval = EINVAL;
2228                 goto Err_Exit;
2229         }
2230
2231         /* Fill in the flags word for the call to Extend the file */
2232
2233         extendFlags = kEFNoClumpMask;
2234         if (ap->a_flags & ALLOCATECONTIG)
2235                 extendFlags |= kEFContigMask;
2236         if (ap->a_flags & ALLOCATEALL)
2237                 extendFlags |= kEFAllMask;
2238         if (cred && suser(cred, NULL) != 0)
2239                 extendFlags |= kEFReserveMask;
2240
2241         retval = E_NONE;
2242         blockHint = 0;
2243         startingPEOF = filebytes;
2244
2245         if (ap->a_flags & ALLOCATEFROMPEOF)
2246                 length += filebytes;
2247         else if (ap->a_flags & ALLOCATEFROMVOL)
2248                 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2249
2250         /* If no changes are necesary, then we're done */
2251         if (filebytes == length)
2252                 goto Std_Exit;
2253
2254         /*
2255          * Lengthen the size of the file. We must ensure that the
2256          * last byte of the file is allocated. Since the smallest
2257          * value of filebytes is 0, length will be at least 1.
2258          */
2259         if (length > filebytes) {
2260                 moreBytesRequested = length - filebytes;
2261
2262 #if QUOTA
2263                 retval = hfs_chkdq(cp,
2264                                 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2265                                 cred, 0);
2266                 if (retval)
2267                         goto Err_Exit;
2268
2269 #endif /* QUOTA */
2270                 /*
2271                  * Metadata zone checks.
2272                  */
2273                 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2274                         /*
2275                          * Allocate Journal and Quota files in metadata zone.
2276                          */
2277                         if (hfs_virtualmetafile(cp)) {
2278                                 extendFlags |= kEFMetadataMask;
2279                                 blockHint = hfsmp->hfs_metazone_start;
2280                         } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2281                                    (blockHint <= hfsmp->hfs_metazone_end)) {
2282                                 /*
2283                                  * Move blockHint outside metadata zone.
2284                                  */
2285                                 blockHint = hfsmp->hfs_metazone_end + 1;
2286                         }
2287                 }
2288
2289                 if (hfs_start_transaction(hfsmp) != 0) {
2290                     retval = EINVAL;
2291                     goto Err_Exit;
2292                 }
2293
2294                 /* Protect extents b-tree and allocation bitmap */
2295                 lockflags = SFL_BITMAP;
2296                 if (overflow_extents(fp))
2297                         lockflags |= SFL_EXTENTS;
2298                 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2299
2300                 retval = MacToVFSError(ExtendFileC(vcb,
2301                                                 (FCB*)fp,
2302                                                 moreBytesRequested,
2303                                                 blockHint,
2304                                                 extendFlags,
2305                                                 &actualBytesAdded));
2306
2307                 *(ap->a_bytesallocated) = actualBytesAdded;
2308                 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2309
2310                 hfs_systemfile_unlock(hfsmp, lockflags);
2311
2312                 if (hfsmp->jnl) {
2313                         (void) hfs_update(vp, TRUE);
2314                         (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2315                 }
2316
2317                 hfs_end_transaction(hfsmp);
2318
2319                 /*
2320                  * if we get an error and no changes were made then exit
2321                  * otherwise we must do the hfs_update to reflect the changes
2322                  */
2323                 if (retval && (startingPEOF == filebytes))
2324                         goto Err_Exit;
2325
2326                 /*
2327                  * Adjust actualBytesAdded to be allocation block aligned, not
2328                  * clump size aligned.
2329                  * NOTE: So what we are reporting does not affect reality
2330                  * until the file is closed, when we truncate the file to allocation
2331                  * block size.
2332                  */
2333                 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2334                         *(ap->a_bytesallocated) =
2335                                 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2336
2337         } else { /* Shorten the size of the file */
2338
2339                 if (fp->ff_size > length) {
2340                         /*
2341                          * Any buffers that are past the truncation point need to be
2342                          * invalidated (to maintain buffer cache consistency).
2343                          */
2344                 }
2345
2346                 if (hfs_start_transaction(hfsmp) != 0) {
2347                     retval = EINVAL;
2348                     goto Err_Exit;
2349                 }
2350
2351                 /* Protect extents b-tree and allocation bitmap */
2352                 lockflags = SFL_BITMAP;
2353                 if (overflow_extents(fp))
2354                         lockflags |= SFL_EXTENTS;
2355                 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2356
2357                 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2358
2359                 hfs_systemfile_unlock(hfsmp, lockflags);
2360
2361                 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2362
2363                 if (hfsmp->jnl) {
2364                         (void) hfs_update(vp, TRUE);
2365                         (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2366                 }
2367
2368                 hfs_end_transaction(hfsmp);
2369
2370
2371                 /*
2372                  * if we get an error and no changes were made then exit
2373                  * otherwise we must do the hfs_update to reflect the changes
2374                  */
2375                 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2376 #if QUOTA
2377                 /* These are  bytesreleased */
2378                 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2379 #endif /* QUOTA */
2380
2381                 if (fp->ff_size > filebytes) {
2382                         fp->ff_size = filebytes;
2383
2384                         hfs_unlock(cp);
2385                         ubc_setsize(vp, fp->ff_size);
2386                         hfs_lock(cp, HFS_FORCE_LOCK);
2387                 }
2388         }
2389
2390 Std_Exit:
2391         cp->c_touch_chgtime = TRUE;
2392         cp->c_touch_modtime = TRUE;
2393         retval2 = hfs_update(vp, MNT_WAIT);
2394
2395         if (retval == 0)
2396                 retval = retval2;
2397 Err_Exit:
2398         hfs_unlock(cp);
2399         return (retval);
2400 }
2401
2402
2403 /*
2404  * Pagein for HFS filesystem
2405  */
2406 int
2407 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2408 /*
2409         struct vnop_pagein_args {
2410                 vnode_t a_vp,
2411                 upl_t         a_pl,
2412                 vm_offset_t   a_pl_offset,
2413                 off_t         a_f_offset,
2414                 size_t        a_size,
2415                 int           a_flags
2416                 vfs_context_t a_context;
2417         };
2418 */
2419 {
2420         vnode_t vp = ap->a_vp;
2421         int error;
2422
2423         error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2424                                ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2425         /*
2426          * Keep track of blocks read.
2427          */
2428         if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2429                 struct cnode *cp;
2430                 struct filefork *fp;
2431                 int bytesread;
2432                 int took_cnode_lock = 0;
2433
2434                 cp = VTOC(vp);
2435                 fp = VTOF(vp);
2436
2437                 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2438                         bytesread = fp->ff_size;
2439                 else
2440                         bytesread = ap->a_size;
2441
2442                 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2443                 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2444                         hfs_lock(cp, HFS_FORCE_LOCK);
2445                         took_cnode_lock = 1;
2446                 }
2447                 /*
2448                  * If this file hasn't been seen since the start of
2449                  * the current sampling period then start over.
2450                  */
2451                 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2452                         struct timeval tv;
2453
2454                         fp->ff_bytesread = bytesread;
2455                         microtime(&tv);
2456                         cp->c_atime = tv.tv_sec;
2457                 } else {
2458                         fp->ff_bytesread += bytesread;
2459                 }
2460                 cp->c_touch_acctime = TRUE;
2461                 if (took_cnode_lock)
2462                         hfs_unlock(cp);
2463         }
2464         return (error);
2465 }
2466
2467 /*
2468  * Pageout for HFS filesystem.
2469  */
2470 int
2471 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2472 /*
2473         struct vnop_pageout_args {
2474            vnode_t a_vp,
2475            upl_t         a_pl,
2476            vm_offset_t   a_pl_offset,
2477            off_t         a_f_offset,
2478            size_t        a_size,
2479            int           a_flags
2480            vfs_context_t a_context;
2481         };
2482 */
2483 {
2484         vnode_t vp = ap->a_vp;
2485         struct cnode *cp;
2486         struct filefork *fp;
2487         int retval;
2488         off_t end_of_range;
2489         off_t filesize;
2490
2491         cp = VTOC(vp);
2492         if (cp->c_lockowner == current_thread()) {
2493                 panic("pageout: %s cnode lock already held!\n",
2494                       cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2495         }
2496         if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2497                 if (!(ap->a_flags & UPL_NOCOMMIT)) {
2498                         ubc_upl_abort_range(ap->a_pl,
2499                                             ap->a_pl_offset,
2500                                             ap->a_size,
2501                                             UPL_ABORT_FREE_ON_EMPTY);
2502                 }
2503                 return (retval);
2504         }
2505         fp = VTOF(vp);
2506
2507         filesize = fp->ff_size;
2508         end_of_range = ap->a_f_offset + ap->a_size - 1;
2509
2510         if (end_of_range >= filesize) {
2511                 end_of_range = (off_t)(filesize - 1);
2512         }
2513         if (ap->a_f_offset < filesize) {
2514                 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2515                 cp->c_flag |= C_MODIFIED;  /* leof is dirty */
2516         }
2517         hfs_unlock(cp);
2518
2519         retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2520                                  ap->a_size, filesize, ap->a_flags);
2521
2522         /*
2523          * If data was written, and setuid or setgid bits are set and
2524          * this process is not the superuser then clear the setuid and
2525          * setgid bits as a precaution against tampering.
2526          */
2527         if ((retval == 0) &&
2528             (cp->c_mode & (S_ISUID | S_ISGID)) &&
2529             (vfs_context_suser(ap->a_context) != 0)) {
2530                 hfs_lock(cp, HFS_FORCE_LOCK);
2531                 cp->c_mode &= ~(S_ISUID | S_ISGID);
2532                 cp->c_touch_chgtime = TRUE;
2533                 hfs_unlock(cp);
2534         }
2535         return (retval);
2536 }
2537
2538 /*
2539  * Intercept B-Tree node writes to unswap them if necessary.
2540  */
2541 int
2542 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2543 {
2544         int retval = 0;
2545         register struct buf *bp = ap->a_bp;
2546         register struct vnode *vp = buf_vnode(bp);
2547         BlockDescriptor block;
2548
2549         /* Trap B-Tree writes */
2550         if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2551             (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2552             (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
2553
2554                 /*
2555                  * Swap and validate the node if it is in native byte order.
2556                  * This is always be true on big endian, so we always validate
2557                  * before writing here.  On little endian, the node typically has
2558                  * been swapped and validatated when it was written to the journal,
2559                  * so we won't do anything here.
2560                  */
2561                 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2562                         /* Prepare the block pointer */
2563                         block.blockHeader = bp;
2564                         block.buffer = (char *)buf_dataptr(bp);
2565                         block.blockNum = buf_lblkno(bp);
2566                         /* not found in cache ==> came from disk */
2567                         block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2568                         block.blockSize = buf_count(bp);
2569
2570                         /* Endian un-swap B-Tree node */
2571                         retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
2572                         if (retval)
2573                                 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
2574                 }
2575         }
2576
2577         /* This buffer shouldn't be locked anymore but if it is clear it */
2578         if ((buf_flags(bp) & B_LOCKED)) {
2579                 // XXXdbg
2580                 if (VTOHFS(vp)->jnl) {
2581                         panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
2582                 }
2583                 buf_clearflags(bp, B_LOCKED);
2584         }
2585         retval = vn_bwrite (ap);
2586
2587         return (retval);
2588 }
2589
2590 /*
2591  * Relocate a file to a new location on disk
2592  *  cnode must be locked on entry
2593  *
2594  * Relocation occurs by cloning the file's data from its
2595  * current set of blocks to a new set of blocks. During
2596  * the relocation all of the blocks (old and new) are
2597  * owned by the file.
2598  *
2599  * -----------------
2600  * |///////////////|
2601  * -----------------
2602  * 0               N (file offset)
2603  *
2604  * -----------------     -----------------
2605  * |///////////////|     |               |     STEP 1 (aquire new blocks)
2606  * -----------------     -----------------
2607  * 0               N     N+1             2N
2608  *
2609  * -----------------     -----------------
2610  * |///////////////|     |///////////////|     STEP 2 (clone data)
2611  * -----------------     -----------------
2612  * 0               N     N+1             2N
2613  *
2614  *                       -----------------
2615  *                       |///////////////|     STEP 3 (head truncate blocks)
2616  *                       -----------------
2617  *                       0               N
2618  *
2619  * During steps 2 and 3 page-outs to file offsets less
2620  * than or equal to N are suspended.
2621  *
2622  * During step 3 page-ins to the file get supended.
2623  */
2624 __private_extern__
2625 int
2626 hfs_relocate(struct  vnode *vp, u_int32_t  blockHint, kauth_cred_t cred,
2627         struct  proc *p)
2628 {
2629         struct  cnode *cp;
2630         struct  filefork *fp;
2631         struct  hfsmount *hfsmp;
2632         u_int32_t  headblks;
2633         u_int32_t  datablks;
2634         u_int32_t  blksize;
2635         u_int32_t  growsize;
2636         u_int32_t  nextallocsave;
2637         daddr64_t  sector_a,  sector_b;
2638         int disabled_caching = 0;
2639         int eflags;
2640         off_t  newbytes;
2641         int  retval;
2642         int lockflags = 0;
2643         int took_trunc_lock = 0;
2644         int started_tr = 0;
2645         enum vtype vnodetype;
2646
2647         vnodetype = vnode_vtype(vp);
2648         if (vnodetype != VREG && vnodetype != VLNK) {
2649                 return (EPERM);
2650         }
2651
2652         hfsmp = VTOHFS(vp);
2653         if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2654                 return (ENOSPC);
2655         }
2656
2657         cp = VTOC(vp);
2658         fp = VTOF(vp);
2659         if (fp->ff_unallocblocks)
2660                 return (EINVAL);
2661         blksize = hfsmp->blockSize;
2662         if (blockHint == 0)
2663                 blockHint = hfsmp->nextAllocation;
2664
2665         if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2666             ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2667                 return (EFBIG);
2668         }
2669
2670         //
2671         // We do not believe that this call to hfs_fsync() is
2672         // necessary and it causes a journal transaction
2673         // deadlock so we are removing it.
2674         //
2675         //if (vnodetype == VREG && !vnode_issystem(vp)) {
2676         //      retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2677         //      if (retval)
2678         //              return (retval);
2679         //}
2680
2681         if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2682                 hfs_unlock(cp);
2683                 hfs_lock_truncate(cp, TRUE);
2684                 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2685                         hfs_unlock_truncate(cp);
2686                         return (retval);
2687                 }
2688                 took_trunc_lock = 1;
2689         }
2690         headblks = fp->ff_blocks;
2691         datablks = howmany(fp->ff_size, blksize);
2692         growsize = datablks * blksize;
2693         eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2694         if (blockHint >= hfsmp->hfs_metazone_start &&
2695             blockHint <= hfsmp->hfs_metazone_end)
2696                 eflags |= kEFMetadataMask;
2697
2698         if (hfs_start_transaction(hfsmp) != 0) {
2699                 if (took_trunc_lock)
2700                         hfs_unlock_truncate(cp);
2701             return (EINVAL);
2702         }
2703         started_tr = 1;
2704         /*
2705          * Protect the extents b-tree and the allocation bitmap
2706          * during MapFileBlockC and ExtendFileC operations.
2707          */
2708         lockflags = SFL_BITMAP;
2709         if (overflow_extents(fp))
2710                 lockflags |= SFL_EXTENTS;
2711         lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2712
2713         retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2714         if (retval) {
2715                 retval = MacToVFSError(retval);
2716                 goto out;
2717         }
2718
2719         /*
2720          * STEP 1 - aquire new allocation blocks.
2721          */
2722         if (!vnode_isnocache(vp)) {
2723                 vnode_setnocache(vp);
2724                 disabled_caching = 1;
2725
2726         }
2727         nextallocsave = hfsmp->nextAllocation;
2728         retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
2729         if (eflags & kEFMetadataMask) {
2730                 HFS_MOUNT_LOCK(hfsmp, TRUE);
2731                 hfsmp->nextAllocation = nextallocsave;
2732                 hfsmp->vcbFlags |= 0xFF00;
2733                 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2734         }
2735
2736         retval = MacToVFSError(retval);
2737         if (retval == 0) {
2738                 cp->c_flag |= C_MODIFIED;
2739                 if (newbytes < growsize) {
2740                         retval = ENOSPC;
2741                         goto restore;
2742                 } else if (fp->ff_blocks < (headblks + datablks)) {
2743                         printf("hfs_relocate: allocation failed");
2744                         retval = ENOSPC;
2745                         goto restore;
2746                 }
2747
2748                 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2749                 if (retval) {
2750                         retval = MacToVFSError(retval);
2751                 } else if ((sector_a + 1) == sector_b) {
2752                         retval = ENOSPC;
2753                         goto restore;
2754                 } else if ((eflags & kEFMetadataMask) &&
2755                            ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2756                               hfsmp->hfs_metazone_end)) {
2757                         printf("hfs_relocate: didn't move into metadata zone\n");
2758                         retval = ENOSPC;
2759                         goto restore;
2760                 }
2761         }
2762         /* Done with system locks and journal for now. */
2763         hfs_systemfile_unlock(hfsmp, lockflags);
2764         lockflags = 0;
2765         hfs_end_transaction(hfsmp);
2766         started_tr = 0;
2767
2768         if (retval) {
2769                 /*
2770                  * Check to see if failure is due to excessive fragmentation.
2771                  */
2772                 if ((retval == ENOSPC) &&
2773                     (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2774                         hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2775                 }
2776                 goto out;
2777         }
2778         /*
2779          * STEP 2 - clone file data into the new allocation blocks.
2780          */
2781
2782         if (vnodetype == VLNK)
2783                 retval = hfs_clonelink(vp, blksize, cred, p);
2784         else if (vnode_issystem(vp))
2785                 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2786         else
2787                 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2788
2789         /* Start transaction for step 3 or for a restore. */
2790         if (hfs_start_transaction(hfsmp) != 0) {
2791                 retval = EINVAL;
2792                 goto out;
2793         }
2794         started_tr = 1;
2795         if (retval)
2796                 goto restore;
2797
2798         /*
2799          * STEP 3 - switch to cloned data and remove old blocks.
2800          */
2801         lockflags = SFL_BITMAP;
2802         if (overflow_extents(fp))
2803                 lockflags |= SFL_EXTENTS;
2804         lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2805
2806         retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2807
2808         hfs_systemfile_unlock(hfsmp, lockflags);
2809         lockflags = 0;
2810         if (retval)
2811                 goto restore;
2812 out:
2813         if (took_trunc_lock)
2814                 hfs_unlock_truncate(cp);
2815
2816         if (lockflags) {
2817                 hfs_systemfile_unlock(hfsmp, lockflags);
2818                 lockflags = 0;
2819         }
2820
2821         // See comment up above about calls to hfs_fsync()
2822         //
2823         //if (retval == 0)
2824         //      retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2825
2826         if (hfsmp->jnl) {
2827                 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2828                         (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2829                 else
2830                         (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2831         }
2832 exit:
2833         if (disabled_caching) {
2834                 vnode_clearnocache(vp);
2835         }
2836         if (started_tr)
2837                 hfs_end_transaction(hfsmp);
2838
2839         return (retval);
2840
2841 restore:
2842         if (fp->ff_blocks == headblks)
2843                 goto exit;
2844         /*
2845          * Give back any newly allocated space.
2846          */
2847         if (lockflags == 0) {
2848                 lockflags = SFL_BITMAP;
2849                 if (overflow_extents(fp))
2850                         lockflags |= SFL_EXTENTS;
2851                 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2852         }
2853
2854         (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2855
2856         hfs_systemfile_unlock(hfsmp, lockflags);
2857         lockflags = 0;
2858
2859         if (took_trunc_lock)
2860                 hfs_unlock_truncate(cp);
2861         goto exit;
2862 }
2863
2864
2865 /*
2866  * Clone a symlink.
2867  *
2868  */
2869 static int
2870 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2871 {
2872         struct buf *head_bp = NULL;
2873         struct buf *tail_bp = NULL;
2874         int error;
2875
2876
2877         error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2878         if (error)
2879                 goto out;
2880
2881         tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2882         if (tail_bp == NULL) {
2883                 error = EIO;
2884                 goto out;
2885         }
2886         bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2887         error = (int)buf_bwrite(tail_bp);
2888 out:
2889         if (head_bp) {
2890                 buf_markinvalid(head_bp);
2891                 buf_brelse(head_bp);
2892         }
2893         (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2894
2895         return (error);
2896 }
2897
2898 /*
2899  * Clone a file's data within the file.
2900  *
2901  */
2902 static int
2903 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2904 {
2905         caddr_t  bufp;
2906         size_t  writebase;
2907         size_t  bufsize;
2908         size_t  copysize;
2909         size_t  iosize;
2910         off_t   filesize;
2911         size_t  offset;
2912         uio_t auio;
2913         int  error = 0;
2914
2915         filesize = VTOF(vp)->ff_blocks * blksize;  /* virtual file size */
2916         writebase = blkstart * blksize;
2917         copysize = blkcnt * blksize;
2918         iosize = bufsize = MIN(copysize, 4096 * 16);
2919         offset = 0;
2920
2921         if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2922                 return (ENOMEM);
2923         }
2924         hfs_unlock(VTOC(vp));
2925
2926         auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2927
2928         while (offset < copysize) {
2929                 iosize = MIN(copysize - offset, iosize);
2930
2931                 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2932                 uio_addiov(auio, (uintptr_t)bufp, iosize);
2933
2934                 error = cluster_read(vp, auio, copysize, 0);
2935                 if (error) {
2936                         printf("hfs_clonefile: cluster_read failed - %d\n", error);
2937                         break;
2938                 }
2939                 if (uio_resid(auio) != 0) {
2940                         printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2941                         error = EIO;
2942                         break;
2943                 }
2944
2945                 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2946                 uio_addiov(auio, (uintptr_t)bufp, iosize);
2947
2948                 error = cluster_write(vp, auio, filesize + offset,
2949                                       filesize + offset + iosize,
2950                                       uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2951                 if (error) {
2952                         printf("hfs_clonefile: cluster_write failed - %d\n", error);
2953                         break;
2954                 }
2955                 if (uio_resid(auio) != 0) {
2956                         printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2957                         error = EIO;
2958                         break;
2959                 }
2960                 offset += iosize;
2961         }
2962         uio_free(auio);
2963
2964         /*
2965          * No need to call ubc_sync_range or hfs_invalbuf
2966          * since the file was copied using IO_NOCACHE.
2967          */
2968
2969         kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2970
2971         hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2972         return (error);
2973 }
2974
2975 /*
2976  * Clone a system (metadata) file.
2977  *
2978  */
2979 static int
2980 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2981                  kauth_cred_t cred, struct proc *p)
2982 {
2983         caddr_t  bufp;
2984         char * offset;
2985         size_t  bufsize;
2986         size_t  iosize;
2987         struct buf *bp = NULL;
2988         daddr64_t  blkno;
2989         daddr64_t  blk;
2990         daddr64_t  start_blk;
2991         daddr64_t  last_blk;
2992         int  breadcnt;
2993         int  i;
2994         int  error = 0;
2995
2996
2997         iosize = GetLogicalBlockSize(vp);
2998         bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
2999         breadcnt = bufsize / iosize;
3000
3001         if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3002                 return (ENOMEM);
3003         }
3004         start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3005         last_blk  = ((daddr64_t)blkcnt * blksize) / iosize;
3006         blkno = 0;
3007
3008         while (blkno < last_blk) {
3009                 /*
3010                  * Read up to a megabyte
3011                  */
3012                 offset = bufp;
3013                 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3014                         error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3015                         if (error) {
3016                                 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3017                                 goto out;
3018                         }
3019                         if (buf_count(bp) != iosize) {
3020                                 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3021                                 goto out;
3022                         }
3023                         bcopy((char *)buf_dataptr(bp), offset, iosize);
3024
3025                         buf_markinvalid(bp);
3026                         buf_brelse(bp);
3027                         bp = NULL;
3028
3029                         offset += iosize;
3030                 }
3031
3032                 /*
3033                  * Write up to a megabyte
3034                  */
3035                 offset = bufp;
3036                 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3037                         bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3038                         if (bp == NULL) {
3039                                 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3040                                 error = EIO;
3041                                 goto out;
3042                         }
3043                         bcopy(offset, (char *)buf_dataptr(bp), iosize);
3044                         error = (int)buf_bwrite(bp);
3045                         bp = NULL;
3046                         if (error)
3047                                 goto out;
3048                         offset += iosize;
3049                 }
3050         }
3051 out:
3052         if (bp) {
3053                 buf_brelse(bp);
3054         }
3055
3056         kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3057
3058         error = hfs_fsync(vp, MNT_WAIT, 0, p);
3059
3060         return (error);
3061 }