1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* @(#)hfs_readwrite.c 1.0
23 *
24 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
25 *
26 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
27 *
28 */
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/resourcevar.h>
33 #include <sys/kernel.h>
34 #include <sys/fcntl.h>
35 #include <sys/filedesc.h>
36 #include <sys/stat.h>
37 #include <sys/buf.h>
38 #include <sys/proc.h>
39 #include <sys/kauth.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/vfs_context.h>
43
44 #include <miscfs/specfs/specdev.h>
45
46 #include <sys/ubc.h>
47 #include <vm/vm_pageout.h>
48 #include <vm/vm_kern.h>
49
50 #include <sys/kdebug.h>
51
52 #include "hfs.h"
53 #include "hfs_endian.h"
54 #include "hfs_fsctl.h"
55 #include "hfs_quota.h"
56 #include "hfscommon/headers/FileMgrInternal.h"
57 #include "hfscommon/headers/BTreesInternal.h"
58 #include "hfs_cnode.h"
59 #include "hfs_dbg.h"
60
61 extern int overflow_extents(struct filefork *fp);
62
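/*
 * Descriptive note (added): can_cluster(size) is true when a logical
 * block size is a multiple of 4K and no larger than half of MAXPHYSIO;
 * hfs_bmap() uses it below to decide whether a read-ahead run length
 * is worth reporting.
 */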
63 #define can_cluster(size) ((((size) & (4096-1)) == 0) && ((size) <= (MAXPHYSIO/2)))
64
65 enum {
66 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
67 };
68
69 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
70
71 extern int hfs_setextendedsecurity(struct hfsmount *, int);
72
73
74 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
75 static int hfs_clonefile(struct vnode *, int, int, int);
76 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
77
78
79 /*****************************************************************************
80 *
81 * I/O Operations on vnodes
82 *
83 *****************************************************************************/
84 int hfs_vnop_read(struct vnop_read_args *);
85 int hfs_vnop_write(struct vnop_write_args *);
86 int hfs_vnop_ioctl(struct vnop_ioctl_args *);
87 int hfs_vnop_select(struct vnop_select_args *);
88 int hfs_vnop_blktooff(struct vnop_blktooff_args *);
89 int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
90 int hfs_vnop_blockmap(struct vnop_blockmap_args *);
91 int hfs_vnop_strategy(struct vnop_strategy_args *);
92 int hfs_vnop_allocate(struct vnop_allocate_args *);
93 int hfs_vnop_pagein(struct vnop_pagein_args *);
94 int hfs_vnop_pageout(struct vnop_pageout_args *);
95 int hfs_vnop_bwrite(struct vnop_bwrite_args *);
96
97
98 /*
99 * Read data from a file.
100 */
101 int
102 hfs_vnop_read(struct vnop_read_args *ap)
103 {
104 uio_t uio = ap->a_uio;
105 struct vnode *vp = ap->a_vp;
106 struct cnode *cp;
107 struct filefork *fp;
108 struct hfsmount *hfsmp;
109 off_t filesize;
110 off_t filebytes;
111 off_t start_resid = uio_resid(uio);
112 off_t offset = uio_offset(uio);
113 int retval = 0;
114
115
116 /* Preflight checks */
117 if (!vnode_isreg(vp)) {
118 /* can only read regular files */
119 if (vnode_isdir(vp))
120 return (EISDIR);
121 else
122 return (EPERM);
123 }
124 if (start_resid == 0)
125 return (0); /* Nothing left to do */
126 if (offset < 0)
127 return (EINVAL); /* can't read from a negative offset */
128
129 cp = VTOC(vp);
130 fp = VTOF(vp);
131 hfsmp = VTOHFS(vp);
132
133 /* Protect against a size change. */
134 hfs_lock_truncate(cp, 0);
135
136 filesize = fp->ff_size;
137 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
138 if (offset > filesize) {
139 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
140 (offset > (off_t)MAXHFSFILESIZE)) {
141 retval = EFBIG;
142 }
143 goto exit;
144 }
145
146 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
147 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
148
149 retval = cluster_read(vp, uio, filesize, 0);
150
151 cp->c_touch_acctime = TRUE;
152
153 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
154 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
155
156 /*
157 * Keep track of blocks read
158 */
159 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
160 int took_cnode_lock = 0;
161 off_t bytesread;
162
163 bytesread = start_resid - uio_resid(uio);
164
165 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
166 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
167 hfs_lock(cp, HFS_FORCE_LOCK);
168 took_cnode_lock = 1;
169 }
170 /*
171 * If this file hasn't been seen since the start of
172 * the current sampling period then start over.
173 */
174 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
175 struct timeval tv;
176
177 fp->ff_bytesread = bytesread;
178 microtime(&tv);
179 cp->c_atime = tv.tv_sec;
180 } else {
181 fp->ff_bytesread += bytesread;
182 }
183 if (took_cnode_lock)
184 hfs_unlock(cp);
185 }
186 exit:
187 hfs_unlock_truncate(cp);
188 return (retval);
189 }
190
191 /*
192 * Write data to a file.
193 */
194 int
195 hfs_vnop_write(struct vnop_write_args *ap)
196 {
197 uio_t uio = ap->a_uio;
198 struct vnode *vp = ap->a_vp;
199 struct cnode *cp;
200 struct filefork *fp;
201 struct hfsmount *hfsmp;
202 kauth_cred_t cred = NULL;
203 off_t origFileSize;
204 off_t writelimit;
205 off_t bytesToAdd;
206 off_t actualBytesAdded;
207 off_t filebytes;
208 off_t offset;
209 size_t resid;
210 int eflags;
211 int ioflag = ap->a_ioflag;
212 int retval = 0;
213 int lockflags;
214 int cnode_locked = 0;
215
216 // LP64todo - fix this! uio_resid may be 64-bit value
217 resid = uio_resid(uio);
218 offset = uio_offset(uio);
219
220 if (offset < 0)
221 return (EINVAL);
222 if (resid == 0)
223 return (E_NONE);
224 if (!vnode_isreg(vp))
225 return (EPERM); /* Can only write regular files */
226
227 /* Protect against a size change. */
228 hfs_lock_truncate(VTOC(vp), TRUE);
229
230 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
231 hfs_unlock_truncate(VTOC(vp));
232 return (retval);
233 }
234 cnode_locked = 1;
235 cp = VTOC(vp);
236 fp = VTOF(vp);
237 hfsmp = VTOHFS(vp);
238 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
239
240 if (ioflag & IO_APPEND) {
241 uio_setoffset(uio, fp->ff_size);
242 offset = fp->ff_size;
243 }
244 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
245 retval = EPERM;
246 goto exit;
247 }
248
249 origFileSize = fp->ff_size;
250 eflags = kEFDeferMask; /* defer file block allocations */
251
252 #ifdef HFS_SPARSE_DEV
253 /*
254 * When the underlying device is sparse and space
255 * is low (< 8MB), stop doing delayed allocations
256 * and begin doing synchronous I/O.
257 */
258 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
259 (hfs_freeblks(hfsmp, 0) < 2048)) {
260 eflags &= ~kEFDeferMask;
261 ioflag |= IO_SYNC;
262 }
263 #endif /* HFS_SPARSE_DEV */
264
265 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
266 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
267
268 /* Now test if we need to extend the file */
269 /* Doing so will adjust the filebytes for us */
270
271 writelimit = offset + resid;
272 if (writelimit <= filebytes)
273 goto sizeok;
274
275 cred = vfs_context_ucred(ap->a_context);
276 #if QUOTA
277 bytesToAdd = writelimit - filebytes;
278 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
279 cred, 0);
280 if (retval)
281 goto exit;
282 #endif /* QUOTA */
283
284 if (hfs_start_transaction(hfsmp) != 0) {
285 retval = EINVAL;
286 goto exit;
287 }
288
289 while (writelimit > filebytes) {
290 bytesToAdd = writelimit - filebytes;
291 if (cred && suser(cred, NULL) != 0)
292 eflags |= kEFReserveMask;
293
294 /* Protect extents b-tree and allocation bitmap */
295 lockflags = SFL_BITMAP;
296 if (overflow_extents(fp))
297 lockflags |= SFL_EXTENTS;
298 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
299
300 /* Files that are changing size are not hot file candidates. */
301 if (hfsmp->hfc_stage == HFC_RECORDING) {
302 fp->ff_bytesread = 0;
303 }
304 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
305 0, eflags, &actualBytesAdded));
306
307 hfs_systemfile_unlock(hfsmp, lockflags);
308
309 if ((actualBytesAdded == 0) && (retval == E_NONE))
310 retval = ENOSPC;
311 if (retval != E_NONE)
312 break;
313 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
314 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
315 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
316 }
317 (void) hfs_update(vp, TRUE);
318 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
319 (void) hfs_end_transaction(hfsmp);
320
321 sizeok:
322 if (retval == E_NONE) {
323 off_t filesize;
324 off_t zero_off;
325 off_t tail_off;
326 off_t inval_start;
327 off_t inval_end;
328 off_t io_start;
329 int lflag;
330 struct rl_entry *invalid_range;
331
332 if (writelimit > fp->ff_size)
333 filesize = writelimit;
334 else
335 filesize = fp->ff_size;
336
337 lflag = (ioflag & IO_SYNC);
338
339 if (offset <= fp->ff_size) {
340 zero_off = offset & ~PAGE_MASK_64;
341
342 /* Check whether the area between zero_off and the start of the
343 transfer is invalid and should be zero-filled as part of the
344 transfer:
345 */
346 if (offset > zero_off) {
347 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
348 lflag |= IO_HEADZEROFILL;
349 }
350 } else {
351 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
352
353 /* The bytes between fp->ff_size and uio->uio_offset must never be
354 read without being zeroed. The current last block is filled with zeroes
355 if it holds valid data; in all cases, merely do a little bookkeeping
356 to track the area from the end of the current last page to the start of
357 the area actually written. For the same reason only the bytes up to the
358 start of the page where this write will start are invalidated; any remainder
359 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
360
361 Note that inval_start, the start of the page after the current EOF,
362 may be past the start of the write, in which case the zeroing
363 will be handled by the cluster_write of the actual data.
364 */
365 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
366 inval_end = offset & ~PAGE_MASK_64;
367 zero_off = fp->ff_size;
368
369 if ((fp->ff_size & PAGE_MASK_64) &&
370 (rl_scan(&fp->ff_invalidranges,
371 eof_page_base,
372 fp->ff_size - 1,
373 &invalid_range) != RL_NOOVERLAP)) {
374 /* The page containing the EOF is not valid, so the
375 entire page must be made inaccessible now. If the write
376 starts on a page beyond the page containing the eof
377 (inval_end > eof_page_base), add the
378 whole page to the range to be invalidated. Otherwise
379 (i.e. if the write starts on the same page), zero-fill
380 the entire page explicitly now:
381 */
382 if (inval_end > eof_page_base) {
383 inval_start = eof_page_base;
384 } else {
385 zero_off = eof_page_base;
386 };
387 };
388
389 if (inval_start < inval_end) {
390 struct timeval tv;
391 /* There's some range of data that's going to be marked invalid */
392
393 if (zero_off < inval_start) {
394 /* The pages between inval_start and inval_end are going to be invalidated,
395 and the actual write will start on a page past inval_end. Now's the last
396 chance to zero-fill the page containing the EOF:
397 */
398 hfs_unlock(cp);
399 cnode_locked = 0;
400 retval = cluster_write(vp, (uio_t) 0,
401 fp->ff_size, inval_start,
402 zero_off, (off_t)0,
403 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
404 hfs_lock(cp, HFS_FORCE_LOCK);
405 cnode_locked = 1;
406 if (retval) goto ioerr_exit;
407 offset = uio_offset(uio);
408 };
409
410 /* Mark the remaining area of the newly allocated space as invalid: */
411 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
412 microuptime(&tv);
413 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
414 zero_off = fp->ff_size = inval_end;
415 };
416
417 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
418 };
419
420 /* Check to see whether the area between the end of the write and the end of
421 the page it falls in is invalid and should be zero-filled as part of the transfer:
422 */
423 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
424 if (tail_off > filesize) tail_off = filesize;
425 if (tail_off > writelimit) {
426 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
427 lflag |= IO_TAILZEROFILL;
428 };
429 };
430
431 /*
432 * if the write starts beyond the current EOF (possibly advanced in the
433 * zeroing of the last block, above), then we'll zero fill from the current EOF
434 * to where the write begins:
435 *
436 * NOTE: If (and ONLY if) the portion of the file about to be written is
437 * before the current EOF it might be marked as invalid now and must be
438 * made readable (removed from the invalid ranges) before cluster_write
439 * tries to write it:
440 */
441 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
442 if (io_start < fp->ff_size) {
443 off_t io_end;
444
445 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
446 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
447 };
448
449 hfs_unlock(cp);
450 cnode_locked = 0;
451 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
452 tail_off, lflag | IO_NOZERODIRTY);
453 offset = uio_offset(uio);
454 if (offset > fp->ff_size) {
455 fp->ff_size = offset;
456
457 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
458 /* Files that are changing size are not hot file candidates. */
459 if (hfsmp->hfc_stage == HFC_RECORDING)
460 fp->ff_bytesread = 0;
461 }
462 if (resid > uio_resid(uio)) {
463 cp->c_touch_chgtime = TRUE;
464 cp->c_touch_modtime = TRUE;
465 }
466 }
467 HFS_KNOTE(vp, NOTE_WRITE);
468
469 ioerr_exit:
470 /*
471 * If we successfully wrote any data, and we are not the superuser,
472 * we clear the setuid and setgid bits as a precaution against
473 * tampering.
474 */
475 if (cp->c_mode & (S_ISUID | S_ISGID)) {
476 cred = vfs_context_ucred(ap->a_context);
477 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
478 if (!cnode_locked) {
479 hfs_lock(cp, HFS_FORCE_LOCK);
480 cnode_locked = 1;
481 }
482 cp->c_mode &= ~(S_ISUID | S_ISGID);
483 }
484 }
485 if (retval) {
486 if (ioflag & IO_UNIT) {
487 if (!cnode_locked) {
488 hfs_lock(cp, HFS_FORCE_LOCK);
489 cnode_locked = 1;
490 }
491 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
492 0, ap->a_context);
493 // LP64todo - fix this! resid needs to be user_ssize_t
494 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
495 uio_setresid(uio, resid);
496 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
497 }
498 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
499 if (!cnode_locked) {
500 hfs_lock(cp, HFS_FORCE_LOCK);
501 cnode_locked = 1;
502 }
503 retval = hfs_update(vp, TRUE);
504 }
505 /* Updating vcbWrCnt doesn't need to be atomic. */
506 hfsmp->vcbWrCnt++;
507
508 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
509 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
510 exit:
511 if (cnode_locked)
512 hfs_unlock(cp);
513 hfs_unlock_truncate(cp);
514 return (retval);
515 }
516
517 /* support for the "bulk-access" fcntl */
518
519 #define CACHE_ELEMS 64
520 #define CACHE_LEVELS 16
521 #define PARENT_IDS_FLAG 0x100
522
523 /* from hfs_attrlist.c */
524 extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
525 mode_t obj_mode, struct mount *mp,
526 kauth_cred_t cred, struct proc *p);
527
528 /* from vfs/vfs_fsevents.c */
529 extern char *get_pathbuff(void);
530 extern void release_pathbuff(char *buff);
531
532 struct access_cache {
533 int numcached;
534 int cachehits; /* these two for statistics gathering */
535 int lookups;
536 unsigned int *acache;
537 Boolean *haveaccess;
538 };
539
540 struct access_t {
541 uid_t uid; /* IN: effective user id */
542 short flags; /* IN: access requested (i.e. R_OK) */
543 short num_groups; /* IN: number of groups user belongs to */
544 int num_files; /* IN: number of files to process */
545 int *file_ids; /* IN: array of file ids */
546 gid_t *groups; /* IN: array of groups */
547 short *access; /* OUT: access info for each file (0 for 'has access') */
548 };
549
550 struct user_access_t {
551 uid_t uid; /* IN: effective user id */
552 short flags; /* IN: access requested (i.e. R_OK) */
553 short num_groups; /* IN: number of groups user belongs to */
554 int num_files; /* IN: number of files to process */
555 user_addr_t file_ids; /* IN: array of file ids */
556 user_addr_t groups; /* IN: array of groups */
557 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
558 };
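/*
 * Illustrative sketch only (not compiled as part of this file): a
 * root-privileged userspace tool might drive the bulk-access check
 * through the fsctl(2) entry point roughly as follows.  The volume
 * path, file IDs, uid and gid below are hypothetical; the kernel side
 * matches the request as HFS_BULKACCESS_FSCTL in hfs_vnop_ioctl().
 *
 *     int   ids[2]    = { 22, 37 };      // catalog node IDs to test
 *     short results[2];                  // 0 means access is granted
 *     gid_t groups[1] = { 20 };
 *     struct access_t req = {
 *         .uid = 501, .flags = R_OK,
 *         .num_groups = 1, .num_files = 2,
 *         .file_ids = ids, .groups = groups, .access = results,
 *     };
 *     fsctl("/Volumes/HFSVol", HFSIOC_BULKACCESS, &req, 0);
 */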
559
560 /*
561 * Perform a binary search for the given parent_id. Return value is
562 * found/not found boolean, and indexp will be the index of the item
563 * or the index at which to insert the item if it's not found.
564 */
565 static int
566 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
567 {
568 unsigned int lo, hi;
569 int index, matches = 0;
570
571 if (cache->numcached == 0) {
572 *indexp = 0;
573 return 0; // table is empty, so insert at index=0 and report no match
574 }
575
576 if (cache->numcached > CACHE_ELEMS) {
577 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
578 cache->numcached, CACHE_ELEMS);*/
579 cache->numcached = CACHE_ELEMS;
580 }
581
582 lo = 0;
583 hi = cache->numcached - 1;
584 index = -1;
585
586 /* perform binary search for parent_id */
587 do {
588 unsigned int mid = (hi - lo)/2 + lo;
589 unsigned int this_id = cache->acache[mid];
590
591 if (parent_id == this_id) {
592 index = mid;
593 break;
594 }
595
596 if (parent_id < this_id) {
597 hi = mid;
598 continue;
599 }
600
601 if (parent_id > this_id) {
602 lo = mid + 1;
603 continue;
604 }
605 } while(lo < hi);
606
607 /* check if lo and hi converged on the match */
608 if (parent_id == cache->acache[hi]) {
609 index = hi;
610 }
611
612 /* if no existing entry found, find index for new one */
613 if (index == -1) {
614 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
615 matches = 0;
616 } else {
617 matches = 1;
618 }
619
620 *indexp = index;
621 return matches;
622 }
623
624 /*
625 * Add a node to the access_cache at the given index (or do a lookup first
626 * to find the index if -1 is passed in). We currently do a replace rather
627 * than an insert if the cache is full.
628 */
629 static void
630 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
631 {
632 int lookup_index = -1;
633
634 /* need to do a lookup first if -1 passed for index */
635 if (index == -1) {
636 if (lookup_bucket(cache, &lookup_index, nodeID)) {
637 if (cache->haveaccess[lookup_index] != access) {
638 /* change access info for existing entry... should never happen */
639 cache->haveaccess[lookup_index] = access;
640 }
641
642 /* mission accomplished */
643 return;
644 } else {
645 index = lookup_index;
646 }
647
648 }
649
650 /* if the cache is full, do a replace rather than an insert */
651 if (cache->numcached >= CACHE_ELEMS) {
652 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
653 cache->numcached = CACHE_ELEMS-1;
654
655 if (index > cache->numcached) {
656 // printf("index %d pinned to %d\n", index, cache->numcached);
657 index = cache->numcached;
658 }
659 } else if (index >= 0 && index < cache->numcached) {
660 /* only do bcopy if we're inserting */
661 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
662 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
663 }
664
665 cache->acache[index] = nodeID;
666 cache->haveaccess[index] = access;
667 cache->numcached++;
668 }
669
670
671 struct cinfo {
672 uid_t uid;
673 gid_t gid;
674 mode_t mode;
675 cnid_t parentcnid;
676 };
677
678 static int
679 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
680 {
681 struct cinfo *cip = (struct cinfo *)arg;
682
683 cip->uid = attrp->ca_uid;
684 cip->gid = attrp->ca_gid;
685 cip->mode = attrp->ca_mode;
686 cip->parentcnid = descp->cd_parentcnid;
687
688 return (0);
689 }
690
691 /*
692 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
693 * isn't incore, then go to the catalog.
694 */
695 static int
696 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
697 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
698 {
699 int error = 0;
700
701 /* if this id matches the one the fsctl was called with, skip the lookup */
702 if (cnid == skip_cp->c_cnid) {
703 cnattrp->ca_uid = skip_cp->c_uid;
704 cnattrp->ca_gid = skip_cp->c_gid;
705 cnattrp->ca_mode = skip_cp->c_mode;
706 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
707 } else {
708 struct cinfo c_info;
709
710 /* otherwise, check the cnode hash in case the file/dir is incore */
711 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
712 cnattrp->ca_uid = c_info.uid;
713 cnattrp->ca_gid = c_info.gid;
714 cnattrp->ca_mode = c_info.mode;
715 keyp->hfsPlus.parentID = c_info.parentcnid;
716 } else {
717 int lockflags;
718
719 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
720
721 /* lookup this cnid in the catalog */
722 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
723
724 hfs_systemfile_unlock(hfsmp, lockflags);
725
726 cache->lookups++;
727 }
728 }
729
730 return (error);
731 }
732
733 /*
734 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
735 * up to CACHE_LEVELS as we progress towards the root.
736 */
737 static int
738 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
739 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
740 {
741 int myErr = 0;
742 int myResult;
743 HFSCatalogNodeID thisNodeID;
744 unsigned long myPerms;
745 struct cat_attr cnattr;
746 int cache_index = -1;
747 CatalogKey catkey;
748
749 int i = 0, ids_to_cache = 0;
750 int parent_ids[CACHE_LEVELS];
751
752 /* root always has access */
753 if (!suser(myp_ucred, NULL)) {
754 return (1);
755 }
756
757 thisNodeID = nodeID;
758 while (thisNodeID >= kRootDirID) {
759 myResult = 0; /* default to "no access" */
760
761 /* check the cache before resorting to hitting the catalog */
762
763 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
764 * to look any further after hitting cached dir */
765
766 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
767 cache->cachehits++;
768 myResult = cache->haveaccess[cache_index];
769 goto ExitThisRoutine;
770 }
771
772 /* remember which parents we want to cache */
773 if (ids_to_cache < CACHE_LEVELS) {
774 parent_ids[ids_to_cache] = thisNodeID;
775 ids_to_cache++;
776 }
777
778 /* do the lookup (checks the cnode hash, then the catalog) */
779 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
780 if (myErr) {
781 goto ExitThisRoutine; /* no access */
782 }
783
784 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
785 cnattr.ca_mode, hfsmp->hfs_mp,
786 myp_ucred, theProcPtr);
787
788 if ( (myPerms & X_OK) == 0 ) {
789 myResult = 0;
790 goto ExitThisRoutine; /* no access */
791 }
792
793 /* up the hierarchy we go */
794 thisNodeID = catkey.hfsPlus.parentID;
795 }
796
797 /* if here, we have access to this node */
798 myResult = 1;
799
800 ExitThisRoutine:
801 if (myErr) {
802 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
803 myResult = 0;
804 }
805 *err = myErr;
806
807 /* cache the parent directory(ies) */
808 for (i = 0; i < ids_to_cache; i++) {
809 /* small optimization: get rid of double-lookup for all these */
810 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
811 add_node(cache, -1, parent_ids[i], myResult);
812 }
813
814 return (myResult);
815 }
816 /* end "bulk-access" support */
817
818
819
820 /*
821 * Callback for use with freeze ioctl.
822 */
823 static int
824 hfs_freezewrite_callback(struct vnode *vp, void *cargs)
825 {
826 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
827
828 return 0;
829 }
830
831 /*
832 * Control filesystem operating characteristics.
833 */
834 int
835 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
836 vnode_t a_vp;
837 int a_command;
838 caddr_t a_data;
839 int a_fflag;
840 vfs_context_t a_context;
841 } */ *ap)
842 {
843 struct vnode * vp = ap->a_vp;
844 struct hfsmount *hfsmp = VTOHFS(vp);
845 vfs_context_t context = ap->a_context;
846 kauth_cred_t cred = vfs_context_ucred(context);
847 proc_t p = vfs_context_proc(context);
848 struct vfsstatfs *vfsp;
849 boolean_t is64bit;
850
851 is64bit = proc_is64bit(p);
852
853 switch (ap->a_command) {
854
855 case HFS_RESIZE_VOLUME: {
856 u_int64_t newsize;
857 u_int64_t cursize;
858
859 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
860 if (suser(cred, NULL) &&
861 kauth_cred_getuid(cred) != vfsp->f_owner) {
862 return (EACCES); /* must be owner of file system */
863 }
864 if (!vnode_isvroot(vp)) {
865 return (EINVAL);
866 }
867 newsize = *(u_int64_t *)ap->a_data;
868 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
869
870 if (newsize > cursize) {
871 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
872 } else if (newsize < cursize) {
873 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
874 } else {
875 return (0);
876 }
877 }
878 case HFS_CHANGE_NEXT_ALLOCATION: {
879 u_int32_t location;
880
881 if (vnode_vfsisrdonly(vp)) {
882 return (EROFS);
883 }
884 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
885 if (suser(cred, NULL) &&
886 kauth_cred_getuid(cred) != vfsp->f_owner) {
887 return (EACCES); /* must be owner of file system */
888 }
889 if (!vnode_isvroot(vp)) {
890 return (EINVAL);
891 }
892 location = *(u_int32_t *)ap->a_data;
893 if (location > hfsmp->totalBlocks - 1) {
894 return (EINVAL);
895 }
896 /* Return previous value. */
897 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
898 HFS_MOUNT_LOCK(hfsmp, TRUE);
899 hfsmp->nextAllocation = location;
900 hfsmp->vcbFlags |= 0xFF00;
901 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
902 return (0);
903 }
904
905 #ifdef HFS_SPARSE_DEV
906 case HFS_SETBACKINGSTOREINFO: {
907 struct vnode * bsfs_rootvp;
908 struct vnode * di_vp;
909 struct hfs_backingstoreinfo *bsdata;
910 int error = 0;
911
912 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
913 return (EALREADY);
914 }
915 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
916 if (suser(cred, NULL) &&
917 kauth_cred_getuid(cred) != vfsp->f_owner) {
918 return (EACCES); /* must be owner of file system */
919 }
920 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
921 if (bsdata == NULL) {
922 return (EINVAL);
923 }
924 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
925 return (error);
926 }
927 if ((error = vnode_getwithref(di_vp))) {
928 file_drop(bsdata->backingfd);
929 return(error);
930 }
931
932 if (vnode_mount(vp) == vnode_mount(di_vp)) {
933 (void)vnode_put(di_vp);
934 file_drop(bsdata->backingfd);
935 return (EINVAL);
936 }
937
938 /*
939 * Obtain the backing fs root vnode and keep a reference
940 * on it. This reference will be dropped in hfs_unmount.
941 */
942 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
943 if (error) {
944 (void)vnode_put(di_vp);
945 file_drop(bsdata->backingfd);
946 return (error);
947 }
948 vnode_ref(bsfs_rootvp);
949 vnode_put(bsfs_rootvp);
950
951 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
952 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
953 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
954 hfsmp->hfs_sparsebandblks *= 4;
955
956 (void)vnode_put(di_vp);
957 file_drop(bsdata->backingfd);
958 return (0);
959 }
960 case HFS_CLRBACKINGSTOREINFO: {
961 struct vnode * tmpvp;
962
963 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
964 if (suser(cred, NULL) &&
965 kauth_cred_getuid(cred) != vfsp->f_owner) {
966 return (EACCES); /* must be owner of file system */
967 }
968 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
969 hfsmp->hfs_backingfs_rootvp) {
970
971 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
972 tmpvp = hfsmp->hfs_backingfs_rootvp;
973 hfsmp->hfs_backingfs_rootvp = NULLVP;
974 hfsmp->hfs_sparsebandblks = 0;
975 vnode_rele(tmpvp);
976 }
977 return (0);
978 }
979 #endif /* HFS_SPARSE_DEV */
980
981 case F_FREEZE_FS: {
982 struct mount *mp;
983 task_t task;
984
985 if (!is_suser())
986 return (EACCES);
987
988 mp = vnode_mount(vp);
989 hfsmp = VFSTOHFS(mp);
990
991 if (!(hfsmp->jnl))
992 return (ENOTSUP);
993
994 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
995
996 task = current_task();
997 task_working_set_disable(task);
998
999 // flush things before we get started to try and prevent
1000 // dirty data from being paged out while we're frozen.
1001 // note: can't do this after taking the lock as it will
1002 // deadlock against ourselves.
1003 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1004 hfs_global_exclusive_lock_acquire(hfsmp);
1005 journal_flush(hfsmp->jnl);
1006
1007 // don't need to iterate on all vnodes, we just need to
1008 // wait for writes to the system files and the device vnode
1009 if (HFSTOVCB(hfsmp)->extentsRefNum)
1010 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1011 if (HFSTOVCB(hfsmp)->catalogRefNum)
1012 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1013 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1014 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1015 if (hfsmp->hfs_attribute_vp)
1016 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1017 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1018
1019 hfsmp->hfs_freezing_proc = current_proc();
1020
1021 return (0);
1022 }
1023
1024 case F_THAW_FS: {
1025 if (!is_suser())
1026 return (EACCES);
1027
1028 // if we're not the one who froze the fs then we
1029 // can't thaw it.
1030 if (hfsmp->hfs_freezing_proc != current_proc()) {
1031 return EPERM;
1032 }
1033
1034 // NOTE: if you add code here, also go check the
1035 // code that "thaws" the fs in hfs_vnop_close()
1036 //
1037 hfsmp->hfs_freezing_proc = NULL;
1038 hfs_global_exclusive_lock_release(hfsmp);
1039 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1040
1041 return (0);
1042 }
1043
1044 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1045 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1046
1047 case HFS_BULKACCESS_FSCTL:
1048 case HFS_BULKACCESS: {
1049 /*
1050 * NOTE: on entry, the vnode is locked. In case this vnode
1051 * happens to be in our list of file_ids, we'll note it and
1052 * avoid calling hfs_chashget_nowait() on that id, as that
1053 * would cause a "locking against myself" panic.
1054 */
1055 Boolean check_leaf = true;
1056
1057 struct user_access_t *user_access_structp;
1058 struct user_access_t tmp_user_access_t;
1059 struct access_cache cache;
1060
1061 int error = 0, i;
1062
1063 dev_t dev = VTOC(vp)->c_dev;
1064
1065 short flags;
1066 struct ucred myucred; /* XXX ILLEGAL */
1067 int num_files;
1068 int *file_ids = NULL;
1069 short *access = NULL;
1070
1071 cnid_t cnid;
1072 cnid_t prevParent_cnid = 0;
1073 unsigned long myPerms;
1074 short myaccess = 0;
1075 struct cat_attr cnattr;
1076 CatalogKey catkey;
1077 struct cnode *skip_cp = VTOC(vp);
1078 struct vfs_context my_context;
1079
1080 /* first, return error if not run as root */
1081 if (cred->cr_ruid != 0) {
1082 return EPERM;
1083 }
1084
1085 /* initialize the local cache and buffers */
1086 cache.numcached = 0;
1087 cache.cachehits = 0;
1088 cache.lookups = 0;
1089
1090 file_ids = (int *) get_pathbuff();
1091 access = (short *) get_pathbuff();
1092 cache.acache = (int *) get_pathbuff();
1093 cache.haveaccess = (Boolean *) get_pathbuff();
1094
1095 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1096 release_pathbuff((char *) file_ids);
1097 release_pathbuff((char *) access);
1098 release_pathbuff((char *) cache.acache);
1099 release_pathbuff((char *) cache.haveaccess);
1100
1101 return ENOMEM;
1102 }
1103
1104 /* struct copyin done during dispatch... need to copy file_id array separately */
1105 if (ap->a_data == NULL) {
1106 error = EINVAL;
1107 goto err_exit_bulk_access;
1108 }
1109
1110 if (is64bit) {
1111 user_access_structp = (struct user_access_t *)ap->a_data;
1112 }
1113 else {
1114 struct access_t * accessp = (struct access_t *)ap->a_data;
1115 tmp_user_access_t.uid = accessp->uid;
1116 tmp_user_access_t.flags = accessp->flags;
1117 tmp_user_access_t.num_groups = accessp->num_groups;
1118 tmp_user_access_t.num_files = accessp->num_files;
1119 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1120 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1121 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1122 user_access_structp = &tmp_user_access_t;
1123 }
1124
1125 num_files = user_access_structp->num_files;
1126 if (num_files < 1) {
1127 goto err_exit_bulk_access;
1128 }
1129 if (num_files > 256) {
1130 error = EINVAL;
1131 goto err_exit_bulk_access;
1132 }
1133
1134 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1135 num_files * sizeof(int)))) {
1136 goto err_exit_bulk_access;
1137 }
1138
1139 /* fill in the ucred structure */
1140 flags = user_access_structp->flags;
1141 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1142 flags = R_OK;
1143 }
1144
1145 /* check if we've been passed leaf node ids or parent ids */
1146 if (flags & PARENT_IDS_FLAG) {
1147 check_leaf = false;
1148 }
1149
1150 memset(&myucred, 0, sizeof(myucred));
1151 myucred.cr_ref = 1;
1152 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1153 myucred.cr_ngroups = user_access_structp->num_groups;
1154 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1155 myucred.cr_ngroups = 0;
1156 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1157 myucred.cr_ngroups * sizeof(gid_t)))) {
1158 goto err_exit_bulk_access;
1159 }
1160 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1161
1162 my_context.vc_proc = p;
1163 my_context.vc_ucred = &myucred;
1164
1165 /* Check access to each file_id passed in */
1166 for (i = 0; i < num_files; i++) {
1167 #if 0
1168 cnid = (cnid_t) file_ids[i];
1169
1170 /* root always has access */
1171 if (!suser(&myucred, NULL)) {
1172 access[i] = 0;
1173 continue;
1174 }
1175
1176 if (check_leaf) {
1177
1178 /* do the lookup (checks the cnode hash, then the catalog) */
1179 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1180 if (error) {
1181 access[i] = (short) error;
1182 continue;
1183 }
1184
1185 /* before calling CheckAccess(), check the target file for read access */
1186 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1187 cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p );
1188
1189
1190 /* fail fast if no access */
1191 if ((myPerms & flags) == 0) {
1192 access[i] = EACCES;
1193 continue;
1194 }
1195 } else {
1196 /* we were passed an array of parent ids */
1197 catkey.hfsPlus.parentID = cnid;
1198 }
1199
1200 /* if the last guy had the same parent and had access, we're done */
1201 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1202 cache.cachehits++;
1203 access[i] = 0;
1204 continue;
1205 }
1206
1207 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1208 skip_cp, p, &myucred, dev);
1209
1210 if ( myaccess ) {
1211 access[i] = 0; // have access.. no errors to report
1212 } else {
1213 access[i] = (error != 0 ? (short) error : EACCES);
1214 }
1215
1216 prevParent_cnid = catkey.hfsPlus.parentID;
1217 #else
1218 int myErr;
1219
1220 cnid = (cnid_t)file_ids[i];
1221
1222 while (cnid >= kRootDirID) {
1223 /* get the vnode for this cnid */
1224 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1225 if ( myErr ) {
1226 access[i] = EACCES;
1227 break;
1228 }
1229
1230 cnid = VTOC(vp)->c_parentcnid;
1231
1232 hfs_unlock(VTOC(vp));
1233 if (vnode_vtype(vp) == VDIR) {
1234 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1235 } else {
1236 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1237 }
1238 vnode_put(vp);
1239 access[i] = myErr;
1240 if (myErr) {
1241 break;
1242 }
1243 }
1244 #endif
1245 }
1246
1247 /* copyout the access array */
1248 if ((error = copyout((caddr_t)access, user_access_structp->access,
1249 num_files * sizeof (short)))) {
1250 goto err_exit_bulk_access;
1251 }
1252
1253 err_exit_bulk_access:
1254
1255 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1256
1257 release_pathbuff((char *) cache.acache);
1258 release_pathbuff((char *) cache.haveaccess);
1259 release_pathbuff((char *) file_ids);
1260 release_pathbuff((char *) access);
1261
1262 return (error);
1263 } /* HFS_BULKACCESS */
1264
1265 case HFS_SETACLSTATE: {
1266 int state;
1267
1268 if (ap->a_data == NULL) {
1269 return (EINVAL);
1270 }
1271
1272 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1273 state = *(int *)ap->a_data;
1274
1275 // super-user can enable or disable acl's on a volume.
1276 // the volume owner can only enable acl's
1277 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1278 return (EPERM);
1279 }
1280 if (state == 0 || state == 1)
1281 return hfs_setextendedsecurity(hfsmp, state);
1282 else
1283 return (EINVAL);
1284 }
1285
1286 case F_FULLFSYNC: {
1287 int error;
1288
1289 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1290 if (error == 0) {
1291 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1292 hfs_unlock(VTOC(vp));
1293 }
1294
1295 return error;
1296 }
1297
1298 case F_CHKCLEAN: {
1299 register struct cnode *cp;
1300 int error;
1301
1302 if (!vnode_isreg(vp))
1303 return EINVAL;
1304
1305 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1306 if (error == 0) {
1307 cp = VTOC(vp);
1308 /*
1309 * used by regression test to determine if
1310 * all the dirty pages (via write) have been cleaned
1311 * after a call to 'fsync'.
1312 */
1313 error = is_file_clean(vp, VTOF(vp)->ff_size);
1314 hfs_unlock(cp);
1315 }
1316 return (error);
1317 }
1318
1319 case F_RDADVISE: {
1320 register struct radvisory *ra;
1321 struct filefork *fp;
1322 int error;
1323
1324 if (!vnode_isreg(vp))
1325 return EINVAL;
1326
1327 ra = (struct radvisory *)(ap->a_data);
1328 fp = VTOF(vp);
1329
1330 /* Protect against a size change. */
1331 hfs_lock_truncate(VTOC(vp), TRUE);
1332
1333 if (ra->ra_offset >= fp->ff_size) {
1334 error = EFBIG;
1335 } else {
1336 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1337 }
1338
1339 hfs_unlock_truncate(VTOC(vp));
1340 return (error);
1341 }
1342
1343 case F_READBOOTSTRAP:
1344 case F_WRITEBOOTSTRAP:
1345 {
1346 struct vnode *devvp = NULL;
1347 user_fbootstraptransfer_t *user_bootstrapp;
1348 int devBlockSize;
1349 int error;
1350 uio_t auio;
1351 daddr64_t blockNumber;
1352 u_long blockOffset;
1353 u_long xfersize;
1354 struct buf *bp;
1355 user_fbootstraptransfer_t user_bootstrap;
1356
1357 if (!vnode_isvroot(vp))
1358 return (EINVAL);
1359 /* LP64 - when caller is a 64 bit process then we are passed a pointer
1360 * to a user_fbootstraptransfer_t else we get a pointer to a
1361 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1362 */
1363 if (is64bit) {
1364 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1365 }
1366 else {
1367 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1368 user_bootstrapp = &user_bootstrap;
1369 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1370 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1371 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1372 }
1373 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1374 return EINVAL;
1375
1376 devvp = VTOHFS(vp)->hfs_devvp;
1377 auio = uio_create(1, user_bootstrapp->fbt_offset,
1378 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1379 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1380 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1381
1382 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1383
1384 while (uio_resid(auio) > 0) {
1385 blockNumber = uio_offset(auio) / devBlockSize;
1386 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1387 if (error) {
1388 if (bp) buf_brelse(bp);
1389 uio_free(auio);
1390 return error;
1391 };
1392
1393 blockOffset = uio_offset(auio) % devBlockSize;
1394 xfersize = devBlockSize - blockOffset;
1395 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1396 if (error) {
1397 buf_brelse(bp);
1398 uio_free(auio);
1399 return error;
1400 };
1401 if (uio_rw(auio) == UIO_WRITE) {
1402 error = VNOP_BWRITE(bp);
1403 if (error) {
1404 uio_free(auio);
1405 return error;
1406 }
1407 } else {
1408 buf_brelse(bp);
1409 };
1410 };
1411 uio_free(auio);
1412 };
1413 return 0;
1414
1415 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1416 {
1417 if (is64bit) {
1418 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1419 }
1420 else {
1421 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1422 }
1423 return 0;
1424 }
1425
1426 case HFS_GET_MOUNT_TIME:
1427 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1428 break;
1429
1430 case HFS_GET_LAST_MTIME:
1431 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1432 break;
1433
1434 case HFS_SET_BOOT_INFO:
1435 if (!vnode_isvroot(vp))
1436 return(EINVAL);
1437 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1438 return(EACCES); /* must be superuser or owner of filesystem */
1439 HFS_MOUNT_LOCK(hfsmp, TRUE);
1440 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1441 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1442 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1443 break;
1444
1445 case HFS_GET_BOOT_INFO:
1446 if (!vnode_isvroot(vp))
1447 return(EINVAL);
1448 HFS_MOUNT_LOCK(hfsmp, TRUE);
1449 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1450 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1451 break;
1452
1453 default:
1454 return (ENOTTY);
1455 }
1456
1457 /* Should never get here */
1458 return 0;
1459 }
1460
1461 /*
1462 * select
1463 */
1464 int
1465 hfs_vnop_select(__unused struct vnop_select_args *ap)
1466 /*
1467 struct vnop_select_args {
1468 vnode_t a_vp;
1469 int a_which;
1470 int a_fflags;
1471 void *a_wql;
1472 vfs_context_t a_context;
1473 };
1474 */
1475 {
1476 /*
1477 * We should really check to see if I/O is possible.
1478 */
1479 return (1);
1480 }
1481
1482 /*
1483 * Converts a logical block number to a physical block, and optionally returns
1484 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1485 * The physical block number is based on the device block size, which is currently 512.
1486 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
1487 */
1488 int
1489 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1490 {
1491 struct cnode *cp = VTOC(vp);
1492 struct filefork *fp = VTOF(vp);
1493 struct hfsmount *hfsmp = VTOHFS(vp);
1494 int retval = E_NONE;
1495 daddr_t logBlockSize;
1496 size_t bytesContAvail = 0;
1497 off_t blockposition;
1498 int lockExtBtree;
1499 int lockflags = 0;
1500
1501 /*
1502 * Check for underlying vnode requests and ensure that logical
1503 * to physical mapping is requested.
1504 */
1505 if (vpp != NULL)
1506 *vpp = cp->c_devvp;
1507 if (bnp == NULL)
1508 return (0);
1509
1510 logBlockSize = GetLogicalBlockSize(vp);
1511 blockposition = (off_t)bn * (off_t)logBlockSize;
1512
1513 lockExtBtree = overflow_extents(fp);
1514
1515 if (lockExtBtree)
1516 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1517
1518 retval = MacToVFSError(
1519 MapFileBlockC (HFSTOVCB(hfsmp),
1520 (FCB*)fp,
1521 MAXPHYSIO,
1522 blockposition,
1523 bnp,
1524 &bytesContAvail));
1525
1526 if (lockExtBtree)
1527 hfs_systemfile_unlock(hfsmp, lockflags);
1528
1529 if (retval == E_NONE) {
1530 /* Figure out how many read ahead blocks there are */
1531 if (runp != NULL) {
1532 if (can_cluster(logBlockSize)) {
1533 /* Make sure this result never goes negative: */
1534 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1535 } else {
1536 *runp = 0;
1537 }
1538 }
1539 }
1540 return (retval);
1541 }
1542
1543 /*
1544 * Convert logical block number to file offset.
1545 */
1546 int
1547 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1548 /*
1549 struct vnop_blktooff_args {
1550 vnode_t a_vp;
1551 daddr64_t a_lblkno;
1552 off_t *a_offset;
1553 };
1554 */
1555 {
1556 if (ap->a_vp == NULL)
1557 return (EINVAL);
1558 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1559
1560 return(0);
1561 }
1562
1563 /*
1564 * Convert file offset to logical block number.
1565 */
1566 int
1567 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1568 /*
1569 struct vnop_offtoblk_args {
1570 vnode_t a_vp;
1571 off_t a_offset;
1572 daddr64_t *a_lblkno;
1573 };
1574 */
1575 {
1576 if (ap->a_vp == NULL)
1577 return (EINVAL);
1578 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1579
1580 return(0);
1581 }
1582
1583 /*
1584 * Map file offset to physical block number.
1585 *
1586 * System file cnodes are expected to be locked (shared or exclusive).
1587 */
1588 int
1589 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1590 /*
1591 struct vnop_blockmap_args {
1592 vnode_t a_vp;
1593 off_t a_foffset;
1594 size_t a_size;
1595 daddr64_t *a_bpn;
1596 size_t *a_run;
1597 void *a_poff;
1598 int a_flags;
1599 vfs_context_t a_context;
1600 };
1601 */
1602 {
1603 struct vnode *vp = ap->a_vp;
1604 struct cnode *cp;
1605 struct filefork *fp;
1606 struct hfsmount *hfsmp;
1607 size_t bytesContAvail = 0;
1608 int retval = E_NONE;
1609 int syslocks = 0;
1610 int lockflags = 0;
1611 struct rl_entry *invalid_range;
1612 enum rl_overlaptype overlaptype;
1613 int started_tr = 0;
1614 int tooklock = 0;
1615
1616 /* Do not allow blockmap operation on a directory */
1617 if (vnode_isdir(vp)) {
1618 return (ENOTSUP);
1619 }
1620
1621 /*
1622 * Check for underlying vnode requests and ensure that logical
1623 * to physical mapping is requested.
1624 */
1625 if (ap->a_bpn == NULL)
1626 return (0);
1627
1628 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1629 if (VTOC(vp)->c_lockowner != current_thread()) {
1630 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1631 tooklock = 1;
1632 } else {
1633 cp = VTOC(vp);
1634 panic("blockmap: %s cnode lock already held!\n",
1635 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1636 }
1637 }
1638 hfsmp = VTOHFS(vp);
1639 cp = VTOC(vp);
1640 fp = VTOF(vp);
1641
1642 retry:
1643 if (fp->ff_unallocblocks) {
1644 if (hfs_start_transaction(hfsmp) != 0) {
1645 retval = EINVAL;
1646 goto exit;
1647 } else {
1648 started_tr = 1;
1649 }
1650 syslocks = SFL_EXTENTS | SFL_BITMAP;
1651
1652 } else if (overflow_extents(fp)) {
1653 syslocks = SFL_EXTENTS;
1654 }
1655
1656 if (syslocks)
1657 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1658
1659 /*
1660 * Check for any delayed allocations.
1661 */
1662 if (fp->ff_unallocblocks) {
1663 SInt64 actbytes;
1664 u_int32_t loanedBlocks;
1665
1666 //
1667 // Make sure we have a transaction. It's possible
1668 // that we came in and fp->ff_unallocblocks was zero
1669 // but during the time we blocked acquiring the extents
1670 // btree, ff_unallocblocks became non-zero and so we
1671 // will need to start a transaction.
1672 //
1673 if (started_tr == 0) {
1674 if (syslocks) {
1675 hfs_systemfile_unlock(hfsmp, lockflags);
1676 syslocks = 0;
1677 }
1678 goto retry;
1679 }
1680
1681 /*
1682 * Note: ExtendFileC will release any blocks on loan and
1683 * acquire real blocks. So we ask to extend by zero bytes
1684 * since ExtendFileC will account for the virtual blocks.
1685 */
1686
1687 loanedBlocks = fp->ff_unallocblocks;
1688 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1689 kEFAllMask | kEFNoClumpMask, &actbytes);
1690
1691 if (retval) {
1692 fp->ff_unallocblocks = loanedBlocks;
1693 cp->c_blocks += loanedBlocks;
1694 fp->ff_blocks += loanedBlocks;
1695
1696 HFS_MOUNT_LOCK(hfsmp, TRUE);
1697 hfsmp->loanedBlocks += loanedBlocks;
1698 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1699 }
1700
1701 if (retval) {
1702 hfs_systemfile_unlock(hfsmp, lockflags);
1703 cp->c_flag |= C_MODIFIED;
1704 if (started_tr) {
1705 (void) hfs_update(vp, TRUE);
1706 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1707
1708 hfs_end_transaction(hfsmp);
1709 }
1710 goto exit;
1711 }
1712 }
1713
1714 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1715 ap->a_bpn, &bytesContAvail);
1716 if (syslocks) {
1717 hfs_systemfile_unlock(hfsmp, lockflags);
1718 syslocks = 0;
1719 }
1720
1721 if (started_tr) {
1722 (void) hfs_update(vp, TRUE);
1723 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1724 hfs_end_transaction(hfsmp);
1725 started_tr = 0;
1726 }
1727 if (retval) {
1728 goto exit;
1729 }
1730
1731 /* Adjust the mapping information for invalid file ranges: */
1732 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1733 ap->a_foffset + (off_t)bytesContAvail - 1,
1734 &invalid_range);
1735 if (overlaptype != RL_NOOVERLAP) {
1736 switch(overlaptype) {
1737 case RL_MATCHINGOVERLAP:
1738 case RL_OVERLAPCONTAINSRANGE:
1739 case RL_OVERLAPSTARTSBEFORE:
1740 /* There's no valid block for this byte offset: */
1741 *ap->a_bpn = (daddr64_t)-1;
1742 /* There's no point limiting the amount to be returned
1743 * if the invalid range that was hit extends all the way
1744 * to the EOF (i.e. there's no valid bytes between the
1745 * end of this range and the file's EOF):
1746 */
1747 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1748 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1749 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1750 }
1751 break;
1752
1753 case RL_OVERLAPISCONTAINED:
1754 case RL_OVERLAPENDSAFTER:
1755 /* The range of interest hits an invalid block before the end: */
1756 if (invalid_range->rl_start == ap->a_foffset) {
1757 /* There's actually no valid information to be had starting here: */
1758 *ap->a_bpn = (daddr64_t)-1;
1759 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1760 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1761 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1762 }
1763 } else {
1764 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1765 }
1766 break;
1767
1768 case RL_NOOVERLAP:
1769 break;
1770 } /* end switch */
1771 if (bytesContAvail > ap->a_size)
1772 bytesContAvail = ap->a_size;
1773 }
1774 if (ap->a_run)
1775 *ap->a_run = bytesContAvail;
1776
1777 if (ap->a_poff)
1778 *(int *)ap->a_poff = 0;
1779 exit:
1780 if (tooklock)
1781 hfs_unlock(cp);
1782
1783 return (MacToVFSError(retval));
1784 }
1785
1786
1787 /*
1788 * Prepare and issue the I/O.
1789 * buf_strategy() knows how to deal
1790 * with requests that require
1791 * fragmented I/Os.
1792 */
1793 int
1794 hfs_vnop_strategy(struct vnop_strategy_args *ap)
1795 {
1796 buf_t bp = ap->a_bp;
1797 vnode_t vp = buf_vnode(bp);
1798 struct cnode *cp = VTOC(vp);
1799
1800 return (buf_strategy(cp->c_devvp, ap));
1801 }
1802
1803
1804 static int
1805 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1806 {
1807 register struct cnode *cp = VTOC(vp);
1808 struct filefork *fp = VTOF(vp);
1809 struct proc *p = vfs_context_proc(context);
1810 kauth_cred_t cred = vfs_context_ucred(context);
1811 int retval;
1812 off_t bytesToAdd;
1813 off_t actualBytesAdded;
1814 off_t filebytes;
1815 u_int64_t old_filesize;
1816 u_long fileblocks;
1817 int blksize;
1818 struct hfsmount *hfsmp;
1819 int lockflags;
1820
1821 blksize = VTOVCB(vp)->blockSize;
1822 fileblocks = fp->ff_blocks;
1823 filebytes = (off_t)fileblocks * (off_t)blksize;
1824 old_filesize = fp->ff_size;
1825
1826 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1827 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1828
1829 if (length < 0)
1830 return (EINVAL);
1831
1832 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1833 return (EFBIG);
1834
1835 hfsmp = VTOHFS(vp);
1836
1837 retval = E_NONE;
1838
1839 /* Files that are changing size are not hot file candidates. */
1840 if (hfsmp->hfc_stage == HFC_RECORDING) {
1841 fp->ff_bytesread = 0;
1842 }
1843
1844 /*
1845 * We cannot just check if fp->ff_size == length (as an optimization)
1846 * since there may be extra physical blocks that also need truncation.
1847 */
1848 #if QUOTA
1849 if ((retval = hfs_getinoquota(cp)))
1850 return(retval);
1851 #endif /* QUOTA */
1852
1853 /*
1854 * Lengthen the size of the file. We must ensure that the
1855 * last byte of the file is allocated. Since the smallest
1856 * value of ff_size is 0, length will be at least 1.
1857 */
1858 if (length > (off_t)fp->ff_size) {
1859 #if QUOTA
1860 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1861 cred, 0);
1862 if (retval)
1863 goto Err_Exit;
1864 #endif /* QUOTA */
1865 /*
1866 * If we don't have enough physical space then
1867 * we need to extend the physical size.
1868 */
1869 if (length > filebytes) {
1870 int eflags;
1871 u_long blockHint = 0;
1872
1873 /* All or nothing and don't round up to clumpsize. */
1874 eflags = kEFAllMask | kEFNoClumpMask;
1875
1876 if (cred && suser(cred, NULL) != 0)
1877 eflags |= kEFReserveMask; /* keep a reserve */
1878
1879 /*
1880 * Allocate Journal and Quota files in metadata zone.
1881 */
1882 if (filebytes == 0 &&
1883 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1884 hfs_virtualmetafile(cp)) {
1885 eflags |= kEFMetadataMask;
1886 blockHint = hfsmp->hfs_metazone_start;
1887 }
1888 if (hfs_start_transaction(hfsmp) != 0) {
1889 retval = EINVAL;
1890 goto Err_Exit;
1891 }
1892
1893 /* Protect extents b-tree and allocation bitmap */
1894 lockflags = SFL_BITMAP;
1895 if (overflow_extents(fp))
1896 lockflags |= SFL_EXTENTS;
1897 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1898
1899 while ((length > filebytes) && (retval == E_NONE)) {
1900 bytesToAdd = length - filebytes;
1901 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1902 (FCB*)fp,
1903 bytesToAdd,
1904 blockHint,
1905 eflags,
1906 &actualBytesAdded));
1907
1908 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1909 if (actualBytesAdded == 0 && retval == E_NONE) {
1910 if (length > filebytes)
1911 length = filebytes;
1912 break;
1913 }
1914 } /* endwhile */
1915
1916 hfs_systemfile_unlock(hfsmp, lockflags);
1917
1918 if (hfsmp->jnl) {
1919 (void) hfs_update(vp, TRUE);
1920 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1921 }
1922
1923 hfs_end_transaction(hfsmp);
1924
1925 if (retval)
1926 goto Err_Exit;
1927
1928 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1929 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1930 }
1931
1932 if (!(flags & IO_NOZEROFILL)) {
1933 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1934 struct rl_entry *invalid_range;
1935 off_t zero_limit;
1936
1937 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1938 if (length < zero_limit) zero_limit = length;
1939
1940 if (length > (off_t)fp->ff_size) {
1941 struct timeval tv;
1942
1943 /* Extending the file: time to fill out the current last page with zeroes? */
1944 if ((fp->ff_size & PAGE_MASK_64) &&
1945 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1946 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1947
1948 /* There's some valid data at the start of the (current) last page
1949 of the file, so zero out the remainder of that page to ensure the
1950 entire page contains valid data. Since there is no invalid range
1951 possible past the (current) eof, there's no need to remove anything
1952 from the invalid range list before calling cluster_write(): */
1953 hfs_unlock(cp);
1954 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1955 fp->ff_size, (off_t)0,
1956 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1957 hfs_lock(cp, HFS_FORCE_LOCK);
1958 if (retval) goto Err_Exit;
1959
1960 /* Merely invalidate the remaining area, if necessary: */
1961 if (length > zero_limit) {
1962 microuptime(&tv);
1963 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1964 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1965 }
1966 } else {
1967 /* The page containing the (current) eof is invalid: just add the
1968 remainder of the page to the invalid list, along with the area
1969 being newly allocated:
1970 */
1971 microuptime(&tv);
1972 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1973 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1974 };
1975 }
1976 } else {
1977 panic("hfs_truncate: invoked on non-UBC object?!");
1978 };
1979 }
1980 cp->c_touch_modtime = TRUE;
1981 fp->ff_size = length;
1982
1983 /* Nested transactions will do their own ubc_setsize. */
1984 if (!skipsetsize) {
1985 /*
1986 * ubc_setsize can cause a pagein here
1987 * so we need to drop cnode lock.
1988 */
1989 hfs_unlock(cp);
1990 ubc_setsize(vp, length);
1991 hfs_lock(cp, HFS_FORCE_LOCK);
1992 }
1993
1994 } else { /* Shorten the size of the file */
1995
1996 if ((off_t)fp->ff_size > length) {
1997 /*
1998 * Any buffers that are past the truncation point need to be
1999 * invalidated (to maintain buffer cache consistency).
2000 */
2001
2002 /* Nested transactions will do their own ubc_setsize. */
2003 if (!skipsetsize) {
2004 /*
2005 * ubc_setsize can cause a pageout here
2006 * so we need to drop cnode lock.
2007 */
2008 hfs_unlock(cp);
2009 ubc_setsize(vp, length);
2010 hfs_lock(cp, HFS_FORCE_LOCK);
2011 }
2012
2013 /* Any space previously marked as invalid is now irrelevant: */
2014 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2015 }
2016
2017 /*
2018 * Account for any unmapped blocks. Note that the new
2019 * file length can still end up with unmapped blocks.
2020 */
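/*
 * For example: a file holding 2 allocated blocks plus 4 loaned
 * (unallocated) blocks that is truncated to a length needing 4 blocks
 * first returns all 4 loans (ff_blocks drops to 2), then re-borrows
 * 2 loaned blocks so that ff_blocks covers the new length again.
 */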
2021 if (fp->ff_unallocblocks > 0) {
2022 u_int32_t finalblks;
2023 u_int32_t loanedBlocks;
2024
2025 HFS_MOUNT_LOCK(hfsmp, TRUE);
2026
2027 loanedBlocks = fp->ff_unallocblocks;
2028 cp->c_blocks -= loanedBlocks;
2029 fp->ff_blocks -= loanedBlocks;
2030 fp->ff_unallocblocks = 0;
2031
2032 hfsmp->loanedBlocks -= loanedBlocks;
2033
2034 finalblks = (length + blksize - 1) / blksize;
2035 if (finalblks > fp->ff_blocks) {
2036 /* calculate required unmapped blocks */
2037 loanedBlocks = finalblks - fp->ff_blocks;
2038 hfsmp->loanedBlocks += loanedBlocks;
2039
2040 fp->ff_unallocblocks = loanedBlocks;
2041 cp->c_blocks += loanedBlocks;
2042 fp->ff_blocks += loanedBlocks;
2043 }
2044 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2045 }
2046
2047 /*
2048 * For a TBE process the deallocation of the file blocks is
2049 * delayed until the file is closed. And hfs_close calls
2050 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2051 * isn't set, we make sure this isn't a TBE process.
2052 */
2053 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2054 #if QUOTA
2055 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2056 #endif /* QUOTA */
2057 if (hfs_start_transaction(hfsmp) != 0) {
2058 retval = EINVAL;
2059 goto Err_Exit;
2060 }
2061
2062 if (fp->ff_unallocblocks == 0) {
2063 /* Protect extents b-tree and allocation bitmap */
2064 lockflags = SFL_BITMAP;
2065 if (overflow_extents(fp))
2066 lockflags |= SFL_EXTENTS;
2067 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2068
2069 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2070 (FCB*)fp, length, false));
2071
2072 hfs_systemfile_unlock(hfsmp, lockflags);
2073 }
2074 if (hfsmp->jnl) {
2075 if (retval == 0) {
2076 fp->ff_size = length;
2077 }
2078 (void) hfs_update(vp, TRUE);
2079 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2080 }
2081
2082 hfs_end_transaction(hfsmp);
2083
2084 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2085 if (retval)
2086 goto Err_Exit;
2087 #if QUOTA
2088 /* These are bytes released */
2089 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2090 #endif /* QUOTA */
2091 }
2092 /* Only set update flag if the logical length changes */
2093 if (old_filesize != length)
2094 cp->c_touch_modtime = TRUE;
2095 fp->ff_size = length;
2096 }
2097 cp->c_touch_chgtime = TRUE;
2098 retval = hfs_update(vp, MNT_WAIT);
2099 if (retval) {
2100 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2101 -1, -1, -1, retval, 0);
2102 }
2103
2104 Err_Exit:
2105
2106 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2107 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2108
2109 return (retval);
2110 }
2111
2112
2113
2114 /*
2115 * Truncate a cnode to at most length size, freeing (or adding) the
2116 * disk blocks.
2117 */
2118 __private_extern__
2119 int
2120 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2121 vfs_context_t context)
2122 {
2123 struct filefork *fp = VTOF(vp);
2124 off_t filebytes;
2125 u_long fileblocks;
2126 int blksize, error = 0;
2127 struct cnode *cp = VTOC(vp);
2128
2129 if (vnode_isdir(vp))
2130 return (EISDIR); /* cannot truncate an HFS directory! */
2131
2132 blksize = VTOVCB(vp)->blockSize;
2133 fileblocks = fp->ff_blocks;
2134 filebytes = (off_t)fileblocks * (off_t)blksize;
2135
2136 // have to loop truncating or growing files that are
2137 // really big because otherwise transactions can get
2138 // enormous and consume too many kernel resources.
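// For example, shrinking a file that is several times larger than
// HFS_BIGFILE_SIZE walks filebytes down one HFS_BIGFILE_SIZE step per
// do_hfs_truncate() call, so no single transaction has to free more
// than that much space at once.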
2139
2140 if (length < filebytes) {
2141 while (filebytes > length) {
2142 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2143 filebytes -= HFS_BIGFILE_SIZE;
2144 } else {
2145 filebytes = length;
2146 }
2147 cp->c_flag |= C_FORCEUPDATE;
2148 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2149 if (error)
2150 break;
2151 }
2152 } else if (length > filebytes) {
2153 while (filebytes < length) {
2154 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2155 filebytes += HFS_BIGFILE_SIZE;
2156 } else {
2157 filebytes = length;
2158 }
2159 cp->c_flag |= C_FORCEUPDATE;
2160 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2161 if (error)
2162 break;
2163 }
2164 } else /* Same logical size */ {
2165
2166 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2167 }
2168 /* Files that are changing size are not hot file candidates. */
2169 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2170 fp->ff_bytesread = 0;
2171 }
2172
2173 return (error);
2174 }
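#if 0
/*
 * Illustrative sketch (not compiled): how a caller along the lines of
 * a setattr path might shrink a file to newsize.  The lock ordering
 * shown is an assumption based on the lock/unlock pairs used elsewhere
 * in this file.
 */
static int
hfs_truncate_example(struct vnode *vp, off_t newsize, vfs_context_t context)
{
	struct cnode *cp = VTOC(vp);
	int error;

	hfs_lock_truncate(cp, TRUE);		/* serialize with page-ins/page-outs */
	if ((error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK)) == 0) {
		error = hfs_truncate(vp, newsize, 0, 0, context);
		hfs_unlock(cp);
	}
	hfs_unlock_truncate(cp);
	return (error);
}
#endif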
2175
2176
2177
2178 /*
2179 * Preallocate file storage space.
2180 */
2181 int
2182 hfs_vnop_allocate(struct vnop_allocate_args /* {
2183 vnode_t a_vp;
2184 off_t a_length;
2185 u_int32_t a_flags;
2186 off_t *a_bytesallocated;
2187 off_t a_offset;
2188 vfs_context_t a_context;
2189 } */ *ap)
2190 {
2191 struct vnode *vp = ap->a_vp;
2192 struct cnode *cp;
2193 struct filefork *fp;
2194 ExtendedVCB *vcb;
2195 off_t length = ap->a_length;
2196 off_t startingPEOF;
2197 off_t moreBytesRequested;
2198 off_t actualBytesAdded;
2199 off_t filebytes;
2200 u_long fileblocks;
2201 int retval, retval2;
2202 UInt32 blockHint;
2203 UInt32 extendFlags; /* For call to ExtendFileC */
2204 struct hfsmount *hfsmp;
2205 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2206 int lockflags;
2207
2208 *(ap->a_bytesallocated) = 0;
2209
2210 if (!vnode_isreg(vp))
2211 return (EISDIR);
2212 if (length < (off_t)0)
2213 return (EINVAL);
2214
2215 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2216 return (retval);
2217 cp = VTOC(vp);
2218 fp = VTOF(vp);
2219 hfsmp = VTOHFS(vp);
2220 vcb = VTOVCB(vp);
2221
2222 fileblocks = fp->ff_blocks;
2223 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2224
2225 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2226 retval = EINVAL;
2227 goto Err_Exit;
2228 }
2229
2230 /* Fill in the flags word for the call to Extend the file */
2231
2232 extendFlags = kEFNoClumpMask;
2233 if (ap->a_flags & ALLOCATECONTIG)
2234 extendFlags |= kEFContigMask;
2235 if (ap->a_flags & ALLOCATEALL)
2236 extendFlags |= kEFAllMask;
2237 if (cred && suser(cred, NULL) != 0)
2238 extendFlags |= kEFReserveMask;
2239
2240 retval = E_NONE;
2241 blockHint = 0;
2242 startingPEOF = filebytes;
2243
2244 if (ap->a_flags & ALLOCATEFROMPEOF)
2245 length += filebytes;
2246 else if (ap->a_flags & ALLOCATEFROMVOL)
2247 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2248
2249 /* If no changes are necessary, then we're done */
2250 if (filebytes == length)
2251 goto Std_Exit;
2252
2253 /*
2254 * Lengthen the size of the file. We must ensure that the
2255 * last byte of the file is allocated. Since the smallest
2256 * value of filebytes is 0, length will be at least 1.
2257 */
2258 if (length > filebytes) {
2259 moreBytesRequested = length - filebytes;
2260
2261 #if QUOTA
2262 retval = hfs_chkdq(cp,
2263 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2264 cred, 0);
2265 if (retval)
2266 goto Err_Exit;
2267
2268 #endif /* QUOTA */
2269 /*
2270 * Metadata zone checks.
2271 */
2272 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2273 /*
2274 * Allocate Journal and Quota files in metadata zone.
2275 */
2276 if (hfs_virtualmetafile(cp)) {
2277 extendFlags |= kEFMetadataMask;
2278 blockHint = hfsmp->hfs_metazone_start;
2279 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2280 (blockHint <= hfsmp->hfs_metazone_end)) {
2281 /*
2282 * Move blockHint outside metadata zone.
2283 */
2284 blockHint = hfsmp->hfs_metazone_end + 1;
2285 }
2286 }
2287
2288 if (hfs_start_transaction(hfsmp) != 0) {
2289 retval = EINVAL;
2290 goto Err_Exit;
2291 }
2292
2293 /* Protect extents b-tree and allocation bitmap */
2294 lockflags = SFL_BITMAP;
2295 if (overflow_extents(fp))
2296 lockflags |= SFL_EXTENTS;
2297 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2298
2299 retval = MacToVFSError(ExtendFileC(vcb,
2300 (FCB*)fp,
2301 moreBytesRequested,
2302 blockHint,
2303 extendFlags,
2304 &actualBytesAdded));
2305
2306 *(ap->a_bytesallocated) = actualBytesAdded;
2307 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2308
2309 hfs_systemfile_unlock(hfsmp, lockflags);
2310
2311 if (hfsmp->jnl) {
2312 (void) hfs_update(vp, TRUE);
2313 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2314 }
2315
2316 hfs_end_transaction(hfsmp);
2317
2318 /*
2319 * If we get an error and no changes were made then exit;
2320 * otherwise we must do the hfs_update to reflect the changes.
2321 */
2322 if (retval && (startingPEOF == filebytes))
2323 goto Err_Exit;
2324
2325 /*
2326 * Adjust actualBytesAdded to be allocation block aligned, not
2327 * clump size aligned.
2328 * NOTE: what we are reporting does not affect reality
2329 * until the file is closed, when we truncate the file to allocation
2330 * block size.
2331 */
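/*
 * For example: a request for 10 extra allocation blocks that the
 * allocator satisfies with a full clump still reports only the
 * requested amount, rounded up to a whole allocation block, not the
 * full clump that was actually reserved.
 */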
2332 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2333 *(ap->a_bytesallocated) =
2334 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2335
2336 } else { /* Shorten the size of the file */
2337
2338 if (fp->ff_size > length) {
2339 /*
2340 * Any buffers that are past the truncation point need to be
2341 * invalidated (to maintain buffer cache consistency).
2342 */
2343 }
2344
2345 if (hfs_start_transaction(hfsmp) != 0) {
2346 retval = EINVAL;
2347 goto Err_Exit;
2348 }
2349
2350 /* Protect extents b-tree and allocation bitmap */
2351 lockflags = SFL_BITMAP;
2352 if (overflow_extents(fp))
2353 lockflags |= SFL_EXTENTS;
2354 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2355
2356 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2357
2358 hfs_systemfile_unlock(hfsmp, lockflags);
2359
2360 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2361
2362 if (hfsmp->jnl) {
2363 (void) hfs_update(vp, TRUE);
2364 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2365 }
2366
2367 hfs_end_transaction(hfsmp);
2368
2369
2370 /*
2371 * If we get an error and no changes were made then exit;
2372 * otherwise we must do the hfs_update to reflect the changes.
2373 */
2374 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2375 #if QUOTA
2376 /* These are bytes released */
2377 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2378 #endif /* QUOTA */
2379
2380 if (fp->ff_size > filebytes) {
2381 fp->ff_size = filebytes;
2382
2383 hfs_unlock(cp);
2384 ubc_setsize(vp, fp->ff_size);
2385 hfs_lock(cp, HFS_FORCE_LOCK);
2386 }
2387 }
2388
2389 Std_Exit:
2390 cp->c_touch_chgtime = TRUE;
2391 cp->c_touch_modtime = TRUE;
2392 retval2 = hfs_update(vp, MNT_WAIT);
2393
2394 if (retval == 0)
2395 retval = retval2;
2396 Err_Exit:
2397 hfs_unlock(cp);
2398 return (retval);
2399 }
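#if 0
/*
 * Illustrative user-space sketch (not compiled): the kind of request
 * that typically reaches hfs_vnop_allocate() via fcntl(F_PREALLOCATE).
 * The exact routing through VNOP_ALLOCATE is an assumption; the flag
 * names are the standard <sys/fcntl.h> ones.
 */
#include <fcntl.h>

static int
preallocate_example(int fd, off_t bytes)
{
	fstore_t fst;

	fst.fst_flags = F_ALLOCATECONTIG | F_ALLOCATEALL;	/* maps to ALLOCATECONTIG | ALLOCATEALL */
	fst.fst_posmode = F_PEOFPOSMODE;			/* maps to ALLOCATEFROMPEOF */
	fst.fst_offset = 0;
	fst.fst_length = bytes;
	fst.fst_bytesalloc = 0;					/* filled in with bytes actually allocated */

	return fcntl(fd, F_PREALLOCATE, &fst);
}
#endif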
2400
2401
2402 /*
2403 * Pagein for HFS filesystem
2404 */
2405 int
2406 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2407 /*
2408 struct vnop_pagein_args {
2409 vnode_t a_vp,
2410 upl_t a_pl,
2411 vm_offset_t a_pl_offset,
2412 off_t a_f_offset,
2413 size_t a_size,
2414 int a_flags
2415 vfs_context_t a_context;
2416 };
2417 */
2418 {
2419 vnode_t vp = ap->a_vp;
2420 int error;
2421
2422 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2423 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2424 /*
2425 * Keep track of blocks read.
2426 */
2427 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2428 struct cnode *cp;
2429 struct filefork *fp;
2430 int bytesread;
2431 int took_cnode_lock = 0;
2432
2433 cp = VTOC(vp);
2434 fp = VTOF(vp);
2435
2436 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2437 bytesread = fp->ff_size;
2438 else
2439 bytesread = ap->a_size;
2440
2441 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2442 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2443 hfs_lock(cp, HFS_FORCE_LOCK);
2444 took_cnode_lock = 1;
2445 }
2446 /*
2447 * If this file hasn't been seen since the start of
2448 * the current sampling period then start over.
2449 */
2450 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2451 struct timeval tv;
2452
2453 fp->ff_bytesread = bytesread;
2454 microtime(&tv);
2455 cp->c_atime = tv.tv_sec;
2456 } else {
2457 fp->ff_bytesread += bytesread;
2458 }
2459 cp->c_touch_acctime = TRUE;
2460 if (took_cnode_lock)
2461 hfs_unlock(cp);
2462 }
2463 return (error);
2464 }
2465
2466 /*
2467 * Pageout for HFS filesystem.
2468 */
2469 int
2470 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2471 /*
2472 struct vnop_pageout_args {
2473 vnode_t a_vp,
2474 upl_t a_pl,
2475 vm_offset_t a_pl_offset,
2476 off_t a_f_offset,
2477 size_t a_size,
2478 int a_flags
2479 vfs_context_t a_context;
2480 };
2481 */
2482 {
2483 vnode_t vp = ap->a_vp;
2484 struct cnode *cp;
2485 struct filefork *fp;
2486 int retval;
2487 off_t end_of_range;
2488 off_t filesize;
2489
2490 cp = VTOC(vp);
2491 if (cp->c_lockowner == current_thread()) {
2492 panic("pageout: %s cnode lock already held!\n",
2493 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2494 }
2495 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2496 if (!(ap->a_flags & UPL_NOCOMMIT)) {
2497 ubc_upl_abort_range(ap->a_pl,
2498 ap->a_pl_offset,
2499 ap->a_size,
2500 UPL_ABORT_FREE_ON_EMPTY);
2501 }
2502 return (retval);
2503 }
2504 fp = VTOF(vp);
2505
2506 filesize = fp->ff_size;
2507 end_of_range = ap->a_f_offset + ap->a_size - 1;
2508
2509 if (end_of_range >= filesize) {
2510 end_of_range = (off_t)(filesize - 1);
2511 }
2512 if (ap->a_f_offset < filesize) {
2513 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2514 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2515 }
2516 hfs_unlock(cp);
2517
2518 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2519 ap->a_size, filesize, ap->a_flags);
2520
2521 /*
2522 * If data was written, and setuid or setgid bits are set and
2523 * this process is not the superuser then clear the setuid and
2524 * setgid bits as a precaution against tampering.
2525 */
2526 if ((retval == 0) &&
2527 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2528 (vfs_context_suser(ap->a_context) != 0)) {
2529 hfs_lock(cp, HFS_FORCE_LOCK);
2530 cp->c_mode &= ~(S_ISUID | S_ISGID);
2531 cp->c_touch_chgtime = TRUE;
2532 hfs_unlock(cp);
2533 }
2534 return (retval);
2535 }
2536
2537 /*
2538 * Intercept B-Tree node writes to unswap them if necessary.
2539 */
2540 int
2541 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2542 {
2543 int retval = 0;
2544 register struct buf *bp = ap->a_bp;
2545 register struct vnode *vp = buf_vnode(bp);
2546 BlockDescriptor block;
2547
2548 /* Trap B-Tree writes */
2549 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2550 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2551 (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
2552
2553 /*
2554 * Swap and validate the node if it is in native byte order.
2555 * This is always true on big endian, so we always validate
2556 * before writing here. On little endian, the node typically has
2557 * been swapped and validated when it was written to the journal,
2558 * so we won't do anything here.
2559 */
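/*
 * The last two bytes of a B-Tree node hold the offset of record 0,
 * which always starts right after the 14-byte BTNodeDescriptor.  If
 * reading them as a host-order UInt16 yields 0x000e, the node is in
 * host byte order and must be swapped to big endian before it hits
 * the disk.
 */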
2560 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2561 /* Prepare the block pointer */
2562 block.blockHeader = bp;
2563 block.buffer = (char *)buf_dataptr(bp);
2564 block.blockNum = buf_lblkno(bp);
2565 /* not found in cache ==> came from disk */
2566 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2567 block.blockSize = buf_count(bp);
2568
2569 /* Endian un-swap B-Tree node */
2570 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
2571 if (retval)
2572 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
2573 }
2574 }
2575
2576 /* This buffer shouldn't be locked anymore but if it is, clear it */
2577 if ((buf_flags(bp) & B_LOCKED)) {
2578 // XXXdbg
2579 if (VTOHFS(vp)->jnl) {
2580 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
2581 }
2582 buf_clearflags(bp, B_LOCKED);
2583 }
2584 retval = vn_bwrite (ap);
2585
2586 return (retval);
2587 }
2588
2589 /*
2590 * Relocate a file to a new location on disk
2591 * cnode must be locked on entry
2592 *
2593 * Relocation occurs by cloning the file's data from its
2594 * current set of blocks to a new set of blocks. During
2595 * the relocation all of the blocks (old and new) are
2596 * owned by the file.
2597 *
2598 * -----------------
2599 * |///////////////|
2600 * -----------------
2601 * 0 N (file offset)
2602 *
2603 * ----------------- -----------------
2604 * |///////////////| | | STEP 1 (acquire new blocks)
2605 * ----------------- -----------------
2606 * 0 N N+1 2N
2607 *
2608 * ----------------- -----------------
2609 * |///////////////| |///////////////| STEP 2 (clone data)
2610 * ----------------- -----------------
2611 * 0 N N+1 2N
2612 *
2613 * -----------------
2614 * |///////////////| STEP 3 (head truncate blocks)
2615 * -----------------
2616 * 0 N
2617 *
2618 * During steps 2 and 3 page-outs to file offsets less
2619 * than or equal to N are suspended.
2620 *
2621 * During step 3 page-ins to the file get supended.
2622 */
2623 __private_extern__
2624 int
2625 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2626 struct proc *p)
2627 {
2628 struct cnode *cp;
2629 struct filefork *fp;
2630 struct hfsmount *hfsmp;
2631 u_int32_t headblks;
2632 u_int32_t datablks;
2633 u_int32_t blksize;
2634 u_int32_t growsize;
2635 u_int32_t nextallocsave;
2636 daddr64_t sector_a, sector_b;
2637 int disabled_caching = 0;
2638 int eflags;
2639 off_t newbytes;
2640 int retval;
2641 int lockflags = 0;
2642 int took_trunc_lock = 0;
2643 int started_tr = 0;
2644 enum vtype vnodetype;
2645
2646 vnodetype = vnode_vtype(vp);
2647 if (vnodetype != VREG && vnodetype != VLNK) {
2648 return (EPERM);
2649 }
2650
2651 hfsmp = VTOHFS(vp);
2652 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2653 return (ENOSPC);
2654 }
2655
2656 cp = VTOC(vp);
2657 fp = VTOF(vp);
2658 if (fp->ff_unallocblocks)
2659 return (EINVAL);
2660 blksize = hfsmp->blockSize;
2661 if (blockHint == 0)
2662 blockHint = hfsmp->nextAllocation;
2663
2664 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2665 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2666 return (EFBIG);
2667 }
2668
2669 //
2670 // We do not believe that this call to hfs_fsync() is
2671 // necessary and it causes a journal transaction
2672 // deadlock so we are removing it.
2673 //
2674 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2675 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2676 // if (retval)
2677 // return (retval);
2678 //}
2679
2680 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2681 hfs_unlock(cp);
2682 hfs_lock_truncate(cp, TRUE);
2683 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2684 hfs_unlock_truncate(cp);
2685 return (retval);
2686 }
2687 took_trunc_lock = 1;
2688 }
2689 headblks = fp->ff_blocks;
2690 datablks = howmany(fp->ff_size, blksize);
2691 growsize = datablks * blksize;
2692 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2693 if (blockHint >= hfsmp->hfs_metazone_start &&
2694 blockHint <= hfsmp->hfs_metazone_end)
2695 eflags |= kEFMetadataMask;
2696
2697 if (hfs_start_transaction(hfsmp) != 0) {
2698 if (took_trunc_lock)
2699 hfs_unlock_truncate(cp);
2700 return (EINVAL);
2701 }
2702 started_tr = 1;
2703 /*
2704 * Protect the extents b-tree and the allocation bitmap
2705 * during MapFileBlockC and ExtendFileC operations.
2706 */
2707 lockflags = SFL_BITMAP;
2708 if (overflow_extents(fp))
2709 lockflags |= SFL_EXTENTS;
2710 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2711
2712 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2713 if (retval) {
2714 retval = MacToVFSError(retval);
2715 goto out;
2716 }
2717
2718 /*
2719 * STEP 1 - acquire new allocation blocks.
2720 */
2721 if (!vnode_isnocache(vp)) {
2722 vnode_setnocache(vp);
2723 disabled_caching = 1;
2724
2725 }
2726 nextallocsave = hfsmp->nextAllocation;
2727 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
2728 if (eflags & kEFMetadataMask) {
2729 HFS_MOUNT_LOCK(hfsmp, TRUE);
2730 hfsmp->nextAllocation = nextallocsave;
2731 hfsmp->vcbFlags |= 0xFF00;
2732 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2733 }
2734
2735 retval = MacToVFSError(retval);
2736 if (retval == 0) {
2737 cp->c_flag |= C_MODIFIED;
2738 if (newbytes < growsize) {
2739 retval = ENOSPC;
2740 goto restore;
2741 } else if (fp->ff_blocks < (headblks + datablks)) {
2742 printf("hfs_relocate: allocation failed\n");
2743 retval = ENOSPC;
2744 goto restore;
2745 }
2746
2747 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2748 if (retval) {
2749 retval = MacToVFSError(retval);
2750 } else if ((sector_a + 1) == sector_b) {
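/*
 * The new space starts in the very next sector after the existing
 * data, i.e. the allocator simply extended the file in place, so
 * cloning would not actually move it anywhere; undo the allocation
 * and report ENOSPC.
 */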
2751 retval = ENOSPC;
2752 goto restore;
2753 } else if ((eflags & kEFMetadataMask) &&
2754 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2755 hfsmp->hfs_metazone_end)) {
2756 printf("hfs_relocate: didn't move into metadata zone\n");
2757 retval = ENOSPC;
2758 goto restore;
2759 }
2760 }
2761 /* Done with system locks and journal for now. */
2762 hfs_systemfile_unlock(hfsmp, lockflags);
2763 lockflags = 0;
2764 hfs_end_transaction(hfsmp);
2765 started_tr = 0;
2766
2767 if (retval) {
2768 /*
2769 * Check to see if failure is due to excessive fragmentation.
2770 */
2771 if ((retval == ENOSPC) &&
2772 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2773 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2774 }
2775 goto out;
2776 }
2777 /*
2778 * STEP 2 - clone file data into the new allocation blocks.
2779 */
2780
2781 if (vnodetype == VLNK)
2782 retval = hfs_clonelink(vp, blksize, cred, p);
2783 else if (vnode_issystem(vp))
2784 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2785 else
2786 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2787
2788 /* Start transaction for step 3 or for a restore. */
2789 if (hfs_start_transaction(hfsmp) != 0) {
2790 retval = EINVAL;
2791 goto out;
2792 }
2793 started_tr = 1;
2794 if (retval)
2795 goto restore;
2796
2797 /*
2798 * STEP 3 - switch to cloned data and remove old blocks.
2799 */
2800 lockflags = SFL_BITMAP;
2801 if (overflow_extents(fp))
2802 lockflags |= SFL_EXTENTS;
2803 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2804
2805 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2806
2807 hfs_systemfile_unlock(hfsmp, lockflags);
2808 lockflags = 0;
2809 if (retval)
2810 goto restore;
2811 out:
2812 if (took_trunc_lock)
2813 hfs_unlock_truncate(cp);
2814
2815 if (lockflags) {
2816 hfs_systemfile_unlock(hfsmp, lockflags);
2817 lockflags = 0;
2818 }
2819
2820 // See comment up above about calls to hfs_fsync()
2821 //
2822 //if (retval == 0)
2823 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2824
2825 if (hfsmp->jnl) {
2826 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2827 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2828 else
2829 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2830 }
2831 exit:
2832 if (disabled_caching) {
2833 vnode_clearnocache(vp);
2834 }
2835 if (started_tr)
2836 hfs_end_transaction(hfsmp);
2837
2838 return (retval);
2839
2840 restore:
2841 if (fp->ff_blocks == headblks)
2842 goto exit;
2843 /*
2844 * Give back any newly allocated space.
2845 */
2846 if (lockflags == 0) {
2847 lockflags = SFL_BITMAP;
2848 if (overflow_extents(fp))
2849 lockflags |= SFL_EXTENTS;
2850 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2851 }
2852
2853 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2854
2855 hfs_systemfile_unlock(hfsmp, lockflags);
2856 lockflags = 0;
2857
2858 if (took_trunc_lock)
2859 hfs_unlock_truncate(cp);
2860 goto exit;
2861 }
2862
2863
2864 /*
2865 * Clone a symlink.
2866 *
2867 */
2868 static int
2869 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2870 {
2871 struct buf *head_bp = NULL;
2872 struct buf *tail_bp = NULL;
2873 int error;
2874
2875
2876 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2877 if (error)
2878 goto out;
2879
2880 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2881 if (tail_bp == NULL) {
2882 error = EIO;
2883 goto out;
2884 }
2885 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2886 error = (int)buf_bwrite(tail_bp);
2887 out:
2888 if (head_bp) {
2889 buf_markinvalid(head_bp);
2890 buf_brelse(head_bp);
2891 }
2892 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2893
2894 return (error);
2895 }
2896
2897 /*
2898 * Clone a file's data within the file.
2899 *
2900 */
2901 static int
2902 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2903 {
2904 caddr_t bufp;
2905 size_t writebase;
2906 size_t bufsize;
2907 size_t copysize;
2908 size_t iosize;
2909 off_t filesize;
2910 size_t offset;
2911 uio_t auio;
2912 int error = 0;
2913
2914 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2915 writebase = blkstart * blksize;
2916 copysize = blkcnt * blksize;
2917 iosize = bufsize = MIN(copysize, 4096 * 16);
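/* Copy in chunks of at most 64K (4096 * 16) at a time. */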
2918 offset = 0;
2919
2920 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2921 return (ENOMEM);
2922 }
2923 hfs_unlock(VTOC(vp));
2924
2925 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2926
2927 while (offset < copysize) {
2928 iosize = MIN(copysize - offset, iosize);
2929
2930 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2931 uio_addiov(auio, (uintptr_t)bufp, iosize);
2932
2933 error = cluster_read(vp, auio, copysize, 0);
2934 if (error) {
2935 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2936 break;
2937 }
2938 if (uio_resid(auio) != 0) {
2939 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2940 error = EIO;
2941 break;
2942 }
2943
2944 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2945 uio_addiov(auio, (uintptr_t)bufp, iosize);
2946
2947 error = cluster_write(vp, auio, filesize + offset,
2948 filesize + offset + iosize,
2949 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2950 if (error) {
2951 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2952 break;
2953 }
2954 if (uio_resid(auio) != 0) {
2955 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2956 error = EIO;
2957 break;
2958 }
2959 offset += iosize;
2960 }
2961 uio_free(auio);
2962
2963 /*
2964 * No need to call ubc_sync_range or hfs_invalbuf
2965 * since the file was copied using IO_NOCACHE.
2966 */
2967
2968 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2969
2970 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2971 return (error);
2972 }
2973
2974 /*
2975 * Clone a system (metadata) file.
2976 *
2977 */
2978 static int
2979 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2980 kauth_cred_t cred, struct proc *p)
2981 {
2982 caddr_t bufp;
2983 char * offset;
2984 size_t bufsize;
2985 size_t iosize;
2986 struct buf *bp = NULL;
2987 daddr64_t blkno;
2988 daddr64_t blk;
2989 daddr64_t start_blk;
2990 daddr64_t last_blk;
2991 int breadcnt;
2992 int i;
2993 int error = 0;
2994
2995
2996 iosize = GetLogicalBlockSize(vp);
2997 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
2998 breadcnt = bufsize / iosize;
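/*
 * Stage up to 1MB at a time, rounded down to a whole number of
 * logical blocks (iosize is assumed to be a power of two here).
 */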
2999
3000 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3001 return (ENOMEM);
3002 }
3003 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3004 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
3005 blkno = 0;
3006
3007 while (blkno < last_blk) {
3008 /*
3009 * Read up to a megabyte
3010 */
3011 offset = bufp;
3012 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3013 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3014 if (error) {
3015 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3016 goto out;
3017 }
3018 if (buf_count(bp) != iosize) {
3019 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3020 goto out;
3021 }
3022 bcopy((char *)buf_dataptr(bp), offset, iosize);
3023
3024 buf_markinvalid(bp);
3025 buf_brelse(bp);
3026 bp = NULL;
3027
3028 offset += iosize;
3029 }
3030
3031 /*
3032 * Write up to a megabyte
3033 */
3034 offset = bufp;
3035 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3036 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3037 if (bp == NULL) {
3038 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3039 error = EIO;
3040 goto out;
3041 }
3042 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3043 error = (int)buf_bwrite(bp);
3044 bp = NULL;
3045 if (error)
3046 goto out;
3047 offset += iosize;
3048 }
3049 }
3050 out:
3051 if (bp) {
3052 buf_brelse(bp);
3053 }
3054
3055 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3056
3057 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3058
3059 return (error);
3060 }