1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* @(#)hfs_readwrite.c 1.0
23 *
24 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
25 *
26 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
27 *
28 */
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/resourcevar.h>
33 #include <sys/kernel.h>
34 #include <sys/fcntl.h>
35 #include <sys/filedesc.h>
36 #include <sys/stat.h>
37 #include <sys/buf.h>
38 #include <sys/proc.h>
39 #include <sys/kauth.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/vfs_context.h>
43
44 #include <miscfs/specfs/specdev.h>
45
46 #include <sys/ubc.h>
47 #include <vm/vm_pageout.h>
48 #include <vm/vm_kern.h>
49
50 #include <sys/kdebug.h>
51
52 #include "hfs.h"
53 #include "hfs_endian.h"
54 #include "hfs_fsctl.h"
55 #include "hfs_quota.h"
56 #include "hfscommon/headers/FileMgrInternal.h"
57 #include "hfscommon/headers/BTreesInternal.h"
58 #include "hfs_cnode.h"
59 #include "hfs_dbg.h"
60
61 extern int overflow_extents(struct filefork *fp);
62
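/*
 * can_cluster(size): cluster I/O is only used when the (logical block) size
 * is a multiple of 4K and no larger than half of MAXPHYSIO.
 */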
63 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
64
65 enum {
66 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
67 };
68
69 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
70
71 extern int hfs_setextendedsecurity(struct hfsmount *, int);
72
73
74 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
75 static int hfs_clonefile(struct vnode *, int, int, int);
76 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
77
78
79 /*****************************************************************************
80 *
81 * I/O Operations on vnodes
82 *
83 *****************************************************************************/
84 int hfs_vnop_read(struct vnop_read_args *);
85 int hfs_vnop_write(struct vnop_write_args *);
86 int hfs_vnop_ioctl(struct vnop_ioctl_args *);
87 int hfs_vnop_select(struct vnop_select_args *);
88 int hfs_vnop_blktooff(struct vnop_blktooff_args *);
89 int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
90 int hfs_vnop_blockmap(struct vnop_blockmap_args *);
91 int hfs_vnop_strategy(struct vnop_strategy_args *);
92 int hfs_vnop_allocate(struct vnop_allocate_args *);
93 int hfs_vnop_pagein(struct vnop_pagein_args *);
94 int hfs_vnop_pageout(struct vnop_pageout_args *);
95 int hfs_vnop_bwrite(struct vnop_bwrite_args *);
96
97
98 /*
99 * Read data from a file.
100 */
101 int
102 hfs_vnop_read(struct vnop_read_args *ap)
103 {
104 uio_t uio = ap->a_uio;
105 struct vnode *vp = ap->a_vp;
106 struct cnode *cp;
107 struct filefork *fp;
108 struct hfsmount *hfsmp;
109 off_t filesize;
110 off_t filebytes;
111 off_t start_resid = uio_resid(uio);
112 off_t offset = uio_offset(uio);
113 int retval = 0;
114
115
116 /* Preflight checks */
117 if (!vnode_isreg(vp)) {
118 /* can only read regular files */
119 if (vnode_isdir(vp))
120 return (EISDIR);
121 else
122 return (EPERM);
123 }
124 if (start_resid == 0)
125 return (0); /* Nothing left to do */
126 if (offset < 0)
127 return (EINVAL); /* can't read from a negative offset */
128
129 cp = VTOC(vp);
130 fp = VTOF(vp);
131 hfsmp = VTOHFS(vp);
132
133 /* Protect against a size change. */
134 hfs_lock_truncate(cp, 0);
135
136 filesize = fp->ff_size;
137 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
138 if (offset > filesize) {
139 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
140 (offset > (off_t)MAXHFSFILESIZE)) {
141 retval = EFBIG;
142 }
143 goto exit;
144 }
145
146 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
147 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
148
149 retval = cluster_read(vp, uio, filesize, 0);
150
151 cp->c_touch_acctime = TRUE;
152
153 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
154 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
155
156 /*
157 * Keep track of blocks read
158 */
159 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
160 int took_cnode_lock = 0;
161 off_t bytesread;
162
163 bytesread = start_resid - uio_resid(uio);
164
165 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
166 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
167 hfs_lock(cp, HFS_FORCE_LOCK);
168 took_cnode_lock = 1;
169 }
170 /*
171 * If this file hasn't been seen since the start of
172 * the current sampling period then start over.
173 */
174 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
175 struct timeval tv;
176
177 fp->ff_bytesread = bytesread;
178 microtime(&tv);
179 cp->c_atime = tv.tv_sec;
180 } else {
181 fp->ff_bytesread += bytesread;
182 }
183 if (took_cnode_lock)
184 hfs_unlock(cp);
185 }
186 exit:
187 hfs_unlock_truncate(cp);
188 return (retval);
189 }
190
191 /*
192 * Write data to a file.
193 */
194 int
195 hfs_vnop_write(struct vnop_write_args *ap)
196 {
197 uio_t uio = ap->a_uio;
198 struct vnode *vp = ap->a_vp;
199 struct cnode *cp;
200 struct filefork *fp;
201 struct hfsmount *hfsmp;
202 kauth_cred_t cred = NULL;
203 off_t origFileSize;
204 off_t writelimit;
205 off_t bytesToAdd;
206 off_t actualBytesAdded;
207 off_t filebytes;
208 off_t offset;
209 size_t resid;
210 int eflags;
211 int ioflag = ap->a_ioflag;
212 int retval = 0;
213 int lockflags;
214 int cnode_locked = 0;
215
216 // LP64todo - fix this! uio_resid may be a 64-bit value
217 resid = uio_resid(uio);
218 offset = uio_offset(uio);
219
220 if (offset < 0)
221 return (EINVAL);
222 if (resid == 0)
223 return (E_NONE);
224 if (!vnode_isreg(vp))
225 return (EPERM); /* Can only write regular files */
226
227 /* Protect against a size change. */
228 hfs_lock_truncate(VTOC(vp), TRUE);
229
230 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
231 hfs_unlock_truncate(VTOC(vp));
232 return (retval);
233 }
234 cnode_locked = 1;
235 cp = VTOC(vp);
236 fp = VTOF(vp);
237 hfsmp = VTOHFS(vp);
238 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
239
240 if (ioflag & IO_APPEND) {
241 uio_setoffset(uio, fp->ff_size);
242 offset = fp->ff_size;
243 }
244 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
245 retval = EPERM;
246 goto exit;
247 }
248
249 origFileSize = fp->ff_size;
250 eflags = kEFDeferMask; /* defer file block allocations */
251
252 #ifdef HFS_SPARSE_DEV
253 /*
254 * When the underlying device is sparse and space
255 * is low (< 8MB), stop doing delayed allocations
256 * and begin doing synchronous I/O.
257 */
258 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
259 (hfs_freeblks(hfsmp, 0) < 2048)) {
260 eflags &= ~kEFDeferMask;
261 ioflag |= IO_SYNC;
262 }
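/*
 * (The 2048-block threshold above corresponds to 8 MB only with the common
 * 4 KB allocation block size; the check itself is expressed in allocation
 * blocks.)
 */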
263 #endif /* HFS_SPARSE_DEV */
264
265 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
266 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
267
268 /* Now test if we need to extend the file */
269 /* Doing so will adjust the filebytes for us */
270
271 writelimit = offset + resid;
272 if (writelimit <= filebytes)
273 goto sizeok;
274
275 cred = vfs_context_ucred(ap->a_context);
276 #if QUOTA
277 bytesToAdd = writelimit - filebytes;
278 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
279 cred, 0);
280 if (retval)
281 goto exit;
282 #endif /* QUOTA */
283
284 if (hfs_start_transaction(hfsmp) != 0) {
285 retval = EINVAL;
286 goto exit;
287 }
288
289 while (writelimit > filebytes) {
290 bytesToAdd = writelimit - filebytes;
291 if (cred && suser(cred, NULL) != 0)
292 eflags |= kEFReserveMask;
293
294 /* Protect extents b-tree and allocation bitmap */
295 lockflags = SFL_BITMAP;
296 if (overflow_extents(fp))
297 lockflags |= SFL_EXTENTS;
298 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
299
300 /* Files that are changing size are not hot file candidates. */
301 if (hfsmp->hfc_stage == HFC_RECORDING) {
302 fp->ff_bytesread = 0;
303 }
304 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
305 0, eflags, &actualBytesAdded));
306
307 hfs_systemfile_unlock(hfsmp, lockflags);
308
309 if ((actualBytesAdded == 0) && (retval == E_NONE))
310 retval = ENOSPC;
311 if (retval != E_NONE)
312 break;
313 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
314 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
315 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
316 }
317 (void) hfs_update(vp, TRUE);
318 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
319 (void) hfs_end_transaction(hfsmp);
320
321 sizeok:
322 if (retval == E_NONE) {
323 off_t filesize;
324 off_t zero_off;
325 off_t tail_off;
326 off_t inval_start;
327 off_t inval_end;
328 off_t io_start;
329 int lflag;
330 struct rl_entry *invalid_range;
331
332 if (writelimit > fp->ff_size)
333 filesize = writelimit;
334 else
335 filesize = fp->ff_size;
336
337 lflag = (ioflag & IO_SYNC);
338
339 if (offset <= fp->ff_size) {
340 zero_off = offset & ~PAGE_MASK_64;
341
342 /* Check whether the area between zero_off and the start
343 of the transfer is invalid and should be zero-filled
344 as part of the transfer:
345 */
346 if (offset > zero_off) {
347 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
348 lflag |= IO_HEADZEROFILL;
349 }
350 } else {
351 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
352
353 /* The bytes between fp->ff_size and uio->uio_offset must never be
354 read without being zeroed. The current last block is filled with zeroes
355 if it holds valid data, but in all cases we merely do a little bookkeeping
356 to track the area from the end of the current last page to the start of
357 the area actually written. For the same reason only the bytes up to the
358 start of the page where this write will start are invalidated; any remainder
359 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
360
361 Note that inval_start, the start of the page after the current EOF,
362 may be past the start of the write, in which case the zeroing
363 will be handled by the cluster_write of the actual data.
364 */
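/*
 * A worked example (assuming 4K pages): with fp->ff_size = 5000 and a write
 * starting at offset = 20000, inval_start = 8192, inval_end = 16384 and
 * zero_off = 5000.  If the current EOF page (bytes 4096-8191) holds valid
 * data, the explicit cluster_write below zero-fills bytes 5000-8191, pages
 * 8192-16383 are marked invalid, and bytes 16384-19999 are zero-filled later
 * via IO_HEADZEROFILL.  If the EOF page is already invalid, inval_start is
 * instead pulled back to eof_page_base = 4096 and the whole range 4096-16383
 * is simply marked invalid.
 */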
365 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
366 inval_end = offset & ~PAGE_MASK_64;
367 zero_off = fp->ff_size;
368
369 if ((fp->ff_size & PAGE_MASK_64) &&
370 (rl_scan(&fp->ff_invalidranges,
371 eof_page_base,
372 fp->ff_size - 1,
373 &invalid_range) != RL_NOOVERLAP)) {
374 /* The page containing the EOF is not valid, so the
375 entire page must be made inaccessible now. If the write
376 starts on a page beyond the page containing the eof
377 (inval_end > eof_page_base), add the
378 whole page to the range to be invalidated. Otherwise
379 (i.e. if the write starts on the same page), zero-fill
380 the entire page explicitly now:
381 */
382 if (inval_end > eof_page_base) {
383 inval_start = eof_page_base;
384 } else {
385 zero_off = eof_page_base;
386 };
387 };
388
389 if (inval_start < inval_end) {
390 struct timeval tv;
391 /* There's some range of data that's going to be marked invalid */
392
393 if (zero_off < inval_start) {
394 /* The pages between inval_start and inval_end are going to be invalidated,
395 and the actual write will start on a page past inval_end. Now's the last
396 chance to zero-fill the page containing the EOF:
397 */
398 hfs_unlock(cp);
399 cnode_locked = 0;
400 retval = cluster_write(vp, (uio_t) 0,
401 fp->ff_size, inval_start,
402 zero_off, (off_t)0,
403 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
404 hfs_lock(cp, HFS_FORCE_LOCK);
405 cnode_locked = 1;
406 if (retval) goto ioerr_exit;
407 offset = uio_offset(uio);
408 };
409
410 /* Mark the remaining area of the newly allocated space as invalid: */
411 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
412 microuptime(&tv);
413 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
414 zero_off = fp->ff_size = inval_end;
415 };
416
417 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
418 };
419
420 /* Check to see whether the area between the end of the write and the end of
421 the page it falls in is invalid and should be zero-filled as part of the transfer:
422 */
423 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
424 if (tail_off > filesize) tail_off = filesize;
425 if (tail_off > writelimit) {
426 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
427 lflag |= IO_TAILZEROFILL;
428 };
429 };
430
431 /*
432 * if the write starts beyond the current EOF (possibly advanced in the
433 * zeroing of the last block, above), then we'll zero fill from the current EOF
434 * to where the write begins:
435 *
436 * NOTE: If (and ONLY if) the portion of the file about to be written is
437 * before the current EOF it might be marked as invalid now and must be
438 * made readable (removed from the invalid ranges) before cluster_write
439 * tries to write it:
440 */
441 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
442 if (io_start < fp->ff_size) {
443 off_t io_end;
444
445 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
446 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
447 };
448
449 hfs_unlock(cp);
450 cnode_locked = 0;
451 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
452 tail_off, lflag | IO_NOZERODIRTY);
453 offset = uio_offset(uio);
454 if (offset > fp->ff_size) {
455 fp->ff_size = offset;
456
457 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
458 /* Files that are changing size are not hot file candidates. */
459 if (hfsmp->hfc_stage == HFC_RECORDING)
460 fp->ff_bytesread = 0;
461 }
462 if (resid > uio_resid(uio)) {
463 cp->c_touch_chgtime = TRUE;
464 cp->c_touch_modtime = TRUE;
465 }
466 }
467 HFS_KNOTE(vp, NOTE_WRITE);
468
469 ioerr_exit:
470 /*
471 * If we successfully wrote any data, and we are not the superuser
472 * we clear the setuid and setgid bits as a precaution against
473 * tampering.
474 */
475 if (cp->c_mode & (S_ISUID | S_ISGID)) {
476 cred = vfs_context_ucred(ap->a_context);
477 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
478 if (!cnode_locked) {
479 hfs_lock(cp, HFS_FORCE_LOCK);
480 cnode_locked = 1;
481 }
482 cp->c_mode &= ~(S_ISUID | S_ISGID);
483 }
484 }
485 if (retval) {
486 if (ioflag & IO_UNIT) {
487 if (!cnode_locked) {
488 hfs_lock(cp, HFS_FORCE_LOCK);
489 cnode_locked = 1;
490 }
491 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
492 0, ap->a_context);
493 // LP64todo - fix this! resid needs to be user_ssize_t
494 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
495 uio_setresid(uio, resid);
496 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
497 }
498 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
499 if (!cnode_locked) {
500 hfs_lock(cp, HFS_FORCE_LOCK);
501 cnode_locked = 1;
502 }
503 retval = hfs_update(vp, TRUE);
504 }
505 /* Updating vcbWrCnt doesn't need to be atomic. */
506 hfsmp->vcbWrCnt++;
507
508 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
509 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
510 exit:
511 if (cnode_locked)
512 hfs_unlock(cp);
513 hfs_unlock_truncate(cp);
514 return (retval);
515 }
516
517 /* support for the "bulk-access" fcntl */
518
519 #define CACHE_ELEMS 64
520 #define CACHE_LEVELS 16
521 #define PARENT_IDS_FLAG 0x100
522
523 /* from hfs_attrlist.c */
524 extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
525 mode_t obj_mode, struct mount *mp,
526 kauth_cred_t cred, struct proc *p);
527
528 /* from vfs/vfs_fsevents.c */
529 extern char *get_pathbuff(void);
530 extern void release_pathbuff(char *buff);
531
532 struct access_cache {
533 int numcached;
534 int cachehits; /* these two for statistics gathering */
535 int lookups;
536 unsigned int *acache;
537 Boolean *haveaccess;
538 };
539
540 struct access_t {
541 uid_t uid; /* IN: effective user id */
542 short flags; /* IN: access requested (i.e. R_OK) */
543 short num_groups; /* IN: number of groups user belongs to */
544 int num_files; /* IN: number of files to process */
545 int *file_ids; /* IN: array of file ids */
546 gid_t *groups; /* IN: array of groups */
547 short *access; /* OUT: access info for each file (0 for 'has access') */
548 };
549
550 struct user_access_t {
551 uid_t uid; /* IN: effective user id */
552 short flags; /* IN: access requested (i.e. R_OK) */
553 short num_groups; /* IN: number of groups user belongs to */
554 int num_files; /* IN: number of files to process */
555 user_addr_t file_ids; /* IN: array of file ids */
556 user_addr_t groups; /* IN: array of groups */
557 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
558 };
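/*
 * A rough, hypothetical sketch of how a root-run user-space tool might
 * drive the bulk-access fsctl (the exact header and request names visible
 * to user space are assumptions, not guarantees):
 *
 *	int file_ids[3] = { 123, 456, 789 };
 *	gid_t groups[2] = { 20, 80 };
 *	short results[3];
 *	struct access_t args;
 *
 *	args.uid = 501;			(effective uid to evaluate access as)
 *	args.flags = R_OK;		(access being requested)
 *	args.num_groups = 2;		args.groups = groups;
 *	args.num_files = 3;		args.file_ids = file_ids;
 *	args.access = results;
 *	fsctl("/Volumes/MyVol", HFS_BULKACCESS, &args, 0);
 *
 * On return, results[i] is 0 if access would be granted to file_ids[i] and
 * an errno value (e.g. EACCES) otherwise; the handler below additionally
 * requires the caller's real uid to be 0 and num_files to be 1..256.
 */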
559
560 /*
561 * Perform a binary search for the given parent_id. Return value is
562 * found/not found boolean, and indexp will be the index of the item
563 * or the index at which to insert the item if it's not found.
564 */
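/*
 * For example, if acache currently holds { 12, 57, 300 }:
 *   lookup_bucket(cache, &idx, 57)  returns 1 with idx = 1 (found), while
 *   lookup_bucket(cache, &idx, 100) returns 0 with idx = 2, the position
 *   at which 100 would be inserted to keep the array sorted.
 */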
565 static int
566 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
567 {
568 unsigned int lo, hi;
569 int index, matches = 0;
570
571 if (cache->numcached == 0) {
572 *indexp = 0;
573 return 0; // table is empty, so insert at index=0 and report no match
574 }
575
576 if (cache->numcached > CACHE_ELEMS) {
577 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
578 cache->numcached, CACHE_ELEMS);*/
579 cache->numcached = CACHE_ELEMS;
580 }
581
582 lo = 0;
583 hi = cache->numcached - 1;
584 index = -1;
585
586 /* perform binary search for parent_id */
587 do {
588 unsigned int mid = (hi - lo)/2 + lo;
589 unsigned int this_id = cache->acache[mid];
590
591 if (parent_id == this_id) {
592 index = mid;
593 break;
594 }
595
596 if (parent_id < this_id) {
597 hi = mid;
598 continue;
599 }
600
601 if (parent_id > this_id) {
602 lo = mid + 1;
603 continue;
604 }
605 } while(lo < hi);
606
607 /* check if lo and hi converged on the match */
608 if (parent_id == cache->acache[hi]) {
609 index = hi;
610 }
611
612 /* if no existing entry found, find index for new one */
613 if (index == -1) {
614 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
615 matches = 0;
616 } else {
617 matches = 1;
618 }
619
620 *indexp = index;
621 return matches;
622 }
623
624 /*
625 * Add a node to the access_cache at the given index (or do a lookup first
626 * to find the index if -1 is passed in). We currently do a replace rather
627 * than an insert if the cache is full.
628 */
629 static void
630 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
631 {
632 int lookup_index = -1;
633
634 /* need to do a lookup first if -1 passed for index */
635 if (index == -1) {
636 if (lookup_bucket(cache, &lookup_index, nodeID)) {
637 if (cache->haveaccess[lookup_index] != access) {
638 /* change access info for existing entry... should never happen */
639 cache->haveaccess[lookup_index] = access;
640 }
641
642 /* mission accomplished */
643 return;
644 } else {
645 index = lookup_index;
646 }
647
648 }
649
650 /* if the cache is full, do a replace rather than an insert */
651 if (cache->numcached >= CACHE_ELEMS) {
652 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
653 cache->numcached = CACHE_ELEMS-1;
654
655 if (index > cache->numcached) {
656 // printf("index %d pinned to %d\n", index, cache->numcached);
657 index = cache->numcached;
658 }
659 } else if (index >= 0 && index < cache->numcached) {
660 /* only do bcopy if we're inserting */
661 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
662 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
663 }
664
665 cache->acache[index] = nodeID;
666 cache->haveaccess[index] = access;
667 cache->numcached++;
668 }
669
670
671 struct cinfo {
672 uid_t uid;
673 gid_t gid;
674 mode_t mode;
675 cnid_t parentcnid;
676 };
677
678 static int
679 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
680 {
681 struct cinfo *cip = (struct cinfo *)arg;
682
683 cip->uid = attrp->ca_uid;
684 cip->gid = attrp->ca_gid;
685 cip->mode = attrp->ca_mode;
686 cip->parentcnid = descp->cd_parentcnid;
687
688 return (0);
689 }
690
691 /*
692 * Look up the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
693 * isn't incore, then go to the catalog.
694 */
695 static int
696 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
697 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
698 {
699 int error = 0;
700
701 /* if this id matches the one the fsctl was called with, skip the lookup */
702 if (cnid == skip_cp->c_cnid) {
703 cnattrp->ca_uid = skip_cp->c_uid;
704 cnattrp->ca_gid = skip_cp->c_gid;
705 cnattrp->ca_mode = skip_cp->c_mode;
706 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
707 } else {
708 struct cinfo c_info;
709
710 /* otherwise, check the cnode hash in case the file/dir is incore */
711 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
712 cnattrp->ca_uid = c_info.uid;
713 cnattrp->ca_gid = c_info.gid;
714 cnattrp->ca_mode = c_info.mode;
715 keyp->hfsPlus.parentID = c_info.parentcnid;
716 } else {
717 int lockflags;
718
719 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
720
721 /* lookup this cnid in the catalog */
722 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
723
724 hfs_systemfile_unlock(hfsmp, lockflags);
725
726 cache->lookups++;
727 }
728 }
729
730 return (error);
731 }
732
733 /*
734 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
735 * up to CACHE_LEVELS as we progress towards the root.
736 */
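/*
 * E.g. when asked about a directory several levels deep, this walks
 * dir -> parent -> ... -> root, stops early on a cache hit or on the first
 * component that lacks X_OK for the caller, and then records the verdict
 * for each directory visited (up to CACHE_LEVELS of them) in the cache.
 */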
737 static int
738 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
739 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
740 {
741 int myErr = 0;
742 int myResult;
743 HFSCatalogNodeID thisNodeID;
744 unsigned long myPerms;
745 struct cat_attr cnattr;
746 int cache_index = -1;
747 CatalogKey catkey;
748
749 int i = 0, ids_to_cache = 0;
750 int parent_ids[CACHE_LEVELS];
751
752 /* root always has access */
753 if (!suser(myp_ucred, NULL)) {
754 return (1);
755 }
756
757 thisNodeID = nodeID;
758 while (thisNodeID >= kRootDirID) {
759 myResult = 0; /* default to "no access" */
760
761 /* check the cache before resorting to hitting the catalog */
762
763 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
764 * to look any further after hitting cached dir */
765
766 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
767 cache->cachehits++;
768 myResult = cache->haveaccess[cache_index];
769 goto ExitThisRoutine;
770 }
771
772 /* remember which parents we want to cache */
773 if (ids_to_cache < CACHE_LEVELS) {
774 parent_ids[ids_to_cache] = thisNodeID;
775 ids_to_cache++;
776 }
777
778 /* do the lookup (checks the cnode hash, then the catalog) */
779 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
780 if (myErr) {
781 goto ExitThisRoutine; /* no access */
782 }
783
784 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
785 cnattr.ca_mode, hfsmp->hfs_mp,
786 myp_ucred, theProcPtr);
787
788 if ( (myPerms & X_OK) == 0 ) {
789 myResult = 0;
790 goto ExitThisRoutine; /* no access */
791 }
792
793 /* up the hierarchy we go */
794 thisNodeID = catkey.hfsPlus.parentID;
795 }
796
797 /* if here, we have access to this node */
798 myResult = 1;
799
800 ExitThisRoutine:
801 if (myErr) {
802 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
803 myResult = 0;
804 }
805 *err = myErr;
806
807 /* cache the parent directory(ies) */
808 for (i = 0; i < ids_to_cache; i++) {
809 /* small optimization: get rid of double-lookup for all these */
810 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
811 add_node(cache, -1, parent_ids[i], myResult);
812 }
813
814 return (myResult);
815 }
816 /* end "bulk-access" support */
817
818
819
820 /*
821 * Callback for use with freeze ioctl.
822 */
823 static int
824 hfs_freezewrite_callback(struct vnode *vp, void *cargs)
825 {
826 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
827
828 return 0;
829 }
830
831 /*
832 * Control filesystem operating characteristics.
833 */
834 int
835 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
836 vnode_t a_vp;
837 int a_command;
838 caddr_t a_data;
839 int a_fflag;
840 vfs_context_t a_context;
841 } */ *ap)
842 {
843 struct vnode * vp = ap->a_vp;
844 struct hfsmount *hfsmp = VTOHFS(vp);
845 vfs_context_t context = ap->a_context;
846 kauth_cred_t cred = vfs_context_ucred(context);
847 proc_t p = vfs_context_proc(context);
848 struct vfsstatfs *vfsp;
849 boolean_t is64bit;
850
851 is64bit = proc_is64bit(p);
852
853 switch (ap->a_command) {
854
855 case HFS_RESIZE_VOLUME: {
856 u_int64_t newsize;
857 u_int64_t cursize;
858
859 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
860 if (suser(cred, NULL) &&
861 kauth_cred_getuid(cred) != vfsp->f_owner) {
862 return (EACCES); /* must be owner of file system */
863 }
864 if (!vnode_isvroot(vp)) {
865 return (EINVAL);
866 }
867 newsize = *(u_int64_t *)ap->a_data;
868 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
869
870 if (newsize > cursize) {
871 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
872 } else if (newsize < cursize) {
873 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
874 } else {
875 return (0);
876 }
877 }
878 case HFS_CHANGE_NEXT_ALLOCATION: {
879 u_int32_t location;
880
881 if (vnode_vfsisrdonly(vp)) {
882 return (EROFS);
883 }
884 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
885 if (suser(cred, NULL) &&
886 kauth_cred_getuid(cred) != vfsp->f_owner) {
887 return (EACCES); /* must be owner of file system */
888 }
889 if (!vnode_isvroot(vp)) {
890 return (EINVAL);
891 }
892 location = *(u_int32_t *)ap->a_data;
893 if (location > hfsmp->totalBlocks - 1) {
894 return (EINVAL);
895 }
896 /* Return previous value. */
897 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
898 HFS_MOUNT_LOCK(hfsmp, TRUE);
899 hfsmp->nextAllocation = location;
900 hfsmp->vcbFlags |= 0xFF00;
901 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
902 return (0);
903 }
904
905 #ifdef HFS_SPARSE_DEV
906 case HFS_SETBACKINGSTOREINFO: {
907 struct vnode * bsfs_rootvp;
908 struct vnode * di_vp;
909 struct hfs_backingstoreinfo *bsdata;
910 int error = 0;
911
912 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
913 return (EALREADY);
914 }
915 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
916 if (suser(cred, NULL) &&
917 kauth_cred_getuid(cred) != vfsp->f_owner) {
918 return (EACCES); /* must be owner of file system */
919 }
920 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
921 if (bsdata == NULL) {
922 return (EINVAL);
923 }
924 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
925 return (error);
926 }
927 if ((error = vnode_getwithref(di_vp))) {
928 file_drop(bsdata->backingfd);
929 return(error);
930 }
931
932 if (vnode_mount(vp) == vnode_mount(di_vp)) {
933 (void)vnode_put(di_vp);
934 file_drop(bsdata->backingfd);
935 return (EINVAL);
936 }
937
938 /*
939 * Obtain the backing fs root vnode and keep a reference
940 * on it. This reference will be dropped in hfs_unmount.
941 */
942 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
943 if (error) {
944 (void)vnode_put(di_vp);
945 file_drop(bsdata->backingfd);
946 return (error);
947 }
948 vnode_ref(bsfs_rootvp);
949 vnode_put(bsfs_rootvp);
950
951 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
952 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
953 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
954 hfsmp->hfs_sparsebandblks *= 4;
955
956 (void)vnode_put(di_vp);
957 file_drop(bsdata->backingfd);
958 return (0);
959 }
960 case HFS_CLRBACKINGSTOREINFO: {
961 struct vnode * tmpvp;
962
963 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
964 if (suser(cred, NULL) &&
965 kauth_cred_getuid(cred) != vfsp->f_owner) {
966 return (EACCES); /* must be owner of file system */
967 }
968 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
969 hfsmp->hfs_backingfs_rootvp) {
970
971 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
972 tmpvp = hfsmp->hfs_backingfs_rootvp;
973 hfsmp->hfs_backingfs_rootvp = NULLVP;
974 hfsmp->hfs_sparsebandblks = 0;
975 vnode_rele(tmpvp);
976 }
977 return (0);
978 }
979 #endif /* HFS_SPARSE_DEV */
980
981 case F_FREEZE_FS: {
982 struct mount *mp;
983 task_t task;
984
985 if (!is_suser())
986 return (EACCES);
987
988 mp = vnode_mount(vp);
989 hfsmp = VFSTOHFS(mp);
990
991 if (!(hfsmp->jnl))
992 return (ENOTSUP);
993
994 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
995
996 task = current_task();
997 task_working_set_disable(task);
998
999 // flush things before we get started to try and prevent
1000 // dirty data from being paged out while we're frozen.
1001 // note: can't do this after taking the lock as it will
1002 // deadlock against ourselves.
1003 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1004 hfs_global_exclusive_lock_acquire(hfsmp);
1005 journal_flush(hfsmp->jnl);
1006
1007 // don't need to iterate on all vnodes, we just need to
1008 // wait for writes to the system files and the device vnode
1009 if (HFSTOVCB(hfsmp)->extentsRefNum)
1010 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1011 if (HFSTOVCB(hfsmp)->catalogRefNum)
1012 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1013 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1014 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1015 if (hfsmp->hfs_attribute_vp)
1016 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1017 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1018
1019 hfsmp->hfs_freezing_proc = current_proc();
1020
1021 return (0);
1022 }
1023
1024 case F_THAW_FS: {
1025 if (!is_suser())
1026 return (EACCES);
1027
1028 // if we're not the one who froze the fs then we
1029 // can't thaw it.
1030 if (hfsmp->hfs_freezing_proc != current_proc()) {
1031 return EPERM;
1032 }
1033
1034 // NOTE: if you add code here, also go check the
1035 // code that "thaws" the fs in hfs_vnop_close()
1036 //
1037 hfsmp->hfs_freezing_proc = NULL;
1038 hfs_global_exclusive_lock_release(hfsmp);
1039 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1040
1041 return (0);
1042 }
1043
1044 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1045 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1046
1047 case HFS_BULKACCESS_FSCTL:
1048 case HFS_BULKACCESS: {
1049 /*
1050 * NOTE: on entry, the vnode is locked. In case this vnode
1051 * happens to be in our list of file_ids, we note it and
1052 * avoid calling hfs_chashget_nowait() on that id, as that
1053 * will cause a "locking against myself" panic.
1054 */
1055 Boolean check_leaf = true;
1056
1057 struct user_access_t *user_access_structp;
1058 struct user_access_t tmp_user_access_t;
1059 struct access_cache cache;
1060
1061 int error = 0, i;
1062
1063 dev_t dev = VTOC(vp)->c_dev;
1064
1065 short flags;
1066 struct ucred myucred; /* XXX ILLEGAL */
1067 int num_files;
1068 int *file_ids = NULL;
1069 short *access = NULL;
1070
1071 cnid_t cnid;
1072 cnid_t prevParent_cnid = 0;
1073 unsigned long myPerms;
1074 short myaccess = 0;
1075 struct cat_attr cnattr;
1076 CatalogKey catkey;
1077 struct cnode *skip_cp = VTOC(vp);
1078 struct vfs_context my_context;
1079
1080 /* first, return error if not run as root */
1081 if (cred->cr_ruid != 0) {
1082 return EPERM;
1083 }
1084
1085 /* initialize the local cache and buffers */
1086 cache.numcached = 0;
1087 cache.cachehits = 0;
1088 cache.lookups = 0;
1089
1090 file_ids = (int *) get_pathbuff();
1091 access = (short *) get_pathbuff();
1092 cache.acache = (int *) get_pathbuff();
1093 cache.haveaccess = (Boolean *) get_pathbuff();
1094
1095 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1096 release_pathbuff((char *) file_ids);
1097 release_pathbuff((char *) access);
1098 release_pathbuff((char *) cache.acache);
1099 release_pathbuff((char *) cache.haveaccess);
1100
1101 return ENOMEM;
1102 }
1103
1104 /* struct copyin done during dispatch... need to copy file_id array separately */
1105 if (ap->a_data == NULL) {
1106 error = EINVAL;
1107 goto err_exit_bulk_access;
1108 }
1109
1110 if (is64bit) {
1111 user_access_structp = (struct user_access_t *)ap->a_data;
1112 }
1113 else {
1114 struct access_t * accessp = (struct access_t *)ap->a_data;
1115 tmp_user_access_t.uid = accessp->uid;
1116 tmp_user_access_t.flags = accessp->flags;
1117 tmp_user_access_t.num_groups = accessp->num_groups;
1118 tmp_user_access_t.num_files = accessp->num_files;
1119 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1120 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1121 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1122 user_access_structp = &tmp_user_access_t;
1123 }
1124
1125 num_files = user_access_structp->num_files;
1126 if (num_files < 1) {
1127 goto err_exit_bulk_access;
1128 }
1129 if (num_files > 256) {
1130 error = EINVAL;
1131 goto err_exit_bulk_access;
1132 }
1133
1134 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1135 num_files * sizeof(int)))) {
1136 goto err_exit_bulk_access;
1137 }
1138
1139 /* fill in the ucred structure */
1140 flags = user_access_structp->flags;
1141 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1142 flags = R_OK;
1143 }
1144
1145 /* check if we've been passed leaf node ids or parent ids */
1146 if (flags & PARENT_IDS_FLAG) {
1147 check_leaf = false;
1148 }
1149
1150 memset(&myucred, 0, sizeof(myucred));
1151 myucred.cr_ref = 1;
1152 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1153 myucred.cr_ngroups = user_access_structp->num_groups;
1154 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1155 myucred.cr_ngroups = 0;
1156 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1157 myucred.cr_ngroups * sizeof(gid_t)))) {
1158 goto err_exit_bulk_access;
1159 }
1160 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1161
1162 my_context.vc_proc = p;
1163 my_context.vc_ucred = &myucred;
1164
1165 /* Check access to each file_id passed in */
1166 for (i = 0; i < num_files; i++) {
1167 #if 0
1168 cnid = (cnid_t) file_ids[i];
1169
1170 /* root always has access */
1171 if (!suser(&myucred, NULL)) {
1172 access[i] = 0;
1173 continue;
1174 }
1175
1176 if (check_leaf) {
1177
1178 /* do the lookup (checks the cnode hash, then the catalog) */
1179 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1180 if (error) {
1181 access[i] = (short) error;
1182 continue;
1183 }
1184
1185 /* before calling CheckAccess(), check the target file for read access */
1186 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1187 cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p );
1188
1189
1190 /* fail fast if no access */
1191 if ((myPerms & flags) == 0) {
1192 access[i] = EACCES;
1193 continue;
1194 }
1195 } else {
1196 /* we were passed an array of parent ids */
1197 catkey.hfsPlus.parentID = cnid;
1198 }
1199
1200 /* if the last guy had the same parent and had access, we're done */
1201 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1202 cache.cachehits++;
1203 access[i] = 0;
1204 continue;
1205 }
1206
1207 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1208 skip_cp, p, &myucred, dev);
1209
1210 if ( myaccess ) {
1211 access[i] = 0; // have access.. no errors to report
1212 } else {
1213 access[i] = (error != 0 ? (short) error : EACCES);
1214 }
1215
1216 prevParent_cnid = catkey.hfsPlus.parentID;
1217 #else
1218 int myErr;
1219
1220 cnid = (cnid_t)file_ids[i];
1221
1222 while (cnid >= kRootDirID) {
1223 /* get the vnode for this cnid */
1224 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1225 if ( myErr ) {
1226 access[i] = EACCES;
1227 break;
1228 }
1229
1230 cnid = VTOC(vp)->c_parentcnid;
1231
1232 hfs_unlock(VTOC(vp));
1233 if (vnode_vtype(vp) == VDIR) {
1234 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1235 } else {
1236 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1237 }
1238 vnode_put(vp);
1239 access[i] = myErr;
1240 if (myErr) {
1241 break;
1242 }
1243 }
1244 #endif
1245 }
1246
1247 /* copyout the access array */
1248 if ((error = copyout((caddr_t)access, user_access_structp->access,
1249 num_files * sizeof (short)))) {
1250 goto err_exit_bulk_access;
1251 }
1252
1253 err_exit_bulk_access:
1254
1255 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1256
1257 release_pathbuff((char *) cache.acache);
1258 release_pathbuff((char *) cache.haveaccess);
1259 release_pathbuff((char *) file_ids);
1260 release_pathbuff((char *) access);
1261
1262 return (error);
1263 } /* HFS_BULKACCESS */
1264
1265 case HFS_SETACLSTATE: {
1266 int state;
1267
1268 if (ap->a_data == NULL) {
1269 return (EINVAL);
1270 }
1271
1272 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1273 state = *(int *)ap->a_data;
1274
1275 // super-user can enable or disable acl's on a volume.
1276 // the volume owner can only enable acl's
1277 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1278 return (EPERM);
1279 }
1280 if (state == 0 || state == 1)
1281 return hfs_setextendedsecurity(hfsmp, state);
1282 else
1283 return (EINVAL);
1284 }
1285
1286 case F_FULLFSYNC: {
1287 int error;
1288
1289 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1290 if (error == 0) {
1291 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1292 hfs_unlock(VTOC(vp));
1293 }
1294
1295 return error;
1296 }
1297
1298 case F_CHKCLEAN: {
1299 register struct cnode *cp;
1300 int error;
1301
1302 if (!vnode_isreg(vp))
1303 return EINVAL;
1304
1305 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1306 if (error == 0) {
1307 cp = VTOC(vp);
1308 /*
1309 * used by regression test to determine if
1310 * all the dirty pages (via write) have been cleaned
1311 * after a call to 'fsync'.
1312 */
1313 error = is_file_clean(vp, VTOF(vp)->ff_size);
1314 hfs_unlock(cp);
1315 }
1316 return (error);
1317 }
1318
1319 case F_RDADVISE: {
1320 register struct radvisory *ra;
1321 struct filefork *fp;
1322 int error;
1323
1324 if (!vnode_isreg(vp))
1325 return EINVAL;
1326
1327 ra = (struct radvisory *)(ap->a_data);
1328 fp = VTOF(vp);
1329
1330 /* Protect against a size change. */
1331 hfs_lock_truncate(VTOC(vp), TRUE);
1332
1333 if (ra->ra_offset >= fp->ff_size) {
1334 error = EFBIG;
1335 } else {
1336 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1337 }
1338
1339 hfs_unlock_truncate(VTOC(vp));
1340 return (error);
1341 }
1342
1343 case F_READBOOTSTRAP:
1344 case F_WRITEBOOTSTRAP:
1345 {
1346 struct vnode *devvp = NULL;
1347 user_fbootstraptransfer_t *user_bootstrapp;
1348 int devBlockSize;
1349 int error;
1350 uio_t auio;
1351 daddr64_t blockNumber;
1352 u_long blockOffset;
1353 u_long xfersize;
1354 struct buf *bp;
1355 user_fbootstraptransfer_t user_bootstrap;
1356
1357 if (!vnode_isvroot(vp))
1358 return (EINVAL);
1359 /* LP64 - when the caller is a 64-bit process we are passed a pointer
1360 * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
1361 * fbootstraptransfer_t, which we munge into a user_fbootstraptransfer_t
1362 */
1363 if (is64bit) {
1364 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1365 }
1366 else {
1367 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1368 user_bootstrapp = &user_bootstrap;
1369 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1370 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1371 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1372 }
1373 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1374 return EINVAL;
1375
1376 devvp = VTOHFS(vp)->hfs_devvp;
1377 auio = uio_create(1, user_bootstrapp->fbt_offset,
1378 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1379 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1380 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1381
1382 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1383
1384 while (uio_resid(auio) > 0) {
1385 blockNumber = uio_offset(auio) / devBlockSize;
1386 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1387 if (error) {
1388 if (bp) buf_brelse(bp);
1389 uio_free(auio);
1390 return error;
1391 };
1392
1393 blockOffset = uio_offset(auio) % devBlockSize;
1394 xfersize = devBlockSize - blockOffset;
1395 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1396 if (error) {
1397 buf_brelse(bp);
1398 uio_free(auio);
1399 return error;
1400 };
1401 if (uio_rw(auio) == UIO_WRITE) {
1402 error = VNOP_BWRITE(bp);
1403 if (error) {
1404 uio_free(auio);
1405 return error;
1406 }
1407 } else {
1408 buf_brelse(bp);
1409 };
1410 };
1411 uio_free(auio);
1412 };
1413 return 0;
1414
1415 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1416 {
1417 if (is64bit) {
1418 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1419 }
1420 else {
1421 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1422 }
1423 return 0;
1424 }
1425
1426 case HFS_GET_MOUNT_TIME:
1427 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1428 break;
1429
1430 case HFS_GET_LAST_MTIME:
1431 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1432 break;
1433
1434 case HFS_SET_BOOT_INFO:
1435 if (!vnode_isvroot(vp))
1436 return(EINVAL);
1437 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1438 return(EACCES); /* must be superuser or owner of filesystem */
1439 HFS_MOUNT_LOCK(hfsmp, TRUE);
1440 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1441 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1442 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1443 break;
1444
1445 case HFS_GET_BOOT_INFO:
1446 if (!vnode_isvroot(vp))
1447 return(EINVAL);
1448 HFS_MOUNT_LOCK(hfsmp, TRUE);
1449 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1450 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1451 break;
1452
1453 default:
1454 return (ENOTTY);
1455 }
1456
1457 /* Should never get here */
1458 return 0;
1459 }
1460
1461 /*
1462 * select
1463 */
1464 int
1465 hfs_vnop_select(__unused struct vnop_select_args *ap)
1466 /*
1467 struct vnop_select_args {
1468 vnode_t a_vp;
1469 int a_which;
1470 int a_fflags;
1471 void *a_wql;
1472 vfs_context_t a_context;
1473 };
1474 */
1475 {
1476 /*
1477 * We should really check to see if I/O is possible.
1478 */
1479 return (1);
1480 }
1481
1482 /*
1483 * Converts a logical block number to a physical block, and optionally returns
1484 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1485 * The physical block number is based on the device block size, currently it's 512.
1486 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
1487 */
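/*
 * Example: with a logical block size of 4096, a request for logical block
 * bn = 10 maps to byte offset 40960.  If MapFileBlockC() reports that 32768
 * contiguous bytes are available from that point, *runp is set to
 * 32768/4096 - 1 = 7 remaining read-ahead blocks (since can_cluster() is
 * true for a 4K logical block size).
 */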
1488 int
1489 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1490 {
1491 struct cnode *cp = VTOC(vp);
1492 struct filefork *fp = VTOF(vp);
1493 struct hfsmount *hfsmp = VTOHFS(vp);
1494 int retval = E_NONE;
1495 daddr_t logBlockSize;
1496 size_t bytesContAvail = 0;
1497 off_t blockposition;
1498 int lockExtBtree;
1499 int lockflags = 0;
1500
1501 /*
1502 * Check for underlying vnode requests and ensure that logical
1503 * to physical mapping is requested.
1504 */
1505 if (vpp != NULL)
1506 *vpp = cp->c_devvp;
1507 if (bnp == NULL)
1508 return (0);
1509
1510 logBlockSize = GetLogicalBlockSize(vp);
1511 blockposition = (off_t)bn * (off_t)logBlockSize;
1512
1513 lockExtBtree = overflow_extents(fp);
1514
1515 if (lockExtBtree)
1516 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1517
1518 retval = MacToVFSError(
1519 MapFileBlockC (HFSTOVCB(hfsmp),
1520 (FCB*)fp,
1521 MAXPHYSIO,
1522 blockposition,
1523 bnp,
1524 &bytesContAvail));
1525
1526 if (lockExtBtree)
1527 hfs_systemfile_unlock(hfsmp, lockflags);
1528
1529 if (retval == E_NONE) {
1530 /* Figure out how many read ahead blocks there are */
1531 if (runp != NULL) {
1532 if (can_cluster(logBlockSize)) {
1533 /* Make sure this result never goes negative: */
1534 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1535 } else {
1536 *runp = 0;
1537 }
1538 }
1539 }
1540 return (retval);
1541 }
1542
1543 /*
1544 * Convert logical block number to file offset.
1545 */
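/* E.g. with a 4 KB logical block size, logical block 3 maps to file offset 12288. */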
1546 int
1547 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1548 /*
1549 struct vnop_blktooff_args {
1550 vnode_t a_vp;
1551 daddr64_t a_lblkno;
1552 off_t *a_offset;
1553 };
1554 */
1555 {
1556 if (ap->a_vp == NULL)
1557 return (EINVAL);
1558 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1559
1560 return(0);
1561 }
1562
1563 /*
1564 * Convert file offset to logical block number.
1565 */
1566 int
1567 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1568 /*
1569 struct vnop_offtoblk_args {
1570 vnode_t a_vp;
1571 off_t a_offset;
1572 daddr64_t *a_lblkno;
1573 };
1574 */
1575 {
1576 if (ap->a_vp == NULL)
1577 return (EINVAL);
1578 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1579
1580 return(0);
1581 }
1582
1583 /*
1584 * Map file offset to physical block number.
1585 *
1586 * System file cnodes are expected to be locked (shared or exclusive).
1587 */
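/*
 * On success *a_bpn holds the starting physical (device) block, or -1 when
 * the requested range begins in an invalid (not-yet-zeroed) region, and
 * *a_run (if supplied) returns the number of contiguous bytes mapped, at
 * most a_size.  Delayed allocations are converted to real blocks here first.
 */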
1588 int
1589 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1590 /*
1591 struct vnop_blockmap_args {
1592 vnode_t a_vp;
1593 off_t a_foffset;
1594 size_t a_size;
1595 daddr64_t *a_bpn;
1596 size_t *a_run;
1597 void *a_poff;
1598 int a_flags;
1599 vfs_context_t a_context;
1600 };
1601 */
1602 {
1603 struct vnode *vp = ap->a_vp;
1604 struct cnode *cp;
1605 struct filefork *fp;
1606 struct hfsmount *hfsmp;
1607 size_t bytesContAvail = 0;
1608 int retval = E_NONE;
1609 int syslocks = 0;
1610 int lockflags = 0;
1611 struct rl_entry *invalid_range;
1612 enum rl_overlaptype overlaptype;
1613 int started_tr = 0;
1614 int tooklock = 0;
1615
1616 /* Do not allow blockmap operation on a directory */
1617 if (vnode_isdir(vp)) {
1618 return (ENOTSUP);
1619 }
1620
1621 /*
1622 * Check for underlying vnode requests and ensure that logical
1623 * to physical mapping is requested.
1624 */
1625 if (ap->a_bpn == NULL)
1626 return (0);
1627
1628 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1629 if (VTOC(vp)->c_lockowner != current_thread()) {
1630 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1631 tooklock = 1;
1632 } else {
1633 cp = VTOC(vp);
1634 panic("blockmap: %s cnode lock already held!\n",
1635 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1636 }
1637 }
1638 hfsmp = VTOHFS(vp);
1639 cp = VTOC(vp);
1640 fp = VTOF(vp);
1641
1642 retry:
1643 if (fp->ff_unallocblocks) {
1644 if (hfs_start_transaction(hfsmp) != 0) {
1645 retval = EINVAL;
1646 goto exit;
1647 } else {
1648 started_tr = 1;
1649 }
1650 syslocks = SFL_EXTENTS | SFL_BITMAP;
1651
1652 } else if (overflow_extents(fp)) {
1653 syslocks = SFL_EXTENTS;
1654 }
1655
1656 if (syslocks)
1657 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1658
1659 /*
1660 * Check for any delayed allocations.
1661 */
1662 if (fp->ff_unallocblocks) {
1663 SInt64 actbytes;
1664 u_int32_t loanedBlocks;
1665
1666 //
1667 // Make sure we have a transaction. It's possible
1668 // that we came in and fp->ff_unallocblocks was zero
1669 // but during the time we blocked acquiring the extents
1670 // btree, ff_unallocblocks became non-zero and so we
1671 // will need to start a transaction.
1672 //
1673 if (started_tr == 0) {
1674 if (syslocks) {
1675 hfs_systemfile_unlock(hfsmp, lockflags);
1676 syslocks = 0;
1677 }
1678 goto retry;
1679 }
1680
1681 /*
1682 * Note: ExtendFileC will release any blocks on loan and
1683 * acquire real blocks. So we ask to extend by zero bytes
1684 * since ExtendFileC will account for the virtual blocks.
1685 */
1686
1687 loanedBlocks = fp->ff_unallocblocks;
1688 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1689 kEFAllMask | kEFNoClumpMask, &actbytes);
1690
1691 if (retval) {
1692 fp->ff_unallocblocks = loanedBlocks;
1693 cp->c_blocks += loanedBlocks;
1694 fp->ff_blocks += loanedBlocks;
1695
1696 HFS_MOUNT_LOCK(hfsmp, TRUE);
1697 hfsmp->loanedBlocks += loanedBlocks;
1698 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1699 }
1700
1701 if (retval) {
1702 hfs_systemfile_unlock(hfsmp, lockflags);
1703 cp->c_flag |= C_MODIFIED;
1704 if (started_tr) {
1705 (void) hfs_update(vp, TRUE);
1706 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1707
1708 hfs_end_transaction(hfsmp);
1709 }
1710 goto exit;
1711 }
1712 }
1713
1714 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1715 ap->a_bpn, &bytesContAvail);
1716 if (syslocks) {
1717 hfs_systemfile_unlock(hfsmp, lockflags);
1718 syslocks = 0;
1719 }
1720
1721 if (started_tr) {
1722 (void) hfs_update(vp, TRUE);
1723 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1724 hfs_end_transaction(hfsmp);
1725 started_tr = 0;
1726 }
1727 if (retval) {
1728 goto exit;
1729 }
1730
1731 /* Adjust the mapping information for invalid file ranges: */
1732 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1733 ap->a_foffset + (off_t)bytesContAvail - 1,
1734 &invalid_range);
1735 if (overlaptype != RL_NOOVERLAP) {
1736 switch(overlaptype) {
1737 case RL_MATCHINGOVERLAP:
1738 case RL_OVERLAPCONTAINSRANGE:
1739 case RL_OVERLAPSTARTSBEFORE:
1740 /* There's no valid block for this byte offset: */
1741 *ap->a_bpn = (daddr64_t)-1;
1742 /* There's no point limiting the amount to be returned
1743 * if the invalid range that was hit extends all the way
1744 * to the EOF (i.e. there are no valid bytes between the
1745 * end of this range and the file's EOF):
1746 */
1747 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1748 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1749 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1750 }
1751 break;
1752
1753 case RL_OVERLAPISCONTAINED:
1754 case RL_OVERLAPENDSAFTER:
1755 /* The range of interest hits an invalid block before the end: */
1756 if (invalid_range->rl_start == ap->a_foffset) {
1757 /* There's actually no valid information to be had starting here: */
1758 *ap->a_bpn = (daddr64_t)-1;
1759 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1760 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1761 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1762 }
1763 } else {
1764 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1765 }
1766 break;
1767
1768 case RL_NOOVERLAP:
1769 break;
1770 } /* end switch */
1771 if (bytesContAvail > ap->a_size)
1772 bytesContAvail = ap->a_size;
1773 }
1774 if (ap->a_run)
1775 *ap->a_run = bytesContAvail;
1776
1777 if (ap->a_poff)
1778 *(int *)ap->a_poff = 0;
1779 exit:
1780 if (tooklock)
1781 hfs_unlock(cp);
1782
1783 return (MacToVFSError(retval));
1784 }
1785
1786
1787 /*
1788 * prepare and issue the I/O
1789 * buf_strategy knows how to deal
1790 * with requests that require
1791 * fragmented I/Os
1792 */
1793 int
1794 hfs_vnop_strategy(struct vnop_strategy_args *ap)
1795 {
1796 buf_t bp = ap->a_bp;
1797 vnode_t vp = buf_vnode(bp);
1798 struct cnode *cp = VTOC(vp);
1799
1800 return (buf_strategy(cp->c_devvp, ap));
1801 }
1802
1803
1804 static int
1805 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1806 {
1807 register struct cnode *cp = VTOC(vp);
1808 struct filefork *fp = VTOF(vp);
1809 struct proc *p = vfs_context_proc(context);
1810 kauth_cred_t cred = vfs_context_ucred(context);
1811 int retval;
1812 off_t bytesToAdd;
1813 off_t actualBytesAdded;
1814 off_t filebytes;
1815 u_long fileblocks;
1816 int blksize;
1817 struct hfsmount *hfsmp;
1818 int lockflags;
1819
1820 blksize = VTOVCB(vp)->blockSize;
1821 fileblocks = fp->ff_blocks;
1822 filebytes = (off_t)fileblocks * (off_t)blksize;
1823
1824 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1825 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1826
1827 if (length < 0)
1828 return (EINVAL);
1829
1830 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1831 return (EFBIG);
1832
1833 hfsmp = VTOHFS(vp);
1834
1835 retval = E_NONE;
1836
1837 /* Files that are changing size are not hot file candidates. */
1838 if (hfsmp->hfc_stage == HFC_RECORDING) {
1839 fp->ff_bytesread = 0;
1840 }
1841
1842 /*
1843 * We cannot just check if fp->ff_size == length (as an optimization)
1844 * since there may be extra physical blocks that also need truncation.
1845 */
1846 #if QUOTA
1847 if ((retval = hfs_getinoquota(cp)))
1848 return(retval);
1849 #endif /* QUOTA */
1850
1851 /*
1852 * Lengthen the size of the file. We must ensure that the
1853 * last byte of the file is allocated. Since the smallest
1854 * value of ff_size is 0, length will be at least 1.
1855 */
1856 if (length > (off_t)fp->ff_size) {
1857 #if QUOTA
1858 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1859 cred, 0);
1860 if (retval)
1861 goto Err_Exit;
1862 #endif /* QUOTA */
1863 /*
1864 * If we don't have enough physical space then
1865 * we need to extend the physical size.
1866 */
1867 if (length > filebytes) {
1868 int eflags;
1869 u_long blockHint = 0;
1870
1871 /* All or nothing and don't round up to clumpsize. */
1872 eflags = kEFAllMask | kEFNoClumpMask;
1873
1874 if (cred && suser(cred, NULL) != 0)
1875 eflags |= kEFReserveMask; /* keep a reserve */
1876
1877 /*
1878 * Allocate Journal and Quota files in metadata zone.
1879 */
1880 if (filebytes == 0 &&
1881 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1882 hfs_virtualmetafile(cp)) {
1883 eflags |= kEFMetadataMask;
1884 blockHint = hfsmp->hfs_metazone_start;
1885 }
1886 if (hfs_start_transaction(hfsmp) != 0) {
1887 retval = EINVAL;
1888 goto Err_Exit;
1889 }
1890
1891 /* Protect extents b-tree and allocation bitmap */
1892 lockflags = SFL_BITMAP;
1893 if (overflow_extents(fp))
1894 lockflags |= SFL_EXTENTS;
1895 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1896
1897 while ((length > filebytes) && (retval == E_NONE)) {
1898 bytesToAdd = length - filebytes;
1899 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1900 (FCB*)fp,
1901 bytesToAdd,
1902 blockHint,
1903 eflags,
1904 &actualBytesAdded));
1905
1906 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1907 if (actualBytesAdded == 0 && retval == E_NONE) {
1908 if (length > filebytes)
1909 length = filebytes;
1910 break;
1911 }
1912 } /* endwhile */
1913
1914 hfs_systemfile_unlock(hfsmp, lockflags);
1915
1916 if (hfsmp->jnl) {
1917 (void) hfs_update(vp, TRUE);
1918 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1919 }
1920
1921 hfs_end_transaction(hfsmp);
1922
1923 if (retval)
1924 goto Err_Exit;
1925
1926 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1927 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1928 }
1929
1930 if (!(flags & IO_NOZEROFILL)) {
1931 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1932 struct rl_entry *invalid_range;
1933 off_t zero_limit;
1934
1935 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1936 if (length < zero_limit) zero_limit = length;
1937
1938 if (length > (off_t)fp->ff_size) {
1939 struct timeval tv;
1940
1941 /* Extending the file: time to fill out the current last page with zeroes? */
1942 if ((fp->ff_size & PAGE_MASK_64) &&
1943 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1944 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1945
1946 /* There's some valid data at the start of the (current) last page
1947 of the file, so zero out the remainder of that page to ensure the
1948 entire page contains valid data. Since there is no invalid range
1949 possible past the (current) eof, there's no need to remove anything
1950 from the invalid range list before calling cluster_write(): */
1951 hfs_unlock(cp);
1952 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1953 fp->ff_size, (off_t)0,
1954 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1955 hfs_lock(cp, HFS_FORCE_LOCK);
1956 if (retval) goto Err_Exit;
1957
1958 /* Merely invalidate the remaining area, if necessary: */
1959 if (length > zero_limit) {
1960 microuptime(&tv);
1961 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1962 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1963 }
1964 } else {
1965 /* The page containing the (current) eof is invalid: just add the
1966 remainder of the page to the invalid list, along with the area
1967 being newly allocated:
1968 */
1969 microuptime(&tv);
1970 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1971 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1972 };
1973 }
1974 } else {
1975 panic("hfs_truncate: invoked on non-UBC object?!");
1976 };
1977 }
1978 cp->c_touch_modtime = TRUE;
1979 fp->ff_size = length;
1980
1981 /* Nested transactions will do their own ubc_setsize. */
1982 if (!skipsetsize) {
1983 /*
1984 * ubc_setsize can cause a pagein here
1985 * so we need to drop cnode lock.
1986 */
1987 hfs_unlock(cp);
1988 ubc_setsize(vp, length);
1989 hfs_lock(cp, HFS_FORCE_LOCK);
1990 }
1991
1992 } else { /* Shorten the size of the file */
1993
1994 if ((off_t)fp->ff_size > length) {
1995 /*
1996 * Any buffers that are past the truncation point need to be
1997 * invalidated (to maintain buffer cache consistency).
1998 */
1999
2000 /* Nested transactions will do their own ubc_setsize. */
2001 if (!skipsetsize) {
2002 /*
2003 * ubc_setsize can cause a pageout here
2004 * so we need to drop cnode lock.
2005 */
2006 hfs_unlock(cp);
2007 ubc_setsize(vp, length);
2008 hfs_lock(cp, HFS_FORCE_LOCK);
2009 }
2010
2011 /* Any space previously marked as invalid is now irrelevant: */
2012 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2013 }
2014
2015 /*
2016 * Account for any unmapped blocks. Note that the new
2017 * file length can still end up with unmapped blocks.
2018 */
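	/*
	 * ff_unallocblocks are blocks loaned from the volume for delayed
	 * allocation.  Return them all here, then borrow back only as many
	 * as the new length still needs beyond the blocks actually allocated.
	 */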
2019 if (fp->ff_unallocblocks > 0) {
2020 u_int32_t finalblks;
2021 u_int32_t loanedBlocks;
2022
2023 HFS_MOUNT_LOCK(hfsmp, TRUE);
2024
2025 loanedBlocks = fp->ff_unallocblocks;
2026 cp->c_blocks -= loanedBlocks;
2027 fp->ff_blocks -= loanedBlocks;
2028 fp->ff_unallocblocks = 0;
2029
2030 hfsmp->loanedBlocks -= loanedBlocks;
2031
2032 finalblks = (length + blksize - 1) / blksize;
2033 if (finalblks > fp->ff_blocks) {
2034 /* calculate required unmapped blocks */
2035 loanedBlocks = finalblks - fp->ff_blocks;
2036 hfsmp->loanedBlocks += loanedBlocks;
2037
2038 fp->ff_unallocblocks = loanedBlocks;
2039 cp->c_blocks += loanedBlocks;
2040 fp->ff_blocks += loanedBlocks;
2041 }
2042 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2043 }
2044
2045 /*
2046 * For a TBE process the deallocation of the file blocks is
2047 * delayed until the file is closed. And hfs_close calls
2048 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2049 * isn't set, we make sure this isn't a TBE process.
2050 */
2051 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2052 #if QUOTA
2053 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2054 #endif /* QUOTA */
2055 if (hfs_start_transaction(hfsmp) != 0) {
2056 retval = EINVAL;
2057 goto Err_Exit;
2058 }
2059
2060 if (fp->ff_unallocblocks == 0) {
2061 /* Protect extents b-tree and allocation bitmap */
2062 lockflags = SFL_BITMAP;
2063 if (overflow_extents(fp))
2064 lockflags |= SFL_EXTENTS;
2065 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2066
2067 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2068 (FCB*)fp, length, false));
2069
2070 hfs_systemfile_unlock(hfsmp, lockflags);
2071 }
2072 if (hfsmp->jnl) {
2073 (void) hfs_update(vp, TRUE);
2074 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2075 }
2076
2077 hfs_end_transaction(hfsmp);
2078
2079 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2080 if (retval)
2081 goto Err_Exit;
2082 #if QUOTA
2083 /* These are bytesreleased */
2084 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2085 #endif /* QUOTA */
2086 }
2087 /* Only set update flag if the logical length changes */
2088 if ((off_t)fp->ff_size != length)
2089 cp->c_touch_modtime = TRUE;
2090 fp->ff_size = length;
2091 }
2092 cp->c_touch_chgtime = TRUE;
2093 retval = hfs_update(vp, MNT_WAIT);
2094 if (retval) {
2095 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2096 -1, -1, -1, retval, 0);
2097 }
2098
2099 Err_Exit:
2100
2101 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2102 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2103
2104 return (retval);
2105 }
2106
2107
2108
2109 /*
2110 * Truncate a cnode to at most length size, freeing (or adding) the
2111 * disk blocks.
2112 */
2113 __private_extern__
2114 int
2115 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2116 vfs_context_t context)
2117 {
2118 struct filefork *fp = VTOF(vp);
2119 off_t filebytes;
2120 u_long fileblocks;
2121 int blksize, error = 0;
2122 struct cnode *cp = VTOC(vp);
2123
2124 if (vnode_isdir(vp))
2125 return (EISDIR); /* cannot truncate an HFS directory! */
2126
2127 blksize = VTOVCB(vp)->blockSize;
2128 fileblocks = fp->ff_blocks;
2129 filebytes = (off_t)fileblocks * (off_t)blksize;
2130
2131 // have to loop truncating or growing files that are
2132 // really big because otherwise transactions can get
2133 // enormous and consume too many kernel resources.
2134
2135 if (length < filebytes) {
2136 while (filebytes > length) {
2137 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2138 filebytes -= HFS_BIGFILE_SIZE;
2139 } else {
2140 filebytes = length;
2141 }
2142 cp->c_flag |= C_FORCEUPDATE;
2143 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2144 if (error)
2145 break;
2146 }
2147 } else if (length > filebytes) {
2148 while (filebytes < length) {
2149 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2150 filebytes += HFS_BIGFILE_SIZE;
2151 } else {
2152 filebytes = length;
2153 }
2154 cp->c_flag |= C_FORCEUPDATE;
2155 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2156 if (error)
2157 break;
2158 }
2159 } else /* Same logical size */ {
2160
2161 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2162 }
2163 /* Files that are changing size are not hot file candidates. */
2164 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2165 fp->ff_bytesread = 0;
2166 }
2167
2168 return (error);
2169 }
2170
2171
2172
2173 /*
2174 * Preallocate file storage space.
2175 */
2176 int
2177 hfs_vnop_allocate(struct vnop_allocate_args /* {
2178 vnode_t a_vp;
2179 off_t a_length;
2180 u_int32_t a_flags;
2181 off_t *a_bytesallocated;
2182 off_t a_offset;
2183 vfs_context_t a_context;
2184 } */ *ap)
2185 {
2186 struct vnode *vp = ap->a_vp;
2187 struct cnode *cp;
2188 struct filefork *fp;
2189 ExtendedVCB *vcb;
2190 off_t length = ap->a_length;
2191 off_t startingPEOF;
2192 off_t moreBytesRequested;
2193 off_t actualBytesAdded;
2194 off_t filebytes;
2195 u_long fileblocks;
2196 int retval, retval2;
2197 UInt32 blockHint;
2198 UInt32 extendFlags; /* For call to ExtendFileC */
2199 struct hfsmount *hfsmp;
2200 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2201 int lockflags;
2202
2203 *(ap->a_bytesallocated) = 0;
2204
2205 if (!vnode_isreg(vp))
2206 return (EISDIR);
2207 if (length < (off_t)0)
2208 return (EINVAL);
2209
2210 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2211 return (retval);
2212 cp = VTOC(vp);
2213 fp = VTOF(vp);
2214 hfsmp = VTOHFS(vp);
2215 vcb = VTOVCB(vp);
2216
2217 fileblocks = fp->ff_blocks;
2218 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2219
2220 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2221 retval = EINVAL;
2222 goto Err_Exit;
2223 }
2224
2225 /* Fill in the flags word for the call to Extend the file */
2226
2227 extendFlags = kEFNoClumpMask;
2228 if (ap->a_flags & ALLOCATECONTIG)
2229 extendFlags |= kEFContigMask;
2230 if (ap->a_flags & ALLOCATEALL)
2231 extendFlags |= kEFAllMask;
2232 if (cred && suser(cred, NULL) != 0)
2233 extendFlags |= kEFReserveMask;
2234
2235 retval = E_NONE;
2236 blockHint = 0;
2237 startingPEOF = filebytes;
2238
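	/*
	 * ALLOCATEFROMPEOF means a_length is relative to the current
	 * physical EOF; ALLOCATEFROMVOL means a_offset is a volume position
	 * used only to derive an allocation block hint.
	 */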
2239 if (ap->a_flags & ALLOCATEFROMPEOF)
2240 length += filebytes;
2241 else if (ap->a_flags & ALLOCATEFROMVOL)
2242 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2243
2244 /* If no changes are necessary, then we're done */
2245 if (filebytes == length)
2246 goto Std_Exit;
2247
2248 /*
2249 * Lengthen the size of the file. We must ensure that the
2250 * last byte of the file is allocated. Since the smallest
2251 * value of filebytes is 0, length will be at least 1.
2252 */
2253 if (length > filebytes) {
2254 moreBytesRequested = length - filebytes;
2255
2256 #if QUOTA
2257 retval = hfs_chkdq(cp,
2258 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2259 cred, 0);
2260 if (retval)
2261 goto Err_Exit;
2262
2263 #endif /* QUOTA */
2264 /*
2265 * Metadata zone checks.
2266 */
2267 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2268 /*
2269 * Allocate Journal and Quota files in metadata zone.
2270 */
2271 if (hfs_virtualmetafile(cp)) {
2272 extendFlags |= kEFMetadataMask;
2273 blockHint = hfsmp->hfs_metazone_start;
2274 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2275 (blockHint <= hfsmp->hfs_metazone_end)) {
2276 /*
2277 * Move blockHint outside metadata zone.
2278 */
2279 blockHint = hfsmp->hfs_metazone_end + 1;
2280 }
2281 }
2282
2283 if (hfs_start_transaction(hfsmp) != 0) {
2284 retval = EINVAL;
2285 goto Err_Exit;
2286 }
2287
2288 /* Protect extents b-tree and allocation bitmap */
2289 lockflags = SFL_BITMAP;
2290 if (overflow_extents(fp))
2291 lockflags |= SFL_EXTENTS;
2292 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2293
2294 retval = MacToVFSError(ExtendFileC(vcb,
2295 (FCB*)fp,
2296 moreBytesRequested,
2297 blockHint,
2298 extendFlags,
2299 &actualBytesAdded));
2300
2301 *(ap->a_bytesallocated) = actualBytesAdded;
2302 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2303
2304 hfs_systemfile_unlock(hfsmp, lockflags);
2305
2306 if (hfsmp->jnl) {
2307 (void) hfs_update(vp, TRUE);
2308 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2309 }
2310
2311 hfs_end_transaction(hfsmp);
2312
2313 /*
2314 * If we get an error and no changes were made, then exit;
2315 * otherwise we must do the hfs_update to reflect the changes.
2316 */
2317 if (retval && (startingPEOF == filebytes))
2318 goto Err_Exit;
2319
2320 /*
2321 * Adjust actualBytesAdded to be allocation block aligned, not
2322 * clump size aligned.
2323 * NOTE: What we report here does not match what is actually
2324 * allocated until the file is closed, when we truncate the file
2325 * to allocation block size.
2326 */
2327 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2328 *(ap->a_bytesallocated) =
2329 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2330
2331 } else { /* Shorten the size of the file */
2332
2333 if (fp->ff_size > length) {
2334 /*
2335 * Any buffers that are past the truncation point need to be
2336 * invalidated (to maintain buffer cache consistency).
2337 */
2338 }
2339
2340 if (hfs_start_transaction(hfsmp) != 0) {
2341 retval = EINVAL;
2342 goto Err_Exit;
2343 }
2344
2345 /* Protect extents b-tree and allocation bitmap */
2346 lockflags = SFL_BITMAP;
2347 if (overflow_extents(fp))
2348 lockflags |= SFL_EXTENTS;
2349 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2350
2351 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2352
2353 hfs_systemfile_unlock(hfsmp, lockflags);
2354
2355 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2356
2357 if (hfsmp->jnl) {
2358 (void) hfs_update(vp, TRUE);
2359 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2360 }
2361
2362 hfs_end_transaction(hfsmp);
2363
2364
2365 /*
2366 * If we get an error and no changes were made, then exit;
2367 * otherwise we must do the hfs_update to reflect the changes.
2368 */
2369 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2370 #if QUOTA
2371 /* These are bytesreleased */
2372 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2373 #endif /* QUOTA */
2374
2375 if (fp->ff_size > filebytes) {
2376 fp->ff_size = filebytes;
2377
2378 hfs_unlock(cp);
2379 ubc_setsize(vp, fp->ff_size);
2380 hfs_lock(cp, HFS_FORCE_LOCK);
2381 }
2382 }
2383
2384 Std_Exit:
2385 cp->c_touch_chgtime = TRUE;
2386 cp->c_touch_modtime = TRUE;
2387 retval2 = hfs_update(vp, MNT_WAIT);
2388
2389 if (retval == 0)
2390 retval = retval2;
2391 Err_Exit:
2392 hfs_unlock(cp);
2393 return (retval);
2394 }
2395
2396
2397 /*
2398 * Pagein for HFS filesystem
2399 */
2400 int
2401 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2402 /*
2403 struct vnop_pagein_args {
2404 vnode_t a_vp,
2405 upl_t a_pl,
2406 vm_offset_t a_pl_offset,
2407 off_t a_f_offset,
2408 size_t a_size,
2409 int a_flags
2410 vfs_context_t a_context;
2411 };
2412 */
2413 {
2414 vnode_t vp = ap->a_vp;
2415 int error;
2416
2417 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2418 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2419 /*
2420 * Keep track of blocks read.
2421 */
2422 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2423 struct cnode *cp;
2424 struct filefork *fp;
2425 int bytesread;
2426 int took_cnode_lock = 0;
2427
2428 cp = VTOC(vp);
2429 fp = VTOF(vp);
2430
2431 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2432 bytesread = fp->ff_size;
2433 else
2434 bytesread = ap->a_size;
2435
2436 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2437 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2438 hfs_lock(cp, HFS_FORCE_LOCK);
2439 took_cnode_lock = 1;
2440 }
2441 /*
2442 * If this file hasn't been seen since the start of
2443 * the current sampling period then start over.
2444 */
2445 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2446 struct timeval tv;
2447
2448 fp->ff_bytesread = bytesread;
2449 microtime(&tv);
2450 cp->c_atime = tv.tv_sec;
2451 } else {
2452 fp->ff_bytesread += bytesread;
2453 }
2454 cp->c_touch_acctime = TRUE;
2455 if (took_cnode_lock)
2456 hfs_unlock(cp);
2457 }
2458 return (error);
2459 }
2460
2461 /*
2462 * Pageout for HFS filesystem.
2463 */
2464 int
2465 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2466 /*
2467 struct vnop_pageout_args {
2468 vnode_t a_vp,
2469 upl_t a_pl,
2470 vm_offset_t a_pl_offset,
2471 off_t a_f_offset,
2472 size_t a_size,
2473 int a_flags
2474 vfs_context_t a_context;
2475 };
2476 */
2477 {
2478 vnode_t vp = ap->a_vp;
2479 struct cnode *cp;
2480 struct filefork *fp;
2481 int retval;
2482 off_t end_of_range;
2483 off_t filesize;
2484
2485 cp = VTOC(vp);
2486 if (cp->c_lockowner == current_thread()) {
2487 panic("pageout: %s cnode lock already held!\n",
2488 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2489 }
2490 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2491 return (retval);
2492 }
2493 fp = VTOF(vp);
2494
2495 filesize = fp->ff_size;
2496 end_of_range = ap->a_f_offset + ap->a_size - 1;
2497
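	/*
	 * The pages being pushed out contain valid data, so clamp the
	 * range to the current EOF and drop it from the invalid-range
	 * list before handing the pageout to the cluster layer.
	 */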
2498 if (end_of_range >= filesize) {
2499 end_of_range = (off_t)(filesize - 1);
2500 }
2501 if (ap->a_f_offset < filesize) {
2502 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2503 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2504 }
2505 hfs_unlock(cp);
2506
2507 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2508 ap->a_size, filesize, ap->a_flags);
2509
2510 /*
2511 * If data was written, and setuid or setgid bits are set and
2512 * this process is not the superuser then clear the setuid and
2513 * setgid bits as a precaution against tampering.
2514 */
2515 if ((retval == 0) &&
2516 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2517 (vfs_context_suser(ap->a_context) != 0)) {
2518 hfs_lock(cp, HFS_FORCE_LOCK);
2519 cp->c_mode &= ~(S_ISUID | S_ISGID);
2520 cp->c_touch_chgtime = TRUE;
2521 hfs_unlock(cp);
2522 }
2523 return (retval);
2524 }
2525
2526 /*
2527 * Intercept B-Tree node writes to unswap them if necessary.
2528 */
2529 int
2530 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2531 {
2532 int retval = 0;
2533 register struct buf *bp = ap->a_bp;
2534 register struct vnode *vp = buf_vnode(bp);
2535 BlockDescriptor block;
2536
2537 /* Trap B-Tree writes */
2538 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2539 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2540 (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
2541
2542 /*
2543 * Swap and validate the node if it is in native byte order.
2544 * This is always true on big endian, so we always validate
2545 * before writing here. On little endian, the node typically has
2546 * been swapped and validated when it was written to the journal,
2547 * so we won't do anything here.
2548 */
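		/*
		 * The last two bytes of a B-tree node hold the offset of
		 * record 0, which is always sizeof(BTNodeDescriptor)
		 * (0x000e).  If that value reads correctly in host byte
		 * order, the node has not been swapped yet.
		 */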
2549 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2550 /* Prepare the block pointer */
2551 block.blockHeader = bp;
2552 block.buffer = (char *)buf_dataptr(bp);
2553 block.blockNum = buf_lblkno(bp);
2554 /* not found in cache ==> came from disk */
2555 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2556 block.blockSize = buf_count(bp);
2557
2558 /* Endian un-swap B-Tree node */
2559 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
2560 if (retval)
2561 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
2562 }
2563 }
2564
2565 /* This buffer shouldn't be locked anymore, but if it is, clear it. */
2566 if ((buf_flags(bp) & B_LOCKED)) {
2567 // XXXdbg
2568 if (VTOHFS(vp)->jnl) {
2569 panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
2570 }
2571 buf_clearflags(bp, B_LOCKED);
2572 }
2573 retval = vn_bwrite (ap);
2574
2575 return (retval);
2576 }
2577
2578 /*
2579 * Relocate a file to a new location on disk
2580 * cnode must be locked on entry
2581 *
2582 * Relocation occurs by cloning the file's data from its
2583 * current set of blocks to a new set of blocks. During
2584 * the relocation all of the blocks (old and new) are
2585 * owned by the file.
2586 *
2587 * -----------------
2588 * |///////////////|
2589 * -----------------
2590 * 0 N (file offset)
2591 *
2592 * ----------------- -----------------
2593 * |///////////////| | | STEP 1 (acquire new blocks)
2594 * ----------------- -----------------
2595 * 0 N N+1 2N
2596 *
2597 * ----------------- -----------------
2598 * |///////////////| |///////////////| STEP 2 (clone data)
2599 * ----------------- -----------------
2600 * 0 N N+1 2N
2601 *
2602 * -----------------
2603 * |///////////////| STEP 3 (head truncate blocks)
2604 * -----------------
2605 * 0 N
2606 *
2607 * During steps 2 and 3 page-outs to file offsets less
2608 * than or equal to N are suspended.
2609 *
2610 * During step 3 page-ins to the file are suspended.
2611 */
2612 __private_extern__
2613 int
2614 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2615 struct proc *p)
2616 {
2617 struct cnode *cp;
2618 struct filefork *fp;
2619 struct hfsmount *hfsmp;
2620 u_int32_t headblks;
2621 u_int32_t datablks;
2622 u_int32_t blksize;
2623 u_int32_t growsize;
2624 u_int32_t nextallocsave;
2625 daddr64_t sector_a, sector_b;
2626 int disabled_caching = 0;
2627 int eflags;
2628 off_t newbytes;
2629 int retval;
2630 int lockflags = 0;
2631 int took_trunc_lock = 0;
2632 int started_tr = 0;
2633 enum vtype vnodetype;
2634
2635 vnodetype = vnode_vtype(vp);
2636 if (vnodetype != VREG && vnodetype != VLNK) {
2637 return (EPERM);
2638 }
2639
2640 hfsmp = VTOHFS(vp);
2641 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2642 return (ENOSPC);
2643 }
2644
2645 cp = VTOC(vp);
2646 fp = VTOF(vp);
2647 if (fp->ff_unallocblocks)
2648 return (EINVAL);
2649 blksize = hfsmp->blockSize;
2650 if (blockHint == 0)
2651 blockHint = hfsmp->nextAllocation;
2652
2653 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2654 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2655 return (EFBIG);
2656 }
2657
2658 //
2659 // We do not believe that this call to hfs_fsync() is
2660 // necessary and it causes a journal transaction
2661 // deadlock so we are removing it.
2662 //
2663 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2664 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2665 // if (retval)
2666 // return (retval);
2667 //}
2668
2669 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2670 hfs_unlock(cp);
2671 hfs_lock_truncate(cp, TRUE);
2672 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2673 hfs_unlock_truncate(cp);
2674 return (retval);
2675 }
2676 took_trunc_lock = 1;
2677 }
2678 headblks = fp->ff_blocks;
2679 datablks = howmany(fp->ff_size, blksize);
2680 growsize = datablks * blksize;
2681 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2682 if (blockHint >= hfsmp->hfs_metazone_start &&
2683 blockHint <= hfsmp->hfs_metazone_end)
2684 eflags |= kEFMetadataMask;
2685
2686 if (hfs_start_transaction(hfsmp) != 0) {
2687 if (took_trunc_lock)
2688 hfs_unlock_truncate(cp);
2689 return (EINVAL);
2690 }
2691 started_tr = 1;
2692 /*
2693 * Protect the extents b-tree and the allocation bitmap
2694 * during MapFileBlockC and ExtendFileC operations.
2695 */
2696 lockflags = SFL_BITMAP;
2697 if (overflow_extents(fp))
2698 lockflags |= SFL_EXTENTS;
2699 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2700
2701 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2702 if (retval) {
2703 retval = MacToVFSError(retval);
2704 goto out;
2705 }
2706
2707 /*
2708 * STEP 1 - acquire new allocation blocks.
2709 */
2710 if (!vnode_isnocache(vp)) {
2711 vnode_setnocache(vp);
2712 disabled_caching = 1;
2713
2714 }
2715 nextallocsave = hfsmp->nextAllocation;
2716 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
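	/*
	 * A metadata-zone allocation advances nextAllocation into the zone;
	 * restore the saved value so ordinary allocations are not steered
	 * there, and mark the VCB dirty so the change is written out with
	 * the volume header.
	 */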
2717 if (eflags & kEFMetadataMask) {
2718 HFS_MOUNT_LOCK(hfsmp, TRUE);
2719 hfsmp->nextAllocation = nextallocsave;
2720 hfsmp->vcbFlags |= 0xFF00;
2721 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2722 }
2723
2724 retval = MacToVFSError(retval);
2725 if (retval == 0) {
2726 cp->c_flag |= C_MODIFIED;
2727 if (newbytes < growsize) {
2728 retval = ENOSPC;
2729 goto restore;
2730 } else if (fp->ff_blocks < (headblks + datablks)) {
2731 printf("hfs_relocate: allocation failed");
2732 retval = ENOSPC;
2733 goto restore;
2734 }
2735
2736 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2737 if (retval) {
2738 retval = MacToVFSError(retval);
2739 } else if ((sector_a + 1) == sector_b) {
2740 retval = ENOSPC;
2741 goto restore;
2742 } else if ((eflags & kEFMetadataMask) &&
2743 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2744 hfsmp->hfs_metazone_end)) {
2745 printf("hfs_relocate: didn't move into metadata zone\n");
2746 retval = ENOSPC;
2747 goto restore;
2748 }
2749 }
2750 /* Done with system locks and journal for now. */
2751 hfs_systemfile_unlock(hfsmp, lockflags);
2752 lockflags = 0;
2753 hfs_end_transaction(hfsmp);
2754 started_tr = 0;
2755
2756 if (retval) {
2757 /*
2758 * Check to see if failure is due to excessive fragmentation.
2759 */
2760 if ((retval == ENOSPC) &&
2761 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2762 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2763 }
2764 goto out;
2765 }
2766 /*
2767 * STEP 2 - clone file data into the new allocation blocks.
2768 */
2769
2770 if (vnodetype == VLNK)
2771 retval = hfs_clonelink(vp, blksize, cred, p);
2772 else if (vnode_issystem(vp))
2773 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2774 else
2775 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2776
2777 /* Start transaction for step 3 or for a restore. */
2778 if (hfs_start_transaction(hfsmp) != 0) {
2779 retval = EINVAL;
2780 goto out;
2781 }
2782 started_tr = 1;
2783 if (retval)
2784 goto restore;
2785
2786 /*
2787 * STEP 3 - switch to cloned data and remove old blocks.
2788 */
2789 lockflags = SFL_BITMAP;
2790 if (overflow_extents(fp))
2791 lockflags |= SFL_EXTENTS;
2792 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2793
2794 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2795
2796 hfs_systemfile_unlock(hfsmp, lockflags);
2797 lockflags = 0;
2798 if (retval)
2799 goto restore;
2800 out:
2801 if (took_trunc_lock)
2802 hfs_unlock_truncate(cp);
2803
2804 if (lockflags) {
2805 hfs_systemfile_unlock(hfsmp, lockflags);
2806 lockflags = 0;
2807 }
2808
2809 // See comment up above about calls to hfs_fsync()
2810 //
2811 //if (retval == 0)
2812 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2813
2814 if (hfsmp->jnl) {
2815 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2816 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2817 else
2818 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2819 }
2820 exit:
2821 if (disabled_caching) {
2822 vnode_clearnocache(vp);
2823 }
2824 if (started_tr)
2825 hfs_end_transaction(hfsmp);
2826
2827 return (retval);
2828
2829 restore:
2830 if (fp->ff_blocks == headblks)
2831 goto exit;
2832 /*
2833 * Give back any newly allocated space.
2834 */
2835 if (lockflags == 0) {
2836 lockflags = SFL_BITMAP;
2837 if (overflow_extents(fp))
2838 lockflags |= SFL_EXTENTS;
2839 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2840 }
2841
2842 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2843
2844 hfs_systemfile_unlock(hfsmp, lockflags);
2845 lockflags = 0;
2846
2847 if (took_trunc_lock)
2848 hfs_unlock_truncate(cp);
2849 goto exit;
2850 }
2851
2852
2853 /*
2854 * Clone a symlink.
2855 *
2856 */
2857 static int
2858 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2859 {
2860 struct buf *head_bp = NULL;
2861 struct buf *tail_bp = NULL;
2862 int error;
2863
2864
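	/*
	 * A symlink's data fits in a single allocation block.  Read the
	 * original block (logical block 0) and copy it into the block that
	 * hfs_relocate just appended (logical block 1); the old buffer is
	 * then invalidated and any remaining dirty buffers are flushed.
	 */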
2865 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2866 if (error)
2867 goto out;
2868
2869 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2870 if (tail_bp == NULL) {
2871 error = EIO;
2872 goto out;
2873 }
2874 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2875 error = (int)buf_bwrite(tail_bp);
2876 out:
2877 if (head_bp) {
2878 buf_markinvalid(head_bp);
2879 buf_brelse(head_bp);
2880 }
2881 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2882
2883 return (error);
2884 }
2885
2886 /*
2887 * Clone a file's data within the file.
2888 *
2889 */
2890 static int
2891 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2892 {
2893 caddr_t bufp;
2894 size_t writebase;
2895 size_t bufsize;
2896 size_t copysize;
2897 size_t iosize;
2898 off_t filesize;
2899 size_t offset;
2900 uio_t auio;
2901 int error = 0;
2902
2903 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2904 writebase = blkstart * blksize;
2905 copysize = blkcnt * blksize;
2906 iosize = bufsize = MIN(copysize, 4096 * 16);
2907 offset = 0;
2908
2909 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2910 return (ENOMEM);
2911 }
2912 hfs_unlock(VTOC(vp));
2913
2914 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2915
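	/*
	 * Copy the fork in bufsize chunks: read from the original blocks
	 * (file offsets 0..copysize) and rewrite the same data starting at
	 * writebase, just past the old blocks, using IO_NOCACHE so no stale
	 * cached copies are left behind.
	 */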
2916 while (offset < copysize) {
2917 iosize = MIN(copysize - offset, iosize);
2918
2919 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2920 uio_addiov(auio, (uintptr_t)bufp, iosize);
2921
2922 error = cluster_read(vp, auio, copysize, 0);
2923 if (error) {
2924 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2925 break;
2926 }
2927 if (uio_resid(auio) != 0) {
2928 printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2929 error = EIO;
2930 break;
2931 }
2932
2933 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2934 uio_addiov(auio, (uintptr_t)bufp, iosize);
2935
2936 error = cluster_write(vp, auio, filesize + offset,
2937 filesize + offset + iosize,
2938 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2939 if (error) {
2940 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2941 break;
2942 }
2943 if (uio_resid(auio) != 0) {
2944 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2945 error = EIO;
2946 break;
2947 }
2948 offset += iosize;
2949 }
2950 uio_free(auio);
2951
2952 /*
2953 * No need to call ubc_sync_range or hfs_invalbuf
2954 * since the file was copied using IO_NOCACHE.
2955 */
2956
2957 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2958
2959 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2960 return (error);
2961 }
2962
2963 /*
2964 * Clone a system (metadata) file.
2965 *
2966 */
2967 static int
2968 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2969 kauth_cred_t cred, struct proc *p)
2970 {
2971 caddr_t bufp;
2972 char * offset;
2973 size_t bufsize;
2974 size_t iosize;
2975 struct buf *bp = NULL;
2976 daddr64_t blkno;
2977 daddr64_t blk;
2978 daddr64_t start_blk;
2979 daddr64_t last_blk;
2980 int breadcnt;
2981 int i;
2982 int error = 0;
2983
2984
2985 iosize = GetLogicalBlockSize(vp);
2986 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
2987 breadcnt = bufsize / iosize;
2988
2989 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2990 return (ENOMEM);
2991 }
2992 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
2993 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
2994 blkno = 0;
2995
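	/*
	 * Source blocks run from logical block 0 up to last_blk; the cloned
	 * copy begins at start_blk.  Copy through the metadata buffer
	 * cache, up to bufsize bytes per pass.
	 */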
2996 while (blkno < last_blk) {
2997 /*
2998 * Read up to a megabyte
2999 */
3000 offset = bufp;
3001 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3002 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3003 if (error) {
3004 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3005 goto out;
3006 }
3007 if (buf_count(bp) != iosize) {
3008 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3009 goto out;
3010 }
3011 bcopy((char *)buf_dataptr(bp), offset, iosize);
3012
3013 buf_markinvalid(bp);
3014 buf_brelse(bp);
3015 bp = NULL;
3016
3017 offset += iosize;
3018 }
3019
3020 /*
3021 * Write up to a megabyte
3022 */
3023 offset = bufp;
3024 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3025 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3026 if (bp == NULL) {
3027 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3028 error = EIO;
3029 goto out;
3030 }
3031 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3032 error = (int)buf_bwrite(bp);
3033 bp = NULL;
3034 if (error)
3035 goto out;
3036 offset += iosize;
3037 }
3038 }
3039 out:
3040 if (bp) {
3041 buf_brelse(bp);
3042 }
3043
3044 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3045
3046 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3047
3048 return (error);
3049 }