1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* @(#)hfs_readwrite.c 1.0
23 *
24 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
25 *
26 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
27 *
28 */
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/resourcevar.h>
33 #include <sys/kernel.h>
34 #include <sys/fcntl.h>
35 #include <sys/filedesc.h>
36 #include <sys/stat.h>
37 #include <sys/buf.h>
38 #include <sys/proc.h>
39 #include <sys/kauth.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/vfs_context.h>
43
44 #include <miscfs/specfs/specdev.h>
45
46 #include <sys/ubc.h>
47 #include <vm/vm_pageout.h>
48 #include <vm/vm_kern.h>
49
50 #include <sys/kdebug.h>
51
52 #include "hfs.h"
53 #include "hfs_endian.h"
54 #include "hfs_fsctl.h"
55 #include "hfs_quota.h"
56 #include "hfscommon/headers/FileMgrInternal.h"
57 #include "hfscommon/headers/BTreesInternal.h"
58 #include "hfs_cnode.h"
59 #include "hfs_dbg.h"
60
61 extern int overflow_extents(struct filefork *fp);
62
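/*
 * can_cluster: true when a logical block size is a multiple of 4K and no
 * larger than half of MAXPHYSIO; hfs_bmap() uses this below to decide
 * whether a read-ahead run can be reported for that block size.
 */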
63 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
64
65 enum {
66 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
67 };
68
69 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
70
71 extern int hfs_setextendedsecurity(struct hfsmount *, int);
72
73
74 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
75 static int hfs_clonefile(struct vnode *, int, int, int);
76 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
77
78
79 /*****************************************************************************
80 *
81 * I/O Operations on vnodes
82 *
83 *****************************************************************************/
84 int hfs_vnop_read(struct vnop_read_args *);
85 int hfs_vnop_write(struct vnop_write_args *);
86 int hfs_vnop_ioctl(struct vnop_ioctl_args *);
87 int hfs_vnop_select(struct vnop_select_args *);
88 int hfs_vnop_blktooff(struct vnop_blktooff_args *);
89 int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
90 int hfs_vnop_blockmap(struct vnop_blockmap_args *);
91 int hfs_vnop_strategy(struct vnop_strategy_args *);
92 int hfs_vnop_allocate(struct vnop_allocate_args *);
93 int hfs_vnop_pagein(struct vnop_pagein_args *);
94 int hfs_vnop_pageout(struct vnop_pageout_args *);
95 int hfs_vnop_bwrite(struct vnop_bwrite_args *);
96
97
98 /*
99 * Read data from a file.
100 */
101 int
102 hfs_vnop_read(struct vnop_read_args *ap)
103 {
104 uio_t uio = ap->a_uio;
105 struct vnode *vp = ap->a_vp;
106 struct cnode *cp;
107 struct filefork *fp;
108 struct hfsmount *hfsmp;
109 off_t filesize;
110 off_t filebytes;
111 off_t start_resid = uio_resid(uio);
112 off_t offset = uio_offset(uio);
113 int retval = 0;
114
115
116 /* Preflight checks */
117 if (!vnode_isreg(vp)) {
118 /* can only read regular files */
119 if (vnode_isdir(vp))
120 return (EISDIR);
121 else
122 return (EPERM);
123 }
124 if (start_resid == 0)
125 return (0); /* Nothing left to do */
126 if (offset < 0)
 127 return (EINVAL); /* can't read from a negative offset */
128
129 cp = VTOC(vp);
130 fp = VTOF(vp);
131 hfsmp = VTOHFS(vp);
132
133 /* Protect against a size change. */
134 hfs_lock_truncate(cp, 0);
135
136 filesize = fp->ff_size;
137 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
138 if (offset > filesize) {
139 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
140 (offset > (off_t)MAXHFSFILESIZE)) {
141 retval = EFBIG;
142 }
143 goto exit;
144 }
145
146 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
147 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
148
149 retval = cluster_read(vp, uio, filesize, 0);
150
151 cp->c_touch_acctime = TRUE;
152
153 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
154 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
155
 156 /*
 157 * Keep track of blocks read
 158 */
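/*
 * While the hot-file recording stage (HFC_RECORDING) is active, ff_bytesread
 * accumulates the bytes read during the current sampling period; c_atime is
 * compared against hfc_timebase below to detect the start of a new period.
 */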
159 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
160 int took_cnode_lock = 0;
161 off_t bytesread;
162
163 bytesread = start_resid - uio_resid(uio);
164
165 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
166 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
167 hfs_lock(cp, HFS_FORCE_LOCK);
168 took_cnode_lock = 1;
169 }
170 /*
171 * If this file hasn't been seen since the start of
172 * the current sampling period then start over.
173 */
174 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
175 struct timeval tv;
176
177 fp->ff_bytesread = bytesread;
178 microtime(&tv);
179 cp->c_atime = tv.tv_sec;
180 } else {
181 fp->ff_bytesread += bytesread;
182 }
183 if (took_cnode_lock)
184 hfs_unlock(cp);
185 }
186 exit:
187 hfs_unlock_truncate(cp);
188 return (retval);
189 }
190
191 /*
192 * Write data to a file.
193 */
194 int
195 hfs_vnop_write(struct vnop_write_args *ap)
196 {
197 uio_t uio = ap->a_uio;
198 struct vnode *vp = ap->a_vp;
199 struct cnode *cp;
200 struct filefork *fp;
201 struct hfsmount *hfsmp;
202 kauth_cred_t cred = NULL;
203 off_t origFileSize;
204 off_t writelimit;
205 off_t bytesToAdd;
206 off_t actualBytesAdded;
207 off_t filebytes;
208 off_t offset;
209 size_t resid;
210 int eflags;
211 int ioflag = ap->a_ioflag;
212 int retval = 0;
213 int lockflags;
214 int cnode_locked = 0;
215
 216 // LP64todo - fix this! uio_resid may be a 64-bit value
217 resid = uio_resid(uio);
218 offset = uio_offset(uio);
219
220 if (offset < 0)
221 return (EINVAL);
222 if (resid == 0)
223 return (E_NONE);
224 if (!vnode_isreg(vp))
225 return (EPERM); /* Can only write regular files */
226
227 /* Protect against a size change. */
228 hfs_lock_truncate(VTOC(vp), TRUE);
229
230 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
231 hfs_unlock_truncate(VTOC(vp));
232 return (retval);
233 }
234 cnode_locked = 1;
235 cp = VTOC(vp);
236 fp = VTOF(vp);
237 hfsmp = VTOHFS(vp);
238 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
239
240 if (ioflag & IO_APPEND) {
241 uio_setoffset(uio, fp->ff_size);
242 offset = fp->ff_size;
243 }
244 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
245 retval = EPERM;
246 goto exit;
247 }
248
249 origFileSize = fp->ff_size;
250 eflags = kEFDeferMask; /* defer file block allocations */
251
252 #ifdef HFS_SPARSE_DEV
253 /*
254 * When the underlying device is sparse and space
255 * is low (< 8MB), stop doing delayed allocations
256 * and begin doing synchronous I/O.
257 */
258 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
259 (hfs_freeblks(hfsmp, 0) < 2048)) {
260 eflags &= ~kEFDeferMask;
261 ioflag |= IO_SYNC;
262 }
263 #endif /* HFS_SPARSE_DEV */
264
265 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
266 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
267
268 /* Now test if we need to extend the file */
269 /* Doing so will adjust the filebytes for us */
270
271 writelimit = offset + resid;
272 if (writelimit <= filebytes)
273 goto sizeok;
274
275 cred = vfs_context_ucred(ap->a_context);
276 #if QUOTA
277 bytesToAdd = writelimit - filebytes;
278 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
279 cred, 0);
280 if (retval)
281 goto exit;
282 #endif /* QUOTA */
283
284 if (hfs_start_transaction(hfsmp) != 0) {
285 retval = EINVAL;
286 goto exit;
287 }
288
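/*
 * Extend the fork until enough blocks are allocated to cover writelimit.
 * ExtendFileC() may add fewer bytes than requested in a single call, so
 * loop until filebytes reaches writelimit or an error (including ENOSPC
 * when nothing more could be added) stops us.
 */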
289 while (writelimit > filebytes) {
290 bytesToAdd = writelimit - filebytes;
291 if (cred && suser(cred, NULL) != 0)
292 eflags |= kEFReserveMask;
293
294 /* Protect extents b-tree and allocation bitmap */
295 lockflags = SFL_BITMAP;
296 if (overflow_extents(fp))
297 lockflags |= SFL_EXTENTS;
298 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
299
300 /* Files that are changing size are not hot file candidates. */
301 if (hfsmp->hfc_stage == HFC_RECORDING) {
302 fp->ff_bytesread = 0;
303 }
304 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
305 0, eflags, &actualBytesAdded));
306
307 hfs_systemfile_unlock(hfsmp, lockflags);
308
309 if ((actualBytesAdded == 0) && (retval == E_NONE))
310 retval = ENOSPC;
311 if (retval != E_NONE)
312 break;
313 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
314 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
315 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
316 }
317 (void) hfs_update(vp, TRUE);
318 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
319 (void) hfs_end_transaction(hfsmp);
320
321 sizeok:
322 if (retval == E_NONE) {
323 off_t filesize;
324 off_t zero_off;
325 off_t tail_off;
326 off_t inval_start;
327 off_t inval_end;
328 off_t io_start;
329 int lflag;
330 struct rl_entry *invalid_range;
331
332 if (writelimit > fp->ff_size)
333 filesize = writelimit;
334 else
335 filesize = fp->ff_size;
336
337 lflag = (ioflag & IO_SYNC);
338
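/*
 * Decide whether cluster_write() must zero-fill ahead of the data: when
 * the write starts at or before the current EOF, any invalid bytes between
 * the enclosing page boundary and the write start are zeroed via
 * IO_HEADZEROFILL; when it starts beyond EOF, the newly exposed bytes are
 * either zeroed here or recorded as invalid ranges so they can never be
 * read back unzeroed.
 */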
339 if (offset <= fp->ff_size) {
340 zero_off = offset & ~PAGE_MASK_64;
341
 342 /* Check whether the area between zero_off and the start
 343 of the transfer is invalid and should be zero-filled
 344 as part of the transfer:
 345 */
346 if (offset > zero_off) {
347 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
348 lflag |= IO_HEADZEROFILL;
349 }
350 } else {
351 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
352
 353 /* The bytes between fp->ff_size and uio->uio_offset must never be
 354 read without being zeroed. The current last block is filled with zeroes
 355 if it holds valid data, but in all cases we merely do a little bookkeeping
 356 to track the area from the end of the current last page to the start of
 357 the area actually written. For the same reason only the bytes up to the
 358 start of the page where this write will start are invalidated; any remainder
 359 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
 360
 361 Note that inval_start, the start of the page after the current EOF,
 362 may be past the start of the write, in which case the zeroing
 363 will be handled by the cluster_write of the actual data.
 364 */
365 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
366 inval_end = offset & ~PAGE_MASK_64;
367 zero_off = fp->ff_size;
368
369 if ((fp->ff_size & PAGE_MASK_64) &&
370 (rl_scan(&fp->ff_invalidranges,
371 eof_page_base,
372 fp->ff_size - 1,
373 &invalid_range) != RL_NOOVERLAP)) {
374 /* The page containing the EOF is not valid, so the
375 entire page must be made inaccessible now. If the write
376 starts on a page beyond the page containing the eof
377 (inval_end > eof_page_base), add the
378 whole page to the range to be invalidated. Otherwise
379 (i.e. if the write starts on the same page), zero-fill
380 the entire page explicitly now:
381 */
382 if (inval_end > eof_page_base) {
383 inval_start = eof_page_base;
384 } else {
385 zero_off = eof_page_base;
386 };
387 };
388
389 if (inval_start < inval_end) {
390 struct timeval tv;
391 /* There's some range of data that's going to be marked invalid */
392
393 if (zero_off < inval_start) {
394 /* The pages between inval_start and inval_end are going to be invalidated,
395 and the actual write will start on a page past inval_end. Now's the last
396 chance to zero-fill the page containing the EOF:
397 */
398 hfs_unlock(cp);
399 cnode_locked = 0;
400 retval = cluster_write(vp, (uio_t) 0,
401 fp->ff_size, inval_start,
402 zero_off, (off_t)0,
403 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
404 hfs_lock(cp, HFS_FORCE_LOCK);
405 cnode_locked = 1;
406 if (retval) goto ioerr_exit;
407 offset = uio_offset(uio);
408 };
409
410 /* Mark the remaining area of the newly allocated space as invalid: */
411 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
412 microuptime(&tv);
413 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
414 zero_off = fp->ff_size = inval_end;
415 };
416
417 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
418 };
419
420 /* Check to see whether the area between the end of the write and the end of
421 the page it falls in is invalid and should be zero-filled as part of the transfer:
422 */
423 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
424 if (tail_off > filesize) tail_off = filesize;
425 if (tail_off > writelimit) {
426 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
427 lflag |= IO_TAILZEROFILL;
428 };
429 };
430
431 /*
432 * if the write starts beyond the current EOF (possibly advanced in the
433 * zeroing of the last block, above), then we'll zero fill from the current EOF
434 * to where the write begins:
435 *
436 * NOTE: If (and ONLY if) the portion of the file about to be written is
437 * before the current EOF it might be marked as invalid now and must be
438 * made readable (removed from the invalid ranges) before cluster_write
439 * tries to write it:
440 */
441 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
442 if (io_start < fp->ff_size) {
443 off_t io_end;
444
445 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
446 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
447 };
448
449 hfs_unlock(cp);
450 cnode_locked = 0;
451 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
452 tail_off, lflag | IO_NOZERODIRTY);
453 offset = uio_offset(uio);
454 if (offset > fp->ff_size) {
455 fp->ff_size = offset;
456
457 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
458 /* Files that are changing size are not hot file candidates. */
459 if (hfsmp->hfc_stage == HFC_RECORDING)
460 fp->ff_bytesread = 0;
461 }
462 if (resid > uio_resid(uio)) {
463 cp->c_touch_chgtime = TRUE;
464 cp->c_touch_modtime = TRUE;
465 }
466 }
467 HFS_KNOTE(vp, NOTE_WRITE);
468
469 ioerr_exit:
470 /*
471 * If we successfully wrote any data, and we are not the superuser
472 * we clear the setuid and setgid bits as a precaution against
473 * tampering.
474 */
475 if (cp->c_mode & (S_ISUID | S_ISGID)) {
476 cred = vfs_context_ucred(ap->a_context);
477 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
478 if (!cnode_locked) {
479 hfs_lock(cp, HFS_FORCE_LOCK);
480 cnode_locked = 1;
481 }
482 cp->c_mode &= ~(S_ISUID | S_ISGID);
483 }
484 }
485 if (retval) {
486 if (ioflag & IO_UNIT) {
487 if (!cnode_locked) {
488 hfs_lock(cp, HFS_FORCE_LOCK);
489 cnode_locked = 1;
490 }
491 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
492 0, ap->a_context);
 493 // LP64todo - fix this! resid needs to be user_ssize_t
494 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
495 uio_setresid(uio, resid);
496 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
497 }
498 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
499 if (!cnode_locked) {
500 hfs_lock(cp, HFS_FORCE_LOCK);
501 cnode_locked = 1;
502 }
503 retval = hfs_update(vp, TRUE);
504 }
505 /* Updating vcbWrCnt doesn't need to be atomic. */
506 hfsmp->vcbWrCnt++;
507
508 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
509 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
510 exit:
511 if (cnode_locked)
512 hfs_unlock(cp);
513 hfs_unlock_truncate(cp);
514 return (retval);
515 }
516
517 /* support for the "bulk-access" fcntl */
518
519 #define CACHE_ELEMS 64
520 #define CACHE_LEVELS 16
521 #define PARENT_IDS_FLAG 0x100
522
523 /* from hfs_attrlist.c */
524 extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
525 mode_t obj_mode, struct mount *mp,
526 kauth_cred_t cred, struct proc *p);
527
528 /* from vfs/vfs_fsevents.c */
529 extern char *get_pathbuff(void);
530 extern void release_pathbuff(char *buff);
531
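/*
 * The access cache is a sorted array of directory cnids (acache) with a
 * parallel array of access results (haveaccess), capped at CACHE_ELEMS
 * entries. lookup_bucket() binary-searches it and add_node() inserts an
 * entry (or replaces one when the cache is full).
 */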
532 struct access_cache {
533 int numcached;
534 int cachehits; /* these two for statistics gathering */
535 int lookups;
536 unsigned int *acache;
537 Boolean *haveaccess;
538 };
539
540 struct access_t {
541 uid_t uid; /* IN: effective user id */
542 short flags; /* IN: access requested (i.e. R_OK) */
543 short num_groups; /* IN: number of groups user belongs to */
544 int num_files; /* IN: number of files to process */
545 int *file_ids; /* IN: array of file ids */
546 gid_t *groups; /* IN: array of groups */
547 short *access; /* OUT: access info for each file (0 for 'has access') */
548 };
549
550 struct user_access_t {
551 uid_t uid; /* IN: effective user id */
552 short flags; /* IN: access requested (i.e. R_OK) */
553 short num_groups; /* IN: number of groups user belongs to */
554 int num_files; /* IN: number of files to process */
555 user_addr_t file_ids; /* IN: array of file ids */
556 user_addr_t groups; /* IN: array of groups */
557 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
558 };
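/*
 * access_t is the 32-bit caller's layout and user_access_t the LP64 layout
 * (user_addr_t pointers); the HFS_BULKACCESS handler converts the former
 * into the latter before use.
 */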
559
560 /*
561 * Perform a binary search for the given parent_id. Return value is
562 * found/not found boolean, and indexp will be the index of the item
563 * or the index at which to insert the item if it's not found.
564 */
565 static int
566 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
567 {
568 unsigned int lo, hi;
569 int index, matches = 0;
570
571 if (cache->numcached == 0) {
572 *indexp = 0;
573 return 0; // table is empty, so insert at index=0 and report no match
574 }
575
576 if (cache->numcached > CACHE_ELEMS) {
577 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
578 cache->numcached, CACHE_ELEMS);*/
579 cache->numcached = CACHE_ELEMS;
580 }
581
582 lo = 0;
583 hi = cache->numcached - 1;
584 index = -1;
585
586 /* perform binary search for parent_id */
587 do {
588 unsigned int mid = (hi - lo)/2 + lo;
589 unsigned int this_id = cache->acache[mid];
590
591 if (parent_id == this_id) {
592 index = mid;
593 break;
594 }
595
596 if (parent_id < this_id) {
597 hi = mid;
598 continue;
599 }
600
601 if (parent_id > this_id) {
602 lo = mid + 1;
603 continue;
604 }
605 } while(lo < hi);
606
607 /* check if lo and hi converged on the match */
608 if (parent_id == cache->acache[hi]) {
609 index = hi;
610 }
611
612 /* if no existing entry found, find index for new one */
613 if (index == -1) {
614 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
615 matches = 0;
616 } else {
617 matches = 1;
618 }
619
620 *indexp = index;
621 return matches;
622 }
623
624 /*
625 * Add a node to the access_cache at the given index (or do a lookup first
626 * to find the index if -1 is passed in). We currently do a replace rather
627 * than an insert if the cache is full.
628 */
629 static void
630 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
631 {
632 int lookup_index = -1;
633
634 /* need to do a lookup first if -1 passed for index */
635 if (index == -1) {
636 if (lookup_bucket(cache, &lookup_index, nodeID)) {
637 if (cache->haveaccess[lookup_index] != access) {
638 /* change access info for existing entry... should never happen */
639 cache->haveaccess[lookup_index] = access;
640 }
641
642 /* mission accomplished */
643 return;
644 } else {
645 index = lookup_index;
646 }
647
648 }
649
650 /* if the cache is full, do a replace rather than an insert */
651 if (cache->numcached >= CACHE_ELEMS) {
652 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
653 cache->numcached = CACHE_ELEMS-1;
654
655 if (index > cache->numcached) {
656 // printf("index %d pinned to %d\n", index, cache->numcached);
657 index = cache->numcached;
658 }
659 } else if (index >= 0 && index < cache->numcached) {
660 /* only do bcopy if we're inserting */
661 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
662 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
663 }
664
665 cache->acache[index] = nodeID;
666 cache->haveaccess[index] = access;
667 cache->numcached++;
668 }
669
670
671 struct cinfo {
672 uid_t uid;
673 gid_t gid;
674 mode_t mode;
675 cnid_t parentcnid;
676 };
677
678 static int
679 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
680 {
681 struct cinfo *cip = (struct cinfo *)arg;
682
683 cip->uid = attrp->ca_uid;
684 cip->gid = attrp->ca_gid;
685 cip->mode = attrp->ca_mode;
686 cip->parentcnid = descp->cd_parentcnid;
687
688 return (0);
689 }
690
691 /*
692 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
693 * isn't incore, then go to the catalog.
694 */
695 static int
696 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
697 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
698 {
699 int error = 0;
700
701 /* if this id matches the one the fsctl was called with, skip the lookup */
702 if (cnid == skip_cp->c_cnid) {
703 cnattrp->ca_uid = skip_cp->c_uid;
704 cnattrp->ca_gid = skip_cp->c_gid;
705 cnattrp->ca_mode = skip_cp->c_mode;
706 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
707 } else {
708 struct cinfo c_info;
709
 710 /* otherwise, check the cnode hash in case the file/dir is incore */
711 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
712 cnattrp->ca_uid = c_info.uid;
713 cnattrp->ca_gid = c_info.gid;
714 cnattrp->ca_mode = c_info.mode;
715 keyp->hfsPlus.parentID = c_info.parentcnid;
716 } else {
717 int lockflags;
718
719 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
720
721 /* lookup this cnid in the catalog */
722 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
723
724 hfs_systemfile_unlock(hfsmp, lockflags);
725
726 cache->lookups++;
727 }
728 }
729
730 return (error);
731 }
732
733 /*
734 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
735 * up to CACHE_LEVELS as we progress towards the root.
736 */
737 static int
738 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
739 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
740 {
741 int myErr = 0;
742 int myResult;
743 HFSCatalogNodeID thisNodeID;
744 unsigned long myPerms;
745 struct cat_attr cnattr;
746 int cache_index = -1;
747 CatalogKey catkey;
748
749 int i = 0, ids_to_cache = 0;
750 int parent_ids[CACHE_LEVELS];
751
752 /* root always has access */
753 if (!suser(myp_ucred, NULL)) {
754 return (1);
755 }
756
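/*
 * Walk from nodeID up toward the root, requiring search (X_OK) permission
 * on each directory. The ids visited are remembered in parent_ids[] so the
 * final result can be cached for all of them on exit.
 */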
757 thisNodeID = nodeID;
758 while (thisNodeID >= kRootDirID) {
759 myResult = 0; /* default to "no access" */
760
761 /* check the cache before resorting to hitting the catalog */
762
763 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
764 * to look any further after hitting cached dir */
765
766 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
767 cache->cachehits++;
768 myResult = cache->haveaccess[cache_index];
769 goto ExitThisRoutine;
770 }
771
772 /* remember which parents we want to cache */
773 if (ids_to_cache < CACHE_LEVELS) {
774 parent_ids[ids_to_cache] = thisNodeID;
775 ids_to_cache++;
776 }
777
778 /* do the lookup (checks the cnode hash, then the catalog) */
779 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
780 if (myErr) {
781 goto ExitThisRoutine; /* no access */
782 }
783
784 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
785 cnattr.ca_mode, hfsmp->hfs_mp,
786 myp_ucred, theProcPtr);
787
788 if ( (myPerms & X_OK) == 0 ) {
789 myResult = 0;
790 goto ExitThisRoutine; /* no access */
791 }
792
793 /* up the hierarchy we go */
794 thisNodeID = catkey.hfsPlus.parentID;
795 }
796
797 /* if here, we have access to this node */
798 myResult = 1;
799
800 ExitThisRoutine:
801 if (myErr) {
802 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
803 myResult = 0;
804 }
805 *err = myErr;
806
807 /* cache the parent directory(ies) */
808 for (i = 0; i < ids_to_cache; i++) {
809 /* small optimization: get rid of double-lookup for all these */
810 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
811 add_node(cache, -1, parent_ids[i], myResult);
812 }
813
814 return (myResult);
815 }
816 /* end "bulk-access" support */
817
818
819
820 /*
821 * Callback for use with freeze ioctl.
822 */
823 static int
824 hfs_freezewrite_callback(struct vnode *vp, void *cargs)
825 {
826 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
827
828 return 0;
829 }
830
831 /*
832 * Control filesystem operating characteristics.
833 */
834 int
835 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
836 vnode_t a_vp;
837 int a_command;
838 caddr_t a_data;
839 int a_fflag;
840 vfs_context_t a_context;
841 } */ *ap)
842 {
843 struct vnode * vp = ap->a_vp;
844 struct hfsmount *hfsmp = VTOHFS(vp);
845 vfs_context_t context = ap->a_context;
846 kauth_cred_t cred = vfs_context_ucred(context);
847 proc_t p = vfs_context_proc(context);
848 struct vfsstatfs *vfsp;
849 boolean_t is64bit;
850
851 is64bit = proc_is64bit(p);
852
853 switch (ap->a_command) {
854
855 case HFS_RESIZE_PROGRESS: {
856
857 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
858 if (suser(cred, NULL) &&
859 kauth_cred_getuid(cred) != vfsp->f_owner) {
860 return (EACCES); /* must be owner of file system */
861 }
862 if (!vnode_isvroot(vp)) {
863 return (EINVAL);
864 }
865 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
866 }
867 case HFS_RESIZE_VOLUME: {
868 u_int64_t newsize;
869 u_int64_t cursize;
870
871 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
872 if (suser(cred, NULL) &&
873 kauth_cred_getuid(cred) != vfsp->f_owner) {
874 return (EACCES); /* must be owner of file system */
875 }
876 if (!vnode_isvroot(vp)) {
877 return (EINVAL);
878 }
879 newsize = *(u_int64_t *)ap->a_data;
880 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
881
882 if (newsize > cursize) {
883 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
884 } else if (newsize < cursize) {
885 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
886 } else {
887 return (0);
888 }
889 }
890 case HFS_CHANGE_NEXT_ALLOCATION: {
891 u_int32_t location;
892
893 if (vnode_vfsisrdonly(vp)) {
894 return (EROFS);
895 }
896 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
897 if (suser(cred, NULL) &&
898 kauth_cred_getuid(cred) != vfsp->f_owner) {
899 return (EACCES); /* must be owner of file system */
900 }
901 if (!vnode_isvroot(vp)) {
902 return (EINVAL);
903 }
904 location = *(u_int32_t *)ap->a_data;
905 if (location > hfsmp->totalBlocks - 1) {
906 return (EINVAL);
907 }
908 /* Return previous value. */
909 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
910 HFS_MOUNT_LOCK(hfsmp, TRUE);
911 hfsmp->nextAllocation = location;
912 hfsmp->vcbFlags |= 0xFF00;
913 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
914 return (0);
915 }
916
917 #ifdef HFS_SPARSE_DEV
918 case HFS_SETBACKINGSTOREINFO: {
919 struct vnode * bsfs_rootvp;
920 struct vnode * di_vp;
921 struct hfs_backingstoreinfo *bsdata;
922 int error = 0;
923
924 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
925 return (EALREADY);
926 }
927 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
928 if (suser(cred, NULL) &&
929 kauth_cred_getuid(cred) != vfsp->f_owner) {
930 return (EACCES); /* must be owner of file system */
931 }
932 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
933 if (bsdata == NULL) {
934 return (EINVAL);
935 }
936 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
937 return (error);
938 }
939 if ((error = vnode_getwithref(di_vp))) {
940 file_drop(bsdata->backingfd);
941 return(error);
942 }
943
944 if (vnode_mount(vp) == vnode_mount(di_vp)) {
945 (void)vnode_put(di_vp);
946 file_drop(bsdata->backingfd);
947 return (EINVAL);
948 }
949
950 /*
951 * Obtain the backing fs root vnode and keep a reference
952 * on it. This reference will be dropped in hfs_unmount.
953 */
954 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
955 if (error) {
956 (void)vnode_put(di_vp);
957 file_drop(bsdata->backingfd);
958 return (error);
959 }
960 vnode_ref(bsfs_rootvp);
961 vnode_put(bsfs_rootvp);
962
963 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
964 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
965 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
966 hfsmp->hfs_sparsebandblks *= 4;
967
968 (void)vnode_put(di_vp);
969 file_drop(bsdata->backingfd);
970 return (0);
971 }
972 case HFS_CLRBACKINGSTOREINFO: {
973 struct vnode * tmpvp;
974
975 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
976 if (suser(cred, NULL) &&
977 kauth_cred_getuid(cred) != vfsp->f_owner) {
978 return (EACCES); /* must be owner of file system */
979 }
980 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
981 hfsmp->hfs_backingfs_rootvp) {
982
983 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
984 tmpvp = hfsmp->hfs_backingfs_rootvp;
985 hfsmp->hfs_backingfs_rootvp = NULLVP;
986 hfsmp->hfs_sparsebandblks = 0;
987 vnode_rele(tmpvp);
988 }
989 return (0);
990 }
991 #endif /* HFS_SPARSE_DEV */
992
993 case F_FREEZE_FS: {
994 struct mount *mp;
995 task_t task;
996
997 if (!is_suser())
998 return (EACCES);
999
1000 mp = vnode_mount(vp);
1001 hfsmp = VFSTOHFS(mp);
1002
1003 if (!(hfsmp->jnl))
1004 return (ENOTSUP);
1005
1006 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
1007
1008 task = current_task();
1009 task_working_set_disable(task);
1010
1011 // flush things before we get started to try and prevent
1012 // dirty data from being paged out while we're frozen.
1013 // note: can't do this after taking the lock as it will
1014 // deadlock against ourselves.
1015 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1016 hfs_global_exclusive_lock_acquire(hfsmp);
1017 journal_flush(hfsmp->jnl);
1018
1019 // don't need to iterate on all vnodes, we just need to
1020 // wait for writes to the system files and the device vnode
1021 if (HFSTOVCB(hfsmp)->extentsRefNum)
1022 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1023 if (HFSTOVCB(hfsmp)->catalogRefNum)
1024 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1025 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1026 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1027 if (hfsmp->hfs_attribute_vp)
1028 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1029 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1030
1031 hfsmp->hfs_freezing_proc = current_proc();
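// both hfsmp->hfs_insync and the global exclusive lock stay held until
// F_THAW_FS below (or the "thaw" code in hfs_vnop_close) releases them.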
1032
1033 return (0);
1034 }
1035
1036 case F_THAW_FS: {
1037 if (!is_suser())
1038 return (EACCES);
1039
1040 // if we're not the one who froze the fs then we
1041 // can't thaw it.
1042 if (hfsmp->hfs_freezing_proc != current_proc()) {
1043 return EPERM;
1044 }
1045
1046 // NOTE: if you add code here, also go check the
1047 // code that "thaws" the fs in hfs_vnop_close()
1048 //
1049 hfsmp->hfs_freezing_proc = NULL;
1050 hfs_global_exclusive_lock_release(hfsmp);
1051 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1052
1053 return (0);
1054 }
1055
1056 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1057 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1058
1059 case HFS_BULKACCESS_FSCTL:
1060 case HFS_BULKACCESS: {
1061 /*
 1062 * NOTE: on entry, the vnode is locked. In case this vnode
 1063 * happens to be in our list of file_ids, we note it so we can
 1064 * avoid calling hfs_chashget_nowait() on that id, as that
 1065 * would cause a "locking against myself" panic.
1066 */
1067 Boolean check_leaf = true;
1068
1069 struct user_access_t *user_access_structp;
1070 struct user_access_t tmp_user_access_t;
1071 struct access_cache cache;
1072
1073 int error = 0, i;
1074
1075 dev_t dev = VTOC(vp)->c_dev;
1076
1077 short flags;
1078 struct ucred myucred; /* XXX ILLEGAL */
1079 int num_files;
1080 int *file_ids = NULL;
1081 short *access = NULL;
1082
1083 cnid_t cnid;
1084 cnid_t prevParent_cnid = 0;
1085 unsigned long myPerms;
1086 short myaccess = 0;
1087 struct cat_attr cnattr;
1088 CatalogKey catkey;
1089 struct cnode *skip_cp = VTOC(vp);
1090 struct vfs_context my_context;
1091
1092 /* first, return error if not run as root */
1093 if (cred->cr_ruid != 0) {
1094 return EPERM;
1095 }
1096
1097 /* initialize the local cache and buffers */
1098 cache.numcached = 0;
1099 cache.cachehits = 0;
1100 cache.lookups = 0;
1101
1102 file_ids = (int *) get_pathbuff();
1103 access = (short *) get_pathbuff();
1104 cache.acache = (int *) get_pathbuff();
1105 cache.haveaccess = (Boolean *) get_pathbuff();
1106
1107 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1108 release_pathbuff((char *) file_ids);
1109 release_pathbuff((char *) access);
1110 release_pathbuff((char *) cache.acache);
1111 release_pathbuff((char *) cache.haveaccess);
1112
1113 return ENOMEM;
1114 }
1115
1116 /* struct copyin done during dispatch... need to copy file_id array separately */
1117 if (ap->a_data == NULL) {
1118 error = EINVAL;
1119 goto err_exit_bulk_access;
1120 }
1121
1122 if (is64bit) {
1123 user_access_structp = (struct user_access_t *)ap->a_data;
1124 }
1125 else {
1126 struct access_t * accessp = (struct access_t *)ap->a_data;
1127 tmp_user_access_t.uid = accessp->uid;
1128 tmp_user_access_t.flags = accessp->flags;
1129 tmp_user_access_t.num_groups = accessp->num_groups;
1130 tmp_user_access_t.num_files = accessp->num_files;
1131 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1132 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1133 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1134 user_access_structp = &tmp_user_access_t;
1135 }
1136
1137 num_files = user_access_structp->num_files;
1138 if (num_files < 1) {
1139 goto err_exit_bulk_access;
1140 }
1141 if (num_files > 256) {
1142 error = EINVAL;
1143 goto err_exit_bulk_access;
1144 }
1145
1146 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1147 num_files * sizeof(int)))) {
1148 goto err_exit_bulk_access;
1149 }
1150
1151 /* fill in the ucred structure */
1152 flags = user_access_structp->flags;
1153 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1154 flags = R_OK;
1155 }
1156
1157 /* check if we've been passed leaf node ids or parent ids */
1158 if (flags & PARENT_IDS_FLAG) {
1159 check_leaf = false;
1160 }
1161
1162 memset(&myucred, 0, sizeof(myucred));
1163 myucred.cr_ref = 1;
1164 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1165 myucred.cr_ngroups = user_access_structp->num_groups;
1166 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1167 myucred.cr_ngroups = 0;
1168 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1169 myucred.cr_ngroups * sizeof(gid_t)))) {
1170 goto err_exit_bulk_access;
1171 }
1172 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1173 myucred.cr_gmuid = myucred.cr_uid;
1174
1175 my_context.vc_proc = p;
1176 my_context.vc_ucred = &myucred;
1177
1178 /* Check access to each file_id passed in */
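/* (The "#if 0" block below is a disabled cache-based check; the active path
 * walks each file id up its parent chain with hfs_vget() and asks
 * vnode_authorize() for search or read permission at every level.)
 */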
1179 for (i = 0; i < num_files; i++) {
1180 #if 0
1181 cnid = (cnid_t) file_ids[i];
1182
1183 /* root always has access */
1184 if (!suser(&myucred, NULL)) {
1185 access[i] = 0;
1186 continue;
1187 }
1188
1189 if (check_leaf) {
1190
1191 /* do the lookup (checks the cnode hash, then the catalog) */
1192 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1193 if (error) {
1194 access[i] = (short) error;
1195 continue;
1196 }
1197
1198 /* before calling CheckAccess(), check the target file for read access */
1199 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1200 cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p );
1201
1202
1203 /* fail fast if no access */
1204 if ((myPerms & flags) == 0) {
1205 access[i] = EACCES;
1206 continue;
1207 }
1208 } else {
1209 /* we were passed an array of parent ids */
1210 catkey.hfsPlus.parentID = cnid;
1211 }
1212
1213 /* if the last guy had the same parent and had access, we're done */
1214 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1215 cache.cachehits++;
1216 access[i] = 0;
1217 continue;
1218 }
1219
1220 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1221 skip_cp, p, &myucred, dev);
1222
1223 if ( myaccess ) {
1224 access[i] = 0; // have access.. no errors to report
1225 } else {
1226 access[i] = (error != 0 ? (short) error : EACCES);
1227 }
1228
1229 prevParent_cnid = catkey.hfsPlus.parentID;
1230 #else
1231 int myErr;
1232
1233 cnid = (cnid_t)file_ids[i];
1234
1235 while (cnid >= kRootDirID) {
1236 /* get the vnode for this cnid */
1237 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1238 if ( myErr ) {
1239 access[i] = EACCES;
1240 break;
1241 }
1242
1243 cnid = VTOC(vp)->c_parentcnid;
1244
1245 hfs_unlock(VTOC(vp));
1246 if (vnode_vtype(vp) == VDIR) {
1247 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1248 } else {
1249 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1250 }
1251 vnode_put(vp);
1252 access[i] = myErr;
1253 if (myErr) {
1254 break;
1255 }
1256 }
1257 #endif
1258 }
1259
1260 /* copyout the access array */
1261 if ((error = copyout((caddr_t)access, user_access_structp->access,
1262 num_files * sizeof (short)))) {
1263 goto err_exit_bulk_access;
1264 }
1265
1266 err_exit_bulk_access:
1267
1268 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1269
1270 release_pathbuff((char *) cache.acache);
1271 release_pathbuff((char *) cache.haveaccess);
1272 release_pathbuff((char *) file_ids);
1273 release_pathbuff((char *) access);
1274
1275 return (error);
1276 } /* HFS_BULKACCESS */
1277
1278 case HFS_SETACLSTATE: {
1279 int state;
1280
1281 if (ap->a_data == NULL) {
1282 return (EINVAL);
1283 }
1284
1285 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1286 state = *(int *)ap->a_data;
1287
1288 // super-user can enable or disable acl's on a volume.
1289 // the volume owner can only enable acl's
1290 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1291 return (EPERM);
1292 }
1293 if (state == 0 || state == 1)
1294 return hfs_setextendedsecurity(hfsmp, state);
1295 else
1296 return (EINVAL);
1297 }
1298
1299 case F_FULLFSYNC: {
1300 int error;
1301
1302 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1303 if (error == 0) {
1304 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1305 hfs_unlock(VTOC(vp));
1306 }
1307
1308 return error;
1309 }
1310
1311 case F_CHKCLEAN: {
1312 register struct cnode *cp;
1313 int error;
1314
1315 if (!vnode_isreg(vp))
1316 return EINVAL;
1317
1318 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1319 if (error == 0) {
1320 cp = VTOC(vp);
1321 /*
1322 * used by regression test to determine if
1323 * all the dirty pages (via write) have been cleaned
 1324 * after a call to 'fsync'.
1325 */
1326 error = is_file_clean(vp, VTOF(vp)->ff_size);
1327 hfs_unlock(cp);
1328 }
1329 return (error);
1330 }
1331
1332 case F_RDADVISE: {
1333 register struct radvisory *ra;
1334 struct filefork *fp;
1335 int error;
1336
1337 if (!vnode_isreg(vp))
1338 return EINVAL;
1339
1340 ra = (struct radvisory *)(ap->a_data);
1341 fp = VTOF(vp);
1342
1343 /* Protect against a size change. */
1344 hfs_lock_truncate(VTOC(vp), TRUE);
1345
1346 if (ra->ra_offset >= fp->ff_size) {
1347 error = EFBIG;
1348 } else {
1349 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1350 }
1351
1352 hfs_unlock_truncate(VTOC(vp));
1353 return (error);
1354 }
1355
1356 case F_READBOOTSTRAP:
1357 case F_WRITEBOOTSTRAP:
1358 {
1359 struct vnode *devvp = NULL;
1360 user_fbootstraptransfer_t *user_bootstrapp;
1361 int devBlockSize;
1362 int error;
1363 uio_t auio;
1364 daddr64_t blockNumber;
1365 u_long blockOffset;
1366 u_long xfersize;
1367 struct buf *bp;
1368 user_fbootstraptransfer_t user_bootstrap;
1369
1370 if (!vnode_isvroot(vp))
1371 return (EINVAL);
1372 /* LP64 - when caller is a 64 bit process then we are passed a pointer
1373 * to a user_fbootstraptransfer_t else we get a pointer to a
1374 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1375 */
1376 if (is64bit) {
1377 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1378 }
1379 else {
1380 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1381 user_bootstrapp = &user_bootstrap;
1382 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1383 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1384 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1385 }
1386 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1387 return EINVAL;
1388
1389 devvp = VTOHFS(vp)->hfs_devvp;
1390 auio = uio_create(1, user_bootstrapp->fbt_offset,
1391 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1392 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1393 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1394
1395 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1396
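/*
 * Transfer the bootstrap area (at most the first 1024 bytes, enforced
 * above) one device block at a time: read the block, copy the relevant
 * piece to or from user space, and write it back for F_WRITEBOOTSTRAP.
 */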
1397 while (uio_resid(auio) > 0) {
1398 blockNumber = uio_offset(auio) / devBlockSize;
1399 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1400 if (error) {
1401 if (bp) buf_brelse(bp);
1402 uio_free(auio);
1403 return error;
1404 };
1405
1406 blockOffset = uio_offset(auio) % devBlockSize;
1407 xfersize = devBlockSize - blockOffset;
1408 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1409 if (error) {
1410 buf_brelse(bp);
1411 uio_free(auio);
1412 return error;
1413 };
1414 if (uio_rw(auio) == UIO_WRITE) {
1415 error = VNOP_BWRITE(bp);
1416 if (error) {
1417 uio_free(auio);
1418 return error;
1419 }
1420 } else {
1421 buf_brelse(bp);
1422 };
1423 };
1424 uio_free(auio);
1425 };
1426 return 0;
1427
1428 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1429 {
1430 if (is64bit) {
1431 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1432 }
1433 else {
1434 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1435 }
1436 return 0;
1437 }
1438
1439 case HFS_GET_MOUNT_TIME:
1440 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1441 break;
1442
1443 case HFS_GET_LAST_MTIME:
1444 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1445 break;
1446
1447 case HFS_SET_BOOT_INFO:
1448 if (!vnode_isvroot(vp))
1449 return(EINVAL);
1450 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1451 return(EACCES); /* must be superuser or owner of filesystem */
1452 HFS_MOUNT_LOCK(hfsmp, TRUE);
1453 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1454 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1455 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1456 break;
1457
1458 case HFS_GET_BOOT_INFO:
1459 if (!vnode_isvroot(vp))
1460 return(EINVAL);
1461 HFS_MOUNT_LOCK(hfsmp, TRUE);
1462 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1463 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1464 break;
1465
1466 default:
1467 return (ENOTTY);
1468 }
1469
1470 /* Should never get here */
1471 return 0;
1472 }
1473
1474 /*
1475 * select
1476 */
1477 int
1478 hfs_vnop_select(__unused struct vnop_select_args *ap)
1479 /*
1480 struct vnop_select_args {
1481 vnode_t a_vp;
1482 int a_which;
1483 int a_fflags;
1484 void *a_wql;
1485 vfs_context_t a_context;
1486 };
1487 */
1488 {
1489 /*
1490 * We should really check to see if I/O is possible.
1491 */
1492 return (1);
1493 }
1494
1495 /*
 1496 * Converts a logical block number to a physical block, and optionally returns
 1497 * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
 1498 * The physical block number is based on the device block size, currently 512.
 1499 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
1500 */
1501 int
1502 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1503 {
1504 struct cnode *cp = VTOC(vp);
1505 struct filefork *fp = VTOF(vp);
1506 struct hfsmount *hfsmp = VTOHFS(vp);
1507 int retval = E_NONE;
1508 daddr_t logBlockSize;
1509 size_t bytesContAvail = 0;
1510 off_t blockposition;
1511 int lockExtBtree;
1512 int lockflags = 0;
1513
1514 /*
1515 * Check for underlying vnode requests and ensure that logical
1516 * to physical mapping is requested.
1517 */
1518 if (vpp != NULL)
1519 *vpp = cp->c_devvp;
1520 if (bnp == NULL)
1521 return (0);
1522
1523 logBlockSize = GetLogicalBlockSize(vp);
1524 blockposition = (off_t)bn * (off_t)logBlockSize;
1525
1526 lockExtBtree = overflow_extents(fp);
1527
1528 if (lockExtBtree)
1529 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1530
1531 retval = MacToVFSError(
1532 MapFileBlockC (HFSTOVCB(hfsmp),
1533 (FCB*)fp,
1534 MAXPHYSIO,
1535 blockposition,
1536 bnp,
1537 &bytesContAvail));
1538
1539 if (lockExtBtree)
1540 hfs_systemfile_unlock(hfsmp, lockflags);
1541
1542 if (retval == E_NONE) {
1543 /* Figure out how many read ahead blocks there are */
1544 if (runp != NULL) {
1545 if (can_cluster(logBlockSize)) {
1546 /* Make sure this result never goes negative: */
1547 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1548 } else {
1549 *runp = 0;
1550 }
1551 }
1552 }
1553 return (retval);
1554 }
1555
1556 /*
1557 * Convert logical block number to file offset.
1558 */
1559 int
1560 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1561 /*
1562 struct vnop_blktooff_args {
1563 vnode_t a_vp;
1564 daddr64_t a_lblkno;
1565 off_t *a_offset;
1566 };
1567 */
1568 {
1569 if (ap->a_vp == NULL)
1570 return (EINVAL);
1571 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1572
1573 return(0);
1574 }
1575
1576 /*
1577 * Convert file offset to logical block number.
1578 */
1579 int
1580 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1581 /*
1582 struct vnop_offtoblk_args {
1583 vnode_t a_vp;
1584 off_t a_offset;
1585 daddr64_t *a_lblkno;
1586 };
1587 */
1588 {
1589 if (ap->a_vp == NULL)
1590 return (EINVAL);
1591 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1592
1593 return(0);
1594 }
1595
1596 /*
1597 * Map file offset to physical block number.
1598 *
1599 * System file cnodes are expected to be locked (shared or exclusive).
1600 */
1601 int
1602 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1603 /*
1604 struct vnop_blockmap_args {
1605 vnode_t a_vp;
1606 off_t a_foffset;
1607 size_t a_size;
1608 daddr64_t *a_bpn;
1609 size_t *a_run;
1610 void *a_poff;
1611 int a_flags;
1612 vfs_context_t a_context;
1613 };
1614 */
1615 {
1616 struct vnode *vp = ap->a_vp;
1617 struct cnode *cp;
1618 struct filefork *fp;
1619 struct hfsmount *hfsmp;
1620 size_t bytesContAvail = 0;
1621 int retval = E_NONE;
1622 int syslocks = 0;
1623 int lockflags = 0;
1624 struct rl_entry *invalid_range;
1625 enum rl_overlaptype overlaptype;
1626 int started_tr = 0;
1627 int tooklock = 0;
1628
1629 /* Do not allow blockmap operation on a directory */
1630 if (vnode_isdir(vp)) {
1631 return (ENOTSUP);
1632 }
1633
1634 /*
1635 * Check for underlying vnode requests and ensure that logical
1636 * to physical mapping is requested.
1637 */
1638 if (ap->a_bpn == NULL)
1639 return (0);
1640
1641 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1642 if (VTOC(vp)->c_lockowner != current_thread()) {
1643 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1644 tooklock = 1;
1645 } else {
1646 cp = VTOC(vp);
1647 panic("blockmap: %s cnode lock already held!\n",
1648 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1649 }
1650 }
1651 hfsmp = VTOHFS(vp);
1652 cp = VTOC(vp);
1653 fp = VTOF(vp);
1654
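/*
 * ff_unallocblocks counts blocks "on loan" from delayed allocation; they
 * must be converted to real allocations (inside a transaction, holding the
 * extents and bitmap locks) before the file can be mapped.
 */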
1655 retry:
1656 if (fp->ff_unallocblocks) {
1657 if (hfs_start_transaction(hfsmp) != 0) {
1658 retval = EINVAL;
1659 goto exit;
1660 } else {
1661 started_tr = 1;
1662 }
1663 syslocks = SFL_EXTENTS | SFL_BITMAP;
1664
1665 } else if (overflow_extents(fp)) {
1666 syslocks = SFL_EXTENTS;
1667 }
1668
1669 if (syslocks)
1670 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1671
1672 /*
1673 * Check for any delayed allocations.
1674 */
1675 if (fp->ff_unallocblocks) {
1676 SInt64 actbytes;
1677 u_int32_t loanedBlocks;
1678
1679 //
1680 // Make sure we have a transaction. It's possible
1681 // that we came in and fp->ff_unallocblocks was zero
1682 // but during the time we blocked acquiring the extents
1683 // btree, ff_unallocblocks became non-zero and so we
1684 // will need to start a transaction.
1685 //
1686 if (started_tr == 0) {
1687 if (syslocks) {
1688 hfs_systemfile_unlock(hfsmp, lockflags);
1689 syslocks = 0;
1690 }
1691 goto retry;
1692 }
1693
1694 /*
 1695 * Note: ExtendFileC will release any blocks on loan and
 1696 * acquire real blocks. So we ask to extend by zero bytes
1697 * since ExtendFileC will account for the virtual blocks.
1698 */
1699
1700 loanedBlocks = fp->ff_unallocblocks;
1701 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1702 kEFAllMask | kEFNoClumpMask, &actbytes);
1703
1704 if (retval) {
1705 fp->ff_unallocblocks = loanedBlocks;
1706 cp->c_blocks += loanedBlocks;
1707 fp->ff_blocks += loanedBlocks;
1708
1709 HFS_MOUNT_LOCK(hfsmp, TRUE);
1710 hfsmp->loanedBlocks += loanedBlocks;
1711 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1712 }
1713
1714 if (retval) {
1715 hfs_systemfile_unlock(hfsmp, lockflags);
1716 cp->c_flag |= C_MODIFIED;
1717 if (started_tr) {
1718 (void) hfs_update(vp, TRUE);
1719 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1720
1721 hfs_end_transaction(hfsmp);
1722 }
1723 goto exit;
1724 }
1725 }
1726
1727 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1728 ap->a_bpn, &bytesContAvail);
1729 if (syslocks) {
1730 hfs_systemfile_unlock(hfsmp, lockflags);
1731 syslocks = 0;
1732 }
1733
1734 if (started_tr) {
1735 (void) hfs_update(vp, TRUE);
1736 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1737 hfs_end_transaction(hfsmp);
1738 started_tr = 0;
1739 }
1740 if (retval) {
1741 goto exit;
1742 }
1743
1744 /* Adjust the mapping information for invalid file ranges: */
1745 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1746 ap->a_foffset + (off_t)bytesContAvail - 1,
1747 &invalid_range);
1748 if (overlaptype != RL_NOOVERLAP) {
1749 switch(overlaptype) {
1750 case RL_MATCHINGOVERLAP:
1751 case RL_OVERLAPCONTAINSRANGE:
1752 case RL_OVERLAPSTARTSBEFORE:
1753 /* There's no valid block for this byte offset: */
1754 *ap->a_bpn = (daddr64_t)-1;
1755 /* There's no point limiting the amount to be returned
1756 * if the invalid range that was hit extends all the way
1757 * to the EOF (i.e. there's no valid bytes between the
1758 * end of this range and the file's EOF):
1759 */
1760 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1761 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1762 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1763 }
1764 break;
1765
1766 case RL_OVERLAPISCONTAINED:
1767 case RL_OVERLAPENDSAFTER:
1768 /* The range of interest hits an invalid block before the end: */
1769 if (invalid_range->rl_start == ap->a_foffset) {
1770 /* There's actually no valid information to be had starting here: */
1771 *ap->a_bpn = (daddr64_t)-1;
1772 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1773 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1774 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1775 }
1776 } else {
1777 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1778 }
1779 break;
1780
1781 case RL_NOOVERLAP:
1782 break;
1783 } /* end switch */
1784 if (bytesContAvail > ap->a_size)
1785 bytesContAvail = ap->a_size;
1786 }
1787 if (ap->a_run)
1788 *ap->a_run = bytesContAvail;
1789
1790 if (ap->a_poff)
1791 *(int *)ap->a_poff = 0;
1792 exit:
1793 if (tooklock)
1794 hfs_unlock(cp);
1795
1796 return (MacToVFSError(retval));
1797 }
1798
1799
1800 /*
1801 * prepare and issue the I/O
1802 * buf_strategy knows how to deal
1803 * with requests that require
1804 * fragmented I/Os
1805 */
1806 int
1807 hfs_vnop_strategy(struct vnop_strategy_args *ap)
1808 {
1809 buf_t bp = ap->a_bp;
1810 vnode_t vp = buf_vnode(bp);
1811 struct cnode *cp = VTOC(vp);
1812
1813 return (buf_strategy(cp->c_devvp, ap));
1814 }
1815
1816
1817 static int
1818 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1819 {
1820 register struct cnode *cp = VTOC(vp);
1821 struct filefork *fp = VTOF(vp);
 1822 struct proc *p = vfs_context_proc(context);
1823 kauth_cred_t cred = vfs_context_ucred(context);
1824 int retval;
1825 off_t bytesToAdd;
1826 off_t actualBytesAdded;
1827 off_t filebytes;
1828 u_int64_t old_filesize;
1829 u_long fileblocks;
1830 int blksize;
1831 struct hfsmount *hfsmp;
1832 int lockflags;
1833
1834 blksize = VTOVCB(vp)->blockSize;
1835 fileblocks = fp->ff_blocks;
1836 filebytes = (off_t)fileblocks * (off_t)blksize;
1837 old_filesize = fp->ff_size;
1838
1839 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1840 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1841
1842 if (length < 0)
1843 return (EINVAL);
1844
1845 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1846 return (EFBIG);
1847
1848 hfsmp = VTOHFS(vp);
1849
1850 retval = E_NONE;
1851
1852 /* Files that are changing size are not hot file candidates. */
1853 if (hfsmp->hfc_stage == HFC_RECORDING) {
1854 fp->ff_bytesread = 0;
1855 }
1856
1857 /*
1858 * We cannot just check if fp->ff_size == length (as an optimization)
1859 * since there may be extra physical blocks that also need truncation.
1860 */
1861 #if QUOTA
1862 if ((retval = hfs_getinoquota(cp)))
1863 return(retval);
1864 #endif /* QUOTA */
1865
1866 /*
1867 * Lengthen the size of the file. We must ensure that the
1868 * last byte of the file is allocated. Since the smallest
1869 * value of ff_size is 0, length will be at least 1.
1870 */
1871 if (length > (off_t)fp->ff_size) {
1872 #if QUOTA
1873 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1874 cred, 0);
1875 if (retval)
1876 goto Err_Exit;
1877 #endif /* QUOTA */
1878 /*
1879 * If we don't have enough physical space then
1880 * we need to extend the physical size.
1881 */
1882 if (length > filebytes) {
1883 int eflags;
1884 u_long blockHint = 0;
1885
1886 /* All or nothing and don't round up to clumpsize. */
1887 eflags = kEFAllMask | kEFNoClumpMask;
1888
1889 if (cred && suser(cred, NULL) != 0)
1890 eflags |= kEFReserveMask; /* keep a reserve */
1891
1892 /*
1893 * Allocate Journal and Quota files in metadata zone.
1894 */
1895 if (filebytes == 0 &&
1896 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1897 hfs_virtualmetafile(cp)) {
1898 eflags |= kEFMetadataMask;
1899 blockHint = hfsmp->hfs_metazone_start;
1900 }
1901 if (hfs_start_transaction(hfsmp) != 0) {
1902 retval = EINVAL;
1903 goto Err_Exit;
1904 }
1905
1906 /* Protect extents b-tree and allocation bitmap */
1907 lockflags = SFL_BITMAP;
1908 if (overflow_extents(fp))
1909 lockflags |= SFL_EXTENTS;
1910 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1911
1912 while ((length > filebytes) && (retval == E_NONE)) {
1913 bytesToAdd = length - filebytes;
1914 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1915 (FCB*)fp,
1916 bytesToAdd,
1917 blockHint,
1918 eflags,
1919 &actualBytesAdded));
1920
1921 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1922 if (actualBytesAdded == 0 && retval == E_NONE) {
1923 if (length > filebytes)
1924 length = filebytes;
1925 break;
1926 }
1927 } /* endwhile */
1928
1929 hfs_systemfile_unlock(hfsmp, lockflags);
1930
1931 if (hfsmp->jnl) {
1932 (void) hfs_update(vp, TRUE);
1933 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1934 }
1935
1936 hfs_end_transaction(hfsmp);
1937
1938 if (retval)
1939 goto Err_Exit;
1940
1941 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1942 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1943 }
1944
1945 if (!(flags & IO_NOZEROFILL)) {
1946 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1947 struct rl_entry *invalid_range;
1948 off_t zero_limit;
1949
1950 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1951 if (length < zero_limit) zero_limit = length;
1952
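/*
 * Zero-fill strategy: zero_limit is the page-aligned end of the (old)
 * last page.  If that page already holds valid data, the gap from the
 * old EOF up to zero_limit is zeroed right away via cluster_write();
 * everything beyond zero_limit is only recorded in the invalid-range
 * list so it can be zero-filled lazily.
 */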
1953 if (length > (off_t)fp->ff_size) {
1954 struct timeval tv;
1955
1956 /* Extending the file: time to fill out the current last page with zeroes? */
1957 if ((fp->ff_size & PAGE_MASK_64) &&
1958 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1959 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1960
1961 /* There's some valid data at the start of the (current) last page
1962 of the file, so zero out the remainder of that page to ensure the
1963 entire page contains valid data. Since there is no invalid range
1964 possible past the (current) eof, there's no need to remove anything
1965 from the invalid range list before calling cluster_write(): */
1966 hfs_unlock(cp);
1967 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1968 fp->ff_size, (off_t)0,
1969 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1970 hfs_lock(cp, HFS_FORCE_LOCK);
1971 if (retval) goto Err_Exit;
1972
1973 /* Merely invalidate the remaining area, if necessary: */
1974 if (length > zero_limit) {
1975 microuptime(&tv);
1976 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1977 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1978 }
1979 } else {
1980 /* The page containing the (current) eof is invalid: just add the
1981 remainder of the page to the invalid list, along with the area
1982 being newly allocated:
1983 */
1984 microuptime(&tv);
1985 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1986 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1987 };
1988 }
1989 } else {
1990 panic("hfs_truncate: invoked on non-UBC object?!");
1991 };
1992 }
1993 cp->c_touch_modtime = TRUE;
1994 fp->ff_size = length;
1995
1996 /* Nested transactions will do their own ubc_setsize. */
1997 if (!skipsetsize) {
1998 /*
1999 * ubc_setsize can cause a pagein here
2000 * so we need to drop cnode lock.
2001 */
2002 hfs_unlock(cp);
2003 ubc_setsize(vp, length);
2004 hfs_lock(cp, HFS_FORCE_LOCK);
2005 }
2006
2007 } else { /* Shorten the size of the file */
2008
2009 if ((off_t)fp->ff_size > length) {
2010 /*
2011 * Any buffers that are past the truncation point need to be
2012 * invalidated (to maintain buffer cache consistency).
2013 */
2014
2015 /* Nested transactions will do their own ubc_setsize. */
2016 if (!skipsetsize) {
2017 /*
2018 * ubc_setsize can cause a pageout here
2019 * so we need to drop cnode lock.
2020 */
2021 hfs_unlock(cp);
2022 ubc_setsize(vp, length);
2023 hfs_lock(cp, HFS_FORCE_LOCK);
2024 }
2025
2026 /* Any space previously marked as invalid is now irrelevant: */
2027 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2028 }
2029
2030 /*
2031 * Account for any unmapped blocks. Note that the new
2032 * file length can still end up with unmapped blocks.
2033 */
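/*
 * ff_unallocblocks are blocks "loaned" from the volume's free count for
 * dirty data that has not been allocated on disk yet (delayed allocation).
 * Return the whole loan here, then re-borrow just enough to cover whatever
 * part of the new, shorter length is still unallocated.
 */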
2034 if (fp->ff_unallocblocks > 0) {
2035 u_int32_t finalblks;
2036 u_int32_t loanedBlocks;
2037
2038 HFS_MOUNT_LOCK(hfsmp, TRUE);
2039
2040 loanedBlocks = fp->ff_unallocblocks;
2041 cp->c_blocks -= loanedBlocks;
2042 fp->ff_blocks -= loanedBlocks;
2043 fp->ff_unallocblocks = 0;
2044
2045 hfsmp->loanedBlocks -= loanedBlocks;
2046
2047 finalblks = (length + blksize - 1) / blksize;
2048 if (finalblks > fp->ff_blocks) {
2049 /* calculate required unmapped blocks */
2050 loanedBlocks = finalblks - fp->ff_blocks;
2051 hfsmp->loanedBlocks += loanedBlocks;
2052
2053 fp->ff_unallocblocks = loanedBlocks;
2054 cp->c_blocks += loanedBlocks;
2055 fp->ff_blocks += loanedBlocks;
2056 }
2057 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2058 }
2059
2060 /*
2061 * For a TBE process the deallocation of the file blocks is
2062 * delayed until the file is closed. And hfs_close calls
2063 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2064 * isn't set, we make sure this isn't a TBE process.
2065 */
2066 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2067 #if QUOTA
2068 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2069 #endif /* QUOTA */
2070 if (hfs_start_transaction(hfsmp) != 0) {
2071 retval = EINVAL;
2072 goto Err_Exit;
2073 }
2074
2075 if (fp->ff_unallocblocks == 0) {
2076 /* Protect extents b-tree and allocation bitmap */
2077 lockflags = SFL_BITMAP;
2078 if (overflow_extents(fp))
2079 lockflags |= SFL_EXTENTS;
2080 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2081
2082 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2083 (FCB*)fp, length, false));
2084
2085 hfs_systemfile_unlock(hfsmp, lockflags);
2086 }
2087 if (hfsmp->jnl) {
2088 if (retval == 0) {
2089 fp->ff_size = length;
2090 }
2091 (void) hfs_update(vp, TRUE);
2092 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2093 }
2094
2095 hfs_end_transaction(hfsmp);
2096
2097 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2098 if (retval)
2099 goto Err_Exit;
2100 #if QUOTA
2101 /* These are bytesreleased */
2102 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2103 #endif /* QUOTA */
2104 }
2105 /* Only set update flag if the logical length changes */
2106 if (old_filesize != length)
2107 cp->c_touch_modtime = TRUE;
2108 fp->ff_size = length;
2109 }
2110 cp->c_touch_chgtime = TRUE;
2111 retval = hfs_update(vp, MNT_WAIT);
2112 if (retval) {
2113 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2114 -1, -1, -1, retval, 0);
2115 }
2116
2117 Err_Exit:
2118
2119 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2120 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2121
2122 return (retval);
2123 }
2124
2125
2126
2127 /*
2128 * Truncate (or extend) a cnode to the given length, freeing (or adding)
2129 * the disk blocks as needed.
2130 */
2131 __private_extern__
2132 int
2133 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2134 vfs_context_t context)
2135 {
2136 struct filefork *fp = VTOF(vp);
2137 off_t filebytes;
2138 u_long fileblocks;
2139 int blksize, error = 0;
2140 struct cnode *cp = VTOC(vp);
2141
2142 if (vnode_isdir(vp))
2143 return (EISDIR); /* cannot truncate an HFS directory! */
2144
2145 blksize = VTOVCB(vp)->blockSize;
2146 fileblocks = fp->ff_blocks;
2147 filebytes = (off_t)fileblocks * (off_t)blksize;
2148
2149 // have to loop truncating or growing files that are
2150 // really big because otherwise transactions can get
2151 // enormous and consume too many kernel resources.
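// Each call to do_hfs_truncate() below moves the physical EOF by at most
// HFS_BIGFILE_SIZE bytes, so no single journal transaction has to cover
// more bitmap and extent changes than that.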
2152
2153 if (length < filebytes) {
2154 while (filebytes > length) {
2155 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2156 filebytes -= HFS_BIGFILE_SIZE;
2157 } else {
2158 filebytes = length;
2159 }
2160 cp->c_flag |= C_FORCEUPDATE;
2161 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2162 if (error)
2163 break;
2164 }
2165 } else if (length > filebytes) {
2166 while (filebytes < length) {
2167 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2168 filebytes += HFS_BIGFILE_SIZE;
2169 } else {
2170 filebytes = length;
2171 }
2172 cp->c_flag |= C_FORCEUPDATE;
2173 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2174 if (error)
2175 break;
2176 }
2177 } else /* Same logical size */ {
2178
2179 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2180 }
2181 /* Files that are changing size are not hot file candidates. */
2182 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2183 fp->ff_bytesread = 0;
2184 }
2185
2186 return (error);
2187 }
2188
2189
2190
2191 /*
2192 * Preallocate file storage space.
2193 */
2194 int
2195 hfs_vnop_allocate(struct vnop_allocate_args /* {
2196 vnode_t a_vp;
2197 off_t a_length;
2198 u_int32_t a_flags;
2199 off_t *a_bytesallocated;
2200 off_t a_offset;
2201 vfs_context_t a_context;
2202 } */ *ap)
2203 {
2204 struct vnode *vp = ap->a_vp;
2205 struct cnode *cp;
2206 struct filefork *fp;
2207 ExtendedVCB *vcb;
2208 off_t length = ap->a_length;
2209 off_t startingPEOF;
2210 off_t moreBytesRequested;
2211 off_t actualBytesAdded;
2212 off_t filebytes;
2213 u_long fileblocks;
2214 int retval, retval2;
2215 UInt32 blockHint;
2216 UInt32 extendFlags; /* For call to ExtendFileC */
2217 struct hfsmount *hfsmp;
2218 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2219 int lockflags;
2220
2221 *(ap->a_bytesallocated) = 0;
2222
2223 if (!vnode_isreg(vp))
2224 return (EISDIR);
2225 if (length < (off_t)0)
2226 return (EINVAL);
2227
2228 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2229 return (retval);
2230 cp = VTOC(vp);
2231 fp = VTOF(vp);
2232 hfsmp = VTOHFS(vp);
2233 vcb = VTOVCB(vp);
2234
2235 fileblocks = fp->ff_blocks;
2236 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2237
2238 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2239 retval = EINVAL;
2240 goto Err_Exit;
2241 }
2242
2243 /* Fill in the flags word for the call to Extend the file */
2244
2245 extendFlags = kEFNoClumpMask;
2246 if (ap->a_flags & ALLOCATECONTIG)
2247 extendFlags |= kEFContigMask;
2248 if (ap->a_flags & ALLOCATEALL)
2249 extendFlags |= kEFAllMask;
2250 if (cred && suser(cred, NULL) != 0)
2251 extendFlags |= kEFReserveMask;
2252
2253 retval = E_NONE;
2254 blockHint = 0;
2255 startingPEOF = filebytes;
2256
2257 if (ap->a_flags & ALLOCATEFROMPEOF)
2258 length += filebytes;
2259 else if (ap->a_flags & ALLOCATEFROMVOL)
2260 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2261
2262 /* If no changes are necessary, then we're done */
2263 if (filebytes == length)
2264 goto Std_Exit;
2265
2266 /*
2267 * Lengthen the size of the file. We must ensure that the
2268 * last byte of the file is allocated. Since the smallest
2269 * value of filebytes is 0, length will be at least 1.
2270 */
2271 if (length > filebytes) {
2272 moreBytesRequested = length - filebytes;
2273
2274 #if QUOTA
2275 retval = hfs_chkdq(cp,
2276 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2277 cred, 0);
2278 if (retval)
2279 goto Err_Exit;
2280
2281 #endif /* QUOTA */
2282 /*
2283 * Metadata zone checks.
2284 */
2285 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2286 /*
2287 * Allocate Journal and Quota files in metadata zone.
2288 */
2289 if (hfs_virtualmetafile(cp)) {
2290 extendFlags |= kEFMetadataMask;
2291 blockHint = hfsmp->hfs_metazone_start;
2292 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2293 (blockHint <= hfsmp->hfs_metazone_end)) {
2294 /*
2295 * Move blockHint outside metadata zone.
2296 */
2297 blockHint = hfsmp->hfs_metazone_end + 1;
2298 }
2299 }
2300
2301 if (hfs_start_transaction(hfsmp) != 0) {
2302 retval = EINVAL;
2303 goto Err_Exit;
2304 }
2305
2306 /* Protect extents b-tree and allocation bitmap */
2307 lockflags = SFL_BITMAP;
2308 if (overflow_extents(fp))
2309 lockflags |= SFL_EXTENTS;
2310 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2311
2312 retval = MacToVFSError(ExtendFileC(vcb,
2313 (FCB*)fp,
2314 moreBytesRequested,
2315 blockHint,
2316 extendFlags,
2317 &actualBytesAdded));
2318
2319 *(ap->a_bytesallocated) = actualBytesAdded;
2320 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2321
2322 hfs_systemfile_unlock(hfsmp, lockflags);
2323
2324 if (hfsmp->jnl) {
2325 (void) hfs_update(vp, TRUE);
2326 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2327 }
2328
2329 hfs_end_transaction(hfsmp);
2330
2331 /*
2332 * if we get an error and no changes were made then exit
2333 * otherwise we must do the hfs_update to reflect the changes
2334 */
2335 if (retval && (startingPEOF == filebytes))
2336 goto Err_Exit;
2337
2338 /*
2339 * Adjust actualBytesAdded to be allocation block aligned, not
2340 * clump size aligned.
2341 * NOTE: What we report here does not affect reality until
2342 * the file is closed, when we truncate the file to allocation
2343 * block size.
2344 */
2345 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2346 *(ap->a_bytesallocated) =
2347 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2348
2349 } else { /* Shorten the size of the file */
2350
2351 if (fp->ff_size > length) {
2352 /*
2353 * Any buffers that are past the truncation point need to be
2354 * invalidated (to maintain buffer cache consistency).
2355 */
2356 }
2357
2358 if (hfs_start_transaction(hfsmp) != 0) {
2359 retval = EINVAL;
2360 goto Err_Exit;
2361 }
2362
2363 /* Protect extents b-tree and allocation bitmap */
2364 lockflags = SFL_BITMAP;
2365 if (overflow_extents(fp))
2366 lockflags |= SFL_EXTENTS;
2367 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2368
2369 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2370
2371 hfs_systemfile_unlock(hfsmp, lockflags);
2372
2373 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2374
2375 if (hfsmp->jnl) {
2376 (void) hfs_update(vp, TRUE);
2377 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2378 }
2379
2380 hfs_end_transaction(hfsmp);
2381
2382
2383 /*
2384 * if we get an error and no changes were made then exit
2385 * otherwise we must do the hfs_update to reflect the changes
2386 */
2387 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2388 #if QUOTA
2389 /* These are bytesreleased */
2390 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2391 #endif /* QUOTA */
2392
2393 if (fp->ff_size > filebytes) {
2394 fp->ff_size = filebytes;
2395
2396 hfs_unlock(cp);
2397 ubc_setsize(vp, fp->ff_size);
2398 hfs_lock(cp, HFS_FORCE_LOCK);
2399 }
2400 }
2401
2402 Std_Exit:
2403 cp->c_touch_chgtime = TRUE;
2404 cp->c_touch_modtime = TRUE;
2405 retval2 = hfs_update(vp, MNT_WAIT);
2406
2407 if (retval == 0)
2408 retval = retval2;
2409 Err_Exit:
2410 hfs_unlock(cp);
2411 return (retval);
2412 }
2413
2414
2415 /*
2416 * Pagein for HFS filesystem
2417 */
2418 int
2419 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2420 /*
2421 struct vnop_pagein_args {
2422 vnode_t a_vp,
2423 upl_t a_pl,
2424 vm_offset_t a_pl_offset,
2425 off_t a_f_offset,
2426 size_t a_size,
2427 int a_flags
2428 vfs_context_t a_context;
2429 };
2430 */
2431 {
2432 vnode_t vp = ap->a_vp;
2433 int error;
2434
2435 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2436 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2437 /*
2438 * Keep track of blocks read.
2439 */
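/*
 * While the hot-file recording stage is active, bytes read and access
 * time are accumulated per file; the hot-file clustering code later uses
 * these samples to rank files for relocation into the hot-file area.
 */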
2440 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2441 struct cnode *cp;
2442 struct filefork *fp;
2443 int bytesread;
2444 int took_cnode_lock = 0;
2445
2446 cp = VTOC(vp);
2447 fp = VTOF(vp);
2448
2449 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2450 bytesread = fp->ff_size;
2451 else
2452 bytesread = ap->a_size;
2453
2454 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2455 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2456 hfs_lock(cp, HFS_FORCE_LOCK);
2457 took_cnode_lock = 1;
2458 }
2459 /*
2460 * If this file hasn't been seen since the start of
2461 * the current sampling period then start over.
2462 */
2463 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2464 struct timeval tv;
2465
2466 fp->ff_bytesread = bytesread;
2467 microtime(&tv);
2468 cp->c_atime = tv.tv_sec;
2469 } else {
2470 fp->ff_bytesread += bytesread;
2471 }
2472 cp->c_touch_acctime = TRUE;
2473 if (took_cnode_lock)
2474 hfs_unlock(cp);
2475 }
2476 return (error);
2477 }
2478
2479 /*
2480 * Pageout for HFS filesystem.
2481 */
2482 int
2483 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2484 /*
2485 struct vnop_pageout_args {
2486 vnode_t a_vp,
2487 upl_t a_pl,
2488 vm_offset_t a_pl_offset,
2489 off_t a_f_offset,
2490 size_t a_size,
2491 int a_flags
2492 vfs_context_t a_context;
2493 };
2494 */
2495 {
2496 vnode_t vp = ap->a_vp;
2497 struct cnode *cp;
2498 struct filefork *fp;
2499 int retval;
2500 off_t end_of_range;
2501 off_t filesize;
2502
2503 cp = VTOC(vp);
2504 if (cp->c_lockowner == current_thread()) {
2505 panic("pageout: %s cnode lock already held!\n",
2506 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2507 }
2508 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2509 if (!(ap->a_flags & UPL_NOCOMMIT)) {
2510 ubc_upl_abort_range(ap->a_pl,
2511 ap->a_pl_offset,
2512 ap->a_size,
2513 UPL_ABORT_FREE_ON_EMPTY);
2514 }
2515 return (retval);
2516 }
2517 fp = VTOF(vp);
2518
2519 filesize = fp->ff_size;
2520 end_of_range = ap->a_f_offset + ap->a_size - 1;
2521
2522 if (end_of_range >= filesize) {
2523 end_of_range = (off_t)(filesize - 1);
2524 }
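/*
 * Once these pages are written, the corresponding on-disk range holds
 * valid data, so remove any overlapping entries from the invalid
 * (to-be-zero-filled) range list before starting the pageout.
 */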
2525 if (ap->a_f_offset < filesize) {
2526 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2527 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2528 }
2529 hfs_unlock(cp);
2530
2531 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2532 ap->a_size, filesize, ap->a_flags);
2533
2534 /*
2535 * If data was written, and setuid or setgid bits are set and
2536 * this process is not the superuser then clear the setuid and
2537 * setgid bits as a precaution against tampering.
2538 */
2539 if ((retval == 0) &&
2540 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2541 (vfs_context_suser(ap->a_context) != 0)) {
2542 hfs_lock(cp, HFS_FORCE_LOCK);
2543 cp->c_mode &= ~(S_ISUID | S_ISGID);
2544 cp->c_touch_chgtime = TRUE;
2545 hfs_unlock(cp);
2546 }
2547 return (retval);
2548 }
2549
2550 /*
2551 * Intercept B-Tree node writes to unswap them if necessary.
2552 */
2553 int
2554 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2555 {
2556 int retval = 0;
2557 register struct buf *bp = ap->a_bp;
2558 register struct vnode *vp = buf_vnode(bp);
2559 BlockDescriptor block;
2560
2561 /* Trap B-Tree writes */
2562 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2563 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2564 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
2565 (vp == VTOHFS(vp)->hfc_filevp)) {
2566
2567 /*
2568 * Swap and validate the node if it is in native byte order.
2569 * This is always true on big endian, so we always validate
2570 * before writing here. On little endian, the node typically has
2571 * been swapped and validated when it was written to the journal,
2572 * so we won't do anything here.
2573 */
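/*
 * The last two bytes of a B-tree node hold the offset of record 0, which
 * is always sizeof(BTNodeDescriptor), i.e. 0x000e.  If that value reads
 * back correctly in host byte order, the node has not been swapped yet.
 */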
2574 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2575 /* Prepare the block pointer */
2576 block.blockHeader = bp;
2577 block.buffer = (char *)buf_dataptr(bp);
2578 block.blockNum = buf_lblkno(bp);
2579 /* not found in cache ==> came from disk */
2580 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2581 block.blockSize = buf_count(bp);
2582
2583 /* Endian un-swap B-Tree node */
2584 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
2585 if (retval)
2586 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
2587 }
2588 }
2589
2590 /* This buffer shouldn't be locked anymore, but if it is, clear it */
2591 if ((buf_flags(bp) & B_LOCKED)) {
2592 // XXXdbg
2593 if (VTOHFS(vp)->jnl) {
2594 panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
2595 }
2596 buf_clearflags(bp, B_LOCKED);
2597 }
2598 retval = vn_bwrite (ap);
2599
2600 return (retval);
2601 }
2602
2603 /*
2604 * Relocate a file to a new location on disk
2605 * cnode must be locked on entry
2606 *
2607 * Relocation occurs by cloning the file's data from its
2608 * current set of blocks to a new set of blocks. During
2609 * the relocation all of the blocks (old and new) are
2610 * owned by the file.
2611 *
2612 * -----------------
2613 * |///////////////|
2614 * -----------------
2615 * 0 N (file offset)
2616 *
2617 * ----------------- -----------------
2618 * |///////////////| | | STEP 1 (acquire new blocks)
2619 * ----------------- -----------------
2620 * 0 N N+1 2N
2621 *
2622 * ----------------- -----------------
2623 * |///////////////| |///////////////| STEP 2 (clone data)
2624 * ----------------- -----------------
2625 * 0 N N+1 2N
2626 *
2627 * -----------------
2628 * |///////////////| STEP 3 (head truncate blocks)
2629 * -----------------
2630 * 0 N
2631 *
2632 * During steps 2 and 3 page-outs to file offsets less
2633 * than or equal to N are suspended.
2634 *
2635 * During step 3 page-ins to the file are suspended.
2636 */
2637 __private_extern__
2638 int
2639 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2640 struct proc *p)
2641 {
2642 struct cnode *cp;
2643 struct filefork *fp;
2644 struct hfsmount *hfsmp;
2645 u_int32_t headblks;
2646 u_int32_t datablks;
2647 u_int32_t blksize;
2648 u_int32_t growsize;
2649 u_int32_t nextallocsave;
2650 daddr64_t sector_a, sector_b;
2651 int disabled_caching = 0;
2652 int eflags;
2653 off_t newbytes;
2654 int retval;
2655 int lockflags = 0;
2656 int took_trunc_lock = 0;
2657 int started_tr = 0;
2658 enum vtype vnodetype;
2659
2660 vnodetype = vnode_vtype(vp);
2661 if (vnodetype != VREG && vnodetype != VLNK) {
2662 return (EPERM);
2663 }
2664
2665 hfsmp = VTOHFS(vp);
2666 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2667 return (ENOSPC);
2668 }
2669
2670 cp = VTOC(vp);
2671 fp = VTOF(vp);
2672 if (fp->ff_unallocblocks)
2673 return (EINVAL);
2674 blksize = hfsmp->blockSize;
2675 if (blockHint == 0)
2676 blockHint = hfsmp->nextAllocation;
2677
2678 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2679 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2680 return (EFBIG);
2681 }
2682
2683 //
2684 // We do not believe that this call to hfs_fsync() is
2685 // necessary and it causes a journal transaction
2686 // deadlock so we are removing it.
2687 //
2688 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2689 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2690 // if (retval)
2691 // return (retval);
2692 //}
2693
2694 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2695 hfs_unlock(cp);
2696 hfs_lock_truncate(cp, TRUE);
2697 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2698 hfs_unlock_truncate(cp);
2699 return (retval);
2700 }
2701 took_trunc_lock = 1;
2702 }
2703 headblks = fp->ff_blocks;
2704 datablks = howmany(fp->ff_size, blksize);
2705 growsize = datablks * blksize;
2706 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2707 if (blockHint >= hfsmp->hfs_metazone_start &&
2708 blockHint <= hfsmp->hfs_metazone_end)
2709 eflags |= kEFMetadataMask;
2710
2711 if (hfs_start_transaction(hfsmp) != 0) {
2712 if (took_trunc_lock)
2713 hfs_unlock_truncate(cp);
2714 return (EINVAL);
2715 }
2716 started_tr = 1;
2717 /*
2718 * Protect the extents b-tree and the allocation bitmap
2719 * during MapFileBlockC and ExtendFileC operations.
2720 */
2721 lockflags = SFL_BITMAP;
2722 if (overflow_extents(fp))
2723 lockflags |= SFL_EXTENTS;
2724 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2725
2726 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2727 if (retval) {
2728 retval = MacToVFSError(retval);
2729 goto out;
2730 }
2731
2732 /*
2733 * STEP 1 - acquire new allocation blocks.
2734 */
2735 if (!vnode_isnocache(vp)) {
2736 vnode_setnocache(vp);
2737 disabled_caching = 1;
2738
2739 }
2740 nextallocsave = hfsmp->nextAllocation;
2741 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
2742 if (eflags & kEFMetadataMask) {
2743 HFS_MOUNT_LOCK(hfsmp, TRUE);
2744 hfsmp->nextAllocation = nextallocsave;
2745 hfsmp->vcbFlags |= 0xFF00;
2746 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2747 }
2748
2749 retval = MacToVFSError(retval);
2750 if (retval == 0) {
2751 cp->c_flag |= C_MODIFIED;
2752 if (newbytes < growsize) {
2753 retval = ENOSPC;
2754 goto restore;
2755 } else if (fp->ff_blocks < (headblks + datablks)) {
2756 printf("hfs_relocate: allocation failed");
2757 retval = ENOSPC;
2758 goto restore;
2759 }
2760
2761 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2762 if (retval) {
2763 retval = MacToVFSError(retval);
2764 } else if ((sector_a + 1) == sector_b) {
2765 retval = ENOSPC;
2766 goto restore;
2767 } else if ((eflags & kEFMetadataMask) &&
2768 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2769 hfsmp->hfs_metazone_end)) {
2770 printf("hfs_relocate: didn't move into metadata zone\n");
2771 retval = ENOSPC;
2772 goto restore;
2773 }
2774 }
2775 /* Done with system locks and journal for now. */
2776 hfs_systemfile_unlock(hfsmp, lockflags);
2777 lockflags = 0;
2778 hfs_end_transaction(hfsmp);
2779 started_tr = 0;
2780
2781 if (retval) {
2782 /*
2783 * Check to see if failure is due to excessive fragmentation.
2784 */
2785 if ((retval == ENOSPC) &&
2786 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2787 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2788 }
2789 goto out;
2790 }
2791 /*
2792 * STEP 2 - clone file data into the new allocation blocks.
2793 */
2794
2795 if (vnodetype == VLNK)
2796 retval = hfs_clonelink(vp, blksize, cred, p);
2797 else if (vnode_issystem(vp))
2798 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2799 else
2800 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2801
2802 /* Start transaction for step 3 or for a restore. */
2803 if (hfs_start_transaction(hfsmp) != 0) {
2804 retval = EINVAL;
2805 goto out;
2806 }
2807 started_tr = 1;
2808 if (retval)
2809 goto restore;
2810
2811 /*
2812 * STEP 3 - switch to cloned data and remove old blocks.
2813 */
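/*
 * HeadTruncateFile() cuts the first headblks allocation blocks off the
 * fork, so the cloned copy (appended after the original data in step 1)
 * becomes the file's contents starting at offset 0.
 */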
2814 lockflags = SFL_BITMAP;
2815 if (overflow_extents(fp))
2816 lockflags |= SFL_EXTENTS;
2817 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2818
2819 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2820
2821 hfs_systemfile_unlock(hfsmp, lockflags);
2822 lockflags = 0;
2823 if (retval)
2824 goto restore;
2825 out:
2826 if (took_trunc_lock)
2827 hfs_unlock_truncate(cp);
2828
2829 if (lockflags) {
2830 hfs_systemfile_unlock(hfsmp, lockflags);
2831 lockflags = 0;
2832 }
2833
2834 /* Push cnode's new extent data to disk. */
2835 if (retval == 0) {
2836 (void) hfs_update(vp, MNT_WAIT);
2837 }
2838
2839 if (hfsmp->jnl) {
2840 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2841 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2842 else
2843 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2844 }
2845 exit:
2846 if (disabled_caching) {
2847 vnode_clearnocache(vp);
2848 }
2849 if (started_tr)
2850 hfs_end_transaction(hfsmp);
2851
2852 return (retval);
2853
2854 restore:
2855 if (fp->ff_blocks == headblks)
2856 goto exit;
2857 /*
2858 * Give back any newly allocated space.
2859 */
2860 if (lockflags == 0) {
2861 lockflags = SFL_BITMAP;
2862 if (overflow_extents(fp))
2863 lockflags |= SFL_EXTENTS;
2864 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2865 }
2866
2867 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2868
2869 hfs_systemfile_unlock(hfsmp, lockflags);
2870 lockflags = 0;
2871
2872 if (took_trunc_lock)
2873 hfs_unlock_truncate(cp);
2874 goto exit;
2875 }
2876
2877
2878 /*
2879 * Clone a symlink.
2880 *
2881 */
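/*
 * The caller (hfs_relocate) guarantees that a symlink's data fits in a
 * single allocation block, so cloning is just a matter of copying logical
 * block 0 into logical block 1 (the first newly allocated block) and then
 * invalidating the stale buffers.
 */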
2882 static int
2883 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2884 {
2885 struct buf *head_bp = NULL;
2886 struct buf *tail_bp = NULL;
2887 int error;
2888
2889
2890 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2891 if (error)
2892 goto out;
2893
2894 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2895 if (tail_bp == NULL) {
2896 error = EIO;
2897 goto out;
2898 }
2899 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2900 error = (int)buf_bwrite(tail_bp);
2901 out:
2902 if (head_bp) {
2903 buf_markinvalid(head_bp);
2904 buf_brelse(head_bp);
2905 }
2906 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2907
2908 return (error);
2909 }
2910
2911 /*
2912 * Clone a file's data within the file.
2913 *
2914 */
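/*
 * The source range [0, blkcnt * blksize) is copied into the range that
 * starts at blkstart * blksize, using a bounce buffer of at most 128KB
 * and synchronous, uncached cluster I/O (IO_NOCACHE | IO_SYNC) so no
 * stale copy of the data is left behind in the UBC.
 */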
2915 static int
2916 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2917 {
2918 caddr_t bufp;
2919 size_t writebase;
2920 size_t bufsize;
2921 size_t copysize;
2922 size_t iosize;
2923 off_t filesize;
2924 size_t offset;
2925 uio_t auio;
2926 int error = 0;
2927
2928 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2929 writebase = blkstart * blksize;
2930 copysize = blkcnt * blksize;
2931 iosize = bufsize = MIN(copysize, 128 * 1024);
2932 offset = 0;
2933
2934 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2935 return (ENOMEM);
2936 }
2937 hfs_unlock(VTOC(vp));
2938
2939 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2940
2941 while (offset < copysize) {
2942 iosize = MIN(copysize - offset, iosize);
2943
2944 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2945 uio_addiov(auio, (uintptr_t)bufp, iosize);
2946
2947 error = cluster_read(vp, auio, copysize, 0);
2948 if (error) {
2949 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2950 break;
2951 }
2952 if (uio_resid(auio) != 0) {
2953 printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2954 error = EIO;
2955 break;
2956 }
2957
2958 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2959 uio_addiov(auio, (uintptr_t)bufp, iosize);
2960
2961 error = cluster_write(vp, auio, filesize + offset,
2962 filesize + offset + iosize,
2963 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2964 if (error) {
2965 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2966 break;
2967 }
2968 if (uio_resid(auio) != 0) {
2969 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2970 error = EIO;
2971 break;
2972 }
2973 offset += iosize;
2974 }
2975 uio_free(auio);
2976
2977 /*
2978 * No need to call ubc_sync_range or hfs_invalbuf
2979 * since the file was copied using IO_NOCACHE.
2980 */
2981
2982 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2983
2984 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2985 return (error);
2986 }
2987
2988 /*
2989 * Clone a system (metadata) file.
2990 *
2991 */
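/*
 * The copy is done through the buffer cache in logical-block-size chunks,
 * staging up to a megabyte at a time in a bounce buffer: read blocks
 * [0, last_blk), write them back starting at start_blk, then fsync so the
 * new copy is on disk before the caller switches over to it.
 */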
2992 static int
2993 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2994 kauth_cred_t cred, struct proc *p)
2995 {
2996 caddr_t bufp;
2997 char * offset;
2998 size_t bufsize;
2999 size_t iosize;
3000 struct buf *bp = NULL;
3001 daddr64_t blkno;
3002 daddr64_t blk;
3003 daddr64_t start_blk;
3004 daddr64_t last_blk;
3005 int breadcnt;
3006 int i;
3007 int error = 0;
3008
3009
3010 iosize = GetLogicalBlockSize(vp);
3011 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
3012 breadcnt = bufsize / iosize;
3013
3014 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3015 return (ENOMEM);
3016 }
3017 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3018 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
3019 blkno = 0;
3020
3021 while (blkno < last_blk) {
3022 /*
3023 * Read up to a megabyte
3024 */
3025 offset = bufp;
3026 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3027 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3028 if (error) {
3029 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3030 goto out;
3031 }
3032 if (buf_count(bp) != iosize) {
3033 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
error = EIO;
3034 goto out;
3035 }
3036 bcopy((char *)buf_dataptr(bp), offset, iosize);
3037
3038 buf_markinvalid(bp);
3039 buf_brelse(bp);
3040 bp = NULL;
3041
3042 offset += iosize;
3043 }
3044
3045 /*
3046 * Write up to a megabyte
3047 */
3048 offset = bufp;
3049 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3050 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3051 if (bp == NULL) {
3052 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3053 error = EIO;
3054 goto out;
3055 }
3056 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3057 error = (int)buf_bwrite(bp);
3058 bp = NULL;
3059 if (error)
3060 goto out;
3061 offset += iosize;
3062 }
3063 }
3064 out:
3065 if (bp) {
3066 buf_brelse(bp);
3067 }
3068
3069 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3070
3071 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3072
3073 return (error);
3074 }