1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* @(#)hfs_readwrite.c 1.0
23 *
24 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
25 *
26 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
27 *
28 */
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/resourcevar.h>
33 #include <sys/kernel.h>
34 #include <sys/fcntl.h>
35 #include <sys/filedesc.h>
36 #include <sys/stat.h>
37 #include <sys/buf.h>
38 #include <sys/proc.h>
39 #include <sys/kauth.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/vfs_context.h>
43
44 #include <miscfs/specfs/specdev.h>
45
46 #include <sys/ubc.h>
47 #include <vm/vm_pageout.h>
48 #include <vm/vm_kern.h>
49
50 #include <sys/kdebug.h>
51
52 #include "hfs.h"
53 #include "hfs_endian.h"
54 #include "hfs_fsctl.h"
55 #include "hfs_quota.h"
56 #include "hfscommon/headers/FileMgrInternal.h"
57 #include "hfscommon/headers/BTreesInternal.h"
58 #include "hfs_cnode.h"
59 #include "hfs_dbg.h"
60
61 extern int overflow_extents(struct filefork *fp);
62
63 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
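/*
 * can_cluster(size) is true when "size" is a multiple of 4K and no larger than
 * MAXPHYSIO/2; hfs_bmap() below uses it to decide whether a read-ahead run is
 * worth reporting for the vnode's logical block size.
 */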
64
65 enum {
66 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
67 };
68
69 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
70
71 extern int hfs_setextendedsecurity(struct hfsmount *, int);
72
73
74 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
75 static int hfs_clonefile(struct vnode *, int, int, int);
76 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
77
78
79 /*****************************************************************************
80 *
81 * I/O Operations on vnodes
82 *
83 *****************************************************************************/
84 int hfs_vnop_read(struct vnop_read_args *);
85 int hfs_vnop_write(struct vnop_write_args *);
86 int hfs_vnop_ioctl(struct vnop_ioctl_args *);
87 int hfs_vnop_select(struct vnop_select_args *);
88 int hfs_vnop_blktooff(struct vnop_blktooff_args *);
89 int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
90 int hfs_vnop_blockmap(struct vnop_blockmap_args *);
91 int hfs_vnop_strategy(struct vnop_strategy_args *);
92 int hfs_vnop_allocate(struct vnop_allocate_args *);
93 int hfs_vnop_pagein(struct vnop_pagein_args *);
94 int hfs_vnop_pageout(struct vnop_pageout_args *);
95 int hfs_vnop_bwrite(struct vnop_bwrite_args *);
96
97
98 /*
99 * Read data from a file.
100 */
101 int
102 hfs_vnop_read(struct vnop_read_args *ap)
103 {
104 uio_t uio = ap->a_uio;
105 struct vnode *vp = ap->a_vp;
106 struct cnode *cp;
107 struct filefork *fp;
108 struct hfsmount *hfsmp;
109 off_t filesize;
110 off_t filebytes;
111 off_t start_resid = uio_resid(uio);
112 off_t offset = uio_offset(uio);
113 int retval = 0;
114
115
116 /* Preflight checks */
117 if (!vnode_isreg(vp)) {
118 /* can only read regular files */
119 if (vnode_isdir(vp))
120 return (EISDIR);
121 else
122 return (EPERM);
123 }
124 if (start_resid == 0)
125 return (0); /* Nothing left to do */
126 if (offset < 0)
 127                 return (EINVAL);        /* can't read from a negative offset */
128
129 cp = VTOC(vp);
130 fp = VTOF(vp);
131 hfsmp = VTOHFS(vp);
132
133 /* Protect against a size change. */
134 hfs_lock_truncate(cp, 0);
135
136 filesize = fp->ff_size;
137 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
138 if (offset > filesize) {
139 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
140 (offset > (off_t)MAXHFSFILESIZE)) {
141 retval = EFBIG;
142 }
143 goto exit;
144 }
145
146 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
147 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
148
149 retval = cluster_read(vp, uio, filesize, 0);
150
151 cp->c_touch_acctime = TRUE;
152
153 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
154 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
155
156 /*
 157          * Keep track of blocks read
158 */
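/*
 * While the hot-file clustering engine is in its recording stage
 * (hfc_stage == HFC_RECORDING), ff_bytesread accumulates how much of this
 * file has been read during the current sampling period; if the cnode's
 * atime predates hfc_timebase the counter is restarted rather than added to.
 */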
159 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
160 int took_cnode_lock = 0;
161 off_t bytesread;
162
163 bytesread = start_resid - uio_resid(uio);
164
165 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
166 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
167 hfs_lock(cp, HFS_FORCE_LOCK);
168 took_cnode_lock = 1;
169 }
170 /*
171 * If this file hasn't been seen since the start of
172 * the current sampling period then start over.
173 */
174 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
175 struct timeval tv;
176
177 fp->ff_bytesread = bytesread;
178 microtime(&tv);
179 cp->c_atime = tv.tv_sec;
180 } else {
181 fp->ff_bytesread += bytesread;
182 }
183 if (took_cnode_lock)
184 hfs_unlock(cp);
185 }
186 exit:
187 hfs_unlock_truncate(cp);
188 return (retval);
189 }
190
191 /*
192 * Write data to a file.
193 */
194 int
195 hfs_vnop_write(struct vnop_write_args *ap)
196 {
197 uio_t uio = ap->a_uio;
198 struct vnode *vp = ap->a_vp;
199 struct cnode *cp;
200 struct filefork *fp;
201 struct hfsmount *hfsmp;
202 kauth_cred_t cred = NULL;
203 off_t origFileSize;
204 off_t writelimit;
205 off_t bytesToAdd;
206 off_t actualBytesAdded;
207 off_t filebytes;
208 off_t offset;
209 size_t resid;
210 int eflags;
211 int ioflag = ap->a_ioflag;
212 int retval = 0;
213 int lockflags;
214 int cnode_locked = 0;
215
 216         // LP64todo - fix this! uio_resid may be a 64-bit value
217 resid = uio_resid(uio);
218 offset = uio_offset(uio);
219
220 if (offset < 0)
221 return (EINVAL);
222 if (resid == 0)
223 return (E_NONE);
224 if (!vnode_isreg(vp))
225 return (EPERM); /* Can only write regular files */
226
227 /* Protect against a size change. */
228 hfs_lock_truncate(VTOC(vp), TRUE);
229
230 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
231 hfs_unlock_truncate(VTOC(vp));
232 return (retval);
233 }
234 cnode_locked = 1;
235 cp = VTOC(vp);
236 fp = VTOF(vp);
237 hfsmp = VTOHFS(vp);
238 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
239
240 if (ioflag & IO_APPEND) {
241 uio_setoffset(uio, fp->ff_size);
242 offset = fp->ff_size;
243 }
244 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
245 retval = EPERM;
246 goto exit;
247 }
248
249 origFileSize = fp->ff_size;
250 eflags = kEFDeferMask; /* defer file block allocations */
251
252 #ifdef HFS_SPARSE_DEV
253 /*
254 * When the underlying device is sparse and space
255 * is low (< 8MB), stop doing delayed allocations
256 * and begin doing synchronous I/O.
257 */
258 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
259 (hfs_freeblks(hfsmp, 0) < 2048)) {
260 eflags &= ~kEFDeferMask;
261 ioflag |= IO_SYNC;
262 }
263 #endif /* HFS_SPARSE_DEV */
264
265 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
266 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
267
268 /* Now test if we need to extend the file */
269 /* Doing so will adjust the filebytes for us */
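/*
 * For example (illustrative, assuming a 4K allocation block size): a file
 * with 8192 bytes of allocated space (filebytes == 8192) being written
 * 1000 bytes at offset 8000 has writelimit == 9000 > filebytes, so the
 * loop below must allocate at least one more block before the data goes out.
 */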
270
271 writelimit = offset + resid;
272 if (writelimit <= filebytes)
273 goto sizeok;
274
275 cred = vfs_context_ucred(ap->a_context);
276 #if QUOTA
277 bytesToAdd = writelimit - filebytes;
278 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
279 cred, 0);
280 if (retval)
281 goto exit;
282 #endif /* QUOTA */
283
284 if (hfs_start_transaction(hfsmp) != 0) {
285 retval = EINVAL;
286 goto exit;
287 }
288
289 while (writelimit > filebytes) {
290 bytesToAdd = writelimit - filebytes;
291 if (cred && suser(cred, NULL) != 0)
292 eflags |= kEFReserveMask;
293
294 /* Protect extents b-tree and allocation bitmap */
295 lockflags = SFL_BITMAP;
296 if (overflow_extents(fp))
297 lockflags |= SFL_EXTENTS;
298 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
299
300 /* Files that are changing size are not hot file candidates. */
301 if (hfsmp->hfc_stage == HFC_RECORDING) {
302 fp->ff_bytesread = 0;
303 }
304 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
305 0, eflags, &actualBytesAdded));
306
307 hfs_systemfile_unlock(hfsmp, lockflags);
308
309 if ((actualBytesAdded == 0) && (retval == E_NONE))
310 retval = ENOSPC;
311 if (retval != E_NONE)
312 break;
313 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
314 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
315 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
316 }
317 (void) hfs_update(vp, TRUE);
318 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
319 (void) hfs_end_transaction(hfsmp);
320
321 sizeok:
322 if (retval == E_NONE) {
323 off_t filesize;
324 off_t zero_off;
325 off_t tail_off;
326 off_t inval_start;
327 off_t inval_end;
328 off_t io_start;
329 int lflag;
330 struct rl_entry *invalid_range;
331
332 if (writelimit > fp->ff_size)
333 filesize = writelimit;
334 else
335 filesize = fp->ff_size;
336
337 lflag = (ioflag & IO_SYNC);
338
339 if (offset <= fp->ff_size) {
340 zero_off = offset & ~PAGE_MASK_64;
341
 342                         /* Check whether the area between the zero_offset and the start
 343                            of the transfer is invalid and should be zero-filled
 344                            as part of the transfer:
 345                          */
346 if (offset > zero_off) {
347 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
348 lflag |= IO_HEADZEROFILL;
349 }
350 } else {
351 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
352
353 /* The bytes between fp->ff_size and uio->uio_offset must never be
354 read without being zeroed. The current last block is filled with zeroes
355 if it holds valid data but in all cases merely do a little bookkeeping
356 to track the area from the end of the current last page to the start of
357 the area actually written. For the same reason only the bytes up to the
358 start of the page where this write will start is invalidated; any remainder
359 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
360
361 Note that inval_start, the start of the page after the current EOF,
362 may be past the start of the write, in which case the zeroing
 363                            will be handled by the cluster_write of the actual data.
364 */
365 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
366 inval_end = offset & ~PAGE_MASK_64;
367 zero_off = fp->ff_size;
368
369 if ((fp->ff_size & PAGE_MASK_64) &&
370 (rl_scan(&fp->ff_invalidranges,
371 eof_page_base,
372 fp->ff_size - 1,
373 &invalid_range) != RL_NOOVERLAP)) {
374 /* The page containing the EOF is not valid, so the
375 entire page must be made inaccessible now. If the write
376 starts on a page beyond the page containing the eof
377 (inval_end > eof_page_base), add the
378 whole page to the range to be invalidated. Otherwise
379 (i.e. if the write starts on the same page), zero-fill
380 the entire page explicitly now:
381 */
382 if (inval_end > eof_page_base) {
383 inval_start = eof_page_base;
384 } else {
385 zero_off = eof_page_base;
386 };
387 };
388
389 if (inval_start < inval_end) {
390 struct timeval tv;
391 /* There's some range of data that's going to be marked invalid */
392
393 if (zero_off < inval_start) {
394 /* The pages between inval_start and inval_end are going to be invalidated,
395 and the actual write will start on a page past inval_end. Now's the last
396 chance to zero-fill the page containing the EOF:
397 */
398 hfs_unlock(cp);
399 cnode_locked = 0;
400 retval = cluster_write(vp, (uio_t) 0,
401 fp->ff_size, inval_start,
402 zero_off, (off_t)0,
403 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
404 hfs_lock(cp, HFS_FORCE_LOCK);
405 cnode_locked = 1;
406 if (retval) goto ioerr_exit;
407 offset = uio_offset(uio);
408 };
409
410 /* Mark the remaining area of the newly allocated space as invalid: */
411 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
412 microuptime(&tv);
413 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
414 zero_off = fp->ff_size = inval_end;
415 };
416
417 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
418 };
419
420 /* Check to see whether the area between the end of the write and the end of
421 the page it falls in is invalid and should be zero-filled as part of the transfer:
422 */
423 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
424 if (tail_off > filesize) tail_off = filesize;
425 if (tail_off > writelimit) {
426 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
427 lflag |= IO_TAILZEROFILL;
428 };
429 };
430
431 /*
432 * if the write starts beyond the current EOF (possibly advanced in the
433 * zeroing of the last block, above), then we'll zero fill from the current EOF
434 * to where the write begins:
435 *
436 * NOTE: If (and ONLY if) the portion of the file about to be written is
437 * before the current EOF it might be marked as invalid now and must be
438 * made readable (removed from the invalid ranges) before cluster_write
439 * tries to write it:
440 */
441 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
442 if (io_start < fp->ff_size) {
443 off_t io_end;
444
445 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
446 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
447 };
448
449 hfs_unlock(cp);
450 cnode_locked = 0;
451 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
452 tail_off, lflag | IO_NOZERODIRTY);
453 offset = uio_offset(uio);
454 if (offset > fp->ff_size) {
455 fp->ff_size = offset;
456
457 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
458 /* Files that are changing size are not hot file candidates. */
459 if (hfsmp->hfc_stage == HFC_RECORDING)
460 fp->ff_bytesread = 0;
461 }
462 if (resid > uio_resid(uio)) {
463 cp->c_touch_chgtime = TRUE;
464 cp->c_touch_modtime = TRUE;
465 }
466 }
467 HFS_KNOTE(vp, NOTE_WRITE);
468
469 ioerr_exit:
470 /*
 471          * If we successfully wrote any data and we are not the superuser,
472 * we clear the setuid and setgid bits as a precaution against
473 * tampering.
474 */
475 if (cp->c_mode & (S_ISUID | S_ISGID)) {
476 cred = vfs_context_ucred(ap->a_context);
477 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
478 if (!cnode_locked) {
479 hfs_lock(cp, HFS_FORCE_LOCK);
480 cnode_locked = 1;
481 }
482 cp->c_mode &= ~(S_ISUID | S_ISGID);
483 }
484 }
485 if (retval) {
486 if (ioflag & IO_UNIT) {
487 if (!cnode_locked) {
488 hfs_lock(cp, HFS_FORCE_LOCK);
489 cnode_locked = 1;
490 }
491 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
492 0, ap->a_context);
 493                         // LP64todo - fix this! resid needs to be user_ssize_t
494 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
495 uio_setresid(uio, resid);
496 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
497 }
498 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
499 if (!cnode_locked) {
500 hfs_lock(cp, HFS_FORCE_LOCK);
501 cnode_locked = 1;
502 }
503 retval = hfs_update(vp, TRUE);
504 }
505 /* Updating vcbWrCnt doesn't need to be atomic. */
506 hfsmp->vcbWrCnt++;
507
508 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
509 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
510 exit:
511 if (cnode_locked)
512 hfs_unlock(cp);
513 hfs_unlock_truncate(cp);
514 return (retval);
515 }
516
517 /* support for the "bulk-access" fcntl */
518
519 #define CACHE_ELEMS 64
520 #define CACHE_LEVELS 16
521 #define PARENT_IDS_FLAG 0x100
522
523 /* from hfs_attrlist.c */
524 extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
525 mode_t obj_mode, struct mount *mp,
526 kauth_cred_t cred, struct proc *p);
527
528 /* from vfs/vfs_fsevents.c */
529 extern char *get_pathbuff(void);
530 extern void release_pathbuff(char *buff);
531
532 struct access_cache {
533 int numcached;
534 int cachehits; /* these two for statistics gathering */
535 int lookups;
536 unsigned int *acache;
537 Boolean *haveaccess;
538 };
539
540 struct access_t {
541 uid_t uid; /* IN: effective user id */
542 short flags; /* IN: access requested (i.e. R_OK) */
543 short num_groups; /* IN: number of groups user belongs to */
544 int num_files; /* IN: number of files to process */
545 int *file_ids; /* IN: array of file ids */
546 gid_t *groups; /* IN: array of groups */
547 short *access; /* OUT: access info for each file (0 for 'has access') */
548 };
549
550 struct user_access_t {
551 uid_t uid; /* IN: effective user id */
552 short flags; /* IN: access requested (i.e. R_OK) */
553 short num_groups; /* IN: number of groups user belongs to */
554 int num_files; /* IN: number of files to process */
555 user_addr_t file_ids; /* IN: array of file ids */
556 user_addr_t groups; /* IN: array of groups */
557 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
558 };
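/*
 * Illustrative sketch (not part of this file) of how a user-space process
 * running as root might drive the bulk-access check; the field names follow
 * the 32-bit access_t layout above and the HFSIOC_BULKACCESS command defined
 * further below:
 *
 *	struct access_t args;
 *	args.uid        = uid;       // identity to evaluate access for
 *	args.flags      = R_OK;      // access requested
 *	args.num_groups = ngroups;   // group list for that identity
 *	args.groups     = groups;
 *	args.num_files  = nfiles;    // at most 256 ids per call
 *	args.file_ids   = file_ids;  // catalog node ids to check
 *	args.access     = results;   // OUT: one short per id, 0 == access granted
 *	fsctl(any_path_on_volume, HFSIOC_BULKACCESS, &args, 0);
 */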
559
560 /*
561 * Perform a binary search for the given parent_id. Return value is
562 * found/not found boolean, and indexp will be the index of the item
563 * or the index at which to insert the item if it's not found.
564 */
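/*
 * For example, with acache == {10, 20, 30}: looking up 20 returns 1 with
 * *indexp == 1, while looking up 25 returns 0 with *indexp == 2 (the slot
 * where 25 would have to be inserted to keep the array sorted).
 */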
565 static int
566 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
567 {
568 unsigned int lo, hi;
569 int index, matches = 0;
570
571 if (cache->numcached == 0) {
572 *indexp = 0;
573 return 0; // table is empty, so insert at index=0 and report no match
574 }
575
576 if (cache->numcached > CACHE_ELEMS) {
577 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
578 cache->numcached, CACHE_ELEMS);*/
579 cache->numcached = CACHE_ELEMS;
580 }
581
582 lo = 0;
583 hi = cache->numcached - 1;
584 index = -1;
585
586 /* perform binary search for parent_id */
587 do {
588 unsigned int mid = (hi - lo)/2 + lo;
589 unsigned int this_id = cache->acache[mid];
590
591 if (parent_id == this_id) {
592 index = mid;
593 break;
594 }
595
596 if (parent_id < this_id) {
597 hi = mid;
598 continue;
599 }
600
601 if (parent_id > this_id) {
602 lo = mid + 1;
603 continue;
604 }
605 } while(lo < hi);
606
607 /* check if lo and hi converged on the match */
608 if (parent_id == cache->acache[hi]) {
609 index = hi;
610 }
611
612 /* if no existing entry found, find index for new one */
613 if (index == -1) {
614 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
615 matches = 0;
616 } else {
617 matches = 1;
618 }
619
620 *indexp = index;
621 return matches;
622 }
623
624 /*
625 * Add a node to the access_cache at the given index (or do a lookup first
626 * to find the index if -1 is passed in). We currently do a replace rather
627 * than an insert if the cache is full.
628 */
629 static void
630 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
631 {
632 int lookup_index = -1;
633
634 /* need to do a lookup first if -1 passed for index */
635 if (index == -1) {
636 if (lookup_bucket(cache, &lookup_index, nodeID)) {
637 if (cache->haveaccess[lookup_index] != access) {
638 /* change access info for existing entry... should never happen */
639 cache->haveaccess[lookup_index] = access;
640 }
641
642 /* mission accomplished */
643 return;
644 } else {
645 index = lookup_index;
646 }
647
648 }
649
650 /* if the cache is full, do a replace rather than an insert */
651 if (cache->numcached >= CACHE_ELEMS) {
652 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
653 cache->numcached = CACHE_ELEMS-1;
654
655 if (index > cache->numcached) {
656 // printf("index %d pinned to %d\n", index, cache->numcached);
657 index = cache->numcached;
658 }
659 } else if (index >= 0 && index < cache->numcached) {
660 /* only do bcopy if we're inserting */
661 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
662 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
663 }
664
665 cache->acache[index] = nodeID;
666 cache->haveaccess[index] = access;
667 cache->numcached++;
668 }
669
670
671 struct cinfo {
672 uid_t uid;
673 gid_t gid;
674 mode_t mode;
675 cnid_t parentcnid;
676 };
677
678 static int
679 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
680 {
681 struct cinfo *cip = (struct cinfo *)arg;
682
683 cip->uid = attrp->ca_uid;
684 cip->gid = attrp->ca_gid;
685 cip->mode = attrp->ca_mode;
686 cip->parentcnid = descp->cd_parentcnid;
687
688 return (0);
689 }
690
691 /*
 692  * Look up the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
693 * isn't incore, then go to the catalog.
694 */
695 static int
696 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
697 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
698 {
699 int error = 0;
700
701 /* if this id matches the one the fsctl was called with, skip the lookup */
702 if (cnid == skip_cp->c_cnid) {
703 cnattrp->ca_uid = skip_cp->c_uid;
704 cnattrp->ca_gid = skip_cp->c_gid;
705 cnattrp->ca_mode = skip_cp->c_mode;
706 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
707 } else {
708 struct cinfo c_info;
709
 710         /* otherwise, check the cnode hash in case the file/dir is incore */
711 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
712 cnattrp->ca_uid = c_info.uid;
713 cnattrp->ca_gid = c_info.gid;
714 cnattrp->ca_mode = c_info.mode;
715 keyp->hfsPlus.parentID = c_info.parentcnid;
716 } else {
717 int lockflags;
718
719 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
720
721 /* lookup this cnid in the catalog */
722 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
723
724 hfs_systemfile_unlock(hfsmp, lockflags);
725
726 cache->lookups++;
727 }
728 }
729
730 return (error);
731 }
732
733 /*
734 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
735 * up to CACHE_LEVELS as we progress towards the root.
736 */
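/*
 * For example, a file whose parent chain is 57 -> 40 -> 2 (the root) is
 * reported accessible only if every directory on that chain grants X_OK to
 * the supplied credential; each verdict is cached, so a later check against
 * another entry of directory 57 is answered without hitting the catalog again.
 */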
737 static int
738 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
739 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
740 {
741 int myErr = 0;
742 int myResult;
743 HFSCatalogNodeID thisNodeID;
744 unsigned long myPerms;
745 struct cat_attr cnattr;
746 int cache_index = -1;
747 CatalogKey catkey;
748
749 int i = 0, ids_to_cache = 0;
750 int parent_ids[CACHE_LEVELS];
751
752 /* root always has access */
753 if (!suser(myp_ucred, NULL)) {
754 return (1);
755 }
756
757 thisNodeID = nodeID;
758 while (thisNodeID >= kRootDirID) {
759 myResult = 0; /* default to "no access" */
760
761 /* check the cache before resorting to hitting the catalog */
762
763 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
764 * to look any further after hitting cached dir */
765
766 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
767 cache->cachehits++;
768 myResult = cache->haveaccess[cache_index];
769 goto ExitThisRoutine;
770 }
771
772 /* remember which parents we want to cache */
773 if (ids_to_cache < CACHE_LEVELS) {
774 parent_ids[ids_to_cache] = thisNodeID;
775 ids_to_cache++;
776 }
777
778 /* do the lookup (checks the cnode hash, then the catalog) */
779 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
780 if (myErr) {
781 goto ExitThisRoutine; /* no access */
782 }
783
784 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
785 cnattr.ca_mode, hfsmp->hfs_mp,
786 myp_ucred, theProcPtr);
787
788 if ( (myPerms & X_OK) == 0 ) {
789 myResult = 0;
790 goto ExitThisRoutine; /* no access */
791 }
792
793 /* up the hierarchy we go */
794 thisNodeID = catkey.hfsPlus.parentID;
795 }
796
797 /* if here, we have access to this node */
798 myResult = 1;
799
800 ExitThisRoutine:
801 if (myErr) {
802 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
803 myResult = 0;
804 }
805 *err = myErr;
806
807 /* cache the parent directory(ies) */
808 for (i = 0; i < ids_to_cache; i++) {
809 /* small optimization: get rid of double-lookup for all these */
810 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
811 add_node(cache, -1, parent_ids[i], myResult);
812 }
813
814 return (myResult);
815 }
816 /* end "bulk-access" support */
817
818
819
820 /*
821 * Callback for use with freeze ioctl.
822 */
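/*
 * The F_FREEZE_FS handler below runs this on every vnode of the mount via
 * vnode_iterate(), waiting for outstanding writes to drain before the global
 * exclusive lock is taken.
 */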
823 static int
824 hfs_freezewrite_callback(struct vnode *vp, void *cargs)
825 {
826 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
827
828 return 0;
829 }
830
831 /*
832 * Control filesystem operating characteristics.
833 */
834 int
835 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
836 vnode_t a_vp;
837 int a_command;
838 caddr_t a_data;
839 int a_fflag;
840 vfs_context_t a_context;
841 } */ *ap)
842 {
843 struct vnode * vp = ap->a_vp;
844 struct hfsmount *hfsmp = VTOHFS(vp);
845 vfs_context_t context = ap->a_context;
846 kauth_cred_t cred = vfs_context_ucred(context);
847 proc_t p = vfs_context_proc(context);
848 struct vfsstatfs *vfsp;
849 boolean_t is64bit;
850
851 is64bit = proc_is64bit(p);
852
853 switch (ap->a_command) {
854
855 case HFS_RESIZE_VOLUME: {
856 u_int64_t newsize;
857 u_int64_t cursize;
858
859 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
860 if (suser(cred, NULL) &&
861 kauth_cred_getuid(cred) != vfsp->f_owner) {
862 return (EACCES); /* must be owner of file system */
863 }
864 if (!vnode_isvroot(vp)) {
865 return (EINVAL);
866 }
867 newsize = *(u_int64_t *)ap->a_data;
868 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
869
870 if (newsize > cursize) {
871 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
872 } else if (newsize < cursize) {
873 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
874 } else {
875 return (0);
876 }
877 }
878 case HFS_CHANGE_NEXT_ALLOCATION: {
879 u_int32_t location;
880
881 if (vnode_vfsisrdonly(vp)) {
882 return (EROFS);
883 }
884 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
885 if (suser(cred, NULL) &&
886 kauth_cred_getuid(cred) != vfsp->f_owner) {
887 return (EACCES); /* must be owner of file system */
888 }
889 if (!vnode_isvroot(vp)) {
890 return (EINVAL);
891 }
892 location = *(u_int32_t *)ap->a_data;
893 if (location > hfsmp->totalBlocks - 1) {
894 return (EINVAL);
895 }
896 /* Return previous value. */
897 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
898 HFS_MOUNT_LOCK(hfsmp, TRUE);
899 hfsmp->nextAllocation = location;
900 hfsmp->vcbFlags |= 0xFF00;
901 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
902 return (0);
903 }
904
905 #ifdef HFS_SPARSE_DEV
906 case HFS_SETBACKINGSTOREINFO: {
907 struct vnode * bsfs_rootvp;
908 struct vnode * di_vp;
909 struct hfs_backingstoreinfo *bsdata;
910 int error = 0;
911
912 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
913 return (EALREADY);
914 }
915 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
916 if (suser(cred, NULL) &&
917 kauth_cred_getuid(cred) != vfsp->f_owner) {
918 return (EACCES); /* must be owner of file system */
919 }
920 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
921 if (bsdata == NULL) {
922 return (EINVAL);
923 }
924 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
925 return (error);
926 }
927 if ((error = vnode_getwithref(di_vp))) {
928 file_drop(bsdata->backingfd);
929 return(error);
930 }
931
932 if (vnode_mount(vp) == vnode_mount(di_vp)) {
933 (void)vnode_put(di_vp);
934 file_drop(bsdata->backingfd);
935 return (EINVAL);
936 }
937
938 /*
939 * Obtain the backing fs root vnode and keep a reference
940 * on it. This reference will be dropped in hfs_unmount.
941 */
942 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
943 if (error) {
944 (void)vnode_put(di_vp);
945 file_drop(bsdata->backingfd);
946 return (error);
947 }
948 vnode_ref(bsfs_rootvp);
949 vnode_put(bsfs_rootvp);
950
951 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
952 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
953 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
954 hfsmp->hfs_sparsebandblks *= 4;
955
956 (void)vnode_put(di_vp);
957 file_drop(bsdata->backingfd);
958 return (0);
959 }
960 case HFS_CLRBACKINGSTOREINFO: {
961 struct vnode * tmpvp;
962
963 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
964 if (suser(cred, NULL) &&
965 kauth_cred_getuid(cred) != vfsp->f_owner) {
966 return (EACCES); /* must be owner of file system */
967 }
968 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
969 hfsmp->hfs_backingfs_rootvp) {
970
971 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
972 tmpvp = hfsmp->hfs_backingfs_rootvp;
973 hfsmp->hfs_backingfs_rootvp = NULLVP;
974 hfsmp->hfs_sparsebandblks = 0;
975 vnode_rele(tmpvp);
976 }
977 return (0);
978 }
979 #endif /* HFS_SPARSE_DEV */
980
981 case F_FREEZE_FS: {
982 struct mount *mp;
983 task_t task;
984
985 if (!is_suser())
986 return (EACCES);
987
988 mp = vnode_mount(vp);
989 hfsmp = VFSTOHFS(mp);
990
991 if (!(hfsmp->jnl))
992 return (ENOTSUP);
993
994 task = current_task();
995 task_working_set_disable(task);
996
997 // flush things before we get started to try and prevent
998 // dirty data from being paged out while we're frozen.
999 // note: can't do this after taking the lock as it will
1000 // deadlock against ourselves.
1001 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1002 hfs_global_exclusive_lock_acquire(hfsmp);
1003 journal_flush(hfsmp->jnl);
1004 // don't need to iterate on all vnodes, we just need to
1005 // wait for writes to the system files and the device vnode
1006 // vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1007 if (HFSTOVCB(hfsmp)->extentsRefNum)
1008 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1009 if (HFSTOVCB(hfsmp)->catalogRefNum)
1010 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1011 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1012 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1013 if (hfsmp->hfs_attribute_vp)
1014 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1015 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1016
1017 hfsmp->hfs_freezing_proc = current_proc();
1018
1019 return (0);
1020 }
1021
1022 case F_THAW_FS: {
1023 if (!is_suser())
1024 return (EACCES);
1025
1026 // if we're not the one who froze the fs then we
1027 // can't thaw it.
1028 if (hfsmp->hfs_freezing_proc != current_proc()) {
1029 return EINVAL;
1030 }
1031
1032 // NOTE: if you add code here, also go check the
1033 // code that "thaws" the fs in hfs_vnop_close()
1034 //
1035 hfsmp->hfs_freezing_proc = NULL;
1036 hfs_global_exclusive_lock_release(hfsmp);
1037
1038 return (0);
1039 }
1040
1041 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1042 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1043
1044 case HFS_BULKACCESS_FSCTL:
1045 case HFS_BULKACCESS: {
1046 /*
 1047          * NOTE: on entry, the vnode is locked. In case this vnode
 1048          * happens to be in our list of file_ids, we'll note it and
 1049          * avoid calling hfs_chashget_nowait() on that id as that
1050 * will cause a "locking against myself" panic.
1051 */
1052 Boolean check_leaf = true;
1053
1054 struct user_access_t *user_access_structp;
1055 struct user_access_t tmp_user_access_t;
1056 struct access_cache cache;
1057
1058 int error = 0, i;
1059
1060 dev_t dev = VTOC(vp)->c_dev;
1061
1062 short flags;
1063 struct ucred myucred; /* XXX ILLEGAL */
1064 int num_files;
1065 int *file_ids = NULL;
1066 short *access = NULL;
1067
1068 cnid_t cnid;
1069 cnid_t prevParent_cnid = 0;
1070 unsigned long myPerms;
1071 short myaccess = 0;
1072 struct cat_attr cnattr;
1073 CatalogKey catkey;
1074 struct cnode *skip_cp = VTOC(vp);
1075 struct vfs_context my_context;
1076
1077 /* first, return error if not run as root */
1078 if (cred->cr_ruid != 0) {
1079 return EPERM;
1080 }
1081
1082 /* initialize the local cache and buffers */
1083 cache.numcached = 0;
1084 cache.cachehits = 0;
1085 cache.lookups = 0;
1086
1087 file_ids = (int *) get_pathbuff();
1088 access = (short *) get_pathbuff();
1089 cache.acache = (int *) get_pathbuff();
1090 cache.haveaccess = (Boolean *) get_pathbuff();
1091
1092 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1093 release_pathbuff((char *) file_ids);
1094 release_pathbuff((char *) access);
1095 release_pathbuff((char *) cache.acache);
1096 release_pathbuff((char *) cache.haveaccess);
1097
1098 return ENOMEM;
1099 }
1100
1101 /* struct copyin done during dispatch... need to copy file_id array separately */
1102 if (ap->a_data == NULL) {
1103 error = EINVAL;
1104 goto err_exit_bulk_access;
1105 }
1106
1107 if (is64bit) {
1108 user_access_structp = (struct user_access_t *)ap->a_data;
1109 }
1110 else {
1111 struct access_t * accessp = (struct access_t *)ap->a_data;
1112 tmp_user_access_t.uid = accessp->uid;
1113 tmp_user_access_t.flags = accessp->flags;
1114 tmp_user_access_t.num_groups = accessp->num_groups;
1115 tmp_user_access_t.num_files = accessp->num_files;
1116 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1117 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1118 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1119 user_access_structp = &tmp_user_access_t;
1120 }
1121
1122 num_files = user_access_structp->num_files;
1123 if (num_files < 1) {
1124 goto err_exit_bulk_access;
1125 }
1126 if (num_files > 256) {
1127 error = EINVAL;
1128 goto err_exit_bulk_access;
1129 }
1130
1131 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1132 num_files * sizeof(int)))) {
1133 goto err_exit_bulk_access;
1134 }
1135
1136 /* fill in the ucred structure */
1137 flags = user_access_structp->flags;
1138 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1139 flags = R_OK;
1140 }
1141
1142 /* check if we've been passed leaf node ids or parent ids */
1143 if (flags & PARENT_IDS_FLAG) {
1144 check_leaf = false;
1145 }
1146
1147 memset(&myucred, 0, sizeof(myucred));
1148 myucred.cr_ref = 1;
1149 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1150 myucred.cr_ngroups = user_access_structp->num_groups;
1151 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1152 myucred.cr_ngroups = 0;
1153 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1154 myucred.cr_ngroups * sizeof(gid_t)))) {
1155 goto err_exit_bulk_access;
1156 }
1157 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1158
1159 my_context.vc_proc = p;
1160 my_context.vc_ucred = &myucred;
1161
1162 /* Check access to each file_id passed in */
1163 for (i = 0; i < num_files; i++) {
1164 #if 0
1165 cnid = (cnid_t) file_ids[i];
1166
1167 /* root always has access */
1168 if (!suser(&myucred, NULL)) {
1169 access[i] = 0;
1170 continue;
1171 }
1172
1173 if (check_leaf) {
1174
1175 /* do the lookup (checks the cnode hash, then the catalog) */
1176 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1177 if (error) {
1178 access[i] = (short) error;
1179 continue;
1180 }
1181
1182 /* before calling CheckAccess(), check the target file for read access */
1183 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1184 cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p );
1185
1186
1187 /* fail fast if no access */
1188 if ((myPerms & flags) == 0) {
1189 access[i] = EACCES;
1190 continue;
1191 }
1192 } else {
1193 /* we were passed an array of parent ids */
1194 catkey.hfsPlus.parentID = cnid;
1195 }
1196
1197 /* if the last guy had the same parent and had access, we're done */
1198 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1199 cache.cachehits++;
1200 access[i] = 0;
1201 continue;
1202 }
1203
1204 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1205 skip_cp, p, &myucred, dev);
1206
1207 if ( myaccess ) {
1208 access[i] = 0; // have access.. no errors to report
1209 } else {
1210 access[i] = (error != 0 ? (short) error : EACCES);
1211 }
1212
1213 prevParent_cnid = catkey.hfsPlus.parentID;
1214 #else
1215 int myErr;
1216
1217 cnid = (cnid_t)file_ids[i];
1218
1219 while (cnid >= kRootDirID) {
1220 /* get the vnode for this cnid */
1221 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1222 if ( myErr ) {
1223 access[i] = EACCES;
1224 break;
1225 }
1226
1227 cnid = VTOC(vp)->c_parentcnid;
1228
1229 hfs_unlock(VTOC(vp));
1230 if (vnode_vtype(vp) == VDIR) {
1231 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1232 } else {
1233 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1234 }
1235 vnode_put(vp);
1236 access[i] = myErr;
1237 if (myErr) {
1238 break;
1239 }
1240 }
1241 #endif
1242 }
1243
1244 /* copyout the access array */
1245 if ((error = copyout((caddr_t)access, user_access_structp->access,
1246 num_files * sizeof (short)))) {
1247 goto err_exit_bulk_access;
1248 }
1249
1250 err_exit_bulk_access:
1251
1252 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1253
1254 release_pathbuff((char *) cache.acache);
1255 release_pathbuff((char *) cache.haveaccess);
1256 release_pathbuff((char *) file_ids);
1257 release_pathbuff((char *) access);
1258
1259 return (error);
1260 } /* HFS_BULKACCESS */
1261
1262 case HFS_SETACLSTATE: {
1263 int state;
1264
1265 if (!is_suser()) {
1266 return (EPERM);
1267 }
1268 if (ap->a_data == NULL) {
1269 return (EINVAL);
1270 }
1271 state = *(int *)ap->a_data;
1272 if (state == 0 || state == 1)
1273 return hfs_setextendedsecurity(hfsmp, state);
1274 else
1275 return (EINVAL);
1276 }
1277
1278 case F_FULLFSYNC: {
1279 int error;
1280
1281 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1282 if (error == 0) {
1283 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1284 hfs_unlock(VTOC(vp));
1285 }
1286
1287 return error;
1288 }
1289
1290 case F_CHKCLEAN: {
1291 register struct cnode *cp;
1292 int error;
1293
1294 if (!vnode_isreg(vp))
1295 return EINVAL;
1296
1297 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1298 if (error == 0) {
1299 cp = VTOC(vp);
1300 /*
1301 * used by regression test to determine if
1302 * all the dirty pages (via write) have been cleaned
 1303                          * after a call to 'fsync'.
1304 */
1305 error = is_file_clean(vp, VTOF(vp)->ff_size);
1306 hfs_unlock(cp);
1307 }
1308 return (error);
1309 }
1310
1311 case F_RDADVISE: {
1312 register struct radvisory *ra;
1313 struct filefork *fp;
1314 int error;
1315
1316 if (!vnode_isreg(vp))
1317 return EINVAL;
1318
1319 ra = (struct radvisory *)(ap->a_data);
1320 fp = VTOF(vp);
1321
1322 /* Protect against a size change. */
1323 hfs_lock_truncate(VTOC(vp), TRUE);
1324
1325 if (ra->ra_offset >= fp->ff_size) {
1326 error = EFBIG;
1327 } else {
1328 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1329 }
1330
1331 hfs_unlock_truncate(VTOC(vp));
1332 return (error);
1333 }
1334
1335 case F_READBOOTSTRAP:
1336 case F_WRITEBOOTSTRAP:
1337 {
1338 struct vnode *devvp = NULL;
1339 user_fbootstraptransfer_t *user_bootstrapp;
1340 int devBlockSize;
1341 int error;
1342 uio_t auio;
1343 daddr64_t blockNumber;
1344 u_long blockOffset;
1345 u_long xfersize;
1346 struct buf *bp;
1347 user_fbootstraptransfer_t user_bootstrap;
1348
1349 if (!vnode_isvroot(vp))
1350 return (EINVAL);
 1351             /* LP64 - when the caller is a 64-bit process we are passed a pointer
 1352              * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
 1353              * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1354 */
1355 if (is64bit) {
1356 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1357 }
1358 else {
1359 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1360 user_bootstrapp = &user_bootstrap;
1361 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1362 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1363 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1364 }
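/*
 * Bootstrap transfers are limited to the first 1024 bytes of the volume
 * (the traditional boot blocks):
 */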
1365 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1366 return EINVAL;
1367
1368 devvp = VTOHFS(vp)->hfs_devvp;
1369 auio = uio_create(1, user_bootstrapp->fbt_offset,
1370 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1371 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1372 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1373
1374 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1375
1376 while (uio_resid(auio) > 0) {
1377 blockNumber = uio_offset(auio) / devBlockSize;
1378 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1379 if (error) {
1380 if (bp) buf_brelse(bp);
1381 uio_free(auio);
1382 return error;
1383 };
1384
1385 blockOffset = uio_offset(auio) % devBlockSize;
1386 xfersize = devBlockSize - blockOffset;
1387 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1388 if (error) {
1389 buf_brelse(bp);
1390 uio_free(auio);
1391 return error;
1392 };
1393 if (uio_rw(auio) == UIO_WRITE) {
1394 error = VNOP_BWRITE(bp);
1395 if (error) {
1396 uio_free(auio);
1397 return error;
1398 }
1399 } else {
1400 buf_brelse(bp);
1401 };
1402 };
1403 uio_free(auio);
1404 };
1405 return 0;
1406
1407 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1408 {
1409 if (is64bit) {
1410 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1411 }
1412 else {
1413 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1414 }
1415 return 0;
1416 }
1417
1418 case HFS_GET_MOUNT_TIME:
1419 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1420 break;
1421
1422 case HFS_GET_LAST_MTIME:
1423 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1424 break;
1425
1426 case HFS_SET_BOOT_INFO:
1427 if (!vnode_isvroot(vp))
1428 return(EINVAL);
1429 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1430 return(EACCES); /* must be superuser or owner of filesystem */
1431 HFS_MOUNT_LOCK(hfsmp, TRUE);
1432 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1433 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1434 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1435 break;
1436
1437 case HFS_GET_BOOT_INFO:
1438 if (!vnode_isvroot(vp))
1439 return(EINVAL);
1440 HFS_MOUNT_LOCK(hfsmp, TRUE);
1441 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1442 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1443 break;
1444
1445 default:
1446 return (ENOTTY);
1447 }
1448
1449 /* Should never get here */
1450 return 0;
1451 }
1452
1453 /*
1454 * select
1455 */
1456 int
1457 hfs_vnop_select(__unused struct vnop_select_args *ap)
1458 /*
1459 struct vnop_select_args {
1460 vnode_t a_vp;
1461 int a_which;
1462 int a_fflags;
1463 void *a_wql;
1464 vfs_context_t a_context;
1465 };
1466 */
1467 {
1468 /*
1469 * We should really check to see if I/O is possible.
1470 */
1471 return (1);
1472 }
1473
1474 /*
 1475  * Converts a logical block number to a physical block number, and optionally returns
 1476  * the number of remaining blocks in the run. The logical block is based on hfsNode.logBlockSize.
 1477  * The physical block number is based on the device block size, which is currently 512 bytes.
 1478  * The block run is returned in logical blocks, and is the REMAINING number of blocks in the run.
1479 */
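/*
 * For example (illustrative): with a 4K logical block size, logical block 10
 * corresponds to byte position 40960; MapFileBlockC() turns that into a
 * 512-byte device block number in *bnp plus the number of contiguous bytes
 * available, from which the remaining run of logical blocks is derived.
 */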
1480 int
1481 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1482 {
1483 struct cnode *cp = VTOC(vp);
1484 struct filefork *fp = VTOF(vp);
1485 struct hfsmount *hfsmp = VTOHFS(vp);
1486 int retval = E_NONE;
1487 daddr_t logBlockSize;
1488 size_t bytesContAvail = 0;
1489 off_t blockposition;
1490 int lockExtBtree;
1491 int lockflags = 0;
1492
1493 /*
1494 * Check for underlying vnode requests and ensure that logical
1495 * to physical mapping is requested.
1496 */
1497 if (vpp != NULL)
1498 *vpp = cp->c_devvp;
1499 if (bnp == NULL)
1500 return (0);
1501
1502 logBlockSize = GetLogicalBlockSize(vp);
1503 blockposition = (off_t)bn * (off_t)logBlockSize;
1504
1505 lockExtBtree = overflow_extents(fp);
1506
1507 if (lockExtBtree)
1508 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1509
1510 retval = MacToVFSError(
1511 MapFileBlockC (HFSTOVCB(hfsmp),
1512 (FCB*)fp,
1513 MAXPHYSIO,
1514 blockposition,
1515 bnp,
1516 &bytesContAvail));
1517
1518 if (lockExtBtree)
1519 hfs_systemfile_unlock(hfsmp, lockflags);
1520
1521 if (retval == E_NONE) {
1522 /* Figure out how many read ahead blocks there are */
1523 if (runp != NULL) {
1524 if (can_cluster(logBlockSize)) {
1525 /* Make sure this result never goes negative: */
1526 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1527 } else {
1528 *runp = 0;
1529 }
1530 }
1531 }
1532 return (retval);
1533 }
1534
1535 /*
1536 * Convert logical block number to file offset.
1537 */
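/*
 * (i.e. offset = a_lblkno * logical block size; for example, block 3 with a
 * 4K logical block size maps to byte offset 12288)
 */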
1538 int
1539 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1540 /*
1541 struct vnop_blktooff_args {
1542 vnode_t a_vp;
1543 daddr64_t a_lblkno;
1544 off_t *a_offset;
1545 };
1546 */
1547 {
1548 if (ap->a_vp == NULL)
1549 return (EINVAL);
1550 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1551
1552 return(0);
1553 }
1554
1555 /*
1556 * Convert file offset to logical block number.
1557 */
1558 int
1559 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1560 /*
1561 struct vnop_offtoblk_args {
1562 vnode_t a_vp;
1563 off_t a_offset;
1564 daddr64_t *a_lblkno;
1565 };
1566 */
1567 {
1568 if (ap->a_vp == NULL)
1569 return (EINVAL);
1570 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1571
1572 return(0);
1573 }
1574
1575 /*
1576 * Map file offset to physical block number.
1577 *
1578 * System file cnodes are expected to be locked (shared or exclusive).
1579 */
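/*
 * On success *a_bpn receives the device block backing a_foffset, or -1 when
 * that offset falls inside an invalid (not yet written) range; *a_run, if
 * supplied, receives the number of contiguous bytes that can be treated
 * uniformly starting at a_foffset, adjusted below for invalid file ranges.
 */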
1580 int
1581 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1582 /*
1583 struct vnop_blockmap_args {
1584 vnode_t a_vp;
1585 off_t a_foffset;
1586 size_t a_size;
1587 daddr64_t *a_bpn;
1588 size_t *a_run;
1589 void *a_poff;
1590 int a_flags;
1591 vfs_context_t a_context;
1592 };
1593 */
1594 {
1595 struct vnode *vp = ap->a_vp;
1596 struct cnode *cp;
1597 struct filefork *fp;
1598 struct hfsmount *hfsmp;
1599 size_t bytesContAvail = 0;
1600 int retval = E_NONE;
1601 int syslocks = 0;
1602 int lockflags = 0;
1603 struct rl_entry *invalid_range;
1604 enum rl_overlaptype overlaptype;
1605 int started_tr = 0;
1606 int tooklock = 0;
1607
1608 /*
1609 * Check for underlying vnode requests and ensure that logical
1610 * to physical mapping is requested.
1611 */
1612 if (ap->a_bpn == NULL)
1613 return (0);
1614
1615 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1616 if (VTOC(vp)->c_lockowner != current_thread()) {
1617 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1618 tooklock = 1;
1619 } else {
1620 cp = VTOC(vp);
1621 panic("blockmap: %s cnode lock already held!\n",
1622 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1623 }
1624 }
1625 hfsmp = VTOHFS(vp);
1626 cp = VTOC(vp);
1627 fp = VTOF(vp);
1628
1629 retry:
1630 if (fp->ff_unallocblocks) {
1631 if (hfs_start_transaction(hfsmp) != 0) {
1632 retval = EINVAL;
1633 goto exit;
1634 } else {
1635 started_tr = 1;
1636 }
1637 syslocks = SFL_EXTENTS | SFL_BITMAP;
1638
1639 } else if (overflow_extents(fp)) {
1640 syslocks = SFL_EXTENTS;
1641 }
1642
1643 if (syslocks)
1644 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1645
1646 /*
1647 * Check for any delayed allocations.
1648 */
1649 if (fp->ff_unallocblocks) {
1650 SInt64 actbytes;
1651 u_int32_t loanedBlocks;
1652
1653 //
1654 // Make sure we have a transaction. It's possible
1655 // that we came in and fp->ff_unallocblocks was zero
1656 // but during the time we blocked acquiring the extents
1657 // btree, ff_unallocblocks became non-zero and so we
1658 // will need to start a transaction.
1659 //
1660 if (started_tr == 0) {
1661 if (syslocks) {
1662 hfs_systemfile_unlock(hfsmp, lockflags);
1663 syslocks = 0;
1664 }
1665 goto retry;
1666 }
1667
1668 /*
 1669                  * Note: ExtendFileC will release any blocks on loan and
 1670                  * acquire real blocks. So we ask to extend by zero bytes
1671 * since ExtendFileC will account for the virtual blocks.
1672 */
1673
1674 loanedBlocks = fp->ff_unallocblocks;
1675 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1676 kEFAllMask | kEFNoClumpMask, &actbytes);
1677
1678 if (retval) {
1679 fp->ff_unallocblocks = loanedBlocks;
1680 cp->c_blocks += loanedBlocks;
1681 fp->ff_blocks += loanedBlocks;
1682
1683 HFS_MOUNT_LOCK(hfsmp, TRUE);
1684 hfsmp->loanedBlocks += loanedBlocks;
1685 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1686 }
1687
1688 if (retval) {
1689 hfs_systemfile_unlock(hfsmp, lockflags);
1690 cp->c_flag |= C_MODIFIED;
1691 if (started_tr) {
1692 (void) hfs_update(vp, TRUE);
1693 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1694
1695 hfs_end_transaction(hfsmp);
1696 }
1697 goto exit;
1698 }
1699 }
1700
1701 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1702 ap->a_bpn, &bytesContAvail);
1703 if (syslocks) {
1704 hfs_systemfile_unlock(hfsmp, lockflags);
1705 syslocks = 0;
1706 }
1707
1708 if (started_tr) {
1709 (void) hfs_update(vp, TRUE);
1710 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1711 hfs_end_transaction(hfsmp);
1712 started_tr = 0;
1713 }
1714 if (retval) {
1715 goto exit;
1716 }
1717
1718 /* Adjust the mapping information for invalid file ranges: */
1719 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1720 ap->a_foffset + (off_t)bytesContAvail - 1,
1721 &invalid_range);
1722 if (overlaptype != RL_NOOVERLAP) {
1723 switch(overlaptype) {
1724 case RL_MATCHINGOVERLAP:
1725 case RL_OVERLAPCONTAINSRANGE:
1726 case RL_OVERLAPSTARTSBEFORE:
1727 /* There's no valid block for this byte offset: */
1728 *ap->a_bpn = (daddr64_t)-1;
1729 /* There's no point limiting the amount to be returned
1730 * if the invalid range that was hit extends all the way
1731 * to the EOF (i.e. there's no valid bytes between the
1732 * end of this range and the file's EOF):
1733 */
1734 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1735 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1736 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1737 }
1738 break;
1739
1740 case RL_OVERLAPISCONTAINED:
1741 case RL_OVERLAPENDSAFTER:
1742 /* The range of interest hits an invalid block before the end: */
1743 if (invalid_range->rl_start == ap->a_foffset) {
1744 /* There's actually no valid information to be had starting here: */
1745 *ap->a_bpn = (daddr64_t)-1;
1746 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1747 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1748 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1749 }
1750 } else {
1751 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1752 }
1753 break;
1754
1755 case RL_NOOVERLAP:
1756 break;
1757 } /* end switch */
1758 if (bytesContAvail > ap->a_size)
1759 bytesContAvail = ap->a_size;
1760 }
1761 if (ap->a_run)
1762 *ap->a_run = bytesContAvail;
1763
1764 if (ap->a_poff)
1765 *(int *)ap->a_poff = 0;
1766 exit:
1767 if (tooklock)
1768 hfs_unlock(cp);
1769
1770 return (MacToVFSError(retval));
1771 }
1772
1773
1774 /*
 1775  * Prepare and issue the I/O;
 1776  * buf_strategy knows how to deal
 1777  * with requests that require
 1778  * fragmented I/Os.
1779 */
1780 int
1781 hfs_vnop_strategy(struct vnop_strategy_args *ap)
1782 {
1783 buf_t bp = ap->a_bp;
1784 vnode_t vp = buf_vnode(bp);
1785 struct cnode *cp = VTOC(vp);
1786
1787 return (buf_strategy(cp->c_devvp, ap));
1788 }
1789
1790
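/*
 * Truncate (or extend) the file to "length" bytes.  The cnode is expected to
 * be locked on entry; when "skipsetsize" is set the caller is responsible for
 * the matching ubc_setsize(), and IO_NOZEROFILL in "flags" suppresses
 * zero-filling of newly allocated space.
 */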
1791 static int
1792 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1793 {
1794 register struct cnode *cp = VTOC(vp);
1795 struct filefork *fp = VTOF(vp);
 1796         struct proc *p = vfs_context_proc(context);
1797 kauth_cred_t cred = vfs_context_ucred(context);
1798 int retval;
1799 off_t bytesToAdd;
1800 off_t actualBytesAdded;
1801 off_t filebytes;
1802 u_long fileblocks;
1803 int blksize;
1804 struct hfsmount *hfsmp;
1805 int lockflags;
1806
1807 blksize = VTOVCB(vp)->blockSize;
1808 fileblocks = fp->ff_blocks;
1809 filebytes = (off_t)fileblocks * (off_t)blksize;
1810
1811 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1812 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1813
1814 if (length < 0)
1815 return (EINVAL);
1816
1817 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1818 return (EFBIG);
1819
1820 hfsmp = VTOHFS(vp);
1821
1822 retval = E_NONE;
1823
1824 /* Files that are changing size are not hot file candidates. */
1825 if (hfsmp->hfc_stage == HFC_RECORDING) {
1826 fp->ff_bytesread = 0;
1827 }
1828
1829 /*
1830 * We cannot just check if fp->ff_size == length (as an optimization)
1831 * since there may be extra physical blocks that also need truncation.
1832 */
1833 #if QUOTA
1834 if ((retval = hfs_getinoquota(cp)))
1835 return(retval);
1836 #endif /* QUOTA */
1837
1838 /*
1839 * Lengthen the size of the file. We must ensure that the
1840 * last byte of the file is allocated. Since the smallest
1841 * value of ff_size is 0, length will be at least 1.
1842 */
1843 if (length > (off_t)fp->ff_size) {
1844 #if QUOTA
1845 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1846 cred, 0);
1847 if (retval)
1848 goto Err_Exit;
1849 #endif /* QUOTA */
1850 /*
1851 * If we don't have enough physical space then
1852 * we need to extend the physical size.
1853 */
1854 if (length > filebytes) {
1855 int eflags;
1856 u_long blockHint = 0;
1857
1858 /* All or nothing and don't round up to clumpsize. */
1859 eflags = kEFAllMask | kEFNoClumpMask;
1860
1861 if (cred && suser(cred, NULL) != 0)
1862 eflags |= kEFReserveMask; /* keep a reserve */
1863
1864 /*
1865 * Allocate Journal and Quota files in metadata zone.
1866 */
1867 if (filebytes == 0 &&
1868 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1869 hfs_virtualmetafile(cp)) {
1870 eflags |= kEFMetadataMask;
1871 blockHint = hfsmp->hfs_metazone_start;
1872 }
1873 if (hfs_start_transaction(hfsmp) != 0) {
1874 retval = EINVAL;
1875 goto Err_Exit;
1876 }
1877
1878 /* Protect extents b-tree and allocation bitmap */
1879 lockflags = SFL_BITMAP;
1880 if (overflow_extents(fp))
1881 lockflags |= SFL_EXTENTS;
1882 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1883
1884 while ((length > filebytes) && (retval == E_NONE)) {
1885 bytesToAdd = length - filebytes;
1886 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1887 (FCB*)fp,
1888 bytesToAdd,
1889 blockHint,
1890 eflags,
1891 &actualBytesAdded));
1892
1893 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1894 if (actualBytesAdded == 0 && retval == E_NONE) {
1895 if (length > filebytes)
1896 length = filebytes;
1897 break;
1898 }
1899 } /* endwhile */
1900
1901 hfs_systemfile_unlock(hfsmp, lockflags);
1902
1903 if (hfsmp->jnl) {
1904 (void) hfs_update(vp, TRUE);
1905 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1906 }
1907
1908 hfs_end_transaction(hfsmp);
1909
1910 if (retval)
1911 goto Err_Exit;
1912
1913 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1914 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1915 }
1916
1917 if (!(flags & IO_NOZEROFILL)) {
1918 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1919 struct rl_entry *invalid_range;
1920 off_t zero_limit;
1921
1922 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1923 if (length < zero_limit) zero_limit = length;
1924
1925 if (length > (off_t)fp->ff_size) {
1926 struct timeval tv;
1927
1928 /* Extending the file: time to fill out the current last page w. zeroes? */
1929 if ((fp->ff_size & PAGE_MASK_64) &&
1930 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1931 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1932
1933 /* There's some valid data at the start of the (current) last page
1934 of the file, so zero out the remainder of that page to ensure the
1935 entire page contains valid data. Since there is no invalid range
1936 possible past the (current) eof, there's no need to remove anything
1937 from the invalid range list before calling cluster_write(): */
1938 hfs_unlock(cp);
1939 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1940 fp->ff_size, (off_t)0,
1941 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1942 hfs_lock(cp, HFS_FORCE_LOCK);
1943 if (retval) goto Err_Exit;
1944
1945 /* Merely invalidate the remaining area, if necessary: */
1946 if (length > zero_limit) {
1947 microuptime(&tv);
1948 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1949 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1950 }
1951 } else {
1952 /* The page containing the (current) eof is invalid: just add the
1953 remainder of the page to the invalid list, along with the area
1954 being newly allocated:
1955 */
1956 microuptime(&tv);
1957 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1958 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1959 };
1960 }
1961 } else {
1962 panic("hfs_truncate: invoked on non-UBC object?!");
1963 };
1964 }
1965 cp->c_touch_modtime = TRUE;
1966 fp->ff_size = length;
1967
1968 /* Nested transactions will do their own ubc_setsize. */
1969 if (!skipsetsize) {
1970 /*
1971 * ubc_setsize can cause a pagein here
1972 * so we need to drop cnode lock.
1973 */
1974 hfs_unlock(cp);
1975 ubc_setsize(vp, length);
1976 hfs_lock(cp, HFS_FORCE_LOCK);
1977 }
1978
1979 } else { /* Shorten the size of the file */
1980
1981 if ((off_t)fp->ff_size > length) {
1982 /*
1983 * Any buffers that are past the truncation point need to be
1984 * invalidated (to maintain buffer cache consistency).
1985 */
1986
1987 /* Nested transactions will do their own ubc_setsize. */
1988 if (!skipsetsize) {
1989 /*
1990 * ubc_setsize can cause a pageout here
1991 * so we need to drop cnode lock.
1992 */
1993 hfs_unlock(cp);
1994 ubc_setsize(vp, length);
1995 hfs_lock(cp, HFS_FORCE_LOCK);
1996 }
1997
1998 /* Any space previously marked as invalid is now irrelevant: */
1999 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2000 }
2001
2002 /*
2003 * Account for any unmapped blocks. Note that the new
2004 * file length can still end up with unmapped blocks.
2005 */
2006 if (fp->ff_unallocblocks > 0) {
2007 u_int32_t finalblks;
2008 u_int32_t loanedBlocks;
2009
2010 HFS_MOUNT_LOCK(hfsmp, TRUE);
2011
2012 loanedBlocks = fp->ff_unallocblocks;
2013 cp->c_blocks -= loanedBlocks;
2014 fp->ff_blocks -= loanedBlocks;
2015 fp->ff_unallocblocks = 0;
2016
2017 hfsmp->loanedBlocks -= loanedBlocks;
2018
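			/*
			 * If the new length still ends beyond the last allocated
			 * block, borrow back just enough blocks to cover the
			 * unmapped tail.
			 */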
2019 finalblks = (length + blksize - 1) / blksize;
2020 if (finalblks > fp->ff_blocks) {
2021 /* calculate required unmapped blocks */
2022 loanedBlocks = finalblks - fp->ff_blocks;
2023 hfsmp->loanedBlocks += loanedBlocks;
2024
2025 fp->ff_unallocblocks = loanedBlocks;
2026 cp->c_blocks += loanedBlocks;
2027 fp->ff_blocks += loanedBlocks;
2028 }
2029 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2030 }
2031
2032 /*
2033 * For a TBE process, deallocation of the file blocks is
2034 * delayed until the file is closed; hfs_close calls truncate
2035 * with the IO_NDELAY flag set. So when IO_NDELAY isn't set,
2036 * we make sure this isn't a TBE process.
2037 */
2038 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2039 #if QUOTA
2040 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2041 #endif /* QUOTA */
2042 if (hfs_start_transaction(hfsmp) != 0) {
2043 retval = EINVAL;
2044 goto Err_Exit;
2045 }
2046
2047 if (fp->ff_unallocblocks == 0) {
2048 /* Protect extents b-tree and allocation bitmap */
2049 lockflags = SFL_BITMAP;
2050 if (overflow_extents(fp))
2051 lockflags |= SFL_EXTENTS;
2052 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2053
2054 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2055 (FCB*)fp, length, false));
2056
2057 hfs_systemfile_unlock(hfsmp, lockflags);
2058 }
2059 if (hfsmp->jnl) {
2060 (void) hfs_update(vp, TRUE);
2061 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2062 }
2063
2064 hfs_end_transaction(hfsmp);
2065
2066 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2067 if (retval)
2068 goto Err_Exit;
2069 #if QUOTA
2070 /* These are bytesreleased */
2071 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2072 #endif /* QUOTA */
2073 }
2074 /* Only set update flag if the logical length changes */
2075 if ((off_t)fp->ff_size != length)
2076 cp->c_touch_modtime = TRUE;
2077 fp->ff_size = length;
2078 }
2079 cp->c_touch_chgtime = TRUE;
2080 retval = hfs_update(vp, MNT_WAIT);
2081 if (retval) {
2082 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2083 -1, -1, -1, retval, 0);
2084 }
2085
2086 Err_Exit:
2087
2088 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2089 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2090
2091 return (retval);
2092 }
2093
2094
2095
2096 /*
2097 * Truncate a cnode to at most length size, freeing (or adding) the
2098 * disk blocks.
2099 */
2100 __private_extern__
2101 int
2102 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2103 vfs_context_t context)
2104 {
2105 struct filefork *fp = VTOF(vp);
2106 off_t filebytes;
2107 u_long fileblocks;
2108 int blksize, error = 0;
2109
2110 if (vnode_isdir(vp))
2111 return (EISDIR); /* cannot truncate an HFS directory! */
2112
2113 blksize = VTOVCB(vp)->blockSize;
2114 fileblocks = fp->ff_blocks;
2115 filebytes = (off_t)fileblocks * (off_t)blksize;
2116
2117 // have to loop truncating or growing files that are
2118 // really big because otherwise transactions can get
2119 // enormous and consume too many kernel resources.
2120
2121 if (length < filebytes) {
2122 while (filebytes > length) {
2123 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2124 filebytes -= HFS_BIGFILE_SIZE;
2125 } else {
2126 filebytes = length;
2127 }
2128 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2129 if (error)
2130 break;
2131 }
2132 } else if (length > filebytes) {
2133 while (filebytes < length) {
2134 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2135 filebytes += HFS_BIGFILE_SIZE;
2136 } else {
2137 filebytes = length;
2138 }
2139 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2140 if (error)
2141 break;
2142 }
2143 } else /* Same logical size */ {
2144
2145 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2146 }
2147 /* Files that are changing size are not hot file candidates. */
2148 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2149 fp->ff_bytesread = 0;
2150 }
2151
2152 return (error);
2153 }
2154
2155
2156
2157 /*
2158 * Preallocate file storage space.
2159 */
2160 int
2161 hfs_vnop_allocate(struct vnop_allocate_args /* {
2162 vnode_t a_vp;
2163 off_t a_length;
2164 u_int32_t a_flags;
2165 off_t *a_bytesallocated;
2166 off_t a_offset;
2167 vfs_context_t a_context;
2168 } */ *ap)
2169 {
2170 struct vnode *vp = ap->a_vp;
2171 struct cnode *cp;
2172 struct filefork *fp;
2173 ExtendedVCB *vcb;
2174 off_t length = ap->a_length;
2175 off_t startingPEOF;
2176 off_t moreBytesRequested;
2177 off_t actualBytesAdded;
2178 off_t filebytes;
2179 u_long fileblocks;
2180 int retval, retval2;
2181 UInt32 blockHint;
2182 UInt32 extendFlags; /* For call to ExtendFileC */
2183 struct hfsmount *hfsmp;
2184 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2185 int lockflags;
2186
2187 *(ap->a_bytesallocated) = 0;
2188
2189 if (!vnode_isreg(vp))
2190 return (EISDIR);
2191 if (length < (off_t)0)
2192 return (EINVAL);
2193
2194 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2195 return (retval);
2196 cp = VTOC(vp);
2197 fp = VTOF(vp);
2198 hfsmp = VTOHFS(vp);
2199 vcb = VTOVCB(vp);
2200
2201 fileblocks = fp->ff_blocks;
2202 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2203
2204 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2205 retval = EINVAL;
2206 goto Err_Exit;
2207 }
2208
2209 /* Fill in the flags word for the call to Extend the file */
2210
2211 extendFlags = kEFNoClumpMask;
2212 if (ap->a_flags & ALLOCATECONTIG)
2213 extendFlags |= kEFContigMask;
2214 if (ap->a_flags & ALLOCATEALL)
2215 extendFlags |= kEFAllMask;
2216 if (cred && suser(cred, NULL) != 0)
2217 extendFlags |= kEFReserveMask;
2218
2219 retval = E_NONE;
2220 blockHint = 0;
2221 startingPEOF = filebytes;
2222
2223 if (ap->a_flags & ALLOCATEFROMPEOF)
2224 length += filebytes;
2225 else if (ap->a_flags & ALLOCATEFROMVOL)
2226 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2227
2228 /* If no changes are necessary, then we're done */
2229 if (filebytes == length)
2230 goto Std_Exit;
2231
2232 /*
2233 * Lengthen the size of the file. We must ensure that the
2234 * last byte of the file is allocated. Since the smallest
2235 * value of filebytes is 0, length will be at least 1.
2236 */
2237 if (length > filebytes) {
2238 moreBytesRequested = length - filebytes;
2239
2240 #if QUOTA
2241 retval = hfs_chkdq(cp,
2242 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2243 cred, 0);
2244 if (retval)
2245 goto Err_Exit;
2246
2247 #endif /* QUOTA */
2248 /*
2249 * Metadata zone checks.
2250 */
2251 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2252 /*
2253 * Allocate Journal and Quota files in metadata zone.
2254 */
2255 if (hfs_virtualmetafile(cp)) {
2256 extendFlags |= kEFMetadataMask;
2257 blockHint = hfsmp->hfs_metazone_start;
2258 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2259 (blockHint <= hfsmp->hfs_metazone_end)) {
2260 /*
2261 * Move blockHint outside metadata zone.
2262 */
2263 blockHint = hfsmp->hfs_metazone_end + 1;
2264 }
2265 }
2266
2267 if (hfs_start_transaction(hfsmp) != 0) {
2268 retval = EINVAL;
2269 goto Err_Exit;
2270 }
2271
2272 /* Protect extents b-tree and allocation bitmap */
2273 lockflags = SFL_BITMAP;
2274 if (overflow_extents(fp))
2275 lockflags |= SFL_EXTENTS;
2276 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2277
2278 retval = MacToVFSError(ExtendFileC(vcb,
2279 (FCB*)fp,
2280 moreBytesRequested,
2281 blockHint,
2282 extendFlags,
2283 &actualBytesAdded));
2284
2285 *(ap->a_bytesallocated) = actualBytesAdded;
2286 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2287
2288 hfs_systemfile_unlock(hfsmp, lockflags);
2289
2290 if (hfsmp->jnl) {
2291 (void) hfs_update(vp, TRUE);
2292 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2293 }
2294
2295 hfs_end_transaction(hfsmp);
2296
2297 /*
2298 * If we get an error and no changes were made, then exit;
2299 * otherwise we must do the hfs_update to reflect the changes.
2300 */
2301 if (retval && (startingPEOF == filebytes))
2302 goto Err_Exit;
2303
2304 /*
2305 * Adjust actualBytesAdded to be allocation block aligned, not
2306 * clump size aligned.
2307 * NOTE: What we report here does not affect reality
2308 * until the file is closed, when we truncate the file to allocation
2309 * block size.
2310 */
2311 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2312 *(ap->a_bytesallocated) =
2313 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2314
2315 } else { /* Shorten the size of the file */
2316
2317 if (fp->ff_size > length) {
2318 /*
2319 * Any buffers that are past the truncation point need to be
2320 * invalidated (to maintain buffer cache consistency).
2321 */
2322 }
2323
2324 if (hfs_start_transaction(hfsmp) != 0) {
2325 retval = EINVAL;
2326 goto Err_Exit;
2327 }
2328
2329 /* Protect extents b-tree and allocation bitmap */
2330 lockflags = SFL_BITMAP;
2331 if (overflow_extents(fp))
2332 lockflags |= SFL_EXTENTS;
2333 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2334
2335 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2336
2337 hfs_systemfile_unlock(hfsmp, lockflags);
2338
2339 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2340
2341 if (hfsmp->jnl) {
2342 (void) hfs_update(vp, TRUE);
2343 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2344 }
2345
2346 hfs_end_transaction(hfsmp);
2347
2348
2349 /*
2350 * If we get an error and no changes were made, then exit;
2351 * otherwise we must do the hfs_update to reflect the changes.
2352 */
2353 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2354 #if QUOTA
2355 /* These are bytesreleased */
2356 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2357 #endif /* QUOTA */
2358
2359 if (fp->ff_size > filebytes) {
2360 fp->ff_size = filebytes;
2361
2362 hfs_unlock(cp);
2363 ubc_setsize(vp, fp->ff_size);
2364 hfs_lock(cp, HFS_FORCE_LOCK);
2365 }
2366 }
2367
2368 Std_Exit:
2369 cp->c_touch_chgtime = TRUE;
2370 cp->c_touch_modtime = TRUE;
2371 retval2 = hfs_update(vp, MNT_WAIT);
2372
2373 if (retval == 0)
2374 retval = retval2;
2375 Err_Exit:
2376 hfs_unlock(cp);
2377 return (retval);
2378 }
2379
2380
2381 /*
2382 * Pagein for HFS filesystem
2383 */
2384 int
2385 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2386 /*
2387 struct vnop_pagein_args {
2388 vnode_t a_vp,
2389 upl_t a_pl,
2390 vm_offset_t a_pl_offset,
2391 off_t a_f_offset,
2392 size_t a_size,
2393 int a_flags
2394 vfs_context_t a_context;
2395 };
2396 */
2397 {
2398 vnode_t vp = ap->a_vp;
2399 int error;
2400
2401 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2402 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2403 /*
2404 * Keep track of blocks read.
2405 */
2406 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2407 struct cnode *cp;
2408 struct filefork *fp;
2409 int bytesread;
2410 int took_cnode_lock = 0;
2411
2412 cp = VTOC(vp);
2413 fp = VTOF(vp);
2414
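		/*
		 * For a file smaller than a page, count only its true size
		 * as bytes read rather than the full page.
		 */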
2415 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2416 bytesread = fp->ff_size;
2417 else
2418 bytesread = ap->a_size;
2419
2420 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2421 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2422 hfs_lock(cp, HFS_FORCE_LOCK);
2423 took_cnode_lock = 1;
2424 }
2425 /*
2426 * If this file hasn't been seen since the start of
2427 * the current sampling period then start over.
2428 */
2429 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2430 struct timeval tv;
2431
2432 fp->ff_bytesread = bytesread;
2433 microtime(&tv);
2434 cp->c_atime = tv.tv_sec;
2435 } else {
2436 fp->ff_bytesread += bytesread;
2437 }
2438 cp->c_touch_acctime = TRUE;
2439 if (took_cnode_lock)
2440 hfs_unlock(cp);
2441 }
2442 return (error);
2443 }
2444
2445 /*
2446 * Pageout for HFS filesystem.
2447 */
2448 int
2449 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2450 /*
2451 struct vnop_pageout_args {
2452 vnode_t a_vp,
2453 upl_t a_pl,
2454 vm_offset_t a_pl_offset,
2455 off_t a_f_offset,
2456 size_t a_size,
2457 int a_flags
2458 vfs_context_t a_context;
2459 };
2460 */
2461 {
2462 vnode_t vp = ap->a_vp;
2463 struct cnode *cp;
2464 struct filefork *fp;
2465 int retval;
2466 off_t end_of_range;
2467 off_t filesize;
2468
2469 cp = VTOC(vp);
2470 if (cp->c_lockowner == current_thread()) {
2471 panic("pageout: %s cnode lock already held!\n",
2472 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2473 }
2474 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2475 return (retval);
2476 }
2477 fp = VTOF(vp);
2478
2479 filesize = fp->ff_size;
2480 end_of_range = ap->a_f_offset + ap->a_size - 1;
2481
2482 if (end_of_range >= filesize) {
2483 end_of_range = (off_t)(filesize - 1);
2484 }
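	/*
	 * The pages being pushed out now hold valid data, so drop any
	 * overlapping invalid (to-be-zero-filled) ranges and note that
	 * the logical EOF is dirty.
	 */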
2485 if (ap->a_f_offset < filesize) {
2486 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2487 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2488 }
2489 hfs_unlock(cp);
2490
2491 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2492 ap->a_size, filesize, ap->a_flags);
2493
2494 /*
2495 * If data was written, and setuid or setgid bits are set and
2496 * this process is not the superuser then clear the setuid and
2497 * setgid bits as a precaution against tampering.
2498 */
2499 if ((retval == 0) &&
2500 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2501 (vfs_context_suser(ap->a_context) != 0)) {
2502 hfs_lock(cp, HFS_FORCE_LOCK);
2503 cp->c_mode &= ~(S_ISUID | S_ISGID);
2504 cp->c_touch_chgtime = TRUE;
2505 hfs_unlock(cp);
2506 }
2507 return (retval);
2508 }
2509
2510 /*
2511 * Intercept B-Tree node writes to unswap them if necessary.
2512 */
2513 int
2514 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2515 {
2516 int retval = 0;
2517 register struct buf *bp = ap->a_bp;
2518 register struct vnode *vp = buf_vnode(bp);
2519 #if BYTE_ORDER == LITTLE_ENDIAN
2520 BlockDescriptor block;
2521
2522 /* Trap B-Tree writes */
2523 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2524 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2525 (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
2526
2527 /* Swap if the B-Tree node is in native byte order */
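		/*
		 * The last UInt16 in a B-tree node is the offset of record 0,
		 * which always equals sizeof(BTNodeDescriptor) (14, i.e. 0x000e).
		 * Reading 0x000e here on a little-endian CPU means the node is
		 * still in host byte order and must be swapped back to big-endian
		 * (disk) order before it is written.
		 */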
2528 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2529 /* Prepare the block pointer */
2530 block.blockHeader = bp;
2531 block.buffer = (char *)buf_dataptr(bp);
2532 /* not found in cache ==> came from disk */
2533 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2534 block.blockSize = buf_count(bp);
2535
2536 /* Endian un-swap B-Tree node */
2537 SWAP_BT_NODE (&block, ISHFSPLUS (VTOVCB(vp)), VTOC(vp)->c_fileid, 1);
2538 }
2539
2540 /* We don't check to make sure that it's 0x0e00 because it could be all zeros */
2541 }
2542 #endif
2543 /* This buffer shouldn't be locked anymore but if it is, clear it */
2544 if ((buf_flags(bp) & B_LOCKED)) {
2545 // XXXdbg
2546 if (VTOHFS(vp)->jnl) {
2547 panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
2548 }
2549 buf_clearflags(bp, B_LOCKED);
2550 }
2551 retval = vn_bwrite (ap);
2552
2553 return (retval);
2554 }
2555
2556 /*
2557 * Relocate a file to a new location on disk
2558 * cnode must be locked on entry
2559 *
2560 * Relocation occurs by cloning the file's data from its
2561 * current set of blocks to a new set of blocks. During
2562 * the relocation all of the blocks (old and new) are
2563 * owned by the file.
2564 *
2565 *       -----------------
2566 *       |///////////////|
2567 *       -----------------
2568 *       0               N (file offset)
2569 *
2570 *       -----------------     -----------------
2571 *       |///////////////|     |               |   STEP 1 (acquire new blocks)
2572 *       -----------------     -----------------
2573 *       0               N     N+1            2N
2574 *
2575 *       -----------------     -----------------
2576 *       |///////////////|     |///////////////|   STEP 2 (clone data)
2577 *       -----------------     -----------------
2578 *       0               N     N+1            2N
2579 *
2580 *                             -----------------
2581 *                             |///////////////|   STEP 3 (head truncate blocks)
2582 *                             -----------------
2583 *                             0               N
2584 *
2585 * During steps 2 and 3 page-outs to file offsets less
2586 * than or equal to N are suspended.
2587 *
2588 * During step 3 page-ins to the file get suspended.
2589 */
2590 __private_extern__
2591 int
2592 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2593 struct proc *p)
2594 {
2595 struct cnode *cp;
2596 struct filefork *fp;
2597 struct hfsmount *hfsmp;
2598 u_int32_t headblks;
2599 u_int32_t datablks;
2600 u_int32_t blksize;
2601 u_int32_t growsize;
2602 u_int32_t nextallocsave;
2603 daddr64_t sector_a, sector_b;
2604 int disabled_caching = 0;
2605 int eflags;
2606 off_t newbytes;
2607 int retval;
2608 int lockflags = 0;
2609 int took_trunc_lock = 0;
2610 int started_tr = 0;
2611 enum vtype vnodetype;
2612
2613 vnodetype = vnode_vtype(vp);
2614 if (vnodetype != VREG && vnodetype != VLNK) {
2615 return (EPERM);
2616 }
2617
2618 hfsmp = VTOHFS(vp);
2619 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2620 return (ENOSPC);
2621 }
2622
2623 cp = VTOC(vp);
2624 fp = VTOF(vp);
2625 if (fp->ff_unallocblocks)
2626 return (EINVAL);
2627 blksize = hfsmp->blockSize;
2628 if (blockHint == 0)
2629 blockHint = hfsmp->nextAllocation;
2630
2631 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2632 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2633 return (EFBIG);
2634 }
2635
2636 //
2637 // We do not believe that this call to hfs_fsync() is
2638 // necessary and it causes a journal transaction
2639 // deadlock so we are removing it.
2640 //
2641 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2642 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2643 // if (retval)
2644 // return (retval);
2645 //}
2646
2647 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2648 hfs_unlock(cp);
2649 hfs_lock_truncate(cp, TRUE);
2650 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2651 hfs_unlock_truncate(cp);
2652 return (retval);
2653 }
2654 took_trunc_lock = 1;
2655 }
2656 headblks = fp->ff_blocks;
2657 datablks = howmany(fp->ff_size, blksize);
2658 growsize = datablks * blksize;
2659 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2660 if (blockHint >= hfsmp->hfs_metazone_start &&
2661 blockHint <= hfsmp->hfs_metazone_end)
2662 eflags |= kEFMetadataMask;
2663
2664 if (hfs_start_transaction(hfsmp) != 0) {
2665 if (took_trunc_lock)
2666 hfs_unlock_truncate(cp);
2667 return (EINVAL);
2668 }
2669 started_tr = 1;
2670 /*
2671 * Protect the extents b-tree and the allocation bitmap
2672 * during MapFileBlockC and ExtendFileC operations.
2673 */
2674 lockflags = SFL_BITMAP;
2675 if (overflow_extents(fp))
2676 lockflags |= SFL_EXTENTS;
2677 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2678
2679 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2680 if (retval) {
2681 retval = MacToVFSError(retval);
2682 goto out;
2683 }
2684
2685 /*
2686 * STEP 1 - acquire new allocation blocks.
2687 */
2688 if (!vnode_isnocache(vp)) {
2689 vnode_setnocache(vp);
2690 disabled_caching = 1;
2691
2692 }
2693 nextallocsave = hfsmp->nextAllocation;
2694 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
2695 if (eflags & kEFMetadataMask) {
2696 HFS_MOUNT_LOCK(hfsmp, TRUE);
2697 hfsmp->nextAllocation = nextallocsave;
2698 hfsmp->vcbFlags |= 0xFF00;
2699 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2700 }
2701
2702 retval = MacToVFSError(retval);
2703 if (retval == 0) {
2704 cp->c_flag |= C_MODIFIED;
2705 if (newbytes < growsize) {
2706 retval = ENOSPC;
2707 goto restore;
2708 } else if (fp->ff_blocks < (headblks + datablks)) {
2709 printf("hfs_relocate: allocation failed");
2710 retval = ENOSPC;
2711 goto restore;
2712 }
2713
2714 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2715 if (retval) {
2716 retval = MacToVFSError(retval);
2717 } else if ((sector_a + 1) == sector_b) {
2718 retval = ENOSPC;
2719 goto restore;
2720 } else if ((eflags & kEFMetadataMask) &&
2721 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2722 hfsmp->hfs_metazone_end)) {
2723 printf("hfs_relocate: didn't move into metadata zone\n");
2724 retval = ENOSPC;
2725 goto restore;
2726 }
2727 }
2728 /* Done with system locks and journal for now. */
2729 hfs_systemfile_unlock(hfsmp, lockflags);
2730 lockflags = 0;
2731 hfs_end_transaction(hfsmp);
2732 started_tr = 0;
2733
2734 if (retval) {
2735 /*
2736 * Check to see if failure is due to excessive fragmentation.
2737 */
2738 if ((retval == ENOSPC) &&
2739 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2740 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2741 }
2742 goto out;
2743 }
2744 /*
2745 * STEP 2 - clone file data into the new allocation blocks.
2746 */
2747
2748 if (vnodetype == VLNK)
2749 retval = hfs_clonelink(vp, blksize, cred, p);
2750 else if (vnode_issystem(vp))
2751 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2752 else
2753 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2754
2755 /* Start transaction for step 3 or for a restore. */
2756 if (hfs_start_transaction(hfsmp) != 0) {
2757 retval = EINVAL;
2758 goto out;
2759 }
2760 started_tr = 1;
2761 if (retval)
2762 goto restore;
2763
2764 /*
2765 * STEP 3 - switch to cloned data and remove old blocks.
2766 */
2767 lockflags = SFL_BITMAP;
2768 if (overflow_extents(fp))
2769 lockflags |= SFL_EXTENTS;
2770 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2771
2772 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2773
2774 hfs_systemfile_unlock(hfsmp, lockflags);
2775 lockflags = 0;
2776 if (retval)
2777 goto restore;
2778 out:
2779 if (took_trunc_lock)
2780 hfs_unlock_truncate(cp);
2781
2782 if (lockflags) {
2783 hfs_systemfile_unlock(hfsmp, lockflags);
2784 lockflags = 0;
2785 }
2786
2787 // See comment up above about calls to hfs_fsync()
2788 //
2789 //if (retval == 0)
2790 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2791
2792 if (hfsmp->jnl) {
2793 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2794 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2795 else
2796 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2797 }
2798 exit:
2799 if (disabled_caching) {
2800 vnode_clearnocache(vp);
2801 }
2802 if (started_tr)
2803 hfs_end_transaction(hfsmp);
2804
2805 return (retval);
2806
2807 restore:
2808 if (fp->ff_blocks == headblks)
2809 goto exit;
2810 /*
2811 * Give back any newly allocated space.
2812 */
2813 if (lockflags == 0) {
2814 lockflags = SFL_BITMAP;
2815 if (overflow_extents(fp))
2816 lockflags |= SFL_EXTENTS;
2817 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2818 }
2819
2820 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2821
2822 hfs_systemfile_unlock(hfsmp, lockflags);
2823 lockflags = 0;
2824
2825 if (took_trunc_lock)
2826 hfs_unlock_truncate(cp);
2827 goto exit;
2828 }
2829
2830
2831 /*
2832 * Clone a symlink.
2833 *
2834 */
2835 static int
2836 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2837 {
2838 struct buf *head_bp = NULL;
2839 struct buf *tail_bp = NULL;
2840 int error;
2841
2842
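	/*
	 * A symlink's target fits in a single block: read logical block 0
	 * (the original copy) and write its contents into logical block 1
	 * (the newly allocated block).
	 */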
2843 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2844 if (error)
2845 goto out;
2846
2847 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2848 if (tail_bp == NULL) {
2849 error = EIO;
2850 goto out;
2851 }
2852 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2853 error = (int)buf_bwrite(tail_bp);
2854 out:
2855 if (head_bp) {
2856 buf_markinvalid(head_bp);
2857 buf_brelse(head_bp);
2858 }
2859 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2860
2861 return (error);
2862 }
2863
2864 /*
2865 * Clone a file's data within the file.
2866 *
2867 */
2868 static int
2869 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2870 {
2871 caddr_t bufp;
2872 size_t writebase;
2873 size_t bufsize;
2874 size_t copysize;
2875 size_t iosize;
2876 off_t filesize;
2877 size_t offset;
2878 uio_t auio;
2879 int error = 0;
2880
2881 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2882 writebase = blkstart * blksize;
2883 copysize = blkcnt * blksize;
2884 iosize = bufsize = MIN(copysize, 4096 * 16);
2885 offset = 0;
2886
2887 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2888 return (ENOMEM);
2889 }
2890 hfs_unlock(VTOC(vp));
2891
2892 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2893
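	/*
	 * Copy the original data in bufsize chunks: each pass reads from
	 * logical offset 'offset' and rewrites the same data at
	 * writebase + offset, which falls inside the newly allocated blocks.
	 */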
2894 while (offset < copysize) {
2895 iosize = MIN(copysize - offset, iosize);
2896
2897 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2898 uio_addiov(auio, (uintptr_t)bufp, iosize);
2899
2900 error = cluster_read(vp, auio, copysize, 0);
2901 if (error) {
2902 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2903 break;
2904 }
2905 if (uio_resid(auio) != 0) {
2906 printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2907 error = EIO;
2908 break;
2909 }
2910
2911 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2912 uio_addiov(auio, (uintptr_t)bufp, iosize);
2913
2914 error = cluster_write(vp, auio, filesize + offset,
2915 filesize + offset + iosize,
2916 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2917 if (error) {
2918 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2919 break;
2920 }
2921 if (uio_resid(auio) != 0) {
2922 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2923 error = EIO;
2924 break;
2925 }
2926 offset += iosize;
2927 }
2928 uio_free(auio);
2929
2930 /*
2931 * No need to call ubc_sync_range or hfs_invalbuf
2932 * since the file was copied using IO_NOCACHE.
2933 */
2934
2935 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2936
2937 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2938 return (error);
2939 }
2940
2941 /*
2942 * Clone a system (metadata) file.
2943 *
2944 */
2945 static int
2946 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2947 kauth_cred_t cred, struct proc *p)
2948 {
2949 caddr_t bufp;
2950 char * offset;
2951 size_t bufsize;
2952 size_t iosize;
2953 struct buf *bp = NULL;
2954 daddr64_t blkno;
2955 daddr64_t blk;
2956 daddr64_t start_blk;
2957 daddr64_t last_blk;
2958 int breadcnt;
2959 int i;
2960 int error = 0;
2961
2962
2963 iosize = GetLogicalBlockSize(vp);
2964 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
2965 breadcnt = bufsize / iosize;
2966
2967 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2968 return (ENOMEM);
2969 }
2970 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
2971 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
2972 blkno = 0;
2973
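	/*
	 * Copy the fork in bufsize chunks: read logical blocks
	 * [0, last_blk) and write them back at start_blk + blkno,
	 * i.e. into the newly allocated run.
	 */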
2974 while (blkno < last_blk) {
2975 /*
2976 * Read up to a megabyte
2977 */
2978 offset = bufp;
2979 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
2980 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
2981 if (error) {
2982 printf("hfs_clonesysfile: meta_bread error %d\n", error);
2983 goto out;
2984 }
2985 if (buf_count(bp) != iosize) {
2986 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
2987 goto out;
2988 }
2989 bcopy((char *)buf_dataptr(bp), offset, iosize);
2990
2991 buf_markinvalid(bp);
2992 buf_brelse(bp);
2993 bp = NULL;
2994
2995 offset += iosize;
2996 }
2997
2998 /*
2999 * Write up to a megabyte
3000 */
3001 offset = bufp;
3002 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3003 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3004 if (bp == NULL) {
3005 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3006 error = EIO;
3007 goto out;
3008 }
3009 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3010 error = (int)buf_bwrite(bp);
3011 bp = NULL;
3012 if (error)
3013 goto out;
3014 offset += iosize;
3015 }
3016 }
3017 out:
3018 if (bp) {
3019 buf_brelse(bp);
3020 }
3021
3022 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3023
3024 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3025
3026 return (error);
3027 }