1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30 /* @(#)hfs_readwrite.c 1.0
31 *
32 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
33 *
34 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
35 *
36 */
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/resourcevar.h>
41 #include <sys/kernel.h>
42 #include <sys/fcntl.h>
43 #include <sys/filedesc.h>
44 #include <sys/stat.h>
45 #include <sys/buf.h>
46 #include <sys/proc.h>
47 #include <sys/kauth.h>
48 #include <sys/vnode.h>
49 #include <sys/uio.h>
50 #include <sys/vfs_context.h>
51
52 #include <miscfs/specfs/specdev.h>
53
54 #include <sys/ubc.h>
55 #include <vm/vm_pageout.h>
56 #include <vm/vm_kern.h>
57
58 #include <sys/kdebug.h>
59
60 #include "hfs.h"
61 #include "hfs_endian.h"
62 #include "hfs_fsctl.h"
63 #include "hfs_quota.h"
64 #include "hfscommon/headers/FileMgrInternal.h"
65 #include "hfscommon/headers/BTreesInternal.h"
66 #include "hfs_cnode.h"
67 #include "hfs_dbg.h"
68
69 extern int overflow_extents(struct filefork *fp);
70
71 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
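/*
 * can_cluster(size) is true when the logical block size is a multiple of
 * 4K and no more than half of MAXPHYSIO -- the cases where the cluster
 * layer's read-ahead is worth computing (see the runp logic in hfs_bmap).
 */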
72
73 enum {
74 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
75 };
76
77 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
78
79 extern int hfs_setextendedsecurity(struct hfsmount *, int);
80
81
82 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
83 static int hfs_clonefile(struct vnode *, int, int, int);
84 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
85
86
87 /*****************************************************************************
88 *
89 * I/O Operations on vnodes
90 *
91 *****************************************************************************/
92 int hfs_vnop_read(struct vnop_read_args *);
93 int hfs_vnop_write(struct vnop_write_args *);
94 int hfs_vnop_ioctl(struct vnop_ioctl_args *);
95 int hfs_vnop_select(struct vnop_select_args *);
96 int hfs_vnop_blktooff(struct vnop_blktooff_args *);
97 int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
98 int hfs_vnop_blockmap(struct vnop_blockmap_args *);
99 int hfs_vnop_strategy(struct vnop_strategy_args *);
100 int hfs_vnop_allocate(struct vnop_allocate_args *);
101 int hfs_vnop_pagein(struct vnop_pagein_args *);
102 int hfs_vnop_pageout(struct vnop_pageout_args *);
103 int hfs_vnop_bwrite(struct vnop_bwrite_args *);
104
105
106 /*
107 * Read data from a file.
108 */
109 int
110 hfs_vnop_read(struct vnop_read_args *ap)
111 {
112 uio_t uio = ap->a_uio;
113 struct vnode *vp = ap->a_vp;
114 struct cnode *cp;
115 struct filefork *fp;
116 struct hfsmount *hfsmp;
117 off_t filesize;
118 off_t filebytes;
119 off_t start_resid = uio_resid(uio);
120 off_t offset = uio_offset(uio);
121 int retval = 0;
122
123
124 /* Preflight checks */
125 if (!vnode_isreg(vp)) {
126 /* can only read regular files */
127 if (vnode_isdir(vp))
128 return (EISDIR);
129 else
130 return (EPERM);
131 }
132 if (start_resid == 0)
133 return (0); /* Nothing left to do */
134 if (offset < 0)
135 return (EINVAL); /* can't read from a negative offset */
136
137 cp = VTOC(vp);
138 fp = VTOF(vp);
139 hfsmp = VTOHFS(vp);
140
141 /* Protect against a size change. */
142 hfs_lock_truncate(cp, 0);
143
144 filesize = fp->ff_size;
145 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
146 if (offset > filesize) {
147 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
148 (offset > (off_t)MAXHFSFILESIZE)) {
149 retval = EFBIG;
150 }
151 goto exit;
152 }
153
154 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
155 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
156
157 retval = cluster_read(vp, uio, filesize, 0);
158
159 cp->c_touch_acctime = TRUE;
160
161 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
162 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
163
164 /*
165 * Keep track of blocks read
166 */
167 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
168 int took_cnode_lock = 0;
169 off_t bytesread;
170
171 bytesread = start_resid - uio_resid(uio);
172
173 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
174 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
175 hfs_lock(cp, HFS_FORCE_LOCK);
176 took_cnode_lock = 1;
177 }
178 /*
179 * If this file hasn't been seen since the start of
180 * the current sampling period then start over.
181 */
182 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
183 struct timeval tv;
184
185 fp->ff_bytesread = bytesread;
186 microtime(&tv);
187 cp->c_atime = tv.tv_sec;
188 } else {
189 fp->ff_bytesread += bytesread;
190 }
191 if (took_cnode_lock)
192 hfs_unlock(cp);
193 }
194 exit:
195 hfs_unlock_truncate(cp);
196 return (retval);
197 }
198
199 /*
200 * Write data to a file.
201 */
202 int
203 hfs_vnop_write(struct vnop_write_args *ap)
204 {
205 uio_t uio = ap->a_uio;
206 struct vnode *vp = ap->a_vp;
207 struct cnode *cp;
208 struct filefork *fp;
209 struct hfsmount *hfsmp;
210 kauth_cred_t cred = NULL;
211 off_t origFileSize;
212 off_t writelimit;
213 off_t bytesToAdd;
214 off_t actualBytesAdded;
215 off_t filebytes;
216 off_t offset;
217 size_t resid;
218 int eflags;
219 int ioflag = ap->a_ioflag;
220 int retval = 0;
221 int lockflags;
222 int cnode_locked = 0;
223
224 // LP64todo - fix this! uio_resid may be a 64-bit value
225 resid = uio_resid(uio);
226 offset = uio_offset(uio);
227
228 if (offset < 0)
229 return (EINVAL);
230 if (resid == 0)
231 return (E_NONE);
232 if (!vnode_isreg(vp))
233 return (EPERM); /* Can only write regular files */
234
235 /* Protect against a size change. */
236 hfs_lock_truncate(VTOC(vp), TRUE);
237
238 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
239 hfs_unlock_truncate(VTOC(vp));
240 return (retval);
241 }
242 cnode_locked = 1;
243 cp = VTOC(vp);
244 fp = VTOF(vp);
245 hfsmp = VTOHFS(vp);
246 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
247
248 if (ioflag & IO_APPEND) {
249 uio_setoffset(uio, fp->ff_size);
250 offset = fp->ff_size;
251 }
252 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
253 retval = EPERM;
254 goto exit;
255 }
256
257 origFileSize = fp->ff_size;
258 eflags = kEFDeferMask; /* defer file block allocations */
259
260 #ifdef HFS_SPARSE_DEV
261 /*
262 * When the underlying device is sparse and space is low
263 * (hfs_freeblks() below 2048 allocation blocks, roughly 8MB with 4KB blocks),
264 * stop doing delayed allocations and begin doing synchronous I/O.
265 */
266 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
267 (hfs_freeblks(hfsmp, 0) < 2048)) {
268 eflags &= ~kEFDeferMask;
269 ioflag |= IO_SYNC;
270 }
271 #endif /* HFS_SPARSE_DEV */
272
273 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
274 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
275
276 /* Now test if we need to extend the file */
277 /* Doing so will adjust the filebytes for us */
278
279 writelimit = offset + resid;
280 if (writelimit <= filebytes)
281 goto sizeok;
282
283 cred = vfs_context_ucred(ap->a_context);
284 #if QUOTA
285 bytesToAdd = writelimit - filebytes;
286 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
287 cred, 0);
288 if (retval)
289 goto exit;
290 #endif /* QUOTA */
291
292 if (hfs_start_transaction(hfsmp) != 0) {
293 retval = EINVAL;
294 goto exit;
295 }
296
297 while (writelimit > filebytes) {
298 bytesToAdd = writelimit - filebytes;
299 if (cred && suser(cred, NULL) != 0)
300 eflags |= kEFReserveMask;
301
302 /* Protect extents b-tree and allocation bitmap */
303 lockflags = SFL_BITMAP;
304 if (overflow_extents(fp))
305 lockflags |= SFL_EXTENTS;
306 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
307
308 /* Files that are changing size are not hot file candidates. */
309 if (hfsmp->hfc_stage == HFC_RECORDING) {
310 fp->ff_bytesread = 0;
311 }
312 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
313 0, eflags, &actualBytesAdded));
314
315 hfs_systemfile_unlock(hfsmp, lockflags);
316
317 if ((actualBytesAdded == 0) && (retval == E_NONE))
318 retval = ENOSPC;
319 if (retval != E_NONE)
320 break;
321 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
322 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
323 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
324 }
325 (void) hfs_update(vp, TRUE);
326 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
327 (void) hfs_end_transaction(hfsmp);
328
329 sizeok:
330 if (retval == E_NONE) {
331 off_t filesize;
332 off_t zero_off;
333 off_t tail_off;
334 off_t inval_start;
335 off_t inval_end;
336 off_t io_start;
337 int lflag;
338 struct rl_entry *invalid_range;
339
340 if (writelimit > fp->ff_size)
341 filesize = writelimit;
342 else
343 filesize = fp->ff_size;
344
345 lflag = (ioflag & IO_SYNC);
346
347 if (offset <= fp->ff_size) {
348 zero_off = offset & ~PAGE_MASK_64;
349
350 /* Check whether the area between zero_off and the start
351 of the transfer is invalid and should be zero-filled
352 as part of the transfer:
353 */
354 if (offset > zero_off) {
355 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
356 lflag |= IO_HEADZEROFILL;
357 }
358 } else {
359 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
360
361 /* The bytes between fp->ff_size and uio->uio_offset must never be
362 read without being zeroed. The current last block is filled with zeroes
363 if it holds valid data but in all cases merely do a little bookkeeping
364 to track the area from the end of the current last page to the start of
365 the area actually written. For the same reason only the bytes up to the
366 start of the page where this write will start are invalidated; any remainder
367 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
368
369 Note that inval_start, the start of the page after the current EOF,
370 may be past the start of the write, in which case the zeroing
371 will be handled by the cluster_write of the actual data.
372 */
373 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
374 inval_end = offset & ~PAGE_MASK_64;
375 zero_off = fp->ff_size;
376
377 if ((fp->ff_size & PAGE_MASK_64) &&
378 (rl_scan(&fp->ff_invalidranges,
379 eof_page_base,
380 fp->ff_size - 1,
381 &invalid_range) != RL_NOOVERLAP)) {
382 /* The page containing the EOF is not valid, so the
383 entire page must be made inaccessible now. If the write
384 starts on a page beyond the page containing the eof
385 (inval_end > eof_page_base), add the
386 whole page to the range to be invalidated. Otherwise
387 (i.e. if the write starts on the same page), zero-fill
388 the entire page explicitly now:
389 */
390 if (inval_end > eof_page_base) {
391 inval_start = eof_page_base;
392 } else {
393 zero_off = eof_page_base;
394 };
395 };
396
397 if (inval_start < inval_end) {
398 struct timeval tv;
399 /* There's some range of data that's going to be marked invalid */
400
401 if (zero_off < inval_start) {
402 /* The pages between inval_start and inval_end are going to be invalidated,
403 and the actual write will start on a page past inval_end. Now's the last
404 chance to zero-fill the page containing the EOF:
405 */
406 hfs_unlock(cp);
407 cnode_locked = 0;
408 retval = cluster_write(vp, (uio_t) 0,
409 fp->ff_size, inval_start,
410 zero_off, (off_t)0,
411 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
412 hfs_lock(cp, HFS_FORCE_LOCK);
413 cnode_locked = 1;
414 if (retval) goto ioerr_exit;
415 offset = uio_offset(uio);
416 };
417
418 /* Mark the remaining area of the newly allocated space as invalid: */
419 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
420 microuptime(&tv);
421 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
422 zero_off = fp->ff_size = inval_end;
423 };
424
425 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
426 };
427
428 /* Check to see whether the area between the end of the write and the end of
429 the page it falls in is invalid and should be zero-filled as part of the transfer:
430 */
431 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
432 if (tail_off > filesize) tail_off = filesize;
433 if (tail_off > writelimit) {
434 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
435 lflag |= IO_TAILZEROFILL;
436 };
437 };
438
439 /*
440 * if the write starts beyond the current EOF (possibly advanced in the
441 * zeroing of the last block, above), then we'll zero fill from the current EOF
442 * to where the write begins:
443 *
444 * NOTE: If (and ONLY if) the portion of the file about to be written is
445 * before the current EOF it might be marked as invalid now and must be
446 * made readable (removed from the invalid ranges) before cluster_write
447 * tries to write it:
448 */
449 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
450 if (io_start < fp->ff_size) {
451 off_t io_end;
452
453 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
454 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
455 };
456
457 hfs_unlock(cp);
458 cnode_locked = 0;
459 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
460 tail_off, lflag | IO_NOZERODIRTY);
461 offset = uio_offset(uio);
462 if (offset > fp->ff_size) {
463 fp->ff_size = offset;
464
465 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
466 /* Files that are changing size are not hot file candidates. */
467 if (hfsmp->hfc_stage == HFC_RECORDING)
468 fp->ff_bytesread = 0;
469 }
470 if (resid > uio_resid(uio)) {
471 cp->c_touch_chgtime = TRUE;
472 cp->c_touch_modtime = TRUE;
473 }
474 }
475 HFS_KNOTE(vp, NOTE_WRITE);
476
477 ioerr_exit:
478 /*
479 * If we successfully wrote any data, and we are not the superuser,
480 * we clear the setuid and setgid bits as a precaution against
481 * tampering.
482 */
483 if (cp->c_mode & (S_ISUID | S_ISGID)) {
484 cred = vfs_context_ucred(ap->a_context);
485 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
486 if (!cnode_locked) {
487 hfs_lock(cp, HFS_FORCE_LOCK);
488 cnode_locked = 1;
489 }
490 cp->c_mode &= ~(S_ISUID | S_ISGID);
491 }
492 }
493 if (retval) {
494 if (ioflag & IO_UNIT) {
495 if (!cnode_locked) {
496 hfs_lock(cp, HFS_FORCE_LOCK);
497 cnode_locked = 1;
498 }
499 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
500 0, ap->a_context);
501 // LP64todo - fix this! resid needs to be user_ssize_t
502 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
503 uio_setresid(uio, resid);
504 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
505 }
506 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
507 if (!cnode_locked) {
508 hfs_lock(cp, HFS_FORCE_LOCK);
509 cnode_locked = 1;
510 }
511 retval = hfs_update(vp, TRUE);
512 }
513 /* Updating vcbWrCnt doesn't need to be atomic. */
514 hfsmp->vcbWrCnt++;
515
516 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
517 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
518 exit:
519 if (cnode_locked)
520 hfs_unlock(cp);
521 hfs_unlock_truncate(cp);
522 return (retval);
523 }
524
525 /* support for the "bulk-access" fcntl */
526
527 #define CACHE_ELEMS 64
528 #define CACHE_LEVELS 16
529 #define PARENT_IDS_FLAG 0x100
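/*
 * CACHE_ELEMS bounds how many parent directory ids the per-call
 * access_cache holds, CACHE_LEVELS bounds how many ancestors a single
 * access check will record on its way to the root, and PARENT_IDS_FLAG,
 * when set in the caller's flags, means the file_ids array holds parent
 * directory ids rather than leaf file ids (see the check_leaf handling
 * in the HFS_BULKACCESS ioctl below).
 */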
530
531 /* from hfs_attrlist.c */
532 extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
533 mode_t obj_mode, struct mount *mp,
534 kauth_cred_t cred, struct proc *p);
535
536 /* from vfs/vfs_fsevents.c */
537 extern char *get_pathbuff(void);
538 extern void release_pathbuff(char *buff);
539
540 struct access_cache {
541 int numcached;
542 int cachehits; /* these two for statistics gathering */
543 int lookups;
544 unsigned int *acache;
545 Boolean *haveaccess;
546 };
547
548 struct access_t {
549 uid_t uid; /* IN: effective user id */
550 short flags; /* IN: access requested (i.e. R_OK) */
551 short num_groups; /* IN: number of groups user belongs to */
552 int num_files; /* IN: number of files to process */
553 int *file_ids; /* IN: array of file ids */
554 gid_t *groups; /* IN: array of groups */
555 short *access; /* OUT: access info for each file (0 for 'has access') */
556 };
557
558 struct user_access_t {
559 uid_t uid; /* IN: effective user id */
560 short flags; /* IN: access requested (i.e. R_OK) */
561 short num_groups; /* IN: number of groups user belongs to */
562 int num_files; /* IN: number of files to process */
563 user_addr_t file_ids; /* IN: array of file ids */
564 user_addr_t groups; /* IN: array of groups */
565 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
566 };
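/*
 * Illustrative sketch of a 32-bit userspace caller (field names from
 * struct access_t above; the fsctl() wrapper, volume path, and node ids
 * are assumptions for the example -- the call must be made as root):
 *
 *     int ids[2] = { 1234, 5678 };        // catalog node ids to check
 *     gid_t grps[1] = { getgid() };
 *     short result[2];
 *     struct access_t req;
 *
 *     req.uid = geteuid();
 *     req.flags = R_OK;                   // access being asked about
 *     req.num_groups = 1;
 *     req.num_files = 2;                  // must be 1..256 (see below)
 *     req.file_ids = ids;
 *     req.groups = grps;
 *     req.access = result;
 *     fsctl("/Volumes/HFSVol", HFS_BULKACCESS_FSCTL, &req, 0);
 *     // result[i] == 0 means file_ids[i] was reachable; otherwise an errno
 */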
567
568 /*
569 * Perform a binary search for the given parent_id. Return value is
570 * found/not found boolean, and indexp will be the index of the item
571 * or the index at which to insert the item if it's not found.
572 */
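/*
 * Illustrative trace: with acache = {5, 9, 17}, looking up 9 returns 1
 * and sets *indexp to 1; looking up 12 returns 0 and sets *indexp to 2,
 * the slot at which 12 would be inserted.
 */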
573 static int
574 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
575 {
576 unsigned int lo, hi;
577 int index, matches = 0;
578
579 if (cache->numcached == 0) {
580 *indexp = 0;
581 return 0; // table is empty, so insert at index=0 and report no match
582 }
583
584 if (cache->numcached > CACHE_ELEMS) {
585 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
586 cache->numcached, CACHE_ELEMS);*/
587 cache->numcached = CACHE_ELEMS;
588 }
589
590 lo = 0;
591 hi = cache->numcached - 1;
592 index = -1;
593
594 /* perform binary search for parent_id */
595 do {
596 unsigned int mid = (hi - lo)/2 + lo;
597 unsigned int this_id = cache->acache[mid];
598
599 if (parent_id == this_id) {
600 index = mid;
601 break;
602 }
603
604 if (parent_id < this_id) {
605 hi = mid;
606 continue;
607 }
608
609 if (parent_id > this_id) {
610 lo = mid + 1;
611 continue;
612 }
613 } while(lo < hi);
614
615 /* check if lo and hi converged on the match */
616 if (parent_id == cache->acache[hi]) {
617 index = hi;
618 }
619
620 /* if no existing entry found, find index for new one */
621 if (index == -1) {
622 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
623 matches = 0;
624 } else {
625 matches = 1;
626 }
627
628 *indexp = index;
629 return matches;
630 }
631
632 /*
633 * Add a node to the access_cache at the given index (or do a lookup first
634 * to find the index if -1 is passed in). We currently do a replace rather
635 * than an insert if the cache is full.
636 */
637 static void
638 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
639 {
640 int lookup_index = -1;
641
642 /* need to do a lookup first if -1 passed for index */
643 if (index == -1) {
644 if (lookup_bucket(cache, &lookup_index, nodeID)) {
645 if (cache->haveaccess[lookup_index] != access) {
646 /* change access info for existing entry... should never happen */
647 cache->haveaccess[lookup_index] = access;
648 }
649
650 /* mission accomplished */
651 return;
652 } else {
653 index = lookup_index;
654 }
655
656 }
657
658 /* if the cache is full, do a replace rather than an insert */
659 if (cache->numcached >= CACHE_ELEMS) {
660 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
661 cache->numcached = CACHE_ELEMS-1;
662
663 if (index > cache->numcached) {
664 // printf("index %d pinned to %d\n", index, cache->numcached);
665 index = cache->numcached;
666 }
667 } else if (index >= 0 && index < cache->numcached) {
668 /* only do bcopy if we're inserting */
669 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
670 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
671 }
672
673 cache->acache[index] = nodeID;
674 cache->haveaccess[index] = access;
675 cache->numcached++;
676 }
677
678
679 struct cinfo {
680 uid_t uid;
681 gid_t gid;
682 mode_t mode;
683 cnid_t parentcnid;
684 };
685
686 static int
687 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
688 {
689 struct cinfo *cip = (struct cinfo *)arg;
690
691 cip->uid = attrp->ca_uid;
692 cip->gid = attrp->ca_gid;
693 cip->mode = attrp->ca_mode;
694 cip->parentcnid = descp->cd_parentcnid;
695
696 return (0);
697 }
698
699 /*
700 * Look up the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
701 * isn't in core, then go to the catalog.
702 */
703 static int
704 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
705 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
706 {
707 int error = 0;
708
709 /* if this id matches the one the fsctl was called with, skip the lookup */
710 if (cnid == skip_cp->c_cnid) {
711 cnattrp->ca_uid = skip_cp->c_uid;
712 cnattrp->ca_gid = skip_cp->c_gid;
713 cnattrp->ca_mode = skip_cp->c_mode;
714 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
715 } else {
716 struct cinfo c_info;
717
718 /* otherwise, check the cnode hash in case the file/dir is in core */
719 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
720 cnattrp->ca_uid = c_info.uid;
721 cnattrp->ca_gid = c_info.gid;
722 cnattrp->ca_mode = c_info.mode;
723 keyp->hfsPlus.parentID = c_info.parentcnid;
724 } else {
725 int lockflags;
726
727 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
728
729 /* lookup this cnid in the catalog */
730 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
731
732 hfs_systemfile_unlock(hfsmp, lockflags);
733
734 cache->lookups++;
735 }
736 }
737
738 return (error);
739 }
740
741 /*
742 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
743 * up to CACHE_LEVELS as we progress towards the root.
744 */
745 static int
746 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
747 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
748 {
749 int myErr = 0;
750 int myResult;
751 HFSCatalogNodeID thisNodeID;
752 unsigned long myPerms;
753 struct cat_attr cnattr;
754 int cache_index = -1;
755 CatalogKey catkey;
756
757 int i = 0, ids_to_cache = 0;
758 int parent_ids[CACHE_LEVELS];
759
760 /* root always has access */
761 if (!suser(myp_ucred, NULL)) {
762 return (1);
763 }
764
765 thisNodeID = nodeID;
766 while (thisNodeID >= kRootDirID) {
767 myResult = 0; /* default to "no access" */
768
769 /* check the cache before resorting to hitting the catalog */
770
771 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
772 * to look any further after hitting cached dir */
773
774 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
775 cache->cachehits++;
776 myResult = cache->haveaccess[cache_index];
777 goto ExitThisRoutine;
778 }
779
780 /* remember which parents we want to cache */
781 if (ids_to_cache < CACHE_LEVELS) {
782 parent_ids[ids_to_cache] = thisNodeID;
783 ids_to_cache++;
784 }
785
786 /* do the lookup (checks the cnode hash, then the catalog) */
787 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
788 if (myErr) {
789 goto ExitThisRoutine; /* no access */
790 }
791
792 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
793 cnattr.ca_mode, hfsmp->hfs_mp,
794 myp_ucred, theProcPtr);
795
796 if ( (myPerms & X_OK) == 0 ) {
797 myResult = 0;
798 goto ExitThisRoutine; /* no access */
799 }
800
801 /* up the hierarchy we go */
802 thisNodeID = catkey.hfsPlus.parentID;
803 }
804
805 /* if here, we have access to this node */
806 myResult = 1;
807
808 ExitThisRoutine:
809 if (myErr) {
810 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
811 myResult = 0;
812 }
813 *err = myErr;
814
815 /* cache the parent directory(ies) */
816 for (i = 0; i < ids_to_cache; i++) {
817 /* small optimization: get rid of double-lookup for all these */
818 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
819 add_node(cache, -1, parent_ids[i], myResult);
820 }
821
822 return (myResult);
823 }
824 /* end "bulk-access" support */
825
826
827
828 /*
829 * Callback for use with freeze ioctl.
830 */
831 static int
832 hfs_freezewrite_callback(struct vnode *vp, void *cargs)
833 {
834 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
835
836 return 0;
837 }
838
839 /*
840 * Control filesystem operating characteristics.
841 */
842 int
843 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
844 vnode_t a_vp;
845 int a_command;
846 caddr_t a_data;
847 int a_fflag;
848 vfs_context_t a_context;
849 } */ *ap)
850 {
851 struct vnode * vp = ap->a_vp;
852 struct hfsmount *hfsmp = VTOHFS(vp);
853 vfs_context_t context = ap->a_context;
854 kauth_cred_t cred = vfs_context_ucred(context);
855 proc_t p = vfs_context_proc(context);
856 struct vfsstatfs *vfsp;
857 boolean_t is64bit;
858
859 is64bit = proc_is64bit(p);
860
861 switch (ap->a_command) {
862
863 case HFS_RESIZE_PROGRESS: {
864
865 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
866 if (suser(cred, NULL) &&
867 kauth_cred_getuid(cred) != vfsp->f_owner) {
868 return (EACCES); /* must be owner of file system */
869 }
870 if (!vnode_isvroot(vp)) {
871 return (EINVAL);
872 }
873 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
874 }
875 case HFS_RESIZE_VOLUME: {
876 u_int64_t newsize;
877 u_int64_t cursize;
878
879 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
880 if (suser(cred, NULL) &&
881 kauth_cred_getuid(cred) != vfsp->f_owner) {
882 return (EACCES); /* must be owner of file system */
883 }
884 if (!vnode_isvroot(vp)) {
885 return (EINVAL);
886 }
887 newsize = *(u_int64_t *)ap->a_data;
888 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
889
890 if (newsize > cursize) {
891 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
892 } else if (newsize < cursize) {
893 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
894 } else {
895 return (0);
896 }
897 }
898 case HFS_CHANGE_NEXT_ALLOCATION: {
899 u_int32_t location;
900
901 if (vnode_vfsisrdonly(vp)) {
902 return (EROFS);
903 }
904 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
905 if (suser(cred, NULL) &&
906 kauth_cred_getuid(cred) != vfsp->f_owner) {
907 return (EACCES); /* must be owner of file system */
908 }
909 if (!vnode_isvroot(vp)) {
910 return (EINVAL);
911 }
912 location = *(u_int32_t *)ap->a_data;
913 if (location > hfsmp->totalBlocks - 1) {
914 return (EINVAL);
915 }
916 /* Return previous value. */
917 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
918 HFS_MOUNT_LOCK(hfsmp, TRUE);
919 hfsmp->nextAllocation = location;
920 hfsmp->vcbFlags |= 0xFF00;
921 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
922 return (0);
923 }
924
925 #ifdef HFS_SPARSE_DEV
926 case HFS_SETBACKINGSTOREINFO: {
927 struct vnode * bsfs_rootvp;
928 struct vnode * di_vp;
929 struct hfs_backingstoreinfo *bsdata;
930 int error = 0;
931
932 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
933 return (EALREADY);
934 }
935 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
936 if (suser(cred, NULL) &&
937 kauth_cred_getuid(cred) != vfsp->f_owner) {
938 return (EACCES); /* must be owner of file system */
939 }
940 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
941 if (bsdata == NULL) {
942 return (EINVAL);
943 }
944 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
945 return (error);
946 }
947 if ((error = vnode_getwithref(di_vp))) {
948 file_drop(bsdata->backingfd);
949 return(error);
950 }
951
952 if (vnode_mount(vp) == vnode_mount(di_vp)) {
953 (void)vnode_put(di_vp);
954 file_drop(bsdata->backingfd);
955 return (EINVAL);
956 }
957
958 /*
959 * Obtain the backing fs root vnode and keep a reference
960 * on it. This reference will be dropped in hfs_unmount.
961 */
962 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
963 if (error) {
964 (void)vnode_put(di_vp);
965 file_drop(bsdata->backingfd);
966 return (error);
967 }
968 vnode_ref(bsfs_rootvp);
969 vnode_put(bsfs_rootvp);
970
971 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
972 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
973 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
974 hfsmp->hfs_sparsebandblks *= 4;
975
976 (void)vnode_put(di_vp);
977 file_drop(bsdata->backingfd);
978 return (0);
979 }
980 case HFS_CLRBACKINGSTOREINFO: {
981 struct vnode * tmpvp;
982
983 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
984 if (suser(cred, NULL) &&
985 kauth_cred_getuid(cred) != vfsp->f_owner) {
986 return (EACCES); /* must be owner of file system */
987 }
988 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
989 hfsmp->hfs_backingfs_rootvp) {
990
991 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
992 tmpvp = hfsmp->hfs_backingfs_rootvp;
993 hfsmp->hfs_backingfs_rootvp = NULLVP;
994 hfsmp->hfs_sparsebandblks = 0;
995 vnode_rele(tmpvp);
996 }
997 return (0);
998 }
999 #endif /* HFS_SPARSE_DEV */
1000
1001 case F_FREEZE_FS: {
1002 struct mount *mp;
1003 task_t task;
1004
1005 if (!is_suser())
1006 return (EACCES);
1007
1008 mp = vnode_mount(vp);
1009 hfsmp = VFSTOHFS(mp);
1010
1011 if (!(hfsmp->jnl))
1012 return (ENOTSUP);
1013
1014 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
1015
1016 task = current_task();
1017 task_working_set_disable(task);
1018
1019 // flush things before we get started to try and prevent
1020 // dirty data from being paged out while we're frozen.
1021 // note: can't do this after taking the lock as it will
1022 // deadlock against ourselves.
1023 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1024 hfs_global_exclusive_lock_acquire(hfsmp);
1025 journal_flush(hfsmp->jnl);
1026
1027 // don't need to iterate on all vnodes, we just need to
1028 // wait for writes to the system files and the device vnode
1029 if (HFSTOVCB(hfsmp)->extentsRefNum)
1030 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1031 if (HFSTOVCB(hfsmp)->catalogRefNum)
1032 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1033 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1034 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1035 if (hfsmp->hfs_attribute_vp)
1036 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1037 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1038
1039 hfsmp->hfs_freezing_proc = current_proc();
1040
1041 return (0);
1042 }
1043
1044 case F_THAW_FS: {
1045 if (!is_suser())
1046 return (EACCES);
1047
1048 // if we're not the one who froze the fs then we
1049 // can't thaw it.
1050 if (hfsmp->hfs_freezing_proc != current_proc()) {
1051 return EPERM;
1052 }
1053
1054 // NOTE: if you add code here, also go check the
1055 // code that "thaws" the fs in hfs_vnop_close()
1056 //
1057 hfsmp->hfs_freezing_proc = NULL;
1058 hfs_global_exclusive_lock_release(hfsmp);
1059 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1060
1061 return (0);
1062 }
1063
1064 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1065 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1066
1067 case HFS_BULKACCESS_FSCTL:
1068 case HFS_BULKACCESS: {
1069 /*
1070 * NOTE: on entry, the vnode is locked. In case this vnode
1071 * happens to be in our list of file_ids, we note it so we
1072 * avoid calling hfs_chashget_nowait() on that id, as that
1073 * would cause a "locking against myself" panic.
1074 */
1075 Boolean check_leaf = true;
1076
1077 struct user_access_t *user_access_structp;
1078 struct user_access_t tmp_user_access_t;
1079 struct access_cache cache;
1080
1081 int error = 0, i;
1082
1083 dev_t dev = VTOC(vp)->c_dev;
1084
1085 short flags;
1086 struct ucred myucred; /* XXX ILLEGAL */
1087 int num_files;
1088 int *file_ids = NULL;
1089 short *access = NULL;
1090
1091 cnid_t cnid;
1092 cnid_t prevParent_cnid = 0;
1093 unsigned long myPerms;
1094 short myaccess = 0;
1095 struct cat_attr cnattr;
1096 CatalogKey catkey;
1097 struct cnode *skip_cp = VTOC(vp);
1098 struct vfs_context my_context;
1099
1100 /* first, return error if not run as root */
1101 if (cred->cr_ruid != 0) {
1102 return EPERM;
1103 }
1104
1105 /* initialize the local cache and buffers */
1106 cache.numcached = 0;
1107 cache.cachehits = 0;
1108 cache.lookups = 0;
1109
1110 file_ids = (int *) get_pathbuff();
1111 access = (short *) get_pathbuff();
1112 cache.acache = (int *) get_pathbuff();
1113 cache.haveaccess = (Boolean *) get_pathbuff();
1114
1115 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1116 release_pathbuff((char *) file_ids);
1117 release_pathbuff((char *) access);
1118 release_pathbuff((char *) cache.acache);
1119 release_pathbuff((char *) cache.haveaccess);
1120
1121 return ENOMEM;
1122 }
1123
1124 /* struct copyin done during dispatch... need to copy file_id array separately */
1125 if (ap->a_data == NULL) {
1126 error = EINVAL;
1127 goto err_exit_bulk_access;
1128 }
1129
1130 if (is64bit) {
1131 user_access_structp = (struct user_access_t *)ap->a_data;
1132 }
1133 else {
1134 struct access_t * accessp = (struct access_t *)ap->a_data;
1135 tmp_user_access_t.uid = accessp->uid;
1136 tmp_user_access_t.flags = accessp->flags;
1137 tmp_user_access_t.num_groups = accessp->num_groups;
1138 tmp_user_access_t.num_files = accessp->num_files;
1139 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1140 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1141 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1142 user_access_structp = &tmp_user_access_t;
1143 }
1144
1145 num_files = user_access_structp->num_files;
1146 if (num_files < 1) {
1147 goto err_exit_bulk_access;
1148 }
1149 if (num_files > 256) {
1150 error = EINVAL;
1151 goto err_exit_bulk_access;
1152 }
1153
1154 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1155 num_files * sizeof(int)))) {
1156 goto err_exit_bulk_access;
1157 }
1158
1159 /* fill in the ucred structure */
1160 flags = user_access_structp->flags;
1161 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1162 flags = R_OK;
1163 }
1164
1165 /* check if we've been passed leaf node ids or parent ids */
1166 if (flags & PARENT_IDS_FLAG) {
1167 check_leaf = false;
1168 }
1169
1170 memset(&myucred, 0, sizeof(myucred));
1171 myucred.cr_ref = 1;
1172 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1173 myucred.cr_ngroups = user_access_structp->num_groups;
1174 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1175 myucred.cr_ngroups = 0;
1176 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1177 myucred.cr_ngroups * sizeof(gid_t)))) {
1178 goto err_exit_bulk_access;
1179 }
1180 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1181 myucred.cr_gmuid = myucred.cr_uid;
1182
1183 my_context.vc_proc = p;
1184 my_context.vc_ucred = &myucred;
1185
1186 /* Check access to each file_id passed in */
1187 for (i = 0; i < num_files; i++) {
1188 #if 0
1189 cnid = (cnid_t) file_ids[i];
1190
1191 /* root always has access */
1192 if (!suser(&myucred, NULL)) {
1193 access[i] = 0;
1194 continue;
1195 }
1196
1197 if (check_leaf) {
1198
1199 /* do the lookup (checks the cnode hash, then the catalog) */
1200 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1201 if (error) {
1202 access[i] = (short) error;
1203 continue;
1204 }
1205
1206 /* before calling CheckAccess(), check the target file for read access */
1207 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1208 cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p );
1209
1210
1211 /* fail fast if no access */
1212 if ((myPerms & flags) == 0) {
1213 access[i] = EACCES;
1214 continue;
1215 }
1216 } else {
1217 /* we were passed an array of parent ids */
1218 catkey.hfsPlus.parentID = cnid;
1219 }
1220
1221 /* if the last guy had the same parent and had access, we're done */
1222 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1223 cache.cachehits++;
1224 access[i] = 0;
1225 continue;
1226 }
1227
1228 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1229 skip_cp, p, &myucred, dev);
1230
1231 if ( myaccess ) {
1232 access[i] = 0; // have access.. no errors to report
1233 } else {
1234 access[i] = (error != 0 ? (short) error : EACCES);
1235 }
1236
1237 prevParent_cnid = catkey.hfsPlus.parentID;
1238 #else
1239 int myErr;
1240
1241 cnid = (cnid_t)file_ids[i];
1242
1243 while (cnid >= kRootDirID) {
1244 /* get the vnode for this cnid */
1245 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1246 if ( myErr ) {
1247 access[i] = EACCES;
1248 break;
1249 }
1250
1251 cnid = VTOC(vp)->c_parentcnid;
1252
1253 hfs_unlock(VTOC(vp));
1254 if (vnode_vtype(vp) == VDIR) {
1255 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1256 } else {
1257 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1258 }
1259 vnode_put(vp);
1260 access[i] = myErr;
1261 if (myErr) {
1262 break;
1263 }
1264 }
1265 #endif
1266 }
1267
1268 /* copyout the access array */
1269 if ((error = copyout((caddr_t)access, user_access_structp->access,
1270 num_files * sizeof (short)))) {
1271 goto err_exit_bulk_access;
1272 }
1273
1274 err_exit_bulk_access:
1275
1276 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1277
1278 release_pathbuff((char *) cache.acache);
1279 release_pathbuff((char *) cache.haveaccess);
1280 release_pathbuff((char *) file_ids);
1281 release_pathbuff((char *) access);
1282
1283 return (error);
1284 } /* HFS_BULKACCESS */
1285
1286 case HFS_SETACLSTATE: {
1287 int state;
1288
1289 if (ap->a_data == NULL) {
1290 return (EINVAL);
1291 }
1292
1293 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1294 state = *(int *)ap->a_data;
1295
1296 // The super-user can enable or disable ACLs on a volume.
1297 // The volume owner can only enable ACLs.
1298 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1299 return (EPERM);
1300 }
1301 if (state == 0 || state == 1)
1302 return hfs_setextendedsecurity(hfsmp, state);
1303 else
1304 return (EINVAL);
1305 }
1306
1307 case F_FULLFSYNC: {
1308 int error;
1309
1310 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1311 if (error == 0) {
1312 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1313 hfs_unlock(VTOC(vp));
1314 }
1315
1316 return error;
1317 }
1318
1319 case F_CHKCLEAN: {
1320 register struct cnode *cp;
1321 int error;
1322
1323 if (!vnode_isreg(vp))
1324 return EINVAL;
1325
1326 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1327 if (error == 0) {
1328 cp = VTOC(vp);
1329 /*
1330 * used by regression test to determine if
1331 * all the dirty pages (via write) have been cleaned
1332 * after a call to 'fsync'.
1333 */
1334 error = is_file_clean(vp, VTOF(vp)->ff_size);
1335 hfs_unlock(cp);
1336 }
1337 return (error);
1338 }
1339
1340 case F_RDADVISE: {
1341 register struct radvisory *ra;
1342 struct filefork *fp;
1343 int error;
1344
1345 if (!vnode_isreg(vp))
1346 return EINVAL;
1347
1348 ra = (struct radvisory *)(ap->a_data);
1349 fp = VTOF(vp);
1350
1351 /* Protect against a size change. */
1352 hfs_lock_truncate(VTOC(vp), TRUE);
1353
1354 if (ra->ra_offset >= fp->ff_size) {
1355 error = EFBIG;
1356 } else {
1357 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1358 }
1359
1360 hfs_unlock_truncate(VTOC(vp));
1361 return (error);
1362 }
1363
1364 case F_READBOOTSTRAP:
1365 case F_WRITEBOOTSTRAP:
1366 {
1367 struct vnode *devvp = NULL;
1368 user_fbootstraptransfer_t *user_bootstrapp;
1369 int devBlockSize;
1370 int error;
1371 uio_t auio;
1372 daddr64_t blockNumber;
1373 u_long blockOffset;
1374 u_long xfersize;
1375 struct buf *bp;
1376 user_fbootstraptransfer_t user_bootstrap;
1377
1378 if (!vnode_isvroot(vp))
1379 return (EINVAL);
1380 /* LP64 - when the caller is a 64-bit process we are passed a pointer
1381 * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
1382 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t.
1383 */
1384 if (is64bit) {
1385 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1386 }
1387 else {
1388 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1389 user_bootstrapp = &user_bootstrap;
1390 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1391 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1392 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1393 }
1394 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1395 return EINVAL;
1396
1397 devvp = VTOHFS(vp)->hfs_devvp;
1398 auio = uio_create(1, user_bootstrapp->fbt_offset,
1399 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1400 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1401 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1402
1403 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1404
1405 while (uio_resid(auio) > 0) {
1406 blockNumber = uio_offset(auio) / devBlockSize;
1407 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1408 if (error) {
1409 if (bp) buf_brelse(bp);
1410 uio_free(auio);
1411 return error;
1412 };
1413
1414 blockOffset = uio_offset(auio) % devBlockSize;
1415 xfersize = devBlockSize - blockOffset;
1416 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1417 if (error) {
1418 buf_brelse(bp);
1419 uio_free(auio);
1420 return error;
1421 };
1422 if (uio_rw(auio) == UIO_WRITE) {
1423 error = VNOP_BWRITE(bp);
1424 if (error) {
1425 uio_free(auio);
1426 return error;
1427 }
1428 } else {
1429 buf_brelse(bp);
1430 };
1431 };
1432 uio_free(auio);
1433 };
1434 return 0;
1435
1436 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1437 {
1438 if (is64bit) {
1439 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1440 }
1441 else {
1442 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1443 }
1444 return 0;
1445 }
1446
1447 case HFS_GET_MOUNT_TIME:
1448 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1449 break;
1450
1451 case HFS_GET_LAST_MTIME:
1452 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1453 break;
1454
1455 case HFS_SET_BOOT_INFO:
1456 if (!vnode_isvroot(vp))
1457 return(EINVAL);
1458 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1459 return(EACCES); /* must be superuser or owner of filesystem */
1460 HFS_MOUNT_LOCK(hfsmp, TRUE);
1461 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1462 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1463 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1464 break;
1465
1466 case HFS_GET_BOOT_INFO:
1467 if (!vnode_isvroot(vp))
1468 return(EINVAL);
1469 HFS_MOUNT_LOCK(hfsmp, TRUE);
1470 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1471 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1472 break;
1473
1474 default:
1475 return (ENOTTY);
1476 }
1477
1478 /* Should never get here */
1479 return 0;
1480 }
1481
1482 /*
1483 * select
1484 */
1485 int
1486 hfs_vnop_select(__unused struct vnop_select_args *ap)
1487 /*
1488 struct vnop_select_args {
1489 vnode_t a_vp;
1490 int a_which;
1491 int a_fflags;
1492 void *a_wql;
1493 vfs_context_t a_context;
1494 };
1495 */
1496 {
1497 /*
1498 * We should really check to see if I/O is possible.
1499 */
1500 return (1);
1501 }
1502
1503 /*
1504 * Converts a logical block number to a physical block, and optionally returns
1505 * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1506 * The physical block number is based on the device block size, currently 512.
1507 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
1508 */
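/*
 * Illustrative example: with a 4096-byte logical block size, logical
 * block 10 is byte offset 40960 in the fork; MapFileBlockC translates
 * that offset into a 512-byte device block within the file's extents.
 * If 32768 contiguous bytes remain in that extent, *runp comes back as
 * (32768 / 4096) - 1 = 7 additional logical blocks.
 */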
1509 int
1510 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1511 {
1512 struct cnode *cp = VTOC(vp);
1513 struct filefork *fp = VTOF(vp);
1514 struct hfsmount *hfsmp = VTOHFS(vp);
1515 int retval = E_NONE;
1516 daddr_t logBlockSize;
1517 size_t bytesContAvail = 0;
1518 off_t blockposition;
1519 int lockExtBtree;
1520 int lockflags = 0;
1521
1522 /*
1523 * Check for underlying vnode requests and ensure that logical
1524 * to physical mapping is requested.
1525 */
1526 if (vpp != NULL)
1527 *vpp = cp->c_devvp;
1528 if (bnp == NULL)
1529 return (0);
1530
1531 logBlockSize = GetLogicalBlockSize(vp);
1532 blockposition = (off_t)bn * (off_t)logBlockSize;
1533
1534 lockExtBtree = overflow_extents(fp);
1535
1536 if (lockExtBtree)
1537 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1538
1539 retval = MacToVFSError(
1540 MapFileBlockC (HFSTOVCB(hfsmp),
1541 (FCB*)fp,
1542 MAXPHYSIO,
1543 blockposition,
1544 bnp,
1545 &bytesContAvail));
1546
1547 if (lockExtBtree)
1548 hfs_systemfile_unlock(hfsmp, lockflags);
1549
1550 if (retval == E_NONE) {
1551 /* Figure out how many read ahead blocks there are */
1552 if (runp != NULL) {
1553 if (can_cluster(logBlockSize)) {
1554 /* Make sure this result never goes negative: */
1555 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1556 } else {
1557 *runp = 0;
1558 }
1559 }
1560 }
1561 return (retval);
1562 }
1563
1564 /*
1565 * Convert logical block number to file offset.
1566 */
1567 int
1568 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1569 /*
1570 struct vnop_blktooff_args {
1571 vnode_t a_vp;
1572 daddr64_t a_lblkno;
1573 off_t *a_offset;
1574 };
1575 */
1576 {
1577 if (ap->a_vp == NULL)
1578 return (EINVAL);
1579 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1580
1581 return(0);
1582 }
1583
1584 /*
1585 * Convert file offset to logical block number.
1586 */
1587 int
1588 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1589 /*
1590 struct vnop_offtoblk_args {
1591 vnode_t a_vp;
1592 off_t a_offset;
1593 daddr64_t *a_lblkno;
1594 };
1595 */
1596 {
1597 if (ap->a_vp == NULL)
1598 return (EINVAL);
1599 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1600
1601 return(0);
1602 }
1603
1604 /*
1605 * Map file offset to physical block number.
1606 *
1607 * System file cnodes are expected to be locked (shared or exclusive).
1608 */
1609 int
1610 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1611 /*
1612 struct vnop_blockmap_args {
1613 vnode_t a_vp;
1614 off_t a_foffset;
1615 size_t a_size;
1616 daddr64_t *a_bpn;
1617 size_t *a_run;
1618 void *a_poff;
1619 int a_flags;
1620 vfs_context_t a_context;
1621 };
1622 */
1623 {
1624 struct vnode *vp = ap->a_vp;
1625 struct cnode *cp;
1626 struct filefork *fp;
1627 struct hfsmount *hfsmp;
1628 size_t bytesContAvail = 0;
1629 int retval = E_NONE;
1630 int syslocks = 0;
1631 int lockflags = 0;
1632 struct rl_entry *invalid_range;
1633 enum rl_overlaptype overlaptype;
1634 int started_tr = 0;
1635 int tooklock = 0;
1636
1637 /* Do not allow blockmap operation on a directory */
1638 if (vnode_isdir(vp)) {
1639 return (ENOTSUP);
1640 }
1641
1642 /*
1643 * Check for underlying vnode requests and ensure that logical
1644 * to physical mapping is requested.
1645 */
1646 if (ap->a_bpn == NULL)
1647 return (0);
1648
1649 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1650 if (VTOC(vp)->c_lockowner != current_thread()) {
1651 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1652 tooklock = 1;
1653 } else {
1654 cp = VTOC(vp);
1655 panic("blockmap: %s cnode lock already held!\n",
1656 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1657 }
1658 }
1659 hfsmp = VTOHFS(vp);
1660 cp = VTOC(vp);
1661 fp = VTOF(vp);
1662
1663 retry:
1664 if (fp->ff_unallocblocks) {
1665 if (hfs_start_transaction(hfsmp) != 0) {
1666 retval = EINVAL;
1667 goto exit;
1668 } else {
1669 started_tr = 1;
1670 }
1671 syslocks = SFL_EXTENTS | SFL_BITMAP;
1672
1673 } else if (overflow_extents(fp)) {
1674 syslocks = SFL_EXTENTS;
1675 }
1676
1677 if (syslocks)
1678 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1679
1680 /*
1681 * Check for any delayed allocations.
1682 */
1683 if (fp->ff_unallocblocks) {
1684 SInt64 actbytes;
1685 u_int32_t loanedBlocks;
1686
1687 //
1688 // Make sure we have a transaction. It's possible
1689 // that we came in and fp->ff_unallocblocks was zero
1690 // but during the time we blocked acquiring the extents
1691 // btree, ff_unallocblocks became non-zero and so we
1692 // will need to start a transaction.
1693 //
1694 if (started_tr == 0) {
1695 if (syslocks) {
1696 hfs_systemfile_unlock(hfsmp, lockflags);
1697 syslocks = 0;
1698 }
1699 goto retry;
1700 }
1701
1702 /*
1703 * Note: ExtendFileC will release any blocks on loan and
1704 * acquire real blocks. So we ask to extend by zero bytes
1705 * since ExtendFileC will account for the virtual blocks.
1706 */
1707
1708 loanedBlocks = fp->ff_unallocblocks;
1709 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1710 kEFAllMask | kEFNoClumpMask, &actbytes);
1711
1712 if (retval) {
1713 fp->ff_unallocblocks = loanedBlocks;
1714 cp->c_blocks += loanedBlocks;
1715 fp->ff_blocks += loanedBlocks;
1716
1717 HFS_MOUNT_LOCK(hfsmp, TRUE);
1718 hfsmp->loanedBlocks += loanedBlocks;
1719 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1720 }
1721
1722 if (retval) {
1723 hfs_systemfile_unlock(hfsmp, lockflags);
1724 cp->c_flag |= C_MODIFIED;
1725 if (started_tr) {
1726 (void) hfs_update(vp, TRUE);
1727 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1728
1729 hfs_end_transaction(hfsmp);
1730 }
1731 goto exit;
1732 }
1733 }
1734
1735 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1736 ap->a_bpn, &bytesContAvail);
1737 if (syslocks) {
1738 hfs_systemfile_unlock(hfsmp, lockflags);
1739 syslocks = 0;
1740 }
1741
1742 if (started_tr) {
1743 (void) hfs_update(vp, TRUE);
1744 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1745 hfs_end_transaction(hfsmp);
1746 started_tr = 0;
1747 }
1748 if (retval) {
1749 goto exit;
1750 }
1751
1752 /* Adjust the mapping information for invalid file ranges: */
1753 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1754 ap->a_foffset + (off_t)bytesContAvail - 1,
1755 &invalid_range);
1756 if (overlaptype != RL_NOOVERLAP) {
1757 switch(overlaptype) {
1758 case RL_MATCHINGOVERLAP:
1759 case RL_OVERLAPCONTAINSRANGE:
1760 case RL_OVERLAPSTARTSBEFORE:
1761 /* There's no valid block for this byte offset: */
1762 *ap->a_bpn = (daddr64_t)-1;
1763 /* There's no point limiting the amount to be returned
1764 * if the invalid range that was hit extends all the way
1765 * to the EOF (i.e. there are no valid bytes between the
1766 * end of this range and the file's EOF):
1767 */
1768 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1769 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1770 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1771 }
1772 break;
1773
1774 case RL_OVERLAPISCONTAINED:
1775 case RL_OVERLAPENDSAFTER:
1776 /* The range of interest hits an invalid block before the end: */
1777 if (invalid_range->rl_start == ap->a_foffset) {
1778 /* There's actually no valid information to be had starting here: */
1779 *ap->a_bpn = (daddr64_t)-1;
1780 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1781 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1782 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1783 }
1784 } else {
1785 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1786 }
1787 break;
1788
1789 case RL_NOOVERLAP:
1790 break;
1791 } /* end switch */
1792 if (bytesContAvail > ap->a_size)
1793 bytesContAvail = ap->a_size;
1794 }
1795 if (ap->a_run)
1796 *ap->a_run = bytesContAvail;
1797
1798 if (ap->a_poff)
1799 *(int *)ap->a_poff = 0;
1800 exit:
1801 if (tooklock)
1802 hfs_unlock(cp);
1803
1804 return (MacToVFSError(retval));
1805 }
1806
1807
1808 /*
1809 * prepare and issue the I/O
1810 * buf_strategy knows how to deal
1811 * with requests that require
1812 * fragmented I/Os
1813 */
1814 int
1815 hfs_vnop_strategy(struct vnop_strategy_args *ap)
1816 {
1817 buf_t bp = ap->a_bp;
1818 vnode_t vp = buf_vnode(bp);
1819 struct cnode *cp = VTOC(vp);
1820
1821 return (buf_strategy(cp->c_devvp, ap));
1822 }
1823
1824
1825 static int
1826 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1827 {
1828 register struct cnode *cp = VTOC(vp);
1829 struct filefork *fp = VTOF(vp);
1830 struct proc *p = vfs_context_proc(context);
1831 kauth_cred_t cred = vfs_context_ucred(context);
1832 int retval;
1833 off_t bytesToAdd;
1834 off_t actualBytesAdded;
1835 off_t filebytes;
1836 u_int64_t old_filesize;
1837 u_long fileblocks;
1838 int blksize;
1839 struct hfsmount *hfsmp;
1840 int lockflags;
1841
1842 blksize = VTOVCB(vp)->blockSize;
1843 fileblocks = fp->ff_blocks;
1844 filebytes = (off_t)fileblocks * (off_t)blksize;
1845 old_filesize = fp->ff_size;
1846
1847 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1848 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1849
1850 if (length < 0)
1851 return (EINVAL);
1852
1853 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1854 return (EFBIG);
1855
1856 hfsmp = VTOHFS(vp);
1857
1858 retval = E_NONE;
1859
1860 /* Files that are changing size are not hot file candidates. */
1861 if (hfsmp->hfc_stage == HFC_RECORDING) {
1862 fp->ff_bytesread = 0;
1863 }
1864
1865 /*
1866 * We cannot just check if fp->ff_size == length (as an optimization)
1867 * since there may be extra physical blocks that also need truncation.
1868 */
1869 #if QUOTA
1870 if ((retval = hfs_getinoquota(cp)))
1871 return(retval);
1872 #endif /* QUOTA */
1873
1874 /*
1875 * Lengthen the size of the file. We must ensure that the
1876 * last byte of the file is allocated. Since the smallest
1877 * value of ff_size is 0, length will be at least 1.
1878 */
1879 if (length > (off_t)fp->ff_size) {
1880 #if QUOTA
1881 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1882 cred, 0);
1883 if (retval)
1884 goto Err_Exit;
1885 #endif /* QUOTA */
1886 /*
1887 * If we don't have enough physical space then
1888 * we need to extend the physical size.
1889 */
1890 if (length > filebytes) {
1891 int eflags;
1892 u_long blockHint = 0;
1893
1894 /* All or nothing and don't round up to clumpsize. */
1895 eflags = kEFAllMask | kEFNoClumpMask;
1896
1897 if (cred && suser(cred, NULL) != 0)
1898 eflags |= kEFReserveMask; /* keep a reserve */
1899
1900 /*
1901 * Allocate Journal and Quota files in metadata zone.
1902 */
1903 if (filebytes == 0 &&
1904 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1905 hfs_virtualmetafile(cp)) {
1906 eflags |= kEFMetadataMask;
1907 blockHint = hfsmp->hfs_metazone_start;
1908 }
1909 if (hfs_start_transaction(hfsmp) != 0) {
1910 retval = EINVAL;
1911 goto Err_Exit;
1912 }
1913
1914 /* Protect extents b-tree and allocation bitmap */
1915 lockflags = SFL_BITMAP;
1916 if (overflow_extents(fp))
1917 lockflags |= SFL_EXTENTS;
1918 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1919
1920 while ((length > filebytes) && (retval == E_NONE)) {
1921 bytesToAdd = length - filebytes;
1922 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1923 (FCB*)fp,
1924 bytesToAdd,
1925 blockHint,
1926 eflags,
1927 &actualBytesAdded));
1928
1929 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1930 if (actualBytesAdded == 0 && retval == E_NONE) {
1931 if (length > filebytes)
1932 length = filebytes;
1933 break;
1934 }
1935 } /* endwhile */
1936
1937 hfs_systemfile_unlock(hfsmp, lockflags);
1938
1939 if (hfsmp->jnl) {
1940 (void) hfs_update(vp, TRUE);
1941 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1942 }
1943
1944 hfs_end_transaction(hfsmp);
1945
1946 if (retval)
1947 goto Err_Exit;
1948
1949 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1950 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1951 }
1952
1953 if (!(flags & IO_NOZEROFILL)) {
1954 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1955 struct rl_entry *invalid_range;
1956 off_t zero_limit;
1957
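				/*
				 * zero_limit: the old EOF rounded up to the next page boundary
				 * (capped at the new length).  Only the tail of the partially
				 * valid last page needs explicit zeroing below.
				 */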
1958 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1959 if (length < zero_limit) zero_limit = length;
1960
1961 if (length > (off_t)fp->ff_size) {
1962 struct timeval tv;
1963
1964 /* Extending the file: time to fill out the current last page w. zeroes? */
1965 if ((fp->ff_size & PAGE_MASK_64) &&
1966 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1967 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1968
1969 /* There's some valid data at the start of the (current) last page
1970 of the file, so zero out the remainder of that page to ensure the
1971 entire page contains valid data. Since there is no invalid range
1972 possible past the (current) eof, there's no need to remove anything
1973 from the invalid range list before calling cluster_write(): */
1974 hfs_unlock(cp);
1975 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1976 fp->ff_size, (off_t)0,
1977 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1978 hfs_lock(cp, HFS_FORCE_LOCK);
1979 if (retval) goto Err_Exit;
1980
1981 /* Merely invalidate the remaining area, if necessary: */
1982 if (length > zero_limit) {
1983 microuptime(&tv);
1984 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1985 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1986 }
1987 } else {
1988 /* The page containing the (current) eof is invalid: just add the
1989 remainder of the page to the invalid list, along with the area
1990 being newly allocated:
1991 */
1992 microuptime(&tv);
1993 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1994 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1995 };
1996 }
1997 } else {
1998 panic("hfs_truncate: invoked on non-UBC object?!");
1999 };
2000 }
2001 cp->c_touch_modtime = TRUE;
2002 fp->ff_size = length;
2003
2004 /* Nested transactions will do their own ubc_setsize. */
2005 if (!skipsetsize) {
2006 /*
2007 * ubc_setsize can cause a pagein here
2008 * so we need to drop cnode lock.
2009 */
2010 hfs_unlock(cp);
2011 ubc_setsize(vp, length);
2012 hfs_lock(cp, HFS_FORCE_LOCK);
2013 }
2014
2015 } else { /* Shorten the size of the file */
2016
2017 if ((off_t)fp->ff_size > length) {
2018 /*
2019 * Any buffers that are past the truncation point need to be
2020 * invalidated (to maintain buffer cache consistency).
2021 */
2022
2023 /* Nested transactions will do their own ubc_setsize. */
2024 if (!skipsetsize) {
2025 /*
2026 * ubc_setsize can cause a pageout here
2027 * so we need to drop cnode lock.
2028 */
2029 hfs_unlock(cp);
2030 ubc_setsize(vp, length);
2031 hfs_lock(cp, HFS_FORCE_LOCK);
2032 }
2033
2034 /* Any space previously marked as invalid is now irrelevant: */
2035 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2036 }
2037
2038 /*
2039 * Account for any unmapped blocks. Note that the new
2040 * file length can still end up with unmapped blocks.
2041 */
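		/*
		 * (Descriptive note) ff_unallocblocks are blocks "loaned" by delayed
		 * allocation: they are counted in ff_blocks and c_blocks but are not
		 * yet mapped on disk.  Return the loan here, then re-borrow only what
		 * the new length still needs, using the ceiling division
		 * (length + blksize - 1) / blksize below.
		 */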
2042 if (fp->ff_unallocblocks > 0) {
2043 u_int32_t finalblks;
2044 u_int32_t loanedBlocks;
2045
2046 HFS_MOUNT_LOCK(hfsmp, TRUE);
2047
2048 loanedBlocks = fp->ff_unallocblocks;
2049 cp->c_blocks -= loanedBlocks;
2050 fp->ff_blocks -= loanedBlocks;
2051 fp->ff_unallocblocks = 0;
2052
2053 hfsmp->loanedBlocks -= loanedBlocks;
2054
2055 finalblks = (length + blksize - 1) / blksize;
2056 if (finalblks > fp->ff_blocks) {
2057 /* calculate required unmapped blocks */
2058 loanedBlocks = finalblks - fp->ff_blocks;
2059 hfsmp->loanedBlocks += loanedBlocks;
2060
2061 fp->ff_unallocblocks = loanedBlocks;
2062 cp->c_blocks += loanedBlocks;
2063 fp->ff_blocks += loanedBlocks;
2064 }
2065 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2066 }
2067
2068 /*
2069 * For a TBE process the deallocation of the file blocks is
2070 * delayed until the file is closed. And hfs_close calls
2071 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2072 * isn't set, we make sure this isn't a TBE process.
2073 */
2074 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2075 #if QUOTA
2076 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2077 #endif /* QUOTA */
2078 if (hfs_start_transaction(hfsmp) != 0) {
2079 retval = EINVAL;
2080 goto Err_Exit;
2081 }
2082
2083 if (fp->ff_unallocblocks == 0) {
2084 /* Protect extents b-tree and allocation bitmap */
2085 lockflags = SFL_BITMAP;
2086 if (overflow_extents(fp))
2087 lockflags |= SFL_EXTENTS;
2088 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2089
2090 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2091 (FCB*)fp, length, false));
2092
2093 hfs_systemfile_unlock(hfsmp, lockflags);
2094 }
2095 if (hfsmp->jnl) {
2096 if (retval == 0) {
2097 fp->ff_size = length;
2098 }
2099 (void) hfs_update(vp, TRUE);
2100 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2101 }
2102
2103 hfs_end_transaction(hfsmp);
2104
2105 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2106 if (retval)
2107 goto Err_Exit;
2108 #if QUOTA
2109 /* These are bytesreleased */
2110 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2111 #endif /* QUOTA */
2112 }
2113 /* Only set update flag if the logical length changes */
2114 if (old_filesize != length)
2115 cp->c_touch_modtime = TRUE;
2116 fp->ff_size = length;
2117 }
2118 cp->c_touch_chgtime = TRUE;
2119 retval = hfs_update(vp, MNT_WAIT);
2120 if (retval) {
2121 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2122 -1, -1, -1, retval, 0);
2123 }
2124
2125 Err_Exit:
2126
2127 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2128 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2129
2130 return (retval);
2131 }
2132
2133
2134
2135 /*
2136 * Truncate a cnode to at most length size, freeing (or adding) the
2137 * disk blocks.
2138 */
2139 __private_extern__
2140 int
2141 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2142 vfs_context_t context)
2143 {
2144 struct filefork *fp = VTOF(vp);
2145 off_t filebytes;
2146 u_long fileblocks;
2147 int blksize, error = 0;
2148 struct cnode *cp = VTOC(vp);
2149
2150 if (vnode_isdir(vp))
2151 return (EISDIR); /* cannot truncate an HFS directory! */
2152
2153 blksize = VTOVCB(vp)->blockSize;
2154 fileblocks = fp->ff_blocks;
2155 filebytes = (off_t)fileblocks * (off_t)blksize;
2156
2157 // have to loop truncating or growing files that are
2158 // really big because otherwise transactions can get
2159 // enormous and consume too many kernel resources.
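	// (Hedged note) HFS_BIGFILE_SIZE caps how far the fork grows or shrinks
	// per do_hfs_truncate() call, keeping each journal transaction bounded;
	// a multi-gigabyte shrink therefore proceeds in several passes.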
2160
2161 if (length < filebytes) {
2162 while (filebytes > length) {
2163 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2164 filebytes -= HFS_BIGFILE_SIZE;
2165 } else {
2166 filebytes = length;
2167 }
2168 cp->c_flag |= C_FORCEUPDATE;
2169 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2170 if (error)
2171 break;
2172 }
2173 } else if (length > filebytes) {
2174 while (filebytes < length) {
2175 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2176 filebytes += HFS_BIGFILE_SIZE;
2177 } else {
2178 filebytes = length;
2179 }
2180 cp->c_flag |= C_FORCEUPDATE;
2181 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2182 if (error)
2183 break;
2184 }
2185 } else /* Same logical size */ {
2186
2187 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2188 }
2189 /* Files that are changing size are not hot file candidates. */
2190 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2191 fp->ff_bytesread = 0;
2192 }
2193
2194 return (error);
2195 }
2196
2197
2198
2199 /*
2200 * Preallocate file storage space.
2201 */
2202 int
2203 hfs_vnop_allocate(struct vnop_allocate_args /* {
2204 vnode_t a_vp;
2205 off_t a_length;
2206 u_int32_t a_flags;
2207 off_t *a_bytesallocated;
2208 off_t a_offset;
2209 vfs_context_t a_context;
2210 } */ *ap)
2211 {
2212 struct vnode *vp = ap->a_vp;
2213 struct cnode *cp;
2214 struct filefork *fp;
2215 ExtendedVCB *vcb;
2216 off_t length = ap->a_length;
2217 off_t startingPEOF;
2218 off_t moreBytesRequested;
2219 off_t actualBytesAdded;
2220 off_t filebytes;
2221 u_long fileblocks;
2222 int retval, retval2;
2223 UInt32 blockHint;
2224 UInt32 extendFlags; /* For call to ExtendFileC */
2225 struct hfsmount *hfsmp;
2226 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2227 int lockflags;
2228
2229 *(ap->a_bytesallocated) = 0;
2230
2231 if (!vnode_isreg(vp))
2232 return (EISDIR);
2233 if (length < (off_t)0)
2234 return (EINVAL);
2235
2236 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2237 return (retval);
2238 cp = VTOC(vp);
2239 fp = VTOF(vp);
2240 hfsmp = VTOHFS(vp);
2241 vcb = VTOVCB(vp);
2242
2243 fileblocks = fp->ff_blocks;
2244 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2245
2246 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2247 retval = EINVAL;
2248 goto Err_Exit;
2249 }
2250
2251 /* Fill in the flags word for the call to Extend the file */
2252
2253 extendFlags = kEFNoClumpMask;
2254 if (ap->a_flags & ALLOCATECONTIG)
2255 extendFlags |= kEFContigMask;
2256 if (ap->a_flags & ALLOCATEALL)
2257 extendFlags |= kEFAllMask;
2258 if (cred && suser(cred, NULL) != 0)
2259 extendFlags |= kEFReserveMask;
2260
2261 retval = E_NONE;
2262 blockHint = 0;
2263 startingPEOF = filebytes;
2264
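	/*
	 * ALLOCATEFROMPEOF: a_length is relative to the current physical EOF,
	 * so convert it to an absolute size.  ALLOCATEFROMVOL: a_offset is a
	 * volume position, used only as an allocation block hint.
	 */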
2265 if (ap->a_flags & ALLOCATEFROMPEOF)
2266 length += filebytes;
2267 else if (ap->a_flags & ALLOCATEFROMVOL)
2268 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2269
2270 	/* If no changes are necessary, then we're done */
2271 if (filebytes == length)
2272 goto Std_Exit;
2273
2274 /*
2275 * Lengthen the size of the file. We must ensure that the
2276 * last byte of the file is allocated. Since the smallest
2277 * value of filebytes is 0, length will be at least 1.
2278 */
2279 if (length > filebytes) {
2280 moreBytesRequested = length - filebytes;
2281
2282 #if QUOTA
2283 retval = hfs_chkdq(cp,
2284 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2285 cred, 0);
2286 if (retval)
2287 goto Err_Exit;
2288
2289 #endif /* QUOTA */
2290 /*
2291 * Metadata zone checks.
2292 */
2293 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2294 /*
2295 * Allocate Journal and Quota files in metadata zone.
2296 */
2297 if (hfs_virtualmetafile(cp)) {
2298 extendFlags |= kEFMetadataMask;
2299 blockHint = hfsmp->hfs_metazone_start;
2300 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2301 (blockHint <= hfsmp->hfs_metazone_end)) {
2302 /*
2303 * Move blockHint outside metadata zone.
2304 */
2305 blockHint = hfsmp->hfs_metazone_end + 1;
2306 }
2307 }
2308
2309 if (hfs_start_transaction(hfsmp) != 0) {
2310 retval = EINVAL;
2311 goto Err_Exit;
2312 }
2313
2314 /* Protect extents b-tree and allocation bitmap */
2315 lockflags = SFL_BITMAP;
2316 if (overflow_extents(fp))
2317 lockflags |= SFL_EXTENTS;
2318 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2319
2320 retval = MacToVFSError(ExtendFileC(vcb,
2321 (FCB*)fp,
2322 moreBytesRequested,
2323 blockHint,
2324 extendFlags,
2325 &actualBytesAdded));
2326
2327 *(ap->a_bytesallocated) = actualBytesAdded;
2328 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2329
2330 hfs_systemfile_unlock(hfsmp, lockflags);
2331
2332 if (hfsmp->jnl) {
2333 (void) hfs_update(vp, TRUE);
2334 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2335 }
2336
2337 hfs_end_transaction(hfsmp);
2338
2339 /*
2340 * if we get an error and no changes were made then exit
2341 * otherwise we must do the hfs_update to reflect the changes
2342 */
2343 if (retval && (startingPEOF == filebytes))
2344 goto Err_Exit;
2345
2346 /*
2347 * Adjust actualBytesAdded to be allocation block aligned, not
2348 * clump size aligned.
2349 * NOTE: So what we are reporting does not affect reality
2350 * until the file is closed, when we truncate the file to allocation
2351 * block size.
2352 */
2353 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2354 *(ap->a_bytesallocated) =
2355 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2356
2357 } else { /* Shorten the size of the file */
2358
2359 if (fp->ff_size > length) {
2360 /*
2361 * Any buffers that are past the truncation point need to be
2362 * invalidated (to maintain buffer cache consistency).
2363 */
2364 }
2365
2366 if (hfs_start_transaction(hfsmp) != 0) {
2367 retval = EINVAL;
2368 goto Err_Exit;
2369 }
2370
2371 /* Protect extents b-tree and allocation bitmap */
2372 lockflags = SFL_BITMAP;
2373 if (overflow_extents(fp))
2374 lockflags |= SFL_EXTENTS;
2375 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2376
2377 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2378
2379 hfs_systemfile_unlock(hfsmp, lockflags);
2380
2381 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2382
2383 if (hfsmp->jnl) {
2384 (void) hfs_update(vp, TRUE);
2385 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2386 }
2387
2388 hfs_end_transaction(hfsmp);
2389
2390
2391 /*
2392 * if we get an error and no changes were made then exit
2393 * otherwise we must do the hfs_update to reflect the changes
2394 */
2395 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2396 #if QUOTA
2397 /* These are bytesreleased */
2398 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2399 #endif /* QUOTA */
2400
2401 if (fp->ff_size > filebytes) {
2402 fp->ff_size = filebytes;
2403
2404 hfs_unlock(cp);
2405 ubc_setsize(vp, fp->ff_size);
2406 hfs_lock(cp, HFS_FORCE_LOCK);
2407 }
2408 }
2409
2410 Std_Exit:
2411 cp->c_touch_chgtime = TRUE;
2412 cp->c_touch_modtime = TRUE;
2413 retval2 = hfs_update(vp, MNT_WAIT);
2414
2415 if (retval == 0)
2416 retval = retval2;
2417 Err_Exit:
2418 hfs_unlock(cp);
2419 return (retval);
2420 }
2421
2422
2423 /*
2424 * Pagein for HFS filesystem
2425 */
2426 int
2427 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2428 /*
2429 struct vnop_pagein_args {
2430 vnode_t a_vp,
2431 upl_t a_pl,
2432 vm_offset_t a_pl_offset,
2433 off_t a_f_offset,
2434 size_t a_size,
2435 int a_flags
2436 vfs_context_t a_context;
2437 };
2438 */
2439 {
2440 vnode_t vp = ap->a_vp;
2441 int error;
2442
2443 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2444 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2445 /*
2446 * Keep track of blocks read.
2447 */
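	/*
	 * (Descriptive note) ff_bytesread feeds the adaptive hot-file clustering
	 * recorder; a file not seen since the start of the current sampling
	 * period (hfc_timebase) restarts its count below.
	 */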
2448 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2449 struct cnode *cp;
2450 struct filefork *fp;
2451 int bytesread;
2452 int took_cnode_lock = 0;
2453
2454 cp = VTOC(vp);
2455 fp = VTOF(vp);
2456
2457 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2458 bytesread = fp->ff_size;
2459 else
2460 bytesread = ap->a_size;
2461
2462 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2463 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2464 hfs_lock(cp, HFS_FORCE_LOCK);
2465 took_cnode_lock = 1;
2466 }
2467 /*
2468 * If this file hasn't been seen since the start of
2469 * the current sampling period then start over.
2470 */
2471 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2472 struct timeval tv;
2473
2474 fp->ff_bytesread = bytesread;
2475 microtime(&tv);
2476 cp->c_atime = tv.tv_sec;
2477 } else {
2478 fp->ff_bytesread += bytesread;
2479 }
2480 cp->c_touch_acctime = TRUE;
2481 if (took_cnode_lock)
2482 hfs_unlock(cp);
2483 }
2484 return (error);
2485 }
2486
2487 /*
2488 * Pageout for HFS filesystem.
2489 */
2490 int
2491 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2492 /*
2493 struct vnop_pageout_args {
2494 vnode_t a_vp,
2495 upl_t a_pl,
2496 vm_offset_t a_pl_offset,
2497 off_t a_f_offset,
2498 size_t a_size,
2499 int a_flags
2500 vfs_context_t a_context;
2501 };
2502 */
2503 {
2504 vnode_t vp = ap->a_vp;
2505 struct cnode *cp;
2506 struct filefork *fp;
2507 int retval;
2508 off_t end_of_range;
2509 off_t filesize;
2510
2511 cp = VTOC(vp);
2512 if (cp->c_lockowner == current_thread()) {
2513 panic("pageout: %s cnode lock already held!\n",
2514 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2515 }
2516 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2517 if (!(ap->a_flags & UPL_NOCOMMIT)) {
2518 ubc_upl_abort_range(ap->a_pl,
2519 ap->a_pl_offset,
2520 ap->a_size,
2521 UPL_ABORT_FREE_ON_EMPTY);
2522 }
2523 return (retval);
2524 }
2525 fp = VTOF(vp);
2526
2527 filesize = fp->ff_size;
2528 end_of_range = ap->a_f_offset + ap->a_size - 1;
2529
2530 if (end_of_range >= filesize) {
2531 end_of_range = (off_t)(filesize - 1);
2532 }
2533 if (ap->a_f_offset < filesize) {
2534 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2535 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2536 }
2537 hfs_unlock(cp);
2538
2539 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2540 ap->a_size, filesize, ap->a_flags);
2541
2542 /*
2543 * If data was written, and setuid or setgid bits are set and
2544 * this process is not the superuser then clear the setuid and
2545 * setgid bits as a precaution against tampering.
2546 */
2547 if ((retval == 0) &&
2548 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2549 (vfs_context_suser(ap->a_context) != 0)) {
2550 hfs_lock(cp, HFS_FORCE_LOCK);
2551 cp->c_mode &= ~(S_ISUID | S_ISGID);
2552 cp->c_touch_chgtime = TRUE;
2553 hfs_unlock(cp);
2554 }
2555 return (retval);
2556 }
2557
2558 /*
2559 * Intercept B-Tree node writes to unswap them if necessary.
2560 */
2561 int
2562 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2563 {
2564 int retval = 0;
2565 register struct buf *bp = ap->a_bp;
2566 register struct vnode *vp = buf_vnode(bp);
2567 BlockDescriptor block;
2568
2569 /* Trap B-Tree writes */
2570 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2571 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2572 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
2573 (vp == VTOHFS(vp)->hfc_filevp)) {
2574
2575 /*
2576 * Swap and validate the node if it is in native byte order.
2577 	 * This is always true on big endian, so we always validate
2578 	 * before writing here. On little endian, the node typically has
2579 	 * been swapped and validated when it was written to the journal,
2580 * so we won't do anything here.
2581 */
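		/*
		 * (Hedged note) The last UInt16 of a B-tree node is the offset of
		 * record 0, which is always sizeof(BTNodeDescriptor) == 14 (0x000e).
		 * Reading 0x000e in host order here implies the node is still in
		 * native byte order and must be swapped to big endian before writing.
		 */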
2582 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2583 /* Prepare the block pointer */
2584 block.blockHeader = bp;
2585 block.buffer = (char *)buf_dataptr(bp);
2586 block.blockNum = buf_lblkno(bp);
2587 /* not found in cache ==> came from disk */
2588 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2589 block.blockSize = buf_count(bp);
2590
2591 /* Endian un-swap B-Tree node */
2592 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
2593 if (retval)
2594 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
2595 }
2596 }
2597
2598 /* This buffer shouldn't be locked anymore but if it is clear it */
2599 if ((buf_flags(bp) & B_LOCKED)) {
2600 // XXXdbg
2601 if (VTOHFS(vp)->jnl) {
2602 panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
2603 }
2604 buf_clearflags(bp, B_LOCKED);
2605 }
2606 retval = vn_bwrite (ap);
2607
2608 return (retval);
2609 }
2610
2611 /*
2612 * Relocate a file to a new location on disk
2613 * cnode must be locked on entry
2614 *
2615 * Relocation occurs by cloning the file's data from its
2616 * current set of blocks to a new set of blocks. During
2617 * the relocation all of the blocks (old and new) are
2618 * owned by the file.
2619 *
2620 * -----------------
2621 * |///////////////|
2622 * -----------------
2623 * 0 N (file offset)
2624 *
2625 * ----------------- -----------------
2626 	 *  |///////////////|     |               |   STEP 1 (acquire new blocks)
2627 * ----------------- -----------------
2628 * 0 N N+1 2N
2629 *
2630 * ----------------- -----------------
2631 * |///////////////| |///////////////| STEP 2 (clone data)
2632 * ----------------- -----------------
2633 * 0 N N+1 2N
2634 *
2635 * -----------------
2636 * |///////////////| STEP 3 (head truncate blocks)
2637 * -----------------
2638 * 0 N
2639 *
2640 * During steps 2 and 3 page-outs to file offsets less
2641 * than or equal to N are suspended.
2642 *
2643 	 * During step 3 page-ins to the file get suspended.
2644 */
2645 __private_extern__
2646 int
2647 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2648 struct proc *p)
2649 {
2650 struct cnode *cp;
2651 struct filefork *fp;
2652 struct hfsmount *hfsmp;
2653 u_int32_t headblks;
2654 u_int32_t datablks;
2655 u_int32_t blksize;
2656 u_int32_t growsize;
2657 u_int32_t nextallocsave;
2658 daddr64_t sector_a, sector_b;
2659 int disabled_caching = 0;
2660 int eflags;
2661 off_t newbytes;
2662 int retval;
2663 int lockflags = 0;
2664 int took_trunc_lock = 0;
2665 int started_tr = 0;
2666 enum vtype vnodetype;
2667
2668 vnodetype = vnode_vtype(vp);
2669 if (vnodetype != VREG && vnodetype != VLNK) {
2670 return (EPERM);
2671 }
2672
2673 hfsmp = VTOHFS(vp);
2674 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2675 return (ENOSPC);
2676 }
2677
2678 cp = VTOC(vp);
2679 fp = VTOF(vp);
2680 if (fp->ff_unallocblocks)
2681 return (EINVAL);
2682 blksize = hfsmp->blockSize;
2683 if (blockHint == 0)
2684 blockHint = hfsmp->nextAllocation;
2685
2686 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2687 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2688 return (EFBIG);
2689 }
2690
2691 //
2692 // We do not believe that this call to hfs_fsync() is
2693 // necessary and it causes a journal transaction
2694 // deadlock so we are removing it.
2695 //
2696 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2697 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2698 // if (retval)
2699 // return (retval);
2700 //}
2701
2702 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2703 hfs_unlock(cp);
2704 hfs_lock_truncate(cp, TRUE);
2705 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2706 hfs_unlock_truncate(cp);
2707 return (retval);
2708 }
2709 took_trunc_lock = 1;
2710 }
2711 headblks = fp->ff_blocks;
2712 datablks = howmany(fp->ff_size, blksize);
2713 growsize = datablks * blksize;
2714 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2715 if (blockHint >= hfsmp->hfs_metazone_start &&
2716 blockHint <= hfsmp->hfs_metazone_end)
2717 eflags |= kEFMetadataMask;
2718
2719 if (hfs_start_transaction(hfsmp) != 0) {
2720 if (took_trunc_lock)
2721 hfs_unlock_truncate(cp);
2722 return (EINVAL);
2723 }
2724 started_tr = 1;
2725 /*
2726 * Protect the extents b-tree and the allocation bitmap
2727 * during MapFileBlockC and ExtendFileC operations.
2728 */
2729 lockflags = SFL_BITMAP;
2730 if (overflow_extents(fp))
2731 lockflags |= SFL_EXTENTS;
2732 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2733
2734 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2735 if (retval) {
2736 retval = MacToVFSError(retval);
2737 goto out;
2738 }
2739
2740 /*
2741 	 * STEP 1 - acquire new allocation blocks.
2742 */
2743 if (!vnode_isnocache(vp)) {
2744 vnode_setnocache(vp);
2745 disabled_caching = 1;
2746
2747 }
2748 nextallocsave = hfsmp->nextAllocation;
2749 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
2750 if (eflags & kEFMetadataMask) {
2751 HFS_MOUNT_LOCK(hfsmp, TRUE);
2752 hfsmp->nextAllocation = nextallocsave;
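		/* (Hedged note) vcbFlags |= 0xFF00 is the MarkVCBDirty() idiom, so the
		   restored nextAllocation value gets written back with the header. */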
2753 hfsmp->vcbFlags |= 0xFF00;
2754 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2755 }
2756
2757 retval = MacToVFSError(retval);
2758 if (retval == 0) {
2759 cp->c_flag |= C_MODIFIED;
2760 if (newbytes < growsize) {
2761 retval = ENOSPC;
2762 goto restore;
2763 } else if (fp->ff_blocks < (headblks + datablks)) {
2764 			printf("hfs_relocate: allocation failed\n");
2765 retval = ENOSPC;
2766 goto restore;
2767 }
2768
2769 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2770 if (retval) {
2771 retval = MacToVFSError(retval);
2772 } else if ((sector_a + 1) == sector_b) {
2773 retval = ENOSPC;
2774 goto restore;
2775 } else if ((eflags & kEFMetadataMask) &&
2776 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2777 hfsmp->hfs_metazone_end)) {
2778 printf("hfs_relocate: didn't move into metadata zone\n");
2779 retval = ENOSPC;
2780 goto restore;
2781 }
2782 }
2783 /* Done with system locks and journal for now. */
2784 hfs_systemfile_unlock(hfsmp, lockflags);
2785 lockflags = 0;
2786 hfs_end_transaction(hfsmp);
2787 started_tr = 0;
2788
2789 if (retval) {
2790 /*
2791 * Check to see if failure is due to excessive fragmentation.
2792 */
2793 if ((retval == ENOSPC) &&
2794 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2795 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2796 }
2797 goto out;
2798 }
2799 /*
2800 * STEP 2 - clone file data into the new allocation blocks.
2801 */
2802
2803 if (vnodetype == VLNK)
2804 retval = hfs_clonelink(vp, blksize, cred, p);
2805 else if (vnode_issystem(vp))
2806 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2807 else
2808 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2809
2810 /* Start transaction for step 3 or for a restore. */
2811 if (hfs_start_transaction(hfsmp) != 0) {
2812 retval = EINVAL;
2813 goto out;
2814 }
2815 started_tr = 1;
2816 if (retval)
2817 goto restore;
2818
2819 /*
2820 * STEP 3 - switch to cloned data and remove old blocks.
2821 */
2822 lockflags = SFL_BITMAP;
2823 if (overflow_extents(fp))
2824 lockflags |= SFL_EXTENTS;
2825 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2826
2827 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2828
2829 hfs_systemfile_unlock(hfsmp, lockflags);
2830 lockflags = 0;
2831 if (retval)
2832 goto restore;
2833 out:
2834 if (took_trunc_lock)
2835 hfs_unlock_truncate(cp);
2836
2837 if (lockflags) {
2838 hfs_systemfile_unlock(hfsmp, lockflags);
2839 lockflags = 0;
2840 }
2841
2842 /* Push cnode's new extent data to disk. */
2843 if (retval == 0) {
2844 (void) hfs_update(vp, MNT_WAIT);
2845 }
2846
2847 if (hfsmp->jnl) {
2848 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2849 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2850 else
2851 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2852 }
2853 exit:
2854 if (disabled_caching) {
2855 vnode_clearnocache(vp);
2856 }
2857 if (started_tr)
2858 hfs_end_transaction(hfsmp);
2859
2860 return (retval);
2861
2862 restore:
2863 if (fp->ff_blocks == headblks)
2864 goto exit;
2865 /*
2866 * Give back any newly allocated space.
2867 */
2868 if (lockflags == 0) {
2869 lockflags = SFL_BITMAP;
2870 if (overflow_extents(fp))
2871 lockflags |= SFL_EXTENTS;
2872 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2873 }
2874
2875 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2876
2877 hfs_systemfile_unlock(hfsmp, lockflags);
2878 lockflags = 0;
2879
2880 if (took_trunc_lock)
2881 hfs_unlock_truncate(cp);
2882 goto exit;
2883 }
2884
2885
2886 /*
2887 * Clone a symlink.
2888 *
2889 */
2890 static int
2891 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2892 {
2893 struct buf *head_bp = NULL;
2894 struct buf *tail_bp = NULL;
2895 int error;
2896
2897
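	/*
	 * A symlink's target fits in a single allocation block: logical block 0
	 * holds the original copy, logical block 1 is the newly allocated clone
	 * destination.
	 */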
2898 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2899 if (error)
2900 goto out;
2901
2902 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2903 if (tail_bp == NULL) {
2904 error = EIO;
2905 goto out;
2906 }
2907 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2908 error = (int)buf_bwrite(tail_bp);
2909 out:
2910 if (head_bp) {
2911 buf_markinvalid(head_bp);
2912 buf_brelse(head_bp);
2913 }
2914 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2915
2916 return (error);
2917 }
2918
2919 /*
2920 * Clone a file's data within the file.
2921 *
2922 */
2923 static int
2924 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2925 {
2926 caddr_t bufp;
2927 size_t writebase;
2928 size_t bufsize;
2929 size_t copysize;
2930 size_t iosize;
2931 off_t filesize;
2932 size_t offset;
2933 uio_t auio;
2934 int error = 0;
2935
2936 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2937 writebase = blkstart * blksize;
2938 copysize = blkcnt * blksize;
2939 iosize = bufsize = MIN(copysize, 128 * 1024);
2940 offset = 0;
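	/*
	 * Copy in chunks of at most 128 KB: read from the start of the fork (the
	 * old blocks) and rewrite the same data at writebase, the first of the
	 * newly allocated blocks, bypassing the buffer cache (IO_NOCACHE).
	 */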
2941
2942 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2943 return (ENOMEM);
2944 }
2945 hfs_unlock(VTOC(vp));
2946
2947 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2948
2949 while (offset < copysize) {
2950 iosize = MIN(copysize - offset, iosize);
2951
2952 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2953 uio_addiov(auio, (uintptr_t)bufp, iosize);
2954
2955 error = cluster_read(vp, auio, copysize, 0);
2956 if (error) {
2957 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2958 break;
2959 }
2960 if (uio_resid(auio) != 0) {
2961 			printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2962 error = EIO;
2963 break;
2964 }
2965
2966 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2967 uio_addiov(auio, (uintptr_t)bufp, iosize);
2968
2969 error = cluster_write(vp, auio, filesize + offset,
2970 filesize + offset + iosize,
2971 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2972 if (error) {
2973 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2974 break;
2975 }
2976 if (uio_resid(auio) != 0) {
2977 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2978 error = EIO;
2979 break;
2980 }
2981 offset += iosize;
2982 }
2983 uio_free(auio);
2984
2985 /*
2986 * No need to call ubc_sync_range or hfs_invalbuf
2987 * since the file was copied using IO_NOCACHE.
2988 */
2989
2990 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2991
2992 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2993 return (error);
2994 }
2995
2996 /*
2997 * Clone a system (metadata) file.
2998 *
2999 */
3000 static int
3001 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
3002 kauth_cred_t cred, struct proc *p)
3003 {
3004 caddr_t bufp;
3005 char * offset;
3006 size_t bufsize;
3007 size_t iosize;
3008 struct buf *bp = NULL;
3009 daddr64_t blkno;
3010 daddr64_t blk;
3011 daddr64_t start_blk;
3012 daddr64_t last_blk;
3013 int breadcnt;
3014 int i;
3015 int error = 0;
3016
3017
3018 iosize = GetLogicalBlockSize(vp);
3019 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
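	/* Cap the staging buffer at 1 MB and round it down to a multiple of the
	   device's logical block size so each pass moves whole blocks. */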
3020 breadcnt = bufsize / iosize;
3021
3022 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3023 return (ENOMEM);
3024 }
3025 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3026 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
3027 blkno = 0;
3028
3029 while (blkno < last_blk) {
3030 /*
3031 * Read up to a megabyte
3032 */
3033 offset = bufp;
3034 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3035 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3036 if (error) {
3037 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3038 goto out;
3039 }
3040 if (buf_count(bp) != iosize) {
3041 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3042 goto out;
3043 }
3044 bcopy((char *)buf_dataptr(bp), offset, iosize);
3045
3046 buf_markinvalid(bp);
3047 buf_brelse(bp);
3048 bp = NULL;
3049
3050 offset += iosize;
3051 }
3052
3053 /*
3054 * Write up to a megabyte
3055 */
3056 offset = bufp;
3057 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3058 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3059 if (bp == NULL) {
3060 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3061 error = EIO;
3062 goto out;
3063 }
3064 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3065 error = (int)buf_bwrite(bp);
3066 bp = NULL;
3067 if (error)
3068 goto out;
3069 offset += iosize;
3070 }
3071 }
3072 out:
3073 if (bp) {
3074 buf_brelse(bp);
3075 }
3076
3077 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3078
3079 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3080
3081 return (error);
3082 }