1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_LICENSE_HEADER_START@
5 *
6 * The contents of this file constitute Original Code as defined in and
7 * are subject to the Apple Public Source License Version 1.1 (the
8 * "License"). You may not use this file except in compliance with the
9 * License. Please obtain a copy of the License at
10 * http://www.apple.com/publicsource and read it before using this file.
11 *
12 * This Original Code and all software distributed under the License are
13 * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
14 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
15 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
16 * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT. Please see the
17 * License for the specific language governing rights and limitations
18 * under the License.
19 *
20 * @APPLE_LICENSE_HEADER_END@
21 */
22 /* @(#)hfs_readwrite.c 1.0
23 *
24 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
25 *
26 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
27 *
28 */
29
30 #include <sys/param.h>
31 #include <sys/systm.h>
32 #include <sys/resourcevar.h>
33 #include <sys/kernel.h>
34 #include <sys/fcntl.h>
35 #include <sys/filedesc.h>
36 #include <sys/stat.h>
37 #include <sys/buf.h>
38 #include <sys/proc.h>
39 #include <sys/kauth.h>
40 #include <sys/vnode.h>
41 #include <sys/uio.h>
42 #include <sys/vfs_context.h>
43 #include <sys/disk.h>
44 #include <sys/sysctl.h>
45
46 #include <miscfs/specfs/specdev.h>
47
48 #include <sys/ubc.h>
49 #include <vm/vm_pageout.h>
50 #include <vm/vm_kern.h>
51
52 #include <sys/kdebug.h>
53
54 #include "hfs.h"
55 #include "hfs_endian.h"
56 #include "hfs_fsctl.h"
57 #include "hfs_quota.h"
58 #include "hfscommon/headers/FileMgrInternal.h"
59 #include "hfscommon/headers/BTreesInternal.h"
60 #include "hfs_cnode.h"
61 #include "hfs_dbg.h"
62
63 extern int overflow_extents(struct filefork *fp);
64
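/* can_cluster(size) is true when size is a multiple of 4096 bytes and no larger than MAXPHYSIO/2. */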
65 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
66
67 enum {
68 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
69 };
70
71 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
72
73 extern int hfs_setextendedsecurity(struct hfsmount *, int);
74
75
76 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
77 static int hfs_clonefile(struct vnode *, int, int, int);
78 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
79
80
81 int flush_cache_on_write = 0;
82 SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
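/* Illustrative usage (an assumption, not part of this file): this tunable is published as
   kern.flush_cache_on_write and could be enabled at runtime with
   `sysctl -w kern.flush_cache_on_write=1`. */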
83
84
85 /*****************************************************************************
86 *
87 * I/O Operations on vnodes
88 *
89 *****************************************************************************/
90 int hfs_vnop_read(struct vnop_read_args *);
91 int hfs_vnop_write(struct vnop_write_args *);
92 int hfs_vnop_ioctl(struct vnop_ioctl_args *);
93 int hfs_vnop_select(struct vnop_select_args *);
94 int hfs_vnop_blktooff(struct vnop_blktooff_args *);
95 int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
96 int hfs_vnop_blockmap(struct vnop_blockmap_args *);
97 int hfs_vnop_strategy(struct vnop_strategy_args *);
98 int hfs_vnop_allocate(struct vnop_allocate_args *);
99 int hfs_vnop_pagein(struct vnop_pagein_args *);
100 int hfs_vnop_pageout(struct vnop_pageout_args *);
101 int hfs_vnop_bwrite(struct vnop_bwrite_args *);
102
103
104 /*
105 * Read data from a file.
106 */
107 int
108 hfs_vnop_read(struct vnop_read_args *ap)
109 {
110 uio_t uio = ap->a_uio;
111 struct vnode *vp = ap->a_vp;
112 struct cnode *cp;
113 struct filefork *fp;
114 struct hfsmount *hfsmp;
115 off_t filesize;
116 off_t filebytes;
117 off_t start_resid = uio_resid(uio);
118 off_t offset = uio_offset(uio);
119 int retval = 0;
120
121
122 /* Preflight checks */
123 if (!vnode_isreg(vp)) {
124 /* can only read regular files */
125 if (vnode_isdir(vp))
126 return (EISDIR);
127 else
128 return (EPERM);
129 }
130 if (start_resid == 0)
131 return (0); /* Nothing left to do */
132 if (offset < 0)
133 return (EINVAL); /* can't read from a negative offset */
134
135 cp = VTOC(vp);
136 fp = VTOF(vp);
137 hfsmp = VTOHFS(vp);
138
139 /* Protect against a size change. */
140 hfs_lock_truncate(cp, 0);
141
142 filesize = fp->ff_size;
143 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
144 if (offset > filesize) {
145 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
146 (offset > (off_t)MAXHFSFILESIZE)) {
147 retval = EFBIG;
148 }
149 goto exit;
150 }
151
152 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
153 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
154
155 retval = cluster_read(vp, uio, filesize, 0);
156
157 cp->c_touch_acctime = TRUE;
158
159 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
160 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
161
162 /*
163 * Keep track of blocks read (used while hot-file recording is active)
164 */
165 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
166 int took_cnode_lock = 0;
167 off_t bytesread;
168
169 bytesread = start_resid - uio_resid(uio);
170
171 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
172 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
173 hfs_lock(cp, HFS_FORCE_LOCK);
174 took_cnode_lock = 1;
175 }
176 /*
177 * If this file hasn't been seen since the start of
178 * the current sampling period then start over.
179 */
180 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
181 struct timeval tv;
182
183 fp->ff_bytesread = bytesread;
184 microtime(&tv);
185 cp->c_atime = tv.tv_sec;
186 } else {
187 fp->ff_bytesread += bytesread;
188 }
189 if (took_cnode_lock)
190 hfs_unlock(cp);
191 }
192 exit:
193 hfs_unlock_truncate(cp);
194 return (retval);
195 }
196
197 /*
198 * Write data to a file.
199 */
200 int
201 hfs_vnop_write(struct vnop_write_args *ap)
202 {
203 uio_t uio = ap->a_uio;
204 struct vnode *vp = ap->a_vp;
205 struct cnode *cp;
206 struct filefork *fp;
207 struct hfsmount *hfsmp;
208 kauth_cred_t cred = NULL;
209 off_t origFileSize;
210 off_t writelimit;
211 off_t bytesToAdd;
212 off_t actualBytesAdded;
213 off_t filebytes;
214 off_t offset;
215 size_t resid;
216 int eflags;
217 int ioflag = ap->a_ioflag;
218 int retval = 0;
219 int lockflags;
220 int cnode_locked = 0;
221
222 // LP64todo - fix this! uio_resid may be 64-bit value
223 resid = uio_resid(uio);
224 offset = uio_offset(uio);
225
226 if (offset < 0)
227 return (EINVAL);
228 if (resid == 0)
229 return (E_NONE);
230 if (!vnode_isreg(vp))
231 return (EPERM); /* Can only write regular files */
232
233 /* Protect against a size change. */
234 hfs_lock_truncate(VTOC(vp), TRUE);
235
236 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
237 hfs_unlock_truncate(VTOC(vp));
238 return (retval);
239 }
240 cnode_locked = 1;
241 cp = VTOC(vp);
242 fp = VTOF(vp);
243 hfsmp = VTOHFS(vp);
244 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
245
246 if (ioflag & IO_APPEND) {
247 uio_setoffset(uio, fp->ff_size);
248 offset = fp->ff_size;
249 }
250 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
251 retval = EPERM;
252 goto exit;
253 }
254
255 origFileSize = fp->ff_size;
256 eflags = kEFDeferMask; /* defer file block allocations */
257
258 #ifdef HFS_SPARSE_DEV
259 /*
260 * When the underlying device is sparse and free space is low
261 * (fewer than 2048 allocation blocks, roughly 8 MB at a 4 KB block size),
262 * stop doing delayed allocations and begin doing synchronous I/O.
263 */
264 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
265 (hfs_freeblks(hfsmp, 0) < 2048)) {
266 eflags &= ~kEFDeferMask;
267 ioflag |= IO_SYNC;
268 }
269 #endif /* HFS_SPARSE_DEV */
270
271 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
272 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
273
274 /* Now test if we need to extend the file */
275 /* Doing so will adjust the filebytes for us */
276
277 writelimit = offset + resid;
278 if (writelimit <= filebytes)
279 goto sizeok;
280
281 cred = vfs_context_ucred(ap->a_context);
282 #if QUOTA
283 bytesToAdd = writelimit - filebytes;
284 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
285 cred, 0);
286 if (retval)
287 goto exit;
288 #endif /* QUOTA */
289
290 if (hfs_start_transaction(hfsmp) != 0) {
291 retval = EINVAL;
292 goto exit;
293 }
294
295 while (writelimit > filebytes) {
296 bytesToAdd = writelimit - filebytes;
297 if (cred && suser(cred, NULL) != 0)
298 eflags |= kEFReserveMask;
299
300 /* Protect extents b-tree and allocation bitmap */
301 lockflags = SFL_BITMAP;
302 if (overflow_extents(fp))
303 lockflags |= SFL_EXTENTS;
304 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
305
306 /* Files that are changing size are not hot file candidates. */
307 if (hfsmp->hfc_stage == HFC_RECORDING) {
308 fp->ff_bytesread = 0;
309 }
310 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
311 0, eflags, &actualBytesAdded));
312
313 hfs_systemfile_unlock(hfsmp, lockflags);
314
315 if ((actualBytesAdded == 0) && (retval == E_NONE))
316 retval = ENOSPC;
317 if (retval != E_NONE)
318 break;
319 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
320 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
321 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
322 }
323 (void) hfs_update(vp, TRUE);
324 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
325 (void) hfs_end_transaction(hfsmp);
326
327 sizeok:
328 if (retval == E_NONE) {
329 off_t filesize;
330 off_t zero_off;
331 off_t tail_off;
332 off_t inval_start;
333 off_t inval_end;
334 off_t io_start;
335 int lflag;
336 struct rl_entry *invalid_range;
337
338 if (writelimit > fp->ff_size)
339 filesize = writelimit;
340 else
341 filesize = fp->ff_size;
342
343 lflag = (ioflag & IO_SYNC);
344
345 if (offset <= fp->ff_size) {
346 zero_off = offset & ~PAGE_MASK_64;
347
348 /* Check whether the area between zero_off and the start
349 of the transfer is invalid and should be zero-filled
350 as part of the transfer:
351 */
352 if (offset > zero_off) {
353 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
354 lflag |= IO_HEADZEROFILL;
355 }
356 } else {
357 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
358
359 /* The bytes between fp->ff_size and uio->uio_offset must never be
360 read without being zeroed. The current last block is filled with zeroes
361 if it holds valid data but in all cases merely do a little bookkeeping
362 to track the area from the end of the current last page to the start of
363 the area actually written. For the same reason only the bytes up to the
364 start of the page where this write will start are invalidated; any remainder
365 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
366
367 Note that inval_start, the start of the page after the current EOF,
368 may be past the start of the write, in which case the zeroing
369 will be handled by the cluster_write of the actual data.
370 */
371 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
372 inval_end = offset & ~PAGE_MASK_64;
373 zero_off = fp->ff_size;
374
375 if ((fp->ff_size & PAGE_MASK_64) &&
376 (rl_scan(&fp->ff_invalidranges,
377 eof_page_base,
378 fp->ff_size - 1,
379 &invalid_range) != RL_NOOVERLAP)) {
380 /* The page containing the EOF is not valid, so the
381 entire page must be made inaccessible now. If the write
382 starts on a page beyond the page containing the eof
383 (inval_end > eof_page_base), add the
384 whole page to the range to be invalidated. Otherwise
385 (i.e. if the write starts on the same page), zero-fill
386 the entire page explicitly now:
387 */
388 if (inval_end > eof_page_base) {
389 inval_start = eof_page_base;
390 } else {
391 zero_off = eof_page_base;
392 };
393 };
394
395 if (inval_start < inval_end) {
396 struct timeval tv;
397 /* There's some range of data that's going to be marked invalid */
398
399 if (zero_off < inval_start) {
400 /* The pages between inval_start and inval_end are going to be invalidated,
401 and the actual write will start on a page past inval_end. Now's the last
402 chance to zero-fill the page containing the EOF:
403 */
404 hfs_unlock(cp);
405 cnode_locked = 0;
406 retval = cluster_write(vp, (uio_t) 0,
407 fp->ff_size, inval_start,
408 zero_off, (off_t)0,
409 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
410 hfs_lock(cp, HFS_FORCE_LOCK);
411 cnode_locked = 1;
412 if (retval) goto ioerr_exit;
413 offset = uio_offset(uio);
414 };
415
416 /* Mark the remaining area of the newly allocated space as invalid: */
417 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
418 microuptime(&tv);
419 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
420 zero_off = fp->ff_size = inval_end;
421 };
422
423 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
424 };
425
426 /* Check to see whether the area between the end of the write and the end of
427 the page it falls in is invalid and should be zero-filled as part of the transfer:
428 */
429 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
430 if (tail_off > filesize) tail_off = filesize;
431 if (tail_off > writelimit) {
432 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
433 lflag |= IO_TAILZEROFILL;
434 };
435 };
436
437 /*
438 * if the write starts beyond the current EOF (possibly advanced in the
439 * zeroing of the last block, above), then we'll zero fill from the current EOF
440 * to where the write begins:
441 *
442 * NOTE: If (and ONLY if) the portion of the file about to be written is
443 * before the current EOF it might be marked as invalid now and must be
444 * made readable (removed from the invalid ranges) before cluster_write
445 * tries to write it:
446 */
447 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
448 if (io_start < fp->ff_size) {
449 off_t io_end;
450
451 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
452 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
453 };
454
455 hfs_unlock(cp);
456 cnode_locked = 0;
457 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
458 tail_off, lflag | IO_NOZERODIRTY);
459 offset = uio_offset(uio);
460 if (offset > fp->ff_size) {
461 fp->ff_size = offset;
462
463 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
464 /* Files that are changing size are not hot file candidates. */
465 if (hfsmp->hfc_stage == HFC_RECORDING)
466 fp->ff_bytesread = 0;
467 }
468 if (resid > uio_resid(uio)) {
469 cp->c_touch_chgtime = TRUE;
470 cp->c_touch_modtime = TRUE;
471 }
472 }
473
474 // XXXdbg - testing for vivek and paul lambert
475 {
476 if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
477 VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
478 }
479 }
480 HFS_KNOTE(vp, NOTE_WRITE);
481
482 ioerr_exit:
483 /*
484 * If we successfully wrote any data and we are not the superuser,
485 * we clear the setuid and setgid bits as a precaution against
486 * tampering.
487 */
488 if (cp->c_mode & (S_ISUID | S_ISGID)) {
489 cred = vfs_context_ucred(ap->a_context);
490 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
491 if (!cnode_locked) {
492 hfs_lock(cp, HFS_FORCE_LOCK);
493 cnode_locked = 1;
494 }
495 cp->c_mode &= ~(S_ISUID | S_ISGID);
496 }
497 }
498 if (retval) {
499 if (ioflag & IO_UNIT) {
500 if (!cnode_locked) {
501 hfs_lock(cp, HFS_FORCE_LOCK);
502 cnode_locked = 1;
503 }
504 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
505 0, ap->a_context);
506 // LP64todo - fix this! resid needs to be user_ssize_t
507 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
508 uio_setresid(uio, resid);
509 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
510 }
511 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
512 if (!cnode_locked) {
513 hfs_lock(cp, HFS_FORCE_LOCK);
514 cnode_locked = 1;
515 }
516 retval = hfs_update(vp, TRUE);
517 }
518 /* Updating vcbWrCnt doesn't need to be atomic. */
519 hfsmp->vcbWrCnt++;
520
521 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
522 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
523 exit:
524 if (cnode_locked)
525 hfs_unlock(cp);
526 hfs_unlock_truncate(cp);
527 return (retval);
528 }
529
530 /* support for the "bulk-access" fcntl */
531
532 #define CACHE_ELEMS 64
533 #define CACHE_LEVELS 16
534 #define PARENT_IDS_FLAG 0x100
535
536 /* from hfs_attrlist.c */
537 extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
538 mode_t obj_mode, struct mount *mp,
539 kauth_cred_t cred, struct proc *p);
540
541 /* from vfs/vfs_fsevents.c */
542 extern char *get_pathbuff(void);
543 extern void release_pathbuff(char *buff);
544
545 struct access_cache {
546 int numcached;
547 int cachehits; /* these two for statistics gathering */
548 int lookups;
549 unsigned int *acache;
550 Boolean *haveaccess;
551 };
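/* acache holds parent-directory cnids kept sorted in ascending order (see
   lookup_bucket/add_node below); haveaccess is the parallel array of results. */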
552
553 struct access_t {
554 uid_t uid; /* IN: effective user id */
555 short flags; /* IN: access requested (i.e. R_OK) */
556 short num_groups; /* IN: number of groups user belongs to */
557 int num_files; /* IN: number of files to process */
558 int *file_ids; /* IN: array of file ids */
559 gid_t *groups; /* IN: array of groups */
560 short *access; /* OUT: access info for each file (0 for 'has access') */
561 };
562
563 struct user_access_t {
564 uid_t uid; /* IN: effective user id */
565 short flags; /* IN: access requested (i.e. R_OK) */
566 short num_groups; /* IN: number of groups user belongs to */
567 int num_files; /* IN: number of files to process */
568 user_addr_t file_ids; /* IN: array of file ids */
569 user_addr_t groups; /* IN: array of groups */
570 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
571 };
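/*
 * Illustrative sketch of the userspace side (an assumption, not part of this
 * file): a 32-bit caller would typically fill in a struct access_t and issue
 * the bulk-access request against a file on the volume through fsctl(2),
 * roughly as follows (HFSIOC_BULKACCESS is the ioctl defined further below;
 * the exact userspace constant and header are assumptions):
 *
 *     struct access_t a;
 *     int ids[2] = { 1234, 5678 };      // hypothetical catalog file IDs
 *     short result[2];
 *     gid_t groups[1] = { getgid() };
 *
 *     a.uid = getuid();
 *     a.flags = R_OK;
 *     a.num_groups = 1;
 *     a.num_files = 2;
 *     a.file_ids = ids;
 *     a.groups = groups;
 *     a.access = result;
 *     fsctl("/Volumes/HFSVol/somefile", HFSIOC_BULKACCESS, &a, 0);
 *
 * On return, result[i] is 0 when access is granted and an errno value
 * otherwise. The handler below requires the caller to be root and accepts at
 * most 256 file IDs per call.
 */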
572
573 /*
574 * Perform a binary search for the given parent_id. Return value is
575 * found/not found boolean, and indexp will be the index of the item
576 * or the index at which to insert the item if it's not found.
577 */
578 static int
579 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
580 {
581 unsigned int lo, hi;
582 int index, matches = 0;
583
584 if (cache->numcached == 0) {
585 *indexp = 0;
586 return 0; // table is empty, so insert at index=0 and report no match
587 }
588
589 if (cache->numcached > CACHE_ELEMS) {
590 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
591 cache->numcached, CACHE_ELEMS);*/
592 cache->numcached = CACHE_ELEMS;
593 }
594
595 lo = 0;
596 hi = cache->numcached - 1;
597 index = -1;
598
599 /* perform binary search for parent_id */
600 do {
601 unsigned int mid = (hi - lo)/2 + lo;
602 unsigned int this_id = cache->acache[mid];
603
604 if (parent_id == this_id) {
605 index = mid;
606 break;
607 }
608
609 if (parent_id < this_id) {
610 hi = mid;
611 continue;
612 }
613
614 if (parent_id > this_id) {
615 lo = mid + 1;
616 continue;
617 }
618 } while(lo < hi);
619
620 /* check if lo and hi converged on the match */
621 if (parent_id == cache->acache[hi]) {
622 index = hi;
623 }
624
625 /* if no existing entry found, find index for new one */
626 if (index == -1) {
627 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
628 matches = 0;
629 } else {
630 matches = 1;
631 }
632
633 *indexp = index;
634 return matches;
635 }
636
637 /*
638 * Add a node to the access_cache at the given index (or do a lookup first
639 * to find the index if -1 is passed in). We currently do a replace rather
640 * than an insert if the cache is full.
641 */
642 static void
643 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
644 {
645 int lookup_index = -1;
646
647 /* need to do a lookup first if -1 passed for index */
648 if (index == -1) {
649 if (lookup_bucket(cache, &lookup_index, nodeID)) {
650 if (cache->haveaccess[lookup_index] != access) {
651 /* change access info for existing entry... should never happen */
652 cache->haveaccess[lookup_index] = access;
653 }
654
655 /* mission accomplished */
656 return;
657 } else {
658 index = lookup_index;
659 }
660
661 }
662
663 /* if the cache is full, do a replace rather than an insert */
664 if (cache->numcached >= CACHE_ELEMS) {
665 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
666 cache->numcached = CACHE_ELEMS-1;
667
668 if (index > cache->numcached) {
669 // printf("index %d pinned to %d\n", index, cache->numcached);
670 index = cache->numcached;
671 }
672 } else if (index >= 0 && index < cache->numcached) {
673 /* only do bcopy if we're inserting */
674 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
675 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
676 }
677
678 cache->acache[index] = nodeID;
679 cache->haveaccess[index] = access;
680 cache->numcached++;
681 }
682
683
684 struct cinfo {
685 uid_t uid;
686 gid_t gid;
687 mode_t mode;
688 cnid_t parentcnid;
689 };
690
691 static int
692 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
693 {
694 struct cinfo *cip = (struct cinfo *)arg;
695
696 cip->uid = attrp->ca_uid;
697 cip->gid = attrp->ca_gid;
698 cip->mode = attrp->ca_mode;
699 cip->parentcnid = descp->cd_parentcnid;
700
701 return (0);
702 }
703
704 /*
705 * Look up the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
706 * isn't incore, then go to the catalog.
707 */
708 static int
709 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
710 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
711 {
712 int error = 0;
713
714 /* if this id matches the one the fsctl was called with, skip the lookup */
715 if (cnid == skip_cp->c_cnid) {
716 cnattrp->ca_uid = skip_cp->c_uid;
717 cnattrp->ca_gid = skip_cp->c_gid;
718 cnattrp->ca_mode = skip_cp->c_mode;
719 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
720 } else {
721 struct cinfo c_info;
722
723 /* otherwise, check the cnode hash in case the file/dir is incore */
724 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
725 cnattrp->ca_uid = c_info.uid;
726 cnattrp->ca_gid = c_info.gid;
727 cnattrp->ca_mode = c_info.mode;
728 keyp->hfsPlus.parentID = c_info.parentcnid;
729 } else {
730 int lockflags;
731
732 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
733
734 /* lookup this cnid in the catalog */
735 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
736
737 hfs_systemfile_unlock(hfsmp, lockflags);
738
739 cache->lookups++;
740 }
741 }
742
743 return (error);
744 }
745
746 /*
747 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
748 * up to CACHE_LEVELS as we progress towards the root.
749 */
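/*
 * Summary (derived from the code below): returns 1 when the caller has search
 * (X_OK) permission on nodeID and on every ancestor up to the root, and 0
 * otherwise; *err carries any lookup error. Root is granted access without
 * walking the hierarchy.
 */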
750 static int
751 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
752 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
753 {
754 int myErr = 0;
755 int myResult;
756 HFSCatalogNodeID thisNodeID;
757 unsigned long myPerms;
758 struct cat_attr cnattr;
759 int cache_index = -1;
760 CatalogKey catkey;
761
762 int i = 0, ids_to_cache = 0;
763 int parent_ids[CACHE_LEVELS];
764
765 /* root always has access */
766 if (!suser(myp_ucred, NULL)) {
767 return (1);
768 }
769
770 thisNodeID = nodeID;
771 while (thisNodeID >= kRootDirID) {
772 myResult = 0; /* default to "no access" */
773
774 /* check the cache before resorting to hitting the catalog */
775
776 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
777 * to look any further after hitting cached dir */
778
779 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
780 cache->cachehits++;
781 myResult = cache->haveaccess[cache_index];
782 goto ExitThisRoutine;
783 }
784
785 /* remember which parents we want to cache */
786 if (ids_to_cache < CACHE_LEVELS) {
787 parent_ids[ids_to_cache] = thisNodeID;
788 ids_to_cache++;
789 }
790
791 /* do the lookup (checks the cnode hash, then the catalog) */
792 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
793 if (myErr) {
794 goto ExitThisRoutine; /* no access */
795 }
796
797 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
798 cnattr.ca_mode, hfsmp->hfs_mp,
799 myp_ucred, theProcPtr);
800
801 if ( (myPerms & X_OK) == 0 ) {
802 myResult = 0;
803 goto ExitThisRoutine; /* no access */
804 }
805
806 /* up the hierarchy we go */
807 thisNodeID = catkey.hfsPlus.parentID;
808 }
809
810 /* if here, we have access to this node */
811 myResult = 1;
812
813 ExitThisRoutine:
814 if (myErr) {
815 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
816 myResult = 0;
817 }
818 *err = myErr;
819
820 /* cache the parent directory(ies) */
821 for (i = 0; i < ids_to_cache; i++) {
822 /* small optimization: get rid of double-lookup for all these */
823 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
824 add_node(cache, -1, parent_ids[i], myResult);
825 }
826
827 return (myResult);
828 }
829 /* end "bulk-access" support */
830
831
832
833 /*
834 * Callback for use with freeze ioctl.
835 */
836 static int
837 hfs_freezewrite_callback(struct vnode *vp, void *cargs)
838 {
839 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
840
841 return 0;
842 }
843
844 /*
845 * Control filesystem operating characteristics.
846 */
847 int
848 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
849 vnode_t a_vp;
850 int a_command;
851 caddr_t a_data;
852 int a_fflag;
853 vfs_context_t a_context;
854 } */ *ap)
855 {
856 struct vnode * vp = ap->a_vp;
857 struct hfsmount *hfsmp = VTOHFS(vp);
858 vfs_context_t context = ap->a_context;
859 kauth_cred_t cred = vfs_context_ucred(context);
860 proc_t p = vfs_context_proc(context);
861 struct vfsstatfs *vfsp;
862 boolean_t is64bit;
863
864 is64bit = proc_is64bit(p);
865
866 switch (ap->a_command) {
867
868 case HFS_RESIZE_VOLUME: {
869 u_int64_t newsize;
870 u_int64_t cursize;
871
872 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
873 if (suser(cred, NULL) &&
874 kauth_cred_getuid(cred) != vfsp->f_owner) {
875 return (EACCES); /* must be owner of file system */
876 }
877 if (!vnode_isvroot(vp)) {
878 return (EINVAL);
879 }
880 newsize = *(u_int64_t *)ap->a_data;
881 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
882
883 if (newsize > cursize) {
884 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
885 } else if (newsize < cursize) {
886 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
887 } else {
888 return (0);
889 }
890 }
891 case HFS_CHANGE_NEXT_ALLOCATION: {
892 u_int32_t location;
893
894 if (vnode_vfsisrdonly(vp)) {
895 return (EROFS);
896 }
897 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
898 if (suser(cred, NULL) &&
899 kauth_cred_getuid(cred) != vfsp->f_owner) {
900 return (EACCES); /* must be owner of file system */
901 }
902 if (!vnode_isvroot(vp)) {
903 return (EINVAL);
904 }
905 location = *(u_int32_t *)ap->a_data;
906 if (location > hfsmp->totalBlocks - 1) {
907 return (EINVAL);
908 }
909 /* Return previous value. */
910 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
911 HFS_MOUNT_LOCK(hfsmp, TRUE);
912 hfsmp->nextAllocation = location;
913 hfsmp->vcbFlags |= 0xFF00;
914 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
915 return (0);
916 }
917
918 #ifdef HFS_SPARSE_DEV
919 case HFS_SETBACKINGSTOREINFO: {
920 struct vnode * bsfs_rootvp;
921 struct vnode * di_vp;
922 struct hfs_backingstoreinfo *bsdata;
923 int error = 0;
924
925 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
926 return (EALREADY);
927 }
928 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
929 if (suser(cred, NULL) &&
930 kauth_cred_getuid(cred) != vfsp->f_owner) {
931 return (EACCES); /* must be owner of file system */
932 }
933 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
934 if (bsdata == NULL) {
935 return (EINVAL);
936 }
937 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
938 return (error);
939 }
940 if ((error = vnode_getwithref(di_vp))) {
941 file_drop(bsdata->backingfd);
942 return(error);
943 }
944
945 if (vnode_mount(vp) == vnode_mount(di_vp)) {
946 (void)vnode_put(di_vp);
947 file_drop(bsdata->backingfd);
948 return (EINVAL);
949 }
950
951 /*
952 * Obtain the backing fs root vnode and keep a reference
953 * on it. This reference will be dropped in hfs_unmount.
954 */
955 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
956 if (error) {
957 (void)vnode_put(di_vp);
958 file_drop(bsdata->backingfd);
959 return (error);
960 }
961 vnode_ref(bsfs_rootvp);
962 vnode_put(bsfs_rootvp);
963
964 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
965 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
966 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
967 hfsmp->hfs_sparsebandblks *= 4;
968
969 (void)vnode_put(di_vp);
970 file_drop(bsdata->backingfd);
971 return (0);
972 }
973 case HFS_CLRBACKINGSTOREINFO: {
974 struct vnode * tmpvp;
975
976 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
977 if (suser(cred, NULL) &&
978 kauth_cred_getuid(cred) != vfsp->f_owner) {
979 return (EACCES); /* must be owner of file system */
980 }
981 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
982 hfsmp->hfs_backingfs_rootvp) {
983
984 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
985 tmpvp = hfsmp->hfs_backingfs_rootvp;
986 hfsmp->hfs_backingfs_rootvp = NULLVP;
987 hfsmp->hfs_sparsebandblks = 0;
988 vnode_rele(tmpvp);
989 }
990 return (0);
991 }
992 #endif /* HFS_SPARSE_DEV */
993
994 case F_FREEZE_FS: {
995 struct mount *mp;
996 task_t task;
997
998 if (!is_suser())
999 return (EACCES);
1000
1001 mp = vnode_mount(vp);
1002 hfsmp = VFSTOHFS(mp);
1003
1004 if (!(hfsmp->jnl))
1005 return (ENOTSUP);
1006
1007 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
1008
1009 task = current_task();
1010 task_working_set_disable(task);
1011
1012 // flush things before we get started to try and prevent
1013 // dirty data from being paged out while we're frozen.
1014 // note: can't do this after taking the lock as it will
1015 // deadlock against ourselves.
1016 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1017 hfs_global_exclusive_lock_acquire(hfsmp);
1018 journal_flush(hfsmp->jnl);
1019
1020 // don't need to iterate on all vnodes, we just need to
1021 // wait for writes to the system files and the device vnode
1022 if (HFSTOVCB(hfsmp)->extentsRefNum)
1023 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1024 if (HFSTOVCB(hfsmp)->catalogRefNum)
1025 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1026 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1027 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1028 if (hfsmp->hfs_attribute_vp)
1029 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1030 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1031
1032 hfsmp->hfs_freezing_proc = current_proc();
1033
1034 return (0);
1035 }
1036
1037 case F_THAW_FS: {
1038 if (!is_suser())
1039 return (EACCES);
1040
1041 // if we're not the one who froze the fs then we
1042 // can't thaw it.
1043 if (hfsmp->hfs_freezing_proc != current_proc()) {
1044 return EPERM;
1045 }
1046
1047 // NOTE: if you add code here, also go check the
1048 // code that "thaws" the fs in hfs_vnop_close()
1049 //
1050 hfsmp->hfs_freezing_proc = NULL;
1051 hfs_global_exclusive_lock_release(hfsmp);
1052 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1053
1054 return (0);
1055 }
1056
1057 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1058 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1059
1060 case HFS_BULKACCESS_FSCTL:
1061 case HFS_BULKACCESS: {
1062 /*
1063 * NOTE: on entry, the vnode is locked. In case this vnode
1064 * happens to be in our list of file_ids, we note it so we
1065 * avoid calling hfs_chashget_nowait() on that id, as that
1066 * will cause a "locking against myself" panic.
1067 */
1068 Boolean check_leaf = true;
1069
1070 struct user_access_t *user_access_structp;
1071 struct user_access_t tmp_user_access_t;
1072 struct access_cache cache;
1073
1074 int error = 0, i;
1075
1076 dev_t dev = VTOC(vp)->c_dev;
1077
1078 short flags;
1079 struct ucred myucred; /* XXX ILLEGAL */
1080 int num_files;
1081 int *file_ids = NULL;
1082 short *access = NULL;
1083
1084 cnid_t cnid;
1085 cnid_t prevParent_cnid = 0;
1086 unsigned long myPerms;
1087 short myaccess = 0;
1088 struct cat_attr cnattr;
1089 CatalogKey catkey;
1090 struct cnode *skip_cp = VTOC(vp);
1091 struct vfs_context my_context;
1092
1093 /* first, return error if not run as root */
1094 if (cred->cr_ruid != 0) {
1095 return EPERM;
1096 }
1097
1098 /* initialize the local cache and buffers */
1099 cache.numcached = 0;
1100 cache.cachehits = 0;
1101 cache.lookups = 0;
1102
1103 file_ids = (int *) get_pathbuff();
1104 access = (short *) get_pathbuff();
1105 cache.acache = (int *) get_pathbuff();
1106 cache.haveaccess = (Boolean *) get_pathbuff();
1107
1108 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1109 release_pathbuff((char *) file_ids);
1110 release_pathbuff((char *) access);
1111 release_pathbuff((char *) cache.acache);
1112 release_pathbuff((char *) cache.haveaccess);
1113
1114 return ENOMEM;
1115 }
1116
1117 /* struct copyin done during dispatch... need to copy file_id array separately */
1118 if (ap->a_data == NULL) {
1119 error = EINVAL;
1120 goto err_exit_bulk_access;
1121 }
1122
1123 if (is64bit) {
1124 user_access_structp = (struct user_access_t *)ap->a_data;
1125 }
1126 else {
1127 struct access_t * accessp = (struct access_t *)ap->a_data;
1128 tmp_user_access_t.uid = accessp->uid;
1129 tmp_user_access_t.flags = accessp->flags;
1130 tmp_user_access_t.num_groups = accessp->num_groups;
1131 tmp_user_access_t.num_files = accessp->num_files;
1132 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1133 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1134 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1135 user_access_structp = &tmp_user_access_t;
1136 }
1137
1138 num_files = user_access_structp->num_files;
1139 if (num_files < 1) {
1140 goto err_exit_bulk_access;
1141 }
1142 if (num_files > 256) {
1143 error = EINVAL;
1144 goto err_exit_bulk_access;
1145 }
1146
1147 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1148 num_files * sizeof(int)))) {
1149 goto err_exit_bulk_access;
1150 }
1151
1152 /* fill in the ucred structure */
1153 flags = user_access_structp->flags;
1154 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1155 flags = R_OK;
1156 }
1157
1158 /* check if we've been passed leaf node ids or parent ids */
1159 if (flags & PARENT_IDS_FLAG) {
1160 check_leaf = false;
1161 }
1162
1163 memset(&myucred, 0, sizeof(myucred));
1164 myucred.cr_ref = 1;
1165 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1166 myucred.cr_ngroups = user_access_structp->num_groups;
1167 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1168 myucred.cr_ngroups = 0;
1169 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1170 myucred.cr_ngroups * sizeof(gid_t)))) {
1171 goto err_exit_bulk_access;
1172 }
1173 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1174 myucred.cr_gmuid = myucred.cr_uid;
1175
1176 my_context.vc_proc = p;
1177 my_context.vc_ucred = &myucred;
1178
1179 /* Check access to each file_id passed in */
1180 for (i = 0; i < num_files; i++) {
1181 #if 0
1182 cnid = (cnid_t) file_ids[i];
1183
1184 /* root always has access */
1185 if (!suser(&myucred, NULL)) {
1186 access[i] = 0;
1187 continue;
1188 }
1189
1190 if (check_leaf) {
1191
1192 /* do the lookup (checks the cnode hash, then the catalog) */
1193 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1194 if (error) {
1195 access[i] = (short) error;
1196 continue;
1197 }
1198
1199 /* before calling CheckAccess(), check the target file for read access */
1200 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1201 cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p );
1202
1203
1204 /* fail fast if no access */
1205 if ((myPerms & flags) == 0) {
1206 access[i] = EACCES;
1207 continue;
1208 }
1209 } else {
1210 /* we were passed an array of parent ids */
1211 catkey.hfsPlus.parentID = cnid;
1212 }
1213
1214 /* if the last guy had the same parent and had access, we're done */
1215 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1216 cache.cachehits++;
1217 access[i] = 0;
1218 continue;
1219 }
1220
1221 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1222 skip_cp, p, &myucred, dev);
1223
1224 if ( myaccess ) {
1225 access[i] = 0; // have access.. no errors to report
1226 } else {
1227 access[i] = (error != 0 ? (short) error : EACCES);
1228 }
1229
1230 prevParent_cnid = catkey.hfsPlus.parentID;
1231 #else
1232 int myErr;
1233
1234 cnid = (cnid_t)file_ids[i];
1235
1236 while (cnid >= kRootDirID) {
1237 /* get the vnode for this cnid */
1238 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1239 if ( myErr ) {
1240 access[i] = EACCES;
1241 break;
1242 }
1243
1244 cnid = VTOC(vp)->c_parentcnid;
1245
1246 hfs_unlock(VTOC(vp));
1247 if (vnode_vtype(vp) == VDIR) {
1248 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1249 } else {
1250 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1251 }
1252 vnode_put(vp);
1253 access[i] = myErr;
1254 if (myErr) {
1255 break;
1256 }
1257 }
1258 #endif
1259 }
1260
1261 /* copyout the access array */
1262 if ((error = copyout((caddr_t)access, user_access_structp->access,
1263 num_files * sizeof (short)))) {
1264 goto err_exit_bulk_access;
1265 }
1266
1267 err_exit_bulk_access:
1268
1269 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1270
1271 release_pathbuff((char *) cache.acache);
1272 release_pathbuff((char *) cache.haveaccess);
1273 release_pathbuff((char *) file_ids);
1274 release_pathbuff((char *) access);
1275
1276 return (error);
1277 } /* HFS_BULKACCESS */
1278
1279 case HFS_SETACLSTATE: {
1280 int state;
1281
1282 if (ap->a_data == NULL) {
1283 return (EINVAL);
1284 }
1285
1286 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1287 state = *(int *)ap->a_data;
1288
1289 // super-user can enable or disable acl's on a volume.
1290 // the volume owner can only enable acl's
1291 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1292 return (EPERM);
1293 }
1294 if (state == 0 || state == 1)
1295 return hfs_setextendedsecurity(hfsmp, state);
1296 else
1297 return (EINVAL);
1298 }
1299
1300 case F_FULLFSYNC: {
1301 int error;
1302
1303 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1304 if (error == 0) {
1305 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1306 hfs_unlock(VTOC(vp));
1307 }
1308
1309 return error;
1310 }
1311
1312 case F_CHKCLEAN: {
1313 register struct cnode *cp;
1314 int error;
1315
1316 if (!vnode_isreg(vp))
1317 return EINVAL;
1318
1319 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1320 if (error == 0) {
1321 cp = VTOC(vp);
1322 /*
1323 * used by regression test to determine if
1324 * all the dirty pages (via write) have been cleaned
1325 * after a call to 'fsync'.
1326 */
1327 error = is_file_clean(vp, VTOF(vp)->ff_size);
1328 hfs_unlock(cp);
1329 }
1330 return (error);
1331 }
1332
1333 case F_RDADVISE: {
1334 register struct radvisory *ra;
1335 struct filefork *fp;
1336 int error;
1337
1338 if (!vnode_isreg(vp))
1339 return EINVAL;
1340
1341 ra = (struct radvisory *)(ap->a_data);
1342 fp = VTOF(vp);
1343
1344 /* Protect against a size change. */
1345 hfs_lock_truncate(VTOC(vp), TRUE);
1346
1347 if (ra->ra_offset >= fp->ff_size) {
1348 error = EFBIG;
1349 } else {
1350 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1351 }
1352
1353 hfs_unlock_truncate(VTOC(vp));
1354 return (error);
1355 }
1356
1357 case F_READBOOTSTRAP:
1358 case F_WRITEBOOTSTRAP:
1359 {
1360 struct vnode *devvp = NULL;
1361 user_fbootstraptransfer_t *user_bootstrapp;
1362 int devBlockSize;
1363 int error;
1364 uio_t auio;
1365 daddr64_t blockNumber;
1366 u_long blockOffset;
1367 u_long xfersize;
1368 struct buf *bp;
1369 user_fbootstraptransfer_t user_bootstrap;
1370
1371 if (!vnode_isvroot(vp))
1372 return (EINVAL);
1373 /* LP64 - when caller is a 64 bit process then we are passed a pointer
1374 * to a user_fbootstraptransfer_t else we get a pointer to a
1375 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1376 */
1377 if (is64bit) {
1378 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1379 }
1380 else {
1381 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1382 user_bootstrapp = &user_bootstrap;
1383 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1384 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1385 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1386 }
1387 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1388 return EINVAL;
1389
1390 devvp = VTOHFS(vp)->hfs_devvp;
1391 auio = uio_create(1, user_bootstrapp->fbt_offset,
1392 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1393 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1394 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1395
1396 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1397
1398 while (uio_resid(auio) > 0) {
1399 blockNumber = uio_offset(auio) / devBlockSize;
1400 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1401 if (error) {
1402 if (bp) buf_brelse(bp);
1403 uio_free(auio);
1404 return error;
1405 };
1406
1407 blockOffset = uio_offset(auio) % devBlockSize;
1408 xfersize = devBlockSize - blockOffset;
1409 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1410 if (error) {
1411 buf_brelse(bp);
1412 uio_free(auio);
1413 return error;
1414 };
1415 if (uio_rw(auio) == UIO_WRITE) {
1416 error = VNOP_BWRITE(bp);
1417 if (error) {
1418 uio_free(auio);
1419 return error;
1420 }
1421 } else {
1422 buf_brelse(bp);
1423 };
1424 };
1425 uio_free(auio);
1426 };
1427 return 0;
1428
1429 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1430 {
1431 if (is64bit) {
1432 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1433 }
1434 else {
1435 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1436 }
1437 return 0;
1438 }
1439
1440 case HFS_GET_MOUNT_TIME:
1441 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1442 break;
1443
1444 case HFS_GET_LAST_MTIME:
1445 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1446 break;
1447
1448 case HFS_SET_BOOT_INFO:
1449 if (!vnode_isvroot(vp))
1450 return(EINVAL);
1451 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1452 return(EACCES); /* must be superuser or owner of filesystem */
1453 HFS_MOUNT_LOCK(hfsmp, TRUE);
1454 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1455 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1456 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1457 break;
1458
1459 case HFS_GET_BOOT_INFO:
1460 if (!vnode_isvroot(vp))
1461 return(EINVAL);
1462 HFS_MOUNT_LOCK(hfsmp, TRUE);
1463 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1464 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1465 break;
1466
1467 default:
1468 return (ENOTTY);
1469 }
1470
1471 /* Should never get here */
1472 return 0;
1473 }
1474
1475 /*
1476 * select
1477 */
1478 int
1479 hfs_vnop_select(__unused struct vnop_select_args *ap)
1480 /*
1481 struct vnop_select_args {
1482 vnode_t a_vp;
1483 int a_which;
1484 int a_fflags;
1485 void *a_wql;
1486 vfs_context_t a_context;
1487 };
1488 */
1489 {
1490 /*
1491 * We should really check to see if I/O is possible.
1492 */
1493 return (1);
1494 }
1495
1496 /*
1497 * Converts a logical block number to a physical block, and optionally returns
1498 * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1499 * The physical block number is based on the device block size, currently it's 512.
1500 * The block run is returned in logical blocks, and is the REMAINING number of blocks.
1501 */
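/*
 * Illustrative example (assumed values): with a logical block size of 4096 and
 * a 512-byte device block size, logical block 10 maps to byte offset 40960;
 * if MapFileBlockC reports 32768 contiguous bytes there and can_cluster(4096)
 * holds, *runp is set to (32768 / 4096) - 1 = 7, i.e. seven more logical
 * blocks remain in the run.
 */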
1502 int
1503 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1504 {
1505 struct cnode *cp = VTOC(vp);
1506 struct filefork *fp = VTOF(vp);
1507 struct hfsmount *hfsmp = VTOHFS(vp);
1508 int retval = E_NONE;
1509 daddr_t logBlockSize;
1510 size_t bytesContAvail = 0;
1511 off_t blockposition;
1512 int lockExtBtree;
1513 int lockflags = 0;
1514
1515 /*
1516 * Check for underlying vnode requests and ensure that logical
1517 * to physical mapping is requested.
1518 */
1519 if (vpp != NULL)
1520 *vpp = cp->c_devvp;
1521 if (bnp == NULL)
1522 return (0);
1523
1524 logBlockSize = GetLogicalBlockSize(vp);
1525 blockposition = (off_t)bn * (off_t)logBlockSize;
1526
1527 lockExtBtree = overflow_extents(fp);
1528
1529 if (lockExtBtree)
1530 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1531
1532 retval = MacToVFSError(
1533 MapFileBlockC (HFSTOVCB(hfsmp),
1534 (FCB*)fp,
1535 MAXPHYSIO,
1536 blockposition,
1537 bnp,
1538 &bytesContAvail));
1539
1540 if (lockExtBtree)
1541 hfs_systemfile_unlock(hfsmp, lockflags);
1542
1543 if (retval == E_NONE) {
1544 /* Figure out how many read ahead blocks there are */
1545 if (runp != NULL) {
1546 if (can_cluster(logBlockSize)) {
1547 /* Make sure this result never goes negative: */
1548 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1549 } else {
1550 *runp = 0;
1551 }
1552 }
1553 }
1554 return (retval);
1555 }
1556
1557 /*
1558 * Convert logical block number to file offset.
1559 */
1560 int
1561 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1562 /*
1563 struct vnop_blktooff_args {
1564 vnode_t a_vp;
1565 daddr64_t a_lblkno;
1566 off_t *a_offset;
1567 };
1568 */
1569 {
1570 if (ap->a_vp == NULL)
1571 return (EINVAL);
1572 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1573
1574 return(0);
1575 }
1576
1577 /*
1578 * Convert file offset to logical block number.
1579 */
1580 int
1581 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1582 /*
1583 struct vnop_offtoblk_args {
1584 vnode_t a_vp;
1585 off_t a_offset;
1586 daddr64_t *a_lblkno;
1587 };
1588 */
1589 {
1590 if (ap->a_vp == NULL)
1591 return (EINVAL);
1592 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1593
1594 return(0);
1595 }
1596
1597 /*
1598 * Map file offset to physical block number.
1599 *
1600 * System file cnodes are expected to be locked (shared or exclusive).
1601 */
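/*
 * Illustrative summary (derived from the code below): on success *a_bpn holds
 * the starting physical (device) block for a_foffset, or -1 when the offset
 * falls within an invalid (not-yet-written) range; when a_run is non-NULL it
 * returns the number of contiguous bytes known to be mappable at that offset.
 */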
1602 int
1603 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1604 /*
1605 struct vnop_blockmap_args {
1606 vnode_t a_vp;
1607 off_t a_foffset;
1608 size_t a_size;
1609 daddr64_t *a_bpn;
1610 size_t *a_run;
1611 void *a_poff;
1612 int a_flags;
1613 vfs_context_t a_context;
1614 };
1615 */
1616 {
1617 struct vnode *vp = ap->a_vp;
1618 struct cnode *cp;
1619 struct filefork *fp;
1620 struct hfsmount *hfsmp;
1621 size_t bytesContAvail = 0;
1622 int retval = E_NONE;
1623 int syslocks = 0;
1624 int lockflags = 0;
1625 struct rl_entry *invalid_range;
1626 enum rl_overlaptype overlaptype;
1627 int started_tr = 0;
1628 int tooklock = 0;
1629
1630 /* Do not allow blockmap operation on a directory */
1631 if (vnode_isdir(vp)) {
1632 return (ENOTSUP);
1633 }
1634
1635 /*
1636 * Check for underlying vnode requests and ensure that logical
1637 * to physical mapping is requested.
1638 */
1639 if (ap->a_bpn == NULL)
1640 return (0);
1641
1642 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1643 if (VTOC(vp)->c_lockowner != current_thread()) {
1644 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1645 tooklock = 1;
1646 } else {
1647 cp = VTOC(vp);
1648 panic("blockmap: %s cnode lock already held!\n",
1649 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1650 }
1651 }
1652 hfsmp = VTOHFS(vp);
1653 cp = VTOC(vp);
1654 fp = VTOF(vp);
1655
1656 retry:
1657 if (fp->ff_unallocblocks) {
1658 if (hfs_start_transaction(hfsmp) != 0) {
1659 retval = EINVAL;
1660 goto exit;
1661 } else {
1662 started_tr = 1;
1663 }
1664 syslocks = SFL_EXTENTS | SFL_BITMAP;
1665
1666 } else if (overflow_extents(fp)) {
1667 syslocks = SFL_EXTENTS;
1668 }
1669
1670 if (syslocks)
1671 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1672
1673 /*
1674 * Check for any delayed allocations.
1675 */
1676 if (fp->ff_unallocblocks) {
1677 SInt64 actbytes;
1678 u_int32_t loanedBlocks;
1679
1680 //
1681 // Make sure we have a transaction. It's possible
1682 // that we came in and fp->ff_unallocblocks was zero
1683 // but during the time we blocked acquiring the extents
1684 // btree, ff_unallocblocks became non-zero and so we
1685 // will need to start a transaction.
1686 //
1687 if (started_tr == 0) {
1688 if (syslocks) {
1689 hfs_systemfile_unlock(hfsmp, lockflags);
1690 syslocks = 0;
1691 }
1692 goto retry;
1693 }
1694
1695 /*
1696 * Note: ExtendFileC will release any blocks on loan and
1697 * acquire real blocks. So we ask to extend by zero bytes
1698 * since ExtendFileC will account for the virtual blocks.
1699 */
1700
1701 loanedBlocks = fp->ff_unallocblocks;
1702 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1703 kEFAllMask | kEFNoClumpMask, &actbytes);
1704
1705 if (retval) {
1706 fp->ff_unallocblocks = loanedBlocks;
1707 cp->c_blocks += loanedBlocks;
1708 fp->ff_blocks += loanedBlocks;
1709
1710 HFS_MOUNT_LOCK(hfsmp, TRUE);
1711 hfsmp->loanedBlocks += loanedBlocks;
1712 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1713 }
1714
1715 if (retval) {
1716 hfs_systemfile_unlock(hfsmp, lockflags);
1717 cp->c_flag |= C_MODIFIED;
1718 if (started_tr) {
1719 (void) hfs_update(vp, TRUE);
1720 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1721
1722 hfs_end_transaction(hfsmp);
1723 }
1724 goto exit;
1725 }
1726 }
1727
1728 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1729 ap->a_bpn, &bytesContAvail);
1730 if (syslocks) {
1731 hfs_systemfile_unlock(hfsmp, lockflags);
1732 syslocks = 0;
1733 }
1734
1735 if (started_tr) {
1736 (void) hfs_update(vp, TRUE);
1737 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1738 hfs_end_transaction(hfsmp);
1739 started_tr = 0;
1740 }
1741 if (retval) {
1742 goto exit;
1743 }
1744
1745 /* Adjust the mapping information for invalid file ranges: */
1746 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1747 ap->a_foffset + (off_t)bytesContAvail - 1,
1748 &invalid_range);
1749 if (overlaptype != RL_NOOVERLAP) {
1750 switch(overlaptype) {
1751 case RL_MATCHINGOVERLAP:
1752 case RL_OVERLAPCONTAINSRANGE:
1753 case RL_OVERLAPSTARTSBEFORE:
1754 /* There's no valid block for this byte offset: */
1755 *ap->a_bpn = (daddr64_t)-1;
1756 /* There's no point limiting the amount to be returned
1757 * if the invalid range that was hit extends all the way
1758 * to the EOF (i.e. there's no valid bytes between the
1759 * end of this range and the file's EOF):
1760 */
1761 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1762 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1763 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1764 }
1765 break;
1766
1767 case RL_OVERLAPISCONTAINED:
1768 case RL_OVERLAPENDSAFTER:
1769 /* The range of interest hits an invalid block before the end: */
1770 if (invalid_range->rl_start == ap->a_foffset) {
1771 /* There's actually no valid information to be had starting here: */
1772 *ap->a_bpn = (daddr64_t)-1;
1773 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1774 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1775 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1776 }
1777 } else {
1778 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1779 }
1780 break;
1781
1782 case RL_NOOVERLAP:
1783 break;
1784 } /* end switch */
1785 if (bytesContAvail > ap->a_size)
1786 bytesContAvail = ap->a_size;
1787 }
1788 if (ap->a_run)
1789 *ap->a_run = bytesContAvail;
1790
1791 if (ap->a_poff)
1792 *(int *)ap->a_poff = 0;
1793 exit:
1794 if (tooklock)
1795 hfs_unlock(cp);
1796
1797 return (MacToVFSError(retval));
1798 }
1799
1800
1801 /*
1802 * Prepare and issue the I/O;
1803 * buf_strategy knows how to deal
1804 * with requests that require
1805 * fragmented I/Os.
1806 */
1807 int
1808 hfs_vnop_strategy(struct vnop_strategy_args *ap)
1809 {
1810 buf_t bp = ap->a_bp;
1811 vnode_t vp = buf_vnode(bp);
1812 struct cnode *cp = VTOC(vp);
1813
1814 return (buf_strategy(cp->c_devvp, ap));
1815 }
1816
1817
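/*
 * Common truncate/extend helper (summary inferred from the code below): grows
 * or shrinks the fork to 'length' bytes, allocating or releasing allocation
 * blocks as needed, zero-filling or invalidating the affected page ranges, and
 * calling ubc_setsize() unless skipsetsize is set (nested transactions do
 * their own ubc_setsize).
 */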
1818 static int
1819 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1820 {
1821 register struct cnode *cp = VTOC(vp);
1822 struct filefork *fp = VTOF(vp);
1823 struct proc *p = vfs_context_proc(context);
1824 kauth_cred_t cred = vfs_context_ucred(context);
1825 int retval;
1826 off_t bytesToAdd;
1827 off_t actualBytesAdded;
1828 off_t filebytes;
1829 u_int64_t old_filesize;
1830 u_long fileblocks;
1831 int blksize;
1832 struct hfsmount *hfsmp;
1833 int lockflags;
1834
1835 blksize = VTOVCB(vp)->blockSize;
1836 fileblocks = fp->ff_blocks;
1837 filebytes = (off_t)fileblocks * (off_t)blksize;
1838 old_filesize = fp->ff_size;
1839
1840 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1841 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1842
1843 if (length < 0)
1844 return (EINVAL);
1845
1846 /* This should only happen with a corrupt filesystem */
1847 if ((off_t)fp->ff_size < 0)
1848 return (EINVAL);
1849
1850 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1851 return (EFBIG);
1852
1853 hfsmp = VTOHFS(vp);
1854
1855 retval = E_NONE;
1856
1857 /* Files that are changing size are not hot file candidates. */
1858 if (hfsmp->hfc_stage == HFC_RECORDING) {
1859 fp->ff_bytesread = 0;
1860 }
1861
1862 /*
1863 * We cannot just check if fp->ff_size == length (as an optimization)
1864 * since there may be extra physical blocks that also need truncation.
1865 */
1866 #if QUOTA
1867 if ((retval = hfs_getinoquota(cp)))
1868 return(retval);
1869 #endif /* QUOTA */
1870
1871 /*
1872 * Lengthen the size of the file. We must ensure that the
1873 * last byte of the file is allocated. Since the smallest
1874 * value of ff_size is 0, length will be at least 1.
1875 */
1876 if (length > (off_t)fp->ff_size) {
1877 #if QUOTA
1878 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1879 cred, 0);
1880 if (retval)
1881 goto Err_Exit;
1882 #endif /* QUOTA */
1883 /*
1884 * If we don't have enough physical space then
1885 * we need to extend the physical size.
1886 */
1887 if (length > filebytes) {
1888 int eflags;
1889 u_long blockHint = 0;
1890
1891 /* All or nothing and don't round up to clumpsize. */
1892 eflags = kEFAllMask | kEFNoClumpMask;
1893
1894 if (cred && suser(cred, NULL) != 0)
1895 eflags |= kEFReserveMask; /* keep a reserve */
1896
1897 /*
1898 * Allocate Journal and Quota files in metadata zone.
1899 */
1900 if (filebytes == 0 &&
1901 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1902 hfs_virtualmetafile(cp)) {
1903 eflags |= kEFMetadataMask;
1904 blockHint = hfsmp->hfs_metazone_start;
1905 }
1906 if (hfs_start_transaction(hfsmp) != 0) {
1907 retval = EINVAL;
1908 goto Err_Exit;
1909 }
1910
1911 /* Protect extents b-tree and allocation bitmap */
1912 lockflags = SFL_BITMAP;
1913 if (overflow_extents(fp))
1914 lockflags |= SFL_EXTENTS;
1915 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1916
1917 while ((length > filebytes) && (retval == E_NONE)) {
1918 bytesToAdd = length - filebytes;
1919 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1920 (FCB*)fp,
1921 bytesToAdd,
1922 blockHint,
1923 eflags,
1924 &actualBytesAdded));
1925
1926 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1927 if (actualBytesAdded == 0 && retval == E_NONE) {
1928 if (length > filebytes)
1929 length = filebytes;
1930 break;
1931 }
1932 } /* endwhile */
1933
1934 hfs_systemfile_unlock(hfsmp, lockflags);
1935
1936 if (hfsmp->jnl) {
1937 (void) hfs_update(vp, TRUE);
1938 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1939 }
1940
1941 hfs_end_transaction(hfsmp);
1942
1943 if (retval)
1944 goto Err_Exit;
1945
1946 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1947 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1948 }
1949
1950 if (!(flags & IO_NOZEROFILL)) {
1951 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1952 struct rl_entry *invalid_range;
1953 off_t zero_limit;
1954
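			/*
			 * zero_limit is the current EOF rounded up to the next
			 * page boundary, clamped so we never zero-fill past the
			 * new length.
			 */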
1955 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1956 if (length < zero_limit) zero_limit = length;
1957
1958 if (length > (off_t)fp->ff_size) {
1959 struct timeval tv;
1960
1961 /* Extending the file: time to fill out the current last page with zeroes? */
1962 if ((fp->ff_size & PAGE_MASK_64) &&
1963 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1964 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1965
1966 /* There's some valid data at the start of the (current) last page
1967 of the file, so zero out the remainder of that page to ensure the
1968 entire page contains valid data. Since there is no invalid range
1969 possible past the (current) eof, there's no need to remove anything
1970 from the invalid range list before calling cluster_write(): */
1971 hfs_unlock(cp);
1972 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1973 fp->ff_size, (off_t)0,
1974 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1975 hfs_lock(cp, HFS_FORCE_LOCK);
1976 if (retval) goto Err_Exit;
1977
1978 /* Merely invalidate the remaining area, if necessary: */
1979 if (length > zero_limit) {
1980 microuptime(&tv);
1981 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1982 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1983 }
1984 } else {
1985 /* The page containing the (current) eof is invalid: just add the
1986 remainder of the page to the invalid list, along with the area
1987 being newly allocated:
1988 */
1989 microuptime(&tv);
1990 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1991 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1992 };
1993 }
1994 } else {
1995 panic("hfs_truncate: invoked on non-UBC object?!");
1996 };
1997 }
1998 cp->c_touch_modtime = TRUE;
1999 fp->ff_size = length;
2000
2001 /* Nested transactions will do their own ubc_setsize. */
2002 if (!skipsetsize) {
2003 /*
2004 * ubc_setsize can cause a pagein here
2005 * so we need to drop cnode lock.
2006 */
2007 hfs_unlock(cp);
2008 ubc_setsize(vp, length);
2009 hfs_lock(cp, HFS_FORCE_LOCK);
2010 }
2011
2012 } else { /* Shorten the size of the file */
2013
2014 if ((off_t)fp->ff_size > length) {
2015 /*
2016 * Any buffers that are past the truncation point need to be
2017 * invalidated (to maintain buffer cache consistency).
2018 */
2019
2020 /* Nested transactions will do their own ubc_setsize. */
2021 if (!skipsetsize) {
2022 /*
2023 * ubc_setsize can cause a pageout here
2024 * so we need to drop cnode lock.
2025 */
2026 hfs_unlock(cp);
2027 ubc_setsize(vp, length);
2028 hfs_lock(cp, HFS_FORCE_LOCK);
2029 }
2030
2031 /* Any space previously marked as invalid is now irrelevant: */
2032 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2033 }
2034
2035 /*
2036 * Account for any unmapped blocks. Note that the new
2037 * file length can still end up with unmapped blocks.
2038 */
2039 if (fp->ff_unallocblocks > 0) {
2040 u_int32_t finalblks;
2041 u_int32_t loanedBlocks;
2042
2043 HFS_MOUNT_LOCK(hfsmp, TRUE);
2044
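			/* Give back the blocks that were loaned to this fork. */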
2045 loanedBlocks = fp->ff_unallocblocks;
2046 cp->c_blocks -= loanedBlocks;
2047 fp->ff_blocks -= loanedBlocks;
2048 fp->ff_unallocblocks = 0;
2049
2050 hfsmp->loanedBlocks -= loanedBlocks;
2051
2052 finalblks = (length + blksize - 1) / blksize;
2053 if (finalblks > fp->ff_blocks) {
2054 /* calculate required unmapped blocks */
2055 loanedBlocks = finalblks - fp->ff_blocks;
2056 hfsmp->loanedBlocks += loanedBlocks;
2057
2058 fp->ff_unallocblocks = loanedBlocks;
2059 cp->c_blocks += loanedBlocks;
2060 fp->ff_blocks += loanedBlocks;
2061 }
2062 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2063 }
2064
2065 /*
2066 * For a TBE process, deallocation of the file blocks is
2067 * delayed until the file is closed, and hfs_close calls
2068 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2069 * isn't set, we make sure this isn't a TBE process.
2070 */
2071 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2072 #if QUOTA
2073 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2074 #endif /* QUOTA */
2075 if (hfs_start_transaction(hfsmp) != 0) {
2076 retval = EINVAL;
2077 goto Err_Exit;
2078 }
2079
2080 if (fp->ff_unallocblocks == 0) {
2081 /* Protect extents b-tree and allocation bitmap */
2082 lockflags = SFL_BITMAP;
2083 if (overflow_extents(fp))
2084 lockflags |= SFL_EXTENTS;
2085 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2086
2087 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2088 (FCB*)fp, length, false));
2089
2090 hfs_systemfile_unlock(hfsmp, lockflags);
2091 }
2092 if (hfsmp->jnl) {
2093 if (retval == 0) {
2094 fp->ff_size = length;
2095 }
2096 (void) hfs_update(vp, TRUE);
2097 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2098 }
2099
2100 hfs_end_transaction(hfsmp);
2101
2102 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2103 if (retval)
2104 goto Err_Exit;
2105 #if QUOTA
2106 /* These are bytes released */
2107 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2108 #endif /* QUOTA */
2109 }
2110 /* Only set update flag if the logical length changes */
2111 if (old_filesize != length)
2112 cp->c_touch_modtime = TRUE;
2113 fp->ff_size = length;
2114 }
2115 cp->c_touch_chgtime = TRUE;
2116 retval = hfs_update(vp, MNT_WAIT);
2117 if (retval) {
2118 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2119 -1, -1, -1, retval, 0);
2120 }
2121
2122 Err_Exit:
2123
2124 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2125 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2126
2127 return (retval);
2128 }
2129
2130
2131
2132 /*
2133 * Truncate (or extend) a cnode to the given length, freeing (or adding)
2134 * the disk blocks as needed.
2135 */
2136 __private_extern__
2137 int
2138 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2139 vfs_context_t context)
2140 {
2141 struct filefork *fp = VTOF(vp);
2142 off_t filebytes;
2143 u_long fileblocks;
2144 int blksize, error = 0;
2145 struct cnode *cp = VTOC(vp);
2146
2147 if (vnode_isdir(vp))
2148 return (EISDIR); /* cannot truncate an HFS directory! */
2149
2150 blksize = VTOVCB(vp)->blockSize;
2151 fileblocks = fp->ff_blocks;
2152 filebytes = (off_t)fileblocks * (off_t)blksize;
2153
2154 // have to loop truncating or growing files that are
2155 // really big because otherwise transactions can get
2156 // enormous and consume too many kernel resources.
2157
2158 if (length < filebytes) {
2159 while (filebytes > length) {
2160 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2161 filebytes -= HFS_BIGFILE_SIZE;
2162 } else {
2163 filebytes = length;
2164 }
2165 cp->c_flag |= C_FORCEUPDATE;
2166 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2167 if (error)
2168 break;
2169 }
2170 } else if (length > filebytes) {
2171 while (filebytes < length) {
2172 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2173 filebytes += HFS_BIGFILE_SIZE;
2174 } else {
2175 filebytes = length;
2176 }
2177 cp->c_flag |= C_FORCEUPDATE;
2178 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2179 if (error)
2180 break;
2181 }
2182 } else /* Same logical size */ {
2183
2184 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2185 }
2186 /* Files that are changing size are not hot file candidates. */
2187 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2188 fp->ff_bytesread = 0;
2189 }
2190
2191 return (error);
2192 }
2193
2194
2195
2196 /*
2197 * Preallocate file storage space.
2198 */
2199 int
2200 hfs_vnop_allocate(struct vnop_allocate_args /* {
2201 vnode_t a_vp;
2202 off_t a_length;
2203 u_int32_t a_flags;
2204 off_t *a_bytesallocated;
2205 off_t a_offset;
2206 vfs_context_t a_context;
2207 } */ *ap)
2208 {
2209 struct vnode *vp = ap->a_vp;
2210 struct cnode *cp;
2211 struct filefork *fp;
2212 ExtendedVCB *vcb;
2213 off_t length = ap->a_length;
2214 off_t startingPEOF;
2215 off_t moreBytesRequested;
2216 off_t actualBytesAdded;
2217 off_t filebytes;
2218 u_long fileblocks;
2219 int retval, retval2;
2220 UInt32 blockHint;
2221 UInt32 extendFlags; /* For call to ExtendFileC */
2222 struct hfsmount *hfsmp;
2223 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2224 int lockflags;
2225
2226 *(ap->a_bytesallocated) = 0;
2227
2228 if (!vnode_isreg(vp))
2229 return (EISDIR);
2230 if (length < (off_t)0)
2231 return (EINVAL);
2232
2233 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2234 return (retval);
2235 cp = VTOC(vp);
2236 fp = VTOF(vp);
2237 hfsmp = VTOHFS(vp);
2238 vcb = VTOVCB(vp);
2239
2240 fileblocks = fp->ff_blocks;
2241 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2242
2243 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2244 retval = EINVAL;
2245 goto Err_Exit;
2246 }
2247
2248 /* Fill in the flags word for the call to Extend the file */
2249
2250 extendFlags = kEFNoClumpMask;
2251 if (ap->a_flags & ALLOCATECONTIG)
2252 extendFlags |= kEFContigMask;
2253 if (ap->a_flags & ALLOCATEALL)
2254 extendFlags |= kEFAllMask;
2255 if (cred && suser(cred, NULL) != 0)
2256 extendFlags |= kEFReserveMask;
2257
2258 retval = E_NONE;
2259 blockHint = 0;
2260 startingPEOF = filebytes;
2261
2262 if (ap->a_flags & ALLOCATEFROMPEOF)
2263 length += filebytes;
2264 else if (ap->a_flags & ALLOCATEFROMVOL)
2265 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2266
2267 /* If no changes are necessary, then we're done */
2268 if (filebytes == length)
2269 goto Std_Exit;
2270
2271 /*
2272 * Lengthen the size of the file. We must ensure that the
2273 * last byte of the file is allocated. Since the smallest
2274 * value of filebytes is 0, length will be at least 1.
2275 */
2276 if (length > filebytes) {
2277 moreBytesRequested = length - filebytes;
2278
2279 #if QUOTA
2280 retval = hfs_chkdq(cp,
2281 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2282 cred, 0);
2283 if (retval)
2284 goto Err_Exit;
2285
2286 #endif /* QUOTA */
2287 /*
2288 * Metadata zone checks.
2289 */
2290 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2291 /*
2292 * Allocate Journal and Quota files in metadata zone.
2293 */
2294 if (hfs_virtualmetafile(cp)) {
2295 extendFlags |= kEFMetadataMask;
2296 blockHint = hfsmp->hfs_metazone_start;
2297 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2298 (blockHint <= hfsmp->hfs_metazone_end)) {
2299 /*
2300 * Move blockHint outside metadata zone.
2301 */
2302 blockHint = hfsmp->hfs_metazone_end + 1;
2303 }
2304 }
2305
2306 if (hfs_start_transaction(hfsmp) != 0) {
2307 retval = EINVAL;
2308 goto Err_Exit;
2309 }
2310
2311 /* Protect extents b-tree and allocation bitmap */
2312 lockflags = SFL_BITMAP;
2313 if (overflow_extents(fp))
2314 lockflags |= SFL_EXTENTS;
2315 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2316
2317 retval = MacToVFSError(ExtendFileC(vcb,
2318 (FCB*)fp,
2319 moreBytesRequested,
2320 blockHint,
2321 extendFlags,
2322 &actualBytesAdded));
2323
2324 *(ap->a_bytesallocated) = actualBytesAdded;
2325 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2326
2327 hfs_systemfile_unlock(hfsmp, lockflags);
2328
2329 if (hfsmp->jnl) {
2330 (void) hfs_update(vp, TRUE);
2331 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2332 }
2333
2334 hfs_end_transaction(hfsmp);
2335
2336 /*
2337 * If we get an error and no changes were made then exit;
2338 * otherwise we must do the hfs_update to reflect the changes.
2339 */
2340 if (retval && (startingPEOF == filebytes))
2341 goto Err_Exit;
2342
2343 /*
2344 * Adjust actualBytesAdded to be allocation block aligned, not
2345 * clump size aligned.
2346 * NOTE: What we are reporting here does not take effect on disk
2347 * until the file is closed, when we truncate the file to allocation
2348 * block size.
2349 */
2350 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2351 *(ap->a_bytesallocated) =
2352 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2353
2354 } else { /* Shorten the size of the file */
2355
2356 if (fp->ff_size > length) {
2357 /*
2358 * Any buffers that are past the truncation point need to be
2359 * invalidated (to maintain buffer cache consistency).
2360 */
2361 }
2362
2363 if (hfs_start_transaction(hfsmp) != 0) {
2364 retval = EINVAL;
2365 goto Err_Exit;
2366 }
2367
2368 /* Protect extents b-tree and allocation bitmap */
2369 lockflags = SFL_BITMAP;
2370 if (overflow_extents(fp))
2371 lockflags |= SFL_EXTENTS;
2372 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2373
2374 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2375
2376 hfs_systemfile_unlock(hfsmp, lockflags);
2377
2378 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2379
2380 if (hfsmp->jnl) {
2381 (void) hfs_update(vp, TRUE);
2382 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2383 }
2384
2385 hfs_end_transaction(hfsmp);
2386
2387
2388 /*
2389 * If we get an error and no changes were made then exit;
2390 * otherwise we must do the hfs_update to reflect the changes.
2391 */
2392 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2393 #if QUOTA
2394 /* These are bytes released */
2395 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED,0);
2396 #endif /* QUOTA */
2397
2398 if (fp->ff_size > filebytes) {
2399 fp->ff_size = filebytes;
2400
2401 hfs_unlock(cp);
2402 ubc_setsize(vp, fp->ff_size);
2403 hfs_lock(cp, HFS_FORCE_LOCK);
2404 }
2405 }
2406
2407 Std_Exit:
2408 cp->c_touch_chgtime = TRUE;
2409 cp->c_touch_modtime = TRUE;
2410 retval2 = hfs_update(vp, MNT_WAIT);
2411
2412 if (retval == 0)
2413 retval = retval2;
2414 Err_Exit:
2415 hfs_unlock(cp);
2416 return (retval);
2417 }
2418
2419
2420 /*
2421 * Pagein for HFS filesystem
2422 */
2423 int
2424 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2425 /*
2426 struct vnop_pagein_args {
2427 vnode_t a_vp,
2428 upl_t a_pl,
2429 vm_offset_t a_pl_offset,
2430 off_t a_f_offset,
2431 size_t a_size,
2432 int a_flags
2433 vfs_context_t a_context;
2434 };
2435 */
2436 {
2437 vnode_t vp = ap->a_vp;
2438 int error;
2439
2440 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2441 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2442 /*
2443 * Keep track of blocks read.
2444 */
2445 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2446 struct cnode *cp;
2447 struct filefork *fp;
2448 int bytesread;
2449 int took_cnode_lock = 0;
2450
2451 cp = VTOC(vp);
2452 fp = VTOF(vp);
2453
2454 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2455 bytesread = fp->ff_size;
2456 else
2457 bytesread = ap->a_size;
2458
2459 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2460 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2461 hfs_lock(cp, HFS_FORCE_LOCK);
2462 took_cnode_lock = 1;
2463 }
2464 /*
2465 * If this file hasn't been seen since the start of
2466 * the current sampling period then start over.
2467 */
2468 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2469 struct timeval tv;
2470
2471 fp->ff_bytesread = bytesread;
2472 microtime(&tv);
2473 cp->c_atime = tv.tv_sec;
2474 } else {
2475 fp->ff_bytesread += bytesread;
2476 }
2477 cp->c_touch_acctime = TRUE;
2478 if (took_cnode_lock)
2479 hfs_unlock(cp);
2480 }
2481 return (error);
2482 }
2483
2484 /*
2485 * Pageout for HFS filesystem.
2486 */
2487 int
2488 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2489 /*
2490 struct vnop_pageout_args {
2491 vnode_t a_vp,
2492 upl_t a_pl,
2493 vm_offset_t a_pl_offset,
2494 off_t a_f_offset,
2495 size_t a_size,
2496 int a_flags
2497 vfs_context_t a_context;
2498 };
2499 */
2500 {
2501 vnode_t vp = ap->a_vp;
2502 struct cnode *cp;
2503 struct filefork *fp;
2504 int retval;
2505 off_t end_of_range;
2506 off_t filesize;
2507
2508 cp = VTOC(vp);
2509 if (cp->c_lockowner == current_thread()) {
2510 panic("pageout: %s cnode lock already held!\n",
2511 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2512 }
2513 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2514 if (!(ap->a_flags & UPL_NOCOMMIT)) {
2515 ubc_upl_abort_range(ap->a_pl,
2516 ap->a_pl_offset,
2517 ap->a_size,
2518 UPL_ABORT_FREE_ON_EMPTY);
2519 }
2520 return (retval);
2521 }
2522 fp = VTOF(vp);
2523
2524 filesize = fp->ff_size;
2525 end_of_range = ap->a_f_offset + ap->a_size - 1;
2526
2527 if (end_of_range >= filesize) {
2528 end_of_range = (off_t)(filesize - 1);
2529 }
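	/*
	 * Once these pages are written out the range is valid on disk,
	 * so it no longer belongs on the invalid-range list.
	 */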
2530 if (ap->a_f_offset < filesize) {
2531 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2532 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2533 }
2534 hfs_unlock(cp);
2535
2536 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2537 ap->a_size, filesize, ap->a_flags);
2538
2539 /*
2540 * If data was written, and setuid or setgid bits are set and
2541 * this process is not the superuser then clear the setuid and
2542 * setgid bits as a precaution against tampering.
2543 */
2544 if ((retval == 0) &&
2545 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2546 (vfs_context_suser(ap->a_context) != 0)) {
2547 hfs_lock(cp, HFS_FORCE_LOCK);
2548 cp->c_mode &= ~(S_ISUID | S_ISGID);
2549 cp->c_touch_chgtime = TRUE;
2550 hfs_unlock(cp);
2551 }
2552 return (retval);
2553 }
2554
2555 /*
2556 * Intercept B-Tree node writes to unswap them if necessary.
2557 */
2558 int
2559 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2560 {
2561 int retval = 0;
2562 register struct buf *bp = ap->a_bp;
2563 register struct vnode *vp = buf_vnode(bp);
2564 BlockDescriptor block;
2565
2566 /* Trap B-Tree writes */
2567 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2568 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2569 (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
2570
2571 /*
2572 * Swap and validate the node if it is in native byte order.
2573 * This is always true on big endian, so we always validate
2574 * before writing here. On little endian, the node typically has
2575 * been swapped and validated when it was written to the journal,
2576 * so we won't do anything here.
2577 */
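		/*
		 * The last two bytes of a B-tree node hold the offset of its
		 * first record, sizeof(BTNodeDescriptor) == 0x000e, so reading
		 * 0x000e here in host order means the node is still in host
		 * byte order.
		 */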
2578 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2579 /* Prepare the block pointer */
2580 block.blockHeader = bp;
2581 block.buffer = (char *)buf_dataptr(bp);
2582 block.blockNum = buf_lblkno(bp);
2583 /* not found in cache ==> came from disk */
2584 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2585 block.blockSize = buf_count(bp);
2586
2587 /* Endian un-swap B-Tree node */
2588 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
2589 if (retval)
2590 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
2591 }
2592 }
2593
2594 /* This buffer shouldn't be locked anymore, but if it is, clear it */
2595 if ((buf_flags(bp) & B_LOCKED)) {
2596 // XXXdbg
2597 if (VTOHFS(vp)->jnl) {
2598 panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
2599 }
2600 buf_clearflags(bp, B_LOCKED);
2601 }
2602 retval = vn_bwrite (ap);
2603
2604 return (retval);
2605 }
2606
2607 /*
2608 * Relocate a file to a new location on disk
2609 * cnode must be locked on entry
2610 *
2611 * Relocation occurs by cloning the file's data from its
2612 * current set of blocks to a new set of blocks. During
2613 * the relocation all of the blocks (old and new) are
2614 * owned by the file.
2615 *
2616 * -----------------
2617 * |///////////////|
2618 * -----------------
2619 * 0 N (file offset)
2620 *
2621 * ----------------- -----------------
2622 * |///////////////| | | STEP 1 (acquire new blocks)
2623 * ----------------- -----------------
2624 * 0 N N+1 2N
2625 *
2626 * ----------------- -----------------
2627 * |///////////////| |///////////////| STEP 2 (clone data)
2628 * ----------------- -----------------
2629 * 0 N N+1 2N
2630 *
2631 * -----------------
2632 * |///////////////| STEP 3 (head truncate blocks)
2633 * -----------------
2634 * 0 N
2635 *
2636 * During steps 2 and 3 page-outs to file offsets less
2637 * than or equal to N are suspended.
2638 *
2639 * During step 3 page-ins to the file are suspended.
2640 */
2641 __private_extern__
2642 int
2643 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2644 struct proc *p)
2645 {
2646 struct cnode *cp;
2647 struct filefork *fp;
2648 struct hfsmount *hfsmp;
2649 u_int32_t headblks;
2650 u_int32_t datablks;
2651 u_int32_t blksize;
2652 u_int32_t growsize;
2653 u_int32_t nextallocsave;
2654 daddr64_t sector_a, sector_b;
2655 int disabled_caching = 0;
2656 int eflags;
2657 off_t newbytes;
2658 int retval;
2659 int lockflags = 0;
2660 int took_trunc_lock = 0;
2661 int started_tr = 0;
2662 enum vtype vnodetype;
2663
2664 vnodetype = vnode_vtype(vp);
2665 if (vnodetype != VREG && vnodetype != VLNK) {
2666 return (EPERM);
2667 }
2668
2669 hfsmp = VTOHFS(vp);
2670 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2671 return (ENOSPC);
2672 }
2673
2674 cp = VTOC(vp);
2675 fp = VTOF(vp);
2676 if (fp->ff_unallocblocks)
2677 return (EINVAL);
2678 blksize = hfsmp->blockSize;
2679 if (blockHint == 0)
2680 blockHint = hfsmp->nextAllocation;
2681
2682 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2683 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2684 return (EFBIG);
2685 }
2686
2687 //
2688 // We do not believe that this call to hfs_fsync() is
2689 // necessary and it causes a journal transaction
2690 // deadlock so we are removing it.
2691 //
2692 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2693 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2694 // if (retval)
2695 // return (retval);
2696 //}
2697
2698 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2699 hfs_unlock(cp);
2700 hfs_lock_truncate(cp, TRUE);
2701 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2702 hfs_unlock_truncate(cp);
2703 return (retval);
2704 }
2705 took_trunc_lock = 1;
2706 }
2707 headblks = fp->ff_blocks;
2708 datablks = howmany(fp->ff_size, blksize);
2709 growsize = datablks * blksize;
2710 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2711 if (blockHint >= hfsmp->hfs_metazone_start &&
2712 blockHint <= hfsmp->hfs_metazone_end)
2713 eflags |= kEFMetadataMask;
2714
2715 if (hfs_start_transaction(hfsmp) != 0) {
2716 if (took_trunc_lock)
2717 hfs_unlock_truncate(cp);
2718 return (EINVAL);
2719 }
2720 started_tr = 1;
2721 /*
2722 * Protect the extents b-tree and the allocation bitmap
2723 * during MapFileBlockC and ExtendFileC operations.
2724 */
2725 lockflags = SFL_BITMAP;
2726 if (overflow_extents(fp))
2727 lockflags |= SFL_EXTENTS;
2728 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2729
2730 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2731 if (retval) {
2732 retval = MacToVFSError(retval);
2733 goto out;
2734 }
2735
2736 /*
2737 * STEP 1 - acquire new allocation blocks.
2738 */
2739 if (!vnode_isnocache(vp)) {
2740 vnode_setnocache(vp);
2741 disabled_caching = 1;
2742
2743 }
2744 nextallocsave = hfsmp->nextAllocation;
2745 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
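	/*
	 * For a metadata zone allocation, put the roving allocation pointer
	 * back where it was so that later allocations don't start hunting
	 * inside the metadata zone.
	 */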
2746 if (eflags & kEFMetadataMask) {
2747 HFS_MOUNT_LOCK(hfsmp, TRUE);
2748 hfsmp->nextAllocation = nextallocsave;
2749 hfsmp->vcbFlags |= 0xFF00;
2750 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2751 }
2752
2753 retval = MacToVFSError(retval);
2754 if (retval == 0) {
2755 cp->c_flag |= C_MODIFIED;
2756 if (newbytes < growsize) {
2757 retval = ENOSPC;
2758 goto restore;
2759 } else if (fp->ff_blocks < (headblks + datablks)) {
2760 printf("hfs_relocate: allocation failed\n");
2761 retval = ENOSPC;
2762 goto restore;
2763 }
2764
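		/*
		 * Map the first block of the new space; if it immediately
		 * follows the old data, the file didn't actually move anywhere.
		 */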
2765 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2766 if (retval) {
2767 retval = MacToVFSError(retval);
2768 } else if ((sector_a + 1) == sector_b) {
2769 retval = ENOSPC;
2770 goto restore;
2771 } else if ((eflags & kEFMetadataMask) &&
2772 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2773 hfsmp->hfs_metazone_end)) {
2774 printf("hfs_relocate: didn't move into metadata zone\n");
2775 retval = ENOSPC;
2776 goto restore;
2777 }
2778 }
2779 /* Done with system locks and journal for now. */
2780 hfs_systemfile_unlock(hfsmp, lockflags);
2781 lockflags = 0;
2782 hfs_end_transaction(hfsmp);
2783 started_tr = 0;
2784
2785 if (retval) {
2786 /*
2787 * Check to see if failure is due to excessive fragmentation.
2788 */
2789 if ((retval == ENOSPC) &&
2790 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2791 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2792 }
2793 goto out;
2794 }
2795 /*
2796 * STEP 2 - clone file data into the new allocation blocks.
2797 */
2798
2799 if (vnodetype == VLNK)
2800 retval = hfs_clonelink(vp, blksize, cred, p);
2801 else if (vnode_issystem(vp))
2802 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2803 else
2804 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2805
2806 /* Start transaction for step 3 or for a restore. */
2807 if (hfs_start_transaction(hfsmp) != 0) {
2808 retval = EINVAL;
2809 goto out;
2810 }
2811 started_tr = 1;
2812 if (retval)
2813 goto restore;
2814
2815 /*
2816 * STEP 3 - switch to cloned data and remove old blocks.
2817 */
2818 lockflags = SFL_BITMAP;
2819 if (overflow_extents(fp))
2820 lockflags |= SFL_EXTENTS;
2821 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2822
2823 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2824
2825 hfs_systemfile_unlock(hfsmp, lockflags);
2826 lockflags = 0;
2827 if (retval)
2828 goto restore;
2829 out:
2830 if (took_trunc_lock)
2831 hfs_unlock_truncate(cp);
2832
2833 if (lockflags) {
2834 hfs_systemfile_unlock(hfsmp, lockflags);
2835 lockflags = 0;
2836 }
2837
2838 // See comment up above about calls to hfs_fsync()
2839 //
2840 //if (retval == 0)
2841 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2842
2843 if (hfsmp->jnl) {
2844 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2845 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2846 else
2847 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2848 }
2849 exit:
2850 if (disabled_caching) {
2851 vnode_clearnocache(vp);
2852 }
2853 if (started_tr)
2854 hfs_end_transaction(hfsmp);
2855
2856 return (retval);
2857
2858 restore:
2859 if (fp->ff_blocks == headblks)
2860 goto exit;
2861 /*
2862 * Give back any newly allocated space.
2863 */
2864 if (lockflags == 0) {
2865 lockflags = SFL_BITMAP;
2866 if (overflow_extents(fp))
2867 lockflags |= SFL_EXTENTS;
2868 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2869 }
2870
2871 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2872
2873 hfs_systemfile_unlock(hfsmp, lockflags);
2874 lockflags = 0;
2875
2876 if (took_trunc_lock)
2877 hfs_unlock_truncate(cp);
2878 goto exit;
2879 }
2880
2881
2882 /*
2883 * Clone a symlink.
2884 *
2885 */
2886 static int
2887 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2888 {
2889 struct buf *head_bp = NULL;
2890 struct buf *tail_bp = NULL;
2891 int error;
2892
2893
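	/*
	 * A symlink's data fits in a single allocation block, so copy
	 * logical block 0 (the original) into logical block 1 (the newly
	 * allocated space).
	 */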
2894 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2895 if (error)
2896 goto out;
2897
2898 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2899 if (tail_bp == NULL) {
2900 error = EIO;
2901 goto out;
2902 }
2903 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2904 error = (int)buf_bwrite(tail_bp);
2905 out:
2906 if (head_bp) {
2907 buf_markinvalid(head_bp);
2908 buf_brelse(head_bp);
2909 }
2910 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2911
2912 return (error);
2913 }
2914
2915 /*
2916 * Clone a file's data within the file.
2917 *
2918 */
2919 static int
2920 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2921 {
2922 caddr_t bufp;
2923 size_t writebase;
2924 size_t bufsize;
2925 size_t copysize;
2926 size_t iosize;
2927 off_t filesize;
2928 size_t offset;
2929 uio_t auio;
2930 int error = 0;
2931
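	/*
	 * Read the original data (file offsets 0 .. blkcnt * blksize) and
	 * write it back starting at blkstart * blksize, i.e. into the newly
	 * allocated blocks, copying up to 64K per pass.
	 */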
2932 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2933 writebase = blkstart * blksize;
2934 copysize = blkcnt * blksize;
2935 iosize = bufsize = MIN(copysize, 4096 * 16);
2936 offset = 0;
2937
2938 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2939 return (ENOMEM);
2940 }
2941 hfs_unlock(VTOC(vp));
2942
2943 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2944
2945 while (offset < copysize) {
2946 iosize = MIN(copysize - offset, iosize);
2947
2948 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2949 uio_addiov(auio, (uintptr_t)bufp, iosize);
2950
2951 error = cluster_read(vp, auio, copysize, 0);
2952 if (error) {
2953 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2954 break;
2955 }
2956 if (uio_resid(auio) != 0) {
2957 printf("hfs_clonefile: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2958 error = EIO;
2959 break;
2960 }
2961
2962 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2963 uio_addiov(auio, (uintptr_t)bufp, iosize);
2964
2965 error = cluster_write(vp, auio, filesize + offset,
2966 filesize + offset + iosize,
2967 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2968 if (error) {
2969 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2970 break;
2971 }
2972 if (uio_resid(auio) != 0) {
2973 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2974 error = EIO;
2975 break;
2976 }
2977 offset += iosize;
2978 }
2979 uio_free(auio);
2980
2981 /*
2982 * No need to call ubc_sync_range or hfs_invalbuf
2983 * since the file was copied using IO_NOCACHE.
2984 */
2985
2986 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2987
2988 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2989 return (error);
2990 }
2991
2992 /*
2993 * Clone a system (metadata) file.
2994 *
2995 */
2996 static int
2997 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2998 kauth_cred_t cred, struct proc *p)
2999 {
3000 caddr_t bufp;
3001 char * offset;
3002 size_t bufsize;
3003 size_t iosize;
3004 struct buf *bp = NULL;
3005 daddr64_t blkno;
3006 daddr64_t blk;
3007 daddr64_t start_blk;
3008 daddr64_t last_blk;
3009 int breadcnt;
3010 int i;
3011 int error = 0;
3012
3013
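	/*
	 * Copy the original blocks into the new space one logical block at
	 * a time, buffering up to a megabyte per pass through the loop.
	 */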
3014 iosize = GetLogicalBlockSize(vp);
3015 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
3016 breadcnt = bufsize / iosize;
3017
3018 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3019 return (ENOMEM);
3020 }
3021 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3022 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
3023 blkno = 0;
3024
3025 while (blkno < last_blk) {
3026 /*
3027 * Read up to a megabyte
3028 */
3029 offset = bufp;
3030 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3031 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3032 if (error) {
3033 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3034 goto out;
3035 }
3036 if (buf_count(bp) != iosize) {
3037 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3038 goto out;
3039 }
3040 bcopy((char *)buf_dataptr(bp), offset, iosize);
3041
3042 buf_markinvalid(bp);
3043 buf_brelse(bp);
3044 bp = NULL;
3045
3046 offset += iosize;
3047 }
3048
3049 /*
3050 * Write up to a megabyte
3051 */
3052 offset = bufp;
3053 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3054 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3055 if (bp == NULL) {
3056 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3057 error = EIO;
3058 goto out;
3059 }
3060 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3061 error = (int)buf_bwrite(bp);
3062 bp = NULL;
3063 if (error)
3064 goto out;
3065 offset += iosize;
3066 }
3067 }
3068 out:
3069 if (bp) {
3070 buf_brelse(bp);
3071 }
3072
3073 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3074
3075 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3076
3077 return (error);
3078 }