1 /*
2 * Copyright (c) 2006 Apple Computer, Inc. All Rights Reserved.
3 *
4 * @APPLE_LICENSE_OSREFERENCE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the
10 * License may not be used to create, or enable the creation or
11 * redistribution of, unlawful or unlicensed copies of an Apple operating
12 * system, or to circumvent, violate, or enable the circumvention or
13 * violation of, any terms of an Apple operating system software license
14 * agreement.
15 *
16 * Please obtain a copy of the License at
17 * http://www.opensource.apple.com/apsl/ and read it before using this
18 * file.
19 *
20 * The Original Code and all software distributed under the License are
21 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
22 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
23 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
24 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
25 * Please see the License for the specific language governing rights and
26 * limitations under the License.
27 *
28 * @APPLE_LICENSE_OSREFERENCE_HEADER_END@
29 */
30 /* @(#)hfs_readwrite.c 1.0
31 *
32 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
33 *
34 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
35 *
36 */
37
38 #include <sys/param.h>
39 #include <sys/systm.h>
40 #include <sys/resourcevar.h>
41 #include <sys/kernel.h>
42 #include <sys/fcntl.h>
43 #include <sys/filedesc.h>
44 #include <sys/stat.h>
45 #include <sys/buf.h>
46 #include <sys/proc.h>
47 #include <sys/kauth.h>
48 #include <sys/vnode.h>
49 #include <sys/uio.h>
50 #include <sys/vfs_context.h>
51
52 #include <miscfs/specfs/specdev.h>
53
54 #include <sys/ubc.h>
55 #include <vm/vm_pageout.h>
56 #include <vm/vm_kern.h>
57
58 #include <sys/kdebug.h>
59
60 #include "hfs.h"
61 #include "hfs_endian.h"
62 #include "hfs_fsctl.h"
63 #include "hfs_quota.h"
64 #include "hfscommon/headers/FileMgrInternal.h"
65 #include "hfscommon/headers/BTreesInternal.h"
66 #include "hfs_cnode.h"
67 #include "hfs_dbg.h"
68
69 extern int overflow_extents(struct filefork *fp);
70
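/*
 * A logical block size qualifies for cluster I/O when it is a multiple of
 * 4K and no larger than half of MAXPHYSIO.
 */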
71 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
72
73 enum {
74 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
75 };
76
77 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
78
79 extern int hfs_setextendedsecurity(struct hfsmount *, int);
80
81
82 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
83 static int hfs_clonefile(struct vnode *, int, int, int);
84 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
85
86
87 /*****************************************************************************
88 *
89 * I/O Operations on vnodes
90 *
91 *****************************************************************************/
92 int hfs_vnop_read(struct vnop_read_args *);
93 int hfs_vnop_write(struct vnop_write_args *);
94 int hfs_vnop_ioctl(struct vnop_ioctl_args *);
95 int hfs_vnop_select(struct vnop_select_args *);
96 int hfs_vnop_blktooff(struct vnop_blktooff_args *);
97 int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
98 int hfs_vnop_blockmap(struct vnop_blockmap_args *);
99 int hfs_vnop_strategy(struct vnop_strategy_args *);
100 int hfs_vnop_allocate(struct vnop_allocate_args *);
101 int hfs_vnop_pagein(struct vnop_pagein_args *);
102 int hfs_vnop_pageout(struct vnop_pageout_args *);
103 int hfs_vnop_bwrite(struct vnop_bwrite_args *);
104
105
106 /*
107 * Read data from a file.
108 */
109 int
110 hfs_vnop_read(struct vnop_read_args *ap)
111 {
112 uio_t uio = ap->a_uio;
113 struct vnode *vp = ap->a_vp;
114 struct cnode *cp;
115 struct filefork *fp;
116 struct hfsmount *hfsmp;
117 off_t filesize;
118 off_t filebytes;
119 off_t start_resid = uio_resid(uio);
120 off_t offset = uio_offset(uio);
121 int retval = 0;
122
123
124 /* Preflight checks */
125 if (!vnode_isreg(vp)) {
126 /* can only read regular files */
127 if (vnode_isdir(vp))
128 return (EISDIR);
129 else
130 return (EPERM);
131 }
132 if (start_resid == 0)
133 return (0); /* Nothing left to do */
134 if (offset < 0)
135 		return (EINVAL);	/* can't read from a negative offset */
136
137 cp = VTOC(vp);
138 fp = VTOF(vp);
139 hfsmp = VTOHFS(vp);
140
141 /* Protect against a size change. */
142 hfs_lock_truncate(cp, 0);
143
144 filesize = fp->ff_size;
145 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
146 if (offset > filesize) {
147 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
148 (offset > (off_t)MAXHFSFILESIZE)) {
149 retval = EFBIG;
150 }
151 goto exit;
152 }
153
154 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
155 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
156
157 retval = cluster_read(vp, uio, filesize, 0);
158
159 cp->c_touch_acctime = TRUE;
160
161 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
162 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
163
164 /*
165 	 * Keep track of blocks read
166 */
167 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
168 int took_cnode_lock = 0;
169 off_t bytesread;
170
171 bytesread = start_resid - uio_resid(uio);
172
173 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
174 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
175 hfs_lock(cp, HFS_FORCE_LOCK);
176 took_cnode_lock = 1;
177 }
178 /*
179 * If this file hasn't been seen since the start of
180 * the current sampling period then start over.
181 */
182 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
183 struct timeval tv;
184
185 fp->ff_bytesread = bytesread;
186 microtime(&tv);
187 cp->c_atime = tv.tv_sec;
188 } else {
189 fp->ff_bytesread += bytesread;
190 }
191 if (took_cnode_lock)
192 hfs_unlock(cp);
193 }
194 exit:
195 hfs_unlock_truncate(cp);
196 return (retval);
197 }
198
199 /*
200 * Write data to a file.
201 */
202 int
203 hfs_vnop_write(struct vnop_write_args *ap)
204 {
205 uio_t uio = ap->a_uio;
206 struct vnode *vp = ap->a_vp;
207 struct cnode *cp;
208 struct filefork *fp;
209 struct hfsmount *hfsmp;
210 kauth_cred_t cred = NULL;
211 off_t origFileSize;
212 off_t writelimit;
213 off_t bytesToAdd;
214 off_t actualBytesAdded;
215 off_t filebytes;
216 off_t offset;
217 size_t resid;
218 int eflags;
219 int ioflag = ap->a_ioflag;
220 int retval = 0;
221 int lockflags;
222 int cnode_locked = 0;
223
224 	// LP64todo - fix this! uio_resid may be a 64-bit value
225 resid = uio_resid(uio);
226 offset = uio_offset(uio);
227
228 if (offset < 0)
229 return (EINVAL);
230 if (resid == 0)
231 return (E_NONE);
232 if (!vnode_isreg(vp))
233 return (EPERM); /* Can only write regular files */
234
235 /* Protect against a size change. */
236 hfs_lock_truncate(VTOC(vp), TRUE);
237
238 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
239 hfs_unlock_truncate(VTOC(vp));
240 return (retval);
241 }
242 cnode_locked = 1;
243 cp = VTOC(vp);
244 fp = VTOF(vp);
245 hfsmp = VTOHFS(vp);
246 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
247
248 if (ioflag & IO_APPEND) {
249 uio_setoffset(uio, fp->ff_size);
250 offset = fp->ff_size;
251 }
252 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
253 retval = EPERM;
254 goto exit;
255 }
256
257 origFileSize = fp->ff_size;
258 eflags = kEFDeferMask; /* defer file block allocations */
259
260 #ifdef HFS_SPARSE_DEV
261 /*
262 * When the underlying device is sparse and space
263 * is low (< 8MB), stop doing delayed allocations
264 * and begin doing synchronous I/O.
265 */
266 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
267 (hfs_freeblks(hfsmp, 0) < 2048)) {
268 eflags &= ~kEFDeferMask;
269 ioflag |= IO_SYNC;
270 }
271 #endif /* HFS_SPARSE_DEV */
272
273 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
274 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
275
276 /* Now test if we need to extend the file */
277 /* Doing so will adjust the filebytes for us */
278
279 writelimit = offset + resid;
280 if (writelimit <= filebytes)
281 goto sizeok;
282
283 cred = vfs_context_ucred(ap->a_context);
284 #if QUOTA
285 bytesToAdd = writelimit - filebytes;
286 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
287 cred, 0);
288 if (retval)
289 goto exit;
290 #endif /* QUOTA */
291
292 if (hfs_start_transaction(hfsmp) != 0) {
293 retval = EINVAL;
294 goto exit;
295 }
296
297 while (writelimit > filebytes) {
298 bytesToAdd = writelimit - filebytes;
299 if (cred && suser(cred, NULL) != 0)
300 eflags |= kEFReserveMask;
301
302 /* Protect extents b-tree and allocation bitmap */
303 lockflags = SFL_BITMAP;
304 if (overflow_extents(fp))
305 lockflags |= SFL_EXTENTS;
306 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
307
308 /* Files that are changing size are not hot file candidates. */
309 if (hfsmp->hfc_stage == HFC_RECORDING) {
310 fp->ff_bytesread = 0;
311 }
312 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
313 0, eflags, &actualBytesAdded));
314
315 hfs_systemfile_unlock(hfsmp, lockflags);
316
317 if ((actualBytesAdded == 0) && (retval == E_NONE))
318 retval = ENOSPC;
319 if (retval != E_NONE)
320 break;
321 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
322 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
323 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
324 }
325 (void) hfs_update(vp, TRUE);
326 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
327 (void) hfs_end_transaction(hfsmp);
328
329 sizeok:
330 if (retval == E_NONE) {
331 off_t filesize;
332 off_t zero_off;
333 off_t tail_off;
334 off_t inval_start;
335 off_t inval_end;
336 off_t io_start;
337 int lflag;
338 struct rl_entry *invalid_range;
339
340 if (writelimit > fp->ff_size)
341 filesize = writelimit;
342 else
343 filesize = fp->ff_size;
344
345 lflag = (ioflag & IO_SYNC);
346
347 if (offset <= fp->ff_size) {
348 zero_off = offset & ~PAGE_MASK_64;
349
350 			/* Check whether the area between the zero_offset and the start of the
351 			   transfer is invalid and should be zero-filled as part of the
352 			   transfer:
353 			 */
354 if (offset > zero_off) {
355 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
356 lflag |= IO_HEADZEROFILL;
357 }
358 } else {
359 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
360
361 /* The bytes between fp->ff_size and uio->uio_offset must never be
362 read without being zeroed. The current last block is filled with zeroes
363 			   if it holds valid data, but in all cases we merely do a little bookkeeping
364 to track the area from the end of the current last page to the start of
365 the area actually written. For the same reason only the bytes up to the
366 			   start of the page where this write will start are invalidated; any remainder
367 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
368
369 Note that inval_start, the start of the page after the current EOF,
370 may be past the start of the write, in which case the zeroing
371 			   will be handled by the cluster_write of the actual data.
372 */
373 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
374 inval_end = offset & ~PAGE_MASK_64;
375 zero_off = fp->ff_size;
376
377 if ((fp->ff_size & PAGE_MASK_64) &&
378 (rl_scan(&fp->ff_invalidranges,
379 eof_page_base,
380 fp->ff_size - 1,
381 &invalid_range) != RL_NOOVERLAP)) {
382 /* The page containing the EOF is not valid, so the
383 entire page must be made inaccessible now. If the write
384 starts on a page beyond the page containing the eof
385 (inval_end > eof_page_base), add the
386 whole page to the range to be invalidated. Otherwise
387 (i.e. if the write starts on the same page), zero-fill
388 the entire page explicitly now:
389 */
390 if (inval_end > eof_page_base) {
391 inval_start = eof_page_base;
392 } else {
393 zero_off = eof_page_base;
394 };
395 };
396
397 if (inval_start < inval_end) {
398 struct timeval tv;
399 /* There's some range of data that's going to be marked invalid */
400
401 if (zero_off < inval_start) {
402 /* The pages between inval_start and inval_end are going to be invalidated,
403 and the actual write will start on a page past inval_end. Now's the last
404 chance to zero-fill the page containing the EOF:
405 */
406 hfs_unlock(cp);
407 cnode_locked = 0;
408 retval = cluster_write(vp, (uio_t) 0,
409 fp->ff_size, inval_start,
410 zero_off, (off_t)0,
411 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
412 hfs_lock(cp, HFS_FORCE_LOCK);
413 cnode_locked = 1;
414 if (retval) goto ioerr_exit;
415 offset = uio_offset(uio);
416 };
417
418 /* Mark the remaining area of the newly allocated space as invalid: */
419 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
420 microuptime(&tv);
421 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
422 zero_off = fp->ff_size = inval_end;
423 };
424
425 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
426 };
427
428 /* Check to see whether the area between the end of the write and the end of
429 the page it falls in is invalid and should be zero-filled as part of the transfer:
430 */
431 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
432 if (tail_off > filesize) tail_off = filesize;
433 if (tail_off > writelimit) {
434 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
435 lflag |= IO_TAILZEROFILL;
436 };
437 };
438
439 /*
440 * if the write starts beyond the current EOF (possibly advanced in the
441 * zeroing of the last block, above), then we'll zero fill from the current EOF
442 * to where the write begins:
443 *
444 * NOTE: If (and ONLY if) the portion of the file about to be written is
445 * before the current EOF it might be marked as invalid now and must be
446 * made readable (removed from the invalid ranges) before cluster_write
447 * tries to write it:
448 */
449 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
450 if (io_start < fp->ff_size) {
451 off_t io_end;
452
453 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
454 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
455 };
456
457 hfs_unlock(cp);
458 cnode_locked = 0;
459 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
460 tail_off, lflag | IO_NOZERODIRTY);
461 offset = uio_offset(uio);
462 if (offset > fp->ff_size) {
463 fp->ff_size = offset;
464
465 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
466 /* Files that are changing size are not hot file candidates. */
467 if (hfsmp->hfc_stage == HFC_RECORDING)
468 fp->ff_bytesread = 0;
469 }
470 if (resid > uio_resid(uio)) {
471 cp->c_touch_chgtime = TRUE;
472 cp->c_touch_modtime = TRUE;
473 }
474 }
475 HFS_KNOTE(vp, NOTE_WRITE);
476
477 ioerr_exit:
478 /*
479 * If we successfully wrote any data, and we are not the superuser
480 * we clear the setuid and setgid bits as a precaution against
481 * tampering.
482 */
483 if (cp->c_mode & (S_ISUID | S_ISGID)) {
484 cred = vfs_context_ucred(ap->a_context);
485 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
486 if (!cnode_locked) {
487 hfs_lock(cp, HFS_FORCE_LOCK);
488 cnode_locked = 1;
489 }
490 cp->c_mode &= ~(S_ISUID | S_ISGID);
491 }
492 }
493 if (retval) {
494 if (ioflag & IO_UNIT) {
495 if (!cnode_locked) {
496 hfs_lock(cp, HFS_FORCE_LOCK);
497 cnode_locked = 1;
498 }
499 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
500 0, ap->a_context);
501 			// LP64todo - fix this! resid needs to be user_ssize_t
502 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
503 uio_setresid(uio, resid);
504 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
505 }
506 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
507 if (!cnode_locked) {
508 hfs_lock(cp, HFS_FORCE_LOCK);
509 cnode_locked = 1;
510 }
511 retval = hfs_update(vp, TRUE);
512 }
513 /* Updating vcbWrCnt doesn't need to be atomic. */
514 hfsmp->vcbWrCnt++;
515
516 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
517 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
518 exit:
519 if (cnode_locked)
520 hfs_unlock(cp);
521 hfs_unlock_truncate(cp);
522 return (retval);
523 }
524
525 /* support for the "bulk-access" fcntl */
526
527 #define CACHE_ELEMS 64
528 #define CACHE_LEVELS 16
529 #define PARENT_IDS_FLAG 0x100
530
531 /* from hfs_attrlist.c */
532 extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
533 mode_t obj_mode, struct mount *mp,
534 kauth_cred_t cred, struct proc *p);
535
536 /* from vfs/vfs_fsevents.c */
537 extern char *get_pathbuff(void);
538 extern void release_pathbuff(char *buff);
539
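/*
 * A small, sorted cache of directory IDs and the access result already
 * computed for them; used by the bulk-access fsctl below to avoid repeating
 * catalog lookups and permission checks for shared parent directories.
 */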
540 struct access_cache {
541 int numcached;
542 int cachehits; /* these two for statistics gathering */
543 int lookups;
544 unsigned int *acache;
545 Boolean *haveaccess;
546 };
547
548 struct access_t {
549 uid_t uid; /* IN: effective user id */
550 short flags; /* IN: access requested (i.e. R_OK) */
551 short num_groups; /* IN: number of groups user belongs to */
552 int num_files; /* IN: number of files to process */
553 int *file_ids; /* IN: array of file ids */
554 gid_t *groups; /* IN: array of groups */
555 short *access; /* OUT: access info for each file (0 for 'has access') */
556 };
557
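/* LP64 variant of access_t: user pointers are carried as user_addr_t. */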
558 struct user_access_t {
559 uid_t uid; /* IN: effective user id */
560 short flags; /* IN: access requested (i.e. R_OK) */
561 short num_groups; /* IN: number of groups user belongs to */
562 int num_files; /* IN: number of files to process */
563 user_addr_t file_ids; /* IN: array of file ids */
564 user_addr_t groups; /* IN: array of groups */
565 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
566 };
567
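/*
 * Illustrative, hedged sketch of how a user-space caller might drive this
 * interface, assuming the HFS_BULKACCESS_FSCTL command defined further below
 * and the fsctl() library call (the tool must run as root; the volume path
 * and ids here are made up):
 *
 *     int ids[2] = { 16, 21 };              // catalog node IDs to check
 *     short result[2];
 *     gid_t groups[1] = { 20 };
 *     struct access_t req = {
 *         .uid = 501, .flags = R_OK,
 *         .num_groups = 1, .num_files = 2,
 *         .file_ids = ids, .groups = groups, .access = result,
 *     };
 *     fsctl("/Volumes/MyHFSVolume", HFS_BULKACCESS_FSCTL, &req, 0);
 *     // result[i] == 0 means the caller has access to file_ids[i]
 */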
568 /*
569 * Perform a binary search for the given parent_id. Return value is
570 * found/not found boolean, and indexp will be the index of the item
571 * or the index at which to insert the item if it's not found.
572 */
573 static int
574 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
575 {
576 unsigned int lo, hi;
577 int index, matches = 0;
578
579 if (cache->numcached == 0) {
580 *indexp = 0;
581 return 0; // table is empty, so insert at index=0 and report no match
582 }
583
584 if (cache->numcached > CACHE_ELEMS) {
585 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
586 cache->numcached, CACHE_ELEMS);*/
587 cache->numcached = CACHE_ELEMS;
588 }
589
590 lo = 0;
591 hi = cache->numcached - 1;
592 index = -1;
593
594 /* perform binary search for parent_id */
595 do {
596 unsigned int mid = (hi - lo)/2 + lo;
597 unsigned int this_id = cache->acache[mid];
598
599 if (parent_id == this_id) {
600 index = mid;
601 break;
602 }
603
604 if (parent_id < this_id) {
605 hi = mid;
606 continue;
607 }
608
609 if (parent_id > this_id) {
610 lo = mid + 1;
611 continue;
612 }
613 } while(lo < hi);
614
615 /* check if lo and hi converged on the match */
616 if (parent_id == cache->acache[hi]) {
617 index = hi;
618 }
619
620 /* if no existing entry found, find index for new one */
621 if (index == -1) {
622 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
623 matches = 0;
624 } else {
625 matches = 1;
626 }
627
628 *indexp = index;
629 return matches;
630 }
631
632 /*
633 * Add a node to the access_cache at the given index (or do a lookup first
634 * to find the index if -1 is passed in). We currently do a replace rather
635 * than an insert if the cache is full.
636 */
637 static void
638 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
639 {
640 int lookup_index = -1;
641
642 /* need to do a lookup first if -1 passed for index */
643 if (index == -1) {
644 if (lookup_bucket(cache, &lookup_index, nodeID)) {
645 if (cache->haveaccess[lookup_index] != access) {
646 /* change access info for existing entry... should never happen */
647 cache->haveaccess[lookup_index] = access;
648 }
649
650 /* mission accomplished */
651 return;
652 } else {
653 index = lookup_index;
654 }
655
656 }
657
658 /* if the cache is full, do a replace rather than an insert */
659 if (cache->numcached >= CACHE_ELEMS) {
660 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
661 cache->numcached = CACHE_ELEMS-1;
662
663 if (index > cache->numcached) {
664 // printf("index %d pinned to %d\n", index, cache->numcached);
665 index = cache->numcached;
666 }
667 } else if (index >= 0 && index < cache->numcached) {
668 /* only do bcopy if we're inserting */
669 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
670 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
671 }
672
673 cache->acache[index] = nodeID;
674 cache->haveaccess[index] = access;
675 cache->numcached++;
676 }
677
678
679 struct cinfo {
680 uid_t uid;
681 gid_t gid;
682 mode_t mode;
683 cnid_t parentcnid;
684 };
685
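/*
 * Callback for hfs_chash_snoop: copies an in-core cnode's uid, gid, mode
 * and parent cnid into the caller's struct cinfo.
 */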
686 static int
687 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
688 {
689 struct cinfo *cip = (struct cinfo *)arg;
690
691 cip->uid = attrp->ca_uid;
692 cip->gid = attrp->ca_gid;
693 cip->mode = attrp->ca_mode;
694 cip->parentcnid = descp->cd_parentcnid;
695
696 return (0);
697 }
698
699 /*
700 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
701 * isn't incore, then go to the catalog.
702 */
703 static int
704 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
705 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
706 {
707 int error = 0;
708
709 /* if this id matches the one the fsctl was called with, skip the lookup */
710 if (cnid == skip_cp->c_cnid) {
711 cnattrp->ca_uid = skip_cp->c_uid;
712 cnattrp->ca_gid = skip_cp->c_gid;
713 cnattrp->ca_mode = skip_cp->c_mode;
714 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
715 } else {
716 struct cinfo c_info;
717
718 	/* otherwise, check the cnode hash in case the file/dir is incore */
719 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
720 cnattrp->ca_uid = c_info.uid;
721 cnattrp->ca_gid = c_info.gid;
722 cnattrp->ca_mode = c_info.mode;
723 keyp->hfsPlus.parentID = c_info.parentcnid;
724 } else {
725 int lockflags;
726
727 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
728
729 /* lookup this cnid in the catalog */
730 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
731
732 hfs_systemfile_unlock(hfsmp, lockflags);
733
734 cache->lookups++;
735 }
736 }
737
738 return (error);
739 }
740
741 /*
742 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
743 * up to CACHE_LEVELS as we progress towards the root.
744 */
745 static int
746 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
747 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
748 {
749 int myErr = 0;
750 int myResult;
751 HFSCatalogNodeID thisNodeID;
752 unsigned long myPerms;
753 struct cat_attr cnattr;
754 int cache_index = -1;
755 CatalogKey catkey;
756
757 int i = 0, ids_to_cache = 0;
758 int parent_ids[CACHE_LEVELS];
759
760 /* root always has access */
761 if (!suser(myp_ucred, NULL)) {
762 return (1);
763 }
764
765 thisNodeID = nodeID;
766 while (thisNodeID >= kRootDirID) {
767 myResult = 0; /* default to "no access" */
768
769 /* check the cache before resorting to hitting the catalog */
770
771 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
772 * to look any further after hitting cached dir */
773
774 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
775 cache->cachehits++;
776 myResult = cache->haveaccess[cache_index];
777 goto ExitThisRoutine;
778 }
779
780 /* remember which parents we want to cache */
781 if (ids_to_cache < CACHE_LEVELS) {
782 parent_ids[ids_to_cache] = thisNodeID;
783 ids_to_cache++;
784 }
785
786 /* do the lookup (checks the cnode hash, then the catalog) */
787 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
788 if (myErr) {
789 goto ExitThisRoutine; /* no access */
790 }
791
792 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
793 cnattr.ca_mode, hfsmp->hfs_mp,
794 myp_ucred, theProcPtr);
795
796 if ( (myPerms & X_OK) == 0 ) {
797 myResult = 0;
798 goto ExitThisRoutine; /* no access */
799 }
800
801 /* up the hierarchy we go */
802 thisNodeID = catkey.hfsPlus.parentID;
803 }
804
805 /* if here, we have access to this node */
806 myResult = 1;
807
808 ExitThisRoutine:
809 if (myErr) {
810 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
811 myResult = 0;
812 }
813 *err = myErr;
814
815 /* cache the parent directory(ies) */
816 for (i = 0; i < ids_to_cache; i++) {
817 /* small optimization: get rid of double-lookup for all these */
818 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
819 add_node(cache, -1, parent_ids[i], myResult);
820 }
821
822 return (myResult);
823 }
824 /* end "bulk-access" support */
825
826
827
828 /*
829 * Callback for use with freeze ioctl.
830 */
831 static int
832 hfs_freezewrite_callback(struct vnode *vp, void *cargs)
833 {
834 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
835
836 return 0;
837 }
838
839 /*
840 * Control filesystem operating characteristics.
841 */
842 int
843 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
844 vnode_t a_vp;
845 int a_command;
846 caddr_t a_data;
847 int a_fflag;
848 vfs_context_t a_context;
849 } */ *ap)
850 {
851 struct vnode * vp = ap->a_vp;
852 struct hfsmount *hfsmp = VTOHFS(vp);
853 vfs_context_t context = ap->a_context;
854 kauth_cred_t cred = vfs_context_ucred(context);
855 proc_t p = vfs_context_proc(context);
856 struct vfsstatfs *vfsp;
857 boolean_t is64bit;
858
859 is64bit = proc_is64bit(p);
860
861 switch (ap->a_command) {
862
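	/* Grow or shrink the volume to the byte size passed in a_data; restricted
	   to the filesystem owner (or superuser) and to the root vnode. */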
863 case HFS_RESIZE_VOLUME: {
864 u_int64_t newsize;
865 u_int64_t cursize;
866
867 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
868 if (suser(cred, NULL) &&
869 kauth_cred_getuid(cred) != vfsp->f_owner) {
870 return (EACCES); /* must be owner of file system */
871 }
872 if (!vnode_isvroot(vp)) {
873 return (EINVAL);
874 }
875 newsize = *(u_int64_t *)ap->a_data;
876 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
877
878 if (newsize > cursize) {
879 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
880 } else if (newsize < cursize) {
881 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
882 } else {
883 return (0);
884 }
885 }
886 case HFS_CHANGE_NEXT_ALLOCATION: {
887 u_int32_t location;
888
889 if (vnode_vfsisrdonly(vp)) {
890 return (EROFS);
891 }
892 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
893 if (suser(cred, NULL) &&
894 kauth_cred_getuid(cred) != vfsp->f_owner) {
895 return (EACCES); /* must be owner of file system */
896 }
897 if (!vnode_isvroot(vp)) {
898 return (EINVAL);
899 }
900 location = *(u_int32_t *)ap->a_data;
901 if (location > hfsmp->totalBlocks - 1) {
902 return (EINVAL);
903 }
904 /* Return previous value. */
905 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
906 HFS_MOUNT_LOCK(hfsmp, TRUE);
907 hfsmp->nextAllocation = location;
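		/* vcbFlags |= 0xFF00 marks the volume control block dirty (the
		   MarkVCBDirty() idiom) so the new nextAllocation reaches the header. */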
908 hfsmp->vcbFlags |= 0xFF00;
909 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
910 return (0);
911 }
912
913 #ifdef HFS_SPARSE_DEV
914 case HFS_SETBACKINGSTOREINFO: {
915 struct vnode * bsfs_rootvp;
916 struct vnode * di_vp;
917 struct hfs_backingstoreinfo *bsdata;
918 int error = 0;
919
920 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
921 return (EALREADY);
922 }
923 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
924 if (suser(cred, NULL) &&
925 kauth_cred_getuid(cred) != vfsp->f_owner) {
926 return (EACCES); /* must be owner of file system */
927 }
928 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
929 if (bsdata == NULL) {
930 return (EINVAL);
931 }
932 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
933 return (error);
934 }
935 if ((error = vnode_getwithref(di_vp))) {
936 file_drop(bsdata->backingfd);
937 return(error);
938 }
939
940 if (vnode_mount(vp) == vnode_mount(di_vp)) {
941 (void)vnode_put(di_vp);
942 file_drop(bsdata->backingfd);
943 return (EINVAL);
944 }
945
946 /*
947 * Obtain the backing fs root vnode and keep a reference
948 * on it. This reference will be dropped in hfs_unmount.
949 */
950 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
951 if (error) {
952 (void)vnode_put(di_vp);
953 file_drop(bsdata->backingfd);
954 return (error);
955 }
956 vnode_ref(bsfs_rootvp);
957 vnode_put(bsfs_rootvp);
958
959 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
960 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
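		/* The sparse band is four times the caller-supplied band size,
		   converted from bytes to allocation blocks. */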
961 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
962 hfsmp->hfs_sparsebandblks *= 4;
963
964 (void)vnode_put(di_vp);
965 file_drop(bsdata->backingfd);
966 return (0);
967 }
968 case HFS_CLRBACKINGSTOREINFO: {
969 struct vnode * tmpvp;
970
971 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
972 if (suser(cred, NULL) &&
973 kauth_cred_getuid(cred) != vfsp->f_owner) {
974 return (EACCES); /* must be owner of file system */
975 }
976 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
977 hfsmp->hfs_backingfs_rootvp) {
978
979 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
980 tmpvp = hfsmp->hfs_backingfs_rootvp;
981 hfsmp->hfs_backingfs_rootvp = NULLVP;
982 hfsmp->hfs_sparsebandblks = 0;
983 vnode_rele(tmpvp);
984 }
985 return (0);
986 }
987 #endif /* HFS_SPARSE_DEV */
988
989 case F_FREEZE_FS: {
990 struct mount *mp;
991 task_t task;
992
993 if (!is_suser())
994 return (EACCES);
995
996 mp = vnode_mount(vp);
997 hfsmp = VFSTOHFS(mp);
998
999 if (!(hfsmp->jnl))
1000 return (ENOTSUP);
1001
1002 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
1003
1004 task = current_task();
1005 task_working_set_disable(task);
1006
1007 // flush things before we get started to try and prevent
1008 // dirty data from being paged out while we're frozen.
1009 // note: can't do this after taking the lock as it will
1010 // deadlock against ourselves.
1011 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1012 hfs_global_exclusive_lock_acquire(hfsmp);
1013 journal_flush(hfsmp->jnl);
1014
1015 // don't need to iterate on all vnodes, we just need to
1016 // wait for writes to the system files and the device vnode
1017 if (HFSTOVCB(hfsmp)->extentsRefNum)
1018 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1019 if (HFSTOVCB(hfsmp)->catalogRefNum)
1020 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1021 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1022 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1023 if (hfsmp->hfs_attribute_vp)
1024 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1025 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1026
1027 hfsmp->hfs_freezing_proc = current_proc();
1028
1029 return (0);
1030 }
1031
1032 case F_THAW_FS: {
1033 if (!is_suser())
1034 return (EACCES);
1035
1036 // if we're not the one who froze the fs then we
1037 // can't thaw it.
1038 if (hfsmp->hfs_freezing_proc != current_proc()) {
1039 return EPERM;
1040 }
1041
1042 // NOTE: if you add code here, also go check the
1043 // code that "thaws" the fs in hfs_vnop_close()
1044 //
1045 hfsmp->hfs_freezing_proc = NULL;
1046 hfs_global_exclusive_lock_release(hfsmp);
1047 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1048
1049 return (0);
1050 }
1051
1052 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1053 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1054
1055 case HFS_BULKACCESS_FSCTL:
1056 case HFS_BULKACCESS: {
1057 /*
1058 	 * NOTE: on entry, the vnode is locked. In case this vnode
1059 	 * happens to be in our list of file_ids, we note it and
1060 	 * avoid calling hfs_chashget_nowait() on that id, as that
1061 * will cause a "locking against myself" panic.
1062 */
1063 Boolean check_leaf = true;
1064
1065 struct user_access_t *user_access_structp;
1066 struct user_access_t tmp_user_access_t;
1067 struct access_cache cache;
1068
1069 int error = 0, i;
1070
1071 dev_t dev = VTOC(vp)->c_dev;
1072
1073 short flags;
1074 struct ucred myucred; /* XXX ILLEGAL */
1075 int num_files;
1076 int *file_ids = NULL;
1077 short *access = NULL;
1078
1079 cnid_t cnid;
1080 cnid_t prevParent_cnid = 0;
1081 unsigned long myPerms;
1082 short myaccess = 0;
1083 struct cat_attr cnattr;
1084 CatalogKey catkey;
1085 struct cnode *skip_cp = VTOC(vp);
1086 struct vfs_context my_context;
1087
1088 /* first, return error if not run as root */
1089 if (cred->cr_ruid != 0) {
1090 return EPERM;
1091 }
1092
1093 /* initialize the local cache and buffers */
1094 cache.numcached = 0;
1095 cache.cachehits = 0;
1096 cache.lookups = 0;
1097
1098 file_ids = (int *) get_pathbuff();
1099 access = (short *) get_pathbuff();
1100 cache.acache = (int *) get_pathbuff();
1101 cache.haveaccess = (Boolean *) get_pathbuff();
1102
1103 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1104 release_pathbuff((char *) file_ids);
1105 release_pathbuff((char *) access);
1106 release_pathbuff((char *) cache.acache);
1107 release_pathbuff((char *) cache.haveaccess);
1108
1109 return ENOMEM;
1110 }
1111
1112 /* struct copyin done during dispatch... need to copy file_id array separately */
1113 if (ap->a_data == NULL) {
1114 error = EINVAL;
1115 goto err_exit_bulk_access;
1116 }
1117
1118 if (is64bit) {
1119 user_access_structp = (struct user_access_t *)ap->a_data;
1120 }
1121 else {
1122 struct access_t * accessp = (struct access_t *)ap->a_data;
1123 tmp_user_access_t.uid = accessp->uid;
1124 tmp_user_access_t.flags = accessp->flags;
1125 tmp_user_access_t.num_groups = accessp->num_groups;
1126 tmp_user_access_t.num_files = accessp->num_files;
1127 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1128 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1129 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1130 user_access_structp = &tmp_user_access_t;
1131 }
1132
1133 num_files = user_access_structp->num_files;
1134 if (num_files < 1) {
1135 goto err_exit_bulk_access;
1136 }
1137 if (num_files > 256) {
1138 error = EINVAL;
1139 goto err_exit_bulk_access;
1140 }
1141
1142 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1143 num_files * sizeof(int)))) {
1144 goto err_exit_bulk_access;
1145 }
1146
1147 /* fill in the ucred structure */
1148 flags = user_access_structp->flags;
1149 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1150 flags = R_OK;
1151 }
1152
1153 /* check if we've been passed leaf node ids or parent ids */
1154 if (flags & PARENT_IDS_FLAG) {
1155 check_leaf = false;
1156 }
1157
1158 memset(&myucred, 0, sizeof(myucred));
1159 myucred.cr_ref = 1;
1160 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1161 myucred.cr_ngroups = user_access_structp->num_groups;
1162 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1163 myucred.cr_ngroups = 0;
1164 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1165 myucred.cr_ngroups * sizeof(gid_t)))) {
1166 goto err_exit_bulk_access;
1167 }
1168 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1169 myucred.cr_gmuid = myucred.cr_uid;
1170
1171 my_context.vc_proc = p;
1172 my_context.vc_ucred = &myucred;
1173
1174 /* Check access to each file_id passed in */
1175 for (i = 0; i < num_files; i++) {
1176 #if 0
1177 cnid = (cnid_t) file_ids[i];
1178
1179 /* root always has access */
1180 if (!suser(&myucred, NULL)) {
1181 access[i] = 0;
1182 continue;
1183 }
1184
1185 if (check_leaf) {
1186
1187 /* do the lookup (checks the cnode hash, then the catalog) */
1188 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1189 if (error) {
1190 access[i] = (short) error;
1191 continue;
1192 }
1193
1194 /* before calling CheckAccess(), check the target file for read access */
1195 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1196 cnattr.ca_mode, hfsmp->hfs_mp, &myucred, p );
1197
1198
1199 /* fail fast if no access */
1200 if ((myPerms & flags) == 0) {
1201 access[i] = EACCES;
1202 continue;
1203 }
1204 } else {
1205 /* we were passed an array of parent ids */
1206 catkey.hfsPlus.parentID = cnid;
1207 }
1208
1209 /* if the last guy had the same parent and had access, we're done */
1210 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1211 cache.cachehits++;
1212 access[i] = 0;
1213 continue;
1214 }
1215
1216 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1217 skip_cp, p, &myucred, dev);
1218
1219 if ( myaccess ) {
1220 access[i] = 0; // have access.. no errors to report
1221 } else {
1222 access[i] = (error != 0 ? (short) error : EACCES);
1223 }
1224
1225 prevParent_cnid = catkey.hfsPlus.parentID;
1226 #else
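			/*
			 * Active path: rather than using the manual permission cache above,
			 * walk each file id up toward the root with hfs_vget() and let
			 * vnode_authorize() decide whether the constructed credential can
			 * search/read each node.
			 */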
1227 int myErr;
1228
1229 cnid = (cnid_t)file_ids[i];
1230
1231 while (cnid >= kRootDirID) {
1232 /* get the vnode for this cnid */
1233 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1234 if ( myErr ) {
1235 access[i] = EACCES;
1236 break;
1237 }
1238
1239 cnid = VTOC(vp)->c_parentcnid;
1240
1241 hfs_unlock(VTOC(vp));
1242 if (vnode_vtype(vp) == VDIR) {
1243 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1244 } else {
1245 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1246 }
1247 vnode_put(vp);
1248 access[i] = myErr;
1249 if (myErr) {
1250 break;
1251 }
1252 }
1253 #endif
1254 }
1255
1256 /* copyout the access array */
1257 if ((error = copyout((caddr_t)access, user_access_structp->access,
1258 num_files * sizeof (short)))) {
1259 goto err_exit_bulk_access;
1260 }
1261
1262 err_exit_bulk_access:
1263
1264 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1265
1266 release_pathbuff((char *) cache.acache);
1267 release_pathbuff((char *) cache.haveaccess);
1268 release_pathbuff((char *) file_ids);
1269 release_pathbuff((char *) access);
1270
1271 return (error);
1272 } /* HFS_BULKACCESS */
1273
1274 case HFS_SETACLSTATE: {
1275 int state;
1276
1277 if (ap->a_data == NULL) {
1278 return (EINVAL);
1279 }
1280
1281 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1282 state = *(int *)ap->a_data;
1283
1284 // super-user can enable or disable acl's on a volume.
1285 // the volume owner can only enable acl's
1286 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1287 return (EPERM);
1288 }
1289 if (state == 0 || state == 1)
1290 return hfs_setextendedsecurity(hfsmp, state);
1291 else
1292 return (EINVAL);
1293 }
1294
1295 case F_FULLFSYNC: {
1296 int error;
1297
1298 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1299 if (error == 0) {
1300 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1301 hfs_unlock(VTOC(vp));
1302 }
1303
1304 return error;
1305 }
1306
1307 case F_CHKCLEAN: {
1308 register struct cnode *cp;
1309 int error;
1310
1311 if (!vnode_isreg(vp))
1312 return EINVAL;
1313
1314 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1315 if (error == 0) {
1316 cp = VTOC(vp);
1317 /*
1318 		    * Used by regression tests to determine whether
1319 		    * all the dirty pages (via write) have been cleaned
1320 		    * after a call to 'fsync'.
1321 */
1322 error = is_file_clean(vp, VTOF(vp)->ff_size);
1323 hfs_unlock(cp);
1324 }
1325 return (error);
1326 }
1327
1328 case F_RDADVISE: {
1329 register struct radvisory *ra;
1330 struct filefork *fp;
1331 int error;
1332
1333 if (!vnode_isreg(vp))
1334 return EINVAL;
1335
1336 ra = (struct radvisory *)(ap->a_data);
1337 fp = VTOF(vp);
1338
1339 /* Protect against a size change. */
1340 hfs_lock_truncate(VTOC(vp), TRUE);
1341
1342 if (ra->ra_offset >= fp->ff_size) {
1343 error = EFBIG;
1344 } else {
1345 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1346 }
1347
1348 hfs_unlock_truncate(VTOC(vp));
1349 return (error);
1350 }
1351
1352 case F_READBOOTSTRAP:
1353 case F_WRITEBOOTSTRAP:
1354 {
1355 struct vnode *devvp = NULL;
1356 user_fbootstraptransfer_t *user_bootstrapp;
1357 int devBlockSize;
1358 int error;
1359 uio_t auio;
1360 daddr64_t blockNumber;
1361 u_long blockOffset;
1362 u_long xfersize;
1363 struct buf *bp;
1364 user_fbootstraptransfer_t user_bootstrap;
1365
1366 if (!vnode_isvroot(vp))
1367 return (EINVAL);
1368 	    /* LP64 - when the caller is a 64-bit process we are passed a pointer
1369 	     * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
1370 	     * fbootstraptransfer_t, which we munge into a user_fbootstraptransfer_t.
1371 */
1372 if (is64bit) {
1373 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1374 }
1375 else {
1376 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1377 user_bootstrapp = &user_bootstrap;
1378 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1379 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1380 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1381 }
1382 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1383 return EINVAL;
1384
1385 devvp = VTOHFS(vp)->hfs_devvp;
1386 auio = uio_create(1, user_bootstrapp->fbt_offset,
1387 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1388 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1389 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1390
1391 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1392
1393 while (uio_resid(auio) > 0) {
1394 blockNumber = uio_offset(auio) / devBlockSize;
1395 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1396 if (error) {
1397 if (bp) buf_brelse(bp);
1398 uio_free(auio);
1399 return error;
1400 };
1401
1402 blockOffset = uio_offset(auio) % devBlockSize;
1403 xfersize = devBlockSize - blockOffset;
1404 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1405 if (error) {
1406 buf_brelse(bp);
1407 uio_free(auio);
1408 return error;
1409 };
1410 if (uio_rw(auio) == UIO_WRITE) {
1411 error = VNOP_BWRITE(bp);
1412 if (error) {
1413 uio_free(auio);
1414 return error;
1415 }
1416 } else {
1417 buf_brelse(bp);
1418 };
1419 };
1420 uio_free(auio);
1421 };
1422 return 0;
1423
1424 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1425 {
1426 if (is64bit) {
1427 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1428 }
1429 else {
1430 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1431 }
1432 return 0;
1433 }
1434
1435 case HFS_GET_MOUNT_TIME:
1436 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1437 break;
1438
1439 case HFS_GET_LAST_MTIME:
1440 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1441 break;
1442
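	/* HFS_SET_BOOT_INFO copies the caller-supplied Finder info into the volume
	   header (superuser or volume owner only) and flushes it to disk;
	   HFS_GET_BOOT_INFO returns the current copy. */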
1443 case HFS_SET_BOOT_INFO:
1444 if (!vnode_isvroot(vp))
1445 return(EINVAL);
1446 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1447 return(EACCES); /* must be superuser or owner of filesystem */
1448 HFS_MOUNT_LOCK(hfsmp, TRUE);
1449 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1450 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1451 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1452 break;
1453
1454 case HFS_GET_BOOT_INFO:
1455 if (!vnode_isvroot(vp))
1456 return(EINVAL);
1457 HFS_MOUNT_LOCK(hfsmp, TRUE);
1458 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1459 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1460 break;
1461
1462 default:
1463 return (ENOTTY);
1464 }
1465
1466 /* Should never get here */
1467 return 0;
1468 }
1469
1470 /*
1471 * select
1472 */
1473 int
1474 hfs_vnop_select(__unused struct vnop_select_args *ap)
1475 /*
1476 struct vnop_select_args {
1477 vnode_t a_vp;
1478 int a_which;
1479 int a_fflags;
1480 void *a_wql;
1481 vfs_context_t a_context;
1482 };
1483 */
1484 {
1485 /*
1486 * We should really check to see if I/O is possible.
1487 */
1488 return (1);
1489 }
1490
1491 /*
1492 * Converts a logical block number to a physical block, and optionally returns
1493  * the number of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1494  * The physical block number is based on the device block size, which is currently 512.
1495  * The block run is returned in logical blocks, and is the REMAINING number of blocks in the run.
1496 */
1497 int
1498 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1499 {
1500 struct cnode *cp = VTOC(vp);
1501 struct filefork *fp = VTOF(vp);
1502 struct hfsmount *hfsmp = VTOHFS(vp);
1503 int retval = E_NONE;
1504 daddr_t logBlockSize;
1505 size_t bytesContAvail = 0;
1506 off_t blockposition;
1507 int lockExtBtree;
1508 int lockflags = 0;
1509
1510 /*
1511 * Check for underlying vnode requests and ensure that logical
1512 * to physical mapping is requested.
1513 */
1514 if (vpp != NULL)
1515 *vpp = cp->c_devvp;
1516 if (bnp == NULL)
1517 return (0);
1518
1519 logBlockSize = GetLogicalBlockSize(vp);
1520 blockposition = (off_t)bn * (off_t)logBlockSize;
1521
1522 lockExtBtree = overflow_extents(fp);
1523
1524 if (lockExtBtree)
1525 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1526
1527 retval = MacToVFSError(
1528 MapFileBlockC (HFSTOVCB(hfsmp),
1529 (FCB*)fp,
1530 MAXPHYSIO,
1531 blockposition,
1532 bnp,
1533 &bytesContAvail));
1534
1535 if (lockExtBtree)
1536 hfs_systemfile_unlock(hfsmp, lockflags);
1537
1538 if (retval == E_NONE) {
1539 /* Figure out how many read ahead blocks there are */
1540 if (runp != NULL) {
1541 if (can_cluster(logBlockSize)) {
1542 /* Make sure this result never goes negative: */
1543 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1544 } else {
1545 *runp = 0;
1546 }
1547 }
1548 }
1549 return (retval);
1550 }
1551
1552 /*
1553 * Convert logical block number to file offset.
1554 */
1555 int
1556 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1557 /*
1558 struct vnop_blktooff_args {
1559 vnode_t a_vp;
1560 daddr64_t a_lblkno;
1561 off_t *a_offset;
1562 };
1563 */
1564 {
1565 if (ap->a_vp == NULL)
1566 return (EINVAL);
1567 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1568
1569 return(0);
1570 }
1571
1572 /*
1573 * Convert file offset to logical block number.
1574 */
1575 int
1576 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1577 /*
1578 struct vnop_offtoblk_args {
1579 vnode_t a_vp;
1580 off_t a_offset;
1581 daddr64_t *a_lblkno;
1582 };
1583 */
1584 {
1585 if (ap->a_vp == NULL)
1586 return (EINVAL);
1587 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1588
1589 return(0);
1590 }
1591
1592 /*
1593 * Map file offset to physical block number.
1594 *
1595 * System file cnodes are expected to be locked (shared or exclusive).
1596 */
1597 int
1598 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1599 /*
1600 struct vnop_blockmap_args {
1601 vnode_t a_vp;
1602 off_t a_foffset;
1603 size_t a_size;
1604 daddr64_t *a_bpn;
1605 size_t *a_run;
1606 void *a_poff;
1607 int a_flags;
1608 vfs_context_t a_context;
1609 };
1610 */
1611 {
1612 struct vnode *vp = ap->a_vp;
1613 struct cnode *cp;
1614 struct filefork *fp;
1615 struct hfsmount *hfsmp;
1616 size_t bytesContAvail = 0;
1617 int retval = E_NONE;
1618 int syslocks = 0;
1619 int lockflags = 0;
1620 struct rl_entry *invalid_range;
1621 enum rl_overlaptype overlaptype;
1622 int started_tr = 0;
1623 int tooklock = 0;
1624
1625 /* Do not allow blockmap operation on a directory */
1626 if (vnode_isdir(vp)) {
1627 return (ENOTSUP);
1628 }
1629
1630 /*
1631 * Check for underlying vnode requests and ensure that logical
1632 * to physical mapping is requested.
1633 */
1634 if (ap->a_bpn == NULL)
1635 return (0);
1636
1637 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1638 if (VTOC(vp)->c_lockowner != current_thread()) {
1639 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1640 tooklock = 1;
1641 } else {
1642 cp = VTOC(vp);
1643 panic("blockmap: %s cnode lock already held!\n",
1644 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1645 }
1646 }
1647 hfsmp = VTOHFS(vp);
1648 cp = VTOC(vp);
1649 fp = VTOF(vp);
1650
1651 retry:
1652 if (fp->ff_unallocblocks) {
1653 if (hfs_start_transaction(hfsmp) != 0) {
1654 retval = EINVAL;
1655 goto exit;
1656 } else {
1657 started_tr = 1;
1658 }
1659 syslocks = SFL_EXTENTS | SFL_BITMAP;
1660
1661 } else if (overflow_extents(fp)) {
1662 syslocks = SFL_EXTENTS;
1663 }
1664
1665 if (syslocks)
1666 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1667
1668 /*
1669 * Check for any delayed allocations.
1670 */
1671 if (fp->ff_unallocblocks) {
1672 SInt64 actbytes;
1673 u_int32_t loanedBlocks;
1674
1675 //
1676 // Make sure we have a transaction. It's possible
1677 // that we came in and fp->ff_unallocblocks was zero
1678 // but during the time we blocked acquiring the extents
1679 // btree, ff_unallocblocks became non-zero and so we
1680 // will need to start a transaction.
1681 //
1682 if (started_tr == 0) {
1683 if (syslocks) {
1684 hfs_systemfile_unlock(hfsmp, lockflags);
1685 syslocks = 0;
1686 }
1687 goto retry;
1688 }
1689
1690 /*
1691 	         * Note: ExtendFileC will release any blocks on loan and
1692 	         * acquire real blocks.  So we ask to extend by zero bytes
1693 * since ExtendFileC will account for the virtual blocks.
1694 */
1695
1696 loanedBlocks = fp->ff_unallocblocks;
1697 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1698 kEFAllMask | kEFNoClumpMask, &actbytes);
1699
1700 if (retval) {
1701 fp->ff_unallocblocks = loanedBlocks;
1702 cp->c_blocks += loanedBlocks;
1703 fp->ff_blocks += loanedBlocks;
1704
1705 HFS_MOUNT_LOCK(hfsmp, TRUE);
1706 hfsmp->loanedBlocks += loanedBlocks;
1707 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1708 }
1709
1710 if (retval) {
1711 hfs_systemfile_unlock(hfsmp, lockflags);
1712 cp->c_flag |= C_MODIFIED;
1713 if (started_tr) {
1714 (void) hfs_update(vp, TRUE);
1715 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1716
1717 hfs_end_transaction(hfsmp);
1718 }
1719 goto exit;
1720 }
1721 }
1722
1723 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1724 ap->a_bpn, &bytesContAvail);
1725 if (syslocks) {
1726 hfs_systemfile_unlock(hfsmp, lockflags);
1727 syslocks = 0;
1728 }
1729
1730 if (started_tr) {
1731 (void) hfs_update(vp, TRUE);
1732 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1733 hfs_end_transaction(hfsmp);
1734 started_tr = 0;
1735 }
1736 if (retval) {
1737 goto exit;
1738 }
1739
1740 /* Adjust the mapping information for invalid file ranges: */
1741 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1742 ap->a_foffset + (off_t)bytesContAvail - 1,
1743 &invalid_range);
1744 if (overlaptype != RL_NOOVERLAP) {
1745 switch(overlaptype) {
1746 case RL_MATCHINGOVERLAP:
1747 case RL_OVERLAPCONTAINSRANGE:
1748 case RL_OVERLAPSTARTSBEFORE:
1749 /* There's no valid block for this byte offset: */
1750 *ap->a_bpn = (daddr64_t)-1;
1751 /* There's no point limiting the amount to be returned
1752 * if the invalid range that was hit extends all the way
1753 * to the EOF (i.e. there's no valid bytes between the
1754 * end of this range and the file's EOF):
1755 */
1756 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1757 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1758 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1759 }
1760 break;
1761
1762 case RL_OVERLAPISCONTAINED:
1763 case RL_OVERLAPENDSAFTER:
1764 /* The range of interest hits an invalid block before the end: */
1765 if (invalid_range->rl_start == ap->a_foffset) {
1766 /* There's actually no valid information to be had starting here: */
1767 *ap->a_bpn = (daddr64_t)-1;
1768 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1769 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1770 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1771 }
1772 } else {
1773 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1774 }
1775 break;
1776
1777 case RL_NOOVERLAP:
1778 break;
1779 } /* end switch */
1780 if (bytesContAvail > ap->a_size)
1781 bytesContAvail = ap->a_size;
1782 }
1783 if (ap->a_run)
1784 *ap->a_run = bytesContAvail;
1785
1786 if (ap->a_poff)
1787 *(int *)ap->a_poff = 0;
1788 exit:
1789 if (tooklock)
1790 hfs_unlock(cp);
1791
1792 return (MacToVFSError(retval));
1793 }
1794
1795
1796 /*
1797 * prepare and issue the I/O
1798 * buf_strategy knows how to deal
1799 * with requests that require
1800 * fragmented I/Os
1801 */
1802 int
1803 hfs_vnop_strategy(struct vnop_strategy_args *ap)
1804 {
1805 buf_t bp = ap->a_bp;
1806 vnode_t vp = buf_vnode(bp);
1807 struct cnode *cp = VTOC(vp);
1808
1809 return (buf_strategy(cp->c_devvp, ap));
1810 }
1811
1812
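/*
 * Truncate or extend a file fork to 'length' bytes. The cnode is expected
 * to be locked on entry; 'skipsetsize' suppresses the ubc_setsize() call
 * for callers that are nested inside another transaction and will do it
 * themselves.
 */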
1813 static int
1814 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1815 {
1816 register struct cnode *cp = VTOC(vp);
1817 struct filefork *fp = VTOF(vp);
1818 	struct proc *p = vfs_context_proc(context);
1819 kauth_cred_t cred = vfs_context_ucred(context);
1820 int retval;
1821 off_t bytesToAdd;
1822 off_t actualBytesAdded;
1823 off_t filebytes;
1824 u_int64_t old_filesize;
1825 u_long fileblocks;
1826 int blksize;
1827 struct hfsmount *hfsmp;
1828 int lockflags;
1829
1830 blksize = VTOVCB(vp)->blockSize;
1831 fileblocks = fp->ff_blocks;
1832 filebytes = (off_t)fileblocks * (off_t)blksize;
1833 old_filesize = fp->ff_size;
1834
1835 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1836 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1837
1838 if (length < 0)
1839 return (EINVAL);
1840
1841 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1842 return (EFBIG);
1843
1844 hfsmp = VTOHFS(vp);
1845
1846 retval = E_NONE;
1847
1848 /* Files that are changing size are not hot file candidates. */
1849 if (hfsmp->hfc_stage == HFC_RECORDING) {
1850 fp->ff_bytesread = 0;
1851 }
1852
1853 /*
1854 * We cannot just check if fp->ff_size == length (as an optimization)
1855 * since there may be extra physical blocks that also need truncation.
1856 */
1857 #if QUOTA
1858 if ((retval = hfs_getinoquota(cp)))
1859 return(retval);
1860 #endif /* QUOTA */
1861
1862 /*
1863 * Lengthen the size of the file. We must ensure that the
1864 * last byte of the file is allocated. Since the smallest
1865 * value of ff_size is 0, length will be at least 1.
1866 */
1867 if (length > (off_t)fp->ff_size) {
1868 #if QUOTA
1869 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1870 cred, 0);
1871 if (retval)
1872 goto Err_Exit;
1873 #endif /* QUOTA */
1874 /*
1875 * If we don't have enough physical space then
1876 * we need to extend the physical size.
1877 */
1878 if (length > filebytes) {
1879 int eflags;
1880 u_long blockHint = 0;
1881
1882 /* All or nothing and don't round up to clumpsize. */
1883 eflags = kEFAllMask | kEFNoClumpMask;
1884
1885 if (cred && suser(cred, NULL) != 0)
1886 eflags |= kEFReserveMask; /* keep a reserve */
1887
1888 /*
1889 * Allocate Journal and Quota files in metadata zone.
1890 */
1891 if (filebytes == 0 &&
1892 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1893 hfs_virtualmetafile(cp)) {
1894 eflags |= kEFMetadataMask;
1895 blockHint = hfsmp->hfs_metazone_start;
1896 }
1897 if (hfs_start_transaction(hfsmp) != 0) {
1898 retval = EINVAL;
1899 goto Err_Exit;
1900 }
1901
1902 /* Protect extents b-tree and allocation bitmap */
1903 lockflags = SFL_BITMAP;
1904 if (overflow_extents(fp))
1905 lockflags |= SFL_EXTENTS;
1906 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1907
1908 while ((length > filebytes) && (retval == E_NONE)) {
1909 bytesToAdd = length - filebytes;
1910 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1911 (FCB*)fp,
1912 bytesToAdd,
1913 blockHint,
1914 eflags,
1915 &actualBytesAdded));
1916
1917 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1918 if (actualBytesAdded == 0 && retval == E_NONE) {
1919 if (length > filebytes)
1920 length = filebytes;
1921 break;
1922 }
1923 } /* endwhile */
1924
1925 hfs_systemfile_unlock(hfsmp, lockflags);
1926
1927 if (hfsmp->jnl) {
1928 (void) hfs_update(vp, TRUE);
1929 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1930 }
1931
1932 hfs_end_transaction(hfsmp);
1933
1934 if (retval)
1935 goto Err_Exit;
1936
1937 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1938 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1939 }
1940
1941 if (!(flags & IO_NOZEROFILL)) {
1942 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1943 struct rl_entry *invalid_range;
1944 off_t zero_limit;
1945
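/* Zero-fill no further than the end of the page containing the current EOF (and never past the new length). */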
1946 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1947 if (length < zero_limit) zero_limit = length;
1948
1949 if (length > (off_t)fp->ff_size) {
1950 struct timeval tv;
1951
1952 /* Extending the file: time to fill out the current last page with zeroes? */
1953 if ((fp->ff_size & PAGE_MASK_64) &&
1954 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
1955 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
1956
1957 /* There's some valid data at the start of the (current) last page
1958 of the file, so zero out the remainder of that page to ensure the
1959 entire page contains valid data. Since there is no invalid range
1960 possible past the (current) eof, there's no need to remove anything
1961 from the invalid range list before calling cluster_write(): */
1962 hfs_unlock(cp);
1963 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
1964 fp->ff_size, (off_t)0,
1965 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
1966 hfs_lock(cp, HFS_FORCE_LOCK);
1967 if (retval) goto Err_Exit;
1968
1969 /* Merely invalidate the remaining area, if necessary: */
1970 if (length > zero_limit) {
1971 microuptime(&tv);
1972 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
1973 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1974 }
1975 } else {
1976 /* The page containing the (current) eof is invalid: just add the
1977 remainder of the page to the invalid list, along with the area
1978 being newly allocated:
1979 */
1980 microuptime(&tv);
1981 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
1982 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
1983 };
1984 }
1985 } else {
1986 panic("hfs_truncate: invoked on non-UBC object?!");
1987 };
1988 }
1989 cp->c_touch_modtime = TRUE;
1990 fp->ff_size = length;
1991
1992 /* Nested transactions will do their own ubc_setsize. */
1993 if (!skipsetsize) {
1994 /*
1995 * ubc_setsize can cause a pagein here
1996 * so we need to drop cnode lock.
1997 */
1998 hfs_unlock(cp);
1999 ubc_setsize(vp, length);
2000 hfs_lock(cp, HFS_FORCE_LOCK);
2001 }
2002
2003 } else { /* Shorten the size of the file */
2004
2005 if ((off_t)fp->ff_size > length) {
2006 /*
2007 * Any buffers that are past the truncation point need to be
2008 * invalidated (to maintain buffer cache consistency).
2009 */
2010
2011 /* Nested transactions will do their own ubc_setsize. */
2012 if (!skipsetsize) {
2013 /*
2014 * ubc_setsize can cause a pageout here
2015 * so we need to drop cnode lock.
2016 */
2017 hfs_unlock(cp);
2018 ubc_setsize(vp, length);
2019 hfs_lock(cp, HFS_FORCE_LOCK);
2020 }
2021
2022 /* Any space previously marked as invalid is now irrelevant: */
2023 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2024 }
2025
2026 /*
2027 * Account for any unmapped blocks. Note that the new
2028 * file length can still end up with unmapped blocks.
2029 */
2030 if (fp->ff_unallocblocks > 0) {
2031 u_int32_t finalblks;
2032 u_int32_t loanedBlocks;
2033
2034 HFS_MOUNT_LOCK(hfsmp, TRUE);
2035
2036 loanedBlocks = fp->ff_unallocblocks;
2037 cp->c_blocks -= loanedBlocks;
2038 fp->ff_blocks -= loanedBlocks;
2039 fp->ff_unallocblocks = 0;
2040
2041 hfsmp->loanedBlocks -= loanedBlocks;
2042
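/* Round the new length up to whole allocation blocks and re-loan any blocks still needed to back it. */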
2043 finalblks = (length + blksize - 1) / blksize;
2044 if (finalblks > fp->ff_blocks) {
2045 /* calculate required unmapped blocks */
2046 loanedBlocks = finalblks - fp->ff_blocks;
2047 hfsmp->loanedBlocks += loanedBlocks;
2048
2049 fp->ff_unallocblocks = loanedBlocks;
2050 cp->c_blocks += loanedBlocks;
2051 fp->ff_blocks += loanedBlocks;
2052 }
2053 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2054 }
2055
2056 /*
2057 * For a TBE process, deallocation of the file blocks is
2058 * delayed until the file is closed, and hfs_close calls
2059 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2060 * isn't set, we make sure this isn't a TBE process.
2061 */
2062 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2063 #if QUOTA
2064 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2065 #endif /* QUOTA */
2066 if (hfs_start_transaction(hfsmp) != 0) {
2067 retval = EINVAL;
2068 goto Err_Exit;
2069 }
2070
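/* Skip the on-disk truncate while the fork still has loaned (delayed-allocation) blocks. */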
2071 if (fp->ff_unallocblocks == 0) {
2072 /* Protect extents b-tree and allocation bitmap */
2073 lockflags = SFL_BITMAP;
2074 if (overflow_extents(fp))
2075 lockflags |= SFL_EXTENTS;
2076 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2077
2078 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2079 (FCB*)fp, length, false));
2080
2081 hfs_systemfile_unlock(hfsmp, lockflags);
2082 }
2083 if (hfsmp->jnl) {
2084 if (retval == 0) {
2085 fp->ff_size = length;
2086 }
2087 (void) hfs_update(vp, TRUE);
2088 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2089 }
2090
2091 hfs_end_transaction(hfsmp);
2092
2093 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2094 if (retval)
2095 goto Err_Exit;
2096 #if QUOTA
2097 /* These are bytes released */
2098 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2099 #endif /* QUOTA */
2100 }
2101 /* Only set update flag if the logical length changes */
2102 if (old_filesize != length)
2103 cp->c_touch_modtime = TRUE;
2104 fp->ff_size = length;
2105 }
2106 cp->c_touch_chgtime = TRUE;
2107 retval = hfs_update(vp, MNT_WAIT);
2108 if (retval) {
2109 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2110 -1, -1, -1, retval, 0);
2111 }
2112
2113 Err_Exit:
2114
2115 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2116 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2117
2118 return (retval);
2119 }
2120
2121
2122
2123 /*
2124 * Truncate a cnode to at most length size, freeing (or adding) the
2125 * disk blocks.
2126 */
2127 __private_extern__
2128 int
2129 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2130 vfs_context_t context)
2131 {
2132 struct filefork *fp = VTOF(vp);
2133 off_t filebytes;
2134 u_long fileblocks;
2135 int blksize, error = 0;
2136 struct cnode *cp = VTOC(vp);
2137
2138 if (vnode_isdir(vp))
2139 return (EISDIR); /* cannot truncate an HFS directory! */
2140
2141 blksize = VTOVCB(vp)->blockSize;
2142 fileblocks = fp->ff_blocks;
2143 filebytes = (off_t)fileblocks * (off_t)blksize;
2144
2145 // have to loop truncating or growing files that are
2146 // really big because otherwise transactions can get
2147 // enormous and consume too many kernel resources.
2148
2149 if (length < filebytes) {
2150 while (filebytes > length) {
2151 if ((filebytes - length) > HFS_BIGFILE_SIZE) {
2152 filebytes -= HFS_BIGFILE_SIZE;
2153 } else {
2154 filebytes = length;
2155 }
2156 cp->c_flag |= C_FORCEUPDATE;
2157 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2158 if (error)
2159 break;
2160 }
2161 } else if (length > filebytes) {
2162 while (filebytes < length) {
2163 if ((length - filebytes) > HFS_BIGFILE_SIZE) {
2164 filebytes += HFS_BIGFILE_SIZE;
2165 } else {
2166 filebytes = length;
2167 }
2168 cp->c_flag |= C_FORCEUPDATE;
2169 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2170 if (error)
2171 break;
2172 }
2173 } else /* Same logical size */ {
2174
2175 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2176 }
2177 /* Files that are changing size are not hot file candidates. */
2178 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2179 fp->ff_bytesread = 0;
2180 }
2181
2182 return (error);
2183 }
2184
2185
2186
2187 /*
2188 * Preallocate file storage space.
2189 */
2190 int
2191 hfs_vnop_allocate(struct vnop_allocate_args /* {
2192 vnode_t a_vp;
2193 off_t a_length;
2194 u_int32_t a_flags;
2195 off_t *a_bytesallocated;
2196 off_t a_offset;
2197 vfs_context_t a_context;
2198 } */ *ap)
2199 {
2200 struct vnode *vp = ap->a_vp;
2201 struct cnode *cp;
2202 struct filefork *fp;
2203 ExtendedVCB *vcb;
2204 off_t length = ap->a_length;
2205 off_t startingPEOF;
2206 off_t moreBytesRequested;
2207 off_t actualBytesAdded;
2208 off_t filebytes;
2209 u_long fileblocks;
2210 int retval, retval2;
2211 UInt32 blockHint;
2212 UInt32 extendFlags; /* For call to ExtendFileC */
2213 struct hfsmount *hfsmp;
2214 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2215 int lockflags;
2216
2217 *(ap->a_bytesallocated) = 0;
2218
2219 if (!vnode_isreg(vp))
2220 return (EISDIR);
2221 if (length < (off_t)0)
2222 return (EINVAL);
2223
2224 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2225 return (retval);
2226 cp = VTOC(vp);
2227 fp = VTOF(vp);
2228 hfsmp = VTOHFS(vp);
2229 vcb = VTOVCB(vp);
2230
2231 fileblocks = fp->ff_blocks;
2232 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2233
2234 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2235 retval = EINVAL;
2236 goto Err_Exit;
2237 }
2238
2239 /* Fill in the flags word for the call to Extend the file */
2240
2241 extendFlags = kEFNoClumpMask;
2242 if (ap->a_flags & ALLOCATECONTIG)
2243 extendFlags |= kEFContigMask;
2244 if (ap->a_flags & ALLOCATEALL)
2245 extendFlags |= kEFAllMask;
2246 if (cred && suser(cred, NULL) != 0)
2247 extendFlags |= kEFReserveMask;
2248
2249 retval = E_NONE;
2250 blockHint = 0;
2251 startingPEOF = filebytes;
2252
2253 if (ap->a_flags & ALLOCATEFROMPEOF)
2254 length += filebytes;
2255 else if (ap->a_flags & ALLOCATEFROMVOL)
2256 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
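/* ALLOCATEFROMPEOF: a_length is relative to the current physical EOF. ALLOCATEFROMVOL: a_offset is a volume offset used as an allocation hint. */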
2257
2258 /* If no changes are necessary, then we're done */
2259 if (filebytes == length)
2260 goto Std_Exit;
2261
2262 /*
2263 * Lengthen the size of the file. We must ensure that the
2264 * last byte of the file is allocated. Since the smallest
2265 * value of filebytes is 0, length will be at least 1.
2266 */
2267 if (length > filebytes) {
2268 moreBytesRequested = length - filebytes;
2269
2270 #if QUOTA
2271 retval = hfs_chkdq(cp,
2272 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2273 cred, 0);
2274 if (retval)
2275 goto Err_Exit;
2276
2277 #endif /* QUOTA */
2278 /*
2279 * Metadata zone checks.
2280 */
2281 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2282 /*
2283 * Allocate Journal and Quota files in metadata zone.
2284 */
2285 if (hfs_virtualmetafile(cp)) {
2286 extendFlags |= kEFMetadataMask;
2287 blockHint = hfsmp->hfs_metazone_start;
2288 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2289 (blockHint <= hfsmp->hfs_metazone_end)) {
2290 /*
2291 * Move blockHint outside metadata zone.
2292 */
2293 blockHint = hfsmp->hfs_metazone_end + 1;
2294 }
2295 }
2296
2297 if (hfs_start_transaction(hfsmp) != 0) {
2298 retval = EINVAL;
2299 goto Err_Exit;
2300 }
2301
2302 /* Protect extents b-tree and allocation bitmap */
2303 lockflags = SFL_BITMAP;
2304 if (overflow_extents(fp))
2305 lockflags |= SFL_EXTENTS;
2306 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2307
2308 retval = MacToVFSError(ExtendFileC(vcb,
2309 (FCB*)fp,
2310 moreBytesRequested,
2311 blockHint,
2312 extendFlags,
2313 &actualBytesAdded));
2314
2315 *(ap->a_bytesallocated) = actualBytesAdded;
2316 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2317
2318 hfs_systemfile_unlock(hfsmp, lockflags);
2319
2320 if (hfsmp->jnl) {
2321 (void) hfs_update(vp, TRUE);
2322 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2323 }
2324
2325 hfs_end_transaction(hfsmp);
2326
2327 /*
2328 * if we get an error and no changes were made then exit
2329 * otherwise we must do the hfs_update to reflect the changes
2330 */
2331 if (retval && (startingPEOF == filebytes))
2332 goto Err_Exit;
2333
2334 /*
2335 * Adjust actualBytesAdded to be allocation block aligned, not
2336 * clump size aligned.
2337 * NOTE: What we report here does not affect reality until the
2338 * file is closed, when we truncate the file to allocation
2339 * block size.
2340 */
2341 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2342 *(ap->a_bytesallocated) =
2343 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2344
2345 } else { /* Shorten the size of the file */
2346
2347 if (fp->ff_size > length) {
2348 /*
2349 * Any buffers that are past the truncation point need to be
2350 * invalidated (to maintain buffer cache consistency).
2351 */
2352 }
2353
2354 if (hfs_start_transaction(hfsmp) != 0) {
2355 retval = EINVAL;
2356 goto Err_Exit;
2357 }
2358
2359 /* Protect extents b-tree and allocation bitmap */
2360 lockflags = SFL_BITMAP;
2361 if (overflow_extents(fp))
2362 lockflags |= SFL_EXTENTS;
2363 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2364
2365 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2366
2367 hfs_systemfile_unlock(hfsmp, lockflags);
2368
2369 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2370
2371 if (hfsmp->jnl) {
2372 (void) hfs_update(vp, TRUE);
2373 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2374 }
2375
2376 hfs_end_transaction(hfsmp);
2377
2378
2379 /*
2380 * if we get an error and no changes were made then exit
2381 * otherwise we must do the hfs_update to reflect the changes
2382 */
2383 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2384 #if QUOTA
2385 /* These are bytes released */
2386 (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED, 0);
2387 #endif /* QUOTA */
2388
2389 if (fp->ff_size > filebytes) {
2390 fp->ff_size = filebytes;
2391
2392 hfs_unlock(cp);
2393 ubc_setsize(vp, fp->ff_size);
2394 hfs_lock(cp, HFS_FORCE_LOCK);
2395 }
2396 }
2397
2398 Std_Exit:
2399 cp->c_touch_chgtime = TRUE;
2400 cp->c_touch_modtime = TRUE;
2401 retval2 = hfs_update(vp, MNT_WAIT);
2402
2403 if (retval == 0)
2404 retval = retval2;
2405 Err_Exit:
2406 hfs_unlock(cp);
2407 return (retval);
2408 }
2409
2410
2411 /*
2412 * Pagein for HFS filesystem
2413 */
2414 int
2415 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2416 /*
2417 struct vnop_pagein_args {
2418 vnode_t a_vp,
2419 upl_t a_pl,
2420 vm_offset_t a_pl_offset,
2421 off_t a_f_offset,
2422 size_t a_size,
2423 int a_flags,
2424 vfs_context_t a_context;
2425 };
2426 */
2427 {
2428 vnode_t vp = ap->a_vp;
2429 int error;
2430
2431 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2432 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2433 /*
2434 * Keep track of blocks read.
2435 */
2436 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2437 struct cnode *cp;
2438 struct filefork *fp;
2439 int bytesread;
2440 int took_cnode_lock = 0;
2441
2442 cp = VTOC(vp);
2443 fp = VTOF(vp);
2444
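/* For a small file read from offset 0, count only the bytes the file actually holds. */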
2445 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2446 bytesread = fp->ff_size;
2447 else
2448 bytesread = ap->a_size;
2449
2450 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2451 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2452 hfs_lock(cp, HFS_FORCE_LOCK);
2453 took_cnode_lock = 1;
2454 }
2455 /*
2456 * If this file hasn't been seen since the start of
2457 * the current sampling period then start over.
2458 */
2459 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2460 struct timeval tv;
2461
2462 fp->ff_bytesread = bytesread;
2463 microtime(&tv);
2464 cp->c_atime = tv.tv_sec;
2465 } else {
2466 fp->ff_bytesread += bytesread;
2467 }
2468 cp->c_touch_acctime = TRUE;
2469 if (took_cnode_lock)
2470 hfs_unlock(cp);
2471 }
2472 return (error);
2473 }
2474
2475 /*
2476 * Pageout for HFS filesystem.
2477 */
2478 int
2479 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2480 /*
2481 struct vnop_pageout_args {
2482 vnode_t a_vp,
2483 upl_t a_pl,
2484 vm_offset_t a_pl_offset,
2485 off_t a_f_offset,
2486 size_t a_size,
2487 int a_flags,
2488 vfs_context_t a_context;
2489 };
2490 */
2491 {
2492 vnode_t vp = ap->a_vp;
2493 struct cnode *cp;
2494 struct filefork *fp;
2495 int retval;
2496 off_t end_of_range;
2497 off_t filesize;
2498
2499 cp = VTOC(vp);
2500 if (cp->c_lockowner == current_thread()) {
2501 panic("pageout: %s cnode lock already held!\n",
2502 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2503 }
2504 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2505 if (!(ap->a_flags & UPL_NOCOMMIT)) {
2506 ubc_upl_abort_range(ap->a_pl,
2507 ap->a_pl_offset,
2508 ap->a_size,
2509 UPL_ABORT_FREE_ON_EMPTY);
2510 }
2511 return (retval);
2512 }
2513 fp = VTOF(vp);
2514
2515 filesize = fp->ff_size;
2516 end_of_range = ap->a_f_offset + ap->a_size - 1;
2517
2518 if (end_of_range >= filesize) {
2519 end_of_range = (off_t)(filesize - 1);
2520 }
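/* Pages being written out now hold valid data, so drop them from the invalid-range list. */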
2521 if (ap->a_f_offset < filesize) {
2522 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2523 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2524 }
2525 hfs_unlock(cp);
2526
2527 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2528 ap->a_size, filesize, ap->a_flags);
2529
2530 /*
2531 * If data was written, and setuid or setgid bits are set and
2532 * this process is not the superuser then clear the setuid and
2533 * setgid bits as a precaution against tampering.
2534 */
2535 if ((retval == 0) &&
2536 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2537 (vfs_context_suser(ap->a_context) != 0)) {
2538 hfs_lock(cp, HFS_FORCE_LOCK);
2539 cp->c_mode &= ~(S_ISUID | S_ISGID);
2540 cp->c_touch_chgtime = TRUE;
2541 hfs_unlock(cp);
2542 }
2543 return (retval);
2544 }
2545
2546 /*
2547 * Intercept B-Tree node writes to unswap them if necessary.
2548 */
2549 int
2550 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2551 {
2552 int retval = 0;
2553 register struct buf *bp = ap->a_bp;
2554 register struct vnode *vp = buf_vnode(bp);
2555 BlockDescriptor block;
2556
2557 /* Trap B-Tree writes */
2558 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2559 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2560 (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
2561
2562 /*
2563 * Swap and validate the node if it is in native byte order.
2564 * This is always true on big endian, so we always validate
2565 * before writing here. On little endian, the node typically has
2566 * been swapped and validated when it was written to the journal,
2567 * so we won't do anything here.
2568 */
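/* The last UInt16 of a B-tree node is the offset of its first record, i.e. sizeof(BTNodeDescriptor) == 0x000e; seeing that value in host order means the node still needs swapping. */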
2569 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2570 /* Prepare the block pointer */
2571 block.blockHeader = bp;
2572 block.buffer = (char *)buf_dataptr(bp);
2573 block.blockNum = buf_lblkno(bp);
2574 /* not found in cache ==> came from disk */
2575 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2576 block.blockSize = buf_count(bp);
2577
2578 /* Endian un-swap B-Tree node */
2579 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
2580 if (retval)
2581 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
2582 }
2583 }
2584
2585 /* This buffer shouldn't be locked anymore but if it is clear it */
2586 if ((buf_flags(bp) & B_LOCKED)) {
2587 // XXXdbg
2588 if (VTOHFS(vp)->jnl) {
2589 panic("hfs: CLEARING the lock bit on bp %p\n", bp);
2590 }
2591 buf_clearflags(bp, B_LOCKED);
2592 }
2593 retval = vn_bwrite (ap);
2594
2595 return (retval);
2596 }
2597
2598 /*
2599 * Relocate a file to a new location on disk.
2600 * The cnode must be locked on entry.
2601 *
2602 * Relocation occurs by cloning the file's data from its
2603 * current set of blocks to a new set of blocks. During
2604 * the relocation all of the blocks (old and new) are
2605 * owned by the file.
2606 *
2607 * -----------------
2608 * |///////////////|
2609 * -----------------
2610 * 0 N (file offset)
2611 *
2612 * ----------------- -----------------
2613 * |///////////////| | | STEP 1 (acquire new blocks)
2614 * ----------------- -----------------
2615 * 0 N N+1 2N
2616 *
2617 * ----------------- -----------------
2618 * |///////////////| |///////////////| STEP 2 (clone data)
2619 * ----------------- -----------------
2620 * 0 N N+1 2N
2621 *
2622 * -----------------
2623 * |///////////////| STEP 3 (head truncate blocks)
2624 * -----------------
2625 * 0 N
2626 *
2627 * During steps 2 and 3 page-outs to file offsets less
2628 * than or equal to N are suspended.
2629 *
2630 * During step 3 page-ins to the file get suspended.
2631 */
2632 __private_extern__
2633 int
2634 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2635 struct proc *p)
2636 {
2637 struct cnode *cp;
2638 struct filefork *fp;
2639 struct hfsmount *hfsmp;
2640 u_int32_t headblks;
2641 u_int32_t datablks;
2642 u_int32_t blksize;
2643 u_int32_t growsize;
2644 u_int32_t nextallocsave;
2645 daddr64_t sector_a, sector_b;
2646 int disabled_caching = 0;
2647 int eflags;
2648 off_t newbytes;
2649 int retval;
2650 int lockflags = 0;
2651 int took_trunc_lock = 0;
2652 int started_tr = 0;
2653 enum vtype vnodetype;
2654
2655 vnodetype = vnode_vtype(vp);
2656 if (vnodetype != VREG && vnodetype != VLNK) {
2657 return (EPERM);
2658 }
2659
2660 hfsmp = VTOHFS(vp);
2661 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2662 return (ENOSPC);
2663 }
2664
2665 cp = VTOC(vp);
2666 fp = VTOF(vp);
2667 if (fp->ff_unallocblocks)
2668 return (EINVAL);
2669 blksize = hfsmp->blockSize;
2670 if (blockHint == 0)
2671 blockHint = hfsmp->nextAllocation;
2672
2673 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2674 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2675 return (EFBIG);
2676 }
2677
2678 //
2679 // We do not believe that this call to hfs_fsync() is
2680 // necessary and it causes a journal transaction
2681 // deadlock so we are removing it.
2682 //
2683 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2684 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2685 // if (retval)
2686 // return (retval);
2687 //}
2688
2689 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2690 hfs_unlock(cp);
2691 hfs_lock_truncate(cp, TRUE);
2692 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2693 hfs_unlock_truncate(cp);
2694 return (retval);
2695 }
2696 took_trunc_lock = 1;
2697 }
2698 headblks = fp->ff_blocks;
2699 datablks = howmany(fp->ff_size, blksize);
2700 growsize = datablks * blksize;
2701 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2702 if (blockHint >= hfsmp->hfs_metazone_start &&
2703 blockHint <= hfsmp->hfs_metazone_end)
2704 eflags |= kEFMetadataMask;
2705
2706 if (hfs_start_transaction(hfsmp) != 0) {
2707 if (took_trunc_lock)
2708 hfs_unlock_truncate(cp);
2709 return (EINVAL);
2710 }
2711 started_tr = 1;
2712 /*
2713 * Protect the extents b-tree and the allocation bitmap
2714 * during MapFileBlockC and ExtendFileC operations.
2715 */
2716 lockflags = SFL_BITMAP;
2717 if (overflow_extents(fp))
2718 lockflags |= SFL_EXTENTS;
2719 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2720
2721 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2722 if (retval) {
2723 retval = MacToVFSError(retval);
2724 goto out;
2725 }
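/* sector_a is the physical sector of the file's last allocated block; it is compared below against the start of the new allocation to make sure the data actually moves. */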
2726
2727 /*
2728 * STEP 1 - acquire new allocation blocks.
2729 */
2730 if (!vnode_isnocache(vp)) {
2731 vnode_setnocache(vp);
2732 disabled_caching = 1;
2733
2734 }
2735 nextallocsave = hfsmp->nextAllocation;
2736 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
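/* A metadata-zone allocation should not disturb the volume's next-allocation pointer, so restore the saved value. */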
2737 if (eflags & kEFMetadataMask) {
2738 HFS_MOUNT_LOCK(hfsmp, TRUE);
2739 hfsmp->nextAllocation = nextallocsave;
2740 hfsmp->vcbFlags |= 0xFF00;
2741 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2742 }
2743
2744 retval = MacToVFSError(retval);
2745 if (retval == 0) {
2746 cp->c_flag |= C_MODIFIED;
2747 if (newbytes < growsize) {
2748 retval = ENOSPC;
2749 goto restore;
2750 } else if (fp->ff_blocks < (headblks + datablks)) {
2751 printf("hfs_relocate: allocation failed");
2752 retval = ENOSPC;
2753 goto restore;
2754 }
2755
2756 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2757 if (retval) {
2758 retval = MacToVFSError(retval);
2759 } else if ((sector_a + 1) == sector_b) {
2760 retval = ENOSPC;
2761 goto restore;
2762 } else if ((eflags & kEFMetadataMask) &&
2763 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2764 hfsmp->hfs_metazone_end)) {
2765 printf("hfs_relocate: didn't move into metadata zone\n");
2766 retval = ENOSPC;
2767 goto restore;
2768 }
2769 }
2770 /* Done with system locks and journal for now. */
2771 hfs_systemfile_unlock(hfsmp, lockflags);
2772 lockflags = 0;
2773 hfs_end_transaction(hfsmp);
2774 started_tr = 0;
2775
2776 if (retval) {
2777 /*
2778 * Check to see if failure is due to excessive fragmentation.
2779 */
2780 if ((retval == ENOSPC) &&
2781 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2782 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2783 }
2784 goto out;
2785 }
2786 /*
2787 * STEP 2 - clone file data into the new allocation blocks.
2788 */
2789
2790 if (vnodetype == VLNK)
2791 retval = hfs_clonelink(vp, blksize, cred, p);
2792 else if (vnode_issystem(vp))
2793 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2794 else
2795 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2796
2797 /* Start transaction for step 3 or for a restore. */
2798 if (hfs_start_transaction(hfsmp) != 0) {
2799 retval = EINVAL;
2800 goto out;
2801 }
2802 started_tr = 1;
2803 if (retval)
2804 goto restore;
2805
2806 /*
2807 * STEP 3 - switch to cloned data and remove old blocks.
2808 */
2809 lockflags = SFL_BITMAP;
2810 if (overflow_extents(fp))
2811 lockflags |= SFL_EXTENTS;
2812 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2813
2814 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2815
2816 hfs_systemfile_unlock(hfsmp, lockflags);
2817 lockflags = 0;
2818 if (retval)
2819 goto restore;
2820 out:
2821 if (took_trunc_lock)
2822 hfs_unlock_truncate(cp);
2823
2824 if (lockflags) {
2825 hfs_systemfile_unlock(hfsmp, lockflags);
2826 lockflags = 0;
2827 }
2828
2829 // See comment up above about calls to hfs_fsync()
2830 //
2831 //if (retval == 0)
2832 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2833
2834 if (hfsmp->jnl) {
2835 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2836 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2837 else
2838 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2839 }
2840 exit:
2841 if (disabled_caching) {
2842 vnode_clearnocache(vp);
2843 }
2844 if (started_tr)
2845 hfs_end_transaction(hfsmp);
2846
2847 return (retval);
2848
2849 restore:
2850 if (fp->ff_blocks == headblks)
2851 goto exit;
2852 /*
2853 * Give back any newly allocated space.
2854 */
2855 if (lockflags == 0) {
2856 lockflags = SFL_BITMAP;
2857 if (overflow_extents(fp))
2858 lockflags |= SFL_EXTENTS;
2859 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2860 }
2861
2862 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2863
2864 hfs_systemfile_unlock(hfsmp, lockflags);
2865 lockflags = 0;
2866
2867 if (took_trunc_lock)
2868 hfs_unlock_truncate(cp);
2869 goto exit;
2870 }
2871
2872
2873 /*
2874 * Clone a symlink.
2875 *
2876 */
2877 static int
2878 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2879 {
2880 struct buf *head_bp = NULL;
2881 struct buf *tail_bp = NULL;
2882 int error;
2883
2884
2885 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2886 if (error)
2887 goto out;
2888
2889 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2890 if (tail_bp == NULL) {
2891 error = EIO;
2892 goto out;
2893 }
2894 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2895 error = (int)buf_bwrite(tail_bp);
2896 out:
2897 if (head_bp) {
2898 buf_markinvalid(head_bp);
2899 buf_brelse(head_bp);
2900 }
2901 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2902
2903 return (error);
2904 }
2905
2906 /*
2907 * Clone a file's data from its current blocks into newly allocated blocks within the same file.
2908 *
2909 */
2910 static int
2911 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2912 {
2913 caddr_t bufp;
2914 size_t writebase;
2915 size_t bufsize;
2916 size_t copysize;
2917 size_t iosize;
2918 off_t filesize;
2919 size_t offset;
2920 uio_t auio;
2921 int error = 0;
2922
2923 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2924 writebase = blkstart * blksize;
2925 copysize = blkcnt * blksize;
2926 iosize = bufsize = MIN(copysize, 4096 * 16);
2927 offset = 0;
2928
2929 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2930 return (ENOMEM);
2931 }
2932 hfs_unlock(VTOC(vp));
2933
2934 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2935
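/* Copy in bufsize chunks: read from the start of the file and write the same data past the old physical EOF into the newly allocated blocks. */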
2936 while (offset < copysize) {
2937 iosize = MIN(copysize - offset, iosize);
2938
2939 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2940 uio_addiov(auio, (uintptr_t)bufp, iosize);
2941
2942 error = cluster_read(vp, auio, copysize, 0);
2943 if (error) {
2944 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2945 break;
2946 }
2947 if (uio_resid(auio) != 0) {
2948 printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2949 error = EIO;
2950 break;
2951 }
2952
2953 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
2954 uio_addiov(auio, (uintptr_t)bufp, iosize);
2955
2956 error = cluster_write(vp, auio, filesize + offset,
2957 filesize + offset + iosize,
2958 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
2959 if (error) {
2960 printf("hfs_clonefile: cluster_write failed - %d\n", error);
2961 break;
2962 }
2963 if (uio_resid(auio) != 0) {
2964 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
2965 error = EIO;
2966 break;
2967 }
2968 offset += iosize;
2969 }
2970 uio_free(auio);
2971
2972 /*
2973 * No need to call ubc_sync_range or hfs_invalbuf
2974 * since the file was copied using IO_NOCACHE.
2975 */
2976
2977 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
2978
2979 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
2980 return (error);
2981 }
2982
2983 /*
2984 * Clone a system (metadata) file.
2985 *
2986 */
2987 static int
2988 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
2989 kauth_cred_t cred, struct proc *p)
2990 {
2991 caddr_t bufp;
2992 char * offset;
2993 size_t bufsize;
2994 size_t iosize;
2995 struct buf *bp = NULL;
2996 daddr64_t blkno;
2997 daddr64_t blk;
2998 daddr64_t start_blk;
2999 daddr64_t last_blk;
3000 int breadcnt;
3001 int i;
3002 int error = 0;
3003
3004
3005 iosize = GetLogicalBlockSize(vp);
3006 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
3007 breadcnt = bufsize / iosize;
3008
3009 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3010 return (ENOMEM);
3011 }
3012 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3013 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
3014 blkno = 0;
3015
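/* The source data occupies logical blocks [0, last_blk); each chunk is written back to the corresponding block at start_blk + blkno. */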
3016 while (blkno < last_blk) {
3017 /*
3018 * Read up to a megabyte
3019 */
3020 offset = bufp;
3021 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3022 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3023 if (error) {
3024 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3025 goto out;
3026 }
3027 if (buf_count(bp) != iosize) {
3028 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3029 goto out;
3030 }
3031 bcopy((char *)buf_dataptr(bp), offset, iosize);
3032
3033 buf_markinvalid(bp);
3034 buf_brelse(bp);
3035 bp = NULL;
3036
3037 offset += iosize;
3038 }
3039
3040 /*
3041 * Write up to a megabyte
3042 */
3043 offset = bufp;
3044 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3045 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3046 if (bp == NULL) {
3047 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3048 error = EIO;
3049 goto out;
3050 }
3051 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3052 error = (int)buf_bwrite(bp);
3053 bp = NULL;
3054 if (error)
3055 goto out;
3056 offset += iosize;
3057 }
3058 }
3059 out:
3060 if (bp) {
3061 buf_brelse(bp);
3062 }
3063
3064 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3065
3066 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3067
3068 return (error);
3069 }