2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
28 /* @(#)hfs_readwrite.c 1.0
30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/resourcevar.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/filedesc.h>
45 #include <sys/kauth.h>
46 #include <sys/vnode.h>
48 #include <sys/vfs_context.h>
50 #include <sys/sysctl.h>
52 #include <miscfs/specfs/specdev.h>
55 #include <vm/vm_pageout.h>
56 #include <vm/vm_kern.h>
58 #include <sys/kdebug.h>
61 #include "hfs_endian.h"
62 #include "hfs_fsctl.h"
63 #include "hfs_quota.h"
64 #include "hfscommon/headers/FileMgrInternal.h"
65 #include "hfscommon/headers/BTreesInternal.h"
66 #include "hfs_cnode.h"
69 extern int overflow_extents(struct filefork
*fp
);
71 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
74 MAXHFSFILESIZE
= 0x7FFFFFFF /* this needs to go in the mount structure */
77 extern u_int32_t
GetLogicalBlockSize(struct vnode
*vp
);
79 extern int hfs_setextendedsecurity(struct hfsmount
*, int);
82 static int hfs_clonelink(struct vnode
*, int, kauth_cred_t
, struct proc
*);
83 static int hfs_clonefile(struct vnode
*, int, int, int);
84 static int hfs_clonesysfile(struct vnode
*, int, int, int, kauth_cred_t
, struct proc
*);
87 int flush_cache_on_write
= 0;
88 SYSCTL_INT (_kern
, OID_AUTO
, flush_cache_on_write
, CTLFLAG_RW
, &flush_cache_on_write
, 0, "always flush the drive cache on writes to uncached files");
91 /*****************************************************************************
93 * I/O Operations on vnodes
95 *****************************************************************************/
96 int hfs_vnop_read(struct vnop_read_args
*);
97 int hfs_vnop_write(struct vnop_write_args
*);
98 int hfs_vnop_ioctl(struct vnop_ioctl_args
*);
99 int hfs_vnop_select(struct vnop_select_args
*);
100 int hfs_vnop_blktooff(struct vnop_blktooff_args
*);
101 int hfs_vnop_offtoblk(struct vnop_offtoblk_args
*);
102 int hfs_vnop_blockmap(struct vnop_blockmap_args
*);
103 int hfs_vnop_strategy(struct vnop_strategy_args
*);
104 int hfs_vnop_allocate(struct vnop_allocate_args
*);
105 int hfs_vnop_pagein(struct vnop_pagein_args
*);
106 int hfs_vnop_pageout(struct vnop_pageout_args
*);
107 int hfs_vnop_bwrite(struct vnop_bwrite_args
*);
111 * Read data from a file.
114 hfs_vnop_read(struct vnop_read_args
*ap
)
116 uio_t uio
= ap
->a_uio
;
117 struct vnode
*vp
= ap
->a_vp
;
120 struct hfsmount
*hfsmp
;
123 off_t start_resid
= uio_resid(uio
);
124 off_t offset
= uio_offset(uio
);
128 /* Preflight checks */
129 if (!vnode_isreg(vp
)) {
130 /* can only read regular files */
136 if (start_resid
== 0)
137 return (0); /* Nothing left to do */
139 return (EINVAL
); /* cant read from a negative offset */
145 /* Protect against a size change. */
146 hfs_lock_truncate(cp
, 0);
148 filesize
= fp
->ff_size
;
149 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
150 if (offset
> filesize
) {
151 if ((hfsmp
->hfs_flags
& HFS_STANDARD
) &&
152 (offset
> (off_t
)MAXHFSFILESIZE
)) {
158 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 12)) | DBG_FUNC_START
,
159 (int)uio_offset(uio
), uio_resid(uio
), (int)filesize
, (int)filebytes
, 0);
161 retval
= cluster_read(vp
, uio
, filesize
, 0);
163 cp
->c_touch_acctime
= TRUE
;
165 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 12)) | DBG_FUNC_END
,
166 (int)uio_offset(uio
), uio_resid(uio
), (int)filesize
, (int)filebytes
, 0);
169 * Keep track blocks read
171 if (VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
&& retval
== 0) {
172 int took_cnode_lock
= 0;
175 bytesread
= start_resid
- uio_resid(uio
);
177 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
178 if ((fp
->ff_bytesread
+ bytesread
) > 0x00000000ffffffff) {
179 hfs_lock(cp
, HFS_FORCE_LOCK
);
183 * If this file hasn't been seen since the start of
184 * the current sampling period then start over.
186 if (cp
->c_atime
< VTOHFS(vp
)->hfc_timebase
) {
189 fp
->ff_bytesread
= bytesread
;
191 cp
->c_atime
= tv
.tv_sec
;
193 fp
->ff_bytesread
+= bytesread
;
199 hfs_unlock_truncate(cp
);
204 * Write data to a file.
207 hfs_vnop_write(struct vnop_write_args
*ap
)
209 uio_t uio
= ap
->a_uio
;
210 struct vnode
*vp
= ap
->a_vp
;
213 struct hfsmount
*hfsmp
;
214 kauth_cred_t cred
= NULL
;
218 off_t actualBytesAdded
;
223 int ioflag
= ap
->a_ioflag
;
226 int cnode_locked
= 0;
228 // LP64todo - fix this! uio_resid may be 64-bit value
229 resid
= uio_resid(uio
);
230 offset
= uio_offset(uio
);
236 if (!vnode_isreg(vp
))
237 return (EPERM
); /* Can only write regular files */
239 /* Protect against a size change. */
240 hfs_lock_truncate(VTOC(vp
), TRUE
);
242 if ( (retval
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
))) {
243 hfs_unlock_truncate(VTOC(vp
));
250 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
252 if (ioflag
& IO_APPEND
) {
253 uio_setoffset(uio
, fp
->ff_size
);
254 offset
= fp
->ff_size
;
256 if ((cp
->c_flags
& APPEND
) && offset
!= fp
->ff_size
) {
261 origFileSize
= fp
->ff_size
;
262 eflags
= kEFDeferMask
; /* defer file block allocations */
264 #ifdef HFS_SPARSE_DEV
266 * When the underlying device is sparse and space
267 * is low (< 8MB), stop doing delayed allocations
268 * and begin doing synchronous I/O.
270 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
271 (hfs_freeblks(hfsmp
, 0) < 2048)) {
272 eflags
&= ~kEFDeferMask
;
275 #endif /* HFS_SPARSE_DEV */
277 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_START
,
278 (int)offset
, uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
280 /* Now test if we need to extend the file */
281 /* Doing so will adjust the filebytes for us */
283 writelimit
= offset
+ resid
;
284 if (writelimit
<= filebytes
)
287 cred
= vfs_context_ucred(ap
->a_context
);
289 bytesToAdd
= writelimit
- filebytes
;
290 retval
= hfs_chkdq(cp
, (int64_t)(roundup(bytesToAdd
, hfsmp
->blockSize
)),
296 if (hfs_start_transaction(hfsmp
) != 0) {
301 while (writelimit
> filebytes
) {
302 bytesToAdd
= writelimit
- filebytes
;
303 if (cred
&& suser(cred
, NULL
) != 0)
304 eflags
|= kEFReserveMask
;
306 /* Protect extents b-tree and allocation bitmap */
307 lockflags
= SFL_BITMAP
;
308 if (overflow_extents(fp
))
309 lockflags
|= SFL_EXTENTS
;
310 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
312 /* Files that are changing size are not hot file candidates. */
313 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
314 fp
->ff_bytesread
= 0;
316 retval
= MacToVFSError(ExtendFileC (hfsmp
, (FCB
*)fp
, bytesToAdd
,
317 0, eflags
, &actualBytesAdded
));
319 hfs_systemfile_unlock(hfsmp
, lockflags
);
321 if ((actualBytesAdded
== 0) && (retval
== E_NONE
))
323 if (retval
!= E_NONE
)
325 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
326 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_NONE
,
327 (int)offset
, uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
329 (void) hfs_update(vp
, TRUE
);
330 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
331 (void) hfs_end_transaction(hfsmp
);
334 if (retval
== E_NONE
) {
342 struct rl_entry
*invalid_range
;
344 if (writelimit
> fp
->ff_size
)
345 filesize
= writelimit
;
347 filesize
= fp
->ff_size
;
349 lflag
= (ioflag
& IO_SYNC
);
351 if (offset
<= fp
->ff_size
) {
352 zero_off
= offset
& ~PAGE_MASK_64
;
354 /* Check to see whether the area between the zero_offset and the start
355 of the transfer to see whether is invalid and should be zero-filled
356 as part of the transfer:
358 if (offset
> zero_off
) {
359 if (rl_scan(&fp
->ff_invalidranges
, zero_off
, offset
- 1, &invalid_range
) != RL_NOOVERLAP
)
360 lflag
|= IO_HEADZEROFILL
;
363 off_t eof_page_base
= fp
->ff_size
& ~PAGE_MASK_64
;
365 /* The bytes between fp->ff_size and uio->uio_offset must never be
366 read without being zeroed. The current last block is filled with zeroes
367 if it holds valid data but in all cases merely do a little bookkeeping
368 to track the area from the end of the current last page to the start of
369 the area actually written. For the same reason only the bytes up to the
370 start of the page where this write will start is invalidated; any remainder
371 before uio->uio_offset is explicitly zeroed as part of the cluster_write.
373 Note that inval_start, the start of the page after the current EOF,
374 may be past the start of the write, in which case the zeroing
375 will be handled by the cluser_write of the actual data.
377 inval_start
= (fp
->ff_size
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
378 inval_end
= offset
& ~PAGE_MASK_64
;
379 zero_off
= fp
->ff_size
;
381 if ((fp
->ff_size
& PAGE_MASK_64
) &&
382 (rl_scan(&fp
->ff_invalidranges
,
385 &invalid_range
) != RL_NOOVERLAP
)) {
386 /* The page containing the EOF is not valid, so the
387 entire page must be made inaccessible now. If the write
388 starts on a page beyond the page containing the eof
389 (inval_end > eof_page_base), add the
390 whole page to the range to be invalidated. Otherwise
391 (i.e. if the write starts on the same page), zero-fill
392 the entire page explicitly now:
394 if (inval_end
> eof_page_base
) {
395 inval_start
= eof_page_base
;
397 zero_off
= eof_page_base
;
401 if (inval_start
< inval_end
) {
403 /* There's some range of data that's going to be marked invalid */
405 if (zero_off
< inval_start
) {
406 /* The pages between inval_start and inval_end are going to be invalidated,
407 and the actual write will start on a page past inval_end. Now's the last
408 chance to zero-fill the page containing the EOF:
412 retval
= cluster_write(vp
, (uio_t
) 0,
413 fp
->ff_size
, inval_start
,
415 lflag
| IO_HEADZEROFILL
| IO_NOZERODIRTY
);
416 hfs_lock(cp
, HFS_FORCE_LOCK
);
418 if (retval
) goto ioerr_exit
;
419 offset
= uio_offset(uio
);
422 /* Mark the remaining area of the newly allocated space as invalid: */
423 rl_add(inval_start
, inval_end
- 1 , &fp
->ff_invalidranges
);
425 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
426 zero_off
= fp
->ff_size
= inval_end
;
429 if (offset
> zero_off
) lflag
|= IO_HEADZEROFILL
;
432 /* Check to see whether the area between the end of the write and the end of
433 the page it falls in is invalid and should be zero-filled as part of the transfer:
435 tail_off
= (writelimit
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
436 if (tail_off
> filesize
) tail_off
= filesize
;
437 if (tail_off
> writelimit
) {
438 if (rl_scan(&fp
->ff_invalidranges
, writelimit
, tail_off
- 1, &invalid_range
) != RL_NOOVERLAP
) {
439 lflag
|= IO_TAILZEROFILL
;
444 * if the write starts beyond the current EOF (possibly advanced in the
445 * zeroing of the last block, above), then we'll zero fill from the current EOF
446 * to where the write begins:
448 * NOTE: If (and ONLY if) the portion of the file about to be written is
449 * before the current EOF it might be marked as invalid now and must be
450 * made readable (removed from the invalid ranges) before cluster_write
453 io_start
= (lflag
& IO_HEADZEROFILL
) ? zero_off
: offset
;
454 if (io_start
< fp
->ff_size
) {
457 io_end
= (lflag
& IO_TAILZEROFILL
) ? tail_off
: writelimit
;
458 rl_remove(io_start
, io_end
- 1, &fp
->ff_invalidranges
);
463 retval
= cluster_write(vp
, uio
, fp
->ff_size
, filesize
, zero_off
,
464 tail_off
, lflag
| IO_NOZERODIRTY
);
465 offset
= uio_offset(uio
);
466 if (offset
> fp
->ff_size
) {
467 fp
->ff_size
= offset
;
469 ubc_setsize(vp
, fp
->ff_size
); /* XXX check errors */
470 /* Files that are changing size are not hot file candidates. */
471 if (hfsmp
->hfc_stage
== HFC_RECORDING
)
472 fp
->ff_bytesread
= 0;
474 if (resid
> uio_resid(uio
)) {
475 cp
->c_touch_chgtime
= TRUE
;
476 cp
->c_touch_modtime
= TRUE
;
480 // XXXdbg - testing for vivek and paul lambert
482 if (flush_cache_on_write
&& ((ioflag
& IO_NOCACHE
) || vnode_isnocache(vp
))) {
483 VNOP_IOCTL(hfsmp
->hfs_devvp
, DKIOCSYNCHRONIZECACHE
, NULL
, FWRITE
, NULL
);
486 HFS_KNOTE(vp
, NOTE_WRITE
);
490 * If we successfully wrote any data, and we are not the superuser
491 * we clear the setuid and setgid bits as a precaution against
494 if (cp
->c_mode
& (S_ISUID
| S_ISGID
)) {
495 cred
= vfs_context_ucred(ap
->a_context
);
496 if (resid
> uio_resid(uio
) && cred
&& suser(cred
, NULL
)) {
498 hfs_lock(cp
, HFS_FORCE_LOCK
);
501 cp
->c_mode
&= ~(S_ISUID
| S_ISGID
);
505 if (ioflag
& IO_UNIT
) {
507 hfs_lock(cp
, HFS_FORCE_LOCK
);
510 (void)hfs_truncate(vp
, origFileSize
, ioflag
& IO_SYNC
,
512 // LP64todo - fix this! resid needs to by user_ssize_t
513 uio_setoffset(uio
, (uio_offset(uio
) - (resid
- uio_resid(uio
))));
514 uio_setresid(uio
, resid
);
515 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)hfsmp
->blockSize
;
517 } else if ((ioflag
& IO_SYNC
) && (resid
> uio_resid(uio
))) {
519 hfs_lock(cp
, HFS_FORCE_LOCK
);
522 retval
= hfs_update(vp
, TRUE
);
524 /* Updating vcbWrCnt doesn't need to be atomic. */
527 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 0)) | DBG_FUNC_END
,
528 (int)uio_offset(uio
), uio_resid(uio
), (int)fp
->ff_size
, (int)filebytes
, 0);
532 hfs_unlock_truncate(cp
);
536 /* support for the "bulk-access" fcntl */
538 #define CACHE_ELEMS 64
539 #define CACHE_LEVELS 16
540 #define PARENT_IDS_FLAG 0x100
542 /* from hfs_attrlist.c */
543 extern unsigned long DerivePermissionSummary(uid_t obj_uid
, gid_t obj_gid
,
544 mode_t obj_mode
, struct mount
*mp
,
545 kauth_cred_t cred
, struct proc
*p
);
547 /* from vfs/vfs_fsevents.c */
548 extern char *get_pathbuff(void);
549 extern void release_pathbuff(char *buff
);
551 struct access_cache
{
553 int cachehits
; /* these two for statistics gathering */
555 unsigned int *acache
;
560 uid_t uid
; /* IN: effective user id */
561 short flags
; /* IN: access requested (i.e. R_OK) */
562 short num_groups
; /* IN: number of groups user belongs to */
563 int num_files
; /* IN: number of files to process */
564 int *file_ids
; /* IN: array of file ids */
565 gid_t
*groups
; /* IN: array of groups */
566 short *access
; /* OUT: access info for each file (0 for 'has access') */
569 struct user_access_t
{
570 uid_t uid
; /* IN: effective user id */
571 short flags
; /* IN: access requested (i.e. R_OK) */
572 short num_groups
; /* IN: number of groups user belongs to */
573 int num_files
; /* IN: number of files to process */
574 user_addr_t file_ids
; /* IN: array of file ids */
575 user_addr_t groups
; /* IN: array of groups */
576 user_addr_t access
; /* OUT: access info for each file (0 for 'has access') */
580 * Perform a binary search for the given parent_id. Return value is
581 * found/not found boolean, and indexp will be the index of the item
582 * or the index at which to insert the item if it's not found.
585 lookup_bucket(struct access_cache
*cache
, int *indexp
, cnid_t parent_id
)
588 int index
, matches
= 0;
590 if (cache
->numcached
== 0) {
592 return 0; // table is empty, so insert at index=0 and report no match
595 if (cache
->numcached
> CACHE_ELEMS
) {
596 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
597 cache->numcached, CACHE_ELEMS);*/
598 cache
->numcached
= CACHE_ELEMS
;
602 hi
= cache
->numcached
- 1;
605 /* perform binary search for parent_id */
607 unsigned int mid
= (hi
- lo
)/2 + lo
;
608 unsigned int this_id
= cache
->acache
[mid
];
610 if (parent_id
== this_id
) {
615 if (parent_id
< this_id
) {
620 if (parent_id
> this_id
) {
626 /* check if lo and hi converged on the match */
627 if (parent_id
== cache
->acache
[hi
]) {
631 /* if no existing entry found, find index for new one */
633 index
= (parent_id
< cache
->acache
[hi
]) ? hi
: hi
+ 1;
644 * Add a node to the access_cache at the given index (or do a lookup first
645 * to find the index if -1 is passed in). We currently do a replace rather
646 * than an insert if the cache is full.
649 add_node(struct access_cache
*cache
, int index
, cnid_t nodeID
, int access
)
651 int lookup_index
= -1;
653 /* need to do a lookup first if -1 passed for index */
655 if (lookup_bucket(cache
, &lookup_index
, nodeID
)) {
656 if (cache
->haveaccess
[lookup_index
] != access
) {
657 /* change access info for existing entry... should never happen */
658 cache
->haveaccess
[lookup_index
] = access
;
661 /* mission accomplished */
664 index
= lookup_index
;
669 /* if the cache is full, do a replace rather than an insert */
670 if (cache
->numcached
>= CACHE_ELEMS
) {
671 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
672 cache
->numcached
= CACHE_ELEMS
-1;
674 if (index
> cache
->numcached
) {
675 // printf("index %d pinned to %d\n", index, cache->numcached);
676 index
= cache
->numcached
;
678 } else if (index
>= 0 && index
< cache
->numcached
) {
679 /* only do bcopy if we're inserting */
680 bcopy( cache
->acache
+index
, cache
->acache
+(index
+1), (cache
->numcached
- index
)*sizeof(int) );
681 bcopy( cache
->haveaccess
+index
, cache
->haveaccess
+(index
+1), (cache
->numcached
- index
)*sizeof(Boolean
) );
684 cache
->acache
[index
] = nodeID
;
685 cache
->haveaccess
[index
] = access
;
698 snoop_callback(const struct cat_desc
*descp
, const struct cat_attr
*attrp
, void * arg
)
700 struct cinfo
*cip
= (struct cinfo
*)arg
;
702 cip
->uid
= attrp
->ca_uid
;
703 cip
->gid
= attrp
->ca_gid
;
704 cip
->mode
= attrp
->ca_mode
;
705 cip
->parentcnid
= descp
->cd_parentcnid
;
711 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
712 * isn't incore, then go to the catalog.
715 do_attr_lookup(struct hfsmount
*hfsmp
, struct access_cache
*cache
, dev_t dev
, cnid_t cnid
,
716 struct cnode
*skip_cp
, CatalogKey
*keyp
, struct cat_attr
*cnattrp
, struct proc
*p
)
720 /* if this id matches the one the fsctl was called with, skip the lookup */
721 if (cnid
== skip_cp
->c_cnid
) {
722 cnattrp
->ca_uid
= skip_cp
->c_uid
;
723 cnattrp
->ca_gid
= skip_cp
->c_gid
;
724 cnattrp
->ca_mode
= skip_cp
->c_mode
;
725 keyp
->hfsPlus
.parentID
= skip_cp
->c_parentcnid
;
729 /* otherwise, check the cnode hash incase the file/dir is incore */
730 if (hfs_chash_snoop(dev
, cnid
, snoop_callback
, &c_info
) == 0) {
731 cnattrp
->ca_uid
= c_info
.uid
;
732 cnattrp
->ca_gid
= c_info
.gid
;
733 cnattrp
->ca_mode
= c_info
.mode
;
734 keyp
->hfsPlus
.parentID
= c_info
.parentcnid
;
738 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_CATALOG
, HFS_SHARED_LOCK
);
740 /* lookup this cnid in the catalog */
741 error
= cat_getkeyplusattr(hfsmp
, cnid
, keyp
, cnattrp
);
743 hfs_systemfile_unlock(hfsmp
, lockflags
);
753 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
754 * up to CACHE_LEVELS as we progress towards the root.
757 do_access_check(struct hfsmount
*hfsmp
, int *err
, struct access_cache
*cache
, HFSCatalogNodeID nodeID
,
758 struct cnode
*skip_cp
, struct proc
*theProcPtr
, kauth_cred_t myp_ucred
, dev_t dev
)
762 HFSCatalogNodeID thisNodeID
;
763 unsigned long myPerms
;
764 struct cat_attr cnattr
;
765 int cache_index
= -1;
768 int i
= 0, ids_to_cache
= 0;
769 int parent_ids
[CACHE_LEVELS
];
771 /* root always has access */
772 if (!suser(myp_ucred
, NULL
)) {
777 while (thisNodeID
>= kRootDirID
) {
778 myResult
= 0; /* default to "no access" */
780 /* check the cache before resorting to hitting the catalog */
782 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
783 * to look any further after hitting cached dir */
785 if (lookup_bucket(cache
, &cache_index
, thisNodeID
)) {
787 myResult
= cache
->haveaccess
[cache_index
];
788 goto ExitThisRoutine
;
791 /* remember which parents we want to cache */
792 if (ids_to_cache
< CACHE_LEVELS
) {
793 parent_ids
[ids_to_cache
] = thisNodeID
;
797 /* do the lookup (checks the cnode hash, then the catalog) */
798 myErr
= do_attr_lookup(hfsmp
, cache
, dev
, thisNodeID
, skip_cp
, &catkey
, &cnattr
, theProcPtr
);
800 goto ExitThisRoutine
; /* no access */
803 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
,
804 cnattr
.ca_mode
, hfsmp
->hfs_mp
,
805 myp_ucred
, theProcPtr
);
807 if ( (myPerms
& X_OK
) == 0 ) {
809 goto ExitThisRoutine
; /* no access */
812 /* up the hierarchy we go */
813 thisNodeID
= catkey
.hfsPlus
.parentID
;
816 /* if here, we have access to this node */
821 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
826 /* cache the parent directory(ies) */
827 for (i
= 0; i
< ids_to_cache
; i
++) {
828 /* small optimization: get rid of double-lookup for all these */
829 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
830 add_node(cache
, -1, parent_ids
[i
], myResult
);
835 /* end "bulk-access" support */
840 * Callback for use with freeze ioctl.
843 hfs_freezewrite_callback(struct vnode
*vp
, void *cargs
)
845 vnode_waitforwrites(vp
, 0, 0, 0, "hfs freeze");
851 * Control filesystem operating characteristics.
854 hfs_vnop_ioctl( struct vnop_ioctl_args
/* {
859 vfs_context_t a_context;
862 struct vnode
* vp
= ap
->a_vp
;
863 struct hfsmount
*hfsmp
= VTOHFS(vp
);
864 vfs_context_t context
= ap
->a_context
;
865 kauth_cred_t cred
= vfs_context_ucred(context
);
866 proc_t p
= vfs_context_proc(context
);
867 struct vfsstatfs
*vfsp
;
870 is64bit
= proc_is64bit(p
);
872 switch (ap
->a_command
) {
874 case HFS_RESIZE_VOLUME
: {
878 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
879 if (suser(cred
, NULL
) &&
880 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
881 return (EACCES
); /* must be owner of file system */
883 if (!vnode_isvroot(vp
)) {
886 newsize
= *(u_int64_t
*)ap
->a_data
;
887 cursize
= (u_int64_t
)hfsmp
->totalBlocks
* (u_int64_t
)hfsmp
->blockSize
;
889 if (newsize
> cursize
) {
890 return hfs_extendfs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
891 } else if (newsize
< cursize
) {
892 return hfs_truncatefs(hfsmp
, *(u_int64_t
*)ap
->a_data
, context
);
897 case HFS_CHANGE_NEXT_ALLOCATION
: {
900 if (vnode_vfsisrdonly(vp
)) {
903 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
904 if (suser(cred
, NULL
) &&
905 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
906 return (EACCES
); /* must be owner of file system */
908 if (!vnode_isvroot(vp
)) {
911 location
= *(u_int32_t
*)ap
->a_data
;
912 if (location
> hfsmp
->totalBlocks
- 1) {
915 /* Return previous value. */
916 *(u_int32_t
*)ap
->a_data
= hfsmp
->nextAllocation
;
917 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
918 hfsmp
->nextAllocation
= location
;
919 hfsmp
->vcbFlags
|= 0xFF00;
920 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
924 #ifdef HFS_SPARSE_DEV
925 case HFS_SETBACKINGSTOREINFO
: {
926 struct vnode
* bsfs_rootvp
;
927 struct vnode
* di_vp
;
928 struct hfs_backingstoreinfo
*bsdata
;
931 if (hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) {
934 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
935 if (suser(cred
, NULL
) &&
936 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
937 return (EACCES
); /* must be owner of file system */
939 bsdata
= (struct hfs_backingstoreinfo
*)ap
->a_data
;
940 if (bsdata
== NULL
) {
943 if ((error
= file_vnode(bsdata
->backingfd
, &di_vp
))) {
946 if ((error
= vnode_getwithref(di_vp
))) {
947 file_drop(bsdata
->backingfd
);
951 if (vnode_mount(vp
) == vnode_mount(di_vp
)) {
952 (void)vnode_put(di_vp
);
953 file_drop(bsdata
->backingfd
);
958 * Obtain the backing fs root vnode and keep a reference
959 * on it. This reference will be dropped in hfs_unmount.
961 error
= VFS_ROOT(vnode_mount(di_vp
), &bsfs_rootvp
, NULL
); /* XXX use context! */
963 (void)vnode_put(di_vp
);
964 file_drop(bsdata
->backingfd
);
967 vnode_ref(bsfs_rootvp
);
968 vnode_put(bsfs_rootvp
);
970 hfsmp
->hfs_backingfs_rootvp
= bsfs_rootvp
;
971 hfsmp
->hfs_flags
|= HFS_HAS_SPARSE_DEVICE
;
972 hfsmp
->hfs_sparsebandblks
= bsdata
->bandsize
/ HFSTOVCB(hfsmp
)->blockSize
;
973 hfsmp
->hfs_sparsebandblks
*= 4;
975 (void)vnode_put(di_vp
);
976 file_drop(bsdata
->backingfd
);
979 case HFS_CLRBACKINGSTOREINFO
: {
980 struct vnode
* tmpvp
;
982 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
983 if (suser(cred
, NULL
) &&
984 kauth_cred_getuid(cred
) != vfsp
->f_owner
) {
985 return (EACCES
); /* must be owner of file system */
987 if ((hfsmp
->hfs_flags
& HFS_HAS_SPARSE_DEVICE
) &&
988 hfsmp
->hfs_backingfs_rootvp
) {
990 hfsmp
->hfs_flags
&= ~HFS_HAS_SPARSE_DEVICE
;
991 tmpvp
= hfsmp
->hfs_backingfs_rootvp
;
992 hfsmp
->hfs_backingfs_rootvp
= NULLVP
;
993 hfsmp
->hfs_sparsebandblks
= 0;
998 #endif /* HFS_SPARSE_DEV */
1007 mp
= vnode_mount(vp
);
1008 hfsmp
= VFSTOHFS(mp
);
1013 lck_rw_lock_exclusive(&hfsmp
->hfs_insync
);
1015 task
= current_task();
1016 task_working_set_disable(task
);
1018 // flush things before we get started to try and prevent
1019 // dirty data from being paged out while we're frozen.
1020 // note: can't do this after taking the lock as it will
1021 // deadlock against ourselves.
1022 vnode_iterate(mp
, 0, hfs_freezewrite_callback
, NULL
);
1023 hfs_global_exclusive_lock_acquire(hfsmp
);
1024 journal_flush(hfsmp
->jnl
);
1026 // don't need to iterate on all vnodes, we just need to
1027 // wait for writes to the system files and the device vnode
1028 if (HFSTOVCB(hfsmp
)->extentsRefNum
)
1029 vnode_waitforwrites(HFSTOVCB(hfsmp
)->extentsRefNum
, 0, 0, 0, "hfs freeze");
1030 if (HFSTOVCB(hfsmp
)->catalogRefNum
)
1031 vnode_waitforwrites(HFSTOVCB(hfsmp
)->catalogRefNum
, 0, 0, 0, "hfs freeze");
1032 if (HFSTOVCB(hfsmp
)->allocationsRefNum
)
1033 vnode_waitforwrites(HFSTOVCB(hfsmp
)->allocationsRefNum
, 0, 0, 0, "hfs freeze");
1034 if (hfsmp
->hfs_attribute_vp
)
1035 vnode_waitforwrites(hfsmp
->hfs_attribute_vp
, 0, 0, 0, "hfs freeze");
1036 vnode_waitforwrites(hfsmp
->hfs_devvp
, 0, 0, 0, "hfs freeze");
1038 hfsmp
->hfs_freezing_proc
= current_proc();
1047 // if we're not the one who froze the fs then we
1049 if (hfsmp
->hfs_freezing_proc
!= current_proc()) {
1053 // NOTE: if you add code here, also go check the
1054 // code that "thaws" the fs in hfs_vnop_close()
1056 hfsmp
->hfs_freezing_proc
= NULL
;
1057 hfs_global_exclusive_lock_release(hfsmp
);
1058 lck_rw_unlock_exclusive(&hfsmp
->hfs_insync
);
1063 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1064 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1066 case HFS_BULKACCESS_FSCTL
:
1067 case HFS_BULKACCESS
: {
1069 * NOTE: on entry, the vnode is locked. Incase this vnode
1070 * happens to be in our list of file_ids, we'll note it
1071 * avoid calling hfs_chashget_nowait() on that id as that
1072 * will cause a "locking against myself" panic.
1074 Boolean check_leaf
= true;
1076 struct user_access_t
*user_access_structp
;
1077 struct user_access_t tmp_user_access_t
;
1078 struct access_cache cache
;
1082 dev_t dev
= VTOC(vp
)->c_dev
;
1085 struct ucred myucred
; /* XXX ILLEGAL */
1087 int *file_ids
= NULL
;
1088 short *access
= NULL
;
1091 cnid_t prevParent_cnid
= 0;
1092 unsigned long myPerms
;
1094 struct cat_attr cnattr
;
1096 struct cnode
*skip_cp
= VTOC(vp
);
1097 struct vfs_context my_context
;
1099 /* first, return error if not run as root */
1100 if (cred
->cr_ruid
!= 0) {
1104 /* initialize the local cache and buffers */
1105 cache
.numcached
= 0;
1106 cache
.cachehits
= 0;
1109 file_ids
= (int *) get_pathbuff();
1110 access
= (short *) get_pathbuff();
1111 cache
.acache
= (int *) get_pathbuff();
1112 cache
.haveaccess
= (Boolean
*) get_pathbuff();
1114 if (file_ids
== NULL
|| access
== NULL
|| cache
.acache
== NULL
|| cache
.haveaccess
== NULL
) {
1115 release_pathbuff((char *) file_ids
);
1116 release_pathbuff((char *) access
);
1117 release_pathbuff((char *) cache
.acache
);
1118 release_pathbuff((char *) cache
.haveaccess
);
1123 /* struct copyin done during dispatch... need to copy file_id array separately */
1124 if (ap
->a_data
== NULL
) {
1126 goto err_exit_bulk_access
;
1130 user_access_structp
= (struct user_access_t
*)ap
->a_data
;
1133 struct access_t
* accessp
= (struct access_t
*)ap
->a_data
;
1134 tmp_user_access_t
.uid
= accessp
->uid
;
1135 tmp_user_access_t
.flags
= accessp
->flags
;
1136 tmp_user_access_t
.num_groups
= accessp
->num_groups
;
1137 tmp_user_access_t
.num_files
= accessp
->num_files
;
1138 tmp_user_access_t
.file_ids
= CAST_USER_ADDR_T(accessp
->file_ids
);
1139 tmp_user_access_t
.groups
= CAST_USER_ADDR_T(accessp
->groups
);
1140 tmp_user_access_t
.access
= CAST_USER_ADDR_T(accessp
->access
);
1141 user_access_structp
= &tmp_user_access_t
;
1144 num_files
= user_access_structp
->num_files
;
1145 if (num_files
< 1) {
1146 goto err_exit_bulk_access
;
1148 if (num_files
> 256) {
1150 goto err_exit_bulk_access
;
1153 if ((error
= copyin(user_access_structp
->file_ids
, (caddr_t
)file_ids
,
1154 num_files
* sizeof(int)))) {
1155 goto err_exit_bulk_access
;
1158 /* fill in the ucred structure */
1159 flags
= user_access_structp
->flags
;
1160 if ((flags
& (F_OK
| R_OK
| W_OK
| X_OK
)) == 0) {
1164 /* check if we've been passed leaf node ids or parent ids */
1165 if (flags
& PARENT_IDS_FLAG
) {
1169 memset(&myucred
, 0, sizeof(myucred
));
1171 myucred
.cr_uid
= myucred
.cr_ruid
= myucred
.cr_svuid
= user_access_structp
->uid
;
1172 myucred
.cr_ngroups
= user_access_structp
->num_groups
;
1173 if (myucred
.cr_ngroups
< 1 || myucred
.cr_ngroups
> 16) {
1174 myucred
.cr_ngroups
= 0;
1175 } else if ((error
= copyin(user_access_structp
->groups
, (caddr_t
)myucred
.cr_groups
,
1176 myucred
.cr_ngroups
* sizeof(gid_t
)))) {
1177 goto err_exit_bulk_access
;
1179 myucred
.cr_rgid
= myucred
.cr_svgid
= myucred
.cr_groups
[0];
1180 myucred
.cr_gmuid
= myucred
.cr_uid
;
1182 my_context
.vc_proc
= p
;
1183 my_context
.vc_ucred
= &myucred
;
1185 /* Check access to each file_id passed in */
1186 for (i
= 0; i
< num_files
; i
++) {
1188 cnid
= (cnid_t
) file_ids
[i
];
1190 /* root always has access */
1191 if (!suser(&myucred
, NULL
)) {
1198 /* do the lookup (checks the cnode hash, then the catalog) */
1199 error
= do_attr_lookup(hfsmp
, &cache
, dev
, cnid
, skip_cp
, &catkey
, &cnattr
, p
);
1201 access
[i
] = (short) error
;
1205 /* before calling CheckAccess(), check the target file for read access */
1206 myPerms
= DerivePermissionSummary(cnattr
.ca_uid
, cnattr
.ca_gid
,
1207 cnattr
.ca_mode
, hfsmp
->hfs_mp
, &myucred
, p
);
1210 /* fail fast if no access */
1211 if ((myPerms
& flags
) == 0) {
1216 /* we were passed an array of parent ids */
1217 catkey
.hfsPlus
.parentID
= cnid
;
1220 /* if the last guy had the same parent and had access, we're done */
1221 if (i
> 0 && catkey
.hfsPlus
.parentID
== prevParent_cnid
&& access
[i
-1] == 0) {
1227 myaccess
= do_access_check(hfsmp
, &error
, &cache
, catkey
.hfsPlus
.parentID
,
1228 skip_cp
, p
, &myucred
, dev
);
1231 access
[i
] = 0; // have access.. no errors to report
1233 access
[i
] = (error
!= 0 ? (short) error
: EACCES
);
1236 prevParent_cnid
= catkey
.hfsPlus
.parentID
;
1240 cnid
= (cnid_t
)file_ids
[i
];
1242 while (cnid
>= kRootDirID
) {
1243 /* get the vnode for this cnid */
1244 myErr
= hfs_vget(hfsmp
, cnid
, &vp
, 0);
1250 cnid
= VTOC(vp
)->c_parentcnid
;
1252 hfs_unlock(VTOC(vp
));
1253 if (vnode_vtype(vp
) == VDIR
) {
1254 myErr
= vnode_authorize(vp
, NULL
, (KAUTH_VNODE_SEARCH
| KAUTH_VNODE_LIST_DIRECTORY
), &my_context
);
1256 myErr
= vnode_authorize(vp
, NULL
, KAUTH_VNODE_READ_DATA
, &my_context
);
1267 /* copyout the access array */
1268 if ((error
= copyout((caddr_t
)access
, user_access_structp
->access
,
1269 num_files
* sizeof (short)))) {
1270 goto err_exit_bulk_access
;
1273 err_exit_bulk_access
:
1275 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1277 release_pathbuff((char *) cache
.acache
);
1278 release_pathbuff((char *) cache
.haveaccess
);
1279 release_pathbuff((char *) file_ids
);
1280 release_pathbuff((char *) access
);
1283 } /* HFS_BULKACCESS */
1285 case HFS_SETACLSTATE
: {
1288 if (ap
->a_data
== NULL
) {
1292 vfsp
= vfs_statfs(HFSTOVFS(hfsmp
));
1293 state
= *(int *)ap
->a_data
;
1295 // super-user can enable or disable acl's on a volume.
1296 // the volume owner can only enable acl's
1297 if (!is_suser() && (state
== 0 || kauth_cred_getuid(cred
) != vfsp
->f_owner
)) {
1300 if (state
== 0 || state
== 1)
1301 return hfs_setextendedsecurity(hfsmp
, state
);
1309 error
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
);
1311 error
= hfs_fsync(vp
, MNT_NOWAIT
, TRUE
, p
);
1312 hfs_unlock(VTOC(vp
));
1319 register struct cnode
*cp
;
1322 if (!vnode_isreg(vp
))
1325 error
= hfs_lock(VTOC(vp
), HFS_EXCLUSIVE_LOCK
);
1329 * used by regression test to determine if
1330 * all the dirty pages (via write) have been cleaned
1331 * after a call to 'fsysnc'.
1333 error
= is_file_clean(vp
, VTOF(vp
)->ff_size
);
1340 register struct radvisory
*ra
;
1341 struct filefork
*fp
;
1344 if (!vnode_isreg(vp
))
1347 ra
= (struct radvisory
*)(ap
->a_data
);
1350 /* Protect against a size change. */
1351 hfs_lock_truncate(VTOC(vp
), TRUE
);
1353 if (ra
->ra_offset
>= fp
->ff_size
) {
1356 error
= advisory_read(vp
, fp
->ff_size
, ra
->ra_offset
, ra
->ra_count
);
1359 hfs_unlock_truncate(VTOC(vp
));
1363 case F_READBOOTSTRAP
:
1364 case F_WRITEBOOTSTRAP
:
1366 struct vnode
*devvp
= NULL
;
1367 user_fbootstraptransfer_t
*user_bootstrapp
;
1371 daddr64_t blockNumber
;
1375 user_fbootstraptransfer_t user_bootstrap
;
1377 if (!vnode_isvroot(vp
))
1379 /* LP64 - when caller is a 64 bit process then we are passed a pointer
1380 * to a user_fbootstraptransfer_t else we get a pointer to a
1381 * fbootstraptransfer_t which we munge into a user_fbootstraptransfer_t
1384 user_bootstrapp
= (user_fbootstraptransfer_t
*)ap
->a_data
;
1387 fbootstraptransfer_t
*bootstrapp
= (fbootstraptransfer_t
*)ap
->a_data
;
1388 user_bootstrapp
= &user_bootstrap
;
1389 user_bootstrap
.fbt_offset
= bootstrapp
->fbt_offset
;
1390 user_bootstrap
.fbt_length
= bootstrapp
->fbt_length
;
1391 user_bootstrap
.fbt_buffer
= CAST_USER_ADDR_T(bootstrapp
->fbt_buffer
);
1393 if (user_bootstrapp
->fbt_offset
+ user_bootstrapp
->fbt_length
> 1024)
1396 devvp
= VTOHFS(vp
)->hfs_devvp
;
1397 auio
= uio_create(1, user_bootstrapp
->fbt_offset
,
1398 is64bit
? UIO_USERSPACE64
: UIO_USERSPACE32
,
1399 (ap
->a_command
== F_WRITEBOOTSTRAP
) ? UIO_WRITE
: UIO_READ
);
1400 uio_addiov(auio
, user_bootstrapp
->fbt_buffer
, user_bootstrapp
->fbt_length
);
1402 devBlockSize
= vfs_devblocksize(vnode_mount(vp
));
1404 while (uio_resid(auio
) > 0) {
1405 blockNumber
= uio_offset(auio
) / devBlockSize
;
1406 error
= (int)buf_bread(devvp
, blockNumber
, devBlockSize
, cred
, &bp
);
1408 if (bp
) buf_brelse(bp
);
1413 blockOffset
= uio_offset(auio
) % devBlockSize
;
1414 xfersize
= devBlockSize
- blockOffset
;
1415 error
= uiomove((caddr_t
)buf_dataptr(bp
) + blockOffset
, (int)xfersize
, auio
);
1421 if (uio_rw(auio
) == UIO_WRITE
) {
1422 error
= VNOP_BWRITE(bp
);
1435 case _IOC(IOC_OUT
,'h', 4, 0): /* Create date in local time */
1438 *(user_time_t
*)(ap
->a_data
) = (user_time_t
) (to_bsd_time(VTOVCB(vp
)->localCreateDate
));
1441 *(time_t *)(ap
->a_data
) = to_bsd_time(VTOVCB(vp
)->localCreateDate
);
1446 case HFS_GET_MOUNT_TIME
:
1447 return copyout(&hfsmp
->hfs_mount_time
, CAST_USER_ADDR_T(ap
->a_data
), sizeof(hfsmp
->hfs_mount_time
));
1450 case HFS_GET_LAST_MTIME
:
1451 return copyout(&hfsmp
->hfs_last_mounted_mtime
, CAST_USER_ADDR_T(ap
->a_data
), sizeof(hfsmp
->hfs_last_mounted_mtime
));
1454 case HFS_SET_BOOT_INFO
:
1455 if (!vnode_isvroot(vp
))
1457 if (!kauth_cred_issuser(cred
) && (kauth_cred_getuid(cred
) != vfs_statfs(HFSTOVFS(hfsmp
))->f_owner
))
1458 return(EACCES
); /* must be superuser or owner of filesystem */
1459 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1460 bcopy(ap
->a_data
, &hfsmp
->vcbFndrInfo
, sizeof(hfsmp
->vcbFndrInfo
));
1461 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1462 (void) hfs_flushvolumeheader(hfsmp
, MNT_WAIT
, 0);
1465 case HFS_GET_BOOT_INFO
:
1466 if (!vnode_isvroot(vp
))
1468 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1469 bcopy(&hfsmp
->vcbFndrInfo
, ap
->a_data
, sizeof(hfsmp
->vcbFndrInfo
));
1470 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1477 /* Should never get here */
1485 hfs_vnop_select(__unused
struct vnop_select_args
*ap
)
1487 struct vnop_select_args {
1492 vfs_context_t a_context;
1497 * We should really check to see if I/O is possible.
1503 * Converts a logical block number to a physical block, and optionally returns
1504 * the amount of remaining blocks in a run. The logical block is based on hfsNode.logBlockSize.
1505 * The physical block number is based on the device block size, currently its 512.
1506 * The block run is returned in logical blocks, and is the REMAINING amount of blocks
1509 hfs_bmap(struct vnode
*vp
, daddr_t bn
, struct vnode
**vpp
, daddr64_t
*bnp
, int *runp
)
1511 struct cnode
*cp
= VTOC(vp
);
1512 struct filefork
*fp
= VTOF(vp
);
1513 struct hfsmount
*hfsmp
= VTOHFS(vp
);
1514 int retval
= E_NONE
;
1515 daddr_t logBlockSize
;
1516 size_t bytesContAvail
= 0;
1517 off_t blockposition
;
1522 * Check for underlying vnode requests and ensure that logical
1523 * to physical mapping is requested.
1530 logBlockSize
= GetLogicalBlockSize(vp
);
1531 blockposition
= (off_t
)bn
* (off_t
)logBlockSize
;
1533 lockExtBtree
= overflow_extents(fp
);
1536 lockflags
= hfs_systemfile_lock(hfsmp
, SFL_EXTENTS
, HFS_SHARED_LOCK
);
1538 retval
= MacToVFSError(
1539 MapFileBlockC (HFSTOVCB(hfsmp
),
1547 hfs_systemfile_unlock(hfsmp
, lockflags
);
1549 if (retval
== E_NONE
) {
1550 /* Figure out how many read ahead blocks there are */
1552 if (can_cluster(logBlockSize
)) {
1553 /* Make sure this result never goes negative: */
1554 *runp
= (bytesContAvail
< logBlockSize
) ? 0 : (bytesContAvail
/ logBlockSize
) - 1;
1564 * Convert logical block number to file offset.
1567 hfs_vnop_blktooff(struct vnop_blktooff_args
*ap
)
1569 struct vnop_blktooff_args {
1576 if (ap
->a_vp
== NULL
)
1578 *ap
->a_offset
= (off_t
)ap
->a_lblkno
* (off_t
)GetLogicalBlockSize(ap
->a_vp
);
1584 * Convert file offset to logical block number.
1587 hfs_vnop_offtoblk(struct vnop_offtoblk_args
*ap
)
1589 struct vnop_offtoblk_args {
1592 daddr64_t *a_lblkno;
1596 if (ap
->a_vp
== NULL
)
1598 *ap
->a_lblkno
= (daddr64_t
)(ap
->a_offset
/ (off_t
)GetLogicalBlockSize(ap
->a_vp
));
1604 * Map file offset to physical block number.
1606 * System file cnodes are expected to be locked (shared or exclusive).
1609 hfs_vnop_blockmap(struct vnop_blockmap_args
*ap
)
1611 struct vnop_blockmap_args {
1619 vfs_context_t a_context;
1623 struct vnode
*vp
= ap
->a_vp
;
1625 struct filefork
*fp
;
1626 struct hfsmount
*hfsmp
;
1627 size_t bytesContAvail
= 0;
1628 int retval
= E_NONE
;
1631 struct rl_entry
*invalid_range
;
1632 enum rl_overlaptype overlaptype
;
1636 /* Do not allow blockmap operation on a directory */
1637 if (vnode_isdir(vp
)) {
1642 * Check for underlying vnode requests and ensure that logical
1643 * to physical mapping is requested.
1645 if (ap
->a_bpn
== NULL
)
1648 if ( !vnode_issystem(vp
) && !vnode_islnk(vp
)) {
1649 if (VTOC(vp
)->c_lockowner
!= current_thread()) {
1650 hfs_lock(VTOC(vp
), HFS_FORCE_LOCK
);
1654 panic("blockmap: %s cnode lock already held!\n",
1655 cp
->c_desc
.cd_nameptr
? cp
->c_desc
.cd_nameptr
: "");
1663 if (fp
->ff_unallocblocks
) {
1664 if (hfs_start_transaction(hfsmp
) != 0) {
1670 syslocks
= SFL_EXTENTS
| SFL_BITMAP
;
1672 } else if (overflow_extents(fp
)) {
1673 syslocks
= SFL_EXTENTS
;
1677 lockflags
= hfs_systemfile_lock(hfsmp
, syslocks
, HFS_EXCLUSIVE_LOCK
);
1680 * Check for any delayed allocations.
1682 if (fp
->ff_unallocblocks
) {
1684 u_int32_t loanedBlocks
;
1687 // Make sure we have a transaction. It's possible
1688 // that we came in and fp->ff_unallocblocks was zero
1689 // but during the time we blocked acquiring the extents
1690 // btree, ff_unallocblocks became non-zero and so we
1691 // will need to start a transaction.
1693 if (started_tr
== 0) {
1695 hfs_systemfile_unlock(hfsmp
, lockflags
);
1702 * Note: ExtendFileC will Release any blocks on loan and
1703 * aquire real blocks. So we ask to extend by zero bytes
1704 * since ExtendFileC will account for the virtual blocks.
1707 loanedBlocks
= fp
->ff_unallocblocks
;
1708 retval
= ExtendFileC(hfsmp
, (FCB
*)fp
, 0, 0,
1709 kEFAllMask
| kEFNoClumpMask
, &actbytes
);
1712 fp
->ff_unallocblocks
= loanedBlocks
;
1713 cp
->c_blocks
+= loanedBlocks
;
1714 fp
->ff_blocks
+= loanedBlocks
;
1716 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
1717 hfsmp
->loanedBlocks
+= loanedBlocks
;
1718 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
1722 hfs_systemfile_unlock(hfsmp
, lockflags
);
1723 cp
->c_flag
|= C_MODIFIED
;
1725 (void) hfs_update(vp
, TRUE
);
1726 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
1728 hfs_end_transaction(hfsmp
);
1734 retval
= MapFileBlockC(hfsmp
, (FCB
*)fp
, ap
->a_size
, ap
->a_foffset
,
1735 ap
->a_bpn
, &bytesContAvail
);
1737 hfs_systemfile_unlock(hfsmp
, lockflags
);
1742 (void) hfs_update(vp
, TRUE
);
1743 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
1744 hfs_end_transaction(hfsmp
);
1751 /* Adjust the mapping information for invalid file ranges: */
1752 overlaptype
= rl_scan(&fp
->ff_invalidranges
, ap
->a_foffset
,
1753 ap
->a_foffset
+ (off_t
)bytesContAvail
- 1,
1755 if (overlaptype
!= RL_NOOVERLAP
) {
1756 switch(overlaptype
) {
1757 case RL_MATCHINGOVERLAP
:
1758 case RL_OVERLAPCONTAINSRANGE
:
1759 case RL_OVERLAPSTARTSBEFORE
:
1760 /* There's no valid block for this byte offset: */
1761 *ap
->a_bpn
= (daddr64_t
)-1;
1762 /* There's no point limiting the amount to be returned
1763 * if the invalid range that was hit extends all the way
1764 * to the EOF (i.e. there's no valid bytes between the
1765 * end of this range and the file's EOF):
1767 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
1768 (invalid_range
->rl_end
+ 1 - ap
->a_foffset
< bytesContAvail
)) {
1769 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
1773 case RL_OVERLAPISCONTAINED
:
1774 case RL_OVERLAPENDSAFTER
:
1775 /* The range of interest hits an invalid block before the end: */
1776 if (invalid_range
->rl_start
== ap
->a_foffset
) {
1777 /* There's actually no valid information to be had starting here: */
1778 *ap
->a_bpn
= (daddr64_t
)-1;
1779 if (((off_t
)fp
->ff_size
> (invalid_range
->rl_end
+ 1)) &&
1780 (invalid_range
->rl_end
+ 1 - ap
->a_foffset
< bytesContAvail
)) {
1781 bytesContAvail
= invalid_range
->rl_end
+ 1 - ap
->a_foffset
;
1784 bytesContAvail
= invalid_range
->rl_start
- ap
->a_foffset
;
1791 if (bytesContAvail
> ap
->a_size
)
1792 bytesContAvail
= ap
->a_size
;
1795 *ap
->a_run
= bytesContAvail
;
1798 *(int *)ap
->a_poff
= 0;
1803 return (MacToVFSError(retval
));
1808 * prepare and issue the I/O
1809 * buf_strategy knows how to deal
1810 * with requests that require
1814 hfs_vnop_strategy(struct vnop_strategy_args
*ap
)
1816 buf_t bp
= ap
->a_bp
;
1817 vnode_t vp
= buf_vnode(bp
);
1818 struct cnode
*cp
= VTOC(vp
);
1820 return (buf_strategy(cp
->c_devvp
, ap
));
1825 do_hfs_truncate(struct vnode
*vp
, off_t length
, int flags
, int skipsetsize
, vfs_context_t context
)
1827 register struct cnode
*cp
= VTOC(vp
);
1828 struct filefork
*fp
= VTOF(vp
);
1829 struct proc
*p
= vfs_context_proc(context
);;
1830 kauth_cred_t cred
= vfs_context_ucred(context
);
1833 off_t actualBytesAdded
;
1835 u_int64_t old_filesize
;
1838 struct hfsmount
*hfsmp
;
1841 blksize
= VTOVCB(vp
)->blockSize
;
1842 fileblocks
= fp
->ff_blocks
;
1843 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
1844 old_filesize
= fp
->ff_size
;
1846 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_START
,
1847 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
1852 /* This should only happen with a corrupt filesystem */
1853 if ((off_t
)fp
->ff_size
< 0)
1856 if ((!ISHFSPLUS(VTOVCB(vp
))) && (length
> (off_t
)MAXHFSFILESIZE
))
1863 /* Files that are changing size are not hot file candidates. */
1864 if (hfsmp
->hfc_stage
== HFC_RECORDING
) {
1865 fp
->ff_bytesread
= 0;
1869 * We cannot just check if fp->ff_size == length (as an optimization)
1870 * since there may be extra physical blocks that also need truncation.
1873 if ((retval
= hfs_getinoquota(cp
)))
1878 * Lengthen the size of the file. We must ensure that the
1879 * last byte of the file is allocated. Since the smallest
1880 * value of ff_size is 0, length will be at least 1.
1882 if (length
> (off_t
)fp
->ff_size
) {
1884 retval
= hfs_chkdq(cp
, (int64_t)(roundup(length
- filebytes
, blksize
)),
1890 * If we don't have enough physical space then
1891 * we need to extend the physical size.
1893 if (length
> filebytes
) {
1895 u_long blockHint
= 0;
1897 /* All or nothing and don't round up to clumpsize. */
1898 eflags
= kEFAllMask
| kEFNoClumpMask
;
1900 if (cred
&& suser(cred
, NULL
) != 0)
1901 eflags
|= kEFReserveMask
; /* keep a reserve */
1904 * Allocate Journal and Quota files in metadata zone.
1906 if (filebytes
== 0 &&
1907 hfsmp
->hfs_flags
& HFS_METADATA_ZONE
&&
1908 hfs_virtualmetafile(cp
)) {
1909 eflags
|= kEFMetadataMask
;
1910 blockHint
= hfsmp
->hfs_metazone_start
;
1912 if (hfs_start_transaction(hfsmp
) != 0) {
1917 /* Protect extents b-tree and allocation bitmap */
1918 lockflags
= SFL_BITMAP
;
1919 if (overflow_extents(fp
))
1920 lockflags
|= SFL_EXTENTS
;
1921 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
1923 while ((length
> filebytes
) && (retval
== E_NONE
)) {
1924 bytesToAdd
= length
- filebytes
;
1925 retval
= MacToVFSError(ExtendFileC(VTOVCB(vp
),
1930 &actualBytesAdded
));
1932 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
1933 if (actualBytesAdded
== 0 && retval
== E_NONE
) {
1934 if (length
> filebytes
)
1940 hfs_systemfile_unlock(hfsmp
, lockflags
);
1943 (void) hfs_update(vp
, TRUE
);
1944 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
1947 hfs_end_transaction(hfsmp
);
1952 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_NONE
,
1953 (int)length
, (int)fp
->ff_size
, (int)filebytes
, 0, 0);
1956 if (!(flags
& IO_NOZEROFILL
)) {
1957 if (UBCINFOEXISTS(vp
) && retval
== E_NONE
) {
1958 struct rl_entry
*invalid_range
;
1961 zero_limit
= (fp
->ff_size
+ (PAGE_SIZE_64
- 1)) & ~PAGE_MASK_64
;
1962 if (length
< zero_limit
) zero_limit
= length
;
1964 if (length
> (off_t
)fp
->ff_size
) {
1967 /* Extending the file: time to fill out the current last page w. zeroes? */
1968 if ((fp
->ff_size
& PAGE_MASK_64
) &&
1969 (rl_scan(&fp
->ff_invalidranges
, fp
->ff_size
& ~PAGE_MASK_64
,
1970 fp
->ff_size
- 1, &invalid_range
) == RL_NOOVERLAP
)) {
1972 /* There's some valid data at the start of the (current) last page
1973 of the file, so zero out the remainder of that page to ensure the
1974 entire page contains valid data. Since there is no invalid range
1975 possible past the (current) eof, there's no need to remove anything
1976 from the invalid range list before calling cluster_write(): */
1978 retval
= cluster_write(vp
, (struct uio
*) 0, fp
->ff_size
, zero_limit
,
1979 fp
->ff_size
, (off_t
)0,
1980 (flags
& IO_SYNC
) | IO_HEADZEROFILL
| IO_NOZERODIRTY
);
1981 hfs_lock(cp
, HFS_FORCE_LOCK
);
1982 if (retval
) goto Err_Exit
;
1984 /* Merely invalidate the remaining area, if necessary: */
1985 if (length
> zero_limit
) {
1987 rl_add(zero_limit
, length
- 1, &fp
->ff_invalidranges
);
1988 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
1991 /* The page containing the (current) eof is invalid: just add the
1992 remainder of the page to the invalid list, along with the area
1993 being newly allocated:
1996 rl_add(fp
->ff_size
, length
- 1, &fp
->ff_invalidranges
);
1997 cp
->c_zftimeout
= tv
.tv_sec
+ ZFTIMELIMIT
;
2001 panic("hfs_truncate: invoked on non-UBC object?!");
2004 cp
->c_touch_modtime
= TRUE
;
2005 fp
->ff_size
= length
;
2007 /* Nested transactions will do their own ubc_setsize. */
2010 * ubc_setsize can cause a pagein here
2011 * so we need to drop cnode lock.
2014 ubc_setsize(vp
, length
);
2015 hfs_lock(cp
, HFS_FORCE_LOCK
);
2018 } else { /* Shorten the size of the file */
2020 if ((off_t
)fp
->ff_size
> length
) {
2022 * Any buffers that are past the truncation point need to be
2023 * invalidated (to maintain buffer cache consistency).
2026 /* Nested transactions will do their own ubc_setsize. */
2029 * ubc_setsize can cause a pageout here
2030 * so we need to drop cnode lock.
2033 ubc_setsize(vp
, length
);
2034 hfs_lock(cp
, HFS_FORCE_LOCK
);
2037 /* Any space previously marked as invalid is now irrelevant: */
2038 rl_remove(length
, fp
->ff_size
- 1, &fp
->ff_invalidranges
);
2042 * Account for any unmapped blocks. Note that the new
2043 * file length can still end up with unmapped blocks.
2045 if (fp
->ff_unallocblocks
> 0) {
2046 u_int32_t finalblks
;
2047 u_int32_t loanedBlocks
;
2049 HFS_MOUNT_LOCK(hfsmp
, TRUE
);
2051 loanedBlocks
= fp
->ff_unallocblocks
;
2052 cp
->c_blocks
-= loanedBlocks
;
2053 fp
->ff_blocks
-= loanedBlocks
;
2054 fp
->ff_unallocblocks
= 0;
2056 hfsmp
->loanedBlocks
-= loanedBlocks
;
2058 finalblks
= (length
+ blksize
- 1) / blksize
;
2059 if (finalblks
> fp
->ff_blocks
) {
2060 /* calculate required unmapped blocks */
2061 loanedBlocks
= finalblks
- fp
->ff_blocks
;
2062 hfsmp
->loanedBlocks
+= loanedBlocks
;
2064 fp
->ff_unallocblocks
= loanedBlocks
;
2065 cp
->c_blocks
+= loanedBlocks
;
2066 fp
->ff_blocks
+= loanedBlocks
;
2068 HFS_MOUNT_UNLOCK(hfsmp
, TRUE
);
2072 * For a TBE process the deallocation of the file blocks is
2073 * delayed until the file is closed. And hfs_close calls
2074 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2075 * isn't set, we make sure this isn't a TBE process.
2077 if ((flags
& IO_NDELAY
) || (proc_tbe(p
) == 0)) {
2079 off_t savedbytes
= ((off_t
)fp
->ff_blocks
* (off_t
)blksize
);
2081 if (hfs_start_transaction(hfsmp
) != 0) {
2086 if (fp
->ff_unallocblocks
== 0) {
2087 /* Protect extents b-tree and allocation bitmap */
2088 lockflags
= SFL_BITMAP
;
2089 if (overflow_extents(fp
))
2090 lockflags
|= SFL_EXTENTS
;
2091 lockflags
= hfs_systemfile_lock(hfsmp
, lockflags
, HFS_EXCLUSIVE_LOCK
);
2093 retval
= MacToVFSError(TruncateFileC(VTOVCB(vp
),
2094 (FCB
*)fp
, length
, false));
2096 hfs_systemfile_unlock(hfsmp
, lockflags
);
2100 fp
->ff_size
= length
;
2102 (void) hfs_update(vp
, TRUE
);
2103 (void) hfs_volupdate(hfsmp
, VOL_UPDATE
, 0);
2106 hfs_end_transaction(hfsmp
);
2108 filebytes
= (off_t
)fp
->ff_blocks
* (off_t
)blksize
;
2112 /* These are bytesreleased */
2113 (void) hfs_chkdq(cp
, (int64_t)-(savedbytes
- filebytes
), NOCRED
, 0);
2116 /* Only set update flag if the logical length changes */
2117 if (old_filesize
!= length
)
2118 cp
->c_touch_modtime
= TRUE
;
2119 fp
->ff_size
= length
;
2121 cp
->c_touch_chgtime
= TRUE
;
2122 retval
= hfs_update(vp
, MNT_WAIT
);
2124 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_NONE
,
2125 -1, -1, -1, retval
, 0);
2130 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW
, 7)) | DBG_FUNC_END
,
2131 (int)length
, (int)fp
->ff_size
, (int)filebytes
, retval
, 0);
2139 * Truncate a cnode to at most length size, freeing (or adding) the
2144 hfs_truncate(struct vnode
*vp
, off_t length
, int flags
, int skipsetsize
,
2145 vfs_context_t context
)
2147 struct filefork
*fp
= VTOF(vp
);
2150 int blksize
, error
= 0;
2151 struct cnode
*cp
= VTOC(vp
);
2153 if (vnode_isdir(vp
))
2154 return (EISDIR
); /* cannot truncate an HFS directory! */
2156 blksize
= VTOVCB(vp
)->blockSize
;
2157 fileblocks
= fp
->ff_blocks
;
2158 filebytes
= (off_t
)fileblocks
* (off_t
)blksize
;
2160 // have to loop truncating or growing files that are
2161 // really big because otherwise transactions can get
2162 // enormous and consume too many kernel resources.
2164 if (length
< filebytes
) {
2165 while (filebytes
> length
) {
2166 if ((filebytes
- length
) > HFS_BIGFILE_SIZE
) {
2167 filebytes
-= HFS_BIGFILE_SIZE
;
2171 cp
->c_flag
|= C_FORCEUPDATE
;
2172 error
= do_hfs_truncate(vp
, filebytes
, flags
, skipsetsize
, context
);
2176 } else if (length
> filebytes
) {
2177 while (filebytes
< length
) {
2178 if ((length
- filebytes
) > HFS_BIGFILE_SIZE
) {
2179 filebytes
+= HFS_BIGFILE_SIZE
;
2183 cp
->c_flag
|= C_FORCEUPDATE
;
2184 error
= do_hfs_truncate(vp
, filebytes
, flags
, skipsetsize
, context
);
2188 } else /* Same logical size */ {
2190 error
= do_hfs_truncate(vp
, length
, flags
, skipsetsize
, context
);
2192 /* Files that are changing size are not hot file candidates. */
2193 if (VTOHFS(vp
)->hfc_stage
== HFC_RECORDING
) {
2194 fp
->ff_bytesread
= 0;
/*
 * Preallocate file storage space.
 */
int
hfs_vnop_allocate(struct vnop_allocate_args
    /* {
        vnode_t a_vp;
        off_t a_length;
        u_int32_t a_flags;
        off_t *a_bytesallocated;
        off_t a_offset;
        vfs_context_t a_context;
    } */ *ap)
{
    struct vnode *vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    ExtendedVCB *vcb;
    off_t length = ap->a_length;
    off_t startingPEOF;
    off_t moreBytesRequested;
    off_t actualBytesAdded;
    off_t filebytes;
    u_long fileblocks;
    int retval, retval2;
    UInt32 blockHint;
    UInt32 extendFlags;   /* For call to ExtendFileC */
    struct hfsmount *hfsmp;
    kauth_cred_t cred = vfs_context_ucred(ap->a_context);
    int lockflags;

    *(ap->a_bytesallocated) = 0;

    if (!vnode_isreg(vp))
        return (EISDIR);
    if (length < (off_t)0)
        return (EINVAL);

    if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
        return (retval);
    cp = VTOC(vp);
    fp = VTOF(vp);
    hfsmp = VTOHFS(vp);
    vcb = VTOVCB(vp);

    fileblocks = fp->ff_blocks;
    filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;

    if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
        retval = EINVAL;
        goto Err_Exit;
    }

    /* Fill in the flags word for the call to Extend the file */

    extendFlags = kEFNoClumpMask;
    if (ap->a_flags & ALLOCATECONTIG)
        extendFlags |= kEFContigMask;
    if (ap->a_flags & ALLOCATEALL)
        extendFlags |= kEFAllMask;
    if (cred && suser(cred, NULL) != 0)
        extendFlags |= kEFReserveMask;

    retval = E_NONE;
    blockHint = 0;
    startingPEOF = filebytes;

    if (ap->a_flags & ALLOCATEFROMPEOF)
        length += filebytes;
    else if (ap->a_flags & ALLOCATEFROMVOL)
        blockHint = ap->a_offset / VTOVCB(vp)->blockSize;

    /* If no changes are necessary, then we're done */
    if (filebytes == length)
        goto Std_Exit;

    /*
     * Lengthen the size of the file. We must ensure that the
     * last byte of the file is allocated. Since the smallest
     * value of filebytes is 0, length will be at least 1.
     */
    if (length > filebytes) {
        moreBytesRequested = length - filebytes;

#if QUOTA
        retval = hfs_chkdq(cp,
                (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
                cred, 0);
        if (retval)
            goto Err_Exit;
#endif /* QUOTA */
        /*
         * Metadata zone checks.
         */
        if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
            /*
             * Allocate Journal and Quota files in metadata zone.
             */
            if (hfs_virtualmetafile(cp)) {
                extendFlags |= kEFMetadataMask;
                blockHint = hfsmp->hfs_metazone_start;
            } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
                       (blockHint <= hfsmp->hfs_metazone_end)) {
                /*
                 * Move blockHint outside metadata zone.
                 */
                blockHint = hfsmp->hfs_metazone_end + 1;
            }
        }

        if (hfs_start_transaction(hfsmp) != 0) {
            retval = EINVAL;
            goto Err_Exit;
        }

        /* Protect extents b-tree and allocation bitmap */
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

        retval = MacToVFSError(ExtendFileC(vcb,
                        (FCB *)fp,
                        moreBytesRequested,
                        blockHint,
                        extendFlags,
                        &actualBytesAdded));

        *(ap->a_bytesallocated) = actualBytesAdded;
        filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

        hfs_systemfile_unlock(hfsmp, lockflags);

        (void) hfs_update(vp, TRUE);
        (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

        hfs_end_transaction(hfsmp);

        /*
         * if we get an error and no changes were made then exit
         * otherwise we must do the hfs_update to reflect the changes
         */
        if (retval && (startingPEOF == filebytes))
            goto Err_Exit;

        /*
         * Adjust actualBytesAdded to be allocation block aligned, not
         * clump size aligned.
         * NOTE: So what we are reporting does not affect reality
         * until the file is closed, when we truncate the file to allocation
         * block size.
         */
        if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
            *(ap->a_bytesallocated) =
                roundup(moreBytesRequested, (off_t)vcb->blockSize);

    } else { /* Shorten the size of the file */

        if (fp->ff_size > length) {
            /*
             * Any buffers that are past the truncation point need to be
             * invalidated (to maintain buffer cache consistency).
             */
        }

        if (hfs_start_transaction(hfsmp) != 0) {
            retval = EINVAL;
            goto Err_Exit;
        }

        /* Protect extents b-tree and allocation bitmap */
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

        retval = MacToVFSError(TruncateFileC(vcb, (FCB *)fp, length, false));

        hfs_systemfile_unlock(hfsmp, lockflags);

        filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;

        (void) hfs_update(vp, TRUE);
        (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);

        hfs_end_transaction(hfsmp);

        /*
         * if we get an error and no changes were made then exit
         * otherwise we must do the hfs_update to reflect the changes
         */
        if (retval && (startingPEOF == filebytes)) goto Err_Exit;
#if QUOTA
        /* These are bytes released */
        (void) hfs_chkdq(cp, (int64_t)-((startingPEOF - filebytes)), NOCRED, 0);
#endif /* QUOTA */

        if (fp->ff_size > filebytes) {
            fp->ff_size = filebytes;

            hfs_unlock(cp);
            ubc_setsize(vp, fp->ff_size);
            hfs_lock(cp, HFS_FORCE_LOCK);
        }
    }

Std_Exit:
    cp->c_touch_chgtime = TRUE;
    cp->c_touch_modtime = TRUE;
    retval2 = hfs_update(vp, MNT_WAIT);

    if (retval == 0)
        retval = retval2;
Err_Exit:
    hfs_unlock(cp);
    return (retval);
}
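/*
 * Illustrative sketch (not part of the original source): user space
 * typically reaches hfs_vnop_allocate() through fcntl(F_PREALLOCATE).
 * Assuming fd refers to a regular file on an HFS+ volume, a caller
 * could reserve 16MB of contiguous space past the physical EOF with:
 *
 *      fstore_t fst;
 *      fst.fst_flags   = F_ALLOCATECONTIG | F_ALLOCATEALL;
 *      fst.fst_posmode = F_PEOFPOSMODE;
 *      fst.fst_offset  = 0;
 *      fst.fst_length  = 16 * 1024 * 1024;
 *      (void) fcntl(fd, F_PREALLOCATE, &fst);
 *
 * The space actually reserved is returned through *a_bytesallocated
 * (fst.fst_bytesalloc), rounded to allocation blocks as done above.
 */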
/*
 * Pagein for HFS filesystem
 */
int
hfs_vnop_pagein(struct vnop_pagein_args *ap)
/*
    struct vnop_pagein_args {
        vnode_t a_vp,
        upl_t a_pl,
        vm_offset_t a_pl_offset,
        off_t a_f_offset,
        size_t a_size,
        int a_flags,
        vfs_context_t a_context;
    };
*/
{
    vnode_t vp = ap->a_vp;
    int error;

    error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
                           ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
    /*
     * Keep track of blocks read.
     */
    if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
        struct cnode *cp;
        struct filefork *fp;
        int bytesread;
        int took_cnode_lock = 0;

        cp = VTOC(vp);
        fp = VTOF(vp);

        if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
            bytesread = fp->ff_size;
        else
            bytesread = ap->a_size;

        /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
        if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
            hfs_lock(cp, HFS_FORCE_LOCK);
            took_cnode_lock = 1;
        }
        /*
         * If this file hasn't been seen since the start of
         * the current sampling period then start over.
         */
        if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
            struct timeval tv;

            fp->ff_bytesread = bytesread;
            microtime(&tv);
            cp->c_atime = tv.tv_sec;
        } else {
            fp->ff_bytesread += bytesread;
        }
        cp->c_touch_acctime = TRUE;
        if (took_cnode_lock)
            hfs_unlock(cp);
    }
    return (error);
}
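/*
 * Added commentary (not original): the common pagein path bumps
 * ff_bytesread without taking the cnode lock to stay cheap; the lock
 * is only taken once the 64-bit counter would grow past 32 bits
 * (0xffffffff), presumably because a torn update below that boundary
 * is harmless to the hot-file recording heuristics.
 */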
/*
 * Pageout for HFS filesystem.
 */
int
hfs_vnop_pageout(struct vnop_pageout_args *ap)
/*
    struct vnop_pageout_args {
        vnode_t a_vp,
        upl_t a_pl,
        vm_offset_t a_pl_offset,
        off_t a_f_offset,
        size_t a_size,
        int a_flags,
        vfs_context_t a_context;
    };
*/
{
    vnode_t vp = ap->a_vp;
    struct cnode *cp;
    struct filefork *fp;
    int retval;
    off_t end_of_range;
    off_t filesize;

    cp = VTOC(vp);
    if (cp->c_lockowner == current_thread()) {
        panic("pageout: %s cnode lock already held!\n",
              cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
    }
    if ((retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
        if (!(ap->a_flags & UPL_NOCOMMIT)) {
            ubc_upl_abort_range(ap->a_pl,
                                ap->a_pl_offset,
                                ap->a_size,
                                UPL_ABORT_FREE_ON_EMPTY);
        }
        return (retval);
    }
    fp = VTOF(vp);

    filesize = fp->ff_size;
    end_of_range = ap->a_f_offset + ap->a_size - 1;

    if (end_of_range >= filesize) {
        end_of_range = (off_t)(filesize - 1);
    }
    if (ap->a_f_offset < filesize) {
        rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
        cp->c_flag |= C_MODIFIED;  /* leof is dirty */
    }
    hfs_unlock(cp);

    retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
                             ap->a_size, filesize, ap->a_flags);

    /*
     * If data was written, and setuid or setgid bits are set and
     * this process is not the superuser then clear the setuid and
     * setgid bits as a precaution against tampering.
     */
    if ((retval == 0) &&
        (cp->c_mode & (S_ISUID | S_ISGID)) &&
        (vfs_context_suser(ap->a_context) != 0)) {
        hfs_lock(cp, HFS_FORCE_LOCK);
        cp->c_mode &= ~(S_ISUID | S_ISGID);
        cp->c_touch_chgtime = TRUE;
        hfs_unlock(cp);
    }
    return (retval);
}
/*
 * Intercept B-Tree node writes to unswap them if necessary.
 */
int
hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
{
    int retval = 0;
    register struct buf *bp = ap->a_bp;
    register struct vnode *vp = buf_vnode(bp);
    BlockDescriptor block;

    /* Trap B-Tree writes */
    if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
        (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
        (VTOC(vp)->c_fileid == kHFSAttributesFileID)) {
        /*
         * Swap and validate the node if it is in native byte order.
         * This is always true on big endian, so we always validate
         * before writing here. On little endian, the node typically has
         * been swapped and validated when it was written to the journal,
         * so we won't do anything here.
         */
        if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
            /* Prepare the block pointer */
            block.blockHeader = bp;
            block.buffer = (char *)buf_dataptr(bp);
            block.blockNum = buf_lblkno(bp);
            /* not found in cache ==> came from disk */
            block.blockReadFromDisk = (buf_fromcache(bp) == 0);
            block.blockSize = buf_count(bp);

            /* Endian un-swap B-Tree node */
            retval = hfs_swap_BTNode(&block, vp, kSwapBTNodeHostToBig);
            if (retval)
                panic("hfs_vnop_bwrite: about to write corrupt node!\n");
        }
    }

    /* This buffer shouldn't be locked anymore but if it is clear it */
    if ((buf_flags(bp) & B_LOCKED)) {
        if (VTOHFS(vp)->jnl) {
            panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
        }
        buf_clearflags(bp, B_LOCKED);
    }
    retval = vn_bwrite(ap);

    return (retval);
}
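/*
 * Added commentary (not original): the 0x000e test above reads the
 * last two bytes of the node, which hold the offset of the node's
 * first record.  That offset is always 14 (sizeof(BTNodeDescriptor)),
 * so seeing 0x000e as a host-order UInt16 means the node is still in
 * host byte order and must be validated and swapped to big-endian
 * before it reaches the disk.
 */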
/*
 * Relocate a file to a new location on disk
 *  cnode must be locked on entry
 *
 * Relocation occurs by cloning the file's data from its
 * current set of blocks to a new set of blocks. During
 * the relocation all of the blocks (old and new) are
 * owned by the file.
 *
 * -----------------
 * |///////////////|
 * -----------------
 * 0               N (file offset)
 *
 * -----------------     -----------------
 * |///////////////|     |               |     STEP 1 (acquire new blocks)
 * -----------------     -----------------
 * 0               N     N+1             2N
 *
 * -----------------     -----------------
 * |///////////////|     |///////////////|     STEP 2 (clone data)
 * -----------------     -----------------
 * 0               N     N+1             2N
 *
 *                       -----------------
 *                       |///////////////|     STEP 3 (head truncate blocks)
 *                       -----------------
 *                       0               N
 *
 * During steps 2 and 3 page-outs to file offsets less
 * than or equal to N are suspended.
 *
 * During step 3 page-ins to the file get suspended.
 */
__private_extern__
int
hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
             struct proc *p)
{
    struct cnode *cp;
    struct filefork *fp;
    struct hfsmount *hfsmp;
    u_int32_t headblks;
    u_int32_t datablks;
    u_int32_t blksize;
    u_int32_t growsize;
    u_int32_t nextallocsave;
    daddr64_t sector_a, sector_b;
    int disabled_caching = 0;
    int eflags;
    off_t newbytes;
    int retval;
    int lockflags = 0;
    int took_trunc_lock = 0;
    int started_tr = 0;
    enum vtype vnodetype;

    vnodetype = vnode_vtype(vp);
    if (vnodetype != VREG && vnodetype != VLNK) {
        return (EPERM);
    }

    hfsmp = VTOHFS(vp);
    if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
        return (ENOSPC);
    }

    cp = VTOC(vp);
    fp = VTOF(vp);
    if (fp->ff_unallocblocks)
        return (EINVAL);
    blksize = hfsmp->blockSize;
    if (blockHint == 0)
        blockHint = hfsmp->nextAllocation;

    if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
        ((fp->ff_size > blksize) && vnodetype == VLNK)) {
        return (EFBIG);
    }

    //
    // We do not believe that this call to hfs_fsync() is
    // necessary and it causes a journal transaction
    // deadlock so we are removing it.
    //
    //if (vnodetype == VREG && !vnode_issystem(vp)) {
    //    retval = hfs_fsync(vp, MNT_WAIT, 0, p);
    //    if (retval)
    //        return (retval);
    //}

    if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
        hfs_unlock(cp);
        hfs_lock_truncate(cp, TRUE);
        if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
            hfs_unlock_truncate(cp);
            return (retval);
        }
        took_trunc_lock = 1;
    }
    headblks = fp->ff_blocks;
    datablks = howmany(fp->ff_size, blksize);
    growsize = datablks * blksize;
    eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
    if (blockHint >= hfsmp->hfs_metazone_start &&
        blockHint <= hfsmp->hfs_metazone_end)
        eflags |= kEFMetadataMask;

    if (hfs_start_transaction(hfsmp) != 0) {
        if (took_trunc_lock)
            hfs_unlock_truncate(cp);
        return (EINVAL);
    }
    started_tr = 1;
    /*
     * Protect the extents b-tree and the allocation bitmap
     * during MapFileBlockC and ExtendFileC operations.
     */
    lockflags = SFL_BITMAP;
    if (overflow_extents(fp))
        lockflags |= SFL_EXTENTS;
    lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

    retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
    if (retval) {
        retval = MacToVFSError(retval);
        goto out;
    }

    /*
     * STEP 1 - acquire new allocation blocks.
     */
    if (!vnode_isnocache(vp)) {
        vnode_setnocache(vp);
        disabled_caching = 1;
    }
    nextallocsave = hfsmp->nextAllocation;
    retval = ExtendFileC(hfsmp, (FCB *)fp, growsize, blockHint, eflags, &newbytes);
    if (eflags & kEFMetadataMask) {
        HFS_MOUNT_LOCK(hfsmp, TRUE);
        hfsmp->nextAllocation = nextallocsave;
        hfsmp->vcbFlags |= 0xFF00;
        HFS_MOUNT_UNLOCK(hfsmp, TRUE);
    }

    retval = MacToVFSError(retval);
    if (retval == 0) {
        cp->c_flag |= C_MODIFIED;
        if (newbytes < growsize) {
            retval = ENOSPC;
            goto restore;
        } else if (fp->ff_blocks < (headblks + datablks)) {
            printf("hfs_relocate: allocation failed");
            retval = ENOSPC;
            goto restore;
        }

        retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
        if (retval) {
            retval = MacToVFSError(retval);
        } else if ((sector_a + 1) == sector_b) {
            retval = ENOSPC;
            goto restore;
        } else if ((eflags & kEFMetadataMask) &&
                   ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
                    hfsmp->hfs_metazone_end)) {
            printf("hfs_relocate: didn't move into metadata zone\n");
            retval = ENOSPC;
            goto restore;
        }
    }
    /* Done with system locks and journal for now. */
    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;
    hfs_end_transaction(hfsmp);
    started_tr = 0;

    if (retval) {
        /*
         * Check to see if failure is due to excessive fragmentation.
         */
        if ((retval == ENOSPC) &&
            (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
            hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
        }
        goto out;
    }
    /*
     * STEP 2 - clone file data into the new allocation blocks.
     */
    if (vnodetype == VLNK)
        retval = hfs_clonelink(vp, blksize, cred, p);
    else if (vnode_issystem(vp))
        retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
    else
        retval = hfs_clonefile(vp, headblks, datablks, blksize);

    /* Start transaction for step 3 or for a restore. */
    if (hfs_start_transaction(hfsmp) != 0) {
        retval = EINVAL;
        goto out;
    }
    started_tr = 1;
    if (retval)
        goto restore;

    /*
     * STEP 3 - switch to cloned data and remove old blocks.
     */
    lockflags = SFL_BITMAP;
    if (overflow_extents(fp))
        lockflags |= SFL_EXTENTS;
    lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);

    retval = HeadTruncateFile(hfsmp, (FCB *)fp, headblks);

    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;
    if (retval)
        goto restore;
out:
    if (took_trunc_lock)
        hfs_unlock_truncate(cp);

    if (lockflags) {
        hfs_systemfile_unlock(hfsmp, lockflags);
        lockflags = 0;
    }

    // See comment up above about calls to hfs_fsync()
    //
    //if (retval == 0)
    //    retval = hfs_fsync(vp, MNT_WAIT, 0, p);

    if (hfsmp->jnl) {
        if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
            (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
        else
            (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
    }
exit:
    if (disabled_caching) {
        vnode_clearnocache(vp);
    }
    if (started_tr)
        hfs_end_transaction(hfsmp);

    return (retval);

restore:
    if (fp->ff_blocks == headblks)
        goto exit;
    /*
     * Give back any newly allocated space.
     */
    if (lockflags == 0) {
        lockflags = SFL_BITMAP;
        if (overflow_extents(fp))
            lockflags |= SFL_EXTENTS;
        lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
    }

    (void) TruncateFileC(hfsmp, (FCB *)fp, fp->ff_size, false);

    hfs_systemfile_unlock(hfsmp, lockflags);
    lockflags = 0;

    if (took_trunc_lock)
        hfs_unlock_truncate(cp);
    goto exit;
}
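/*
 * Worked example for the steps above (illustrative only): for a file
 * of 5 allocation blocks, headblks = 5, datablks = 5 and growsize =
 * 5 * blksize.  STEP 1 extends the fork to 10 blocks via ExtendFileC,
 * STEP 2 copies blocks 0..4 into blocks 5..9 with one of the clone
 * routines below, and STEP 3's HeadTruncateFile(hfsmp, (FCB *)fp, 5)
 * drops blocks 0..4, leaving the clone as the file's data.
 */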
/*
 * Clone a symlink.
 */
static int
hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
{
    struct buf *head_bp = NULL;
    struct buf *tail_bp = NULL;
    int error;

    error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
    if (error)
        goto out;

    tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
    if (tail_bp == NULL) {
        error = EIO;
        goto out;
    }
    bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
    error = (int)buf_bwrite(tail_bp);
out:
    if (head_bp) {
        buf_markinvalid(head_bp);
        buf_brelse(head_bp);
    }
    (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);

    return (error);
}
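/*
 * Added commentary (not original): a symlink small enough to relocate
 * fits in one allocation block (larger links are rejected with EFBIG
 * in hfs_relocate), so after STEP 1 the fork holds exactly two blocks
 * and the copy from meta block 0 to meta block 1 above places the
 * link target in the newly allocated half before HeadTruncateFile
 * discards block 0.
 */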
/*
 * Clone a file's data within the file.
 */
static int
hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
{
    caddr_t bufp;
    size_t writebase;
    size_t bufsize;
    size_t copysize;
    size_t iosize;
    off_t filesize;
    size_t offset;
    uio_t auio;
    int error = 0;

    filesize = VTOF(vp)->ff_blocks * blksize;  /* virtual file size */
    writebase = blkstart * blksize;
    copysize = blkcnt * blksize;
    iosize = bufsize = MIN(copysize, 4096 * 16);
    offset = 0;

    if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
        return (ENOMEM);
    }
    hfs_unlock(VTOC(vp));

    auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);

    while (offset < copysize) {
        iosize = MIN(copysize - offset, iosize);

        uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
        uio_addiov(auio, (uintptr_t)bufp, iosize);

        error = cluster_read(vp, auio, copysize, 0);
        if (error) {
            printf("hfs_clonefile: cluster_read failed - %d\n", error);
            break;
        }
        if (uio_resid(auio) != 0) {
            printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
            error = EIO;
            break;
        }

        uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
        uio_addiov(auio, (uintptr_t)bufp, iosize);

        error = cluster_write(vp, auio, filesize + offset,
                              filesize + offset + iosize,
                              uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
        if (error) {
            printf("hfs_clonefile: cluster_write failed - %d\n", error);
            break;
        }
        if (uio_resid(auio) != 0) {
            printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
            error = EIO;
            break;
        }
        offset += iosize;
    }
    uio_free(auio);

    /*
     * No need to call ubc_sync_range or hfs_invalbuf
     * since the file was copied using IO_NOCACHE.
     */

    kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

    hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
    return (error);
}
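/*
 * Added commentary (not original): the staging buffer is capped at
 * 4096 * 16 = 64KB, so cloning e.g. 1MB of data takes sixteen
 * read/write passes.  Because the writes use IO_NOCACHE | IO_SYNC,
 * nothing is left behind in the UBC, which is why no ubc_sync_range()
 * or hfs_invalbuf() call is needed afterwards (see comment above).
 */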
/*
 * Clone a system (metadata) file.
 */
static int
hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
                 kauth_cred_t cred, struct proc *p)
{
    caddr_t bufp;
    char *offset;
    size_t bufsize;
    size_t iosize;
    struct buf *bp = NULL;
    daddr64_t blkno;
    daddr64_t blk;
    daddr64_t start_blk;
    daddr64_t last_blk;
    int breadcnt;
    int i;
    int error = 0;

    iosize = GetLogicalBlockSize(vp);
    bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
    breadcnt = bufsize / iosize;

    if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
        return (ENOMEM);
    }
    start_blk = ((daddr64_t)blkstart * blksize) / iosize;
    last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
    blkno = 0;

    while (blkno < last_blk) {
        /*
         * Read up to a megabyte
         */
        offset = bufp;
        for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
            error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
            if (error) {
                printf("hfs_clonesysfile: meta_bread error %d\n", error);
                goto out;
            }
            if (buf_count(bp) != iosize) {
                printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
                goto out;
            }
            bcopy((char *)buf_dataptr(bp), offset, iosize);

            buf_markinvalid(bp);
            buf_brelse(bp);
            bp = NULL;

            offset += iosize;
        }

        /*
         * Write up to a megabyte
         */
        offset = bufp;
        for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
            bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
            if (bp == NULL) {
                printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
                error = EIO;
                goto out;
            }
            bcopy(offset, (char *)buf_dataptr(bp), iosize);
            error = (int)buf_bwrite(bp);
            bp = NULL;
            if (error)
                goto out;
            offset += iosize;
        }
    }
out:
    if (bp) {
        buf_brelse(bp);
    }

    kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);

    error = hfs_fsync(vp, MNT_WAIT, 0, p);

    return (error);
}