1 /*
2 * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 /* @(#)hfs_readwrite.c 1.0
29 *
30 * (c) 1998-2001 Apple Computer, Inc. All Rights Reserved
31 *
32 * hfs_readwrite.c -- vnode operations to deal with reading and writing files.
33 *
34 */
35
36 #include <sys/param.h>
37 #include <sys/systm.h>
38 #include <sys/resourcevar.h>
39 #include <sys/kernel.h>
40 #include <sys/fcntl.h>
41 #include <sys/filedesc.h>
42 #include <sys/stat.h>
43 #include <sys/buf.h>
44 #include <sys/proc.h>
45 #include <sys/kauth.h>
46 #include <sys/vnode.h>
47 #include <sys/uio.h>
48 #include <sys/vfs_context.h>
49 #include <sys/disk.h>
50 #include <sys/sysctl.h>
51
52 #include <miscfs/specfs/specdev.h>
53
54 #include <sys/ubc.h>
55 #include <vm/vm_pageout.h>
56 #include <vm/vm_kern.h>
57
58 #include <sys/kdebug.h>
59
60 #include "hfs.h"
61 #include "hfs_endian.h"
62 #include "hfs_fsctl.h"
63 #include "hfs_quota.h"
64 #include "hfscommon/headers/FileMgrInternal.h"
65 #include "hfscommon/headers/BTreesInternal.h"
66 #include "hfs_cnode.h"
67 #include "hfs_dbg.h"
68
69 extern int overflow_extents(struct filefork *fp);
70
71 #define can_cluster(size) ((((size & (4096-1))) == 0) && (size <= (MAXPHYSIO/2)))
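/*
 * Illustration (not from the original source): can_cluster() is true only
 * when "size" is a whole number of 4 KB pages and no more than half of
 * MAXPHYSIO.  Assuming, purely for the sake of the numbers, that MAXPHYSIO
 * were 128 KB: can_cluster(4096) and can_cluster(65536) would hold, while
 * can_cluster(6144) fails (not a 4 KB multiple) and can_cluster(131072)
 * fails (larger than MAXPHYSIO/2).
 */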
72
73 enum {
74 MAXHFSFILESIZE = 0x7FFFFFFF /* this needs to go in the mount structure */
75 };
76
77 extern u_int32_t GetLogicalBlockSize(struct vnode *vp);
78
79 extern int hfs_setextendedsecurity(struct hfsmount *, int);
80
81
82 static int hfs_clonelink(struct vnode *, int, kauth_cred_t, struct proc *);
83 static int hfs_clonefile(struct vnode *, int, int, int);
84 static int hfs_clonesysfile(struct vnode *, int, int, int, kauth_cred_t, struct proc *);
85
86
87 int flush_cache_on_write = 0;
88 SYSCTL_INT (_kern, OID_AUTO, flush_cache_on_write, CTLFLAG_RW, &flush_cache_on_write, 0, "always flush the drive cache on writes to uncached files");
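/*
 * Hedged illustration (not compiled): the SYSCTL_INT above publishes this
 * knob as "kern.flush_cache_on_write".  A user-space sketch of flipping it
 * with sysctlbyname(3) might look like the following; the helper name and
 * error handling are assumptions for illustration only.
 */
#if 0
#include <sys/types.h>
#include <sys/sysctl.h>

static int
set_flush_cache_on_write(int enable)
{
	/* oldp/oldlenp are NULL: we only set the new value, we don't read it back */
	return sysctlbyname("kern.flush_cache_on_write",
	                    NULL, NULL, &enable, sizeof(enable));
}
#endif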
89
90
91 /*****************************************************************************
92 *
93 * I/O Operations on vnodes
94 *
95 *****************************************************************************/
96 int hfs_vnop_read(struct vnop_read_args *);
97 int hfs_vnop_write(struct vnop_write_args *);
98 int hfs_vnop_ioctl(struct vnop_ioctl_args *);
99 int hfs_vnop_select(struct vnop_select_args *);
100 int hfs_vnop_blktooff(struct vnop_blktooff_args *);
101 int hfs_vnop_offtoblk(struct vnop_offtoblk_args *);
102 int hfs_vnop_blockmap(struct vnop_blockmap_args *);
103 int hfs_vnop_strategy(struct vnop_strategy_args *);
104 int hfs_vnop_allocate(struct vnop_allocate_args *);
105 int hfs_vnop_pagein(struct vnop_pagein_args *);
106 int hfs_vnop_pageout(struct vnop_pageout_args *);
107 int hfs_vnop_bwrite(struct vnop_bwrite_args *);
108
109
110 /*
111 * Read data from a file.
112 */
113 int
114 hfs_vnop_read(struct vnop_read_args *ap)
115 {
116 uio_t uio = ap->a_uio;
117 struct vnode *vp = ap->a_vp;
118 struct cnode *cp;
119 struct filefork *fp;
120 struct hfsmount *hfsmp;
121 off_t filesize;
122 off_t filebytes;
123 off_t start_resid = uio_resid(uio);
124 off_t offset = uio_offset(uio);
125 int retval = 0;
126
127
128 /* Preflight checks */
129 if (!vnode_isreg(vp)) {
130 /* can only read regular files */
131 if (vnode_isdir(vp))
132 return (EISDIR);
133 else
134 return (EPERM);
135 }
136 if (start_resid == 0)
137 return (0); /* Nothing left to do */
138 if (offset < 0)
139 		return (EINVAL);	/* can't read from a negative offset */
140
141 cp = VTOC(vp);
142 fp = VTOF(vp);
143 hfsmp = VTOHFS(vp);
144
145 /* Protect against a size change. */
146 hfs_lock_truncate(cp, 0);
147
148 filesize = fp->ff_size;
149 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
150 if (offset > filesize) {
151 if ((hfsmp->hfs_flags & HFS_STANDARD) &&
152 (offset > (off_t)MAXHFSFILESIZE)) {
153 retval = EFBIG;
154 }
155 goto exit;
156 }
157
158 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_START,
159 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
160
161 retval = cluster_read(vp, uio, filesize, 0);
162
163 cp->c_touch_acctime = TRUE;
164
165 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 12)) | DBG_FUNC_END,
166 (int)uio_offset(uio), uio_resid(uio), (int)filesize, (int)filebytes, 0);
167
168 /*
169 	 * Keep track of blocks read
170 */
171 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && retval == 0) {
172 int took_cnode_lock = 0;
173 off_t bytesread;
174
175 bytesread = start_resid - uio_resid(uio);
176
177 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
178 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
179 hfs_lock(cp, HFS_FORCE_LOCK);
180 took_cnode_lock = 1;
181 }
182 /*
183 * If this file hasn't been seen since the start of
184 * the current sampling period then start over.
185 */
186 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
187 struct timeval tv;
188
189 fp->ff_bytesread = bytesread;
190 microtime(&tv);
191 cp->c_atime = tv.tv_sec;
192 } else {
193 fp->ff_bytesread += bytesread;
194 }
195 if (took_cnode_lock)
196 hfs_unlock(cp);
197 }
198 exit:
199 hfs_unlock_truncate(cp);
200 return (retval);
201 }
202
203 /*
204 * Write data to a file.
205 */
206 int
207 hfs_vnop_write(struct vnop_write_args *ap)
208 {
209 uio_t uio = ap->a_uio;
210 struct vnode *vp = ap->a_vp;
211 struct cnode *cp;
212 struct filefork *fp;
213 struct hfsmount *hfsmp;
214 kauth_cred_t cred = NULL;
215 off_t origFileSize;
216 off_t writelimit;
217 off_t bytesToAdd;
218 off_t actualBytesAdded;
219 off_t filebytes;
220 off_t offset;
221 size_t resid;
222 int eflags;
223 int ioflag = ap->a_ioflag;
224 int retval = 0;
225 int lockflags;
226 int cnode_locked = 0;
227
228 // LP64todo - fix this! uio_resid may be 64-bit value
229 resid = uio_resid(uio);
230 offset = uio_offset(uio);
231
232 if (offset < 0)
233 return (EINVAL);
234 if (resid == 0)
235 return (E_NONE);
236 if (!vnode_isreg(vp))
237 return (EPERM); /* Can only write regular files */
238
239 /* Protect against a size change. */
240 hfs_lock_truncate(VTOC(vp), TRUE);
241
242 if ( (retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
243 hfs_unlock_truncate(VTOC(vp));
244 return (retval);
245 }
246 cnode_locked = 1;
247 cp = VTOC(vp);
248 fp = VTOF(vp);
249 hfsmp = VTOHFS(vp);
250 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
251
252 if (ioflag & IO_APPEND) {
253 uio_setoffset(uio, fp->ff_size);
254 offset = fp->ff_size;
255 }
256 if ((cp->c_flags & APPEND) && offset != fp->ff_size) {
257 retval = EPERM;
258 goto exit;
259 }
260
261 origFileSize = fp->ff_size;
262 eflags = kEFDeferMask; /* defer file block allocations */
263
264 #ifdef HFS_SPARSE_DEV
265 /*
266 * When the underlying device is sparse and space
267 * is low (< 8MB), stop doing delayed allocations
268 * and begin doing synchronous I/O.
269 */
270 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
271 (hfs_freeblks(hfsmp, 0) < 2048)) {
272 eflags &= ~kEFDeferMask;
273 ioflag |= IO_SYNC;
274 }
275 #endif /* HFS_SPARSE_DEV */
276
277 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_START,
278 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
279
280 /* Now test if we need to extend the file */
281 /* Doing so will adjust the filebytes for us */
282
283 writelimit = offset + resid;
284 if (writelimit <= filebytes)
285 goto sizeok;
286
287 cred = vfs_context_ucred(ap->a_context);
288 #if QUOTA
289 bytesToAdd = writelimit - filebytes;
290 retval = hfs_chkdq(cp, (int64_t)(roundup(bytesToAdd, hfsmp->blockSize)),
291 cred, 0);
292 if (retval)
293 goto exit;
294 #endif /* QUOTA */
295
296 if (hfs_start_transaction(hfsmp) != 0) {
297 retval = EINVAL;
298 goto exit;
299 }
300
301 while (writelimit > filebytes) {
302 bytesToAdd = writelimit - filebytes;
303 if (cred && suser(cred, NULL) != 0)
304 eflags |= kEFReserveMask;
305
306 /* Protect extents b-tree and allocation bitmap */
307 lockflags = SFL_BITMAP;
308 if (overflow_extents(fp))
309 lockflags |= SFL_EXTENTS;
310 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
311
312 /* Files that are changing size are not hot file candidates. */
313 if (hfsmp->hfc_stage == HFC_RECORDING) {
314 fp->ff_bytesread = 0;
315 }
316 retval = MacToVFSError(ExtendFileC (hfsmp, (FCB*)fp, bytesToAdd,
317 0, eflags, &actualBytesAdded));
318
319 hfs_systemfile_unlock(hfsmp, lockflags);
320
321 if ((actualBytesAdded == 0) && (retval == E_NONE))
322 retval = ENOSPC;
323 if (retval != E_NONE)
324 break;
325 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
326 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_NONE,
327 (int)offset, uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
328 }
329 (void) hfs_update(vp, TRUE);
330 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
331 (void) hfs_end_transaction(hfsmp);
332
333 sizeok:
334 if (retval == E_NONE) {
335 off_t filesize;
336 off_t zero_off;
337 off_t tail_off;
338 off_t inval_start;
339 off_t inval_end;
340 off_t io_start;
341 int lflag;
342 struct rl_entry *invalid_range;
343
344 if (writelimit > fp->ff_size)
345 filesize = writelimit;
346 else
347 filesize = fp->ff_size;
348
349 lflag = (ioflag & IO_SYNC);
350
351 if (offset <= fp->ff_size) {
352 zero_off = offset & ~PAGE_MASK_64;
353
354 			/* Check whether the area between zero_off and the start
355 			   of the transfer is invalid and should be zero-filled
356 as part of the transfer:
357 */
358 if (offset > zero_off) {
359 if (rl_scan(&fp->ff_invalidranges, zero_off, offset - 1, &invalid_range) != RL_NOOVERLAP)
360 lflag |= IO_HEADZEROFILL;
361 }
362 } else {
363 off_t eof_page_base = fp->ff_size & ~PAGE_MASK_64;
364
365 /* The bytes between fp->ff_size and uio->uio_offset must never be
366 read without being zeroed. The current last block is filled with zeroes
367 			   if it holds valid data; in all cases we merely do a little bookkeeping
368 			   to track the area from the end of the current last page to the start of
369 			   the area actually written.  For the same reason, only the bytes up to the
370 			   start of the page where this write will start are invalidated; any remainder
371 			   before uio->uio_offset is explicitly zeroed as part of the cluster_write.
372 			   
373 			   Note that inval_start, the start of the page after the current EOF,
374 			   may be past the start of the write, in which case the zeroing
375 			   will be handled by the cluster_write of the actual data.
376 */
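			/* Worked illustration (assuming 4 KB pages; the numbers are not
			   from the original source): suppose fp->ff_size is 0x1800 and
			   the write starts at offset 0x5200.  Then inval_start is 0x2000
			   (the first page boundary at or past EOF), inval_end is 0x5000
			   (the page containing the write start), and zero_off is 0x1800.
			   If the EOF page is valid, cluster_write() first zero-fills
			   0x1800-0x2000, the range 0x2000-0x4FFF is added to the invalid
			   list, and the remaining gap 0x5000-0x5200 is covered by
			   IO_HEADZEROFILL on the actual write. */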
377 inval_start = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
378 inval_end = offset & ~PAGE_MASK_64;
379 zero_off = fp->ff_size;
380
381 if ((fp->ff_size & PAGE_MASK_64) &&
382 (rl_scan(&fp->ff_invalidranges,
383 eof_page_base,
384 fp->ff_size - 1,
385 &invalid_range) != RL_NOOVERLAP)) {
386 /* The page containing the EOF is not valid, so the
387 entire page must be made inaccessible now. If the write
388 starts on a page beyond the page containing the eof
389 (inval_end > eof_page_base), add the
390 whole page to the range to be invalidated. Otherwise
391 (i.e. if the write starts on the same page), zero-fill
392 the entire page explicitly now:
393 */
394 if (inval_end > eof_page_base) {
395 inval_start = eof_page_base;
396 } else {
397 zero_off = eof_page_base;
398 };
399 };
400
401 if (inval_start < inval_end) {
402 struct timeval tv;
403 /* There's some range of data that's going to be marked invalid */
404
405 if (zero_off < inval_start) {
406 /* The pages between inval_start and inval_end are going to be invalidated,
407 and the actual write will start on a page past inval_end. Now's the last
408 chance to zero-fill the page containing the EOF:
409 */
410 hfs_unlock(cp);
411 cnode_locked = 0;
412 retval = cluster_write(vp, (uio_t) 0,
413 fp->ff_size, inval_start,
414 zero_off, (off_t)0,
415 lflag | IO_HEADZEROFILL | IO_NOZERODIRTY);
416 hfs_lock(cp, HFS_FORCE_LOCK);
417 cnode_locked = 1;
418 if (retval) goto ioerr_exit;
419 offset = uio_offset(uio);
420 };
421
422 /* Mark the remaining area of the newly allocated space as invalid: */
423 rl_add(inval_start, inval_end - 1 , &fp->ff_invalidranges);
424 microuptime(&tv);
425 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
426 zero_off = fp->ff_size = inval_end;
427 };
428
429 if (offset > zero_off) lflag |= IO_HEADZEROFILL;
430 };
431
432 /* Check to see whether the area between the end of the write and the end of
433 the page it falls in is invalid and should be zero-filled as part of the transfer:
434 */
435 tail_off = (writelimit + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
436 if (tail_off > filesize) tail_off = filesize;
437 if (tail_off > writelimit) {
438 if (rl_scan(&fp->ff_invalidranges, writelimit, tail_off - 1, &invalid_range) != RL_NOOVERLAP) {
439 lflag |= IO_TAILZEROFILL;
440 };
441 };
442
443 /*
444 * if the write starts beyond the current EOF (possibly advanced in the
445 * zeroing of the last block, above), then we'll zero fill from the current EOF
446 * to where the write begins:
447 *
448 * NOTE: If (and ONLY if) the portion of the file about to be written is
449 * before the current EOF it might be marked as invalid now and must be
450 * made readable (removed from the invalid ranges) before cluster_write
451 * tries to write it:
452 */
453 io_start = (lflag & IO_HEADZEROFILL) ? zero_off : offset;
454 if (io_start < fp->ff_size) {
455 off_t io_end;
456
457 io_end = (lflag & IO_TAILZEROFILL) ? tail_off : writelimit;
458 rl_remove(io_start, io_end - 1, &fp->ff_invalidranges);
459 };
460
461 hfs_unlock(cp);
462 cnode_locked = 0;
463 retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
464 tail_off, lflag | IO_NOZERODIRTY);
465 offset = uio_offset(uio);
466 if (offset > fp->ff_size) {
467 fp->ff_size = offset;
468
469 ubc_setsize(vp, fp->ff_size); /* XXX check errors */
470 /* Files that are changing size are not hot file candidates. */
471 if (hfsmp->hfc_stage == HFC_RECORDING)
472 fp->ff_bytesread = 0;
473 }
474 if (resid > uio_resid(uio)) {
475 cp->c_touch_chgtime = TRUE;
476 cp->c_touch_modtime = TRUE;
477 }
478 }
479
480 // XXXdbg - testing for vivek and paul lambert
481 {
482 if (flush_cache_on_write && ((ioflag & IO_NOCACHE) || vnode_isnocache(vp))) {
483 VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, NULL);
484 }
485 }
486 HFS_KNOTE(vp, NOTE_WRITE);
487
488 ioerr_exit:
489 /*
490 * If we successfully wrote any data, and we are not the superuser
491 	 * If we successfully wrote any data, and we are not the superuser,
492 * tampering.
493 */
494 if (cp->c_mode & (S_ISUID | S_ISGID)) {
495 cred = vfs_context_ucred(ap->a_context);
496 if (resid > uio_resid(uio) && cred && suser(cred, NULL)) {
497 if (!cnode_locked) {
498 hfs_lock(cp, HFS_FORCE_LOCK);
499 cnode_locked = 1;
500 }
501 cp->c_mode &= ~(S_ISUID | S_ISGID);
502 }
503 }
504 if (retval) {
505 if (ioflag & IO_UNIT) {
506 if (!cnode_locked) {
507 hfs_lock(cp, HFS_FORCE_LOCK);
508 cnode_locked = 1;
509 }
510 (void)hfs_truncate(vp, origFileSize, ioflag & IO_SYNC,
511 0, ap->a_context);
512 			// LP64todo - fix this!  resid needs to be user_ssize_t
513 uio_setoffset(uio, (uio_offset(uio) - (resid - uio_resid(uio))));
514 uio_setresid(uio, resid);
515 filebytes = (off_t)fp->ff_blocks * (off_t)hfsmp->blockSize;
516 }
517 } else if ((ioflag & IO_SYNC) && (resid > uio_resid(uio))) {
518 if (!cnode_locked) {
519 hfs_lock(cp, HFS_FORCE_LOCK);
520 cnode_locked = 1;
521 }
522 retval = hfs_update(vp, TRUE);
523 }
524 /* Updating vcbWrCnt doesn't need to be atomic. */
525 hfsmp->vcbWrCnt++;
526
527 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 0)) | DBG_FUNC_END,
528 (int)uio_offset(uio), uio_resid(uio), (int)fp->ff_size, (int)filebytes, 0);
529 exit:
530 if (cnode_locked)
531 hfs_unlock(cp);
532 hfs_unlock_truncate(cp);
533 return (retval);
534 }
535
536 /* support for the "bulk-access" fcntl */
537
538 #define CACHE_ELEMS 64
539 #define CACHE_LEVELS 16
540 #define PARENT_IDS_FLAG 0x100
541
542 /* from hfs_attrlist.c */
543 extern unsigned long DerivePermissionSummary(uid_t obj_uid, gid_t obj_gid,
544 mode_t obj_mode, struct mount *mp,
545 kauth_cred_t cred, struct proc *p);
546
547 /* from vfs/vfs_fsevents.c */
548 extern char *get_pathbuff(void);
549 extern void release_pathbuff(char *buff);
550
551 struct access_cache {
552 int numcached;
553 int cachehits; /* these two for statistics gathering */
554 int lookups;
555 unsigned int *acache;
556 Boolean *haveaccess;
557 };
558
559 struct access_t {
560 uid_t uid; /* IN: effective user id */
561 short flags; /* IN: access requested (i.e. R_OK) */
562 short num_groups; /* IN: number of groups user belongs to */
563 int num_files; /* IN: number of files to process */
564 int *file_ids; /* IN: array of file ids */
565 gid_t *groups; /* IN: array of groups */
566 short *access; /* OUT: access info for each file (0 for 'has access') */
567 };
568
569 struct user_access_t {
570 uid_t uid; /* IN: effective user id */
571 short flags; /* IN: access requested (i.e. R_OK) */
572 short num_groups; /* IN: number of groups user belongs to */
573 int num_files; /* IN: number of files to process */
574 user_addr_t file_ids; /* IN: array of file ids */
575 user_addr_t groups; /* IN: array of groups */
576 user_addr_t access; /* OUT: access info for each file (0 for 'has access') */
577 };
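/*
 * Hedged illustration (not compiled into the kernel): a minimal user-space
 * sketch of driving the bulk-access interface described by the structures
 * above through fsctl(2).  The request constant mirrors the HFSIOC_BULKACCESS
 * definition that appears further down in this file; the header choices,
 * helper name, and error handling are assumptions for illustration only.
 */
#if 0
#include <sys/types.h>
#include <sys/ioccom.h>
#include <sys/fsctl.h>		/* fsctl(2) prototype (assumed location) */
#include <unistd.h>		/* R_OK */
#include <string.h>

#define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)

static int
bulk_check_read_access(const char *volpath, uid_t uid,
                       gid_t *groups, short ngroups,
                       int *file_ids, int nfiles, short *results)
{
	struct access_t args;

	memset(&args, 0, sizeof(args));
	args.uid        = uid;
	args.flags      = R_OK;		/* "can this uid read these files?" */
	args.num_groups = ngroups;
	args.num_files  = nfiles;	/* the handler rejects more than 256 per call */
	args.file_ids   = file_ids;	/* catalog node ids to test */
	args.groups     = groups;
	args.access     = results;	/* one short per file; 0 means access granted */

	return fsctl(volpath, HFSIOC_BULKACCESS, &args, 0);
}
#endif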
578
579 /*
580 * Perform a binary search for the given parent_id. Return value is
581 * found/not found boolean, and indexp will be the index of the item
582 * or the index at which to insert the item if it's not found.
583 */
584 static int
585 lookup_bucket(struct access_cache *cache, int *indexp, cnid_t parent_id)
586 {
587 unsigned int lo, hi;
588 int index, matches = 0;
589
590 if (cache->numcached == 0) {
591 *indexp = 0;
592 return 0; // table is empty, so insert at index=0 and report no match
593 }
594
595 if (cache->numcached > CACHE_ELEMS) {
596 /*printf("EGAD! numcached is %d... cut our losses and trim to %d\n",
597 cache->numcached, CACHE_ELEMS);*/
598 cache->numcached = CACHE_ELEMS;
599 }
600
601 lo = 0;
602 hi = cache->numcached - 1;
603 index = -1;
604
605 /* perform binary search for parent_id */
606 do {
607 unsigned int mid = (hi - lo)/2 + lo;
608 unsigned int this_id = cache->acache[mid];
609
610 if (parent_id == this_id) {
611 index = mid;
612 break;
613 }
614
615 if (parent_id < this_id) {
616 hi = mid;
617 continue;
618 }
619
620 if (parent_id > this_id) {
621 lo = mid + 1;
622 continue;
623 }
624 } while(lo < hi);
625
626 /* check if lo and hi converged on the match */
627 if (parent_id == cache->acache[hi]) {
628 index = hi;
629 }
630
631 /* if no existing entry found, find index for new one */
632 if (index == -1) {
633 index = (parent_id < cache->acache[hi]) ? hi : hi + 1;
634 matches = 0;
635 } else {
636 matches = 1;
637 }
638
639 *indexp = index;
640 return matches;
641 }
642
643 /*
644 * Add a node to the access_cache at the given index (or do a lookup first
645 * to find the index if -1 is passed in). We currently do a replace rather
646 * than an insert if the cache is full.
647 */
648 static void
649 add_node(struct access_cache *cache, int index, cnid_t nodeID, int access)
650 {
651 int lookup_index = -1;
652
653 /* need to do a lookup first if -1 passed for index */
654 if (index == -1) {
655 if (lookup_bucket(cache, &lookup_index, nodeID)) {
656 if (cache->haveaccess[lookup_index] != access) {
657 /* change access info for existing entry... should never happen */
658 cache->haveaccess[lookup_index] = access;
659 }
660
661 /* mission accomplished */
662 return;
663 } else {
664 index = lookup_index;
665 }
666
667 }
668
669 /* if the cache is full, do a replace rather than an insert */
670 if (cache->numcached >= CACHE_ELEMS) {
671 //printf("cache is full (%d). replace at index %d\n", cache->numcached, index);
672 cache->numcached = CACHE_ELEMS-1;
673
674 if (index > cache->numcached) {
675 // printf("index %d pinned to %d\n", index, cache->numcached);
676 index = cache->numcached;
677 }
678 } else if (index >= 0 && index < cache->numcached) {
679 /* only do bcopy if we're inserting */
680 bcopy( cache->acache+index, cache->acache+(index+1), (cache->numcached - index)*sizeof(int) );
681 bcopy( cache->haveaccess+index, cache->haveaccess+(index+1), (cache->numcached - index)*sizeof(Boolean) );
682 }
683
684 cache->acache[index] = nodeID;
685 cache->haveaccess[index] = access;
686 cache->numcached++;
687 }
688
689
690 struct cinfo {
691 uid_t uid;
692 gid_t gid;
693 mode_t mode;
694 cnid_t parentcnid;
695 };
696
697 static int
698 snoop_callback(const struct cat_desc *descp, const struct cat_attr *attrp, void * arg)
699 {
700 struct cinfo *cip = (struct cinfo *)arg;
701
702 cip->uid = attrp->ca_uid;
703 cip->gid = attrp->ca_gid;
704 cip->mode = attrp->ca_mode;
705 cip->parentcnid = descp->cd_parentcnid;
706
707 return (0);
708 }
709
710 /*
711 * Lookup the cnid's attr info (uid, gid, and mode) as well as its parent id. If the item
712 * isn't incore, then go to the catalog.
713 */
714 static int
715 do_attr_lookup(struct hfsmount *hfsmp, struct access_cache *cache, dev_t dev, cnid_t cnid,
716 struct cnode *skip_cp, CatalogKey *keyp, struct cat_attr *cnattrp, struct proc *p)
717 {
718 int error = 0;
719
720 /* if this id matches the one the fsctl was called with, skip the lookup */
721 if (cnid == skip_cp->c_cnid) {
722 cnattrp->ca_uid = skip_cp->c_uid;
723 cnattrp->ca_gid = skip_cp->c_gid;
724 cnattrp->ca_mode = skip_cp->c_mode;
725 keyp->hfsPlus.parentID = skip_cp->c_parentcnid;
726 } else {
727 struct cinfo c_info;
728
729 	/* otherwise, check the cnode hash in case the file/dir is incore */
730 if (hfs_chash_snoop(dev, cnid, snoop_callback, &c_info) == 0) {
731 cnattrp->ca_uid = c_info.uid;
732 cnattrp->ca_gid = c_info.gid;
733 cnattrp->ca_mode = c_info.mode;
734 keyp->hfsPlus.parentID = c_info.parentcnid;
735 } else {
736 int lockflags;
737
738 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
739
740 /* lookup this cnid in the catalog */
741 error = cat_getkeyplusattr(hfsmp, cnid, keyp, cnattrp);
742
743 hfs_systemfile_unlock(hfsmp, lockflags);
744
745 cache->lookups++;
746 }
747 }
748
749 return (error);
750 }
751
752 /*
753 * Compute whether we have access to the given directory (nodeID) and all its parents. Cache
754 * up to CACHE_LEVELS as we progress towards the root.
755 */
756 static int
757 do_access_check(struct hfsmount *hfsmp, int *err, struct access_cache *cache, HFSCatalogNodeID nodeID,
758 struct cnode *skip_cp, struct proc *theProcPtr, kauth_cred_t myp_ucred, dev_t dev )
759 {
760 int myErr = 0;
761 int myResult;
762 HFSCatalogNodeID thisNodeID;
763 unsigned long myPerms;
764 struct cat_attr cnattr;
765 int cache_index = -1;
766 CatalogKey catkey;
767
768 int i = 0, ids_to_cache = 0;
769 int parent_ids[CACHE_LEVELS];
770
771 /* root always has access */
772 if (!suser(myp_ucred, NULL)) {
773 return (1);
774 }
775
776 thisNodeID = nodeID;
777 while (thisNodeID >= kRootDirID) {
778 myResult = 0; /* default to "no access" */
779
780 /* check the cache before resorting to hitting the catalog */
781
782 /* ASSUMPTION: access info of cached entries is "final"... i.e. no need
783 * to look any further after hitting cached dir */
784
785 if (lookup_bucket(cache, &cache_index, thisNodeID)) {
786 cache->cachehits++;
787 myResult = cache->haveaccess[cache_index];
788 goto ExitThisRoutine;
789 }
790
791 /* remember which parents we want to cache */
792 if (ids_to_cache < CACHE_LEVELS) {
793 parent_ids[ids_to_cache] = thisNodeID;
794 ids_to_cache++;
795 }
796
797 /* do the lookup (checks the cnode hash, then the catalog) */
798 myErr = do_attr_lookup(hfsmp, cache, dev, thisNodeID, skip_cp, &catkey, &cnattr, theProcPtr);
799 if (myErr) {
800 goto ExitThisRoutine; /* no access */
801 }
802
803 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
804 cnattr.ca_mode, hfsmp->hfs_mp,
805 myp_ucred, theProcPtr);
806
807 if ( (myPerms & X_OK) == 0 ) {
808 myResult = 0;
809 goto ExitThisRoutine; /* no access */
810 }
811
812 /* up the hierarchy we go */
813 thisNodeID = catkey.hfsPlus.parentID;
814 }
815
816 /* if here, we have access to this node */
817 myResult = 1;
818
819 ExitThisRoutine:
820 if (myErr) {
821 //printf("*** error %d from catalog looking up parent %d/%d!\n", myErr, dev, thisNodeID);
822 myResult = 0;
823 }
824 *err = myErr;
825
826 /* cache the parent directory(ies) */
827 for (i = 0; i < ids_to_cache; i++) {
828 /* small optimization: get rid of double-lookup for all these */
829 // printf("adding %d to cache with result: %d\n", parent_ids[i], myResult);
830 add_node(cache, -1, parent_ids[i], myResult);
831 }
832
833 return (myResult);
834 }
835 /* end "bulk-access" support */
836
837
838
839 /*
840 * Callback for use with freeze ioctl.
841 */
842 static int
843 hfs_freezewrite_callback(struct vnode *vp, void *cargs)
844 {
845 vnode_waitforwrites(vp, 0, 0, 0, "hfs freeze");
846
847 return 0;
848 }
849
850 /*
851 * Control filesystem operating characteristics.
852 */
853 int
854 hfs_vnop_ioctl( struct vnop_ioctl_args /* {
855 vnode_t a_vp;
856 int a_command;
857 caddr_t a_data;
858 int a_fflag;
859 vfs_context_t a_context;
860 } */ *ap)
861 {
862 struct vnode * vp = ap->a_vp;
863 struct hfsmount *hfsmp = VTOHFS(vp);
864 vfs_context_t context = ap->a_context;
865 kauth_cred_t cred = vfs_context_ucred(context);
866 proc_t p = vfs_context_proc(context);
867 struct vfsstatfs *vfsp;
868 boolean_t is64bit;
869
870 is64bit = proc_is64bit(p);
871
872 switch (ap->a_command) {
873
874 case HFS_RESIZE_PROGRESS: {
875
876 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
877 if (suser(cred, NULL) &&
878 kauth_cred_getuid(cred) != vfsp->f_owner) {
879 return (EACCES); /* must be owner of file system */
880 }
881 if (!vnode_isvroot(vp)) {
882 return (EINVAL);
883 }
884 return hfs_resize_progress(hfsmp, (u_int32_t *)ap->a_data);
885 }
886 case HFS_RESIZE_VOLUME: {
887 u_int64_t newsize;
888 u_int64_t cursize;
889
890 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
891 if (suser(cred, NULL) &&
892 kauth_cred_getuid(cred) != vfsp->f_owner) {
893 return (EACCES); /* must be owner of file system */
894 }
895 if (!vnode_isvroot(vp)) {
896 return (EINVAL);
897 }
898 newsize = *(u_int64_t *)ap->a_data;
899 cursize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
900
901 if (newsize > cursize) {
902 return hfs_extendfs(hfsmp, *(u_int64_t *)ap->a_data, context);
903 } else if (newsize < cursize) {
904 return hfs_truncatefs(hfsmp, *(u_int64_t *)ap->a_data, context);
905 } else {
906 return (0);
907 }
908 }
909 case HFS_CHANGE_NEXT_ALLOCATION: {
910 u_int32_t location;
911
912 if (vnode_vfsisrdonly(vp)) {
913 return (EROFS);
914 }
915 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
916 if (suser(cred, NULL) &&
917 kauth_cred_getuid(cred) != vfsp->f_owner) {
918 return (EACCES); /* must be owner of file system */
919 }
920 if (!vnode_isvroot(vp)) {
921 return (EINVAL);
922 }
923 location = *(u_int32_t *)ap->a_data;
924 if (location > hfsmp->totalBlocks - 1) {
925 return (EINVAL);
926 }
927 /* Return previous value. */
928 *(u_int32_t *)ap->a_data = hfsmp->nextAllocation;
929 HFS_MOUNT_LOCK(hfsmp, TRUE);
930 hfsmp->nextAllocation = location;
931 hfsmp->vcbFlags |= 0xFF00;
932 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
933 return (0);
934 }
935
936 #ifdef HFS_SPARSE_DEV
937 case HFS_SETBACKINGSTOREINFO: {
938 struct vnode * bsfs_rootvp;
939 struct vnode * di_vp;
940 struct hfs_backingstoreinfo *bsdata;
941 int error = 0;
942
943 if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
944 return (EALREADY);
945 }
946 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
947 if (suser(cred, NULL) &&
948 kauth_cred_getuid(cred) != vfsp->f_owner) {
949 return (EACCES); /* must be owner of file system */
950 }
951 bsdata = (struct hfs_backingstoreinfo *)ap->a_data;
952 if (bsdata == NULL) {
953 return (EINVAL);
954 }
955 if ((error = file_vnode(bsdata->backingfd, &di_vp))) {
956 return (error);
957 }
958 if ((error = vnode_getwithref(di_vp))) {
959 file_drop(bsdata->backingfd);
960 return(error);
961 }
962
963 if (vnode_mount(vp) == vnode_mount(di_vp)) {
964 (void)vnode_put(di_vp);
965 file_drop(bsdata->backingfd);
966 return (EINVAL);
967 }
968
969 /*
970 * Obtain the backing fs root vnode and keep a reference
971 * on it. This reference will be dropped in hfs_unmount.
972 */
973 error = VFS_ROOT(vnode_mount(di_vp), &bsfs_rootvp, NULL); /* XXX use context! */
974 if (error) {
975 (void)vnode_put(di_vp);
976 file_drop(bsdata->backingfd);
977 return (error);
978 }
979 vnode_ref(bsfs_rootvp);
980 vnode_put(bsfs_rootvp);
981
982 hfsmp->hfs_backingfs_rootvp = bsfs_rootvp;
983 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
984 hfsmp->hfs_sparsebandblks = bsdata->bandsize / HFSTOVCB(hfsmp)->blockSize;
985 hfsmp->hfs_sparsebandblks *= 4;
986
987 (void)vnode_put(di_vp);
988 file_drop(bsdata->backingfd);
989 return (0);
990 }
991 case HFS_CLRBACKINGSTOREINFO: {
992 struct vnode * tmpvp;
993
994 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
995 if (suser(cred, NULL) &&
996 kauth_cred_getuid(cred) != vfsp->f_owner) {
997 return (EACCES); /* must be owner of file system */
998 }
999 if ((hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) &&
1000 hfsmp->hfs_backingfs_rootvp) {
1001
1002 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
1003 tmpvp = hfsmp->hfs_backingfs_rootvp;
1004 hfsmp->hfs_backingfs_rootvp = NULLVP;
1005 hfsmp->hfs_sparsebandblks = 0;
1006 vnode_rele(tmpvp);
1007 }
1008 return (0);
1009 }
1010 #endif /* HFS_SPARSE_DEV */
1011
1012 case F_FREEZE_FS: {
1013 struct mount *mp;
1014 task_t task;
1015
1016 if (!is_suser())
1017 return (EACCES);
1018
1019 mp = vnode_mount(vp);
1020 hfsmp = VFSTOHFS(mp);
1021
1022 if (!(hfsmp->jnl))
1023 return (ENOTSUP);
1024
1025 lck_rw_lock_exclusive(&hfsmp->hfs_insync);
1026
1027 task = current_task();
1028 task_working_set_disable(task);
1029
1030 // flush things before we get started to try and prevent
1031 // dirty data from being paged out while we're frozen.
1032 // note: can't do this after taking the lock as it will
1033 // deadlock against ourselves.
1034 vnode_iterate(mp, 0, hfs_freezewrite_callback, NULL);
1035 hfs_global_exclusive_lock_acquire(hfsmp);
1036 journal_flush(hfsmp->jnl);
1037
1038 // don't need to iterate on all vnodes, we just need to
1039 // wait for writes to the system files and the device vnode
1040 if (HFSTOVCB(hfsmp)->extentsRefNum)
1041 vnode_waitforwrites(HFSTOVCB(hfsmp)->extentsRefNum, 0, 0, 0, "hfs freeze");
1042 if (HFSTOVCB(hfsmp)->catalogRefNum)
1043 vnode_waitforwrites(HFSTOVCB(hfsmp)->catalogRefNum, 0, 0, 0, "hfs freeze");
1044 if (HFSTOVCB(hfsmp)->allocationsRefNum)
1045 vnode_waitforwrites(HFSTOVCB(hfsmp)->allocationsRefNum, 0, 0, 0, "hfs freeze");
1046 if (hfsmp->hfs_attribute_vp)
1047 vnode_waitforwrites(hfsmp->hfs_attribute_vp, 0, 0, 0, "hfs freeze");
1048 vnode_waitforwrites(hfsmp->hfs_devvp, 0, 0, 0, "hfs freeze");
1049
1050 hfsmp->hfs_freezing_proc = current_proc();
1051
1052 return (0);
1053 }
1054
1055 case F_THAW_FS: {
1056 if (!is_suser())
1057 return (EACCES);
1058
1059 // if we're not the one who froze the fs then we
1060 // can't thaw it.
1061 if (hfsmp->hfs_freezing_proc != current_proc()) {
1062 return EPERM;
1063 }
1064
1065 // NOTE: if you add code here, also go check the
1066 // code that "thaws" the fs in hfs_vnop_close()
1067 //
1068 hfsmp->hfs_freezing_proc = NULL;
1069 hfs_global_exclusive_lock_release(hfsmp);
1070 lck_rw_unlock_exclusive(&hfsmp->hfs_insync);
1071
1072 return (0);
1073 }
1074
1075 #define HFSIOC_BULKACCESS _IOW('h', 9, struct access_t)
1076 #define HFS_BULKACCESS_FSCTL IOCBASECMD(HFSIOC_BULKACCESS)
1077
1078 case HFS_BULKACCESS_FSCTL:
1079 case HFS_BULKACCESS: {
1080 /*
1081 	 * NOTE: on entry, the vnode is locked.  In case this vnode
1082 	 * happens to be in our list of file_ids, we note it so we
1083 	 * avoid calling hfs_chashget_nowait() on that id, as that
1084 	 * will cause a "locking against myself" panic.
1085 */
1086 Boolean check_leaf = true;
1087
1088 struct user_access_t *user_access_structp;
1089 struct user_access_t tmp_user_access_t;
1090 struct access_cache cache;
1091
1092 int error = 0, i;
1093
1094 dev_t dev = VTOC(vp)->c_dev;
1095
1096 short flags;
1097 struct ucred myucred;
1098 int num_files;
1099 int *file_ids = NULL;
1100 short *access = NULL;
1101
1102 cnid_t cnid;
1103 cnid_t prevParent_cnid = 0;
1104 unsigned long myPerms;
1105 short myaccess = 0;
1106 struct cat_attr cnattr;
1107 CatalogKey catkey;
1108 struct cnode *skip_cp = VTOC(vp);
1109 struct vfs_context my_context;
1110
1111 /* set up front for common exit code */
1112 my_context.vc_ucred = NOCRED;
1113
1114 /* first, return error if not run as root */
1115 if (cred->cr_ruid != 0) {
1116 return EPERM;
1117 }
1118
1119 /* initialize the local cache and buffers */
1120 cache.numcached = 0;
1121 cache.cachehits = 0;
1122 cache.lookups = 0;
1123
1124 file_ids = (int *) get_pathbuff();
1125 access = (short *) get_pathbuff();
1126 cache.acache = (int *) get_pathbuff();
1127 cache.haveaccess = (Boolean *) get_pathbuff();
1128
1129 if (file_ids == NULL || access == NULL || cache.acache == NULL || cache.haveaccess == NULL) {
1130 release_pathbuff((char *) file_ids);
1131 release_pathbuff((char *) access);
1132 release_pathbuff((char *) cache.acache);
1133 release_pathbuff((char *) cache.haveaccess);
1134
1135 return ENOMEM;
1136 }
1137
1138 /* struct copyin done during dispatch... need to copy file_id array separately */
1139 if (ap->a_data == NULL) {
1140 error = EINVAL;
1141 goto err_exit_bulk_access;
1142 }
1143
1144 if (is64bit) {
1145 user_access_structp = (struct user_access_t *)ap->a_data;
1146 }
1147 else {
1148 struct access_t * accessp = (struct access_t *)ap->a_data;
1149 tmp_user_access_t.uid = accessp->uid;
1150 tmp_user_access_t.flags = accessp->flags;
1151 tmp_user_access_t.num_groups = accessp->num_groups;
1152 tmp_user_access_t.num_files = accessp->num_files;
1153 tmp_user_access_t.file_ids = CAST_USER_ADDR_T(accessp->file_ids);
1154 tmp_user_access_t.groups = CAST_USER_ADDR_T(accessp->groups);
1155 tmp_user_access_t.access = CAST_USER_ADDR_T(accessp->access);
1156 user_access_structp = &tmp_user_access_t;
1157 }
1158
1159 num_files = user_access_structp->num_files;
1160 if (num_files < 1) {
1161 goto err_exit_bulk_access;
1162 }
1163 if (num_files > 256) {
1164 error = EINVAL;
1165 goto err_exit_bulk_access;
1166 }
1167
1168 if ((error = copyin(user_access_structp->file_ids, (caddr_t)file_ids,
1169 num_files * sizeof(int)))) {
1170 goto err_exit_bulk_access;
1171 }
1172
1173 /* fill in the ucred structure */
1174 flags = user_access_structp->flags;
1175 if ((flags & (F_OK | R_OK | W_OK | X_OK)) == 0) {
1176 flags = R_OK;
1177 }
1178
1179 /* check if we've been passed leaf node ids or parent ids */
1180 if (flags & PARENT_IDS_FLAG) {
1181 check_leaf = false;
1182 }
1183
1184 /*
1185 * Create a templated credential; this credential may *NOT*
1186 * be used unless instantiated with a kauth_cred_create();
1187 	 * there must be a corresponding kauth_cred_unref() when it
1188 * is no longer in use (i.e. before it goes out of scope).
1189 */
1190 memset(&myucred, 0, sizeof(myucred));
1191 myucred.cr_ref = 1;
1192 myucred.cr_uid = myucred.cr_ruid = myucred.cr_svuid = user_access_structp->uid;
1193 myucred.cr_ngroups = user_access_structp->num_groups;
1194 if (myucred.cr_ngroups < 1 || myucred.cr_ngroups > 16) {
1195 myucred.cr_ngroups = 0;
1196 } else if ((error = copyin(user_access_structp->groups, (caddr_t)myucred.cr_groups,
1197 myucred.cr_ngroups * sizeof(gid_t)))) {
1198 goto err_exit_bulk_access;
1199 }
1200 myucred.cr_rgid = myucred.cr_svgid = myucred.cr_groups[0];
1201 myucred.cr_gmuid = myucred.cr_uid;
1202
1203 my_context.vc_proc = p;
1204 my_context.vc_ucred = kauth_cred_create(&myucred);
1205
1206 /* Check access to each file_id passed in */
1207 for (i = 0; i < num_files; i++) {
1208 #if 0
1209 cnid = (cnid_t) file_ids[i];
1210
1211 /* root always has access */
1212 if (!suser(my_context.vc_ucred, NULL)) {
1213 access[i] = 0;
1214 continue;
1215 }
1216
1217 if (check_leaf) {
1218
1219 /* do the lookup (checks the cnode hash, then the catalog) */
1220 error = do_attr_lookup(hfsmp, &cache, dev, cnid, skip_cp, &catkey, &cnattr, p);
1221 if (error) {
1222 access[i] = (short) error;
1223 continue;
1224 }
1225
1226 /* before calling CheckAccess(), check the target file for read access */
1227 myPerms = DerivePermissionSummary(cnattr.ca_uid, cnattr.ca_gid,
1228 cnattr.ca_mode, hfsmp->hfs_mp, my_context.vc_ucred, p );
1229
1230
1231 /* fail fast if no access */
1232 if ((myPerms & flags) == 0) {
1233 access[i] = EACCES;
1234 continue;
1235 }
1236 } else {
1237 /* we were passed an array of parent ids */
1238 catkey.hfsPlus.parentID = cnid;
1239 }
1240
1241 /* if the last guy had the same parent and had access, we're done */
1242 if (i > 0 && catkey.hfsPlus.parentID == prevParent_cnid && access[i-1] == 0) {
1243 cache.cachehits++;
1244 access[i] = 0;
1245 continue;
1246 }
1247
1248 myaccess = do_access_check(hfsmp, &error, &cache, catkey.hfsPlus.parentID,
1249 skip_cp, p, my_context.vc_ucred, dev);
1250
1251 if ( myaccess ) {
1252 access[i] = 0; // have access.. no errors to report
1253 } else {
1254 access[i] = (error != 0 ? (short) error : EACCES);
1255 }
1256
1257 prevParent_cnid = catkey.hfsPlus.parentID;
1258 #else
1259 int myErr;
1260
1261 cnid = (cnid_t)file_ids[i];
1262
1263 while (cnid >= kRootDirID) {
1264 /* get the vnode for this cnid */
1265 myErr = hfs_vget(hfsmp, cnid, &vp, 0);
1266 if ( myErr ) {
1267 access[i] = EACCES;
1268 break;
1269 }
1270
1271 cnid = VTOC(vp)->c_parentcnid;
1272
1273 hfs_unlock(VTOC(vp));
1274 if (vnode_vtype(vp) == VDIR) {
1275 /*
1276 * XXX This code assumes that none of the
1277 * XXX callbacks from vnode_authorize() will
1278 * XXX take a persistent ref on the context
1279 * XXX credential, which is a bad assumption.
1280 */
1281 myErr = vnode_authorize(vp, NULL, (KAUTH_VNODE_SEARCH | KAUTH_VNODE_LIST_DIRECTORY), &my_context);
1282 } else {
1283 myErr = vnode_authorize(vp, NULL, KAUTH_VNODE_READ_DATA, &my_context);
1284 }
1285 vnode_put(vp);
1286 access[i] = myErr;
1287 if (myErr) {
1288 break;
1289 }
1290 }
1291 #endif
1292 }
1293
1294 /* copyout the access array */
1295 if ((error = copyout((caddr_t)access, user_access_structp->access,
1296 num_files * sizeof (short)))) {
1297 goto err_exit_bulk_access;
1298 }
1299
1300 err_exit_bulk_access:
1301
1302 //printf("on exit (err %d), numfiles/numcached/cachehits/lookups is %d/%d/%d/%d\n", error, num_files, cache.numcached, cache.cachehits, cache.lookups);
1303
1304 release_pathbuff((char *) cache.acache);
1305 release_pathbuff((char *) cache.haveaccess);
1306 release_pathbuff((char *) file_ids);
1307 release_pathbuff((char *) access);
1308 /* clean up local context, if needed */
1309 if (IS_VALID_CRED(my_context.vc_ucred))
1310 kauth_cred_unref(&my_context.vc_ucred);
1311
1312 return (error);
1313 } /* HFS_BULKACCESS */
1314
1315 case HFS_SETACLSTATE: {
1316 int state;
1317
1318 if (ap->a_data == NULL) {
1319 return (EINVAL);
1320 }
1321
1322 vfsp = vfs_statfs(HFSTOVFS(hfsmp));
1323 state = *(int *)ap->a_data;
1324
1325 // super-user can enable or disable acl's on a volume.
1326 // the volume owner can only enable acl's
1327 if (!is_suser() && (state == 0 || kauth_cred_getuid(cred) != vfsp->f_owner)) {
1328 return (EPERM);
1329 }
1330 if (state == 0 || state == 1)
1331 return hfs_setextendedsecurity(hfsmp, state);
1332 else
1333 return (EINVAL);
1334 }
1335
1336 case F_FULLFSYNC: {
1337 int error;
1338
1339 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1340 if (error == 0) {
1341 error = hfs_fsync(vp, MNT_NOWAIT, TRUE, p);
1342 hfs_unlock(VTOC(vp));
1343 }
1344
1345 return error;
1346 }
1347
1348 case F_CHKCLEAN: {
1349 register struct cnode *cp;
1350 int error;
1351
1352 if (!vnode_isreg(vp))
1353 return EINVAL;
1354
1355 error = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK);
1356 if (error == 0) {
1357 cp = VTOC(vp);
1358 /*
1359 * used by regression test to determine if
1360 * all the dirty pages (via write) have been cleaned
1361 			 * after a call to 'fsync'.
1362 */
1363 error = is_file_clean(vp, VTOF(vp)->ff_size);
1364 hfs_unlock(cp);
1365 }
1366 return (error);
1367 }
1368
1369 case F_RDADVISE: {
1370 register struct radvisory *ra;
1371 struct filefork *fp;
1372 int error;
1373
1374 if (!vnode_isreg(vp))
1375 return EINVAL;
1376
1377 ra = (struct radvisory *)(ap->a_data);
1378 fp = VTOF(vp);
1379
1380 /* Protect against a size change. */
1381 hfs_lock_truncate(VTOC(vp), TRUE);
1382
1383 if (ra->ra_offset >= fp->ff_size) {
1384 error = EFBIG;
1385 } else {
1386 error = advisory_read(vp, fp->ff_size, ra->ra_offset, ra->ra_count);
1387 }
1388
1389 hfs_unlock_truncate(VTOC(vp));
1390 return (error);
1391 }
1392
1393 case F_READBOOTSTRAP:
1394 case F_WRITEBOOTSTRAP:
1395 {
1396 struct vnode *devvp = NULL;
1397 user_fbootstraptransfer_t *user_bootstrapp;
1398 int devBlockSize;
1399 int error;
1400 uio_t auio;
1401 daddr64_t blockNumber;
1402 u_long blockOffset;
1403 u_long xfersize;
1404 struct buf *bp;
1405 user_fbootstraptransfer_t user_bootstrap;
1406
1407 if (!vnode_isvroot(vp))
1408 return (EINVAL);
1409 	    /* LP64 - when the caller is a 64-bit process we are passed a pointer
1410 	     * to a user_fbootstraptransfer_t; otherwise we get a pointer to a
1411 	     * fbootstraptransfer_t, which we munge into a user_fbootstraptransfer_t
1412 */
1413 if (is64bit) {
1414 user_bootstrapp = (user_fbootstraptransfer_t *)ap->a_data;
1415 }
1416 else {
1417 fbootstraptransfer_t *bootstrapp = (fbootstraptransfer_t *)ap->a_data;
1418 user_bootstrapp = &user_bootstrap;
1419 user_bootstrap.fbt_offset = bootstrapp->fbt_offset;
1420 user_bootstrap.fbt_length = bootstrapp->fbt_length;
1421 user_bootstrap.fbt_buffer = CAST_USER_ADDR_T(bootstrapp->fbt_buffer);
1422 }
1423 if (user_bootstrapp->fbt_offset + user_bootstrapp->fbt_length > 1024)
1424 return EINVAL;
1425
1426 devvp = VTOHFS(vp)->hfs_devvp;
1427 auio = uio_create(1, user_bootstrapp->fbt_offset,
1428 is64bit ? UIO_USERSPACE64 : UIO_USERSPACE32,
1429 (ap->a_command == F_WRITEBOOTSTRAP) ? UIO_WRITE : UIO_READ);
1430 uio_addiov(auio, user_bootstrapp->fbt_buffer, user_bootstrapp->fbt_length);
1431
1432 devBlockSize = vfs_devblocksize(vnode_mount(vp));
1433
1434 while (uio_resid(auio) > 0) {
1435 blockNumber = uio_offset(auio) / devBlockSize;
1436 error = (int)buf_bread(devvp, blockNumber, devBlockSize, cred, &bp);
1437 if (error) {
1438 if (bp) buf_brelse(bp);
1439 uio_free(auio);
1440 return error;
1441 };
1442
1443 blockOffset = uio_offset(auio) % devBlockSize;
1444 xfersize = devBlockSize - blockOffset;
1445 error = uiomove((caddr_t)buf_dataptr(bp) + blockOffset, (int)xfersize, auio);
1446 if (error) {
1447 buf_brelse(bp);
1448 uio_free(auio);
1449 return error;
1450 };
1451 if (uio_rw(auio) == UIO_WRITE) {
1452 error = VNOP_BWRITE(bp);
1453 if (error) {
1454 uio_free(auio);
1455 return error;
1456 }
1457 } else {
1458 buf_brelse(bp);
1459 };
1460 };
1461 uio_free(auio);
1462 };
1463 return 0;
1464
1465 case _IOC(IOC_OUT,'h', 4, 0): /* Create date in local time */
1466 {
1467 if (is64bit) {
1468 *(user_time_t *)(ap->a_data) = (user_time_t) (to_bsd_time(VTOVCB(vp)->localCreateDate));
1469 }
1470 else {
1471 *(time_t *)(ap->a_data) = to_bsd_time(VTOVCB(vp)->localCreateDate);
1472 }
1473 return 0;
1474 }
1475
1476 case HFS_GET_MOUNT_TIME:
1477 return copyout(&hfsmp->hfs_mount_time, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_mount_time));
1478 break;
1479
1480 case HFS_GET_LAST_MTIME:
1481 return copyout(&hfsmp->hfs_last_mounted_mtime, CAST_USER_ADDR_T(ap->a_data), sizeof(hfsmp->hfs_last_mounted_mtime));
1482 break;
1483
1484 case HFS_SET_BOOT_INFO:
1485 if (!vnode_isvroot(vp))
1486 return(EINVAL);
1487 if (!kauth_cred_issuser(cred) && (kauth_cred_getuid(cred) != vfs_statfs(HFSTOVFS(hfsmp))->f_owner))
1488 return(EACCES); /* must be superuser or owner of filesystem */
1489 HFS_MOUNT_LOCK(hfsmp, TRUE);
1490 bcopy(ap->a_data, &hfsmp->vcbFndrInfo, sizeof(hfsmp->vcbFndrInfo));
1491 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1492 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
1493 break;
1494
1495 case HFS_GET_BOOT_INFO:
1496 if (!vnode_isvroot(vp))
1497 return(EINVAL);
1498 HFS_MOUNT_LOCK(hfsmp, TRUE);
1499 bcopy(&hfsmp->vcbFndrInfo, ap->a_data, sizeof(hfsmp->vcbFndrInfo));
1500 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1501 break;
1502
1503 default:
1504 return (ENOTTY);
1505 }
1506
1507 /* Should never get here */
1508 return 0;
1509 }
1510
1511 /*
1512 * select
1513 */
1514 int
1515 hfs_vnop_select(__unused struct vnop_select_args *ap)
1516 /*
1517 struct vnop_select_args {
1518 vnode_t a_vp;
1519 int a_which;
1520 int a_fflags;
1521 void *a_wql;
1522 vfs_context_t a_context;
1523 };
1524 */
1525 {
1526 /*
1527 * We should really check to see if I/O is possible.
1528 */
1529 return (1);
1530 }
1531
1532 /*
1533  * Converts a logical block number to a physical block, and optionally returns
1534  * the number of remaining blocks in a run.  The logical block is based on hfsNode.logBlockSize.
1535  * The physical block number is based on the device block size; currently it's 512.
1536  * The block run is returned in logical blocks, and is the REMAINING number of blocks.
1537 */
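/*
 * Worked illustration (numbers assumed, not from the original source): with a
 * 4 KB logical block size and 512-byte device blocks, logical block 10 sits at
 * byte offset 10 * 4096 = 40960, and MapFileBlockC() returns the 512-byte
 * device block backing that offset.  If 32 KB is contiguously allocated from
 * there (bytesContAvail == 32768), the run reported through *runp is
 * 32768/4096 - 1 = 7 logical blocks remaining after this one.
 */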
1538 int
1539 hfs_bmap(struct vnode *vp, daddr_t bn, struct vnode **vpp, daddr64_t *bnp, int *runp)
1540 {
1541 struct cnode *cp = VTOC(vp);
1542 struct filefork *fp = VTOF(vp);
1543 struct hfsmount *hfsmp = VTOHFS(vp);
1544 int retval = E_NONE;
1545 daddr_t logBlockSize;
1546 size_t bytesContAvail = 0;
1547 off_t blockposition;
1548 int lockExtBtree;
1549 int lockflags = 0;
1550
1551 /*
1552 * Check for underlying vnode requests and ensure that logical
1553 * to physical mapping is requested.
1554 */
1555 if (vpp != NULL)
1556 *vpp = cp->c_devvp;
1557 if (bnp == NULL)
1558 return (0);
1559
1560 logBlockSize = GetLogicalBlockSize(vp);
1561 blockposition = (off_t)bn * (off_t)logBlockSize;
1562
1563 lockExtBtree = overflow_extents(fp);
1564
1565 if (lockExtBtree)
1566 lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
1567
1568 retval = MacToVFSError(
1569 MapFileBlockC (HFSTOVCB(hfsmp),
1570 (FCB*)fp,
1571 MAXPHYSIO,
1572 blockposition,
1573 bnp,
1574 &bytesContAvail));
1575
1576 if (lockExtBtree)
1577 hfs_systemfile_unlock(hfsmp, lockflags);
1578
1579 if (retval == E_NONE) {
1580 /* Figure out how many read ahead blocks there are */
1581 if (runp != NULL) {
1582 if (can_cluster(logBlockSize)) {
1583 /* Make sure this result never goes negative: */
1584 *runp = (bytesContAvail < logBlockSize) ? 0 : (bytesContAvail / logBlockSize) - 1;
1585 } else {
1586 *runp = 0;
1587 }
1588 }
1589 }
1590 return (retval);
1591 }
1592
1593 /*
1594 * Convert logical block number to file offset.
1595 */
1596 int
1597 hfs_vnop_blktooff(struct vnop_blktooff_args *ap)
1598 /*
1599 struct vnop_blktooff_args {
1600 vnode_t a_vp;
1601 daddr64_t a_lblkno;
1602 off_t *a_offset;
1603 };
1604 */
1605 {
1606 if (ap->a_vp == NULL)
1607 return (EINVAL);
1608 *ap->a_offset = (off_t)ap->a_lblkno * (off_t)GetLogicalBlockSize(ap->a_vp);
1609
1610 return(0);
1611 }
1612
1613 /*
1614 * Convert file offset to logical block number.
1615 */
1616 int
1617 hfs_vnop_offtoblk(struct vnop_offtoblk_args *ap)
1618 /*
1619 struct vnop_offtoblk_args {
1620 vnode_t a_vp;
1621 off_t a_offset;
1622 daddr64_t *a_lblkno;
1623 };
1624 */
1625 {
1626 if (ap->a_vp == NULL)
1627 return (EINVAL);
1628 *ap->a_lblkno = (daddr64_t)(ap->a_offset / (off_t)GetLogicalBlockSize(ap->a_vp));
1629
1630 return(0);
1631 }
1632
1633 /*
1634 * Map file offset to physical block number.
1635 *
1636 * System file cnodes are expected to be locked (shared or exclusive).
1637 */
1638 int
1639 hfs_vnop_blockmap(struct vnop_blockmap_args *ap)
1640 /*
1641 struct vnop_blockmap_args {
1642 vnode_t a_vp;
1643 off_t a_foffset;
1644 size_t a_size;
1645 daddr64_t *a_bpn;
1646 size_t *a_run;
1647 void *a_poff;
1648 int a_flags;
1649 vfs_context_t a_context;
1650 };
1651 */
1652 {
1653 struct vnode *vp = ap->a_vp;
1654 struct cnode *cp;
1655 struct filefork *fp;
1656 struct hfsmount *hfsmp;
1657 size_t bytesContAvail = 0;
1658 int retval = E_NONE;
1659 int syslocks = 0;
1660 int lockflags = 0;
1661 struct rl_entry *invalid_range;
1662 enum rl_overlaptype overlaptype;
1663 int started_tr = 0;
1664 int tooklock = 0;
1665
1666 /* Do not allow blockmap operation on a directory */
1667 if (vnode_isdir(vp)) {
1668 return (ENOTSUP);
1669 }
1670
1671 /*
1672 * Check for underlying vnode requests and ensure that logical
1673 * to physical mapping is requested.
1674 */
1675 if (ap->a_bpn == NULL)
1676 return (0);
1677
1678 if ( !vnode_issystem(vp) && !vnode_islnk(vp)) {
1679 if (VTOC(vp)->c_lockowner != current_thread()) {
1680 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
1681 tooklock = 1;
1682 } else {
1683 cp = VTOC(vp);
1684 panic("blockmap: %s cnode lock already held!\n",
1685 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
1686 }
1687 }
1688 hfsmp = VTOHFS(vp);
1689 cp = VTOC(vp);
1690 fp = VTOF(vp);
1691
1692 retry:
1693 if (fp->ff_unallocblocks) {
1694 if (hfs_start_transaction(hfsmp) != 0) {
1695 retval = EINVAL;
1696 goto exit;
1697 } else {
1698 started_tr = 1;
1699 }
1700 syslocks = SFL_EXTENTS | SFL_BITMAP;
1701
1702 } else if (overflow_extents(fp)) {
1703 syslocks = SFL_EXTENTS;
1704 }
1705
1706 if (syslocks)
1707 lockflags = hfs_systemfile_lock(hfsmp, syslocks, HFS_EXCLUSIVE_LOCK);
1708
1709 /*
1710 * Check for any delayed allocations.
1711 */
1712 if (fp->ff_unallocblocks) {
1713 SInt64 actbytes;
1714 u_int32_t loanedBlocks;
1715
1716 //
1717 // Make sure we have a transaction. It's possible
1718 // that we came in and fp->ff_unallocblocks was zero
1719 // but during the time we blocked acquiring the extents
1720 // btree, ff_unallocblocks became non-zero and so we
1721 // will need to start a transaction.
1722 //
1723 if (started_tr == 0) {
1724 if (syslocks) {
1725 hfs_systemfile_unlock(hfsmp, lockflags);
1726 syslocks = 0;
1727 }
1728 goto retry;
1729 }
1730
1731 /*
1732 		 * Note: ExtendFileC will release any blocks on loan and
1733 		 * acquire real blocks.  So we ask to extend by zero bytes
1734 * since ExtendFileC will account for the virtual blocks.
1735 */
1736
1737 loanedBlocks = fp->ff_unallocblocks;
1738 retval = ExtendFileC(hfsmp, (FCB*)fp, 0, 0,
1739 kEFAllMask | kEFNoClumpMask, &actbytes);
1740
1741 if (retval) {
1742 fp->ff_unallocblocks = loanedBlocks;
1743 cp->c_blocks += loanedBlocks;
1744 fp->ff_blocks += loanedBlocks;
1745
1746 HFS_MOUNT_LOCK(hfsmp, TRUE);
1747 hfsmp->loanedBlocks += loanedBlocks;
1748 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
1749 }
1750
1751 if (retval) {
1752 hfs_systemfile_unlock(hfsmp, lockflags);
1753 cp->c_flag |= C_MODIFIED;
1754 if (started_tr) {
1755 (void) hfs_update(vp, TRUE);
1756 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1757
1758 hfs_end_transaction(hfsmp);
1759 }
1760 goto exit;
1761 }
1762 }
1763
1764 retval = MapFileBlockC(hfsmp, (FCB *)fp, ap->a_size, ap->a_foffset,
1765 ap->a_bpn, &bytesContAvail);
1766 if (syslocks) {
1767 hfs_systemfile_unlock(hfsmp, lockflags);
1768 syslocks = 0;
1769 }
1770
1771 if (started_tr) {
1772 (void) hfs_update(vp, TRUE);
1773 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1774 hfs_end_transaction(hfsmp);
1775 started_tr = 0;
1776 }
1777 if (retval) {
1778 goto exit;
1779 }
1780
1781 /* Adjust the mapping information for invalid file ranges: */
1782 overlaptype = rl_scan(&fp->ff_invalidranges, ap->a_foffset,
1783 ap->a_foffset + (off_t)bytesContAvail - 1,
1784 &invalid_range);
1785 if (overlaptype != RL_NOOVERLAP) {
1786 switch(overlaptype) {
1787 case RL_MATCHINGOVERLAP:
1788 case RL_OVERLAPCONTAINSRANGE:
1789 case RL_OVERLAPSTARTSBEFORE:
1790 /* There's no valid block for this byte offset: */
1791 *ap->a_bpn = (daddr64_t)-1;
1792 /* There's no point limiting the amount to be returned
1793 * if the invalid range that was hit extends all the way
1794 			 * to the EOF (i.e. there are no valid bytes between the
1795 * end of this range and the file's EOF):
1796 */
1797 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1798 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1799 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1800 }
1801 break;
1802
1803 case RL_OVERLAPISCONTAINED:
1804 case RL_OVERLAPENDSAFTER:
1805 /* The range of interest hits an invalid block before the end: */
1806 if (invalid_range->rl_start == ap->a_foffset) {
1807 /* There's actually no valid information to be had starting here: */
1808 *ap->a_bpn = (daddr64_t)-1;
1809 if (((off_t)fp->ff_size > (invalid_range->rl_end + 1)) &&
1810 (invalid_range->rl_end + 1 - ap->a_foffset < bytesContAvail)) {
1811 bytesContAvail = invalid_range->rl_end + 1 - ap->a_foffset;
1812 }
1813 } else {
1814 bytesContAvail = invalid_range->rl_start - ap->a_foffset;
1815 }
1816 break;
1817
1818 case RL_NOOVERLAP:
1819 break;
1820 } /* end switch */
1821 if (bytesContAvail > ap->a_size)
1822 bytesContAvail = ap->a_size;
1823 }
1824 if (ap->a_run)
1825 *ap->a_run = bytesContAvail;
1826
1827 if (ap->a_poff)
1828 *(int *)ap->a_poff = 0;
1829 exit:
1830 if (tooklock)
1831 hfs_unlock(cp);
1832
1833 return (MacToVFSError(retval));
1834 }
1835
1836
1837 /*
1838 * prepare and issue the I/O
1839 * buf_strategy knows how to deal
1840 * with requests that require
1841 * fragmented I/Os
1842 */
1843 int
1844 hfs_vnop_strategy(struct vnop_strategy_args *ap)
1845 {
1846 buf_t bp = ap->a_bp;
1847 vnode_t vp = buf_vnode(bp);
1848 struct cnode *cp = VTOC(vp);
1849
1850 return (buf_strategy(cp->c_devvp, ap));
1851 }
1852
1853
1854 static int
1855 do_hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize, vfs_context_t context)
1856 {
1857 register struct cnode *cp = VTOC(vp);
1858 struct filefork *fp = VTOF(vp);
1859 	struct proc *p = vfs_context_proc(context);
1860 kauth_cred_t cred = vfs_context_ucred(context);
1861 int retval;
1862 off_t bytesToAdd;
1863 off_t actualBytesAdded;
1864 off_t filebytes;
1865 u_int64_t old_filesize;
1866 u_long fileblocks;
1867 int blksize;
1868 struct hfsmount *hfsmp;
1869 int lockflags;
1870
1871 blksize = VTOVCB(vp)->blockSize;
1872 fileblocks = fp->ff_blocks;
1873 filebytes = (off_t)fileblocks * (off_t)blksize;
1874 old_filesize = fp->ff_size;
1875
1876 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_START,
1877 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1878
1879 if (length < 0)
1880 return (EINVAL);
1881
1882 /* This should only happen with a corrupt filesystem */
1883 if ((off_t)fp->ff_size < 0)
1884 return (EINVAL);
1885
1886 if ((!ISHFSPLUS(VTOVCB(vp))) && (length > (off_t)MAXHFSFILESIZE))
1887 return (EFBIG);
1888
1889 hfsmp = VTOHFS(vp);
1890
1891 retval = E_NONE;
1892
1893 /* Files that are changing size are not hot file candidates. */
1894 if (hfsmp->hfc_stage == HFC_RECORDING) {
1895 fp->ff_bytesread = 0;
1896 }
1897
1898 /*
1899 * We cannot just check if fp->ff_size == length (as an optimization)
1900 * since there may be extra physical blocks that also need truncation.
1901 */
1902 #if QUOTA
1903 if ((retval = hfs_getinoquota(cp)))
1904 return(retval);
1905 #endif /* QUOTA */
1906
1907 /*
1908 * Lengthen the size of the file. We must ensure that the
1909 * last byte of the file is allocated. Since the smallest
1910 * value of ff_size is 0, length will be at least 1.
1911 */
1912 if (length > (off_t)fp->ff_size) {
1913 #if QUOTA
1914 retval = hfs_chkdq(cp, (int64_t)(roundup(length - filebytes, blksize)),
1915 cred, 0);
1916 if (retval)
1917 goto Err_Exit;
1918 #endif /* QUOTA */
1919 /*
1920 * If we don't have enough physical space then
1921 * we need to extend the physical size.
1922 */
1923 if (length > filebytes) {
1924 int eflags;
1925 u_long blockHint = 0;
1926
1927 /* All or nothing and don't round up to clumpsize. */
1928 eflags = kEFAllMask | kEFNoClumpMask;
1929
1930 if (cred && suser(cred, NULL) != 0)
1931 eflags |= kEFReserveMask; /* keep a reserve */
1932
1933 /*
1934 * Allocate Journal and Quota files in metadata zone.
1935 */
1936 if (filebytes == 0 &&
1937 hfsmp->hfs_flags & HFS_METADATA_ZONE &&
1938 hfs_virtualmetafile(cp)) {
1939 eflags |= kEFMetadataMask;
1940 blockHint = hfsmp->hfs_metazone_start;
1941 }
1942 if (hfs_start_transaction(hfsmp) != 0) {
1943 retval = EINVAL;
1944 goto Err_Exit;
1945 }
1946
1947 /* Protect extents b-tree and allocation bitmap */
1948 lockflags = SFL_BITMAP;
1949 if (overflow_extents(fp))
1950 lockflags |= SFL_EXTENTS;
1951 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
1952
1953 while ((length > filebytes) && (retval == E_NONE)) {
1954 bytesToAdd = length - filebytes;
1955 retval = MacToVFSError(ExtendFileC(VTOVCB(vp),
1956 (FCB*)fp,
1957 bytesToAdd,
1958 blockHint,
1959 eflags,
1960 &actualBytesAdded));
1961
1962 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
1963 if (actualBytesAdded == 0 && retval == E_NONE) {
1964 if (length > filebytes)
1965 length = filebytes;
1966 break;
1967 }
1968 } /* endwhile */
1969
1970 hfs_systemfile_unlock(hfsmp, lockflags);
1971
1972 if (hfsmp->jnl) {
1973 (void) hfs_update(vp, TRUE);
1974 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
1975 }
1976
1977 hfs_end_transaction(hfsmp);
1978
1979 if (retval)
1980 goto Err_Exit;
1981
1982 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
1983 (int)length, (int)fp->ff_size, (int)filebytes, 0, 0);
1984 }
1985
1986 if (!(flags & IO_NOZEROFILL)) {
1987 if (UBCINFOEXISTS(vp) && retval == E_NONE) {
1988 struct rl_entry *invalid_range;
1989 off_t zero_limit;
1990
1991 zero_limit = (fp->ff_size + (PAGE_SIZE_64 - 1)) & ~PAGE_MASK_64;
1992 if (length < zero_limit) zero_limit = length;
1993
1994 if (length > (off_t)fp->ff_size) {
1995 struct timeval tv;
1996
1997 /* Extending the file: time to fill out the current last page w. zeroes? */
1998 if ((fp->ff_size & PAGE_MASK_64) &&
1999 (rl_scan(&fp->ff_invalidranges, fp->ff_size & ~PAGE_MASK_64,
2000 fp->ff_size - 1, &invalid_range) == RL_NOOVERLAP)) {
2001
2002 /* There's some valid data at the start of the (current) last page
2003 of the file, so zero out the remainder of that page to ensure the
2004 entire page contains valid data. Since there is no invalid range
2005 possible past the (current) eof, there's no need to remove anything
2006 from the invalid range list before calling cluster_write(): */
2007 hfs_unlock(cp);
2008 retval = cluster_write(vp, (struct uio *) 0, fp->ff_size, zero_limit,
2009 fp->ff_size, (off_t)0,
2010 (flags & IO_SYNC) | IO_HEADZEROFILL | IO_NOZERODIRTY);
2011 hfs_lock(cp, HFS_FORCE_LOCK);
2012 if (retval) goto Err_Exit;
2013
2014 /* Merely invalidate the remaining area, if necessary: */
2015 if (length > zero_limit) {
2016 microuptime(&tv);
2017 rl_add(zero_limit, length - 1, &fp->ff_invalidranges);
2018 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
2019 }
2020 } else {
2021 /* The page containing the (current) eof is invalid: just add the
2022 remainder of the page to the invalid list, along with the area
2023 being newly allocated:
2024 */
2025 microuptime(&tv);
2026 rl_add(fp->ff_size, length - 1, &fp->ff_invalidranges);
2027 cp->c_zftimeout = tv.tv_sec + ZFTIMELIMIT;
2028 }
2029 }
2030 } else {
2031 panic("hfs_truncate: invoked on non-UBC object?!");
2032 }
2033 }
2034 cp->c_touch_modtime = TRUE;
2035 fp->ff_size = length;
2036
2037 /* Nested transactions will do their own ubc_setsize. */
2038 if (!skipsetsize) {
2039 /*
2040 * ubc_setsize can cause a pagein here
2041 * so we need to drop cnode lock.
2042 */
2043 hfs_unlock(cp);
2044 ubc_setsize(vp, length);
2045 hfs_lock(cp, HFS_FORCE_LOCK);
2046 }
2047
2048 } else { /* Shorten the size of the file */
2049
2050 if ((off_t)fp->ff_size > length) {
2051 /*
2052 * Any buffers that are past the truncation point need to be
2053 * invalidated (to maintain buffer cache consistency).
2054 */
2055
2056 /* Nested transactions will do their own ubc_setsize. */
2057 if (!skipsetsize) {
2058 /*
2059 * ubc_setsize can cause a pageout here
2060 * so we need to drop cnode lock.
2061 */
2062 hfs_unlock(cp);
2063 ubc_setsize(vp, length);
2064 hfs_lock(cp, HFS_FORCE_LOCK);
2065 }
2066
2067 /* Any space previously marked as invalid is now irrelevant: */
2068 rl_remove(length, fp->ff_size - 1, &fp->ff_invalidranges);
2069 }
2070
2071 /*
2072 * Account for any unmapped blocks. Note that the new
2073 * file length can still end up with unmapped blocks.
2074 */
2075 if (fp->ff_unallocblocks > 0) {
2076 u_int32_t finalblks;
2077 u_int32_t loanedBlocks;
2078
2079 HFS_MOUNT_LOCK(hfsmp, TRUE);
2080
2081 loanedBlocks = fp->ff_unallocblocks;
2082 cp->c_blocks -= loanedBlocks;
2083 fp->ff_blocks -= loanedBlocks;
2084 fp->ff_unallocblocks = 0;
2085
2086 hfsmp->loanedBlocks -= loanedBlocks;
2087
2088 finalblks = (length + blksize - 1) / blksize;
2089 if (finalblks > fp->ff_blocks) {
2090 /* calculate required unmapped blocks */
2091 loanedBlocks = finalblks - fp->ff_blocks;
2092 hfsmp->loanedBlocks += loanedBlocks;
2093
2094 fp->ff_unallocblocks = loanedBlocks;
2095 cp->c_blocks += loanedBlocks;
2096 fp->ff_blocks += loanedBlocks;
2097 }
2098 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2099 }
2100
2101 /*
2102 * For a TBE process the deallocation of the file blocks is
2103 * delayed until the file is closed. And hfs_close calls
2104 * truncate with the IO_NDELAY flag set. So when IO_NDELAY
2105 * isn't set, we make sure this isn't a TBE process.
2106 */
2107 if ((flags & IO_NDELAY) || (proc_tbe(p) == 0)) {
2108 #if QUOTA
2109 off_t savedbytes = ((off_t)fp->ff_blocks * (off_t)blksize);
2110 #endif /* QUOTA */
2111 if (hfs_start_transaction(hfsmp) != 0) {
2112 retval = EINVAL;
2113 goto Err_Exit;
2114 }
2115
2116 if (fp->ff_unallocblocks == 0) {
2117 /* Protect extents b-tree and allocation bitmap */
2118 lockflags = SFL_BITMAP;
2119 if (overflow_extents(fp))
2120 lockflags |= SFL_EXTENTS;
2121 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2122
2123 retval = MacToVFSError(TruncateFileC(VTOVCB(vp),
2124 (FCB*)fp, length, false));
2125
2126 hfs_systemfile_unlock(hfsmp, lockflags);
2127 }
2128 if (hfsmp->jnl) {
2129 if (retval == 0) {
2130 fp->ff_size = length;
2131 }
2132 (void) hfs_update(vp, TRUE);
2133 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2134 }
2135
2136 hfs_end_transaction(hfsmp);
2137
2138 filebytes = (off_t)fp->ff_blocks * (off_t)blksize;
2139 if (retval)
2140 goto Err_Exit;
2141 #if QUOTA
2142 /* These are bytesreleased */
2143 (void) hfs_chkdq(cp, (int64_t)-(savedbytes - filebytes), NOCRED, 0);
2144 #endif /* QUOTA */
2145 }
2146 /* Only set update flag if the logical length changes */
2147 if (old_filesize != length)
2148 cp->c_touch_modtime = TRUE;
2149 fp->ff_size = length;
2150 }
2151 cp->c_touch_chgtime = TRUE;
2152 retval = hfs_update(vp, MNT_WAIT);
2153 if (retval) {
2154 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_NONE,
2155 -1, -1, -1, retval, 0);
2156 }
2157
2158 Err_Exit:
2159
2160 KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 7)) | DBG_FUNC_END,
2161 (int)length, (int)fp->ff_size, (int)filebytes, retval, 0);
2162
2163 return (retval);
2164 }
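
/*
 * A minimal user-space sketch of the page-rounding arithmetic used by the
 * zero-fill path in do_hfs_truncate() above: round the old EOF up to the
 * next page boundary to find how far cluster_write() should zero, but never
 * past the new logical length.  EX_PAGE_SIZE assumes a 4 KiB page and the
 * ex_* names are hypothetical, not HFS routines.
 */
#include <assert.h>
#include <stdint.h>

#define EX_PAGE_SIZE	4096ULL
#define EX_PAGE_MASK	(EX_PAGE_SIZE - 1)

/* Model of the zero_limit computation. */
static uint64_t
ex_zero_limit(uint64_t old_eof, uint64_t new_length)
{
	uint64_t limit = (old_eof + EX_PAGE_SIZE - 1) & ~EX_PAGE_MASK;

	/* Never zero past the new logical length. */
	return (new_length < limit) ? new_length : limit;
}

static void
ex_zero_limit_demo(void)
{
	/* Old EOF mid-page: zero out to the end of that page (8192)... */
	assert(ex_zero_limit(5000, 100000) == 8192);
	/* ...unless the new length stops sooner. */
	assert(ex_zero_limit(5000, 6000) == 6000);
	/* Old EOF already page aligned: nothing extra to zero. */
	assert(ex_zero_limit(8192, 100000) == 8192);
}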
2165
2166
2167
2168 /*
2169 * Truncate a cnode to at most length size, freeing (or adding) the
2170 * disk blocks.
2171 */
2172 __private_extern__
2173 int
2174 hfs_truncate(struct vnode *vp, off_t length, int flags, int skipsetsize,
2175 vfs_context_t context)
2176 {
2177 struct filefork *fp = VTOF(vp);
2178 off_t filebytes;
2179 u_long fileblocks;
2180 int blksize, error = 0;
2181 struct cnode *cp = VTOC(vp);
2182
2183 if (vnode_isdir(vp))
2184 return (EISDIR); /* cannot truncate an HFS directory! */
2185
2186 blksize = VTOVCB(vp)->blockSize;
2187 fileblocks = fp->ff_blocks;
2188 filebytes = (off_t)fileblocks * (off_t)blksize;
2189
2190 // have to loop truncating or growing files that are
2191 // really big because otherwise transactions can get
2192 // enormous and consume too many kernel resources.
2193
2194 if (length < filebytes) {
2195 while (filebytes > length) {
2196 if ((filebytes - length) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
2197 filebytes -= HFS_BIGFILE_SIZE;
2198 } else {
2199 filebytes = length;
2200 }
2201 cp->c_flag |= C_FORCEUPDATE;
2202 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2203 if (error)
2204 break;
2205 }
2206 } else if (length > filebytes) {
2207 while (filebytes < length) {
2208 if ((length - filebytes) > HFS_BIGFILE_SIZE && overflow_extents(fp)) {
2209 filebytes += HFS_BIGFILE_SIZE;
2210 } else {
2211 filebytes = length;
2212 }
2213 cp->c_flag |= C_FORCEUPDATE;
2214 error = do_hfs_truncate(vp, filebytes, flags, skipsetsize, context);
2215 if (error)
2216 break;
2217 }
2218 } else /* Same logical size */ {
2219
2220 error = do_hfs_truncate(vp, length, flags, skipsetsize, context);
2221 }
2222 /* Files that are changing size are not hot file candidates. */
2223 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING) {
2224 fp->ff_bytesread = 0;
2225 }
2226
2227 return (error);
2228 }
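
/*
 * A condensed user-space sketch of the stepping that hfs_truncate() does
 * above: move filebytes toward the target length one bounded chunk at a
 * time so no single transaction covers an enormous extent change.  In the
 * real code the chunking only happens when the fork has overflow extents,
 * and a matching size still gets one do_hfs_truncate() call; EX_BIG_STEP is
 * a placeholder for HFS_BIGFILE_SIZE and the ex_* names are hypothetical.
 */
#include <stdint.h>

#define EX_BIG_STEP	(32ULL * 1024 * 1024)	/* placeholder chunk size */

/* Stand-in for one bounded do_hfs_truncate() call. */
static int
ex_truncate_step(uint64_t target)
{
	(void)target;
	return 0;
}

/* Shrink or grow 'filebytes' toward 'length' one chunk at a time. */
static int
ex_chunked_truncate(uint64_t filebytes, uint64_t length)
{
	int error = 0;

	while (filebytes != length && error == 0) {
		if (filebytes > length && filebytes - length > EX_BIG_STEP)
			filebytes -= EX_BIG_STEP;
		else if (filebytes < length && length - filebytes > EX_BIG_STEP)
			filebytes += EX_BIG_STEP;
		else
			filebytes = length;
		error = ex_truncate_step(filebytes);
	}
	return error;
}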
2229
2230
2231
2232 /*
2233 * Preallocate file storage space.
2234 */
2235 int
2236 hfs_vnop_allocate(struct vnop_allocate_args /* {
2237 vnode_t a_vp;
2238 off_t a_length;
2239 u_int32_t a_flags;
2240 off_t *a_bytesallocated;
2241 off_t a_offset;
2242 vfs_context_t a_context;
2243 } */ *ap)
2244 {
2245 struct vnode *vp = ap->a_vp;
2246 struct cnode *cp;
2247 struct filefork *fp;
2248 ExtendedVCB *vcb;
2249 off_t length = ap->a_length;
2250 off_t startingPEOF;
2251 off_t moreBytesRequested;
2252 off_t actualBytesAdded;
2253 off_t filebytes;
2254 u_long fileblocks;
2255 int retval, retval2;
2256 UInt32 blockHint;
2257 UInt32 extendFlags; /* For call to ExtendFileC */
2258 struct hfsmount *hfsmp;
2259 kauth_cred_t cred = vfs_context_ucred(ap->a_context);
2260 int lockflags;
2261
2262 *(ap->a_bytesallocated) = 0;
2263
2264 if (!vnode_isreg(vp))
2265 return (EISDIR);
2266 if (length < (off_t)0)
2267 return (EINVAL);
2268
2269 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK)))
2270 return (retval);
2271 cp = VTOC(vp);
2272 fp = VTOF(vp);
2273 hfsmp = VTOHFS(vp);
2274 vcb = VTOVCB(vp);
2275
2276 fileblocks = fp->ff_blocks;
2277 filebytes = (off_t)fileblocks * (off_t)vcb->blockSize;
2278
2279 if ((ap->a_flags & ALLOCATEFROMVOL) && (length < filebytes)) {
2280 retval = EINVAL;
2281 goto Err_Exit;
2282 }
2283
2284 /* Fill in the flags word for the call to Extend the file */
2285
2286 extendFlags = kEFNoClumpMask;
2287 if (ap->a_flags & ALLOCATECONTIG)
2288 extendFlags |= kEFContigMask;
2289 if (ap->a_flags & ALLOCATEALL)
2290 extendFlags |= kEFAllMask;
2291 if (cred && suser(cred, NULL) != 0)
2292 extendFlags |= kEFReserveMask;
2293
2294 retval = E_NONE;
2295 blockHint = 0;
2296 startingPEOF = filebytes;
2297
2298 if (ap->a_flags & ALLOCATEFROMPEOF)
2299 length += filebytes;
2300 else if (ap->a_flags & ALLOCATEFROMVOL)
2301 blockHint = ap->a_offset / VTOVCB(vp)->blockSize;
2302
2303 /* If no changes are necessary, then we're done */
2304 if (filebytes == length)
2305 goto Std_Exit;
2306
2307 /*
2308 * Lengthen the size of the file. We must ensure that the
2309 * last byte of the file is allocated. Since the smallest
2310 * value of filebytes is 0, length will be at least 1.
2311 */
2312 if (length > filebytes) {
2313 moreBytesRequested = length - filebytes;
2314
2315 #if QUOTA
2316 retval = hfs_chkdq(cp,
2317 (int64_t)(roundup(moreBytesRequested, vcb->blockSize)),
2318 cred, 0);
2319 if (retval)
2320 goto Err_Exit;
2321
2322 #endif /* QUOTA */
2323 /*
2324 * Metadata zone checks.
2325 */
2326 if (hfsmp->hfs_flags & HFS_METADATA_ZONE) {
2327 /*
2328 * Allocate Journal and Quota files in metadata zone.
2329 */
2330 if (hfs_virtualmetafile(cp)) {
2331 extendFlags |= kEFMetadataMask;
2332 blockHint = hfsmp->hfs_metazone_start;
2333 } else if ((blockHint >= hfsmp->hfs_metazone_start) &&
2334 (blockHint <= hfsmp->hfs_metazone_end)) {
2335 /*
2336 * Move blockHint outside metadata zone.
2337 */
2338 blockHint = hfsmp->hfs_metazone_end + 1;
2339 }
2340 }
2341
2342 if (hfs_start_transaction(hfsmp) != 0) {
2343 retval = EINVAL;
2344 goto Err_Exit;
2345 }
2346
2347 /* Protect extents b-tree and allocation bitmap */
2348 lockflags = SFL_BITMAP;
2349 if (overflow_extents(fp))
2350 lockflags |= SFL_EXTENTS;
2351 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2352
2353 retval = MacToVFSError(ExtendFileC(vcb,
2354 (FCB*)fp,
2355 moreBytesRequested,
2356 blockHint,
2357 extendFlags,
2358 &actualBytesAdded));
2359
2360 *(ap->a_bytesallocated) = actualBytesAdded;
2361 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2362
2363 hfs_systemfile_unlock(hfsmp, lockflags);
2364
2365 if (hfsmp->jnl) {
2366 (void) hfs_update(vp, TRUE);
2367 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2368 }
2369
2370 hfs_end_transaction(hfsmp);
2371
2372 /*
2373 * if we get an error and no changes were made then exit
2374 * otherwise we must do the hfs_update to reflect the changes
2375 */
2376 if (retval && (startingPEOF == filebytes))
2377 goto Err_Exit;
2378
2379 /*
2380 * Adjust actualBytesAdded to be allocation block aligned, not
2381 * clump size aligned.
2382 * NOTE: What we report here does not match what is actually
2383 * allocated until the file is closed, when we truncate the file
2384 * to allocation block size.
2385 */
2386 if ((actualBytesAdded != 0) && (moreBytesRequested < actualBytesAdded))
2387 *(ap->a_bytesallocated) =
2388 roundup(moreBytesRequested, (off_t)vcb->blockSize);
2389
2390 } else { /* Shorten the size of the file */
2391
2392 if (fp->ff_size > length) {
2393 /*
2394 * Any buffers that are past the truncation point need to be
2395 * invalidated (to maintain buffer cache consistency).
2396 */
2397 }
2398
2399 if (hfs_start_transaction(hfsmp) != 0) {
2400 retval = EINVAL;
2401 goto Err_Exit;
2402 }
2403
2404 /* Protect extents b-tree and allocation bitmap */
2405 lockflags = SFL_BITMAP;
2406 if (overflow_extents(fp))
2407 lockflags |= SFL_EXTENTS;
2408 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2409
2410 retval = MacToVFSError(TruncateFileC(vcb, (FCB*)fp, length, false));
2411
2412 hfs_systemfile_unlock(hfsmp, lockflags);
2413
2414 filebytes = (off_t)fp->ff_blocks * (off_t)vcb->blockSize;
2415
2416 if (hfsmp->jnl) {
2417 (void) hfs_update(vp, TRUE);
2418 (void) hfs_volupdate(hfsmp, VOL_UPDATE, 0);
2419 }
2420
2421 hfs_end_transaction(hfsmp);
2422
2423
2424 /*
2425 * if we get an error and no changes were made then exit
2426 * otherwise we must do the hfs_update to reflect the changes
2427 */
2428 if (retval && (startingPEOF == filebytes)) goto Err_Exit;
2429 #if QUOTA
2430 /* These are bytesreleased */
2431 (void) hfs_chkdq(cp, (int64_t)-(startingPEOF - filebytes), NOCRED, 0);
2432 #endif /* QUOTA */
2433
2434 if (fp->ff_size > filebytes) {
2435 fp->ff_size = filebytes;
2436
2437 hfs_unlock(cp);
2438 ubc_setsize(vp, fp->ff_size);
2439 hfs_lock(cp, HFS_FORCE_LOCK);
2440 }
2441 }
2442
2443 Std_Exit:
2444 cp->c_touch_chgtime = TRUE;
2445 cp->c_touch_modtime = TRUE;
2446 retval2 = hfs_update(vp, MNT_WAIT);
2447
2448 if (retval == 0)
2449 retval = retval2;
2450 Err_Exit:
2451 hfs_unlock(cp);
2452 return (retval);
2453 }
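
/*
 * A small sketch of the reporting rule used by hfs_vnop_allocate() above:
 * when ExtendFileC() rounds a request up to the clump size, report only the
 * allocation-block aligned amount, since the file is trimmed back to block
 * alignment when it is closed.  The ex_* names and EX_ROUNDUP are
 * hypothetical helpers, not HFS definitions.
 */
#include <assert.h>
#include <stdint.h>

#define EX_ROUNDUP(x, y)	((((x) + (y) - 1) / (y)) * (y))

/* Bytes to report back to the caller for a grant of 'granted' bytes. */
static uint64_t
ex_bytes_to_report(uint64_t requested, uint64_t granted, uint64_t blocksize)
{
	if (granted != 0 && requested < granted)
		return EX_ROUNDUP(requested, blocksize);
	return granted;
}

static void
ex_report_demo(void)
{
	/* 10,000 bytes asked; clump pushed the grant to 65,536; 4 KiB blocks. */
	assert(ex_bytes_to_report(10000, 65536, 4096) == 12288);
	/* Grant matched the request exactly: report it as-is. */
	assert(ex_bytes_to_report(8192, 8192, 4096) == 8192);
}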
2454
2455
2456 /*
2457 * Pagein for HFS filesystem
2458 */
2459 int
2460 hfs_vnop_pagein(struct vnop_pagein_args *ap)
2461 /*
2462 struct vnop_pagein_args {
2463 vnode_t a_vp,
2464 upl_t a_pl,
2465 vm_offset_t a_pl_offset,
2466 off_t a_f_offset,
2467 size_t a_size,
2468 int a_flags
2469 vfs_context_t a_context;
2470 };
2471 */
2472 {
2473 vnode_t vp = ap->a_vp;
2474 int error;
2475
2476 error = cluster_pagein(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2477 ap->a_size, (off_t)VTOF(vp)->ff_size, ap->a_flags);
2478 /*
2479 * Keep track of blocks read.
2480 */
2481 if (VTOHFS(vp)->hfc_stage == HFC_RECORDING && error == 0) {
2482 struct cnode *cp;
2483 struct filefork *fp;
2484 int bytesread;
2485 int took_cnode_lock = 0;
2486
2487 cp = VTOC(vp);
2488 fp = VTOF(vp);
2489
2490 if (ap->a_f_offset == 0 && fp->ff_size < PAGE_SIZE)
2491 bytesread = fp->ff_size;
2492 else
2493 bytesread = ap->a_size;
2494
2495 /* When ff_bytesread exceeds 32-bits, update it behind the cnode lock. */
2496 if ((fp->ff_bytesread + bytesread) > 0x00000000ffffffff) {
2497 hfs_lock(cp, HFS_FORCE_LOCK);
2498 took_cnode_lock = 1;
2499 }
2500 /*
2501 * If this file hasn't been seen since the start of
2502 * the current sampling period then start over.
2503 */
2504 if (cp->c_atime < VTOHFS(vp)->hfc_timebase) {
2505 struct timeval tv;
2506
2507 fp->ff_bytesread = bytesread;
2508 microtime(&tv);
2509 cp->c_atime = tv.tv_sec;
2510 } else {
2511 fp->ff_bytesread += bytesread;
2512 }
2513 cp->c_touch_acctime = TRUE;
2514 if (took_cnode_lock)
2515 hfs_unlock(cp);
2516 }
2517 return (error);
2518 }
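
/*
 * A minimal sketch of the hot-file accounting decision made in
 * hfs_vnop_pagein() above: restart the per-file read counter when the file
 * was last touched before the current sampling period began, otherwise just
 * accumulate (the conditional cnode locking is elided).  The ex_* types and
 * names are hypothetical.
 */
#include <stdint.h>

struct ex_hotfile_stats {
	uint64_t bytesread;	/* models fp->ff_bytesread */
	int64_t  atime;		/* models cp->c_atime */
};

static void
ex_account_pagein(struct ex_hotfile_stats *st, uint64_t bytesread,
                  int64_t now, int64_t period_start)
{
	if (st->atime < period_start) {
		/* First read seen this sampling period: start over. */
		st->bytesread = bytesread;
		st->atime = now;
	} else {
		st->bytesread += bytesread;
	}
}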
2519
2520 /*
2521 * Pageout for HFS filesystem.
2522 */
2523 int
2524 hfs_vnop_pageout(struct vnop_pageout_args *ap)
2525 /*
2526 struct vnop_pageout_args {
2527 vnode_t a_vp,
2528 upl_t a_pl,
2529 vm_offset_t a_pl_offset,
2530 off_t a_f_offset,
2531 size_t a_size,
2532 int a_flags
2533 vfs_context_t a_context;
2534 };
2535 */
2536 {
2537 vnode_t vp = ap->a_vp;
2538 struct cnode *cp;
2539 struct filefork *fp;
2540 int retval;
2541 off_t end_of_range;
2542 off_t filesize;
2543
2544 cp = VTOC(vp);
2545 if (cp->c_lockowner == current_thread()) {
2546 panic("pageout: %s cnode lock already held!\n",
2547 cp->c_desc.cd_nameptr ? cp->c_desc.cd_nameptr : "");
2548 }
2549 if ( (retval = hfs_lock(cp, HFS_EXCLUSIVE_LOCK))) {
2550 if (!(ap->a_flags & UPL_NOCOMMIT)) {
2551 ubc_upl_abort_range(ap->a_pl,
2552 ap->a_pl_offset,
2553 ap->a_size,
2554 UPL_ABORT_FREE_ON_EMPTY);
2555 }
2556 return (retval);
2557 }
2558 fp = VTOF(vp);
2559
2560 filesize = fp->ff_size;
2561 end_of_range = ap->a_f_offset + ap->a_size - 1;
2562
2563 if (end_of_range >= filesize) {
2564 end_of_range = (off_t)(filesize - 1);
2565 }
2566 if (ap->a_f_offset < filesize) {
2567 rl_remove(ap->a_f_offset, end_of_range, &fp->ff_invalidranges);
2568 cp->c_flag |= C_MODIFIED; /* leof is dirty */
2569 }
2570 hfs_unlock(cp);
2571
2572 retval = cluster_pageout(vp, ap->a_pl, ap->a_pl_offset, ap->a_f_offset,
2573 ap->a_size, filesize, ap->a_flags);
2574
2575 /*
2576 * If data was written, and setuid or setgid bits are set and
2577 * this process is not the superuser then clear the setuid and
2578 * setgid bits as a precaution against tampering.
2579 */
2580 if ((retval == 0) &&
2581 (cp->c_mode & (S_ISUID | S_ISGID)) &&
2582 (vfs_context_suser(ap->a_context) != 0)) {
2583 hfs_lock(cp, HFS_FORCE_LOCK);
2584 cp->c_mode &= ~(S_ISUID | S_ISGID);
2585 cp->c_touch_chgtime = TRUE;
2586 hfs_unlock(cp);
2587 }
2588 return (retval);
2589 }
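
/*
 * The same setuid/setgid precaution applied by hfs_vnop_pageout() above,
 * shown in isolation: after a successful write by a non-superuser, drop
 * both bits from the mode.  Uses the standard <sys/stat.h> bit definitions;
 * the ex_* names are hypothetical.
 */
#include <assert.h>
#include <sys/stat.h>

/* Mode a non-privileged writer should leave behind after a write. */
static mode_t
ex_mode_after_write(mode_t mode, int writer_is_superuser)
{
	if (!writer_is_superuser && (mode & (S_ISUID | S_ISGID)))
		mode &= ~(S_ISUID | S_ISGID);
	return mode;
}

static void
ex_mode_demo(void)
{
	assert(ex_mode_after_write(S_ISUID | 0755, 0) == 0755);
	assert(ex_mode_after_write(S_ISUID | 0755, 1) == (S_ISUID | 0755));
}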
2590
2591 /*
2592 * Intercept B-Tree node writes to unswap them if necessary.
2593 */
2594 int
2595 hfs_vnop_bwrite(struct vnop_bwrite_args *ap)
2596 {
2597 int retval = 0;
2598 register struct buf *bp = ap->a_bp;
2599 register struct vnode *vp = buf_vnode(bp);
2600 BlockDescriptor block;
2601
2602 /* Trap B-Tree writes */
2603 if ((VTOC(vp)->c_fileid == kHFSExtentsFileID) ||
2604 (VTOC(vp)->c_fileid == kHFSCatalogFileID) ||
2605 (VTOC(vp)->c_fileid == kHFSAttributesFileID) ||
2606 (vp == VTOHFS(vp)->hfc_filevp)) {
2607
2608 /*
2609 * Swap and validate the node if it is in native byte order.
2610 * This is always true on big endian, so we always validate
2611 * before writing here. On little endian, the node typically has
2612 * been swapped and validated when it was written to the journal,
2613 * so we won't do anything here.
2614 */
2615 if (((UInt16 *)((char *)buf_dataptr(bp) + buf_count(bp) - 2))[0] == 0x000e) {
2616 /* Prepare the block pointer */
2617 block.blockHeader = bp;
2618 block.buffer = (char *)buf_dataptr(bp);
2619 block.blockNum = buf_lblkno(bp);
2620 /* not found in cache ==> came from disk */
2621 block.blockReadFromDisk = (buf_fromcache(bp) == 0);
2622 block.blockSize = buf_count(bp);
2623
2624 /* Endian un-swap B-Tree node */
2625 retval = hfs_swap_BTNode (&block, vp, kSwapBTNodeHostToBig);
2626 if (retval)
2627 panic("hfs_vnop_bwrite: about to write corrupt node!\n");
2628 }
2629 }
2630
2631 /* This buffer shouldn't be locked anymore but if it is clear it */
2632 if ((buf_flags(bp) & B_LOCKED)) {
2633 // XXXdbg
2634 if (VTOHFS(vp)->jnl) {
2635 panic("hfs: CLEARING the lock bit on bp 0x%x\n", bp);
2636 }
2637 buf_clearflags(bp, B_LOCKED);
2638 }
2639 retval = vn_bwrite (ap);
2640
2641 return (retval);
2642 }
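
/*
 * A standalone sketch of the byte-order probe used by hfs_vnop_bwrite()
 * above: the last two bytes of a B-tree node hold the offset of record 0,
 * which is always 14 (sizeof(BTNodeDescriptor)), so reading that field as a
 * host-order value reveals whether the node in memory is still in host byte
 * order (0x000e) or already in big-endian disk order.  The ex_* name is
 * hypothetical.
 */
#include <stdint.h>
#include <string.h>

/* Returns non-zero if the node buffer appears to be in host byte order. */
static int
ex_node_is_host_order(const void *node, size_t node_size)
{
	uint16_t last;

	memcpy(&last, (const char *)node + node_size - sizeof(last), sizeof(last));
	return (last == 0x000e);
}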
2643
2644 /*
2645 * Relocate a file to a new location on disk
2646 * cnode must be locked on entry
2647 *
2648 * Relocation occurs by cloning the file's data from its
2649 * current set of blocks to a new set of blocks. During
2650 * the relocation all of the blocks (old and new) are
2651 * owned by the file.
2652 *
2653 * -----------------
2654 * |///////////////|
2655 * -----------------
2656 * 0 N (file offset)
2657 *
2658 * ----------------- -----------------
2659 * |///////////////| | | STEP 1 (acquire new blocks)
2660 * ----------------- -----------------
2661 * 0 N N+1 2N
2662 *
2663 * ----------------- -----------------
2664 * |///////////////| |///////////////| STEP 2 (clone data)
2665 * ----------------- -----------------
2666 * 0 N N+1 2N
2667 *
2668 * -----------------
2669 * |///////////////| STEP 3 (head truncate blocks)
2670 * -----------------
2671 * 0 N
2672 *
2673 * During steps 2 and 3 page-outs to file offsets less
2674 * than or equal to N are suspended.
2675 *
2676 * During step 3 page-ins to the file get suspended.
2677 */
2678 __private_extern__
2679 int
2680 hfs_relocate(struct vnode *vp, u_int32_t blockHint, kauth_cred_t cred,
2681 struct proc *p)
2682 {
2683 struct cnode *cp;
2684 struct filefork *fp;
2685 struct hfsmount *hfsmp;
2686 u_int32_t headblks;
2687 u_int32_t datablks;
2688 u_int32_t blksize;
2689 u_int32_t growsize;
2690 u_int32_t nextallocsave;
2691 daddr64_t sector_a, sector_b;
2692 int disabled_caching = 0;
2693 int eflags;
2694 off_t newbytes;
2695 int retval;
2696 int lockflags = 0;
2697 int took_trunc_lock = 0;
2698 int started_tr = 0;
2699 enum vtype vnodetype;
2700
2701 vnodetype = vnode_vtype(vp);
2702 if (vnodetype != VREG && vnodetype != VLNK) {
2703 return (EPERM);
2704 }
2705
2706 hfsmp = VTOHFS(vp);
2707 if (hfsmp->hfs_flags & HFS_FRAGMENTED_FREESPACE) {
2708 return (ENOSPC);
2709 }
2710
2711 cp = VTOC(vp);
2712 fp = VTOF(vp);
2713 if (fp->ff_unallocblocks)
2714 return (EINVAL);
2715 blksize = hfsmp->blockSize;
2716 if (blockHint == 0)
2717 blockHint = hfsmp->nextAllocation;
2718
2719 if ((fp->ff_size > (u_int64_t)0x7fffffff) ||
2720 ((fp->ff_size > blksize) && vnodetype == VLNK)) {
2721 return (EFBIG);
2722 }
2723
2724 //
2725 // We do not believe that this call to hfs_fsync() is
2726 // necessary and it causes a journal transaction
2727 // deadlock so we are removing it.
2728 //
2729 //if (vnodetype == VREG && !vnode_issystem(vp)) {
2730 // retval = hfs_fsync(vp, MNT_WAIT, 0, p);
2731 // if (retval)
2732 // return (retval);
2733 //}
2734
2735 if (!vnode_issystem(vp) && (vnodetype != VLNK)) {
2736 hfs_unlock(cp);
2737 hfs_lock_truncate(cp, TRUE);
2738 if ((retval = hfs_lock(VTOC(vp), HFS_EXCLUSIVE_LOCK))) {
2739 hfs_unlock_truncate(cp);
2740 return (retval);
2741 }
2742 took_trunc_lock = 1;
2743 }
2744 headblks = fp->ff_blocks;
2745 datablks = howmany(fp->ff_size, blksize);
2746 growsize = datablks * blksize;
2747 eflags = kEFContigMask | kEFAllMask | kEFNoClumpMask;
2748 if (blockHint >= hfsmp->hfs_metazone_start &&
2749 blockHint <= hfsmp->hfs_metazone_end)
2750 eflags |= kEFMetadataMask;
2751
2752 if (hfs_start_transaction(hfsmp) != 0) {
2753 if (took_trunc_lock)
2754 hfs_unlock_truncate(cp);
2755 return (EINVAL);
2756 }
2757 started_tr = 1;
2758 /*
2759 * Protect the extents b-tree and the allocation bitmap
2760 * during MapFileBlockC and ExtendFileC operations.
2761 */
2762 lockflags = SFL_BITMAP;
2763 if (overflow_extents(fp))
2764 lockflags |= SFL_EXTENTS;
2765 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2766
2767 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize - 1, &sector_a, NULL);
2768 if (retval) {
2769 retval = MacToVFSError(retval);
2770 goto out;
2771 }
2772
2773 /*
2774 * STEP 1 - acquire new allocation blocks.
2775 */
2776 if (!vnode_isnocache(vp)) {
2777 vnode_setnocache(vp);
2778 disabled_caching = 1;
2779
2780 }
2781 nextallocsave = hfsmp->nextAllocation;
2782 retval = ExtendFileC(hfsmp, (FCB*)fp, growsize, blockHint, eflags, &newbytes);
2783 if (eflags & kEFMetadataMask) {
2784 HFS_MOUNT_LOCK(hfsmp, TRUE);
2785 hfsmp->nextAllocation = nextallocsave;
2786 hfsmp->vcbFlags |= 0xFF00;
2787 HFS_MOUNT_UNLOCK(hfsmp, TRUE);
2788 }
2789
2790 retval = MacToVFSError(retval);
2791 if (retval == 0) {
2792 cp->c_flag |= C_MODIFIED;
2793 if (newbytes < growsize) {
2794 retval = ENOSPC;
2795 goto restore;
2796 } else if (fp->ff_blocks < (headblks + datablks)) {
2797 printf("hfs_relocate: allocation failed");
2798 retval = ENOSPC;
2799 goto restore;
2800 }
2801
2802 retval = MapFileBlockC(hfsmp, (FCB *)fp, 1, growsize, &sector_b, NULL);
2803 if (retval) {
2804 retval = MacToVFSError(retval);
2805 } else if ((sector_a + 1) == sector_b) {
2806 retval = ENOSPC;
2807 goto restore;
2808 } else if ((eflags & kEFMetadataMask) &&
2809 ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
2810 hfsmp->hfs_metazone_end)) {
2811 printf("hfs_relocate: didn't move into metadata zone\n");
2812 retval = ENOSPC;
2813 goto restore;
2814 }
2815 }
2816 /* Done with system locks and journal for now. */
2817 hfs_systemfile_unlock(hfsmp, lockflags);
2818 lockflags = 0;
2819 hfs_end_transaction(hfsmp);
2820 started_tr = 0;
2821
2822 if (retval) {
2823 /*
2824 * Check to see if failure is due to excessive fragmentation.
2825 */
2826 if ((retval == ENOSPC) &&
2827 (hfs_freeblks(hfsmp, 0) > (datablks * 2))) {
2828 hfsmp->hfs_flags |= HFS_FRAGMENTED_FREESPACE;
2829 }
2830 goto out;
2831 }
2832 /*
2833 * STEP 2 - clone file data into the new allocation blocks.
2834 */
2835
2836 if (vnodetype == VLNK)
2837 retval = hfs_clonelink(vp, blksize, cred, p);
2838 else if (vnode_issystem(vp))
2839 retval = hfs_clonesysfile(vp, headblks, datablks, blksize, cred, p);
2840 else
2841 retval = hfs_clonefile(vp, headblks, datablks, blksize);
2842
2843 /* Start transaction for step 3 or for a restore. */
2844 if (hfs_start_transaction(hfsmp) != 0) {
2845 retval = EINVAL;
2846 goto out;
2847 }
2848 started_tr = 1;
2849 if (retval)
2850 goto restore;
2851
2852 /*
2853 * STEP 3 - switch to cloned data and remove old blocks.
2854 */
2855 lockflags = SFL_BITMAP;
2856 if (overflow_extents(fp))
2857 lockflags |= SFL_EXTENTS;
2858 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2859
2860 retval = HeadTruncateFile(hfsmp, (FCB*)fp, headblks);
2861
2862 hfs_systemfile_unlock(hfsmp, lockflags);
2863 lockflags = 0;
2864 if (retval)
2865 goto restore;
2866 out:
2867 if (took_trunc_lock)
2868 hfs_unlock_truncate(cp);
2869
2870 if (lockflags) {
2871 hfs_systemfile_unlock(hfsmp, lockflags);
2872 lockflags = 0;
2873 }
2874
2875 /* Push cnode's new extent data to disk. */
2876 if (retval == 0) {
2877 (void) hfs_update(vp, MNT_WAIT);
2878 }
2879
2880 if (hfsmp->jnl) {
2881 if (cp->c_cnid < kHFSFirstUserCatalogNodeID)
2882 (void) hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2883 else
2884 (void) hfs_flushvolumeheader(hfsmp, MNT_NOWAIT, 0);
2885 }
2886 exit:
2887 if (disabled_caching) {
2888 vnode_clearnocache(vp);
2889 }
2890 if (started_tr)
2891 hfs_end_transaction(hfsmp);
2892
2893 return (retval);
2894
2895 restore:
2896 if (fp->ff_blocks == headblks)
2897 goto exit;
2898 /*
2899 * Give back any newly allocated space.
2900 */
2901 if (lockflags == 0) {
2902 lockflags = SFL_BITMAP;
2903 if (overflow_extents(fp))
2904 lockflags |= SFL_EXTENTS;
2905 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2906 }
2907
2908 (void) TruncateFileC(hfsmp, (FCB*)fp, fp->ff_size, false);
2909
2910 hfs_systemfile_unlock(hfsmp, lockflags);
2911 lockflags = 0;
2912
2913 if (took_trunc_lock)
2914 hfs_unlock_truncate(cp);
2915 goto exit;
2916 }
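
/*
 * A condensed outline of the control flow that hfs_relocate() implements
 * above: three bounded steps, with the newly acquired blocks given back on
 * any failure after step 1.  All ex_* helpers are hypothetical stand-ins,
 * not real HFS routines, and each simply returns success here.
 */
static int ex_grow_file_contiguously(void)   { return 0; }	/* STEP 1 */
static int ex_copy_old_blocks_to_new(void)   { return 0; }	/* STEP 2 */
static int ex_head_truncate_old_blocks(void) { return 0; }	/* STEP 3 */
static int ex_release_new_blocks(void)       { return 0; }	/* restore */

static int
ex_relocate_outline(void)
{
	int error;

	/* STEP 1: acquire a new, contiguous set of blocks past the EOF. */
	if ((error = ex_grow_file_contiguously()) != 0)
		return error;

	/* STEP 2: clone the file data into the new blocks. */
	if ((error = ex_copy_old_blocks_to_new()) != 0) {
		(void) ex_release_new_blocks();
		return error;
	}

	/* STEP 3: head-truncate away the original blocks. */
	if ((error = ex_head_truncate_old_blocks()) != 0) {
		(void) ex_release_new_blocks();
		return error;
	}
	return 0;
}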
2917
2918
2919 /*
2920 * Clone a symlink.
2921 *
2922 */
2923 static int
2924 hfs_clonelink(struct vnode *vp, int blksize, kauth_cred_t cred, struct proc *p)
2925 {
2926 struct buf *head_bp = NULL;
2927 struct buf *tail_bp = NULL;
2928 int error;
2929
2930
2931 error = (int)buf_meta_bread(vp, (daddr64_t)0, blksize, cred, &head_bp);
2932 if (error)
2933 goto out;
2934
2935 tail_bp = buf_getblk(vp, (daddr64_t)1, blksize, 0, 0, BLK_META);
2936 if (tail_bp == NULL) {
2937 error = EIO;
2938 goto out;
2939 }
2940 bcopy((char *)buf_dataptr(head_bp), (char *)buf_dataptr(tail_bp), blksize);
2941 error = (int)buf_bwrite(tail_bp);
2942 out:
2943 if (head_bp) {
2944 buf_markinvalid(head_bp);
2945 buf_brelse(head_bp);
2946 }
2947 (void) buf_invalidateblks(vp, BUF_WRITE_DATA, 0, 0);
2948
2949 return (error);
2950 }
2951
2952 /*
2953 * Clone a file's data within the file.
2954 *
2955 */
2956 static int
2957 hfs_clonefile(struct vnode *vp, int blkstart, int blkcnt, int blksize)
2958 {
2959 caddr_t bufp;
2960 size_t writebase;
2961 size_t bufsize;
2962 size_t copysize;
2963 size_t iosize;
2964 off_t filesize;
2965 size_t offset;
2966 uio_t auio;
2967 int error = 0;
2968
2969 filesize = VTOF(vp)->ff_blocks * blksize; /* virtual file size */
2970 writebase = blkstart * blksize;
2971 copysize = blkcnt * blksize;
2972 iosize = bufsize = MIN(copysize, 128 * 1024);
2973 offset = 0;
2974
2975 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
2976 return (ENOMEM);
2977 }
2978 hfs_unlock(VTOC(vp));
2979
2980 auio = uio_create(1, 0, UIO_SYSSPACE32, UIO_READ);
2981
2982 while (offset < copysize) {
2983 iosize = MIN(copysize - offset, iosize);
2984
2985 uio_reset(auio, offset, UIO_SYSSPACE32, UIO_READ);
2986 uio_addiov(auio, (uintptr_t)bufp, iosize);
2987
2988 error = cluster_read(vp, auio, copysize, 0);
2989 if (error) {
2990 printf("hfs_clonefile: cluster_read failed - %d\n", error);
2991 break;
2992 }
2993 if (uio_resid(auio) != 0) {
2994 printf("clonedata: cluster_read: uio_resid = %lld\n", uio_resid(auio));
2995 error = EIO;
2996 break;
2997 }
2998
2999 uio_reset(auio, writebase + offset, UIO_SYSSPACE32, UIO_WRITE);
3000 uio_addiov(auio, (uintptr_t)bufp, iosize);
3001
3002 error = cluster_write(vp, auio, filesize + offset,
3003 filesize + offset + iosize,
3004 uio_offset(auio), 0, IO_NOCACHE | IO_SYNC);
3005 if (error) {
3006 printf("hfs_clonefile: cluster_write failed - %d\n", error);
3007 break;
3008 }
3009 if (uio_resid(auio) != 0) {
3010 printf("hfs_clonefile: cluster_write failed - uio_resid not zero\n");
3011 error = EIO;
3012 break;
3013 }
3014 offset += iosize;
3015 }
3016 uio_free(auio);
3017
3018 /*
3019 * No need to call ubc_sync_range or hfs_invalbuf
3020 * since the file was copied using IO_NOCACHE.
3021 */
3022
3023 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3024
3025 hfs_lock(VTOC(vp), HFS_FORCE_LOCK);
3026 return (error);
3027 }
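
/*
 * A user-space sketch of the copy loop shape used by hfs_clonefile() above:
 * read up to 128 KiB at 'offset', write it back at 'writebase + offset',
 * repeat until 'copysize' bytes have moved.  pread(2)/pwrite(2) stand in
 * for cluster_read()/cluster_write(); the ex_* names are hypothetical.
 */
#include <stdlib.h>
#include <sys/types.h>
#include <unistd.h>

#define EX_BOUNCE_SIZE	(128 * 1024)

/* Copy 'copysize' bytes within one file, from offset 0 to 'writebase'. */
static int
ex_clone_within_fd(int fd, off_t writebase, off_t copysize)
{
	char *buf = malloc(EX_BOUNCE_SIZE);
	off_t offset = 0;
	int error = 0;

	if (buf == NULL)
		return -1;
	while (offset < copysize && error == 0) {
		ssize_t want = EX_BOUNCE_SIZE;
		ssize_t got;

		if ((off_t)want > copysize - offset)
			want = (ssize_t)(copysize - offset);
		got = pread(fd, buf, (size_t)want, offset);
		if (got != want ||
		    pwrite(fd, buf, (size_t)got, writebase + offset) != got)
			error = -1;
		else
			offset += got;
	}
	free(buf);
	return error;
}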
3028
3029 /*
3030 * Clone a system (metadata) file.
3031 *
3032 */
3033 static int
3034 hfs_clonesysfile(struct vnode *vp, int blkstart, int blkcnt, int blksize,
3035 kauth_cred_t cred, struct proc *p)
3036 {
3037 caddr_t bufp;
3038 char * offset;
3039 size_t bufsize;
3040 size_t iosize;
3041 struct buf *bp = NULL;
3042 daddr64_t blkno;
3043 daddr64_t blk;
3044 daddr64_t start_blk;
3045 daddr64_t last_blk;
3046 int breadcnt;
3047 int i;
3048 int error = 0;
3049
3050
3051 iosize = GetLogicalBlockSize(vp);
3052 bufsize = MIN(blkcnt * blksize, 1024 * 1024) & ~(iosize - 1);
3053 breadcnt = bufsize / iosize;
3054
3055 if (kmem_alloc(kernel_map, (vm_offset_t *)&bufp, bufsize)) {
3056 return (ENOMEM);
3057 }
3058 start_blk = ((daddr64_t)blkstart * blksize) / iosize;
3059 last_blk = ((daddr64_t)blkcnt * blksize) / iosize;
3060 blkno = 0;
3061
3062 while (blkno < last_blk) {
3063 /*
3064 * Read up to a megabyte
3065 */
3066 offset = bufp;
3067 for (i = 0, blk = blkno; (i < breadcnt) && (blk < last_blk); ++i, ++blk) {
3068 error = (int)buf_meta_bread(vp, blk, iosize, cred, &bp);
3069 if (error) {
3070 printf("hfs_clonesysfile: meta_bread error %d\n", error);
3071 goto out;
3072 }
3073 if (buf_count(bp) != iosize) {
3074 printf("hfs_clonesysfile: b_bcount is only %d\n", buf_count(bp));
3075 goto out;
3076 }
3077 bcopy((char *)buf_dataptr(bp), offset, iosize);
3078
3079 buf_markinvalid(bp);
3080 buf_brelse(bp);
3081 bp = NULL;
3082
3083 offset += iosize;
3084 }
3085
3086 /*
3087 * Write up to a megabyte
3088 */
3089 offset = bufp;
3090 for (i = 0; (i < breadcnt) && (blkno < last_blk); ++i, ++blkno) {
3091 bp = buf_getblk(vp, start_blk + blkno, iosize, 0, 0, BLK_META);
3092 if (bp == NULL) {
3093 printf("hfs_clonesysfile: getblk failed on blk %qd\n", start_blk + blkno);
3094 error = EIO;
3095 goto out;
3096 }
3097 bcopy(offset, (char *)buf_dataptr(bp), iosize);
3098 error = (int)buf_bwrite(bp);
3099 bp = NULL;
3100 if (error)
3101 goto out;
3102 offset += iosize;
3103 }
3104 }
3105 out:
3106 if (bp) {
3107 buf_brelse(bp);
3108 }
3109
3110 kmem_free(kernel_map, (vm_offset_t)bufp, bufsize);
3111
3112 error = hfs_fsync(vp, MNT_WAIT, 0, p);
3113
3114 return (error);
3115 }
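
/*
 * The staging-buffer sizing used by hfs_clonesysfile() above, shown in
 * isolation: cap the buffer at 1 MiB, then round it down to a whole number
 * of logical blocks so every read/write pass moves an exact block count.
 * Assumes iosize is a power of two; the ex_* names are hypothetical.
 */
#include <assert.h>
#include <stddef.h>

static size_t
ex_staging_bufsize(size_t total_bytes, size_t iosize)
{
	size_t cap = 1024 * 1024;
	size_t bufsize = (total_bytes < cap) ? total_bytes : cap;

	return bufsize & ~(iosize - 1);	/* round down to a block multiple */
}

static void
ex_staging_demo(void)
{
	assert(ex_staging_bufsize(10 * 1024 * 1024, 4096) == 1024 * 1024);
	assert(ex_staging_bufsize(10000, 4096) == 8192);
}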