bsd/hfs/hfs_resize.c

   1 /*
   2  * Copyright (c) 2013-2014 Apple Inc. All rights reserved.
   3  *
   4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
   5  *
   6  * This file contains Original Code and/or Modifications of Original Code
   7  * as defined in and that are subject to the Apple Public Source License
   8  * Version 2.0 (the 'License'). You may not use this file except in
   9  * compliance with the License. The rights granted to you under the License
  10  * may not be used to create, or enable the creation or redistribution of,
  11  * unlawful or unlicensed copies of an Apple operating system, or to
  12  * circumvent, violate, or enable the circumvention or violation of, any
  13  * terms of an Apple operating system software license agreement.
  14  *
  15  * Please obtain a copy of the License at
  16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
  17  *
  18  * The Original Code and all software distributed under the License are
  19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
  22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
  23  * Please see the License for the specific language governing rights and
  24  * limitations under the License.
  25  *
  26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  27  */
  28 #include <sys/systm.h>
  29 #include <sys/kauth.h>
  30 #include <sys/ubc.h>
  31 #include <sys/vnode_internal.h>
  32 #include <sys/mount_internal.h>
  33 #include <sys/buf_internal.h>
  34 #include <vfs/vfs_journal.h>
  35 #include <miscfs/specfs/specdev.h>
  36
  37 #include "hfs.h"
  38 #include "hfs_catalog.h"
  39 #include "hfs_cnode.h"
  40 #include "hfs_endian.h"
  41 #include "hfs_btreeio.h"
  42
  43 #if CONFIG_PROTECT
  44 #include <sys/cprotect.h>
  45 #endif
  46
  47 /* Enable/disable debugging code for live volume resizing (nonzero turns on the extra old/new size printf diagnostics in hfs_extendfs and hfs_truncatefs) */
  48 int hfs_resize_debug = 0;
  49 /* Forward declarations of file-local (static) helpers used by the resize code. */
  50 static int hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec);
  51 static int hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context);
  52 static int hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context);
  53
  54 /*
  55  * Extend a file system.
  56  */
  57 int
  58 hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
  59 {
  60         struct proc *p = vfs_context_proc(context);
  61         kauth_cred_t cred = vfs_context_ucred(context);
  62         struct  vnode *vp;
  63         struct  vnode *devvp;
  64         struct  buf *bp;
  65         struct  filefork *fp = NULL;
  66         ExtendedVCB  *vcb;
  67         struct  cat_fork forkdata;
  68         u_int64_t  oldsize;
  69         u_int64_t  newblkcnt;
  70         u_int64_t  prev_phys_block_count;
  71         u_int32_t  addblks;
  72         u_int64_t  sector_count;
  73         u_int32_t  sector_size;
  74         u_int32_t  phys_sector_size;
  75         u_int32_t  overage_blocks;
  76         daddr64_t  prev_fs_alt_sector;
  77         daddr_t    bitmapblks;
  78         int  lockflags = 0;
  79         int  error;
  80         int64_t oldBitmapSize;
  81
  82         Boolean  usedExtendFileC = false;
  83         int transaction_begun = 0;
  84
  85         devvp = hfsmp->hfs_devvp;
  86         vcb = HFSTOVCB(hfsmp);
  87
  88         /*
  89          * - HFS Plus file systems only.
  90          * - Journaling must be enabled.
  91          * - No embedded volumes.
  92          */
  93         if ((vcb->vcbSigWord == kHFSSigWord) ||
  94         (hfsmp->jnl == NULL) ||
  95         (vcb->hfsPlusIOPosOffset != 0)) {
  96                 return (EPERM);
  97         }
  98         /*
  99          * If extending file system by non-root, then verify
 100          * ownership and check permissions.
 101          */
 102         if (suser(cred, NULL)) {
 103                 error = hfs_vget(hfsmp, kHFSRootFolderID, &vp, 0, 0);
 104
 105                 if (error)
 106                         return (error);
 107                 error = hfs_owner_rights(hfsmp, VTOC(vp)->c_uid, cred, p, 0);
 108                 if (error == 0) {
 109                         error = hfs_write_access(vp, cred, p, false);
 110                 }
 111                 hfs_unlock(VTOC(vp));
 112                 vnode_put(vp);
 113                 if (error)
 114                         return (error);
 115
 116                 error = vnode_authorize(devvp, NULL, KAUTH_VNODE_READ_DATA | KAUTH_VNODE_WRITE_DATA, context);
 117                 if (error)
 118                         return (error);
 119         }
 120         if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sector_size, 0, context)) {
 121                 return (ENXIO);
 122         }
 123         if (sector_size != hfsmp->hfs_logical_block_size) {
 124                 return (ENXIO);
 125         }
 126         if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sector_count, 0, context)) {
 127                 return (ENXIO);
 128         }
 129         /* Check if partition size is correct for new file system size */
 130         if ((sector_size * sector_count) < newsize) {
 131                 printf("hfs_extendfs: not enough space on device (vol=%s)\n", hfsmp->vcbVN);
 132                 return (ENOSPC);
 133         }
 134         error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sector_size, 0, context);
 135         if (error) {
 136                 if ((error != ENOTSUP) && (error != ENOTTY)) {
 137                         return (ENXIO);
 138                 }
 139                 /* If ioctl is not supported, force physical and logical sector size to be same */
 140                 phys_sector_size = sector_size;
 141         }
 142         oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
 143
 144         /*
 145          * Validate new size.
 146          */
 147         if ((newsize <= oldsize) || (newsize % sector_size) || (newsize % phys_sector_size)) {
 148                 printf("hfs_extendfs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
 149                 return (EINVAL);
 150         }
 151         newblkcnt = newsize / vcb->blockSize;
 152         if (newblkcnt > (u_int64_t)0xFFFFFFFF) {
 153                 printf ("hfs_extendfs: current blockSize=%u too small for newsize=%qu\n", hfsmp->blockSize, newsize);
 154                 return (EOVERFLOW);
 155         }
 156
 157         addblks = newblkcnt - vcb->totalBlocks;
 158
 159         if (hfs_resize_debug) {
 160                 printf ("hfs_extendfs: old: size=%qu, blkcnt=%u\n", oldsize, hfsmp->totalBlocks);
 161                 printf ("hfs_extendfs: new: size=%qu, blkcnt=%u, addblks=%u\n", newsize, (u_int32_t)newblkcnt, addblks);
 162         }
 163         printf("hfs_extendfs: will extend \"%s\" by %d blocks\n", vcb->vcbVN, addblks);
 164
 165         hfs_lock_mount (hfsmp);
 166         if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
 167                 hfs_unlock_mount(hfsmp);
 168                 error = EALREADY;
 169                 goto out;
 170         }
 171         hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
 172         hfs_unlock_mount (hfsmp);
 173
 174         /* Start with a clean journal. */
 175         hfs_journal_flush(hfsmp, TRUE);
 176
 177         /*
 178          * Enclose changes inside a transaction.
 179          */
 180         if (hfs_start_transaction(hfsmp) != 0) {
 181                 error = EINVAL;
 182                 goto out;
 183         }
 184         transaction_begun = 1;
 185
 186
 187         /* Update the hfsmp fields for the physical information about the device */
 188         prev_phys_block_count = hfsmp->hfs_logical_block_count;
 189         prev_fs_alt_sector = hfsmp->hfs_fs_avh_sector;
 190
 191         hfsmp->hfs_logical_block_count = sector_count;
 192         hfsmp->hfs_logical_bytes = (uint64_t) sector_count * (uint64_t) sector_size;
 193
 194         /*
 195          * It is possible that the new file system is smaller than the partition size.
 196          * Therefore, update offsets for AVH accordingly.
 197          */
 198         if (hfs_resize_debug) {
 199                 printf ("hfs_extendfs: old: partition_avh_sector=%qu, fs_avh_sector=%qu\n",
 200                                 hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector);
 201         }
 202         hfsmp->hfs_partition_avh_sector = (hfsmp->hfsPlusIOPosOffset / sector_size) +
 203                 HFS_ALT_SECTOR(sector_size, hfsmp->hfs_logical_block_count);
 204
 205         hfsmp->hfs_fs_avh_sector = (hfsmp->hfsPlusIOPosOffset / sector_size) +
 206                 HFS_ALT_SECTOR(sector_size, (newsize/hfsmp->hfs_logical_block_size));
 207         if (hfs_resize_debug) {
 208                 printf ("hfs_extendfs: new: partition_avh_sector=%qu, fs_avh_sector=%qu\n",
 209                                 hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector);
 210         }
 211
 212         /*
 213          * Note: we take the attributes lock in case we have an attribute data vnode
 214          * which needs to change size.
 215          */
 216         lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
 217         vp = vcb->allocationsRefNum;
 218         fp = VTOF(vp);
 219         bcopy(&fp->ff_data, &forkdata, sizeof(forkdata));
 220
 221         /*
 222          * Calculate additional space required (if any) by allocation bitmap.
 223          */
 224         oldBitmapSize = fp->ff_size;
 225         bitmapblks = roundup((newblkcnt+7) / 8, vcb->vcbVBMIOSize) / vcb->blockSize;
 226         if (bitmapblks > (daddr_t)fp->ff_blocks)
 227                 bitmapblks -= fp->ff_blocks;
 228         else
 229                 bitmapblks = 0;
 230
 231         /*
 232          * The allocation bitmap can contain unused bits that are beyond end of
 233          * current volume's allocation blocks.  Usually they are supposed to be
 234          * zero'ed out but there can be cases where they might be marked as used.
 235          * After extending the file system, those bits can represent valid
 236          * allocation blocks, so we mark all the bits from the end of current
 237          * volume to end of allocation bitmap as "free".
 238          *
 239          * Figure out the number of overage blocks before proceeding though,
 240          * so we don't add more bytes to our I/O than necessary.
 241          * First figure out the total number of blocks representable by the
 242          * end of the bitmap file vs. the total number of blocks in the new FS.
 243          * Then subtract away the number of blocks in the current FS.  This is how much
 244          * we can mark as free right now without having to grow the bitmap file.
 245          */
 246         overage_blocks = fp->ff_blocks * vcb->blockSize * 8;
 247         overage_blocks = MIN (overage_blocks, newblkcnt);
 248         overage_blocks -= vcb->totalBlocks;
 249
 250         BlockMarkFreeUnused(vcb, vcb->totalBlocks, overage_blocks);
 251
 252         if (bitmapblks > 0) {
 253                 daddr64_t blkno;
 254                 daddr_t blkcnt;
 255                 off_t bytesAdded;
 256
 257                 /*
 258                  * Get the bitmap's current size (in allocation blocks) so we know
 259                  * where to start zero filling once the new space is added.  We've
 260                  * got to do this before the bitmap is grown.
 261                  */
 262                 blkno  = (daddr64_t)fp->ff_blocks;
 263
 264                 /*
 265                  * Try to grow the allocation file in the normal way, using allocation
 266                  * blocks already existing in the file system.  This way, we might be
 267                  * able to grow the bitmap contiguously, or at least in the metadata
 268                  * zone.
 269                  */
 270                 error = ExtendFileC(vcb, fp, bitmapblks * vcb->blockSize, 0,
 271                             kEFAllMask | kEFNoClumpMask | kEFReserveMask
 272                             | kEFMetadataMask | kEFContigMask, &bytesAdded);
 273
 274                 if (error == 0) {
 275                         usedExtendFileC = true;
 276                 } else {
 277                         /*
 278                          * If the above allocation failed, fall back to allocating the new
 279                          * extent of the bitmap from the space we're going to add.  Since those
 280                          * blocks don't yet belong to the file system, we have to update the
 281                          * extent list directly, and manually adjust the file size.
 282                          */
 283                         bytesAdded = 0;
 284                         error = AddFileExtent(vcb, fp, vcb->totalBlocks, bitmapblks);
 285                         if (error) {
 286                                 printf("hfs_extendfs: error %d adding extents\n", error);
 287                                 goto out;
 288                         }
 289                         fp->ff_blocks += bitmapblks;
 290                         VTOC(vp)->c_blocks = fp->ff_blocks;
 291                         VTOC(vp)->c_flag |= C_MODIFIED;
 292                 }
 293
 294                 /*
 295                  * Update the allocation file's size to include the newly allocated
 296                  * blocks.  Note that ExtendFileC doesn't do this, which is why this
 297                  * statement is outside the above "if" statement.
 298                  */
 299                 fp->ff_size += (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
 300
 301                 /*
 302                  * Zero out the new bitmap blocks.
 303                  */
 304                 {
 305
 306                         bp = NULL;
 307                         blkcnt = bitmapblks;
 308                         while (blkcnt > 0) {
 309                                 error = (int)buf_meta_bread(vp, blkno, vcb->blockSize, NOCRED, &bp);
 310                                 if (error) {
 311                                         if (bp) {
 312                                                 buf_brelse(bp);
 313                                         }
 314                                         break;
 315                                 }
 316                                 bzero((char *)buf_dataptr(bp), vcb->blockSize);
 317                                 buf_markaged(bp);
 318                                 error = (int)buf_bwrite(bp);
 319                                 if (error)
 320                                         break;
 321                                 --blkcnt;
 322                                 ++blkno;
 323                         }
 324                 }
 325                 if (error) {
 326                         printf("hfs_extendfs: error %d clearing blocks\n", error);
 327                         goto out;
 328                 }
 329                 /*
 330                  * Mark the new bitmap space as allocated.
 331                  *
 332                  * Note that ExtendFileC will have marked any blocks it allocated, so
 333                  * this is only needed if we used AddFileExtent.  Also note that this
 334                  * has to come *after* the zero filling of new blocks in the case where
 335                  * we used AddFileExtent (since the part of the bitmap we're touching
 336                  * is in those newly allocated blocks).
 337                  */
 338                 if (!usedExtendFileC) {
 339                         error = BlockMarkAllocated(vcb, vcb->totalBlocks, bitmapblks);
 340                         if (error) {
 341                                 printf("hfs_extendfs: error %d setting bitmap\n", error);
 342                                 goto out;
 343                         }
 344                         vcb->freeBlocks -= bitmapblks;
 345                 }
 346         }
 347
 348         /*
 349          * Mark the new alternate VH as allocated.
 350          */
 351         if (vcb->blockSize == 512)
 352                 error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 2, 2);
 353         else
 354                 error = BlockMarkAllocated(vcb, vcb->totalBlocks + addblks - 1, 1);
 355         if (error) {
 356                 printf("hfs_extendfs: error %d setting bitmap (VH)\n", error);
 357                 goto out;
 358         }
 359
 360         /*
 361          * Mark the old alternate VH as free.
 362          */
 363         if (vcb->blockSize == 512)
 364                 (void) BlockMarkFree(vcb, vcb->totalBlocks - 2, 2);
 365         else
 366                 (void) BlockMarkFree(vcb, vcb->totalBlocks - 1, 1);
 367
 368         /*
 369          * Adjust file system variables for new space.
 370          */
 371         vcb->totalBlocks += addblks;
 372         vcb->freeBlocks += addblks;
 373         MarkVCBDirty(vcb);
 374         error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
 375         if (error) {
 376                 printf("hfs_extendfs: couldn't flush volume headers (%d)", error);
 377                 /*
 378                  * Restore to old state.
 379                  */
 380                 if (usedExtendFileC) {
 381                         (void) TruncateFileC(vcb, fp, oldBitmapSize, 0, FORK_IS_RSRC(fp),
 382                                                                  FTOC(fp)->c_fileid, false);
 383                 } else {
 384                         fp->ff_blocks -= bitmapblks;
 385                         fp->ff_size -= (u_int64_t)bitmapblks * (u_int64_t)vcb->blockSize;
 386                         /*
 387                          * No need to mark the excess blocks free since those bitmap blocks
 388                          * are no longer part of the bitmap.  But we do need to undo the
 389                          * effect of the "vcb->freeBlocks -= bitmapblks" above.
 390                          */
 391                         vcb->freeBlocks += bitmapblks;
 392                 }
 393                 vcb->totalBlocks -= addblks;
 394                 vcb->freeBlocks -= addblks;
 395                 hfsmp->hfs_logical_block_count = prev_phys_block_count;
 396                 hfsmp->hfs_fs_avh_sector = prev_fs_alt_sector;
 397                 /* Do not revert hfs_partition_avh_sector because the
 398                  * partition size is larger than file system size
 399                  */
 400                 MarkVCBDirty(vcb);
 401                 if (vcb->blockSize == 512) {
 402                         if (BlockMarkAllocated(vcb, vcb->totalBlocks - 2, 2)) {
 403                                 hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED);
 404                         }
 405                 } else {
 406                         if (BlockMarkAllocated(vcb, vcb->totalBlocks - 1, 1)) {
 407                                 hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED);
 408                         }
 409                 }
 410                 goto out;
 411         }
 412         /*
 413          * Invalidate the old alternate volume header.  We are growing the filesystem so
 414          * this sector must be returned to the FS as free space.
 415          */
 416         bp = NULL;
 417         if (prev_fs_alt_sector) {
 418                 if (buf_meta_bread(hfsmp->hfs_devvp,
 419                            HFS_PHYSBLK_ROUNDDOWN(prev_fs_alt_sector, hfsmp->hfs_log_per_phys),
 420                            hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) {
 421                         journal_modify_block_start(hfsmp->jnl, bp);
 422
 423                         bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize);
 424
 425                         journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
 426                 } else if (bp) {
 427                         buf_brelse(bp);
 428                 }
 429         }
 430
 431         /*
 432          * Update the metadata zone size based on current volume size
 433          */
 434         hfs_metadatazone_init(hfsmp, false);
 435
 436         /*
 437          * Adjust the size of hfsmp->hfs_attrdata_vp
 438          */
 439         if (hfsmp->hfs_attrdata_vp) {
 440                 struct cnode *attr_cp;
 441                 struct filefork *attr_fp;
 442
 443                 if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
 444                         attr_cp = VTOC(hfsmp->hfs_attrdata_vp);
 445                         attr_fp = VTOF(hfsmp->hfs_attrdata_vp);
 446
 447                         attr_cp->c_blocks = newblkcnt;
 448                         attr_fp->ff_blocks = newblkcnt;
 449                         attr_fp->ff_extents[0].blockCount = newblkcnt;
 450                         attr_fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
 451                         ubc_setsize(hfsmp->hfs_attrdata_vp, attr_fp->ff_size);
 452                         vnode_put(hfsmp->hfs_attrdata_vp);
 453                 }
 454         }
 455
 456         /*
 457          * We only update hfsmp->allocLimit if totalBlocks actually increased.
 458          */
 459         if (error == 0) {
 460                 UpdateAllocLimit(hfsmp, hfsmp->totalBlocks);
 461         }
 462
 463         /* Release all locks and sync up journal content before
 464          * checking and extending, if required, the journal
 465          */
 466         if (lockflags) {
 467                 hfs_systemfile_unlock(hfsmp, lockflags);
 468                 lockflags = 0;
 469         }
 470         if (transaction_begun) {
 471                 hfs_end_transaction(hfsmp);
 472                 hfs_journal_flush(hfsmp, TRUE);
 473                 transaction_begun = 0;
 474         }
 475
 476         /* Increase the journal size, if required. */
 477         error = hfs_extend_journal(hfsmp, sector_size, sector_count, context);
 478         if (error) {
 479                 printf ("hfs_extendfs: Could not extend journal size\n");
 480                 goto out_noalloc;
 481         }
 482
 483         /* Log successful extending */
 484         printf("hfs_extendfs: extended \"%s\" to %d blocks (was %d blocks)\n",
 485                hfsmp->vcbVN, hfsmp->totalBlocks, (u_int32_t)(oldsize/hfsmp->blockSize));
 486
 487 out:
 488         if (error && fp) {
 489                 /* Restore allocation fork. */
 490                 bcopy(&forkdata, &fp->ff_data, sizeof(forkdata));
 491                 VTOC(vp)->c_blocks = fp->ff_blocks;
 492
 493         }
 494
 495 out_noalloc:
 496         hfs_lock_mount (hfsmp);
 497         hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
 498         hfs_unlock_mount (hfsmp);
 499         if (lockflags) {
 500                 hfs_systemfile_unlock(hfsmp, lockflags);
 501         }
 502         if (transaction_begun) {
 503                 hfs_end_transaction(hfsmp);
 504                 hfs_journal_flush(hfsmp, FALSE);
 505                 /* Just to be sure, sync all data to the disk */
 506                 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
 507         }
 508         if (error) {
 509                 printf ("hfs_extentfs: failed error=%d on vol=%s\n", MacToVFSError(error), hfsmp->vcbVN);
 510         }
 511
 512         return MacToVFSError(error);
 513 }
 514
 515 #define HFS_MIN_SIZE  (32LL * 1024LL * 1024LL)  /* 32 MiB in bytes; hfs_truncatefs rejects any newsize below this with EINVAL */
 516
 517 /*
 518  * Truncate a file system (while still mounted).
 519  */
 520 int
 521 hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
 522 {
 523         u_int64_t oldsize;
 524         u_int32_t newblkcnt;
 525         u_int32_t reclaimblks = 0;
 526         int lockflags = 0;
 527         int transaction_begun = 0;
 528         Boolean updateFreeBlocks = false;
 529         Boolean disable_sparse = false;
 530         int error = 0;
 531
 532         hfs_lock_mount (hfsmp);
 533         if (hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) {
 534                 hfs_unlock_mount (hfsmp);
 535                 return (EALREADY);
 536         }
 537         hfsmp->hfs_flags |= HFS_RESIZE_IN_PROGRESS;
 538         hfsmp->hfs_resize_blocksmoved = 0;
 539         hfsmp->hfs_resize_totalblocks = 0;
 540         hfsmp->hfs_resize_progress = 0;
 541         hfs_unlock_mount (hfsmp);
 542
 543         /*
 544          * - Journaled HFS Plus volumes only.
 545          * - No embedded volumes.
 546          */
 547         if ((hfsmp->jnl == NULL) ||
 548             (hfsmp->hfsPlusIOPosOffset != 0)) {
 549                 error = EPERM;
 550                 goto out;
 551         }
 552         oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
 553         newblkcnt = newsize / hfsmp->blockSize;
 554         reclaimblks = hfsmp->totalBlocks - newblkcnt;
 555
 556         if (hfs_resize_debug) {
 557                 printf ("hfs_truncatefs: old: size=%qu, blkcnt=%u, freeblks=%u\n", oldsize, hfsmp->totalBlocks, hfs_freeblks(hfsmp, 1));
 558                 printf ("hfs_truncatefs: new: size=%qu, blkcnt=%u, reclaimblks=%u\n", newsize, newblkcnt, reclaimblks);
 559         }
 560
 561         /* Make sure new size is valid. */
 562         if ((newsize < HFS_MIN_SIZE) ||
 563             (newsize >= oldsize) ||
 564             (newsize % hfsmp->hfs_logical_block_size) ||
 565             (newsize % hfsmp->hfs_physical_block_size)) {
 566                 printf ("hfs_truncatefs: invalid size (newsize=%qu, oldsize=%qu)\n", newsize, oldsize);
 567                 error = EINVAL;
 568                 goto out;
 569         }
 570
 571         /*
 572          * Make sure that the file system has enough free blocks reclaim.
 573          *
 574          * Before resize, the disk is divided into four zones -
 575          *      A. Allocated_Stationary - These are allocated blocks that exist
 576          *         before the new end of disk.  These blocks will not be
 577          *         relocated or modified during resize.
 578          *      B. Free_Stationary - These are free blocks that exist before the
 579          *         new end of disk.  These blocks can be used for any new
 580          *         allocations during resize, including allocation for relocating
 581          *         data from the area of disk being reclaimed.
 582          *      C. Allocated_To-Reclaim - These are allocated blocks that exist
 583          *         beyond the new end of disk.  These blocks need to be reclaimed
 584          *         during resize by allocating equal number of blocks in Free
 585          *         Stationary zone and copying the data.
 586          *      D. Free_To-Reclaim - These are free blocks that exist beyond the
 587          *         new end of disk.  Nothing special needs to be done to reclaim
 588          *         them.
 589          *
 590          * Total number of blocks on the disk before resize:
 591          * ------------------------------------------------
 592          *      Total Blocks = Allocated_Stationary + Free_Stationary +
 593          *                     Allocated_To-Reclaim + Free_To-Reclaim
 594          *
 595          * Total number of blocks that need to be reclaimed:
 596          * ------------------------------------------------
 597          *      Blocks to Reclaim = Allocated_To-Reclaim + Free_To-Reclaim
 598          *
 599          * Note that the check below also makes sure that we have enough space
 600          * to relocate data from Allocated_To-Reclaim to Free_Stationary.
 601          * Therefore we do not need to check total number of blocks to relocate
 602          * later in the code.
 603          *
 604          * The condition below gets converted to:
 605          *
 606          * Allocated To-Reclaim + Free To-Reclaim >= Free Stationary + Free To-Reclaim
 607          *
 608          * which is equivalent to:
 609          *
 610          *              Allocated To-Reclaim >= Free Stationary
 611          */
 612         if (reclaimblks >= hfs_freeblks(hfsmp, 1)) {
 613                 printf("hfs_truncatefs: insufficient space (need %u blocks; have %u free blocks)\n", reclaimblks, hfs_freeblks(hfsmp, 1));
 614                 error = ENOSPC;
 615                 goto out;
 616         }
 617
 618         /* Start with a clean journal. */
 619         hfs_journal_flush(hfsmp, TRUE);
 620
 621         if (hfs_start_transaction(hfsmp) != 0) {
 622                 error = EINVAL;
 623                 goto out;
 624         }
 625         transaction_begun = 1;
 626
 627         /* Take the bitmap lock to update the alloc limit field */
 628         lockflags = hfs_systemfile_lock(hfsmp, SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
 629
 630         /*
 631          * Prevent new allocations from using the part we're trying to truncate.
 632          *
 633          * NOTE: allocLimit is set to the allocation block number where the new
 634          * alternate volume header will be.  That way there will be no files to
 635          * interfere with allocating the new alternate volume header, and no files
 636          * in the allocation blocks beyond (i.e. the blocks we're trying to
 637          * truncate away.
 638          */
 639         if (hfsmp->blockSize == 512) {
 640                 error = UpdateAllocLimit (hfsmp, newblkcnt - 2);
 641         }
 642         else {
 643                 error = UpdateAllocLimit (hfsmp, newblkcnt - 1);
 644         }
 645
 646         /* Sparse devices use first fit allocation which is not ideal
 647          * for volume resize which requires best fit allocation.  If a
 648          * sparse device is being truncated, disable the sparse device
 649          * property temporarily for the duration of resize.  Also reset
 650          * the free extent cache so that it is rebuilt as sorted by
 651          * totalBlocks instead of startBlock.
 652          *
 653          * Note that this will affect all allocations on the volume and
 654          * ideal fix would be just to modify resize-related allocations,
 655          * but it will result in complexity like handling of two free
 656          * extent caches sorted differently, etc.  So we stick to this
 657          * solution for now.
 658          */
 659         hfs_lock_mount (hfsmp);
 660         if (hfsmp->hfs_flags & HFS_HAS_SPARSE_DEVICE) {
 661                 hfsmp->hfs_flags &= ~HFS_HAS_SPARSE_DEVICE;
 662                 ResetVCBFreeExtCache(hfsmp);
 663                 disable_sparse = true;
 664         }
 665
 666         /*
 667          * Update the volume free block count to reflect the total number
 668          * of free blocks that will exist after a successful resize.
 669          * Relocation of extents will result in no net change in the total
 670          * free space on the disk.  Therefore the code that allocates
 671          * space for new extent and deallocates the old extent explicitly
 672          * prevents updating the volume free block count.  It will also
 673          * prevent false disk full error when the number of blocks in
 674          * an extent being relocated is more than the free blocks that
 675          * will exist after the volume is resized.
 676          */
 677         hfsmp->freeBlocks -= reclaimblks;
 678         updateFreeBlocks = true;
 679         hfs_unlock_mount(hfsmp);
 680
 681         if (lockflags) {
 682                 hfs_systemfile_unlock(hfsmp, lockflags);
 683                 lockflags = 0;
 684         }
 685
 686         /*
         * Update the metadata zone size to match the new volume size;
         * if the new size is too small, the metadata zone might be disabled.
 689          */
 690         hfs_metadatazone_init(hfsmp, false);
 691
 692         /*
 693          * If some files have blocks at or beyond the location of the
 694          * new alternate volume header, recalculate free blocks and
 695          * reclaim blocks.  Otherwise just update free blocks count.
 696          *
 697          * The current allocLimit is set to the location of new alternate
 698          * volume header, and reclaimblks are the total number of blocks
 699          * that need to be reclaimed.  So the check below is really
 700          * ignoring the blocks allocated for old alternate volume header.
 701          */
 702         if (hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks)) {
 703                 /*
 704                  * hfs_reclaimspace will use separate transactions when
 705                  * relocating files (so we don't overwhelm the journal).
 706                  */
 707                 hfs_end_transaction(hfsmp);
 708                 transaction_begun = 0;
 709
 710                 /* Attempt to reclaim some space. */
 711                 error = hfs_reclaimspace(hfsmp, hfsmp->allocLimit, reclaimblks, context);
 712                 if (error != 0) {
 713                         printf("hfs_truncatefs: couldn't reclaim space on %s (error=%d)\n", hfsmp->vcbVN, error);
 714                         error = ENOSPC;
 715                         goto out;
 716                 }
 717                 if (hfs_start_transaction(hfsmp) != 0) {
 718                         error = EINVAL;
 719                         goto out;
 720                 }
 721                 transaction_begun = 1;
 722
 723                 /* Check if we're clear now. */
 724                 error = hfs_isallocated(hfsmp, hfsmp->allocLimit, reclaimblks);
 725                 if (error != 0) {
 726                         printf("hfs_truncatefs: didn't reclaim enough space on %s (error=%d)\n", hfsmp->vcbVN, error);
 727                         error = EAGAIN;  /* tell client to try again */
 728                         goto out;
 729                 }
 730         }
 731
 732         /*
 733          * Note: we take the attributes lock in case we have an attribute data vnode
 734          * which needs to change size.
 735          */
 736         lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE | SFL_EXTENTS | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
 737
 738         /*
 739          * Allocate last 1KB for alternate volume header.
 740          */
 741         error = BlockMarkAllocated(hfsmp, hfsmp->allocLimit, (hfsmp->blockSize == 512) ? 2 : 1);
 742         if (error) {
 743                 printf("hfs_truncatefs: Error %d allocating new alternate volume header\n", error);
 744                 goto out;
 745         }
 746
 747         /*
 748          * Mark the old alternate volume header as free.
 749          * We don't bother shrinking allocation bitmap file.
 750          */
 751         if (hfsmp->blockSize == 512)
 752                 (void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 2, 2);
 753         else
 754                 (void) BlockMarkFree(hfsmp, hfsmp->totalBlocks - 1, 1);
 755
 756         /* Don't invalidate the old AltVH yet.  It is still valid until the partition size is updated ! */
 757
 758         /* Log successful shrinking. */
 759         printf("hfs_truncatefs: shrank \"%s\" to %d blocks (was %d blocks)\n",
 760                hfsmp->vcbVN, newblkcnt, hfsmp->totalBlocks);
 761
 762         /*
 763          * Adjust file system variables and flush them to disk.
 764          *
         * Note that although the logical block count is updated here, it is only
         * done for the benefit/convenience of the partition management software.  The
         * logical block count change has not actually been propagated to
         * the disk device yet (and we won't get any notification when it is).
 769          */
 770         hfsmp->totalBlocks = newblkcnt;
 771         hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size;
 772         hfsmp->hfs_logical_bytes = (uint64_t) hfsmp->hfs_logical_block_count * (uint64_t) hfsmp->hfs_logical_block_size;
 773
 774         /*
 775          * At this point, a smaller HFS file system exists in a larger volume.
 776          * As per volume format, the alternate volume header is located 1024 bytes
 777          * before end of the partition.  So, until the partition is also resized,
 778          * a valid alternate volume header will need to be updated at 1024 bytes
 779          * before end of the volume.  Under normal circumstances, a file system
 780          * resize is always followed by a volume resize, so we also need to
 781          * write a copy of the new alternate volume header at 1024 bytes before
 782          * end of the new file system.
 783          */
 784         if (hfs_resize_debug) {
 785                 printf ("hfs_truncatefs: old: partition_avh_sector=%qu, fs_avh_sector=%qu\n",
 786                                 hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector);
 787         }
 788         hfsmp->hfs_fs_avh_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
 789         /* Note hfs_partition_avh_sector stays unchanged! partition size has not yet been modified */
 790         if (hfs_resize_debug) {
 791                 printf ("hfs_truncatefs: new: partition_avh_sector=%qu, fs_avh_sector=%qu\n",
 792                                 hfsmp->hfs_partition_avh_sector, hfsmp->hfs_fs_avh_sector);
 793         }
 794
 795         MarkVCBDirty(hfsmp);
 796         error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
 797         if (error) {
 798                 panic("hfs_truncatefs: unexpected error flushing volume header (%d)\n", error);
 799         }
 800
 801         /*
 802          * Adjust the size of hfsmp->hfs_attrdata_vp
 803          */
 804         if (hfsmp->hfs_attrdata_vp) {
 805                 struct cnode *cp;
 806                 struct filefork *fp;
 807
 808                 if (vnode_get(hfsmp->hfs_attrdata_vp) == 0) {
 809                         cp = VTOC(hfsmp->hfs_attrdata_vp);
 810                         fp = VTOF(hfsmp->hfs_attrdata_vp);
 811
 812                         cp->c_blocks = newblkcnt;
 813                         fp->ff_blocks = newblkcnt;
 814                         fp->ff_extents[0].blockCount = newblkcnt;
 815                         fp->ff_size = (off_t) newblkcnt * hfsmp->blockSize;
 816                         ubc_setsize(hfsmp->hfs_attrdata_vp, fp->ff_size);
 817                         vnode_put(hfsmp->hfs_attrdata_vp);
 818                 }
 819         }
 820
 821 out:
 822         /*
 823          * Update the allocLimit to acknowledge the last one or two blocks now.
 824          * Add it to the tree as well if necessary.
 825          */
 826         UpdateAllocLimit (hfsmp, hfsmp->totalBlocks);
 827
 828         hfs_lock_mount (hfsmp);
 829         if (disable_sparse == true) {
 830                 /* Now that resize is completed, set the volume to be sparse
 831                  * device again so that all further allocations will be first
 832                  * fit instead of best fit.  Reset free extent cache so that
 833                  * it is rebuilt.
 834                  */
 835                 hfsmp->hfs_flags |= HFS_HAS_SPARSE_DEVICE;
 836                 ResetVCBFreeExtCache(hfsmp);
 837         }
 838
 839         if (error && (updateFreeBlocks == true)) {
 840                 hfsmp->freeBlocks += reclaimblks;
 841         }
 842
 843         if (hfsmp->nextAllocation >= hfsmp->allocLimit) {
 844                 hfsmp->nextAllocation = hfsmp->hfs_metazone_end + 1;
 845         }
 846         hfsmp->hfs_flags &= ~HFS_RESIZE_IN_PROGRESS;
 847         hfs_unlock_mount (hfsmp);
 848
 849         /* On error, reset the metadata zone for original volume size */
 850         if (error && (updateFreeBlocks == true)) {
 851                 hfs_metadatazone_init(hfsmp, false);
 852         }
 853
 854         if (lockflags) {
 855                 hfs_systemfile_unlock(hfsmp, lockflags);
 856         }
 857         if (transaction_begun) {
 858                 hfs_end_transaction(hfsmp);
 859                 hfs_journal_flush(hfsmp, FALSE);
 860                 /* Just to be sure, sync all data to the disk */
 861                 (void) VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
 862         }
 863
 864         if (error) {
 865                 printf ("hfs_truncatefs: failed error=%d on vol=%s\n", MacToVFSError(error), hfsmp->vcbVN);
 866         }
 867
 868         return MacToVFSError(error);
 869 }
 870
 871
 872 /*
 873  * Invalidate the physical block numbers associated with buffer cache blocks
 874  * in the given extent of the given vnode.
 875  */
 876 struct hfs_inval_blk_no {
 877         daddr64_t sectorStart;
 878         daddr64_t sectorCount;
 879 };
 880 static int
 881 hfs_invalidate_block_numbers_callback(buf_t bp, void *args_in)
 882 {
 883         daddr64_t blkno;
 884         struct hfs_inval_blk_no *args;
 885
 886         blkno = buf_blkno(bp);
 887         args = args_in;
 888
 889         if (blkno >= args->sectorStart && blkno < args->sectorStart+args->sectorCount)
 890                 buf_setblkno(bp, buf_lblkno(bp));
 891
 892         return BUF_RETURNED;
 893 }
 894 static void
 895 hfs_invalidate_sectors(struct vnode *vp, daddr64_t sectorStart, daddr64_t sectorCount)
 896 {
 897         struct hfs_inval_blk_no args;
 898         args.sectorStart = sectorStart;
 899         args.sectorCount = sectorCount;
 900
 901         buf_iterate(vp, hfs_invalidate_block_numbers_callback, BUF_SCAN_DIRTY|BUF_SCAN_CLEAN, &args);
 902 }
 903
 904
 905 /*
 906  * Copy the contents of an extent to a new location.  Also invalidates the
 907  * physical block number of any buffer cache block in the copied extent
 908  * (so that if the block is written, it will go through VNOP_BLOCKMAP to
 909  * determine the new physical block number).
 910  *
 911  * At this point, for regular files, we hold the truncate lock exclusive
 912  * and the cnode lock exclusive.
 913  */
 914 static int
 915 hfs_copy_extent(
 916                 struct hfsmount *hfsmp,
 917                 struct vnode *vp,               /* The file whose extent is being copied. */
 918                 u_int32_t oldStart,             /* The start of the source extent. */
 919                 u_int32_t newStart,             /* The start of the destination extent. */
 920                 u_int32_t blockCount,   /* The number of allocation blocks to copy. */
 921                 vfs_context_t context)
 922 {
 923         int err = 0;
 924         size_t bufferSize;
 925         void *buffer = NULL;
 926         struct vfsioattr ioattr;
 927         buf_t bp = NULL;
 928         off_t resid;
 929         size_t ioSize;
 930         u_int32_t ioSizeSectors;        /* Device sectors in this I/O */
 931         daddr64_t srcSector, destSector;
 932         u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size;
 933 #if CONFIG_PROTECT
 934         int cpenabled = 0;
 935 #endif
 936
 937         /*
 938          * Sanity check that we have locked the vnode of the file we're copying.
 939          *
 940          * But since hfs_systemfile_lock() doesn't actually take the lock on
 941          * the allocation file if a journal is active, ignore the check if the
 942          * file being copied is the allocation file.
 943          */
 944         struct cnode *cp = VTOC(vp);
 945         if (cp != hfsmp->hfs_allocation_cp && cp->c_lockowner != current_thread())
 946                 panic("hfs_copy_extent: vp=%p (cp=%p) not owned?\n", vp, cp);
 947
 948 #if CONFIG_PROTECT
 949         /*
 950          * Prepare the CP blob and get it ready for use, if necessary.
 951          *
 952          * Note that we specifically *exclude* system vnodes (catalog, bitmap, extents, EAs),
 953          * because they are implicitly protected via the media key on iOS.  As such, they
 954          * must not be relocated except with the media key.  So it is OK to not pass down
 955          * a special cpentry to the IOMedia/LwVM code for handling.
 956          */
 957         if (!vnode_issystem (vp) && vnode_isreg(vp) && cp_fs_protected (hfsmp->hfs_mp)) {
 958                 int cp_err = 0;
 959                 /*
 960                  * Ideally, the file whose extents we are about to manipulate is using the
 961                  * newer offset-based IVs so that we can manipulate it regardless of the
 962                  * current lock state.  However, we must maintain support for older-style
 963                  * EAs.
 964                  *
 965                  * For the older EA case, the IV was tied to the device LBA for file content.
 966                  * This means that encrypted data cannot be moved from one location to another
 967                  * in the filesystem without garbling the IV data.  As a result, we need to
 968                  * access the file's plaintext because we cannot do our AES-symmetry trick
 969                  * here.  This requires that we attempt a key-unwrap here (via cp_handle_relocate)
 970                  * to make forward progress.  If the keys are unavailable then we will
 971                  * simply stop the resize in its tracks here since we cannot move
 972                  * this extent at this time.
 973                  */
 974                 if ((cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) == 0) {
 975                         cp_err = cp_handle_relocate(cp, hfsmp);
 976                 }
 977
 978                 if (cp_err) {
 979                         printf ("hfs_copy_extent: cp_handle_relocate failed (%d) \n", cp_err);
 980                         return cp_err;
 981                 }
 982
 983                 cpenabled = 1;
 984         }
 985 #endif
 986
 987
 988         /*
 989          * Determine the I/O size to use
 990          *
 991          * NOTE: Many external drives will result in an ioSize of 128KB.
 992          * TODO: Should we use a larger buffer, doing several consecutive
 993          * reads, then several consecutive writes?
 994          */
 995         vfs_ioattr(hfsmp->hfs_mp, &ioattr);
 996         bufferSize = MIN(ioattr.io_maxreadcnt, ioattr.io_maxwritecnt);
 997         if (kmem_alloc(kernel_map, (vm_offset_t*) &buffer, bufferSize))
 998                 return ENOMEM;
 999
1000         /* Get a buffer for doing the I/O */
1001         bp = buf_alloc(hfsmp->hfs_devvp);
1002         buf_setdataptr(bp, (uintptr_t)buffer);
1003
1004         resid = (off_t) blockCount * (off_t) hfsmp->blockSize;
1005         srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
1006         destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
1007         while (resid > 0) {
1008                 ioSize = MIN(bufferSize, (size_t) resid);
1009                 ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size;
1010
1011                 /* Prepare the buffer for reading */
1012                 buf_reset(bp, B_READ);
1013                 buf_setsize(bp, ioSize);
1014                 buf_setcount(bp, ioSize);
1015                 buf_setblkno(bp, srcSector);
1016                 buf_setlblkno(bp, srcSector);
1017
1018                 /*
1019                  * Note that because this is an I/O to the device vp
1020                  * it is correct to have lblkno and blkno both point to the
1021                  * start sector being read from.  If it were being issued against the
1022                  * underlying file then that would be different.
1023                  */
1024
1025                 /* Attach the new CP blob  to the buffer if needed */
1026 #if CONFIG_PROTECT
1027                 if (cpenabled) {
1028                         if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) {
1029                                 /* attach the RELOCATION_INFLIGHT flag for the underlying call to VNOP_STRATEGY */
1030                                 cp->c_cpentry->cp_flags |= CP_RELOCATION_INFLIGHT;
1031                                 buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry);
1032                         }
1033                         else {
1034                                 /*
1035                                  * Use the cnode's cp key.  This file is tied to the
1036                                  * LBAs of the physical blocks that it occupies.
1037                                  */
1038                                 buf_setcpaddr (bp, cp->c_cpentry);
1039                         }
1040
1041                         /* Initialize the content protection file offset to start at 0 */
1042                         buf_setcpoff (bp, 0);
1043                 }
1044 #endif
1045
1046                 /* Do the read */
1047                 err = VNOP_STRATEGY(bp);
1048                 if (!err)
1049                         err = buf_biowait(bp);
1050                 if (err) {
1051 #if CONFIG_PROTECT
1052                         /* Turn the flag off in error cases. */
1053                         if (cpenabled) {
1054                                 cp->c_cpentry->cp_flags &= ~CP_RELOCATION_INFLIGHT;
1055                         }
1056 #endif
1057                         printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (read)\n", err);
1058                         break;
1059                 }
1060
1061                 /* Prepare the buffer for writing */
1062                 buf_reset(bp, B_WRITE);
1063                 buf_setsize(bp, ioSize);
1064                 buf_setcount(bp, ioSize);
1065                 buf_setblkno(bp, destSector);
1066                 buf_setlblkno(bp, destSector);
1067                 if (vnode_issystem(vp) && journal_uses_fua(hfsmp->jnl))
1068                         buf_markfua(bp);
1069
1070 #if CONFIG_PROTECT
1071                 /* Attach the CP to the buffer if needed */
1072                 if (cpenabled) {
1073                         if (cp->c_cpentry->cp_flags & CP_OFF_IV_ENABLED) {
1074                                 buf_setcpaddr(bp, hfsmp->hfs_resize_cpentry);
1075                         }
1076                         else {
1077                                 /*
1078                                  * Use the cnode's CP key.  This file is still tied
1079                                  * to the LBAs of the physical blocks that it occupies.
1080                                  */
1081                                 buf_setcpaddr (bp, cp->c_cpentry);
1082                         }
1083                         /*
1084                          * The last STRATEGY call may have updated the cp file offset behind our
1085                          * back, so we cannot trust it.  Re-initialize the content protection
1086                          * file offset back to 0 before initiating the write portion of this I/O.
1087                          */
1088                         buf_setcpoff (bp, 0);
1089                 }
1090 #endif
1091
1092                 /* Do the write */
1093                 vnode_startwrite(hfsmp->hfs_devvp);
1094                 err = VNOP_STRATEGY(bp);
1095                 if (!err) {
1096                         err = buf_biowait(bp);
1097                 }
1098 #if CONFIG_PROTECT
1099                 /* Turn the flag off regardless once the strategy call finishes. */
1100                 if (cpenabled) {
1101                         cp->c_cpentry->cp_flags &= ~CP_RELOCATION_INFLIGHT;
1102                 }
1103 #endif
1104                 if (err) {
1105                         printf("hfs_copy_extent: Error %d from VNOP_STRATEGY (write)\n", err);
1106                         break;
1107                 }
1108
1109                 resid -= ioSize;
1110                 srcSector += ioSizeSectors;
1111                 destSector += ioSizeSectors;
1112         }
1113         if (bp)
1114                 buf_free(bp);
1115         if (buffer)
1116                 kmem_free(kernel_map, (vm_offset_t)buffer, bufferSize);
1117
1118         /* Make sure all writes have been flushed to disk. */
1119         if (vnode_issystem(vp) && !journal_uses_fua(hfsmp->jnl)) {
1120                 err = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
1121                 if (err) {
1122                         printf("hfs_copy_extent: DKIOCSYNCHRONIZECACHE failed (%d)\n", err);
1123                         err = 0;        /* Don't fail the copy. */
1124                 }
1125         }
1126
1127         if (!err)
1128                 hfs_invalidate_sectors(vp, (daddr64_t)oldStart*sectorsPerBlock, (daddr64_t)blockCount*sectorsPerBlock);
1129
1130         return err;
1131 }
1132
1133
1134 /* Structure to store state of reclaiming extents from a
1135  * given file.  hfs_reclaim_file()/hfs_reclaim_xattr()
1136  * initializes the values in this structure which are then
1137  * used by code that reclaims and splits the extents.
1138  */
1139 struct hfs_reclaim_extent_info {
1140         struct vnode *vp;
1141         u_int32_t fileID;
1142         u_int8_t forkType;
1143         u_int8_t is_dirlink;                 /* Extent belongs to directory hard link */
1144         u_int8_t is_sysfile;                 /* Extent belongs to system file */
1145         u_int8_t is_xattr;                   /* Extent belongs to extent-based xattr */
1146         u_int8_t extent_index;
1147         int lockflags;                       /* Locks that reclaim and split code should grab before modifying the extent record */
1148         u_int32_t blocks_relocated;          /* Total blocks relocated for this file till now */
1149         u_int32_t recStartBlock;             /* File allocation block number (FABN) for current extent record */
1150         u_int32_t cur_blockCount;            /* Number of allocation blocks that have been checked for reclaim */
1151         struct filefork *catalog_fp;         /* If non-NULL, extent is from catalog record */
1152         union record {
1153                 HFSPlusExtentRecord overflow;/* Extent record from overflow extents btree */
1154                 HFSPlusAttrRecord xattr;     /* Attribute record for large EAs */
1155         } record;
1156         HFSPlusExtentDescriptor *extents;    /* Pointer to current extent record being processed.
1157                                           * For catalog extent record, points to the correct
1158                                           * extent information in filefork.  For overflow extent
1159                                           * record, or xattr record, points to extent record
1160                                           * in the structure above
1161                                           */
1162         struct cat_desc *dirlink_desc;
1163         struct cat_attr *dirlink_attr;
1164         struct filefork *dirlink_fork;        /* For directory hard links, fp points actually to this */
1165         struct BTreeIterator *iterator;       /* Shared read/write iterator, hfs_reclaim_file/xattr()
1166                                            * use it for reading and hfs_reclaim_extent()/hfs_split_extent()
1167                                            * use it for writing updated extent record
1168                                            */
1169         struct FSBufferDescriptor btdata;     /* Shared btdata for reading/writing extent record, same as iterator above */
1170         u_int16_t recordlen;
1171         int overflow_count;                   /* For debugging, counter for overflow extent record */
1172         FCB *fcb;                             /* Pointer to the current btree being traversed */
1173 };
1174
1175 /*
1176  * Split the current extent into two extents, with first extent
1177  * to contain given number of allocation blocks.  Splitting of
1178  * extent creates one new extent entry which can result in
1179  * shifting of many entries through all the extent records of a
1180  * file, and/or creating a new extent record in the overflow
1181  * extent btree.
1182  *
1183  * Example:
1184  * The diagram below represents two consecutive extent records,
 * for simplicity, let's call them record X and X+1 respectively.
1186  * Interesting extent entries have been denoted by letters.
1187  * If the letter is unchanged before and after split, it means
1188  * that the extent entry was not modified during the split.
1189  * A '.' means that the entry remains unchanged after the split
1190  * and is not relevant for our example.  A '0' means that the
1191  * extent entry is empty.
1192  *
1193  * If there isn't sufficient contiguous free space to relocate
1194  * an extent (extent "C" below), we will have to break the one
1195  * extent into multiple smaller extents, and relocate each of
1196  * the smaller extents individually.  The way we do this is by
1197  * finding the largest contiguous free space that is currently
1198  * available (N allocation blocks), and then convert extent "C"
1199  * into two extents, C1 and C2, that occupy exactly the same
1200  * allocation blocks as extent C.  Extent C1 is the first
1201  * N allocation blocks of extent C, and extent C2 is the remainder
1202  * of extent C.  Then we can relocate extent C1 since we know
1203  * we have enough contiguous free space to relocate it in its
1204  * entirety.  We then repeat the process starting with extent C2.
1205  *
1206  * In record X, only the entries following entry C are shifted, and
1207  * the original entry C is replaced with two entries C1 and C2 which
1208  * are actually two extent entries for contiguous allocation blocks.
1209  *
1210  * Note that the entry E from record X is shifted into record X+1 as
1211  * the new first entry.  Since the first entry of record X+1 is updated,
1212  * the FABN will also get updated with the blockCount of entry E.
1213  * This also results in shifting of all extent entries in record X+1.
1214  * Note that the number of empty entries after the split has been
1215  * changed from 3 to 2.
1216  *
1217  * Before:
1218  *               record X                           record X+1
1219  *  ---------------------===---------     ---------------------------------
1220  *  | A | . | . | . | B | C | D | E |     | F | . | . | . | G | 0 | 0 | 0 |
1221  *  ---------------------===---------     ---------------------------------
1222  *
1223  * After:
1224  *  ---------------------=======-----     ---------------------------------
1225  *  | A | . | . | . | B | C1| C2| D |     | E | F | . | . | . | G | 0 | 0 |
1226  *  ---------------------=======-----     ---------------------------------
1227  *
1228  *  C1.startBlock = C.startBlock
1229  *  C1.blockCount = N
1230  *
1231  *  C2.startBlock = C.startBlock + N
1232  *  C2.blockCount = C.blockCount - N
1233  *
1234  *                                        FABN = old FABN - E.blockCount
1235  *
1236  * Inputs:
1237  *      extent_info -   This is the structure that contains state about
1238  *                      the current file, extent, and extent record that
1239  *                      is being relocated.  This structure is shared
1240  *                      among code that traverses through all the extents
1241  *                      of the file, code that relocates extents, and
1242  *                      code that splits the extent.
 *      newBlockCount - The blockCount that the extent being split will
 *                      have after a successful split operation.
1245  * Output:
1246  *      Zero on success, non-zero on failure.
1247  */
1248 static int
1249 hfs_split_extent(struct hfs_reclaim_extent_info *extent_info, uint32_t newBlockCount)
1250 {
1251         int error = 0;
1252         int index = extent_info->extent_index;
1253         int i;
1254         HFSPlusExtentDescriptor shift_extent; /* Extent entry that should be shifted into next extent record */
1255         HFSPlusExtentDescriptor last_extent;
1256         HFSPlusExtentDescriptor *extents; /* Pointer to current extent record being manipulated */
1257         HFSPlusExtentRecord *extents_rec = NULL;
1258         HFSPlusExtentKey *extents_key = NULL;
1259         HFSPlusAttrRecord *xattr_rec = NULL;
1260         HFSPlusAttrKey *xattr_key = NULL;
1261         struct BTreeIterator iterator;
1262         struct FSBufferDescriptor btdata;
1263         uint16_t reclen;
1264         uint32_t read_recStartBlock;    /* Starting allocation block number to read old extent record */
1265         uint32_t write_recStartBlock;   /* Starting allocation block number to insert newly updated extent record */
1266         Boolean create_record = false;
1267         Boolean is_xattr;
1268         struct cnode *cp;
1269
1270         is_xattr = extent_info->is_xattr;
1271         extents = extent_info->extents;
1272         cp = VTOC(extent_info->vp);
1273
1274         if (newBlockCount == 0) {
1275                 if (hfs_resize_debug) {
1276                         printf ("hfs_split_extent: No splitting required for newBlockCount=0\n");
1277                 }
1278                 return error;
1279         }
1280
1281         if (hfs_resize_debug) {
1282                 printf ("hfs_split_extent: Split record:%u recStartBlock=%u %u:(%u,%u) for %u blocks\n", extent_info->overflow_count, extent_info->recStartBlock, index, extents[index].startBlock, extents[index].blockCount, newBlockCount);
1283         }
1284
1285         /* Extents overflow btree can not have more than 8 extents.
1286          * No split allowed if the 8th extent is already used.
1287          */
1288         if ((extent_info->fileID == kHFSExtentsFileID) && (extents[kHFSPlusExtentDensity - 1].blockCount != 0)) {
1289                 printf ("hfs_split_extent: Maximum 8 extents allowed for extents overflow btree, cannot split further.\n");
1290                 error = ENOSPC;
1291                 goto out;
1292         }
1293
1294         /* Determine the starting allocation block number for the following
1295          * overflow extent record, if any, before the current record
1296          * gets modified.
1297          */
1298         read_recStartBlock = extent_info->recStartBlock;
1299         for (i = 0; i < kHFSPlusExtentDensity; i++) {
1300                 if (extents[i].blockCount == 0) {
1301                         break;
1302                 }
1303                 read_recStartBlock += extents[i].blockCount;
1304         }
1305
1306         /* Shift and split */
1307         if (index == kHFSPlusExtentDensity-1) {
1308                 /* The new extent created after split will go into following overflow extent record */
1309                 shift_extent.startBlock = extents[index].startBlock + newBlockCount;
1310                 shift_extent.blockCount = extents[index].blockCount - newBlockCount;
1311
1312                 /* Last extent in the record will be split, so nothing to shift */
1313         } else {
                /* Splitting an extent can result in at most one
                 * extent entry being shifted into the following overflow
                 * extent record.  So, store the last extent entry for later.
1317                  */
1318                 shift_extent = extents[kHFSPlusExtentDensity-1];
1319                 if ((hfs_resize_debug) && (shift_extent.blockCount != 0)) {
1320                         printf ("hfs_split_extent: Save 7:(%u,%u) to shift into overflow record\n", shift_extent.startBlock, shift_extent.blockCount);
1321                 }
1322
1323                 /* Start shifting extent information from the end of the extent
1324                  * record to the index where we want to insert the new extent.
1325                  * Note that kHFSPlusExtentDensity-1 is already saved above, and
1326                  * does not need to be shifted.  The extent entry that is being
1327                  * split does not get shifted.
1328                  */
1329                 for (i = kHFSPlusExtentDensity-2; i > index; i--) {
1330                         if (hfs_resize_debug) {
1331                                 if (extents[i].blockCount) {
1332                                         printf ("hfs_split_extent: Shift %u:(%u,%u) to %u:(%u,%u)\n", i, extents[i].startBlock, extents[i].blockCount, i+1, extents[i].startBlock, extents[i].blockCount);
1333                                 }
1334                         }
1335                         extents[i+1] = extents[i];
1336                 }
1337         }
1338
1339         if (index == kHFSPlusExtentDensity-1) {
1340                 /* The second half of the extent being split will be the overflow
1341                  * entry that will go into following overflow extent record.  The
1342                  * value has been stored in 'shift_extent' above, so there is
1343                  * nothing to be done here.
1344                  */
1345         } else {
1346                 /* Update the values in the second half of the extent being split
1347                  * before updating the first half of the split.  Note that the
1348                  * extent to split or first half of the split is at index 'index'
1349                  * and a new extent or second half of the split will be inserted at
1350                  * 'index+1' or into following overflow extent record.
1351                  */
1352                 extents[index+1].startBlock = extents[index].startBlock + newBlockCount;
1353                 extents[index+1].blockCount = extents[index].blockCount - newBlockCount;
1354         }
1355         /* Update the extent being split, only the block count will change */
1356         extents[index].blockCount = newBlockCount;
1357
1358         if (hfs_resize_debug) {
1359                 printf ("hfs_split_extent: Split %u:(%u,%u) and ", index, extents[index].startBlock, extents[index].blockCount);
1360                 if (index != kHFSPlusExtentDensity-1) {
1361                         printf ("%u:(%u,%u)\n", index+1, extents[index+1].startBlock, extents[index+1].blockCount);
1362                 } else {
1363                         printf ("overflow:(%u,%u)\n", shift_extent.startBlock, shift_extent.blockCount);
1364                 }
1365         }
1366
1367         /* Write out information about the newly split extent to the disk */
1368         if (extent_info->catalog_fp) {
1369                 /* (extent_info->catalog_fp != NULL) means the newly split
1370                  * extent exists in the catalog record.  This means that
1371                  * the cnode was updated.  Therefore, to write out the changes,
1372                  * mark the cnode as modified.   We cannot call hfs_update()
1373                  * in this function because the caller hfs_reclaim_extent()
1374                  * is holding the catalog lock currently.
1375                  */
1376                 cp->c_flag |= C_MODIFIED;
1377         } else {
1378                 /* The newly split extent is for large EAs or is in overflow
1379                  * extent record, so update it directly in the btree using the
1380                  * iterator information from the shared extent_info structure
1381                  */
1382                 error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
1383                                 &(extent_info->btdata), extent_info->recordlen);
1384                 if (error) {
1385                         printf ("hfs_split_extent: fileID=%u BTReplaceRecord returned error=%d\n", extent_info->fileID, error);
1386                         goto out;
1387                 }
1388         }
1389
1390         /* No extent entry to be shifted into another extent overflow record */
1391         if (shift_extent.blockCount == 0) {
1392                 if (hfs_resize_debug) {
1393                         printf ("hfs_split_extent: No extent entry to be shifted into overflow records\n");
1394                 }
1395                 error = 0;
1396                 goto out;
1397         }
1398
1399         /* The overflow extent entry has to be shifted into an extent
1400          * overflow record.  This means that we might have to shift
1401          * extent entries from all subsequent overflow records by one.
1402          * We start iteration from the first record to the last record,
1403          * and shift the extent entry from one record to another.
1404          * We might have to create a new extent record for the last
1405          * extent entry for the file.
1406          */
1407
1408         /* Initialize iterator to search the next record */
1409         bzero(&iterator, sizeof(iterator));
1410         if (is_xattr) {
1411                 /* Copy the key from the iterator that was used to update the modified attribute record. */
1412                 xattr_key = (HFSPlusAttrKey *)&(iterator.key);
1413                 bcopy((HFSPlusAttrKey *)&(extent_info->iterator->key), xattr_key, sizeof(HFSPlusAttrKey));
1414                 /* Note: xattr_key->startBlock will be initialized later in the iteration loop */
1415
1416                 MALLOC(xattr_rec, HFSPlusAttrRecord *,
1417                sizeof(HFSPlusAttrRecord), M_TEMP, M_WAITOK);
1418                 if (xattr_rec == NULL) {
1419                         error = ENOMEM;
1420                         goto out;
1421                 }
1422                 btdata.bufferAddress = xattr_rec;
1423                 btdata.itemSize = sizeof(HFSPlusAttrRecord);
1424                 btdata.itemCount = 1;
1425                 extents = xattr_rec->overflowExtents.extents;
1426         } else {
1427                 /* Initialize the extent key for the current file */
1428                 extents_key = (HFSPlusExtentKey *) &(iterator.key);
1429                 extents_key->keyLength = kHFSPlusExtentKeyMaximumLength;
1430                 extents_key->forkType = extent_info->forkType;
1431                 extents_key->fileID = extent_info->fileID;
1432                 /* Note: extents_key->startBlock will be initialized later in the iteration loop */
1433
1434                 MALLOC(extents_rec, HFSPlusExtentRecord *,
1435                sizeof(HFSPlusExtentRecord), M_TEMP, M_WAITOK);
1436                 if (extents_rec == NULL) {
1437                         error = ENOMEM;
1438                         goto out;
1439                 }
1440                 btdata.bufferAddress = extents_rec;
1441                 btdata.itemSize = sizeof(HFSPlusExtentRecord);
1442                 btdata.itemCount = 1;
1443                 extents = extents_rec[0];
1444         }
1445
1446         /* The overflow extent entry has to be shifted into an extent
1447          * overflow record.  This means that we might have to shift
1448          * extent entries from all subsequent overflow records by one.
1449          * We start iteration from the first record to the last record,
1450          * examine one extent record in each iteration and shift one
1451          * extent entry from one record to another.  We might have to
1452          * create a new extent record for the last extent entry for the
1453          * file.
1454          *
1455          * If shift_extent.blockCount is non-zero, it means that there is
1456          * an extent entry that needs to be shifted into the next
1457          * overflow extent record.  We keep on going till there are no such
1458          * entries left to be shifted.  This will also change the starting
1459          * allocation block number of the extent record which is part of
1460          * the key for the extent record in each iteration.  Note that
1461          * because the extent record key is changing while we are searching,
1462          * the record can not be updated directly, instead it has to be
1463          * deleted and inserted again.
1464          */
1465         while (shift_extent.blockCount) {
1466                 if (hfs_resize_debug) {
1467                         printf ("hfs_split_extent: Will shift (%u,%u) into overflow record with startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, read_recStartBlock);
1468                 }
1469
1470                 /* Search if there is any existing overflow extent record
1471                  * that matches the current file and the logical start block
1472                  * number.
1473                  *
1474                  * For this, the logical start block number in the key is
1475                  * the value calculated based on the logical start block
1476                  * number of the current extent record and the total number
1477                  * of blocks existing in the current extent record.
1478                  */
1479                 if (is_xattr) {
1480                         xattr_key->startBlock = read_recStartBlock;
1481                 } else {
1482                         extents_key->startBlock = read_recStartBlock;
1483                 }
1484                 error = BTSearchRecord(extent_info->fcb, &iterator, &btdata, &reclen, &iterator);
1485                 if (error) {
1486                         if (error != btNotFound) {
1487                                 printf ("hfs_split_extent: fileID=%u startBlock=%u BTSearchRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
1488                                 goto out;
1489                         }
1490                         /* No matching record was found, so create a new extent record.
1491                          * Note:  Since no record was found, we can't rely on the
1492                          * btree key in the iterator any longer.  This will be initialized
1493                          * later before we insert the record.
1494                          */
1495                         create_record = true;
1496                 }
1497
1498                 /* The extra extent entry from the previous record is being inserted
1499                  * as the first entry in the current extent record.  This will change
1500                  * the file allocation block number (FABN) of the current extent
1501                  * record, which is the startBlock value from the extent record key.
1502                  * Since one extra entry is being inserted in the record, the new
1503                  * FABN for the record will be less than the old FABN by the number of blocks
1504                  * in the new extent entry being inserted at the start.  We have to
1505                  * do this before we update read_recStartBlock to point at the
1506                  * startBlock of the following record.
1507                  */
1508                 write_recStartBlock = read_recStartBlock - shift_extent.blockCount;
1509                 if (hfs_resize_debug) {
1510                         if (create_record) {
1511                                 printf ("hfs_split_extent: No records found for startBlock=%u, will create new with startBlock=%u\n", read_recStartBlock, write_recStartBlock);
1512                         }
1513                 }
1514
1515                 /* Now update the read_recStartBlock to account for total number
1516                  * of blocks in this extent record.  It will now point to the
1517                  * starting allocation block number for the next extent record.
1518                  */
1519                 for (i = 0; i < kHFSPlusExtentDensity; i++) {
1520                         if (extents[i].blockCount == 0) {
1521                                 break;
1522                         }
1523                         read_recStartBlock += extents[i].blockCount;
1524                 }
1525
1526                 if (create_record == true) {
1527                         /* Initialize new record content with only one extent entry */
1528                         bzero(extents, sizeof(HFSPlusExtentRecord));
1529                         /* The new record will contain only one extent entry */
1530                         extents[0] = shift_extent;
1531                         /* There are no more overflow extents to be shifted */
1532                         shift_extent.startBlock = shift_extent.blockCount = 0;
1533
1534                         if (is_xattr) {
1535                                 /* BTSearchRecord above returned btNotFound,
1536                                  * but since the attribute btree is never empty
1537                                  * if we are trying to insert new overflow
1538                                  * record for the xattrs, the extents_key will
1539                                  * contain correct data.  So we don't need to
1540                                  * re-initialize it again like below.
1541                                  */
1542
1543                                 /* Initialize the new xattr record */
1544                                 xattr_rec->recordType = kHFSPlusAttrExtents;
1545                                 xattr_rec->overflowExtents.reserved = 0;
1546                                 reclen = sizeof(HFSPlusAttrExtents);
1547                         } else {
1548                                 /* BTSearchRecord above returned btNotFound,
1549                                  * which means that extents_key content might
1550                                  * not correspond to the record that we are
1551                                  * trying to create, especially when the extents
1552                                  * overflow btree is empty.  So we reinitialize
1553                                  * the extents_key again always.
1554                                  */
1555                                 extents_key->keyLength = kHFSPlusExtentKeyMaximumLength;
1556                                 extents_key->forkType = extent_info->forkType;
1557                                 extents_key->fileID = extent_info->fileID;
1558
1559                                 /* Initialize the new extent record */
1560                                 reclen = sizeof(HFSPlusExtentRecord);
1561                         }
1562                 } else {
1563                         /* The overflow extent entry from previous record will be
1564                          * the first entry in this extent record.  If the last
1565                          * extent entry in this record is valid, it will be shifted
1566                          * into the following extent record as its first entry.  So
1567                          * save the last entry before shifting entries in current
1568                          * record.
1569                          */
1570                         last_extent = extents[kHFSPlusExtentDensity-1];
1571
1572                         /* Shift all entries by one index towards the end */
1573                         for (i = kHFSPlusExtentDensity-2; i >= 0; i--) {
1574                                 extents[i+1] = extents[i];
1575                         }
1576
1577                         /* Overflow extent entry saved from previous record
1578                          * is now the first entry in the current record.
1579                          */
1580                         extents[0] = shift_extent;
1581
1582                         if (hfs_resize_debug) {
1583                                 printf ("hfs_split_extent: Shift overflow=(%u,%u) to record with updated startBlock=%u\n", shift_extent.startBlock, shift_extent.blockCount, write_recStartBlock);
1584                         }
1585
1586                         /* The last entry from current record will be the
1587                          * overflow entry which will be the first entry for
1588                          * the following extent record.
1589                          */
1590                         shift_extent = last_extent;
1591
1592                         /* Since the key->startBlock is being changed for this record,
1593                          * it should be deleted and inserted with the new key.
1594                          */
1595                         error = BTDeleteRecord(extent_info->fcb, &iterator);
1596                         if (error) {
1597                                 printf ("hfs_split_extent: fileID=%u startBlock=%u BTDeleteRecord error=%d\n", extent_info->fileID, read_recStartBlock, error);
1598                                 goto out;
1599                         }
1600                         if (hfs_resize_debug) {
1601                                 printf ("hfs_split_extent: Deleted extent record with startBlock=%u\n", (is_xattr ? xattr_key->startBlock : extents_key->startBlock));
1602                         }
1603                 }
1604
1605                 /* Insert the newly created or modified extent record */
1606                 bzero(&iterator.hint, sizeof(iterator.hint));
1607                 if (is_xattr) {
1608                         xattr_key->startBlock = write_recStartBlock;
1609                 } else {
1610                         extents_key->startBlock = write_recStartBlock;
1611                 }
1612                 error = BTInsertRecord(extent_info->fcb, &iterator, &btdata, reclen);
1613                 if (error) {
1614                         printf ("hfs_split_extent: fileID=%u, startBlock=%u BTInsertRecord error=%d\n", extent_info->fileID, write_recStartBlock, error);
1615                         goto out;
1616                 }
1617                 if (hfs_resize_debug) {
1618                         printf ("hfs_split_extent: Inserted extent record with startBlock=%u\n", write_recStartBlock);
1619                 }
1620         }
1621
1622 out:
1623         /*
1624          * Extents overflow btree or attributes btree headers might have
1625          * been modified during the split/shift operation, so flush the
1626          * changes to the disk while we are inside journal transaction.
1627          * We should only be able to generate I/O that modifies the B-Tree
1628          * header nodes while we're in the middle of a journal transaction.
1629          * Otherwise it might result in panic during unmount.
1630          */
1631         BTFlushPath(extent_info->fcb);
1632
1633         if (extents_rec) {
1634                 FREE (extents_rec, M_TEMP);
1635         }
1636         if (xattr_rec) {
1637                 FREE (xattr_rec, M_TEMP);
1638         }
1639         return error;
1640 }
1641
1642
1643 /*
1644  * Relocate an extent if it lies beyond the expected end of volume.
1645  *
1646  * This function is called for every extent of the file being relocated.
1647  * It allocates space for relocation, copies the data, deallocates
1648  * the old extent, and updates the corresponding on-disk extent.  If the
1649  * function cannot find contiguous space to relocate an extent, it splits
1650  * the extent into smaller pieces so that it can be relocated out of the area of
1651  * disk being reclaimed.  As an optimization, if an extent lies partially
1652  * in the area of the disk being reclaimed, it is split so that we only
1653  * have to relocate the area that was overlapping with the area of disk
1654  * being reclaimed.
1655  *
1656  * Note that every extent is relocated in its own transaction so that
1657  * they do not overwhelm the journal.  This function handles the extent
1658  * record that exists in the catalog record, extent record from overflow
1659  * extents btree, and extents for large EAs.
1660  *
1661  * Inputs:
1662  *      extent_info - This is the structure that contains state about
1663  *                    the current file, extent, and extent record that
1664  *                    is being relocated.  This structure is shared
1665  *                    among code that traverses through all the extents
1666  *                    of the file, code that relocates extents, and
1667  *                    code that splits the extent.
1668  */
1669 static int
1670 hfs_reclaim_extent(struct hfsmount *hfsmp, const u_long allocLimit, struct hfs_reclaim_extent_info *extent_info, vfs_context_t context)
1671 {
1672         int error = 0;
1673         int index;
1674         struct cnode *cp;
1675         u_int32_t oldStartBlock;
1676         u_int32_t oldBlockCount;
1677         u_int32_t newStartBlock;
1678         u_int32_t newBlockCount;
1679         u_int32_t roundedBlockCount;
1680         uint16_t node_size;
1681         uint32_t remainder_blocks;
1682         u_int32_t alloc_flags;
1683         int blocks_allocated = false;
1684
1685         index = extent_info->extent_index;
1686         cp = VTOC(extent_info->vp);
1687
1688         oldStartBlock = extent_info->extents[index].startBlock;
1689         oldBlockCount = extent_info->extents[index].blockCount;
1690
1691         if (0 && hfs_resize_debug) {
1692                 printf ("hfs_reclaim_extent: Examine record:%u recStartBlock=%u, %u:(%u,%u)\n", extent_info->overflow_count, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount);
1693         }
1694
1695         /* If the current extent lies completely within allocLimit,
1696          * it does not require any relocation.
1697          */
1698         if ((oldStartBlock + oldBlockCount) <= allocLimit) {
1699                 extent_info->cur_blockCount += oldBlockCount;
1700                 return error;
1701         }
1702
1703         /* Every extent should be relocated in its own transaction
1704          * to make sure that we don't overflow the journal buffer.
1705          */
1706         error = hfs_start_transaction(hfsmp);
1707         if (error) {
1708                 return error;
1709         }
1710         extent_info->lockflags = hfs_systemfile_lock(hfsmp, extent_info->lockflags, HFS_EXCLUSIVE_LOCK);
1711
1712         /* Check if the extent lies partially in the area to reclaim,
1713          * i.e. it starts before allocLimit and ends beyond allocLimit.
1714          * We have already skipped extents that lie completely within
1715          * allocLimit in the check above, so we only check for the
1716          * startBlock.  If it lies partially, split it so that we
1717          * only relocate part of the extent.
1718          */
1719         if (oldStartBlock < allocLimit) {
1720                 newBlockCount = allocLimit - oldStartBlock;
1721
1722                 if (hfs_resize_debug) {
1723                         int idx = extent_info->extent_index;
1724                         printf ("hfs_reclaim_extent: Split straddling extent %u:(%u,%u) for %u blocks\n", idx, extent_info->extents[idx].startBlock, extent_info->extents[idx].blockCount, newBlockCount);
1725                 }
1726
1727                 /* If the extent belongs to a btree, check and trim
1728                  * it to be multiple of the node size.
1729                  */
1730                 if (extent_info->is_sysfile) {
1731                         node_size = get_btree_nodesize(extent_info->vp);
1732                         /* If the btree node size is less than the block size,
1733                          * splitting this extent will not split a node across
1734                          * different extents.  So we only check and trim if
1735                          * node size is more than the allocation block size.
1736                          */
1737                         if (node_size > hfsmp->blockSize) {
1738                                 remainder_blocks = newBlockCount % (node_size / hfsmp->blockSize);
1739                                 if (remainder_blocks) {
1740                                         newBlockCount -= remainder_blocks;
1741                                         if (hfs_resize_debug) {
1742                                                 printf ("hfs_reclaim_extent: Round-down newBlockCount to be multiple of nodeSize, node_allocblks=%u, old=%u, new=%u\n", node_size/hfsmp->blockSize, newBlockCount + remainder_blocks, newBlockCount);
1743                                         }
1744                                 }
1745                         }
1746                         /* The newBlockCount is zero because of rounding-down so that
1747                          * btree nodes are not split across extents.  Therefore this
1748                          * straddling extent across resize-boundary does not require
1749                          * splitting.  Skip over to relocating of complete extent.
1750                          */
1751                         if (newBlockCount == 0) {
1752                                 if (hfs_resize_debug) {
1753                                         printf ("hfs_reclaim_extent: After round-down newBlockCount=0, skip split, relocate full extent\n");
1754                                 }
1755                                 goto relocate_full_extent;
1756                         }
1757                 }
1758
1759                 /* Split the extents into two parts --- the first extent lies
1760                  * completely within allocLimit and therefore does not require
1761                  * relocation.  The second extent will require relocation which
1762                  * will be handled when the caller calls this function again
1763                  * for the next extent.
1764                  */
1765                 error = hfs_split_extent(extent_info, newBlockCount);
1766                 if (error == 0) {
1767                         /* Split success, no relocation required */
1768                         goto out;
1769                 }
1770                 /* Split failed, so try to relocate entire extent */
1771                 if (hfs_resize_debug) {
1772                         int idx = extent_info->extent_index;
1773                         printf ("hfs_reclaim_extent: Split straddling extent %u:(%u,%u) for %u blocks failed, relocate full extent\n", idx, extent_info->extents[idx].startBlock, extent_info->extents[idx].blockCount, newBlockCount);
1774                 }
1775         }
1776
1777 relocate_full_extent:
1778         /* At this point, the current extent requires relocation.
1779          * We will try to allocate space equal to the size of the extent
1780          * being relocated first to try to relocate it without splitting.
1781          * If the allocation fails, we will try to allocate contiguous
1782          * blocks out of metadata zone.  If that allocation also fails,
1783          * then we will take whatever contiguous block run is returned
1784          * by the allocation, split the extent into two parts, and then
1785          * relocate the first part of the split extent.
1786          */
1787         alloc_flags = HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS;
1788         if (extent_info->is_sysfile) {
1789                 alloc_flags |= HFS_ALLOC_METAZONE;
1790         }
1791
1792         error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount, alloc_flags,
1793                           &newStartBlock, &newBlockCount);
1794         if ((extent_info->is_sysfile == false) &&
1795             ((error == dskFulErr) || (error == ENOSPC))) {
1796                 /* For non-system files, try reallocating space in metadata zone */
1797                 alloc_flags |= HFS_ALLOC_METAZONE;
1798                 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
1799                               alloc_flags, &newStartBlock, &newBlockCount);
1800         }
1801         if ((error == dskFulErr) || (error == ENOSPC)) {
1802                 /*
1803                  * We did not find desired contiguous space for this
1804                  * extent, when we asked for it, including the metazone allocations.
1805                  * At this point we are not worrying about getting contiguity anymore.
1806                  *
1807                  * HOWEVER, if we now allow blocks to be used which were recently
1808                  * de-allocated, we may find a contiguous range (though this seems
1809                  * unlikely). As a result, assume that we will have to split the
1810                  * current extent into two pieces, but if we are able to satisfy
1811                  * the request with a single extent, detect that as well.
1812                  */
1813                 alloc_flags &= ~HFS_ALLOC_FORCECONTIG;
1814                 alloc_flags |= HFS_ALLOC_FLUSHTXN;
1815
1816                 error = BlockAllocate(hfsmp, 1, oldBlockCount, oldBlockCount,
1817                               alloc_flags, &newStartBlock, &newBlockCount);
1818                 if (error) {
1819                         printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
1820                         goto out;
1821                 }
1822
1823                 /*
1824                  * Allowing recently deleted extents may now allow us to find
1825                  * a single contiguous extent in the amount & size desired.  If so,
1826                  * do NOT split this extent into two pieces.  This is technically a
1827                  * check for "< oldBlockCount", but we use != to highlight the point
1828                  * that the special case is when they're equal. The allocator should
1829                  * never vend back more blocks than were requested.
1830                  */
1831                 if (newBlockCount != oldBlockCount) {
1832                         blocks_allocated = true;
1833
1834                         /* The number of blocks allocated is less than the requested
1835                          * number of blocks.  For btree extents, check and trim the
1836                          * extent to be multiple of the node size.
1837                          */
1838                         if (extent_info->is_sysfile) {
1839                                 node_size = get_btree_nodesize(extent_info->vp);
1840                                 if (node_size > hfsmp->blockSize) {
1841                                         remainder_blocks = newBlockCount % (node_size / hfsmp->blockSize);
1842                                         if (remainder_blocks) {
1843                                                 roundedBlockCount = newBlockCount - remainder_blocks;
1844                                                 /* Free tail-end blocks of the newly allocated extent */
1845                                                 BlockDeallocate(hfsmp, newStartBlock + roundedBlockCount,
1846                                         newBlockCount - roundedBlockCount,
1847                                         HFS_ALLOC_SKIPFREEBLKS);
1848                                                 newBlockCount = roundedBlockCount;
1849                                                 if (hfs_resize_debug) {
1850                                                         printf ("hfs_reclaim_extent: Fixing extent block count, node_blks=%u, old=%u, new=%u\n", node_size/hfsmp->blockSize, newBlockCount + remainder_blocks, newBlockCount);
1851                                                 }
1852                                                 if (newBlockCount == 0) {
1853                                                         printf ("hfs_reclaim_extent: Not enough contiguous blocks available to relocate fileID=%d\n", extent_info->fileID);
1854                                                         error = ENOSPC;
1855                                                         goto out;
1856                                                 }
1857                                         }
1858                                 }
1859                         }
1860
1861                         /* The number of blocks allocated is less than the number of
1862                          * blocks requested, so split this extent --- the first extent
1863                          * will be relocated as part of this function call and the caller
1864                          * will handle relocating the second extent by calling this
1865                          * function again for the second extent.
1866                          */
1867                         error = hfs_split_extent(extent_info, newBlockCount);
1868                         if (error) {
1869                                 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) split error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
1870                                 goto out;
1871                         }
1872                         oldBlockCount = newBlockCount;
1873                 } /* end oldBlockCount != newBlockCount */
1874         } /* end allocation request for any available free space */
1875
1876         if (error) {
1877                 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) contig BlockAllocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
1878                 goto out;
1879         }
1880         blocks_allocated = true;
1881
1882         /* Copy data from old location to new location */
1883         error = hfs_copy_extent(hfsmp, extent_info->vp, oldStartBlock,
1884                             newStartBlock, newBlockCount, context);
1885         if (error) {
1886                 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u)=>(%u,%u) hfs_copy_extent error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount, error);
1887                 goto out;
1888         }
1889
1890         /* Update the extent record with the new start block information */
1891         extent_info->extents[index].startBlock = newStartBlock;
1892
1893         /* Sync the content back to the disk */
1894         if (extent_info->catalog_fp) {
1895                 /* Update the extents in catalog record */
1896                 if (extent_info->is_dirlink) {
1897                         error = cat_update_dirlink(hfsmp, extent_info->forkType,
1898                                        extent_info->dirlink_desc, extent_info->dirlink_attr,
1899                                        &(extent_info->dirlink_fork->ff_data));
1900                 } else {
1901                         cp->c_flag |= C_MODIFIED;
1902                         /* If this is a system file, sync volume headers on disk */
1903                         if (extent_info->is_sysfile) {
1904                                 error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
1905                         }
1906                 }
1907         } else {
1908                 /* Replace record for extents overflow or extents-based xattrs */
1909                 error = BTReplaceRecord(extent_info->fcb, extent_info->iterator,
1910                                 &(extent_info->btdata), extent_info->recordlen);
1911         }
1912         if (error) {
1913                 printf ("hfs_reclaim_extent: fileID=%u, update record error=%u\n", extent_info->fileID, error);
1914                 goto out;
1915         }
1916
1917         /* Deallocate the old extent */
1918         error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, HFS_ALLOC_SKIPFREEBLKS);
1919         if (error) {
1920                 printf ("hfs_reclaim_extent: fileID=%u start=%u, %u:(%u,%u) BlockDeallocate error=%d\n", extent_info->fileID, extent_info->recStartBlock, index, oldStartBlock, oldBlockCount, error);
1921                 goto out;
1922         }
1923         extent_info->blocks_relocated += newBlockCount;
1924
1925         if (hfs_resize_debug) {
1926                 printf ("hfs_reclaim_extent: Relocated record:%u %u:(%u,%u) to (%u,%u)\n", extent_info->overflow_count, index, oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
1927         }
1928
1929 out:
1930         if (error != 0) {
1931                 if (blocks_allocated == true) {
1932                         BlockDeallocate(hfsmp, newStartBlock, newBlockCount, HFS_ALLOC_SKIPFREEBLKS);
1933                 }
1934         } else {
1935                 /* On success, increment the total allocation blocks processed */
1936                 extent_info->cur_blockCount += newBlockCount;
1937         }
1938
1939         hfs_systemfile_unlock(hfsmp, extent_info->lockflags);
1940
1941         /* For a non-system file, if an extent entry from catalog record
1942          * was modified, sync the in-memory changes to the catalog record
1943          * on disk before ending the transaction.
1944          */
1945     if ((extent_info->catalog_fp) &&
1946         (extent_info->is_sysfile == false)) {
1947                 (void) hfs_update(extent_info->vp, MNT_WAIT);
1948         }
1949
1950         hfs_end_transaction(hfsmp);
1951
1952         return error;
1953 }
1954
1955 /* Print a progress message each time resize progress climbs more than 9 points past the last one reported */
1956 static void
1957 hfs_truncatefs_progress(struct hfsmount *hfsmp)
1958 {
1959         u_int32_t percent_done = 0;
1960
1961         hfs_resize_progress(hfsmp, &percent_done);
1962         if (percent_done <= (hfsmp->hfs_resize_progress + 9)) {
1963                 return;         /* too little change since the last message */
1964         }
1965         printf("hfs_truncatefs: %d%% done...\n", percent_done);
1966         hfsmp->hfs_resize_progress = percent_done;
1967 }
1968
1969 /*
1970  * Reclaim space at the end of a volume for the given file and fork type.
1971  *
1972  * This routine attempts to move any extent which contains allocation blocks
1973  * at or after "allocLimit."  A separate transaction is used for every extent
1974  * that needs to be moved.  If there is not contiguous space available for
1975  * moving an extent, it can be split into smaller extents.  The contents of
1976  * any moved extents are read and written via the volume's device vnode --
1977  * NOT via "vp."  During the move, moved blocks which are part of a transaction
1978  * have their physical block numbers invalidated so they will eventually be
1979  * written to their new locations.
1980  *
1981  * This function is also called for directory hard links.  Directory hard links
1982  * are regular files with no data fork and resource fork that contains alias
1983  * information for backward compatibility with pre-Leopard systems.  However
1984  * non-Mac OS X implementation can add/modify data fork or resource fork
1985  * information to directory hard links, so we check, and if required, relocate
1986  * both data fork and resource fork.
1987  *
1988  * Inputs:
1989  *    hfsmp       The volume being resized.
1990  *    vp          Vnode of the file to process; if NULL, nothing is done.
1991  *    fileID      ID of the catalog record that needs to be relocated
1992  *    forktype    The type of fork that needs to be relocated:
1993  *                kHFSResourceForkType or kHFSDataForkType.
1994  *    allocLimit  Allocation limit for the new volume size; all extents that
1995  *                use this block or any block beyond it will be relocated.
1996  *    context     Passed through to hfs_reclaim_extent().
1997  *
1998  * Returns 0 on success or when there is nothing to do, else an error code.
1999  *
2000  * Side Effects:
2001  * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
2002  * blocks that were relocated.
2003  */
2004 static int
2005 hfs_reclaim_file(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID,
2006                  u_int8_t forktype, u_long allocLimit, vfs_context_t context)
2007 {
2008         int error = 0;
2009         struct hfs_reclaim_extent_info *extent_info;
2010         int i;
2011         int lockflags = 0;
2012         struct cnode *cp;
2013         struct filefork *fp;
2014         int took_truncate_lock = false;         /* set once hfs_lock_truncate() has been called */
2015         int release_desc = false;               /* set once cat_lookup_dirlink() has filled dirlink_desc */
2016         HFSPlusExtentKey *key;
2017
2018         /* If there is no vnode for this file, then there's nothing to do. */
2019         if (vp == NULL) {
2020                 return 0;
2021         }
2022
2023         cp = VTOC(vp);
2024
2025         if (hfs_resize_debug) {
2026                 const char *filename = (const char *) cp->c_desc.cd_nameptr;
2027                 int namelen = cp->c_desc.cd_namelen;
2028
2029                 if (filename == NULL) {
2030                         filename = "";
2031                         namelen = 0;
2032                 }
2033                 printf("hfs_reclaim_file: reclaiming '%.*s'\n", namelen, filename);
2034         }
2035
2036         MALLOC(extent_info, struct hfs_reclaim_extent_info *,
2037                sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
2038         if (extent_info == NULL) {
2039                 return ENOMEM;
2040         }
2041         bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
2042         extent_info->vp = vp;
2043         extent_info->fileID = fileID;
2044         extent_info->forkType = forktype;
2045         extent_info->is_sysfile = vnode_issystem(vp);
2046         if (vnode_isdir(vp) && (cp->c_flag & C_HARDLINK)) {
2047                 extent_info->is_dirlink = true;
2048         }
2049         /* We always need allocation bitmap and extent btree lock */
2050         lockflags = SFL_BITMAP | SFL_EXTENTS;
2051         if ((fileID == kHFSCatalogFileID) || (extent_info->is_dirlink == true)) {
2052                 lockflags |= SFL_CATALOG;
2053         } else if (fileID == kHFSAttributesFileID) {
2054                 lockflags |= SFL_ATTRIBUTE;
2055         } else if (fileID == kHFSStartupFileID) {
2056                 lockflags |= SFL_STARTUP;
2057         }
2058         extent_info->lockflags = lockflags;
2059         extent_info->fcb = VTOF(hfsmp->hfs_extents_vp);        /* used for the BTSearchRecord/BTIterateRecord calls below */
2060
2061         /* Flush data associated with current file on disk.
2062          *
2063          * If the current vnode is directory hard link, no flushing of
2064          * journal or vnode is required.  The current kernel does not
2065          * modify data/resource fork of directory hard links, so nothing
2066          * will be in the cache.  If a directory hard link is newly created,
2067          * the resource fork data is written directly using devvp and
2068          * the code that actually relocates data (hfs_copy_extent()) also
2069          * uses devvp for its I/O --- so they will see a consistent copy.
2070          */
2071         if (extent_info->is_sysfile) {
2072                 /* If the current vnode is system vnode, flush journal
2073                  * to make sure that all data is written to the disk.
2074                  */
2075                 error = hfs_journal_flush(hfsmp, TRUE);
2076                 if (error) {
2077                         printf ("hfs_reclaim_file: journal_flush returned %d\n", error);
2078                         goto out;
2079                 }
2080         } else if (extent_info->is_dirlink == false) {
2081                 /* Flush all blocks associated with this regular file vnode.
2082                  * Normally there should not be buffer cache blocks for regular
2083                  * files, but for objects like symlinks, we can have buffer cache
2084                  * blocks associated with the vnode.  Therefore we call
2085                  * buf_flushdirtyblks() also.
2086                  */
2087                 buf_flushdirtyblks(vp, 0, BUF_SKIP_LOCKED, "hfs_reclaim_file");
2088
2089                 hfs_unlock(cp);         /* cnode lock is dropped here and re-taken after the truncate lock */
2090                 hfs_lock_truncate(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2091                 took_truncate_lock = true;
2092                 (void) cluster_push(vp, 0);
2093                 error = hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
2094                 if (error) {
2095                         goto out;
2096                 }
2097
2098                 /* If the file no longer exists, nothing left to do */
2099                 if (cp->c_flag & C_NOEXISTS) {
2100                         error = 0;
2101                         goto out;
2102                 }
2103
2104                 /* Wait for any in-progress writes to this vnode to complete, so that we'll
2105                  * be copying consistent bits.  (Otherwise, it's possible that an async
2106                  * write will complete to the old extent after we read from it.  That
2107                  * could lead to corruption.)
2108                  */
2109                 error = vnode_waitforwrites(vp, 0, 0, 0, "hfs_reclaim_file");
2110                 if (error) {
2111                         goto out;
2112                 }
2113         }
2114
2115         if (hfs_resize_debug) {
2116                 printf("hfs_reclaim_file: === Start reclaiming %sfork for %sid=%u ===\n", (forktype ? "rsrc" : "data"), (extent_info->is_dirlink ? "dirlink" : "file"), fileID);
2117         }
2118
2119         if (extent_info->is_dirlink) {
2120                 MALLOC(extent_info->dirlink_desc, struct cat_desc *,
2121                sizeof(struct cat_desc), M_TEMP, M_WAITOK);
2122                 MALLOC(extent_info->dirlink_attr, struct cat_attr *,
2123                sizeof(struct cat_attr), M_TEMP, M_WAITOK);
2124                 MALLOC(extent_info->dirlink_fork, struct filefork *,
2125                sizeof(struct filefork), M_TEMP, M_WAITOK);
2126                 if ((extent_info->dirlink_desc == NULL) ||
2127                     (extent_info->dirlink_attr == NULL) ||
2128                     (extent_info->dirlink_fork == NULL)) {
2129                         error = ENOMEM;         /* whichever buffers were obtained are freed at "out" */
2130                         goto out;
2131                 }
2132
2133                 /* Lookup catalog record for directory hard link and
2134                  * create a fake filefork for the value looked up from
2135                  * the disk.
2136                  */
2137                 fp = extent_info->dirlink_fork;
2138                 bzero(extent_info->dirlink_fork, sizeof(struct filefork));
2139                 extent_info->dirlink_fork->ff_cp = cp;
2140                 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2141                 error = cat_lookup_dirlink(hfsmp, fileID, forktype,
2142                                    extent_info->dirlink_desc, extent_info->dirlink_attr,
2143                                    &(extent_info->dirlink_fork->ff_data));
2144                 hfs_systemfile_unlock(hfsmp, lockflags);
2145                 if (error) {
2146                         printf ("hfs_reclaim_file: cat_lookup_dirlink for fileID=%u returned error=%u\n", fileID, error);
2147                         goto out;
2148                 }
2149                 release_desc = true;
2150         } else {
2151                 fp = VTOF(vp);
2152         }
2153
2154         extent_info->catalog_fp = fp;           /* non-NULL: the extents being processed come from the catalog record */
2155         extent_info->recStartBlock = 0;
2156         extent_info->extents = extent_info->catalog_fp->ff_extents;
2157         /* Relocate extents from the catalog record */
2158         for (i = 0; i < kHFSPlusExtentDensity; ++i) {
2159                 if (fp->ff_extents[i].blockCount == 0) {
2160                         break;
2161                 }
2162                 extent_info->extent_index = i;
2163                 error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
2164                 if (error) {
2165                         printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, fp->ff_extents[i].startBlock, fp->ff_extents[i].blockCount, error);
2166                         goto out;
2167                 }
2168         }
2169
2170         /* If the number of allocation blocks processed so far is less
2171          * than the total number of blocks for the file, continue with
2172          * the overflow extent records.
2173          */
2174         if (fp->ff_blocks <= extent_info->cur_blockCount) {
2175                 if (0 && hfs_resize_debug) {    /* message disabled by the constant 0 */
2176                         printf ("hfs_reclaim_file: Nothing more to relocate, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
2177                 }
2178                 goto out;
2179         }
2180
2181         if (hfs_resize_debug) {
2182                 printf ("hfs_reclaim_file: Will check overflow records, offset=%d, ff_blocks=%u, cur_blockCount=%u\n", i, fp->ff_blocks, extent_info->cur_blockCount);
2183         }
2184
2185         MALLOC(extent_info->iterator, struct BTreeIterator *, sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
2186         if (extent_info->iterator == NULL) {
2187                 error = ENOMEM;
2188                 goto out;
2189         }
2190         bzero(extent_info->iterator, sizeof(struct BTreeIterator));
2191         key = (HFSPlusExtentKey *) &(extent_info->iterator->key);       /* key aliases the iterator's key storage */
2192         key->keyLength = kHFSPlusExtentKeyMaximumLength;
2193         key->forkType = forktype;
2194         key->fileID = fileID;
2195         key->startBlock = extent_info->cur_blockCount;
2196
2197         extent_info->btdata.bufferAddress = extent_info->record.overflow;
2198         extent_info->btdata.itemSize = sizeof(HFSPlusExtentRecord);
2199         extent_info->btdata.itemCount = 1;
2200
2201         extent_info->catalog_fp = NULL;         /* switches hfs_reclaim_extent() to its BTReplaceRecord update path */
2202
2203         /* Search the first overflow extent with expected startBlock as 'cur_blockCount' */
2204         lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2205         error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
2206                            &(extent_info->btdata), &(extent_info->recordlen),
2207                            extent_info->iterator);
2208         hfs_systemfile_unlock(hfsmp, lockflags);
2209         while (error == 0) {
2210                 extent_info->overflow_count++;
2211                 extent_info->recStartBlock = key->startBlock;
2212                 extent_info->extents = extent_info->record.overflow;
2213                 for (i = 0; i < kHFSPlusExtentDensity; i++) {
2214                         if (extent_info->record.overflow[i].blockCount == 0) {
2215                                 goto out;       /* zero-length slot: stop processing (error is 0 here) */
2216                         }
2217                         extent_info->extent_index = i;
2218                         error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
2219                         if (error) {
2220                                 printf ("hfs_reclaim_file: fileID=%u #%d %u:(%u,%u) hfs_reclaim_extent error=%d\n", fileID, extent_info->overflow_count, i, extent_info->record.overflow[i].startBlock, extent_info->record.overflow[i].blockCount, error);
2221                                 goto out;
2222                         }
2223                 }
2224
2225                 /* Look for more overflow records */
2226                 lockflags = hfs_systemfile_lock(hfsmp, lockflags, HFS_EXCLUSIVE_LOCK);
2227                 error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
2228                                 extent_info->iterator, &(extent_info->btdata),
2229                                 &(extent_info->recordlen));
2230                 hfs_systemfile_unlock(hfsmp, lockflags);
2231                 if (error) {
2232                         break;
2233                 }
2234                 /* Stop when we encounter a different file or fork. */
2235                 if ((key->fileID != fileID) || (key->forkType != forktype)) {
2236                         break;
2237                 }
2238         }
2239         if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {         /* running out of records is not a failure */
2240                 error = 0;
2241         }
2242
2243 out:
2244         /* If any blocks were relocated, account them and report progress */
2245         if (extent_info->blocks_relocated) {
2246                 hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
2247                 hfs_truncatefs_progress(hfsmp);
2248                 if (fileID < kHFSFirstUserCatalogNodeID) {
2249                         printf ("hfs_reclaim_file: Relocated %u blocks from fileID=%u on \"%s\"\n",
2250                                         extent_info->blocks_relocated, fileID, hfsmp->vcbVN);
2251                 }
2252         }
2253         if (extent_info->iterator) {
2254                 FREE(extent_info->iterator, M_TEMP);
2255         }
2256         if (release_desc == true) {
2257                 cat_releasedesc(extent_info->dirlink_desc);
2258         }
2259         if (extent_info->dirlink_desc) {
2260                 FREE(extent_info->dirlink_desc, M_TEMP);
2261         }
2262         if (extent_info->dirlink_attr) {
2263                 FREE(extent_info->dirlink_attr, M_TEMP);
2264         }
2265         if (extent_info->dirlink_fork) {
2266                 FREE(extent_info->dirlink_fork, M_TEMP);
2267         }
2268         if ((extent_info->blocks_relocated != 0) && (extent_info->is_sysfile == false)) {
2269                 (void) hfs_update(vp, MNT_WAIT);
2270         }
2271         if (took_truncate_lock) {
2272                 hfs_unlock_truncate(cp, HFS_LOCK_DEFAULT);
2273         }
2274         if (extent_info) {      /* always true here: extent_info was already dereferenced above */
2275                 FREE(extent_info, M_TEMP);
2276         }
2277         if (hfs_resize_debug) {
2278                 printf("hfs_reclaim_file: === Finished relocating %sfork for fileid=%u (error=%d) ===\n", (forktype ? "rsrc" : "data"), fileID, error);
2279         }
2280
2281         return error;
2282 }
2283
2284
2285 /*
2286  * This journal_relocate callback updates the journal info block to point
2287  * at the new journal location.  This write must NOT be done using the
2288  * transaction.  We must write the block immediately.  We must also force
2289  * it to get to the media so that the new journal location will be seen by
2290  * the replay code before we can safely let journaled blocks be written
2291  * to their normal locations.  Its arguments are packed in the struct below.
2292  *
2293  * The tests for journal_uses_fua below are mildly hacky.  Since the journal
2294  * and the file system are both on the same device, I'm leveraging what
2295  * the journal has decided about FUA.
2296  */
2297 struct hfs_journal_relocate_args {
2298         struct hfsmount *hfsmp;         /* mount whose journal info block is rewritten */
2299         vfs_context_t context;          /* supplies the credential for the read and the context for the cache-sync ioctl */
2300         u_int32_t newStartBlock;        /* first allocation block of the relocated journal */
2301         u_int32_t newBlockCount;        /* length of the relocated journal, in allocation blocks */
2302 };
2303
2304 static errno_t
2305 hfs_journal_relocate_callback(void *_args)
2306 {
2307         int error;
2308         struct hfs_journal_relocate_args *args = _args;
2309         struct hfsmount *hfsmp = args->hfsmp;
2310         buf_t bp;
2311         JournalInfoBlock *jibp;
2312
2313         error = buf_meta_bread(hfsmp->hfs_devvp,
2314                            hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
2315                            hfsmp->blockSize, vfs_context_ucred(args->context), &bp);
2316         if (error) {
2317                 printf("hfs_journal_relocate_callback: failed to read JIB (%d)\n", error);
2318                 if (bp) {
2319             buf_brelse(bp);
2320                 }
2321                 return error;
2322         }
2323         jibp = (JournalInfoBlock*) buf_dataptr(bp);
2324         jibp->offset = SWAP_BE64((u_int64_t)args->newStartBlock * hfsmp->blockSize);
2325         jibp->size = SWAP_BE64((u_int64_t)args->newBlockCount * hfsmp->blockSize);
2326         if (journal_uses_fua(hfsmp->jnl))
2327                 buf_markfua(bp);
2328         error = buf_bwrite(bp);
2329         if (error) {
2330                 printf("hfs_journal_relocate_callback: failed to write JIB (%d)\n", error);
2331                 return error;
2332         }
2333         if (!journal_uses_fua(hfsmp->jnl)) {
2334                 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, args->context);
2335                 if (error) {
2336                         printf("hfs_journal_relocate_callback: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
2337                         error = 0;              /* Don't fail the operation. */
2338                 }
2339         }
2340
2341         return error;
2342 }
2343
2344
2345 /* Type of resize operation in progress; passed as resize_type to hfs_relocate_journal_file() */
2346 #define HFS_RESIZE_TRUNCATE     1       /* volume is being shrunk */
2347 #define HFS_RESIZE_EXTEND       2       /* volume is being grown */
2348
2349 /*
2350  * Core function to relocate the journal file.  A new contiguous extent large
2351  * enough for jnl_size bytes is allocated, the old journal extent is freed and
2352  * the .journal catalog record is updated.  If the journal is on the file
2353  * system's own device, the journal code and the journal info block are also
2354  * pointed at the new location.
2355  *
2356  * Inputs:
2357  *    hfsmp        The volume whose journal file is relocated.
2358  *    jnl_size     Size in bytes of the relocated journal; the caller can
2359  *                 provide a new size if they want to change the journal size.
2360  *    resize_type  HFS_RESIZE_TRUNCATE or HFS_RESIZE_EXTEND.
2361  *    context      Handed to the journal info block update callback.
2362  *
2363  * Returns 0 on success, otherwise a non-zero error code.
2364  *
2365  * Note: This function starts a transaction and takes the catalog and
2366  * bitmap system file locks.
2367  */
2368 static int
2369 hfs_relocate_journal_file(struct hfsmount *hfsmp, u_int32_t jnl_size, int resize_type, vfs_context_t context)
2370 {
2371         int error;
2372         int journal_err;
2373         int lockflags;
2374         u_int32_t oldStartBlock, oldBlockCount;
2375         u_int32_t newStartBlock, newBlockCount;
2376         u_int32_t jnlBlockCount;
2377         u_int32_t alloc_skipfreeblks;
2378         struct cat_desc journal_desc;
2379         struct cat_attr journal_attr;
2380         struct cat_fork journal_fork;
2381         struct hfs_journal_relocate_args callback_args;
2382
2383         /* Calculate the number of allocation blocks required for the journal */
2384         jnlBlockCount = howmany(jnl_size, hfsmp->blockSize);
2385
2386         /*
2387          * During truncatefs(), the volume free block count is updated before
2388          * relocating data, so the blocks needed for relocation are already
2389          * reserved and the free block check is skipped (HFS_ALLOC_SKIPFREEBLKS).
2390          * When extending, allocations and deallocations are accounted normally.
2391          */
2392         alloc_skipfreeblks = (resize_type == HFS_RESIZE_TRUNCATE) ? HFS_ALLOC_SKIPFREEBLKS : 0;
2393
2394         error = hfs_start_transaction(hfsmp);
2395         if (error) {
2396                 printf("hfs_relocate_journal_file: hfs_start_transaction returned %d\n", error);
2397                 return error;
2398         }
2399         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
2400
2401         error = BlockAllocate(hfsmp, 1, jnlBlockCount, jnlBlockCount,
2402                           HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_FLUSHTXN | alloc_skipfreeblks,
2403                           &newStartBlock, &newBlockCount);
2404         if (error) {
2405                 printf("hfs_relocate_journal_file: BlockAllocate returned %d\n", error);
2406                 goto fail;
2407         }
2408         if (newBlockCount != jnlBlockCount) {
2409                 printf("hfs_relocate_journal_file: newBlockCount != jnlBlockCount (%u, %u)\n", newBlockCount, jnlBlockCount);
2410                 /* BlockAllocate returned 0, so set an error or the caller would see success */
2411                 error = ENOSPC;
2412                 goto free_fail;
2413         }
2414
2415         error = cat_idlookup(hfsmp, hfsmp->hfs_jnlfileid, 1, 0, &journal_desc, &journal_attr, &journal_fork);
2416         if (error) {
2417                 printf("hfs_relocate_journal_file: cat_idlookup returned %d\n", error);
2418                 goto free_fail;
2419         }
2420
2421         oldStartBlock = journal_fork.cf_extents[0].startBlock;
2422         oldBlockCount = journal_fork.cf_extents[0].blockCount;
2423         error = BlockDeallocate(hfsmp, oldStartBlock, oldBlockCount, alloc_skipfreeblks);
2424         if (error) {
2425                 printf("hfs_relocate_journal_file: BlockDeallocate returned %d\n", error);
2426                 cat_releasedesc(&journal_desc);  /* don't leak the descriptor from cat_idlookup */
2427                 goto free_fail;
2428         }
2429
2430         /* Update the catalog record for .journal */
2431         journal_fork.cf_size = (off_t)newBlockCount * hfsmp->blockSize;
2432         journal_fork.cf_extents[0].startBlock = newStartBlock;
2433         journal_fork.cf_extents[0].blockCount = newBlockCount;
2434         journal_fork.cf_blocks = newBlockCount;
2435         error = cat_update(hfsmp, &journal_desc, &journal_attr, &journal_fork, NULL);
2436         cat_releasedesc(&journal_desc);  /* all done with cat descriptor */
2437         if (error) {
2438                 printf("hfs_relocate_journal_file: cat_update returned %d\n", error);
2439                 goto free_fail;
2440         }
2441
2442         /*
2443          * If the journal is part of the file system, then tell the journal
2444          * code about the new location.  If the journal is on an external
2445          * device, then just keep using it as-is.
2446          */
2447         if (hfsmp->jvp == hfsmp->hfs_devvp) {
2448                 callback_args.hfsmp = hfsmp;
2449                 callback_args.context = context;
2450                 callback_args.newStartBlock = newStartBlock;
2451                 callback_args.newBlockCount = newBlockCount;
2452
2453                 error = journal_relocate(hfsmp->jnl, (off_t)newStartBlock*hfsmp->blockSize,
2454                                  (off_t)newBlockCount*hfsmp->blockSize, 0,
2455                                  hfs_journal_relocate_callback, &callback_args);
2456                 if (error) {
2457                         /* NOTE: journal_relocate will mark the journal invalid. */
2458                         printf("hfs_relocate_journal_file: journal_relocate returned %d\n", error);
2459                         goto fail;
2460                 }
2461                 if (hfs_resize_debug) {
2462                         printf ("hfs_relocate_journal_file: Successfully relocated journal from (%u,%u) to (%u,%u)\n", oldStartBlock, oldBlockCount, newStartBlock, newBlockCount);
2463                 }
2464                 hfsmp->jnl_start = newStartBlock;
2465                 hfsmp->jnl_size = (off_t)newBlockCount * hfsmp->blockSize;
2466         }
2467
2468         hfs_systemfile_unlock(hfsmp, lockflags);
2469         error = hfs_end_transaction(hfsmp);
2470         if (error) {
2471                 printf("hfs_relocate_journal_file: hfs_end_transaction returned %d\n", error);
2472         }
2473
2474         return error;
2475
2476 free_fail:
2477         /* Undo the new allocation with the same accounting mode it was made with */
2478         journal_err = BlockDeallocate(hfsmp, newStartBlock, newBlockCount, alloc_skipfreeblks);
2479         if (journal_err) {
2480                 printf("hfs_relocate_journal_file: BlockDeallocate returned %d\n", journal_err);
2481                 hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED);
2482         }
2483 fail:
2484         hfs_systemfile_unlock(hfsmp, lockflags);
2485         (void) hfs_end_transaction(hfsmp);
2486         if (hfs_resize_debug) {
2487                 printf ("hfs_relocate_journal_file: Error relocating journal file (error=%d)\n", error);
2488         }
2489         return error;
2490 }
2491
2492
2493 /*
2494  * Relocate the journal file when the file system is being truncated.
2495  * We do not down-size the journal when the file system size is
2496  * reduced, so we always provide the current journal size to the
2497  * relocate code.
2498  */
2499 static int
2500 hfs_reclaim_journal_file(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
2501 {
2502         int error = 0;
2503         u_int32_t startBlock;
2504         u_int32_t blockCount = hfsmp->jnl_size / hfsmp->blockSize;
2505
2506         /*
2507          * Figure out the location of the .journal file.  When the journal
2508          * is on an external device, we need to look up the .journal file.
2509          */
2510         if (hfsmp->jvp == hfsmp->hfs_devvp) {
2511                 startBlock = hfsmp->jnl_start;
2512                 blockCount = hfsmp->jnl_size / hfsmp->blockSize;
2513         } else {
2514                 u_int32_t fileid;
2515                 u_int32_t old_jnlfileid;
2516                 struct cat_attr attr;
2517                 struct cat_fork fork;
2518
2519                 /*
2520                  * The cat_lookup inside GetFileInfo will fail because hfs_jnlfileid
2521                  * is set, and it is trying to hide the .journal file.  So temporarily
2522                  * unset the field while calling GetFileInfo.
2523                  */
2524                 old_jnlfileid = hfsmp->hfs_jnlfileid;
2525                 hfsmp->hfs_jnlfileid = 0;
2526                 fileid = GetFileInfo(hfsmp, kHFSRootFolderID, ".journal", &attr, &fork);
2527                 hfsmp->hfs_jnlfileid = old_jnlfileid;
2528                 if (fileid != old_jnlfileid) {
2529                         printf("hfs_reclaim_journal_file: cannot find .journal file!\n");
2530                         return EIO;
2531                 }
2532
2533                 startBlock = fork.cf_extents[0].startBlock;
2534                 blockCount = fork.cf_extents[0].blockCount;
2535         }
2536
2537         if (startBlock + blockCount <= allocLimit) {
2538                 /* The journal file does not require relocation */
2539                 return 0;
2540         }
2541
2542         error = hfs_relocate_journal_file(hfsmp, blockCount * hfsmp->blockSize, HFS_RESIZE_TRUNCATE, context);
2543         if (error == 0) {
2544                 hfsmp->hfs_resize_blocksmoved += blockCount;
2545                 hfs_truncatefs_progress(hfsmp);
2546                 printf ("hfs_reclaim_journal_file: Relocated %u blocks from journal on \"%s\"\n",
2547                                 blockCount, hfsmp->vcbVN);
2548         }
2549
2550         return error;
2551 }
2552
2553
2554 /*
2555  * Move the journal info block to a new location.  We have to make sure the
2556  * new copy of the journal info block gets to the media first, then change
2557  * the field in the volume header and the catalog record.
2558  */
2559 static int
2560 hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
2561 {
2562         int error;
2563         int journal_err;
2564         int lockflags;
2565         u_int32_t oldBlock;
2566         u_int32_t newBlock;
2567         u_int32_t blockCount;
2568         struct cat_desc jib_desc;
2569         struct cat_attr jib_attr;
2570         struct cat_fork jib_fork;
2571         buf_t old_bp, new_bp;
2572
2573         if (hfsmp->vcbJinfoBlock <= allocLimit) {
2574                 /* The journal info block does not require relocation */
2575                 return 0;
2576         }
2577
2578         error = hfs_start_transaction(hfsmp);
2579         if (error) {
2580                 printf("hfs_reclaim_journal_info_block: hfs_start_transaction returned %d\n", error);
2581                 return error;
2582         }
2583         lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG | SFL_BITMAP, HFS_EXCLUSIVE_LOCK);
2584
2585         error = BlockAllocate(hfsmp, 1, 1, 1,
2586                           HFS_ALLOC_METAZONE | HFS_ALLOC_FORCECONTIG | HFS_ALLOC_SKIPFREEBLKS | HFS_ALLOC_FLUSHTXN,
2587                           &newBlock, &blockCount);
2588         if (error) {
2589                 printf("hfs_reclaim_journal_info_block: BlockAllocate returned %d\n", error);
2590                 goto fail;
2591         }
2592         if (blockCount != 1) {
2593                 printf("hfs_reclaim_journal_info_block: blockCount != 1 (%u)\n", blockCount);
2594                 goto free_fail;
2595         }
2596
2597         /* Copy the old journal info block content to the new location */
2598         error = buf_meta_bread(hfsmp->hfs_devvp,
2599                            hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
2600                            hfsmp->blockSize, vfs_context_ucred(context), &old_bp);
2601         if (error) {
2602                 printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error);
2603                 if (old_bp) {
2604             buf_brelse(old_bp);
2605                 }
2606                 goto free_fail;
2607         }
2608         new_bp = buf_getblk(hfsmp->hfs_devvp,
2609                         newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
2610                         hfsmp->blockSize, 0, 0, BLK_META);
2611         bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize);
2612         buf_brelse(old_bp);
2613         if (journal_uses_fua(hfsmp->jnl))
2614                 buf_markfua(new_bp);
2615         error = buf_bwrite(new_bp);
2616         if (error) {
2617                 printf("hfs_reclaim_journal_info_block: failed to write new JIB (%d)\n", error);
2618                 goto free_fail;
2619         }
2620         if (!journal_uses_fua(hfsmp->jnl)) {
2621                 error = VNOP_IOCTL(hfsmp->hfs_devvp, DKIOCSYNCHRONIZECACHE, NULL, FWRITE, context);
2622                 if (error) {
2623                         printf("hfs_reclaim_journal_info_block: DKIOCSYNCHRONIZECACHE failed (%d)\n", error);
2624                         /* Don't fail the operation. */
2625                 }
2626         }
2627
2628         /* Deallocate the old block once the new one has the new valid content */
2629         error = BlockDeallocate(hfsmp, hfsmp->vcbJinfoBlock, 1, HFS_ALLOC_SKIPFREEBLKS);
2630         if (error) {
2631                 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
2632                 goto free_fail;
2633         }
2634
2635
2636         /* Update the catalog record for .journal_info_block */
2637         error = cat_idlookup(hfsmp, hfsmp->hfs_jnlinfoblkid, 1, 0, &jib_desc, &jib_attr, &jib_fork);
2638         if (error) {
2639                 printf("hfs_reclaim_journal_info_block: cat_idlookup returned %d\n", error);
2640                 goto fail;
2641         }
2642         oldBlock = jib_fork.cf_extents[0].startBlock;
2643         jib_fork.cf_size = hfsmp->blockSize;
2644         jib_fork.cf_extents[0].startBlock = newBlock;
2645         jib_fork.cf_extents[0].blockCount = 1;
2646         jib_fork.cf_blocks = 1;
2647         error = cat_update(hfsmp, &jib_desc, &jib_attr, &jib_fork, NULL);
2648         cat_releasedesc(&jib_desc);  /* all done with cat descriptor */
2649         if (error) {
2650                 printf("hfs_reclaim_journal_info_block: cat_update returned %d\n", error);
2651                 goto fail;
2652         }
2653
2654         /* Update the pointer to the journal info block in the volume header. */
2655         hfsmp->vcbJinfoBlock = newBlock;
2656         error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
2657         if (error) {
2658                 printf("hfs_reclaim_journal_info_block: hfs_flushvolumeheader returned %d\n", error);
2659                 goto fail;
2660         }
2661         hfs_systemfile_unlock(hfsmp, lockflags);
2662         error = hfs_end_transaction(hfsmp);
2663         if (error) {
2664                 printf("hfs_reclaim_journal_info_block: hfs_end_transaction returned %d\n", error);
2665         }
2666         error = hfs_journal_flush(hfsmp, FALSE);
2667         if (error) {
2668                 printf("hfs_reclaim_journal_info_block: journal_flush returned %d\n", error);
2669         }
2670
2671         /* Account for the block relocated and print progress */
2672         hfsmp->hfs_resize_blocksmoved += 1;
2673         hfs_truncatefs_progress(hfsmp);
2674         if (!error) {
2675                 printf ("hfs_reclaim_journal_info: Relocated 1 block from journal info on \"%s\"\n",
2676                                 hfsmp->vcbVN);
2677                 if (hfs_resize_debug) {
2678                         printf ("hfs_reclaim_journal_info_block: Successfully relocated journal info block from (%u,%u) to (%u,%u)\n", oldBlock, blockCount, newBlock, blockCount);
2679                 }
2680         }
2681         return error;
2682
2683 free_fail:
2684         journal_err = BlockDeallocate(hfsmp, newBlock, blockCount, HFS_ALLOC_SKIPFREEBLKS);
2685         if (journal_err) {
2686                 printf("hfs_reclaim_journal_info_block: BlockDeallocate returned %d\n", error);
2687                 hfs_mark_inconsistent(hfsmp, HFS_ROLLBACK_FAILED);
2688         }
2689
2690 fail:
2691         hfs_systemfile_unlock(hfsmp, lockflags);
2692         (void) hfs_end_transaction(hfsmp);
2693         if (hfs_resize_debug) {
2694                 printf ("hfs_reclaim_journal_info_block: Error relocating journal info block (error=%d)\n", error);
2695         }
2696         return error;
2697 }
2698
2699
2700 static u_int64_t
2701 calculate_journal_size(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count)
2702 {
2703         u_int64_t journal_size;
2704         u_int32_t journal_scale;
2705
2706 #define DEFAULT_JOURNAL_SIZE (8*1024*1024)
2707 #define MAX_JOURNAL_SIZE     (512*1024*1024)
2708
2709         /* Calculate the journal size for this volume.   We want
2710          * at least 8 MB of journal for each 100 GB of disk space.
2711          * We cap the size at 512 MB, unless the allocation block
2712          * size is larger, in which case, we use one allocation
2713          * block.
2714          */
2715         journal_scale = (sector_size * sector_count) / ((u_int64_t)100 * 1024 * 1024 * 1024);
2716         journal_size = DEFAULT_JOURNAL_SIZE * (journal_scale + 1);
2717         if (journal_size > MAX_JOURNAL_SIZE) {
2718                 journal_size = MAX_JOURNAL_SIZE;
2719         }
2720         if (journal_size < hfsmp->blockSize) {
2721                 journal_size = hfsmp->blockSize;
2722         }
2723         return journal_size;
2724 }
2725
2726
2727 /*
2728  * Calculate the expected journal size based on current partition size.
2729  * If the size of the current journal is less than the calculated size,
2730  * force journal relocation with the new journal size.
2731  */
2732 static int
2733 hfs_extend_journal(struct hfsmount *hfsmp, u_int32_t sector_size, u_int64_t sector_count, vfs_context_t context)
2734 {
2735         int error = 0;
2736         u_int64_t calc_journal_size;
2737
2738         if (hfsmp->jvp != hfsmp->hfs_devvp) {
2739                 if (hfs_resize_debug) {
2740                         printf("hfs_extend_journal: not resizing the journal because it is on an external device.\n");
2741                 }
2742                 return 0;
2743         }
2744
2745         calc_journal_size = calculate_journal_size(hfsmp, sector_size, sector_count);
2746         if (calc_journal_size <= hfsmp->jnl_size) {
2747                 /* The journal size requires no modification */
2748                 goto out;
2749         }
2750
2751         if (hfs_resize_debug) {
2752                 printf ("hfs_extend_journal: journal old=%u, new=%qd\n", hfsmp->jnl_size, calc_journal_size);
2753         }
2754
2755         /* Extend the journal to the new calculated size */
2756         error = hfs_relocate_journal_file(hfsmp, calc_journal_size, HFS_RESIZE_EXTEND, context);
2757         if (error == 0) {
2758                 printf ("hfs_extend_journal: Extended journal size to %u bytes on \"%s\"\n",
2759                                 hfsmp->jnl_size, hfsmp->vcbVN);
2760         }
2761 out:
2762         return error;
2763 }
2764
2765
2766 /*
2767  * This function traverses through all extended attribute records for a given
2768  * fileID, and calls function that reclaims data blocks that exist in the
2769  * area of the disk being reclaimed which in turn is responsible for allocating
2770  * new space, copying extent data, deallocating new space, and if required,
2771  * splitting the extent.
2772  *
2773  * Note: The caller has already acquired the cnode lock on the file.  Therefore
2774  * we are assured that no other thread would be creating/deleting/modifying
2775  * extended attributes for this file.
2776  *
2777  * Side Effects:
2778  * hfsmp->hfs_resize_blocksmoved is incremented by the number of allocation
2779  * blocks that were relocated.
2780  *
2781  * Returns:
2782  *      0 on success, non-zero on failure.
2783  */
2784 static int
2785 hfs_reclaim_xattr(struct hfsmount *hfsmp, struct vnode *vp, u_int32_t fileID, u_int32_t allocLimit, vfs_context_t context)
2786 {
2787         int error = 0;
2788         struct hfs_reclaim_extent_info *extent_info;
2789         int i;
2790         HFSPlusAttrKey *key;
2791         int *lockflags;
2792
2793         if (hfs_resize_debug) {
2794                 printf("hfs_reclaim_xattr: === Start reclaiming xattr for id=%u ===\n", fileID);
2795         }
2796
2797         MALLOC(extent_info, struct hfs_reclaim_extent_info *,
2798                sizeof(struct hfs_reclaim_extent_info), M_TEMP, M_WAITOK);
2799         if (extent_info == NULL) {
2800                 return ENOMEM;
2801         }
2802         bzero(extent_info, sizeof(struct hfs_reclaim_extent_info));
2803         extent_info->vp = vp;
2804         extent_info->fileID = fileID;
2805         extent_info->is_xattr = true;
2806         extent_info->is_sysfile = vnode_issystem(vp);
2807         extent_info->fcb = VTOF(hfsmp->hfs_attribute_vp);
2808         lockflags = &(extent_info->lockflags);
2809         *lockflags = SFL_ATTRIBUTE | SFL_BITMAP;
2810
2811         /* Initialize iterator from the extent_info structure */
2812         MALLOC(extent_info->iterator, struct BTreeIterator *,
2813                sizeof(struct BTreeIterator), M_TEMP, M_WAITOK);
2814         if (extent_info->iterator == NULL) {
2815                 error = ENOMEM;
2816                 goto out;
2817         }
2818         bzero(extent_info->iterator, sizeof(struct BTreeIterator));
2819
2820         /* Build attribute key */
2821         key = (HFSPlusAttrKey *)&(extent_info->iterator->key);
2822         error = hfs_buildattrkey(fileID, NULL, key);
2823         if (error) {
2824                 goto out;
2825         }
2826
2827         /* Initialize btdata from extent_info structure.  Note that the
2828          * buffer pointer actually points to the xattr record from the
2829          * extent_info structure itself.
2830          */
2831         extent_info->btdata.bufferAddress = &(extent_info->record.xattr);
2832         extent_info->btdata.itemSize = sizeof(HFSPlusAttrRecord);
2833         extent_info->btdata.itemCount = 1;
2834
2835         /*
2836          * Sync all extent-based attribute data to the disk.
2837          *
2838          * All extent-based attribute data I/O is performed via cluster
2839          * I/O using a virtual file that spans across entire file system
2840          * space.
2841          */
2842         hfs_lock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);
2843         (void)cluster_push(hfsmp->hfs_attrdata_vp, 0);
2844         error = vnode_waitforwrites(hfsmp->hfs_attrdata_vp, 0, 0, 0, "hfs_reclaim_xattr");
2845         hfs_unlock_truncate(VTOC(hfsmp->hfs_attrdata_vp), HFS_LOCK_DEFAULT);
2846         if (error) {
2847                 goto out;
2848         }
2849
2850         /* Search for extended attribute for current file.  This
2851          * will place the iterator before the first matching record.
2852          */
2853         *lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
2854         error = BTSearchRecord(extent_info->fcb, extent_info->iterator,
2855                            &(extent_info->btdata), &(extent_info->recordlen),
2856                            extent_info->iterator);
2857         hfs_systemfile_unlock(hfsmp, *lockflags);
2858         if (error) {
2859                 if (error != btNotFound) {
2860                         goto out;
2861                 }
2862                 /* btNotFound is expected here, so just mask it */
2863                 error = 0;
2864         }
2865
2866         while (1) {
2867                 /* Iterate to the next record */
2868                 *lockflags = hfs_systemfile_lock(hfsmp, *lockflags, HFS_EXCLUSIVE_LOCK);
2869                 error = BTIterateRecord(extent_info->fcb, kBTreeNextRecord,
2870                                 extent_info->iterator, &(extent_info->btdata),
2871                                 &(extent_info->recordlen));
2872                 hfs_systemfile_unlock(hfsmp, *lockflags);
2873
2874                 /* Stop the iteration if we encounter end of btree or xattr with different fileID */
2875                 if (error || key->fileID != fileID) {
2876                         if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
2877                                 error = 0;
2878                         }
2879                         break;
2880                 }
2881
2882                 /* We only care about extent-based EAs */
2883                 if ((extent_info->record.xattr.recordType != kHFSPlusAttrForkData) &&
2884                     (extent_info->record.xattr.recordType != kHFSPlusAttrExtents)) {
2885                         continue;
2886                 }
2887
2888                 if (extent_info->record.xattr.recordType == kHFSPlusAttrForkData) {
2889                         extent_info->overflow_count = 0;
2890                         extent_info->extents = extent_info->record.xattr.forkData.theFork.extents;
2891                 } else if (extent_info->record.xattr.recordType == kHFSPlusAttrExtents) {
2892                         extent_info->overflow_count++;
2893                         extent_info->extents = extent_info->record.xattr.overflowExtents.extents;
2894                 }
2895
2896                 extent_info->recStartBlock = key->startBlock;
2897                 for (i = 0; i < kHFSPlusExtentDensity; i++) {
2898                         if (extent_info->extents[i].blockCount == 0) {
2899                                 break;
2900                         }
2901                         extent_info->extent_index = i;
2902                         error = hfs_reclaim_extent(hfsmp, allocLimit, extent_info, context);
2903                         if (error) {
2904                                 printf ("hfs_reclaim_xattr: fileID=%u hfs_reclaim_extent error=%d\n", fileID, error);
2905                                 goto out;
2906                         }
2907                 }
2908         }
2909
2910 out:
2911         /* If any blocks were relocated, account them and report progress */
2912         if (extent_info->blocks_relocated) {
2913                 hfsmp->hfs_resize_blocksmoved += extent_info->blocks_relocated;
2914                 hfs_truncatefs_progress(hfsmp);
2915         }
2916         if (extent_info->iterator) {
2917                 FREE(extent_info->iterator, M_TEMP);
2918         }
2919         if (extent_info) {
2920                 FREE(extent_info, M_TEMP);
2921         }
2922         if (hfs_resize_debug) {
2923                 printf("hfs_reclaim_xattr: === Finished relocating xattr for fileid=%u (error=%d) ===\n", fileID, error);
2924         }
2925         return error;
2926 }
2927
2928 /*
2929  * Reclaim any extent-based extended attributes allocation blocks from
2930  * the area of the disk that is being truncated.
2931  *
2932  * The function traverses the attribute btree to find out the fileIDs
2933  * of the extended attributes that need to be relocated.  For every
2934  * file whose large EA requires relocation, it looks up the cnode and
2935  * calls hfs_reclaim_xattr() to do all the work for allocating
2936  * new space, copying data, deallocating old space, and if required,
2937  * splitting the extents.
2938  *
2939  * Inputs:
2940  *      allocLimit    - starting block of the area being reclaimed
2941  *
2942  * Returns:
2943  *      returns 0 on success, non-zero on failure.
2944  */
2945 static int
2946 hfs_reclaim_xattrspace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
2947 {
2948         int error = 0;
2949         FCB *fcb;
2950         struct BTreeIterator *iterator = NULL;
2951         struct FSBufferDescriptor btdata;
2952         HFSPlusAttrKey *key;
2953         HFSPlusAttrRecord rec;
2954         int lockflags = 0;
2955         cnid_t prev_fileid = 0;
2956         struct vnode *vp;
2957         int need_relocate;
2958         int btree_operation;
2959         u_int32_t files_moved = 0;
2960         u_int32_t prev_blocksmoved;
2961         int i;
2962
2963         fcb = VTOF(hfsmp->hfs_attribute_vp);
2964         /* Store the value to print total blocks moved by this function in end */
2965         prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;
2966
2967         if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
2968                 return ENOMEM;
2969         }
2970         bzero(iterator, sizeof(*iterator));
2971         key = (HFSPlusAttrKey *)&iterator->key;
2972         btdata.bufferAddress = &rec;
2973         btdata.itemSize = sizeof(rec);
2974         btdata.itemCount = 1;
2975
2976         need_relocate = false;
2977         btree_operation = kBTreeFirstRecord;
2978         /* Traverse the attribute btree to find extent-based EAs to reclaim */
2979         while (1) {
2980                 lockflags = hfs_systemfile_lock(hfsmp, SFL_ATTRIBUTE, HFS_SHARED_LOCK);
2981                 error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
2982                 hfs_systemfile_unlock(hfsmp, lockflags);
2983                 if (error) {
2984                         if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
2985                                 error = 0;
2986                         }
2987                         break;
2988                 }
2989                 btree_operation = kBTreeNextRecord;
2990
2991                 /* If the extents of current fileID were already relocated, skip it */
2992                 if (prev_fileid == key->fileID) {
2993                         continue;
2994                 }
2995
2996                 /* Check if any of the extents in the current record need to be relocated */
2997                 need_relocate = false;
2998                 switch(rec.recordType) {
2999                         case kHFSPlusAttrForkData:
3000                                 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3001                                         if (rec.forkData.theFork.extents[i].blockCount == 0) {
3002                                                 break;
3003                                         }
3004                                         if ((rec.forkData.theFork.extents[i].startBlock +
3005                                              rec.forkData.theFork.extents[i].blockCount) > allocLimit) {
3006                                                 need_relocate = true;
3007                                                 break;
3008                                         }
3009                                 }
3010                                 break;
3011
3012                         case kHFSPlusAttrExtents:
3013                                 for (i = 0; i < kHFSPlusExtentDensity; i++) {
3014                                         if (rec.overflowExtents.extents[i].blockCount == 0) {
3015                                                 break;
3016                                         }
3017                                         if ((rec.overflowExtents.extents[i].startBlock +
3018                                              rec.overflowExtents.extents[i].blockCount) > allocLimit) {
3019                                                 need_relocate = true;
3020                                                 break;
3021                                         }
3022                                 }
3023                                 break;
3024                 };
3025
3026                 /* Continue iterating to next attribute record */
3027                 if (need_relocate == false) {
3028                         continue;
3029                 }
3030
3031                 /* Look up the vnode for corresponding file.  The cnode
3032                  * will be locked which will ensure that no one modifies
3033                  * the xattrs when we are relocating them.
3034                  *
3035                  * We want to allow open-unlinked files to be moved,
3036                  * so provide allow_deleted == 1 for hfs_vget().
3037                  */
3038                 if (hfs_vget(hfsmp, key->fileID, &vp, 0, 1) != 0) {
3039                         continue;
3040                 }
3041
3042                 error = hfs_reclaim_xattr(hfsmp, vp, key->fileID, allocLimit, context);
3043                 hfs_unlock(VTOC(vp));
3044                 vnode_put(vp);
3045                 if (error) {
3046                         printf ("hfs_reclaim_xattrspace: Error relocating xattrs for fileid=%u (error=%d)\n", key->fileID, error);
3047                         break;
3048                 }
3049                 prev_fileid = key->fileID;
3050                 files_moved++;
3051         }
3052
3053         if (files_moved) {
3054                 printf("hfs_reclaim_xattrspace: Relocated %u xattr blocks from %u files on \"%s\"\n",
3055                (hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
3056                files_moved, hfsmp->vcbVN);
3057         }
3058
3059         kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
3060         return error;
3061 }
3062
3063 /*
3064  * Reclaim blocks from regular files.
3065  *
3066  * This function iterates over all the record in catalog btree looking
3067  * for files with extents that overlap into the space we're trying to
3068  * free up.  If a file extent requires relocation, it looks up the vnode
3069  * and calls function to relocate the data.
3070  *
3071  * Returns:
3072  *      Zero on success, non-zero on failure.
3073  */
3074 static int
3075 hfs_reclaim_filespace(struct hfsmount *hfsmp, u_int32_t allocLimit, vfs_context_t context)
3076 {
3077         int error;
3078         FCB *fcb;
3079         struct BTreeIterator *iterator = NULL;
3080         struct FSBufferDescriptor btdata;
3081         int btree_operation;
3082         int lockflags;
3083         struct HFSPlusCatalogFile filerec;
3084         struct vnode *vp;
3085         struct vnode *rvp;
3086         struct filefork *datafork;
3087         u_int32_t files_moved = 0;
3088         u_int32_t prev_blocksmoved;
3089
3090 #if CONFIG_PROTECT
3091         int keys_generated = 0;
3092 #endif
3093
3094         fcb = VTOF(hfsmp->hfs_catalog_vp);
3095         /* Store the value to print total blocks moved by this function at the end */
3096         prev_blocksmoved = hfsmp->hfs_resize_blocksmoved;
3097
3098         if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
3099                 error = ENOMEM;
3100                 goto reclaim_filespace_done;
3101         }
3102
3103 #if CONFIG_PROTECT
3104         /*
3105          * For content-protected filesystems, we may need to relocate files that
3106          * are encrypted.  If they use the new-style offset-based IVs, then
3107          * we can move them regardless of the lock state.  We create a temporary
3108          * key here that we use to read/write the data, then we discard it at the
3109          * end of the function.
3110          */
3111         if (cp_fs_protected (hfsmp->hfs_mp)) {
3112                 int needs = 0;
3113                 error = cp_needs_tempkeys(hfsmp, &needs);
3114
3115                 if ((error == 0) && (needs)) {
3116                         error = cp_entry_gentempkeys(&hfsmp->hfs_resize_cpentry, hfsmp);
3117                         if (error == 0) {
3118                                 keys_generated = 1;
3119                         }
3120                 }
3121
3122                 if (error) {
3123                         printf("hfs_reclaimspace: Error generating temporary keys for resize (%d)\n", error);
3124                         goto reclaim_filespace_done;
3125                 }
3126         }
3127
3128 #endif
3129
3130         bzero(iterator, sizeof(*iterator));
3131
3132         btdata.bufferAddress = &filerec;
3133         btdata.itemSize = sizeof(filerec);
3134         btdata.itemCount = 1;
3135
3136         btree_operation = kBTreeFirstRecord;
3137         while (1) {
3138                 lockflags = hfs_systemfile_lock(hfsmp, SFL_CATALOG, HFS_SHARED_LOCK);
3139                 error = BTIterateRecord(fcb, btree_operation, iterator, &btdata, NULL);
3140                 hfs_systemfile_unlock(hfsmp, lockflags);
3141                 if (error) {
3142                         if (error == fsBTRecordNotFoundErr || error == fsBTEndOfIterationErr) {
3143                                 error = 0;
3144                         }
3145                         break;
3146                 }
3147                 btree_operation = kBTreeNextRecord;
3148
3149                 if (filerec.recordType != kHFSPlusFileRecord) {
3150                         continue;
3151                 }
3152
3153                 /* Check if any of the extents require relocation */
3154                 if (hfs_file_extent_overlaps(hfsmp, allocLimit, &filerec) == false) {
3155                         continue;
3156                 }
3157
3158                 /* We want to allow open-unlinked files to be moved, so allow_deleted == 1 */
3159                 if (hfs_vget(hfsmp, filerec.fileID, &vp, 0, 1) != 0) {
3160                         if (hfs_resize_debug) {
3161                                 printf("hfs_reclaim_filespace: hfs_vget(%u) failed.\n", filerec.fileID);
3162                         }
3163                         continue;
3164                 }
3165
3166                 /* If data fork exists or item is a directory hard link, relocate blocks */
3167                 datafork = VTOF(vp);
3168                 if ((datafork && datafork->ff_blocks > 0) || vnode_isdir(vp)) {
3169                         error = hfs_reclaim_file(hfsmp, vp, filerec.fileID,
3170                                      kHFSDataForkType, allocLimit, context);
3171                         if (error)  {
3172                                 printf ("hfs_reclaimspace: Error reclaiming datafork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
3173                                 hfs_unlock(VTOC(vp));
3174                                 vnode_put(vp);
3175                                 break;
3176                         }
3177                 }
3178
3179                 /* If resource fork exists or item is a directory hard link, relocate blocks */
3180                 if (((VTOC(vp)->c_blocks - (datafork ? datafork->ff_blocks : 0)) > 0) || vnode_isdir(vp)) {
3181                         if (vnode_isdir(vp)) {
3182                                 /* Resource fork vnode lookup is invalid for directory hard link.
3183                                  * So we fake data fork vnode as resource fork vnode.
3184                                  */
3185                                 rvp = vp;
3186                         } else {
3187                                 error = hfs_vgetrsrc(hfsmp, vp, &rvp);
3188                                 if (error) {
3189                                         printf ("hfs_reclaimspace: Error looking up rvp for fileid=%u (error=%d)\n", filerec.fileID, error);
3190                                         hfs_unlock(VTOC(vp));
3191                                         vnode_put(vp);
3192                                         break;
3193                                 }
3194                                 VTOC(rvp)->c_flag |= C_NEED_RVNODE_PUT;
3195                         }
3196
3197                         error = hfs_reclaim_file(hfsmp, rvp, filerec.fileID,
3198                                      kHFSResourceForkType, allocLimit, context);
3199                         if (error) {
3200                                 printf ("hfs_reclaimspace: Error reclaiming rsrcfork blocks of fileid=%u (error=%d)\n", filerec.fileID, error);
3201                                 hfs_unlock(VTOC(vp));
3202                                 vnode_put(vp);
3203                                 break;
3204                         }
3205                 }
3206
3207                 /* The file forks were relocated successfully, now drop the
3208                  * cnode lock and vnode reference, and continue iterating to
3209                  * next catalog record.
3210                  */
3211                 hfs_unlock(VTOC(vp));
3212                 vnode_put(vp);
3213                 files_moved++;
3214         }
3215
3216         if (files_moved) {
3217                 printf("hfs_reclaim_filespace: Relocated %u blocks from %u files on \"%s\"\n",
3218                (hfsmp->hfs_resize_blocksmoved - prev_blocksmoved),
3219                files_moved, hfsmp->vcbVN);
3220         }
3221
3222 reclaim_filespace_done:
3223         if (iterator) {
3224                 kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
3225         }
3226
3227 #if CONFIG_PROTECT
3228         if (keys_generated) {
3229                 cp_entry_destroy(hfsmp->hfs_resize_cpentry);
3230                 hfsmp->hfs_resize_cpentry = NULL;
3231         }
3232 #endif
3233         return error;
3234 }
3235
3236 /*
3237  * Reclaim space at the end of a file system.
3238  *
3239  * Inputs -
3240  *      allocLimit      - start block of the space being reclaimed
3241  *      reclaimblks     - number of allocation blocks to reclaim
3242  */
3243 static int
3244 hfs_reclaimspace(struct hfsmount *hfsmp, u_int32_t allocLimit, u_int32_t reclaimblks, vfs_context_t context)
3245 {
3246         int error = 0;
3247
3248         /*
3249          * Preflight the bitmap to find out total number of blocks that need
3250          * relocation.
3251          *
3252          * Note: Since allocLimit is set to the location of new alternate volume
3253          * header, the check below does not account for blocks allocated for old
3254          * alternate volume header.
3255          */
3256         error = hfs_count_allocated(hfsmp, allocLimit, reclaimblks, &(hfsmp->hfs_resize_totalblocks));
3257         if (error) {
3258                 printf ("hfs_reclaimspace: Unable to determine total blocks to reclaim error=%d\n", error);
3259                 return error;
3260         }
3261         if (hfs_resize_debug) {
3262                 printf ("hfs_reclaimspace: Total number of blocks to reclaim = %u\n", hfsmp->hfs_resize_totalblocks);
3263         }
3264
3265         /* Just to be safe, sync the content of the journal to the disk before we proceed */
3266         hfs_journal_flush(hfsmp, TRUE);
3267
3268         /* First, relocate journal file blocks if they're in the way.
3269          * Doing this first will make sure that journal relocate code
3270          * gets access to contiguous blocks on disk first.  The journal
3271          * file has to be contiguous on the disk, otherwise resize will
3272          * fail.
3273          */
3274         error = hfs_reclaim_journal_file(hfsmp, allocLimit, context);
3275         if (error) {
3276                 printf("hfs_reclaimspace: hfs_reclaim_journal_file failed (%d)\n", error);
3277                 return error;
3278         }
3279
3280         /* Relocate journal info block blocks if they're in the way. */
3281         error = hfs_reclaim_journal_info_block(hfsmp, allocLimit, context);
3282         if (error) {
3283                 printf("hfs_reclaimspace: hfs_reclaim_journal_info_block failed (%d)\n", error);
3284                 return error;
3285         }
3286
3287         /* Relocate extents of the Extents B-tree if they're in the way.
3288          * Relocating extents btree before other btrees is important as
3289          * this will provide access to largest contiguous block range on
3290          * the disk for relocating extents btree.  Note that extents btree
3291          * can only have maximum of 8 extents.
3292          */
3293         error = hfs_reclaim_file(hfsmp, hfsmp->hfs_extents_vp, kHFSExtentsFileID,
3294                              kHFSDataForkType, allocLimit, context);
3295         if (error) {
3296                 printf("hfs_reclaimspace: reclaim extents b-tree returned %d\n", error);
3297                 return error;
3298         }
3299
3300         /* Relocate extents of the Allocation file if they're in the way. */
3301         error = hfs_reclaim_file(hfsmp, hfsmp->hfs_allocation_vp, kHFSAllocationFileID,
3302                              kHFSDataForkType, allocLimit, context);
3303         if (error) {
3304                 printf("hfs_reclaimspace: reclaim allocation file returned %d\n", error);
3305                 return error;
3306         }
3307
3308         /* Relocate extents of the Catalog B-tree if they're in the way. */
3309         error = hfs_reclaim_file(hfsmp, hfsmp->hfs_catalog_vp, kHFSCatalogFileID,
3310                              kHFSDataForkType, allocLimit, context);
3311         if (error) {
3312                 printf("hfs_reclaimspace: reclaim catalog b-tree returned %d\n", error);
3313                 return error;
3314         }
3315
3316         /* Relocate extents of the Attributes B-tree if they're in the way. */
3317         error = hfs_reclaim_file(hfsmp, hfsmp->hfs_attribute_vp, kHFSAttributesFileID,
3318                              kHFSDataForkType, allocLimit, context);
3319         if (error) {
3320                 printf("hfs_reclaimspace: reclaim attribute b-tree returned %d\n", error);
3321                 return error;
3322         }
3323
3324         /* Relocate extents of the Startup File if there is one and they're in the way. */
3325         error = hfs_reclaim_file(hfsmp, hfsmp->hfs_startup_vp, kHFSStartupFileID,
3326                              kHFSDataForkType, allocLimit, context);
3327         if (error) {
3328                 printf("hfs_reclaimspace: reclaim startup file returned %d\n", error);
3329                 return error;
3330         }
3331
3332         /*
3333          * We need to make sure the alternate volume header gets flushed if we moved
3334          * any extents in the volume header.  But we need to do that before
3335          * shrinking the size of the volume, or else the journal code will panic
3336          * with an invalid (too large) block number.
3337          *
3338          * Note that blks_moved will be set if ANY extent was moved, even
3339          * if it was just an overflow extent.  In this case, the journal_flush isn't
3340          * strictly required, but shouldn't hurt.
3341          */
3342         if (hfsmp->hfs_resize_blocksmoved) {
3343                 hfs_journal_flush(hfsmp, TRUE);
3344         }
3345
3346         /* Reclaim extents from catalog file records */
3347         error = hfs_reclaim_filespace(hfsmp, allocLimit, context);
3348         if (error) {
3349                 printf ("hfs_reclaimspace: hfs_reclaim_filespace returned error=%d\n", error);
3350                 return error;
3351         }
3352
3353         /* Reclaim extents from extent-based extended attributes, if any */
3354         error = hfs_reclaim_xattrspace(hfsmp, allocLimit, context);
3355         if (error) {
3356                 printf ("hfs_reclaimspace: hfs_reclaim_xattrspace returned error=%d\n", error);
3357                 return error;
3358         }
3359
3360         return error;
3361 }
3362
3363
3364 /*
3365  * Check if there are any extents (including overflow extents) that overlap
3366  * into the disk space that is being reclaimed.
3367  *
3368  * Output -
3369  *      true  - One of the extents need to be relocated
3370  *      false - No overflow extents need to be relocated, or there was an error
3371  */
3372 static int
3373 hfs_file_extent_overlaps(struct hfsmount *hfsmp, u_int32_t allocLimit, struct HFSPlusCatalogFile *filerec)
3374 {
3375         struct BTreeIterator * iterator = NULL;
3376         struct FSBufferDescriptor btdata;
3377         HFSPlusExtentRecord extrec;
3378         HFSPlusExtentKey *extkeyptr;
3379         FCB *fcb;
3380         int overlapped = false;
3381         int i, j;
3382         int error;
3383         int lockflags = 0;
3384         u_int32_t endblock;
3385
3386         /* Check if data fork overlaps the target space */
3387         for (i = 0; i < kHFSPlusExtentDensity; ++i) {
3388                 if (filerec->dataFork.extents[i].blockCount == 0) {
3389                         break;
3390                 }
3391                 endblock = filerec->dataFork.extents[i].startBlock +
3392         filerec->dataFork.extents[i].blockCount;
3393                 if (endblock > allocLimit) {
3394                         overlapped = true;
3395                         goto out;
3396                 }
3397         }
3398
3399         /* Check if resource fork overlaps the target space */
3400         for (j = 0; j < kHFSPlusExtentDensity; ++j) {
3401                 if (filerec->resourceFork.extents[j].blockCount == 0) {
3402                         break;
3403                 }
3404                 endblock = filerec->resourceFork.extents[j].startBlock +
3405         filerec->resourceFork.extents[j].blockCount;
3406                 if (endblock > allocLimit) {
3407                         overlapped = true;
3408                         goto out;
3409                 }
3410         }
3411
3412         /* Return back if there are no overflow extents for this file */
3413         if ((i < kHFSPlusExtentDensity) && (j < kHFSPlusExtentDensity)) {
3414                 goto out;
3415         }
3416
3417         if (kmem_alloc(kernel_map, (vm_offset_t *)&iterator, sizeof(*iterator))) {
3418                 return 0;
3419         }
3420         bzero(iterator, sizeof(*iterator));
3421         extkeyptr = (HFSPlusExtentKey *)&iterator->key;
3422         extkeyptr->keyLength = kHFSPlusExtentKeyMaximumLength;
3423         extkeyptr->forkType = 0;
3424         extkeyptr->fileID = filerec->fileID;
3425         extkeyptr->startBlock = 0;
3426
3427         btdata.bufferAddress = &extrec;
3428         btdata.itemSize = sizeof(extrec);
3429         btdata.itemCount = 1;
3430
3431         fcb = VTOF(hfsmp->hfs_extents_vp);
3432
3433         lockflags = hfs_systemfile_lock(hfsmp, SFL_EXTENTS, HFS_SHARED_LOCK);
3434
3435         /* This will position the iterator just before the first overflow
3436          * extent record for given fileID.  It will always return btNotFound,
3437          * so we special case the error code.
3438          */
3439         error = BTSearchRecord(fcb, iterator, &btdata, NULL, iterator);
3440         if (error && (error != btNotFound)) {
3441                 goto out;
3442         }
3443
3444         /* BTIterateRecord() might return error if the btree is empty, and
3445          * therefore we return that the extent does not overflow to the caller
3446          */
3447         error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
3448         while (error == 0) {
3449                 /* Stop when we encounter a different file. */
3450                 if (extkeyptr->fileID != filerec->fileID) {
3451                         break;
3452                 }
3453                 /* Check if any of the forks exist in the target space. */
3454                 for (i = 0; i < kHFSPlusExtentDensity; ++i) {
3455                         if (extrec[i].blockCount == 0) {
3456                                 break;
3457                         }
3458                         endblock = extrec[i].startBlock + extrec[i].blockCount;
3459                         if (endblock > allocLimit) {
3460                                 overlapped = true;
3461                                 goto out;
3462                         }
3463                 }
3464                 /* Look for more records. */
3465                 error = BTIterateRecord(fcb, kBTreeNextRecord, iterator, &btdata, NULL);
3466         }
3467
3468 out:
3469         if (lockflags) {
3470                 hfs_systemfile_unlock(hfsmp, lockflags);
3471         }
3472         if (iterator) {
3473                 kmem_free(kernel_map, (vm_offset_t)iterator, sizeof(*iterator));
3474         }
3475         return overlapped;
3476 }
3477
3478
3479 /*
3480  * Calculate the progress of a file system resize operation.
3481  */
3482 __private_extern__
3483 int
3484 hfs_resize_progress(struct hfsmount *hfsmp, u_int32_t *progress)
3485 {
3486         if ((hfsmp->hfs_flags & HFS_RESIZE_IN_PROGRESS) == 0) {
3487                 return (ENXIO);
3488         }
3489
3490         if (hfsmp->hfs_resize_totalblocks > 0) {
3491                 *progress = (u_int32_t)((hfsmp->hfs_resize_blocksmoved * 100ULL) / hfsmp->hfs_resize_totalblocks);
3492         } else {
3493                 *progress = 0;
3494         }
3495
3496         return (0);
3497 }