/*
 * Copyright (c) 2002-2015 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993, 1995
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *	This product includes software developed by the University of
 *	California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)hfs_chash.c
 *	derived from @(#)ufs_ihash.c	8.7 (Berkeley) 5/17/95
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/vnode.h>
#include <sys/kernel.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/queue.h>

#include "hfs.h"	/* XXX bringup */
#include "hfs_cnode.h"

extern lck_attr_t * hfs_lock_attr;
extern lck_grp_t * hfs_mutex_group;
extern lck_grp_t * hfs_rwlock_group;

lck_grp_t * chash_lck_grp;
lck_grp_attr_t * chash_lck_grp_attr;
lck_attr_t * chash_lck_attr;

static cnode_t *hfs_cnode_alloc(hfsmount_t *hfsmp, bool *unlocked);

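/*
 * Map a cnode id to its hash chain.  hfs_cnodehash is the mask
 * returned by hashinit() below, so this simply masks off the low
 * bits of the file id.
 */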
#define CNODEHASH(hfsmp, inum) (&hfsmp->hfs_cnodehashtbl[(inum) & hfsmp->hfs_cnodehash])

static void hfs_cnode_zone_init(hfsmount_t *hfsmp);
static void hfs_cnode_zone_free(hfsmount_t *hfsmp);

/*
 * Initialize the cnode hash lock group and attributes
 * (global state shared by all mounts).
 */
void
hfs_chashinit()
{
	chash_lck_grp_attr = lck_grp_attr_alloc_init();
	chash_lck_grp = lck_grp_alloc_init("cnode_hash", chash_lck_grp_attr);
	chash_lck_attr = lck_attr_alloc_init();
}

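/*
 * Wrappers around the per-mount cnode hash mutex.  The spin variant
 * acquires the mutex spinning; hfs_chash_lock_convert turns a spin
 * acquisition into a full mutex hold.
 */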
static void hfs_chash_lock(struct hfsmount *hfsmp)
{
	lck_mtx_lock(&hfsmp->hfs_chash_mutex);
}

static void hfs_chash_lock_spin(struct hfsmount *hfsmp)
{
	lck_mtx_lock_spin(&hfsmp->hfs_chash_mutex);
}

static void hfs_chash_lock_convert(struct hfsmount *hfsmp)
{
	lck_mtx_convert_spin(&hfsmp->hfs_chash_mutex);
}

static void hfs_chash_unlock(struct hfsmount *hfsmp)
{
	lck_mtx_unlock(&hfsmp->hfs_chash_mutex);
}

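/*
 * Per-mount initialization: set up the cnode hash table, its mutex,
 * and the cnode allocation zone for this volume.
 */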
void
hfs_chashinit_finish(struct hfsmount *hfsmp)
{
	lck_mtx_init(&hfsmp->hfs_chash_mutex, chash_lck_grp, chash_lck_attr);

	hfsmp->hfs_cnodehashtbl = hashinit(desiredvnodes / 4, M_TEMP, &hfsmp->hfs_cnodehash);

	hfs_cnode_zone_init(hfsmp);
}

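/*
 * Tear down the per-mount cnode hash table and allocation zone.
 */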
void
hfs_delete_chash(struct hfsmount *hfsmp)
{
	lck_mtx_destroy(&hfsmp->hfs_chash_mutex, chash_lck_grp);

	FREE(hfsmp->hfs_cnodehashtbl, M_TEMP);

	hfs_cnode_zone_free(hfsmp);
}


/*
 * Use the device, inum pair to find the incore cnode.
 *
 * If it is in core, but locked, wait for it.
 */
struct vnode *
hfs_chash_getvnode(struct hfsmount *hfsmp, ino_t inum, int wantrsrc, int skiplock, int allow_deleted)
{
	struct cnode *cp;
	struct vnode *vp;
	int error;
	u_int32_t vid;

	/*
	 * Go through the hash list.
	 * If a cnode is in the process of being cleaned out or being
	 * allocated, wait for it to be finished and then try again.
	 */
loop:
	hfs_chash_lock_spin(hfsmp);

	for (cp = CNODEHASH(hfsmp, inum)->lh_first; cp; cp = cp->c_hash.le_next) {
		if (cp->c_fileid != inum)
			continue;
		/* Wait if cnode is being created or reclaimed. */
		if (ISSET(cp->c_hflag, H_ALLOC | H_TRANSIT | H_ATTACH)) {
			SET(cp->c_hflag, H_WAITING);

			(void) msleep(cp, &hfsmp->hfs_chash_mutex, PDROP | PINOD,
			              "hfs_chash_getvnode", 0);
			goto loop;
		}
		/* Obtain the desired vnode. */
		vp = wantrsrc ? cp->c_rsrc_vp : cp->c_vp;
		if (vp == NULLVP)
			goto exit;

		vid = vnode_vid(vp);
		hfs_chash_unlock(hfsmp);

		if ((error = vnode_getwithvid(vp, vid))) {
			/*
			 * If the vnode is being reclaimed, or has already
			 * changed identity, there is no need to wait.
			 */
			return (NULL);
		}
		if (!skiplock && hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT) != 0) {
			vnode_put(vp);
			return (NULL);
		}

		/*
		 * Skip cnodes that are no longer in the name space.  We need
		 * to check with the cnode lock held because we may have
		 * blocked acquiring the vnode ref or the lock on the cnode,
		 * which would allow the node to be unlinked.
		 */
		if (!allow_deleted) {
			if (cp->c_flag & (C_NOEXISTS | C_DELETED)) {
				if (!skiplock) {
					hfs_unlock(cp);
				}
				vnode_put(vp);
				return (NULL);
			}
		}
		return (vp);
	}
exit:
	hfs_chash_unlock(hfsmp);
	return (NULL);
}


/*
 * Use the device, fileid pair to snoop an incore cnode.
 *
 * A cnode can exist in the chash even after it has been deleted
 * from the catalog.  This function returns ENOENT if no cnode is
 * found at all, and EACCES if the cnode found is marked C_NOEXISTS
 * or C_DELETED.
 */
int
hfs_chash_snoop(struct hfsmount *hfsmp, ino_t inum, int existence_only,
                int (*callout)(const cnode_t *cp, void *), void * arg)
{
	struct cnode *cp;
	int result = ENOENT;

	/*
	 * Go through the hash list.
	 * Cnodes that are being cleaned out or allocated are skipped
	 * rather than waited for.
	 */
	hfs_chash_lock(hfsmp);

	for (cp = CNODEHASH(hfsmp, inum)->lh_first; cp; cp = cp->c_hash.le_next) {
		if (cp->c_fileid != inum)
			continue;

		/*
		 * Under normal circumstances, we would want to return ENOENT if a cnode is in
		 * the hash and it is marked C_NOEXISTS or C_DELETED.  However, if the CNID
		 * namespace has wrapped around, then we have the possibility of collisions.
		 * In that case, we may use this function to validate whether or not we
		 * should trust the nextCNID value in the hfs mount point.
		 *
		 * If we didn't do this, then it would be possible for a cnode that is no longer backed
		 * by anything on-disk (C_NOEXISTS) to still exist in the hash along with its
		 * vnode.  The cat_create routine could then create a new entry in the catalog
		 * re-using that CNID.  Then subsequent hfs_getnewvnode calls will repeatedly fail
		 * trying to look it up/validate it because it is marked C_NOEXISTS.  So we want
		 * to prevent that from happening as much as possible.
		 */
		if (existence_only) {
			result = 0;
			break;
		}

		/* Skip cnodes that have been removed from the catalog */
		if (cp->c_flag & (C_NOEXISTS | C_DELETED)) {
			result = EACCES;
			break;
		}

		/* Skip cnodes being created or reclaimed. */
		if (!ISSET(cp->c_hflag, H_ALLOC | H_TRANSIT | H_ATTACH)) {
			result = callout(cp, arg);
		}
		break;
	}
	hfs_chash_unlock(hfsmp);

	return (result);
}

/*
 * Use the device, fileid pair to find the incore cnode.
 * If no cnode is found, one is created.
 *
 * If it is in core, but locked, wait for it.
 *
 * If the cnode is C_DELETED, then return NULL since that
 * inum is no longer valid for lookups (open-unlinked file).
 *
 * If the cnode is C_DELETED but also marked C_RENAMED, then that means
 * the cnode was renamed over and a new entry exists in its place.  The caller
 * should re-drive the lookup to get the newer entry.  In that case, we'll still
 * return NULL for the cnode, but also return GNV_CHASH_RENAMED in the output flags
 * of this function to indicate to the caller that they should re-drive.
 */
struct cnode *
hfs_chash_getcnode(struct hfsmount *hfsmp, ino_t inum, struct vnode **vpp,
                   int wantrsrc, int skiplock, int *out_flags, int *hflags)
{
	struct cnode *cp;
	struct cnode *ncp = NULL;
	vnode_t vp;
	u_int32_t vid;

	/*
	 * Go through the hash list.
	 * If a cnode is in the process of being cleaned out or being
	 * allocated, wait for it to be finished and then try again.
	 */
loop:
	hfs_chash_lock_spin(hfsmp);

loop_with_lock:
	for (cp = CNODEHASH(hfsmp, inum)->lh_first; cp; cp = cp->c_hash.le_next) {
		if (cp->c_fileid != inum)
			continue;
		/*
		 * Wait if cnode is being created, attached to or reclaimed.
		 */
		if (ISSET(cp->c_hflag, H_ALLOC | H_ATTACH | H_TRANSIT)) {
			SET(cp->c_hflag, H_WAITING);

			(void) msleep(cp, &hfsmp->hfs_chash_mutex, PINOD,
			              "hfs_chash_getcnode", 0);
			goto loop_with_lock;
		}
		vp = wantrsrc ? cp->c_rsrc_vp : cp->c_vp;
		if (vp == NULL) {
			/*
			 * The desired vnode isn't there so tag the cnode.
			 */
			SET(cp->c_hflag, H_ATTACH);
			*hflags |= H_ATTACH;

			hfs_chash_unlock(hfsmp);
		} else {
			vid = vnode_vid(vp);

			hfs_chash_unlock(hfsmp);

			if (vnode_getwithvid(vp, vid))
				goto loop;
		}
		if (ncp) {
			/*
			 * Someone else won the race to create this cnode
			 * and add it to the hash; just dump our allocation.
			 */
			hfs_cnode_free(hfsmp, ncp);
			ncp = NULL;
		}

		if (!skiplock) {
			hfs_lock(cp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_ALLOW_NOEXISTS);
		}

		/*
		 * Skip cnodes that are no longer in the name space.  We need
		 * to check with the cnode lock held because we may have
		 * blocked acquiring the vnode ref or the lock on the cnode,
		 * which would allow the node to be unlinked.
		 *
		 * Don't return a cnode in this case since the inum
		 * is no longer valid for lookups.
		 */
		if ((cp->c_flag & (C_NOEXISTS | C_DELETED)) && !wantrsrc) {
			int renamed = 0;
			if (cp->c_flag & C_RENAMED) {
				renamed = 1;
			}
			if (!skiplock)
				hfs_unlock(cp);
			if (vp != NULLVP) {
				vnode_put(vp);
			} else {
				hfs_chash_lock_spin(hfsmp);
				CLR(cp->c_hflag, H_ATTACH);
				*hflags &= ~H_ATTACH;
				if (ISSET(cp->c_hflag, H_WAITING)) {
					CLR(cp->c_hflag, H_WAITING);
					wakeup((caddr_t)cp);
				}
				hfs_chash_unlock(hfsmp);
			}
			vp = NULL;
			cp = NULL;
			if (renamed) {
				*out_flags = GNV_CHASH_RENAMED;
			}
		}
		*vpp = vp;
		return (cp);
	}

	/*
	 * Allocate a new cnode.
	 */
	if (skiplock && !wantrsrc)
		panic("%s - should never get here when skiplock is set \n", __FUNCTION__);

	if (ncp == NULL) {
		bool unlocked;

		ncp = hfs_cnode_alloc(hfsmp, &unlocked);

		if (unlocked)
			goto loop_with_lock;
	}
	hfs_chash_lock_convert(hfsmp);

#if HFS_MALLOC_DEBUG
	bzero(ncp, __builtin_offsetof(struct cnode, magic));
#else
	bzero(ncp, sizeof(*ncp));
#endif

	SET(ncp->c_hflag, H_ALLOC);
	*hflags |= H_ALLOC;
	ncp->c_fileid = inum;
	TAILQ_INIT(&ncp->c_hintlist); /* make the list empty */
	TAILQ_INIT(&ncp->c_originlist);

	lck_rw_init(&ncp->c_rwlock, hfs_rwlock_group, hfs_lock_attr);
	if (!skiplock)
		(void) hfs_lock(ncp, HFS_EXCLUSIVE_LOCK, HFS_LOCK_DEFAULT);

	/* Insert the new cnode with its H_ALLOC flag set */
	LIST_INSERT_HEAD(CNODEHASH(hfsmp, inum), ncp, c_hash);
	hfs_chash_unlock(hfsmp);

	*vpp = NULL;
	return (ncp);
}


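/*
 * Clear the given hash flags on a cnode and wake up any threads
 * waiting in msleep() for those flags to clear.
 */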
void
hfs_chashwakeup(struct hfsmount *hfsmp, struct cnode *cp, int hflags)
{
	hfs_chash_lock_spin(hfsmp);

	CLR(cp->c_hflag, hflags);

	if (ISSET(cp->c_hflag, H_WAITING)) {
		CLR(cp->c_hflag, H_WAITING);
		wakeup((caddr_t)cp);
	}
	hfs_chash_unlock(hfsmp);
}


/*
 * Re-hash two cnodes in the hash table.
 */
void
hfs_chash_rehash(struct hfsmount *hfsmp, struct cnode *cp1, struct cnode *cp2)
{
	hfs_chash_lock_spin(hfsmp);

	LIST_REMOVE(cp1, c_hash);
	LIST_REMOVE(cp2, c_hash);
	LIST_INSERT_HEAD(CNODEHASH(hfsmp, cp1->c_fileid), cp1, c_hash);
	LIST_INSERT_HEAD(CNODEHASH(hfsmp, cp2->c_fileid), cp2, c_hash);

	hfs_chash_unlock(hfsmp);
}


/*
 * Remove a cnode from the hash table.
 */
int
hfs_chashremove(struct hfsmount *hfsmp, struct cnode *cp)
{
	hfs_chash_lock_spin(hfsmp);

	/* Check if a vnode is getting attached */
	if (ISSET(cp->c_hflag, H_ATTACH)) {
		hfs_chash_unlock(hfsmp);
		return (EBUSY);
	}
	if (cp->c_hash.le_next || cp->c_hash.le_prev) {
		LIST_REMOVE(cp, c_hash);
		cp->c_hash.le_next = NULL;
		cp->c_hash.le_prev = NULL;
	}
	hfs_chash_unlock(hfsmp);

	return (0);
}

/*
 * Remove a cnode from the hash table and wakeup any waiters.
 */
void
hfs_chash_abort(struct hfsmount *hfsmp, struct cnode *cp)
{
	hfs_chash_lock_spin(hfsmp);

	LIST_REMOVE(cp, c_hash);
	cp->c_hash.le_next = NULL;
	cp->c_hash.le_prev = NULL;

	CLR(cp->c_hflag, H_ATTACH | H_ALLOC);
	if (ISSET(cp->c_hflag, H_WAITING)) {
		CLR(cp->c_hflag, H_WAITING);
		wakeup((caddr_t)cp);
	}
	hfs_chash_unlock(hfsmp);
}


/*
 * Mark a cnode as being in transit.
 */
void
hfs_chash_mark_in_transit(struct hfsmount *hfsmp, struct cnode *cp)
{
	hfs_chash_lock_spin(hfsmp);

	SET(cp->c_hflag, H_TRANSIT);

	hfs_chash_unlock(hfsmp);
}

/* Search for a cnode in the hash.  This function does not return cnodes
 * that are being created, destroyed, or in transition.  Note that this
 * function does not acquire the cnode hash mutex; it expects the caller
 * to hold it.  On success, returns a pointer to the cnode found.  On
 * failure, returns NULL.
 */
static
struct cnode *
hfs_chash_search_cnid(struct hfsmount *hfsmp, cnid_t cnid)
{
	struct cnode *cp;

	for (cp = CNODEHASH(hfsmp, cnid)->lh_first; cp; cp = cp->c_hash.le_next) {
		if (cp->c_fileid == cnid) {
			break;
		}
	}

	/* If the cnode is being created or reclaimed, return NULL. */
	if (cp && ISSET(cp->c_hflag, H_ALLOC | H_TRANSIT | H_ATTACH)) {
		cp = NULL;
	}

	return cp;
}

/* Search the hash for the cnode corresponding to the given ID.  If the
 * found cnode has kHFSHasChildLinkBit cleared, set it.  If the cnode is
 * not found, no new cnode is created and an error is returned.
 *
 * Return values -
 *	-1 : The cnode was not found.
 *	 0 : The cnode was found, and the kHFSHasChildLinkBit was already set.
 *	 1 : The cnode was found, the kHFSHasChildLinkBit was not set, and the
 *	     function had to set that bit.
 */
int
hfs_chash_set_childlinkbit(struct hfsmount *hfsmp, cnid_t cnid)
{
	int retval = -1;
	struct cnode *cp;

	hfs_chash_lock_spin(hfsmp);

	cp = hfs_chash_search_cnid(hfsmp, cnid);
	if (cp) {
		if (cp->c_attr.ca_recflags & kHFSHasChildLinkMask) {
			retval = 0;
		} else {
			cp->c_attr.ca_recflags |= kHFSHasChildLinkMask;
			retval = 1;
		}
	}
	hfs_chash_unlock(hfsmp);

	return retval;
}

// -- Cnode Memory Allocation --

/*
 * We allocate chunks, but to minimise fragmentation, we store the
 * chunks in a heap ordered such that the chunk with the least number
 * of free elements is at the top.  At the end of each chunk we store
 * a pointer to a header somewhere within the chunk.  When all
 * elements in a chunk are in use, that pointer is NULL.  Given that
 * chunks will always be aligned to a page, we can compute the element
 * index in the chunk using this equation:
 *
 *    y * (uintptr_t)el % PAGE_SIZE / gcd % (PAGE_SIZE / gcd)
 *
 * where gcd is the greatest common divisor of elem_size and
 * PAGE_SIZE (which we calculate using the Euclidean algorithm) and y
 * also falls out of the Euclidean algorithm -- see
 * hfs_cnode_zone_init below.  The proof for this is left as an
 * exercise for the reader.  Hint: start with the equation:
 *
 *    elem_ndx * elem_size = PAGE_SIZE * elem_pg + elem_addr % PAGE_SIZE
 *
 * Now realise that this can be made to look like a Diophantine
 * equation (ax + by = c), which is solvable using the extended
 * Euclidean algorithm, and from there you will arrive at the
 * equation above.
 */
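
/*
 * Toy example (assumed sizes, for illustration only): take
 * PAGE_SIZE = 16 and elem_size = 6.  The extended Euclidean
 * algorithm gives gcd = 2 and y = 3 (since 6 * 3 = 18 == 2 mod 16).
 * Element 5 of a page-aligned chunk starts at byte 30, which is
 * offset 30 % 16 = 14 within its page, and the equation recovers
 * the index: 3 * 14 = 42, 42 % 16 = 10, 10 / 2 = 5, 5 % 8 = 5.
 * The real elem_size and PAGE_SIZE are much larger, but the
 * arithmetic is identical.
 */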

enum {
	CHUNK_HDR_MAGIC = 0x6b6e6863, // chnk
	CNODE_MAGIC = 0x65646f6e63736668, // hfscnode
	ELEMENT_ALIGNMENT = 8,

	/*
	 * We store the pointer to the header for a chunk 8 bytes in from
	 * the end of the chunk.
	 */
	CHUNK_TAIL_LEN = 8,
};

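/*
 * The chunk header lives inside one of the chunk's free elements
 * (see hfs_cnode_free); phdr points back at the tail slot, which in
 * turn points to this header.
 */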
typedef struct chunk_hdr
{
	void *next_free;
	uint32_t magic; // = CHUNK_HDR_MAGIC
	uint16_t free_count;
	uint16_t heap_ndx;
	struct chunk_hdr **phdr;
} chunk_hdr_t;

static void hfs_cnode_zone_init(hfsmount_t *hfsmp)
{
	struct cnode_zone *z = &hfsmp->z;

	int elem_size = sizeof(cnode_t);

	elem_size = roundup(elem_size, ELEMENT_ALIGNMENT);

	z->elem_size = elem_size;

	// Figure out chunk_size
	int npages, chunk_size, waste;

	for (npages = 1;; ++npages) {
		chunk_size = npages * page_size;
		waste = (chunk_size - CHUNK_TAIL_LEN) % elem_size;

		// If waste is less than 1%, that's good enough
		if (waste < chunk_size / 100)
			break;
	}

	z->chunk_size = chunk_size;
	z->alloc_count = (chunk_size - CHUNK_TAIL_LEN) / elem_size;

	// Compute the GCD of elem_size and PAGE_SIZE, along with the
	// coefficient y, using the extended Euclidean algorithm
	int t = 1, last_t = 0, r = z->elem_size, last_r = PAGE_SIZE;

	while (r != 0) {
		int q = last_r / r;
		int next_r = last_r - q * r;
		last_r = r;
		r = next_r;
		int next_t = last_t - q * t;
		last_t = t;
		t = next_t;
	}

	z->gcd = last_r;
	z->y = last_t;

	z->heap_max_count = 128 / sizeof(void *);
	z->heap = hfs_malloc(z->heap_max_count * sizeof(void *));

#if DEBUG
	printf("hfs: cnode size %d, chunk size %d, # per chunk %d, waste %d\n",
	       elem_size, chunk_size, z->alloc_count, waste);
#endif
}

static void hfs_cnode_zone_free(hfsmount_t *hfsmp)
{
	// Check that all heap chunks are completely free
	struct cnode_zone *z = &hfsmp->z;

	for (int i = 0; i < z->heap_count; ++i) {
#if DEBUG
		hfs_assert(z->heap[i]->free_count == z->alloc_count);
#else
		if (z->heap[i]->free_count != z->alloc_count) {
			printf("hfs: cnodes leaked!\n");
			continue;
		}
#endif
		void *chunk = (void *)((uintptr_t)z->heap[i]->phdr
							   - (z->chunk_size - CHUNK_TAIL_LEN));
		hfs_free(chunk, z->chunk_size);
	}

	hfs_free(z->heap, z->heap_max_count * sizeof(void *));
}

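/*
 * zel returns the address of element i within a chunk; zphdr returns
 * the address of the chunk's tail slot, which holds a pointer to the
 * chunk header (or NULL when every element is in use).
 */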
static void *zel(struct cnode_zone *z, void *chunk, int i)
{
	return chunk + i * z->elem_size;
}

static chunk_hdr_t **zphdr(struct cnode_zone *z, void *chunk)
{
	return chunk + z->chunk_size - CHUNK_TAIL_LEN;
}

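/*
 * Debug-only consistency check of the chunk heap; change the #if 0
 * below to #if 1 to enable it.
 */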
#if 0
static void hfs_check_heap(hfsmount_t *hfsmp, void *just_removed)
{
	struct cnode_zone *z = &hfsmp->z;

	for (int i = 0; i < z->heap_count; ++i) {
		hfs_assert(z->heap[i]->magic == CHUNK_HDR_MAGIC);
		hfs_assert(z->heap[i]->free_count > 0
				   && z->heap[i]->free_count <= z->alloc_count);
		hfs_assert(z->heap[i]->heap_ndx == i);
		void *max_el = z->heap[i]->phdr;
		void *min_el = (void *)((uintptr_t)z->heap[i]->phdr
								+ CHUNK_TAIL_LEN - z->chunk_size);
		int count = 1;
		hfs_assert(z->heap[i] != just_removed);
		void *el = z->heap[i]->next_free;
		size_t bitmap_size = (z->alloc_count + 7) / 8;
		uint8_t bitmap[bitmap_size];
		bzero(bitmap, bitmap_size);
		int ndx = (int)((void *)z->heap[i] - min_el) / z->elem_size;
		bitmap[ndx / 8] |= 0x80 >> ndx % 8;
		while (el) {
			hfs_assert(el != just_removed);
			hfs_assert(el >= min_el
					   && el < max_el && (el - min_el) % z->elem_size == 0);
			int n = (int)(el - min_el) / z->elem_size;
			hfs_assert(!(bitmap[n / 8] & (0x80 >> n % 8)));
			bitmap[n / 8] |= 0x80 >> n % 8;
			el = *(void **)el;
			++count;
		}
		hfs_assert(count == z->heap[i]->free_count);
		if (i)
			hfs_assert(z->heap[(i + 1) / 2 - 1]->free_count <= z->heap[i]->free_count);
	}
}
#else
#define hfs_check_heap(a, b) (void)0
#endif

static void hfs_cnode_set_magic(cnode_t *cp, uint64_t v)
{
#if HFS_MALLOC_DEBUG
	cp->magic = v;
#endif
}


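/*
 * Allocate a cnode from the zone.  Called with the chash mutex held
 * (spinning); if the lock had to be dropped to allocate a fresh
 * chunk, *unlocked is set so the caller can revalidate the hash.
 */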
static cnode_t *hfs_cnode_alloc(hfsmount_t *hfsmp, bool *unlocked)
{
	hfs_check_heap(hfsmp, NULL);

	*unlocked = false;

	struct cnode_zone *z = &hfsmp->z;
	void *el;

	while (!z->heap_count) {
		if (z->spare) {
			/*
			 * We have a spare chunk we can use so initialise it, add
			 * it to the heap and return one element from it.
			 */
			chunk_hdr_t *chunk_hdr = z->spare;
			z->spare = NULL;
			void *last = NULL;
			for (int i = z->alloc_count - 2; i > 0; --i) {
				void *p = zel(z, chunk_hdr, i);
				*(void **)p = last;
				hfs_cnode_set_magic(p, 0);
				last = p;
			}
			hfs_cnode_set_magic((void *)chunk_hdr, 0);
			chunk_hdr->phdr = zphdr(z, chunk_hdr);
			chunk_hdr->next_free = last;
			chunk_hdr->free_count = z->alloc_count - 1;
			chunk_hdr->heap_ndx = 0;
			// Point the chunk's tail slot at the chunk_hdr
			*zphdr(z, chunk_hdr) = chunk_hdr;
			z->heap[0] = chunk_hdr;
			z->heap_count = 1;
			el = zel(z, chunk_hdr, z->alloc_count - 1);
			hfs_cnode_set_magic(el, 0);
			goto exit;
		}

		*unlocked = true;

		if (z->allocating) {
			z->waiting = true;
			msleep(z, &hfsmp->hfs_chash_mutex, PINOD | PSPIN,
			       "hfs_cnode_alloc", NULL);
		} else {
			z->allocating = true;
			lck_mtx_unlock(&hfsmp->hfs_chash_mutex);
			chunk_hdr_t *chunk_hdr = hfs_malloc(z->chunk_size);
			chunk_hdr->magic = CHUNK_HDR_MAGIC;
			hfs_assert(!((uintptr_t)chunk_hdr & PAGE_MASK));
			lck_mtx_lock_spin(&hfsmp->hfs_chash_mutex);
			z->allocating = false;
			if (z->waiting) {
				wakeup(z);
				z->waiting = false;
			}
			hfs_assert(!z->spare);
			z->spare = chunk_hdr;
		}
	}

	chunk_hdr_t *chunk_hdr = z->heap[0];
	if (chunk_hdr->next_free) {
		/*
		 * Not the last element in this chunk, so just pull an element
		 * off and return it.  This chunk should remain at the top of
		 * the heap.
		 */
		el = chunk_hdr->next_free;
		chunk_hdr->next_free = *(void **)chunk_hdr->next_free;
		--chunk_hdr->free_count;

		goto exit;
	}

	/*
	 * This is the last element in the chunk so we need to fix up the
	 * heap.
	 */
	el = chunk_hdr;

	*chunk_hdr->phdr = NULL;
	chunk_hdr->magic = ~CHUNK_HDR_MAGIC;

	if (!--z->heap_count)
		goto exit;

	chunk_hdr_t *v = z->heap[z->heap_count];

	// Fix the heap downwards; offset by one to make the arithmetic easier
	int k = 1;
	chunk_hdr_t **a = &z->heap[0] - 1;
	int N = z->heap_count;

	for (;;) {
		// Look at the next level down
		int j = k * 2;
		if (j > N)
			break;
		// Pick the smaller of the two that we're looking at
		if (j < N && a[j]->free_count > a[j+1]->free_count)
			++j;
		if (v->free_count <= a[j]->free_count)
			break;
		// Shift element at j to k
		a[k] = a[j];
		a[k]->heap_ndx = k - 1;

		// Descend
		k = j;
	}

	a[k] = v;
	a[k]->heap_ndx = k - 1;

exit:

	hfs_check_heap(hfsmp, el);

#if HFS_MALLOC_DEBUG
	if (((cnode_t *)el)->magic == CNODE_MAGIC) {
#if __x86_64__
		asm("int $3\n");
#elif __arm64__
		asm("svc 0\n");
#else
		asm("trap\n");
#endif
	}
#endif // HFS_MALLOC_DEBUG

	hfs_cnode_set_magic(el, CNODE_MAGIC);

	return el;
}

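/*
 * Return a cnode to the zone.  The containing chunk's base address is
 * recovered from the element address alone (see the index equation
 * above), the element is chained onto the chunk's free list, and the
 * heap is re-ordered; completely free chunks (beyond the last one)
 * are handed back to hfs_free().
 */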
void hfs_cnode_free(hfsmount_t *hfsmp, cnode_t *cp)
{
	void *el = cp;
	void *old_heap = NULL;
	size_t old_heap_size = 0;
	struct cnode_zone *z = &hfsmp->z;

	int ndx_in_chunk = (z->y * (uintptr_t)el % PAGE_SIZE
						/ z->gcd % (PAGE_SIZE / z->gcd));

	void *free_chunk = NULL, *chunk = el - ndx_in_chunk * z->elem_size;

	lck_mtx_lock_spin(&hfsmp->hfs_chash_mutex);

#if HFS_MALLOC_DEBUG
	hfs_assert(cp->magic == CNODE_MAGIC);
	cp->magic = ~CNODE_MAGIC;
#endif

loop:

	hfs_check_heap(hfsmp, NULL);

	chunk_hdr_t *hdr = *zphdr(z, chunk);

	int k, orig_k;

	if (hdr) {
		/*
		 * This chunk already has some free elements in it, so we chain
		 * this element on and then fix up the heap.
		 */
		hfs_assert(hdr->magic == CHUNK_HDR_MAGIC);

		*(void **)el = hdr->next_free;
		hdr->next_free = el;
		hfs_assert(hdr->next_free != hdr);

		chunk_hdr_t *v;
		orig_k = k = hdr->heap_ndx + 1;

		chunk_hdr_t **a = &z->heap[0] - 1;

		if (++hdr->free_count == z->alloc_count) {
			// Chunk is totally free

			// Always keep at least one chunk on the heap
			if (z->heap_count == 1)
				goto exit;

			// Remove the chunk
			free_chunk = chunk;
			--z->heap_count;
			v = z->heap[z->heap_count];

			if (k > 1 && a[k / 2]->free_count > v->free_count) {
				do {
					a[k] = a[k / 2];
					a[k]->heap_ndx = k - 1;
					k = k / 2;
				} while (k > 1 && a[k / 2]->free_count > v->free_count);
				a[k] = v;
				a[k]->heap_ndx = k - 1;
				goto exit;
			}
		} else
			v = hdr;

		// Fix up the heap
		int N = z->heap_count;

		for (;;) {
			// Look at the next level down
			int j = k * 2;
			if (j > N)
				break;
			// Pick the smaller of the two that we're looking at
			if (j < N && a[j]->free_count > a[j+1]->free_count)
				++j;
			if (v->free_count <= a[j]->free_count)
				break;
			// Shift element at j to k
			a[k] = a[j];
			a[k]->heap_ndx = k - 1;

			// Descend
			k = j;
		}

		a[k] = v;
		a[k]->heap_ndx = k - 1;
	} else {
		// This element is the first that's free in this chunk

		if (z->heap_count == z->heap_max_count) {
			// We need to resize the heap
			int new_max_count = z->heap_max_count * 2;
			lck_mtx_unlock(&hfsmp->hfs_chash_mutex);
			if (old_heap)
				hfs_free(old_heap, old_heap_size);
			struct chunk_hdr **new_heap = hfs_malloc(new_max_count * sizeof(void *));
			lck_mtx_lock_spin(&hfsmp->hfs_chash_mutex);
			// Check to see if someone beat us to it
			if (new_max_count > z->heap_max_count) {
				memcpy(new_heap, z->heap, z->heap_count * sizeof(void *));
				old_heap_size = z->heap_max_count * sizeof(void *);
				z->heap_max_count = new_max_count;
				old_heap = z->heap;
				z->heap = new_heap;
			} else {
				old_heap = new_heap;
				old_heap_size = new_max_count * sizeof(void *);
			}
			// The lock was dropped, so we have to go around again
			goto loop;
		}

		hdr = (chunk_hdr_t *)el;
		*zphdr(z, chunk) = hdr;
		hdr->free_count = 1;
		hdr->next_free = NULL;
		hdr->heap_ndx = 0;
		hdr->phdr = zphdr(z, chunk);
		hdr->magic = CHUNK_HDR_MAGIC;

		// Fix the heap upwards; offset by one to make the arithmetic easier
		chunk_hdr_t **a = &z->heap[0] - 1;

		if (z->heap_count++) {
			k = z->heap_count;
			chunk_hdr_t *v = z->heap[0];
			while (k > 3 && a[k / 2]->free_count > v->free_count) {
				a[k] = a[k / 2];
				a[k]->heap_ndx = k - 1;
				k = k / 2;
			}
			a[k] = v;
			a[k]->heap_ndx = k - 1;
		}

		z->heap[0] = hdr;
	}

exit:

	hfs_check_heap(hfsmp, NULL);
	lck_mtx_unlock(&hfsmp->hfs_chash_mutex);

	if (old_heap)
		hfs_free(old_heap, old_heap_size);
	if (free_chunk)
		hfs_free(free_chunk, z->chunk_size);
}