]>
git.saurik.com Git - apple/xnu.git/blob - bsd/dev/dtrace/blist.c
   2  * BLIST.C -    Bitmap allocator/deallocator, using a radix tree with hinting 
   4  *      (c)Copyright 1998, Matthew Dillon.  Terms for use and redistribution 
   5  *      are covered by the BSD Copyright as found in /usr/src/COPYRIGHT. 
   7  *      This module implements a general bitmap allocator/deallocator.  The 
   8  *      allocator eats around 2 bits per 'block'.  The module does not 
   9  *      try to interpret the meaning of a 'block' other than to return 
  10  *      SWAPBLK_NONE on an allocation failure. 
  12  *      A radix tree is used to maintain the bitmap.  Two radix constants are 
  13  *      involved:  One for the bitmaps contained in the leaf nodes (typically 
  14  *      32), and one for the meta nodes (typically 16).  Both meta and leaf 
  15  *      nodes have a hint field.  This field gives us a hint as to the largest 
  16  *      free contiguous range of blocks under the node.  It may contain a 
  17  *      value that is too high, but will never contain a value that is too 
  18  *      low.  When the radix tree is searched, allocation failures in subtrees 
  21  *      The radix tree also implements two collapsed states for meta nodes: 
  22  *      the ALL-ALLOCATED state and the ALL-FREE state.  If a meta node is 
  23  *      in either of these two states, all information contained underneath 
  24  *      the node is considered stale.  These states are used to optimize 
  25  *      allocation and freeing operations. 
  27  *      The hinting greatly increases code efficiency for allocations while 
  28  *      the general radix structure optimizes both allocations and frees.  The 
  29  *      radix tree should be able to operate well no matter how much 
  30  *      fragmentation there is and no matter how large a bitmap is used. 
  32  *      Unlike the rlist code, the blist code wires all necessary memory at 
  33  *      creation time.  Neither allocations nor frees require interaction with 
  34  *      the memory subsystem.  In contrast, the rlist code may allocate memory 
  35  *      on an rlist_free() call.  The non-blocking features of the blist code 
  36  *      are used to great advantage in the swap code (vm/nswap_pager.c).  The 
  37  *      rlist code uses a little less overall memory than the blist code (but 
  38  *      due to swap interleaving not all that much less), but the blist code 
  39  *      scales much, much better. 
  41  *      LAYOUT: The radix tree is laid out recursively using a 
  42  *      linear array.  Each meta node is immediately followed (laid out 
  43  *      sequentially in memory) by BLIST_META_RADIX lower level nodes.  This 
  44  *      is a recursive structure but one that can be easily scanned through 
  45  *      a very simple 'skip' calculation.  In order to support large radixes, 
  46  *      portions of the tree may reside outside our memory allocation.  We 
  47  *      handle this with an early-termination optimization (when bighint is 
  48  *      set to -1) on the scan.  The memory allocation is only large enough 
  49  *      to cover the number of blocks requested at creation time even if it 
  50  *      must be encompassed in larger root-node radix. 
  52  *      NOTE: the allocator cannot currently allocate more than 
  53  *      BLIST_BMAP_RADIX blocks per call.  It will panic with 'allocation too 
  54  *      large' if you try.  This is an area that could use improvement.  The 
  55  *      radix is large enough that this restriction does not affect the swap 
  56  *      system, though.  Currently only the allocation code is affected by 
  57  *      this algorithmic unfeature.  The freeing code can handle arbitrary 
  60  *      This code can be compiled stand-alone for debugging. 
  62  * $FreeBSD: src/sys/kern/subr_blist.c,v 1.5.2.1 2000/03/17 10:47:29 ps Exp $ 
  65 typedef unsigned int u_daddr_t
; 
  67 #include <sys/param.h> 
  68 #include <sys/systm.h> 
  70 #include <sys/kernel.h> 
  72 #include <sys/malloc.h> 
  74 #if !defined(__APPLE__) 
  75 #define SWAPBLK_NONE ((daddr_t)-1) 
  78 #define malloc _MALLOC 
  83  * static support functions 
  86 static daddr_t 
blst_leaf_alloc(blmeta_t 
*scan
, daddr_t blk
, int count
); 
  87 static daddr_t 
blst_meta_alloc(blmeta_t 
*scan
, daddr_t blk
, 
  88     daddr_t count
, daddr_t radix
, int skip
); 
  89 static void blst_leaf_free(blmeta_t 
*scan
, daddr_t relblk
, int count
); 
  90 static void blst_meta_free(blmeta_t 
*scan
, daddr_t freeBlk
, daddr_t count
, 
  91     daddr_t radix
, int skip
, daddr_t blk
); 
  92 static void blst_copy(blmeta_t 
*scan
, daddr_t blk
, daddr_t radix
, 
  93     daddr_t skip
, blist_t dest
, daddr_t count
); 
  94 static daddr_t  
blst_radix_init(blmeta_t 
*scan
, daddr_t radix
, 
  95     int skip
, daddr_t count
); 
  98  * blist_create() - create a blist capable of handling up to the specified 
 101  *      blocks must be greater than 0 
 103  *      The smallest blist consists of a single leaf node capable of 
 104  *      managing BLIST_BMAP_RADIX blocks. 
 108 blist_create(daddr_t blocks
) 
 115          * Calculate radix and skip field used for scanning. 
 117         radix 
= BLIST_BMAP_RADIX
; 
 119         while (radix 
< blocks
) { 
 120                 radix 
<<= BLIST_META_RADIX_SHIFT
; 
 121                 skip 
= (skip 
+ 1) << BLIST_META_RADIX_SHIFT
; 
 124         bl 
= malloc(sizeof(struct blist
), M_SWAP
, M_WAITOK
); 
 126         bzero(bl
, sizeof(*bl
)); 
 128         bl
->bl_blocks 
= blocks
; 
 129         bl
->bl_radix 
= radix
; 
 131         bl
->bl_rootblks 
= 1 + 
 132             blst_radix_init(NULL
, bl
->bl_radix
, bl
->bl_skip
, blocks
); 
 133         bl
->bl_root 
= malloc(sizeof(blmeta_t
) * bl
->bl_rootblks
, M_SWAP
, M_WAITOK
); 
 135 #if defined(BLIST_DEBUG) 
 137                 "BLIST representing %d blocks (%d MB of swap)" 
 138                 ", requiring %dK of ram\n", 
 140                 bl
->bl_blocks 
* 4 / 1024, 
 141                 (bl
->bl_rootblks 
* sizeof(blmeta_t
) + 1023) / 1024 
 143         printf("BLIST raw radix tree contains %d records\n", bl
->bl_rootblks
); 
 145         blst_radix_init(bl
->bl_root
, bl
->bl_radix
, bl
->bl_skip
, blocks
); 
 151 blist_destroy(blist_t bl
) 
 153         free(bl
->bl_root
, M_SWAP
); 
 158  * blist_alloc() - reserve space in the block bitmap.  Return the base 
 159  *                   of a contiguous region or SWAPBLK_NONE if space could 
 164 blist_alloc(blist_t bl
, daddr_t count
) 
 166         daddr_t blk 
= SWAPBLK_NONE
; 
 169                 if (bl
->bl_radix 
== BLIST_BMAP_RADIX
) { 
 170                         blk 
= blst_leaf_alloc(bl
->bl_root
, 0, count
); 
 172                         blk 
= blst_meta_alloc(bl
->bl_root
, 0, count
, 
 173                             bl
->bl_radix
, bl
->bl_skip
); 
 175                 if (blk 
!= SWAPBLK_NONE
) { 
 176                         bl
->bl_free 
-= count
; 
 183  * blist_free() -       free up space in the block bitmap.  Return the base 
 184  *                      of a contiguous region.  Panic if an inconsistency is 
 189 blist_free(blist_t bl
, daddr_t blkno
, daddr_t count
) 
 192                 if (bl
->bl_radix 
== BLIST_BMAP_RADIX
) { 
 193                         blst_leaf_free(bl
->bl_root
, blkno
, count
); 
 195                         blst_meta_free(bl
->bl_root
, blkno
, count
, 
 196                             bl
->bl_radix
, bl
->bl_skip
, 0); 
 198                 bl
->bl_free 
+= count
; 
 203  * blist_resize() -     resize an existing radix tree to handle the 
 204  *                      specified number of blocks.  This will reallocate 
 205  *                      the tree and transfer the previous bitmap to the new 
 206  *                      one.  When extending the tree you can specify whether 
 207  *                      the new blocks are to left allocated or freed. 
 211 blist_resize(blist_t 
*pbl
, daddr_t count
, int freenew
) 
 213         blist_t newbl 
= blist_create(count
); 
 217         if (count 
> save
->bl_blocks
) { 
 218                 count 
= save
->bl_blocks
; 
 220         blst_copy(save
->bl_root
, 0, save
->bl_radix
, save
->bl_skip
, newbl
, count
); 
 223          * If resizing upwards, should we free the new space or not? 
 225         if (freenew 
&& count 
< newbl
->bl_blocks
) { 
 226                 blist_free(newbl
, count
, newbl
->bl_blocks 
- count
); 
 234  * blist_print()    - dump radix tree 
 238 blist_print(blist_t bl
) 
 241         blst_radix_print(bl
->bl_root
, 0, bl
->bl_radix
, bl
->bl_skip
, 4); 
 247 /************************************************************************ 
 248  *                        ALLOCATION SUPPORT FUNCTIONS                  * 
 249  ************************************************************************ 
 251  *      These support functions do all the actual work.  They may seem 
 252  *      rather longish, but that's because I've commented them up.  The 
 253  *      actual code is straight forward. 
 258  * blist_leaf_alloc() - allocate at a leaf in the radix tree (a bitmap). 
 260  *      This is the core of the allocator and is optimized for the 1 block 
 261  *      and the BLIST_BMAP_RADIX block allocation cases.  Other cases are 
 262  *      somewhat slower.  The 1 block allocation case is log2 and extremely 
 267 blst_leaf_alloc(blmeta_t 
*scan
, daddr_t blk
, int count
) 
 269         u_daddr_t orig 
= scan
->u
.bmu_bitmap
; 
 273                  * Optimize bitmap all-allocated case.  Also, count = 1 
 274                  * case assumes at least 1 bit is free in the bitmap, so 
 275                  * we have to take care of this case here. 
 277                 scan
->bm_bighint 
= 0; 
 282                  * Optimized code to allocate one bit out of the bitmap 
 285                 int j 
= BLIST_BMAP_RADIX 
/ 2; 
 288                 mask 
= (u_daddr_t
)-1 >> (BLIST_BMAP_RADIX 
/ 2); 
 291                         if ((orig 
& mask
) == 0) { 
 298                 scan
->u
.bmu_bitmap 
&= ~(1 << r
); 
 301 #if !defined(__APPLE__) 
 302         if (count 
<= BLIST_BMAP_RADIX
) { 
 304         if (count 
<= (int)BLIST_BMAP_RADIX
) { 
 305 #endif /* __APPLE__ */ 
 307                  * non-optimized code to allocate N bits out of the bitmap. 
 308                  * The more bits, the faster the code runs.  It will run 
 309                  * the slowest allocating 2 bits, but since there aren't any 
 310                  * memory ops in the core loop (or shouldn't be, anyway), 
 311                  * you probably won't notice the difference. 
 314                 int n 
= BLIST_BMAP_RADIX 
- count
; 
 317                 mask 
= (u_daddr_t
)-1 >> n
; 
 319                 for (j 
= 0; j 
<= n
; ++j
) { 
 320                         if ((orig 
& mask
) == mask
) { 
 321                                 scan
->u
.bmu_bitmap 
&= ~mask
; 
 328          * We couldn't allocate count in this subtree, update bighint. 
 330         scan
->bm_bighint 
= count 
- 1; 
 335  * blist_meta_alloc() - allocate at a meta in the radix tree. 
 337  *      Attempt to allocate at a meta node.  If we can't, we update 
 338  *      bighint and return a failure.  Updating bighint optimize future 
 339  *      calls that hit this node.  We have to check for our collapse cases 
 340  *      and we have a few optimizations strewn in as well. 
 344 blst_meta_alloc(blmeta_t 
*scan
, daddr_t blk
, daddr_t count
, daddr_t radix
, 
 348         int next_skip 
= (skip 
>> BLIST_META_RADIX_SHIFT
); 
 350         if (scan
->u
.bmu_avail 
== 0) { 
 352                  * ALL-ALLOCATED special case 
 354                 scan
->bm_bighint 
= count
; 
 358         if (scan
->u
.bmu_avail 
== radix
) { 
 359                 radix 
>>= BLIST_META_RADIX_SHIFT
; 
 362  *      ALL-FREE special case, initialize uninitialized 
 365                 for (i 
= 1; i 
<= skip
; i 
+= next_skip
) { 
 366                         if (scan
[i
].bm_bighint 
== (daddr_t
)-1) { 
 369                         if (next_skip 
== 1) { 
 370                                 scan
[i
].u
.bmu_bitmap 
= (u_daddr_t
)-1; 
 371                                 scan
[i
].bm_bighint 
= BLIST_BMAP_RADIX
; 
 373                                 scan
[i
].bm_bighint 
= radix
; 
 374                                 scan
[i
].u
.bmu_avail 
= radix
; 
 378                 radix 
>>= BLIST_META_RADIX_SHIFT
; 
 381         for (i 
= 1; i 
<= skip
; i 
+= next_skip
) { 
 382                 if (count 
<= scan
[i
].bm_bighint
) { 
 384                          * count fits in object 
 387                         if (next_skip 
== 1) { 
 388                                 r 
= blst_leaf_alloc(&scan
[i
], blk
, count
); 
 390                                 r 
= blst_meta_alloc(&scan
[i
], blk
, count
, 
 391                                     radix
, next_skip 
- 1); 
 393                         if (r 
!= SWAPBLK_NONE
) { 
 394                                 scan
->u
.bmu_avail 
-= count
; 
 395                                 if (scan
->bm_bighint 
> scan
->u
.bmu_avail
) { 
 396                                         scan
->bm_bighint 
= scan
->u
.bmu_avail
; 
 400                 } else if (scan
[i
].bm_bighint 
== (daddr_t
)-1) { 
 405                 } else if (count 
> radix
) { 
 407                          * count does not fit in object even if it were 
 410                         panic("blist_meta_alloc: allocation too large"); 
 416          * We couldn't allocate count in this subtree, update bighint. 
 418         if (scan
->bm_bighint 
>= count
) { 
 419                 scan
->bm_bighint 
= count 
- 1; 
 425  * BLST_LEAF_FREE() -   free allocated block from leaf bitmap 
 430 blst_leaf_free(blmeta_t 
*scan
, daddr_t blk
, int count
) 
 433          * free some data in this bitmap 
 436          *      0000111111111110000 
 440         int n 
= blk 
& (BLIST_BMAP_RADIX 
- 1); 
 443         mask 
= ((u_daddr_t
)-1 << n
) & 
 444             ((u_daddr_t
)-1 >> (BLIST_BMAP_RADIX 
- count 
- n
)); 
 446         if (scan
->u
.bmu_bitmap 
& mask
) { 
 447                 panic("blst_radix_free: freeing free block"); 
 449         scan
->u
.bmu_bitmap 
|= mask
; 
 452          * We could probably do a better job here.  We are required to make 
 453          * bighint at least as large as the biggest contiguous block of 
 454          * data.  If we just shoehorn it, a little extra overhead will 
 455  *         be incurred on the next allocation (but only that one typically). 
 457         scan
->bm_bighint 
= BLIST_BMAP_RADIX
; 
 461  * BLST_META_FREE() - free allocated blocks from radix tree meta info 
 463  *      This support routine frees a range of blocks from the bitmap. 
 464  *      The range must be entirely enclosed by this radix node.  If a 
 465  *      meta node, we break the range down recursively to free blocks 
 466  *      in subnodes (which means that this code can free an arbitrary 
 467  *      range whereas the allocation code cannot allocate an arbitrary 
 472 blst_meta_free(blmeta_t 
*scan
, daddr_t freeBlk
, daddr_t count
, daddr_t radix
, 
 473     int skip
, daddr_t blk
) 
 476         int next_skip 
= (skip 
>> BLIST_META_RADIX_SHIFT
); 
 479         printf("FREE (%x,%d) FROM (%x,%d)\n", 
 485         if (scan
->u
.bmu_avail 
== 0) { 
 487                  * ALL-ALLOCATED special case, with possible 
 488                  * shortcut to ALL-FREE special case. 
 490                 scan
->u
.bmu_avail 
= count
; 
 491                 scan
->bm_bighint 
= count
; 
 493                 if (count 
!= radix
) { 
 494                         for (i 
= 1; i 
<= skip
; i 
+= next_skip
) { 
 495                                 if (scan
[i
].bm_bighint 
== (daddr_t
)-1) { 
 498                                 scan
[i
].bm_bighint 
= 0; 
 499                                 if (next_skip 
== 1) { 
 500                                         scan
[i
].u
.bmu_bitmap 
= 0; 
 502                                         scan
[i
].u
.bmu_avail 
= 0; 
 508                 scan
->u
.bmu_avail 
+= count
; 
 509                 /* scan->bm_bighint = radix; */ 
 513          * ALL-FREE special case. 
 516         if (scan
->u
.bmu_avail 
== radix
) { 
 519         if (scan
->u
.bmu_avail 
> radix
) { 
 520                 panic("blst_meta_free: freeing already free blocks (%d) %d/%d", count
, scan
->u
.bmu_avail
, radix
); 
 524          * Break the free down into its components 
 527         radix 
>>= BLIST_META_RADIX_SHIFT
; 
 529         i 
= (freeBlk 
- blk
) / radix
; 
 531         i 
= i 
* next_skip 
+ 1; 
 533         while (i 
<= skip 
&& blk 
< freeBlk 
+ count
) { 
 536                 v 
= blk 
+ radix 
- freeBlk
; 
 541                 if (scan
->bm_bighint 
== (daddr_t
)-1) { 
 542                         panic("blst_meta_free: freeing unexpected range"); 
 545                 if (next_skip 
== 1) { 
 546                         blst_leaf_free(&scan
[i
], freeBlk
, v
); 
 548                         blst_meta_free(&scan
[i
], freeBlk
, v
, radix
, 
 551                 if (scan
->bm_bighint 
< scan
[i
].bm_bighint
) { 
 552                         scan
->bm_bighint 
= scan
[i
].bm_bighint
; 
 562  * BLIST_RADIX_COPY() - copy one radix tree to another 
 564  *      Locates free space in the source tree and frees it in the destination 
 565  *      tree.  The space may not already be free in the destination. 
 569 blst_copy(blmeta_t 
*scan
, daddr_t blk
, daddr_t radix
, 
 570     daddr_t skip
, blist_t dest
, daddr_t count
) 
 579         if (radix 
== BLIST_BMAP_RADIX
) { 
 580                 u_daddr_t v 
= scan
->u
.bmu_bitmap
; 
 582                 if (v 
== (u_daddr_t
)-1) { 
 583                         blist_free(dest
, blk
, count
); 
 585 #if !defined(__APPLE__) 
 588                         for (i 
= 0; i 
< BLIST_BMAP_RADIX 
&& i 
< count
; ++i
) { 
 590                                         blist_free(dest
, blk 
+ i
, 1); 
 594                         int j
;   /* Avoid shadow warnings */ 
 596                         for (j 
= 0; j 
< (int)BLIST_BMAP_RADIX 
&& j 
< count
; ++j
) { 
 598                                         blist_free(dest
, blk 
+ j
, 1); 
 601 #endif /* __APPLE__ */ 
 611          * Source all allocated, leave dest allocated 
 613         if (scan
->u
.bmu_avail 
== 0) { 
 616         if (scan
->u
.bmu_avail 
== radix
) { 
 618                  * Source all free, free entire dest 
 621                         blist_free(dest
, blk
, count
); 
 623                         blist_free(dest
, blk
, radix
); 
 628         radix 
>>= BLIST_META_RADIX_SHIFT
; 
 629         next_skip 
= (skip 
>> BLIST_META_RADIX_SHIFT
); 
 631         for (i 
= 1; count 
&& i 
<= skip
; i 
+= next_skip
) { 
 632                 if (scan
[i
].bm_bighint 
== (daddr_t
)-1) { 
 636                 if (count 
>= radix
) { 
 664  * BLST_RADIX_INIT() - initialize radix tree 
 666  *      Initialize our meta structures and bitmaps and calculate the exact 
 667  *      amount of space required to manage 'count' blocks - this space may 
 668  *      be considerably less than the calculated radix due to the large 
 669  *      RADIX values we use. 
 673 blst_radix_init(blmeta_t 
*scan
, daddr_t radix
, int skip
, daddr_t count
) 
 677         daddr_t memindex 
= 0; 
 683         if (radix 
== BLIST_BMAP_RADIX
) { 
 685                         scan
->bm_bighint 
= 0; 
 686                         scan
->u
.bmu_bitmap 
= 0; 
 692          * Meta node.  If allocating the entire object we can special 
 693          * case it.  However, we need to figure out how much memory 
 694          * is required to manage 'count' blocks, so we continue on anyway. 
 698                 scan
->bm_bighint 
= 0; 
 699                 scan
->u
.bmu_avail 
= 0; 
 702         radix 
>>= BLIST_META_RADIX_SHIFT
; 
 703         next_skip 
= (skip 
>> BLIST_META_RADIX_SHIFT
); 
 705         for (i 
= 1; i 
<= skip
; i 
+= next_skip
) { 
 706                 if (count 
>= radix
) { 
 708                          * Allocate the entire object 
 710                         memindex 
= i 
+ blst_radix_init( 
 711                                 ((scan
) ? &scan
[i
] : NULL
), 
 717                 } else if (count 
> 0) { 
 719                          * Allocate a partial object 
 721                         memindex 
= i 
+ blst_radix_init( 
 722                                 ((scan
) ? &scan
[i
] : NULL
), 
 730                          * Add terminator and break out 
 733                                 scan
[i
].bm_bighint 
= (daddr_t
)-1; 
 747 blst_radix_print(blmeta_t 
*scan
, daddr_t blk
, daddr_t radix
, int skip
, int tab
) 
 753         if (radix 
== BLIST_BMAP_RADIX
) { 
 755                         "%*.*s(%04x,%d): bitmap %08x big=%d\n", 
 764         if (scan
->u
.bmu_avail 
== 0) { 
 766                         "%*.*s(%04x,%d) ALL ALLOCATED\n", 
 773         if (scan
->u
.bmu_avail 
== radix
) { 
 775                         "%*.*s(%04x,%d) ALL FREE\n", 
 784                 "%*.*s(%04x,%d): subtree (%d/%d) big=%d {\n", 
 792         radix 
>>= BLIST_META_RADIX_SHIFT
; 
 793         next_skip 
= (skip 
>> BLIST_META_RADIX_SHIFT
); 
 796         for (i 
= 1; i 
<= skip
; i 
+= next_skip
) { 
 797                 if (scan
[i
].bm_bighint 
== (daddr_t
)-1) { 
 799                                 "%*.*s(%04x,%d): Terminator\n", 
 828 main(int ac
, char **av
) 
 834         for (i 
= 1; i 
< ac
; ++i
) { 
 835                 const char *ptr 
= av
[i
]; 
 837                         size 
= strtol(ptr
, NULL
, 0); 
 841                 fprintf(stderr
, "Bad option: %s\n", ptr 
- 2); 
 844         bl 
= blist_create(size
); 
 845         blist_free(bl
, 0, size
); 
 853                 printf("%d/%d/%d> ", bl
->bl_free
, size
, bl
->bl_radix
); 
 855                 if (fgets(buf
, sizeof(buf
), stdin
) == NULL
) { 
 860                         if (sscanf(buf 
+ 1, "%d", &count
) == 1) { 
 861                                 blist_resize(&bl
, count
, 1); 
 869                         if (sscanf(buf 
+ 1, "%d", &count
) == 1) { 
 870                                 daddr_t blk 
= blist_alloc(bl
, count
); 
 871                                 printf("    R=%04x\n", blk
); 
 877                         if (sscanf(buf 
+ 1, "%x %d", &da
, &count
) == 2) { 
 878                                 blist_free(bl
, da
, count
); 
 902 panic(const char *ctl
, ...) 
 907         vfprintf(stderr
, ctl
, va
); 
 908         fprintf(stderr
, "\n");