/*
 * Copyright (c) 2016 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <kern/cpu_data.h>
#include <kern/kern_types.h>
#include <kern/locks.h>
#include <kern/ltable.h>
#include <kern/zalloc.h>
#include <libkern/OSAtomic.h>
#include <pexpert/pexpert.h>
#include <vm/vm_kern.h>


#define P2ROUNDUP(x, align) (-(-((uint32_t)(x)) & -(align)))
#define ROUNDDOWN(x, y) (((x)/(y))*(y))
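
/*
 * Worked example of the round-up identity above: for a power-of-two 'align',
 * P2ROUNDUP rounds 'x' up to the next multiple of 'align' via -(-x & -align),
 * e.g. (in uint32_t arithmetic)
 *   P2ROUNDUP(5, 4) == -(0xFFFFFFFB & 0xFFFFFFFC) == -(0xFFFFFFF8) == 8
 *   P2ROUNDUP(8, 4) == 8   (already-aligned values are unchanged)
 */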

/* ----------------------------------------------------------------------
 *
 * Lockless Link Table Interface
 *
 * ---------------------------------------------------------------------- */

vm_size_t g_lt_max_tbl_size;
static lck_grp_t g_lt_lck_grp;

/* default VA space for link tables (zone allocated) */
#define DEFAULT_MAX_TABLE_SIZE P2ROUNDUP(8 * 1024 * 1024, PAGE_SIZE)

#if DEVELOPMENT || DEBUG
/* global for lldb macros */
uint64_t g_lt_idx_max = LT_IDX_MAX;
#endif


/* construct a link table element from an offset and mask into a slab */
#define lt_elem_ofst_slab(slab, slab_msk, ofst) \
	/* cast through 'void *' to avoid compiler alignment warning messages */ \
	((struct lt_elem *)((void *)((uintptr_t)(slab) + ((ofst) & (slab_msk)))))

#if CONFIG_LTABLE_STATS
/* version that makes no assumption on waste within a slab */
static inline struct lt_elem *
lt_elem_idx(struct link_table *table, uint32_t idx)
{
	int slab_idx = idx / table->slab_elem;
	struct lt_elem *slab = table->table[slab_idx];
	if (!slab) {
		panic("Invalid index:%d slab:%d (NULL) for table:%p\n",
		    idx, slab_idx, table);
	}
	assert(slab->lt_id.idx <= idx && (slab->lt_id.idx + table->slab_elem) > idx);
	return lt_elem_ofst_slab(slab, table->slab_msk, (idx - slab->lt_id.idx) * table->elem_sz);
}
#else /* !CONFIG_LTABLE_STATS */
/* version that assumes 100% utilization of slabs (no waste) */
static inline struct lt_elem *
lt_elem_idx(struct link_table *table, uint32_t idx)
{
	uint32_t ofst = idx * table->elem_sz;
	struct lt_elem *slab = table->table[ofst >> table->slab_shift];
	if (!slab) {
		panic("Invalid index:%d slab:%d (NULL) for table:%p\n",
		    idx, (ofst >> table->slab_shift), table);
	}
	assert(slab->lt_id.idx <= idx && (slab->lt_id.idx + table->slab_elem) > idx);
	return lt_elem_ofst_slab(slab, table->slab_msk, ofst);
}
#endif /* CONFIG_LTABLE_STATS */

static int __assert_only
lt_elem_in_range(struct lt_elem *elem, struct link_table *table)
{
	struct lt_elem **base = table->table;
	uintptr_t e = (uintptr_t)elem;
	assert(base != NULL);
	while (*base != NULL) {
		uintptr_t b = (uintptr_t)(*base);
		if (e >= b && e < b + table->slab_sz) {
			return 1;
		}
		base++;
		if ((uintptr_t)base >= (uintptr_t)table->table + PAGE_SIZE) {
			return 0;
		}
	}
	return 0;
}


/**
 * lt_elem_invalidate: mark 'elem' as invalid
 *
 * NOTE: this does _not_ get or put a reference on 'elem'
 */
void
lt_elem_invalidate(struct lt_elem *elem)
{
	uint32_t __assert_only old = OSBitAndAtomic(~LT_BITS_VALID, &elem->lt_bits);
	OSMemoryBarrier();
	assert(((lt_bits_type(old) != LT_RESERVED) && (old & LT_BITS_VALID)) ||
	    ((lt_bits_type(old) == LT_RESERVED) && !(old & LT_BITS_VALID)));
}

/**
 * lt_elem_mkvalid: mark 'elem' as valid
 *
 * NOTE: this does _not_ get or put a reference on 'elem'
 */
void
lt_elem_mkvalid(struct lt_elem *elem)
{
	uint32_t __assert_only old = OSBitOrAtomic(LT_BITS_VALID, &elem->lt_bits);
	OSMemoryBarrier();
	assert(!(old & LT_BITS_VALID));
}

static void
lt_elem_set_type(struct lt_elem *elem, int type)
{
	uint32_t old_bits, new_bits;
	do {
		old_bits = elem->lt_bits;
		new_bits = (old_bits & ~LT_BITS_TYPE) |
		    ((type & LT_BITS_TYPE_MASK) << LT_BITS_TYPE_SHIFT);
	} while (OSCompareAndSwap(old_bits, new_bits, &elem->lt_bits) == FALSE);
	OSMemoryBarrier();
}


/**
 * ltable_bootstrap: bootstrap a link table
 *
 * Called once at system boot
 */
void
ltable_bootstrap(void)
{
	static int s_is_bootstrapped = 0;

	uint32_t tmp32 = 0;

	if (s_is_bootstrapped) {
		return;
	}
	s_is_bootstrapped = 1;

	g_lt_max_tbl_size = DEFAULT_MAX_TABLE_SIZE;
	if (PE_parse_boot_argn("lt_tbl_size", &tmp32, sizeof(tmp32)) == TRUE) {
		g_lt_max_tbl_size = (vm_size_t)P2ROUNDUP(tmp32, PAGE_SIZE);
	}

	lck_grp_init(&g_lt_lck_grp, "link_table_locks", LCK_GRP_ATTR_NULL);
}

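/*
 * Illustration of the boot-arg handling above: booting with the boot-arg
 * "lt_tbl_size=16777216" would raise the per-table VA limit from the 8MB
 * default to 16MB (the parsed value is rounded up to a page multiple). The
 * specific value is only an example, not a recommended setting.
 */
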
/**
 * ltable_init: initialize a link table with given parameters
 *
 */
void
ltable_init(struct link_table *table, const char *name,
    uint32_t max_tbl_elem, uint32_t elem_sz,
    ltable_poison_func poison)
{
	kern_return_t kr;
	uint32_t slab_sz, slab_shift, slab_msk, slab_elem;
	zone_t slab_zone;
	size_t max_tbl_sz;
	struct lt_elem *e, **base;

#ifndef CONFIG_LTABLE_STATS
	/* the element size _must_ be a power of two! */
	if ((elem_sz & (elem_sz - 1)) != 0) {
		panic("elem_sz:%d for table:'%s' must be a power of two!",
		    elem_sz, name);
	}
#endif

	/*
	 * First, allocate a single page of memory to act as the base
	 * for the table's element slabs
	 */
	kr = kernel_memory_allocate(kernel_map, (vm_offset_t *)&base,
	    PAGE_SIZE, 0, KMA_NOPAGEWAIT, VM_KERN_MEMORY_LTABLE);
	if (kr != KERN_SUCCESS) {
		panic("Cannot initialize %s table: "
		    "kernel_memory_allocate failed:%d\n", name, kr);
	}
	memset(base, 0, PAGE_SIZE);

	/*
	 * Based on the maximum table size, calculate the slab size:
	 * we allocate 1 page of slab pointers for the table, and we need to
	 * index elements of 'elem_sz'; this gives us the slab size based on
	 * the maximum size to which the table should grow.
	 */
	max_tbl_sz = (max_tbl_elem * elem_sz);
	max_tbl_sz = P2ROUNDUP(max_tbl_sz, PAGE_SIZE);

	/* system maximum table size divided by number of slots in a page */
	slab_sz = (uint32_t)(max_tbl_sz / (PAGE_SIZE / (sizeof(void *))));
	if (slab_sz < PAGE_SIZE) {
		slab_sz = PAGE_SIZE;
	}

	/* make sure the slab size is a power of two */
	slab_shift = 0;
	slab_msk = ~0;
	for (uint32_t i = 0; i < 31; i++) {
		uint32_t bit = (1 << i);
		if ((slab_sz & bit) == slab_sz) {
			slab_shift = i;
			slab_msk = 0;
			for (uint32_t j = 0; j < i; j++) {
				slab_msk |= (1 << j);
			}
			break;
		}
		slab_sz &= ~bit;
	}
	slab_elem = slab_sz / elem_sz;

	/* initialize the table's slab zone (for table growth) */
	ltdbg("Initializing %s zone: slab:%d (%d,0x%x) max:%ld",
	    name, slab_sz, slab_shift, slab_msk, max_tbl_sz);
	slab_zone = zinit(slab_sz, max_tbl_sz, slab_sz, name);
	assert(slab_zone != ZONE_NULL);

	/* allocate the first slab and populate it */
	base[0] = (struct lt_elem *)zalloc(slab_zone);
	if (base[0] == NULL) {
		panic("Can't allocate a %s table slab from zone:%p",
		    name, slab_zone);
	}

	memset(base[0], 0, slab_sz);

	/* setup the initial freelist */
	ltdbg("initializing %d links (%d bytes each)...", slab_elem, elem_sz);
	for (unsigned l = 0; l < slab_elem; l++) {
		e = lt_elem_ofst_slab(base[0], slab_msk, l * elem_sz);
		e->lt_id.idx = l;
		/*
		 * setting generation to 0 ensures that a setid of 0 is
		 * invalid because the generation will be incremented before
		 * each element's allocation.
		 */
		e->lt_id.generation = 0;
		e->lt_next_idx = l + 1;
	}

	/* make sure the last free element points to a never-valid idx */
	e = lt_elem_ofst_slab(base[0], slab_msk, (slab_elem - 1) * elem_sz);
	e->lt_next_idx = LT_IDX_MAX;

	lck_mtx_init(&table->lock, &g_lt_lck_grp, LCK_ATTR_NULL);

	table->slab_sz = slab_sz;
	table->slab_shift = slab_shift;
	table->slab_msk = slab_msk;
	table->slab_elem = slab_elem;
	table->slab_zone = slab_zone;

	table->elem_sz = elem_sz;
	table->nelem = slab_elem;
	table->used_elem = 0;
	table->elem_sz = elem_sz;
	table->poison = poison;

	table->table = base;
	table->next_free_slab = &base[1];
	table->free_list.id = base[0]->lt_id.id;

#if CONFIG_LTABLE_STATS
	table->nslabs = 1;
	table->nallocs = 0;
	table->nreallocs = 0;
	table->npreposts = 0;
	table->nreservations = 0;
	table->nreserved_releases = 0;

	table->max_used = 0;
	table->avg_used = 0;
	table->max_reservations = 0;
	table->avg_reservations = 0;
#endif
}

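/*
 * Example (illustrative sketch): how a client subsystem might set up its own
 * link table with the interface above. The element type, poison callback and
 * names ('struct example_obj', 'example_poison', 'example_table') are
 * hypothetical and not part of this file; what they illustrate is that the
 * table addresses every element as a 'struct lt_elem' (so a larger element
 * type must begin with one) and that, without CONFIG_LTABLE_STATS, the
 * element size must be a power of two.
 *
 *	struct example_obj {
 *		struct lt_elem lt_elem;     // table bookkeeping; must come first
 *		uint64_t       payload[6];  // sized so sizeof() stays a power of two
 *	};
 *
 *	static struct link_table example_table;
 *
 *	static void
 *	example_poison(struct link_table *t, struct lt_elem *elem)
 *	{
 *		struct example_obj *obj = (struct example_obj *)elem;
 *		(void)t;
 *		memset(obj->payload, 0xde, sizeof(obj->payload));
 *	}
 *
 *	static void
 *	example_table_init(void)
 *	{
 *		ltable_bootstrap();
 *		ltable_init(&example_table, "example.table", 4096,
 *		    sizeof(struct example_obj), example_poison);
 *	}
 */
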

/**
 * ltable_grow: grow a link table by adding another 'slab' of table elements
 *
 * Conditions:
 *	table mutex is unlocked
 *	calling thread can block
 */
void
ltable_grow(struct link_table *table, uint32_t min_free)
{
	struct lt_elem *slab, **slot;
	struct lt_elem *e = NULL, *first_new_elem, *last_new_elem;
	struct ltable_id free_id;
	uint32_t free_elem;

	assert(get_preemption_level() == 0);
	assert(table && table->slab_zone);

	lck_mtx_lock(&table->lock);

	free_elem = table->nelem - table->used_elem;

	/*
	 * If the caller just wanted to ensure a minimum number of elements,
	 * do that (and don't just blindly grow the table). Also, don't grow
	 * the table unnecessarily - we could have been beaten by a higher
	 * priority thread that acquired the lock and grew the table before
	 * we got here.
	 */
	if (free_elem > min_free) {
		lck_mtx_unlock(&table->lock);
		return;
	}

	/* we are now committed to table growth */
	ltdbg_v("BEGIN");

	if (table->next_free_slab == NULL) {
		/*
		 * before we panic, check one more time to see if any other
		 * threads have freed up space in the table.
		 */
		if ((table->nelem - table->used_elem) > 0) {
			/* there's at least 1 free element: don't panic yet */
			lck_mtx_unlock(&table->lock);
			return;
		}
		panic("No more room to grow table: %p (nelem: %d, used: %d)",
		    table, table->nelem, table->used_elem);
	}
	slot = table->next_free_slab;
	table->next_free_slab++;
	if ((uintptr_t)table->next_free_slab >= (uintptr_t)table->table + PAGE_SIZE) {
		table->next_free_slab = NULL;
	}

	assert(*slot == NULL);

	/* allocate another slab */
	slab = (struct lt_elem *)zalloc(table->slab_zone);
	if (slab == NULL) {
		panic("Can't allocate a %s table (%p) slab from zone:%p",
		    table->slab_zone->zone_name, table, table->slab_zone);
	}

	memset(slab, 0, table->slab_sz);

	/* put the new elements into a freelist */
	ltdbg_v(" init %d new links...", table->slab_elem);
	for (unsigned l = 0; l < table->slab_elem; l++) {
		uint32_t idx = l + table->nelem;
		if (idx >= (LT_IDX_MAX - 1)) {
			break; /* the last element of the last slab */
		}
		e = lt_elem_ofst_slab(slab, table->slab_msk, l * table->elem_sz);
		e->lt_id.idx = idx;
		e->lt_next_idx = idx + 1;
	}
	last_new_elem = e;
	assert(last_new_elem != NULL);

	first_new_elem = lt_elem_ofst_slab(slab, table->slab_msk, 0);

	/* update table bookkeeping, and atomically swap the freelist head */
	*slot = slab;
	if (table->nelem + table->slab_elem >= LT_IDX_MAX) {
		table->nelem = LT_IDX_MAX - 1;
	} else {
		table->nelem += table->slab_elem;
	}

#if CONFIG_LTABLE_STATS
	table->nslabs += 1;
#endif

	/*
	 * The atomic swap of the free list head marks the end of table
	 * growth. Incoming requests may now use the newly allocated slab
	 * of table elements.
	 */
	free_id = table->free_list;
	/* connect the existing free list to the end of the new free list */
	last_new_elem->lt_next_idx = free_id.idx;
	while (OSCompareAndSwap64(free_id.id, first_new_elem->lt_id.id,
	    &table->free_list.id) == FALSE) {
		OSMemoryBarrier();
		free_id = table->free_list;
		last_new_elem->lt_next_idx = free_id.idx;
	}
	OSMemoryBarrier();

	lck_mtx_unlock(&table->lock);

	return;
}

#if DEVELOPMENT || DEBUG

int
ltable_nelem(struct link_table *table)
{
	int nelem = 0;

	lck_mtx_lock(&table->lock);

	nelem = table->used_elem;

	lck_mtx_unlock(&table->lock);

	return nelem;
}
#endif

/**
 * ltable_alloc_elem: allocate one or more elements from a given table
 *
 * The returned element(s) will be of type 'type', but will remain invalid.
 *
 * If the caller has disabled preemption, then this function may (rarely) spin
 * waiting for another thread to either release 'nelem' table elements or
 * grow the table.
 *
 * If the caller can block, then this function may (rarely) block while
 * the table grows to meet the demand for 'nelem' element(s).
 */
__attribute__((noinline))
struct lt_elem *
ltable_alloc_elem(struct link_table *table, int type,
    int nelem, int nattempts)
{
	int nspins = 0, ntries = 0, nalloc = 0;
	uint32_t table_size;
	struct lt_elem *elem = NULL;
	struct ltable_id free_id, next_id;

	static const int max_retries = 500;

	if (type != LT_ELEM && type != LT_LINK && type != LT_RESERVED) {
		panic("link_table_alloc of invalid elem type:%d from table @%p",
		    type, table);
	}

	assert(nelem > 0);

	/*
	 * If the caller only wants to try a certain number of times, make it
	 * look like we've already made (MAX - nattempts) tries at allocation
	 */
	if (nattempts > 0 && nattempts <= max_retries) {
		ntries = max_retries - nattempts;
	}

try_again:
	elem = NULL;
	if (ntries++ > max_retries) {
		struct lt_elem *tmp;
		if (nattempts > 0) {
			/*
			 * The caller specified a particular number of
			 * attempts before failure, so it's expected that
			 * they're prepared to handle a NULL return.
			 */
			return NULL;
		}

		if (table->used_elem + nelem >= table_size) {
			panic("No more room to grow table: 0x%p size:%d, used:%d, requested elem:%d",
			    table, table_size, table->used_elem, nelem);
		}
		if (nelem == 1) {
			panic("Too many alloc retries: %d, table:%p, type:%d, nelem:%d",
			    ntries, table, type, nelem);
		}
		/* don't panic: try allocating one-at-a-time */
		while (nelem > 0) {
			tmp = ltable_alloc_elem(table, type, 1, nattempts);
			if (elem) {
				lt_elem_list_link(table, tmp, elem);
			}
			elem = tmp;
			--nelem;
		}
		assert(elem != NULL);
		return elem;
	}

	nalloc = 0;
	table_size = table->nelem;

	if (table->used_elem + nelem >= table_size) {
		if (get_preemption_level() != 0) {
#if CONFIG_LTABLE_STATS
			table->nspins += 1;
#endif
			/*
			 * We may have just raced with table growth: check
			 * again to make sure there really isn't any space.
			 */
			if (++nspins > 4) {
				panic("Can't grow table %p with preemption"
				    " disabled!", table);
			}
			delay(1);
			goto try_again;
		}
		ltable_grow(table, nelem);
		goto try_again;
	}

	/* read this value only once before the CAS */
	free_id = table->free_list;
	if (free_id.idx >= table_size) {
		goto try_again;
	}

	/*
	 * Find the item on the free list which will become the new free list
	 * head, but be careful not to modify any memory (read only)! Other
	 * threads can alter table state at any time up until the CAS. We
	 * don't modify any memory until we've successfully swapped out the
	 * free list head with the one we've investigated.
	 */
	for (struct lt_elem *next_elem = lt_elem_idx(table, free_id.idx);
	    nalloc < nelem;
	    nalloc++) {
		elem = next_elem;
		next_id.generation = 0;
		next_id.idx = next_elem->lt_next_idx;
		if (next_id.idx < table->nelem) {
			next_elem = lt_elem_idx(table, next_id.idx);
			next_id.id = next_elem->lt_id.id;
		} else {
			goto try_again;
		}
	}
	/* 'elem' points to the last element being allocated */

	if (OSCompareAndSwap64(free_id.id, next_id.id,
	    &table->free_list.id) == FALSE) {
		goto try_again;
	}

	/* load barrier */
	OSMemoryBarrier();

	/*
	 * After the CAS, we know that we own free_id, and it points to a
	 * valid table entry (checked above). Grab the table pointer and
	 * reset some values.
	 */
	OSAddAtomic(nelem, &table->used_elem);

	/* end the list of allocated elements */
	elem->lt_next_idx = LT_IDX_MAX;
	/* reset 'elem' to point to the first allocated element */
	elem = lt_elem_idx(table, free_id.idx);

	/*
	 * Update the generation count, and return the element(s)
	 * with a single reference (and no valid bit). If the
	 * caller immediately calls _put() on any element, then
	 * it will be released back to the free list. If the caller
	 * subsequently marks the element as valid, then the put
	 * will simply drop the reference.
	 */
	for (struct lt_elem *tmp = elem;;) {
		assert(!lt_bits_valid(tmp->lt_bits) &&
		    (lt_bits_refcnt(tmp->lt_bits) == 0));
		--nalloc;
		tmp->lt_id.generation += 1;
		tmp->lt_bits = 1;
		lt_elem_set_type(tmp, type);
		if (tmp->lt_next_idx == LT_IDX_MAX) {
			break;
		}
		assert(tmp->lt_next_idx != LT_IDX_MAX);
		tmp = lt_elem_idx(table, tmp->lt_next_idx);
	}
	assert(nalloc == 0);

#if CONFIG_LTABLE_STATS
	uint64_t nreservations;
	table->nallocs += nelem;
	if (type == LT_RESERVED) {
		OSIncrementAtomic64(&table->nreservations);
	}
	nreservations = table->nreservations;
	if (table->used_elem > table->max_used) {
		table->max_used = table->used_elem;
	}
	if (nreservations > table->max_reservations) {
		table->max_reservations = nreservations;
	}
	table->avg_used = (table->avg_used + table->used_elem) / 2;
	table->avg_reservations = (table->avg_reservations + nreservations) / 2;
#endif

	return elem;
}

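/*
 * Example (illustrative sketch): typical single-element allocation, using the
 * hypothetical 'example_table' from the ltable_init() sketch above. The
 * element comes back with one reference and the valid bit clear; publishing
 * it with lt_elem_mkvalid() makes its 64-bit id (elem->lt_id.id) usable with
 * ltable_get_elem() elsewhere.
 *
 *	static uint64_t
 *	example_obj_create(void)
 *	{
 *		struct lt_elem *elem;
 *
 *		elem = ltable_alloc_elem(&example_table, LT_ELEM, 1, 0);
 *		// ... initialize the payload surrounding 'elem' here ...
 *		lt_elem_mkvalid(elem);
 *		return elem->lt_id.id;      // handle for later lookups
 *	}
 */
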

/**
 * ltable_realloc_elem: convert a reserved element to a particular type
 *
 * This function is used to convert reserved elements (not yet marked valid)
 * to the given 'type'. The generation of 'elem' is incremented, the element
 * is disconnected from any list to which it belongs, and its type is set to
 * 'type'.
 */
void
ltable_realloc_elem(struct link_table *table, struct lt_elem *elem, int type)
{
	(void)table;
	assert(lt_elem_in_range(elem, table) &&
	    !lt_bits_valid(elem->lt_bits));

#if CONFIG_LTABLE_STATS
	table->nreallocs += 1;
	if (lt_bits_type(elem->lt_bits) == LT_RESERVED && type != LT_RESERVED) {
		/*
		 * This isn't under any lock, so we'll clamp it.
		 * The stats are meant to be informative, not perfectly
		 * accurate.
		 */
		OSDecrementAtomic64(&table->nreservations);
	}
	table->avg_reservations = (table->avg_reservations + table->nreservations) / 2;
#endif

	/*
	 * Return the same element with a new generation count, and a
	 * (potentially) new type. Don't touch the refcount: the caller
	 * is responsible for getting that (and the valid bit) correct.
	 */
	elem->lt_id.generation += 1;
	elem->lt_next_idx = LT_IDX_MAX;
	lt_elem_set_type(elem, type);

	return;
}


/**
 * ltable_free_elem: release an element back to a link table
 *
 * Do not call this function directly: use ltable_[get|put]_elem!
 *
 * Conditions:
 *	'elem' was originally allocated from 'table'
 *	'elem' is _not_ marked valid
 *	'elem' has a reference count of 0
 */
static void
ltable_free_elem(struct link_table *table, struct lt_elem *elem)
{
	struct ltable_id next_id;

	assert(lt_elem_in_range(elem, table) &&
	    !lt_bits_valid(elem->lt_bits) &&
	    (lt_bits_refcnt(elem->lt_bits) == 0));

	OSDecrementAtomic(&table->used_elem);

#if CONFIG_LTABLE_STATS
	table->avg_used = (table->avg_used + table->used_elem) / 2;
	if (lt_bits_type(elem->lt_bits) == LT_RESERVED) {
		OSDecrementAtomic64(&table->nreservations);
	}
	table->avg_reservations = (table->avg_reservations + table->nreservations) / 2;
#endif

	elem->lt_bits = 0;

	if (table->poison) {
		(table->poison)(table, elem);
	}

again:
	next_id = table->free_list;
	if (next_id.idx >= table->nelem) {
		elem->lt_next_idx = LT_IDX_MAX;
	} else {
		elem->lt_next_idx = next_id.idx;
	}

	/* store barrier */
	OSMemoryBarrier();
	if (OSCompareAndSwap64(next_id.id, elem->lt_id.id,
	    &table->free_list.id) == FALSE) {
		goto again;
	}
}


/**
 * ltable_get_elem: get a reference to a table element identified by 'id'
 *
 * Returns a reference to the table element associated with the given 'id', or
 * NULL if the 'id' was invalid or does not exist in 'table'. The caller is
 * responsible for releasing the reference using ltable_put_elem().
 *
 * NOTE: if the table element pointed to by 'id' is marked as invalid,
 * this function will return NULL.
 */
struct lt_elem *
ltable_get_elem(struct link_table *table, uint64_t id)
{
	struct lt_elem *elem;
	uint32_t idx, bits, new_bits;

	/*
	 * Here we have a reference to the table which is guaranteed to remain
	 * valid until we drop the reference
	 */

	idx = ((struct ltable_id *)&id)->idx;

	if (idx >= table->nelem) {
		panic("id:0x%llx : idx:%d > %d", id, idx, table->nelem);
	}

	elem = lt_elem_idx(table, idx);

	/* verify the validity by taking a reference on the table object */
	bits = elem->lt_bits;
	if (!lt_bits_valid(bits)) {
		return NULL;
	}

	/*
	 * do a pre-verify on the element ID to potentially
	 * avoid 2 compare-and-swaps
	 */
	if (elem->lt_id.id != id) {
		return NULL;
	}

	new_bits = bits + 1;

	/* check for overflow */
	assert(lt_bits_refcnt(new_bits) > 0);

	while (OSCompareAndSwap(bits, new_bits, &elem->lt_bits) == FALSE) {
		/*
		 * either the element became invalid,
		 * or someone else grabbed/removed a reference.
		 */
		bits = elem->lt_bits;
		if (!lt_bits_valid(bits)) {
			/* don't return invalid elements */
			return NULL;
		}
		new_bits = bits + 1;
		assert(lt_bits_refcnt(new_bits) > 0);
	}

	/* load barrier */
	OSMemoryBarrier();

	/* check to see that our reference is to the same generation! */
	if (elem->lt_id.id != id) {
		/*
		 * ltdbg("ID:0x%llx table generation (%d) != %d",
		 *     id, elem->lt_id.generation,
		 *     ((struct ltable_id *)&id)->generation);
		 */
		ltable_put_elem(table, elem);
		return NULL;
	}

	/* We now have a reference on a valid object */
	return elem;
}

/**
 * ltable_put_elem: release a reference to a table element
 *
 * This function releases a reference taken on a table element via
 * ltable_get_elem(). This function will release the element back to 'table'
 * when the reference count goes to 0 AND the element has been marked as
 * invalid.
 */
void
ltable_put_elem(struct link_table *table, struct lt_elem *elem)
{
	uint32_t bits, new_bits;

	assert(lt_elem_in_range(elem, table));

	bits = elem->lt_bits;
	new_bits = bits - 1;

	/* check for underflow */
	assert(lt_bits_refcnt(new_bits) < LT_BITS_REFCNT_MASK);

	while (OSCompareAndSwap(bits, new_bits, &elem->lt_bits) == FALSE) {
		bits = elem->lt_bits;
		new_bits = bits - 1;
		/* catch underflow */
		assert(lt_bits_refcnt(new_bits) < LT_BITS_REFCNT_MASK);
	}

	/* load barrier */
	OSMemoryBarrier();

	/*
	 * if this was the last reference, and it was marked as invalid,
	 * then we can add this link object back to the free list
	 */
	if (!lt_bits_valid(new_bits) && (lt_bits_refcnt(new_bits) == 0)) {
		ltable_free_elem(table, elem);
	}

	return;
}

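/*
 * Example (illustrative sketch): looking up and releasing an element by the
 * 64-bit id handed out in the allocation sketch above (the 'example_*' names
 * are hypothetical). Every successful ltable_get_elem() must be paired with
 * ltable_put_elem(); once an element has been invalidated, dropping the last
 * reference returns it to the free list.
 *
 *	static void
 *	example_obj_use(uint64_t id)
 *	{
 *		struct lt_elem *elem = ltable_get_elem(&example_table, id);
 *		if (elem == NULL) {
 *			return;        // stale id, or already invalidated
 *		}
 *		// ... operate on the payload surrounding 'elem' ...
 *		ltable_put_elem(&example_table, elem);
 *	}
 *
 *	static void
 *	example_obj_destroy(uint64_t id)
 *	{
 *		struct lt_elem *elem = ltable_get_elem(&example_table, id);
 *		if (elem == NULL) {
 *			return;
 *		}
 *		lt_elem_invalidate(elem);
 *		ltable_put_elem(&example_table, elem);  // frees once the last reference drops
 *	}
 */
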

/* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 *
 * API: lt_elem_list_...
 *
 * Reuse the free list linkage member, 'lt_next_idx', of a table element to
 * form a slightly more generic singly-linked list. All members of this
 * list have been allocated from a table, but have not been made valid.
 *
 * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -*/

/**
 * lt_elem_list_link: link a child onto a parent
 *
 * Note that if 'parent' is the head of a list, this function will follow that
 * list and attach 'child' to the end of it. In the simplest case, this
 * results in: parent->child
 * however this could also result in: parent->...->child
 */
int
lt_elem_list_link(struct link_table *table, struct lt_elem *parent, struct lt_elem *child)
{
	int nelem = 1;

	assert(lt_elem_in_range(parent, table));

	/* find the end of the parent's list */
	while (parent->lt_next_idx != LT_IDX_MAX) {
		assert(parent->lt_next_idx < table->nelem);
		parent = lt_elem_idx(table, parent->lt_next_idx);
		nelem++;
	}

	if (child) {
		assert(lt_elem_in_range(child, table));
		parent->lt_next_idx = child->lt_id.idx;
	}

	return nelem;
}


/**
 * lt_elem_list_first: obtain a pointer to the first element of a list.
 *
 * This function converts the head of a singly-linked list, 'id', into a real
 * lt_elem object and returns a pointer to the object.
 *
 * It does _not_ take an extra reference on the object: the list implicitly
 * holds that reference.
 */
struct lt_elem *
lt_elem_list_first(struct link_table *table, uint64_t id)
{
	uint32_t idx;
	struct lt_elem *elem = NULL;

	if (id == 0) {
		return NULL;
	}

	idx = ((struct ltable_id *)&id)->idx;

	if (idx > table->nelem) {
		panic("Invalid element for id:0x%llx", id);
	}
	elem = lt_elem_idx(table, idx);

	/* invalid element: reserved ID was probably already reallocated */
	if (elem->lt_id.id != id) {
		return NULL;
	}

	/* the returned element should _not_ be marked valid! */
	if (lt_bits_valid(elem->lt_bits) ||
	    lt_bits_type(elem->lt_bits) != LT_RESERVED ||
	    lt_bits_refcnt(elem->lt_bits) != 1) {
		panic("Valid/unreserved element %p (0x%x) in reserved list",
		    elem, elem->lt_bits);
	}

	return elem;
}


/**
 * lt_elem_list_next: return the item subsequent to 'head' in a list
 *
 * Note that this will return NULL if 'head' is actually the end of the list.
 */
struct lt_elem *
lt_elem_list_next(struct link_table *table, struct lt_elem *head)
{
	struct lt_elem *elem;

	if (!head) {
		return NULL;
	}
	if (head->lt_next_idx >= table->nelem) {
		return NULL;
	}

	elem = lt_elem_idx(table, head->lt_next_idx);
	assert(lt_elem_in_range(elem, table));

	return elem;
}


/**
 * lt_elem_list_break: break a list in two around 'elem'
 *
 * This function will reset the next_idx field of 'elem' (making it the end of
 * the list), and return the element subsequent to 'elem' in the list
 * (which could be NULL)
 */
struct lt_elem *
lt_elem_list_break(struct link_table *table, struct lt_elem *elem)
{
	struct lt_elem *next;

	if (!elem) {
		return NULL;
	}
	next = lt_elem_list_next(table, elem);
	elem->lt_next_idx = LT_IDX_MAX;

	return next;
}


/**
 * lt_elem_list_pop: pop an item off the head of a list
 *
 * The list head is pointed to by '*id', the element corresponding to '*id' is
 * returned by this function, and the new list head is returned in the in/out
 * parameter, '*id'. The caller is responsible for the reference on the
 * returned object. A realloc is done to reset the type of the object, but it
 * is still left invalid.
 */
struct lt_elem *
lt_elem_list_pop(struct link_table *table, uint64_t *id, int type)
{
	struct lt_elem *first, *next;

	if (!id || *id == 0) {
		return NULL;
	}

	/* pop an item off the reserved stack */

	first = lt_elem_list_first(table, *id);
	if (!first) {
		*id = 0;
		return NULL;
	}

	next = lt_elem_list_next(table, first);
	if (next) {
		*id = next->lt_id.id;
	} else {
		*id = 0;
	}

	ltable_realloc_elem(table, first, type);

	return first;
}

/**
 * lt_elem_list_release: free an entire list of reserved elements
 *
 * All elements in the list whose first member is 'head' will be released back
 * to 'table' as free elements. The 'type' parameter is used in development
 * kernels to assert that all elements on the list are of the given type.
 */
int
lt_elem_list_release(struct link_table *table, struct lt_elem *head,
    int __assert_only type)
{
	struct lt_elem *elem;
	struct ltable_id free_id;
	int nelem = 0;

	if (!head) {
		return 0;
	}

	for (elem = head;;) {
		assert(lt_elem_in_range(elem, table));
		assert(!lt_bits_valid(elem->lt_bits) && (lt_bits_refcnt(elem->lt_bits) == 1));
		assert(lt_bits_type(elem->lt_bits) == type);

		nelem++;
		elem->lt_bits = 0;
		if (table->poison) {
			(table->poison)(table, elem);
		}

		if (elem->lt_next_idx == LT_IDX_MAX) {
			break;
		}
		assert(elem->lt_next_idx < table->nelem);
		elem = lt_elem_idx(table, elem->lt_next_idx);
	}

	/*
	 * 'elem' now points to the end of our list, and 'head' points to the
	 * beginning. We want to atomically swap the free list pointer with
	 * the 'head' and ensure that 'elem' points to the previous free list
	 * head.
	 */

again:
	free_id = table->free_list;
	if (free_id.idx >= table->nelem) {
		elem->lt_next_idx = LT_IDX_MAX;
	} else {
		elem->lt_next_idx = free_id.idx;
	}

	/* store barrier */
	OSMemoryBarrier();
	if (OSCompareAndSwap64(free_id.id, head->lt_id.id,
	    &table->free_list.id) == FALSE) {
		goto again;
	}

	OSAddAtomic(-nelem, &table->used_elem);
	return nelem;
}
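
/*
 * Example (illustrative sketch): pre-reserving elements with LT_RESERVED and
 * consuming them later, again using the hypothetical 'example_table'. A
 * multi-element ltable_alloc_elem() returns a singly-linked list whose head
 * id identifies the whole reservation; lt_elem_list_pop() converts one
 * reserved element at a time to its real type, and lt_elem_list_release()
 * hands any unused remainder straight back to the free list.
 *
 *	static void
 *	example_reserve_and_use(void)
 *	{
 *		struct lt_elem *head, *elem;
 *		uint64_t reserved;
 *
 *		// reserve 4 elements up front (e.g. before taking a lock)
 *		head = ltable_alloc_elem(&example_table, LT_RESERVED, 4, 0);
 *		reserved = head->lt_id.id;
 *
 *		// consume one reservation as a real LT_ELEM
 *		elem = lt_elem_list_pop(&example_table, &reserved, LT_ELEM);
 *		lt_elem_mkvalid(elem);
 *
 *		// return the unused reservations to the table
 *		lt_elem_list_release(&example_table,
 *		    lt_elem_list_first(&example_table, reserved), LT_RESERVED);
 *	}
 */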