[apple/xnu.git] xnu-3789.60.24 / osfmk / kern / ltable.c
1 /*
2 * Copyright (c) 2016 Apple Inc. All rights reserved.
3 *
4 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
5 *
6 * This file contains Original Code and/or Modifications of Original Code
7 * as defined in and that are subject to the Apple Public Source License
8 * Version 2.0 (the 'License'). You may not use this file except in
9 * compliance with the License. The rights granted to you under the License
10 * may not be used to create, or enable the creation or redistribution of,
11 * unlawful or unlicensed copies of an Apple operating system, or to
12 * circumvent, violate, or enable the circumvention or violation of, any
13 * terms of an Apple operating system software license agreement.
14 *
15 * Please obtain a copy of the License at
16 * http://www.opensource.apple.com/apsl/ and read it before using this file.
17 *
18 * The Original Code and all software distributed under the License are
19 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
20 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
21 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
22 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
23 * Please see the License for the specific language governing rights and
24 * limitations under the License.
25 *
26 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
27 */
28 #include <kern/cpu_data.h>
29 #include <kern/kern_types.h>
30 #include <kern/locks.h>
31 #include <kern/ltable.h>
32 #include <kern/zalloc.h>
33 #include <libkern/OSAtomic.h>
34 #include <pexpert/pexpert.h>
35 #include <vm/vm_kern.h>
36
37
38 #define P2ROUNDUP(x, align) (-(-((uint32_t)(x)) & -(align)))
39 #define ROUNDDOWN(x,y) (((x)/(y))*(y))
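/*
 * P2ROUNDUP rounds 'x' up to the next multiple of the power-of-two 'align',
 * e.g. P2ROUNDUP(5000, 4096) == 8192; ROUNDDOWN truncates 'x' to a multiple
 * of 'y', e.g. ROUNDDOWN(5000, 4096) == 4096.
 */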
40
41 /* ----------------------------------------------------------------------
42 *
43 * Lockless Link Table Interface
44 *
45 * ---------------------------------------------------------------------- */
46
47 vm_size_t g_lt_max_tbl_size;
48 static lck_grp_t g_lt_lck_grp;
49
50 /* default VA space for link tables (zone allocated) */
51 #define DEFAULT_MAX_TABLE_SIZE P2ROUNDUP(8 * 1024 * 1024, PAGE_SIZE)
52
53 #if defined(DEVELOPMENT) || defined(DEBUG)
54 /* global for lldb macros */
55 uint64_t g_lt_idx_max = LT_IDX_MAX;
56 #endif
57
58
59 /* construct a link table element from an offset and mask into a slab */
60 #define lt_elem_ofst_slab(slab, slab_msk, ofst) \
61 /* cast through 'void *' to avoid compiler alignment warning messages */ \
62 ((struct lt_elem *)((void *)((uintptr_t)(slab) + ((ofst) & (slab_msk)))))
63
64 #if defined(CONFIG_LTABLE_STATS)
65 /* version that makes no assumptions about waste within a slab */
66 static inline struct lt_elem *
67 lt_elem_idx(struct link_table *table, uint32_t idx)
68 {
69 int slab_idx = idx / table->slab_elem;
70 struct lt_elem *slab = table->table[slab_idx];
71 if (!slab)
72 panic("Invalid index:%d slab:%d (NULL) for table:%p\n",
73 idx, slab_idx, table);
74 assert(slab->lt_id.idx <= idx && (slab->lt_id.idx + table->slab_elem) > idx);
75 return lt_elem_ofst_slab(slab, table->slab_msk, (idx - slab->lt_id.idx) * table->elem_sz);
76 }
77 #else /* !CONFIG_LTABLE_STATS */
78 /* version that assumes 100% utilization of slabs (no waste) */
79 static inline struct lt_elem *
80 lt_elem_idx(struct link_table *table, uint32_t idx)
81 {
82 uint32_t ofst = idx * table->elem_sz;
83 struct lt_elem *slab = table->table[ofst >> table->slab_shift];
84 if (!slab)
85 panic("Invalid index:%d slab:%d (NULL) for table:%p\n",
86 idx, (ofst >> table->slab_shift), table);
87 assert(slab->lt_id.idx <= idx && (slab->lt_id.idx + table->slab_elem) > idx);
88 return lt_elem_ofst_slab(slab, table->slab_msk, ofst);
89 }
90 #endif /* !CONFIG_LTABLE_STATS */
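/*
 * Illustrative walk-through of the index math above (hypothetical numbers,
 * assuming elem_sz = 64 and slab_sz = 16KB, i.e. slab_shift = 14 and
 * slab_msk = 0x3fff):
 *
 *   idx = 300  =>  ofst = 300 * 64 = 19200
 *                  slab = table->table[19200 >> 14] = table->table[1]
 *                  offset into slab = 19200 & 0x3fff = 0xb00
 *
 * so the element lives 0xb00 bytes into the second slab.
 */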
91
92 static int __assert_only
93 lt_elem_in_range(struct lt_elem *elem, struct link_table *table)
94 {
95 struct lt_elem **base = table->table;
96 uintptr_t e = (uintptr_t)elem;
97 assert(base != NULL);
98 while (*base != NULL) {
99 uintptr_t b = (uintptr_t)(*base);
100 if (e >= b && e < b + table->slab_sz)
101 return 1;
102 base++;
103 if ((uintptr_t)base >= (uintptr_t)table->table + PAGE_SIZE)
104 return 0;
105 }
106 return 0;
107 }
108
109
110 /**
111 * lt_elem_invalidate: mark 'elem' as invalid
112 *
113 * NOTE: this does _not_ get or put a reference on 'elem'
114 */
115 void lt_elem_invalidate(struct lt_elem *elem)
116 {
117 uint32_t __assert_only old = OSBitAndAtomic(~LT_BITS_VALID, &elem->lt_bits);
118 OSMemoryBarrier();
119 assert(((lt_bits_type(old) != LT_RESERVED) && (old & LT_BITS_VALID)) ||
120 ((lt_bits_type(old) == LT_RESERVED) && !(old & LT_BITS_VALID)));
121 }
122
123 /**
124 * lt_elem_mkvalid: mark 'elem' as valid
125 *
126 * NOTE: this does _not_ get or put a reference on 'elem'
127 */
128 void lt_elem_mkvalid(struct lt_elem *elem)
129 {
130 uint32_t __assert_only old = OSBitOrAtomic(LT_BITS_VALID, &elem->lt_bits);
131 OSMemoryBarrier();
132 assert(!(old & LT_BITS_VALID));
133 }
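/*
 * Typical teardown pattern (an illustrative sketch, not a definitive recipe):
 * first mark the element invalid so no new references can be taken, then drop
 * the caller's reference; the element goes back on the free list once the
 * last reference is released.
 *
 *   lt_elem_invalidate(elem);
 *   ltable_put_elem(table, elem);
 */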
134
135 static void lt_elem_set_type(struct lt_elem *elem, int type)
136 {
137 uint32_t old_bits, new_bits;
138 do {
139 old_bits = elem->lt_bits;
140 new_bits = (old_bits & ~LT_BITS_TYPE) |
141 ((type & LT_BITS_TYPE_MASK) << LT_BITS_TYPE_SHIFT);
142 } while (OSCompareAndSwap(old_bits, new_bits, &elem->lt_bits) == FALSE);
143 OSMemoryBarrier();
144 }
145
146
147 /**
148 * ltable_bootstrap: bootstrap a link table
149 *
150 * Called once at system boot
151 */
152 void ltable_bootstrap(void)
153 {
154 static int s_is_bootstrapped = 0;
155
156 uint32_t tmp32 = 0;
157
158 if (s_is_bootstrapped)
159 return;
160 s_is_bootstrapped = 1;
161
162 g_lt_max_tbl_size = DEFAULT_MAX_TABLE_SIZE;
163 if (PE_parse_boot_argn("lt_tbl_size", &tmp32, sizeof(tmp32)) == TRUE)
164 g_lt_max_tbl_size = (vm_size_t)P2ROUNDUP(tmp32, PAGE_SIZE);
165
166 lck_grp_init(&g_lt_lck_grp, "link_table_locks", LCK_GRP_ATTR_NULL);
167 }
168
169 /**
170 * ltable_init: initialize a link table with given parameters
171 *
172 */
173 void ltable_init(struct link_table *table, const char *name,
174 uint32_t max_tbl_elem, uint32_t elem_sz,
175 ltable_poison_func poison)
176 {
177 kern_return_t kr;
178 uint32_t slab_sz, slab_shift, slab_msk, slab_elem;
179 zone_t slab_zone;
180 size_t max_tbl_sz;
181 struct lt_elem *e, **base;
182
183 #ifndef CONFIG_LTABLE_STATS
184 /* the element size _must_ be a power of two! */
185 if ((elem_sz & (elem_sz - 1)) != 0)
186 panic("elem_sz:%d for table:'%s' must be a power of two!",
187 elem_sz, name);
188 #endif
189
190 /*
191 * First, allocate a single page of memory to act as the base
192 * for the table's element slabs
193 */
194 kr = kernel_memory_allocate(kernel_map, (vm_offset_t *)&base,
195 PAGE_SIZE, 0, KMA_NOPAGEWAIT, VM_KERN_MEMORY_LTABLE);
196 if (kr != KERN_SUCCESS)
197 panic("Cannot initialize %s table: "
198 "kernel_memory_allocate failed:%d\n", name, kr);
199 memset(base, 0, PAGE_SIZE);
200
201 /*
202 * Based on the maximum table size, calculate the slab size:
203 * we allocate one page of slab pointers for the table, and each slab
204 * holds elements of 'elem_sz'. This gives us the slab size needed for
205 * the table to grow to its maximum size.
206 */
207 max_tbl_sz = (max_tbl_elem * elem_sz);
208 max_tbl_sz = P2ROUNDUP(max_tbl_sz, PAGE_SIZE);
209
210 /* system maximum table size divided by number of slots in a page */
211 slab_sz = (uint32_t)(max_tbl_sz / (PAGE_SIZE / (sizeof(void *))));
212 if (slab_sz < PAGE_SIZE)
213 slab_sz = PAGE_SIZE;
214
215 /* make sure the slab size is a power of two */
216 slab_shift = 0;
217 slab_msk = ~0;
218 for (uint32_t i = 0; i < 31; i++) {
219 uint32_t bit = (1 << i);
220 if ((slab_sz & bit) == slab_sz) {
221 slab_shift = i;
222 slab_msk = 0;
223 for (uint32_t j = 0; j < i; j++)
224 slab_msk |= (1 << j);
225 break;
226 }
227 slab_sz &= ~bit;
228 }
229 slab_elem = slab_sz / elem_sz;
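/*
 * Worked example (hypothetical values, assuming PAGE_SIZE = 4096,
 * sizeof(void *) = 8, elem_sz = 64, and a max_tbl_sz that rounds to 8MB):
 *
 *   slab_sz    = 8MB / (4096 / 8) = 8MB / 512 = 16KB
 *   slab_shift = 14, slab_msk = 0x3fff
 *   slab_elem  = 16KB / 64 = 256 elements per slab
 */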
230
231 /* initialize the table's slab zone (for table growth) */
232 ltdbg("Initializing %s zone: slab:%d (%d,0x%x) max:%ld",
233 name, slab_sz, slab_shift, slab_msk, max_tbl_sz);
234 slab_zone = zinit(slab_sz, max_tbl_sz, slab_sz, name);
235 assert(slab_zone != ZONE_NULL);
236
237 /* allocate the first slab and populate it */
238 base[0] = (struct lt_elem *)zalloc(slab_zone);
239 if (base[0] == NULL)
240 panic("Can't allocate a %s table slab from zone:%p",
241 name, slab_zone);
242
243 memset(base[0], 0, slab_sz);
244
245 /* setup the initial freelist */
246 ltdbg("initializing %d links (%d bytes each)...", slab_elem, elem_sz);
247 for (unsigned l = 0; l < slab_elem; l++) {
248 e = lt_elem_ofst_slab(base[0], slab_msk, l * elem_sz);
249 e->lt_id.idx = l;
250 /*
251 * setting generation to 0 ensures that a setid of 0 is
252 * invalid because the generation will be incremented before
253 * each element's allocation.
254 */
255 e->lt_id.generation = 0;
256 e->lt_next_idx = l + 1;
257 }
258
259 /* make sure the last free element points to a never-valid idx */
260 e = lt_elem_ofst_slab(base[0], slab_msk, (slab_elem - 1) * elem_sz);
261 e->lt_next_idx = LT_IDX_MAX;
262
263 lck_mtx_init(&table->lock, &g_lt_lck_grp, LCK_ATTR_NULL);
264
265 table->slab_sz = slab_sz;
266 table->slab_shift = slab_shift;
267 table->slab_msk = slab_msk;
268 table->slab_elem = slab_elem;
269 table->slab_zone = slab_zone;
270
271 table->elem_sz = elem_sz;
272 table->nelem = slab_elem;
273 table->used_elem = 0;
275 table->poison = poison;
276
277 table->table = base;
278 table->next_free_slab = &base[1];
279 table->free_list.id = base[0]->lt_id.id;
280
281 #if CONFIG_LTABLE_STATS
282 table->nslabs = 1;
283 table->nallocs = 0;
284 table->nreallocs = 0;
285 table->npreposts = 0;
286 table->nreservations = 0;
287 table->nreserved_releases = 0;
288
289 table->max_used = 0;
290 table->avg_used = 0;
291 table->max_reservations = 0;
292 table->avg_reservations = 0;
293 #endif
294 }
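/*
 * Example table setup by a client (an illustrative sketch: 'g_foo_table' and
 * 'foo_poison' are hypothetical, and it assumes the client's element type
 * embeds 'struct lt_elem' as its first member and has a power-of-two size):
 *
 *   static struct link_table g_foo_table;
 *
 *   static void foo_poison(struct link_table *t, struct lt_elem *e)
 *   {
 *           // scrub client-owned fields before 'e' goes back on the free list
 *   }
 *
 *   ltable_bootstrap();  // normally called once during system boot
 *   ltable_init(&g_foo_table, "foo.links.table",
 *               64 * 1024,  // max_tbl_elem
 *               64,         // elem_sz (must be a power of two)
 *               foo_poison);
 */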
295
296
297 /**
298 * ltable_grow: grow a link table by adding another 'slab' of table elements
299 *
300 * Conditions:
301 * table mutex is unlocked
302 * calling thread can block
303 */
304 void ltable_grow(struct link_table *table, uint32_t min_free)
305 {
306 struct lt_elem *slab, **slot;
307 struct lt_elem *e = NULL, *first_new_elem, *last_new_elem;
308 struct ltable_id free_id;
309 uint32_t free_elem;
310
311 assert(get_preemption_level() == 0);
312 assert(table && table->slab_zone);
313
314 lck_mtx_lock(&table->lock);
315
316 free_elem = table->nelem - table->used_elem;
317
318 /*
319 * If the caller just wanted to ensure a minimum number of elements,
320 * do that (and don't just blindly grow the table). Also, don't grow
321 * the table unnecessarily: we could have been beaten by a higher-
322 * priority thread that acquired the lock and grew the table before we
323 * got here.
324 */
325 if (free_elem > min_free) {
326 lck_mtx_unlock(&table->lock);
327 return;
328 }
329
330 /* we are now committed to table growth */
331 ltdbg_v("BEGIN");
332
333 if (table->next_free_slab == NULL) {
334 /*
335 * Before we panic, check one more time to see if any other
336 * threads have freed space in the table.
337 */
338 if ((table->nelem - table->used_elem) > 0) {
339 /* there's at least 1 free element: don't panic yet */
340 lck_mtx_unlock(&table->lock);
341 return;
342 }
343 panic("No more room to grow table: %p (nelem: %d, used: %d)",
344 table, table->nelem, table->used_elem);
345 }
346 slot = table->next_free_slab;
347 table->next_free_slab++;
348 if ((uintptr_t)table->next_free_slab >= (uintptr_t)table->table + PAGE_SIZE)
349 table->next_free_slab = NULL;
350
351 assert(*slot == NULL);
352
353 /* allocate another slab */
354 slab = (struct lt_elem *)zalloc(table->slab_zone);
355 if (slab == NULL)
356 panic("Can't allocate a %s table (%p) slab from zone:%p",
357 table->slab_zone->zone_name, table, table->slab_zone);
358
359 memset(slab, 0, table->slab_sz);
360
361 /* put the new elements into a freelist */
362 ltdbg_v(" init %d new links...", table->slab_elem);
363 for (unsigned l = 0; l < table->slab_elem; l++) {
364 uint32_t idx = l + table->nelem;
365 if (idx >= (LT_IDX_MAX - 1))
366 break; /* the last element of the last slab */
367 e = lt_elem_ofst_slab(slab, table->slab_msk, l * table->elem_sz);
368 e->lt_id.idx = idx;
369 e->lt_next_idx = idx + 1;
370 }
371 last_new_elem = e;
372 assert(last_new_elem != NULL);
373
374 first_new_elem = lt_elem_ofst_slab(slab, table->slab_msk, 0);
375
376 /* update table bookkeeping, and atomically swap the freelist head */
377 *slot = slab;
378 if (table->nelem + table->slab_elem >= LT_IDX_MAX)
379 table->nelem = LT_IDX_MAX - 1;
380 else
381 table->nelem += table->slab_elem;
382
383 #if CONFIG_LTABLE_STATS
384 table->nslabs += 1;
385 #endif
386
387 /*
388 * The atomic swap of the free list head marks the end of table
389 * growth. Incoming requests may now use the newly allocated slab
390 * of table elements
391 */
392 free_id = table->free_list;
393 /* connect the existing free list to the end of the new free list */
394 last_new_elem->lt_next_idx = free_id.idx;
395 while (OSCompareAndSwap64(free_id.id, first_new_elem->lt_id.id,
396 &table->free_list.id) == FALSE) {
397 OSMemoryBarrier();
398 free_id = table->free_list;
399 last_new_elem->lt_next_idx = free_id.idx;
400 }
401 OSMemoryBarrier();
402
403 lck_mtx_unlock(&table->lock);
404
405 return;
406 }
407
408
409 /**
410 * ltable_alloc_elem: allocate one or more elements from a given table
411 *
412 * The returned element(s) will be of type 'type', but will remain invalid.
413 *
414 * If the caller has disabled preemption, then this function may (rarely) spin,
415 * waiting for another thread to either release 'nelem' table elements
416 * or grow the table.
417 *
418 * If the caller can block, then this function may (rarely) block while
419 * the table grows to meet the demand for 'nelem' element(s).
420 */
421 __attribute__((noinline))
422 struct lt_elem *ltable_alloc_elem(struct link_table *table, int type,
423 int nelem, int nattempts)
424 {
425 int nspins = 0, ntries = 0, nalloc = 0;
426 uint32_t table_size;
427 struct lt_elem *elem = NULL;
428 struct ltable_id free_id, next_id;
429
430 static const int max_retries = 500;
431
432 if (type != LT_ELEM && type != LT_LINK && type != LT_RESERVED)
433 panic("link_table_aloc of invalid elem type:%d from table @%p",
434 type, table);
435
436 assert(nelem > 0);
437
438 /*
439 * If the caller only wants to try a certain number of times, make it
440 * look like we've already made (MAX - nattempts) tries at allocation
441 */
442 if (nattempts > 0 && nattempts <= max_retries) {
443 ntries = max_retries - nattempts;
444 }
445
446 try_again:
447 elem = NULL;
448 if (ntries++ > max_retries) {
449 struct lt_elem *tmp;
450 if (nattempts > 0) {
451 /*
452 * The caller specified a particular number of
453 * attempts before failure, so it's expected that
454 * they're prepared to handle a NULL return.
455 */
456 return NULL;
457 }
458
459 if (table->used_elem + nelem >= table_size)
460 panic("No more room to grow table: 0x%p size:%d, used:%d, requested elem:%d",
461 table, table_size, table->used_elem, nelem);
462 if (nelem == 1)
463 panic("Too many alloc retries: %d, table:%p, type:%d, nelem:%d",
464 ntries, table, type, nelem);
465 /* don't panic: try allocating one-at-a-time */
466 while (nelem > 0) {
467 tmp = ltable_alloc_elem(table, type, 1, nattempts);
468 if (elem)
469 lt_elem_list_link(table, tmp, elem);
470 elem = tmp;
471 --nelem;
472 }
473 assert(elem != NULL);
474 return elem;
475 }
476
477 nalloc = 0;
478 table_size = table->nelem;
479
480 if (table->used_elem + nelem >= table_size) {
481 if (get_preemption_level() != 0) {
482 #if CONFIG_LTABLE_STATS
483 table->nspins += 1;
484 #endif
485 /*
486 * We may have just raced with table growth: check
487 * again to make sure there really isn't any space.
488 */
489 if (++nspins > 4)
490 panic("Can't grow table %p with preemption"
491 " disabled!", table);
492 delay(1);
493 goto try_again;
494 }
495 ltable_grow(table, nelem);
496 goto try_again;
497 }
498
499 /* read this value only once before the CAS */
500 free_id = table->free_list;
501 if (free_id.idx >= table_size)
502 goto try_again;
503
504 /*
505 * Find the item on the free list which will become the new free list
506 * head, but be careful not to modify any memory (read only)! Other
507 * threads can alter table state at any time up until the CAS. We
508 * don't modify any memory until we've successfully swapped out the
509 * free list head with the one we've investigated.
510 */
511 for (struct lt_elem *next_elem = lt_elem_idx(table, free_id.idx);
512 nalloc < nelem;
513 nalloc++) {
514 elem = next_elem;
515 next_id.generation = 0;
516 next_id.idx = next_elem->lt_next_idx;
517 if (next_id.idx < table->nelem) {
518 next_elem = lt_elem_idx(table, next_id.idx);
519 next_id.id = next_elem->lt_id.id;
520 } else {
521 goto try_again;
522 }
523 }
524 /* 'elem' points to the last element being allocated */
525
526 if (OSCompareAndSwap64(free_id.id, next_id.id,
527 &table->free_list.id) == FALSE)
528 goto try_again;
529
530 /* load barrier */
531 OSMemoryBarrier();
532
533 /*
534 * After the CAS, we know that we own free_id, and it points to a
535 * valid table entry (checked above). Grab the table pointer and
536 * reset some values.
537 */
538 OSAddAtomic(nelem, &table->used_elem);
539
540 /* end the list of allocated elements */
541 elem->lt_next_idx = LT_IDX_MAX;
542 /* reset 'elem' to point to the first allocated element */
543 elem = lt_elem_idx(table, free_id.idx);
544
545 /*
546 * Update the generation count, and return the element(s)
547 * with a single reference (and no valid bit). If the
548 * caller immediately calls _put() on any element, then
549 * it will be released back to the free list. If the caller
550 * subsequently marks the element as valid, then the put
551 * will simply drop the reference.
552 */
553 for (struct lt_elem *tmp = elem; ; ) {
554 assert(!lt_bits_valid(tmp->lt_bits) &&
555 (lt_bits_refcnt(tmp->lt_bits) == 0));
556 --nalloc;
557 tmp->lt_id.generation += 1;
558 tmp->lt_bits = 1;
559 lt_elem_set_type(tmp, type);
560 if (tmp->lt_next_idx == LT_IDX_MAX)
561 break;
562 assert(tmp->lt_next_idx != LT_IDX_MAX);
563 tmp = lt_elem_idx(table, tmp->lt_next_idx);
564 }
565 assert(nalloc == 0);
566
567 #if CONFIG_LTABLE_STATS
568 uint64_t nreservations;
569 table->nallocs += nelem;
570 if (type == LT_RESERVED)
571 OSIncrementAtomic64(&table->nreservations);
572 nreservations = table->nreservations;
573 if (table->used_elem > table->max_used)
574 table->max_used = table->used_elem;
575 if (nreservations > table->max_reservations)
576 table->max_reservations = nreservations;
577 table->avg_used = (table->avg_used + table->used_elem) / 2;
578 table->avg_reservations = (table->avg_reservations + nreservations) / 2;
579 #endif
580
581 return elem;
582 }
583
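/*
 * Example allocation flow (an illustrative sketch; 'g_foo_table' is the
 * hypothetical table from the ltable_init example above):
 *
 *   struct lt_elem *e;
 *
 *   // may block (or briefly spin with preemption disabled) during growth
 *   e = ltable_alloc_elem(&g_foo_table, LT_ELEM, 1, 0);
 *
 *   // ... initialize the client portion of the element ...
 *
 *   lt_elem_mkvalid(e);          // element becomes visible via its ID
 *   uint64_t id = e->lt_id.id;   // hand this ID to other subsystems
 *   ltable_put_elem(&g_foo_table, e);
 */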
584
585 /**
586 * ltable_realloc_elem: convert a reserved element to a particular type
587 *
588 * This function is used to convert reserved elements (not yet marked valid)
589 * to the given 'type'. The generation of 'elem' is incremented, the element
590 * is disconnected from any list to which it belongs, and its type is set to
591 * 'type'.
592 */
593 void ltable_realloc_elem(struct link_table *table, struct lt_elem *elem, int type)
594 {
595 (void)table;
596 assert(lt_elem_in_range(elem, table) &&
597 !lt_bits_valid(elem->lt_bits));
598
599 #if CONFIG_LTABLE_STATS
600 table->nreallocs += 1;
601 if (lt_bits_type(elem->lt_bits) == LT_RESERVED && type != LT_RESERVED) {
602 /*
603 * This isn't under any lock, so we'll clamp it;
604 * the stats are meant to be informative, not perfectly
605 * accurate.
606 */
607 OSDecrementAtomic64(&table->nreservations);
608 }
609 table->avg_reservations = (table->avg_reservations + table->nreservations) / 2;
610 #endif
611
612 /*
613 * Return the same element with a new generation count, and a
614 * (potentially) new type. Don't touch the refcount: the caller
615 * is responsible for getting that (and the valid bit) correct.
616 */
617 elem->lt_id.generation += 1;
618 elem->lt_next_idx = LT_IDX_MAX;
619 lt_elem_set_type(elem, type);
620
621 return;
622 }
623
624
625 /**
626 * ltable_free_elem: release an element back to a link table
627 *
628 * Do not call this function directly: use ltable_[get|put]_elem!
629 *
630 * Conditions:
631 * 'elem' was originally allocated from 'table'
632 * 'elem' is _not_ marked valid
633 * 'elem' has a reference count of 0
634 */
635 static void ltable_free_elem(struct link_table *table, struct lt_elem *elem)
636 {
637 struct ltable_id next_id;
638
639 assert(lt_elem_in_range(elem, table) &&
640 !lt_bits_valid(elem->lt_bits) &&
641 (lt_bits_refcnt(elem->lt_bits) == 0));
642
643 OSDecrementAtomic(&table->used_elem);
644
645 #if CONFIG_LTABLE_STATS
646 table->avg_used = (table->avg_used + table->used_elem) / 2;
647 if (lt_bits_type(elem->lt_bits) == LT_RESERVED)
648 OSDecrementAtomic64(&table->nreservations);
649 table->avg_reservations = (table->avg_reservations + table->nreservations) / 2;
650 #endif
651
652 elem->lt_bits = 0;
653
654 if (table->poison)
655 (table->poison)(table, elem);
656
657 again:
658 next_id = table->free_list;
659 if (next_id.idx >= table->nelem)
660 elem->lt_next_idx = LT_IDX_MAX;
661 else
662 elem->lt_next_idx = next_id.idx;
663
664 /* store barrier */
665 OSMemoryBarrier();
666 if (OSCompareAndSwap64(next_id.id, elem->lt_id.id,
667 &table->free_list.id) == FALSE)
668 goto again;
669 }
670
671
672 /**
673 * ltable_get_elem: get a reference to a table element identified by 'id'
674 *
675 * Returns a reference to the table element associated with the given 'id', or
676 * NULL if the 'id' was invalid or does not exist in 'table'. The caller is
677 * responsible to release the reference using ltable_put_elem().
678 *
679 * NOTE: if the table element pointed to by 'id' is marked as invalid,
680 * this function will return NULL.
681 */
682 struct lt_elem *ltable_get_elem(struct link_table *table, uint64_t id)
683 {
684 struct lt_elem *elem;
685 uint32_t idx, bits, new_bits;
686
687 /*
688 * Here we have a reference to the table which is guaranteed to remain
689 * valid until we drop the reference
690 */
691
692 idx = ((struct ltable_id *)&id)->idx;
693
694 if (idx >= table->nelem)
695 panic("id:0x%llx : idx:%d > %d", id, idx, table->nelem);
696
697 elem = lt_elem_idx(table, idx);
698
699 /* check the element's validity before attempting to take a reference */
700 bits = elem->lt_bits;
701 if (!lt_bits_valid(bits))
702 return NULL;
703
704 /*
705 * do a pre-verify on the element ID to potentially
706 * avoid 2 compare-and-swaps
707 */
708 if (elem->lt_id.id != id)
709 return NULL;
710
711 new_bits = bits + 1;
712
713 /* check for overflow */
714 assert(lt_bits_refcnt(new_bits) > 0);
715
716 while (OSCompareAndSwap(bits, new_bits, &elem->lt_bits) == FALSE) {
717 /*
718 * either the element became invalid,
719 * or someone else grabbed/removed a reference.
720 */
721 bits = elem->lt_bits;
722 if (!lt_bits_valid(bits)) {
723 /* don't return invalid elements */
724 return NULL;
725 }
726 new_bits = bits + 1;
727 assert(lt_bits_refcnt(new_bits) > 0);
728 }
729
730 /* load barrier */
731 OSMemoryBarrier();
732
733 /* check to see that our reference is to the same generation! */
734 if (elem->lt_id.id != id) {
735 /*
736 ltdbg("ID:0x%llx table generation (%d) != %d",
737 id, elem->lt_id.generation,
738 ((struct ltable_id *)&id)->generation);
739 */
740 ltable_put_elem(table, elem);
741 return NULL;
742 }
743
744 /* We now have a reference on a valid object */
745 return elem;
746 }
747
748 /**
749 * ltable_put_elem: release a reference to table element
750 *
751 * This function releases a reference taken on a table element via
752 * ltable_get_elem(). This function will release the element back to 'table'
753 * when the reference count goes to 0 AND the element has been marked as
754 * invalid.
755 */
756 void ltable_put_elem(struct link_table *table, struct lt_elem *elem)
757 {
758 uint32_t bits, new_bits;
759
760 assert(lt_elem_in_range(elem, table));
761
762 bits = elem->lt_bits;
763 new_bits = bits - 1;
764
765 /* check for underflow */
766 assert(lt_bits_refcnt(new_bits) < LT_BITS_REFCNT_MASK);
767
768 while (OSCompareAndSwap(bits, new_bits, &elem->lt_bits) == FALSE) {
769 bits = elem->lt_bits;
770 new_bits = bits - 1;
771 /* catch underflow */
772 assert(lt_bits_refcnt(new_bits) < LT_BITS_REFCNT_MASK);
773 }
774
775 /* load barrier */
776 OSMemoryBarrier();
777
778 /*
779 * if this was the last reference, and it was marked as invalid,
780 * then we can add this link object back to the free list
781 */
782 if (!lt_bits_valid(new_bits) && (lt_bits_refcnt(new_bits) == 0))
783 ltable_free_elem(table, elem);
784
785 return;
786 }
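/*
 * Example lookup by ID (an illustrative sketch; 'id' is an identifier handed
 * out after lt_elem_mkvalid(), and 'g_foo_table' is hypothetical):
 *
 *   struct lt_elem *e = ltable_get_elem(&g_foo_table, id);
 *   if (e == NULL) {
 *           // stale ID: the element was invalidated or its generation changed
 *           return;
 *   }
 *   // ... use the element while holding the reference ...
 *   ltable_put_elem(&g_foo_table, e);  // may free it if it was invalidated
 */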
787
788
789 /* - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
790 *
791 * API: lt_elem_list_...
792 *
793 * Reuse the free list linkage member, 'lt_next_idx' of a table element
794 * in a slightly more generic singly-linked list. All members of this
795 * list have been allocated from a table, but have not been made valid.
796 *
797 * - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -*/
798
799 /**
800 * lt_elem_list_link: link a child onto a parent
801 *
802 * Note that if 'parent' is the head of a list, this function will follow that
803 * list and attach 'child' to the end of it. In the simplest case, this
804 * results in: parent->child;
805 * however, this could also result in: parent->...->child
806 */
807 int lt_elem_list_link(struct link_table *table, struct lt_elem *parent, struct lt_elem *child)
808 {
809 int nelem = 1;
810
811 assert(lt_elem_in_range(parent, table));
812
813 /* find the end of the parent's list */
814 while (parent->lt_next_idx != LT_IDX_MAX) {
815 assert(parent->lt_next_idx < table->nelem);
816 parent = lt_elem_idx(table, parent->lt_next_idx);
817 nelem++;
818 }
819
820 if (child) {
821 assert(lt_elem_in_range(child, table));
822 parent->lt_next_idx = child->lt_id.idx;
823 }
824
825 return nelem;
826 }
827
828
829 /**
830 * lt_elem_list_first: obtain a pointer to the first element of a list.
831 *
832 * This function converts the head of a singly-linked list, 'id', into a real
833 * lt_elem object and returns a pointer to the object.
834 *
835 * It does _not_ take an extra reference on the object: the list implicitly
836 * holds that reference.
837 */
838 struct lt_elem *lt_elem_list_first(struct link_table *table, uint64_t id)
839 {
840 uint32_t idx;
841 struct lt_elem *elem = NULL;
842
843 if (id == 0)
844 return NULL;
845
846 idx = ((struct ltable_id *)&id)->idx;
847
848 if (idx > table->nelem)
849 panic("Invalid element for id:0x%llx", id);
850 elem = lt_elem_idx(table, idx);
851
852 /* invalid element: reserved ID was probably already reallocated */
853 if (elem->lt_id.id != id)
854 return NULL;
855
856 /* the returned element should _not_ be marked valid! */
857 if (lt_bits_valid(elem->lt_bits) ||
858 lt_bits_type(elem->lt_bits) != LT_RESERVED ||
859 lt_bits_refcnt(elem->lt_bits) != 1) {
860 panic("Valid/unreserved element %p (0x%x) in reserved list",
861 elem, elem->lt_bits);
862 }
863
864 return elem;
865 }
866
867
868 /**
869 * lt_elem_list_next: return the item subsequent to 'elem' in a list
870 *
871 * Note that this will return NULL if 'elem' is actually the end of the list.
872 */
873 struct lt_elem *lt_elem_list_next(struct link_table *table, struct lt_elem *head)
874 {
875 struct lt_elem *elem;
876
877 if (!head)
878 return NULL;
879 if (head->lt_next_idx >= table->nelem)
880 return NULL;
881
882 elem = lt_elem_idx(table, head->lt_next_idx);
883 assert(lt_elem_in_range(elem, table));
884
885 return elem;
886 }
887
888
889 /**
890 * lt_elem_list_break: break a list in two around 'elem'
891 *
892 * This function will reset the next_idx field of 'elem' (making it the end of
893 * the list), and return the element subsequent to 'elem' in the list
894 * (which could be NULL)
895 */
896 struct lt_elem *lt_elem_list_break(struct link_table *table, struct lt_elem *elem)
897 {
898 struct lt_elem *next;
899
900 if (!elem)
901 return NULL;
902 next = lt_elem_list_next(table, elem);
903 elem->lt_next_idx = LT_IDX_MAX;
904
905 return next;
906 }
907
908
909 /**
910 * lt_elem_list_pop: pop an item off the head of a list
911 *
912 * The list head is pointed to by '*id', the element corresponding to '*id' is
913 * returned by this function, and the new list head is returned in the in/out
914 * parameter, '*id'. The caller is responsible for the reference on the
915 * returned object. A realloc is done to reset the type of the object, but it
916 * is still left invalid.
917 */
918 struct lt_elem *lt_elem_list_pop(struct link_table *table, uint64_t *id, int type)
919 {
920 struct lt_elem *first, *next;
921
922 if (!id || *id == 0)
923 return NULL;
924
925 /* pop an item off the reserved stack */
926
927 first = lt_elem_list_first(table, *id);
928 if (!first) {
929 *id = 0;
930 return NULL;
931 }
932
933 next = lt_elem_list_next(table, first);
934 if (next)
935 *id = next->lt_id.id;
936 else
937 *id = 0;
938
939 ltable_realloc_elem(table, first, type);
940
941 return first;
942 }
943
944 /**
945 * lt_elem_list_release: free an entire list of reserved elements
946 *
947 * All elements in the list whose first member is 'head' will be released back
948 * to 'table' as free elements. The 'type' parameter is used in development
949 * kernels to assert that all elements on the list are of the given type.
950 */
951 int lt_elem_list_release(struct link_table *table, struct lt_elem *head,
952 int __assert_only type)
953 {
954 struct lt_elem *elem;
955 struct ltable_id free_id;
956 int nelem = 0;
957
958 if (!head)
959 return 0;
960
961 for (elem = head; ; ) {
962 assert(lt_elem_in_range(elem, table));
963 assert(!lt_bits_valid(elem->lt_bits) && (lt_bits_refcnt(elem->lt_bits) == 1));
964 assert(lt_bits_type(elem->lt_bits) == type);
965
966 nelem++;
967 elem->lt_bits = 0;
968 if (table->poison)
969 (table->poison)(table, elem);
970
971 if (elem->lt_next_idx == LT_IDX_MAX)
972 break;
973 assert(elem->lt_next_idx < table->nelem);
974 elem = lt_elem_idx(table, elem->lt_next_idx);
975 }
976
977 /*
978 * 'elem' now points to the end of our list, and 'head' points to the
979 * beginning. We want to atomically swap the free list pointer with
980 * the 'head' and ensure that 'elem' points to the previous free list
981 * head.
982 */
983
984 again:
985 free_id = table->free_list;
986 if (free_id.idx >= table->nelem)
987 elem->lt_next_idx = LT_IDX_MAX;
988 else
989 elem->lt_next_idx = free_id.idx;
990
991 /* store barrier */
992 OSMemoryBarrier();
993 if (OSCompareAndSwap64(free_id.id, head->lt_id.id,
994 &table->free_list.id) == FALSE)
995 goto again;
996
997 OSAddAtomic(-nelem, &table->used_elem);
998 return nelem;
999 }
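/*
 * Example reserved-list workflow (an illustrative sketch; everything except
 * the functions and types defined above is hypothetical):
 *
 *   // reserve 4 linked, invalid elements in a single allocation
 *   struct lt_elem *head = ltable_alloc_elem(&g_foo_table, LT_RESERVED, 4, 0);
 *   uint64_t reservation = head->lt_id.id;
 *
 *   // later: peel one element off the reservation and give it a real type
 *   struct lt_elem *e = lt_elem_list_pop(&g_foo_table, &reservation, LT_LINK);
 *
 *   // return whatever is still reserved to the table's free list
 *   lt_elem_list_release(&g_foo_table,
 *                        lt_elem_list_first(&g_foo_table, reservation),
 *                        LT_RESERVED);
 */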