/*
 * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
 *
 * This file contains Original Code and/or Modifications of Original Code
 * as defined in and that are subject to the Apple Public Source License
 * Version 2.0 (the 'License'). You may not use this file except in
 * compliance with the License. The rights granted to you under the License
 * may not be used to create, or enable the creation or redistribution of,
 * unlawful or unlicensed copies of an Apple operating system, or to
 * circumvent, violate, or enable the circumvention or violation of, any
 * terms of an Apple operating system software license agreement.
 *
 * Please obtain a copy of the License at
 * http://www.opensource.apple.com/apsl/ and read it before using this file.
 *
 * The Original Code and all software distributed under the License are
 * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
 * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
 * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
 * Please see the License for the specific language governing rights and
 * limitations under the License.
 *
 * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
 */
#include <vm/vm_kern.h>
#include <kern/kalloc.h>
#include <mach/machine.h>
#include <i386/cpu_threads.h>
#include <i386/cpuid.h>
#include <i386/machine_cpu.h>
#include <i386/lock.h>
#include <i386/perfmon.h>
#include <i386/pmCPU.h>

//#define TOPO_DEBUG 1
#if TOPO_DEBUG
void debug_topology_print(void);
#define DBG(x...) kprintf("DBG: " x)
#else
#define DBG(x...)
#endif /* TOPO_DEBUG */

void validate_topology(void);

#define bitmask(h,l) ((bit(h)|(bit(h)-1)) & ~(bit(l)-1))
#define bitfield(x,h,l) (((x) & bitmask(h,l)) >> l)

x86_pkg_t *x86_pkgs = NULL;
uint32_t num_Lx_caches[MAX_CACHE_DEPTH] = { 0 };

static x86_pkg_t *free_pkgs = NULL;
static x86_die_t *free_dies = NULL;
static x86_core_t *free_cores = NULL;
static uint32_t num_dies = 0;

static x86_cpu_cache_t *x86_caches = NULL;
static uint32_t num_caches = 0;

static boolean_t topoParmsInited = FALSE;
x86_topology_parameters_t topoParms;

decl_simple_lock_data(, x86_topo_lock);

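/*
 * Returns TRUE if the processor reports more logical processors (threads)
 * than physical cores, i.e. Hyper-Threading is present and enabled.
 */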
static boolean_t
cpu_is_hyperthreaded(void)
{
    i386_cpu_info_t *cpuinfo;

    cpuinfo = cpuid_info();
    return(cpuinfo->thread_count > cpuinfo->core_count);
}

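/*
 * Allocates an x86_cpu_cache_t, reusing one from the free list when
 * possible, otherwise kalloc'ing one large enough to hold MAX_CPUS
 * logical CPU pointers. The structure is zeroed before being returned.
 */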
static x86_cpu_cache_t *
x86_cache_alloc(void)
{
    x86_cpu_cache_t *cache;
    int i;

    if (x86_caches == NULL) {
        cache = kalloc(sizeof(x86_cpu_cache_t) + (MAX_CPUS * sizeof(x86_lcpu_t *)));
        if (cache == NULL)
            return(NULL);
    } else {
        cache = x86_caches;
        x86_caches = cache->next;
        cache->next = NULL;
    }

    bzero(cache, sizeof(x86_cpu_cache_t));
    cache->next = NULL;
    cache->maxcpus = MAX_CPUS;
    for (i = 0; i < cache->maxcpus; i += 1) {
        cache->cpus[i] = NULL;
    }

    num_caches += 1;

    return(cache);
}

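/*
 * Determines the depth of the last-level cache (LLC) and how many cores
 * and logical CPUs share it, using CPUID leaf 4 (deterministic cache
 * parameters). The results are recorded in topoParms.
 */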
static void
x86_LLC_info(void)
{
    uint32_t index;
    uint32_t cache_info[4];
    uint32_t cache_level = 0;
    uint32_t nCPUsSharing = 1;
    i386_cpu_info_t *cpuinfo;

    cpuinfo = cpuid_info();

    do_cpuid(0, cache_info);

    if (cache_info[eax] < 4) {
        /*
         * Processor does not support deterministic cache
         * information. Fall back to minimal sharing, since we
         * have no better information: two logical CPUs per LLC
         * if hyperthreaded, otherwise one.
         */
        if (cpu_is_hyperthreaded()) {
            topoParms.nCoresSharingLLC = 1;
            topoParms.nLCPUsSharingLLC = 2;
            topoParms.maxSharingLLC = 2;
        } else {
            topoParms.nCoresSharingLLC = 1;
            topoParms.nLCPUsSharingLLC = 1;
            topoParms.maxSharingLLC = 1;
        }
        return;
    }

    for (index = 0; ; index += 1) {
        uint32_t this_level;

        cache_info[eax] = 4;
        cache_info[ecx] = index;
        cache_info[ebx] = 0;
        cache_info[edx] = 0;

        cpuid(cache_info);

        /*
         * See if all levels have been queried.
         */
        if (bitfield(cache_info[eax], 4, 0) == 0)
            break;

        /*
         * Get the current level.
         */
        this_level = bitfield(cache_info[eax], 7, 5);

        /*
         * Only worry about it if it's a deeper level than
         * what we've seen before.
         */
        if (this_level > cache_level) {
            cache_level = this_level;

            /*
             * Save the number of CPUs sharing this cache.
             */
            nCPUsSharing = bitfield(cache_info[eax], 25, 14) + 1;
        }
    }

    /*
     * Make the level of the LLC be 0 based.
     */
    topoParms.LLCDepth = cache_level - 1;

    /*
     * nCPUsSharing represents the *maximum* number of cores or
     * logical CPUs sharing the cache.
     */
    topoParms.maxSharingLLC = nCPUsSharing;

    topoParms.nCoresSharingLLC = nCPUsSharing;
    topoParms.nLCPUsSharingLLC = nCPUsSharing;

    /*
     * nCPUsSharing may not be the number of *active* cores or
     * threads that are sharing the cache.
     */
    if (nCPUsSharing > cpuinfo->core_count)
        topoParms.nCoresSharingLLC = cpuinfo->core_count;
    if (nCPUsSharing > cpuinfo->thread_count)
        topoParms.nLCPUsSharingLLC = cpuinfo->thread_count;
}

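/*
 * Computes the logical ("L", based on the active thread/core counts) and
 * physical ("P", based on the raw CPUID counts) topology parameters:
 * threads per core, cores per die, dies per package and the derived
 * per-die and per-package totals. Called once, before the first topology
 * structure is allocated.
 */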
static void
initTopoParms(void)
{
    i386_cpu_info_t *cpuinfo;

    cpuinfo = cpuid_info();

    /*
     * We need to start with getting the LLC information correct.
     */
    x86_LLC_info();

    /*
     * Compute the number of threads (logical CPUs) per core.
     */
    topoParms.nLThreadsPerCore = cpuinfo->thread_count / cpuinfo->core_count;
    topoParms.nPThreadsPerCore = cpuinfo->cpuid_logical_per_package / cpuinfo->cpuid_cores_per_package;

    /*
     * Compute the number of dies per package.
     */
    topoParms.nLDiesPerPackage = cpuinfo->core_count / topoParms.nCoresSharingLLC;
    topoParms.nPDiesPerPackage = cpuinfo->cpuid_cores_per_package / (topoParms.maxSharingLLC / topoParms.nPThreadsPerCore);

    /*
     * Compute the number of cores per die.
     */
    topoParms.nLCoresPerDie = topoParms.nCoresSharingLLC;
    topoParms.nPCoresPerDie = (topoParms.maxSharingLLC / topoParms.nPThreadsPerCore);

    /*
     * Compute the number of threads per die.
     */
    topoParms.nLThreadsPerDie = topoParms.nLThreadsPerCore * topoParms.nLCoresPerDie;
    topoParms.nPThreadsPerDie = topoParms.nPThreadsPerCore * topoParms.nPCoresPerDie;

    /*
     * Compute the number of cores per package.
     */
    topoParms.nLCoresPerPackage = topoParms.nLCoresPerDie * topoParms.nLDiesPerPackage;
    topoParms.nPCoresPerPackage = topoParms.nPCoresPerDie * topoParms.nPDiesPerPackage;

    /*
     * Compute the number of threads per package.
     */
    topoParms.nLThreadsPerPackage = topoParms.nLThreadsPerCore * topoParms.nLCoresPerPackage;
    topoParms.nPThreadsPerPackage = topoParms.nPThreadsPerCore * topoParms.nPCoresPerPackage;

    DBG("\nLogical Topology Parameters:\n");
    DBG("\tThreads per Core: %d\n", topoParms.nLThreadsPerCore);
    DBG("\tCores per Die: %d\n", topoParms.nLCoresPerDie);
    DBG("\tThreads per Die: %d\n", topoParms.nLThreadsPerDie);
    DBG("\tDies per Package: %d\n", topoParms.nLDiesPerPackage);
    DBG("\tCores per Package: %d\n", topoParms.nLCoresPerPackage);
    DBG("\tThreads per Package: %d\n", topoParms.nLThreadsPerPackage);

    DBG("\nPhysical Topology Parameters:\n");
    DBG("\tThreads per Core: %d\n", topoParms.nPThreadsPerCore);
    DBG("\tCores per Die: %d\n", topoParms.nPCoresPerDie);
    DBG("\tThreads per Die: %d\n", topoParms.nPThreadsPerDie);
    DBG("\tDies per Package: %d\n", topoParms.nPDiesPerPackage);
    DBG("\tCores per Package: %d\n", topoParms.nPCoresPerPackage);
    DBG("\tThreads per Package: %d\n", topoParms.nPThreadsPerPackage);

    topoParmsInited = TRUE;
}

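/*
 * Returns a cache structure to the free list and adjusts the per-level
 * cache counts.
 */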
static void
x86_cache_free(x86_cpu_cache_t *cache)
{
    num_caches -= 1;
    if (cache->level > 0 && cache->level <= MAX_CACHE_DEPTH)
        num_Lx_caches[cache->level - 1] -= 1;
    cache->next = x86_caches;
    x86_caches = cache;
}

/*
 * This returns a list of cache structures that represent the
 * caches for a CPU. Some of the structures may have to be
 * "freed" if they are actually shared between CPUs.
 */
static x86_cpu_cache_t *
x86_cache_list(void)
{
    x86_cpu_cache_t *root = NULL;
    x86_cpu_cache_t *cur = NULL;
    x86_cpu_cache_t *last = NULL;
    uint32_t index;
    uint32_t cache_info[4];
    uint32_t nsets;

    do_cpuid(0, cache_info);

    if (cache_info[eax] < 4) {
        /*
         * Processor does not support deterministic
         * cache information. Don't report anything
         */
        return NULL;
    }

    for (index = 0; ; index += 1) {
        cache_info[eax] = 4;
        cache_info[ecx] = index;
        cache_info[ebx] = 0;
        cache_info[edx] = 0;

        cpuid(cache_info);

        /*
         * See if all levels have been queried.
         */
        if (bitfield(cache_info[eax], 4, 0) == 0)
            break;

        cur = x86_cache_alloc();
        if (cur == NULL) {
            break;
        }

        cur->type = bitfield(cache_info[eax], 4, 0);
        cur->level = bitfield(cache_info[eax], 7, 5);
        cur->maxcpus = (bitfield(cache_info[eax], 25, 14) + 1);
        cur->line_size = bitfield(cache_info[ebx], 11, 0) + 1;
        cur->partitions = bitfield(cache_info[ebx], 21, 12) + 1;
        cur->ways = bitfield(cache_info[ebx], 31, 22) + 1;
        nsets = bitfield(cache_info[ecx], 31, 0) + 1;
        cur->cache_size = cur->line_size * cur->ways * cur->partitions * nsets;

        if (last == NULL) {
            root = cur;
            last = cur;
        } else {
            last->next = cur;
            last = cur;
        }

        cur->nlcpus = 0;
        num_Lx_caches[cur->level - 1] += 1;
    }

    return(root);
}

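/*
 * Searches a list of cache structures for one whose parameters (sharing,
 * type, level, geometry and size) match those of "matcher". Returns the
 * matching entry, or NULL if none is found.
 */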
static x86_cpu_cache_t *
x86_match_cache(x86_cpu_cache_t *list, x86_cpu_cache_t *matcher)
{
    x86_cpu_cache_t *cur_cache;

    cur_cache = list;
    while (cur_cache != NULL) {
        if (cur_cache->maxcpus == matcher->maxcpus
            && cur_cache->type == matcher->type
            && cur_cache->level == matcher->level
            && cur_cache->ways == matcher->ways
            && cur_cache->partitions == matcher->partitions
            && cur_cache->line_size == matcher->line_size
            && cur_cache->cache_size == matcher->cache_size)
            break;

        cur_cache = cur_cache->next;
    }

    return(cur_cache);
}

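/*
 * Initializes the x86_lcpu_t embedded in the per-CPU data for "cpu":
 * sets its numbering, clears its topology links and cache pointers, and
 * marks whether it is the master and/or primary logical CPU.
 */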
static void
x86_lcpu_init(int cpu)
{
    cpu_data_t *cpup;
    x86_lcpu_t *lcpu;
    int i;

    cpup = cpu_datap(cpu);

    lcpu = &cpup->lcpu;
    lcpu->lcpu = lcpu;
    lcpu->cpu = cpup;
    lcpu->next_in_core = NULL;
    lcpu->next_in_die = NULL;
    lcpu->next_in_pkg = NULL;
    lcpu->core = NULL;
    lcpu->die = NULL;
    lcpu->package = NULL;
    lcpu->cpu_num = cpu;
    lcpu->lnum = cpu;
    lcpu->pnum = cpup->cpu_phys_number;
    lcpu->state = LCPU_OFF;
    for (i = 0; i < MAX_CACHE_DEPTH; i += 1)
        lcpu->caches[i] = NULL;

    lcpu->master = (lcpu->cpu_num == (unsigned int) master_cpu);
    lcpu->primary = (lcpu->pnum % topoParms.nPThreadsPerPackage) == 0;
}

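/*
 * Allocates and initializes a core structure for "cpu", reusing one from
 * the free list when possible. The physical and logical core numbers are
 * derived from the CPU's physical (APIC) number.
 */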
static x86_core_t *
x86_core_alloc(int cpu)
{
    x86_core_t *core;
    cpu_data_t *cpup;

    cpup = cpu_datap(cpu);

    simple_lock(&x86_topo_lock);
    if (free_cores != NULL) {
        core = free_cores;
        free_cores = core->next_in_die;
        core->next_in_die = NULL;
        simple_unlock(&x86_topo_lock);
    } else {
        simple_unlock(&x86_topo_lock);
        core = kalloc(sizeof(x86_core_t));
        if (core == NULL)
            panic("x86_core_alloc() kalloc of x86_core_t failed!\n");
    }

    bzero((void *) core, sizeof(x86_core_t));

    core->pcore_num = cpup->cpu_phys_number / topoParms.nPThreadsPerCore;
    core->lcore_num = core->pcore_num % topoParms.nPCoresPerPackage;

    core->flags = X86CORE_FL_PRESENT | X86CORE_FL_READY
        | X86CORE_FL_HALTED | X86CORE_FL_IDLE;

    return(core);
}

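/*
 * Returns a core structure to the free list.
 */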
static void
x86_core_free(x86_core_t *core)
{
    simple_lock(&x86_topo_lock);
    core->next_in_die = free_cores;
    free_cores = core;
    simple_unlock(&x86_topo_lock);
}

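/*
 * Looks up the package containing "cpu" in the global package list.
 * Returns NULL if the package has not been created yet.
 */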
static x86_pkg_t *
x86_package_find(int cpu)
{
    x86_pkg_t *pkg;
    cpu_data_t *cpup;
    uint32_t pkg_num;

    cpup = cpu_datap(cpu);

    pkg_num = cpup->cpu_phys_number / topoParms.nPThreadsPerPackage;

    pkg = x86_pkgs;
    while (pkg != NULL) {
        if (pkg->ppkg_num == pkg_num)
            break;
        pkg = pkg->next;
    }

    return(pkg);
}

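/*
 * Looks up the die containing "cpu" within its package. Returns NULL if
 * either the package or the die has not been created yet.
 */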
static x86_die_t *
x86_die_find(int cpu)
{
    x86_die_t *die;
    x86_pkg_t *pkg;
    cpu_data_t *cpup;
    uint32_t die_num;

    cpup = cpu_datap(cpu);

    die_num = cpup->cpu_phys_number / topoParms.nPThreadsPerDie;

    pkg = x86_package_find(cpu);
    if (pkg == NULL)
        return(NULL);

    die = pkg->dies;
    while (die != NULL) {
        if (die->pdie_num == die_num)
            break;
        die = die->next_in_pkg;
    }

    return(die);
}

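/*
 * Looks up the core containing "cpu" within its die. Returns NULL if the
 * package, die or core has not been created yet.
 */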
static x86_core_t *
x86_core_find(int cpu)
{
    x86_core_t *core;
    x86_die_t *die;
    cpu_data_t *cpup;
    uint32_t core_num;

    cpup = cpu_datap(cpu);

    core_num = cpup->cpu_phys_number / topoParms.nPThreadsPerCore;

    die = x86_die_find(cpu);
    if (die == NULL)
        return(NULL);

    core = die->cores;
    while (core != NULL) {
        if (core->pcore_num == core_num)
            break;
        core = core->next_in_die;
    }

    return(core);
}

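/*
 * The x86_set_*_numbers() routines below recompute the numbering of a
 * thread, core, die or package from the logical CPU number and the
 * logical topology parameters.
 */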
void
x86_set_lcpu_numbers(x86_lcpu_t *lcpu)
{
    lcpu->lnum = lcpu->cpu_num % topoParms.nLThreadsPerCore;
}

void
x86_set_core_numbers(x86_core_t *core, x86_lcpu_t *lcpu)
{
    core->pcore_num = lcpu->cpu_num / topoParms.nLThreadsPerCore;
    core->lcore_num = core->pcore_num % topoParms.nLCoresPerDie;
}

void
x86_set_die_numbers(x86_die_t *die, x86_lcpu_t *lcpu)
{
    die->pdie_num = lcpu->cpu_num / (topoParms.nLThreadsPerCore * topoParms.nLCoresPerDie);
    die->ldie_num = die->pdie_num % topoParms.nLDiesPerPackage;
}

void
x86_set_pkg_numbers(x86_pkg_t *pkg, x86_lcpu_t *lcpu)
{
    pkg->ppkg_num = lcpu->cpu_num / topoParms.nLThreadsPerPackage;
    pkg->lpkg_num = pkg->ppkg_num;
}

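/*
 * Allocates and initializes a die structure for "cpu", reusing one from
 * the free list when possible.
 */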
static x86_die_t *
x86_die_alloc(int cpu)
{
    x86_die_t *die;
    cpu_data_t *cpup;

    cpup = cpu_datap(cpu);

    simple_lock(&x86_topo_lock);
    if (free_dies != NULL) {
        die = free_dies;
        free_dies = die->next_in_pkg;
        die->next_in_pkg = NULL;
        simple_unlock(&x86_topo_lock);
    } else {
        simple_unlock(&x86_topo_lock);
        die = kalloc(sizeof(x86_die_t));
        if (die == NULL)
            panic("x86_die_alloc() kalloc of x86_die_t failed!\n");
    }

    bzero((void *) die, sizeof(x86_die_t));

    die->pdie_num = cpup->cpu_phys_number / topoParms.nPThreadsPerDie;

    die->ldie_num = num_dies;
    atomic_incl((long *) &num_dies, 1);

    die->flags = X86DIE_FL_PRESENT;
    return(die);
}

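/*
 * Returns a die structure to the free list.
 */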
static void
x86_die_free(x86_die_t *die)
{
    simple_lock(&x86_topo_lock);
    die->next_in_pkg = free_dies;
    free_dies = die;
    atomic_decl((long *) &num_dies, 1);
    simple_unlock(&x86_topo_lock);
}

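/*
 * Allocates and initializes a package structure for "cpu", reusing one
 * from the free list when possible.
 */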
static x86_pkg_t *
x86_package_alloc(int cpu)
{
    x86_pkg_t *pkg;
    cpu_data_t *cpup;

    cpup = cpu_datap(cpu);

    simple_lock(&x86_topo_lock);
    if (free_pkgs != NULL) {
        pkg = free_pkgs;
        free_pkgs = pkg->next;
        pkg->next = NULL;
        simple_unlock(&x86_topo_lock);
    } else {
        simple_unlock(&x86_topo_lock);
        pkg = kalloc(sizeof(x86_pkg_t));
        if (pkg == NULL)
            panic("x86_package_alloc() kalloc of x86_pkg_t failed!\n");
    }

    bzero((void *) pkg, sizeof(x86_pkg_t));

    pkg->ppkg_num = cpup->cpu_phys_number / topoParms.nPThreadsPerPackage;

    pkg->lpkg_num = topoParms.nPackages;
    atomic_incl((long *) &topoParms.nPackages, 1);

    pkg->flags = X86PKG_FL_PRESENT | X86PKG_FL_READY;
    return(pkg);
}

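/*
 * Returns a package structure to the free list.
 */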
static void
x86_package_free(x86_pkg_t *pkg)
{
    simple_lock(&x86_topo_lock);
    pkg->next = free_pkgs;
    free_pkgs = pkg;
    atomic_decl((long *) &topoParms.nPackages, 1);
    simple_unlock(&x86_topo_lock);
}

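/*
 * Links "cache" onto the logical CPU's per-level cache list and records
 * the logical CPU in the cache's array of sharing CPUs.
 */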
static void
x86_cache_add_lcpu(x86_cpu_cache_t *cache, x86_lcpu_t *lcpu)
{
    x86_cpu_cache_t *cur_cache;
    int i;

    /*
     * Put the new CPU into the list of the cache.
     */
    cur_cache = lcpu->caches[cache->level - 1];
    lcpu->caches[cache->level - 1] = cache;
    cache->next = cur_cache;
    cache->nlcpus += 1;
    for (i = 0; i < cache->nlcpus; i += 1) {
        if (cache->cpus[i] == NULL) {
            cache->cpus[i] = lcpu;
            break;
        }
    }
}

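/*
 * Enumerates the caches visible to "lcpu" and attaches them to the
 * topology. A cache already described by another logical CPU in the same
 * core or die is merged with the existing structure; otherwise the new
 * structure is added.
 */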
static void
x86_lcpu_add_caches(x86_lcpu_t *lcpu)
{
    x86_cpu_cache_t *list;
    x86_cpu_cache_t *cur;
    x86_cpu_cache_t *match;
    x86_die_t *die;
    x86_core_t *core;
    x86_lcpu_t *cur_lcpu;
    uint32_t level;
    boolean_t found = FALSE;

    assert(lcpu != NULL);

    /*
     * Add the cache data to the topology.
     */
    list = x86_cache_list();

    simple_lock(&x86_topo_lock);

    while (list != NULL) {
        /*
         * Remove the cache from the front of the list.
         */
        cur = list;
        list = cur->next;
        cur->next = NULL;
        level = cur->level - 1;

        /*
         * If the cache isn't shared then just put it where it
         * belongs.
         */
        if (cur->maxcpus == 1) {
            x86_cache_add_lcpu(cur, lcpu);
            continue;
        }

        /*
         * We'll assume that all of the caches at a particular level
         * have the same sharing. So if we have a cache already at
         * this level, we'll just skip looking for the match.
         */
        if (lcpu->caches[level] != NULL) {
            x86_cache_free(cur);
            continue;
        }

        /*
         * This is a shared cache, so we have to figure out if
         * this is the first time we've seen this cache. We do
         * this by searching through the topology and seeing if
         * this cache is already described.
         *
         * Assume that L{LLC-1} are all at the core level and that
         * LLC is shared at the die level.
         */
        if (level < topoParms.LLCDepth) {
            /*
             * Shared at the core.
             */
            core = lcpu->core;
            cur_lcpu = core->lcpus;
            while (cur_lcpu != NULL) {
                /*
                 * Skip ourselves.
                 */
                if (cur_lcpu == lcpu) {
                    cur_lcpu = cur_lcpu->next_in_core;
                    continue;
                }

                /*
                 * If there's a cache on this logical CPU,
                 * then use that one.
                 */
                match = x86_match_cache(cur_lcpu->caches[level], cur);
                if (match != NULL) {
                    x86_cache_free(cur);
                    x86_cache_add_lcpu(match, lcpu);
                    found = TRUE;
                    break;
                }

                cur_lcpu = cur_lcpu->next_in_core;
            }
        } else {
            /*
             * Shared at the die.
             */
            die = lcpu->die;
            cur_lcpu = die->lcpus;
            while (cur_lcpu != NULL) {
                /*
                 * Skip ourselves.
                 */
                if (cur_lcpu == lcpu) {
                    cur_lcpu = cur_lcpu->next_in_die;
                    continue;
                }

                /*
                 * If there's a cache on this logical CPU,
                 * then use that one.
                 */
                match = x86_match_cache(cur_lcpu->caches[level], cur);
                if (match != NULL) {
                    x86_cache_free(cur);
                    x86_cache_add_lcpu(match, lcpu);
                    found = TRUE;
                    break;
                }

                cur_lcpu = cur_lcpu->next_in_die;
            }
        }

        /*
         * If a shared cache wasn't found, then this logical CPU must
         * be the first one encountered.
         */
        if (!found) {
            x86_cache_add_lcpu(cur, lcpu);
        }
    }

    simple_unlock(&x86_topo_lock);
}

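/*
 * The following helpers link a logical CPU, core or die onto the list
 * maintained by its parent (core, die or package) and update the parent's
 * counts where they are kept.
 */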
static void
x86_core_add_lcpu(x86_core_t *core, x86_lcpu_t *lcpu)
{
    assert(core != NULL);
    assert(lcpu != NULL);

    simple_lock(&x86_topo_lock);

    lcpu->next_in_core = core->lcpus;
    lcpu->core = core;
    core->lcpus = lcpu;
    core->num_lcpus += 1;
    simple_unlock(&x86_topo_lock);
}

static void
x86_die_add_lcpu(x86_die_t *die, x86_lcpu_t *lcpu)
{
    assert(die != NULL);
    assert(lcpu != NULL);

    lcpu->next_in_die = die->lcpus;
    lcpu->die = die;
    die->lcpus = lcpu;
}

static void
x86_die_add_core(x86_die_t *die, x86_core_t *core)
{
    assert(die != NULL);
    assert(core != NULL);

    core->next_in_die = die->cores;
    core->die = die;
    die->cores = core;
    die->num_cores += 1;
}

static void
x86_package_add_lcpu(x86_pkg_t *pkg, x86_lcpu_t *lcpu)
{
    assert(pkg != NULL);
    assert(lcpu != NULL);

    lcpu->next_in_pkg = pkg->lcpus;
    lcpu->package = pkg;
    pkg->lcpus = lcpu;
}

static void
x86_package_add_core(x86_pkg_t *pkg, x86_core_t *core)
{
    assert(pkg != NULL);
    assert(core != NULL);

    core->next_in_pkg = pkg->cores;
    core->package = pkg;
    pkg->cores = core;
}

static void
x86_package_add_die(x86_pkg_t *pkg, x86_die_t *die)
{
    assert(pkg != NULL);
    assert(die != NULL);

    die->next_in_pkg = pkg->dies;
    die->package = pkg;
    pkg->dies = die;
    pkg->num_dies += 1;
}

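/*
 * Builds (or extends) the topology for logical CPU "cpu": finds or creates
 * the package, die and core it belongs to, initializes its x86_lcpu_t,
 * links it into those structures, and attaches its cache information.
 * Returns a pointer to the core, which the caller stores in the per-CPU
 * data.
 */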
void *
cpu_thread_alloc(int cpu)
{
    x86_core_t *core = NULL;
    x86_die_t *die = NULL;
    x86_pkg_t *pkg = NULL;
    cpu_data_t *cpup;
    uint32_t phys_cpu;

    /*
     * Only allow one to manipulate the topology at a time.
     */
    simple_lock(&x86_topo_lock);

    /*
     * Make sure all of the topology parameters have been initialized.
     */
    if (!topoParmsInited)
        initTopoParms();

    cpup = cpu_datap(cpu);

    phys_cpu = cpup->cpu_phys_number;

    x86_lcpu_init(cpu);

    /*
     * Allocate performance counter structure.
     */
    simple_unlock(&x86_topo_lock);
    cpup->lcpu.pmc = pmc_alloc();
    simple_lock(&x86_topo_lock);

    /*
     * Assume that all cpus have the same features.
     */
    if (cpu_is_hyperthreaded()) {
        cpup->cpu_threadtype = CPU_THREADTYPE_INTEL_HTT;
    } else {
        cpup->cpu_threadtype = CPU_THREADTYPE_NONE;
    }

    /*
     * Get the package that the logical CPU is in.
     */
    do {
        pkg = x86_package_find(cpu);
        if (pkg == NULL) {
            /*
             * Package structure hasn't been created yet, do it now.
             */
            simple_unlock(&x86_topo_lock);
            pkg = x86_package_alloc(cpu);
            simple_lock(&x86_topo_lock);
            if (x86_package_find(cpu) != NULL) {
                x86_package_free(pkg);
                continue;
            }

            /*
             * Add the new package to the global list of packages.
             */
            pkg->next = x86_pkgs;
            x86_pkgs = pkg;
        }
    } while (pkg == NULL);

    /*
     * Get the die that the logical CPU is in.
     */
    do {
        die = x86_die_find(cpu);
        if (die == NULL) {
            /*
             * Die structure hasn't been created yet, do it now.
             */
            simple_unlock(&x86_topo_lock);
            die = x86_die_alloc(cpu);
            simple_lock(&x86_topo_lock);
            if (x86_die_find(cpu) != NULL) {
                x86_die_free(die);
                continue;
            }

            /*
             * Add the die to the package.
             */
            x86_package_add_die(pkg, die);
        }
    } while (die == NULL);

    /*
     * Get the core for this logical CPU.
     */
    do {
        core = x86_core_find(cpu);
        if (core == NULL) {
            /*
             * Allocate the core structure now.
             */
            simple_unlock(&x86_topo_lock);
            core = x86_core_alloc(cpu);
            simple_lock(&x86_topo_lock);
            if (x86_core_find(cpu) != NULL) {
                x86_core_free(core);
                continue;
            }

            /*
             * Add the core to the die & package.
             */
            x86_die_add_core(die, core);
            x86_package_add_core(pkg, core);
            machine_info.physical_cpu_max += 1;
        }
    } while (core == NULL);

    /*
     * Done manipulating the topology, so others can get in.
     */
    machine_info.logical_cpu_max += 1;
    simple_unlock(&x86_topo_lock);

    /*
     * Add the logical CPU to the other topology structures.
     */
    x86_core_add_lcpu(core, &cpup->lcpu);
    x86_die_add_lcpu(core->die, &cpup->lcpu);
    x86_package_add_lcpu(core->package, &cpup->lcpu);
    x86_lcpu_add_caches(&cpup->lcpu);

    return (void *) core;
}

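/*
 * Called when a CPU starts up. The boot processor additionally initializes
 * the topology lock and builds its own topology entry here. Updates the
 * running logical/physical CPU accounting and marks the CPU as running
 * for power management.
 */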
void
cpu_thread_init(void)
{
    int my_cpu = get_cpu_number();
    cpu_data_t *cpup = current_cpu_datap();
    x86_core_t *core;
    static int initialized = 0;

    /*
     * If we're the boot processor, we do all of the initialization of
     * the CPU topology infrastructure.
     */
    if (my_cpu == master_cpu && !initialized) {
        simple_lock_init(&x86_topo_lock, 0);

        /*
         * Put this logical CPU into the physical CPU topology.
         */
        cpup->lcpu.core = cpu_thread_alloc(my_cpu);

        initialized = 1;
    }

    /*
     * Do the CPU accounting.
     */
    core = cpup->lcpu.core;
    simple_lock(&x86_topo_lock);
    machine_info.logical_cpu += 1;
    if (core->active_lcpus == 0)
        machine_info.physical_cpu += 1;
    core->active_lcpus += 1;
    simple_unlock(&x86_topo_lock);

    pmCPUMarkRunning(cpup);
    etimer_resync_deadlines();
}

/*
 * Called for a cpu to halt permanently
 * (as opposed to halting and expecting an interrupt to awaken it).
 */
void
cpu_thread_halt(void)
{
    x86_core_t *core;
    cpu_data_t *cpup = current_cpu_datap();

    simple_lock(&x86_topo_lock);
    machine_info.logical_cpu -= 1;
    core = cpup->lcpu.core;
    core->active_lcpus -= 1;
    if (core->active_lcpus == 0)
        machine_info.physical_cpu -= 1;
    simple_unlock(&x86_topo_lock);

    /*
     * Let the power management code determine the best way to "stop"
     * the processor.
     */
    ml_set_interrupts_enabled(FALSE);
    while (1) {
        pmCPUHalt(PM_HALT_NORMAL);
    }
    /* NOT REACHED */
}

/*
 * Validates that the topology was built correctly. Must be called only
 * after the complete topology is built and no other changes are being made.
 */
void
validate_topology(void)
{
    x86_pkg_t *pkg;
    x86_die_t *die;
    x86_core_t *core;
    x86_lcpu_t *lcpu;
    uint32_t nDies;
    uint32_t nCores;
    uint32_t nCPUs;

    /*
     * XXX
     *
     * Right now this only works if the number of CPUs started is the total
     * number of CPUs. However, when specifying cpus=n the topology is only
     * partially constructed and the checks below will fail.
     *
     * We should *always* build the complete topology and only start the CPUs
     * indicated by cpus=n. Until that happens, this code will not check the
     * topology if the number of CPUs defined is less than that described by
     * the topology parameters.
     */
    nCPUs = topoParms.nPackages * topoParms.nLThreadsPerPackage;
    if (nCPUs > real_ncpus)
        return;

    pkg = x86_pkgs;
    while (pkg != NULL) {
        /*
         * Make sure that the package has the correct number of dies.
         */
        nDies = 0;
        die = pkg->dies;
        while (die != NULL) {
            if (die->package == NULL)
                panic("Die(%d)->package is NULL",
                      die->pdie_num);
            if (die->package != pkg)
                panic("Die %d points to package %d, should be %d",
                      die->pdie_num, die->package->lpkg_num, pkg->lpkg_num);

            DBG("Die(%d)->package %d\n",
                die->pdie_num, pkg->lpkg_num);

            /*
             * Make sure that the die has the correct number of cores.
             */
            DBG("Die(%d)->cores: ", die->pdie_num);
            nCores = 0;
            core = die->cores;
            while (core != NULL) {
                if (core->die == NULL)
                    panic("Core(%d)->die is NULL",
                          core->pcore_num);
                if (core->die != die)
                    panic("Core %d points to die %d, should be %d",
                          core->pcore_num, core->die->pdie_num, die->pdie_num);
                nCores += 1;
                DBG("%d ", core->pcore_num);
                core = core->next_in_die;
            }
            DBG("\n");

            if (nCores != topoParms.nLCoresPerDie)
                panic("Should have %d Cores, but only found %d for Die %d",
                      topoParms.nLCoresPerDie, nCores, die->pdie_num);

            /*
             * Make sure that the die has the correct number of CPUs.
             */
            DBG("Die(%d)->lcpus: ", die->pdie_num);
            nCPUs = 0;
            lcpu = die->lcpus;
            while (lcpu != NULL) {
                if (lcpu->die == NULL)
                    panic("CPU(%d)->die is NULL",
                          lcpu->cpu_num);
                if (lcpu->die != die)
                    panic("CPU %d points to die %d, should be %d",
                          lcpu->cpu_num, lcpu->die->pdie_num, die->pdie_num);
                nCPUs += 1;
                DBG("%d ", lcpu->cpu_num);
                lcpu = lcpu->next_in_die;
            }
            DBG("\n");

            if (nCPUs != topoParms.nLThreadsPerDie)
                panic("Should have %d Threads, but only found %d for Die %d",
                      topoParms.nLThreadsPerDie, nCPUs, die->pdie_num);

            nDies += 1;
            die = die->next_in_pkg;
        }

        if (nDies != topoParms.nLDiesPerPackage)
            panic("Should have %d Dies, but only found %d for package %d",
                  topoParms.nLDiesPerPackage, nDies, pkg->lpkg_num);

        /*
         * Make sure that the package has the correct number of cores.
         */
        nCores = 0;
        core = pkg->cores;
        while (core != NULL) {
            if (core->package == NULL)
                panic("Core(%d)->package is NULL",
                      core->pcore_num);
            if (core->package != pkg)
                panic("Core %d points to package %d, should be %d",
                      core->pcore_num, core->package->lpkg_num, pkg->lpkg_num);
            DBG("Core(%d)->package %d\n",
                core->pcore_num, pkg->lpkg_num);

            /*
             * Make sure that the core has the correct number of CPUs.
             */
            nCPUs = 0;
            lcpu = core->lcpus;
            DBG("Core(%d)->lcpus: ", core->pcore_num);
            while (lcpu != NULL) {
                if (lcpu->core == NULL)
                    panic("CPU(%d)->core is NULL",
                          lcpu->cpu_num);
                if (lcpu->core != core)
                    panic("CPU %d points to core %d, should be %d",
                          lcpu->cpu_num, lcpu->core->pcore_num, core->pcore_num);
                DBG("%d ", lcpu->cpu_num);
                nCPUs += 1;
                lcpu = lcpu->next_in_core;
            }
            DBG("\n");

            if (nCPUs != topoParms.nLThreadsPerCore)
                panic("Should have %d Threads, but only found %d for Core %d",
                      topoParms.nLThreadsPerCore, nCPUs, core->pcore_num);
            nCores += 1;
            core = core->next_in_pkg;
        }

        if (nCores != topoParms.nLCoresPerPackage)
            panic("Should have %d Cores, but only found %d for package %d",
                  topoParms.nLCoresPerPackage, nCores, pkg->lpkg_num);

        /*
         * Make sure that the package has the correct number of CPUs.
         */
        nCPUs = 0;
        lcpu = pkg->lcpus;
        while (lcpu != NULL) {
            if (lcpu->package == NULL)
                panic("CPU(%d)->package is NULL",
                      lcpu->cpu_num);
            if (lcpu->package != pkg)
                panic("CPU %d points to package %d, should be %d",
                      lcpu->cpu_num, lcpu->package->lpkg_num, pkg->lpkg_num);
            DBG("CPU(%d)->package %d\n",
                lcpu->cpu_num, pkg->lpkg_num);
            nCPUs += 1;
            lcpu = lcpu->next_in_pkg;
        }

        if (nCPUs != topoParms.nLThreadsPerPackage)
            panic("Should have %d Threads, but only found %d for package %d",
                  topoParms.nLThreadsPerPackage, nCPUs, pkg->lpkg_num);

        pkg = pkg->next;
    }
}

#if TOPO_DEBUG
/*
 * Prints out the topology
 */
void
debug_topology_print(void)
{
    x86_pkg_t *pkg;
    x86_die_t *die;
    x86_core_t *core;
    x86_lcpu_t *cpu;

    pkg = x86_pkgs;
    while (pkg != NULL) {
        kprintf("Package:\n");
        kprintf(" Physical: %d\n", pkg->ppkg_num);
        kprintf(" Logical: %d\n", pkg->lpkg_num);

        die = pkg->dies;
        while (die != NULL) {
            kprintf(" Die:\n");
            kprintf(" Physical: %d\n", die->pdie_num);
            kprintf(" Logical: %d\n", die->ldie_num);

            core = die->cores;
            while (core != NULL) {
                kprintf(" Core:\n");
                kprintf(" Physical: %d\n", core->pcore_num);
                kprintf(" Logical: %d\n", core->lcore_num);

                cpu = core->lcpus;
                while (cpu != NULL) {
                    kprintf(" LCPU:\n");
                    kprintf(" CPU #: %d\n", cpu->cpu_num);
                    kprintf(" Physical: %d\n", cpu->pnum);
                    kprintf(" Logical: %d\n", cpu->lnum);
                    kprintf(" Flags: ");
                    if (cpu->master)
                        kprintf("MASTER ");
                    if (cpu->primary)
                        kprintf("PRIMARY");
                    if (!cpu->master && !cpu->primary)
                        kprintf("(NONE)");
                    kprintf("\n");

                    cpu = cpu->next_in_core;
                }

                core = core->next_in_die;
            }

            die = die->next_in_pkg;
        }

        pkg = pkg->next;
    }
}
#endif /* TOPO_DEBUG */