/*
- * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2016 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
*/
#include <vm/vm_kern.h>
#include <kern/kalloc.h>
+#include <kern/timer_queue.h>
#include <mach/machine.h>
#include <i386/cpu_threads.h>
#include <i386/cpuid.h>
#include <i386/machine_cpu.h>
-#include <i386/lock.h>
-#include <i386/perfmon.h>
#include <i386/pmCPU.h>
+#include <i386/bit_routines.h>
-#define bitmask(h,l) ((bit(h)|(bit(h)-1)) & ~(bit(l)-1))
-#define bitfield(x,h,l) (((x) & bitmask(h,l)) >> l)
+#define DIVISOR_GUARD(denom) \
+ if ((denom) == 0) { \
+ kprintf("%s: %d Zero divisor: " #denom, \
+ __FILE__, __LINE__); \
+ }
-/*
- * Kernel parameter determining whether threads are halted unconditionally
- * in the idle state. This is the default behavior.
- * See machine_idle() for use.
- */
-int idlehalt = 1;
+static void debug_topology_print(void);
+
+boolean_t topo_dbg = FALSE;
-x86_pkg_t *x86_pkgs = NULL;
-uint32_t num_packages = 0;
+x86_pkg_t *x86_pkgs = NULL;
uint32_t num_Lx_caches[MAX_CACHE_DEPTH] = { 0 };
static x86_pkg_t *free_pkgs = NULL;
+static x86_die_t *free_dies = NULL;
static x86_core_t *free_cores = NULL;
+static uint32_t num_dies = 0;
static x86_cpu_cache_t *x86_caches = NULL;
static uint32_t num_caches = 0;
+static boolean_t topoParmsInited = FALSE;
+x86_topology_parameters_t topoParms;
+
decl_simple_lock_data(, x86_topo_lock);
+
+static struct cpu_cache { /* level and type of each cache slot; the table is indexed by the L1D/L1I/L2U/L3U designators */
+ int level; int type; /* cache level (1 to 3 below) and a CPU_CACHE_TYPE_* value */
+} cpu_caches [LCACHE_MAX] = { /* slots without an initializer are zero-filled; the scans in this file skip entries whose type is 0 */
+ [L1D] = { 1, CPU_CACHE_TYPE_DATA },
+ [L1I] = { 1, CPU_CACHE_TYPE_INST },
+ [L2U] = { 2, CPU_CACHE_TYPE_UNIF },
+ [L3U] = { 3, CPU_CACHE_TYPE_UNIF },
+};
+
+/*
+ * Report whether the cpuid information lists more threads than cores.
+ *
+ * Returns non-zero when thread_count exceeds core_count, zero otherwise.
+ */
+static boolean_t
+cpu_is_hyperthreaded(void)
+{
+    i386_cpu_info_t *info = cpuid_info();
+
+    return(info->core_count < info->thread_count);
+}
static x86_cpu_cache_t *
x86_cache_alloc(void)
return(cache);
}
+
+/*
+ * Work out which cache is the last-level cache (LLC) and how widely it is
+ * shared, and record the result in topoParms:
+ *
+ *   LLCDepth          zero-based level of the deepest cache present
+ *   maxSharingLLC     cache_sharing[] value reported for that cache
+ *   nCoresSharingLLC  maxSharingLLC divided by threads per core, capped
+ *                     at core_count when the sharing value exceeds it
+ *   nLCPUsSharingLLC  maxSharingLLC, capped at thread_count
+ *
+ * The threads-per-core divisor is validated before use: when core_count
+ * is zero or thread_count is smaller than core_count the quotient would be
+ * zero, so the condition is logged and one thread per core is assumed
+ * instead of dividing by zero.
+ */
+static void
+x86_LLC_info(void)
+{
+    int                 cache_level = 0;
+    uint32_t            nCPUsSharing = 1;
+    uint32_t            threadsPerCore = 0;
+    i386_cpu_info_t     *cpuinfo;
+    struct cpu_cache    *cachep;
+    int                 i;
+
+    cpuinfo = cpuid_info();
+
+    for (i = 0, cachep = &cpu_caches[0]; i < LCACHE_MAX; i++, cachep++) {
+
+        /* Skip unused table slots and caches that are not present. */
+        if (cachep->type == 0 || cpuid_info()->cache_size[i] == 0)
+            continue;
+
+        /*
+         * Only worry about it if it's a deeper level than
+         * what we've seen before.
+         */
+        if (cachep->level > cache_level) {
+            cache_level = cachep->level;
+
+            /*
+             * Save the number of CPUs sharing this cache.
+             */
+            nCPUsSharing = cpuinfo->cache_sharing[i];
+        }
+    }
+
+    /*
+     * Make the level of the LLC be 0 based.
+     *
+     * NOTE(review): if no cache entry qualified above, cache_level is
+     * still 0 and this stores -1 -- confirm that users of LLCDepth
+     * tolerate that.
+     */
+    topoParms.LLCDepth = cache_level - 1;
+
+    /*
+     * nCPUsSharing represents the *maximum* number of cores or
+     * logical CPUs sharing the cache.
+     */
+    topoParms.maxSharingLLC = nCPUsSharing;
+
+    /*
+     * Threads per core is used as a divisor, so make sure it is usable.
+     */
+    if (cpuinfo->core_count != 0)
+        threadsPerCore = cpuinfo->thread_count / cpuinfo->core_count;
+    if (threadsPerCore == 0) {
+        kprintf("x86_LLC_info: unusable thread_count %u / core_count %u; "
+            "assuming 1 thread per core\n",
+            (unsigned int) cpuinfo->thread_count,
+            (unsigned int) cpuinfo->core_count);
+        threadsPerCore = 1;
+    }
+
+    topoParms.nCoresSharingLLC = nCPUsSharing / threadsPerCore;
+    topoParms.nLCPUsSharingLLC = nCPUsSharing;
+
+    /*
+     * nCPUsSharing may not be the number of *active* cores or
+     * threads that are sharing the cache.
+     */
+    if (nCPUsSharing > cpuinfo->core_count)
+        topoParms.nCoresSharingLLC = cpuinfo->core_count;
+    if (nCPUsSharing > cpuinfo->thread_count)
+        topoParms.nLCPUsSharingLLC = cpuinfo->thread_count;
+}
+
+static void
+initTopoParms(void) /*
+ * Fill in the global topoParms structure and set topoParmsInited.
+ *
+ * The LLC sharing values are computed first by x86_LLC_info(); the
+ * per-core, per-die and per-package counts are then derived from them.
+ * Each count exists in a "logical" form (nL*, based on thread_count and
+ * core_count) and a "physical" form (nP*, based on
+ * cpuid_logical_per_package and cpuid_cores_per_package).  The results
+ * are reported through TOPO_DBG.
+ */
+{
+ i386_cpu_info_t *cpuinfo;
+
+ topoParms.stable = FALSE;
+
+ cpuinfo = cpuid_info();
+
+ PE_parse_boot_argn("-topo", &topo_dbg, sizeof(topo_dbg)); /* presumably sets topo_dbg when the "-topo" boot-arg is given -- confirm */
+
+ /*
+ * We need to start with getting the LLC information correct.
+ */
+ x86_LLC_info();
+
+ /*
+ * Compute the number of threads (logical CPUs) per core.
+ *
+ * DIVISOR_GUARD only logs a zero divisor through kprintf(); the
+ * division that follows each guard is still performed.
+ */
+ DIVISOR_GUARD(cpuinfo->core_count);
+ topoParms.nLThreadsPerCore = cpuinfo->thread_count / cpuinfo->core_count;
+ DIVISOR_GUARD(cpuinfo->cpuid_cores_per_package);
+ topoParms.nPThreadsPerCore = cpuinfo->cpuid_logical_per_package / cpuinfo->cpuid_cores_per_package;
+
+ /*
+ * Compute the number of dies per package.
+ */
+ DIVISOR_GUARD(topoParms.nCoresSharingLLC);
+ topoParms.nLDiesPerPackage = cpuinfo->core_count / topoParms.nCoresSharingLLC;
+ DIVISOR_GUARD(topoParms.nPThreadsPerCore);
+ DIVISOR_GUARD(topoParms.maxSharingLLC / topoParms.nPThreadsPerCore); /* this quotient is itself the divisor on the next line */
+ topoParms.nPDiesPerPackage = cpuinfo->cpuid_cores_per_package / (topoParms.maxSharingLLC / topoParms.nPThreadsPerCore);
+
+
+ /*
+ * Compute the number of cores per die.
+ */
+ topoParms.nLCoresPerDie = topoParms.nCoresSharingLLC;
+ topoParms.nPCoresPerDie = (topoParms.maxSharingLLC / topoParms.nPThreadsPerCore);
+
+ /*
+ * Compute the number of threads per die.
+ */
+ topoParms.nLThreadsPerDie = topoParms.nLThreadsPerCore * topoParms.nLCoresPerDie;
+ topoParms.nPThreadsPerDie = topoParms.nPThreadsPerCore * topoParms.nPCoresPerDie;
+
+ /*
+ * Compute the number of cores per package.
+ */
+ topoParms.nLCoresPerPackage = topoParms.nLCoresPerDie * topoParms.nLDiesPerPackage;
+ topoParms.nPCoresPerPackage = topoParms.nPCoresPerDie * topoParms.nPDiesPerPackage;
+
+ /*
+ * Compute the number of threads per package.
+ */
+ topoParms.nLThreadsPerPackage = topoParms.nLThreadsPerCore * topoParms.nLCoresPerPackage;
+ topoParms.nPThreadsPerPackage = topoParms.nPThreadsPerCore * topoParms.nPCoresPerPackage;
+
+ TOPO_DBG("\nCache Topology Parameters:\n");
+ TOPO_DBG("\tLLC Depth: %d\n", topoParms.LLCDepth);
+ TOPO_DBG("\tCores Sharing LLC: %d\n", topoParms.nCoresSharingLLC);
+ TOPO_DBG("\tThreads Sharing LLC: %d\n", topoParms.nLCPUsSharingLLC);
+ TOPO_DBG("\tmax Sharing of LLC: %d\n", topoParms.maxSharingLLC);
+
+ TOPO_DBG("\nLogical Topology Parameters:\n");
+ TOPO_DBG("\tThreads per Core: %d\n", topoParms.nLThreadsPerCore);
+ TOPO_DBG("\tCores per Die: %d\n", topoParms.nLCoresPerDie);
+ TOPO_DBG("\tThreads per Die: %d\n", topoParms.nLThreadsPerDie);
+ TOPO_DBG("\tDies per Package: %d\n", topoParms.nLDiesPerPackage);
+ TOPO_DBG("\tCores per Package: %d\n", topoParms.nLCoresPerPackage);
+ TOPO_DBG("\tThreads per Package: %d\n", topoParms.nLThreadsPerPackage);
+
+ TOPO_DBG("\nPhysical Topology Parameters:\n");
+ TOPO_DBG("\tThreads per Core: %d\n", topoParms.nPThreadsPerCore);
+ TOPO_DBG("\tCores per Die: %d\n", topoParms.nPCoresPerDie);
+ TOPO_DBG("\tThreads per Die: %d\n", topoParms.nPThreadsPerDie);
+ TOPO_DBG("\tDies per Package: %d\n", topoParms.nPDiesPerPackage);
+ TOPO_DBG("\tCores per Package: %d\n", topoParms.nPCoresPerPackage);
+ TOPO_DBG("\tThreads per Package: %d\n", topoParms.nPThreadsPerPackage);
+
+ topoParmsInited = TRUE; /* lets cpu_thread_alloc() skip this routine on later calls */
+}
static void
x86_cache_free(x86_cpu_cache_t *cache)
x86_cpu_cache_t *root = NULL;
x86_cpu_cache_t *cur = NULL;
x86_cpu_cache_t *last = NULL;
- uint32_t index;
- uint32_t cache_info[4];
- uint32_t nsets;
-
- do_cpuid(0, cache_info);
-
- if (cache_info[eax] < 4) {
- /*
- * Processor does not support deterministic
- * cache information. Don't report anything
- */
- return NULL;
- }
-
- for (index = 0; ; index += 1) {
- cache_info[eax] = 4;
- cache_info[ecx] = index;
- cache_info[ebx] = 0;
- cache_info[edx] = 0;
-
- cpuid(cache_info);
+ struct cpu_cache *cachep;
+ int i;
- /*
- * See if all levels have been queried.
- */
- if (bitfield(cache_info[eax], 4, 0) == 0)
- break;
+ /*
+ * Cons up a list driven not by CPUID leaf 4 (deterministic cache params)
+ * but by the table above plus parameters already cracked from cpuid...
+ */
+ for (i = 0, cachep = &cpu_caches[0]; i < LCACHE_MAX; i++, cachep++) {
+ if (cachep->type == 0 || cpuid_info()->cache_size[i] == 0)
+ continue;
+
cur = x86_cache_alloc();
- if (cur == NULL) {
+ if (cur == NULL)
break;
- }
- cur->type = bitfield(cache_info[eax], 4, 0);
- cur->level = bitfield(cache_info[eax], 7, 5);
- cur->nlcpus = bitfield(cache_info[eax], 25, 14) + 1;
- cur->line_size = bitfield(cache_info[ebx], 11, 0) + 1;
- cur->partitions = bitfield(cache_info[ebx], 21, 12) + 1;
- cur->ways = bitfield(cache_info[ebx], 31, 22) + 1;
- nsets = bitfield(cache_info[ecx], 31, 0) + 1;
- cur->cache_size = cur->line_size * cur->ways * cur->partitions * nsets;
+ cur->type = cachep->type;
+ cur->level = cachep->level;
+ cur->nlcpus = 0;
+ cur->maxcpus = cpuid_info()->cache_sharing[i];
+ cur->partitions = cpuid_info()->cache_partitions[i];
+ cur->cache_size = cpuid_info()->cache_size[i];
+ cur->line_size = cpuid_info()->cache_linesize;
if (last == NULL) {
root = cur;
last->next = cur;
last = cur;
}
-
num_Lx_caches[cur->level - 1] += 1;
}
-
- return(root);
+ return root;
}
-static boolean_t
-cpu_is_hyperthreaded(void)
+
+static x86_cpu_cache_t *
+x86_match_cache(x86_cpu_cache_t *list, x86_cpu_cache_t *matcher) /*
+ * Search the chain starting at 'list' (linked through ->next) for a
+ * cache whose maxcpus, type, level, partitions, line_size and cache_size
+ * all equal those of 'matcher'.  Returns the first such entry, or NULL
+ * when the chain is empty or nothing matches.
+ */
{
- if (cpuid_features() & CPUID_FEATURE_HTT)
- return (cpuid_info()->cpuid_logical_per_package /
- cpuid_info()->cpuid_cores_per_package) > 1;
- else
- return FALSE;
+ x86_cpu_cache_t *cur_cache;
+
+ cur_cache = list;
+ while (cur_cache != NULL) {
+ if (cur_cache->maxcpus == matcher->maxcpus
+ && cur_cache->type == matcher->type
+ && cur_cache->level == matcher->level
+ && cur_cache->partitions == matcher->partitions
+ && cur_cache->line_size == matcher->line_size
+ && cur_cache->cache_size == matcher->cache_size)
+ break;
+
+ cur_cache = cur_cache->next;
+ }
+
+ return(cur_cache); /* NULL here means the loop ran off the end of the chain */
}
static void
lcpu = &cpup->lcpu;
lcpu->lcpu = lcpu;
lcpu->cpu = cpup;
- lcpu->next = NULL;
- lcpu->core = NULL;
+ lcpu->next_in_core = NULL;
+ lcpu->next_in_die = NULL;
+ lcpu->next_in_pkg = NULL;
+ lcpu->core = NULL;
+ lcpu->die = NULL;
+ lcpu->package = NULL;
+ lcpu->cpu_num = cpu;
lcpu->lnum = cpu;
lcpu->pnum = cpup->cpu_phys_number;
- lcpu->halted = FALSE; /* XXX is this correct? */
- lcpu->idle = FALSE; /* XXX is this correct? */
+ lcpu->state = LCPU_OFF;
for (i = 0; i < MAX_CACHE_DEPTH; i += 1)
lcpu->caches[i] = NULL;
-
- lcpu->master = (lcpu->pnum == (unsigned int) master_cpu);
- lcpu->primary = (lcpu->pnum % cpuid_info()->cpuid_logical_per_package) == 0;
}
static x86_core_t *
{
x86_core_t *core;
cpu_data_t *cpup;
- uint32_t cpu_in_pkg;
- uint32_t lcpus_per_core;
cpup = cpu_datap(cpu);
simple_lock(&x86_topo_lock);
if (free_cores != NULL) {
core = free_cores;
- free_cores = core->next;
- core->next = NULL;
+ free_cores = core->next_in_die;
+ core->next_in_die = NULL;
simple_unlock(&x86_topo_lock);
} else {
simple_unlock(&x86_topo_lock);
bzero((void *) core, sizeof(x86_core_t));
- cpu_in_pkg = cpu % cpuid_info()->cpuid_logical_per_package;
- lcpus_per_core = cpuid_info()->cpuid_logical_per_package /
- cpuid_info()->cpuid_cores_per_package;
-
- core->pcore_num = cpup->cpu_phys_number / lcpus_per_core;
- core->lcore_num = core->pcore_num % cpuid_info()->cpuid_cores_per_package;
+ core->pcore_num = cpup->cpu_phys_number / topoParms.nPThreadsPerCore;
+ core->lcore_num = core->pcore_num % topoParms.nPCoresPerPackage;
core->flags = X86CORE_FL_PRESENT | X86CORE_FL_READY
| X86CORE_FL_HALTED | X86CORE_FL_IDLE;
x86_core_free(x86_core_t *core)
{
simple_lock(&x86_topo_lock);
- core->next = free_cores;
+ core->next_in_die = free_cores;
free_cores = core;
simple_unlock(&x86_topo_lock);
}
cpup = cpu_datap(cpu);
- pkg_num = cpup->cpu_phys_number / cpuid_info()->cpuid_logical_per_package;
+ pkg_num = cpup->cpu_phys_number / topoParms.nPThreadsPerPackage;
pkg = x86_pkgs;
while (pkg != NULL) {
return(pkg);
}
+
+/*
+ * Locate the die structure for logical CPU 'cpu'.
+ *
+ * The wanted die number is the CPU's physical number divided by the
+ * number of physical threads per die.  The dies linked to the CPU's
+ * package are searched for that number.
+ *
+ * Returns the matching die, or NULL when x86_package_find() finds no
+ * package or no die in the package carries that number.
+ */
+static x86_die_t *
+x86_die_find(int cpu)
+{
+    x86_pkg_t   *package;
+    x86_die_t   *candidate;
+    uint32_t    wanted;
+
+    wanted = cpu_datap(cpu)->cpu_phys_number / topoParms.nPThreadsPerDie;
+
+    package = x86_package_find(cpu);
+    if (package == NULL)
+        return(NULL);
+
+    for (candidate = package->dies;
+         candidate != NULL;
+         candidate = candidate->next_in_pkg) {
+        if (candidate->pdie_num == wanted)
+            return(candidate);
+    }
+
+    return(NULL);
+}
static x86_core_t *
x86_core_find(int cpu) /*
+ * Return the core structure for logical CPU 'cpu', or NULL when the
+ * CPU's die is not found or no core on that die has the wanted number.
+ */
{
x86_core_t *core;
- x86_pkg_t *pkg;
+ x86_die_t *die;
cpu_data_t *cpup;
uint32_t core_num;
cpup = cpu_datap(cpu);
- core_num = cpup->cpu_phys_number
- / (cpuid_info()->cpuid_logical_per_package
- / cpuid_info()->cpuid_cores_per_package);
+ core_num = cpup->cpu_phys_number / topoParms.nPThreadsPerCore; /* physical core number wanted */
- pkg = x86_package_find(cpu);
- if (pkg == NULL)
+ die = x86_die_find(cpu);
+ if (die == NULL)
return(NULL);
- core = pkg->cores;
+ core = die->cores; /* search only the cores linked to this die */
while (core != NULL) {
if (core->pcore_num == core_num)
break;
- core = core->next;
+ core = core->next_in_die;
}
return(core);
}
+
+/*
+ * Renumber a logical CPU together with the core, die and package it is
+ * attached to.
+ *
+ *   lcpu  logical CPU; its core, die and package links are asserted to be
+ *         non-NULL
+ *   pnum  value stored as the CPU's physical number
+ *   lnum  logical CPU number; all other numbers written here are derived
+ *         from it using the logical (nL*) values in topoParms
+ */
+void
+x86_set_logical_topology(x86_lcpu_t *lcpu, int pnum, int lnum)
+{
+    x86_core_t  *owning_core;
+    x86_die_t   *owning_die;
+    x86_pkg_t   *owning_pkg;
+
+    owning_core = lcpu->core;
+    owning_die = lcpu->die;
+    owning_pkg = lcpu->package;
+
+    assert(owning_core != NULL);
+    assert(owning_die != NULL);
+    assert(owning_pkg != NULL);
+
+    /* The logical CPU itself. */
+    lcpu->cpu_num = lnum;
+    lcpu->pnum = pnum;
+    lcpu->master = (lnum == master_cpu);
+    lcpu->primary = (lnum % topoParms.nLThreadsPerPackage) == 0;
+    lcpu->lnum = lnum % topoParms.nLThreadsPerCore;
+
+    /* The core: absolute number, then its position within the die. */
+    owning_core->pcore_num = lnum / topoParms.nLThreadsPerCore;
+    owning_core->lcore_num = owning_core->pcore_num % topoParms.nLCoresPerDie;
+
+    /* The die: absolute number, then its position within the package. */
+    owning_die->pdie_num = lnum / (topoParms.nLThreadsPerCore*topoParms.nLCoresPerDie);
+    owning_die->ldie_num = owning_die->pdie_num % topoParms.nLDiesPerPackage;
+
+    /* The package: both numbers are the same value. */
+    owning_pkg->ppkg_num = lnum / topoParms.nLThreadsPerPackage;
+    owning_pkg->lpkg_num = owning_pkg->ppkg_num;
+}
+
+/*
+ * Produce a zeroed, numbered die structure for logical CPU 'cpu'.
+ *
+ * A die is taken from the free_dies list when one is available (under
+ * x86_topo_lock); otherwise one is obtained from kalloc(), and a failed
+ * allocation panics.  The structure is cleared, given its physical die
+ * number and the current num_dies value as its logical number, and
+ * flagged present.  The die is not linked into any package here.
+ */
+static x86_die_t *
+x86_die_alloc(int cpu)
+{
+    cpu_data_t  *cdp = cpu_datap(cpu);
+    x86_die_t   *new_die;
+
+    /* Recycle a previously freed die if the free list has one. */
+    simple_lock(&x86_topo_lock);
+    new_die = free_dies;
+    if (new_die != NULL) {
+        free_dies = new_die->next_in_pkg;
+        new_die->next_in_pkg = NULL;
+    }
+    simple_unlock(&x86_topo_lock);
+
+    /* Nothing to recycle: allocate with the lock dropped. */
+    if (new_die == NULL) {
+        new_die = kalloc(sizeof(x86_die_t));
+        if (new_die == NULL)
+            panic("x86_die_alloc() kalloc of x86_die_t failed!\n");
+    }
+
+    bzero((void *) new_die, sizeof(x86_die_t));
+
+    new_die->pdie_num = cdp->cpu_phys_number / topoParms.nPThreadsPerDie;
+
+    /*
+     * NOTE(review): num_dies is declared uint32_t in this file but is
+     * passed as a long * -- confirm the operand width atomic_incl uses.
+     */
+    new_die->ldie_num = num_dies;
+    atomic_incl((long *) &num_dies, 1);
+
+    new_die->flags = X86DIE_FL_PRESENT;
+    return(new_die);
+}
static void
-x86_core_add_lcpu(x86_core_t *core, x86_lcpu_t *lcpu)
+x86_die_free(x86_die_t *die) /*
+ * Push 'die' onto the head of the free_dies list and decrement
+ * num_dies, both while holding x86_topo_lock.  The structure is kept
+ * for reuse by x86_die_alloc(); it is not released here.
+ */
+{
+ simple_lock(&x86_topo_lock);
+ die->next_in_pkg = free_dies; /* next_in_pkg doubles as the free-list link */
+ free_dies = die;
+ atomic_decl((long *) &num_dies, 1);
+ simple_unlock(&x86_topo_lock);
+}
+
+/*
+ * Produce a zeroed, numbered package structure for logical CPU 'cpu'.
+ *
+ * A package is taken from the free_pkgs list when one is available
+ * (under x86_topo_lock); otherwise one is obtained from kalloc(), and a
+ * failed allocation panics.  The structure is cleared, given its
+ * physical package number and the current topoParms.nPackages value as
+ * its logical number, and flagged present and ready.  The package is not
+ * linked into x86_pkgs here.
+ */
+static x86_pkg_t *
+x86_package_alloc(int cpu)
+{
+    cpu_data_t  *cdp = cpu_datap(cpu);
+    x86_pkg_t   *new_pkg;
+
+    /* Recycle a previously freed package if the free list has one. */
+    simple_lock(&x86_topo_lock);
+    new_pkg = free_pkgs;
+    if (new_pkg != NULL) {
+        free_pkgs = new_pkg->next;
+        new_pkg->next = NULL;
+    }
+    simple_unlock(&x86_topo_lock);
+
+    /* Nothing to recycle: allocate with the lock dropped. */
+    if (new_pkg == NULL) {
+        new_pkg = kalloc(sizeof(x86_pkg_t));
+        if (new_pkg == NULL)
+            panic("x86_package_alloc() kalloc of x86_pkg_t failed!\n");
+    }
+
+    bzero((void *) new_pkg, sizeof(x86_pkg_t));
+
+    new_pkg->ppkg_num = cdp->cpu_phys_number / topoParms.nPThreadsPerPackage;
+
+    new_pkg->lpkg_num = topoParms.nPackages;
+    atomic_incl((long *) &topoParms.nPackages, 1);
+
+    new_pkg->flags = X86PKG_FL_PRESENT | X86PKG_FL_READY;
+    return(new_pkg);
+}
+
+/*
+ * Retire a package structure: with x86_topo_lock held, decrement
+ * topoParms.nPackages and push 'pkg' onto the head of the free_pkgs list
+ * so that x86_package_alloc() can reuse it.
+ */
+static void
+x86_package_free(x86_pkg_t *pkg)
+{
+    simple_lock(&x86_topo_lock);
+
+    atomic_decl((long *) &topoParms.nPackages, 1);
+
+    /* ->next doubles as the free-list link. */
+    pkg->next = free_pkgs;
+    free_pkgs = pkg;
+
+    simple_unlock(&x86_topo_lock);
+}
+
+/*
+ * Attach 'cache' to logical CPU 'lcpu'.
+ *
+ * The cache becomes the head of the CPU's chain for its level
+ * (lcpu->caches[level - 1]), with the previous head hanging off
+ * cache->next.  The cache's CPU count is incremented and the CPU is
+ * stored in the first empty cpus[] slot among the first nlcpus entries.
+ *
+ * NOTE(review): cache->next is overwritten on every call, including
+ * when the same cache is attached to a second CPU -- confirm that is
+ * the intended chaining for shared caches.
+ */
+static void
+x86_cache_add_lcpu(x86_cpu_cache_t *cache, x86_lcpu_t *lcpu)
+{
+    x86_cpu_cache_t **head;
+    int             slot;
+
+    /*
+     * Put the cache at the front of the CPU's per-level chain.
+     */
+    head = &lcpu->caches[cache->level - 1];
+    cache->next = *head;
+    *head = cache;
+
+    /*
+     * Count the CPU and record it in the first free slot.
+     */
+    cache->nlcpus += 1;
+    slot = 0;
+    while (slot < cache->nlcpus) {
+        if (cache->cpus[slot] == NULL) {
+            cache->cpus[slot] = lcpu;
+            break;
+        }
+        slot += 1;
+    }
+}
+
+static void
+x86_lcpu_add_caches(x86_lcpu_t *lcpu)
{
x86_cpu_cache_t *list;
x86_cpu_cache_t *cur;
- x86_core_t *cur_core;
+ x86_cpu_cache_t *match;
+ x86_die_t *die;
+ x86_core_t *core;
x86_lcpu_t *cur_lcpu;
- boolean_t found;
- int level;
- int i;
- uint32_t cpu_mask;
+ uint32_t level;
+ boolean_t found = FALSE;
- assert(core != NULL);
assert(lcpu != NULL);
/*
* If the cache isn't shared then just put it where it
* belongs.
*/
- if (cur->nlcpus == 1) {
- goto found_first;
+ if (cur->maxcpus == 1) {
+ x86_cache_add_lcpu(cur, lcpu);
+ continue;
}
/*
/*
* This is a shared cache, so we have to figure out if
* this is the first time we've seen this cache. We do
- * this by searching through the package and seeing if
- * a related core is already describing this cache.
+ * this by searching through the topology and seeing if
+ * this cache is already described.
*
- * NOTE: This assumes that CPUs whose ID mod <# sharing cache>
- * are indeed sharing the cache.
+ * Assume that L{LLC-1} are all at the core level and that
+ * LLC is shared at the die level.
*/
- cpu_mask = lcpu->pnum & ~(cur->nlcpus - 1);
- cur_core = core->package->cores;
- found = FALSE;
-
- while (cur_core != NULL && !found) {
- cur_lcpu = cur_core->lcpus;
- while (cur_lcpu != NULL && !found) {
- if ((cur_lcpu->pnum & ~(cur->nlcpus - 1)) == cpu_mask) {
- lcpu->caches[level] = cur_lcpu->caches[level];
- found = TRUE;
- x86_cache_free(cur);
+ if (level < topoParms.LLCDepth) {
+ /*
+ * Shared at the core.
+ */
+ core = lcpu->core;
+ cur_lcpu = core->lcpus;
+ while (cur_lcpu != NULL) {
+ /*
+ * Skip ourselves.
+ */
+ if (cur_lcpu == lcpu) {
+ cur_lcpu = cur_lcpu->next_in_core;
+ continue;
+ }
- /*
- * Put the new CPU into the list of the cache.
- */
- cur = lcpu->caches[level];
- for (i = 0; i < cur->nlcpus; i += 1) {
- if (cur->cpus[i] == NULL) {
- cur->cpus[i] = lcpu;
- break;
- }
- }
+ /*
+ * If there's a cache on this logical CPU,
+ * then use that one.
+ */
+ match = x86_match_cache(cur_lcpu->caches[level], cur);
+ if (match != NULL) {
+ x86_cache_free(cur);
+ x86_cache_add_lcpu(match, lcpu);
+ found = TRUE;
+ break;
}
- cur_lcpu = cur_lcpu->next;
+
+ cur_lcpu = cur_lcpu->next_in_core;
}
+ } else {
+ /*
+ * Shared at the die.
+ */
+ die = lcpu->die;
+ cur_lcpu = die->lcpus;
+ while (cur_lcpu != NULL) {
+ /*
+ * Skip ourselves.
+ */
+ if (cur_lcpu == lcpu) {
+ cur_lcpu = cur_lcpu->next_in_die;
+ continue;
+ }
+
+ /*
+ * If there's a cache on this logical CPU,
+ * then use that one.
+ */
+ match = x86_match_cache(cur_lcpu->caches[level], cur);
+ if (match != NULL) {
+ x86_cache_free(cur);
+ x86_cache_add_lcpu(match, lcpu);
+ found = TRUE;
+ break;
+ }
- cur_core = cur_core->next;
+ cur_lcpu = cur_lcpu->next_in_die;
+ }
}
+ /*
+ * If a shared cache wasn't found, then this logical CPU must
+ * be the first one encountered.
+ */
if (!found) {
-found_first:
- cur->next = lcpu->caches[level];
- lcpu->caches[level] = cur;
- cur->cpus[0] = lcpu;
+ x86_cache_add_lcpu(cur, lcpu);
}
}
- /*
- * Add the Logical CPU to the core.
- */
- lcpu->next = core->lcpus;
- lcpu->core = core;
- core->lcpus = lcpu;
- core->num_lcpus += 1;
-
simple_unlock(&x86_topo_lock);
}
-static x86_pkg_t *
-x86_package_alloc(int cpu)
+static void
+x86_core_add_lcpu(x86_core_t *core, x86_lcpu_t *lcpu) /*
+ * Link 'lcpu' at the head of the core's list of logical CPUs, point the
+ * CPU back at the core and bump core->num_lcpus, all while holding
+ * x86_topo_lock.  Both arguments are asserted to be non-NULL.
+ */
{
- x86_pkg_t *pkg;
- cpu_data_t *cpup;
-
- cpup = cpu_datap(cpu);
+ assert(core != NULL);
+ assert(lcpu != NULL);
simple_lock(&x86_topo_lock);
- if (free_pkgs != NULL) {
- pkg = free_pkgs;
- free_pkgs = pkg->next;
- pkg->next = NULL;
- simple_unlock(&x86_topo_lock);
- } else {
- simple_unlock(&x86_topo_lock);
- pkg = kalloc(sizeof(x86_pkg_t));
- if (pkg == NULL)
- panic("x86_package_alloc() kalloc of x86_pkg_t failed!\n");
- }
- bzero((void *) pkg, sizeof(x86_pkg_t));
+ lcpu->next_in_core = core->lcpus; /* old head becomes our successor */
+ lcpu->core = core;
+ core->lcpus = lcpu;
+ core->num_lcpus += 1;
+ simple_unlock(&x86_topo_lock);
+}
- pkg->ppkg_num = cpup->cpu_phys_number
- / cpuid_info()->cpuid_logical_per_package;
+/*
+ * Link 'lcpu' at the head of the die's list of logical CPUs and point
+ * the CPU back at the die.  Both arguments are asserted to be non-NULL.
+ * No lock is taken here.
+ */
+static void
+x86_die_add_lcpu(x86_die_t *die, x86_lcpu_t *lcpu)
+{
+    assert(die != NULL);
+    assert(lcpu != NULL);
+
+    /* Back pointer first, then push onto the front of the list. */
+    lcpu->die = die;
+    lcpu->next_in_die = die->lcpus;
+    die->lcpus = lcpu;
+}
- pkg->lpkg_num = num_packages;
- atomic_incl((long *) &num_packages, 1);
+static void
+x86_die_add_core(x86_die_t *die, x86_core_t *core) /*
+ * Link 'core' at the head of the die's list of cores, point the core
+ * back at the die and bump die->num_cores.  Both arguments are asserted
+ * to be non-NULL.  No lock is taken here.
+ */
+{
+ assert(die != NULL);
+ assert(core != NULL);
- pkg->flags = X86PKG_FL_PRESENT | X86PKG_FL_READY;
- return(pkg);
+ core->next_in_die = die->cores; /* old head becomes our successor */
+ core->die = die;
+ die->cores = core;
+ die->num_cores += 1;
}
-static void
-x86_package_free(x86_pkg_t *pkg)
+ static void
+x86_package_add_lcpu(x86_pkg_t *pkg, x86_lcpu_t *lcpu) /*
+ * Link 'lcpu' at the head of the package's list of logical CPUs and
+ * point the CPU back at the package.  Both arguments are asserted to be
+ * non-NULL.  No lock is taken here.
+ */
{
- simple_lock(&x86_topo_lock);
- pkg->next = free_pkgs;
- free_pkgs = pkg;
- atomic_decl((long *) &num_packages, 1);
- simple_unlock(&x86_topo_lock);
+ assert(pkg != NULL);
+ assert(lcpu != NULL);
+
+ lcpu->next_in_pkg = pkg->lcpus; /* old head becomes our successor */
+ lcpu->package = pkg;
+ pkg->lcpus = lcpu;
}
static void
assert(pkg != NULL);
assert(core != NULL);
- core->next = pkg->cores;
+ core->next_in_pkg = pkg->cores;
core->package = pkg;
pkg->cores = core;
- pkg->num_cores += 1;
+}
+
+/*
+ * Link 'die' at the head of the package's list of dies, point the die
+ * back at the package and bump pkg->num_dies.  Both arguments are
+ * asserted to be non-NULL.  No lock is taken here.
+ */
+static void
+x86_package_add_die(x86_pkg_t *pkg, x86_die_t *die)
+{
+    assert(pkg != NULL);
+    assert(die != NULL);
+
+    /* Back pointer first, then push onto the front of the list. */
+    die->package = pkg;
+    die->next_in_pkg = pkg->dies;
+    pkg->dies = die;
+
+    pkg->num_dies += 1;
}
void *
cpu_thread_alloc(int cpu)
{
- x86_core_t *core;
- x86_pkg_t *pkg;
+ x86_core_t *core = NULL;
+ x86_die_t *die = NULL;
+ x86_pkg_t *pkg = NULL;
cpu_data_t *cpup;
uint32_t phys_cpu;
+ /*
+ * Only allow one to manipulate the topology at a time.
+ */
+ simple_lock(&x86_topo_lock);
+
+ /*
+ * Make sure all of the topology parameters have been initialized.
+ */
+ if (!topoParmsInited)
+ initTopoParms();
+
cpup = cpu_datap(cpu);
phys_cpu = cpup->cpu_phys_number;
}
/*
- * Only allow one to manipulate the topology at a time.
- */
- simple_lock(&x86_topo_lock);
-
- /*
- * Get the core for this logical CPU.
+ * Get the package that the logical CPU is in.
*/
- core_again:
- core = x86_core_find(cpu);
- if (core == NULL) {
- /*
- * Core structure hasn't been created yet, do it now.
- *
- * Get the package that the core is part of.
- */
- package_again:
+ do {
pkg = x86_package_find(cpu);
if (pkg == NULL) {
/*
simple_lock(&x86_topo_lock);
if (x86_package_find(cpu) != NULL) {
x86_package_free(pkg);
- goto package_again;
+ continue;
}
/*
pkg->next = x86_pkgs;
x86_pkgs = pkg;
}
+ } while (pkg == NULL);
- /*
- * Allocate the core structure now.
- */
- simple_unlock(&x86_topo_lock);
- core = x86_core_alloc(cpu);
- simple_lock(&x86_topo_lock);
- if (x86_core_find(cpu) != NULL) {
- x86_core_free(core);
- goto core_again;
+ /*
+ * Get the die that the logical CPU is in.
+ */
+ do {
+ die = x86_die_find(cpu);
+ if (die == NULL) {
+ /*
+ * Die structure hasn't been created yet, do it now.
+ */
+ simple_unlock(&x86_topo_lock);
+ die = x86_die_alloc(cpu);
+ simple_lock(&x86_topo_lock);
+ if (x86_die_find(cpu) != NULL) {
+ x86_die_free(die);
+ continue;
+ }
+
+ /*
+ * Add the die to the package.
+ */
+ x86_package_add_die(pkg, die);
}
+ } while (die == NULL);
- /*
- * Add it to the package.
- */
- x86_package_add_core(pkg, core);
- machine_info.physical_cpu_max += 1;
+ /*
+ * Get the core for this logical CPU.
+ */
+ do {
+ core = x86_core_find(cpu);
+ if (core == NULL) {
+ /*
+ * Allocate the core structure now.
+ */
+ simple_unlock(&x86_topo_lock);
+ core = x86_core_alloc(cpu);
+ simple_lock(&x86_topo_lock);
+ if (x86_core_find(cpu) != NULL) {
+ x86_core_free(core);
+ continue;
+ }
+
+ /*
+ * Add the core to the die & package.
+ */
+ x86_die_add_core(die, core);
+ x86_package_add_core(pkg, core);
+ machine_info.physical_cpu_max += 1;
+ }
+ } while (core == NULL);
- /*
- * Allocate performance counter structure.
- */
- simple_unlock(&x86_topo_lock);
- core->pmc = pmc_alloc();
- simple_lock(&x86_topo_lock);
- }
/*
* Done manipulating the topology, so others can get in.
machine_info.logical_cpu_max += 1;
simple_unlock(&x86_topo_lock);
+ /*
+ * Add the logical CPU to the other topology structures.
+ */
x86_core_add_lcpu(core, &cpup->lcpu);
+ x86_die_add_lcpu(core->die, &cpup->lcpu);
+ x86_package_add_lcpu(core->package, &cpup->lcpu);
+ x86_lcpu_add_caches(&cpup->lcpu);
return (void *) core;
}
void
cpu_thread_init(void)
{
- int my_cpu = get_cpu_number();
- cpu_data_t *cpup = current_cpu_datap();
+ int my_cpu = get_cpu_number();
+ cpu_data_t *cpup = current_cpu_datap();
x86_core_t *core;
- static int initialized = 0;
+ static int initialized = 0;
/*
* If we're the boot processor, we do all of the initialization of
if (core->active_lcpus == 0)
machine_info.physical_cpu += 1;
core->active_lcpus += 1;
- cpup->lcpu.halted = FALSE;
- cpup->lcpu.idle = FALSE;
simple_unlock(&x86_topo_lock);
pmCPUMarkRunning(cpup);
- etimer_resync_deadlines();
+ timer_resync_deadlines();
}
/*
* Called for a cpu to halt permanently
* (as opposed to halting and expecting an interrupt to awaken it).
*/
+__attribute__((noreturn))
void
cpu_thread_halt(void)
{
simple_lock(&x86_topo_lock);
machine_info.logical_cpu -= 1;
- cpup->lcpu.idle = TRUE;
core = cpup->lcpu.core;
core->active_lcpus -= 1;
if (core->active_lcpus == 0)
}
/* NOT REACHED */
}
+
+/*
+ * Validates that the topology was built correctly. Must be called only
+ * after the complete topology is built and no other changes are being made.
+ *
+ * Every package on x86_pkgs is walked. For each one the back pointers of
+ * its dies, cores and logical CPUs are checked, and the number of dies,
+ * cores and logical CPUs found is compared with the logical (nL*) values
+ * in topoParms. The first mismatch calls panic(). When topo_dbg is set
+ * the topology is printed first. Nothing is checked when real_ncpus is
+ * smaller than nPackages * nLThreadsPerPackage (see the XXX note below).
+ */
+void
+x86_validate_topology(void)
+{
+ x86_pkg_t *pkg;
+ x86_die_t *die;
+ x86_core_t *core;
+ x86_lcpu_t *lcpu;
+ uint32_t nDies;
+ uint32_t nCores;
+ uint32_t nCPUs;
+
+ if (topo_dbg)
+ debug_topology_print();
+
+ /*
+ * XXX
+ *
+ * Right now this only works if the number of CPUs started is the total
+ * number of CPUs. However, when specifying cpus=n the topology is only
+ * partially constructed and the checks below will fail.
+ *
+ * We should *always* build the complete topology and only start the CPUs
+ * indicated by cpus=n. Until that happens, this code will not check the
+ * topology if the number of cpus defined is < that described by the
+ * topology parameters.
+ */
+ nCPUs = topoParms.nPackages * topoParms.nLThreadsPerPackage; /* CPUs a fully built topology would hold */
+ if (nCPUs > real_ncpus)
+ return;
+
+ pkg = x86_pkgs;
+ while (pkg != NULL) {
+ /*
+ * Make sure that the package has the correct number of dies.
+ */
+ nDies = 0;
+ die = pkg->dies;
+ while (die != NULL) {
+ if (die->package == NULL)
+ panic("Die(%d)->package is NULL",
+ die->pdie_num);
+ if (die->package != pkg)
+ panic("Die %d points to package %d, should be %d",
+ die->pdie_num, die->package->lpkg_num, pkg->lpkg_num);
+
+ TOPO_DBG("Die(%d)->package %d\n",
+ die->pdie_num, pkg->lpkg_num);
+
+ /*
+ * Make sure that the die has the correct number of cores.
+ */
+ TOPO_DBG("Die(%d)->cores: ", die->pdie_num);
+ nCores = 0;
+ core = die->cores;
+ while (core != NULL) {
+ if (core->die == NULL)
+ panic("Core(%d)->die is NULL",
+ core->pcore_num);
+ if (core->die != die)
+ panic("Core %d points to die %d, should be %d",
+ core->pcore_num, core->die->pdie_num, die->pdie_num);
+ nCores += 1;
+ TOPO_DBG("%d ", core->pcore_num);
+ core = core->next_in_die;
+ }
+ TOPO_DBG("\n");
+
+ if (nCores != topoParms.nLCoresPerDie)
+ panic("Should have %d Cores, but only found %d for Die %d",
+ topoParms.nLCoresPerDie, nCores, die->pdie_num);
+
+ /*
+ * Make sure that the die has the correct number of CPUs.
+ */
+ TOPO_DBG("Die(%d)->lcpus: ", die->pdie_num);
+ nCPUs = 0; /* nCPUs is reused as a per-die counter from here on */
+ lcpu = die->lcpus;
+ while (lcpu != NULL) {
+ if (lcpu->die == NULL)
+ panic("CPU(%d)->die is NULL",
+ lcpu->cpu_num);
+ if (lcpu->die != die)
+ panic("CPU %d points to die %d, should be %d",
+ lcpu->cpu_num, lcpu->die->pdie_num, die->pdie_num);
+ nCPUs += 1;
+ TOPO_DBG("%d ", lcpu->cpu_num);
+ lcpu = lcpu->next_in_die;
+ }
+ TOPO_DBG("\n");
+
+ if (nCPUs != topoParms.nLThreadsPerDie)
+ panic("Should have %d Threads, but only found %d for Die %d",
+ topoParms.nLThreadsPerDie, nCPUs, die->pdie_num);
+
+ nDies += 1;
+ die = die->next_in_pkg;
+ }
+
+ if (nDies != topoParms.nLDiesPerPackage)
+ panic("Should have %d Dies, but only found %d for package %d",
+ topoParms.nLDiesPerPackage, nDies, pkg->lpkg_num);
+
+ /*
+ * Make sure that the package has the correct number of cores.
+ */
+ nCores = 0;
+ core = pkg->cores; /* this walk uses the package-level core list (next_in_pkg) */
+ while (core != NULL) {
+ if (core->package == NULL)
+ panic("Core(%d)->package is NULL",
+ core->pcore_num);
+ if (core->package != pkg)
+ panic("Core %d points to package %d, should be %d",
+ core->pcore_num, core->package->lpkg_num, pkg->lpkg_num);
+ TOPO_DBG("Core(%d)->package %d\n",
+ core->pcore_num, pkg->lpkg_num);
+
+ /*
+ * Make sure that the core has the correct number of CPUs.
+ */
+ nCPUs = 0;
+ lcpu = core->lcpus;
+ TOPO_DBG("Core(%d)->lcpus: ", core->pcore_num);
+ while (lcpu != NULL) {
+ if (lcpu->core == NULL)
+ panic("CPU(%d)->core is NULL",
+ lcpu->cpu_num);
+ if (lcpu->core != core)
+ panic("CPU %d points to core %d, should be %d",
+ lcpu->cpu_num, lcpu->core->pcore_num, core->pcore_num);
+ TOPO_DBG("%d ", lcpu->cpu_num);
+ nCPUs += 1;
+ lcpu = lcpu->next_in_core;
+ }
+ TOPO_DBG("\n");
+
+ if (nCPUs != topoParms.nLThreadsPerCore)
+ panic("Should have %d Threads, but only found %d for Core %d",
+ topoParms.nLThreadsPerCore, nCPUs, core->pcore_num);
+ nCores += 1;
+ core = core->next_in_pkg;
+ }
+
+ if (nCores != topoParms.nLCoresPerPackage)
+ panic("Should have %d Cores, but only found %d for package %d",
+ topoParms.nLCoresPerPackage, nCores, pkg->lpkg_num);
+
+ /*
+ * Make sure that the package has the correct number of CPUs.
+ */
+ nCPUs = 0;
+ lcpu = pkg->lcpus;
+ while (lcpu != NULL) {
+ if (lcpu->package == NULL)
+ panic("CPU(%d)->package is NULL",
+ lcpu->cpu_num);
+ if (lcpu->package != pkg)
+ panic("CPU %d points to package %d, should be %d",
+ lcpu->cpu_num, lcpu->package->lpkg_num, pkg->lpkg_num);
+ TOPO_DBG("CPU(%d)->package %d\n",
+ lcpu->cpu_num, pkg->lpkg_num);
+ nCPUs += 1;
+ lcpu = lcpu->next_in_pkg;
+ }
+
+ if (nCPUs != topoParms.nLThreadsPerPackage)
+ panic("Should have %d Threads, but only found %d for package %d",
+ topoParms.nLThreadsPerPackage, nCPUs, pkg->lpkg_num);
+
+ pkg = pkg->next;
+ }
+}
+
+/*
+ * Dump the whole topology through kprintf(): every package on x86_pkgs,
+ * each die of the package, each core of the die and each logical CPU of
+ * the core, with the physical and logical numbers stored in each
+ * structure and the MASTER/PRIMARY flags of each logical CPU.
+ */
+static void
+debug_topology_print(void)
+{
+    x86_pkg_t   *p;
+    x86_die_t   *d;
+    x86_core_t  *c;
+    x86_lcpu_t  *l;
+
+    for (p = x86_pkgs; p != NULL; p = p->next) {
+        kprintf("Package:\n");
+        kprintf(" Physical: %d\n", p->ppkg_num);
+        kprintf(" Logical: %d\n", p->lpkg_num);
+
+        for (d = p->dies; d != NULL; d = d->next_in_pkg) {
+            kprintf(" Die:\n");
+            kprintf(" Physical: %d\n", d->pdie_num);
+            kprintf(" Logical: %d\n", d->ldie_num);
+
+            for (c = d->cores; c != NULL; c = c->next_in_die) {
+                kprintf(" Core:\n");
+                kprintf(" Physical: %d\n", c->pcore_num);
+                kprintf(" Logical: %d\n", c->lcore_num);
+
+                for (l = c->lcpus; l != NULL; l = l->next_in_core) {
+                    kprintf(" LCPU:\n");
+                    kprintf(" CPU #: %d\n", l->cpu_num);
+                    kprintf(" Physical: %d\n", l->pnum);
+                    kprintf(" Logical: %d\n", l->lnum);
+
+                    /* Flags: either or both names, or a placeholder. */
+                    kprintf(" Flags: ");
+                    if (l->master)
+                        kprintf("MASTER ");
+                    if (l->primary)
+                        kprintf("PRIMARY");
+                    if (!(l->master || l->primary))
+                        kprintf("(NONE)");
+                    kprintf("\n");
+                }
+            }
+        }
+    }
+}