+ if (cachep->level > cache_level) {
+ cache_level = cachep->level;
+
+ /*
+ * Save the number of CPUs sharing this cache.
+ */
+ nCPUsSharing = cpuinfo->cache_sharing[i];
+ }
+ }
+
+ /*
+ * Make the level of the LLC 0-based.
+ */
+ topoParms.LLCDepth = cache_level - 1;
+
+ /*
+ * nCPUsSharing represents the *maximum* number of cores or
+ * logical CPUs sharing the cache.
+ */
+ topoParms.maxSharingLLC = nCPUsSharing;
+
+ topoParms.nCoresSharingLLC = nCPUsSharing / (cpuinfo->thread_count /
+ cpuinfo->core_count);
+ topoParms.nLCPUsSharingLLC = nCPUsSharing;
+
+ /*
+ * nCPUsSharing may not be the number of *active* cores or
+ * threads that are sharing the cache.
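+ * For example (illustrative, not from cpuid): a part whose LLC is wired
+ * for 16 logical CPUs but which has only 4 cores / 8 threads enabled
+ * reports nCPUsSharing == 16, so clamp to the active counts below.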
+ */
+ if (nCPUsSharing > cpuinfo->core_count)
+ topoParms.nCoresSharingLLC = cpuinfo->core_count;
+ if (nCPUsSharing > cpuinfo->thread_count)
+ topoParms.nLCPUsSharingLLC = cpuinfo->thread_count;
+}
+
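+ /*
+ * Derive the logical and physical topology parameters (threads per
+ * core, cores and threads per die and package, dies per package) from
+ * the cpuid info and the LLC sharing information.
+ */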
+static void
+initTopoParms(void)
+{
+ i386_cpu_info_t *cpuinfo;
+
+ topoParms.stable = FALSE;
+
+ cpuinfo = cpuid_info();
+
+ PE_parse_boot_argn("-topo", &topo_dbg, sizeof(topo_dbg));
+
+ /*
+ * Start by getting the LLC information; the die-level parameters
+ * below are derived from it.
+ */
+ x86_LLC_info();
+
+ /*
+ * Compute the number of threads (logical CPUs) per core.
+ */
+ DIVISOR_GUARD(cpuinfo->core_count);
+ topoParms.nLThreadsPerCore = cpuinfo->thread_count / cpuinfo->core_count;
+ DIVISOR_GUARD(cpuinfo->cpuid_cores_per_package);
+ topoParms.nPThreadsPerCore = cpuinfo->cpuid_logical_per_package / cpuinfo->cpuid_cores_per_package;
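+ /*
+ * Illustrative example (values not from cpuid): a 4-core, 8-thread
+ * package gives 8 / 4 == 2 threads per core for both counts.
+ */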
+
+ /*
+ * Compute the number of dies per package.
+ */
+ DIVISOR_GUARD(topoParms.nCoresSharingLLC);
+ topoParms.nLDiesPerPackage = cpuinfo->core_count / topoParms.nCoresSharingLLC;
+ DIVISOR_GUARD(topoParms.nPThreadsPerCore);
+ DIVISOR_GUARD(topoParms.maxSharingLLC / topoParms.nPThreadsPerCore);
+ topoParms.nPDiesPerPackage = cpuinfo->cpuid_cores_per_package / (topoParms.maxSharingLLC / topoParms.nPThreadsPerCore);
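+ /*
+ * Continuing the illustrative example: with an LLC shared by all 8
+ * logical CPUs and 2 threads per core, the LLC spans 8 / 2 == 4 cores,
+ * so the package has 4 / 4 == 1 die.
+ */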
+
+ /*
+ * Compute the number of cores per die.
+ */
+ topoParms.nLCoresPerDie = topoParms.nCoresSharingLLC;
+ topoParms.nPCoresPerDie = (topoParms.maxSharingLLC / topoParms.nPThreadsPerCore);
+
+ /*
+ * Compute the number of threads per die.
+ */
+ topoParms.nLThreadsPerDie = topoParms.nLThreadsPerCore * topoParms.nLCoresPerDie;
+ topoParms.nPThreadsPerDie = topoParms.nPThreadsPerCore * topoParms.nPCoresPerDie;
+
+ /*
+ * Compute the number of cores per package.
+ */
+ topoParms.nLCoresPerPackage = topoParms.nLCoresPerDie * topoParms.nLDiesPerPackage;
+ topoParms.nPCoresPerPackage = topoParms.nPCoresPerDie * topoParms.nPDiesPerPackage;
+
+ /*
+ * Compute the number of threads per package.
+ */
+ topoParms.nLThreadsPerPackage = topoParms.nLThreadsPerCore * topoParms.nLCoresPerPackage;
+ topoParms.nPThreadsPerPackage = topoParms.nPThreadsPerCore * topoParms.nPCoresPerPackage;
+
+ TOPO_DBG("\nCache Topology Parameters:\n");
+ TOPO_DBG("\tLLC Depth: %d\n", topoParms.LLCDepth);
+ TOPO_DBG("\tCores Sharing LLC: %d\n", topoParms.nCoresSharingLLC);
+ TOPO_DBG("\tThreads Sharing LLC: %d\n", topoParms.nLCPUsSharingLLC);
+ TOPO_DBG("\tmax Sharing of LLC: %d\n", topoParms.maxSharingLLC);
+
+ TOPO_DBG("\nLogical Topology Parameters:\n");
+ TOPO_DBG("\tThreads per Core: %d\n", topoParms.nLThreadsPerCore);
+ TOPO_DBG("\tCores per Die: %d\n", topoParms.nLCoresPerDie);
+ TOPO_DBG("\tThreads per Die: %d\n", topoParms.nLThreadsPerDie);
+ TOPO_DBG("\tDies per Package: %d\n", topoParms.nLDiesPerPackage);
+ TOPO_DBG("\tCores per Package: %d\n", topoParms.nLCoresPerPackage);
+ TOPO_DBG("\tThreads per Package: %d\n", topoParms.nLThreadsPerPackage);
+
+ TOPO_DBG("\nPhysical Topology Parameters:\n");
+ TOPO_DBG("\tThreads per Core: %d\n", topoParms.nPThreadsPerCore);
+ TOPO_DBG("\tCores per Die: %d\n", topoParms.nPCoresPerDie);
+ TOPO_DBG("\tThreads per Die: %d\n", topoParms.nPThreadsPerDie);
+ TOPO_DBG("\tDies per Package: %d\n", topoParms.nPDiesPerPackage);
+ TOPO_DBG("\tCores per Package: %d\n", topoParms.nPCoresPerPackage);
+ TOPO_DBG("\tThreads per Package: %d\n", topoParms.nPThreadsPerPackage);
+
+ topoParmsInited = TRUE;
+}
+
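+ /*
+ * Return a cache structure to the x86_caches free list and adjust the
+ * cache counts.
+ */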
+static void
+x86_cache_free(x86_cpu_cache_t *cache)
+{
+ num_caches -= 1;
+ if (cache->level > 0 && cache->level <= MAX_CACHE_DEPTH)
+ num_Lx_caches[cache->level - 1] -= 1;
+ cache->next = x86_caches;
+ x86_caches = cache;
+}
+
+/*
+ * This returns a list of cache structures that represent the
+ * caches for a CPU. Some of the structures may have to be
+ * "freed" if they are actually shared between CPUs.
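+ * Each CPU builds its own copy of the list; duplicates of caches that
+ * are actually shared are detected in x86_lcpu_add_caches() and handed
+ * back to the free pool via x86_cache_free().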
+ */
+static x86_cpu_cache_t *
+x86_cache_list(void)
+{
+ x86_cpu_cache_t *root = NULL;
+ x86_cpu_cache_t *cur = NULL;
+ x86_cpu_cache_t *last = NULL;
+ struct cpu_cache *cachep;
+ int i;
+
+ /*
+ * Cons up a list driven not by CPUID leaf 4 (deterministic cache params)
+ * but by the table above plus parameters already cracked from cpuid...
+ */
+ for (i = 0, cachep = &cpu_caches[0]; i < LCACHE_MAX; i++, cachep++) {
+
+ if (cachep->type == 0 || cpuid_info()->cache_size[i] == 0)
+ continue;
+
+ cur = x86_cache_alloc();
+ if (cur == NULL)
+ break;
+
+ cur->type = cachep->type;
+ cur->level = cachep->level;
+ cur->nlcpus = 0;
+ cur->maxcpus = cpuid_info()->cache_sharing[i];
+ cur->partitions = cpuid_info()->cache_partitions[i];
+ cur->cache_size = cpuid_info()->cache_size[i];
+ cur->line_size = cpuid_info()->cache_linesize;
+
+ if (last == NULL) {
+ root = cur;
+ last = cur;
+ } else {
+ last->next = cur;
+ last = cur;
+ }
+ num_Lx_caches[cur->level - 1] += 1;
+ }
+ return root;
+}
+
+
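+ /*
+ * Return the first cache in the list whose type, level, geometry and
+ * sharing match the given cache, or NULL if there is no match.
+ */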
+static x86_cpu_cache_t *
+x86_match_cache(x86_cpu_cache_t *list, x86_cpu_cache_t *matcher)
+{
+ x86_cpu_cache_t *cur_cache;
+
+ cur_cache = list;
+ while (cur_cache != NULL) {
+ if (cur_cache->maxcpus == matcher->maxcpus
+ && cur_cache->type == matcher->type
+ && cur_cache->level == matcher->level
+ && cur_cache->partitions == matcher->partitions
+ && cur_cache->line_size == matcher->line_size
+ && cur_cache->cache_size == matcher->cache_size)
+ break;
+
+ cur_cache = cur_cache->next;
+ }
+
+ return(cur_cache);
+}
+
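+ /*
+ * Initialize the logical CPU structure embedded in a CPU's cpu_data:
+ * clear its topology links and cache pointers and mark it LCPU_OFF.
+ */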
+static void
+x86_lcpu_init(int cpu)
+{
+ cpu_data_t *cpup;
+ x86_lcpu_t *lcpu;
+ int i;
+
+ cpup = cpu_datap(cpu);
+
+ lcpu = &cpup->lcpu;
+ lcpu->lcpu = lcpu;
+ lcpu->cpu = cpup;
+ lcpu->next_in_core = NULL;
+ lcpu->next_in_die = NULL;
+ lcpu->next_in_pkg = NULL;
+ lcpu->core = NULL;
+ lcpu->die = NULL;
+ lcpu->package = NULL;
+ lcpu->cpu_num = cpu;
+ lcpu->lnum = cpu;
+ lcpu->pnum = cpup->cpu_phys_number;
+ lcpu->state = LCPU_OFF;
+ for (i = 0; i < MAX_CACHE_DEPTH; i += 1)
+ lcpu->caches[i] = NULL;
+}
+
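+ /*
+ * Allocate a core structure, reusing one from the free list when
+ * possible, and derive its physical and logical core numbers from the
+ * CPU's physical number.
+ */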
+static x86_core_t *
+x86_core_alloc(int cpu)
+{
+ x86_core_t *core;
+ cpu_data_t *cpup;
+
+ cpup = cpu_datap(cpu);
+
+ simple_lock(&x86_topo_lock);
+ if (free_cores != NULL) {
+ core = free_cores;
+ free_cores = core->next_in_die;
+ core->next_in_die = NULL;
+ simple_unlock(&x86_topo_lock);
+ } else {
+ simple_unlock(&x86_topo_lock);
+ core = kalloc(sizeof(x86_core_t));
+ if (core == NULL)
+ panic("x86_core_alloc() kalloc of x86_core_t failed!\n");
+ }
+
+ bzero((void *) core, sizeof(x86_core_t));
+
+ core->pcore_num = cpup->cpu_phys_number / topoParms.nPThreadsPerCore;
+ core->lcore_num = core->pcore_num % topoParms.nPCoresPerPackage;
+
+ core->flags = X86CORE_FL_PRESENT | X86CORE_FL_READY
+ | X86CORE_FL_HALTED | X86CORE_FL_IDLE;
+
+ return(core);
+}
+
+static void
+x86_core_free(x86_core_t *core)
+{
+ simple_lock(&x86_topo_lock);
+ core->next_in_die = free_cores;
+ free_cores = core;
+ simple_unlock(&x86_topo_lock);
+}
+
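+ /*
+ * The x86_*_find() routines map a CPU onto its package, die or core by
+ * dividing the CPU's physical number by the number of physical threads
+ * per package, die or core and then walking the corresponding list.
+ */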
+static x86_pkg_t *
+x86_package_find(int cpu)
+{
+ x86_pkg_t *pkg;
+ cpu_data_t *cpup;
+ uint32_t pkg_num;
+
+ cpup = cpu_datap(cpu);
+
+ pkg_num = cpup->cpu_phys_number / topoParms.nPThreadsPerPackage;
+
+ pkg = x86_pkgs;
+ while (pkg != NULL) {
+ if (pkg->ppkg_num == pkg_num)
+ break;
+ pkg = pkg->next;
+ }
+
+ return(pkg);
+}
+
+static x86_die_t *
+x86_die_find(int cpu)
+{
+ x86_die_t *die;
+ x86_pkg_t *pkg;
+ cpu_data_t *cpup;
+ uint32_t die_num;
+
+ cpup = cpu_datap(cpu);
+
+ die_num = cpup->cpu_phys_number / topoParms.nPThreadsPerDie;
+
+ pkg = x86_package_find(cpu);
+ if (pkg == NULL)
+ return(NULL);
+
+ die = pkg->dies;
+ while (die != NULL) {
+ if (die->pdie_num == die_num)
+ break;
+ die = die->next_in_pkg;
+ }
+
+ return(die);
+}
+
+static x86_core_t *
+x86_core_find(int cpu)
+{
+ x86_core_t *core;
+ x86_die_t *die;
+ cpu_data_t *cpup;
+ uint32_t core_num;
+
+ cpup = cpu_datap(cpu);
+
+ core_num = cpup->cpu_phys_number / topoParms.nPThreadsPerCore;
+
+ die = x86_die_find(cpu);
+ if (die == NULL)
+ return(NULL);
+
+ core = die->cores;
+ while (core != NULL) {
+ if (core->pcore_num == core_num)
+ break;
+ core = core->next_in_die;
+ }
+
+ return(core);
+}
+
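+ /*
+ * Renumber a logical CPU, and its core, die and package, based on the
+ * logical CPU number (lnum) rather than the CPU's physical number.
+ */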
+void
+x86_set_logical_topology(x86_lcpu_t *lcpu, int pnum, int lnum)
+{
+ x86_core_t *core = lcpu->core;
+ x86_die_t *die = lcpu->die;
+ x86_pkg_t *pkg = lcpu->package;
+
+ assert(core != NULL);
+ assert(die != NULL);
+ assert(pkg != NULL);
+
+ lcpu->cpu_num = lnum;
+ lcpu->pnum = pnum;
+ lcpu->master = (lnum == master_cpu);
+ lcpu->primary = (lnum % topoParms.nLThreadsPerPackage) == 0;
+
+ lcpu->lnum = lnum % topoParms.nLThreadsPerCore;
+
+ core->pcore_num = lnum / topoParms.nLThreadsPerCore;
+ core->lcore_num = core->pcore_num % topoParms.nLCoresPerDie;
+
+ die->pdie_num = lnum / (topoParms.nLThreadsPerCore*topoParms.nLCoresPerDie);
+ die->ldie_num = die->pdie_num % topoParms.nLDiesPerPackage;
+
+ pkg->ppkg_num = lnum / topoParms.nLThreadsPerPackage;
+ pkg->lpkg_num = pkg->ppkg_num;
+
+}
+
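+ /*
+ * x86_die_alloc() and x86_package_alloc() below follow the same
+ * free-list-then-kalloc pattern as x86_core_alloc() above.
+ */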
+static x86_die_t *
+x86_die_alloc(int cpu)
+{
+ x86_die_t *die;
+ cpu_data_t *cpup;
+
+ cpup = cpu_datap(cpu);
+
+ simple_lock(&x86_topo_lock);
+ if (free_dies != NULL) {
+ die = free_dies;
+ free_dies = die->next_in_pkg;
+ die->next_in_pkg = NULL;
+ simple_unlock(&x86_topo_lock);
+ } else {
+ simple_unlock(&x86_topo_lock);
+ die = kalloc(sizeof(x86_die_t));
+ if (die == NULL)
+ panic("x86_die_alloc() kalloc of x86_die_t failed!\n");
+ }
+
+ bzero((void *) die, sizeof(x86_die_t));
+
+ die->pdie_num = cpup->cpu_phys_number / topoParms.nPThreadsPerDie;
+
+ die->ldie_num = num_dies;
+ atomic_incl((long *) &num_dies, 1);
+
+ die->flags = X86DIE_FL_PRESENT;
+ return(die);
+}
+
+static void
+x86_die_free(x86_die_t *die)
+{
+ simple_lock(&x86_topo_lock);
+ die->next_in_pkg = free_dies;
+ free_dies = die;
+ atomic_decl((long *) &num_dies, 1);
+ simple_unlock(&x86_topo_lock);
+}
+
+static x86_pkg_t *
+x86_package_alloc(int cpu)
+{
+ x86_pkg_t *pkg;
+ cpu_data_t *cpup;
+
+ cpup = cpu_datap(cpu);
+
+ simple_lock(&x86_topo_lock);
+ if (free_pkgs != NULL) {
+ pkg = free_pkgs;
+ free_pkgs = pkg->next;
+ pkg->next = NULL;
+ simple_unlock(&x86_topo_lock);
+ } else {
+ simple_unlock(&x86_topo_lock);
+ pkg = kalloc(sizeof(x86_pkg_t));
+ if (pkg == NULL)
+ panic("x86_package_alloc() kalloc of x86_pkg_t failed!\n");
+ }
+
+ bzero((void *) pkg, sizeof(x86_pkg_t));
+
+ pkg->ppkg_num = cpup->cpu_phys_number / topoParms.nPThreadsPerPackage;
+
+ pkg->lpkg_num = topoParms.nPackages;
+ atomic_incl((long *) &topoParms.nPackages, 1);
+
+ pkg->flags = X86PKG_FL_PRESENT | X86PKG_FL_READY;
+ return(pkg);
+}
+
+static void
+x86_package_free(x86_pkg_t *pkg)
+{
+ simple_lock(&x86_topo_lock);
+ pkg->next = free_pkgs;
+ free_pkgs = pkg;
+ atomic_decl((long *) &topoParms.nPackages, 1);
+ simple_unlock(&x86_topo_lock);
+}
+
+static void
+x86_cache_add_lcpu(x86_cpu_cache_t *cache, x86_lcpu_t *lcpu)
+{
+ x86_cpu_cache_t *cur_cache;
+ int i;
+
+ /*
+ * Put the new CPU into the list of the cache.
+ */
+ cur_cache = lcpu->caches[cache->level - 1];
+ lcpu->caches[cache->level - 1] = cache;
+ cache->next = cur_cache;
+ cache->nlcpus += 1;
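+ /* Record the lcpu in the first empty slot of the cache's CPU array. */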
+ for (i = 0; i < cache->nlcpus; i += 1) {
+ if (cache->cpus[i] == NULL) {
+ cache->cpus[i] = lcpu;
+ break;
+ }
+ }
+}
+
+static void
+x86_lcpu_add_caches(x86_lcpu_t *lcpu)
+{
+ x86_cpu_cache_t *list;
+ x86_cpu_cache_t *cur;
+ x86_cpu_cache_t *match;
+ x86_die_t *die;
+ x86_core_t *core;
+ x86_lcpu_t *cur_lcpu;
+ uint32_t level;
+ boolean_t found = FALSE;
+
+ assert(lcpu != NULL);
+
+ /*
+ * Add the cache data to the topology.
+ */
+ list = x86_cache_list();
+
+ simple_lock(&x86_topo_lock);
+
+ while (list != NULL) {
+ /*
+ * Remove the cache from the front of the list.
+ */
+ cur = list;
+ list = cur->next;
+ cur->next = NULL;
+ level = cur->level - 1;
+
+ /*
+ * If the cache isn't shared then just put it where it
+ * belongs.
+ */
+ if (cur->maxcpus == 1) {
+ x86_cache_add_lcpu(cur, lcpu);
+ continue;
+ }
+
+ /*
+ * We'll assume that all of the caches at a particular level
+ * have the same sharing. So if we have a cache already at
+ * this level, we'll just skip looking for the match.
+ */
+ if (lcpu->caches[level] != NULL) {
+ x86_cache_free(cur);
+ continue;
+ }
+
+ /*
+ * This is a shared cache, so we have to figure out if
+ * this is the first time we've seen this cache. We do
+ * this by searching through the topology and seeing if
+ * this cache is already described.
+ *
+ * Assume that L{LLC-1} are all at the core level and that
+ * LLC is shared at the die level.
+ */
+ if (level < topoParms.LLCDepth) {
+ /*
+ * Shared at the core.
+ */
+ core = lcpu->core;
+ cur_lcpu = core->lcpus;
+ while (cur_lcpu != NULL) {
+ /*
+ * Skip ourselves.
+ */
+ if (cur_lcpu == lcpu) {
+ cur_lcpu = cur_lcpu->next_in_core;
+ continue;
+ }
+