X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/2d21ac55c334faf3a56e5634905ed6987fc787d4..4d15aeb193b2c68f1d38666c317f8d3734f5f083:/osfmk/i386/cpu_threads.c diff --git a/osfmk/i386/cpu_threads.c b/osfmk/i386/cpu_threads.c index ad8867f53..e58a9369e 100644 --- a/osfmk/i386/cpu_threads.c +++ b/osfmk/i386/cpu_threads.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2016 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -27,35 +27,57 @@ */ #include #include +#include #include #include #include #include -#include -#include #include +#include -#define bitmask(h,l) ((bit(h)|(bit(h)-1)) & ~(bit(l)-1)) -#define bitfield(x,h,l) (((x) & bitmask(h,l)) >> l) +#define DIVISOR_GUARD(denom) \ + if ((denom) == 0) { \ + kprintf("%s: %d Zero divisor: " #denom, \ + __FILE__, __LINE__); \ + } -/* - * Kernel parameter determining whether threads are halted unconditionally - * in the idle state. This is the default behavior. - * See machine_idle() for use. - */ -int idlehalt = 1; +static void debug_topology_print(void); + +boolean_t topo_dbg = FALSE; -x86_pkg_t *x86_pkgs = NULL; -uint32_t num_packages = 0; +x86_pkg_t *x86_pkgs = NULL; uint32_t num_Lx_caches[MAX_CACHE_DEPTH] = { 0 }; static x86_pkg_t *free_pkgs = NULL; +static x86_die_t *free_dies = NULL; static x86_core_t *free_cores = NULL; +static uint32_t num_dies = 0; static x86_cpu_cache_t *x86_caches = NULL; static uint32_t num_caches = 0; +static boolean_t topoParmsInited = FALSE; +x86_topology_parameters_t topoParms; + decl_simple_lock_data(, x86_topo_lock); + +static struct cpu_cache { + int level; int type; +} cpu_caches [LCACHE_MAX] = { + [L1D] = { 1, CPU_CACHE_TYPE_DATA }, + [L1I] = { 1, CPU_CACHE_TYPE_INST }, + [L2U] = { 2, CPU_CACHE_TYPE_UNIF }, + [L3U] = { 3, CPU_CACHE_TYPE_UNIF }, +}; + +static boolean_t +cpu_is_hyperthreaded(void) +{ + i386_cpu_info_t *cpuinfo; + + cpuinfo = cpuid_info(); + return(cpuinfo->thread_count > cpuinfo->core_count); +} static x86_cpu_cache_t * x86_cache_alloc(void) @@ -84,6 +106,144 @@ x86_cache_alloc(void) return(cache); } + +static void +x86_LLC_info(void) +{ + int cache_level = 0; + uint32_t nCPUsSharing = 1; + i386_cpu_info_t *cpuinfo; + struct cpu_cache *cachep; + int i; + + cpuinfo = cpuid_info(); + + for (i = 0, cachep = &cpu_caches[0]; i < LCACHE_MAX; i++, cachep++) { + + if (cachep->type == 0 || cpuid_info()->cache_size[i] == 0) + continue; + + /* + * Only worry about it if it's a deeper level than + * what we've seen before. + */ + if (cachep->level > cache_level) { + cache_level = cachep->level; + + /* + * Save the number of CPUs sharing this cache. + */ + nCPUsSharing = cpuinfo->cache_sharing[i]; + } + } + + /* + * Make the level of the LLC be 0 based. + */ + topoParms.LLCDepth = cache_level - 1; + + /* + * nCPUsSharing represents the *maximum* number of cores or + * logical CPUs sharing the cache. + */ + topoParms.maxSharingLLC = nCPUsSharing; + + topoParms.nCoresSharingLLC = nCPUsSharing / (cpuinfo->thread_count / + cpuinfo->core_count); + topoParms.nLCPUsSharingLLC = nCPUsSharing; + + /* + * nCPUsSharing may not be the number of *active* cores or + * threads that are sharing the cache. 
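+	 * For example (hypothetical numbers): an LLC reported as shared by
+	 * 8 logical CPUs on a part with only 2 cores and 4 threads enabled
+	 * is clamped below to nCoresSharingLLC = 2 and nLCPUsSharingLLC = 4.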
+ */ + if (nCPUsSharing > cpuinfo->core_count) + topoParms.nCoresSharingLLC = cpuinfo->core_count; + if (nCPUsSharing > cpuinfo->thread_count) + topoParms.nLCPUsSharingLLC = cpuinfo->thread_count; +} + +static void +initTopoParms(void) +{ + i386_cpu_info_t *cpuinfo; + + topoParms.stable = FALSE; + + cpuinfo = cpuid_info(); + + PE_parse_boot_argn("-topo", &topo_dbg, sizeof(topo_dbg)); + + /* + * We need to start with getting the LLC information correct. + */ + x86_LLC_info(); + + /* + * Compute the number of threads (logical CPUs) per core. + */ + DIVISOR_GUARD(cpuinfo->core_count); + topoParms.nLThreadsPerCore = cpuinfo->thread_count / cpuinfo->core_count; + DIVISOR_GUARD(cpuinfo->cpuid_cores_per_package); + topoParms.nPThreadsPerCore = cpuinfo->cpuid_logical_per_package / cpuinfo->cpuid_cores_per_package; + + /* + * Compute the number of dies per package. + */ + DIVISOR_GUARD(topoParms.nCoresSharingLLC); + topoParms.nLDiesPerPackage = cpuinfo->core_count / topoParms.nCoresSharingLLC; + DIVISOR_GUARD(topoParms.nPThreadsPerCore); + DIVISOR_GUARD(topoParms.maxSharingLLC / topoParms.nPThreadsPerCore); + topoParms.nPDiesPerPackage = cpuinfo->cpuid_cores_per_package / (topoParms.maxSharingLLC / topoParms.nPThreadsPerCore); + + + /* + * Compute the number of cores per die. + */ + topoParms.nLCoresPerDie = topoParms.nCoresSharingLLC; + topoParms.nPCoresPerDie = (topoParms.maxSharingLLC / topoParms.nPThreadsPerCore); + + /* + * Compute the number of threads per die. + */ + topoParms.nLThreadsPerDie = topoParms.nLThreadsPerCore * topoParms.nLCoresPerDie; + topoParms.nPThreadsPerDie = topoParms.nPThreadsPerCore * topoParms.nPCoresPerDie; + + /* + * Compute the number of cores per package. + */ + topoParms.nLCoresPerPackage = topoParms.nLCoresPerDie * topoParms.nLDiesPerPackage; + topoParms.nPCoresPerPackage = topoParms.nPCoresPerDie * topoParms.nPDiesPerPackage; + + /* + * Compute the number of threads per package. 
+ */ + topoParms.nLThreadsPerPackage = topoParms.nLThreadsPerCore * topoParms.nLCoresPerPackage; + topoParms.nPThreadsPerPackage = topoParms.nPThreadsPerCore * topoParms.nPCoresPerPackage; + + TOPO_DBG("\nCache Topology Parameters:\n"); + TOPO_DBG("\tLLC Depth: %d\n", topoParms.LLCDepth); + TOPO_DBG("\tCores Sharing LLC: %d\n", topoParms.nCoresSharingLLC); + TOPO_DBG("\tThreads Sharing LLC: %d\n", topoParms.nLCPUsSharingLLC); + TOPO_DBG("\tmax Sharing of LLC: %d\n", topoParms.maxSharingLLC); + + TOPO_DBG("\nLogical Topology Parameters:\n"); + TOPO_DBG("\tThreads per Core: %d\n", topoParms.nLThreadsPerCore); + TOPO_DBG("\tCores per Die: %d\n", topoParms.nLCoresPerDie); + TOPO_DBG("\tThreads per Die: %d\n", topoParms.nLThreadsPerDie); + TOPO_DBG("\tDies per Package: %d\n", topoParms.nLDiesPerPackage); + TOPO_DBG("\tCores per Package: %d\n", topoParms.nLCoresPerPackage); + TOPO_DBG("\tThreads per Package: %d\n", topoParms.nLThreadsPerPackage); + + TOPO_DBG("\nPhysical Topology Parameters:\n"); + TOPO_DBG("\tThreads per Core: %d\n", topoParms.nPThreadsPerCore); + TOPO_DBG("\tCores per Die: %d\n", topoParms.nPCoresPerDie); + TOPO_DBG("\tThreads per Die: %d\n", topoParms.nPThreadsPerDie); + TOPO_DBG("\tDies per Package: %d\n", topoParms.nPDiesPerPackage); + TOPO_DBG("\tCores per Package: %d\n", topoParms.nPCoresPerPackage); + TOPO_DBG("\tThreads per Package: %d\n", topoParms.nPThreadsPerPackage); + + topoParmsInited = TRUE; +} static void x86_cache_free(x86_cpu_cache_t *cache) @@ -106,47 +266,29 @@ x86_cache_list(void) x86_cpu_cache_t *root = NULL; x86_cpu_cache_t *cur = NULL; x86_cpu_cache_t *last = NULL; - uint32_t index; - uint32_t cache_info[4]; - uint32_t nsets; - - do_cpuid(0, cache_info); - - if (cache_info[eax] < 4) { - /* - * Processor does not support deterministic - * cache information. Don't report anything - */ - return NULL; - } - - for (index = 0; ; index += 1) { - cache_info[eax] = 4; - cache_info[ecx] = index; - cache_info[ebx] = 0; - cache_info[edx] = 0; - - cpuid(cache_info); + struct cpu_cache *cachep; + int i; - /* - * See if all levels have been queried. - */ - if (bitfield(cache_info[eax], 4, 0) == 0) - break; + /* + * Cons up a list driven not by CPUID leaf 4 (deterministic cache params) + * but by the table above plus parameters already cracked from cpuid... 
+ */ + for (i = 0, cachep = &cpu_caches[0]; i < LCACHE_MAX; i++, cachep++) { + if (cachep->type == 0 || cpuid_info()->cache_size[i] == 0) + continue; + cur = x86_cache_alloc(); - if (cur == NULL) { + if (cur == NULL) break; - } - cur->type = bitfield(cache_info[eax], 4, 0); - cur->level = bitfield(cache_info[eax], 7, 5); - cur->nlcpus = bitfield(cache_info[eax], 25, 14) + 1; - cur->line_size = bitfield(cache_info[ebx], 11, 0) + 1; - cur->partitions = bitfield(cache_info[ebx], 21, 12) + 1; - cur->ways = bitfield(cache_info[ebx], 31, 22) + 1; - nsets = bitfield(cache_info[ecx], 31, 0) + 1; - cur->cache_size = cur->line_size * cur->ways * cur->partitions * nsets; + cur->type = cachep->type; + cur->level = cachep->level; + cur->nlcpus = 0; + cur->maxcpus = cpuid_info()->cache_sharing[i]; + cur->partitions = cpuid_info()->cache_partitions[i]; + cur->cache_size = cpuid_info()->cache_size[i]; + cur->line_size = cpuid_info()->cache_linesize; if (last == NULL) { root = cur; @@ -155,21 +297,31 @@ x86_cache_list(void) last->next = cur; last = cur; } - num_Lx_caches[cur->level - 1] += 1; } - - return(root); + return root; } -static boolean_t -cpu_is_hyperthreaded(void) + +static x86_cpu_cache_t * +x86_match_cache(x86_cpu_cache_t *list, x86_cpu_cache_t *matcher) { - if (cpuid_features() & CPUID_FEATURE_HTT) - return (cpuid_info()->cpuid_logical_per_package / - cpuid_info()->cpuid_cores_per_package) > 1; - else - return FALSE; + x86_cpu_cache_t *cur_cache; + + cur_cache = list; + while (cur_cache != NULL) { + if (cur_cache->maxcpus == matcher->maxcpus + && cur_cache->type == matcher->type + && cur_cache->level == matcher->level + && cur_cache->partitions == matcher->partitions + && cur_cache->line_size == matcher->line_size + && cur_cache->cache_size == matcher->cache_size) + break; + + cur_cache = cur_cache->next; + } + + return(cur_cache); } static void @@ -184,17 +336,18 @@ x86_lcpu_init(int cpu) lcpu = &cpup->lcpu; lcpu->lcpu = lcpu; lcpu->cpu = cpup; - lcpu->next = NULL; - lcpu->core = NULL; + lcpu->next_in_core = NULL; + lcpu->next_in_die = NULL; + lcpu->next_in_pkg = NULL; + lcpu->core = NULL; + lcpu->die = NULL; + lcpu->package = NULL; + lcpu->cpu_num = cpu; lcpu->lnum = cpu; lcpu->pnum = cpup->cpu_phys_number; - lcpu->halted = FALSE; /* XXX is this correct? */ - lcpu->idle = FALSE; /* XXX is this correct? 
*/ + lcpu->state = LCPU_OFF; for (i = 0; i < MAX_CACHE_DEPTH; i += 1) lcpu->caches[i] = NULL; - - lcpu->master = (lcpu->pnum == (unsigned int) master_cpu); - lcpu->primary = (lcpu->pnum % cpuid_info()->cpuid_logical_per_package) == 0; } static x86_core_t * @@ -202,16 +355,14 @@ x86_core_alloc(int cpu) { x86_core_t *core; cpu_data_t *cpup; - uint32_t cpu_in_pkg; - uint32_t lcpus_per_core; cpup = cpu_datap(cpu); simple_lock(&x86_topo_lock); if (free_cores != NULL) { core = free_cores; - free_cores = core->next; - core->next = NULL; + free_cores = core->next_in_die; + core->next_in_die = NULL; simple_unlock(&x86_topo_lock); } else { simple_unlock(&x86_topo_lock); @@ -222,14 +373,11 @@ x86_core_alloc(int cpu) bzero((void *) core, sizeof(x86_core_t)); - cpu_in_pkg = cpu % cpuid_info()->cpuid_logical_per_package; - lcpus_per_core = cpuid_info()->cpuid_logical_per_package / - cpuid_info()->cpuid_cores_per_package; + core->pcore_num = cpup->cpu_phys_number / topoParms.nPThreadsPerCore; + core->lcore_num = core->pcore_num % topoParms.nPCoresPerPackage; - core->pcore_num = cpup->cpu_phys_number / lcpus_per_core; - core->lcore_num = core->pcore_num % cpuid_info()->cpuid_cores_per_package; - - core->flags = X86CORE_FL_PRESENT | X86CORE_FL_READY; + core->flags = X86CORE_FL_PRESENT | X86CORE_FL_READY + | X86CORE_FL_HALTED | X86CORE_FL_IDLE; return(core); } @@ -238,7 +386,7 @@ static void x86_core_free(x86_core_t *core) { simple_lock(&x86_topo_lock); - core->next = free_cores; + core->next_in_die = free_cores; free_cores = core; simple_unlock(&x86_topo_lock); } @@ -252,7 +400,7 @@ x86_package_find(int cpu) cpup = cpu_datap(cpu); - pkg_num = cpup->cpu_phys_number / cpuid_info()->cpuid_logical_per_package; + pkg_num = cpup->cpu_phys_number / topoParms.nPThreadsPerPackage; pkg = x86_pkgs; while (pkg != NULL) { @@ -263,48 +411,205 @@ x86_package_find(int cpu) return(pkg); } + +static x86_die_t * +x86_die_find(int cpu) +{ + x86_die_t *die; + x86_pkg_t *pkg; + cpu_data_t *cpup; + uint32_t die_num; + + cpup = cpu_datap(cpu); + + die_num = cpup->cpu_phys_number / topoParms.nPThreadsPerDie; + + pkg = x86_package_find(cpu); + if (pkg == NULL) + return(NULL); + + die = pkg->dies; + while (die != NULL) { + if (die->pdie_num == die_num) + break; + die = die->next_in_pkg; + } + + return(die); +} static x86_core_t * x86_core_find(int cpu) { x86_core_t *core; - x86_pkg_t *pkg; + x86_die_t *die; cpu_data_t *cpup; uint32_t core_num; cpup = cpu_datap(cpu); - core_num = cpup->cpu_phys_number - / (cpuid_info()->cpuid_logical_per_package - / cpuid_info()->cpuid_cores_per_package); + core_num = cpup->cpu_phys_number / topoParms.nPThreadsPerCore; - pkg = x86_package_find(cpu); - if (pkg == NULL) + die = x86_die_find(cpu); + if (die == NULL) return(NULL); - core = pkg->cores; + core = die->cores; while (core != NULL) { if (core->pcore_num == core_num) break; - core = core->next; + core = core->next_in_die; } return(core); } + +void +x86_set_logical_topology(x86_lcpu_t *lcpu, int pnum, int lnum) +{ + x86_core_t *core = lcpu->core; + x86_die_t *die = lcpu->die; + x86_pkg_t *pkg = lcpu->package; + + assert(core != NULL); + assert(die != NULL); + assert(pkg != NULL); + + lcpu->cpu_num = lnum; + lcpu->pnum = pnum; + lcpu->master = (lnum == master_cpu); + lcpu->primary = (lnum % topoParms.nLThreadsPerPackage) == 0; + + lcpu->lnum = lnum % topoParms.nLThreadsPerCore; + + core->pcore_num = lnum / topoParms.nLThreadsPerCore; + core->lcore_num = core->pcore_num % topoParms.nLCoresPerDie; + + die->pdie_num = lnum / 
(topoParms.nLThreadsPerCore*topoParms.nLCoresPerDie); + die->ldie_num = die->pdie_num % topoParms.nLDiesPerPackage; + + pkg->ppkg_num = lnum / topoParms.nLThreadsPerPackage; + pkg->lpkg_num = pkg->ppkg_num; + +} + +static x86_die_t * +x86_die_alloc(int cpu) +{ + x86_die_t *die; + cpu_data_t *cpup; + + cpup = cpu_datap(cpu); + + simple_lock(&x86_topo_lock); + if (free_dies != NULL) { + die = free_dies; + free_dies = die->next_in_pkg; + die->next_in_pkg = NULL; + simple_unlock(&x86_topo_lock); + } else { + simple_unlock(&x86_topo_lock); + die = kalloc(sizeof(x86_die_t)); + if (die == NULL) + panic("x86_die_alloc() kalloc of x86_die_t failed!\n"); + } + + bzero((void *) die, sizeof(x86_die_t)); + + die->pdie_num = cpup->cpu_phys_number / topoParms.nPThreadsPerDie; + + die->ldie_num = num_dies; + atomic_incl((long *) &num_dies, 1); + + die->flags = X86DIE_FL_PRESENT; + return(die); +} static void -x86_core_add_lcpu(x86_core_t *core, x86_lcpu_t *lcpu) +x86_die_free(x86_die_t *die) +{ + simple_lock(&x86_topo_lock); + die->next_in_pkg = free_dies; + free_dies = die; + atomic_decl((long *) &num_dies, 1); + simple_unlock(&x86_topo_lock); +} + +static x86_pkg_t * +x86_package_alloc(int cpu) +{ + x86_pkg_t *pkg; + cpu_data_t *cpup; + + cpup = cpu_datap(cpu); + + simple_lock(&x86_topo_lock); + if (free_pkgs != NULL) { + pkg = free_pkgs; + free_pkgs = pkg->next; + pkg->next = NULL; + simple_unlock(&x86_topo_lock); + } else { + simple_unlock(&x86_topo_lock); + pkg = kalloc(sizeof(x86_pkg_t)); + if (pkg == NULL) + panic("x86_package_alloc() kalloc of x86_pkg_t failed!\n"); + } + + bzero((void *) pkg, sizeof(x86_pkg_t)); + + pkg->ppkg_num = cpup->cpu_phys_number / topoParms.nPThreadsPerPackage; + + pkg->lpkg_num = topoParms.nPackages; + atomic_incl((long *) &topoParms.nPackages, 1); + + pkg->flags = X86PKG_FL_PRESENT | X86PKG_FL_READY; + return(pkg); +} + +static void +x86_package_free(x86_pkg_t *pkg) +{ + simple_lock(&x86_topo_lock); + pkg->next = free_pkgs; + free_pkgs = pkg; + atomic_decl((long *) &topoParms.nPackages, 1); + simple_unlock(&x86_topo_lock); +} + +static void +x86_cache_add_lcpu(x86_cpu_cache_t *cache, x86_lcpu_t *lcpu) +{ + x86_cpu_cache_t *cur_cache; + int i; + + /* + * Put the new CPU into the list of the cache. + */ + cur_cache = lcpu->caches[cache->level - 1]; + lcpu->caches[cache->level - 1] = cache; + cache->next = cur_cache; + cache->nlcpus += 1; + for (i = 0; i < cache->nlcpus; i += 1) { + if (cache->cpus[i] == NULL) { + cache->cpus[i] = lcpu; + break; + } + } +} + +static void +x86_lcpu_add_caches(x86_lcpu_t *lcpu) { x86_cpu_cache_t *list; x86_cpu_cache_t *cur; - x86_core_t *cur_core; + x86_cpu_cache_t *match; + x86_die_t *die; + x86_core_t *core; x86_lcpu_t *cur_lcpu; - boolean_t found; - int level; - int i; - uint32_t cpu_mask; + uint32_t level; + boolean_t found = FALSE; - assert(core != NULL); assert(lcpu != NULL); /* @@ -327,8 +632,9 @@ x86_core_add_lcpu(x86_core_t *core, x86_lcpu_t *lcpu) * If the cache isn't shared then just put it where it * belongs. */ - if (cur->nlcpus == 1) { - goto found_first; + if (cur->maxcpus == 1) { + x86_cache_add_lcpu(cur, lcpu); + continue; } /* @@ -344,101 +650,131 @@ x86_core_add_lcpu(x86_core_t *core, x86_lcpu_t *lcpu) /* * This is a shared cache, so we have to figure out if * this is the first time we've seen this cache. We do - * this by searching through the package and seeing if - * a related core is already describing this cache. + * this by searching through the topology and seeing if + * this cache is already described. 
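+	 * (Below, levels shallower than the LLC are searched only among
+	 * this core's logical CPUs; the LLC itself is searched die-wide.)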
* - * NOTE: This assumes that CPUs whose ID mod <# sharing cache> - * are indeed sharing the cache. + * Assume that L{LLC-1} are all at the core level and that + * LLC is shared at the die level. */ - cpu_mask = lcpu->pnum & ~(cur->nlcpus - 1); - cur_core = core->package->cores; - found = FALSE; - - while (cur_core != NULL && !found) { - cur_lcpu = cur_core->lcpus; - while (cur_lcpu != NULL && !found) { - if ((cur_lcpu->pnum & ~(cur->nlcpus - 1)) == cpu_mask) { - lcpu->caches[level] = cur_lcpu->caches[level]; - found = TRUE; - x86_cache_free(cur); + if (level < topoParms.LLCDepth) { + /* + * Shared at the core. + */ + core = lcpu->core; + cur_lcpu = core->lcpus; + while (cur_lcpu != NULL) { + /* + * Skip ourselves. + */ + if (cur_lcpu == lcpu) { + cur_lcpu = cur_lcpu->next_in_core; + continue; + } - /* - * Put the new CPU into the list of the cache. - */ - cur = lcpu->caches[level]; - for (i = 0; i < cur->nlcpus; i += 1) { - if (cur->cpus[i] == NULL) { - cur->cpus[i] = lcpu; - break; - } - } + /* + * If there's a cache on this logical CPU, + * then use that one. + */ + match = x86_match_cache(cur_lcpu->caches[level], cur); + if (match != NULL) { + x86_cache_free(cur); + x86_cache_add_lcpu(match, lcpu); + found = TRUE; + break; } - cur_lcpu = cur_lcpu->next; + + cur_lcpu = cur_lcpu->next_in_core; } + } else { + /* + * Shared at the die. + */ + die = lcpu->die; + cur_lcpu = die->lcpus; + while (cur_lcpu != NULL) { + /* + * Skip ourselves. + */ + if (cur_lcpu == lcpu) { + cur_lcpu = cur_lcpu->next_in_die; + continue; + } + + /* + * If there's a cache on this logical CPU, + * then use that one. + */ + match = x86_match_cache(cur_lcpu->caches[level], cur); + if (match != NULL) { + x86_cache_free(cur); + x86_cache_add_lcpu(match, lcpu); + found = TRUE; + break; + } - cur_core = cur_core->next; + cur_lcpu = cur_lcpu->next_in_die; + } } + /* + * If a shared cache wasn't found, then this logical CPU must + * be the first one encountered. + */ if (!found) { -found_first: - cur->next = lcpu->caches[level]; - lcpu->caches[level] = cur; - cur->cpus[0] = lcpu; + x86_cache_add_lcpu(cur, lcpu); } } - /* - * Add the Logical CPU to the core. 
- */ - lcpu->next = core->lcpus; - lcpu->core = core; - core->lcpus = lcpu; - core->num_lcpus += 1; - simple_unlock(&x86_topo_lock); } -static x86_pkg_t * -x86_package_alloc(int cpu) +static void +x86_core_add_lcpu(x86_core_t *core, x86_lcpu_t *lcpu) { - x86_pkg_t *pkg; - cpu_data_t *cpup; - - cpup = cpu_datap(cpu); + assert(core != NULL); + assert(lcpu != NULL); simple_lock(&x86_topo_lock); - if (free_pkgs != NULL) { - pkg = free_pkgs; - free_pkgs = pkg->next; - pkg->next = NULL; - simple_unlock(&x86_topo_lock); - } else { - simple_unlock(&x86_topo_lock); - pkg = kalloc(sizeof(x86_pkg_t)); - if (pkg == NULL) - panic("x86_package_alloc() kalloc of x86_pkg_t failed!\n"); - } - bzero((void *) pkg, sizeof(x86_pkg_t)); + lcpu->next_in_core = core->lcpus; + lcpu->core = core; + core->lcpus = lcpu; + core->num_lcpus += 1; + simple_unlock(&x86_topo_lock); +} - pkg->ppkg_num = cpup->cpu_phys_number - / cpuid_info()->cpuid_logical_per_package; +static void +x86_die_add_lcpu(x86_die_t *die, x86_lcpu_t *lcpu) +{ + assert(die != NULL); + assert(lcpu != NULL); + + lcpu->next_in_die = die->lcpus; + lcpu->die = die; + die->lcpus = lcpu; +} - pkg->lpkg_num = num_packages; - atomic_incl((long *) &num_packages, 1); +static void +x86_die_add_core(x86_die_t *die, x86_core_t *core) +{ + assert(die != NULL); + assert(core != NULL); - pkg->flags = X86PKG_FL_PRESENT | X86PKG_FL_READY; - return(pkg); + core->next_in_die = die->cores; + core->die = die; + die->cores = core; + die->num_cores += 1; } -static void -x86_package_free(x86_pkg_t *pkg) + static void +x86_package_add_lcpu(x86_pkg_t *pkg, x86_lcpu_t *lcpu) { - simple_lock(&x86_topo_lock); - pkg->next = free_pkgs; - free_pkgs = pkg; - atomic_decl((long *) &num_packages, 1); - simple_unlock(&x86_topo_lock); + assert(pkg != NULL); + assert(lcpu != NULL); + + lcpu->next_in_pkg = pkg->lcpus; + lcpu->package = pkg; + pkg->lcpus = lcpu; } static void @@ -447,20 +783,43 @@ x86_package_add_core(x86_pkg_t *pkg, x86_core_t *core) assert(pkg != NULL); assert(core != NULL); - core->next = pkg->cores; + core->next_in_pkg = pkg->cores; core->package = pkg; pkg->cores = core; - pkg->num_cores += 1; +} + +static void +x86_package_add_die(x86_pkg_t *pkg, x86_die_t *die) +{ + assert(pkg != NULL); + assert(die != NULL); + + die->next_in_pkg = pkg->dies; + die->package = pkg; + pkg->dies = die; + pkg->num_dies += 1; } void * cpu_thread_alloc(int cpu) { - x86_core_t *core; - x86_pkg_t *pkg; + x86_core_t *core = NULL; + x86_die_t *die = NULL; + x86_pkg_t *pkg = NULL; cpu_data_t *cpup; uint32_t phys_cpu; + /* + * Only allow one to manipulate the topology at a time. + */ + simple_lock(&x86_topo_lock); + + /* + * Make sure all of the topology parameters have been initialized. + */ + if (!topoParmsInited) + initTopoParms(); + cpup = cpu_datap(cpu); phys_cpu = cpup->cpu_phys_number; @@ -477,22 +836,9 @@ cpu_thread_alloc(int cpu) } /* - * Only allow one to manipulate the topology at a time. - */ - simple_lock(&x86_topo_lock); - - /* - * Get the core for this logical CPU. + * Get the package that the logical CPU is in. */ - core_again: - core = x86_core_find(cpu); - if (core == NULL) { - /* - * Core structure hasn't been created yet, do it now. - * - * Get the package that the core is part of. 
- */ - package_again: + do { pkg = x86_package_find(cpu); if (pkg == NULL) { /* @@ -503,7 +849,7 @@ cpu_thread_alloc(int cpu) simple_lock(&x86_topo_lock); if (x86_package_find(cpu) != NULL) { x86_package_free(pkg); - goto package_again; + continue; } /* @@ -512,31 +858,58 @@ cpu_thread_alloc(int cpu) pkg->next = x86_pkgs; x86_pkgs = pkg; } + } while (pkg == NULL); - /* - * Allocate the core structure now. - */ - simple_unlock(&x86_topo_lock); - core = x86_core_alloc(cpu); - simple_lock(&x86_topo_lock); - if (x86_core_find(cpu) != NULL) { - x86_core_free(core); - goto core_again; + /* + * Get the die that the logical CPU is in. + */ + do { + die = x86_die_find(cpu); + if (die == NULL) { + /* + * Die structure hasn't been created yet, do it now. + */ + simple_unlock(&x86_topo_lock); + die = x86_die_alloc(cpu); + simple_lock(&x86_topo_lock); + if (x86_die_find(cpu) != NULL) { + x86_die_free(die); + continue; + } + + /* + * Add the die to the package. + */ + x86_package_add_die(pkg, die); } + } while (die == NULL); - /* - * Add it to the package. - */ - x86_package_add_core(pkg, core); - machine_info.physical_cpu_max += 1; + /* + * Get the core for this logical CPU. + */ + do { + core = x86_core_find(cpu); + if (core == NULL) { + /* + * Allocate the core structure now. + */ + simple_unlock(&x86_topo_lock); + core = x86_core_alloc(cpu); + simple_lock(&x86_topo_lock); + if (x86_core_find(cpu) != NULL) { + x86_core_free(core); + continue; + } + + /* + * Add the core to the die & package. + */ + x86_die_add_core(die, core); + x86_package_add_core(pkg, core); + machine_info.physical_cpu_max += 1; + } + } while (core == NULL); - /* - * Allocate performance counter structure. - */ - simple_unlock(&x86_topo_lock); - core->pmc = pmc_alloc(); - simple_lock(&x86_topo_lock); - } /* * Done manipulating the topology, so others can get in. @@ -544,7 +917,13 @@ cpu_thread_alloc(int cpu) machine_info.logical_cpu_max += 1; simple_unlock(&x86_topo_lock); + /* + * Add the logical CPU to the other topology structures. + */ x86_core_add_lcpu(core, &cpup->lcpu); + x86_die_add_lcpu(core->die, &cpup->lcpu); + x86_package_add_lcpu(core->package, &cpup->lcpu); + x86_lcpu_add_caches(&cpup->lcpu); return (void *) core; } @@ -552,10 +931,10 @@ cpu_thread_alloc(int cpu) void cpu_thread_init(void) { - int my_cpu = get_cpu_number(); - cpu_data_t *cpup = current_cpu_datap(); + int my_cpu = get_cpu_number(); + cpu_data_t *cpup = current_cpu_datap(); x86_core_t *core; - static int initialized = 0; + static int initialized = 0; /* * If we're the boot processor, we do all of the initialization of @@ -581,18 +960,17 @@ cpu_thread_init(void) if (core->active_lcpus == 0) machine_info.physical_cpu += 1; core->active_lcpus += 1; - cpup->lcpu.halted = FALSE; - cpup->lcpu.idle = FALSE; simple_unlock(&x86_topo_lock); pmCPUMarkRunning(cpup); - etimer_resync_deadlines(); + timer_resync_deadlines(); } /* * Called for a cpu to halt permanently * (as opposed to halting and expecting an interrupt to awaken it). */ +__attribute__((noreturn)) void cpu_thread_halt(void) { @@ -601,7 +979,6 @@ cpu_thread_halt(void) simple_lock(&x86_topo_lock); machine_info.logical_cpu -= 1; - cpup->lcpu.idle = TRUE; core = cpup->lcpu.core; core->active_lcpus -= 1; if (core->active_lcpus == 0) @@ -618,3 +995,237 @@ cpu_thread_halt(void) } /* NOT REACHED */ } + +/* + * Validates that the topology was built correctly. Must be called only + * after the complete topology is built and no other changes are being made. 
+ */ +void +x86_validate_topology(void) +{ + x86_pkg_t *pkg; + x86_die_t *die; + x86_core_t *core; + x86_lcpu_t *lcpu; + uint32_t nDies; + uint32_t nCores; + uint32_t nCPUs; + + if (topo_dbg) + debug_topology_print(); + + /* + * XXX + * + * Right now this only works if the number of CPUs started is the total + * number of CPUs. However, when specifying cpus=n the topology is only + * partially constructed and the checks below will fail. + * + * We should *always* build the complete topology and only start the CPUs + * indicated by cpus=n. Until that happens, this code will not check the + * topology if the number of cpus defined is < that described the the + * topology parameters. + */ + nCPUs = topoParms.nPackages * topoParms.nLThreadsPerPackage; + if (nCPUs > real_ncpus) + return; + + pkg = x86_pkgs; + while (pkg != NULL) { + /* + * Make sure that the package has the correct number of dies. + */ + nDies = 0; + die = pkg->dies; + while (die != NULL) { + if (die->package == NULL) + panic("Die(%d)->package is NULL", + die->pdie_num); + if (die->package != pkg) + panic("Die %d points to package %d, should be %d", + die->pdie_num, die->package->lpkg_num, pkg->lpkg_num); + + TOPO_DBG("Die(%d)->package %d\n", + die->pdie_num, pkg->lpkg_num); + + /* + * Make sure that the die has the correct number of cores. + */ + TOPO_DBG("Die(%d)->cores: ", die->pdie_num); + nCores = 0; + core = die->cores; + while (core != NULL) { + if (core->die == NULL) + panic("Core(%d)->die is NULL", + core->pcore_num); + if (core->die != die) + panic("Core %d points to die %d, should be %d", + core->pcore_num, core->die->pdie_num, die->pdie_num); + nCores += 1; + TOPO_DBG("%d ", core->pcore_num); + core = core->next_in_die; + } + TOPO_DBG("\n"); + + if (nCores != topoParms.nLCoresPerDie) + panic("Should have %d Cores, but only found %d for Die %d", + topoParms.nLCoresPerDie, nCores, die->pdie_num); + + /* + * Make sure that the die has the correct number of CPUs. + */ + TOPO_DBG("Die(%d)->lcpus: ", die->pdie_num); + nCPUs = 0; + lcpu = die->lcpus; + while (lcpu != NULL) { + if (lcpu->die == NULL) + panic("CPU(%d)->die is NULL", + lcpu->cpu_num); + if (lcpu->die != die) + panic("CPU %d points to die %d, should be %d", + lcpu->cpu_num, lcpu->die->pdie_num, die->pdie_num); + nCPUs += 1; + TOPO_DBG("%d ", lcpu->cpu_num); + lcpu = lcpu->next_in_die; + } + TOPO_DBG("\n"); + + if (nCPUs != topoParms.nLThreadsPerDie) + panic("Should have %d Threads, but only found %d for Die %d", + topoParms.nLThreadsPerDie, nCPUs, die->pdie_num); + + nDies += 1; + die = die->next_in_pkg; + } + + if (nDies != topoParms.nLDiesPerPackage) + panic("Should have %d Dies, but only found %d for package %d", + topoParms.nLDiesPerPackage, nDies, pkg->lpkg_num); + + /* + * Make sure that the package has the correct number of cores. + */ + nCores = 0; + core = pkg->cores; + while (core != NULL) { + if (core->package == NULL) + panic("Core(%d)->package is NULL", + core->pcore_num); + if (core->package != pkg) + panic("Core %d points to package %d, should be %d", + core->pcore_num, core->package->lpkg_num, pkg->lpkg_num); + TOPO_DBG("Core(%d)->package %d\n", + core->pcore_num, pkg->lpkg_num); + + /* + * Make sure that the core has the correct number of CPUs. 
+ */ + nCPUs = 0; + lcpu = core->lcpus; + TOPO_DBG("Core(%d)->lcpus: ", core->pcore_num); + while (lcpu != NULL) { + if (lcpu->core == NULL) + panic("CPU(%d)->core is NULL", + lcpu->cpu_num); + if (lcpu->core != core) + panic("CPU %d points to core %d, should be %d", + lcpu->cpu_num, lcpu->core->pcore_num, core->pcore_num); + TOPO_DBG("%d ", lcpu->cpu_num); + nCPUs += 1; + lcpu = lcpu->next_in_core; + } + TOPO_DBG("\n"); + + if (nCPUs != topoParms.nLThreadsPerCore) + panic("Should have %d Threads, but only found %d for Core %d", + topoParms.nLThreadsPerCore, nCPUs, core->pcore_num); + nCores += 1; + core = core->next_in_pkg; + } + + if (nCores != topoParms.nLCoresPerPackage) + panic("Should have %d Cores, but only found %d for package %d", + topoParms.nLCoresPerPackage, nCores, pkg->lpkg_num); + + /* + * Make sure that the package has the correct number of CPUs. + */ + nCPUs = 0; + lcpu = pkg->lcpus; + while (lcpu != NULL) { + if (lcpu->package == NULL) + panic("CPU(%d)->package is NULL", + lcpu->cpu_num); + if (lcpu->package != pkg) + panic("CPU %d points to package %d, should be %d", + lcpu->cpu_num, lcpu->package->lpkg_num, pkg->lpkg_num); + TOPO_DBG("CPU(%d)->package %d\n", + lcpu->cpu_num, pkg->lpkg_num); + nCPUs += 1; + lcpu = lcpu->next_in_pkg; + } + + if (nCPUs != topoParms.nLThreadsPerPackage) + panic("Should have %d Threads, but only found %d for package %d", + topoParms.nLThreadsPerPackage, nCPUs, pkg->lpkg_num); + + pkg = pkg->next; + } +} + +/* + * Prints out the topology + */ +static void +debug_topology_print(void) +{ + x86_pkg_t *pkg; + x86_die_t *die; + x86_core_t *core; + x86_lcpu_t *cpu; + + pkg = x86_pkgs; + while (pkg != NULL) { + kprintf("Package:\n"); + kprintf(" Physical: %d\n", pkg->ppkg_num); + kprintf(" Logical: %d\n", pkg->lpkg_num); + + die = pkg->dies; + while (die != NULL) { + kprintf(" Die:\n"); + kprintf(" Physical: %d\n", die->pdie_num); + kprintf(" Logical: %d\n", die->ldie_num); + + core = die->cores; + while (core != NULL) { + kprintf(" Core:\n"); + kprintf(" Physical: %d\n", core->pcore_num); + kprintf(" Logical: %d\n", core->lcore_num); + + cpu = core->lcpus; + while (cpu != NULL) { + kprintf(" LCPU:\n"); + kprintf(" CPU #: %d\n", cpu->cpu_num); + kprintf(" Physical: %d\n", cpu->pnum); + kprintf(" Logical: %d\n", cpu->lnum); + kprintf(" Flags: "); + if (cpu->master) + kprintf("MASTER "); + if (cpu->primary) + kprintf("PRIMARY"); + if (!cpu->master && !cpu->primary) + kprintf("(NONE)"); + kprintf("\n"); + + cpu = cpu->next_in_core; + } + + core = core->next_in_die; + } + + die = die->next_in_pkg; + } + + pkg = pkg->next; + } +}