X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/2d21ac55c334faf3a56e5634905ed6987fc787d4..4d15aeb193b2c68f1d38666c317f8d3734f5f083:/osfmk/i386/cpu_threads.c diff --git a/osfmk/i386/cpu_threads.c b/osfmk/i386/cpu_threads.c index ad8867f53..e58a9369e 100644 --- a/osfmk/i386/cpu_threads.c +++ b/osfmk/i386/cpu_threads.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2003-2007 Apple Inc. All rights reserved. + * Copyright (c) 2003-2016 Apple Inc. All rights reserved. * * @APPLE_OSREFERENCE_LICENSE_HEADER_START@ * @@ -27,35 +27,57 @@ */ #include #include +#include #include #include #include #include -#include -#include #include +#include -#define bitmask(h,l) ((bit(h)|(bit(h)-1)) & ~(bit(l)-1)) -#define bitfield(x,h,l) (((x) & bitmask(h,l)) >> l) +#define DIVISOR_GUARD(denom) \ + if ((denom) == 0) { \ + kprintf("%s: %d Zero divisor: " #denom, \ + __FILE__, __LINE__); \ + } -/* - * Kernel parameter determining whether threads are halted unconditionally - * in the idle state. This is the default behavior. - * See machine_idle() for use. - */ -int idlehalt = 1; +static void debug_topology_print(void); + +boolean_t topo_dbg = FALSE; -x86_pkg_t *x86_pkgs = NULL; -uint32_t num_packages = 0; +x86_pkg_t *x86_pkgs = NULL; uint32_t num_Lx_caches[MAX_CACHE_DEPTH] = { 0 }; static x86_pkg_t *free_pkgs = NULL; +static x86_die_t *free_dies = NULL; static x86_core_t *free_cores = NULL; +static uint32_t num_dies = 0; static x86_cpu_cache_t *x86_caches = NULL; static uint32_t num_caches = 0; +static boolean_t topoParmsInited = FALSE; +x86_topology_parameters_t topoParms; + decl_simple_lock_data(, x86_topo_lock); + +static struct cpu_cache { + int level; int type; +} cpu_caches [LCACHE_MAX] = { + [L1D] = { 1, CPU_CACHE_TYPE_DATA }, + [L1I] = { 1, CPU_CACHE_TYPE_INST }, + [L2U] = { 2, CPU_CACHE_TYPE_UNIF }, + [L3U] = { 3, CPU_CACHE_TYPE_UNIF }, +}; + +static boolean_t +cpu_is_hyperthreaded(void) +{ + i386_cpu_info_t *cpuinfo; + + cpuinfo = cpuid_info(); + return(cpuinfo->thread_count > cpuinfo->core_count); +} static x86_cpu_cache_t * x86_cache_alloc(void) @@ -84,6 +106,144 @@ x86_cache_alloc(void) return(cache); } + +static void +x86_LLC_info(void) +{ + int cache_level = 0; + uint32_t nCPUsSharing = 1; + i386_cpu_info_t *cpuinfo; + struct cpu_cache *cachep; + int i; + + cpuinfo = cpuid_info(); + + for (i = 0, cachep = &cpu_caches[0]; i < LCACHE_MAX; i++, cachep++) { + + if (cachep->type == 0 || cpuid_info()->cache_size[i] == 0) + continue; + + /* + * Only worry about it if it's a deeper level than + * what we've seen before. + */ + if (cachep->level > cache_level) { + cache_level = cachep->level; + + /* + * Save the number of CPUs sharing this cache. + */ + nCPUsSharing = cpuinfo->cache_sharing[i]; + } + } + + /* + * Make the level of the LLC be 0 based. + */ + topoParms.LLCDepth = cache_level - 1; + + /* + * nCPUsSharing represents the *maximum* number of cores or + * logical CPUs sharing the cache. + */ + topoParms.maxSharingLLC = nCPUsSharing; + + topoParms.nCoresSharingLLC = nCPUsSharing / (cpuinfo->thread_count / + cpuinfo->core_count); + topoParms.nLCPUsSharingLLC = nCPUsSharing; + + /* + * nCPUsSharing may not be the number of *active* cores or + * threads that are sharing the cache. 
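+	 * For example (hypothetical numbers): an LLC reported as shared by
+	 * 8 logical CPUs on a part with only 2 cores and 4 threads enabled
+	 * is clamped below to nCoresSharingLLC = 2 and nLCPUsSharingLLC = 4.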
+ */ + if (nCPUsSharing > cpuinfo->core_count) + topoParms.nCoresSharingLLC = cpuinfo->core_count; + if (nCPUsSharing > cpuinfo->thread_count) + topoParms.nLCPUsSharingLLC = cpuinfo->thread_count; +} + +static void +initTopoParms(void) +{ + i386_cpu_info_t *cpuinfo; + + topoParms.stable = FALSE; + + cpuinfo = cpuid_info(); + + PE_parse_boot_argn("-topo", &topo_dbg, sizeof(topo_dbg)); + + /* + * We need to start with getting the LLC information correct. + */ + x86_LLC_info(); + + /* + * Compute the number of threads (logical CPUs) per core. + */ + DIVISOR_GUARD(cpuinfo->core_count); + topoParms.nLThreadsPerCore = cpuinfo->thread_count / cpuinfo->core_count; + DIVISOR_GUARD(cpuinfo->cpuid_cores_per_package); + topoParms.nPThreadsPerCore = cpuinfo->cpuid_logical_per_package / cpuinfo->cpuid_cores_per_package; + + /* + * Compute the number of dies per package. + */ + DIVISOR_GUARD(topoParms.nCoresSharingLLC); + topoParms.nLDiesPerPackage = cpuinfo->core_count / topoParms.nCoresSharingLLC; + DIVISOR_GUARD(topoParms.nPThreadsPerCore); + DIVISOR_GUARD(topoParms.maxSharingLLC / topoParms.nPThreadsPerCore); + topoParms.nPDiesPerPackage = cpuinfo->cpuid_cores_per_package / (topoParms.maxSharingLLC / topoParms.nPThreadsPerCore); + + + /* + * Compute the number of cores per die. + */ + topoParms.nLCoresPerDie = topoParms.nCoresSharingLLC; + topoParms.nPCoresPerDie = (topoParms.maxSharingLLC / topoParms.nPThreadsPerCore); + + /* + * Compute the number of threads per die. + */ + topoParms.nLThreadsPerDie = topoParms.nLThreadsPerCore * topoParms.nLCoresPerDie; + topoParms.nPThreadsPerDie = topoParms.nPThreadsPerCore * topoParms.nPCoresPerDie; + + /* + * Compute the number of cores per package. + */ + topoParms.nLCoresPerPackage = topoParms.nLCoresPerDie * topoParms.nLDiesPerPackage; + topoParms.nPCoresPerPackage = topoParms.nPCoresPerDie * topoParms.nPDiesPerPackage; + + /* + * Compute the number of threads per package. 
+ */ + topoParms.nLThreadsPerPackage = topoParms.nLThreadsPerCore * topoParms.nLCoresPerPackage; + topoParms.nPThreadsPerPackage = topoParms.nPThreadsPerCore * topoParms.nPCoresPerPackage; + + TOPO_DBG("\nCache Topology Parameters:\n"); + TOPO_DBG("\tLLC Depth: %d\n", topoParms.LLCDepth); + TOPO_DBG("\tCores Sharing LLC: %d\n", topoParms.nCoresSharingLLC); + TOPO_DBG("\tThreads Sharing LLC: %d\n", topoParms.nLCPUsSharingLLC); + TOPO_DBG("\tmax Sharing of LLC: %d\n", topoParms.maxSharingLLC); + + TOPO_DBG("\nLogical Topology Parameters:\n"); + TOPO_DBG("\tThreads per Core: %d\n", topoParms.nLThreadsPerCore); + TOPO_DBG("\tCores per Die: %d\n", topoParms.nLCoresPerDie); + TOPO_DBG("\tThreads per Die: %d\n", topoParms.nLThreadsPerDie); + TOPO_DBG("\tDies per Package: %d\n", topoParms.nLDiesPerPackage); + TOPO_DBG("\tCores per Package: %d\n", topoParms.nLCoresPerPackage); + TOPO_DBG("\tThreads per Package: %d\n", topoParms.nLThreadsPerPackage); + + TOPO_DBG("\nPhysical Topology Parameters:\n"); + TOPO_DBG("\tThreads per Core: %d\n", topoParms.nPThreadsPerCore); + TOPO_DBG("\tCores per Die: %d\n", topoParms.nPCoresPerDie); + TOPO_DBG("\tThreads per Die: %d\n", topoParms.nPThreadsPerDie); + TOPO_DBG("\tDies per Package: %d\n", topoParms.nPDiesPerPackage); + TOPO_DBG("\tCores per Package: %d\n", topoParms.nPCoresPerPackage); + TOPO_DBG("\tThreads per Package: %d\n", topoParms.nPThreadsPerPackage); + + topoParmsInited = TRUE; +} static void x86_cache_free(x86_cpu_cache_t *cache) @@ -106,47 +266,29 @@ x86_cache_list(void) x86_cpu_cache_t *root = NULL; x86_cpu_cache_t *cur = NULL; x86_cpu_cache_t *last = NULL; - uint32_t index; - uint32_t cache_info[4]; - uint32_t nsets; - - do_cpuid(0, cache_info); - - if (cache_info[eax] < 4) { - /* - * Processor does not support deterministic - * cache information. Don't report anything - */ - return NULL; - } - - for (index = 0; ; index += 1) { - cache_info[eax] = 4; - cache_info[ecx] = index; - cache_info[ebx] = 0; - cache_info[edx] = 0; - - cpuid(cache_info); + struct cpu_cache *cachep; + int i; - /* - * See if all levels have been queried. - */ - if (bitfield(cache_info[eax], 4, 0) == 0) - break; + /* + * Cons up a list driven not by CPUID leaf 4 (deterministic cache params) + * but by the table above plus parameters already cracked from cpuid... 
+ */ + for (i = 0, cachep = &cpu_caches[0]; i < LCACHE_MAX; i++, cachep++) { + if (cachep->type == 0 || cpuid_info()->cache_size[i] == 0) + continue; + cur = x86_cache_alloc(); - if (cur == NULL) { + if (cur == NULL) break; - } - cur->type = bitfield(cache_info[eax], 4, 0); - cur->level = bitfield(cache_info[eax], 7, 5); - cur->nlcpus = bitfield(cache_info[eax], 25, 14) + 1; - cur->line_size = bitfield(cache_info[ebx], 11, 0) + 1; - cur->partitions = bitfield(cache_info[ebx], 21, 12) + 1; - cur->ways = bitfield(cache_info[ebx], 31, 22) + 1; - nsets = bitfield(cache_info[ecx], 31, 0) + 1; - cur->cache_size = cur->line_size * cur->ways * cur->partitions * nsets; + cur->type = cachep->type; + cur->level = cachep->level; + cur->nlcpus = 0; + cur->maxcpus = cpuid_info()->cache_sharing[i]; + cur->partitions = cpuid_info()->cache_partitions[i]; + cur->cache_size = cpuid_info()->cache_size[i]; + cur->line_size = cpuid_info()->cache_linesize; if (last == NULL) { root = cur; @@ -155,21 +297,31 @@ x86_cache_list(void) last->next = cur; last = cur; } - num_Lx_caches[cur->level - 1] += 1; } - - return(root); + return root; } -static boolean_t -cpu_is_hyperthreaded(void) + +static x86_cpu_cache_t * +x86_match_cache(x86_cpu_cache_t *list, x86_cpu_cache_t *matcher) { - if (cpuid_features() & CPUID_FEATURE_HTT) - return (cpuid_info()->cpuid_logical_per_package / - cpuid_info()->cpuid_cores_per_package) > 1; - else - return FALSE; + x86_cpu_cache_t *cur_cache; + + cur_cache = list; + while (cur_cache != NULL) { + if (cur_cache->maxcpus == matcher->maxcpus + && cur_cache->type == matcher->type + && cur_cache->level == matcher->level + && cur_cache->partitions == matcher->partitions + && cur_cache->line_size == matcher->line_size + && cur_cache->cache_size == matcher->cache_size) + break; + + cur_cache = cur_cache->next; + } + + return(cur_cache); } static void @@ -184,17 +336,18 @@ x86_lcpu_init(int cpu) lcpu = &cpup->lcpu; lcpu->lcpu = lcpu; lcpu->cpu = cpup; - lcpu->next = NULL; - lcpu->core = NULL; + lcpu->next_in_core = NULL; + lcpu->next_in_die = NULL; + lcpu->next_in_pkg = NULL; + lcpu->core = NULL; + lcpu->die = NULL; + lcpu->package = NULL; + lcpu->cpu_num = cpu; lcpu->lnum = cpu; lcpu->pnum = cpup->cpu_phys_number; - lcpu->halted = FALSE; /* XXX is this correct? */ - lcpu->idle = FALSE; /* XXX is this correct? 
*/ + lcpu->state = LCPU_OFF; for (i = 0; i < MAX_CACHE_DEPTH; i += 1) lcpu->caches[i] = NULL; - - lcpu->master = (lcpu->pnum == (unsigned int) master_cpu); - lcpu->primary = (lcpu->pnum % cpuid_info()->cpuid_logical_per_package) == 0; } static x86_core_t * @@ -202,16 +355,14 @@ x86_core_alloc(int cpu) { x86_core_t *core; cpu_data_t *cpup; - uint32_t cpu_in_pkg; - uint32_t lcpus_per_core; cpup = cpu_datap(cpu); simple_lock(&x86_topo_lock); if (free_cores != NULL) { core = free_cores; - free_cores = core->next; - core->next = NULL; + free_cores = core->next_in_die; + core->next_in_die = NULL; simple_unlock(&x86_topo_lock); } else { simple_unlock(&x86_topo_lock); @@ -222,14 +373,11 @@ x86_core_alloc(int cpu) bzero((void *) core, sizeof(x86_core_t)); - cpu_in_pkg = cpu % cpuid_info()->cpuid_logical_per_package; - lcpus_per_core = cpuid_info()->cpuid_logical_per_package / - cpuid_info()->cpuid_cores_per_package; + core->pcore_num = cpup->cpu_phys_number / topoParms.nPThreadsPerCore; + core->lcore_num = core->pcore_num % topoParms.nPCoresPerPackage; - core->pcore_num = cpup->cpu_phys_number / lcpus_per_core; - core->lcore_num = core->pcore_num % cpuid_info()->cpuid_cores_per_package; - - core->flags = X86CORE_FL_PRESENT | X86CORE_FL_READY; + core->flags = X86CORE_FL_PRESENT | X86CORE_FL_READY + | X86CORE_FL_HALTED | X86CORE_FL_IDLE; return(core); } @@ -238,7 +386,7 @@ static void x86_core_free(x86_core_t *core) { simple_lock(&x86_topo_lock); - core->next = free_cores; + core->next_in_die = free_cores; free_cores = core; simple_unlock(&x86_topo_lock); } @@ -252,7 +400,7 @@ x86_package_find(int cpu) cpup = cpu_datap(cpu); - pkg_num = cpup->cpu_phys_number / cpuid_info()->cpuid_logical_per_package; + pkg_num = cpup->cpu_phys_number / topoParms.nPThreadsPerPackage; pkg = x86_pkgs; while (pkg != NULL) { @@ -263,48 +411,205 @@ x86_package_find(int cpu) return(pkg); } + +static x86_die_t * +x86_die_find(int cpu) +{ + x86_die_t *die; + x86_pkg_t *pkg; + cpu_data_t *cpup; + uint32_t die_num; + + cpup = cpu_datap(cpu); + + die_num = cpup->cpu_phys_number / topoParms.nPThreadsPerDie; + + pkg = x86_package_find(cpu); + if (pkg == NULL) + return(NULL); + + die = pkg->dies; + while (die != NULL) { + if (die->pdie_num == die_num) + break; + die = die->next_in_pkg; + } + + return(die); +} static x86_core_t * x86_core_find(int cpu) { x86_core_t *core; - x86_pkg_t *pkg; + x86_die_t *die; cpu_data_t *cpup; uint32_t core_num; cpup = cpu_datap(cpu); - core_num = cpup->cpu_phys_number - / (cpuid_info()->cpuid_logical_per_package - / cpuid_info()->cpuid_cores_per_package); + core_num = cpup->cpu_phys_number / topoParms.nPThreadsPerCore; - pkg = x86_package_find(cpu); - if (pkg == NULL) + die = x86_die_find(cpu); + if (die == NULL) return(NULL); - core = pkg->cores; + core = die->cores; while (core != NULL) { if (core->pcore_num == core_num) break; - core = core->next; + core = core->next_in_die; } return(core); } + +void +x86_set_logical_topology(x86_lcpu_t *lcpu, int pnum, int lnum) +{ + x86_core_t *core = lcpu->core; + x86_die_t *die = lcpu->die; + x86_pkg_t *pkg = lcpu->package; + + assert(core != NULL); + assert(die != NULL); + assert(pkg != NULL); + + lcpu->cpu_num = lnum; + lcpu->pnum = pnum; + lcpu->master = (lnum == master_cpu); + lcpu->primary = (lnum % topoParms.nLThreadsPerPackage) == 0; + + lcpu->lnum = lnum % topoParms.nLThreadsPerCore; + + core->pcore_num = lnum / topoParms.nLThreadsPerCore; + core->lcore_num = core->pcore_num % topoParms.nLCoresPerDie; + + die->pdie_num = lnum / 
(topoParms.nLThreadsPerCore*topoParms.nLCoresPerDie); + die->ldie_num = die->pdie_num % topoParms.nLDiesPerPackage; + + pkg->ppkg_num = lnum / topoParms.nLThreadsPerPackage; + pkg->lpkg_num = pkg->ppkg_num; + +} + +static x86_die_t * +x86_die_alloc(int cpu) +{ + x86_die_t *die; + cpu_data_t *cpup; + + cpup = cpu_datap(cpu); + + simple_lock(&x86_topo_lock); + if (free_dies != NULL) { + die = free_dies; + free_dies = die->next_in_pkg; + die->next_in_pkg = NULL; + simple_unlock(&x86_topo_lock); + } else { + simple_unlock(&x86_topo_lock); + die = kalloc(sizeof(x86_die_t)); + if (die == NULL) + panic("x86_die_alloc() kalloc of x86_die_t failed!\n"); + } + + bzero((void *) die, sizeof(x86_die_t)); + + die->pdie_num = cpup->cpu_phys_number / topoParms.nPThreadsPerDie; + + die->ldie_num = num_dies; + atomic_incl((long *) &num_dies, 1); + + die->flags = X86DIE_FL_PRESENT; + return(die); +} static void -x86_core_add_lcpu(x86_core_t *core, x86_lcpu_t *lcpu) +x86_die_free(x86_die_t *die) +{ + simple_lock(&x86_topo_lock); + die->next_in_pkg = free_dies; + free_dies = die; + atomic_decl((long *) &num_dies, 1); + simple_unlock(&x86_topo_lock); +} + +static x86_pkg_t * +x86_package_alloc(int cpu) +{ + x86_pkg_t *pkg; + cpu_data_t *cpup; + + cpup = cpu_datap(cpu); + + simple_lock(&x86_topo_lock); + if (free_pkgs != NULL) { + pkg = free_pkgs; + free_pkgs = pkg->next; + pkg->next = NULL; + simple_unlock(&x86_topo_lock); + } else { + simple_unlock(&x86_topo_lock); + pkg = kalloc(sizeof(x86_pkg_t)); + if (pkg == NULL) + panic("x86_package_alloc() kalloc of x86_pkg_t failed!\n"); + } + + bzero((void *) pkg, sizeof(x86_pkg_t)); + + pkg->ppkg_num = cpup->cpu_phys_number / topoParms.nPThreadsPerPackage; + + pkg->lpkg_num = topoParms.nPackages; + atomic_incl((long *) &topoParms.nPackages, 1); + + pkg->flags = X86PKG_FL_PRESENT | X86PKG_FL_READY; + return(pkg); +} + +static void +x86_package_free(x86_pkg_t *pkg) +{ + simple_lock(&x86_topo_lock); + pkg->next = free_pkgs; + free_pkgs = pkg; + atomic_decl((long *) &topoParms.nPackages, 1); + simple_unlock(&x86_topo_lock); +} + +static void +x86_cache_add_lcpu(x86_cpu_cache_t *cache, x86_lcpu_t *lcpu) +{ + x86_cpu_cache_t *cur_cache; + int i; + + /* + * Put the new CPU into the list of the cache. + */ + cur_cache = lcpu->caches[cache->level - 1]; + lcpu->caches[cache->level - 1] = cache; + cache->next = cur_cache; + cache->nlcpus += 1; + for (i = 0; i < cache->nlcpus; i += 1) { + if (cache->cpus[i] == NULL) { + cache->cpus[i] = lcpu; + break; + } + } +} + +static void +x86_lcpu_add_caches(x86_lcpu_t *lcpu) { x86_cpu_cache_t *list; x86_cpu_cache_t *cur; - x86_core_t *cur_core; + x86_cpu_cache_t *match; + x86_die_t *die; + x86_core_t *core; x86_lcpu_t *cur_lcpu; - boolean_t found; - int level; - int i; - uint32_t cpu_mask; + uint32_t level; + boolean_t found = FALSE; - assert(core != NULL); assert(lcpu != NULL); /* @@ -327,8 +632,9 @@ x86_core_add_lcpu(x86_core_t *core, x86_lcpu_t *lcpu) * If the cache isn't shared then just put it where it * belongs. */ - if (cur->nlcpus == 1) { - goto found_first; + if (cur->maxcpus == 1) { + x86_cache_add_lcpu(cur, lcpu); + continue; } /* @@ -344,101 +650,131 @@ x86_core_add_lcpu(x86_core_t *core, x86_lcpu_t *lcpu) /* * This is a shared cache, so we have to figure out if * this is the first time we've seen this cache. We do - * this by searching through the package and seeing if - * a related core is already describing this cache. + * this by searching through the topology and seeing if + * this cache is already described. 
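+	 * (Below, levels shallower than the LLC are searched only among
+	 * this core's logical CPUs; the LLC itself is searched die-wide.)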
* - * NOTE: This assumes that CPUs whose ID mod <# sharing cache> - * are indeed sharing the cache. + * Assume that L{LLC-1} are all at the core level and that + * LLC is shared at the die level. */ - cpu_mask = lcpu->pnum & ~(cur->nlcpus - 1); - cur_core = core->package->cores; - found = FALSE; - - while (cur_core != NULL && !found) { - cur_lcpu = cur_core->lcpus; - while (cur_lcpu != NULL && !found) { - if ((cur_lcpu->pnum & ~(cur->nlcpus - 1)) == cpu_mask) { - lcpu->caches[level] = cur_lcpu->caches[level]; - found = TRUE; - x86_cache_free(cur); + if (level < topoParms.LLCDepth) { + /* + * Shared at the core. + */ + core = lcpu->core; + cur_lcpu = core->lcpus; + while (cur_lcpu != NULL) { + /* + * Skip ourselves. + */ + if (cur_lcpu == lcpu) { + cur_lcpu = cur_lcpu->next_in_core; + continue; + } - /* - * Put the new CPU into the list of the cache. - */ - cur = lcpu->caches[level]; - for (i = 0; i < cur->nlcpus; i += 1) { - if (cur->cpus[i] == NULL) { - cur->cpus[i] = lcpu; - break; - } - } + /* + * If there's a cache on this logical CPU, + * then use that one. + */ + match = x86_match_cache(cur_lcpu->caches[level], cur); + if (match != NULL) { + x86_cache_free(cur); + x86_cache_add_lcpu(match, lcpu); + found = TRUE; + break; } - cur_lcpu = cur_lcpu->next; + + cur_lcpu = cur_lcpu->next_in_core; } + } else { + /* + * Shared at the die. + */ + die = lcpu->die; + cur_lcpu = die->lcpus; + while (cur_lcpu != NULL) { + /* + * Skip ourselves. + */ + if (cur_lcpu == lcpu) { + cur_lcpu = cur_lcpu->next_in_die; + continue; + } + + /* + * If there's a cache on this logical CPU, + * then use that one. + */ + match = x86_match_cache(cur_lcpu->caches[level], cur); + if (match != NULL) { + x86_cache_free(cur); + x86_cache_add_lcpu(match, lcpu); + found = TRUE; + break; + } - cur_core = cur_core->next; + cur_lcpu = cur_lcpu->next_in_die; + } } + /* + * If a shared cache wasn't found, then this logical CPU must + * be the first one encountered. + */ if (!found) { -found_first: - cur->next = lcpu->caches[level]; - lcpu->caches[level] = cur; - cur->cpus[0] = lcpu; + x86_cache_add_lcpu(cur, lcpu); } } - /* - * Add the Logical CPU to the core. 
- */ - lcpu->next = core->lcpus; - lcpu->core = core; - core->lcpus = lcpu; - core->num_lcpus += 1; - simple_unlock(&x86_topo_lock); } -static x86_pkg_t * -x86_package_alloc(int cpu) +static void +x86_core_add_lcpu(x86_core_t *core, x86_lcpu_t *lcpu) { - x86_pkg_t *pkg; - cpu_data_t *cpup; - - cpup = cpu_datap(cpu); + assert(core != NULL); + assert(lcpu != NULL); simple_lock(&x86_topo_lock); - if (free_pkgs != NULL) { - pkg = free_pkgs; - free_pkgs = pkg->next; - pkg->next = NULL; - simple_unlock(&x86_topo_lock); - } else { - simple_unlock(&x86_topo_lock); - pkg = kalloc(sizeof(x86_pkg_t)); - if (pkg == NULL) - panic("x86_package_alloc() kalloc of x86_pkg_t failed!\n"); - } - bzero((void *) pkg, sizeof(x86_pkg_t)); + lcpu->next_in_core = core->lcpus; + lcpu->core = core; + core->lcpus = lcpu; + core->num_lcpus += 1; + simple_unlock(&x86_topo_lock); +} - pkg->ppkg_num = cpup->cpu_phys_number - / cpuid_info()->cpuid_logical_per_package; +static void +x86_die_add_lcpu(x86_die_t *die, x86_lcpu_t *lcpu) +{ + assert(die != NULL); + assert(lcpu != NULL); + + lcpu->next_in_die = die->lcpus; + lcpu->die = die; + die->lcpus = lcpu; +} - pkg->lpkg_num = num_packages; - atomic_incl((long *) &num_packages, 1); +static void +x86_die_add_core(x86_die_t *die, x86_core_t *core) +{ + assert(die != NULL); + assert(core != NULL); - pkg->flags = X86PKG_FL_PRESENT | X86PKG_FL_READY; - return(pkg); + core->next_in_die = die->cores; + core->die = die; + die->cores = core; + die->num_cores += 1; } -static void -x86_package_free(x86_pkg_t *pkg) + static void +x86_package_add_lcpu(x86_pkg_t *pkg, x86_lcpu_t *lcpu) { - simple_lock(&x86_topo_lock); - pkg->next = free_pkgs; - free_pkgs = pkg; - atomic_decl((long *) &num_packages, 1); - simple_unlock(&x86_topo_lock); + assert(pkg != NULL); + assert(lcpu != NULL); + + lcpu->next_in_pkg = pkg->lcpus; + lcpu->package = pkg; + pkg->lcpus = lcpu; } static void @@ -447,20 +783,43 @@ x86_package_add_core(x86_pkg_t *pkg, x86_core_t *core) assert(pkg != NULL); assert(core != NULL); - core->next = pkg->cores; + core->next_in_pkg = pkg->cores; core->package = pkg; pkg->cores = core; - pkg->num_cores += 1; +} + +static void +x86_package_add_die(x86_pkg_t *pkg, x86_die_t *die) +{ + assert(pkg != NULL); + assert(die != NULL); + + die->next_in_pkg = pkg->dies; + die->package = pkg; + pkg->dies = die; + pkg->num_dies += 1; } void * cpu_thread_alloc(int cpu) { - x86_core_t *core; - x86_pkg_t *pkg; + x86_core_t *core = NULL; + x86_die_t *die = NULL; + x86_pkg_t *pkg = NULL; cpu_data_t *cpup; uint32_t phys_cpu; + /* + * Only allow one to manipulate the topology at a time. + */ + simple_lock(&x86_topo_lock); + + /* + * Make sure all of the topology parameters have been initialized. + */ + if (!topoParmsInited) + initTopoParms(); + cpup = cpu_datap(cpu); phys_cpu = cpup->cpu_phys_number; @@ -477,22 +836,9 @@ cpu_thread_alloc(int cpu) } /* - * Only allow one to manipulate the topology at a time. - */ - simple_lock(&x86_topo_lock); - - /* - * Get the core for this logical CPU. + * Get the package that the logical CPU is in. */ - core_again: - core = x86_core_find(cpu); - if (core == NULL) { - /* - * Core structure hasn't been created yet, do it now. - * - * Get the package that the core is part of. 
- */ - package_again: + do { pkg = x86_package_find(cpu); if (pkg == NULL) { /* @@ -503,7 +849,7 @@ cpu_thread_alloc(int cpu) simple_lock(&x86_topo_lock); if (x86_package_find(cpu) != NULL) { x86_package_free(pkg); - goto package_again; + continue; } /* @@ -512,31 +858,58 @@ cpu_thread_alloc(int cpu) pkg->next = x86_pkgs; x86_pkgs = pkg; } + } while (pkg == NULL); - /* - * Allocate the core structure now. - */ - simple_unlock(&x86_topo_lock); - core = x86_core_alloc(cpu); - simple_lock(&x86_topo_lock); - if (x86_core_find(cpu) != NULL) { - x86_core_free(core); - goto core_again; + /* + * Get the die that the logical CPU is in. + */ + do { + die = x86_die_find(cpu); + if (die == NULL) { + /* + * Die structure hasn't been created yet, do it now. + */ + simple_unlock(&x86_topo_lock); + die = x86_die_alloc(cpu); + simple_lock(&x86_topo_lock); + if (x86_die_find(cpu) != NULL) { + x86_die_free(die); + continue; + } + + /* + * Add the die to the package. + */ + x86_package_add_die(pkg, die); } + } while (die == NULL); - /* - * Add it to the package. - */ - x86_package_add_core(pkg, core); - machine_info.physical_cpu_max += 1; + /* + * Get the core for this logical CPU. + */ + do { + core = x86_core_find(cpu); + if (core == NULL) { + /* + * Allocate the core structure now. + */ + simple_unlock(&x86_topo_lock); + core = x86_core_alloc(cpu); + simple_lock(&x86_topo_lock); + if (x86_core_find(cpu) != NULL) { + x86_core_free(core); + continue; + } + + /* + * Add the core to the die & package. + */ + x86_die_add_core(die, core); + x86_package_add_core(pkg, core); + machine_info.physical_cpu_max += 1; + } + } while (core == NULL); - /* - * Allocate performance counter structure. - */ - simple_unlock(&x86_topo_lock); - core->pmc = pmc_alloc(); - simple_lock(&x86_topo_lock); - } /* * Done manipulating the topology, so others can get in. @@ -544,7 +917,13 @@ cpu_thread_alloc(int cpu) machine_info.logical_cpu_max += 1; simple_unlock(&x86_topo_lock); + /* + * Add the logical CPU to the other topology structures. + */ x86_core_add_lcpu(core, &cpup->lcpu); + x86_die_add_lcpu(core->die, &cpup->lcpu); + x86_package_add_lcpu(core->package, &cpup->lcpu); + x86_lcpu_add_caches(&cpup->lcpu); return (void *) core; } @@ -552,10 +931,10 @@ cpu_thread_alloc(int cpu) void cpu_thread_init(void) { - int my_cpu = get_cpu_number(); - cpu_data_t *cpup = current_cpu_datap(); + int my_cpu = get_cpu_number(); + cpu_data_t *cpup = current_cpu_datap(); x86_core_t *core; - static int initialized = 0; + static int initialized = 0; /* * If we're the boot processor, we do all of the initialization of @@ -581,18 +960,17 @@ cpu_thread_init(void) if (core->active_lcpus == 0) machine_info.physical_cpu += 1; core->active_lcpus += 1; - cpup->lcpu.halted = FALSE; - cpup->lcpu.idle = FALSE; simple_unlock(&x86_topo_lock); pmCPUMarkRunning(cpup); - etimer_resync_deadlines(); + timer_resync_deadlines(); } /* * Called for a cpu to halt permanently * (as opposed to halting and expecting an interrupt to awaken it). */ +__attribute__((noreturn)) void cpu_thread_halt(void) { @@ -601,7 +979,6 @@ cpu_thread_halt(void) simple_lock(&x86_topo_lock); machine_info.logical_cpu -= 1; - cpup->lcpu.idle = TRUE; core = cpup->lcpu.core; core->active_lcpus -= 1; if (core->active_lcpus == 0) @@ -618,3 +995,237 @@ cpu_thread_halt(void) } /* NOT REACHED */ } + +/* + * Validates that the topology was built correctly. Must be called only + * after the complete topology is built and no other changes are being made. 
+ */ +void +x86_validate_topology(void) +{ + x86_pkg_t *pkg; + x86_die_t *die; + x86_core_t *core; + x86_lcpu_t *lcpu; + uint32_t nDies; + uint32_t nCores; + uint32_t nCPUs; + + if (topo_dbg) + debug_topology_print(); + + /* + * XXX + * + * Right now this only works if the number of CPUs started is the total + * number of CPUs. However, when specifying cpus=n the topology is only + * partially constructed and the checks below will fail. + * + * We should *always* build the complete topology and only start the CPUs + * indicated by cpus=n. Until that happens, this code will not check the + * topology if the number of cpus defined is < that described the the + * topology parameters. + */ + nCPUs = topoParms.nPackages * topoParms.nLThreadsPerPackage; + if (nCPUs > real_ncpus) + return; + + pkg = x86_pkgs; + while (pkg != NULL) { + /* + * Make sure that the package has the correct number of dies. + */ + nDies = 0; + die = pkg->dies; + while (die != NULL) { + if (die->package == NULL) + panic("Die(%d)->package is NULL", + die->pdie_num); + if (die->package != pkg) + panic("Die %d points to package %d, should be %d", + die->pdie_num, die->package->lpkg_num, pkg->lpkg_num); + + TOPO_DBG("Die(%d)->package %d\n", + die->pdie_num, pkg->lpkg_num); + + /* + * Make sure that the die has the correct number of cores. + */ + TOPO_DBG("Die(%d)->cores: ", die->pdie_num); + nCores = 0; + core = die->cores; + while (core != NULL) { + if (core->die == NULL) + panic("Core(%d)->die is NULL", + core->pcore_num); + if (core->die != die) + panic("Core %d points to die %d, should be %d", + core->pcore_num, core->die->pdie_num, die->pdie_num); + nCores += 1; + TOPO_DBG("%d ", core->pcore_num); + core = core->next_in_die; + } + TOPO_DBG("\n"); + + if (nCores != topoParms.nLCoresPerDie) + panic("Should have %d Cores, but only found %d for Die %d", + topoParms.nLCoresPerDie, nCores, die->pdie_num); + + /* + * Make sure that the die has the correct number of CPUs. + */ + TOPO_DBG("Die(%d)->lcpus: ", die->pdie_num); + nCPUs = 0; + lcpu = die->lcpus; + while (lcpu != NULL) { + if (lcpu->die == NULL) + panic("CPU(%d)->die is NULL", + lcpu->cpu_num); + if (lcpu->die != die) + panic("CPU %d points to die %d, should be %d", + lcpu->cpu_num, lcpu->die->pdie_num, die->pdie_num); + nCPUs += 1; + TOPO_DBG("%d ", lcpu->cpu_num); + lcpu = lcpu->next_in_die; + } + TOPO_DBG("\n"); + + if (nCPUs != topoParms.nLThreadsPerDie) + panic("Should have %d Threads, but only found %d for Die %d", + topoParms.nLThreadsPerDie, nCPUs, die->pdie_num); + + nDies += 1; + die = die->next_in_pkg; + } + + if (nDies != topoParms.nLDiesPerPackage) + panic("Should have %d Dies, but only found %d for package %d", + topoParms.nLDiesPerPackage, nDies, pkg->lpkg_num); + + /* + * Make sure that the package has the correct number of cores. + */ + nCores = 0; + core = pkg->cores; + while (core != NULL) { + if (core->package == NULL) + panic("Core(%d)->package is NULL", + core->pcore_num); + if (core->package != pkg) + panic("Core %d points to package %d, should be %d", + core->pcore_num, core->package->lpkg_num, pkg->lpkg_num); + TOPO_DBG("Core(%d)->package %d\n", + core->pcore_num, pkg->lpkg_num); + + /* + * Make sure that the core has the correct number of CPUs. 
+ */ + nCPUs = 0; + lcpu = core->lcpus; + TOPO_DBG("Core(%d)->lcpus: ", core->pcore_num); + while (lcpu != NULL) { + if (lcpu->core == NULL) + panic("CPU(%d)->core is NULL", + lcpu->cpu_num); + if (lcpu->core != core) + panic("CPU %d points to core %d, should be %d", + lcpu->cpu_num, lcpu->core->pcore_num, core->pcore_num); + TOPO_DBG("%d ", lcpu->cpu_num); + nCPUs += 1; + lcpu = lcpu->next_in_core; + } + TOPO_DBG("\n"); + + if (nCPUs != topoParms.nLThreadsPerCore) + panic("Should have %d Threads, but only found %d for Core %d", + topoParms.nLThreadsPerCore, nCPUs, core->pcore_num); + nCores += 1; + core = core->next_in_pkg; + } + + if (nCores != topoParms.nLCoresPerPackage) + panic("Should have %d Cores, but only found %d for package %d", + topoParms.nLCoresPerPackage, nCores, pkg->lpkg_num); + + /* + * Make sure that the package has the correct number of CPUs. + */ + nCPUs = 0; + lcpu = pkg->lcpus; + while (lcpu != NULL) { + if (lcpu->package == NULL) + panic("CPU(%d)->package is NULL", + lcpu->cpu_num); + if (lcpu->package != pkg) + panic("CPU %d points to package %d, should be %d", + lcpu->cpu_num, lcpu->package->lpkg_num, pkg->lpkg_num); + TOPO_DBG("CPU(%d)->package %d\n", + lcpu->cpu_num, pkg->lpkg_num); + nCPUs += 1; + lcpu = lcpu->next_in_pkg; + } + + if (nCPUs != topoParms.nLThreadsPerPackage) + panic("Should have %d Threads, but only found %d for package %d", + topoParms.nLThreadsPerPackage, nCPUs, pkg->lpkg_num); + + pkg = pkg->next; + } +} + +/* + * Prints out the topology + */ +static void +debug_topology_print(void) +{ + x86_pkg_t *pkg; + x86_die_t *die; + x86_core_t *core; + x86_lcpu_t *cpu; + + pkg = x86_pkgs; + while (pkg != NULL) { + kprintf("Package:\n"); + kprintf(" Physical: %d\n", pkg->ppkg_num); + kprintf(" Logical: %d\n", pkg->lpkg_num); + + die = pkg->dies; + while (die != NULL) { + kprintf(" Die:\n"); + kprintf(" Physical: %d\n", die->pdie_num); + kprintf(" Logical: %d\n", die->ldie_num); + + core = die->cores; + while (core != NULL) { + kprintf(" Core:\n"); + kprintf(" Physical: %d\n", core->pcore_num); + kprintf(" Logical: %d\n", core->lcore_num); + + cpu = core->lcpus; + while (cpu != NULL) { + kprintf(" LCPU:\n"); + kprintf(" CPU #: %d\n", cpu->cpu_num); + kprintf(" Physical: %d\n", cpu->pnum); + kprintf(" Logical: %d\n", cpu->lnum); + kprintf(" Flags: "); + if (cpu->master) + kprintf("MASTER "); + if (cpu->primary) + kprintf("PRIMARY"); + if (!cpu->master && !cpu->primary) + kprintf("(NONE)"); + kprintf("\n"); + + cpu = cpu->next_in_core; + } + + core = core->next_in_die; + } + + die = die->next_in_pkg; + } + + pkg = pkg->next; + } +}