X-Git-Url: https://git.saurik.com/apple/xnu.git/blobdiff_plain/0c530ab8987f0ae6a1a3d9284f40182b88852816..2d21ac55c334faf3a56e5634905ed6987fc787d4:/osfmk/i386/cpu_threads.c

diff --git a/osfmk/i386/cpu_threads.c b/osfmk/i386/cpu_threads.c
index 0a4c3d5e2..ad8867f53 100644
--- a/osfmk/i386/cpu_threads.c
+++ b/osfmk/i386/cpu_threads.c
@@ -1,31 +1,42 @@
 /*
- * Copyright (c) 2003-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
  *
- * @APPLE_LICENSE_HEADER_START@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  *
- * The contents of this file constitute Original Code as defined in and
- * are subject to the Apple Public Source License Version 1.1 (the
- * "License").  You may not use this file except in compliance with the
- * License.  Please obtain a copy of the License at
- * http://www.apple.com/publicsource and read it before using this file.
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
  *
- * This Original Code and all software distributed under the License are
- * distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE OR NON-INFRINGEMENT.  Please see the
- * License for the specific language governing rights and limitations
- * under the License.
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
  *
- * @APPLE_LICENSE_HEADER_END@
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
  */
 #include
+#include
 #include
 #include
 #include
 #include
 #include
 #include
+#include
+
+#define bitmask(h,l)	((bit(h)|(bit(h)-1)) & ~(bit(l)-1))
+#define bitfield(x,h,l)	(((x) & bitmask(h,l)) >> l)
 
 /*
  * Kernel parameter determining whether threads are halted unconditionally
@@ -34,81 +45,548 @@
  */
 int idlehalt = 1;
 
+x86_pkg_t	*x86_pkgs = NULL;
+uint32_t	num_packages = 0;
+uint32_t	num_Lx_caches[MAX_CACHE_DEPTH] = { 0 };
+
+static x86_pkg_t	*free_pkgs = NULL;
+static x86_core_t	*free_cores = NULL;
+
+static x86_cpu_cache_t	*x86_caches = NULL;
+static uint32_t		num_caches = 0;
+
+decl_simple_lock_data(, x86_topo_lock);
+
+static x86_cpu_cache_t *
+x86_cache_alloc(void)
+{
+    x86_cpu_cache_t	*cache;
+    int			i;
+
+    if (x86_caches == NULL) {
+        cache = kalloc(sizeof(x86_cpu_cache_t) + (MAX_CPUS * sizeof(x86_lcpu_t *)));
+        if (cache == NULL)
+            return(NULL);
+    } else {
+        cache = x86_caches;
+        x86_caches = cache->next;
+        cache->next = NULL;
+    }
+
+    bzero(cache, sizeof(x86_cpu_cache_t));
+    cache->next = NULL;
+    cache->maxcpus = MAX_CPUS;
+    for (i = 0; i < cache->maxcpus; i += 1) {
+        cache->cpus[i] = NULL;
+    }
+
+    num_caches += 1;
+
+    return(cache);
+}
+
+static void
+x86_cache_free(x86_cpu_cache_t *cache)
+{
+    num_caches -= 1;
+    if (cache->level > 0 && cache->level <= MAX_CACHE_DEPTH)
+        num_Lx_caches[cache->level - 1] -= 1;
+    cache->next = x86_caches;
+    x86_caches = cache;
+}
+
+/*
+ * This returns a list of cache structures that represent the
+ * caches for a CPU.  Some of the structures may have to be
+ * "freed" if they are actually shared between CPUs.
+ */
+static x86_cpu_cache_t *
+x86_cache_list(void)
+{
+    x86_cpu_cache_t	*root = NULL;
+    x86_cpu_cache_t	*cur = NULL;
+    x86_cpu_cache_t	*last = NULL;
+    uint32_t		index;
+    uint32_t		cache_info[4];
+    uint32_t		nsets;
+
+    do_cpuid(0, cache_info);
+
+    if (cache_info[eax] < 4) {
+        /*
+         * Processor does not support deterministic
+         * cache information. Don't report anything
+         */
+        return NULL;
+    }
+
+    for (index = 0; ; index += 1) {
+        cache_info[eax] = 4;
+        cache_info[ecx] = index;
+        cache_info[ebx] = 0;
+        cache_info[edx] = 0;
+
+        cpuid(cache_info);
+
+        /*
+         * See if all levels have been queried.
+         */
+        if (bitfield(cache_info[eax], 4, 0) == 0)
+            break;
+
+        cur = x86_cache_alloc();
+        if (cur == NULL) {
+            break;
+        }
+
+        cur->type = bitfield(cache_info[eax], 4, 0);
+        cur->level = bitfield(cache_info[eax], 7, 5);
+        cur->nlcpus = bitfield(cache_info[eax], 25, 14) + 1;
+        cur->line_size = bitfield(cache_info[ebx], 11, 0) + 1;
+        cur->partitions = bitfield(cache_info[ebx], 21, 12) + 1;
+        cur->ways = bitfield(cache_info[ebx], 31, 22) + 1;
+        nsets = bitfield(cache_info[ecx], 31, 0) + 1;
+        cur->cache_size = cur->line_size * cur->ways * cur->partitions * nsets;
+
+        if (last == NULL) {
+            root = cur;
+            last = cur;
+        } else {
+            last->next = cur;
+            last = cur;
+        }
+
+        num_Lx_caches[cur->level - 1] += 1;
+    }
+
+    return(root);
+}
 
 static boolean_t
 cpu_is_hyperthreaded(void)
 {
-	if (cpuid_features() & CPUID_FEATURE_HTT)
-		return (cpuid_info()->cpuid_logical_per_package /
-			cpuid_info()->cpuid_cores_per_package) > 1;
-	else
-		return FALSE;
+    if (cpuid_features() & CPUID_FEATURE_HTT)
+        return (cpuid_info()->cpuid_logical_per_package /
+                cpuid_info()->cpuid_cores_per_package) > 1;
+    else
+        return FALSE;
+}
+
+static void
+x86_lcpu_init(int cpu)
+{
+    cpu_data_t	*cpup;
+    x86_lcpu_t	*lcpu;
+    int		i;
+
+    cpup = cpu_datap(cpu);
+
+    lcpu = &cpup->lcpu;
+    lcpu->lcpu = lcpu;
+    lcpu->cpu = cpup;
+    lcpu->next = NULL;
+    lcpu->core = NULL;
+    lcpu->lnum = cpu;
+    lcpu->pnum = cpup->cpu_phys_number;
+    lcpu->halted = FALSE;	/* XXX is this correct? */
+    lcpu->idle = FALSE;		/* XXX is this correct? */
+    for (i = 0; i < MAX_CACHE_DEPTH; i += 1)
+        lcpu->caches[i] = NULL;
+
+    lcpu->master = (lcpu->pnum == (unsigned int) master_cpu);
+    lcpu->primary = (lcpu->pnum % cpuid_info()->cpuid_logical_per_package) == 0;
+}
+
+static x86_core_t *
+x86_core_alloc(int cpu)
+{
+    x86_core_t	*core;
+    cpu_data_t	*cpup;
+    uint32_t	cpu_in_pkg;
+    uint32_t	lcpus_per_core;
+
+    cpup = cpu_datap(cpu);
+
+    simple_lock(&x86_topo_lock);
+    if (free_cores != NULL) {
+        core = free_cores;
+        free_cores = core->next;
+        core->next = NULL;
+        simple_unlock(&x86_topo_lock);
+    } else {
+        simple_unlock(&x86_topo_lock);
+        core = kalloc(sizeof(x86_core_t));
+        if (core == NULL)
+            panic("x86_core_alloc() kalloc of x86_core_t failed!\n");
+    }
+
+    bzero((void *) core, sizeof(x86_core_t));
+
+    cpu_in_pkg = cpu % cpuid_info()->cpuid_logical_per_package;
+    lcpus_per_core = cpuid_info()->cpuid_logical_per_package /
+                     cpuid_info()->cpuid_cores_per_package;
+
+    core->pcore_num = cpup->cpu_phys_number / lcpus_per_core;
+    core->lcore_num = core->pcore_num % cpuid_info()->cpuid_cores_per_package;
+
+    core->flags = X86CORE_FL_PRESENT | X86CORE_FL_READY;
+
+    return(core);
+}
+
+static void
+x86_core_free(x86_core_t *core)
+{
+    simple_lock(&x86_topo_lock);
+    core->next = free_cores;
+    free_cores = core;
+    simple_unlock(&x86_topo_lock);
+}
+
+static x86_pkg_t *
+x86_package_find(int cpu)
+{
+    x86_pkg_t	*pkg;
+    cpu_data_t	*cpup;
+    uint32_t	pkg_num;
+
+    cpup = cpu_datap(cpu);
+
+    pkg_num = cpup->cpu_phys_number / cpuid_info()->cpuid_logical_per_package;
+
+    pkg = x86_pkgs;
+    while (pkg != NULL) {
+        if (pkg->ppkg_num == pkg_num)
+            break;
+        pkg = pkg->next;
+    }
+
+    return(pkg);
+}
+
+static x86_core_t *
+x86_core_find(int cpu)
+{
+    x86_core_t	*core;
+    x86_pkg_t	*pkg;
+    cpu_data_t	*cpup;
+    uint32_t	core_num;
+
+    cpup = cpu_datap(cpu);
+
+    core_num = cpup->cpu_phys_number
+               / (cpuid_info()->cpuid_logical_per_package
+                  / cpuid_info()->cpuid_cores_per_package);
+
+    pkg = x86_package_find(cpu);
+    if (pkg == NULL)
+        return(NULL);
+
+    core = pkg->cores;
+    while (core != NULL) {
+        if (core->pcore_num == core_num)
+            break;
+        core = core->next;
+    }
+
+    return(core);
+}
+
+static void
+x86_core_add_lcpu(x86_core_t *core, x86_lcpu_t *lcpu)
+{
+    x86_cpu_cache_t	*list;
+    x86_cpu_cache_t	*cur;
+    x86_core_t		*cur_core;
+    x86_lcpu_t		*cur_lcpu;
+    boolean_t		found;
+    int			level;
+    int			i;
+    uint32_t		cpu_mask;
+
+    assert(core != NULL);
+    assert(lcpu != NULL);
+
+    /*
+     * Add the cache data to the topology.
+     */
+    list = x86_cache_list();
+
+    simple_lock(&x86_topo_lock);
+
+    while (list != NULL) {
+        /*
+         * Remove the cache from the front of the list.
+         */
+        cur = list;
+        list = cur->next;
+        cur->next = NULL;
+        level = cur->level - 1;
+
+        /*
+         * If the cache isn't shared then just put it where it
+         * belongs.
+         */
+        if (cur->nlcpus == 1) {
+            goto found_first;
+        }
+
+        /*
+         * We'll assume that all of the caches at a particular level
+         * have the same sharing.  So if we have a cache already at
+         * this level, we'll just skip looking for the match.
+         */
+        if (lcpu->caches[level] != NULL) {
+            x86_cache_free(cur);
+            continue;
+        }
+
+        /*
+         * This is a shared cache, so we have to figure out if
+         * this is the first time we've seen this cache.  We do
+         * this by searching through the package and seeing if
+         * a related core is already describing this cache.
+         *
+         * NOTE: This assumes that CPUs whose ID mod <# sharing cache>
+         * are indeed sharing the cache.
+         */
+        cpu_mask = lcpu->pnum & ~(cur->nlcpus - 1);
+        cur_core = core->package->cores;
+        found = FALSE;
+
+        while (cur_core != NULL && !found) {
+            cur_lcpu = cur_core->lcpus;
+            while (cur_lcpu != NULL && !found) {
+                if ((cur_lcpu->pnum & ~(cur->nlcpus - 1)) == cpu_mask) {
+                    lcpu->caches[level] = cur_lcpu->caches[level];
+                    found = TRUE;
+                    x86_cache_free(cur);
+
+                    /*
+                     * Put the new CPU into the list of the cache.
+                     */
+                    cur = lcpu->caches[level];
+                    for (i = 0; i < cur->nlcpus; i += 1) {
+                        if (cur->cpus[i] == NULL) {
+                            cur->cpus[i] = lcpu;
+                            break;
+                        }
+                    }
+                }
+                cur_lcpu = cur_lcpu->next;
+            }
+
+            cur_core = cur_core->next;
+        }
+
+        if (!found) {
+found_first:
+            cur->next = lcpu->caches[level];
+            lcpu->caches[level] = cur;
+            cur->cpus[0] = lcpu;
+        }
+    }
+
+    /*
+     * Add the Logical CPU to the core.
+     */
+    lcpu->next = core->lcpus;
+    lcpu->core = core;
+    core->lcpus = lcpu;
+    core->num_lcpus += 1;
+
+    simple_unlock(&x86_topo_lock);
+}
+
+static x86_pkg_t *
+x86_package_alloc(int cpu)
+{
+    x86_pkg_t	*pkg;
+    cpu_data_t	*cpup;
+
+    cpup = cpu_datap(cpu);
+
+    simple_lock(&x86_topo_lock);
+    if (free_pkgs != NULL) {
+        pkg = free_pkgs;
+        free_pkgs = pkg->next;
+        pkg->next = NULL;
+        simple_unlock(&x86_topo_lock);
+    } else {
+        simple_unlock(&x86_topo_lock);
+        pkg = kalloc(sizeof(x86_pkg_t));
+        if (pkg == NULL)
+            panic("x86_package_alloc() kalloc of x86_pkg_t failed!\n");
+    }
+
+    bzero((void *) pkg, sizeof(x86_pkg_t));
+
+    pkg->ppkg_num = cpup->cpu_phys_number
+                    / cpuid_info()->cpuid_logical_per_package;
+
+    pkg->lpkg_num = num_packages;
+    atomic_incl((long *) &num_packages, 1);
+
+    pkg->flags = X86PKG_FL_PRESENT | X86PKG_FL_READY;
+    return(pkg);
+}
+
+static void
+x86_package_free(x86_pkg_t *pkg)
+{
+    simple_lock(&x86_topo_lock);
+    pkg->next = free_pkgs;
+    free_pkgs = pkg;
+    atomic_decl((long *) &num_packages, 1);
+    simple_unlock(&x86_topo_lock);
+}
+
+static void
+x86_package_add_core(x86_pkg_t *pkg, x86_core_t *core)
+{
+    assert(pkg != NULL);
+    assert(core != NULL);
+
+    core->next = pkg->cores;
+    core->package = pkg;
+    pkg->cores = core;
+    pkg->num_cores += 1;
 }
 
 void *
 cpu_thread_alloc(int cpu)
 {
-	int	core_base_cpu;
-	int	ret;
-	cpu_core_t	*core;
+    x86_core_t	*core;
+    x86_pkg_t	*pkg;
+    cpu_data_t	*cpup;
+    uint32_t	phys_cpu;
 
+    cpup = cpu_datap(cpu);
+
+    phys_cpu = cpup->cpu_phys_number;
+
+    x86_lcpu_init(cpu);
+
+    /*
+     * Assume that all cpus have the same features.
+     */
+    if (cpu_is_hyperthreaded()) {
+        cpup->cpu_threadtype = CPU_THREADTYPE_INTEL_HTT;
+    } else {
+        cpup->cpu_threadtype = CPU_THREADTYPE_NONE;
+    }
+
+    /*
+     * Only allow one to manipulate the topology at a time.
+     */
+    simple_lock(&x86_topo_lock);
+
+    /*
+     * Get the core for this logical CPU.
+     */
+core_again:
+    core = x86_core_find(cpu);
+    if (core == NULL) {
 	/*
-	 * Assume that all cpus have the same features.
+	 * Core structure hasn't been created yet, do it now.
+	 *
+	 * Get the package that the core is part of.
 	 */
-	if (cpu_is_hyperthreaded()) {
-		/*
-		 * Get the cpu number of the base thread in the core.
-		 */
-		core_base_cpu = cpu_to_core_cpu(cpu);
-		cpu_datap(cpu)->cpu_threadtype = CPU_THREADTYPE_INTEL_HTT;
-	} else {
-		core_base_cpu = cpu;
-		cpu_datap(cpu)->cpu_threadtype = CPU_THREADTYPE_NONE;
+package_again:
+	pkg = x86_package_find(cpu);
+	if (pkg == NULL) {
+	    /*
+	     * Package structure hasn't been created yet, do it now.
+	     */
+	    simple_unlock(&x86_topo_lock);
+	    pkg = x86_package_alloc(cpu);
+	    simple_lock(&x86_topo_lock);
+	    if (x86_package_find(cpu) != NULL) {
+		x86_package_free(pkg);
+		goto package_again;
+	    }
+
+	    /*
+	     * Add the new package to the global list of packages.
+	     */
+	    pkg->next = x86_pkgs;
+	    x86_pkgs = pkg;
 	}
 
-	core = (cpu_core_t *) cpu_to_core(core_base_cpu);
-	if (core == NULL) {
-		ret = kmem_alloc(kernel_map,
-				 (void *) &core, sizeof(cpu_core_t));
-		if (ret != KERN_SUCCESS)
-			panic("cpu_thread_alloc() kmem_alloc ret=%d\n", ret);
-		bzero((void *) core, sizeof(cpu_core_t));
+	/*
+	 * Allocate the core structure now.
+	 */
+	simple_unlock(&x86_topo_lock);
+	core = x86_core_alloc(cpu);
+	simple_lock(&x86_topo_lock);
+	if (x86_core_find(cpu) != NULL) {
+	    x86_core_free(core);
+	    goto core_again;
+	}
 
-		core->base_cpu = core_base_cpu;
+	/*
+	 * Add it to the package.
+	 */
+	x86_package_add_core(pkg, core);
+	machine_info.physical_cpu_max += 1;
 
-		atomic_incl((long *) &machine_info.physical_cpu_max, 1);
+	/*
+	 * Allocate performance counter structure.
+	 */
+	simple_unlock(&x86_topo_lock);
+	core->pmc = pmc_alloc();
+	simple_lock(&x86_topo_lock);
+    }
+
+    /*
+     * Done manipulating the topology, so others can get in.
+     */
+    machine_info.logical_cpu_max += 1;
+    simple_unlock(&x86_topo_lock);
 
-		/* Allocate performance counter data area (if available) */
-		core->pmc = pmc_alloc();
-	}
-	atomic_incl((long *) &machine_info.logical_cpu_max, 1);
+    x86_core_add_lcpu(core, &cpup->lcpu);
 
-	return (void *) core;
+    return (void *) core;
 }
 
 void
 cpu_thread_init(void)
 {
-	int		my_cpu = get_cpu_number();
-	cpu_core_t	*my_core;
+    int		my_cpu = get_cpu_number();
+    cpu_data_t	*cpup = current_cpu_datap();
+    x86_core_t	*core;
+    static int	initialized = 0;
+
+    /*
+     * If we're the boot processor, we do all of the initialization of
+     * the CPU topology infrastructure.
+     */
+    if (my_cpu == master_cpu && !initialized) {
+	simple_lock_init(&x86_topo_lock, 0);
 
 	/*
-	 * If we're the boot processor we allocate the core structure here.
-	 * Otherwise the core has already been allocated (by the boot cpu).
+	 * Put this logical CPU into the physical CPU topology.
 	 */
-	if (my_cpu == master_cpu)
-		cpu_to_core(master_cpu) = cpu_thread_alloc(master_cpu);
+	cpup->lcpu.core = cpu_thread_alloc(my_cpu);
+
+	initialized = 1;
+    }
 
-	my_core = cpu_core();
-	if (my_core == NULL)
-		panic("cpu_thread_init() no core allocated for cpu %d", my_cpu);
+    /*
+     * Do the CPU accounting.
+     */
+    core = cpup->lcpu.core;
+    simple_lock(&x86_topo_lock);
+    machine_info.logical_cpu += 1;
+    if (core->active_lcpus == 0)
+	machine_info.physical_cpu += 1;
+    core->active_lcpus += 1;
+    cpup->lcpu.halted = FALSE;
+    cpup->lcpu.idle = FALSE;
+    simple_unlock(&x86_topo_lock);
 
-	atomic_incl((long *) &my_core->active_threads, 1);
-	atomic_incl((long *) &machine_info.logical_cpu, 1);
-	/* Note: cpus are started serially so this isn't as racey as it looks */
-	if (my_core->num_threads == 0)
-		atomic_incl((long *) &machine_info.physical_cpu, 1);
-	atomic_incl((long *) &my_core->num_threads, 1);
+    pmCPUMarkRunning(cpup);
+    etimer_resync_deadlines();
 }
 
 /*
@@ -118,12 +596,25 @@ cpu_thread_init(void)
 void
 cpu_thread_halt(void)
 {
-	cpu_core_t	*my_core = cpu_core();
+    x86_core_t	*core;
+    cpu_data_t	*cpup = current_cpu_datap();
 
-	atomic_decl((long *) &machine_info.logical_cpu, 1);
-	atomic_decl((long *) &my_core->active_threads, 1);
-	if (atomic_decl_and_test((long *) &my_core->num_threads, 1))
-		atomic_decl((long *) &machine_info.physical_cpu, 1);
+    simple_lock(&x86_topo_lock);
+    machine_info.logical_cpu -= 1;
+    cpup->lcpu.idle = TRUE;
+    core = cpup->lcpu.core;
+    core->active_lcpus -= 1;
+    if (core->active_lcpus == 0)
+	machine_info.physical_cpu -= 1;
+    simple_unlock(&x86_topo_lock);
 
-	cpu_halt();
+    /*
+     * Let the power management code determine the best way to "stop"
+     * the processor.
+     */
+    ml_set_interrupts_enabled(FALSE);
+    while (1) {
+	pmCPUHalt(PM_HALT_NORMAL);
+    }
+    /* NOT REACHED */
 }
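
Note on the cache enumeration added by this diff: x86_cache_list() walks CPUID leaf 4 (deterministic cache parameters), stopping when EAX[4:0] reports cache type 0, and computes each cache's size as line_size * partitions * ways * sets using the bitmask/bitfield macros introduced at the top of the file. The program below is a minimal stand-alone sketch of that same decode, not part of the diff; it assumes a user-space GCC/Clang toolchain on x86, where __cpuid_count() from <cpuid.h> stands in for the kernel's do_cpuid()/cpuid() wrappers, and it reuses the field positions shown in the diff.

/*
 * Stand-alone sketch of the CPUID leaf 4 decode used by x86_cache_list().
 * Build: cc -o caches caches.c   (x86 only; __cpuid_count comes from <cpuid.h>)
 */
#include <stdio.h>
#include <stdint.h>
#include <cpuid.h>

/* Same helpers as the diff; bit() is supplied by the kernel's cpuid.h there. */
#define bit(n)          (1U << (n))
#define bitmask(h,l)    ((bit(h)|(bit(h)-1)) & ~(bit(l)-1))
#define bitfield(x,h,l) (((x) & bitmask(h,l)) >> l)

int
main(void)
{
	uint32_t eax, ebx, ecx, edx;
	uint32_t index;

	for (index = 0; ; index += 1) {
		/* Leaf 4, sub-leaf "index": one cache description per sub-leaf. */
		__cpuid_count(4, index, eax, ebx, ecx, edx);

		/* EAX[4:0] == 0 means there are no more caches to report. */
		if (bitfield(eax, 4, 0) == 0)
			break;

		uint32_t level      = bitfield(eax, 7, 5);
		uint32_t sharing    = bitfield(eax, 25, 14) + 1; /* logical CPUs sharing it */
		uint32_t line_size  = bitfield(ebx, 11, 0) + 1;
		uint32_t partitions = bitfield(ebx, 21, 12) + 1;
		uint32_t ways       = bitfield(ebx, 31, 22) + 1;
		uint32_t nsets      = bitfield(ecx, 31, 0) + 1;

		/* Same size formula as cur->cache_size in the diff. */
		printf("L%u cache: %u bytes, %u-way, %u-byte lines, shared by %u logical CPUs\n",
		       level, line_size * partitions * ways * nsets,
		       ways, line_size, sharing);
	}
	return 0;
}

The kernel version does two extra things the sketch does not need: it first checks that CPUID leaf 0 reports a maximum leaf of at least 4 (older processors without deterministic cache info get no cache topology), and in x86_core_add_lcpu() it frees the duplicate x86_cpu_cache_t when a cache turns out to be shared with a logical CPU that already registered it.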