/*
- * Copyright (c) 1999-2003 Apple Computer, Inc. All Rights Reserved.
+ * Copyright (c) 1999-2008 Apple Inc. All Rights Reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
the static linker does not need to
examine dependent dylibs to see
if any are re-exported */
+#define MH_PIE 0x200000 /* When this bit is set, the OS will
+ load the main executable at a
+ random address. Only used in
+ MH_EXECUTE filetypes. */
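A minimal sketch of how the flag above might be consumed by a tool that already has a mach_header in memory; the helper name is hypothetical:

static int is_pie_executable(const struct mach_header *mh)
{
	/* MH_PIE only applies to main executables (MH_EXECUTE). */
	return (mh->filetype == MH_EXECUTE) && ((mh->flags & MH_PIE) != 0);
}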
/*
* The load commands directly follow the mach_header. The total size of all
#define LC_CODE_SIGNATURE 0x1d /* local of code signature */
#define LC_SEGMENT_SPLIT_INFO 0x1e /* local of info to split segments */
#define LC_REEXPORT_DYLIB (0x1f | LC_REQ_DYLD) /* load and re-export dylib */
+#define LC_LAZY_LOAD_DYLIB 0x20 /* delay load of dylib until first use */
+#define LC_ENCRYPTION_INFO 0x21 /* encrypted segment information */
/*
* A variable length string in a load command is represented by an lc_str
#define S_INTERPOSING 0xd /* section with only pairs of
function pointers for
interposing */
-#define S_16BYTE_LITERALS 0xe /* section with only 16 byte literals */
+#define S_16BYTE_LITERALS 0xe /* section with only 16 byte
+ literals */
+#define S_DTRACE_DOF 0xf /* section contains
+ DTrace Object Format */
+#define S_LAZY_DYLIB_SYMBOL_POINTERS 0x10 /* section with only lazy
+ symbol pointers to lazy
+ loaded dylibs */
/*
* Constants for the section attributes part of the flags field of a section
* structure.
uint32_t datasize; /* file size of data in __LINKEDIT segment */
};
+/*
+ * The encryption_info_command contains the file offset and size of an
+ * encrypted segment.
+ */
+struct encryption_info_command {
+ uint32_t cmd; /* LC_ENCRYPTION_INFO */
+ uint32_t cmdsize; /* sizeof(struct encryption_info_command) */
+ uint32_t cryptoff; /* file offset of encrypted range */
+ uint32_t cryptsize; /* file size of encrypted range */
+ uint32_t cryptid; /* which encryption system,
+ 0 means not-encrypted yet */
+};
+
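A minimal sketch of how the new load command might be located, assuming a
32-bit image whose mach_header is already mapped at mh; cmdsize validation is
omitted and the helper name is hypothetical:

static const struct encryption_info_command *
find_encryption_info(const struct mach_header *mh)
{
	const struct load_command *lc = (const struct load_command *)(mh + 1);
	uint32_t i;

	/* Load commands follow the mach_header back to back. */
	for (i = 0; i < mh->ncmds; i++) {
		if (lc->cmd == LC_ENCRYPTION_INFO)
			return (const struct encryption_info_command *)lc;
		lc = (const struct load_command *)((const char *)lc + lc->cmdsize);
	}
	return NULL;	/* no encrypted range */
}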
/*
* The symseg_command contains the offset and size of the GNU style
* symbol table information as described in the header file <symseg.h>.
By default, the architecture is set to the build machine
architecture, and the kernel configuration is set to build for DEVELOPMENT.
- The machine configuration defaults to S5L8900XRB for arm and default for i386 and ppc.
+ The machine configuration defaults to S5L8900X for arm and default for i386 and ppc.
This will also create a bootable image, mach_kernel, and a kernel binary
with symbols, mach_kernel.sys.
Examples:
/* make a debug kernel for H1 arm board */
- make TARGET_CONFIGS="debug arm s5l8900xrb"
+ make TARGET_CONFIGS="debug arm s5l8900x"
- $(OBJROOT)/DEBUG_ARM_S5L8900XRB/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
- $(OBJROOT)/DEBUG_ARM_S5L8900XRB/mach_kernel: bootable image
+ $(OBJROOT)/DEBUG_ARM_S5L8900X/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
+ $(OBJROOT)/DEBUG_ARM_S5L8900X/mach_kernel: bootable image
/* make debug and development kernels for H1 arm board */
- make TARGET_CONFIGS="debug arm s5l8900xrb development arm s5l8900xrb"
+ make TARGET_CONFIGS="debug arm s5l8900x development arm s5l8900x"
- $(OBJROOT)/DEBUG_ARM_S5L8900XRB/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
- $(OBJROOT)/DEBUG_ARM_S5L8900XRB/mach_kernel: bootable image
- $(OBJROOT)/DEVELOPMENT_ARM/osfmk/DEVELOPMENT/osfmk.o: pre-linked object for osfmk component
- $(OBJROOT)/DEVELOPMENT_ARM/mach_kernel: bootable image
+ $(OBJROOT)/DEBUG_ARM_S5L8900X/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
+ $(OBJROOT)/DEBUG_ARM_S5L8900X/mach_kernel: bootable image
+ $(OBJROOT)/DEVELOPMENT_ARM_S5L8900X/osfmk/DEVELOPMENT/osfmk.o: pre-linked object for osfmk component
+ $(OBJROOT)/DEVELOPMENT_ARM_S5L8900X/mach_kernel: bootable image
/* this is all you need to do to build H1 arm with DEVELOPMENT kernel configuration */
make TARGET_CONFIGS="default arm default"
options CONFIG_FORCE_OUT_IFP # Force IP output to use an interface # <config_force_out_ifp>
options CONFIG_MBUF_NOEXPAND # limit mbuf expansion # <config_mbuf_noexpand>
options CONFIG_MBUF_JUMBO # jumbo cluster pool # <config_mbuf_jumbo>
+options CONFIG_IP_EDGEHOLE # Drop tagged packets at EDGE interface # <config_ip_edgehole>
+
+options CONFIG_WORKQUEUE # <config_workqueue>
#
# 4.4 filesystems
options CONFIG_VNODES=263168 # <medium>
options CONFIG_VNODES=10240 # <small>
options CONFIG_VNODES=1024 # <xsmall>
-options CONFIG_VNODES=512 # <bsmall>
+options CONFIG_VNODES=640 # <bsmall>
options CONFIG_VNODE_FREE_MIN=500 # <large,xlarge>
options CONFIG_VNODE_FREE_MIN=300 # <medium>
options CONFIG_NMBCLUSTERS="((1024 * 512) / MCLBYTES)" # <medium>
options CONFIG_NMBCLUSTERS="((1024 * 256) / MCLBYTES)" # <bsmall,xsmall,small>
+# set maximum space used for packet buffers
+#
+options CONFIG_USESOCKTHRESHOLD=1 # <large,xlarge,medium>
+options CONFIG_USESOCKTHRESHOLD=0 # <bsmall,xsmall,small>
+
#
# Configure size of TCP hash table
#
#
options CONFIG_EMBEDDED # <config_embedded>
+#
+# code decryption... used on embedded for app protection
+# must be set in all the bsd/conf and osfmk/conf MASTER files
+#
+options CONFIG_CODE_DECRYPTION # <config_embedded>
+
+
#
# Ethernet (ARP)
#
#
# Standard Apple Research Configurations:
# -------- ----- -------- ---------------
-# BASE = [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug compat_43_tty sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot ]
+# BASE = [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug compat_43_tty sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue ]
# FILESYS = [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo ffs union cd9660 config_volfs ]
# NETWORKING = [ inet inet6 compat_oldsock mrouting tcpdrop_synfin bpfilter ipdivert netat ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo ]
# NFS = [ nfsclient nfsserver ]
# PROFILE = [ RELEASE profile ]
# DEBUG = [ BASE NETWORKING NFS VPN FILESYS libdriver_g debug xpr_debug mach_assert ]
#
-# EMBEDDED_BASE = [ intel mach bsmall vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug compat_43_tty sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot ]
+# EMBEDDED_BASE = [ intel mach bsmall vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug compat_43_tty sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue ]
# EMBEDDED_FILESYS = [ devfs hfs journaling fdesc fifo ]
# EMBEDDED_NET = [ inet compat_oldsock mrouting tcpdrop_synfin bpfilter config_mbuf_noexpand ]
# EMBEDDED = [ EMBEDDED_BASE EMBEDDED_NET VPN EMBEDDED_FILESYS libdriver no_printf_str no_kprintf_str no_kdebug ]
# app-profiling i.e. pre-heating - off?
options CONFIG_APP_PROFILE=0
+#
+# code decryption... used on i386 for DSMOS
+# must be set in all the bsd/conf and osfmk/conf MASTER files
+#
+options CONFIG_CODE_DECRYPTION
+
#
# Ipl measurement system
#
# Standard Apple Research Configurations:
# -------- ----- -------- ---------------
#
-# BASE = [ ppc mach medium config_dtrace vol pst gdb noprofiling simple_clock kernstack compat_43_tty sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot ]
+# BASE = [ ppc mach medium config_dtrace vol pst gdb noprofiling simple_clock kernstack compat_43_tty sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue ]
# FILESYS = [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo ffs union cd9660 config_volfs ]
# NETWORKING = [ inet inet6 compat_oldsock mrouting tcpdrop_synfin bpfilter ipdivert netat ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk ]
# NFS = [ nfsclient nfsserver ]
OPTIONS/hw_ast optional hw_ast
OPTIONS/hw_footprint optional hw_footprint
OPTIONS/kernserv optional kernserv
+OPTIONS/config_ip_edgehole optional config_ip_edgehole
OPTIONS/config_macf optional config_macf
OPTIONS/config_macf_socket_subset optional config_macf_socket_subset
OPTIONS/config_macf_socket optional config_macf_socket
bsd/netinet/ip_input.c optional inet
bsd/netinet/ip_mroute.c optional mrouting
bsd/netinet/ip_output.c optional inet
+bsd/netinet/ip_edgehole.c optional config_ip_edgehole
bsd/netinet/raw_ip.c optional inet
bsd/netinet/tcp_debug.c optional tcpdebug
bsd/netinet/tcp_input.c optional inet
* XXX Warn if state is LAZY_OFF? It won't break anything, but
* makes no sense...
*/
- if (!PE_parse_boot_arg("dtrace_dof_mode", &dtrace_dof_mode)) {
+ if (!PE_parse_boot_argn("dtrace_dof_mode", &dtrace_dof_mode, sizeof (dtrace_dof_mode))) {
dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
}
fbt_init( void )
{
- PE_parse_boot_arg("DisableFBT", &gDisableFBT);
+ PE_parse_boot_argn("DisableFBT", &gDisableFBT, sizeof (gDisableFBT));
if (0 == gDisableFBT)
{
"pmap_cpu_high_map_vaddr",
"pmap_cpu_high_shared_remap",
"pmap_cpu_init",
- "rdHPET",
"register_cpu_setup_func",
"unregister_cpu_setup_func"
};
unsigned int i, j;
int gIgnoreFBTBlacklist = 0;
- PE_parse_boot_arg("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist);
+ PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist, sizeof (gIgnoreFBTBlacklist));
mh = (struct mach_header *)(ctl->address);
modname = ctl->mod_modname;
*/
if (strstr(name, "machine_stack_") == name ||
strstr(name, "mapping_") == name ||
- strstr(name, "hpet_") == name ||
-
- 0 == strcmp(name, "rdHPET") ||
- 0 == strcmp(name, "HPETInterrupt") ||
0 == strcmp(name, "tmrCvt") ||
strstr(name, "tsc_") == name ||
strstr(name, "pmCPU") == name ||
0 == strcmp(name, "Cstate_table_set") ||
- 0 == strcmp(name, "pmHPETInterrupt") ||
0 == strcmp(name, "pmKextRegister") ||
0 == strcmp(name, "pmSafeMode") ||
0 == strcmp(name, "pmUnregister") ||
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <i386/cpuid.h>
+#include <i386/tsc.h>
static int
hw_cpu_sysctl SYSCTL_HANDLER_ARGS
sizeof(uint32_t),
hw_cpu_sysctl, "I", "CPU cores per package");
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, microcode_version,
+ CTLTYPE_INT | CTLFLAG_RD,
+ (void *)offsetof(i386_cpu_info_t, cpuid_microcode_version),
+ sizeof(uint32_t),
+ hw_cpu_sysctl, "I", "Microcode version number");
+
SYSCTL_NODE(_machdep_cpu, OID_AUTO, mwait, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
"mwait");
hw_cpu_sysctl, "I", "Cache size (in Kbytes)");
+SYSCTL_NODE(_machdep_cpu, OID_AUTO, tlb, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
+ "tlb");
+
+SYSCTL_PROC(_machdep_cpu_tlb, OID_AUTO, inst_small,
+ CTLTYPE_INT | CTLFLAG_RD,
+ (void *)offsetof(i386_cpu_info_t, cpuid_itlb_small),
+ sizeof(uint32_t),
+ hw_cpu_sysctl, "I", "Number of small page instruction TLBs");
+
+SYSCTL_PROC(_machdep_cpu_tlb, OID_AUTO, data_small,
+ CTLTYPE_INT | CTLFLAG_RD,
+ (void *)offsetof(i386_cpu_info_t, cpuid_dtlb_small),
+ sizeof(uint32_t),
+ hw_cpu_sysctl, "I", "Number of small page data TLBs");
+
+SYSCTL_PROC(_machdep_cpu_tlb, OID_AUTO, inst_large,
+ CTLTYPE_INT | CTLFLAG_RD,
+ (void *)offsetof(i386_cpu_info_t, cpuid_itlb_large),
+ sizeof(uint32_t),
+ hw_cpu_sysctl, "I", "Number of large page instruction TLBs");
+
+SYSCTL_PROC(_machdep_cpu_tlb, OID_AUTO, data_large,
+ CTLTYPE_INT | CTLFLAG_RD,
+ (void *)offsetof(i386_cpu_info_t, cpuid_dtlb_large),
+ sizeof(uint32_t),
+ hw_cpu_sysctl, "I", "Number of large page data TLBs");
+
+
SYSCTL_NODE(_machdep_cpu, OID_AUTO, address_bits, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
"address_bits");
sizeof(uint32_t),
hw_cpu_sysctl, "I", "Number of virtual address bits");
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, core_count,
+ CTLTYPE_INT | CTLFLAG_RD,
+ (void *)offsetof(i386_cpu_info_t, core_count),
+ sizeof(uint32_t),
+ hw_cpu_sysctl, "I", "Number of enabled cores per package");
+
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, thread_count,
+ CTLTYPE_INT | CTLFLAG_RD,
+ (void *)offsetof(i386_cpu_info_t, thread_count),
+ sizeof(uint32_t),
+ hw_cpu_sysctl, "I", "Number of enabled threads per package");
+
+
uint64_t pmap_pv_hashlist_walks;
uint64_t pmap_pv_hashlist_cnts;
uint32_t pmap_pv_hashlist_max;
extern void unix_syscall(x86_saved_state_t *);
extern void unix_syscall64(x86_saved_state_t *);
extern void *find_user_regs(thread_t);
-extern void throttle_lowpri_io(int *lowpri_window, mount_t v_mount);
extern void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid);
extern boolean_t x86_sysenter_arg_store_isvalid(thread_t thread);
*/
syscall_exit_funnelcheck();
#endif /* DEBUG */
- if (uthread->uu_lowpri_window && uthread->v_mount) {
+ if (uthread->uu_lowpri_window) {
/*
* task is marked as a low priority I/O type
* and the I/O we issued while in this system call
* delay in order to mitigate the impact of this
* task on the normal operation of the system
*/
- throttle_lowpri_io(&uthread->uu_lowpri_window,uthread->v_mount);
+ throttle_lowpri_io(TRUE);
}
if (code != 180)
KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
*/
syscall_exit_funnelcheck();
- if (uthread->uu_lowpri_window && uthread->v_mount) {
+ if (uthread->uu_lowpri_window) {
/*
* task is marked as a low priority I/O type
* and the I/O we issued while in this system call
* delay in order to mitigate the impact of this
* task on the normal operation of the system
*/
- throttle_lowpri_io(&uthread->uu_lowpri_window,uthread->v_mount);
+ throttle_lowpri_io(TRUE);
}
if (code != 180)
KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
*/
syscall_exit_funnelcheck();
- if (uthread->uu_lowpri_window && uthread->v_mount) {
+ if (uthread->uu_lowpri_window) {
/*
* task is marked as a low priority I/O type
* and the I/O we issued while in this system call
* delay in order to mitigate the impact of this
* task on the normal operation of the system
*/
- throttle_lowpri_io(&uthread->uu_lowpri_window,uthread->v_mount);
+ throttle_lowpri_io(TRUE);
}
if (code != 180)
KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
static int mdevbioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p);
static int mdevcioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p);
static int mdevrw(dev_t dev, struct uio *uio, int ioflag);
+#ifdef CONFIG_MEMDEV_INSECURE
static char * nonspace(char *pos, char *end);
static char * getspace(char *pos, char *end);
static char * cvtnum(char *pos, char *end, unsigned int *num);
+#endif /* CONFIG_MEMDEV_INSECURE */
extern void bcopy_phys(addr64_t from, addr64_t to, vm_size_t bytes);
extern void mapping_set_mod(ppnum_t pn);
void mdevinit(__unused int the_cnt) {
+#ifdef CONFIG_MEMDEV_INSECURE
+
int devid, phys;
ppnum_t base;
unsigned int size;
char *ba, *lp;
dev_t dev;
-
ba = PE_boot_args(); /* Get the boot arguments */
lp = ba + 256; /* Point to the end */
dev = mdevadd(devid, base >> 12, size >> 12, phys); /* Go add the device */
}
-
+
+#endif /* CONFIG_MEMDEV_INSECURE */
return;
}
+#ifdef CONFIG_MEMDEV_INSECURE
char *nonspace(char *pos, char *end) { /* Find next non-space in string */
if(pos >= end) return end; /* Don't go past end */
pos++; /* Step on */
}
}
+#endif /* CONFIG_MEMDEV_INSECURE */
dev_t mdevadd(int devid, ppnum_t base, unsigned int size, int phys) {
char *modname;
unsigned int i;
- int gIgnoreFBTBlacklist = 0;
- PE_parse_boot_arg("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist);
+ int gIgnoreFBTBlacklist = 0;
+ PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist, sizeof (gIgnoreFBTBlacklist));
mh = (struct mach_header *)(ctl->address);
modname = ctl->mod_modname;
thread_t act);
extern lck_spin_t * tz_slock;
-extern void throttle_lowpri_io(int *lowpri_window, mount_t v_mount);
/*
* Function: unix_syscall
/* panic if funnel is held */
syscall_exit_funnelcheck();
- if (uthread->uu_lowpri_window && uthread->v_mount) {
+ if (uthread->uu_lowpri_window) {
/*
* task is marked as a low priority I/O type
* and the I/O we issued while in this system call
* delay in order to mitigate the impact of this
* task on the normal operation of the system
*/
- throttle_lowpri_io(&uthread->uu_lowpri_window,uthread->v_mount);
+ throttle_lowpri_io(TRUE);
}
if (kdebug_enable && (code != 180)) {
/* panic if funnel is held */
syscall_exit_funnelcheck();
- if (uthread->uu_lowpri_window && uthread->v_mount) {
+ if (uthread->uu_lowpri_window) {
/*
* task is marked as a low priority I/O type
* and the I/O we issued while in this system call
* delay in order to mitigate the impact of this
* task on the normal operation of the system
*/
- throttle_lowpri_io(&uthread->uu_lowpri_window,uthread->v_mount);
+ throttle_lowpri_io(TRUE);
}
if (kdebug_enable && (code != 180)) {
if (callp->sy_return_type == _SYSCALL_RET_SSIZE_T)
#if SOCKETS
{
+#if CONFIG_USESOCKTHRESHOLD
+ static const unsigned int maxspace = 64 * 1024;
+#else
+ static const unsigned int maxspace = 128 * 1024;
+#endif
int scale;
nmbclusters = bsd_mbuf_cluster_reserve() / MCLBYTES;
tcp_sendspace *= scale;
tcp_recvspace *= scale;
- if (tcp_sendspace > (64 * 1024))
- tcp_sendspace = 64 * 1024;
- if (tcp_recvspace > (64 * 1024))
- tcp_recvspace = 64 * 1024;
+ if (tcp_sendspace > maxspace)
+ tcp_sendspace = maxspace;
+ if (tcp_recvspace > maxspace)
+ tcp_recvspace = maxspace;
}
#endif /* INET || INET6 */
}
u_int32_t hfs_flags; /* see below */
/* Physical Description */
- u_long hfs_phys_block_size; /* Always a multiple of 512 */
- daddr64_t hfs_phys_block_count; /* Num of PHYSICAL blocks of volume */
- daddr64_t hfs_alt_id_sector; /* location of alternate VH/MDB */
+ u_int32_t hfs_logical_block_size; /* Logical block size of the disk as reported by ioctl(DKIOCGETBLOCKSIZE), always a multiple of 512 */
+ daddr64_t hfs_logical_block_count; /* Number of logical blocks on the disk */
+ daddr64_t hfs_alt_id_sector; /* location of alternate VH/MDB */
+ u_int32_t hfs_physical_block_size; /* Physical block size of the disk as reported by ioctl(DKIOCGETPHYSICALBLOCKSIZE) */
+ u_int32_t hfs_log_per_phys; /* Number of logical blocks per physical block size */
/* Access to VFS and devices */
struct mount *hfs_mp; /* filesystem vfs structure */
#define HFS_FOLDERCOUNT 0x10000
/* When set, the file system exists on a virtual device, like disk image */
#define HFS_VIRTUAL_DEVICE 0x20000
+/* When set, we're in hfs_changefs, so hfs_sync should do nothing. */
+#define HFS_IN_CHANGEFS 0x40000
/* Macro to update next allocation block in the HFS mount structure. If
#define HFS_ALT_SECTOR(blksize, blkcnt) (((blkcnt) - 1) - (512 / (blksize)))
#define HFS_ALT_OFFSET(blksize) ((blksize) > 1024 ? (blksize) - 1024 : 0)
+/* Convert the logical sector number to be aligned on physical block size boundary.
+ * We are assuming the partition is a multiple of physical block size.
+ */
+#define HFS_PHYSBLK_ROUNDDOWN(sector_num, log_per_phys) ((sector_num / log_per_phys) * log_per_phys)
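As a worked example of the macro above, assume 512-byte logical blocks on a
disk that reports 4096-byte physical blocks, so hfs_log_per_phys is 8. A
metadata read aimed at logical sector 1029 is rounded down to the start of its
physical block: HFS_PHYSBLK_ROUNDDOWN(1029, 8) == (1029 / 8) * 8 == 1024.
This is how the buf_meta_bread() calls elsewhere in this change keep their
offsets aligned to hfs_physical_block_size.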
/*
* HFS specific fcntl()'s
} else {
/* Convert the data fork. */
datafp->cf_size = recp->hfsPlusFile.dataFork.logicalSize;
+ datafp->cf_new_size = 0;
datafp->cf_blocks = recp->hfsPlusFile.dataFork.totalBlocks;
if ((hfsmp->hfc_stage == HFC_RECORDING) &&
(attrp->ca_atime >= hfsmp->hfc_timebase)) {
/* Convert the resource fork. */
rsrcfp->cf_size = recp->hfsPlusFile.resourceFork.logicalSize;
+ rsrcfp->cf_new_size = 0;
rsrcfp->cf_blocks = recp->hfsPlusFile.resourceFork.totalBlocks;
if ((hfsmp->hfc_stage == HFC_RECORDING) &&
(attrp->ca_atime >= hfsmp->hfc_timebase)) {
} else if (wantrsrc) {
/* Convert the resource fork. */
forkp->cf_size = recp->hfsPlusFile.resourceFork.logicalSize;
+ forkp->cf_new_size = 0;
forkp->cf_blocks = recp->hfsPlusFile.resourceFork.totalBlocks;
if ((hfsmp->hfc_stage == HFC_RECORDING) &&
(to_bsd_time(recp->hfsPlusFile.accessDate) >= hfsmp->hfc_timebase)) {
/* Convert the data fork. */
forkp->cf_size = recp->hfsPlusFile.dataFork.logicalSize;
+ forkp->cf_new_size = 0;
forkp->cf_blocks = recp->hfsPlusFile.dataFork.totalBlocks;
if ((hfsmp->hfc_stage == HFC_RECORDING) &&
(to_bsd_time(recp->hfsPlusFile.accessDate) >= hfsmp->hfc_timebase)) {
blksize = hfsmp->blockSize;
blkcount = howmany(kHFSAliasSize, blksize);
- sectorsize = hfsmp->hfs_phys_block_size;
+ sectorsize = hfsmp->hfs_logical_block_size;
bzero(rsrcforkp, sizeof(HFSPlusForkData));
/* Allocate some disk space for the alias content. */
blkno = ((u_int64_t)rsrcforkp->extents[0].startBlock * (u_int64_t)blksize) / sectorsize;
blkno += hfsmp->hfsPlusIOPosOffset / sectorsize;
- bp = buf_getblk(hfsmp->hfs_devvp, blkno, roundup(kHFSAliasSize, hfsmp->hfs_phys_block_size), 0, 0, BLK_META);
+ bp = buf_getblk(hfsmp->hfs_devvp, blkno, roundup(kHFSAliasSize, hfsmp->hfs_logical_block_size), 0, 0, BLK_META);
if (hfsmp->jnl) {
journal_modify_block_start(hfsmp->jnl, bp);
}
* Catalog Node Fork (runtime)
*
* NOTE: this is not the same as a struct HFSPlusForkData
+ *
+ * NOTE: if cf_new_size > cf_size, then a write is in progress and is extending
+ * the EOF; the new EOF will be cf_new_size. Writes and pageouts may validly
+ * write up to cf_new_size, but reads should only read up to cf_size. When
+ * an extending write is not in progress, cf_new_size is zero.
*/
struct cat_fork {
off_t cf_size; /* fork's logical size in bytes */
+ off_t cf_new_size; /* fork's logical size after write completes */
union {
u_int32_t cfu_clump; /* fork's clump size in bytes (sys files only) */
u_int64_t cfu_bytesread; /* bytes read from this fork */
*/
if (v_type == VDIR) {
hfs_reldirhints(cp, 0);
- }
+ }
if (cp->c_flag & C_HARDLINK) {
hfs_relorigins(cp);
if (vnode_isdir(vp)) {
hfs_reldirhints(cp, 0);
}
-
+
if (cp->c_flag & C_HARDLINK) {
hfs_relorigins(cp);
}
+
}
/* Release the file fork and related data */
if (fp) {
/* Aliases for common fields */
#define ff_size ff_data.cf_size
+#define ff_new_size ff_data.cf_new_size
#define ff_clumpsize ff_data.cf_clump
#define ff_bytesread ff_data.cf_bytesread
#define ff_blocks ff_data.cf_blocks
/*
* When first opening a BTree, we have to read the header node before the
* control block is initialized. In this case, totalNodes will be zero,
- * so skip the bounds checking.
+ * so skip the bounds checking. Also, we should ignore the header node when
+ * checking for invalid forwards and backwards links, since the header node's
+ * links can point back to itself legitimately.
*/
if (btcb->totalNodes != 0) {
if (srcDesc->fLink >= btcb->totalNodes) {
error = fsBTInvalidHeaderErr;
goto fail;
}
+
+ if ((src->blockNum != 0) && (srcDesc->fLink == (u_int32_t) src->blockNum)) {
+ printf("hfs_swap_BTNode: invalid forward link (0x%08x == 0x%08x)\n",
+ srcDesc->fLink, (u_int32_t) src->blockNum);
+ error = fsBTInvalidHeaderErr;
+ goto fail;
+ }
+ if ((src->blockNum != 0) && (srcDesc->bLink == (u_int32_t) src->blockNum)) {
+ printf("hfs_swap_BTNode: invalid backward link (0x%08x == 0x%08x)\n",
+ srcDesc->bLink, (u_int32_t) src->blockNum);
+ error = fsBTInvalidHeaderErr;
+ goto fail;
+ }
+
}
/*
if (direction == kSwapBTNodeHostToBig) {
/*
* Sanity check and swap the forward and backward links.
+ * Ignore the header node since its forward and backwards links can legitimately
+ * point to itself.
*/
if (srcDesc->fLink >= btcb->totalNodes) {
panic("hfs_UNswap_BTNode: invalid forward link (0x%08X)\n", srcDesc->fLink);
error = fsBTInvalidHeaderErr;
goto fail;
}
+ if ((src->blockNum != 0) && (srcDesc->fLink == (u_int32_t) src->blockNum)) {
+ panic ("hfs_UNswap_BTNode: invalid forward link (0x%08x == 0x%08x)\n",
+ srcDesc->fLink, (u_int32_t) src->blockNum);
+ error = fsBTInvalidHeaderErr;
+ goto fail;
+ }
+
if (srcDesc->bLink >= btcb->totalNodes) {
panic("hfs_UNswap_BTNode: invalid backward link (0x%08X)\n", srcDesc->bLink);
error = fsBTInvalidHeaderErr;
goto fail;
}
+ if ((src->blockNum != 0) && (srcDesc->bLink == (u_int32_t) src->blockNum)) {
+ panic ("hfs_UNswap_BTNode: invalid backward link (0x%08x == 0x%08x)\n",
+ srcDesc->bLink, (u_int32_t) src->blockNum);
+ error = fsBTInvalidHeaderErr;
+ goto fail;
+ }
+
+
srcDesc->fLink = SWAP_BE32 (srcDesc->fLink);
srcDesc->bLink = SWAP_BE32 (srcDesc->bLink);
* process removed the object before we had a chance
* to create the vnode, then just treat it as the not
* found case above and return EJUSTRETURN.
+ * We should do the same for the RENAME operation since we are
+ * going to write it in regardless.
*/
if ((retval == ENOENT) &&
- (cnp->cn_nameiop == CREATE) &&
+ ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)) &&
(flags & ISLASTCN)) {
retval = EJUSTRETURN;
}
hfs_unlock(cp);
cnode_locked = 0;
+
+ /*
+ * We need to tell UBC the fork's new size BEFORE calling
+ * cluster_write, in case any of the new pages need to be
+ * paged out before cluster_write completes (which does happen
+ * in embedded systems due to extreme memory pressure).
+ * Similarly, we need to tell hfs_vnop_pageout what the new EOF
+ * will be, so that it can pass that on to cluster_pageout, and
+ * allow those pageouts.
+ *
+ * We don't update ff_size yet since we don't want pageins to
+ * be able to see uninitialized data between the old and new
+ * EOF, until cluster_write has completed and initialized that
+ * part of the file.
+ *
+ * The vnode pager relies on the file size last given to UBC via
+ * ubc_setsize. hfs_vnop_pageout relies on fp->ff_new_size or
+ * ff_size (whichever is larger). NOTE: ff_new_size is always
+ * zero, unless we are extending the file via write.
+ */
+ if (filesize > fp->ff_size) {
+ fp->ff_new_size = filesize;
+ ubc_setsize(vp, filesize);
+ }
retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
tail_off, lflag | IO_NOZERODIRTY);
if (retval) {
+ fp->ff_new_size = 0; /* no longer extending; use ff_size */
+ if (filesize > origFileSize) {
+ ubc_setsize(vp, origFileSize);
+ }
goto ioerr_exit;
}
- offset = uio_offset(uio);
- if (offset > fp->ff_size) {
- fp->ff_size = offset;
-
- ubc_setsize(vp, fp->ff_size); /* XXX check errors */
+
+ if (filesize > origFileSize) {
+ fp->ff_size = filesize;
+
/* Files that are changing size are not hot file candidates. */
- if (hfsmp->hfc_stage == HFC_RECORDING)
+ if (hfsmp->hfc_stage == HFC_RECORDING) {
fp->ff_bytesread = 0;
+ }
}
+ fp->ff_new_size = 0; /* ff_size now has the correct size */
+
+ /* If we wrote some bytes, then touch the change and mod times */
if (resid > uio_resid(uio)) {
cp->c_touch_chgtime = TRUE;
cp->c_touch_modtime = TRUE;
cp = VTOC(vp);
fp = VTOF(vp);
- if (vnode_isswap(vp)) {
- filesize = fp->ff_size;
- } else {
+ /*
+ * Figure out where the file ends, for pageout purposes. If
+ * ff_new_size > ff_size, then we're in the middle of extending the
+ * file via a write, so it is safe (and necessary) that we be able
+ * to pageout up to that point.
+ */
+ filesize = fp->ff_size;
+ if (fp->ff_new_size > filesize)
+ filesize = fp->ff_new_size;
+
+ if (!vnode_isswap(vp)) {
off_t end_of_range;
int tooklock = 0;
tooklock = 1;
}
- filesize = fp->ff_size;
end_of_range = ap->a_f_offset + ap->a_size - 1;
if (end_of_range >= filesize) {
retval = ENOSPC;
goto restore;
} else if ((eflags & kEFMetadataMask) &&
- ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
+ ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
hfsmp->hfs_metazone_end)) {
const char * filestr;
char emptystr = '\0';
if ((retval = hfs_flushfiles(mp, flags, p)))
goto out;
- hfsmp->hfs_flags |= HFS_READ_ONLY;
+
+ /* mark the volume cleanly unmounted */
+ hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask;
retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
+ hfsmp->hfs_flags |= HFS_READ_ONLY;
/* also get the volume bitmap blocks */
if (!retval) {
goto out;
}
-
- retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
- if (retval != E_NONE)
- goto out;
-
// If the journal was shut-down previously because we were
// asked to be read-only, let's start it back up again now
(hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
hfsmp->jnl_size,
hfsmp->hfs_devvp,
- hfsmp->hfs_phys_block_size,
+ hfsmp->hfs_logical_block_size,
jflags,
0,
hfs_sync_metadata, hfsmp->hfs_mp);
/* Only clear HFS_READ_ONLY after a successful write */
hfsmp->hfs_flags &= ~HFS_READ_ONLY;
- if (!(hfsmp->hfs_flags & (HFS_READ_ONLY & HFS_STANDARD))) {
+ /* mark the volume dirty (clear clean unmount bit) */
+ hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask;
+
+ retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
+ if (retval != E_NONE)
+ goto out;
+
+ if (!(hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_STANDARD))) {
/* Setup private/hidden directories for hardlinks. */
hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
hfs_privatedir_init(hfsmp, DIR_HARDLINKS);
vcb = HFSTOVCB(hfsmp);
mount_flags = (unsigned int)vfs_flags(mp);
+ hfsmp->hfs_flags |= HFS_IN_CHANGEFS;
+
permswitch = (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) &&
((mount_flags & MNT_UNKNOWNPERMISSIONS) == 0)) ||
(((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) == 0) &&
/* The root filesystem must operate with actual permissions: */
if (permswitch && (mount_flags & MNT_ROOTFS) && (mount_flags & MNT_UNKNOWNPERMISSIONS)) {
vfs_clearflags(mp, (u_int64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS)); /* Just say "No". */
- return EINVAL;
+ retval = EINVAL;
+ goto exit;
}
if (mount_flags & MNT_UNKNOWNPERMISSIONS)
hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
(void) hfs_relconverter(old_encoding);
}
exit:
+ hfsmp->hfs_flags &= ~HFS_IN_CHANGEFS;
return (retval);
}
{
register struct vnode *devvp;
struct buf *bp;
- int sectorsize;
int error, i;
struct hfsmount *hfsmp;
struct HFSPlusVolumeHeader *vhp;
struct filefork *forkp;
struct cat_desc cndesc;
struct hfs_reload_cargs args;
+ daddr64_t priIDSector;
hfsmp = VFSTOHFS(mountp);
vcb = HFSTOVCB(hfsmp);
/*
* Re-read VolumeHeader from disk.
*/
- sectorsize = hfsmp->hfs_phys_block_size;
+ priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
+ HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
error = (int)buf_meta_bread(hfsmp->hfs_devvp,
- (daddr64_t)((vcb->hfsPlusIOPosOffset / sectorsize) + HFS_PRI_SECTOR(sectorsize)),
- sectorsize, NOCRED, &bp);
+ HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
+ hfsmp->hfs_physical_block_size, NOCRED, &bp);
if (error) {
if (bp != NULL)
buf_brelse(bp);
return (error);
}
- vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(sectorsize));
+ vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
/* Do a quick sanity check */
if ((SWAP_BE16(vhp->signature) != kHFSPlusSigWord &&
int mntwrapper;
kauth_cred_t cred;
u_int64_t disksize;
- daddr64_t blkcnt;
- u_int32_t blksize;
+ daddr64_t log_blkcnt;
+ u_int32_t log_blksize;
+ u_int32_t phys_blksize;
u_int32_t minblksize;
u_int32_t iswritable;
daddr64_t mdb_offset;
/* Advisory locking should be handled at the VFS layer */
vfs_setlocklocal(mp);
- /* Get the real physical block size. */
- if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0, context)) {
+ /* Get the logical block size (treated as physical block size everywhere) */
+ if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&log_blksize, 0, context)) {
retval = ENXIO;
goto error_exit;
}
+ /* Get the physical block size. */
+ retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context);
+ if (retval) {
+ if ((retval != ENOTSUP) && (retval != ENOTTY)) {
+ retval = ENXIO;
+ goto error_exit;
+ }
+ /* If the device does not support this ioctl, assume that the physical
+ * block size is the same as the logical block size
+ */
+ phys_blksize = log_blksize;
+ }
/* Switch to 512 byte sectors (temporarily) */
- if (blksize > 512) {
+ if (log_blksize > 512) {
u_int32_t size512 = 512;
if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, context)) {
}
}
/* Get the number of 512 byte physical blocks. */
- if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, context)) {
+ if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
/* resetting block size may fail if getting block count did */
- (void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, context);
+ (void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context);
retval = ENXIO;
goto error_exit;
}
/* Compute an accurate disk size (i.e. within 512 bytes) */
- disksize = (u_int64_t)blkcnt * (u_int64_t)512;
+ disksize = (u_int64_t)log_blkcnt * (u_int64_t)512;
/*
* On Tiger it is not necessary to switch the device
* worth of blocks but to insure compatibility with
* pre-Tiger systems we have to do it.
*/
- if (blkcnt > 0x000000007fffffff) {
- minblksize = blksize = 4096;
+ if (log_blkcnt > 0x000000007fffffff) {
+ minblksize = log_blksize = 4096;
+ if (phys_blksize < log_blksize)
+ phys_blksize = log_blksize;
}
/* Now switch to our preferred physical block size. */
- if (blksize > 512) {
- if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, context)) {
+ if (log_blksize > 512) {
+ if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
retval = ENXIO;
goto error_exit;
}
/* Get the count of physical blocks. */
- if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, context)) {
+ if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
retval = ENXIO;
goto error_exit;
}
/*
* At this point:
* minblksize is the minimum physical block size
- * blksize has our preferred physical block size
- * blkcnt has the total number of physical blocks
+ * log_blksize has our preferred physical block size
+ * log_blkcnt has the total number of physical blocks
*/
- mdb_offset = (daddr64_t)HFS_PRI_SECTOR(blksize);
- if ((retval = (int)buf_meta_bread(devvp, mdb_offset, blksize, cred, &bp))) {
+ mdb_offset = (daddr64_t)HFS_PRI_SECTOR(log_blksize);
+ if ((retval = (int)buf_meta_bread(devvp,
+ HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)),
+ phys_blksize, cred, &bp))) {
goto error_exit;
}
MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK);
- bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(blksize), mdbp, kMDBSize);
+ bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize);
buf_brelse(bp);
bp = NULL;
hfsmp->hfs_raw_dev = vnode_specrdev(devvp);
hfsmp->hfs_devvp = devvp;
vnode_ref(devvp); /* Hold a ref on the device, dropped when hfsmp is freed. */
- hfsmp->hfs_phys_block_size = blksize;
- hfsmp->hfs_phys_block_count = blkcnt;
+ hfsmp->hfs_logical_block_size = log_blksize;
+ hfsmp->hfs_logical_block_count = log_blkcnt;
+ hfsmp->hfs_physical_block_size = phys_blksize;
+ hfsmp->hfs_log_per_phys = (phys_blksize / log_blksize);
hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
if (ronly)
hfsmp->hfs_flags |= HFS_READ_ONLY;
goto error_exit;
}
/* HFS disks can only use 512 byte physical blocks */
- if (blksize > kHFSBlockSize) {
- blksize = kHFSBlockSize;
- if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, context)) {
+ if (log_blksize > kHFSBlockSize) {
+ log_blksize = kHFSBlockSize;
+ if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
retval = ENXIO;
goto error_exit;
}
- if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, context)) {
+ if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
retval = ENXIO;
goto error_exit;
}
- hfsmp->hfs_phys_block_size = blksize;
- hfsmp->hfs_phys_block_count = blkcnt;
+ hfsmp->hfs_logical_block_size = log_blksize;
+ hfsmp->hfs_logical_block_count = log_blkcnt;
}
if (args) {
hfsmp->hfs_encoding = args->hfs_encoding;
* block size so everything will line up on a block
* boundary.
*/
- if ((embeddedOffset % blksize) != 0) {
+ if ((embeddedOffset % log_blksize) != 0) {
printf("HFS Mount: embedded volume offset not"
" a multiple of physical block size (%d);"
- " switching to 512\n", blksize);
- blksize = 512;
+ " switching to 512\n", log_blksize);
+ log_blksize = 512;
if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE,
- (caddr_t)&blksize, FWRITE, context)) {
+ (caddr_t)&log_blksize, FWRITE, context)) {
retval = ENXIO;
goto error_exit;
}
if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT,
- (caddr_t)&blkcnt, 0, context)) {
+ (caddr_t)&log_blkcnt, 0, context)) {
retval = ENXIO;
goto error_exit;
}
/* Note: relative block count adjustment */
- hfsmp->hfs_phys_block_count *=
- hfsmp->hfs_phys_block_size / blksize;
- hfsmp->hfs_phys_block_size = blksize;
+ hfsmp->hfs_logical_block_count *=
+ hfsmp->hfs_logical_block_size / log_blksize;
+ hfsmp->hfs_logical_block_size = log_blksize;
}
disksize = (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.blockCount) *
(u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
- hfsmp->hfs_phys_block_count = disksize / blksize;
+ hfsmp->hfs_logical_block_count = disksize / log_blksize;
- mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
- retval = (int)buf_meta_bread(devvp, mdb_offset, blksize, cred, &bp);
+ mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
+ retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
+ phys_blksize, cred, &bp);
if (retval)
goto error_exit;
- bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(blksize), mdbp, 512);
+ bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, 512);
buf_brelse(bp);
bp = NULL;
vhp = (HFSPlusVolumeHeader*) mdbp;
hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
if (mdb_offset == 0) {
- mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
+ mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
}
bp = NULL;
- retval = (int)buf_meta_bread(devvp, mdb_offset, blksize, cred, &bp);
+ retval = (int)buf_meta_bread(devvp,
+ HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
+ phys_blksize, cred, &bp);
if (retval == 0) {
- jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(blksize));
+ jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
printf ("hfs(1): Journal replay fail. Writing lastMountVersion as FSK!\n");
* If the backend didn't like our physical blocksize
* then retry with physical blocksize of 512.
*/
- if ((retval == ENXIO) && (blksize > 512) && (blksize != minblksize)) {
+ if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) {
printf("HFS Mount: could not use physical block size "
- "(%d) switching to 512\n", blksize);
- blksize = 512;
- if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, context)) {
+ "(%d) switching to 512\n", log_blksize);
+ log_blksize = 512;
+ if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
retval = ENXIO;
goto error_exit;
}
- if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, context)) {
+ if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
retval = ENXIO;
goto error_exit;
}
- devvp->v_specsize = blksize;
+ devvp->v_specsize = log_blksize;
/* Note: relative block count adjustment (in case this is an embedded volume). */
- hfsmp->hfs_phys_block_count *= hfsmp->hfs_phys_block_size / blksize;
- hfsmp->hfs_phys_block_size = blksize;
+ hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize;
+ hfsmp->hfs_logical_block_size = log_blksize;
if (hfsmp->jnl) {
// close and re-open this with the new block size
hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
if (mdb_offset == 0) {
- mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
+ mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
}
bp = NULL;
- retval = (int)buf_meta_bread(devvp, mdb_offset, blksize, cred, &bp);
+ retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
+ phys_blksize, cred, &bp);
if (retval == 0) {
- jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(blksize));
+ jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
printf ("hfs(2): Journal replay fail. Writing lastMountVersion as FSK!\n");
struct hfsmount *hfsmp;
ExtendedVCB *vcb;
buf_t bp;
- int sectorsize, retval;
+ int retval;
daddr64_t priIDSector;
hfsmp = VFSTOHFS(mp);
vcb = HFSTOVCB(hfsmp);
// now make sure the super block is flushed
- sectorsize = hfsmp->hfs_phys_block_size;
- priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / sectorsize) +
- HFS_PRI_SECTOR(sectorsize));
- retval = (int)buf_meta_bread(hfsmp->hfs_devvp, priIDSector, sectorsize, NOCRED, &bp);
+ priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
+ HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
+
+ retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
+ HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
+ hfsmp->hfs_physical_block_size, NOCRED, &bp);
if ((retval != 0 ) && (retval != ENXIO)) {
printf("hfs_sync_metadata: can't read volume header at %d! (retval 0x%x)\n",
(int)priIDSector, retval);
// hfs_btreeio.c:FlushAlternate() should flag when it was
// written...
if (hfsmp->hfs_alt_id_sector) {
- retval = (int)buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sectorsize, NOCRED, &bp);
+ retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
+ HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
+ hfsmp->hfs_physical_block_size, NOCRED, &bp);
if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
buf_bwrite(bp);
} else if (bp) {
int error, allerror = 0;
struct hfs_sync_cargs args;
+ hfsmp = VFSTOHFS(mp);
+
/*
- * During MNT_UPDATE hfs_changefs might be manipulating
- * vnodes so back off
+ * hfs_changefs might be manipulating vnodes so back off
*/
- if (((u_int32_t)vfs_flags(mp)) & MNT_UPDATE) /* XXX MNT_UPDATE may not be visible here */
+ if (hfsmp->hfs_flags & HFS_IN_CHANGEFS)
return (0);
- hfsmp = VFSTOHFS(mp);
if (hfsmp->hfs_flags & HFS_READ_ONLY)
return (EROFS);
+ HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
(off_t)((unsigned)name[3]),
hfsmp->hfs_devvp,
- hfsmp->hfs_phys_block_size,
+ hfsmp->hfs_logical_block_size,
0,
0,
hfs_sync_metadata, hfsmp->hfs_mp);
int sectorsize;
ByteCount namelen;
- sectorsize = hfsmp->hfs_phys_block_size;
+ sectorsize = hfsmp->hfs_logical_block_size;
retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sectorsize), sectorsize, NOCRED, &bp);
if (retval) {
if (bp)
int retval;
struct buf *bp;
int i;
- int sectorsize;
daddr64_t priIDSector;
int critical;
u_int16_t signature;
return hfs_flushMDB(hfsmp, waitfor, altflush);
}
critical = altflush;
- sectorsize = hfsmp->hfs_phys_block_size;
- priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / sectorsize) +
- HFS_PRI_SECTOR(sectorsize));
+ priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
+ HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
if (hfs_start_transaction(hfsmp) != 0) {
return EINVAL;
}
- retval = (int)buf_meta_bread(hfsmp->hfs_devvp, priIDSector, sectorsize, NOCRED, &bp);
+ retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
+ HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
+ hfsmp->hfs_physical_block_size, NOCRED, &bp);
if (retval) {
if (bp)
buf_brelse(bp);
journal_modify_block_start(hfsmp->jnl, bp);
}
- volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(sectorsize));
+ volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) +
+ HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
/*
* Sanity check what we just read.
struct buf *bp2;
HFSMasterDirectoryBlock *mdb;
- retval = (int)buf_meta_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sectorsize),
- sectorsize, NOCRED, &bp2);
+ retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
+ HFS_PHYSBLK_ROUNDDOWN(HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size), hfsmp->hfs_log_per_phys),
+ hfsmp->hfs_physical_block_size, NOCRED, &bp2);
if (retval) {
if (bp2)
buf_brelse(bp2);
retval = 0;
} else {
mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp2) +
- HFS_PRI_OFFSET(sectorsize));
+ HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
if ( SWAP_BE32 (mdb->drCrDate) != vcb->localCreateDate )
{
if (altflush && hfsmp->hfs_alt_id_sector) {
struct buf *alt_bp = NULL;
- if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sectorsize, NOCRED, &alt_bp) == 0) {
+ if (buf_meta_bread(hfsmp->hfs_devvp,
+ HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
+ hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) {
if (hfsmp->jnl) {
journal_modify_block_start(hfsmp->jnl, alt_bp);
}
- bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sectorsize), kMDBSize);
+ bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) +
+ HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size),
+ kMDBSize);
if (hfsmp->jnl) {
journal_modify_block_end(hfsmp->jnl, alt_bp, NULL, NULL);
u_int32_t addblks;
u_int64_t sectorcnt;
u_int32_t sectorsize;
+ u_int32_t phys_sectorsize;
daddr64_t prev_alt_sector;
daddr_t bitmapblks;
int lockflags;
if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sectorsize, 0, context)) {
return (ENXIO);
}
- if (sectorsize != hfsmp->hfs_phys_block_size) {
+ if (sectorsize != hfsmp->hfs_logical_block_size) {
return (ENXIO);
}
if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sectorcnt, 0, context)) {
printf("hfs_extendfs: not enough space on device\n");
return (ENOSPC);
}
+ error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sectorsize, 0, context);
+ if (error) {
+ if ((error != ENOTSUP) && (error != ENOTTY)) {
+ return (ENXIO);
+ }
+ /* If the ioctl is not supported, force the physical and logical sector sizes to be the same */
+ phys_sectorsize = sectorsize;
+ }
+ if (phys_sectorsize != hfsmp->hfs_physical_block_size) {
+ return (ENXIO);
+ }
oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
/*
* Validate new size.
*/
- if ((newsize <= oldsize) || (newsize % sectorsize)) {
+ if ((newsize <= oldsize) || (newsize % sectorsize) || (newsize % phys_sectorsize)) {
printf("hfs_extendfs: invalid size\n");
return (EINVAL);
}
/*
* Adjust file system variables for new space.
*/
- prev_phys_block_count = hfsmp->hfs_phys_block_count;
+ prev_phys_block_count = hfsmp->hfs_logical_block_count;
prev_alt_sector = hfsmp->hfs_alt_id_sector;
vcb->totalBlocks += addblks;
vcb->freeBlocks += addblks;
- hfsmp->hfs_phys_block_count = newsize / sectorsize;
+ hfsmp->hfs_logical_block_count = newsize / sectorsize;
hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / sectorsize) +
- HFS_ALT_SECTOR(sectorsize, hfsmp->hfs_phys_block_count);
+ HFS_ALT_SECTOR(sectorsize, hfsmp->hfs_logical_block_count);
MarkVCBDirty(vcb);
error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
if (error) {
}
vcb->totalBlocks -= addblks;
vcb->freeBlocks -= addblks;
- hfsmp->hfs_phys_block_count = prev_phys_block_count;
+ hfsmp->hfs_logical_block_count = prev_phys_block_count;
hfsmp->hfs_alt_id_sector = prev_alt_sector;
MarkVCBDirty(vcb);
if (vcb->blockSize == 512)
*/
bp = NULL;
if (prev_alt_sector) {
- if (buf_meta_bread(hfsmp->hfs_devvp, prev_alt_sector, sectorsize,
- NOCRED, &bp) == 0) {
+ if (buf_meta_bread(hfsmp->hfs_devvp,
+ HFS_PHYSBLK_ROUNDDOWN(prev_alt_sector, hfsmp->hfs_log_per_phys),
+ hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) {
journal_modify_block_start(hfsmp->jnl, bp);
- bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(sectorsize), kMDBSize);
+ bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize);
journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
} else if (bp) {
/* Make sure new size is valid. */
if ((newsize < HFS_MIN_SIZE) ||
(newsize >= oldsize) ||
- (newsize % hfsmp->hfs_phys_block_size)) {
+ (newsize % hfsmp->hfs_logical_block_size) ||
+ (newsize % hfsmp->hfs_physical_block_size)) {
+ printf ("hfs_truncatefs: invalid size\n");
error = EINVAL;
goto out;
}
* since this block will be outside of the truncated file system!
*/
if (hfsmp->hfs_alt_id_sector) {
- if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector,
- hfsmp->hfs_phys_block_size, NOCRED, &bp) == 0) {
+ if (buf_meta_bread(hfsmp->hfs_devvp,
+ HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
+ hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) {
- bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_phys_block_size)), kMDBSize);
+ bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)), kMDBSize);
(void) VNOP_BWRITE(bp);
} else if (bp) {
buf_brelse(bp);
* Adjust file system variables and flush them to disk.
*/
hfsmp->totalBlocks = newblkcnt;
- hfsmp->hfs_phys_block_count = newsize / hfsmp->hfs_phys_block_size;
- hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_phys_block_size, hfsmp->hfs_phys_block_count);
+ hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size;
+ hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
MarkVCBDirty(hfsmp);
error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
if (error)
size_t ioSize;
u_int32_t ioSizeSectors; /* Device sectors in this I/O */
daddr64_t srcSector, destSector;
- u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_phys_block_size;
+ u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size;
/*
* Sanity check that we have locked the vnode of the file we're copying.
buf_setdataptr(bp, (uintptr_t)buffer);
resid = (off_t) blockCount * (off_t) hfsmp->blockSize;
- srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_phys_block_size;
- destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_phys_block_size;
+ srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
+ destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
while (resid > 0) {
ioSize = MIN(bufferSize, resid);
- ioSizeSectors = ioSize / hfsmp->hfs_phys_block_size;
+ ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size;
/* Prepare the buffer for reading */
buf_reset(bp, B_READ);
JournalInfoBlock *jibp;
error = buf_meta_bread(hfsmp->hfs_devvp,
- hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_phys_block_size),
+ hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
hfsmp->blockSize, vfs_context_ucred(args->context), &bp);
if (error) {
printf("hfs_reclaim_journal_file: failed to read JIB (%d)\n", error);
/* Copy the old journal info block content to the new location */
error = buf_meta_bread(hfsmp->hfs_devvp,
- hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_phys_block_size),
+ hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
hfsmp->blockSize, vfs_context_ucred(context), &old_bp);
if (error) {
printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error);
goto free_fail;
}
new_bp = buf_getblk(hfsmp->hfs_devvp,
- newBlock * (hfsmp->blockSize/hfsmp->hfs_phys_block_size),
+ newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
hfsmp->blockSize, 0, 0, BLK_META);
bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize);
buf_brelse(old_bp);
if (error || (utf8chars == 0))
(void) mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
- hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_phys_block_size);
+ hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
vcb->vcbVBMIOSize = kHFSBlockSize;
- hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_phys_block_size,
- hfsmp->hfs_phys_block_count);
+ hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
+ hfsmp->hfs_logical_block_count);
bzero(&cndesc, sizeof(cndesc));
cndesc.cd_parentcnid = kHFSRootParentID;
return (EINVAL);
/* Make sure we can live with the physical block size. */
- if ((disksize & (hfsmp->hfs_phys_block_size - 1)) ||
- (embeddedOffset & (hfsmp->hfs_phys_block_size - 1)) ||
- (blockSize < hfsmp->hfs_phys_block_size)) {
+ if ((disksize & (hfsmp->hfs_logical_block_size - 1)) ||
+ (embeddedOffset & (hfsmp->hfs_logical_block_size - 1)) ||
+ (blockSize < hfsmp->hfs_logical_block_size)) {
return (ENXIO);
}
+
+ /* If allocation block size is less than the physical
+ * block size, we assume that the physical block size
+ * is the same as the logical block size. The physical block
+ * size value is used to round down the offsets for
+ * reading and writing the primary and alternate volume
+ * headers at a physical block boundary and will cause
+ * problems if it is less than the block size.
+ */
+ if (blockSize < hfsmp->hfs_physical_block_size) {
+ hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
+ }
+
/*
* The VolumeHeader seems OK: transfer info from it into VCB
* Note - the VCB starts out clear (all zeros)
* (currently set up from the wrapper MDB) using the
* new blocksize value:
*/
- hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_phys_block_size);
+ hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO);
/*
* Validate and initialize the location of the alternate volume header.
*/
- spare_sectors = hfsmp->hfs_phys_block_count -
+ spare_sectors = hfsmp->hfs_logical_block_count -
(((daddr64_t)vcb->totalBlocks * blockSize) /
- hfsmp->hfs_phys_block_size);
+ hfsmp->hfs_logical_block_size);
- if (spare_sectors > (blockSize / hfsmp->hfs_phys_block_size)) {
+ if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) {
hfsmp->hfs_alt_id_sector = 0; /* partition has grown! */
} else {
- hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_phys_block_size) +
- HFS_ALT_SECTOR(hfsmp->hfs_phys_block_size,
- hfsmp->hfs_phys_block_count);
+ hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
+ HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
+ hfsmp->hfs_logical_block_count);
}
bzero(&cndesc, sizeof(cndesc));
cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
cfork.cf_size = SWAP_BE64 (vhp->extentsFile.logicalSize);
+ cfork.cf_new_size= 0;
cfork.cf_clump = SWAP_BE32 (vhp->extentsFile.clumpSize);
cfork.cf_blocks = SWAP_BE32 (vhp->extentsFile.totalBlocks);
cfork.cf_vblocks = 0;
mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize));
- retval = (int)buf_meta_bread(hfsmp->hfs_devvp, mdb_offset, blockSize, cred, &bp);
+ retval = (int)buf_meta_bread(hfsmp->hfs_devvp,
+ HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
+ hfsmp->hfs_physical_block_size, cred, &bp);
if (retval == 0) {
- jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(blockSize));
+ jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
printf ("hfs(3): Journal replay fail. Writing lastMountVersion as FSK!\n");
JournalInfoBlock *jibp;
struct buf *jinfo_bp, *bp;
int sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
- int retval, blksize = hfsmp->hfs_phys_block_size;
+ int retval;
+ uint32_t blksize = hfsmp->hfs_logical_block_size;
struct vnode *devvp;
struct hfs_mount_args *args = _args;
u_int32_t jib_flags;
jib_offset + embeddedOffset,
jib_size,
devvp,
- hfsmp->hfs_phys_block_size);
+ hfsmp->hfs_logical_block_size);
hfsmp->jnl = NULL;
if (mdb_offset == 0) {
mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
}
- retval = (int)buf_meta_bread(devvp, mdb_offset, blksize, cred, &bp);
+ retval = (int)buf_meta_bread(devvp,
+ HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
+ hfsmp->hfs_physical_block_size, cred, &bp);
if (retval) {
buf_brelse(bp);
printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n",
retval);
return retval;
}
- bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(blksize), mdbp, 512);
+ bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512);
buf_brelse(bp);
bp = NULL;
}
}
- sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_phys_block_size;
+ sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;
retval = (int)buf_meta_bread(devvp,
- (daddr64_t)(vcb->hfsPlusIOPosOffset / hfsmp->hfs_phys_block_size +
+ (daddr64_t)(vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size +
(SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
SWAP_BE32(vhp->blockSize), NOCRED, &jinfo_bp);
if (retval) {
jib_offset + (off_t)vcb->hfsPlusIOPosOffset,
jib_size,
devvp,
- hfsmp->hfs_phys_block_size);
+ hfsmp->hfs_logical_block_size);
hfsmp->jnl = NULL;
jib_offset + (off_t)vcb->hfsPlusIOPosOffset,
jib_size,
devvp,
- hfsmp->hfs_phys_block_size,
+ hfsmp->hfs_logical_block_size,
arg_flags,
arg_tbufsz,
hfs_sync_metadata, hfsmp->hfs_mp);
jib_offset + (off_t)vcb->hfsPlusIOPosOffset,
jib_size,
devvp,
- hfsmp->hfs_phys_block_size,
+ hfsmp->hfs_logical_block_size,
arg_flags,
arg_tbufsz,
hfs_sync_metadata, hfsmp->hfs_mp);
error = hfs_lockfour(VTOC(fdvp), VTOC(fvp), VTOC(tdvp), tvp ? VTOC(tvp) : NULL,
HFS_EXCLUSIVE_LOCK);
if (error) {
- if (took_trunc_lock)
+ if (took_trunc_lock) {
hfs_unlock_truncate(VTOC(tvp), TRUE);
+ took_trunc_lock = 0;
+ }
+ /*
+	 * tvp might no longer exist. If we get ENOENT, re-check the
+ * C_NOEXISTS flag on tvp to find out whether it's still in the
+ * namespace.
+ */
+ if (error == ENOENT && tvp) {
+ /*
+ * It's okay to just check C_NOEXISTS without having a lock,
+ * because we have an iocount on it from the vfs layer so it can't
+ * have disappeared.
+ */
+ if (VTOC(tvp)->c_flag & C_NOEXISTS) {
+ /*
+ * tvp is no longer in the namespace. Try again with NULL
+ * tvp/tcp (NULLing these out is fine because the vfs syscall
+ * will vnode_put the vnodes).
+ */
+ tcp = NULL;
+ tvp = NULL;
+ goto retry;
+ }
+ }
return (error);
}
}
/* Write the link to disk */
- bp = buf_getblk(vp, (daddr64_t)0, roundup((int)fp->ff_size, VTOHFS(vp)->hfs_phys_block_size),
+ bp = buf_getblk(vp, (daddr64_t)0, roundup((int)fp->ff_size, hfsmp->hfs_physical_block_size),
0, 0, BLK_META);
if (hfsmp->jnl) {
journal_modify_block_start(hfsmp->jnl, bp);
MALLOC(fp->ff_symlinkptr, char *, fp->ff_size, M_TEMP, M_WAITOK);
error = (int)buf_meta_bread(vp, (daddr64_t)0,
- roundup((int)fp->ff_size,
- VTOHFS(vp)->hfs_phys_block_size),
+ roundup((int)fp->ff_size, VTOHFS(vp)->hfs_physical_block_size),
vfs_context_ucred(ap->a_context), &bp);
if (error) {
if (bp)
btreePtr->fileRefNum = GetFileRefNumFromFCB(filePtr);
filePtr->fcbBTCBPtr = (Ptr) btreePtr; // attach btree cb to file
- /* The minimum node size is the physical block size */
- nodeRec.blockSize = VTOHFS(btreePtr->fileRefNum)->hfs_phys_block_size;
+ /* Prefer doing I/O a physical block at a time */
+ nodeRec.blockSize = VTOHFS(btreePtr->fileRefNum)->hfs_physical_block_size;
/* Start with the allocation block size for regular files. */
if (FTOC(filePtr)->c_fileid >= kHFSFirstUserCatalogNodeID)
// set kBadClose attribute bit, and UpdateNode
- /* b-tree node size must be at least as big as the physical block size */
- if (btreePtr->nodeSize < nodeRec.blockSize)
+ /* b-tree node size must be at least as big as the logical block size */
+ if (btreePtr->nodeSize < VTOHFS(btreePtr->fileRefNum)->hfs_logical_block_size)
{
/*
* If this tree has any records or the media is writeable then
/////////////////////// Try Simple Insert ///////////////////////////////
- if ( node == leftNodeNum )
- targetNode = leftNode;
- else
- targetNode = rightNode;
-
+ /* sanity check our left and right nodes here. */
+ if (node == leftNodeNum) {
+ if (leftNode->buffer == NULL) {
+ err = fsBTInvalidNodeErr;
+ M_ExitOnError(err);
+ }
+	else {
+ targetNode = leftNode;
+ }
+ }
+ else {
+ // we can assume right node is initialized.
+ targetNode = rightNode;
+ }
+
+
recordFit = InsertKeyRecord (btreePtr, targetNode->buffer, index, key->keyPtr, key->keyLength, key->recPtr, key->recSize);
if ( recordFit )
if ( !recordFit && leftNodeNum > 0 )
{
- PanicIf ( leftNode->buffer != nil, "\p InsertNode: leftNode already aquired!");
+ PanicIf ( leftNode->buffer != nil, "\p InsertNode: leftNode already acquired!");
if ( leftNode->buffer == nil )
{
off_t tmpOff;
allocBlockSize = vcb->blockSize;
- sectorSize = VCBTOHFS(vcb)->hfs_phys_block_size;
+ sectorSize = VCBTOHFS(vcb)->hfs_logical_block_size;
err = SearchExtentFile(vcb, fcb, offset, &foundKey, foundData, &foundIndex, &hint, &nextFABN);
if (err == noErr) {
#include <sys/types.h>
#include <sys/buf.h>
#include <sys/systm.h>
+#include <sys/disk.h>
#include "../../hfs.h"
#include "../../hfs_dbg.h"
u_int32_t wordsPerBlock;
// XXXdbg
struct hfsmount *hfsmp = VCBTOHFS(vcb);
+ dk_discard_t discard;
/*
* NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we
goto Exit;
}
+ memset(&discard, 0, sizeof(dk_discard_t));
+ discard.offset = (uint64_t)startingBlock * (uint64_t)vcb->blockSize;
+ discard.length = (uint64_t)numBlocks * (uint64_t)vcb->blockSize;
+
//
// Pre-read the bitmap block containing the first word of allocation
if (buffer)
(void)ReleaseBitmapBlock(vcb, blockRef, true);
+ if (err == noErr) {
+ // it doesn't matter if this fails, it's just informational anyway
+ VNOP_IOCTL(vcb->hfs_devvp, DKIOCDISCARD, (caddr_t)&discard, 0, vfs_context_kernel());
+ }
+
+
return err;
Corruption:
char namep[16];
int msgbuf;
- if (PE_parse_boot_arg("-s", namep))
+ if (PE_parse_boot_argn("-s", namep, sizeof (namep)))
boothowto |= RB_SINGLE;
- if (PE_parse_boot_arg("-b", namep))
+ if (PE_parse_boot_argn("-b", namep, sizeof (namep)))
boothowto |= RB_NOBOOTRC;
- if (PE_parse_boot_arg("-x", namep)) /* safe boot */
+ if (PE_parse_boot_argn("-x", namep, sizeof (namep))) /* safe boot */
boothowto |= RB_SAFEBOOT;
- if (PE_parse_boot_arg("-l", namep)) /* leaks logging */
+ if (PE_parse_boot_argn("-l", namep, sizeof (namep))) /* leaks logging */
turn_on_log_leaks = 1;
- PE_parse_boot_arg("srv", &srv);
- PE_parse_boot_arg("ncl", &ncl);
- if (PE_parse_boot_arg("nbuf", &max_nbuf_headers)) {
+ PE_parse_boot_argn("srv", &srv, sizeof (srv));
+ PE_parse_boot_argn("ncl", &ncl, sizeof (ncl));
+ if (PE_parse_boot_argn("nbuf", &max_nbuf_headers, sizeof (max_nbuf_headers))) {
customnbuf = 1;
}
#if !defined(SECURE_KERNEL)
- PE_parse_boot_arg("kmem", &setup_kmem);
+ PE_parse_boot_argn("kmem", &setup_kmem, sizeof (setup_kmem));
#endif
- PE_parse_boot_arg("trace", &new_nkdbufs);
+ PE_parse_boot_argn("trace", &new_nkdbufs, sizeof (new_nkdbufs));
- if (PE_parse_boot_arg("msgbuf", &msgbuf)) {
+ if (PE_parse_boot_argn("msgbuf", &msgbuf, sizeof (msgbuf))) {
log_setsize(msgbuf);
}
}
{
char namep[16];
- if (PE_parse_boot_arg("-s", namep)) {
+ if (PE_parse_boot_argn("-s", namep, sizeof(namep))) {
return(1);
} else {
return(0);
{0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 358 = nosys */
{0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 359 = nosys */
#endif
+#if CONFIG_WORKQUEUE
{AC(bsdthread_create_args), 0, 0, (sy_call_t *)bsdthread_create, munge_wwwww, munge_ddddd, _SYSCALL_RET_ADDR_T, 20}, /* 360 = bsdthread_create */
{AC(bsdthread_terminate_args), 0, 0, (sy_call_t *)bsdthread_terminate, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 361 = bsdthread_terminate */
+#else
+ {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 360 = nosys */
+ {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 361 = nosys */
+#endif
{0, 0, 0, (sy_call_t *)kqueue, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 362 = kqueue */
{AC(kevent_args), 0, 0, (sy_call_t *)kevent, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T, 24}, /* 363 = kevent */
{AC(lchown_args), 0, 0, (sy_call_t *)lchown, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 364 = lchown */
{AC(stack_snapshot_args), 0, 0, (sy_call_t *)stack_snapshot, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 365 = stack_snapshot */
+#if CONFIG_WORKQUEUE
{AC(bsdthread_register_args), 0, 0, (sy_call_t *)bsdthread_register, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 366 = bsdthread_register */
{0, 0, 0, (sy_call_t *)workq_open, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 367 = workq_open */
{AC(workq_ops_args), 0, 0, (sy_call_t *)workq_ops, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 368 = workq_ops */
+#else
+ {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 366 = nosys */
+ {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 367 = nosys */
+ {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 368 = nosys */
+#endif
{0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 369 = nosys */
{0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 370 = nosys */
{0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0}, /* 371 = nosys */
/* get the number of cpus and cache it */
#define BSD_HOST 1
host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
- kd_cpus = hinfo.physical_cpu_max;
+ kd_cpus = hinfo.logical_cpu_max;
if (kmem_alloc(kernel_map, (unsigned int *)&kdbip,
sizeof(struct kd_bufinfo) * kd_cpus) != KERN_SUCCESS)
void task_act_iterate_wth_args(task_t, void(*)(thread_t, void *), void *);
+#ifdef SECURE_KERNEL
+__private_extern__ int do_coredump = 0; /* default: don't dump cores */
+#else
__private_extern__ int do_coredump = 1; /* default: dump cores */
+#endif
__private_extern__ int sugid_coredump = 0; /* default: but not SGUID binaries */
void
#include <kern/kalloc.h>
#include <libkern/OSAtomic.h>
-#include <sys/ubc.h>
+#include <sys/ubc_internal.h>
struct psemnode;
struct pshmnode;
extern kauth_scope_t kauth_scope_fileop;
+extern int cs_debug;
+
#define f_flag f_fglob->fg_flag
#define f_type f_fglob->fg_type
#define f_msgcount f_fglob->fg_msgcount
goto outdrop;
}
+ if(ubc_cs_blob_get(vp, CPU_TYPE_ANY, fs.fs_file_start))
+ {
+ if(cs_debug)
+ printf("CODE SIGNING: resident blob offered for: %s\n", vp->v_name);
+ vnode_put(vp);
+ goto outdrop;
+ }
+
#define CS_MAX_BLOB_SIZE (1ULL * 1024 * 1024) /* XXX ? */
if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
error = E2BIG;
}
kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
- kr = kmem_alloc(kernel_map,
- &kernel_blob_addr,
- kernel_blob_size);
+ kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
if (kr != KERN_SUCCESS) {
error = ENOMEM;
vnode_put(vp);
(void *) kernel_blob_addr,
kernel_blob_size);
if (error) {
- kmem_free(kernel_map,
- kernel_blob_addr,
- kernel_blob_size);
+ ubc_cs_blob_deallocate(kernel_blob_addr,
+ kernel_blob_size);
vnode_put(vp);
goto outdrop;
}
kernel_blob_addr,
kernel_blob_size);
if (error) {
- kmem_free(kernel_map,
- kernel_blob_addr,
- kernel_blob_size);
+ ubc_cs_blob_deallocate(kernel_blob_addr,
+ kernel_blob_size);
} else {
- /* ubc_blob_add() was consumed "kernel_blob_addr" */
+ /* ubc_blob_add() has consumed "kernel_blob_addr" */
}
(void) vnode_put(vp);
imgp->ip_csflags |= CS_KILL;
- /* load_machfile() maps the vnode */
- (void)ubc_map(imgp->ip_vp, PROT_READ | PROT_EXEC);
-
/*
* Set up the system reserved areas in the new address space.
*/
*/
error = exec_handle_sugid(imgp);
- proc_knote(p, NOTE_EXEC);
-
if (!vfexec && (p->p_lflag & P_LTRACED))
psignal(p, SIGTRAP);
goto badtoolate;
}
+#if CONFIG_MACF
+ /* Determine if the map will allow VM_PROT_COPY */
+ error = mac_proc_check_map_prot_copy_allow(p);
+ vm_map_set_prot_copy_allow(get_task_map(task),
+ error ? FALSE : TRUE);
+#endif
+
if (load_result.unixproc &&
create_unix_stack(get_task_map(task),
load_result.user_stack,
}
badtoolate:
+ proc_knote(p, NOTE_EXEC);
+
if (vfexec) {
task_deallocate(new_task);
thread_deallocate(thread);
int once = 1; /* save SGUID-ness for interpreted files */
int i;
int iterlimit = EAI_ITERLIMIT;
+ proc_t p = vfs_context_proc(imgp->ip_vfs_context);
error = execargs_alloc(imgp);
if (error)
*/
error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg);
if (error) {
- goto bad;
+ goto bad_notrans;
}
DTRACE_PROC1(exec, uintptr_t, imgp->ip_strings);
again:
error = namei(&nd);
if (error)
- goto bad;
+ goto bad_notrans;
imgp->ip_ndp = &nd; /* successful namei(); call nameidone() later */
imgp->ip_vp = nd.ni_vp; /* if set, need to vnode_put() at some point */
+ proc_transstart(p, 0);
+
error = exec_check_permissions(imgp);
if (error)
goto bad;
nd.ni_segflg = UIO_SYSSPACE32;
nd.ni_dirp = CAST_USER_ADDR_T(imgp->ip_interp_name);
+ proc_transend(p, 0);
goto again;
default:
}
bad:
+ proc_transend(p, 0);
+
+bad_notrans:
if (imgp->ip_strings)
execargs_free(imgp);
if (imgp->ip_ndp)
if (!(uthread->uu_flag & UT_VFORK)) {
if (task != kernel_task) {
proc_lock(p);
- numthreads = get_task_numacts(task);
+ numthreads = get_task_numactivethreads(task);
if (numthreads <= 0 ) {
proc_unlock(p);
kauth_cred_unref(&context.vc_ucred);
}
#endif
- proc_transstart(p, 0);
error = exec_activate_image(imgp);
- proc_transend(p, 0);
kauth_cred_unref(&context.vc_ucred);
*/
p->p_ucred = kauth_cred_setsvuidgid(p->p_ucred, kauth_cred_getuid(p->p_ucred), p->p_ucred->cr_gid);
- /* XXX Obsolete; security token should not be separate from cred */
+ /* Update the process' identity version and set the security token */
+ p->p_idversion++;
set_security_token(p);
return(error);
*/
fdfree(p);
+ if (uth->uu_lowpri_window) {
+ /*
+ * task is marked as a low priority I/O type
+ * and the I/O we issued while flushing files on close
+ * collided with normal I/O operations...
+	 * no need to throttle this thread since it's going away,
+	 * but we do need to update our bookkeeping w.r.t. throttled threads
+ */
+ throttle_lowpri_io(FALSE);
+ }
+
#if SYSV_SHM
/* Close ref SYSV Shared memory*/
if (p->vm_shm)
(void)reap_child_locked(pp, p, 1, 1, 1);
/* list lock dropped by reap_child_locked */
}
+ if (uth->uu_lowpri_window) {
+ /*
+ * task is marked as a low priority I/O type and we've
+ * somehow picked up another throttle during exit processing...
+	 * no need to throttle this thread since it's going away,
+	 * but we do need to update our bookkeeping w.r.t. throttled threads
+ */
+ throttle_lowpri_io(FALSE);
+ }
proc_rele(pp);
forkproc(proc_t parent, int lock)
{
struct proc * child; /* Our new process */
- static int nextpid = 0, pidwrap = 0;
+ static int nextpid = 0, pidwrap = 0, nextpidversion = 0;
int error = 0;
struct session *sessp;
uthread_t uth_parent = (uthread_t)get_bsdthread_info(current_thread());
}
nprocs++;
child->p_pid = nextpid;
+ child->p_idversion = nextpidversion++;
#if 1
if (child->p_pid != 0) {
if (pfind_locked(child->p_pid) != PROC_NULL)
uthread_t uth = (uthread_t)uthread;
proc_t p = (proc_t)bsd_info;
+
+ if (uth->uu_lowpri_window) {
+ /*
+ * task is marked as a low priority I/O type
+ * and we've somehow managed to not dismiss the throttle
+ * through the normal exit paths back to user space...
+	 * no need to throttle this thread since it's going away,
+	 * but we do need to update our bookkeeping w.r.t. throttled threads
+ */
+ throttle_lowpri_io(FALSE);
+ }
/*
* Per-thread audit state should never last beyond system
* call return. Since we don't audit the thread creation/
#include <libkern/libkern.h>
#include <sys/sysctl.h>
+extern unsigned int vm_page_free_count;
+extern unsigned int vm_page_active_count;
+extern unsigned int vm_page_inactive_count;
+extern unsigned int vm_page_purgeable_count;
+extern unsigned int vm_page_wire_count;
+
static void kern_memorystatus_thread(void);
int kern_memorystatus_wakeup = 0;
-int kern_memorystatus_pause = 0;
int kern_memorystatus_level = 0;
int kern_memorystatus_last_level = 0;
unsigned int kern_memorystatus_kev_failure_count = 0;
kern_memorystatus_thread(void)
{
struct kev_msg ev_msg;
+ struct {
+ uint32_t free_pages;
+ uint32_t active_pages;
+ uint32_t inactive_pages;
+ uint32_t purgeable_pages;
+ uint32_t wired_pages;
+ } data;
int ret;
while(1) {
/* pass the memory status level in the event code (as percent used) */
ev_msg.event_code = 100 - kern_memorystatus_last_level;
- ev_msg.dv[0].data_length = 0;
+ ev_msg.dv[0].data_length = sizeof data;
+ ev_msg.dv[0].data_ptr = &data;
+ ev_msg.dv[1].data_length = 0;
+
+ data.free_pages = vm_page_free_count;
+ data.active_pages = vm_page_active_count;
+ data.inactive_pages = vm_page_inactive_count;
+ data.purgeable_pages = vm_page_purgeable_count;
+ data.wired_pages = vm_page_wire_count;
ret = kev_post_msg(&ev_msg);
if (ret) {
printf("%s: kev_post_msg() failed, err %d\n", __func__, ret);
}
- assert_wait_timeout((event_t)&kern_memorystatus_pause, THREAD_UNINT, 1, 250*1000*NSEC_PER_USEC);
- (void)thread_block(THREAD_CONTINUE_NULL);
-
if (kern_memorystatus_level >= kern_memorystatus_last_level + 5 ||
kern_memorystatus_level <= kern_memorystatus_last_level - 5)
continue;
if (cpusubtype == CPU_SUBTYPE_POWERPC_970 &&
cpu_info.l2_cache_size == 1 * 1024 * 1024)
/* The signature of the dual-core G5 */
- packages = hinfo.max_cpus / 2;
+ packages = roundup(hinfo.max_cpus, 2) / 2;
else
packages = hinfo.max_cpus;
cachesize[4] = 0;
/* hw.packages */
- packages = ml_cpu_cache_sharing(0) /
- cpuid_info()->cpuid_cores_per_package;
-
+ packages = roundup(ml_cpu_cache_sharing(0), cpuid_info()->thread_count)
+ / cpuid_info()->thread_count;
+
#else /* end __arm__ */
# warning we do not support this platform yet
#endif /* __ppc__ */
struct fileproc *fp;
register struct vnode *vp;
int flags;
- int prot, file_prot;
+ int prot;
int err=0;
vm_map_t user_map;
kern_return_t result;
(void)vnode_put(vp);
goto out;
}
-
- file_prot = prot & (PROT_READ | PROT_WRITE | PROT_EXEC);
- if (docow) {
- /* private mapping: won't write to the file */
- file_prot &= ~PROT_WRITE;
- }
- (void) ubc_map(vp, file_prot);
}
if (!mapanon)
}
ubc_setthreadcred(vp, current_proc(), current_thread());
- (void)ubc_map(vp, (PROT_READ | PROT_EXEC));
(void)vnode_put(vp);
err = 0;
bad:
static void lctxinit(void);
#endif
+#if DEBUG
#define __PROC_INTERNAL_DEBUG 1
+#endif
/* Name to give to core files */
__private_extern__ char corefilename[MAXPATHLEN+1] = {"/cores/core.%P"};
int
isinferior(proc_t p, proc_t t)
{
-int retval = 0;
+ int retval = 0;
+ int nchecked = 0;
+ proc_t start = p;
/* if p==t they are not inferior */
if (p == t)
return(0);
proc_list_lock();
- for (; p != t; p = p->p_pptr)
- if (p->p_pid == 0)
+ for (; p != t; p = p->p_pptr) {
+ nchecked++;
+
+ /* Detect here if we're in a cycle */
+ if ((p->p_pid == 0) || (p->p_pptr == start) || (nchecked >= nprocs))
goto out;
+ }
retval = 1;
out:
proc_list_unlock();
}
void
-proc_checkdeadrefs(proc_t p)
+proc_checkdeadrefs(__unused proc_t p)
{
-//#if __PROC_INTERNAL_DEBUG
+#if __PROC_INTERNAL_DEBUG
if ((p->p_listflag & P_LIST_INHASH) != 0)
panic("proc being freed and still in hash %x: %x\n", (unsigned int)p, (unsigned int)p->p_listflag);
if (p->p_childrencnt != 0)
panic("proc being freed and pending refcount %x:%x\n", (unsigned int)p, (unsigned int)p->p_refcount);
if (p->p_parentref != 0)
panic("proc being freed and pending parentrefs %x:%x\n", (unsigned int)p, (unsigned int)p->p_parentref);
-//#endif
+#endif
}
int
return(IS_64BIT_PROCESS(p));
}
+int
+proc_pidversion(proc_t p)
+{
+ return(p->p_idversion);
+}
+
+int
+proc_getcdhash(proc_t p, unsigned char *cdhash)
+{
+ return vn_getcdhash(p->p_textvp, p->p_textoff, cdhash);
+}
+
void
bsd_set_dependency_capable(task_t task)
{
buf = (char *)kalloc(usize);
if (buf == NULL)
return(ENOMEM);
-
bzero(buf, usize);
error = vnode_getwithvid(tvp, vid);
SYSCTL_INT(_vm, OID_AUTO, cs_debug, CTLFLAG_RW, &cs_debug, 0, "");
int
-cs_invalid_page(void)
+cs_invalid_page(
+ addr64_t vaddr)
{
struct proc *p;
int retval;
if (cs_force_hard)
p->p_csflags |= CS_HARD;
- if (p->p_csflags & CS_VALID) {
- p->p_csflags &= ~CS_VALID;
-
+ /* CS_KILL triggers us to send a kill signal. Nothing else. */
+ if (p->p_csflags & CS_KILL) {
proc_unlock(p);
- cs_procs_invalidated++;
- printf("CODE SIGNING: cs_invalid_page: "
- "p=%d[%s] clearing CS_VALID\n",
- p->p_pid, p->p_comm);
+ if (cs_debug) {
+ printf("CODE SIGNING: cs_invalid_page(0x%llx): "
+ "p=%d[%s] honoring CS_KILL\n",
+ vaddr, p->p_pid, p->p_comm);
+ }
+ cs_procs_killed++;
+ psignal(p, SIGKILL);
proc_lock(p);
-
-
- if (p->p_csflags & CS_KILL) {
- proc_unlock(p);
- if (cs_debug) {
- printf("CODE SIGNING: cs_invalid_page: "
- "p=%d[%s] honoring CS_KILL\n",
- p->p_pid, p->p_comm);
- }
- cs_procs_killed++;
- psignal(p, SIGKILL);
- proc_lock(p);
+ }
+
+ /* CS_HARD means fail the mapping operation so the process stays valid. */
+ if (p->p_csflags & CS_HARD) {
+ proc_unlock(p);
+ if (cs_debug) {
+ printf("CODE SIGNING: cs_invalid_page(0x%llx): "
+ "p=%d[%s] honoring CS_HARD\n",
+ vaddr, p->p_pid, p->p_comm);
}
-
- if (p->p_csflags & CS_HARD) {
+ retval = 1;
+ } else {
+ if (p->p_csflags & CS_VALID) {
+ p->p_csflags &= ~CS_VALID;
+
proc_unlock(p);
- if (cs_debug) {
- printf("CODE SIGNING: cs_invalid_page: "
- "p=%d[%s] honoring CS_HARD\n",
- p->p_pid, p->p_comm);
- }
- retval = 1;
+ cs_procs_invalidated++;
+ printf("CODE SIGNING: cs_invalid_page(0x%llx): "
+ "p=%d[%s] clearing CS_VALID\n",
+ vaddr, p->p_pid, p->p_comm);
} else {
proc_unlock(p);
- retval = 0;
- }
- } else {
- proc_unlock(p);
- if (cs_debug) {
- printf("CODE SIGNING: cs_invalid_page: "
- "p=%d[%s] ignored...\n",
- p->p_pid, p->p_comm);
}
+
retval = 0;
}
audit_token.val[4] = my_cred->cr_rgid;
audit_token.val[5] = p->p_pid;
audit_token.val[6] = my_cred->cr_au.ai_asid;
- audit_token.val[7] = my_cred->cr_au.ai_termid.port;
+ audit_token.val[7] = p->p_idversion;
#if CONFIG_MACF_MACH
mac_task_label_update_cred(my_cred, p->task);
}
-/* ptrace set runnalbe */
+/* ptrace set runnable */
void
pt_setrunnable(proc_t p)
{
proc_unlock(p);
if (p->sigwait) {
wakeup((caddr_t)&(p->sigwait));
- task_release(task);
+ if ((p->p_lflag & P_LSIGEXC) == 0) { // 5878479
+ task_release(task);
+ }
}
}
}
&& !(name[0] == KERN_PROC
|| name[0] == KERN_PROF
|| name[0] == KERN_KDEBUG
+#if !CONFIG_EMBEDDED
|| name[0] == KERN_PROCARGS
+#endif
|| name[0] == KERN_PROCARGS2
|| name[0] == KERN_IPC
|| name[0] == KERN_SYSV
#endif
case KERN_KDEBUG:
return (kdebug_ops(name + 1, namelen - 1, oldp, oldlenp, p));
+#if !CONFIG_EMBEDDED
case KERN_PROCARGS:
/* new one as it does not use kinfo_proc */
return (sysctl_procargs(name + 1, namelen - 1, oldp, oldlenp, p));
+#endif
case KERN_PROCARGS2:
/* new one as it does not use kinfo_proc */
return (sysctl_procargs2(name + 1, namelen - 1, oldp, oldlenp, p));
sysctl_coredump
(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
{
+#ifdef SECURE_KERNEL
+ return (ENOTSUP);
+#endif
int new_value, changed;
int error = sysctl_io_number(req, do_coredump, sizeof(int), &new_value, &changed);
if (changed) {
sysctl_suid_coredump
(__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
{
+#ifdef SECURE_KERNEL
+ return (ENOTSUP);
+#endif
int new_value, changed;
int error = sysctl_io_number(req, sugid_coredump, sizeof(int), &new_value, &changed);
if (changed) {
#include <kern/kalloc.h>
#include <kern/task.h>
#include <kern/thread.h>
+#include <kern/page_decrypt.h>
#include <mach-o/fat.h>
#include <mach-o/loader.h>
cpu_type_t cputype,
load_result_t *result);
+#if CONFIG_CODE_DECRYPTION
+static load_return_t
+set_code_unprotect(
+ struct encryption_info_command *lcp,
+ caddr_t addr,
+ vm_map_t map,
+ struct vnode *vp);
+#endif
+
static load_return_t
load_unixthread(
struct thread_command *tcp,
kfree(kl_addr, kl_size);
return(LOAD_IOERROR);
}
- /* (void)ubc_map(vp, PROT_EXEC); */ /* NOT HERE */
/*
* Scan through the commands, processing each one as necessary.
got_code_signatures = TRUE;
}
break;
+#if CONFIG_CODE_DECRYPTION
+ case LC_ENCRYPTION_INFO:
+ if (pass != 2)
+ break;
+ ret = set_code_unprotect(
+ (struct encryption_info_command *) lcp,
+ addr, map, vp);
+ if (ret != LOAD_SUCCESS) {
+ printf("proc %d: set unprotect error %d "
+ "for file \"%s\"\n",
+ p->p_pid, ret, vp->v_name);
+ ret = LOAD_SUCCESS; /* ignore error */
+ }
+ break;
+#endif
default:
/* Other commands are ignored by the kernel */
ret = LOAD_SUCCESS;
if (kl_addr )
kfree(kl_addr, kl_size);
- if (ret == LOAD_SUCCESS)
- (void)ubc_map(vp, PROT_READ | PROT_EXEC);
-
return(ret);
}
-#ifdef __i386__
+#if CONFIG_CODE_DECRYPTION
#define APPLE_UNPROTECTED_HEADER_SIZE (3 * PAGE_SIZE_64)
map_size -= delta;
}
/* ... transform the rest of the mapping. */
+ struct pager_crypt_info crypt_info;
+ crypt_info.page_decrypt = dsmos_page_transform;
+ crypt_info.crypt_ops = NULL;
+ crypt_info.crypt_end = NULL;
kr = vm_map_apple_protected(map,
map_addr,
- map_addr + map_size);
+ map_addr + map_size,
+ &crypt_info);
}
if (kr != KERN_SUCCESS) {
}
return LOAD_SUCCESS;
}
-#else /* __i386__ */
+#else /* CONFIG_CODE_DECRYPTION */
#define unprotect_segment_64(file_off, file_size, map, map_addr, map_size) \
LOAD_SUCCESS
-#endif /* __i386__ */
+#endif /* CONFIG_CODE_DECRYPTION */
static
load_return_t
if (ret == LOAD_SUCCESS) {
result->dynlinker = TRUE;
result->entry_point = myresult.entry_point;
- (void)ubc_map(vp, PROT_READ | PROT_EXEC);
}
out:
vnode_put(vp);
int resid;
struct cs_blob *blob;
int error;
+ vm_size_t blob_size;
addr = 0;
blob = NULL;
goto out;
}
- kr = kmem_alloc(kernel_map, &addr, round_page(lcp->datasize));
+ blob_size = lcp->datasize;
+ kr = ubc_cs_blob_allocate(&addr, &blob_size);
if (kr != KERN_SUCCESS) {
ret = LOAD_NOSPACE;
goto out;
result->csflags |= blob->csb_flags;
}
if (addr != 0) {
- kmem_free(kernel_map, addr, round_page(lcp->datasize));
+ ubc_cs_blob_deallocate(addr, blob_size);
addr = 0;
}
return ret;
}
+
+#if CONFIG_CODE_DECRYPTION
+
+static load_return_t
+set_code_unprotect(
+ struct encryption_info_command *eip,
+ caddr_t addr,
+ vm_map_t map,
+ struct vnode *vp)
+{
+ int result, len;
+ char vpath[MAXPATHLEN];
+ pager_crypt_info_t crypt_info;
+ const char * cryptname = 0;
+
+ size_t offset;
+ struct segment_command_64 *seg64;
+ struct segment_command *seg32;
+ vm_map_offset_t map_offset, map_size;
+ kern_return_t kr;
+
+ switch(eip->cryptid) {
+ case 0:
+ /* not encrypted, just an empty load command */
+ return LOAD_SUCCESS;
+ case 1:
+ cryptname="com.apple.unfree";
+ break;
+ case 0x10:
+			/* a cryptid that can be put manually into a binary
+			 * to request the null (com.apple.null) decrypter */
+ cryptname="com.apple.null";
+ break;
+ default:
+ return LOAD_FAILURE;
+ }
+
+ len = MAXPATHLEN;
+ result = vn_getpath(vp, vpath, &len);
+ if(result) return result;
+
+ /* set up decrypter first */
+ if(NULL==text_crypter_create) return LOAD_FAILURE;
+ kr=text_crypter_create(&crypt_info, cryptname, (void*)vpath);
+
+ if(kr) {
+ printf("set_code_unprotect: unable to find decrypter %s, kr=%d\n",
+ cryptname, kr);
+ return LOAD_FAILURE;
+ }
+
+ /* this is terrible, but we have to rescan the load commands to find the
+	 * virtual address of this encrypted stuff. This code is going to look like
+ * the dyld source one day... */
+ struct mach_header *header = (struct mach_header *)addr;
+ size_t mach_header_sz = sizeof(struct mach_header);
+ if (header->magic == MH_MAGIC_64 ||
+ header->magic == MH_CIGAM_64) {
+ mach_header_sz = sizeof(struct mach_header_64);
+ }
+ offset = mach_header_sz;
+ uint32_t ncmds = header->ncmds;
+ while (ncmds--) {
+ /*
+ * Get a pointer to the command.
+ */
+ struct load_command *lcp = (struct load_command *)(addr + offset);
+ offset += lcp->cmdsize;
+
+ switch(lcp->cmd) {
+ case LC_SEGMENT_64:
+ seg64 = (struct segment_command_64 *)lcp;
+ if ((seg64->fileoff <= eip->cryptoff) &&
+ (seg64->fileoff+seg64->filesize >=
+ eip->cryptoff+eip->cryptsize)) {
+ map_offset = seg64->vmaddr + eip->cryptoff - seg64->fileoff;
+ map_size = eip->cryptsize;
+ goto remap_now;
+ }
+ case LC_SEGMENT:
+ seg32 = (struct segment_command *)lcp;
+ if ((seg32->fileoff <= eip->cryptoff) &&
+ (seg32->fileoff+seg32->filesize >=
+ eip->cryptoff+eip->cryptsize)) {
+ map_offset = seg32->vmaddr + eip->cryptoff - seg32->fileoff;
+ map_size = eip->cryptsize;
+ goto remap_now;
+ }
+ }
+ }
+
+ /* if we get here, did not find anything */
+ return LOAD_FAILURE;
+
+remap_now:
+ /* now remap using the decrypter */
+ kr = vm_map_apple_protected(map, map_offset, map_offset+map_size, &crypt_info);
+ if(kr) printf("set_code_unprotect(): mapping failed with %x\n", kr);
+
+ return LOAD_SUCCESS;
+}
+
+#endif
+
/*
* This routine exists to support the load_dylinker().
*
(btp->bt_bktsize + 1) * sizeof (void *), 0, 0, MCR_SLEEP);
}
- PE_parse_boot_arg("mcache_flags", &mcache_flags);
+ PE_parse_boot_argn("mcache_flags", &mcache_flags, sizeof (mcache_flags));
mcache_flags &= MCF_FLAGS_MASK;
mcache_audit_cache = mcache_create("audit", sizeof (mcache_audit_t),
#if 0
KERNEL_DEBUG_CONSTANT(0x9000084 |DBG_FUNC_START, (unsigned int)freeaddr, (unsigned int)freesize, (unsigned int)kthport, 0xff, 0);
#endif
- if (sem != MACH_PORT_NULL) {
- kret = semaphore_signal_internal_trap(sem);
+ if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
+ kret = mach_vm_deallocate(current_map(), freeaddr, freesize);
if (kret != KERN_SUCCESS) {
return(EINVAL);
}
}
- if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
- kret = mach_vm_deallocate(current_map(), freeaddr, freesize);
+
+ (void) thread_terminate(current_thread());
+ if (sem != MACH_PORT_NULL) {
+ kret = semaphore_signal_internal_trap(sem);
if (kret != KERN_SUCCESS) {
return(EINVAL);
}
}
- (void) thread_terminate(current_thread());
if (kthport != MACH_PORT_NULL)
mach_port_deallocate(get_task_ipcspace(current_task()), kthport);
thread_exception_return();
int
setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl)
{
-
#if defined(__ppc__)
/*
* Set up PowerPC registers...
continue;
newl = ch == '\n';
localbuff[i++] = ch;
+ /* The original version of this routine contained a buffer
+	 * overflow. At the time, a "small" targeted fix was desired,
+	 * so the bounds check below was added.
+ * TODO: rewrite this needlessly convoluted routine.
+ */
+ if (i == (localbuff_size - 2))
+ break;
}
if (!newl)
localbuff[i++] = '\n';
359 ALL { int nosys(void); }
#endif
+#if CONFIG_WORKQUEUE
360 ALL { user_addr_t bsdthread_create(user_addr_t func, user_addr_t func_arg, user_addr_t stack, user_addr_t pthread, uint32_t flags) NO_SYSCALL_STUB; }
361 ALL { int bsdthread_terminate(user_addr_t stackaddr, size_t freesize, uint32_t port, uint32_t sem) NO_SYSCALL_STUB; }
+#else
+360 ALL { int nosys(void); }
+361 ALL { int nosys(void); }
+#endif
+
362 ALL { int kqueue(void); }
363 ALL { int kevent(int fd, const struct kevent *changelist, int nchanges, struct kevent *eventlist, int nevents, const struct timespec *timeout); }
364 ALL { int lchown(user_addr_t path, uid_t owner, gid_t group); }
365 ALL { int stack_snapshot(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t options) NO_SYSCALL_STUB; }
+
+#if CONFIG_WORKQUEUE
366 ALL { int bsdthread_register(user_addr_t threadstart, user_addr_t wqthread, int pthsize) NO_SYSCALL_STUB; }
367 ALL { int workq_open(void) NO_SYSCALL_STUB; }
368 ALL { int workq_ops(int options, user_addr_t item, int prio) NO_SYSCALL_STUB; }
+#else
+366 ALL { int nosys(void); }
+367 ALL { int nosys(void); }
+368 ALL { int nosys(void); }
+#endif
+
369 ALL { int nosys(void); }
370 ALL { int nosys(void); }
371 ALL { int nosys(void); }
#include <libkern/crypto/sha1.h>
+#include <security/mac_framework.h>
+
/* XXX These should be in a BSD accessible Mach header, but aren't. */
extern kern_return_t memory_object_pages_resident(memory_object_control_t,
boolean_t *);
*/
cd = (const CS_CodeDirectory *) embedded;
}
-
if (cd &&
cs_valid_range(cd, cd + 1, lower_bound, upper_bound) &&
cs_valid_range(cd, (const char *) cd + ntohl(cd->length),
if (flags & UPL_COMMIT_FREE_ON_EMPTY)
flags |= UPL_COMMIT_NOTIFY_EMPTY;
+ if (flags & UPL_COMMIT_KERNEL_ONLY_FLAGS) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
kr = upl_commit_range(upl, offset, size, flags,
/*
* CODE SIGNING
*/
-#define CS_BLOB_KEEP_IN_KERNEL 1
+#define CS_BLOB_PAGEABLE 0
static volatile SInt32 cs_blob_size = 0;
static volatile SInt32 cs_blob_count = 0;
static SInt32 cs_blob_size_peak = 0;
SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_peak, CTLFLAG_RD, &cs_blob_size_peak, 0, "Peak size of code signature blobs");
SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_max, CTLFLAG_RD, &cs_blob_size_max, 0, "Size of biggest code signature blob");
+kern_return_t
+ubc_cs_blob_allocate(
+ vm_offset_t *blob_addr_p,
+ vm_size_t *blob_size_p)
+{
+ kern_return_t kr;
+
+#if CS_BLOB_PAGEABLE
+ *blob_size_p = round_page(*blob_size_p);
+ kr = kmem_alloc(kernel_map, blob_addr_p, *blob_size_p);
+#else /* CS_BLOB_PAGEABLE */
+ *blob_addr_p = (vm_offset_t) kalloc(*blob_size_p);
+ if (*blob_addr_p == 0) {
+ kr = KERN_NO_SPACE;
+ } else {
+ kr = KERN_SUCCESS;
+ }
+#endif /* CS_BLOB_PAGEABLE */
+ return kr;
+}
+
+void
+ubc_cs_blob_deallocate(
+ vm_offset_t blob_addr,
+ vm_size_t blob_size)
+{
+#if CS_BLOB_PAGEABLE
+ kmem_free(kernel_map, blob_addr, blob_size);
+#else /* CS_BLOB_PAGEABLE */
+ kfree((void *) blob_addr, blob_size);
+#endif /* CS_BLOB_PAGEABLE */
+}
+
int
ubc_cs_blob_add(
struct vnode *vp,
return ENOMEM;
}
+#if CS_BLOB_PAGEABLE
/* get a memory entry on the blob */
blob_size = (memory_object_size_t) size;
kr = mach_make_memory_entry_64(kernel_map,
error = EINVAL;
goto out;
}
-
+#else
+ blob_size = (memory_object_size_t) size;
+ blob_handle = IPC_PORT_NULL;
+#endif
/* fill in the new blob */
blob->csb_cpu_type = cputype;
blob->csb_mem_offset = 0;
blob->csb_mem_handle = blob_handle;
blob->csb_mem_kaddr = addr;
-
/*
* Validate the blob's contents
SHA1Final(blob->csb_sha1, &sha1ctxt);
}
-
+ /*
+	 * Let the policy module check whether the blob's signature is accepted.
+ */
+#if CONFIG_MACF
+ error = mac_vnode_check_signature(vp, blob->csb_sha1, (void*)addr, size);
+ if (error)
+ goto out;
+#endif
+
/*
* Validate the blob's coverage
*/
blob->csb_flags);
}
-#if !CS_BLOB_KEEP_IN_KERNEL
- blob->csb_mem_kaddr = 0;
-#endif /* CS_BLOB_KEEP_IN_KERNEL */
-
vnode_unlock(vp);
error = 0; /* success ! */
mach_memory_entry_port_release(blob_handle);
blob_handle = IPC_PORT_NULL;
}
- } else {
-#if !CS_BLOB_KEEP_IN_KERNEL
- kmem_free(kernel_map, addr, size);
-#endif /* CS_BLOB_KEEP_IN_KERNEL */
}
if (error == EAGAIN) {
/*
* Since we're not failing, consume the data we received.
*/
- kmem_free(kernel_map, addr, size);
+ ubc_cs_blob_deallocate(addr, size);
}
return error;
blob = next_blob) {
next_blob = blob->csb_next;
if (blob->csb_mem_kaddr != 0) {
- kmem_free(kernel_map,
- blob->csb_mem_kaddr,
- blob->csb_mem_size);
+ ubc_cs_blob_deallocate(blob->csb_mem_kaddr,
+ blob->csb_mem_size);
blob->csb_mem_kaddr = 0;
}
- mach_memory_entry_port_release(blob->csb_mem_handle);
+ if (blob->csb_mem_handle != IPC_PORT_NULL) {
+ mach_memory_entry_port_release(blob->csb_mem_handle);
+ }
blob->csb_mem_handle = IPC_PORT_NULL;
OSAddAtomic(-1, &cs_blob_count);
OSAddAtomic(-blob->csb_mem_size, &cs_blob_size);
cd->hashType != 0x1 ||
cd->hashSize != SHA1_RESULTLEN) {
/* bogus blob ? */
-#if !CS_BLOB_KEEP_IN_KERNEL
- kmem_free(kernel_map, kaddr, ksize);
-#endif /* CS_BLOB_KEEP_IN_KERNEL */
continue;
}
if (offset < start_offset ||
offset >= end_offset) {
/* our page is not covered by this blob */
-#if !CS_BLOB_KEEP_IN_KERNEL
- kmem_free(kernel_map, kaddr, ksize);
-#endif /* CS_BLOB_KEEP_IN_KERNEL */
continue;
}
found_hash = TRUE;
}
-#if !CS_BLOB_KEEP_IN_KERNEL
- /* we no longer need that blob in the kernel map */
- kmem_free(kernel_map, kaddr, ksize);
-#endif /* CS_BLOB_KEEP_IN_KERNEL */
-
break;
}
}
validated = FALSE;
*tainted = FALSE;
} else {
- const uint32_t *asha1, *esha1;
size = PAGE_SIZE;
+ const uint32_t *asha1, *esha1;
if (offset + size > codeLimit) {
/* partial page at end of segment */
assert(offset < codeLimit);
}
/* compute the actual page's SHA1 hash */
SHA1Init(&sha1ctxt);
- SHA1Update(&sha1ctxt, data, size);
+ SHA1UpdateUsePhysicalAddress(&sha1ctxt, data, size);
SHA1Final(actual_hash, &sha1ctxt);
asha1 = (const uint32_t *) actual_hash;
VERIFY(slabstbl != NULL);
/* Allocate audit structures if needed */
- PE_parse_boot_arg("mbuf_debug", &mbuf_debug);
+ PE_parse_boot_argn("mbuf_debug", &mbuf_debug, sizeof (mbuf_debug));
mbuf_debug |= mcache_getflags();
if (mbuf_debug & MCF_AUDIT) {
MALLOC(mclaudit, mcl_audit_t *,
embutl = (union mcluster *)
((unsigned char *)mbutl + (nmbclusters * MCLBYTES));
- PE_parse_boot_arg("initmcl", &initmcl);
+ PE_parse_boot_argn("initmcl", &initmcl, sizeof (initmcl));
lck_mtx_lock(mbuf_mlock);
return;
}
- PE_parse_boot_arg("socket_debug", &socket_debug);
+ PE_parse_boot_argn("socket_debug", &socket_debug, sizeof (socket_debug));
/*
* allocate lock group attribute and group for socket cache mutex
fremovexattr.2 \
fsetxattr.2 \
fstat.2 \
- fstat64.2 \
fstatfs.2 \
- fstatfs64.2 \
fsync.2 \
ftruncate.2 \
futimes.2 \
listxattr.2 \
lseek.2 \
lstat.2 \
- lstat64.2 \
madvise.2 \
mincore.2 \
minherit.2 \
socket.2 \
socketpair.2 \
stat.2 \
- stat64.2 \
statfs.2 \
- statfs64.2 \
symlink.2 \
sync.2 \
syscall.2 \
+++ /dev/null
-.so man2/stat.2
+++ /dev/null
-.so man2/statfs.2
+++ /dev/null
-.so man2/stat.2
-
.Os BSD 4
.Sh NAME
.Nm fstat ,
-.Nm fstat64 ,
.Nm lstat ,
-.Nm lstat64 ,
-.Nm stat ,
-.Nm stat64
+.Nm stat
.Nd get file status
.Sh SYNOPSIS
.Fd #include <sys/stat.h>
.Fa "struct stat *buf"
.Fc
.Ft int
-.Fo fstat64
-.Fa "int fildes"
-.Fa "struct stat64 *buf"
-.Fc
-.Ft int
.Fo lstat
.Fa "const char *restrict path"
.Fa "struct stat *restrict buf"
.Fc
.Ft int
-.Fo lstat64
-.Fa "const char *restrict path"
-.Fa "struct stat64 *restrict buf"
-.Fc
-.Ft int
.Fo stat
.Fa "const char *restrict path"
.Fa "struct stat *restrict buf"
.Fc
-.Ft int
-.Fo stat64
-.Fa "const char *restrict path"
-.Fa "struct stat64 *restrict buf"
-.Fc
.Sh DESCRIPTION
The
.Fn stat
-family of functions and their 64 bit variants obtain information about a file. The
+family of functions obtain information about a file. The
.Fn stat
function obtains information about the file pointed to by
.Fa path .
.Fa buf
argument is a pointer to a
.Fa stat
-or
-.Fa stat64
structure
as defined by
.Aq Pa sys/stat.h
-(both shown below)
and into which information is placed concerning the file.
.Bd -literal
struct stat {
- dev_t st_dev; /* device inode resides on */
- ino_t st_ino; /* inode's number */
- mode_t st_mode; /* inode protection mode */
- nlink_t st_nlink; /* number or hard links to the file */
- uid_t st_uid; /* user-id of owner */
- gid_t st_gid; /* group-id of owner */
- dev_t st_rdev; /* device type, for special file inode */
- struct timespec st_atimespec; /* time of last access */
- struct timespec st_mtimespec; /* time of last data modification */
- struct timespec st_ctimespec; /* time of last file status change */
- off_t st_size; /* file size, in bytes */
- quad_t st_blocks; /* blocks allocated for file */
- u_long st_blksize;/* optimal file sys I/O ops blocksize */
- u_long st_flags; /* user defined flags for file */
- u_long st_gen; /* file generation number */
-};
-
-
-struct stat64 {
dev_t st_dev; /* ID of device containing file */
mode_t st_mode; /* Mode of file (see below) */
nlink_t st_nlink; /* Number of hard links */
- ino64_t st_ino; /* File serial number */
+ ino_t st_ino; /* File serial number */
uid_t st_uid; /* User ID of the file */
gid_t st_gid; /* Group ID of the file */
dev_t st_rdev; /* Device ID */
.Pp
The time-related fields of
.Fa struct stat
-and
-.Fa struct stat64
are as follows:
.Bl -tag -width XXXst_birthtime
.It st_atime
.Xr write 2
system calls.
.It st_birthtime
-Time of file creation. Only set once when the file is created. This field is
-only available in the 64 bit variants. On filesystems where birthtime is
-not available, this field holds the
+Time of file creation. Only set once when the file is created.
+On filesystems where birthtime is not available, this field holds the
.Fa ctime
instead.
.El
The file generation number,
.Fa st_gen ,
is only available to the super-user.
-.br
-The fields in the stat structure currently marked
-.Fa st_spare1 ,
-.Fa st_spare2 ,
-and
-.Fa st_spare3
-are present in preparation for inode time stamps expanding
-to 64 bits. This, however, can break certain programs that
-depend on the time stamps being contiguous (in calls to
-.Xr utimes 2 ) .
-.Sh LEGACY SYNOPSIS
-.Fd #include <sys/types.h>
-.Fd #include <sys/stat.h>
-.Pp
-The include file
-.In sys/types.h
-is necessary.
.Sh SEE ALSO
.Xr chflags 2 ,
.Xr chmod 2 ,
.Fn lstat
function call appeared in
.Bx 4.2 .
-The
-.Fn stat64 ,
-.Fn fstat64 ,
-and
-.Fn lstat64
-system calls first appeared in Mac OS X 10.5 (Leopard).
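A minimal userland sketch of calling the stat(2) interface described above; the path "/etc/passwd" and the printed fields are illustrative only and are not taken from this patch.

#include <sys/stat.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	struct stat sb;

	/* "/etc/passwd" is only an example path */
	if (stat("/etc/passwd", &sb) == -1) {
		fprintf(stderr, "stat: %s\n", strerror(errno));
		return 1;
	}
	/* st_ino is the file serial number; st_size is in bytes */
	printf("inode: %llu\n", (unsigned long long)sb.st_ino);
	printf("size:  %lld bytes\n", (long long)sb.st_size);
	printf("mode:  %o\n", (unsigned)(sb.st_mode & 07777));
	return 0;
}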
+++ /dev/null
-.so man2/stat.2
.Os
.Sh NAME
.Nm statfs,
-.Nm statfs64,
-.Nm fstatfs,
-.Nm fstatfs64
+.Nm fstatfs
.Nd get file system statistics
.Sh SYNOPSIS
.Fd #include <sys/param.h>
.Ft int
.Fn statfs "const char *path" "struct statfs *buf"
.Ft int
-.Fn statfs64 "const char *path" "struct statfs64 *buf"
-.Ft int
.Fn fstatfs "int fd" "struct statfs *buf"
-.Ft int
-.Fn fstatfs64 "int fd" "struct statfs64 *buf"
.Sh DESCRIPTION
.Fn Statfs
returns information about a mounted file system.
.Fa Buf
is a pointer to a
.Fa statfs
-or
-.Fa statfs64
structure defined as follows:
.Bd -literal
typedef struct { int32_t val[2]; } fsid_t;
-#define MFSNAMELEN 15 /* length of fs type name, not inc. nul */
-#define MNAMELEN 90 /* length of buffer for returned name */
#define MFSTYPENAMELEN 16 /* length of fs type name including null */
#define MAXPATHLEN 1024
struct statfs {
- short f_otype; /* type of file system (reserved: zero) */
- short f_oflags; /* copy of mount flags (reserved: zero) */
- long f_bsize; /* fundamental file system block size */
- long f_iosize; /* optimal transfer block size */
- long f_blocks; /* total data blocks in file system */
- long f_bfree; /* free blocks in fs */
- long f_bavail; /* free blocks avail to non-superuser */
- long f_files; /* total file nodes in file system */
- long f_ffree; /* free file nodes in fs */
- fsid_t f_fsid; /* file system id */
- uid_t f_owner; /* user that mounted the file system */
- short f_reserved1; /* reserved for future use */
- short f_type; /* type of file system (reserved) */
- long f_flags; /* copy of mount flags (reserved) */
- long f_reserved2[2]; /* reserved for future use */
- char f_fstypename[MFSNAMELEN]; /* fs type name */
- char f_mntonname[MNAMELEN]; /* directory on which mounted */
- char f_mntfromname[MNAMELEN]; /* mounted file system */
- char f_reserved3; /* reserved for future use */
- long f_reserved4[4]; /* reserved for future use */
-};
-
-struct statfs64 {
uint32_t f_bsize; /* fundamental file system block size */
int32_t f_iosize; /* optimal transfer block size */
uint64_t f_blocks; /* total data blocks in file system */
.Sh HISTORY
The
.Fn statfs
-function first appeared in 4.4BSD. The
-.Fn statfs64
-and
-.Fn fstatfs64
-first appeared in Max OS X 10.5 (Leopard).
+function first appeared in 4.4BSD.
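A minimal sketch of calling statfs(2) as described above; the mount point "/" is illustrative only, and the casts are there so the example compiles against either the old or the new struct statfs field widths.

#include <sys/param.h>
#include <sys/mount.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	struct statfs fs;

	/* "/" is only an example mount point */
	if (statfs("/", &fs) == -1) {
		fprintf(stderr, "statfs: %s\n", strerror(errno));
		return 1;
	}
	printf("fs type:     %s\n", fs.f_fstypename);
	printf("mounted on:  %s\n", fs.f_mntonname);
	printf("block size:  %lu\n", (unsigned long)fs.f_bsize);
	printf("free blocks: %llu\n", (unsigned long long)fs.f_bfree);
	return 0;
}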
+++ /dev/null
-.so man2/statfs.2
-
-
.Bd -literal
/*** Excerpt from <sys/dirent.h> ***/
/*
- * The dirent structure defines the format of directory entries returned by
- * the getdirentries(2) system call.
+ * The dirent structure defines the format of directory entries.
*
* A directory entry has a struct dirent at the front of it, containing its
* inode number, the length of the entry, and the length of the name
* contained in the entry. These are followed by the name padded to a 4
* byte boundary with null bytes. All names are guaranteed null terminated.
- * The maximum length of a name in a directory is MAXNAMLEN.
- * The dirent structure defines the format of directory entries returned by
- * the getdirentries(2) system call.
+ * The maximum length of a name in a directory is MAXPATHLEN.
*/
#ifndef _SYS_DIRENT_H
#define _SYS_DIRENT_H
struct dirent {
- u_int32_t d_fileno; /* file number of entry */
+ ino_t d_ino; /* file number of entry */
+	u_int64_t  d_seekoff;	/* seek offset (optional, used by servers) */
u_int16_t d_reclen; /* length of this record */
+ u_int16_t d_namlen; /* length of string in d_name */
u_int8_t d_type; /* file type, see below */
- u_int8_t d_namlen; /* length of string in d_name */
-#ifdef _POSIX_SOURCE
- char d_name[255 + 1]; /* name must be no longer than this */
-#else
-#define MAXNAMLEN 255
- char d_name[MAXNAMLEN + 1]; /* name must be no longer than this */
-#endif
+ char d_name[MAXPATHLEN]; /* name must be no longer than this */
};
/*
#ifndef _DIRENT_H
#define _DIRENT_H
-#ifdef _POSIX_SOURCE
-typedef void * DIR;
-#else
-
-#define d_ino d_fileno /* backward compatibility */
-
/* definitions for library routines operating on directories. */
#define DIRBLKSIZ 1024
struct _telldir; /* see telldir.h */
/* structure describing an open directory. */
-typedef struct _dirdesc {
- int dd_fd; /* file descriptor associated with directory */
- long dd_loc; /* offset in current buffer */
- long dd_size; /* amount of data returned by getdirentries */
- char *dd_buf; /* data buffer */
- int dd_len; /* size of data buffer */
- long dd_seek; /* magic cookie returned by getdirentries */
- long dd_rewind; /* magic cookie for rewinding */
- int dd_flags; /* flags for readdir */
- pthread_mutex_t dd_lock; /* for thread locking */
- struct _telldir *dd_td; /* telldir position recording */
+typedef struct {
+ int __dd_fd; /* file descriptor associated with directory */
+ long __dd_loc; /* offset in current buffer */
+ long __dd_size; /* amount of data returned by getdirentries */
+ char *__dd_buf; /* data buffer */
+ int __dd_len; /* size of data buffer */
+ long __dd_seek; /* magic cookie returned by getdirentries */
+ long __dd_rewind; /* magic cookie for rewinding */
+ int __dd_flags; /* flags for readdir */
+ pthread_mutex_t __dd_lock; /* for thread locking */
+ struct _telldir *__dd_td; /* telldir position recording */
} DIR;
-#define dirfd(dirp) ((dirp)->dd_fd)
-
-/* flags for opendir2 */
-#define DTF_HIDEW 0x0001 /* hide whiteout entries */
-#define DTF_NODUP 0x0002 /* don't return duplicate names */
-/* structure describing an open directory. */
-typedef struct _dirdesc {
- int dd_fd; /* file descriptor associated with directory */
- long dd_loc; /* offset in current buffer */
- long dd_size; /* amount of data returned by getdirentries */
- char *dd_buf; /* data buffer */
- int dd_len; /* size of data buffer */
- long dd_seek; /* magic cookie returned by getdirentries */
- long dd_rewind; /* magic cookie for rewinding */
- int dd_flags; /* flags for readdir */
- pthread_mutex_t dd_lock; /* for thread locking */
- struct _telldir *dd_td; /* telldir position recording */
-} DIR;
-
-#define dirfd(dirp) ((dirp)->dd_fd)
+#define dirfd(dirp) ((dirp)->__dd_fd)
/* flags for opendir2 */
#define DTF_HIDEW 0x0001 /* hide whiteout entries */
#define DTF_REWIND 0x0004 /* rewind after reading union stack */
#define __DTF_READALL 0x0008 /* everything has been read */
-#ifndef NULL
-#define NULL 0
-#endif
-
-#endif /* _POSIX_SOURCE */
-
#endif /* !_DIRENT_H_ */
.Ed
.Sh SEE ALSO
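A minimal sketch of walking a directory using the dirent interface excerpted above, going through the opendir(3)/readdir(3) wrappers rather than getdirentries(2) directly; the directory "/tmp" is illustrative only.

#include <dirent.h>
#include <errno.h>
#include <stdio.h>
#include <string.h>

int
main(void)
{
	DIR *dirp;
	struct dirent *dp;

	/* "/tmp" is only an example directory */
	dirp = opendir("/tmp");
	if (dirp == NULL) {
		fprintf(stderr, "opendir: %s\n", strerror(errno));
		return 1;
	}
	while ((dp = readdir(dirp)) != NULL) {
		/* d_ino is the file number, d_namlen the name length */
		printf("%8llu %.*s\n",
		    (unsigned long long)dp->d_ino,
		    (int)dp->d_namlen, dp->d_name);
	}
	closedir(dirp);
	return 0;
}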
typedef long daddr_t;
typedef char * caddr_t;
-typedef u_long ino_t;
+typedef u_int64_t ino_t;
typedef long swblk_t;
typedef long segsz_t;
typedef long off_t;
struct vnode * vp = ap->a_vp;
register devnode_t * dnp;
struct timeval now;
+ int ref = 1;
- if (vnode_isinuse(vp, 1)) {
+ if (vp->v_type == VBLK)
+ ref = 0;
+
+ if (vnode_isinuse(vp, ref)) {
DEVFS_LOCK();
microtime(&now);
dnp = VTODN(vp);
dn_times(dnp, &now, &now, &now);
DEVFS_UNLOCK();
}
+
return (VOCALL (spec_vnodeop_p, VOFFSET(vnop_close), ap));
}
*/
extern int hard_throttle_on_root;
void IOSleep(int);
-extern void throttle_lowpri_io(int *lowpri_window,mount_t v_mount);
// the low priority process may wait for at most LOWPRI_MAX_DELAY millisecond
#define LOWPRI_INITIAL_WINDOW_MSECS 100
#define LOWPRI_MAX_WAITING_MSECS 200
#define LOWPRI_SLEEP_INTERVAL 5
+struct _throttle_io_info_t {
+ struct timeval last_normal_IO_timestamp;
+ SInt32 numthreads_throttling;
+};
+
+struct _throttle_io_info_t _throttle_io_info[LOWPRI_MAX_NUM_DEV];
int lowpri_IO_initial_window_msecs = LOWPRI_INITIAL_WINDOW_MSECS;
int lowpri_IO_window_msecs_inc = LOWPRI_WINDOW_MSECS_INC;
int lowpri_max_window_msecs = LOWPRI_MAX_WINDOW_MSECS;
SYSCTL_INT(_debug, OID_AUTO, lowpri_max_window_msecs, CTLFLAG_RW, &lowpri_max_window_msecs, LOWPRI_INITIAL_WINDOW_MSECS, "");
SYSCTL_INT(_debug, OID_AUTO, lowpri_max_waiting_msecs, CTLFLAG_RW, &lowpri_max_waiting_msecs, LOWPRI_INITIAL_WINDOW_MSECS, "");
-void throttle_lowpri_io(int *lowpri_window,mount_t v_mount)
+int throttle_io_will_be_throttled(int lowpri_window_msecs, size_t devbsdunit)
{
- int i;
- struct timeval last_lowpri_IO_timestamp,last_normal_IO_timestamp;
struct timeval elapsed;
- int lowpri_IO_window_msecs;
- struct timeval lowpri_IO_window;
- int max_try_num = lowpri_max_waiting_msecs / LOWPRI_SLEEP_INTERVAL;
+ int elapsed_msecs;
- KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_START,
- *lowpri_window, 0, 0, 0, 0);
+ microuptime(&elapsed);
+ timevalsub(&elapsed, &_throttle_io_info[devbsdunit].last_normal_IO_timestamp);
+ elapsed_msecs = elapsed.tv_sec * 1000 + elapsed.tv_usec / 1000;
- last_normal_IO_timestamp = v_mount->last_normal_IO_timestamp;
-
- for (i=0; i<max_try_num; i++) {
- microuptime(&last_lowpri_IO_timestamp);
+ if (lowpri_window_msecs == -1) // use the max waiting time
+ lowpri_window_msecs = lowpri_max_waiting_msecs;
- elapsed = last_lowpri_IO_timestamp;
- timevalsub(&elapsed, &last_normal_IO_timestamp);
+ return elapsed_msecs < lowpri_window_msecs;
+}
- lowpri_IO_window_msecs = *lowpri_window;
- lowpri_IO_window.tv_sec = lowpri_IO_window_msecs / 1000;
- lowpri_IO_window.tv_usec = (lowpri_IO_window_msecs % 1000) * 1000;
+void throttle_lowpri_io(boolean_t ok_to_sleep)
+{
+ int i;
+ int max_try_num;
+ struct uthread *ut;
- if (timevalcmp(&elapsed, &lowpri_IO_window, <)) {
- IOSleep(LOWPRI_SLEEP_INTERVAL);
- } else {
- break;
+ ut = get_bsdthread_info(current_thread());
+
+ if (ut->uu_lowpri_window == 0)
+ return;
+
+ max_try_num = lowpri_max_waiting_msecs / LOWPRI_SLEEP_INTERVAL * MAX(1, _throttle_io_info[ut->uu_devbsdunit].numthreads_throttling);
+
+ KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_START,
+ ut->uu_lowpri_window, 0, 0, 0, 0);
+
+ if (ok_to_sleep == TRUE) {
+ for (i=0; i<max_try_num; i++) {
+ if (throttle_io_will_be_throttled(ut->uu_lowpri_window, ut->uu_devbsdunit)) {
+ IOSleep(LOWPRI_SLEEP_INTERVAL);
+ } else {
+ break;
+ }
}
}
-
KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_END,
- *lowpri_window, i*5, 0, 0, 0);
- *lowpri_window = 0;
+ ut->uu_lowpri_window, i*5, 0, 0, 0);
+ SInt32 oldValue;
+ oldValue = OSDecrementAtomic(&_throttle_io_info[ut->uu_devbsdunit].numthreads_throttling);
+ ut->uu_lowpri_window = 0;
+
+ if (oldValue <= 0) {
+ panic("%s: numthreads negative", __func__);
+ }
+}
+
+int throttle_get_io_policy(struct uthread **ut)
+{
+ int policy = IOPOL_DEFAULT;
+ proc_t p = current_proc();
+
+ *ut = get_bsdthread_info(current_thread());
+
+ if (p != NULL)
+ policy = p->p_iopol_disk;
+
+ if (*ut != NULL) {
+ // the I/O policy of the thread overrides that of the process
+ // unless the I/O policy of the thread is default
+ if ((*ut)->uu_iopol_disk != IOPOL_DEFAULT)
+ policy = (*ut)->uu_iopol_disk;
+ }
+ return policy;
}
int
hard_throttle_on_root = 1;
if (lowpri_IO_initial_window_msecs) {
- proc_t p;
struct uthread *ut;
- int policy = IOPOL_DEFAULT;
+ int policy;
int is_throttleable_io = 0;
int is_passive_io = 0;
- p = current_proc();
- ut = get_bsdthread_info(current_thread());
-
- if (p != NULL)
- policy = p->p_iopol_disk;
-
- if (ut != NULL) {
- // the I/O policy of the thread overrides that of the process
- // unless the I/O policy of the thread is default
- if (ut->uu_iopol_disk != IOPOL_DEFAULT)
- policy = ut->uu_iopol_disk;
- }
+ size_t devbsdunit;
+ SInt32 oldValue;
+
+ policy = throttle_get_io_policy(&ut);
switch (policy) {
case IOPOL_DEFAULT:
if (!is_throttleable_io && ISSET(bflags, B_PASSIVE))
is_passive_io |= 1;
+ if (buf_vnode(bp)->v_mount != NULL)
+ devbsdunit = buf_vnode(bp)->v_mount->mnt_devbsdunit;
+ else
+ devbsdunit = LOWPRI_MAX_NUM_DEV - 1;
if (!is_throttleable_io) {
- if (!is_passive_io && buf_vnode(bp)->v_mount != NULL){
- microuptime(&(buf_vnode(bp)->v_mount->last_normal_IO_timestamp));
+ if (!is_passive_io){
+ microuptime(&_throttle_io_info[devbsdunit].last_normal_IO_timestamp);
}
} else {
/*
* do the delay just before we return from the system
* call that triggered this I/O or from vnode_pagein
*/
- if(buf_vnode(bp)->v_mount != NULL)
- ut->v_mount = buf_vnode(bp)->v_mount;
if (ut->uu_lowpri_window == 0) {
+ ut->uu_devbsdunit = devbsdunit;
+ oldValue = OSIncrementAtomic(&_throttle_io_info[devbsdunit].numthreads_throttling);
+ if (oldValue < 0) {
+ panic("%s: numthreads negative", __func__);
+ }
ut->uu_lowpri_window = lowpri_IO_initial_window_msecs;
+ ut->uu_lowpri_window += lowpri_IO_window_msecs_inc * oldValue;
} else {
- ut->uu_lowpri_window += lowpri_IO_window_msecs_inc;
- if (ut->uu_lowpri_window > lowpri_max_window_msecs)
- ut->uu_lowpri_window = lowpri_max_window_msecs;
+ if (ut->uu_devbsdunit != devbsdunit) { // the thread sends I/Os to different devices within the same system call
+ // keep track of the numthreads in the right device
+ OSDecrementAtomic(&_throttle_io_info[ut->uu_devbsdunit].numthreads_throttling);
+ OSIncrementAtomic(&_throttle_io_info[devbsdunit].numthreads_throttling);
+ ut->uu_devbsdunit = devbsdunit;
+ }
+ int numthreads = MAX(1, _throttle_io_info[devbsdunit].numthreads_throttling);
+ ut->uu_lowpri_window += lowpri_IO_window_msecs_inc * numthreads;
+ if (ut->uu_lowpri_window > lowpri_max_window_msecs * numthreads)
+ ut->uu_lowpri_window = lowpri_max_window_msecs * numthreads;
}
}
}
* sum of the reference counts on all the aliased
* vnodes descends to one, we are on last close.
*/
- if (vcount(vp) > 1)
+ if (vcount(vp) > 0)
return (0);
#else /* DEVFS_IMPLEMENTS_LOCKING */
/*
* sum of the reference counts on all the aliased
* vnodes descends to one, we are on last close.
*/
- if (vcount(vp) > 1)
+ if (vcount(vp) > 0)
return (0);
/*
void
dlil_init(void)
{
- PE_parse_boot_arg("net_affinity", &net_affinity);
+ PE_parse_boot_argn("net_affinity", &net_affinity, sizeof (net_affinity));
TAILQ_INIT(&dlil_ifnet_head);
TAILQ_INIT(&ifnet_head);
}
}
- /* Quick check for VLAN */
- if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0 ||
- ether_type == htons(ETHERTYPE_VLAN)) {
- *protocol_family = PF_VLAN;
- return 0;
+ /* check for VLAN */
+ if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
+ if (EVL_VLANOFTAG(m->m_pkthdr.vlan_tag) != 0) {
+ *protocol_family = PF_VLAN;
+ return (0);
+ }
+ /* the packet is just priority-tagged, clear the bit */
+ m->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID;
+ }
+ else if (ether_type == htons(ETHERTYPE_VLAN)) {
+ struct ether_vlan_header * evl;
+
+ evl = (struct ether_vlan_header *)frame_header;
+ if (m->m_len < ETHER_VLAN_ENCAP_LEN
+ || ntohs(evl->evl_proto) == ETHERTYPE_VLAN
+ || EVL_VLANOFTAG(ntohs(evl->evl_tag)) != 0) {
+ *protocol_family = PF_VLAN;
+ return 0;
+ }
+ /* the packet is just priority-tagged */
+
+ /* make the encapsulated ethertype the actual ethertype */
+ ether_type = evl->evl_encap_proto = evl->evl_proto;
+
+ /* remove the encapsulation header */
+ m->m_len -= ETHER_VLAN_ENCAP_LEN;
+ m->m_data += ETHER_VLAN_ENCAP_LEN;
+ m->m_pkthdr.len -= ETHER_VLAN_ENCAP_LEN;
+ m->m_pkthdr.csum_flags = 0; /* can't trust hardware checksum */
}
data = mtod(m, u_int8_t*);
#define IFEF_VLAN 0x200 /* interface has one or more vlans */
#define IFEF_BOND 0x400 /* interface is part of bond */
#define IFEF_ARPLL 0x800 /* ARP for IPv4LL addresses on this port */
+#define IFEF_NOWINDOWSCALE 0x1000 /* TCP window scale disabled on this interface, see 5933937 & 5959897 */
+#define IFEF_NOTIMESTAMPS IFEF_NOWINDOWSCALE /* We don't actually disable timestamps, just window scale; see 5959897 */
#define IFEF_SENDLIST 0x10000000 /* Interface supports sending a list of packets */
#define IFEF_REUSE 0x20000000 /* DLIL ifnet recycler, ifnet is not new */
#define IFEF_INUSE 0x40000000 /* DLIL ifnet recycler, ifnet in use */
/* We found a vlan interface, inject on that interface. */
dlil_input_packet_list(ifp, m);
} else {
+ m->m_pkthdr.header = frame_header;
/* Send priority-tagged packet up through the parent */
dlil_input_packet_list(p, m);
}
{
int size;
- PE_parse_boot_arg("rte_debug", &rte_debug);
+ PE_parse_boot_argn("rte_debug", &rte_debug, sizeof (rte_debug));
if (rte_debug != 0)
rte_debug |= RTD_DEBUG;
PRIVATE_DATAFILES = \
if_fddi.h if_atm.h ip_dummynet.h \
tcp_debug.h \
- in_gif.h ip_compat.h
+ in_gif.h ip_compat.h ip_edgehole.h
PRIVATE_KERNELFILES = ${KERNELFILES} \
ip_ecn.h ip_encap.h ip_flow.h
gate = rt->rt_gateway;
SDL(gate)->sdl_type = rt->rt_ifp->if_type;
SDL(gate)->sdl_index = rt->rt_ifp->if_index;
- rt->rt_expire = timenow.tv_sec;
+ /* In case we're called before 1.0 sec. has elapsed */
+ rt->rt_expire = MAX(timenow.tv_sec, 1);
break;
}
/* Announce a new entry if requested. */
gate_ll->sdl_alen = broadcast_len;
gate_ll->sdl_family = AF_LINK;
gate_ll->sdl_len = sizeof(struct sockaddr_dl);
- rt->rt_expire = timenow.tv_sec;
+ /* In case we're called before 1.0 sec. has elapsed */
+ rt->rt_expire = MAX(timenow.tv_sec, 1);
}
#endif
KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_START, len,0,0,0,0);
/* sanity check */
- if (m->m_pkthdr.len < skip + len) {
+ if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.len < skip + len) {
panic("inet_cksum: mbuf len (%d) < off+len (%d+%d)\n",
m->m_pkthdr.len, skip, len);
}
KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_START, len,0,0,0,0);
/* sanity check */
- if (m->m_pkthdr.len < skip + len) {
+ if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.len < skip + len) {
panic("inet_cksum: mbuf len (%d) < off+len (%d+%d)\n",
m->m_pkthdr.len, skip, len);
}
return (mac_error);
}
mac_inpcb_label_associate(so, inp);
+#endif
+#if CONFIG_IP_EDGEHOLE
+ ip_edgehole_attach(inp);
#endif
so->so_pcb = (caddr_t)inp;
void *pdp_ifp;
#endif /* _KERN_SYS_KERNELTYPES_H_ */
#endif /* CONFIG_EMBEDDED */
+#if CONFIG_IP_EDGEHOLE
+ u_int32_t inpcb_edgehole_flags;
+ u_int32_t inpcb_edgehole_mask;
+#endif
};
#endif /* KERNEL_PRIVATE */
socket_unlock(so, 0);
#if CONFIG_MACF_NET
mac_mbuf_label_associate_inpcb(inp, m);
+#endif
+#if CONFIG_IP_EDGEHOLE
+ ip_edgehole_mbuf_tag(inp, m);
#endif
error = ip_output(m,
inp->inp_options, &inp->inp_route,
--- /dev/null
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/kpi_mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <string.h> // For bzero
+#include <libkern/libkern.h> // for printf
+#include <kern/debug.h> // For panic
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <libkern/OSMalloc.h>
+#include <libkern/OSAtomic.h>
+#include <kern/thread_call.h>
+#include "ip_edgehole.h"
+
+enum
+{
+ kEdgeHoleFlag_BlockInternet = 0x00000001,
+ kEdgeHoleFlag_BlockVV = 0x00000002
+};
+
+struct edgehole_tag
+{
+ // flags tells us whether or not we should block traffic
+ u_int32_t eh_flags;
+
+ // These fields are used to help us find the PCB after we block traffic for TCP
+ struct inpcbinfo *eh_inpinfo;
+ struct inpcb *eh_inp;
+};
+
+struct edgehole_delayed_notify
+{
+	// next entry in the singly-linked list of pending notifications
+ struct edgehole_delayed_notify *next;
+
+ // These fields are used to help us find the PCB after we block traffic for TCP
+ struct inpcbinfo *inpinfo;
+ struct inpcb *inp;
+};
+
+static mbuf_tag_id_t edgehole_tag = 0;
+static thread_call_t edgehole_callout = NULL;
+static OSMallocTag edgehole_mtag = 0;
+static struct edgehole_delayed_notify *edgehole_delay_list = NULL;
+
+#ifndef HAS_COMPARE_AND_SWAP_PTR
+// 64bit kernels have an OSCompareAndSwapPtr that does the right thing
+static Boolean
+OSCompareAndSwapPtr(
+ void *oldValue,
+ void *newValue,
+ volatile void *address)
+{
+ return OSCompareAndSwap((UInt32)oldValue, (UInt32)newValue, (volatile UInt32*)address);
+}
+#endif
+
+static void
+ip_edgehole_notify_delayed(
+ struct inpcb *inp,
+ struct inpcbinfo *inpinfo)
+{
+ if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING)
+ {
+ // We've found an inpcb for the packet we're dropping.
+ struct socket *so = inp->inp_socket;
+ if (so && so != &inpinfo->nat_dummy_socket)
+ {
+ socket_lock(so, 1);
+ if (in_pcb_checkstate(inp, WNT_RELEASE,1) != WNT_STOPUSING)
+ {
+ if (inp->inp_ip_p == IPPROTO_TCP)
+ {
+ // Why do we still have caddr_t? Come on! Casting from
+ // caddr_t to something else causes "cast increases required alignment"
+					// warnings, and warnings are treated as failures. This union does the
+ // exact same thing without the warning.
+ union
+ {
+ caddr_t caddrt_sucks;
+ void *void_ptr;
+ } bite_me;
+
+ bite_me.caddrt_sucks = inp->inp_ppcb;
+ tcp_drop((struct tcpcb*)bite_me.void_ptr, EPERM);
+ }
+ else
+ {
+ // Is this enough?
+ socantsendmore(so);
+ }
+ }
+ socket_unlock(so, 1);
+ }
+ }
+}
+
+// Some shortcomings of this strategy:
+// 1) an inpcb could be reused for a new socket before we get a chance to notify
+
+static void
+ip_edgehole_process_delayed(
+ __unused void *unused1,
+ __unused void *unused2)
+{
+ struct edgehole_delayed_notify *head;
+
+ while (edgehole_delay_list)
+ {
+ // Atomically grab the list
+ do
+ {
+ head = edgehole_delay_list;
+ }
+ while (!OSCompareAndSwapPtr(head, NULL, &edgehole_delay_list));
+
+ if (head == NULL)
+ {
+ break;
+ }
+
+ // Prune duplicates from the list
+ struct edgehole_delayed_notify *current;
+ struct edgehole_delayed_notify **current_p;
+ struct edgehole_delayed_notify *ye_dead;
+ for (current = head; current && current->next; current = current->next)
+ {
+ current_p = &head;
+ while (*current_p)
+ {
+ if ((*current_p)->inp == current->inp)
+ {
+ ye_dead = *current_p;
+ *current_p = ye_dead->next;
+ OSFree(ye_dead, sizeof(*ye_dead), edgehole_mtag);
+ }
+ else
+ {
+ current_p = &(*current_p)->next;
+ }
+ }
+ }
+
+ while (head)
+ {
+ struct inpcbinfo *lockedinfo;
+
+ lockedinfo = head->inpinfo;
+
+ // Lock the list
+ lck_rw_lock_shared(lockedinfo->mtx);
+
+ struct inpcb *inp;
+
+ // Walk the inp list.
+ LIST_FOREACH(inp, lockedinfo->listhead, inp_list)
+ {
+ // Walk the list of notifications
+ for (current = head; current != NULL; current = current->next)
+ {
+ // Found a match, notify
+ if (current->inpinfo == lockedinfo && current->inp == inp)
+ {
+ ip_edgehole_notify_delayed(inp, lockedinfo);
+ }
+ }
+ }
+
+ lck_rw_done(lockedinfo->mtx);
+
+ // Release all the notifications for this inpcbinfo
+ current_p = &head;
+ while (*current_p)
+ {
+ // Free any items for this inpcbinfo
+ if ((*current_p)->inpinfo == lockedinfo)
+ {
+ ye_dead = *current_p;
+ *current_p = ye_dead->next;
+ OSFree(ye_dead, sizeof(*ye_dead), edgehole_mtag);
+ }
+ else
+ {
+ current_p = &(*current_p)->next;
+ }
+ }
+ }
+ }
+}
+
+static void
+ip_edgehole_notify(
+ struct edgehole_tag *tag)
+{
+ // Since the lock on the socket may be held while a packet is being transmitted,
+ // we must allocate storage to keep track of this information and schedule a
+ // thread to handle the work.
+
+ if (tag->eh_inp == NULL || tag->eh_inpinfo == NULL)
+ return;
+
+ struct edgehole_delayed_notify *delayed = OSMalloc(sizeof(*delayed), edgehole_mtag);
+ if (delayed)
+ {
+ delayed->inp = tag->eh_inp;
+ delayed->inpinfo = tag->eh_inpinfo;
+ do
+ {
+ delayed->next = edgehole_delay_list;
+ }
+ while (!OSCompareAndSwapPtr(delayed->next, delayed, &edgehole_delay_list));
+
+ thread_call_enter(edgehole_callout);
+ }
+}
+
+__private_extern__ void
+ip_edgehole_attach(
+ struct inpcb *inp)
+{
+ inp->inpcb_edgehole_flags = 0;
+ inp->inpcb_edgehole_mask = 0;
+
+	// TBD: call MAC framework to find out if we are allowed to use EDGE
+#ifdef TEST_THE_EVIL_EDGE_HOLE
+ char pidname[64];
+ proc_selfname(pidname, sizeof(pidname));
+ pidname[sizeof(pidname) -1] = 0;
+ if (strcmp(pidname, "MobileSafari") == 0 ||
+ strcmp(pidname, "ping") == 0)
+ {
+ inp->inpcb_edgehole_flags = kEdgeHoleFlag_BlockInternet;
+ inp->inpcb_edgehole_mask = kEdgeHoleFlag_BlockInternet;
+ }
+#endif
+
+ if (inp->inpcb_edgehole_mask != 0)
+ {
+ // Allocate a callout
+ if (edgehole_callout == NULL)
+ {
+ thread_call_t tmp_callout = thread_call_allocate(ip_edgehole_process_delayed, NULL);
+ if (!tmp_callout) panic("ip_edgehole_attach: thread_call_allocate failed");
+ if (!OSCompareAndSwapPtr(NULL, tmp_callout, &edgehole_callout))
+ thread_call_free(tmp_callout);
+ }
+
+ // Allocate a malloc tag
+ if (edgehole_mtag == 0)
+ {
+ OSMallocTag mtag = OSMalloc_Tagalloc("com.apple.ip_edgehole", 0);
+ if (!mtag) panic("ip_edgehole_attach: OSMalloc_Tagalloc failed");
+ if (!OSCompareAndSwapPtr(NULL, mtag, &edgehole_mtag))
+ OSMalloc_Tagfree(mtag);
+ }
+ }
+}
+
+__private_extern__ void
+ip_edgehole_mbuf_tag(
+ struct inpcb *inp,
+ mbuf_t m)
+{
+ // Immediately bail if there are no flags on this inpcb
+ if (inp->inpcb_edgehole_mask == 0)
+ {
+ return;
+ }
+
+ // Allocate a tag_id if we don't have one already
+ if (edgehole_tag == 0)
+ mbuf_tag_id_find("com.apple.edgehole", &edgehole_tag);
+
+ struct edgehole_tag *tag;
+ size_t length;
+
+ // Find an existing tag
+ if (mbuf_tag_find(m, edgehole_tag, 0, &length, (void**)&tag) == 0)
+ {
+ if (length != sizeof(*tag))
+ panic("ip_edgehole_mbuf_tag - existing tag is wrong size");
+
+		// Merge this socket's restrictions into the existing tag: bits covered by
+		// the mask take this socket's flag values, other bits keep their old values
+ tag->eh_flags = (tag->eh_flags & (~inp->inpcb_edgehole_mask)) |
+ (inp->inpcb_edgehole_flags & inp->inpcb_edgehole_mask);
+ }
+ else if ((inp->inpcb_edgehole_mask & inp->inpcb_edgehole_flags) != 0)
+ {
+ // Add the tag
+ if (mbuf_tag_allocate(m, edgehole_tag, 0, sizeof(*tag), MBUF_WAITOK, (void**)&tag) != 0)
+ panic("ip_edgehole_mbuf_tag - mbuf_tag_allocate failed"); // ouch - how important is it that we block this stuff?
+
+ tag->eh_flags = (inp->inpcb_edgehole_flags & inp->inpcb_edgehole_mask);
+ tag->eh_inp = inp;
+ tag->eh_inpinfo = inp->inp_pcbinfo;
+ }
+}
+
+int
+ip_edgehole_filter(
+ mbuf_t *m,
+ __unused int isVV)
+{
+ struct edgehole_tag *tag;
+ size_t length;
+
+ if (mbuf_tag_find(*m, edgehole_tag, 0, &length, (void**)&tag) == 0)
+ {
+ if (length != sizeof(*tag))
+ panic("ip_edgehole_filter - existing tag is wrong size");
+
+ if ((tag->eh_flags & kEdgeHoleFlag_BlockInternet) != 0)
+ {
+ ip_edgehole_notify(tag);
+
+ mbuf_freem(*m); *m = NULL;
+ return EPERM;
+ }
+ }
+
+ return 0;
+}
--- /dev/null
+#include <sys/kpi_mbuf.h>
+
+struct inpcb;
+
+// Tag an mbuf on the way out with the edge flags from the inpcb
+extern void ip_edgehole_mbuf_tag(struct inpcb *inp, mbuf_t m);
+
+// Attach the edge flags to the inpcb
+extern void ip_edgehole_attach(struct inpcb *inp);
+
+// Called by the edge interface to determine if the edge interface
+// should drop the packet. Will return 0 if the packet should continue
+// to be processed or EPERM if ip_edgehole_filter swallowed the packet.
+// When ip_edgehole_filter swallows a packet, it frees it and sets your
+// pointer to it to NULL. isVV should be set to zero unless the edge
+// interface in question is the visual voicemail edge interface.
+extern int ip_edgehole_filter(mbuf_t *m, int isVV);
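+
+/*
+ * A minimal usage sketch (illustrative only, not part of this KPI): a hypothetical
+ * EDGE driver output path could consult the filter before transmitting a packet.
+ * The names edge_driver_output and edge_hw_transmit are assumptions made up for
+ * this example; only ip_edgehole_filter comes from this header.
+ *
+ *	static errno_t
+ *	edge_driver_output(ifnet_t ifp, mbuf_t m)
+ *	{
+ *		// isVV == 0: this is not the visual voicemail edge interface
+ *		if (ip_edgehole_filter(&m, 0) == EPERM) {
+ *			// the filter already freed m and set the pointer to NULL
+ *			return EPERM;
+ *		}
+ *		return edge_hw_transmit(ifp, m);	// hypothetical transmit routine
+ *	}
+ */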
mac_mbuf_label_associate_inpcb(inp, m);
#endif
+#if CONFIG_IP_EDGEHOLE
+ ip_edgehole_mbuf_tag(inp, m);
+#endif
+
#if CONFIG_FORCE_OUT_IFP
return (ip_output_list(m, 0, inp->inp_options, &inp->inp_route, flags,
inp->inp_moptions, inp->pdp_ifp));
/* ECN-setup SYN */
tp->ecn_flags |= (TE_SETUPRECEIVED | TE_SENDIPECT);
}
+#ifdef IFEF_NOWINDOWSCALE
+ if (m->m_pkthdr.rcvif != NULL &&
+ (m->m_pkthdr.rcvif->if_eflags & IFEF_NOWINDOWSCALE) != 0)
+ {
+		// Window scaling is disabled on this interface (see IFEF_NOWINDOWSCALE)
+ tp->t_flags &= ~(TF_REQ_SCALE);
+ }
+#endif
goto trimthenstep6;
}
tp->t_dupacks = 0;
break;
}
+
+ if (!IN_FASTRECOVERY(tp)) {
+ /*
+ * We were not in fast recovery. Reset the duplicate ack
+ * counter.
+ */
+ tp->t_dupacks = 0;
+ }
/*
* If the congestion window was inflated to account
* for the other side's cached packets, retract it.
*/
- if (tcp_do_newreno || tp->sack_enable) {
- if (IN_FASTRECOVERY(tp)) {
+ else {
+ if (tcp_do_newreno || tp->sack_enable) {
if (SEQ_LT(th->th_ack, tp->snd_recover)) {
if (tp->sack_enable)
tcp_sack_partialack(tp, th);
else
- tcp_newreno_partial_ack(tp, th);
- } else {
- /*
- * Out of fast recovery.
- * Window inflation should have left us
- * with approximately snd_ssthresh
- * outstanding data.
- * But in case we would be inclined to
- * send a burst, better to do it via
- * the slow start mechanism.
- */
- if (SEQ_GT(th->th_ack +
- tp->snd_ssthresh,
- tp->snd_max))
- tp->snd_cwnd = tp->snd_max -
- th->th_ack +
- tp->t_maxseg;
- else
- tp->snd_cwnd = tp->snd_ssthresh;
+ tcp_newreno_partial_ack(tp, th);
+ }
+ else {
+ if (tcp_do_newreno) {
+ long ss = tp->snd_max - th->th_ack;
+
+ /*
+ * Complete ack. Inflate the congestion window to
+ * ssthresh and exit fast recovery.
+ *
+ * Window inflation should have left us with approx.
+ * snd_ssthresh outstanding data. But in case we
+ * would be inclined to send a burst, better to do
+ * it via the slow start mechanism.
+ */
+ if (ss < tp->snd_ssthresh)
+ tp->snd_cwnd = ss + tp->t_maxseg;
+ else
+ tp->snd_cwnd = tp->snd_ssthresh;
+ }
+ else {
+ /*
+ * Clamp the congestion window to the crossover point
+ * and exit fast recovery.
+ */
+ if (tp->snd_cwnd > tp->snd_ssthresh)
+ tp->snd_cwnd = tp->snd_ssthresh;
+ }
+
+ EXIT_FASTRECOVERY(tp);
+ tp->t_dupacks = 0;
+ tp->t_bytes_acked = 0;
}
}
- } else {
- if (tp->t_dupacks >= tcprexmtthresh &&
- tp->snd_cwnd > tp->snd_ssthresh)
- tp->snd_cwnd = tp->snd_ssthresh;
+ else {
+ /*
+ * Clamp the congestion window to the crossover point
+ * and exit fast recovery in non-newreno and non-SACK case.
+ */
+ if (tp->snd_cwnd > tp->snd_ssthresh)
+ tp->snd_cwnd = tp->snd_ssthresh;
+ EXIT_FASTRECOVERY(tp);
+ tp->t_dupacks = 0;
+ tp->t_bytes_acked = 0;
+ }
}
- tp->t_dupacks = 0;
- tp->t_bytes_acked = 0;
+
+
/*
* If we reach this point, ACK is not a duplicate,
* i.e., it ACKs something we sent.
#if CONFIG_MACF_NET
mac_mbuf_label_associate_inpcb(tp->t_inpcb, m);
#endif
+#if CONFIG_IP_EDGEHOLE
+ ip_edgehole_mbuf_tag(tp->t_inpcb, m);
+#endif
#if INET6
if (isipv6) {
ip6 = mtod(m, struct ip6_hdr *);
unlocked = TRUE;
socket_unlock(so, 0);
}
-
+
/*
* Don't send down a chain of packets when:
* - TCP chaining is disabled
mac_netinet_tcp_reply(m);
}
#endif
+
+#if CONFIG_IP_EDGEHOLE
+ if (tp && tp->t_inpcb)
+ ip_edgehole_mbuf_tag(tp->t_inpcb, m);
+#endif
+
nth->th_seq = htonl(seq);
nth->th_ack = htonl(ack);
nth->th_x2 = 0;
#define ISN_BYTES_PER_SECOND 1048576
-//PWC - md5 routines cause alignment exceptions. Need to figure out why. For now use lame incremental
-// isn. how's that for not easily guessable!?
-
-int pwc_bogus;
-
tcp_seq
tcp_new_isn(tp)
struct tcpcb *tp;
/*
* Look-up the routing entry to the peer of this inpcb. If no route
- * is found and it cannot be allocated the return NULL. This routine
+ * is found and it cannot be allocated then return NULL. This routine
* is called by TCP routines that access the rmx structure and by tcp_mss
* to get the interface MTU.
*/
else
tp->t_flags |= TF_PMTUD;
+#ifdef IFEF_NOWINDOWSCALE
+ if (tp->t_state == TCPS_SYN_SENT && rt != NULL && rt->rt_ifp != NULL &&
+ (rt->rt_ifp->if_eflags & IFEF_NOWINDOWSCALE) != 0)
+ {
+		// Window scaling is disabled on this interface (see IFEF_NOWINDOWSCALE)
+ tp->t_flags &= ~(TF_REQ_SCALE);
+ }
+#endif
+
return rt;
}
#define TCPDEBUG2(req)
#endif
+#if CONFIG_USESOCKTHRESHOLD
__private_extern__ unsigned int tcp_sockthreshold = 64;
+#else
+__private_extern__ unsigned int tcp_sockthreshold = 0;
+#endif
SYSCTL_INT(_net_inet_tcp, OID_AUTO, sockthreshold, CTLFLAG_RW,
&tcp_sockthreshold , 0, "TCP Socket size increased if less than threshold");
#if CONFIG_MACF_NET
mac_mbuf_label_associate_inpcb(inp, m);
#endif
-
+
+#if CONFIG_IP_EDGEHOLE
+ ip_edgehole_mbuf_tag(inp, m);
+#endif
/*
* Calculate data length and get a mbuf
m_freem(m);
return EINVAL;
}
+
return udp_output(inp, m, addr, control, p);
}
void key_init(void);
+static errno_t ipsecif_register_control(void);
+
+
/*
* PF_KEY init
LIST_INIT(&spihash[i]);
raw_init();
+
+	/* register the ipsecif kernel control */
+ ipsecif_register_control();
+
}
return m;
}
+
+
+/* ----------------------------------------------------------------------------------
+Application of kernel control for interface creation
+
+Theory of operation:
+ipsecif acts as glue between kernel control sockets and ipsec network interfaces. This
+kernel control will register an interface for every client that connects.
+ipsec interfaces do not send or receive packets themselves; packets are intercepted by
+ipsec before they reach the interface. ipsec only needs the interface as a place to
+attach tunnel IP addresses.
+In the future, we may want to change the control mechanism to use PF_KEY to create
+interfaces for ipsec.
+---------------------------------------------------------------------------------- */
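+
+/* ----------------------------------------------------------------------------------
+A minimal userspace sketch (an illustration, not shipped code) of how a client could
+ask this kernel control to create an ipsec interface: resolve the control name
+(IPSECIF_CONTROL_NAME below) to an id with CTLIOCGINFO, then connect a
+PF_SYSTEM/SYSPROTO_CONTROL socket. The connect callback below allocates and attaches
+the interface, and closing the socket tears it down again. Root is required because
+the control is registered CTL_FLAG_PRIVILEGED.
+
+	#include <stdio.h>
+	#include <string.h>
+	#include <unistd.h>
+	#include <sys/ioctl.h>
+	#include <sys/socket.h>
+	#include <sys/sys_domain.h>
+	#include <sys/kern_control.h>
+
+	int
+	main(void)
+	{
+		struct ctl_info info;
+		struct sockaddr_ctl addr;
+		int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);
+
+		if (fd < 0)
+			return 1;
+
+		memset(&info, 0, sizeof(info));
+		strlcpy(info.ctl_name, "com.apple.net.ipsecif_control", sizeof(info.ctl_name));
+		if (ioctl(fd, CTLIOCGINFO, &info) == -1)	// resolve the name to ctl_id
+			return 1;
+
+		memset(&addr, 0, sizeof(addr));
+		addr.sc_len = sizeof(addr);
+		addr.sc_family = AF_SYSTEM;
+		addr.ss_sysaddr = AF_SYS_CONTROL;
+		addr.sc_id = info.ctl_id;
+		addr.sc_unit = 0;	// 0 asks the kernel for a free unit (assumption; a specific unit may also be requested)
+
+		if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) == -1)
+			return 1;
+
+		pause();	// the interface lives until this control socket is closed
+		close(fd);
+		return 0;
+	}
+---------------------------------------------------------------------------------- */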
+
+#include <sys/systm.h>
+//#include "if_ip.h"
+#include <sys/kern_control.h>
+#include <net/kpi_protocol.h>
+#include <net/kpi_interface.h>
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/bpf.h>
+#include <libkern/OSMalloc.h>
+#include <libkern/OSAtomic.h>
+#include <sys/mbuf.h> /* Until leopard, our ugly bpf protocol prepend will need this */
+#include <sys/sockio.h>
+#include <netinet/in.h>
+#include <netinet6/in6_var.h>
+
+/*
+*/
+
+#define IPSECIF_CONTROL_NAME "com.apple.net.ipsecif_control"
+
+/* Kernel Control functions */
+static errno_t ipsecif_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
+ void **unitinfo);
+static errno_t ipsecif_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
+ void *unitinfo);
+static errno_t ipsecif_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
+ void *unitinfo, mbuf_t m, int flags);
+
+/* Network Interface functions */
+static errno_t ipsecif_output(ifnet_t interface, mbuf_t data);
+static errno_t ipsecif_demux(ifnet_t interface, mbuf_t data, char *frame_header,
+ protocol_family_t *protocol);
+static errno_t ipsecif_add_proto(ifnet_t interface, protocol_family_t protocol,
+ const struct ifnet_demux_desc *demux_array,
+ u_int32_t demux_count);
+static errno_t ipsecif_del_proto(ifnet_t interface, protocol_family_t protocol);
+static errno_t ipsecif_ioctl(ifnet_t interface, u_int32_t cmd, void *data);
+static errno_t ipsecif_settap(ifnet_t interface, bpf_tap_mode mode,
+ bpf_packet_func callback);
+static void ipsecif_detached(ifnet_t interface);
+
+/* Protocol handlers */
+static errno_t ipsecif_attach_proto(ifnet_t interface, protocol_family_t proto);
+static errno_t ipsecif_proto_input(ifnet_t interface, protocol_family_t protocol,
+ mbuf_t m, char *frame_header);
+
+/* Control block allocated for each kernel control connection */
+struct ipsecif_pcb {
+ kern_ctl_ref ctlref;
+ u_int32_t unit;
+ ifnet_t ifp;
+ bpf_tap_mode mode;
+ bpf_packet_func tap;
+};
+
+static kern_ctl_ref ipsecif_kctlref;
+static u_int32_t ipsecif_family;
+static OSMallocTag ipsecif_malloc_tag;
+static SInt32 ipsecif_ifcount = 0;
+
+/* Prepend length */
+static void*
+ipsecif_alloc(size_t size)
+{
+ size_t *mem = OSMalloc(size + sizeof(size_t), ipsecif_malloc_tag);
+
+ if (mem) {
+ *mem = size + sizeof(size_t);
+ mem++;
+ }
+
+ return (void*)mem;
+}
+
+static void
+ipsecif_free(void *ptr)
+{
+ size_t *size = ptr;
+ size--;
+ OSFree(size, *size, ipsecif_malloc_tag);
+}
+
+static errno_t
+ipsecif_register_control(void)
+{
+ struct kern_ctl_reg kern_ctl;
+ errno_t result = 0;
+
+ /* Create a tag to allocate memory */
+ ipsecif_malloc_tag = OSMalloc_Tagalloc(IPSECIF_CONTROL_NAME, OSMT_DEFAULT);
+
+ /* Find a unique value for our interface family */
+ result = mbuf_tag_id_find(IPSECIF_CONTROL_NAME, &ipsecif_family);
+ if (result != 0) {
+		printf("ipsecif_register_control - mbuf_tag_id_find failed: %d\n", result);
+ return result;
+ }
+
+ bzero(&kern_ctl, sizeof(kern_ctl));
+ strncpy(kern_ctl.ctl_name, IPSECIF_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
+ kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
+ kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED; /* Require root */
+ kern_ctl.ctl_connect = ipsecif_ctl_connect;
+ kern_ctl.ctl_disconnect = ipsecif_ctl_disconnect;
+ kern_ctl.ctl_send = ipsecif_ctl_send;
+
+ result = ctl_register(&kern_ctl, &ipsecif_kctlref);
+ if (result != 0) {
+ printf("ipsecif_register_control - ctl_register failed: %d\n", result);
+ return result;
+ }
+
+ /* Register the protocol plumbers */
+ if ((result = proto_register_plumber(PF_INET, ipsecif_family,
+ ipsecif_attach_proto, NULL)) != 0) {
+ printf("ipsecif_register_control - proto_register_plumber(PF_INET, %d) failed: %d\n",
+ ipsecif_family, result);
+ ctl_deregister(ipsecif_kctlref);
+ return result;
+ }
+
+ /* Register the protocol plumbers */
+ if ((result = proto_register_plumber(PF_INET6, ipsecif_family,
+ ipsecif_attach_proto, NULL)) != 0) {
+ proto_unregister_plumber(PF_INET, ipsecif_family);
+ ctl_deregister(ipsecif_kctlref);
+ printf("ipsecif_register_control - proto_register_plumber(PF_INET6, %d) failed: %d\n",
+ ipsecif_family, result);
+ return result;
+ }
+
+ return 0;
+}
+
+/* Kernel control functions */
+
+static errno_t
+ipsecif_ctl_connect(
+ kern_ctl_ref kctlref,
+ struct sockaddr_ctl *sac,
+ void **unitinfo)
+{
+ struct ifnet_init_params ipsecif_init;
+ struct ipsecif_pcb *pcb;
+ errno_t result;
+
+ /* kernel control allocates, interface frees */
+ pcb = ipsecif_alloc(sizeof(*pcb));
+ if (pcb == NULL)
+ return ENOMEM;
+
+ /* Setup the protocol control block */
+ bzero(pcb, sizeof(*pcb));
+ *unitinfo = pcb;
+ pcb->ctlref = kctlref;
+ pcb->unit = sac->sc_unit;
+	printf("ipsecif_ctl_connect: creating unit ipsec%d\n", pcb->unit);
+
+ /* Create the interface */
+ bzero(&ipsecif_init, sizeof(ipsecif_init));
+ ipsecif_init.name = "ipsec";
+ ipsecif_init.unit = pcb->unit;
+ ipsecif_init.family = ipsecif_family;
+ ipsecif_init.type = IFT_OTHER;
+ ipsecif_init.output = ipsecif_output;
+ ipsecif_init.demux = ipsecif_demux;
+ ipsecif_init.add_proto = ipsecif_add_proto;
+ ipsecif_init.del_proto = ipsecif_del_proto;
+ ipsecif_init.softc = pcb;
+ ipsecif_init.ioctl = ipsecif_ioctl;
+ ipsecif_init.set_bpf_tap = ipsecif_settap;
+ ipsecif_init.detach = ipsecif_detached;
+
+ result = ifnet_allocate(&ipsecif_init, &pcb->ifp);
+ if (result != 0) {
+ printf("ipsecif_ctl_connect - ifnet_allocate failed: %d\n", result);
+ ipsecif_free(pcb);
+ return result;
+ }
+ OSIncrementAtomic(&ipsecif_ifcount);
+
+ /* Set flags and additional information. */
+ ifnet_set_mtu(pcb->ifp, 1280);
+ ifnet_set_flags(pcb->ifp, IFF_UP | IFF_MULTICAST | IFF_BROADCAST, 0xffff);
+// ifnet_set_flags(pcb->ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
+
+ /* Attach the interface */
+ result = ifnet_attach(pcb->ifp, NULL);
+ if (result != 0) {
+		printf("ipsecif_ctl_connect - ifnet_attach failed: %d\n", result);
+ ifnet_release(pcb->ifp);
+ ipsecif_free(pcb);
+ }
+
+ /* Attach to bpf */
+ if (result == 0)
+ bpfattach(pcb->ifp, DLT_NULL, 4);
+
+ return result;
+}
+
+/*
+ * These defines are marked private but it's impossible to remove an interface
+ * without them.
+ */
+#ifndef SIOCPROTODETACH
+#define SIOCPROTODETACH _IOWR('i', 81, struct ifreq) /* detach proto from interface */
+#endif /* SIOCPROTODETACH */
+
+#ifndef SIOCPROTODETACH_IN6
+#define SIOCPROTODETACH_IN6 _IOWR('i', 111, struct in6_ifreq) /* detach proto from interface */
+#endif /* SIOCPROTODETACH_IN6 */
+
+
+static errno_t
+ipsecif_detach_ip(
+ ifnet_t interface,
+ protocol_family_t protocol,
+ socket_t pf_socket)
+{
+ errno_t result = EPROTONOSUPPORT;
+
+ /* Attempt a detach */
+ if (protocol == PF_INET) {
+ struct ifreq ifr;
+
+ bzero(&ifr, sizeof(ifr));
+ snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
+ ifnet_name(interface), ifnet_unit(interface));
+
+ result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
+ }
+ else if (protocol == PF_INET6) {
+ struct in6_ifreq ifr6;
+
+ bzero(&ifr6, sizeof(ifr6));
+ snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
+ ifnet_name(interface), ifnet_unit(interface));
+
+ result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
+ }
+
+ return result;
+}
+
+static void
+ipsecif_remove_address(
+ ifnet_t interface,
+ protocol_family_t protocol,
+ ifaddr_t address,
+ socket_t pf_socket)
+{
+ errno_t result = 0;
+
+ /* Attempt a detach */
+ if (protocol == PF_INET) {
+ struct ifreq ifr;
+
+ bzero(&ifr, sizeof(ifr));
+ snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
+ ifnet_name(interface), ifnet_unit(interface));
+ result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
+ if (result != 0) {
+ printf("ipsecif_remove_address - ifaddr_address failed: %d", result);
+ }
+ else {
+ result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
+ if (result != 0) {
+ printf("ipsecif_remove_address - SIOCDIFADDR failed: %d", result);
+ }
+ }
+ }
+ else if (protocol == PF_INET6) {
+ struct in6_ifreq ifr6;
+
+ bzero(&ifr6, sizeof(ifr6));
+ snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
+ ifnet_name(interface), ifnet_unit(interface));
+ result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
+ sizeof(ifr6.ifr_addr));
+ if (result != 0) {
+ printf("ipsecif_remove_address - ifaddr_address failed (v6): %d",
+ result);
+ }
+ else {
+ result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
+ if (result != 0) {
+ printf("ipsecif_remove_address - SIOCDIFADDR_IN6 failed: %d",
+ result);
+ }
+ }
+ }
+}
+
+static void
+ipsecif_cleanup_family(
+ ifnet_t interface,
+ protocol_family_t protocol)
+{
+ errno_t result = 0;
+ socket_t pf_socket = NULL;
+ ifaddr_t *addresses = NULL;
+ int i;
+
+ if (protocol != PF_INET && protocol != PF_INET6) {
+ printf("ipsecif_cleanup_family - invalid protocol family %d\n", protocol);
+ return;
+ }
+
+ /* Create a socket for removing addresses and detaching the protocol */
+ result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
+ if (result != 0) {
+ if (result != EAFNOSUPPORT)
+ printf("ipsecif_cleanup_family - failed to create %s socket: %d\n",
+ protocol == PF_INET ? "IP" : "IPv6", result);
+ goto cleanup;
+ }
+
+ result = ipsecif_detach_ip(interface, protocol, pf_socket);
+ if (result == 0 || result == ENXIO) {
+ /* We are done! We either detached or weren't attached. */
+ goto cleanup;
+ }
+ else if (result != EBUSY) {
+ /* Uh, not really sure what happened here... */
+ printf("ipsecif_cleanup_family - ipsecif_detach_ip failed: %d\n", result);
+ goto cleanup;
+ }
+
+ /*
+ * At this point, we received an EBUSY error. This means there are
+ * addresses attached. We should detach them and then try again.
+ */
+ result = ifnet_get_address_list_family(interface, &addresses, protocol);
+ if (result != 0) {
+		printf("ifnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
+ ifnet_name(interface), ifnet_unit(interface),
+ protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
+ goto cleanup;
+ }
+
+ for (i = 0; addresses[i] != 0; i++) {
+ ipsecif_remove_address(interface, protocol, addresses[i], pf_socket);
+ }
+ ifnet_free_address_list(addresses);
+ addresses = NULL;
+
+ /*
+ * The addresses should be gone, we should try the remove again.
+ */
+ result = ipsecif_detach_ip(interface, protocol, pf_socket);
+ if (result != 0 && result != ENXIO) {
+ printf("ipsecif_cleanup_family - ipsecif_detach_ip failed: %d\n", result);
+ }
+
+cleanup:
+ if (pf_socket != NULL)
+ sock_close(pf_socket);
+
+ if (addresses != NULL)
+ ifnet_free_address_list(addresses);
+}
+
+static errno_t
+ipsecif_ctl_disconnect(
+ __unused kern_ctl_ref kctlref,
+ __unused u_int32_t unit,
+ void *unitinfo)
+{
+ struct ipsecif_pcb *pcb = unitinfo;
+ ifnet_t ifp = pcb->ifp;
+ errno_t result = 0;
+
+ pcb->ctlref = NULL;
+ pcb->unit = 0;
+
+ /*
+ * We want to do everything in our power to ensure that the interface
+ * really goes away when the socket is closed. We must remove IP/IPv6
+ * addresses and detach the protocols. Finally, we can remove and
+ * release the interface.
+ */
+ ipsecif_cleanup_family(ifp, AF_INET);
+ ipsecif_cleanup_family(ifp, AF_INET6);
+
+ if ((result = ifnet_detach(ifp)) != 0) {
+ printf("ipsecif_ctl_disconnect - ifnet_detach failed: %d\n", result);
+ }
+
+ if ((result = ifnet_release(ifp)) != 0) {
+ printf("ipsecif_ctl_disconnect - ifnet_release failed: %d\n", result);
+ }
+
+ return 0;
+}
+
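+/*
+ * The interface is attached to bpf with DLT_NULL and a 4-byte header (see the
+ * bpfattach() call in ipsecif_ctl_connect), so the tap expects each packet to be
+ * prefixed with the address family. Rather than prepending real mbuf data, build
+ * a temporary mbuf header on the stack that points at the 4-byte af value and
+ * chains to the original packet, then hand that to the tap callback.
+ */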
+static inline void
+call_bpf_tap(
+ ifnet_t ifp,
+ bpf_packet_func tap,
+ mbuf_t m)
+{
+ struct m_hdr hack_hdr;
+ struct mbuf *n;
+ int af;
+
+ if (!tap)
+ return;
+
+ af = (((*(char*)(mbuf_data(m))) & 0xf0) >> 4); // 4 or 6
+ if(af == 4) {
+ af = AF_INET;
+ }
+ else if (af == 6) {
+ af = AF_INET6;
+ }
+ else {
+ /* Uh...this ain't right */
+ af = 0;
+ }
+
+ hack_hdr.mh_next = (struct mbuf*)m;
+ hack_hdr.mh_nextpkt = NULL;
+ hack_hdr.mh_len = 4;
+	hack_hdr.mh_data = (char *)&af;
+ hack_hdr.mh_type = ((struct mbuf*)m)->m_type;
+ hack_hdr.mh_flags = 0;
+
+ n = (struct mbuf*)&hack_hdr;
+
+ tap(ifp, (mbuf_t)n);
+}
+
+
+static errno_t
+ipsecif_ctl_send(
+ __unused kern_ctl_ref kctlref,
+ __unused u_int32_t unit,
+ void *unitinfo,
+ mbuf_t m,
+ __unused int flags)
+{
+ struct ipsecif_pcb *pcb = unitinfo;
+ struct ifnet_stat_increment_param incs;
+ errno_t result;
+
+ bzero(&incs, sizeof(incs));
+
+ mbuf_pkthdr_setrcvif(m, pcb->ifp);
+
+ if (pcb->mode & BPF_MODE_INPUT) {
+ call_bpf_tap(pcb->ifp, pcb->tap, m);
+ }
+
+ incs.packets_in = 1;
+ incs.bytes_in = mbuf_pkthdr_len(m);
+ result = ifnet_input(pcb->ifp, m, &incs);
+ if (result != 0) {
+ ifnet_stat_increment_in(pcb->ifp, 0, 0, 1);
+ printf("ipsecif_ctl_send - ifnet_input failed: %d\n", result);
+ mbuf_freem(m);
+ }
+
+ return 0;
+}
+
+/* Network Interface functions */
+static errno_t
+ipsecif_output(
+ ifnet_t interface,
+ mbuf_t data)
+{
+ struct ipsecif_pcb *pcb = ifnet_softc(interface);
+ errno_t result;
+
+ if (pcb->mode & BPF_MODE_OUTPUT) {
+ call_bpf_tap(interface, pcb->tap, data);
+ }
+
+ // no packet should go to the ipsec interface
+ mbuf_freem(data);
+
+#if 0
+ if (pcb->ctlref) {
+ int length = mbuf_pkthdr_len(data);
+ result = ctl_enqueuembuf(pcb->ctlref, pcb->unit, data, CTL_DATA_EOR);
+ if (result != 0) {
+ mbuf_freem(data);
+ printf("ipsecif_output - ctl_enqueuembuf failed: %d\n", result);
+ ifnet_stat_increment_out(interface, 0, 0, 1);
+ }
+ else {
+ ifnet_stat_increment_out(interface, 1, length, 0);
+ }
+ }
+ else
+ mbuf_freem(data);
+#endif
+
+ return 0;
+}
+
+/* Network Interface functions */
+static errno_t
+ipsecif_demux(
+ __unused ifnet_t interface,
+ mbuf_t data,
+ __unused char *frame_header,
+ protocol_family_t *protocol)
+{
+ u_int8_t *vers;
+
+ while (data != NULL && mbuf_len(data) < 1) {
+ data = mbuf_next(data);
+ }
+
+ if (data != NULL) {
+ vers = mbuf_data(data);
+ switch(((*vers) & 0xf0) >> 4) {
+ case 4:
+ *protocol = PF_INET;
+ return 0;
+
+ case 6:
+ *protocol = PF_INET6;
+ return 0;
+ }
+ }
+
+ return ENOENT;
+}
+
+static errno_t
+ipsecif_add_proto(
+ __unused ifnet_t interface,
+ protocol_family_t protocol,
+ __unused const struct ifnet_demux_desc *demux_array,
+ __unused u_int32_t demux_count)
+{
+ switch(protocol) {
+ case PF_INET:
+ return 0;
+ case PF_INET6:
+ return 0;
+ default:
+ break;
+ }
+
+ return ENOPROTOOPT;
+}
+
+static errno_t
+ipsecif_del_proto(
+ __unused ifnet_t interface,
+ __unused protocol_family_t protocol)
+{
+ return 0;
+}
+
+static errno_t
+ipsecif_ioctl(
+ __unused ifnet_t interface,
+ __unused u_int32_t command,
+ __unused void *data)
+{
+ errno_t result = 0;
+
+ switch(command) {
+ case SIOCSIFMTU:
+ ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
+ break;
+
+ default:
+ result = EOPNOTSUPP;
+ }
+
+ return result;
+}
+
+static errno_t
+ipsecif_settap(
+ ifnet_t interface,
+ bpf_tap_mode mode,
+ bpf_packet_func callback)
+{
+ struct ipsecif_pcb *pcb = ifnet_softc(interface);
+
+ pcb->mode = mode;
+ pcb->tap = callback;
+
+ return 0;
+}
+
+static void
+ipsecif_detached(
+ ifnet_t interface)
+{
+ struct ipsecif_pcb *pcb = ifnet_softc(interface);
+
+ ipsecif_free(pcb);
+
+ OSDecrementAtomic(&ipsecif_ifcount);
+}
+
+/* Protocol Handlers */
+
+static errno_t
+ipsecif_proto_input(
+ __unused ifnet_t interface,
+ protocol_family_t protocol,
+ mbuf_t m,
+ __unused char *frame_header)
+{
+ proto_input(protocol, m);
+
+ return 0;
+}
+
+static errno_t
+ipsecif_attach_proto(
+ ifnet_t interface,
+ protocol_family_t protocol)
+{
+ struct ifnet_attach_proto_param proto;
+ errno_t result;
+
+ bzero(&proto, sizeof(proto));
+ proto.input = ipsecif_proto_input;
+
+ result = ifnet_attach_protocol(interface, protocol, &proto);
+ if (result != 0 && result != EEXIST) {
+		printf("ipsecif_attach_proto - ifnet_attach_protocol %d failed: %d\n",
+ protocol, result);
+ }
+
+ return result;
+}
+
/*
* Fill in the information structure.
+ * We set all values to zero with bzero to clear
+ * out any information in the sockaddr_storage
+ * and nfs_filehandle contained in msgreq so that
+ * we will not leak extraneous information out of
+ * the kernel when calling up to lockd via our mig
+ * generated routine.
*/
- msgreq.lmr_answered = 0;
- msgreq.lmr_errno = 0;
- msgreq.lmr_saved_errno = 0;
+ bzero(&msgreq, sizeof(msgreq));
msg = &msgreq.lmr_msg;
msg->lm_version = LOCKD_MSG_VERSION;
msg->lm_flags = 0;
if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_CASE_PRESERVING)
caps |= VOL_CAP_FMT_CASE_PRESERVING;
}
+ /* Note: VOL_CAP_FMT_2TB_FILESIZE is actually used to test for "large file support" */
if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXFILESIZE)) {
- /* Is server's max file size at least 2TB? */
- if (nmp->nm_fsattr.nfsa_maxfilesize >= 0x20000000000ULL)
+ /* Is server's max file size at least 4GB? */
+ if (nmp->nm_fsattr.nfsa_maxfilesize >= 0x100000000ULL)
caps |= VOL_CAP_FMT_2TB_FILESIZE;
} else if (nfsvers >= NFS_VER3) {
/*
* NFSv3 and up supports 64 bits of file size.
- * So, we'll just assume maxfilesize >= 2TB
+ * So, we'll just assume maxfilesize >= 4GB
*/
caps |= VOL_CAP_FMT_2TB_FILESIZE;
}
if (maxsize < nmp->nm_readdirsize)
nmp->nm_readdirsize = maxsize;
- nfsm_chain_get_64(error, &nmrep, maxsize);
- nmp->nm_fsattr.nfsa_maxfilesize = maxsize;
+ nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_maxfilesize);
nfsm_chain_adv(error, &nmrep, 2 * NFSX_UNSIGNED); // skip time_delta
//PWC hack until we have a real "mount" tool to remount root rw
int rw_root=0;
int flags = MNT_ROOTFS|MNT_RDONLY;
- PE_parse_boot_arg("-rwroot_hack", &rw_root);
+ PE_parse_boot_argn("-rwroot_hack", &rw_root, sizeof (rw_root));
if(rw_root)
{
flags = MNT_ROOTFS;
include $(MakeInc_cmd)
include $(MakeInc_def)
+ALLPRODUCTS = AppleTV iPhone MacOSX
+PRODUCT = $(shell tconf --product)
+EXTRAUNIFDEF = $(foreach x,$(ALLPRODUCTS),$(if $(findstring $(PRODUCT),$(x)),-DPRODUCT_$(x),-UPRODUCT_$(x)))
+SINCFRAME_UNIFDEF += $(EXTRAUNIFDEF)
+SPINCFRAME_UNIFDEF += $(EXTRAUNIFDEF)
+KINCFRAME_UNIFDEF += $(EXTRAUNIFDEF)
+KPINCFRAME_UNIFDEF += $(EXTRAUNIFDEF)
+
INSTINC_SUBDIRS = \
INSTINC_SUBDIRS_PPC = \
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
* used without a prototype in scope.
*/
+/* These settings are particular to each product. */
+#ifdef KERNEL
+#define __DARWIN_ONLY_64_BIT_INO_T 0
+#define __DARWIN_ONLY_UNIX_CONFORMANCE 0
+#define __DARWIN_ONLY_VERS_1050 0
+#else /* !KERNEL */
+#ifdef PRODUCT_AppleTV
+/* Product: AppleTV */
+#define __DARWIN_ONLY_64_BIT_INO_T 1
+#define __DARWIN_ONLY_UNIX_CONFORMANCE 1
+#define __DARWIN_ONLY_VERS_1050 1
+#endif /* PRODUCT_AppleTV */
+#ifdef PRODUCT_iPhone
+/* Product: iPhone */
+#define __DARWIN_ONLY_64_BIT_INO_T 1
+#define __DARWIN_ONLY_UNIX_CONFORMANCE 1
+#define __DARWIN_ONLY_VERS_1050 1
+#endif /* PRODUCT_iPhone */
+#ifdef PRODUCT_MacOSX
+/* Product: MacOSX */
+#define __DARWIN_ONLY_64_BIT_INO_T 0
+/* #undef __DARWIN_ONLY_UNIX_CONFORMANCE (automatically set for 64-bit) */
+#define __DARWIN_ONLY_VERS_1050 0
+#endif /* PRODUCT_MacOSX */
+#endif /* KERNEL */
+
/*
* The __DARWIN_ALIAS macros are used to do symbol renaming; they allow
 * legacy code to use the old symbol, thus maintaining binary compatibility
* pre-10.5, and it is the default compilation environment, revert the
* compilation environment to pre-__DARWIN_UNIX03.
*/
+#if !defined(__DARWIN_ONLY_UNIX_CONFORMANCE)
+# if defined(__LP64__)
+# define __DARWIN_ONLY_UNIX_CONFORMANCE 1
+# else /* !__LP64__ */
+# define __DARWIN_ONLY_UNIX_CONFORMANCE 0
+# endif /* __LP64__ */
+#endif /* !__DARWIN_ONLY_UNIX_CONFORMANCE */
+
#if !defined(__DARWIN_UNIX03)
-# if defined(_DARWIN_C_SOURCE) || defined(_XOPEN_SOURCE) || defined(_POSIX_C_SOURCE) || defined(__LP64__) || (defined(__arm__) && !defined(KERNEL))
+# if defined(KERNEL)
+# define __DARWIN_UNIX03 0
+# elif __DARWIN_ONLY_UNIX_CONFORMANCE
# if defined(_NONSTD_SOURCE)
-# error "Can't define both _NONSTD_SOURCE and any of _DARWIN_C_SOURCE, _XOPEN_SOURCE, _POSIX_C_SOURCE, or __LP64__"
+# error "Can't define _NONSTD_SOURCE when only UNIX conformance is available."
# endif /* _NONSTD_SOURCE */
# define __DARWIN_UNIX03 1
-# elif defined(_NONSTD_SOURCE) || defined(KERNEL)
+# elif defined(_DARWIN_C_SOURCE) || defined(_XOPEN_SOURCE) || defined(_POSIX_C_SOURCE)
+# if defined(_NONSTD_SOURCE)
+# error "Can't define both _NONSTD_SOURCE and any of _DARWIN_C_SOURCE, _XOPEN_SOURCE or _POSIX_C_SOURCE."
+# endif /* _NONSTD_SOURCE */
+# define __DARWIN_UNIX03 1
+# elif defined(_NONSTD_SOURCE)
# define __DARWIN_UNIX03 0
# else /* default */
# if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && ((__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__-0) < 1050)
#endif /* !__DARWIN_UNIX03 */
#if !defined(__DARWIN_64_BIT_INO_T)
-# if defined(_DARWIN_USE_64_BIT_INODE)
+# if defined(KERNEL)
+# define __DARWIN_64_BIT_INO_T 0
+# elif defined(_DARWIN_USE_64_BIT_INODE)
+# if defined(_DARWIN_NO_64_BIT_INODE)
+# error "Can't define both _DARWIN_USE_64_BIT_INODE and _DARWIN_NO_64_BIT_INODE."
+# endif /* _DARWIN_NO_64_BIT_INODE */
# define __DARWIN_64_BIT_INO_T 1
-# elif defined(_DARWIN_NO_64_BIT_INODE) || defined(KERNEL)
+# elif defined(_DARWIN_NO_64_BIT_INODE)
+# if __DARWIN_ONLY_64_BIT_INO_T
+# error "Can't define _DARWIN_NO_64_BIT_INODE when only 64-bit inodes are available."
+# endif /* __DARWIN_ONLY_64_BIT_INO_T */
# define __DARWIN_64_BIT_INO_T 0
# else /* default */
-# define __DARWIN_64_BIT_INO_T 0
+# if __DARWIN_ONLY_64_BIT_INO_T
+# define __DARWIN_64_BIT_INO_T 1
+# else /* !__DARWIN_ONLY_64_BIT_INO_T */
+# define __DARWIN_64_BIT_INO_T 0
+# endif /* __DARWIN_ONLY_64_BIT_INO_T */
# endif
#endif /* !__DARWIN_64_BIT_INO_T */
-#if !defined(__DARWIN_NON_CANCELABLE)
+#if !defined(__DARWIN_VERS_1050)
# if defined(KERNEL)
-# define __DARWIN_NON_CANCELABLE 0
+# define __DARWIN_VERS_1050 0
+# elif __DARWIN_ONLY_VERS_1050
+# define __DARWIN_VERS_1050 1
+# elif defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && ((__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__-0) >= 1050)
+# define __DARWIN_VERS_1050 1
# else /* default */
-# define __DARWIN_NON_CANCELABLE 0
+# define __DARWIN_VERS_1050 0
# endif
-#endif /* !__DARWIN_NON_CANCELABLE */
+#endif /* !__DARWIN_VERS_1050 */
-#if !defined(__DARWIN_VERS_1050)
-# if !defined(KERNEL) && defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && ((__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__-0) >= 1050)
-# define __DARWIN_VERS_1050 1
+#if !defined(__DARWIN_NON_CANCELABLE)
+# if defined(KERNEL)
+# define __DARWIN_NON_CANCELABLE 0
# else /* default */
-# define __DARWIN_VERS_1050 0
+# define __DARWIN_NON_CANCELABLE 0
# endif
#endif /* !__DARWIN_NON_CANCELABLE */
* symbol suffixes used for symbol versioning
*/
#if __DARWIN_UNIX03
-# if !defined(__LP64__) && !defined(__arm__)
-# define __DARWIN_SUF_UNIX03 "$UNIX2003"
-# define __DARWIN_SUF_UNIX03_SET 1
-# else /* __LP64__ || __arm__ */
+# if __DARWIN_ONLY_UNIX_CONFORMANCE
# define __DARWIN_SUF_UNIX03 /* nothing */
-# define __DARWIN_SUF_UNIX03_SET 0
-# endif /* !__LP64__ && !__arm__ */
+# else /* !__DARWIN_ONLY_UNIX_CONFORMANCE */
+# define __DARWIN_SUF_UNIX03 "$UNIX2003"
+# endif /* __DARWIN_ONLY_UNIX_CONFORMANCE */
# if __DARWIN_64_BIT_INO_T
-# define __DARWIN_SUF_64_BIT_INO_T "$INODE64"
+# if __DARWIN_ONLY_64_BIT_INO_T
+# define __DARWIN_SUF_64_BIT_INO_T /* nothing */
+# else /* !__DARWIN_ONLY_64_BIT_INO_T */
+# define __DARWIN_SUF_64_BIT_INO_T "$INODE64"
+# endif /* __DARWIN_ONLY_64_BIT_INO_T */
# else /* !__DARWIN_64_BIT_INO_T */
# define __DARWIN_SUF_64_BIT_INO_T /* nothing */
-# endif /* __DARWIN_UNIX03 */
+# endif /* __DARWIN_64_BIT_INO_T */
+
+# if __DARWIN_VERS_1050
+# if __DARWIN_ONLY_VERS_1050
+# define __DARWIN_SUF_1050 /* nothing */
+# else /* !__DARWIN_ONLY_VERS_1050 */
+# define __DARWIN_SUF_1050 "$1050"
+# endif /* __DARWIN_ONLY_VERS_1050 */
+# else /* !__DARWIN_VERS_1050 */
+# define __DARWIN_SUF_1050 /* nothing */
+# endif /* __DARWIN_VERS_1050 */
# if __DARWIN_NON_CANCELABLE
# define __DARWIN_SUF_NON_CANCELABLE "$NOCANCEL"
# define __DARWIN_SUF_NON_CANCELABLE /* nothing */
# endif /* __DARWIN_NON_CANCELABLE */
-# if __DARWIN_VERS_1050
-# define __DARWIN_SUF_1050 "$1050"
-# else /* !__DARWIN_VERS_1050 */
-# define __DARWIN_SUF_1050 /* nothing */
-# endif /* __DARWIN_VERS_1050 */
-
#else /* !__DARWIN_UNIX03 */
# define __DARWIN_SUF_UNIX03 /* nothing */
-# define __DARWIN_SUF_UNIX03_SET 0
# define __DARWIN_SUF_64_BIT_INO_T /* nothing */
# define __DARWIN_SUF_NON_CANCELABLE /* nothing */
# define __DARWIN_SUF_1050 /* nothing */
* long doubles. This applies only to ppc; i386 already has long double
* support, while ppc64 doesn't have any backwards history.
*/
-#if defined(__ppc__)
+#if defined(__ppc__)
# if defined(__LDBL_MANT_DIG__) && defined(__DBL_MANT_DIG__) && \
__LDBL_MANT_DIG__ > __DBL_MANT_DIG__
# if __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__-0 < 1040
# define __DARWIN_LDBL_COMPAT2(x) /* nothing */
# define __DARWIN_LONG_DOUBLE_IS_DOUBLE 1
# endif
-#elif defined(__i386__) || defined(__ppc64__) || defined(__x86_64__) || defined (__arm__)
+#elif defined(__i386__) || defined(__ppc64__) || defined(__x86_64__)
# define __DARWIN_LDBL_COMPAT(x) /* nothing */
# define __DARWIN_LDBL_COMPAT2(x) /* nothing */
# define __DARWIN_LONG_DOUBLE_IS_DOUBLE 0
* Public darwin-specific feature macros
*****************************************/
+/*
+ * _DARWIN_FEATURE_64_BIT_INODE indicates that the ino_t type is 64-bit, and
+ * structures modified for 64-bit inodes (like struct stat) will be used.
+ */
+#if __DARWIN_64_BIT_INO_T
+#define _DARWIN_FEATURE_64_BIT_INODE 1
+#endif
+
/*
* _DARWIN_FEATURE_LONG_DOUBLE_IS_DOUBLE indicates when the long double type
- * is the same as the double type (ppc only)
+ * is the same as the double type (ppc and arm only)
*/
#if __DARWIN_LONG_DOUBLE_IS_DOUBLE
#define _DARWIN_FEATURE_LONG_DOUBLE_IS_DOUBLE 1
#endif
/*
- * _DARWIN_FEATURE_UNIX_CONFORMANCE indicates whether UNIX conformance is on,
- * and specifies the conformance level (3 is SUSv3)
+ * _DARWIN_FEATURE_ONLY_64_BIT_INODE indicates that the ino_t type may only
+ * be 64-bit; there is no support for 32-bit ino_t when this macro is defined
+ * (and non-zero). There is no struct stat64 either, as the regular
+ * struct stat will already be the 64-bit version.
*/
-#if __DARWIN_UNIX03
-#define _DARWIN_FEATURE_UNIX_CONFORMANCE 3
+#if __DARWIN_ONLY_64_BIT_INO_T
+#define _DARWIN_FEATURE_ONLY_64_BIT_INODE 1
#endif
/*
- * _DARWIN_FEATURE_64_BIT_INODE indicates that the ino_t type is 64-bit, and
- * structures modified for 64-bit inodes (like struct stat) will be used.
+ * _DARWIN_FEATURE_ONLY_VERS_1050 indicates that only those APIs updated
+ * in 10.5 exist; no pre-10.5 variants are available.
*/
-#if __DARWIN_64_BIT_INO_T
-#define _DARWIN_FEATURE_64_BIT_INODE 1
+#if __DARWIN_ONLY_VERS_1050
+#define _DARWIN_FEATURE_ONLY_VERS_1050 1
+#endif
+
+/*
+ * _DARWIN_FEATURE_ONLY_UNIX_CONFORMANCE indicates that only UNIX conforming APIs
+ * are available (the legacy BSD APIs are not available)
+ */
+#if __DARWIN_ONLY_UNIX_CONFORMANCE
+#define _DARWIN_FEATURE_ONLY_UNIX_CONFORMANCE 1
+#endif
+
+/*
+ * _DARWIN_FEATURE_UNIX_CONFORMANCE indicates whether UNIX conformance is on,
+ * and specifies the conformance level (3 is SUSv3)
+ */
+#if __DARWIN_UNIX03
+#define _DARWIN_FEATURE_UNIX_CONFORMANCE 3
#endif
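+
+/*
+ * A minimal sketch (illustrative only) of how userspace code can test the feature
+ * macros above; any system header that includes <sys/cdefs.h> makes them visible.
+ *
+ *	#include <stdio.h>
+ *	#include <sys/cdefs.h>
+ *
+ *	int
+ *	main(void)
+ *	{
+ *	#if defined(_DARWIN_FEATURE_ONLY_64_BIT_INODE)
+ *		printf("only 64-bit inodes; struct stat is already the 64-bit layout\n");
+ *	#elif defined(_DARWIN_FEATURE_64_BIT_INODE)
+ *		printf("64-bit inodes selected ($INODE64 symbol variants in use)\n");
+ *	#else
+ *		printf("legacy 32-bit ino_t layout\n");
+ *	#endif
+ *	#if defined(_DARWIN_FEATURE_UNIX_CONFORMANCE)
+ *		printf("UNIX conformance level %d\n", _DARWIN_FEATURE_UNIX_CONFORMANCE);
+ *	#endif
+ *		return 0;
+ *	}
+ */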
#endif /* !_CDEFS_H_ */
* DKIOCISFORMATTED is media formatted?
* DKIOCISWRITABLE is media writable?
*
+ * DKIOCDISCARD delete unused data
+ *
* DKIOCGETMAXBLOCKCOUNTREAD get maximum block count for reads
* DKIOCGETMAXBLOCKCOUNTWRITE get maximum block count for writes
* DKIOCGETMAXBYTECOUNTREAD get maximum byte count for reads
*
* DKIOCGETMINSEGMENTALIGNMENTBYTECOUNT get minimum segment alignment in bytes
* DKIOCGETMAXSEGMENTADDRESSABLEBITCOUNT get maximum segment width in bits
+ *
+ * DKIOCGETPHYSICALBLOCKSIZE             get device's physical block size
*/
typedef struct
{
- char path[128];
+ uint64_t offset;
+ uint64_t length;
+
+ uint8_t reserved0128[16]; /* reserved, clear to zero */
+} dk_discard_t;
+
+typedef struct
+{
+ char path[128];
} dk_firmware_path_t;
typedef struct
#define DKIOCISFORMATTED _IOR('d', 23, uint32_t)
#define DKIOCISWRITABLE _IOR('d', 29, uint32_t)
+#define DKIOCDISCARD _IOW('d', 31, dk_discard_t)
+
#define DKIOCGETMAXBLOCKCOUNTREAD _IOR('d', 64, uint64_t)
#define DKIOCGETMAXBLOCKCOUNTWRITE _IOR('d', 65, uint64_t)
#define DKIOCGETMAXBYTECOUNTREAD _IOR('d', 70, uint64_t)
#define DKIOCGETMINSEGMENTALIGNMENTBYTECOUNT _IOR('d', 74, uint64_t)
#define DKIOCGETMAXSEGMENTADDRESSABLEBITCOUNT _IOR('d', 75, uint64_t)
+#define DKIOCGETPHYSICALBLOCKSIZE _IOR('d', 77, uint32_t)
+
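+/*
+ * A minimal userspace sketch (illustrative only) of issuing DKIOCDISCARD to tell
+ * the driver that a byte range no longer contains useful data. The device path is
+ * an assumption supplied by the caller; the operation requires appropriate
+ * privileges and a driver that advertises DK_FEATURE_DISCARD.
+ *
+ *	#include <fcntl.h>
+ *	#include <stdint.h>
+ *	#include <string.h>
+ *	#include <unistd.h>
+ *	#include <sys/ioctl.h>
+ *	#include <sys/disk.h>
+ *
+ *	int
+ *	discard_range(const char *devpath, uint64_t offset, uint64_t length)
+ *	{
+ *		dk_discard_t discard;
+ *		int fd = open(devpath, O_RDWR);
+ *
+ *		if (fd < 0)
+ *			return -1;
+ *		memset(&discard, 0, sizeof(discard));	// reserved fields must be zero
+ *		discard.offset = offset;		// byte offset of unused data
+ *		discard.length = length;		// byte length of unused data
+ *		if (ioctl(fd, DKIOCDISCARD, &discard) == -1) {
+ *			close(fd);
+ *			return -1;
+ *		}
+ *		close(fd);
+ *		return 0;
+ *	}
+ */
+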
#ifdef KERNEL
+#define DK_FEATURE_DISCARD 0x00000010
#define DK_FEATURE_FORCE_UNIT_ACCESS 0x00000001
#define DKIOCGETBLOCKCOUNT32 _IOR('d', 25, uint32_t)
#define DKIOCSETBLOCKSIZE _IOW('d', 24, uint32_t)
#define IMGPF_NONE 0x00000000 /* No flags */
#define IMGPF_INTERPRET 0x00000001 /* Interpreter invoked */
#define IMGPF_POWERPC 0x00000002 /* ppc mode for x86 */
+#if CONFIG_EMBEDDED
+#undef IMGPF_POWERPC
+#endif
#define IMGPF_WAS_64BIT 0x00000004 /* exec from a 64Bit binary */
#define IMGPF_IS_64BIT 0x00000008 /* exec to a 64Bit binary */
#define DBG_IOBLUETOOTH 46 /* Bluetooth */
#define DBG_IOFIREWIRE 47 /* FireWire */
#define DBG_IOINFINIBAND 48 /* Infiniband */
-#define DBG_IOCPUPM 49 /* CPU Power Management */
+#define DBG_IOCPUPM 49 /* CPU Power Management */
+#define DBG_IOGRAPHICS 50 /* Graphics */
/* Backwards compatibility */
#define DBG_IOPOINTING DBG_IOHID /* OBSOLETE: Use DBG_IOHID instead */
#define DBG_DRVBLUETOOTH 15 /* Bluetooth */
#define DBG_DRVFIREWIRE 16 /* FireWire */
#define DBG_DRVINFINIBAND 17 /* Infiniband */
+#define DBG_DRVGRAPHICS 18 /* Graphics */
/* Backwards compatibility */
#define DBG_DRVPOINTING DBG_DRVHID /* OBSOLETE: Use DBG_DRVHID instead */
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <stdint.h>
#include <sys/ucred.h>
#include <sys/queue.h> /* XXX needed for user builds */
+#include <Availability.h>
#else
#include <sys/kernel_types.h>
#endif
uint32_t f_reserved[8]; /* For future use */ \
}
+#if !__DARWIN_ONLY_64_BIT_INO_T
+
struct statfs64 __DARWIN_STRUCT_STATFS64;
+#endif /* !__DARWIN_ONLY_64_BIT_INO_T */
+
#if __DARWIN_64_BIT_INO_T
struct statfs __DARWIN_STRUCT_STATFS64;
__BEGIN_DECLS
int fhopen(const struct fhandle *, int);
int fstatfs(int, struct statfs *) __DARWIN_INODE64(fstatfs);
-int fstatfs64(int, struct statfs64 *);
+#if !__DARWIN_ONLY_64_BIT_INO_T
+int fstatfs64(int, struct statfs64 *) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
+#endif /* !__DARWIN_ONLY_64_BIT_INO_T */
int getfh(const char *, fhandle_t *);
int getfsstat(struct statfs *, int, int) __DARWIN_INODE64(getfsstat);
-int getfsstat64(struct statfs64 *, int, int);
+#if !__DARWIN_ONLY_64_BIT_INO_T
+int getfsstat64(struct statfs64 *, int, int) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
+#endif /* !__DARWIN_ONLY_64_BIT_INO_T */
int getmntinfo(struct statfs **, int) __DARWIN_INODE64(getmntinfo);
-int getmntinfo64(struct statfs64 **, int);
+#if !__DARWIN_ONLY_64_BIT_INO_T
+int getmntinfo64(struct statfs64 **, int) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
+#endif /* !__DARWIN_ONLY_64_BIT_INO_T */
int mount(const char *, const char *, int, void *);
int statfs(const char *, struct statfs *) __DARWIN_INODE64(statfs);
-int statfs64(const char *, struct statfs64 *);
+#if !__DARWIN_ONLY_64_BIT_INO_T
+int statfs64(const char *, struct statfs64 *) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
+#endif /* !__DARWIN_ONLY_64_BIT_INO_T */
int unmount(const char *, int);
int getvfsbyname(const char *, struct vfsconf *);
__END_DECLS
lck_rw_t mnt_rwlock; /* mutex readwrite lock */
lck_mtx_t mnt_renamelock; /* mutex that serializes renames that change shape of tree */
vnode_t mnt_devvp; /* the device mounted on for local file systems */
+ uint32_t mnt_devbsdunit; /* the BSD unit number of the device */
	int32_t		mnt_crossref;		/* references to cover lookups crossing into mp */
	int32_t		mnt_iterref;		/* references to cover iterations; drained makes it -ve */
*/
pid_t mnt_dependent_pid;
void *mnt_dependent_process;
-
- struct timeval last_normal_IO_timestamp;
};
/*
#endif
};
+/*
+ * Throttled I/Os are affected only by normal I/Os happening on the same bsd device node. For example, disk1s3 and
+ * disk1s5 are on the same device node, while disk1s3 and disk2 are not (although disk2 might be a mounted disk image file
+ * and the disk image file resides on a partition in disk1). The following constant defines the maximum number of
+ * different bsd device nodes the algorithm can consider; larger unit numbers are folded into this maximum (modulo). Since
+ * throttled I/O is usually only useful in non-server environments, a small number such as 16 is enough in most cases.
+ */
+#define LOWPRI_MAX_NUM_DEV 16
+
__BEGIN_DECLS
extern int mount_generation;
void mount_iterdrain(mount_t);
void mount_iterreset(mount_t);
+/* throttled I/O api */
+int throttle_get_io_policy(struct uthread **ut);
+extern void throttle_lowpri_io(boolean_t ok_to_sleep);
+int throttle_io_will_be_throttled(int lowpri_window_msecs, size_t devbsdunit);
+
__END_DECLS
#endif /* !_SYS_MOUNT_INTERNAL_H_ */
extern int tsleep(void *chan, int pri, const char *wmesg, int timo);
extern int msleep1(void *chan, lck_mtx_t *mtx, int pri, const char *wmesg, u_int64_t timo);
-#endif
+
+extern int proc_pidversion(proc_t);
+extern int proc_getcdhash(proc_t, unsigned char *);
+#endif /* KERNEL_PRIVATE */
__END_DECLS
#include <kern/locks.h>
__END_DECLS
+#if DEBUG
#define __PROC_INTERNAL_DEBUG 1
+#endif
/*
* The short form for various locks that protect fields in the data structures.
struct timeval p_start; /* starting time */
void * p_rcall;
int p_ractive;
+ int p_idversion; /* version of process identity */
#if DIAGNOSTIC
unsigned int p_fdlock_pc[4];
unsigned int p_fdunlock_pc[4];
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <sys/cdefs.h>
#ifdef KERNEL
#include <machine/types.h>
+#else /* !KERNEL */
+#include <Availability.h>
#endif /* KERNEL */
/* [XSI] The timespec structure may be defined as described in <time.h> */
#if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
+#if !__DARWIN_ONLY_64_BIT_INO_T
+
struct stat64 __DARWIN_STRUCT_STAT64;
+#endif /* !__DARWIN_ONLY_64_BIT_INO_T */
+
#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
int mkfifox_np(const char *, filesec_t);
int statx_np(const char *, struct stat *, filesec_t) __DARWIN_INODE64(statx_np);
int umaskx_np(filesec_t);
-/* The following are simillar to stat and friends except provide struct stat64 instead of struct stat */
-int fstatx64_np(int, struct stat64 *, filesec_t);
-int lstatx64_np(const char *, struct stat64 *, filesec_t);
-int statx64_np(const char *, struct stat64 *, filesec_t);
-int fstat64(int, struct stat64 *);
-int lstat64(const char *, struct stat64 *);
-int stat64(const char *, struct stat64 *);
+
+#if !__DARWIN_ONLY_64_BIT_INO_T
+/* The following deprecated routines are similar to stat and friends except they provide struct stat64 instead of struct stat */
+int fstatx64_np(int, struct stat64 *, filesec_t) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
+int lstatx64_np(const char *, struct stat64 *, filesec_t) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
+int statx64_np(const char *, struct stat64 *, filesec_t) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
+int fstat64(int, struct stat64 *) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
+int lstat64(const char *, struct stat64 *) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
+int stat64(const char *, struct stat64 *) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
+#endif /* !__DARWIN_ONLY_64_BIT_INO_T */
#endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
__END_DECLS
-#endif
+#endif /* !KERNEL */
#endif /* !_SYS_STAT_H_ */
/* code signing */
struct cs_blob;
-int ubc_cs_blob_add(vnode_t, cpu_type_t, off_t, vm_address_t, vm_size_t);
struct cs_blob *ubc_cs_blob_get(vnode_t, cpu_type_t, off_t);
-struct cs_blob *ubc_get_cs_blobs(vnode_t);
-int ubc_cs_getcdhash(vnode_t, off_t, unsigned char *);
-
/* cluster IO routines */
int advisory_read(vnode_t, off_t, off_t, int);
__private_extern__ int ubc_umount(mount_t mp);
__private_extern__ void ubc_unmountall(void);
__private_extern__ memory_object_t ubc_getpager(vnode_t);
-__private_extern__ int ubc_map(vnode_t, int);
__private_extern__ void ubc_destroy_named(vnode_t);
/* internal only */
int UBCINFOEXISTS(vnode_t);
+/* code signing */
+struct cs_blob;
+int ubc_cs_blob_add(vnode_t, cpu_type_t, off_t, vm_address_t, vm_size_t);
+struct cs_blob *ubc_get_cs_blobs(vnode_t);
+int ubc_cs_getcdhash(vnode_t, off_t, unsigned char *);
+kern_return_t ubc_cs_blob_allocate(vm_offset_t *, vm_size_t *);
+void ubc_cs_blob_deallocate(vm_offset_t, vm_size_t);
+
__END_DECLS
u_int32_t dlil_incremented_read;
lck_mtx_t *uu_mtx;
- int uu_lowpri_window;
+ int uu_lowpri_window;
+ size_t uu_devbsdunit; // to identify which device throttled I/Os are sent to
struct user_sigaltstack uu_sigstk;
int uu_defer_reclaims;
#endif
#endif /* CONFIG_DTRACE */
void * uu_threadlist;
- mount_t v_mount;
};
typedef struct uthread * uthread_t;
#define VNODE_ITERATE_ALL 0x80
#define VNODE_ITERATE_ACTIVE 0x100
#define VNODE_ITERATE_INACTIVE 0x200
+#ifdef BSD_KERNEL_PRIVATE
+#define VNODE_ALWAYS 0x400
+#endif /* BSD_KERNEL_PRIVATE */
/*
* return values from callback
void vnode_rele_ext(vnode_t, int, int);
void vnode_rele_internal(vnode_t, int, int, int);
int vnode_getwithref(vnode_t);
+#ifdef BSD_KERNEL_PRIVATE
+int vnode_getalways(vnode_t);
+#endif /* BSD_KERNEL_PRIVATE */
int vnode_get_locked(vnode_t);
int vnode_put_locked(vnode_t);
if (timevalcmp(&elapsed, &hard_throttle_maxelapsed, <))
return(1);
}
+ struct uthread *ut;
+ if (throttle_get_io_policy(&ut) == IOPOL_THROTTLE) {
+ size_t devbsdunit;
+ if (vp->v_mount != NULL)
+ devbsdunit = vp->v_mount->mnt_devbsdunit;
+ else
+ devbsdunit = LOWPRI_MAX_NUM_DEV - 1;
+ if (throttle_io_will_be_throttled(-1, devbsdunit)) {
+ return(1);
+ }
+ }
return(0);
}
if (phys_blksz != (size_t)jnl->jhdr->jhdr_size && jnl->jhdr->jhdr_size != 0) {
printf("jnl: %s: open: phys_blksz %lu does not match journal header size %d\n",
jdev_name, phys_blksz, jnl->jhdr->jhdr_size);
-
- orig_blksz = phys_blksz;
- phys_blksz = jnl->jhdr->jhdr_size;
- if (VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&phys_blksz, FWRITE, &context)) {
- printf("jnl: %s: could not set block size to %lu bytes.\n", jdev_name, phys_blksz);
- goto bad_journal;
- }
-// goto bad_journal;
}
if ( jnl->jhdr->start <= 0
log(LOG_EMERG, "%d desired, %d numvnodes, "
"%d free, %d dead, %d rage\n",
desiredvnodes, numvnodes, freevnodes, deadvnodes, ragevnodes);
+#if CONFIG_EMBEDDED
+ /*
+ * Running out of vnodes tends to make a system unusable. On an
+ * embedded system, it's unlikely that the user can do anything
+ * about it (or would know what to do, if they could). So panic
+ * the system so it will automatically restart (and hopefully we
+ * can get a panic log that tells us why we ran out).
+ */
+ panic("vnode table is full\n");
+#endif
*vpp = NULL;
return (ENFILE);
}
}
+__private_extern__ int
+vnode_getalways(vnode_t vp)
+{
+ return(vget_internal(vp, 0, VNODE_ALWAYS));
+}
+
int
vnode_put(vnode_t vp)
{
{
int nodead = vflags & VNODE_NODEAD;
int nosusp = vflags & VNODE_NOSUSPEND;
+ int always = vflags & VNODE_ALWAYS;
for (;;) {
/*
(vp->v_owner == current_thread())) {
break;
}
+ if (always != 0)
+ break;
vnode_lock_convert(vp);
if (vp->v_lflag & VL_TERMINATE) {
#include <sys/sysproto.h>
#include <sys/xattr.h>
#include <sys/ubc_internal.h>
+#include <sys/disk.h>
#include <machine/cons.h>
#include <machine/limits.h>
#include <miscfs/specfs/specdev.h>
strncpy(mp->mnt_vfsstat.f_mntonname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
mp->mnt_vnodecovered = vp;
mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
+ mp->mnt_devbsdunit = LOWPRI_MAX_NUM_DEV - 1;
/* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
goto out3;
}
#endif
+ if (device_vnode != NULL) {
+ VNOP_IOCTL(device_vnode, DKIOCGETBSDUNIT, (caddr_t)&mp->mnt_devbsdunit, 0, NULL);
+ mp->mnt_devbsdunit %= LOWPRI_MAX_NUM_DEV;
+ }
+
/*
* Mount the filesystem.
*/
int needwakeup = 0;
int forcedunmount = 0;
int lflags = 0;
+ struct vnode *devvp = NULLVP;
if (flags & MNT_FORCE)
forcedunmount = 1;
OSAddAtomic(1, (SInt32 *)&vfs_nummntops);
if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
- mp->mnt_devvp->v_specflags &= ~SI_MOUNTEDON;
- VNOP_CLOSE(mp->mnt_devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
+ /* hold an io reference and drop the usecount before close */
+ devvp = mp->mnt_devvp;
+ vnode_clearmountedon(devvp);
+ vnode_getalways(devvp);
+ vnode_rele(devvp);
+ VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
ctx);
- vnode_rele(mp->mnt_devvp);
+ vnode_put(devvp);
}
lck_rw_done(&mp->mnt_rwlock);
mount_list_remove(mp);
struct nameidata fromnd, tond;
vfs_context_t ctx = vfs_context_current();
int error;
+ int do_retry;
int mntrename;
int need_event;
const char *oname;
fse_info from_finfo, to_finfo;
holding_mntlock = 0;
+ do_retry = 0;
retry:
fvp = tvp = NULL;
fdvp = tdvp = NULL;
if ((error = vnode_authorize(((tvp != NULL) && vnode_isdir(tvp)) ? tvp : tdvp,
NULL,
vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE,
- ctx)) != 0)
+ ctx)) != 0) {
+ /*
+ * We could encounter a race where after doing the namei, tvp stops
+ * being valid. If so, simply re-drive the rename call from the
+ * top.
+ */
+ if (error == ENOENT) {
+ do_retry = 1;
+ }
goto auth_exit;
+ }
} else {
/* node staying in same directory, must be allowed to add new name */
if ((error = vnode_authorize(fdvp, NULL,
}
/* overwriting tvp */
if ((tvp != NULL) && !vnode_isdir(tvp) &&
- ((error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx)) != 0))
+ ((error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx)) != 0)) {
+ /*
+ * We could encounter a race where after doing the namei, tvp stops
+ * being valid. If so, simply re-drive the rename call from the
+ * top.
+ */
+ if (error == ENOENT) {
+ do_retry = 1;
+ }
goto auth_exit;
+ }
/* XXX more checks? */
holding_mntlock = 0;
}
if (error) {
+ /*
+ * We may encounter a race in the VNOP where the destination didn't
+ * exist when we did the namei, but it does by the time we go and
+ * try to create the entry. In this case, we should re-drive this rename
+ * call from the top again.
+ */
+ if (error == EEXIST) {
+ do_retry = 1;
+ }
goto out1;
}
vnode_update_identity(fvp, tdvp, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen, tond.ni_cnd.cn_hash, update_flags);
}
out1:
- if (to_name != NULL)
- RELEASE_PATH(to_name);
- if (from_name != NULL)
- RELEASE_PATH(from_name);
-
+ if (to_name != NULL) {
+ RELEASE_PATH(to_name);
+ to_name = NULL;
+ }
+ if (from_name != NULL) {
+ RELEASE_PATH(from_name);
+ from_name = NULL;
+ }
if (holding_mntlock) {
mount_unlock_renames(locked_mp);
mount_drop(locked_mp, 0);
+ holding_mntlock = 0;
}
if (tdvp) {
/*
vnode_put(fvp);
vnode_put(fdvp);
}
+
+ /*
+ * If things changed after we did the namei, then we will re-drive
+ * this rename call from the top.
+ */
+ if(do_retry) {
+ do_retry = 0;
+ goto retry;
+ }
+
return (error);
}
goto bad;
}
if ( (error = vnode_ref_ext(vp, fmode)) ) {
- goto bad;
+ goto bad2;
}
/* call out to allow 3rd party notification of open.
*fmodep = fmode;
return (0);
+bad2:
+ VNOP_CLOSE(vp, fmode, ctx);
bad:
ndp->ni_vp = NULL;
if (vp) {
}
}
#endif
+
+ /* workaround for foxhound */
+ if (vp->v_type == VBLK)
+ (void)vnode_rele_ext(vp, flags, 0);
+
error = VNOP_CLOSE(vp, flags, ctx);
- (void)vnode_rele_ext(vp, flags, 0);
+ if (vp->v_type != VBLK)
+ (void)vnode_rele_ext(vp, flags, 0);
+
return (error);
}
goto done;
}
- /*
- * The mapping was successful. Let the buffer cache know
- * that we've mapped that file with these protections. This
- * prevents the vnode from getting recycled while it's mapped.
- */
- (void) ubc_map(vp, VM_PROT_READ);
error = 0;
/* update the vnode's access time */
}
-extern void throttle_lowpri_io(int *lowpri_window,mount_t v_mount);
-
pager_return_t
vnode_pagein(
struct vnode *vp,
ut = get_bsdthread_info(current_thread());
- if (ut->uu_lowpri_window && ut->v_mount) {
+ if (ut->uu_lowpri_window) {
/*
* task is marked as a low priority I/O type
- * and the I/O we issued while in this system call
+ * and the I/O we issued while in this page fault
* collided with normal I/O operations... we'll
* delay in order to mitigate the impact of this
* task on the normal operation of the system
*/
- throttle_lowpri_io(&ut->uu_lowpri_window,ut->v_mount);
+ throttle_lowpri_io(TRUE);
}
return (error);
}
-9.5.0
+9.6.0
# The first line of this file contains the master version number for the kernel.
# All other instances of the kernel version in xnu are derived from this file.
-_Cstate_table_set
_PE_install_interrupt_handler
_PE_interrupt_handler
_acpi_install_wake_handler
_mp_rendezvous_no_intrs
_mtrr_range_add
_mtrr_range_remove
-_pmsCPUSetPStateLimit
-_pmsCPULoadVIDTable
_rtc_clock_stepped
_rtc_clock_stepping
_smp_initialized
-_thread_bind
__ZN24IOBufferMemoryDescriptor20initWithPhysicalMaskEP4taskmyyy
__ZN24IOBufferMemoryDescriptor22inTaskWithPhysicalMaskEP4taskmyy
_sbflush
_sbspace
_securelevel
+_sha1_hardware_hook
_sleep
_soabort
_sobind
_cpu_number
_dsmos_page_transform_hook
_gPEEFISystemTable
-_hpet_get_info
-_hpet_register_callback
-_hpet_request
_in6addr_local
_io_map_spec
_kdp_register_callout
_ml_get_apicid
_ml_get_maxbusdelay
_ml_get_maxsnoop
-_ml_hpet_cfg
_ml_cpu_int_event_time
_mp_rendezvous
_mp_rendezvous_no_intrs
_pmCPUControl
_pmKextRegister
_pm_init_lock
-_rdHPET
_real_ncpus
_rtc_clock_napped
_serial_getc
#define kIOPlatformQuiesceActionKey "IOPlatformQuiesceAction" /* value is OSNumber (priority) */
#define kIOPlatformActiveActionKey "IOPlatformActiveAction" /* value is OSNumber (priority) */
+#define kIOPlatformFunctionHandlerSet "IOPlatformFunctionHandlerSet"
+#if defined(__i386__)
+#define kIOPlatformFunctionHandlerMaxBusDelay "IOPlatformFunctionHandlerMaxBusDelay"
+#define kIOPlatformFunctionHandlerMaxInterruptDelay "IOPlatformFunctionHandlerMaxInterruptDelay"
+#endif /* defined(__i386__) */
+
+
#endif /* ! _IOKIT_IOKITKEYSPRIVATE_H */
MI_DIR = platform
NOT_EXPORT_HEADERS =
-NOT_KF_MI_HEADERS = AppleARMCPU.h AppleARMFunction.h AppleARMIICController.h \
- AppleARMIICDevice.h AppleARMIISController.h \
- AppleARMIISDevice.h AppleARMIO.h AppleARMIODevice.h \
- AppleARMNORFlashController.h AppleARMNORFlashDevice.h \
- AppleARMPE.h AppleARMRTC.h AppleARMSPIController.h \
- AppleARMSPIDevice.h
+NOT_KF_MI_HEADERS =
INSTINC_SUBDIRS =
INSTINC_SUBDIRS_PPC =
#define kIOPMPSHealthConfidenceKey "HealthConfidence"
#define kIOPMPSCapacityEstimatedKey "CapacityEstimated"
#define kIOPMPSBatteryChargeStatusKey "ChargeStatus"
+#define kIOPMPSBatteryTemperatureKey "Temperature"
// kIOPMBatteryChargeStatusKey may have one of the following values, or may have
// no value. If kIOPMBatteryChargeStatusKey has a NULL value (or no value) associated with it
#define kIOPMSettingDisplaySleepUsesDimKey "Display Sleep Uses Dim"
#define kIOPMSettingTimeZoneOffsetKey "TimeZoneOffsetSeconds"
#define kIOPMSettingMobileMotionModuleKey "MobileMotionModule"
+#define kIOPMSettingGraphicsSwitchKey "GPUSwitch"
// Setting controlling drivers can register to receive scheduled wake data
// Either in "CF seconds" type, or structured calendar data in a formatted
kIOPMSetACAdaptorConnected = (1<<18)
};
+/*
+ * PM notification types
+ */
+
+/* @constant kIOPMStateConsoleShutdown
+ * @abstract Notification of GUI shutdown state available to kexts.
+ * @discussion This key can be passed as an argument to registerPMSettingController()
+ * to receive callbacks.
+ */
+#define kIOPMStateConsoleShutdown "ConsoleShutdown"
+
+/* @enum ShutdownValues
+ * @abstract Potential values shared with key kIOPMStateConsoleShutdown
+ */
+enum {
+/* @constant kIOPMStateConsoleShutdownNone
+ * @abstract System shutdown (or restart) hasn't started; system is ON.
+ * @discussion Next state: 2
+ */
+ kIOPMStateConsoleShutdownNone = 1,
+/* @constant kIOPMStateConsoleShutdownPossible
+ * @abstract User has been presented with the option to shutdown or restart. Shutdown may be cancelled.
+ * @discussion Next state may be: 1, 4
+ */
+ kIOPMStateConsoleShutdownPossible = 2,
+/* @constant kIOPMStateConsoleShutdownUnderway
+ * @abstract Shutdown or restart is proceeding. It may still be cancelled.
+ * @discussion Next state may be: 1, 4. This state is currently unused.
+ */
+ kIOPMStateConsoleShutdownUnderway = 3,
+/* @constant kIOPMStateConsoleShutdownCertain
+ * @abstract Shutdown is in progress and irrevocable.
+ * @discussion State remains 4 until power is removed from CPU.
+ */
+ kIOPMStateConsoleShutdownCertain = 4
+};
+
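/*
 * A minimal, hypothetical sketch of a kext consuming kIOPMStateConsoleShutdown.
 * Only the key and the enum values above come from this header; the callback
 * shape and the assumption that the published value arrives as an OSNumber are
 * illustrative assumptions.
 */
static IOReturn
mySettingCallback(OSObject * __unused target, const OSSymbol * __unused type,
                  OSObject *val, uintptr_t __unused refcon)
{
    OSNumber *num = OSDynamicCast(OSNumber, val);
    if (!num)
        return kIOReturnBadArgument;

    switch (num->unsigned32BitValue()) {
    case kIOPMStateConsoleShutdownNone:         /* system is ON; nothing pending */
    case kIOPMStateConsoleShutdownPossible:     /* user may still cancel         */
        break;
    case kIOPMStateConsoleShutdownCertain:      /* irrevocable: quiesce hardware */
        break;
    default:
        break;
    }
    return kIOReturnSuccess;
}
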
#endif /* ! _IOKIT_IOPMPRIVATE_H */
kPMLogSetClockGating, // 50 0x051000c8 - platform device specific clock control
kPMLogSetPowerGating, // 51 0x051000cc - platform device specific power control
kPMLogSetPinGroup, // 52 0x051000d0 - platform device specific gpio control
+ kPMLogIdleCancel, // 53 0x051000d4 - device unidle during change
kIOPMlogLastEvent
};
IOPMinformee.h \
IOPMinformeeList.h \
IOPMlog.h \
- IOPMPagingPlexus.h \
- IOPMPrivate.h
-
+ IOPMPagingPlexus.h
+
INSTINC_SUBDIRS =
INSTINC_SUBDIRS_PPC =
INSTINC_SUBDIRS_I386 =
ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h))
INSTALL_MI_LIST = IOPMLibDefs.h IOPM.h IOPMDeprecated.h
-INSTALL_MI_LCL_LIST = ""
+INSTALL_MI_LCL_LIST = IOPMPrivate.h
INSTALL_MI_DIR = $(MI_DIR)
EXPORT_MI_LIST = $(filter-out $(NOT_EXPORT_HEADERS), $(ALL_HEADERS))
if (targetCPU) targetCPU->quiesceCPU();
}
+
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#define super IOService
goto finish;
}
- PE_parse_boot_arg("keepsyms", &keepsyms);
+ PE_parse_boot_argn("keepsyms", &keepsyms, sizeof (keepsyms));
IOLog("Jettisoning kernel linker.\n");
static UInt sMaxDBDMASegment;
if (!sMaxDBDMASegment) {
sMaxDBDMASegment = (UInt) -1;
- if (PE_parse_boot_arg("mseg", &sMaxDBDMASegment))
+ if (PE_parse_boot_argn("mseg", &sMaxDBDMASegment, sizeof (sMaxDBDMASegment)))
IOLog("Setting MaxDBDMASegment to %d\n", sMaxDBDMASegment);
}
#include <IOKit/IOPlatformExpert.h>
#include <IOKit/IOUserClient.h>
#include <IOKit/IOKitKeys.h>
+#include <kern/debug.h>
+#include <pexpert/pexpert.h>
#define super IOService
{"security-password", kOFVariableTypeData, kOFVariablePermRootOnly, -1},
{"boot-image", kOFVariableTypeData, kOFVariablePermUserWrite, -1},
{"com.apple.System.fp-state", kOFVariableTypeData, kOFVariablePermKernelOnly, -1},
+#if CONFIG_EMBEDDED
+ {"backlight-level", kOFVariableTypeData, kOFVariablePermUserWrite, -1},
+#endif
{0, kOFVariableTypeData, kOFVariablePermUserRead, -1}
};
// expert informs us we are the root.
// **********************************************************************************
-#define kRootDomainSettingsCount 14
+#define kRootDomainSettingsCount 16
static SYSCTL_STRUCT(_kern, OID_AUTO, sleeptime,
CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN,
OSSymbol::withCString(kIOPMSettingWakeOnACChangeKey),
OSSymbol::withCString(kIOPMSettingTimeZoneOffsetKey),
OSSymbol::withCString(kIOPMSettingDisplaySleepUsesDimKey),
- OSSymbol::withCString(kIOPMSettingMobileMotionModuleKey)
+ OSSymbol::withCString(kIOPMSettingMobileMotionModuleKey),
+ OSSymbol::withCString(kIOPMSettingGraphicsSwitchKey),
+ OSSymbol::withCString(kIOPMStateConsoleShutdown)
};
{
if ( stateNum == ON_STATE )
{
-#if HIBERNATION
// Direct callout into OSMetaClass so it can disable kmod unloads
// during sleep/wake to prevent deadlocks.
OSMetaClassSystemSleepOrWake( kIOMessageSystemHasPoweredOn );
- IOHibernateSystemPostWake();
+ if (getPowerState() == ON_STATE)
+ {
+ // this is a quick wake from aborted sleep
+ if (idleSeconds && !wrangler)
+ {
+ AbsoluteTime deadline;
+ sleepASAP = false;
+ // stay awake for at least idleSeconds
+ clock_interval_to_deadline(idleSeconds, kSecondScale, &deadline);
+ thread_call_enter_delayed(extraSleepTimer, deadline);
+ // this gets turned off when we sleep again
+ idleSleepPending = true;
+ }
+ tellClients(kIOMessageSystemWillPowerOn);
+ }
+#if HIBERNATION
+ else
+ {
+ IOHibernateSystemPostWake();
+ }
#endif
return tellClients(kIOMessageSystemHasPoweredOn);
}
return false;
// Override the mapper present flag is requested by boot arguments.
- if (PE_parse_boot_arg("dart", &debugFlags) && (debugFlags == 0))
+ if (PE_parse_boot_argn("dart", &debugFlags, sizeof (debugFlags)) && (debugFlags == 0))
removeProperty(kIOPlatformMapperPresentKey);
// Register the presence or lack thereof a system
const OSSymbol * gIOPlatformQuiesceActionKey;
const OSSymbol * gIOPlatformActiveActionKey;
+const OSSymbol * gIOPlatformFunctionHandlerSet;
+
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
#define LOCKREADNOTIFY() \
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
-#if __i386__
+#if defined(__i386__)
// Only used by the intel implementation of
-// IOService::requireMaxBusStall(UInt32 __unused ns)
-struct BusStallEntry
+// IOService::requireMaxBusStall(UInt32 ns)
+// IOService::requireMaxInterruptDelay(uint32_t ns)
+struct CpuDelayEntry
{
- const IOService *fService;
- UInt32 fMaxDelay;
+ IOService * fService;
+ UInt32 fMaxDelay;
+ UInt32 fDelayType;
+};
+
+enum {
+ kCpuDelayBusStall, kCpuDelayInterrupt,
+ kCpuNumDelayTypes
};
-static OSData *sBusStall = OSData::withCapacity(8 * sizeof(BusStallEntry));
-static IOLock *sBusStallLock = IOLockAlloc();
-#endif /* __i386__ */
+static OSData *sCpuDelayData = OSData::withCapacity(8 * sizeof(CpuDelayEntry));
+static IORecursiveLock *sCpuDelayLock = IORecursiveLockAlloc();
+static OSArray *sCpuLatencyHandlers[kCpuNumDelayTypes];
+const OSSymbol *sCPULatencyFunctionName[kCpuNumDelayTypes];
+
+static void
+requireMaxCpuDelay(IOService * service, UInt32 ns, UInt32 delayType);
+static IOReturn
+setLatencyHandler(UInt32 delayType, IOService * target, bool enable);
+
+#endif /* defined(__i386__) */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
gIOPlatformQuiesceActionKey = OSSymbol::withCStringNoCopy(kIOPlatformQuiesceActionKey);
gIOPlatformActiveActionKey = OSSymbol::withCStringNoCopy(kIOPlatformActiveActionKey);
+ gIOPlatformFunctionHandlerSet = OSSymbol::withCStringNoCopy(kIOPlatformFunctionHandlerSet);
+#if defined(__i386__)
+ sCPULatencyFunctionName[kCpuDelayBusStall] = OSSymbol::withCStringNoCopy(kIOPlatformFunctionHandlerMaxBusDelay);
+ sCPULatencyFunctionName[kCpuDelayInterrupt] = OSSymbol::withCStringNoCopy(kIOPlatformFunctionHandlerMaxInterruptDelay);
+#endif
gNotificationLock = IORecursiveLockAlloc();
assert( gIOServicePlane && gIODeviceMemoryKey
void *param3, void *param4 )
{
IOReturn result = kIOReturnUnsupported;
- IOService *provider = getProvider();
-
- if (provider != 0) {
+ IOService *provider;
+
+ if (gIOPlatformFunctionHandlerSet == functionName)
+ {
+#if defined(__i386__)
+ const OSSymbol * functionHandlerName = (const OSSymbol *) param1;
+ IOService * target = (IOService *) param2;
+ bool enable = (param3 != 0);
+
+ if (sCPULatencyFunctionName[kCpuDelayBusStall] == functionHandlerName)
+ result = setLatencyHandler(kCpuDelayBusStall, target, enable);
+ else if (sCPULatencyFunctionName[kCpuDelayInterrupt] == param1)
+ result = setLatencyHandler(kCpuDelayInterrupt, target, enable);
+#endif /* defined(__i386__) */
+ }
+
+ if ((kIOReturnUnsupported == result) && (provider = getProvider())) {
result = provider->callPlatformFunction(functionName, waitForFunction,
param1, param2, param3, param4);
}
void IOService::
setCPUSnoopDelay(UInt32 __unused ns)
{
-#if __i386__
+#if defined(__i386__)
ml_set_maxsnoop(ns);
-#endif /* __i386__ */
+#endif /* defined(__i386__) */
}
UInt32 IOService::
getCPUSnoopDelay()
{
-#if __i386__
+#if defined(__i386__)
return ml_get_maxsnoop();
#else
return 0;
-#endif /* __i386__ */
+#endif /* defined(__i386__) */
}
-void IOService::
-requireMaxBusStall(UInt32 __unused ns)
+#if defined(__i386__)
+static void
+requireMaxCpuDelay(IOService * service, UInt32 ns, UInt32 delayType)
{
-#if __i386__
static const UInt kNoReplace = -1U; // Must be an illegal index
UInt replace = kNoReplace;
+ bool setCpuDelay = false;
- IOLockLock(sBusStallLock);
+ IORecursiveLockLock(sCpuDelayLock);
- UInt count = sBusStall->getLength() / sizeof(BusStallEntry);
- BusStallEntry *entries = (BusStallEntry *) sBusStall->getBytesNoCopy();
+ UInt count = sCpuDelayData->getLength() / sizeof(CpuDelayEntry);
+ CpuDelayEntry *entries = (CpuDelayEntry *) sCpuDelayData->getBytesNoCopy();
+ IOService * holder = NULL;
if (ns) {
- const BusStallEntry ne = {this, ns};
-
- // Set Maximum bus delay.
- for (UInt i = 0; i < count; i++) {
- const IOService *thisService = entries[i].fService;
- if (this == thisService)
- replace = i;
- else if (!thisService) {
- if (kNoReplace == replace)
- replace = i;
- }
- else {
- const UInt32 thisMax = entries[i].fMaxDelay;
- if (thisMax < ns)
- ns = thisMax;
- }
- }
-
- // Must be safe to call from locked context
- ml_set_maxbusdelay(ns);
-
- if (kNoReplace == replace)
- sBusStall->appendBytes(&ne, sizeof(ne));
- else
- entries[replace] = ne;
+ const CpuDelayEntry ne = {service, ns, delayType};
+ holder = service;
+ // Set maximum delay.
+ for (UInt i = 0; i < count; i++) {
+ IOService *thisService = entries[i].fService;
+ bool sameType = (delayType == entries[i].fDelayType);
+ if ((service == thisService) && sameType)
+ replace = i;
+ else if (!thisService) {
+ if (kNoReplace == replace)
+ replace = i;
+ }
+ else if (sameType) {
+ const UInt32 thisMax = entries[i].fMaxDelay;
+ if (thisMax < ns)
+ {
+ ns = thisMax;
+ holder = thisService;
+ }
+ }
+ }
+
+ setCpuDelay = true;
+ if (kNoReplace == replace)
+ sCpuDelayData->appendBytes(&ne, sizeof(ne));
+ else
+ entries[replace] = ne;
}
else {
- ns = -1U; // Set to max unsigned, i.e. no restriction
-
- for (UInt i = 0; i < count; i++) {
- // Clear a maximum bus delay.
- const IOService *thisService = entries[i].fService;
- UInt32 thisMax = entries[i].fMaxDelay;
- if (this == thisService)
- replace = i;
- else if (thisService && thisMax < ns)
- ns = thisMax;
+ ns = -1U; // Set to max unsigned, i.e. no restriction
+
+ for (UInt i = 0; i < count; i++) {
+ // Clear a maximum delay.
+ IOService *thisService = entries[i].fService;
+ if (thisService && (delayType == entries[i].fDelayType)) {
+ UInt32 thisMax = entries[i].fMaxDelay;
+ if (service == thisService)
+ replace = i;
+ else if (thisMax < ns) {
+ ns = thisMax;
+ holder = thisService;
+ }
+ }
+ }
+
+ // Check if entry found
+ if (kNoReplace != replace) {
+ entries[replace].fService = 0; // Null the entry
+ setCpuDelay = true;
+ }
+ }
+
+ if (setCpuDelay)
+ {
+ // Must be safe to call from locked context
+ if (delayType == kCpuDelayBusStall)
+ {
+ ml_set_maxbusdelay(ns);
+ }
+ else if (delayType == kCpuDelayInterrupt)
+ {
+ ml_set_maxintdelay(ns);
+ }
+
+ OSArray * handlers = sCpuLatencyHandlers[delayType];
+ IOService * target;
+ if (handlers) for (unsigned int idx = 0;
+ (target = (IOService *) handlers->getObject(idx));
+ idx++)
+ {
+ target->callPlatformFunction(sCPULatencyFunctionName[delayType], false,
+ (void *) (uintptr_t) ns, holder,
+ NULL, NULL);
}
+ }
- // Check if entry found
- if (kNoReplace != replace) {
- entries[replace].fService = 0; // Null the entry
- ml_set_maxbusdelay(ns);
+ IORecursiveLockUnlock(sCpuDelayLock);
+}
+
+static IOReturn
+setLatencyHandler(UInt32 delayType, IOService * target, bool enable)
+{
+ IOReturn result = kIOReturnNotFound;
+ OSArray * array;
+ unsigned int idx;
+
+ IORecursiveLockLock(sCpuDelayLock);
+
+ do
+ {
+ if (enable && !sCpuLatencyHandlers[delayType])
+ sCpuLatencyHandlers[delayType] = OSArray::withCapacity(4);
+ array = sCpuLatencyHandlers[delayType];
+ if (!array)
+ break;
+ idx = array->getNextIndexOfObject(target, 0);
+ if (!enable)
+ {
+ if (-1U != idx)
+ {
+ array->removeObject(idx);
+ result = kIOReturnSuccess;
+ }
+ }
+ else
+ {
+ if (-1U != idx) {
+ result = kIOReturnExclusiveAccess;
+ break;
+ }
+ array->setObject(target);
+
+ UInt count = sCpuDelayData->getLength() / sizeof(CpuDelayEntry);
+ CpuDelayEntry *entries = (CpuDelayEntry *) sCpuDelayData->getBytesNoCopy();
+ UInt32 ns = -1U; // Set to max unsigned, i.e. no restriction
+ IOService * holder = NULL;
+
+ for (UInt i = 0; i < count; i++) {
+ if (entries[i].fService
+ && (delayType == entries[i].fDelayType)
+ && (entries[i].fMaxDelay < ns)) {
+ ns = entries[i].fMaxDelay;
+ holder = entries[i].fService;
+ }
+ }
+ target->callPlatformFunction(sCPULatencyFunctionName[delayType], false,
+ (void *) (uintptr_t) ns, holder,
+ NULL, NULL);
+ result = kIOReturnSuccess;
}
}
+ while (false);
- IOLockUnlock(sBusStallLock);
-#endif /* __i386__ */
+ IORecursiveLockUnlock(sCpuDelayLock);
+
+ return (result);
+}
+
+#endif /* defined(__i386__) */
+
+void IOService::
+requireMaxBusStall(UInt32 __unused ns)
+{
+#if defined(__i386__)
+ requireMaxCpuDelay(this, ns, kCpuDelayBusStall);
+#endif
}
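
/*
 * A hypothetical sketch of a driver using the registration path above via the
 * kIOPlatformFunctionHandlerSet / kIOPlatformFunctionHandlerMaxBusDelay keys
 * added to IOKitKeysPrivate.h. The class name, include paths, and doing the
 * registration from start() are assumptions for illustration only.
 */
#include <IOKit/IOService.h>
#include <IOKit/IOLib.h>
#include <IOKit/IOKitKeysPrivate.h>

class MyLatencyAwareDriver : public IOService
{
    OSDeclareDefaultStructors(MyLatencyAwareDriver)
public:
    virtual bool start(IOService *provider);
    virtual IOReturn callPlatformFunction(const OSSymbol *functionName,
                bool waitForFunction, void *param1, void *param2,
                void *param3, void *param4);
};
OSDefineMetaClassAndStructors(MyLatencyAwareDriver, IOService)

#define super IOService

bool MyLatencyAwareDriver::start(IOService *provider)
{
    if (!super::start(provider))
        return false;

    const OSSymbol *set = OSSymbol::withCString(kIOPlatformFunctionHandlerSet);
    const OSSymbol *fn  = OSSymbol::withCString(kIOPlatformFunctionHandlerMaxBusDelay);

    /* param1: which latency type, param2: the IOService to call back,
     * param3 != 0: register (0 would unregister). */
    IOReturn ret = callPlatformFunction(set, false,
                                        (void *) fn, (void *) this,
                                        (void *) 1, NULL);
    set->release();
    fn->release();
    return (ret == kIOReturnSuccess);
}

/* Once registered, setLatencyHandler()/requireMaxCpuDelay() call back with the
 * latency symbol, the new aggregate limit in nanoseconds, and the holder. */
IOReturn MyLatencyAwareDriver::callPlatformFunction(const OSSymbol *functionName,
        bool waitForFunction, void *param1, void *param2, void *param3, void *param4)
{
    if (functionName->isEqualTo(kIOPlatformFunctionHandlerMaxBusDelay)) {
        UInt32      maxDelayNS = (UInt32)(uintptr_t) param1; /* new limit      */
        IOService * holder     = (IOService *) param2;       /* who imposed it */
        IOLog("max bus delay now %u ns (holder %s)\n",
              (unsigned) maxDelayNS, holder ? holder->getName() : "none");
        return kIOReturnSuccess;
    }
    return super::callPlatformFunction(functionName, waitForFunction,
                                       param1, param2, param3, param4);
}
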
/*
#define NS_TO_MS(nsec) ((int)((nsec) / 1000000ULL))
+#if CONFIG_EMBEDDED
+#define SUPPORT_IDLE_CANCEL 1
+#endif
+
//*********************************************************************************
// PM machine states
//*********************************************************************************
unsigned long computedState;
unsigned long theDesiredState;
IOService * child;
+ IOPMRequest * childRequest;
if (!initialized)
return IOPMNotYetInitialized;
}
// Record the child's desires on the connection.
-
+#if SUPPORT_IDLE_CANCEL
+ bool attemptCancel = ((kIOPMPreventIdleSleep & desiredState) && !whichChild->getPreventIdleSleepFlag());
+#endif
whichChild->setDesiredDomainState( computedState );
whichChild->setPreventIdleSleepFlag( desiredState & kIOPMPreventIdleSleep );
whichChild->setPreventSystemSleepFlag( desiredState & kIOPMPreventSystemSleep );
if (!fWillAdjustPowerState && !fDeviceOverrides)
{
- IOPMRequest * childRequest;
-
childRequest = acquirePMRequest( this, kIOPMRequestTypeAdjustPowerState );
if (childRequest)
{
fWillAdjustPowerState = true;
}
}
+#if SUPPORT_IDLE_CANCEL
+ if (attemptCancel)
+ {
+ childRequest = acquirePMRequest( this, kIOPMRequestTypeIdleCancel );
+ if (childRequest)
+ {
+ submitPMRequest( childRequest );
+ }
+ }
+#endif
return IOPMNoErr;
}
// apps didn't respond in time
cleanClientResponses(true);
OUR_PMLog(kPMLogClientTardy, 0, 1);
- if (fMachineState == kIOPM_OurChangeTellClientsPowerDown)
- {
- // tardy equates to veto
- fDoNotPowerDown = true;
- }
+ // tardy equates to approval
done = true;
break;
return kIOReturnSuccess;
}
+ OSString * name = IOCopyLogNameForPID(proc_selfpid());
+ PM_ERROR("PM notification cancel (%s)\n", name ? name->getCStringNoCopy() : "");
+ if (name)
+ name->release();
+
request = acquirePMRequest( this, kIOPMRequestTypeCancelPowerChange );
if (!request)
{
case kIOPM_OurChangeTellClientsPowerDown:
// our change, was it vetoed?
- if (fDesiredPowerState > fHeadNoteState)
- {
- PM_DEBUG("%s: idle cancel\n", fName);
- fDoNotPowerDown = true;
- }
if (!fDoNotPowerDown)
{
// no, we can continue
}
else
{
+ OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState);
+ PM_ERROR("%s: idle cancel\n", fName);
// yes, rescind the warning
tellNoChangeDown(fHeadNoteState);
// mark the change note un-actioned
break;
case kIOPM_OurChangeTellPriorityClientsPowerDown:
- OurChangeTellPriorityClientsPowerDown();
+ // our change, should it be acted on still?
+#if SUPPORT_IDLE_CANCEL
+ if (fDoNotPowerDown)
+ {
+ OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState);
+ PM_ERROR("%s: idle revert\n", fName);
+ // no, tell clients we're back in the old state
+ tellChangeUp(fCurrentPowerState);
+ // mark the change note un-actioned
+ fHeadNoteFlags |= IOPMNotDone;
+ // and we're done
+ all_done();
+ }
+ else
+#endif
+ {
+ // yes, we can continue
+ OurChangeTellPriorityClientsPowerDown();
+ }
break;
case kIOPM_OurChangeNotifyInterestedDriversWillChange:
more = true;
break;
+#if SUPPORT_IDLE_CANCEL
+ case kIOPMRequestTypeIdleCancel:
+ if ((fMachineState == kIOPM_OurChangeTellClientsPowerDown)
+ || (fMachineState == kIOPM_OurChangeTellPriorityClientsPowerDown))
+ {
+ OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, 0);
+ fDoNotPowerDown = true;
+ if (fMachineState == kIOPM_OurChangeTellPriorityClientsPowerDown)
+ cleanClientResponses(false);
+ more = true;
+ }
+ break;
+#endif
+
default:
IOPanic("servicePMReplyQueue: unknown reply type");
}
kIOPMRequestTypeAckSetPowerState = 0x82,
kIOPMRequestTypeAllowPowerChange = 0x83,
kIOPMRequestTypeCancelPowerChange = 0x84,
- kIOPMRequestTypeInterestChanged = 0x85
+ kIOPMRequestTypeInterestChanged = 0x85,
+ kIOPMRequestTypeIdleCancel = 0x86
};
//*********************************************************************************
extern "C" {
extern void OSlibkernInit (void);
-extern void ml_hpet_cfg(uint32_t, uint32_t);
#include <kern/clock.h>
#include <sys/time.h>
OSCollectionIterator * kmodIter; // must release
OSString * kmodName; // don't release
- if( PE_parse_boot_arg( "io", &debugFlags ))
+ if( PE_parse_boot_argn( "io", &debugFlags, sizeof (debugFlags) ))
gIOKitDebug = debugFlags;
// Check for the log synchronous bit set in io
_kgm_update_loop
end
else
+ if ($kgm_mtype == 7)
set $newact = (struct thread *) $arg0
#This needs to identify 64-bit processes as well
set $newiss = (x86_saved_state32_t) ($newact->machine.pcb->iss.uss.ss_32)
_kgm_flush_loop
_kgm_update_loop
end
+ else
+ echo showuserstack not supported on this architecture\n
+ end
end
end
document showuserstack
flushstack
set $pc = $newact->machine->pcb.save_srr0
else
+ if ($kgm_mtype == 7)
set $kgm_cstatep = (struct x86_kernel_state32 *) \
($newact->kernel_stack + 0x4000 \
- sizeof(struct x86_kernel_state32))
loadcontext $kgm_cstatep
flushstack
+ else
+ echo switchtocorethread not supported on this architecture\n
+ end
end
showcontext_int
end
set $cr = $kgm_contextp.save_cr
set $ctr = $kgm_contextp.save_ctr
else
+ if ($kgm_mtype == 7)
set $kgm_contextp = (struct x86_kernel_state32 *) $arg0
set $ebx = $kgm_contextp->k_ebx
set $ebp = $kgm_contextp->k_ebp
set $esi = $kgm_contextp->k_esi
set $eip = $kgm_contextp->k_eip
set $pc = $kgm_contextp->k_eip
+ else
+ echo loadcontext not supported on this architecture\n
+ end
end
end
flushstack
set $pc = $kdpstatep->eip
update
+ else
+ echo resetcorectx not supported on this architecture\n
end
end
showcontext_int
_if_present mca_threshold_status_present
printf "\n%d error banks, ", mca_error_bank_count
printf "family code 0x%x, ", mca_family
- printf "machine-check exception taken: %d\n", mca_exception_taken
+ printf "machine-check dump state: %d\n", mca_dump_state
set $kgm_cpu = 0
while cpu_data_ptr[$kgm_cpu] != 0
set $kgm_mcp = cpu_data_ptr[$kgm_cpu]->cpu_mca_state
#include <sys/types.h>
#include <sys/systm.h>
+#include <libkern/OSAtomic.h>
#include <libkern/crypto/sha1.h>
#define memset(x, y, z) bzero(x, z);
static void SHA1Transform(u_int32_t, u_int32_t, u_int32_t, u_int32_t,
u_int32_t, const u_int8_t *, SHA1_CTX *);
+void _SHA1Update(SHA1_CTX *context, const void *inpp, size_t inputLen);
+
void SHA1Final_r(SHA1_CTX *, void *);
+typedef kern_return_t (*InKernelPerformSHA1Func)(void *ref, const void *data, size_t dataLen, u_int32_t *inHash, u_int32_t options, u_int32_t *outHash, Boolean usePhysicalAddress);
+void sha1_hardware_hook(Boolean option, InKernelPerformSHA1Func func, void *ref);
+static void *SHA1Ref;
+InKernelPerformSHA1Func performSHA1WithinKernelOnly;
+#define SHA1_USE_HARDWARE_THRESHOLD 2048 //bytes
+
+
/*
* SHA1 initialization. Begins a SHA1 operation, writing a new context.
*/
* context.
*/
void
-SHA1Update(SHA1_CTX *context, const void *inpp, size_t inputLen)
+_SHA1Update(SHA1_CTX *context, const void *inpp, size_t inputLen)
{
u_int32_t i, index, partLen;
const unsigned char *input = (const unsigned char *)inpp;
memcpy(&context->buffer[index], &input[i], inputLen - i);
}
+
+
+
+/*
+ * This function is called by the SHA1 hardware kext during its init.
+ * This will register the function to call to perform SHA1 using hardware.
+ */
+void sha1_hardware_hook(Boolean option, InKernelPerformSHA1Func func, void *ref)
+{
+ if(option) {
+ // Establish the hook. The hardware is ready.
+ OSCompareAndSwap((uintptr_t)NULL, (uintptr_t)ref, (uintptr_t *)&SHA1Ref);
+
+ if(!OSCompareAndSwap((uintptr_t)NULL, (uintptr_t)func, (uintptr_t *)&performSHA1WithinKernelOnly)) {
+ panic("sha1_hardware_hook: Called twice.. Should never happen\n");
+ }
+ }
+ else {
+ // The hardware is going away. Tear down the hook.
+ performSHA1WithinKernelOnly = NULL;
+ SHA1Ref = NULL;
+ }
+}
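
/*
 * A minimal sketch of how a hardware SHA-1 driver might use the hook above.
 * The driver-side names (my_sha1_do_hash, my_sha1_softc, my_sha1_*) are
 * hypothetical; the hook and the InKernelPerformSHA1Func typedef come from
 * this file, and kern_return_t/Boolean are assumed from the usual kernel headers.
 */
static void *my_sha1_softc;     /* hypothetical per-engine driver state */

static kern_return_t
my_sha1_do_hash(void *ref, const void *data, size_t dataLen, u_int32_t *inHash,
                u_int32_t options, u_int32_t *outHash, Boolean usePhysicalAddress)
{
    /* Program the engine with the current 160-bit state (inHash), hash dataLen
     * bytes of data, and write the updated state to outHash.  Returning anything
     * other than KERN_SUCCESS makes the callers above fall back to software. */
    return KERN_SUCCESS;
}

static void my_sha1_engine_ready(void)
{
    /* Hardware is up: install the hook exactly once. */
    sha1_hardware_hook(TRUE, my_sha1_do_hash, my_sha1_softc);
}

static void my_sha1_engine_teardown(void)
{
    /* Hardware is going away: tear the hook down. */
    sha1_hardware_hook(FALSE, NULL, NULL);
}
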
+
+static u_int32_t SHA1UpdateWithHardware(SHA1_CTX *context, const unsigned char *data, size_t dataLen, Boolean usePhysicalAddress)
+{
+ u_int32_t *inHashBuffer = context->state;
+ u_int32_t options = 0;
+ int result;
+
+ result = performSHA1WithinKernelOnly(SHA1Ref, data, dataLen, inHashBuffer, options, inHashBuffer, usePhysicalAddress);
+ if(result != KERN_SUCCESS) {
+ //The hardware failed to hash for some reason. Fall back to software.
+ return 0;
+ }
+
+ //Update the context with the total length.
+ /* Update number of bits */
+ if ((context->bcount[1] += (dataLen << 3)) < (dataLen << 3))
+ context->bcount[0]++;
+ context->bcount[0] += (dataLen >> 29);
+ return dataLen;
+}
+
+/*
+ * This function is only called from the pagefault path or from page_copy(),
+ * so we assume that we can safely convert the virtual address to a physical address and use it.
+ * Assumptions: The passed-in address (inpp) is a kernel virtual address
+ * and a physical page has been faulted in.
+ * The inputLen passed in should always be less than or equal to a page size (4096)
+ * and inpp should be on a page boundary.
+ * "performSHA1WithinKernelOnly" is initialized only when the hardware driver exists and is ready.
+ */
+void SHA1UpdateUsePhysicalAddress(SHA1_CTX *context, const void *inpp, size_t inputLen)
+{
+ Boolean usePhysicalAddress = TRUE;
+ if((inputLen == PAGE_SIZE) && performSHA1WithinKernelOnly) { // If hardware exists and is ready.
+ if(SHA1UpdateWithHardware(context, (const unsigned char *)inpp, inputLen, usePhysicalAddress))
+ return;
+ //otherwise the hardware failed for some reason;
+ //fall through and try the hash in software.
+ }
+ //Use the software implementation since the hardware is absent or
+ // has not been initialized yet or inputLen != PAGE_SIZE.
+ _SHA1Update(context, inpp, inputLen);
+}
+
+/*
+ * A wrapper around _SHA1Update() to pick between software- or hardware-based SHA1.
+ */
+void SHA1Update(SHA1_CTX *context, const void *inpp, size_t inputLen)
+{
+ const unsigned char *input = (const unsigned char *)inpp;
+ Boolean usePhysicalAddress = FALSE;
+ u_int32_t index;
+
+ if((inputLen > SHA1_USE_HARDWARE_THRESHOLD) && performSHA1WithinKernelOnly) {
+ index = (context->bcount[1] >> 3) & 0x3F;
+ if(index != 0) { //bytes left in the context. Handle them first.
+ u_int32_t partLen = 64 - index;
+ memcpy(&context->buffer[index], input, partLen);
+ _SHA1Update(context, input, partLen); /* consume only the bytes that complete the buffered block */
+ inputLen -= partLen;
+ input += partLen;
+ }
+
+ u_int32_t lenForHardware = inputLen & (~0x3F); //multiple of 64
+ u_int32_t bytesHashed = 0;
+ bytesHashed = SHA1UpdateWithHardware(context, input, lenForHardware, usePhysicalAddress);
+
+ inputLen -= bytesHashed;
+ input += bytesHashed;
+ }
+
+ //Fall through to the software implementation.
+ _SHA1Update(context, input, inputLen);
+}
+
/*
* For backwards compatibility, sha1_result symbol is mapped to this
* routine since it's equivalent to SHA1Final with reversed parameters.
#define APPLE_KEXT_VTABLE_PADDING 1
+#if defined(__LP64__)
+#define APPLE_KEXT_LEGACY_ABI 0
+#elif defined(__arm__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2))
+#define APPLE_KEXT_LEGACY_ABI 0
+#else
+#define APPLE_KEXT_LEGACY_ABI 1
+#endif
+
#if APPLE_KEXT_VTABLE_PADDING
#define APPLE_KEXT_PAD_METHOD virtual
#define APPLE_KEXT_PAD_IMPL(index) gMetaClass.reservedCalled(index)
#define OSCheckTypeInst(typeinst, inst) \
OSMetaClassBase::checkTypeInst(inst, typeinst)
+typedef void (*_ptf_t)(void);
+
+#if APPLE_KEXT_LEGACY_ABI
// Arcane evil code interprets a C++ pointer to function as specified in the
// -fapple-kext ABI, i.e. the gcc-2.95 generated code. IT DOES NOT ALLOW
// the conversion of functions that are from MULTIPLY inherited classes.
-typedef void (*_ptf_t)(void);
-
static inline _ptf_t
_ptmf2ptf(const OSMetaClassBase *self, void (OSMetaClassBase::*func)(void))
{
}
}
+#else /* !APPLE_KEXT_LEGACY_ABI */
+
+
+// Slightly less arcane and slightly less evil code to do
+// the same for kexts compiled with the standard Itanium C++
+// ABI
+
+static inline _ptf_t
+_ptmf2ptf(const OSMetaClassBase *self, void (OSMetaClassBase::*func)(void))
+{
+ union {
+ void (OSMetaClassBase::*fIn)(void);
+ uintptr_t fVTOffset;
+ _ptf_t fPFN;
+ } map;
+
+ map.fIn = func;
+
+ if (map.fVTOffset & 1) {
+ // virtual
+ union {
+ const OSMetaClassBase *fObj;
+ _ptf_t **vtablep;
+ } u;
+ u.fObj = self;
+
+ // Virtual member function so dereference vtable
+ return *(_ptf_t *)(((uintptr_t)*u.vtablep) + map.fVTOffset - 1);
+ } else {
+ // Not virtual, i.e. plain member func
+ return map.fPFN;
+ }
+}
+
+
+#endif /* !APPLE_KEXT_LEGACY_ABI */
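
/*
 * For orientation, a hypothetical use of the OSMemberFunctionCast() macro that
 * is documented below and is the public consumer of _ptmf2ptf(): turning a C++
 * member function into the C-style Action an event source expects.  The driver
 * and method names are assumptions for illustration.
 */
#include <IOKit/IOService.h>
#include <IOKit/IOWorkLoop.h>
#include <IOKit/IOTimerEventSource.h>

class MyDriver : public IOService
{
    OSDeclareDefaultStructors(MyDriver)
    IOTimerEventSource *fTimer;
    void timerFired(IOTimerEventSource *sender);
public:
    bool setUpTimer(IOWorkLoop *wl);
};
OSDefineMetaClassAndStructors(MyDriver, IOService)

bool MyDriver::setUpTimer(IOWorkLoop *wl)
{
    /* _ptmf2ptf() (legacy or Itanium flavour, selected above) resolves
     * &MyDriver::timerFired -- through the vtable if it were virtual -- to a
     * plain function pointer that the event source later calls with the owner
     * as its first argument. */
    fTimer = IOTimerEventSource::timerEventSource(this,
                 OSMemberFunctionCast(IOTimerEventSource::Action,
                                      this, &MyDriver::timerFired));
    return (fTimer != NULL && wl->addEventSource(fTimer) == kIOReturnSuccess);
}

void MyDriver::timerFired(IOTimerEventSource * __unused sender)
{
    /* periodic work */
}
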
+
/*! @function OSMemberFunctionCast
@abstract Convert a pointer to a member function to a c-style pointer to function. No warnings are generated.
@param type The type of pointer function desired.
extern void SHA1Init(SHA1_CTX *);
extern void SHA1Update(SHA1_CTX *, const void *, size_t);
+extern void SHA1UpdateUsePhysicalAddress(SHA1_CTX *context, const void *inpp, size_t inputLen);
extern void SHA1Final(void *, SHA1_CTX *);
#ifdef __cplusplus
goto finish;
}
- } else if (PE_parse_boot_arg("-x", namep)) { /* safe boot */
+ } else if (PE_parse_boot_argn("-x", namep, sizeof (namep))) { /* safe boot */
ineligible_for_safe_boot = true;
result = false;
goto finish;
goto finish;
}
+ if (0 == strcmp("com.apple.driver.AppleIntelCPUPowerManagement",
+ incumbentName->getCStringNoCopy())) {
+ /* Special rules. Always favor version 51.0.0 exactly at the
+ * expense of all other versions newer or older.
+ */
+ if(0 == strcmp(incumbentVersionString->getCStringNoCopy(), "51.0.0")) {
+ IOLog(VTYELLOW "Skipping duplicate extension \"%s\" with "
+ " version (%s -> %s).\n" VTRESET,
+ candidateName->getCStringNoCopy(),
+ candidateVersionString->getCStringNoCopy(),
+ incumbentVersionString->getCStringNoCopy());
+ winner = incumbent;
+ goto finish;
+ } else if (0 == strcmp(candidateVersionString->getCStringNoCopy(), "51.0.0")) {
+ IOLog(VTYELLOW "Skipping duplicate extension \"%s\" with "
+ " version (%s -> %s).\n" VTRESET,
+ candidateName->getCStringNoCopy(),
+ incumbentVersionString->getCStringNoCopy(),
+ candidateVersionString->getCStringNoCopy());
+ winner = candidate;
+ goto finish;
+ }
+ }
+
if (candidate_vers > incumbent_vers) {
IOLog(VTYELLOW "Replacing extension \"%s\" with newer version "
"(%s -> %s).\n" VTRESET,
#include <mach/message.h>
#include <mach/exception.h>
#include <mach/mig_errors.h>
-#include <mach-o/dyld.h>
+#include <dlfcn.h>
#include <stdlib.h>
__private_extern__ kern_return_t internal_catch_exception_raise (
static kern_return_t (*func)(mach_port_t, mach_port_t, mach_port_t, exception_type_t, exception_data_t, mach_msg_type_number_t);
if (checkForFunction == 0) {
checkForFunction = 1;
- _dyld_lookup_and_bind("_catch_exception_raise", (unsigned long *)&func, (void **)0);
+ func = dlsym(RTLD_DEFAULT, "catch_exception_raise");
}
if (func == 0) {
/* The user hasn't defined catch_exception_raise in their binary */
#include <mach/message.h>
#include <mach/exception.h>
#include <mach/mig_errors.h>
-#include <mach-o/dyld.h>
+#include <dlfcn.h>
#include <stdlib.h>
__private_extern__ kern_return_t internal_catch_exception_raise_state (
static kern_return_t (*func)(mach_port_t, exception_type_t, exception_data_t, mach_msg_type_number_t, int *, thread_state_t, mach_msg_type_number_t, thread_state_t, mach_msg_type_number_t *);
if (checkForFunction == 0) {
checkForFunction = 1;
- _dyld_lookup_and_bind("_catch_exception_raise_state", (unsigned long *)&func, (void **)0);
+ func = dlsym(RTLD_DEFAULT, "catch_exception_raise_state");
}
if (func == 0) {
/* The user hasn't defined catch_exception_raise in their binary */
#include <mach/message.h>
#include <mach/exception.h>
#include <mach/mig_errors.h>
-#include <mach-o/dyld.h>
+#include <dlfcn.h>
#include <stdlib.h>
__private_extern__ kern_return_t internal_catch_exception_raise_state_identity (
static kern_return_t (*func)(mach_port_t, mach_port_t, mach_port_t, exception_type_t, exception_data_t, mach_msg_type_number_t, int *, thread_state_t, mach_msg_type_number_t, thread_state_t, mach_msg_type_number_t *);
if (checkForFunction == 0) {
checkForFunction = 1;
- _dyld_lookup_and_bind("_catch_exception_raise_state_identity", (unsigned long *)&func, (void **)0);
+ func = dlsym(RTLD_DEFAULT, "catch_exception_raise_state_identity");
}
if (func == 0) {
/* The user hasn't defined catch_exception_raise in their binary */
CAT = /bin/cat
MKDIR = /bin/mkdir -p
FIND = /usr/bin/find
+INSTALL = /usr/bin/install
TAR = /usr/bin/gnutar
STRIP = /usr/bin/strip
export CFLAGS_DEBUG =
export CFLAGS_PROFILE = -pg
-ifeq ($(ARCH_CONFIG),ARM)
-BUILD_STABS = 1
-endif
-
ifeq ($(BUILD_STABS),1)
export CFLAGS_PPC = -Dppc -DPPC -D__PPC__ -DPAGE_SIZE_FIXED \
-mno-altivec -gstabs+ -force_cpusubtype_ALL
./incmidir/$${filename_strip}; \
if [ -s ./incmidir/$${filename_strip} ]; \
then ( \
- install $(INSTALL_FLAGS) ./incmidir/$${filename} $(dir $@);\
+ $(INSTALL) $(INSTALL_FLAGS) ./incmidir/$${filename} $(dir $@);\
); \
else \
echo Header file $< not exported; \
./kincmidir/$${filename_strip}; \
if [ -s ./kincmidir/$${filename_strip} ]; \
then ( \
- install $(INSTALL_FLAGS) ./kincmidir/$${filename} $(dir $@);\
+ $(INSTALL) $(INSTALL_FLAGS) ./kincmidir/$${filename} $(dir $@);\
); \
else \
echo Header file $< not exported; \
./pincmidir/$${filename_strip}; \
if [ -s ./pincmidir/$${filename_strip} ]; \
then ( \
- install $(INSTALL_FLAGS) ./pincmidir/$${filename} $(dir $@);\
+ $(INSTALL) $(INSTALL_FLAGS) ./pincmidir/$${filename} $(dir $@);\
); \
else \
echo Header file $< not exported; \
./kpincmidir/$${filename_strip}; \
if [ -s ./kpincmidir/$${filename_strip} ]; \
then ( \
- install $(INSTALL_FLAGS) ./kpincmidir/$${filename} $(dir $@);\
+ $(INSTALL) $(INSTALL_FLAGS) ./kpincmidir/$${filename} $(dir $@);\
); \
else \
echo Header file $< not exported; \
./incdir/$${filename_strip}; \
if [ -s ./incdir/$${filename_strip} ]; \
then ( \
- install $(INSTALL_FLAGS) ./incdir/$${filename} $(dir $@);\
+ $(INSTALL) $(INSTALL_FLAGS) ./incdir/$${filename} $(dir $@);\
); \
else \
echo Header file $< not exported; \
./kincdir/$${filename_strip}; \
if [ -s ./kincdir/$${filename_strip} ]; \
then ( \
- install $(INSTALL_FLAGS) ./kincdir/$${filename} $(dir $@);\
+ $(INSTALL) $(INSTALL_FLAGS) ./kincdir/$${filename} $(dir $@);\
); \
else \
echo Header file $< not exported; \
./pincdir/$${filename_strip}; \
if [ -s ./pincdir/$${filename_strip} ]; \
then ( \
- install $(INSTALL_FLAGS) ./pincdir/$${filename} $(dir $@);\
+ $(INSTALL) $(INSTALL_FLAGS) ./pincdir/$${filename} $(dir $@);\
); \
else \
echo Header file $< not exported; \
./kpincdir/$${filename_strip}; \
if [ -s ./kpincdir/$${filename_strip} ]; \
then ( \
- install $(INSTALL_FLAGS) ./kpincdir/$${filename} $(dir $@);\
+ $(INSTALL) $(INSTALL_FLAGS) ./kpincdir/$${filename} $(dir $@);\
); \
else \
echo Header file $< not exported; \
./incmidir/$$j.strip; \
if [ -s ./incmidir/$$j.strip ]; \
then ( \
- install $(INSTALL_FLAGS) ./incmidir/$$j $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR); \
+ $(INSTALL) $(INSTALL_FLAGS) ./incmidir/$$j $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR); \
); \
else \
echo Header file $$j not exported; \
./pincmidir/$$j.strip; \
if [ -s ./pincmidir/$$j.strip ]; \
then ( \
- install $(INSTALL_FLAGS) ./pincmidir/$$j $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR); \
+ $(INSTALL) $(INSTALL_FLAGS) ./pincmidir/$$j $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR); \
); \
else \
echo Header file $$j not exported; \
./kincmidir/$$j.strip; \
if [ -s ./kincmidir/$$j.strip ]; \
then ( \
- install $(INSTALL_FLAGS) ./kincmidir/$$j $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR); \
+ $(INSTALL) $(INSTALL_FLAGS) ./kincmidir/$$j $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR); \
); \
else \
echo Header file $$j not exported; \
./kpincmidir/$$j.strip; \
if [ -s ./kpincmidir/$$j.strip ]; \
then ( \
- install $(INSTALL_FLAGS) ./kpincmidir/$$j $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR); \
+ $(INSTALL) $(INSTALL_FLAGS) ./kpincmidir/$$j $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR); \
); \
else \
echo Header file $$j not exported; \
./incdir/$$j.strip; \
if [ -s ./incdir/$$j.strip ]; \
then ( \
- install $(INSTALL_FLAGS) ./incdir/$$j $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR); \
+ $(INSTALL) $(INSTALL_FLAGS) ./incdir/$$j $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR); \
); \
else \
echo Header file $$j not exported; \
./pincdir/$$j.strip; \
if [ -s ./pincdir/$$j.strip ]; \
then ( \
- install $(INSTALL_FLAGS) ./pincdir/$$j $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR); \
+ $(INSTALL) $(INSTALL_FLAGS) ./pincdir/$$j $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR); \
); \
else \
echo Header file $$j not exported; \
./kincdir/$$j.strip; \
if [ -s ./kincdir/$$j.strip ]; \
then ( \
- install $(INSTALL_FLAGS) ./kincdir/$$j $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR); \
+ $(INSTALL) $(INSTALL_FLAGS) ./kincdir/$$j $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR); \
); \
else \
echo Header file $$j not exported; \
./kpincdir/$$j.strip; \
if [ -s ./kpincdir/$$j.strip ]; \
then ( \
- install $(INSTALL_FLAGS) ./kpincdir/$$j $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR); \
+ $(INSTALL) $(INSTALL_FLAGS) ./kpincdir/$$j $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR); \
); \
else \
echo Header file $$j not exported; \
# mach_kernel building rules
#
do_build_mach_kernel: $(OBJPATH)/kgmacros
- $(_v)install $(DATA_INSTALL_FLAGS) $(SRCROOT)/config/version.c $(OBJPATH)/version.c;
+ $(_v)$(INSTALL) $(DATA_INSTALL_FLAGS) $(SRCROOT)/config/version.c $(OBJPATH)/version.c;
$(_v)$(SRCROOT)/config/newvers.pl $(OBJPATH)/version.c > /dev/null;
@echo CC version.o
$(_v)${KCC} -c ${filter-out ${${join $@,_CFLAGS_RM}}, ${CFLAGS}} ${${join $@,_CFLAGS_ADD}} ${INCFLAGS} ${${join $@,_INCFLAGS}} $(OBJPATH)/version.c -o $(OBJPATH)/version.o
$(_v)$(STRIP) $(STRIP_FLAGS) $(TARGET)/mach_kernel.sys -o $(TARGET)/mach_kernel
$(OBJPATH)/kgmacros: $(SRCROOT)/kgmacros
- $(_v)$(CP) $? $@
+ $(_v)$(INSTALL) $(INSTALL_FLAGS) $? $@
# Special rules to install machine configuration variants
fi; \
if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then \
$(RM) $(RMFLAGS) $@; \
- install $(FILE_INSTALL_FLAGS) $< $@; \
+ $(INSTALL) $(FILE_INSTALL_FLAGS) $< $@; \
else \
if [ ! -e $@ ]; then \
echo >empty_file_$(notdir $@); \
fi; \
if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then \
$(RM) $(RMFLAGS) $@; \
- install $(FILE_INSTALL_FLAGS) $< $@; \
+ $(INSTALL) $(FILE_INSTALL_FLAGS) $< $@; \
+ if [ $(BUILD_DWARF) -eq 1 ]; then \
+ $(RM) -rf $@.dSYM; \
+ $(MKDIR) -p -m 0755 $@.dSYM/$(DSYMBUILDDIR); \
+ $(INSTALL) $(INSTALL_FLAGS) \
+ $<.dSYM/$(DSYMBUILDDIR)/$(notdir $<) \
+ $@.dSYM/$(DSYMBUILDDIR)/$(notdir $@); \
+ fi; \
else \
if [ ! -e $@ ]; then \
echo >empty_file_$(notdir $@); \
fi; \
if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then \
$(RM) $(RMFLAGS) $@; \
- install $(FILE_INSTALL_FLAGS) $< $@; \
+ $(INSTALL) $(FILE_INSTALL_FLAGS) $< $@; \
else \
if [ ! -e $@ ]; then \
echo >empty_file_$(notdir $@); \
-exec $(RM) -rf {} \; ; \
$(CTFMERGE) -l xnu -o $<.ctfsys \
$(OBJPATH)/*/$(KERNEL_CONFIG)/*.*o.ctf || true; \
- install $(FILE_INSTALL_FLAGS) $<.ctfsys $(dir $@); \
+ $(INSTALL) $(FILE_INSTALL_FLAGS) $<.ctfsys $(dir $@); \
else \
if [ ! -e $@.ctfsys ]; then \
echo >empty_file_$(notdir $@); \
fi; \
if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then \
$(RM) $(RMFLAGS) $@; \
- install $(INSTALL_FLAGS) $< $@; \
+ $(INSTALL) $(INSTALL_FLAGS) $< $@; \
if [ $(BUILD_DWARF) -eq 1 ]; then \
$(DSYMUTIL) $(DSYMUTIL_FLAGS) \
$(TARGET)/mach_kernel.sys \
-o $(TARGET)/mach_kernel.sys.dSYM; \
$(RM) -rf $@.dSYM; \
$(MKDIR) -p -m 0755 $@.dSYM/$(DSYMBUILDDIR); \
- install $(INSTALL_FLAGS) \
+ $(INSTALL) $(INSTALL_FLAGS) \
$<.dSYM/$(DSYMBUILDDIR)/$(notdir $<) \
$@.dSYM/$(DSYMBUILDDIR)/$(notdir $@); \
fi; \
-o $@.dSYM/$(DSYMBUILDDIR)/$(notdir $@); \
fi; \
fi
- $(CP) $(SOURCE)kgmacros $(SYMROOT)$(INSTALL_FILE_DIR)
+ $(INSTALL) $(INSTALL_FLAGS) $(SOURCE)kgmacros $(SYMROOT)$(INSTALL_FILE_DIR)
INSTALL_DATA_FILES = $(addprefix $(DSTROOT)$(INSTALL_DATA_DIR), $(INSTALL_DATA_LIST))
@echo Installing $< in $@;
$(_v)[ -d $(dir $@) ] ||$(MKDIR) $(dir $@); \
$(RM) $(RMFLAGS) $@; \
- install $(DATA_INSTALL_FLAGS) $< $(dir $@);
+ $(INSTALL) $(DATA_INSTALL_FLAGS) $< $(dir $@);
setup_build_install:
@echo "[ $(SOURCE) ] make setup_build_install $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
$(MKDIR) $$man_dir; \
fi; \
echo Installing $(INSTALL_MAN_LIST) in $$man_dir; \
- install $(INSTALL_FLAGS) $(INSTALL_MAN_LIST) $$man_dir; \
+ $(INSTALL) $(INSTALL_FLAGS) $(INSTALL_MAN_LIST) $$man_dir; \
if [ -n "$(strip $(INSTALL_MAN_LINKS))" ]; then \
set `echo ${INSTALL_MAN_LINKS}`; \
while : ; do \
@true echo Installing $< in $(dir $@)
$(_v)$(MKDIR) $(DSTROOT)/$(MANDIR)/$(INSTALL_MAN_DIR); \
$(RM) $(RMFLAGS) $@; \
- install $(INSTALL_FLAGS) $< $(dir $@);
+ $(INSTALL) $(INSTALL_FLAGS) $< $(dir $@);
ifeq ($(INCL_MAKEDEP), TRUE)
-include Makedep
#include <i386/cpu_data.h>
#include <i386/machine_routines.h>
#include <i386/perfmon.h>
+#include <i386/lapic.h>
#include <i386/mp.h>
#include <i386/trap.h>
#include <mach/i386/syscall_sw.h>
#include <chud/chud_thread.h>
#include <i386/misc_protos.h>
+#include <i386/lapic.h>
#include <i386/mp.h>
#include <i386/machine_cpu.h>
#
options CONFIG_EMBEDDED # <config_embedded>
+# only execute signed code. Hang this off config_embedded since there's
+# nothing more appropriate right now
+#
+options CONFIG_ENFORCE_SIGNED_CODE # <config_embedded>
+
# jettison_kernel_linker - jettison kernel linker after kernel init; don't wait for kextd to launch
options CONFIG_JETTISON_KERNEL_LINKER # <jettison_kernel_linker>
# secure_kernel - secure kernel from user programs
options SECURE_KERNEL # <secure_kernel>
+
+#
+# code decryption... used on embedded for app protection
+# must be set in all the bsd/conf and osfmk/conf MASTER files
+#
+options CONFIG_CODE_DECRYPTION # <config_embedded>
# Standard Apple MacOS X Configurations:
# -------- ---- -------- ---------------
#
-# RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation crypto config_dtrace]
+# RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation crypto config_dtrace]
# DEBUG_KDP = [ RELEASE osf_debug debug ]
# DEBUG= [ RELEASE osf_debug debug mach_kdb mach_assert ]
# PROFILE = [ RELEASE profile ]
#
-# EMBEDDED_BASE = [ bsmall intel pc iokit mach_pe mach mach_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation crypto ]
+# EMBEDDED_BASE = [ bsmall intel pc iokit mach_pe mach mach_kdp serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation crypto ]
# EMBEDDED = [ EMBEDDED_BASE no_printf_str no_kprintf_str no_kdebug ]
# DEVELOPMENT = [ EMBEDDED_BASE mach_assert config_dtrace ]
#
options DDB # Inline debugger # <debug>
options MACH_KDB # # <mach_kdb>
options MACH_KDP # KDP # <mach_kdp>
+options CONFIG_SERIAL_KDP # KDP over serial # <config_serial_kdp>
options PAE
options X86_64
options DISPATCH_COUNTS
#
options CONFIG_MACF # Mandatory Access Control Framework
#options CONFIG_MACF_MACH # MACF applied to Mach services
+
+#
+# code decryption... used on i386 for DSMOS
+# must be set in all the bsd/conf and osfmk/conf MASTER files
+#
+options CONFIG_CODE_DECRYPTION
OPTIONS/mach_kdb optional mach_kdb
OPTIONS/mach_kgdb optional mach_kgdb
OPTIONS/mach_kdp optional mach_kdp
+OPTIONS/config_serial_kdp optional config_serial_kdp
OPTIONS/mach_kprof optional mach_kprof
OPTIONS/mach_ldebug optional mach_ldebug
OPTIONS/mach_mp_debug optional mach_mp_debug
osfmk/ddb/tr.c optional mach_tr
osfmk/kdp/kdp.c optional mach_kdp
osfmk/kdp/kdp_udp.c optional mach_kdp
+osfmk/kdp/kdp_serial.c optional config_serial_kdp
osfmk/ipc/ipc_entry.c standard
osfmk/ipc/ipc_hash.c standard
osfmk/ipc/ipc_init.c standard
osfmk/i386/user_ldt.c standard
osfmk/i386/Diagnostics.c standard
osfmk/i386/pmCPU.c standard
-osfmk/i386/hpet.c standard
osfmk/i386/tsc.c standard
osfmk/i386/commpage/commpage.c standard
osfmk/i386/AT386/conf.c standard
osfmk/i386/AT386/model_dep.c standard
+osfmk/i386/lapic.c standard
osfmk/i386/mp.c standard
osfmk/i386/mp_slave_boot.s standard
static int pixels_needed_to_blit_digit( int digit );
static void blit_digit( int digit );
static const char * strnstr(const char * s, const char * find, size_t slen);
-static void dim_screen(void);
+void dim_screen(void);
static void panic_blit_rect(unsigned int x, unsigned int y, unsigned int width,
unsigned int height, int transparent,
const unsigned char * dataPtr);
}
-static void
+void
dim_screen(void)
{
unsigned long *p, *endp, *row;
new_vinfo.v_baseaddr = newVideoVirt + boot_vinfo->v_offset; /* Set the new framebuffer address */
else
new_vinfo.v_baseaddr = lastVideoVirt + boot_vinfo->v_offset; /* Set the new framebuffer address */
-
+
/* Update the vinfo structure atomically with respect to the vc_progress task if running */
if (vc_progress)
{
/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
.fcn = db_apic,
.flag = CS_MORE,
},
- {
- .name = "hp",
- .fcn = db_hpet,
- .flag = CS_MORE,
- },
#endif /* !__ppc__ */
#if defined(__ppc__)
{
if (size == 0) {
ASSERT(unavail_size);
+ ps_clunmap(vs, offset, unavail_size);
cnt -= unavail_size;
offset += unavail_size;
if((offset & ((vm_page_size << vs->vs_clshift) - 1))
*/
write_vsmap = *vsmap_ptr;
*vsmap_ptr = read_vsmap;
+ ps_clunmap(vs, offset, size);
} else {
/* discard the old backing object */
write_vsmap = *vsmap_ptr;
dp_memory_object_data_initialize,
dp_memory_object_data_unlock,
dp_memory_object_synchronize,
- dp_memory_object_unmap,
+ dp_memory_object_map,
+ dp_memory_object_last_unmap,
"default pager"
};
}
kern_return_t
-dp_memory_object_unmap(
- __unused memory_object_t mem_obj)
+dp_memory_object_map(
+ __unused memory_object_t mem_obj,
+ __unused vm_prot_t prot)
{
- panic("dp_memory_object_unmap");
+ panic("dp_memory_object_map");
+ return KERN_FAILURE;
+}
+kern_return_t
+dp_memory_object_last_unmap(
+ __unused memory_object_t mem_obj)
+{
+ panic("dp_memory_object_last_unmap");
return KERN_FAILURE;
}
halt_in_debugger = halt_in_debugger ? 0 : 1;
#endif
- if (PE_parse_boot_arg("debug", &boot_arg)) {
+ if (PE_parse_boot_argn("debug", &boot_arg, sizeof (boot_arg))) {
if (boot_arg & DB_HALT) halt_in_debugger=1;
if (boot_arg & DB_PRT) disable_debug_output=FALSE;
if (boot_arg & DB_SLOG) systemLogDiags=TRUE;
if (boot_arg & DB_LOG_PI_SCRN) logPanicDataToScreen=TRUE;
}
- if (!PE_parse_boot_arg("nvram_paniclog", &commit_paniclog_to_nvram))
+ if (!PE_parse_boot_argn("nvram_paniclog", &commit_paniclog_to_nvram, sizeof (commit_paniclog_to_nvram)))
commit_paniclog_to_nvram = 1;
/*
* Entering the debugger will put the CPUs into a "safe"
* power mode.
*/
- if (PE_parse_boot_arg("pmsafe_debug", &boot_arg))
+ if (PE_parse_boot_argn("pmsafe_debug", &boot_arg, sizeof (boot_arg)))
pmsafe_debug = boot_arg;
#if NOTYET
}
#endif /* MACH_KDB */
- if (PE_parse_boot_arg("preempt", &boot_arg)) {
+ if (PE_parse_boot_argn("preempt", &boot_arg, sizeof (boot_arg))) {
default_preemption_rate = boot_arg;
}
- if (PE_parse_boot_arg("unsafe", &boot_arg)) {
+ if (PE_parse_boot_argn("unsafe", &boot_arg, sizeof (boot_arg))) {
max_unsafe_quanta = boot_arg;
}
- if (PE_parse_boot_arg("poll", &boot_arg)) {
+ if (PE_parse_boot_argn("poll", &boot_arg, sizeof (boot_arg))) {
max_poll_quanta = boot_arg;
}
- if (PE_parse_boot_arg("yield", &boot_arg)) {
+ if (PE_parse_boot_argn("yield", &boot_arg, sizeof (boot_arg))) {
sched_poll_yield_shift = boot_arg;
}
- if (PE_parse_boot_arg("idlehalt", &boot_arg)) {
+ if (PE_parse_boot_argn("idlehalt", &boot_arg, sizeof (boot_arg))) {
idlehalt = boot_arg;
}
/* The I/O port to issue a read from, in the event of a panic. Useful for
* triggering logic analyzers.
*/
- if (PE_parse_boot_arg("panic_io_port", &boot_arg)) {
+ if (PE_parse_boot_argn("panic_io_port", &boot_arg, sizeof (boot_arg))) {
/*I/O ports range from 0 through 0xFFFF */
panic_io_port = boot_arg & 0xffff;
}
pbtcpu = cpu_number();
}
- PE_parse_boot_arg("keepsyms", &keepsyms);
+ PE_parse_boot_argn("keepsyms", &keepsyms, sizeof (keepsyms));
if (msg != NULL) {
kdb_printf(msg);
/*
- * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2005-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <i386/mp.h>
#include <i386/pmCPU.h>
#include <i386/tsc.h>
-#include <i386/hpet.h>
#include <mach/i386/syscall_sw.h>
extern uint64_t lastNapClear;
cpu_topology.h \
cpuid.h \
eflags.h \
- hpet.h \
io_map_entries.h \
+ lapic.h \
lock.h \
locks.h \
machine_routines.h \
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <i386/vmx/vmx_cpu.h>
#include <i386/acpi.h>
#include <i386/fpu.h>
+#include <i386/lapic.h>
#include <i386/mp.h>
#include <i386/mp_desc.h>
#include <i386/serial_io.h>
-#include <i386/hpet.h>
#include <i386/machine_check.h>
+#include <i386/pmCPU.h>
#include <kern/cpu_data.h>
#include <console/serial_protos.h>
data.refcon = refcon;
#endif
- /* Save HPET state */
- hpet_save();
+ /* Save power management timer state */
+ pmTimerSave();
/*
* Turn off VT, otherwise switching to legacy mode will fail
/* set up PAT following boot processor power up */
pat_init();
+ /*
+ * Go through all of the CPUs and mark them as requiring
+ * a full restart.
+ */
+ pmMarkAllCPUsOff();
+
/* let the realtime clock reset */
rtc_sleep_wakeup(acpi_sleep_abstime);
/* re-enable and re-init local apic */
if (lapic_probe())
- lapic_init();
+ lapic_configure();
+
+ /* Restore power management register state */
+ pmCPUMarkRunning(current_cpu_datap());
- /* Restore HPET state */
- hpet_restore();
+ /* Restore power management timer state */
+ pmTimerRestore();
/* Restart tick interrupts from the LAPIC timer */
rtc_lapic_start_ticking();
/*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#ifndef _I386_APIC_H_
#define _I386_APIC_H_
-#define LAPIC_START 0xFEE00000
-#define LAPIC_SIZE 0x00000400
-
-#define LAPIC_ID 0x00000020
-#define LAPIC_ID_SHIFT 24
-#define LAPIC_ID_MASK 0x0F
-#define LAPIC_VERSION 0x00000030
-#define LAPIC_VERSION_MASK 0xFF
-#define LAPIC_TPR 0x00000080
-#define LAPIC_TPR_MASK 0xFF
-#define LAPIC_APR 0x00000090
-#define LAPIC_APR_MASK 0xFF
-#define LAPIC_PPR 0x000000A0
-#define LAPIC_PPR_MASK 0xFF
-#define LAPIC_EOI 0x000000B0
-#define LAPIC_REMOTE_READ 0x000000C0
-#define LAPIC_LDR 0x000000D0
-#define LAPIC_LDR_SHIFT 24
-#define LAPIC_DFR 0x000000E0
-#define LAPIC_DFR_FLAT 0xFFFFFFFF
-#define LAPIC_DFR_CLUSTER 0x0FFFFFFF
-#define LAPIC_DFR_SHIFT 28
-#define LAPIC_SVR 0x000000F0
-#define LAPIC_SVR_MASK 0x0FF
-#define LAPIC_SVR_ENABLE 0x100
-#define LAPIC_SVR_FOCUS_OFF 0x200
-#define LAPIC_ISR_BASE 0x00000100
-#define LAPIC_TMR_BASE 0x00000180
-#define LAPIC_IRR_BASE 0x00000200
-#define LAPIC_ERROR_STATUS 0x00000280
-#define LAPIC_ICR 0x00000300
-#define LAPIC_ICR_VECTOR_MASK 0x000FF
-#define LAPIC_ICR_DM_MASK 0x00700
-#define LAPIC_ICR_DM_FIXED 0x00000
-#define LAPIC_ICR_DM_LOWEST 0x00100
-#define LAPIC_ICR_DM_SMI 0x00200
-#define LAPIC_ICR_DM_REMOTE 0x00300
-#define LAPIC_ICR_DM_NMI 0x00400
-#define LAPIC_ICR_DM_INIT 0x00500
-#define LAPIC_ICR_DM_STARTUP 0x00600
-#define LAPIC_ICR_DM_LOGICAL 0x00800
-#define LAPIC_ICR_DS_PENDING 0x01000
-#define LAPIC_ICR_LEVEL_ASSERT 0x04000
-#define LAPIC_ICR_TRIGGER_LEVEL 0x08000
-#define LAPIC_ICR_RR_MASK 0x30000
-#define LAPIC_ICR_RR_INVALID 0x00000
-#define LAPIC_ICR_RR_INPROGRESS 0x10000
-#define LAPIC_ICR_RR_VALID 0x20000
-#define LAPIC_ICR_DSS_MASK 0xC0000
-#define LAPIC_ICR_DSS_DEST 0x00000
-#define LAPIC_ICR_DSS_SELF 0x40000
-#define LAPIC_ICR_DSS_ALL 0x80000
-#define LAPIC_ICR_DSS_OTHERS 0xC0000
-#define LAPIC_ICRD 0x00000310
-#define LAPIC_ICRD_DEST_SHIFT 24
-#define LAPIC_LVT_TIMER 0x00000320
-#define LAPIC_LVT_THERMAL 0x00000330
-#define LAPIC_LVT_PERFCNT 0x00000340
-#define LAPIC_LVT_LINT0 0x00000350
-#define LAPIC_LVT_LINT1 0x00000360
-#define LAPIC_LVT_ERROR 0x00000370
-#define LAPIC_LVT_VECTOR_MASK 0x000FF
-#define LAPIC_LVT_DM_SHIFT 8
-#define LAPIC_LVT_DM_MASK 0x00007
-#define LAPIC_LVT_DM_FIXED 0x00000
-#define LAPIC_LVT_DM_NMI 0x00400
-#define LAPIC_LVT_DM_EXTINT 0x00700
-#define LAPIC_LVT_DS_PENDING 0x01000
-#define LAPIC_LVT_IP_PLRITY_LOW 0x02000
-#define LAPIC_LVT_REMOTE_IRR 0x04000
-#define LAPIC_LVT_TM_LEVEL 0x08000
-#define LAPIC_LVT_MASKED 0x10000
-#define LAPIC_LVT_PERIODIC 0x20000
-#define LAPIC_TIMER_INITIAL_COUNT 0x00000380
-#define LAPIC_TIMER_CURRENT_COUNT 0x00000390
-#define LAPIC_TIMER_DIVIDE_CONFIG 0x000003E0
-/* divisor encoded by bits 0,1,3 with bit 2 always 0: */
-#define LAPIC_TIMER_DIVIDE_MASK 0x0000000F
-#define LAPIC_TIMER_DIVIDE_2 0x00000000
-#define LAPIC_TIMER_DIVIDE_4 0x00000001
-#define LAPIC_TIMER_DIVIDE_8 0x00000002
-#define LAPIC_TIMER_DIVIDE_16 0x00000003
-#define LAPIC_TIMER_DIVIDE_32 0x00000008
-#define LAPIC_TIMER_DIVIDE_64 0x00000009
-#define LAPIC_TIMER_DIVIDE_128 0x0000000A
-#define LAPIC_TIMER_DIVIDE_1 0x0000000B
-
-#ifndef ASSEMBLER
-#include <stdint.h>
-typedef enum {
- periodic,
- one_shot
-} lapic_timer_mode_t;
-typedef enum {
- divide_by_1 = LAPIC_TIMER_DIVIDE_1,
- divide_by_2 = LAPIC_TIMER_DIVIDE_2,
- divide_by_4 = LAPIC_TIMER_DIVIDE_4,
- divide_by_8 = LAPIC_TIMER_DIVIDE_8,
- divide_by_16 = LAPIC_TIMER_DIVIDE_16,
- divide_by_32 = LAPIC_TIMER_DIVIDE_32,
- divide_by_64 = LAPIC_TIMER_DIVIDE_64,
- divide_by_128 = LAPIC_TIMER_DIVIDE_128
-} lapic_timer_divide_t;
-typedef uint32_t lapic_timer_count_t;
-#endif /* ASSEMBLER */
-
#define IOAPIC_START 0xFEC00000
#define IOAPIC_SIZE 0x00000020
thread_compose_cthread_desc(unsigned int addr, pcb_t pcb);
void IOSleep(int);
+extern void throttle_lowpri_io(boolean_t);
void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
if (current_thread()->funnel_lock)
(void) thread_funnel_set(current_thread()->funnel_lock, FALSE);
+ throttle_lowpri_io(TRUE);
+
thread_exception_return();
/* NOTREACHED */
}
if (current_thread()->funnel_lock)
(void) thread_funnel_set(current_thread()->funnel_lock, FALSE);
+ throttle_lowpri_io(TRUE);
+
thread_exception_return();
/* NOTREACHED */
}
retval, 0, 0, 0, 0);
regs->eax = retval;
+ throttle_lowpri_io(TRUE);
+
thread_exception_return();
/* NOTREACHED */
}
(call_number)) | DBG_FUNC_END,
(int)regs->rax, 0, 0, 0, 0);
+ throttle_lowpri_io(TRUE);
+
thread_exception_return();
/* NOTREACHED */
}
jz 0b
rdtsc /* get TSC in %edx:%eax */
+ lfence
+
subl _COMM_PAGE_NT_TSC_BASE,%eax
sbbl _COMM_PAGE_NT_TSC_BASE+4,%edx
testl %r8d,%r8d // if 0, data is being changed...
jz 1b // ...so loop until stable
rdtsc // edx:eax := tsc
+ lfence
shlq $32,%rdx // rax := ((edx << 32) | eax), ie 64-bit tsc
orq %rdx,%rax
subq _NT_TSC_BASE(%rsi), %rax // rax := (tsc - base_tsc)
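
The lfence added after each rdtsc above reflects the fact that rdtsc is not a serializing instruction: without a fence, the timestamp could be sampled out of order with respect to the surrounding loads of the nanotime data. A minimal user-space sketch of the same read-then-fence pattern (illustrative only; the commpage assembly above is the real implementation):

#include <stdint.h>

/*
 * Read the TSC followed by an lfence so that later instructions cannot
 * begin executing before the timestamp has actually been taken.
 */
static inline uint64_t
rdtsc_fenced(void)
{
	uint32_t lo, hi;

	__asm__ volatile("rdtsc; lfence" : "=a" (lo), "=d" (hi));
	return (((uint64_t)hi << 32) | lo);
}
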
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
if (cpu == cpu_number()) {
cpu_machine_init();
return KERN_SUCCESS;
- } else {
+ }
+
+ /*
+ * Try to bring the CPU back online without a reset.
+ * If the fast restart doesn't succeed, fall back to
+ * the slow way.
+ */
+ ret = intel_startCPU_fast(cpu);
+ if (ret != KERN_SUCCESS) {
/*
* Should call out through PE.
* But take the shortcut here.
*/
ret = intel_startCPU(cpu);
- return(ret);
}
+
+ if (ret != KERN_SUCCESS)
+ kprintf("cpu: cpu_start(%d) returning failure!\n", cpu);
+
+ return(ret);
}
void
cpu_data_t *cdp = cpu_datap(cpu);
simple_lock(&x86_topo_lock);
- while (!cdp->lcpu.halted) {
+ while ((cdp->lcpu.state != LCPU_HALT)
+ && (cdp->lcpu.state != LCPU_OFF)) {
simple_unlock(&x86_topo_lock);
cpu_pause();
simple_lock(&x86_topo_lock);
boolean_t has_expired;
} rtclock_timer_t;
-typedef struct rtc_nanotime {
- uint64_t tsc_base; /* timestamp */
- uint64_t ns_base; /* nanoseconds */
- uint32_t scale; /* tsc -> nanosec multiplier */
- uint32_t shift; /* tsc -> nanosec shift/div */
- /* shift is overloaded with
- * lower 32bits of tsc_freq
- * on slower machines (SLOW_TSC_THRESHOLD) */
- uint32_t generation; /* 0 == being updated */
- uint32_t spare1;
-} rtc_nanotime_t;
-
-#define SLOW_TSC_THRESHOLD 1000067800 /* TSC is too slow for regular nanotime() algorithm */
-
typedef struct {
struct i386_tss *cdi_ktss;
uint64_t *cpu_physwindow_ptep;
void *cpu_hi_iss;
boolean_t cpu_tlb_invalid;
- uint32_t cpu_hwIntCnt[256]; /* Interrupt counts */
+ uint32_t cpu_hwIntCnt[256]; /* Interrupt counts */
uint64_t cpu_dr7; /* debug control register */
uint64_t cpu_int_event_time; /* intr entry/exit time */
vmx_cpu_t cpu_vmx; /* wonderful world of virtualization */
* arg store
* validity flag.
*/
-
+ rtc_nanotime_t *cpu_nanotime; /* Nanotime info */
} cpu_data_t;
#include <i386/perfmon.h>
#include <i386/pmCPU.h>
+//#define TOPO_DEBUG 1
+#if TOPO_DEBUG
+void debug_topology_print(void);
+#define DBG(x...) kprintf("DBG: " x)
+#else
+#define DBG(x...)
+#endif /* TOPO_DEBUG */
+
#define bitmask(h,l) ((bit(h)|(bit(h)-1)) & ~(bit(l)-1))
#define bitfield(x,h,l) (((x) & bitmask(h,l)) >> l)
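
bitmask() and bitfield() extract an inclusive bit range [h:l] from a register value; this idiom is used throughout the cache and topology parsing below. A small stand-alone sketch, assuming bit(n) expands to (1 << (n)) (the kernel supplies its own definition):

#include <stdint.h>
#include <stdio.h>

#define bit(n)		(1U << (n))	/* assumed; the kernel headers define their own */
#define bitmask(h,l)	((bit(h) | (bit(h) - 1)) & ~(bit(l) - 1))
#define bitfield(x,h,l)	(((x) & bitmask(h,l)) >> (l))

int
main(void)
{
	uint32_t eax = 0x0c004143;	/* made-up CPUID leaf 4 EAX value */

	/* Bits 25:14 encode (maximum logical CPUs sharing the cache) - 1. */
	printf("max sharing = %u\n", bitfield(eax, 25, 14) + 1);
	return (0);
}
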
-/*
- * Kernel parameter determining whether threads are halted unconditionally
- * in the idle state. This is the default behavior.
- * See machine_idle() for use.
- */
-int idlehalt = 1;
-
-x86_pkg_t *x86_pkgs = NULL;
-uint32_t num_packages = 0;
+x86_pkg_t *x86_pkgs = NULL;
uint32_t num_Lx_caches[MAX_CACHE_DEPTH] = { 0 };
static x86_pkg_t *free_pkgs = NULL;
+static x86_die_t *free_dies = NULL;
static x86_core_t *free_cores = NULL;
+static uint32_t num_dies = 0;
static x86_cpu_cache_t *x86_caches = NULL;
static uint32_t num_caches = 0;
+static boolean_t topoParmsInited = FALSE;
+x86_topology_parameters_t topoParms;
+
decl_simple_lock_data(, x86_topo_lock);
+
+static boolean_t
+cpu_is_hyperthreaded(void)
+{
+ i386_cpu_info_t *cpuinfo;
+
+ cpuinfo = cpuid_info();
+ return(cpuinfo->thread_count > cpuinfo->core_count);
+}
static x86_cpu_cache_t *
x86_cache_alloc(void)
return(cache);
}
+
+static void
+x86_LLC_info(void)
+{
+ uint32_t index;
+ uint32_t cache_info[4];
+ uint32_t cache_level = 0;
+ uint32_t nCPUsSharing = 1;
+ i386_cpu_info_t *cpuinfo;
+
+ cpuinfo = cpuid_info();
+
+ do_cpuid(0, cache_info);
+
+ if (cache_info[eax] < 4) {
+ /*
+ * Processor does not support deterministic
+ * cache information. Assume the LLC is not shared
+ * beyond a single core's hyperthreads, since we have
+ * no better information.
+ */
+ if (cpu_is_hyperthreaded()) {
+ topoParms.nCoresSharingLLC = 1;
+ topoParms.nLCPUsSharingLLC = 2;
+ topoParms.maxSharingLLC = 2;
+ } else {
+ topoParms.nCoresSharingLLC = 1;
+ topoParms.nLCPUsSharingLLC = 1;
+ topoParms.maxSharingLLC = 1;
+ }
+ return;
+ }
+
+ for (index = 0; ; index += 1) {
+ uint32_t this_level;
+
+ cache_info[eax] = 4;
+ cache_info[ecx] = index;
+ cache_info[ebx] = 0;
+ cache_info[edx] = 0;
+
+ cpuid(cache_info);
+
+ /*
+ * See if all levels have been queried.
+ */
+ if (bitfield(cache_info[eax], 4, 0) == 0)
+ break;
+
+ /*
+ * Get the current level.
+ */
+ this_level = bitfield(cache_info[eax], 7, 5);
+
+ /*
+ * Only worry about it if it's a deeper level than
+ * what we've seen before.
+ */
+ if (this_level > cache_level) {
+ cache_level = this_level;
+
+ /*
+ * Save the number of CPUs sharing this cache.
+ */
+ nCPUsSharing = bitfield(cache_info[eax], 25, 14) + 1;
+ }
+ }
+
+ /*
+ * Make the level of the LLC be 0 based.
+ */
+ topoParms.LLCDepth = cache_level - 1;
+
+ /*
+ * nCPUsSharing represents the *maximum* number of cores or
+ * logical CPUs sharing the cache.
+ */
+ topoParms.maxSharingLLC = nCPUsSharing;
+
+ topoParms.nCoresSharingLLC = nCPUsSharing;
+ topoParms.nLCPUsSharingLLC = nCPUsSharing;
+
+ /*
+ * nCPUsSharing may not be the number of *active* cores or
+ * threads that are sharing the cache.
+ */
+ if (nCPUsSharing > cpuinfo->core_count)
+ topoParms.nCoresSharingLLC = cpuinfo->core_count;
+ if (nCPUsSharing > cpuinfo->thread_count)
+ topoParms.nLCPUsSharingLLC = cpuinfo->thread_count;
+
+ if (nCPUsSharing > cpuinfo->thread_count)
+ topoParms.maxSharingLLC = cpuinfo->thread_count;
+}
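
x86_LLC_info() walks CPUID leaf 4 (the deterministic cache parameters) one index at a time until the cache-type field reads 0, keeping the deepest cache level seen and its maximum sharing count. A stand-alone sketch of the same walk, using the compiler-provided __get_cpuid_count() from <cpuid.h> rather than the kernel's cpuid() wrappers:

#include <stdio.h>
#include <cpuid.h>

int
main(void)
{
	unsigned int eax, ebx, ecx, edx;
	unsigned int index, level;
	unsigned int llc_level = 0, llc_sharing = 1;

	for (index = 0; ; index++) {
		if (!__get_cpuid_count(4, index, &eax, &ebx, &ecx, &edx))
			break;
		if ((eax & 0x1f) == 0)		/* cache type 0: no more levels */
			break;
		level = (eax >> 5) & 0x7;
		if (level > llc_level) {
			llc_level = level;
			/* Bits 25:14 hold (max logical CPUs sharing) - 1. */
			llc_sharing = ((eax >> 14) & 0xfff) + 1;
		}
	}
	printf("LLC is L%u, shared by up to %u logical CPUs\n",
	    llc_level, llc_sharing);
	return (0);
}
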
+
+static void
+initTopoParms(void)
+{
+ i386_cpu_info_t *cpuinfo;
+
+ cpuinfo = cpuid_info();
+
+ /*
+ * We need to start with getting the LLC information correct.
+ */
+ x86_LLC_info();
+
+ /*
+ * Compute the number of threads (logical CPUs) per core.
+ */
+ topoParms.nLThreadsPerCore = cpuinfo->thread_count / cpuinfo->core_count;
+ topoParms.nPThreadsPerCore = cpuinfo->cpuid_logical_per_package / cpuinfo->cpuid_cores_per_package;
+
+ /*
+ * Compute the number of dies per package.
+ */
+ topoParms.nLDiesPerPackage = cpuinfo->core_count / topoParms.nCoresSharingLLC;
+ topoParms.nPDiesPerPackage = cpuinfo->cpuid_cores_per_package / (topoParms.maxSharingLLC / topoParms.nPThreadsPerCore);
+
+ /*
+ * Compute the number of cores per die.
+ */
+ topoParms.nLCoresPerDie = topoParms.nCoresSharingLLC;
+ topoParms.nPCoresPerDie = (topoParms.maxSharingLLC / topoParms.nPThreadsPerCore);
+
+ /*
+ * Compute the number of threads per die.
+ */
+ topoParms.nLThreadsPerDie = topoParms.nLThreadsPerCore * topoParms.nLCoresPerDie;
+ topoParms.nPThreadsPerDie = topoParms.nPThreadsPerCore * topoParms.nPCoresPerDie;
+
+ /*
+ * Compute the number of cores per package.
+ */
+ topoParms.nLCoresPerPackage = topoParms.nLCoresPerDie * topoParms.nLDiesPerPackage;
+ topoParms.nPCoresPerPackage = topoParms.nPCoresPerDie * topoParms.nPDiesPerPackage;
+
+ /*
+ * Compute the number of threads per package.
+ */
+ topoParms.nLThreadsPerPackage = topoParms.nLThreadsPerCore * topoParms.nLCoresPerPackage;
+ topoParms.nPThreadsPerPackage = topoParms.nPThreadsPerCore * topoParms.nPCoresPerPackage;
+
+ DBG("\nLogical Topology Parameters:\n");
+ DBG("\tThreads per Core: %d\n", topoParms.nLThreadsPerCore);
+ DBG("\tCores per Die: %d\n", topoParms.nLCoresPerDie);
+ DBG("\tThreads per Die: %d\n", topoParms.nLThreadsPerDie);
+ DBG("\tDies per Package: %d\n", topoParms.nLDiesPerPackage);
+ DBG("\tCores per Package: %d\n", topoParms.nLCoresPerPackage);
+ DBG("\tThreads per Package: %d\n", topoParms.nLThreadsPerPackage);
+
+ DBG("\nPhysical Topology Parameters:\n");
+ DBG("\tThreads per Core: %d\n", topoParms.nPThreadsPerCore);
+ DBG("\tCores per Die: %d\n", topoParms.nPCoresPerDie);
+ DBG("\tThreads per Die: %d\n", topoParms.nPThreadsPerDie);
+ DBG("\tDies per Package: %d\n", topoParms.nPDiesPerPackage);
+ DBG("\tCores per Package: %d\n", topoParms.nPCoresPerPackage);
+ DBG("\tThreads per Package: %d\n", topoParms.nPThreadsPerPackage);
+
+ topoParmsInited = TRUE;
+}
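
As a worked example (hypothetical values, not a measured part): for a single-die package reporting thread_count = 8, core_count = 4, cpuid_logical_per_package = 8, cpuid_cores_per_package = 4, and an LLC shared by all 8 logical CPUs, the arithmetic above gives nLThreadsPerCore = nPThreadsPerCore = 8/4 = 2, nLCoresPerDie = nPCoresPerDie = 4, nLDiesPerPackage = nPDiesPerPackage = 1, and hence 4 cores and 8 threads per package in both the logical and physical views.
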
static void
x86_cache_free(x86_cpu_cache_t *cache)
cur->type = bitfield(cache_info[eax], 4, 0);
cur->level = bitfield(cache_info[eax], 7, 5);
- cur->nlcpus = bitfield(cache_info[eax], 25, 14) + 1;
+ cur->maxcpus = (bitfield(cache_info[eax], 25, 14) + 1);
cur->line_size = bitfield(cache_info[ebx], 11, 0) + 1;
cur->partitions = bitfield(cache_info[ebx], 21, 12) + 1;
cur->ways = bitfield(cache_info[ebx], 31, 22) + 1;
last = cur;
}
+ cur->nlcpus = 0;
num_Lx_caches[cur->level - 1] += 1;
}
return(root);
}
-static boolean_t
-cpu_is_hyperthreaded(void)
+static x86_cpu_cache_t *
+x86_match_cache(x86_cpu_cache_t *list, x86_cpu_cache_t *matcher)
{
- if (cpuid_features() & CPUID_FEATURE_HTT)
- return (cpuid_info()->cpuid_logical_per_package /
- cpuid_info()->cpuid_cores_per_package) > 1;
- else
- return FALSE;
+ x86_cpu_cache_t *cur_cache;
+
+ cur_cache = list;
+ while (cur_cache != NULL) {
+ if (cur_cache->maxcpus == matcher->maxcpus
+ && cur_cache->type == matcher->type
+ && cur_cache->level == matcher->level
+ && cur_cache->ways == matcher->ways
+ && cur_cache->partitions == matcher->partitions
+ && cur_cache->line_size == matcher->line_size
+ && cur_cache->cache_size == matcher->cache_size)
+ break;
+
+ cur_cache = cur_cache->next;
+ }
+
+ return(cur_cache);
}
static void
lcpu = &cpup->lcpu;
lcpu->lcpu = lcpu;
lcpu->cpu = cpup;
- lcpu->next = NULL;
- lcpu->core = NULL;
+ lcpu->next_in_core = NULL;
+ lcpu->next_in_die = NULL;
+ lcpu->next_in_pkg = NULL;
+ lcpu->core = NULL;
+ lcpu->die = NULL;
+ lcpu->package = NULL;
+ lcpu->cpu_num = cpu;
lcpu->lnum = cpu;
lcpu->pnum = cpup->cpu_phys_number;
- lcpu->halted = FALSE; /* XXX is this correct? */
- lcpu->idle = FALSE; /* XXX is this correct? */
+ lcpu->state = LCPU_OFF;
for (i = 0; i < MAX_CACHE_DEPTH; i += 1)
lcpu->caches[i] = NULL;
- lcpu->master = (lcpu->pnum == (unsigned int) master_cpu);
- lcpu->primary = (lcpu->pnum % cpuid_info()->cpuid_logical_per_package) == 0;
+ lcpu->master = (lcpu->cpu_num == (unsigned int) master_cpu);
+ lcpu->primary = (lcpu->pnum % topoParms.nPThreadsPerPackage) == 0;
}
static x86_core_t *
{
x86_core_t *core;
cpu_data_t *cpup;
- uint32_t cpu_in_pkg;
- uint32_t lcpus_per_core;
cpup = cpu_datap(cpu);
simple_lock(&x86_topo_lock);
if (free_cores != NULL) {
core = free_cores;
- free_cores = core->next;
- core->next = NULL;
+ free_cores = core->next_in_die;
+ core->next_in_die = NULL;
simple_unlock(&x86_topo_lock);
} else {
simple_unlock(&x86_topo_lock);
bzero((void *) core, sizeof(x86_core_t));
- cpu_in_pkg = cpu % cpuid_info()->cpuid_logical_per_package;
- lcpus_per_core = cpuid_info()->cpuid_logical_per_package /
- cpuid_info()->cpuid_cores_per_package;
-
- core->pcore_num = cpup->cpu_phys_number / lcpus_per_core;
- core->lcore_num = core->pcore_num % cpuid_info()->cpuid_cores_per_package;
+ core->pcore_num = cpup->cpu_phys_number / topoParms.nPThreadsPerCore;
+ core->lcore_num = core->pcore_num % topoParms.nPCoresPerPackage;
core->flags = X86CORE_FL_PRESENT | X86CORE_FL_READY
| X86CORE_FL_HALTED | X86CORE_FL_IDLE;
x86_core_free(x86_core_t *core)
{
simple_lock(&x86_topo_lock);
- core->next = free_cores;
+ core->next_in_die = free_cores;
free_cores = core;
simple_unlock(&x86_topo_lock);
}
cpup = cpu_datap(cpu);
- pkg_num = cpup->cpu_phys_number / cpuid_info()->cpuid_logical_per_package;
+ pkg_num = cpup->cpu_phys_number / topoParms.nPThreadsPerPackage;
pkg = x86_pkgs;
while (pkg != NULL) {
return(pkg);
}
+
+static x86_die_t *
+x86_die_find(int cpu)
+{
+ x86_die_t *die;
+ x86_pkg_t *pkg;
+ cpu_data_t *cpup;
+ uint32_t die_num;
+
+ cpup = cpu_datap(cpu);
+
+ die_num = cpup->cpu_phys_number / topoParms.nPThreadsPerDie;
+
+ pkg = x86_package_find(cpu);
+ if (pkg == NULL)
+ return(NULL);
+
+ die = pkg->dies;
+ while (die != NULL) {
+ if (die->pdie_num == die_num)
+ break;
+ die = die->next_in_pkg;
+ }
+
+ return(die);
+}
static x86_core_t *
x86_core_find(int cpu)
{
x86_core_t *core;
- x86_pkg_t *pkg;
+ x86_die_t *die;
cpu_data_t *cpup;
uint32_t core_num;
cpup = cpu_datap(cpu);
- core_num = cpup->cpu_phys_number
- / (cpuid_info()->cpuid_logical_per_package
- / cpuid_info()->cpuid_cores_per_package);
+ core_num = cpup->cpu_phys_number / topoParms.nPThreadsPerCore;
- pkg = x86_package_find(cpu);
- if (pkg == NULL)
+ die = x86_die_find(cpu);
+ if (die == NULL)
return(NULL);
- core = pkg->cores;
+ core = die->cores;
while (core != NULL) {
if (core->pcore_num == core_num)
break;
- core = core->next;
+ core = core->next_in_die;
}
return(core);
}
+
+void
+x86_set_lcpu_numbers(x86_lcpu_t *lcpu)
+{
+ lcpu->lnum = lcpu->cpu_num % topoParms.nLThreadsPerCore;
+}
+
+void
+x86_set_core_numbers(x86_core_t *core, x86_lcpu_t *lcpu)
+{
+ core->pcore_num = lcpu->cpu_num / topoParms.nLThreadsPerCore;
+ core->lcore_num = core->pcore_num % topoParms.nLCoresPerDie;
+}
+
+void
+x86_set_die_numbers(x86_die_t *die, x86_lcpu_t *lcpu)
+{
+ die->pdie_num = lcpu->cpu_num / (topoParms.nLThreadsPerCore * topoParms.nLCoresPerDie);
+ die->ldie_num = die->pdie_num % topoParms.nLDiesPerPackage;
+}
+
+void
+x86_set_pkg_numbers(x86_pkg_t *pkg, x86_lcpu_t *lcpu)
+{
+ pkg->ppkg_num = lcpu->cpu_num / topoParms.nLThreadsPerPackage;
+ pkg->lpkg_num = pkg->ppkg_num;
+}
+
+static x86_die_t *
+x86_die_alloc(int cpu)
+{
+ x86_die_t *die;
+ cpu_data_t *cpup;
+
+ cpup = cpu_datap(cpu);
+
+ simple_lock(&x86_topo_lock);
+ if (free_dies != NULL) {
+ die = free_dies;
+ free_dies = die->next_in_pkg;
+ die->next_in_pkg = NULL;
+ simple_unlock(&x86_topo_lock);
+ } else {
+ simple_unlock(&x86_topo_lock);
+ die = kalloc(sizeof(x86_die_t));
+ if (die == NULL)
+ panic("x86_die_alloc() kalloc of x86_die_t failed!\n");
+ }
+
+ bzero((void *) die, sizeof(x86_die_t));
+
+ die->pdie_num = cpup->cpu_phys_number / topoParms.nPThreadsPerDie;
+
+ die->ldie_num = num_dies;
+ atomic_incl((long *) &num_dies, 1);
+
+ die->flags = X86DIE_FL_PRESENT;
+ return(die);
+}
static void
-x86_core_add_lcpu(x86_core_t *core, x86_lcpu_t *lcpu)
+x86_die_free(x86_die_t *die)
+{
+ simple_lock(&x86_topo_lock);
+ die->next_in_pkg = free_dies;
+ free_dies = die;
+ atomic_decl((long *) &num_dies, 1);
+ simple_unlock(&x86_topo_lock);
+}
+
+static x86_pkg_t *
+x86_package_alloc(int cpu)
+{
+ x86_pkg_t *pkg;
+ cpu_data_t *cpup;
+
+ cpup = cpu_datap(cpu);
+
+ simple_lock(&x86_topo_lock);
+ if (free_pkgs != NULL) {
+ pkg = free_pkgs;
+ free_pkgs = pkg->next;
+ pkg->next = NULL;
+ simple_unlock(&x86_topo_lock);
+ } else {
+ simple_unlock(&x86_topo_lock);
+ pkg = kalloc(sizeof(x86_pkg_t));
+ if (pkg == NULL)
+ panic("x86_package_alloc() kalloc of x86_pkg_t failed!\n");
+ }
+
+ bzero((void *) pkg, sizeof(x86_pkg_t));
+
+ pkg->ppkg_num = cpup->cpu_phys_number / topoParms.nPThreadsPerPackage;
+
+ pkg->lpkg_num = topoParms.nPackages;
+ atomic_incl((long *) &topoParms.nPackages, 1);
+
+ pkg->flags = X86PKG_FL_PRESENT | X86PKG_FL_READY;
+ return(pkg);
+}
+
+static void
+x86_package_free(x86_pkg_t *pkg)
+{
+ simple_lock(&x86_topo_lock);
+ pkg->next = free_pkgs;
+ free_pkgs = pkg;
+ atomic_decl((long *) &topoParms.nPackages, 1);
+ simple_unlock(&x86_topo_lock);
+}
+
+static void
+x86_cache_add_lcpu(x86_cpu_cache_t *cache, x86_lcpu_t *lcpu)
+{
+ x86_cpu_cache_t *cur_cache;
+ int i;
+
+ /*
+ * Put the new CPU into the list of the cache.
+ */
+ cur_cache = lcpu->caches[cache->level - 1];
+ lcpu->caches[cache->level - 1] = cache;
+ cache->next = cur_cache;
+ cache->nlcpus += 1;
+ for (i = 0; i < cache->nlcpus; i += 1) {
+ if (cache->cpus[i] == NULL) {
+ cache->cpus[i] = lcpu;
+ break;
+ }
+ }
+}
+
+static void
+x86_lcpu_add_caches(x86_lcpu_t *lcpu)
{
x86_cpu_cache_t *list;
x86_cpu_cache_t *cur;
- x86_core_t *cur_core;
+ x86_cpu_cache_t *match;
+ x86_die_t *die;
+ x86_core_t *core;
x86_lcpu_t *cur_lcpu;
- boolean_t found;
- int level;
- int i;
- uint32_t cpu_mask;
+ uint32_t level;
+ boolean_t found = FALSE;
- assert(core != NULL);
assert(lcpu != NULL);
/*
* If the cache isn't shared then just put it where it
* belongs.
*/
- if (cur->nlcpus == 1) {
- goto found_first;
+ if (cur->maxcpus == 1) {
+ x86_cache_add_lcpu(cur, lcpu);
+ continue;
}
/*
/*
* This is a shared cache, so we have to figure out if
* this is the first time we've seen this cache. We do
- * this by searching through the package and seeing if
- * a related core is already describing this cache.
+ * this by searching through the topology and seeing if
+ * this cache is already described.
*
- * NOTE: This assumes that CPUs whose ID mod <# sharing cache>
- * are indeed sharing the cache.
+ * Assume that L{LLC-1} are all at the core level and that
+ * LLC is shared at the die level.
*/
- cpu_mask = lcpu->pnum & ~(cur->nlcpus - 1);
- cur_core = core->package->cores;
- found = FALSE;
-
- while (cur_core != NULL && !found) {
- cur_lcpu = cur_core->lcpus;
- while (cur_lcpu != NULL && !found) {
- if ((cur_lcpu->pnum & ~(cur->nlcpus - 1)) == cpu_mask) {
- lcpu->caches[level] = cur_lcpu->caches[level];
- found = TRUE;
- x86_cache_free(cur);
+ if (level < topoParms.LLCDepth) {
+ /*
+ * Shared at the core.
+ */
+ core = lcpu->core;
+ cur_lcpu = core->lcpus;
+ while (cur_lcpu != NULL) {
+ /*
+ * Skip ourselves.
+ */
+ if (cur_lcpu == lcpu) {
+ cur_lcpu = cur_lcpu->next_in_core;
+ continue;
+ }
- /*
- * Put the new CPU into the list of the cache.
- */
- cur = lcpu->caches[level];
- for (i = 0; i < cur->nlcpus; i += 1) {
- if (cur->cpus[i] == NULL) {
- cur->cpus[i] = lcpu;
- break;
- }
- }
+ /*
+ * If there's a cache on this logical CPU,
+ * then use that one.
+ */
+ match = x86_match_cache(cur_lcpu->caches[level], cur);
+ if (match != NULL) {
+ x86_cache_free(cur);
+ x86_cache_add_lcpu(match, lcpu);
+ found = TRUE;
+ break;
}
- cur_lcpu = cur_lcpu->next;
+
+ cur_lcpu = cur_lcpu->next_in_core;
}
+ } else {
+ /*
+ * Shared at the die.
+ */
+ die = lcpu->die;
+ cur_lcpu = die->lcpus;
+ while (cur_lcpu != NULL) {
+ /*
+ * Skip ourselves.
+ */
+ if (cur_lcpu == lcpu) {
+ cur_lcpu = cur_lcpu->next_in_die;
+ continue;
+ }
- cur_core = cur_core->next;
+ /*
+ * If there's a cache on this logical CPU,
+ * then use that one.
+ */
+ match = x86_match_cache(cur_lcpu->caches[level], cur);
+ if (match != NULL) {
+ x86_cache_free(cur);
+ x86_cache_add_lcpu(match, lcpu);
+ found = TRUE;
+ break;
+ }
+
+ cur_lcpu = cur_lcpu->next_in_die;
+ }
}
+ /*
+ * If a shared cache wasn't found, then this logical CPU must
+ * be the first one encountered.
+ */
if (!found) {
-found_first:
- cur->next = lcpu->caches[level];
- lcpu->caches[level] = cur;
- cur->cpus[0] = lcpu;
+ x86_cache_add_lcpu(cur, lcpu);
}
}
- /*
- * Add the Logical CPU to the core.
- */
- lcpu->next = core->lcpus;
- lcpu->core = core;
- core->lcpus = lcpu;
- core->num_lcpus += 1;
-
simple_unlock(&x86_topo_lock);
}
-static x86_pkg_t *
-x86_package_alloc(int cpu)
+static void
+x86_core_add_lcpu(x86_core_t *core, x86_lcpu_t *lcpu)
{
- x86_pkg_t *pkg;
- cpu_data_t *cpup;
-
- cpup = cpu_datap(cpu);
+ assert(core != NULL);
+ assert(lcpu != NULL);
simple_lock(&x86_topo_lock);
- if (free_pkgs != NULL) {
- pkg = free_pkgs;
- free_pkgs = pkg->next;
- pkg->next = NULL;
- simple_unlock(&x86_topo_lock);
- } else {
- simple_unlock(&x86_topo_lock);
- pkg = kalloc(sizeof(x86_pkg_t));
- if (pkg == NULL)
- panic("x86_package_alloc() kalloc of x86_pkg_t failed!\n");
- }
- bzero((void *) pkg, sizeof(x86_pkg_t));
+ lcpu->next_in_core = core->lcpus;
+ lcpu->core = core;
+ core->lcpus = lcpu;
+ core->num_lcpus += 1;
+ simple_unlock(&x86_topo_lock);
+}
- pkg->ppkg_num = cpup->cpu_phys_number
- / cpuid_info()->cpuid_logical_per_package;
+static void
+x86_die_add_lcpu(x86_die_t *die, x86_lcpu_t *lcpu)
+{
+ assert(die != NULL);
+ assert(lcpu != NULL);
+
+ lcpu->next_in_die = die->lcpus;
+ lcpu->die = die;
+ die->lcpus = lcpu;
+}
- pkg->lpkg_num = num_packages;
- atomic_incl((long *) &num_packages, 1);
+static void
+x86_die_add_core(x86_die_t *die, x86_core_t *core)
+{
+ assert(die != NULL);
+ assert(core != NULL);
- pkg->flags = X86PKG_FL_PRESENT | X86PKG_FL_READY;
- return(pkg);
+ core->next_in_die = die->cores;
+ core->die = die;
+ die->cores = core;
+ die->num_cores += 1;
}
-static void
-x86_package_free(x86_pkg_t *pkg)
+static void
+x86_package_add_lcpu(x86_pkg_t *pkg, x86_lcpu_t *lcpu)
{
- simple_lock(&x86_topo_lock);
- pkg->next = free_pkgs;
- free_pkgs = pkg;
- atomic_decl((long *) &num_packages, 1);
- simple_unlock(&x86_topo_lock);
+ assert(pkg != NULL);
+ assert(lcpu != NULL);
+
+ lcpu->next_in_pkg = pkg->lcpus;
+ lcpu->package = pkg;
+ pkg->lcpus = lcpu;
}
static void
assert(pkg != NULL);
assert(core != NULL);
- core->next = pkg->cores;
+ core->next_in_pkg = pkg->cores;
core->package = pkg;
pkg->cores = core;
- pkg->num_cores += 1;
+}
+
+static void
+x86_package_add_die(x86_pkg_t *pkg, x86_die_t *die)
+{
+ assert(pkg != NULL);
+ assert(die != NULL);
+
+ die->next_in_pkg = pkg->dies;
+ die->package = pkg;
+ pkg->dies = die;
+ pkg->num_dies += 1;
}
void *
cpu_thread_alloc(int cpu)
{
- x86_core_t *core;
- x86_pkg_t *pkg;
+ x86_core_t *core = NULL;
+ x86_die_t *die = NULL;
+ x86_pkg_t *pkg = NULL;
cpu_data_t *cpup;
uint32_t phys_cpu;
+ /*
+ * Only allow one to manipulate the topology at a time.
+ */
+ simple_lock(&x86_topo_lock);
+
+ /*
+ * Make sure all of the topology parameters have been initialized.
+ */
+ if (!topoParmsInited)
+ initTopoParms();
+
cpup = cpu_datap(cpu);
phys_cpu = cpup->cpu_phys_number;
x86_lcpu_init(cpu);
+ /*
+ * Allocate performance counter structure.
+ */
+ simple_unlock(&x86_topo_lock);
+ cpup->lcpu.pmc = pmc_alloc();
+ simple_lock(&x86_topo_lock);
+
/*
* Assume that all cpus have the same features.
*/
}
/*
- * Only allow one to manipulate the topology at a time.
- */
- simple_lock(&x86_topo_lock);
-
- /*
- * Get the core for this logical CPU.
+ * Get the package that the logical CPU is in.
*/
- core_again:
- core = x86_core_find(cpu);
- if (core == NULL) {
- /*
- * Core structure hasn't been created yet, do it now.
- *
- * Get the package that the core is part of.
- */
- package_again:
+ do {
pkg = x86_package_find(cpu);
if (pkg == NULL) {
/*
simple_lock(&x86_topo_lock);
if (x86_package_find(cpu) != NULL) {
x86_package_free(pkg);
- goto package_again;
+ continue;
}
/*
pkg->next = x86_pkgs;
x86_pkgs = pkg;
}
+ } while (pkg == NULL);
- /*
- * Allocate the core structure now.
- */
- simple_unlock(&x86_topo_lock);
- core = x86_core_alloc(cpu);
- simple_lock(&x86_topo_lock);
- if (x86_core_find(cpu) != NULL) {
- x86_core_free(core);
- goto core_again;
+ /*
+ * Get the die that the logical CPU is in.
+ */
+ do {
+ die = x86_die_find(cpu);
+ if (die == NULL) {
+ /*
+ * Die structure hasn't been created yet, do it now.
+ */
+ simple_unlock(&x86_topo_lock);
+ die = x86_die_alloc(cpu);
+ simple_lock(&x86_topo_lock);
+ if (x86_die_find(cpu) != NULL) {
+ x86_die_free(die);
+ continue;
+ }
+
+ /*
+ * Add the die to the package.
+ */
+ x86_package_add_die(pkg, die);
}
+ } while (die == NULL);
- /*
- * Add it to the package.
- */
- x86_package_add_core(pkg, core);
- machine_info.physical_cpu_max += 1;
+ /*
+ * Get the core for this logical CPU.
+ */
+ do {
+ core = x86_core_find(cpu);
+ if (core == NULL) {
+ /*
+ * Allocate the core structure now.
+ */
+ simple_unlock(&x86_topo_lock);
+ core = x86_core_alloc(cpu);
+ simple_lock(&x86_topo_lock);
+ if (x86_core_find(cpu) != NULL) {
+ x86_core_free(core);
+ continue;
+ }
+
+ /*
+ * Add the core to the die & package.
+ */
+ x86_die_add_core(die, core);
+ x86_package_add_core(pkg, core);
+ machine_info.physical_cpu_max += 1;
+ }
+ } while (core == NULL);
- /*
- * Allocate performance counter structure.
- */
- simple_unlock(&x86_topo_lock);
- core->pmc = pmc_alloc();
- simple_lock(&x86_topo_lock);
- }
/*
* Done manipulating the topology, so others can get in.
machine_info.logical_cpu_max += 1;
simple_unlock(&x86_topo_lock);
+ /*
+ * Add the logical CPU to the other topology structures.
+ */
x86_core_add_lcpu(core, &cpup->lcpu);
+ x86_die_add_lcpu(core->die, &cpup->lcpu);
+ x86_package_add_lcpu(core->package, &cpup->lcpu);
+ x86_lcpu_add_caches(&cpup->lcpu);
return (void *) core;
}
void
cpu_thread_init(void)
{
- int my_cpu = get_cpu_number();
- cpu_data_t *cpup = current_cpu_datap();
+ int my_cpu = get_cpu_number();
+ cpu_data_t *cpup = current_cpu_datap();
x86_core_t *core;
- static int initialized = 0;
+ static int initialized = 0;
/*
* If we're the boot processor, we do all of the initialization of
if (core->active_lcpus == 0)
machine_info.physical_cpu += 1;
core->active_lcpus += 1;
- cpup->lcpu.halted = FALSE;
- cpup->lcpu.idle = FALSE;
simple_unlock(&x86_topo_lock);
pmCPUMarkRunning(cpup);
simple_lock(&x86_topo_lock);
machine_info.logical_cpu -= 1;
- cpup->lcpu.idle = TRUE;
core = cpup->lcpu.core;
core->active_lcpus -= 1;
if (core->active_lcpus == 0)
}
/* NOT REACHED */
}
+
+#if TOPO_DEBUG
+/*
+ * Prints out the topology
+ */
+void
+debug_topology_print(void)
+{
+ x86_pkg_t *pkg;
+ x86_die_t *die;
+ x86_core_t *core;
+ x86_lcpu_t *cpu;
+
+ pkg = x86_pkgs;
+ while (pkg != NULL) {
+ kprintf("Package:\n");
+ kprintf(" Physical: %d\n", pkg->ppkg_num);
+ kprintf(" Logical: %d\n", pkg->lpkg_num);
+
+ die = pkg->dies;
+ while (die != NULL) {
+ kprintf(" Die:\n");
+ kprintf(" Physical: %d\n", die->pdie_num);
+ kprintf(" Logical: %d\n", die->ldie_num);
+
+ core = die->cores;
+ while (core != NULL) {
+ kprintf(" Core:\n");
+ kprintf(" Physical: %d\n", core->pcore_num);
+ kprintf(" Logical: %d\n", core->lcore_num);
+
+ cpu = core->lcpus;
+ while (cpu != NULL) {
+ kprintf(" LCPU:\n");
+ kprintf(" CPU #: %d\n", cpu->cpu_num);
+ kprintf(" Physical: %d\n", cpu->pnum);
+ kprintf(" Logical: %d\n", cpu->lnum);
+ kprintf(" Flags: ");
+ if (cpu->master)
+ kprintf("MASTER ");
+ if (cpu->primary)
+ kprintf("PRIMARY");
+ if (!cpu->master && !cpu->primary)
+ kprintf("(NONE)");
+ kprintf("\n");
+
+ cpu = cpu->next_in_core;
+ }
+
+ core = core->next_in_die;
+ }
+
+ die = die->next_in_pkg;
+ }
+
+ pkg = pkg->next;
+ }
+}
+#endif /* TOPO_DEBUG */
/*
- * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#define cpu_to_lcpu(cpu) ((cpu_datap(cpu) != NULL) ? _cpu_to_lcpu(cpu) : NULL)
#define cpu_to_core(cpu) ((cpu_to_lcpu(cpu) != NULL) ? _cpu_to_lcpu(cpu)->core : NULL)
-#define cpu_to_package(cpu) ((cpu_to_core(cpu) != NULL) ? _cpu_to_core(cpu)->package : NULL)
+#define cpu_to_die(cpu) ((cpu_to_lcpu(cpu) != NULL) ? _cpu_to_lcpu(cpu)->die : NULL)
+#define cpu_to_package(cpu) ((cpu_to_lcpu(cpu) != NULL) ? _cpu_to_lcpu(cpu)->package : NULL)
/* Fast access: */
#define x86_lcpu() (&current_cpu_datap()->lcpu)
#define x86_core() (x86_lcpu()->core)
-#define x86_package() (x86_core()->package)
+#define x86_die() (x86_lcpu()->die)
+#define x86_package() (x86_lcpu()->package)
#define cpu_is_same_core(cpu1,cpu2) (cpu_to_core(cpu1) == cpu_to_core(cpu2))
+#define cpu_is_same_die(cpu1,cpu2) (cpu_to_die(cpu1) == cpu_to_die(cpu2))
#define cpu_is_same_package(cpu1,cpu2) (cpu_to_package(cpu1) == cpu_to_package(cpu2))
#define cpus_share_cache(cpu1,cpu2,_cl) (cpu_to_lcpu(cpu1)->caches[_cl] == cpu_to_lcpu(cpu2)->caches[_cl])
extern void cpu_thread_init(void);
extern void cpu_thread_halt(void);
+extern void x86_set_lcpu_numbers(x86_lcpu_t *lcpu);
+extern void x86_set_core_numbers(x86_core_t *core, x86_lcpu_t *lcpu);
+extern void x86_set_die_numbers(x86_die_t *die, x86_lcpu_t *lcpu);
+extern void x86_set_pkg_numbers(x86_pkg_t *pkg, x86_lcpu_t *lcpu);
+
+extern x86_topology_parameters_t topoParms;
+
#endif /* _I386_CPU_THREADS_H_ */
#include <i386/machine_cpu.h>
#include <i386/machine_routines.h>
#include <i386/lock.h>
-#include <i386/mp.h>
+#include <i386/lapic.h>
//#define TOPO_DEBUG 1
#if TOPO_DEBUG
#else
#define DBG(x...)
#endif
+void debug_topology_print(void);
__private_extern__ void qsort(
void * array,
*/
for (i = 1; i < ncpus; i++) {
cpu_data_t *cpup = cpu_datap(i);
+ x86_core_t *core = cpup->lcpu.core;
+ x86_die_t *die = cpup->lcpu.die;
+ x86_pkg_t *pkg = cpup->lcpu.package;
+
+ assert(core != NULL);
+ assert(die != NULL);
+ assert(pkg != NULL);
if (cpup->cpu_number != i) {
kprintf("cpu_datap(%d):0x%08x local apic id 0x%x "
cpup->cpu_number);
}
cpup->cpu_number = i;
- cpup->lcpu.lnum = i;
+ cpup->lcpu.cpu_num = i;
+ cpup->lcpu.pnum = cpup->cpu_phys_number;
lapic_cpu_map(cpup->cpu_phys_number, i);
+ x86_set_lcpu_numbers(&cpup->lcpu);
+ x86_set_core_numbers(core, &cpup->lcpu);
+ x86_set_die_numbers(die, &cpup->lcpu);
+ x86_set_pkg_numbers(pkg, &cpup->lcpu);
}
+#if TOPO_DEBUG
+ debug_topology_print();
+#endif /* TOPO_DEBUG */
+
ml_set_interrupts_enabled(istate);
+ DBG("cpu_topology_start() LLC is L%d\n", topoParms.LLCDepth + 1);
/*
* Iterate over all logical cpus finding or creating the affinity set
- * for their L2 cache. Each affinity set possesses a processor set
+ * for their LLC cache. Each affinity set possesses a processor set
* into which each logical processor is added.
*/
DBG("cpu_topology_start() creating affinity sets:\n");
for (i = 0; i < ncpus; i++) {
cpu_data_t *cpup = cpu_datap(i);
x86_lcpu_t *lcpup = cpu_to_lcpu(i);
- x86_cpu_cache_t *L2_cachep;
+ x86_cpu_cache_t *LLC_cachep;
x86_affinity_set_t *aset;
- L2_cachep = lcpup->caches[CPU_CACHE_DEPTH_L2];
- assert(L2_cachep->type == CPU_CACHE_TYPE_UNIF);
- aset = find_cache_affinity(L2_cachep);
+ LLC_cachep = lcpup->caches[topoParms.LLCDepth];
+ assert(LLC_cachep->type == CPU_CACHE_TYPE_UNIF);
+ aset = find_cache_affinity(LLC_cachep);
if (aset == NULL) {
aset = (x86_affinity_set_t *) kalloc(sizeof(*aset));
if (aset == NULL)
aset->next = x86_affinities;
x86_affinities = aset;
aset->num = x86_affinity_count++;
- aset->cache = L2_cachep;
+ aset->cache = LLC_cachep;
aset->pset = (i == master_cpu) ?
processor_pset(master_processor) :
pset_create(pset_node_root());
}
DBG("\tprocessor_init set %p(%d) lcpup %p(%d) cpu %p processor %p\n",
- aset, aset->num, lcpup, lcpup->lnum, cpup, cpup->cpu_processor);
+ aset, aset->num, lcpup, lcpup->cpu_num, cpup, cpup->cpu_processor);
if (i != master_cpu)
processor_init(cpup->cpu_processor, i, aset->pset);
if (affinity_num == aset->num)
break;
}
- return (aset == NULL) ? PROCESSOR_SET_NULL : aset->pset;
-
+ return (aset == NULL) ? PROCESSOR_SET_NULL : aset->pset;
}
uint64_t
if (level == 0) {
return machine_info.max_mem;
- } else if ( 1 <= level && level <= 3) {
+ } else if ( 1 <= level && level <= MAX_CACHE_DEPTH) {
cachep = current_cpu_datap()->lcpu.caches[level-1];
return cachep ? cachep->cache_size : 0;
} else {
if (level == 0) {
return machine_info.max_cpus;
- } else if ( 1 <= level && level <= 3) {
+ } else if ( 1 <= level && level <= MAX_CACHE_DEPTH) {
cachep = current_cpu_datap()->lcpu.caches[level-1];
return cachep ? cachep->nlcpus : 0;
} else {
/*
- * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
typedef struct x86_cpu_cache
{
struct x86_cpu_cache *next; /* next cache at this level/lcpu */
+ struct x86_die *die; /* die containing this cache (only for LLC) */
uint8_t maxcpus; /* maximum # of cpus that can share */
uint8_t nlcpus; /* # of logical cpus sharing this cache */
uint8_t type; /* type of cache */
struct pmc;
struct cpu_data;
+struct mca_state;
+/*
+ * Define the states that a (logical) CPU can be in.
+ *
+ * LCPU_OFF This indicates that the CPU is "off". It requires a full
+ * restart. This is the state of a CPU when the system first
+ * boots or when it comes out of "sleep" (aka S3/S5).
+ *
+ * LCPU_HALT This indicates that the CPU has been "halted". It has been
+ * removed from the system but still retains its internal state
+ * so that it can be quickly brought back on-line.
+ *
+ * LCPU_NONSCHED This indicates that the CPU is not schedulable. It
+ * will still appear in the system as a viable CPU; however, no
+ * work will be scheduled on it.
+ *
+ * LCPU_PAUSE This indicates that the CPU is "paused". This is usually
+ * done only during kernel debug.
+ *
+ * LCPU_IDLE This indicates that the CPU is idle. The scheduler has
+ * determined that there is no work for this CPU to do.
+ *
+ * LCPU_RUN This indicates that the CPU is running code and performing work.
+ *
+ * In normal system operation, CPUs will usually be transitioning between
+ * LCPU_IDLE and LCPU_RUN.
+ */
+typedef enum lcpu_state
+{
+ LCPU_OFF = 0, /* 0 so the right thing happens on boot */
+ LCPU_HALT = 1,
+ LCPU_NONSCHED = 2,
+ LCPU_PAUSE = 3,
+ LCPU_IDLE = 4,
+ LCPU_RUN = 5,
+} lcpu_state_t;
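
The CPU-offline wait loop earlier in this change polls for exactly the two states that mean a CPU is no longer executing instructions. A sketch (not in the sources) of that test factored into a helper:

/* Hypothetical helper; the kernel open-codes this check in its wait loops. */
static inline boolean_t
lcpu_is_down(x86_lcpu_t *lcpu)
{
	lcpu_state_t state = lcpu->state;

	return ((state == LCPU_HALT) || (state == LCPU_OFF));
}
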
+
+/*
+ * In each topology structure there are two numbers: a logical number and a
+ * physical number.
+ *
+ * The logical numbers represent the ID of that structure
+ * relative to the enclosing structure and always start at 0. So when using
+ * logical numbers, it is necessary to specify all elements in the topology
+ * (i.e. to "name" a logical CPU using logical numbers, 4 numbers are required:
+ * package, die, core, logical CPU).
+ *
+ * The physical numbers represent the ID of that structure and are unique (for
+ * that structure) across the entire topology.
+ *
+ * The logical CPU structure contains a third number which is the CPU number.
+ * This number is identical to the CPU number used in other parts of the kernel.
+ */
typedef struct x86_lcpu
{
- struct x86_lcpu *next; /* next logical cpu in core */
- struct x86_lcpu *lcpu; /* pointer back to self */
- struct x86_core *core; /* core containing the logical cpu */
- struct cpu_data *cpu; /* cpu_data structure */
- uint32_t lnum; /* logical cpu number */
- uint32_t pnum; /* physical cpu number */
- boolean_t master; /* logical cpu is the master (boot) CPU */
- boolean_t primary;/* logical cpu is primary CPU in package */
- boolean_t halted; /* logical cpu is halted */
- boolean_t idle; /* logical cpu is idle */
- uint64_t rtcPop; /* when etimer wants a timer pop */
+ struct x86_lcpu *next_in_core; /* next logical cpu in core */
+ struct x86_lcpu *next_in_die; /* next logical cpu in die */
+ struct x86_lcpu *next_in_pkg; /* next logical cpu in package */
+ struct x86_lcpu *lcpu; /* pointer back to self */
+ struct x86_core *core; /* core containing the logical cpu */
+ struct x86_die *die; /* die containing the logical cpu */
+ struct x86_pkg *package; /* package containing the logical cpu */
+ struct cpu_data *cpu; /* cpu_data structure */
+ uint32_t cpu_num; /* cpu number */
+ uint32_t lnum; /* logical cpu number (within core) */
+ uint32_t pnum; /* physical cpu number */
+ boolean_t master; /* logical cpu is the master (boot) CPU */
+ boolean_t primary; /* logical cpu is primary CPU in package */
+ volatile lcpu_state_t state; /* state of the logical CPU */
+ volatile boolean_t stopped; /* used to indicate that the CPU has "stopped" */
+ uint64_t rtcPop; /* when etimer wants a timer pop */
uint64_t rtcDeadline;
x86_cpu_cache_t *caches[MAX_CACHE_DEPTH];
+ struct pmc *pmc; /* Pointer to perfmon data */
+ void *pmStats; /* Power management stats for lcpu */
+ void *pmState; /* Power management state for lcpu */
} x86_lcpu_t;
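
To make the two numbering schemes concrete, consider a hypothetical machine whose topoParms report 2 threads per core, 4 cores per die and 1 die per package. Following the x86_set_*_numbers() routines earlier in this change, cpu_num = 5 yields lnum = 5 % 2 = 1, a physical core number of 5 / 2 = 2 (logical core 2 % 4 = 2), physical die 5 / 8 = 0 (logical die 0), and physical package 5 / 8 = 0.
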
#define X86CORE_FL_PRESENT 0x80000000 /* core is present */
typedef struct x86_core
{
- struct x86_core *next; /* next core in package */
- struct x86_lcpu *lcpus; /* list of logical cpus in core */
+ struct x86_core *next_in_die; /* next core in die */
+ struct x86_core *next_in_pkg; /* next core in package */
+ struct x86_die *die; /* die containing the core */
struct x86_pkg *package; /* package containing core */
+ struct x86_lcpu *lcpus; /* list of logical cpus in core */
uint32_t flags;
- uint32_t lcore_num; /* logical core # (unique to package) */
+ uint32_t lcore_num; /* logical core # (unique within die) */
uint32_t pcore_num; /* physical core # (globally unique) */
uint32_t num_lcpus; /* Number of logical cpus */
- uint32_t active_lcpus; /* Number of non-halted cpus */
- struct pmc *pmc; /* Pointer to perfmon data */
- struct hpetTimer *Hpet; /* Address of the HPET for this core */
- uint32_t HpetVec; /* Interrupt vector for HPET */
- uint64_t HpetInt; /* Number of HPET Interrupts */
- uint64_t HpetCmp; /* HPET Comparitor */
- uint64_t HpetCfg; /* HPET configuration */
- uint64_t HpetTime;
+ uint32_t active_lcpus; /* Number of {running, idle} cpus */
void *pmStats; /* Power management stats for core */
void *pmState; /* Power management state for core */
} x86_core_t;
+#define X86DIE_FL_PRESENT 0x80000000 /* die is present */
+#define X86DIE_FL_READY 0x40000000 /* die struct is init'd */
+
+typedef struct x86_die
+{
+ struct x86_die *next_in_pkg; /* next die in package */
+ struct x86_lcpu *lcpus; /* list of lcpus in die */
+ struct x86_core *cores; /* list of cores in die */
+ struct x86_pkg *package; /* package containing the die */
+ uint32_t flags;
+ uint32_t ldie_num; /* logical die # (unique to package) */
+ uint32_t pdie_num; /* physical die # (globally unique) */
+ uint32_t num_cores; /* Number of cores in die */
+ x86_cpu_cache_t *LLC; /* LLC contained in this die */
+ void *pmStats; /* Power Management stats for die */
+ void *pmState; /* Power Management state for die */
+} x86_die_t;
+
#define X86PKG_FL_PRESENT 0x80000000 /* package is present */
#define X86PKG_FL_READY 0x40000000 /* package struct init'd */
#define X86PKG_FL_HAS_HPET 0x10000000 /* package has HPET assigned */
typedef struct x86_pkg
{
struct x86_pkg *next; /* next package */
+ struct x86_lcpu *lcpus; /* list of logical cpus in package */
struct x86_core *cores; /* list of cores in package */
+ struct x86_die *dies; /* list of dies in package */
uint32_t flags;
uint32_t lpkg_num; /* logical package # */
uint32_t ppkg_num; /* physical package # */
- uint32_t num_cores; /* number of cores in package */
- struct hpetTimer *Hpet; /* address of HPET for this package */
- uint32_t HpetVec; /* Interrupt vector for HPET */
- uint64_t HpetInt; /* Number of HPET interrupts */
- uint64_t HpetCmp; /* HPET comparitor */
- uint64_t HpetCfg; /* HPET configuration */
- uint64_t HpetTime;
+ uint32_t num_dies; /* number of dies in package */
void *pmStats; /* Power Management stats for package*/
void *pmState; /* Power Management state for package*/
+ struct mca_state *mca_state; /* MCA state for memory errors */
} x86_pkg_t;
extern x86_pkg_t *x86_pkgs; /* root of all CPU packages */
+
+typedef struct x86_topology_parameters
+{
+ uint32_t LLCDepth;
+ uint32_t nCoresSharingLLC;
+ uint32_t nLCPUsSharingLLC;
+ uint32_t maxSharingLLC;
+ uint32_t nLThreadsPerCore;
+ uint32_t nPThreadsPerCore;
+ uint32_t nLCoresPerDie;
+ uint32_t nPCoresPerDie;
+ uint32_t nLDiesPerPackage;
+ uint32_t nPDiesPerPackage;
+ uint32_t nLThreadsPerDie;
+ uint32_t nPThreadsPerDie;
+ uint32_t nLThreadsPerPackage;
+ uint32_t nPThreadsPerPackage;
+ uint32_t nLCoresPerPackage;
+ uint32_t nPCoresPerPackage;
+ uint32_t nPackages;
+} x86_topology_parameters_t;
/* Called after cpu discovery */
extern void cpu_topology_start(void);
-extern int idlehalt;
-
#endif /* _I386_CPU_TOPOLOGY_H_ */
#endif /* KERNEL_PRIVATE */
else if (linesizes[L1D])
info_p->cache_linesize = linesizes[L1D];
else panic("no linesize");
+
+ /*
+ * Extract and publish TLB information.
+ */
+ for (i = 1; i < sizeof(info_p->cache_info); i++) {
+ uint8_t desc = info_p->cache_info[i];
+
+ switch (desc) {
+ case CPUID_CACHE_ITLB_4K_32_4:
+ info_p->cpuid_itlb_small = 32;
+ break;
+ case CPUID_CACHE_ITLB_4M_2:
+ info_p->cpuid_itlb_large = 2;
+ break;
+ case CPUID_CACHE_DTLB_4K_64_4:
+ info_p->cpuid_dtlb_small = 64;
+ break;
+ case CPUID_CACHE_DTLB_4M_8_4:
+ info_p->cpuid_dtlb_large = 8;
+ break;
+ case CPUID_CACHE_DTLB_4M_32_4:
+ info_p->cpuid_dtlb_large = 32;
+ break;
+ case CPUID_CACHE_ITLB_64:
+ info_p->cpuid_itlb_small = 64;
+ info_p->cpuid_itlb_large = 64;
+ break;
+ case CPUID_CACHE_ITLB_128:
+ info_p->cpuid_itlb_small = 128;
+ info_p->cpuid_itlb_large = 128;
+ break;
+ case CPUID_CACHE_ITLB_256:
+ info_p->cpuid_itlb_small = 256;
+ info_p->cpuid_itlb_large = 256;
+ break;
+ case CPUID_CACHE_DTLB_64:
+ info_p->cpuid_dtlb_small = 64;
+ info_p->cpuid_dtlb_large = 64;
+ break;
+ case CPUID_CACHE_DTLB_128:
+ info_p->cpuid_dtlb_small = 128;
+ info_p->cpuid_dtlb_large = 128;
+ break;
+ case CPUID_CACHE_DTLB_256:
+ info_p->cpuid_dtlb_small = 256;
+ info_p->cpuid_dtlb_large = 256;
+ break;
+ case CPUID_CACHE_ITLB_4M2M_7:
+ info_p->cpuid_itlb_large = 7;
+ break;
+ case CPUID_CACHE_DTLB_4K_16_4:
+ info_p->cpuid_dtlb_small = 16;
+ break;
+ case CPUID_CACHE_DTLB_4M2M_32_4:
+ info_p->cpuid_dtlb_large = 32;
+ break;
+ case CPUID_CACHE_ITLB_4K_128_4:
+ info_p->cpuid_itlb_small = 128;
+ break;
+ case CPUID_CACHE_ITLB_4M_8:
+ info_p->cpuid_itlb_large = 8;
+ break;
+ case CPUID_CACHE_DTLB_4K_128_4:
+ info_p->cpuid_dtlb_small = 128;
+ break;
+ case CPUID_CACHE_DTLB_4K_256_4:
+ info_p->cpuid_dtlb_small = 256;
+ break;
+ }
+ }
}
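
The descriptor bytes decoded by the switch above come from CPUID leaf 2, which packs one-byte cache/TLB descriptors into EAX..EDX; the low byte of EAX is an iteration count rather than a descriptor, and any register with bit 31 set carries no valid descriptors. A user-space sketch of gathering those bytes with the compiler's __get_cpuid() (the kernel fills info_p->cache_info through its own cpuid() wrapper):

#include <stdio.h>
#include <cpuid.h>

int
main(void)
{
	unsigned int regs[4];
	unsigned char desc[16];
	int i, j, n = 0;

	if (!__get_cpuid(2, &regs[0], &regs[1], &regs[2], &regs[3]))
		return (1);
	regs[0] &= 0xffffff00;	/* low byte of EAX is a count, not a descriptor */

	for (i = 0; i < 4; i++) {
		if (regs[i] & 0x80000000)	/* bit 31 set: no valid descriptors */
			continue;
		for (j = 0; j < 4; j++)
			desc[n++] = (regs[i] >> (8 * j)) & 0xff;
	}
	for (i = 0; i < n; i++)
		if (desc[i] != 0)		/* 0x00 is the NULL descriptor */
			printf("descriptor 0x%02x\n", desc[i]);
	return (0);
}
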
static void
/* Fold extensions into family/model */
if (info_p->cpuid_family == 0x0f)
info_p->cpuid_family += info_p->cpuid_extfamily;
- if (info_p->cpuid_family == 0x0f || info_p->cpuid_family== 0x06)
+ if (info_p->cpuid_family == 0x0f || info_p->cpuid_family == 0x06)
info_p->cpuid_model += (info_p->cpuid_extmodel << 4);
if (info_p->cpuid_features & CPUID_FEATURE_HTT)
quad(cpuid_reg[ecx], cpuid_reg[edx]);
}
- if (info_p->cpuid_extfeatures && CPUID_FEATURE_MONITOR) {
+ if (info_p->cpuid_features & CPUID_FEATURE_MONITOR) {
/*
* Extract the Monitor/Mwait Leaf info:
*/
cpuid_set_cache_info(&cpuid_cpu_info);
+ if (cpuid_cpu_info.core_count == 0) {
+ cpuid_cpu_info.core_count =
+ cpuid_cpu_info.cpuid_cores_per_package;
+ cpuid_cpu_info.thread_count =
+ cpuid_cpu_info.cpuid_logical_per_package;
+ }
+
cpuid_cpu_info.cpuid_model_string = ""; /* deprecated */
}
{CPUID_FEATURE_PDCM, "PDCM"},
{CPUID_FEATURE_SSE4_1, "SSE4.1"},
{CPUID_FEATURE_SSE4_2, "SSE4.2"},
+ {CPUID_FEATURE_xAPIC, "xAPIC"},
{CPUID_FEATURE_POPCNT, "POPCNT"},
{0, 0}
},
i386_cpu_info_t *
cpuid_info(void)
{
- /* Set-up the cpuid_indo stucture lazily */
+ /* Set up the cpuid_info structure lazily */
if (cpuid_cpu_infop == NULL) {
cpuid_set_info();
cpuid_cpu_infop = &cpuid_cpu_info;
cpuid_features(void)
{
static int checked = 0;
- char fpu_arg[16] = { 0 };
+ char fpu_arg[20] = { 0 };
(void) cpuid_info();
if (!checked) {
/* check for boot-time fpu limitations */
- if (PE_parse_boot_arg("_fpu", &fpu_arg[0])) {
+ if (PE_parse_boot_argn("_fpu", &fpu_arg[0], sizeof (fpu_arg))) {
printf("limiting fpu features to: %s\n", fpu_arg);
if (!strncmp("387", fpu_arg, sizeof("387")) || !strncmp("mmx", fpu_arg, sizeof("mmx"))) {
printf("no sse or sse2\n");
#define CPUID_FEATURE_CX16 _HBit(13) /* CmpXchg16b instruction */
#define CPUID_FEATURE_xTPR _HBit(14) /* Send Task PRiority msgs */
#define CPUID_FEATURE_PDCM _HBit(15) /* Perf/Debug Capability MSR */
+#define CPUID_FEATURE_DCA _HBit(18) /* Direct Cache Access */
#define CPUID_FEATURE_SSE4_1 _HBit(19) /* Streaming SIMD extensions 4.1 */
-#define CPUID_FEATURE_SSE4_2 _HBit(20) /* Streaming SIMD extensions 4.1 */
+#define CPUID_FEATURE_SSE4_2 _HBit(20) /* Streaming SIMD extensions 4.2 */
+#define CPUID_FEATURE_xAPIC _HBit(21) /* Extended APIC Mode */
#define CPUID_FEATURE_POPCNT _HBit(23) /* POPCNT instruction */
/*
#define CPUID_CACHE_SIZE 16 /* Number of descriptor values */
#define CPUID_CACHE_NULL 0x00 /* NULL */
-#define CPUID_CACHE_ITLB_4K 0x01 /* Instruction TLB: 4K pages */
-#define CPUID_CACHE_ITLB_4M 0x02 /* Instruction TLB: 4M pages */
-#define CPUID_CACHE_DTLB_4K 0x03 /* Data TLB: 4K pages */
-#define CPUID_CACHE_DTLB_4M 0x04 /* Data TLB: 4M pages */
-#define CPUID_CACHE_ICACHE_8K 0x06 /* Instruction cache: 8K */
-#define CPUID_CACHE_ICACHE_16K 0x08 /* Instruction cache: 16K */
-#define CPUID_CACHE_DCACHE_8K 0x0A /* Data cache: 8K */
-#define CPUID_CACHE_DCACHE_16K 0x0C /* Data cache: 16K */
+#define CPUID_CACHE_ITLB_4K_32_4 0x01 /* Inst TLB: 4K pages, 32 ents, 4-way */
+#define CPUID_CACHE_ITLB_4M_2 0x02 /* Inst TLB: 4M pages, 2 ents */
+#define CPUID_CACHE_DTLB_4K_64_4 0x03 /* Data TLB: 4K pages, 64 ents, 4-way */
+#define CPUID_CACHE_DTLB_4M_8_4 0x04 /* Data TLB: 4M pages, 8 ents, 4-way */
+#define CPUID_CACHE_DTLB_4M_32_4 0x05 /* Data TLB: 4M pages, 32 ents, 4-way */
+#define CPUID_CACHE_L1I_8K 0x06 /* Icache: 8K */
+#define CPUID_CACHE_L1I_16K 0x08 /* Icache: 16K */
+#define CPUID_CACHE_L1I_32K 0x09 /* Icache: 32K, 4-way, 64 bytes */
+#define CPUID_CACHE_L1D_8K 0x0A /* Dcache: 8K */
+#define CPUID_CACHE_L1D_16K 0x0C /* Dcache: 16K */
+#define CPUID_CACHE_L1D_16K_4_32 0x0D /* Dcache: 16K, 4-way, 64 byte, ECC */
+#define CPUID_CACHE_L2_256K_8_64 0x21 /* L2: 256K, 8-way, 64 bytes */
#define CPUID_CACHE_L3_512K 0x22 /* L3: 512K */
#define CPUID_CACHE_L3_1M 0x23 /* L3: 1M */
#define CPUID_CACHE_L3_2M 0x25 /* L3: 2M */
#define CPUID_CACHE_L3_4M 0x29 /* L3: 4M */
-#define CPUID_CACHE_DCACHE_32K 0x2C /* Data cache: 32K, 8-way */
-#define CPUID_CACHE_ICACHE_32K 0x30 /* Instruction cache: 32K, 8-way */
+#define CPUID_CACHE_L1D_32K_8 0x2C /* Dcache: 32K, 8-way, 64 byte */
+#define CPUID_CACHE_L1I_32K_8 0x30 /* Icache: 32K, 8-way */
#define CPUID_CACHE_L2_128K_S4 0x39 /* L2: 128K, 4-way, sectored */
#define CPUID_CACHE_L2_128K_S2 0x3B /* L2: 128K, 2-way, sectored */
#define CPUID_CACHE_L2_256K_S4 0x3C /* L2: 256K, 4-way, sectored */
#define CPUID_CACHE_L2_12M_12_64 0x4C /* L2: 12M, 12-way, 64 bytes */
#define CPUID_CACHE_L2_16M_16_64 0x4D /* L2: 16M, 16-way, 64 bytes */
#define CPUID_CACHE_L2_6M_24_64 0x4E /* L2: 6M, 24-way, 64 bytes */
-#define CPUID_CACHE_ITLB_64 0x50 /* Instruction TLB: 64 entries */
-#define CPUID_CACHE_ITLB_128 0x51 /* Instruction TLB: 128 entries */
-#define CPUID_CACHE_ITLB_256 0x52 /* Instruction TLB: 256 entries */
+#define CPUID_CACHE_ITLB_64 0x50 /* Inst TLB: 64 entries */
+#define CPUID_CACHE_ITLB_128 0x51 /* Inst TLB: 128 entries */
+#define CPUID_CACHE_ITLB_256 0x52 /* Inst TLB: 256 entries */
+#define CPUID_CACHE_ITLB_4M2M_7 0x55 /* Inst TLB: 4M/2M, 7 entries */
#define CPUID_CACHE_DTLB_4M_16_4 0x56 /* Data TLB: 4M, 16 entries, 4-way */
-#define CPUID_CACHE_DTLB_4K_16_4 0x56 /* Data TLB: 4K, 16 entries, 4-way */
+#define CPUID_CACHE_DTLB_4K_16_4 0x57 /* Data TLB: 4K, 16 entries, 4-way */
+#define CPUID_CACHE_DTLB_4M2M_32_4 0x5A /* Data TLB: 4M/2M, 32 entries */
#define CPUID_CACHE_DTLB_64 0x5B /* Data TLB: 64 entries */
#define CPUID_CACHE_DTLB_128 0x5C /* Data TLB: 128 entries */
#define CPUID_CACHE_DTLB_256 0x5D /* Data TLB: 256 entries */
-#define CPUID_CACHE_DCACHE_16K_8_64 0x60 /* Data cache: 16K, 8-way, 64 bytes */
-#define CPUID_CACHE_DCACHE_8K_4_64 0x66 /* Data cache: 8K, 4-way, 64 bytes */
-#define CPUID_CACHE_DCACHE_16K_4_64 0x67 /* Data cache: 16K, 4-way, 64 bytes */
-#define CPUID_CACHE_DCACHE_32K_4_64 0x68 /* Data cache: 32K, 4-way, 64 bytes */
+#define CPUID_CACHE_L1D_16K_8_64 0x60 /* Data cache: 16K, 8-way, 64 bytes */
+#define CPUID_CACHE_L1D_8K_4_64 0x66 /* Data cache: 8K, 4-way, 64 bytes */
+#define CPUID_CACHE_L1D_16K_4_64 0x67 /* Data cache: 16K, 4-way, 64 bytes */
+#define CPUID_CACHE_L1D_32K_4_64 0x68 /* Data cache: 32K, 4-way, 64 bytes */
#define CPUID_CACHE_TRACE_12K_8 0x70 /* Trace cache 12K-uop, 8-way */
#define CPUID_CACHE_TRACE_16K_8 0x71 /* Trace cache 16K-uop, 8-way */
#define CPUID_CACHE_TRACE_32K_8 0x72 /* Trace cache 32K-uop, 8-way */
#define CPUID_CACHE_L2_1M_8_64 0x87 /* L2: 1M, 8-way, 64 bytes */
#define CPUID_CACHE_ITLB_4K_128_4 0xB0 /* ITLB: 4KB, 128 entries, 4-way */
#define CPUID_CACHE_ITLB_4M_4_4 0xB1 /* ITLB: 4MB, 4 entries, 4-way, or */
-#define CPUID_CACHE_ITLB_2M_8_4 0xB1 /* ITLB: 2MB, 8 entries, 4-way */
+#define CPUID_CACHE_ITLB_2M_8_4 0xB1 /* ITLB: 2MB, 8 entries, 4-way, or */
+#define CPUID_CACHE_ITLB_4M_8 0xB1 /* ITLB: 4MB, 8 entries */
+#define CPUID_CACHE_ITLB_4K_64_4 0xB2 /* ITLB: 4KB, 64 entries, 4-way */
#define CPUID_CACHE_DTLB_4K_128_4 0xB3 /* DTLB: 4KB, 128 entries, 4-way */
#define CPUID_CACHE_DTLB_4K_256_4 0xB4 /* DTLB: 4KB, 256 entries, 4-way */
+#define CPUID_CACHE_2TLB_4K_512_4 0xB4 /* 2nd-level TLB: 4KB, 512, 4-way */
+#define CPUID_CACHE_L3_512K_4_64 0xD0 /* L3: 512KB, 4-way, 64 bytes */
+#define CPUID_CACHE_L3_1M_4_64 0xD1 /* L3: 1M, 4-way, 64 bytes */
+#define CPUID_CACHE_L3_2M_4_64 0xD2 /* L3: 2M, 4-way, 64 bytes */
+#define CPUID_CACHE_L3_1M_8_64 0xD6 /* L3: 1M, 8-way, 64 bytes */
+#define CPUID_CACHE_L3_2M_8_64 0xD7 /* L3: 2M, 8-way, 64 bytes */
+#define CPUID_CACHE_L3_4M_8_64 0xD8 /* L3: 4M, 8-way, 64 bytes */
+#define CPUID_CACHE_L3_1M5_12_64 0xDC /* L3: 1.5M, 12-way, 64 bytes */
+#define CPUID_CACHE_L3_3M_12_64 0xDD /* L3: 3M, 12-way, 64 bytes */
+#define CPUID_CACHE_L3_6M_12_64 0xDE /* L3: 6M, 12-way, 64 bytes */
+#define CPUID_CACHE_L3_2M_16_64 0xE2 /* L3: 2M, 16-way, 64 bytes */
+#define CPUID_CACHE_L3_4M_16_64 0xE3 /* L3: 4M, 16-way, 64 bytes */
+#define CPUID_CACHE_L3_8M_16_64 0xE4 /* L3: 8M, 16-way, 64 bytes */
#define CPUID_CACHE_PREFETCH_64 0xF0 /* 64-Byte Prefetching */
#define CPUID_CACHE_PREFETCH_128 0xF1 /* 128-Byte Prefetching */
#define CPUID_MWAIT_EXTENSION _Bit(0) /* enumeration of MWAIT extensions */
#define CPUID_MWAIT_BREAK _Bit(1) /* interrupts are break events */
+#define CPUID_MODEL_YONAH 14
+#define CPUID_MODEL_MEROM 15
+#define CPUID_MODEL_PENRYN 23
+#define CPUID_MODEL_NEHALEM 26
+
#ifndef ASSEMBLER
#include <stdint.h>
#include <mach/mach_types.h>
/* Virtual and physical address size: */
uint32_t cpuid_address_bits_physical;
uint32_t cpuid_address_bits_virtual;
+
+ uint32_t cpuid_microcode_version;
+
+ /* Numbers of tlbs per processor */
+ uint32_t cpuid_itlb_small;
+ uint32_t cpuid_dtlb_small;
+ uint32_t cpuid_itlb_large;
+ uint32_t cpuid_dtlb_large;
+
+ uint32_t core_count;
+ uint32_t thread_count;
+
} i386_cpu_info_t;
#ifdef __cplusplus
/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <i386/eflags.h>
#include <i386/trap.h>
#include <i386/pmCPU.h>
-#include <i386/hpet.h>
typedef addr64_t db_addr_t; /* address - unsigned */
typedef uint64_t db_expr_t; /* expression */
char *modif);
extern void db_apic(db_expr_t addr, boolean_t have_addr, db_expr_t count,
char *modif);
-extern void db_display_hpet(hpetReg_t *);
-extern void db_hpet(db_expr_t addr, boolean_t have_addr, db_expr_t count,
- char *modif);
/* macros for printing OS server dependent task name */
int cpu);
#if MACH_KDB
-extern void db_getpmgr(pmData_t *pmj);
extern void db_chkpmgr(void);
#endif /* MACH_KDB */
extern void db_pmgr(db_expr_t addr, int have_addr, db_expr_t count, char * modif);
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <i386/cpuid.h>
#include <i386/Diagnostics.h>
#include <i386/pmCPU.h>
-#include <i386/hpet.h>
#include <mach/i386/vm_param.h>
#include <mach/i386/thread_status.h>
#include <machine/commpage.h>
DECLARE("CPU_UBER_ARG_STORE_VALID",
offsetof(cpu_data_t *, cpu_uber_arg_store_valid));
+ DECLARE("CPU_NANOTIME",
+ offsetof(cpu_data_t *, cpu_nanotime));
+
DECLARE("CPU_DR7",
offsetof(cpu_data_t *, cpu_dr7));
DECLARE("OnProc", OnProc);
-
- DECLARE("GCAP_ID", offsetof(hpetReg_t *, GCAP_ID));
- DECLARE("GEN_CONF", offsetof(hpetReg_t *, GEN_CONF));
- DECLARE("GINTR_STA", offsetof(hpetReg_t *, GINTR_STA));
- DECLARE("MAIN_CNT", offsetof(hpetReg_t *, MAIN_CNT));
- DECLARE("TIM0_CONF", offsetof(hpetReg_t *, TIM0_CONF));
- DECLARE("TIM_CONF", TIM_CONF);
- DECLARE("Tn_INT_ENB_CNF", Tn_INT_ENB_CNF);
- DECLARE("TIM0_COMP", offsetof(hpetReg_t *, TIM0_COMP));
- DECLARE("TIM_COMP", TIM_COMP);
- DECLARE("TIM1_CONF", offsetof(hpetReg_t *, TIM1_CONF));
- DECLARE("TIM1_COMP", offsetof(hpetReg_t *, TIM1_COMP));
- DECLARE("TIM2_CONF", offsetof(hpetReg_t *, TIM2_CONF));
- DECLARE("TIM2_COMP", offsetof(hpetReg_t *, TIM2_COMP));
-
#if CONFIG_DTRACE
DECLARE("LS_LCK_MTX_LOCK_ACQUIRE", LS_LCK_MTX_LOCK_ACQUIRE);
DECLARE("LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE", LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE);
+++ /dev/null
-/*
- * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <string.h>
-#include <mach/vm_param.h>
-#include <mach/vm_prot.h>
-#include <mach/machine.h>
-#include <mach/time_value.h>
-#include <kern/spl.h>
-#include <kern/assert.h>
-#include <kern/debug.h>
-#include <kern/misc_protos.h>
-#include <kern/startup.h>
-#include <kern/clock.h>
-#include <kern/cpu_data.h>
-#include <kern/processor.h>
-#include <vm/vm_page.h>
-#include <vm/pmap.h>
-#include <vm/vm_kern.h>
-#include <i386/pmap.h>
-#include <i386/misc_protos.h>
-#include <i386/cpuid.h>
-#include <i386/mp.h>
-#include <i386/machine_cpu.h>
-#include <i386/machine_routines.h>
-#include <i386/io_map_entries.h>
-#include <architecture/i386/pio.h>
-#include <i386/cpuid.h>
-#include <i386/apic.h>
-#include <i386/tsc.h>
-#include <i386/hpet.h>
-#include <i386/pmCPU.h>
-#include <i386/cpu_topology.h>
-#include <i386/cpu_threads.h>
-#include <pexpert/device_tree.h>
-#if MACH_KDB
-#include <i386/db_machdep.h>
-#endif
-#if MACH_KDB
-#include <ddb/db_aout.h>
-#include <ddb/db_access.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_variables.h>
-#include <ddb/db_command.h>
-#include <ddb/db_output.h>
-#include <ddb/db_expr.h>
-#endif /* MACH_KDB */
-#include <ddb/tr.h>
-
-/* Decimal powers: */
-#define kilo (1000ULL)
-#define Mega (kilo * kilo)
-#define Giga (kilo * Mega)
-#define Tera (kilo * Giga)
-#define Peta (kilo * Tera)
-
-uint32_t hpetArea = 0;
-uint32_t hpetAreap = 0;
-uint64_t hpetFemto = 0;
-uint64_t hpetFreq = 0;
-uint64_t hpetCvt = 0; /* (TAKE OUT LATER) */
-uint64_t hpetCvtt2n = 0;
-uint64_t hpetCvtn2t = 0;
-uint64_t tsc2hpet = 0;
-uint64_t hpet2tsc = 0;
-uint64_t bus2hpet = 0;
-uint64_t hpet2bus = 0;
-
-uint32_t rcbaArea = 0;
-uint32_t rcbaAreap = 0;
-
-static int (*hpet_req)(uint32_t apicid, void *arg, hpetRequest_t *hpet) = NULL;
-static void *hpet_arg = NULL;
-
-#if DEBUG
-#define DBG(x...) kprintf("DBG: " x)
-#else
-#define DBG(x...)
-#endif
-
-int
-hpet_register_callback(int (*hpet_reqst)(uint32_t apicid,
- void *arg,
- hpetRequest_t *hpet),
- void *arg)
-{
- hpet_req = hpet_reqst;
- hpet_arg = arg;
- return(0);
-}
-
-/*
- * This routine is called to obtain an HPET and have it assigned
- * to a CPU. It returns 0 if successful and non-zero if one could
- * not be assigned.
- */
-int
-hpet_request(uint32_t cpu)
-{
- hpetRequest_t hpetReq;
- int rc;
- x86_lcpu_t *lcpu;
- x86_core_t *core;
- x86_pkg_t *pkg;
- boolean_t enabled;
-
- if (hpet_req == NULL) {
- return(-1);
- }
-
- /*
- * Deal with the case where the CPU # passed in is past the
- * value specified in cpus=n in boot-args.
- */
- if (cpu >= real_ncpus) {
- enabled = ml_set_interrupts_enabled(FALSE);
- lcpu = cpu_to_lcpu(cpu);
- if (lcpu != NULL) {
- core = lcpu->core;
- pkg = core->package;
-
- if (lcpu->primary) {
- pkg->flags |= X86PKG_FL_HAS_HPET;
- }
- }
-
- ml_set_interrupts_enabled(enabled);
- return(0);
- }
-
- rc = (*hpet_req)(ml_get_apicid(cpu), hpet_arg, &hpetReq);
- if (rc != 0) {
- return(rc);
- }
-
- enabled = ml_set_interrupts_enabled(FALSE);
- lcpu = cpu_to_lcpu(cpu);
- core = lcpu->core;
- pkg = core->package;
-
- /*
- * Compute the address of the HPET.
- */
- core->Hpet = (hpetTimer_t *)((uint8_t *)hpetArea + hpetReq.hpetOffset);
- core->HpetVec = hpetReq.hpetVector;
-
- /*
- * Enable interrupts
- */
- core->Hpet->Config |= Tn_INT_ENB_CNF;
-
- /*
- * Save the configuration
- */
- core->HpetCfg = core->Hpet->Config;
- core->HpetCmp = 0;
-
- /*
- * If the CPU is the "primary" for the package, then
- * add the HPET to the package too.
- */
- if (lcpu->primary) {
- pkg->Hpet = core->Hpet;
- pkg->HpetCfg = core->HpetCfg;
- pkg->HpetCmp = core->HpetCmp;
- pkg->flags |= X86PKG_FL_HAS_HPET;
- }
-
- ml_set_interrupts_enabled(enabled);
-
- return(0);
-}
-
-/*
- * Map the RCBA area.
- */
-static void
-map_rcbaArea(void)
-{
- /*
- * Get RCBA area physical address and map it
- */
- outl(cfgAdr, lpcCfg | (0xF0 & 0xFC));
- rcbaAreap = inl(cfgDat | (0xF0 & 0x03));
- rcbaArea = io_map_spec(rcbaAreap & -4096, PAGE_SIZE * 4, VM_WIMG_IO);
- kprintf("RCBA: vaddr = %08X, paddr = %08X\n", rcbaArea, rcbaAreap);
-}
-
-/*
- * Initialize the HPET
- */
-void
-hpet_init(void)
-{
- unsigned int *xmod;
-
- map_rcbaArea();
-
- /*
- * Is the HPET memory already enabled?
- * If not, set address and enable.
- */
- xmod = (uint32_t *)(rcbaArea + 0x3404); /* Point to the HPTC */
- uint32_t hptc = *xmod; /* Get HPET config */
- DBG(" current RCBA.HPTC: %08X\n", *xmod);
- if(!(hptc & hptcAE)) {
- DBG("HPET memory is not enabled, "
- "enabling and assigning to 0xFED00000 (hope that's ok)\n");
- *xmod = (hptc & ~3) | hptcAE;
- }
-
- /*
- * Get physical address of HPET and map it.
- */
- hpetAreap = hpetAddr | ((hptc & 3) << 12);
- hpetArea = io_map_spec(hpetAreap & -4096, PAGE_SIZE * 4, VM_WIMG_IO);
- kprintf("HPET: vaddr = %08X, paddr = %08X\n", hpetArea, hpetAreap);
-
- /*
- * Extract the HPET tick rate.
- * The period of the HPET is reported in femtoseconds (10**-15s)
- * and convert to frequency in hertz.
- */
- hpetFemto = (uint32_t)(((hpetReg_t *)hpetArea)->GCAP_ID >> 32);
- hpetFreq = (1 * Peta) / hpetFemto;
-
- /*
- * The conversion factor is the number of nanoseconds per HPET tick
- * with about 32 bits of fraction. The value is converted to a
- * base-2 fixed point number. To convert from HPET to nanoseconds,
- * multiply the value by the conversion factor using 96-bit arithmetic,
- * then shift right 32 bits. If the value is known to be small,
- * 64-bit arithmetic will work.
- */
-
- /*
- * Begin conversion of base 10 femtoseconds to base 2, calculate:
- * - HPET ticks to nanoseconds conversion in base 2 fraction (* 2**32)
- * - nanoseconds to HPET ticks conversion
- */
- hpetCvtt2n = (uint64_t)hpetFemto << 32;
- hpetCvtt2n = hpetCvtt2n / 1000000ULL;
- hpetCvtn2t = 0xFFFFFFFFFFFFFFFFULL / hpetCvtt2n;
- kprintf("HPET: Frequency = %6d.%04dMHz, "
- "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X\n",
- (uint32_t)(hpetFreq / Mega), (uint32_t)(hpetFreq % Mega),
- (uint32_t)(hpetCvtt2n >> 32), (uint32_t)hpetCvtt2n,
- (uint32_t)(hpetCvtn2t >> 32), (uint32_t)hpetCvtn2t);
-
-
- /* (TAKE OUT LATER)
- * Begin conversion of base 10 femtoseconds to base 2
- * HPET ticks to nanoseconds in base 2 fraction (times 1048576)
- */
- hpetCvt = (uint64_t)hpetFemto << 20;
- hpetCvt = hpetCvt / 1000000ULL;
-
- /* Calculate conversion from TSC to HPET */
- tsc2hpet = tmrCvt(tscFCvtt2n, hpetCvtn2t);
- DBG(" CVT: TSC to HPET = %08X.%08X\n",
- (uint32_t)(tsc2hpet >> 32), (uint32_t)tsc2hpet);
-
- /* Calculate conversion from HPET to TSC */
- hpet2tsc = tmrCvt(hpetCvtt2n, tscFCvtn2t);
- DBG(" CVT: HPET to TSC = %08X.%08X\n",
- (uint32_t)(hpet2tsc >> 32), (uint32_t)hpet2tsc);
-
- /* Calculate conversion from BUS to HPET */
- bus2hpet = tmrCvt(busFCvtt2n, hpetCvtn2t);
- DBG(" CVT: BUS to HPET = %08X.%08X\n",
- (uint32_t)(bus2hpet >> 32), (uint32_t)bus2hpet);
-
- /* Calculate conversion from HPET to BUS */
- hpet2bus = tmrCvt(hpetCvtt2n, busFCvtn2t);
- DBG(" CVT: HPET to BUS = %08X.%08X\n",
- (uint32_t)(hpet2bus >> 32), (uint32_t)hpet2bus);
-
-#if MACH_KDB
- db_display_hpet((hpetReg_t *)hpetArea); /* (BRINGUP) */
-#endif
-}
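
The fixed-point scheme described in the comments above can be sketched in plain C. This is illustrative only (the kernel uses tmrCvt() and hand-written 32x32 multiplies); the helper name is made up and the result is assumed to fit in 64 bits.

#include <stdint.h>

/* Multiply a 64-bit tick count by a 32.32 fixed-point conversion factor
 * (e.g. hpetCvtt2n or hpetCvtn2t) and shift right 32 bits, splitting the
 * operands so every partial product fits in 64 bits (96-bit precision). */
static uint64_t fixed_cvt(uint64_t t, uint64_t cvt)
{
	uint64_t t_hi = t >> 32,   t_lo = (uint32_t)t;
	uint64_t c_hi = cvt >> 32, c_lo = (uint32_t)cvt;

	return (t_hi * c_hi << 32) + t_hi * c_lo + t_lo * c_hi
	    + ((t_lo * c_lo) >> 32);
}

/* e.g.  nanoseconds = fixed_cvt(hpet_ticks, hpetCvtt2n);
 *       hpet_ticks  = fixed_cvt(nanoseconds, hpetCvtn2t);  */
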
-
-/*
- * This routine is used to get various information about the HPET
- * without having to export gobs of globals. It fills in a data
- * structure with the info.
- */
-void
-hpet_get_info(hpetInfo_t *info)
-{
- info->hpetCvtt2n = hpetCvtt2n;
- info->hpetCvtn2t = hpetCvtn2t;
- info->tsc2hpet = tsc2hpet;
- info->hpet2tsc = hpet2tsc;
- info->bus2hpet = bus2hpet;
- info->hpet2bus = hpet2bus;
- /*
- * XXX
- * We're repurposing the rcbaArea so we can use the HPET.
- * Eventually we'll rename this correctly.
- */
- info->rcbaArea = hpetArea;
- info->rcbaAreap = hpetAreap;
-}
-
-
-/*
- * This routine is called by the HPET driver
- * when it assigns an HPET timer to a processor.
- *
- * XXX with the new callback into the HPET driver,
- * this routine will be deprecated.
- */
-void
-ml_hpet_cfg(uint32_t cpu, uint32_t hpetVect)
-{
- uint64_t *hpetVaddr;
- hpetTimer_t *hpet;
- x86_lcpu_t *lcpu;
- x86_core_t *core;
- x86_pkg_t *pkg;
- boolean_t enabled;
-
- if(cpu > 1) {
- panic("ml_hpet_cfg: invalid cpu = %d\n", cpu);
- }
-
- lcpu = cpu_to_lcpu(cpu);
- core = lcpu->core;
- pkg = core->package;
-
- /*
- * Only deal with the primary CPU for the package.
- */
- if (!lcpu->primary)
- return;
-
- enabled = ml_set_interrupts_enabled(FALSE);
-
- /* Calculate address of the HPET for this processor */
- hpetVaddr = (uint64_t *)(((uint32_t)&(((hpetReg_t *)hpetArea)->TIM1_CONF)) + (cpu << 5));
- hpet = (hpetTimer_t *)hpetVaddr;
-
- DBG("ml_hpet_cfg: HPET for cpu %d at %p, vector = %d\n",
- cpu, hpetVaddr, hpetVect);
-
- /* Save the address and vector of the HPET for this processor */
- core->Hpet = hpet;
- core->HpetVec = hpetVect;
-
- /*
- * Enable interrupts
- */
- core->Hpet->Config |= Tn_INT_ENB_CNF;
-
- /* Save the configuration */
- core->HpetCfg = core->Hpet->Config;
- core->HpetCmp = 0;
-
- /*
- * We're only doing this for the primary CPU, so go
- * ahead and add the HPET to the package too.
- */
- pkg->Hpet = core->Hpet;
- pkg->HpetVec = core->HpetVec;
- pkg->HpetCfg = core->HpetCfg;
- pkg->HpetCmp = core->HpetCmp;
- pkg->flags |= X86PKG_FL_HAS_HPET;
-
- ml_set_interrupts_enabled(enabled);
-}
-
-/*
- * This is the HPET interrupt handler.
- *
- * It just hands off to the power management code so that the
- * appropriate things get done there.
- */
-int
-HPETInterrupt(void)
-{
-
- /* All we do here is to bump the count */
- x86_package()->HpetInt++;
-
- /*
- * Let power management do it's thing.
- */
- pmHPETInterrupt();
-
- /* Return and show that the 'rupt has been handled... */
- return 1;
-}
-
-
-static hpetReg_t saved_hpet;
-
-void
-hpet_save(void)
-{
- hpetReg_t *from = (hpetReg_t *) hpetArea;
- hpetReg_t *to = &saved_hpet;
-
- to->GEN_CONF = from->GEN_CONF;
- to->TIM0_CONF = from->TIM0_CONF;
- to->TIM0_COMP = from->TIM0_COMP;
- to->TIM1_CONF = from->TIM1_CONF;
- to->TIM1_COMP = from->TIM1_COMP;
- to->TIM2_CONF = from->TIM2_CONF;
- to->TIM2_COMP = from->TIM2_COMP;
- to->MAIN_CNT = from->MAIN_CNT;
-}
-
-void
-hpet_restore(void)
-{
- hpetReg_t *from = &saved_hpet;
- hpetReg_t *to = (hpetReg_t *) hpetArea;
-
- /*
- * Is the HPET memory already enabled?
- * If not, set address and enable.
- */
- uint32_t *hptcp = (uint32_t *)(rcbaArea + 0x3404);
- uint32_t hptc = *hptcp;
- if(!(hptc & hptcAE)) {
- DBG("HPET memory is not enabled, "
- "enabling and assigning to 0xFED00000 (hope that's ok)\n");
- *hptcp = (hptc & ~3) | hptcAE;
- }
-
- to->GEN_CONF = from->GEN_CONF & ~1;
-
- to->TIM0_CONF = from->TIM0_CONF;
- to->TIM0_COMP = from->TIM0_COMP;
- to->TIM1_CONF = from->TIM1_CONF;
- to->TIM1_COMP = from->TIM1_COMP;
- to->TIM2_CONF = from->TIM2_CONF;
- to->TIM2_COMP = from->TIM2_COMP;
- to->GINTR_STA = -1ULL;
- to->MAIN_CNT = from->MAIN_CNT;
-
- to->GEN_CONF = from->GEN_CONF;
-}
-
-/*
- * Read the HPET timer
- *
- */
-uint64_t
-rdHPET(void)
-{
- hpetReg_t *hpetp = (hpetReg_t *) hpetArea;
- volatile uint32_t *regp = (uint32_t *) &hpetp->MAIN_CNT;
- uint32_t high;
- uint32_t low;
-
- do {
- high = *(regp + 1);
- low = *regp;
- } while (high != *(regp + 1));
-
- return (((uint64_t) high) << 32) | low;
-}
-
-#if MACH_KDB
-
-#define HI32(x) ((uint32_t)(((x) >> 32) & 0xFFFFFFFF))
-#define LO32(x) ((uint32_t)((x) & 0xFFFFFFFF))
-
-/*
- * Displays HPET memory mapped area
- * hp
- */
-void
-db_hpet(__unused db_expr_t addr, __unused int have_addr, __unused db_expr_t count, __unused char *modif)
-{
-
- db_display_hpet((hpetReg_t *) hpetArea); /* Dump out the HPET
- * stuff */
- return;
-}
-
-void
-db_display_hpet(hpetReg_t *hpt)
-{
- uint64_t cmain;
-
- cmain = hpt->MAIN_CNT; /* Get the main timer */
-
- /* General capabilities */
- db_printf(" GCAP_ID = %08X.%08X\n",
- HI32(hpt->GCAP_ID), LO32(hpt->GCAP_ID));
- /* General configuration */
- db_printf(" GEN_CONF = %08X.%08X\n",
- HI32(hpt->GEN_CONF), LO32(hpt->GEN_CONF));
- /* General Interrupt status */
- db_printf("GINTR_STA = %08X.%08X\n",
- HI32(hpt->GINTR_STA), LO32(hpt->GINTR_STA));
- /* Main counter */
- db_printf(" MAIN_CNT = %08X.%08X\n",
- HI32(cmain), LO32(cmain));
- /* Timer 0 config and cap */
- db_printf("TIM0_CONF = %08X.%08X\n",
- HI32(hpt->TIM0_CONF), LO32(hpt->TIM0_CONF));
- /* Timer 0 comparator */
- db_printf("TIM0_COMP = %08X.%08X\n",
- HI32(hpt->TIM0_COMP), LO32(hpt->TIM0_COMP));
- /* Timer 1 config and cap */
- db_printf("TIM0_CONF = %08X.%08X\n",
- HI32(hpt->TIM1_CONF), LO32(hpt->TIM1_CONF));
- /* Timer 1 comparator */
- db_printf("TIM1_COMP = %08X.%08X\n",
- HI32(hpt->TIM1_COMP), LO32(hpt->TIM1_COMP));
- /* Timer 2 config and cap */
- db_printf("TIM2_CONF = %08X.%08X\n",
- HI32(hpt->TIM2_CONF), LO32(hpt->TIM2_CONF));
- /* Timer 2 comparator */
- db_printf("TIM2_COMP = %08X.%08X\n",
- HI32(hpt->TIM2_COMP), LO32(hpt->TIM2_COMP));
-
- db_printf("\nHPET Frequency = %d.%05dMHz\n",
- (uint32_t) (hpetFreq / 1000000), (uint32_t) (hpetFreq % 1000000));
-}
-#endif
+++ /dev/null
-/*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#ifdef KERNEL_PRIVATE
-#ifndef _I386_HPET_H_
-#define _I386_HPET_H_
-
-/*
- * HPET kernel functions to support the HPET KEXT and the
- * power management KEXT.
- */
-
-
-/*
- * Memory mapped registers for the HPET
- */
-typedef struct hpetReg {
- uint64_t GCAP_ID; /* General capabilities */
- uint64_t rsv1;
- uint64_t GEN_CONF; /* General configuration */
- uint64_t rsv2;
- uint64_t GINTR_STA; /* General Interrupt status */
- uint64_t rsv3[25];
- uint64_t MAIN_CNT; /* Main counter */
- uint64_t rsv4;
- uint64_t TIM0_CONF; /* Timer 0 config and cap */
-#define TIM_CONF 0
-#define Tn_INT_ENB_CNF 4
- uint64_t TIM0_COMP; /* Timer 0 comparator */
-#define TIM_COMP 8
- uint64_t rsv5[2];
- uint64_t TIM1_CONF; /* Timer 1 config and cap */
- uint64_t TIM1_COMP; /* Timer 1 comparator */
- uint64_t rsv6[2];
- uint64_t TIM2_CONF; /* Timer 2 config and cap */
- uint64_t TIM2_COMP; /* Timer 2 comparator */
- uint64_t rsv7[2];
-} hpetReg;
-typedef struct hpetReg hpetReg_t;
-
-typedef struct hpetTimer {
- uint64_t Config; /* Timer config and capabilities */
- uint64_t Compare; /* Timer comparitor */
-} hpetTimer_t;
-
-struct hpetInfo
-{
- uint64_t hpetCvtt2n;
- uint64_t hpetCvtn2t;
- uint64_t tsc2hpet;
- uint64_t hpet2tsc;
- uint64_t bus2hpet;
- uint64_t hpet2bus;
- uint32_t rcbaArea;
- uint32_t rcbaAreap;
-};
-typedef struct hpetInfo hpetInfo_t;
-
-struct hpetRequest
-{
- uint32_t flags;
- uint32_t hpetOffset;
- uint32_t hpetVector;
-};
-typedef struct hpetRequest hpetRequest_t;
-
-#define HPET_REQFL_64BIT 0x00000001 /* Timer is 64 bits */
-
-extern uint64_t hpetFemto;
-extern uint64_t hpetFreq;
-extern uint64_t hpetCvtt2n;
-extern uint64_t hpetCvtn2t;
-extern uint64_t tsc2hpet;
-extern uint64_t hpet2tsc;
-extern uint64_t bus2hpet;
-extern uint64_t hpet2bus;
-
-extern uint32_t rcbaArea;
-extern uint32_t rcbaAreap;
-
-extern void map_rcbaAread(void);
-extern void hpet_init(void);
-
-extern void hpet_save(void);
-extern void hpet_restore(void);
-
-#ifdef XNU_KERNEL_PRIVATE
-extern int HPETInterrupt(void);
-#endif
-
-extern int hpet_register_callback(int (*hpet_reqst)(uint32_t apicid, void *arg, hpetRequest_t *hpet), void *arg);
-extern int hpet_request(uint32_t cpu);
-
-extern uint64_t rdHPET(void);
-extern void hpet_get_info(hpetInfo_t *info);
-
-#define hpetAddr 0xFED00000
-#define hptcAE 0x80
-
-#endif /* _I386_HPET_H_ */
-
-#endif /* KERNEL_PRIVATE */
+++ /dev/null
-/*
- * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- *
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- *
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- *
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#ifndef _I386_HW_DEFS_H_
-#define _I386_HW_DEFS_H_
-
-
-#define pmMwaitC1 0x00
-#define pmMwaitC2 0x10
-#define pmMwaitC3 0x20
-#define pmMwaitC4 0x30
-#define pmMwaitBrInt 0x1
-
-#define pmBase 0x400
-#define pmCtl1 0x04
-#define pmCtl2 0x20
-#define pmC3Res 0x54
-#define pmStatus 0x00
-#define msrTSC 0x10
-
-#endif /* _I386_HW_DEFS_H_ */
/*
- * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <i386/Diagnostics.h>
#include <i386/pmCPU.h>
#include <i386/tsc.h>
-#include <i386/hpet.h>
#include <i386/locks.h> /* LcksOpts */
#if MACH_KDB
#include <ddb/db_aout.h>
/* setup debugging output if one has been chosen */
PE_init_kprintf(FALSE);
- if (!PE_parse_boot_arg("diag", &dgWork.dgFlags))
+ if (!PE_parse_boot_argn("diag", &dgWork.dgFlags, sizeof (dgWork.dgFlags)))
dgWork.dgFlags = 0;
serialmode = 0;
- if(PE_parse_boot_arg("serial", &serialmode)) {
+ if(PE_parse_boot_argn("serial", &serialmode, sizeof (serialmode))) {
/* We want a serial keyboard and/or console */
kprintf("Serial mode specified: %08X\n", serialmode);
}
kprintf("version_variant = %s\n", version_variant);
kprintf("version = %s\n", version);
- if (!PE_parse_boot_arg("maxmem", &maxmem))
- maxmemtouse=0;
+ if (!PE_parse_boot_argn("maxmem", &maxmem, sizeof (maxmem)))
+ maxmemtouse = 0;
else
maxmemtouse = ((uint64_t)maxmem) * (uint64_t)(1024 * 1024);
- if (PE_parse_boot_arg("cpus", &cpus)) {
+ if (PE_parse_boot_argn("cpus", &cpus, sizeof (cpus))) {
if ((0 < cpus) && (cpus < max_ncpus))
max_ncpus = cpus;
}
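
The boot-arg hunks in this file all follow the same migration: PE_parse_boot_argn() takes the size of the destination so the parser can bound what it writes. A sketch of the idiom (the variable and default shown are illustrative):

	uint32_t mtxspin;	/* any integer boot-arg works the same way */

	if (!PE_parse_boot_argn("mtxspin", &mtxspin, sizeof (mtxspin)))
		mtxspin = 0;			/* boot-arg absent: keep a default */
	else
		kprintf("mtxspin = %u\n", mtxspin);
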
/*
* debug support for > 4G systems
*/
- if (!PE_parse_boot_arg("himemory_mode", &vm_himemory_mode))
+ if (!PE_parse_boot_argn("himemory_mode", &vm_himemory_mode, sizeof (vm_himemory_mode)))
vm_himemory_mode = 0;
if (!PE_parse_boot_argn("immediate_NMI", &fidn, sizeof (fidn)))
boolean_t IA32e = FALSE;
if (cpuid_extfeatures() & CPUID_EXTFEATURE_EM64T) {
kprintf("EM64T supported");
- if (PE_parse_boot_arg("-legacy", &legacy_mode)) {
+ if (PE_parse_boot_argn("-legacy", &legacy_mode, sizeof (legacy_mode))) {
kprintf(" but legacy mode forced\n");
} else {
IA32e = TRUE;
nx_enabled = 0;
/* Obtain "lcks" options:this currently controls lock statistics */
- if (!PE_parse_boot_arg("lcks", &LcksOpts))
+ if (!PE_parse_boot_argn("lcks", &LcksOpts, sizeof (LcksOpts)))
LcksOpts = 0;
/*
*/
i386_vm_init(maxmemtouse, IA32e, kernelBootArgs);
- if ( ! PE_parse_boot_arg("novmx", &noVMX))
+ if ( ! PE_parse_boot_argn("novmx", &noVMX, sizeof (noVMX)))
noVMX = 0; /* OK to support Altivec in rosetta? */
tsc_init();
- hpet_init();
power_management_init();
PE_init_platform(TRUE, kernelBootArgs);
mov %edx,%edi
rdtsc /* read cyclecount into %edx:%eax */
+ lfence
addl %ecx,%eax /* fetch and timeout */
adcl $0,%edx /* add carry */
mov %edx,%ecx
* Here after spinning INNER_LOOP_COUNT times, check for timeout
*/
rdtsc /* cyclecount into %edx:%eax */
+ lfence
cmpl %ecx,%edx /* compare high-order 32-bits */
jb 4b /* continue spinning if less, or */
cmpl %ebx,%eax /* compare low-order 32-bits */
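
The lfence additions above keep the processor from letting younger instructions run ahead of rdtsc in the timeout loop. The same idiom as a stand-alone C helper (assumes GCC/Clang inline assembly; not part of the change):

#include <stdint.h>

/* Read the TSC; the trailing lfence does not let any later instruction
 * begin until the timestamp has actually been taken. */
static inline uint64_t
rdtsc64_fenced(void)
{
	uint32_t lo, hi;

	__asm__ __volatile__("rdtsc; lfence" : "=a" (lo), "=d" (hi));
	return ((uint64_t)hi << 32) | lo;
}
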
* Compute the memory size.
*/
- if ((1 == vm_himemory_mode) || PE_parse_boot_arg("-x", &safeboot)) {
+ if ((1 == vm_himemory_mode) || PE_parse_boot_argn("-x", &safeboot, sizeof (safeboot))) {
maxpg = 1 << (32 - I386_PGSHIFT);
}
avail_remaining = 0;
kprintf("Physical memory %llu MB\n", sane_size/MEG);
- if (!PE_parse_boot_arg("max_valid_dma_addr", &maxdmaaddr))
+ if (!PE_parse_boot_argn("max_valid_dma_addr", &maxdmaaddr, sizeof (maxdmaaddr)))
max_valid_dma_address = 1024ULL * 1024ULL * 4096ULL;
else
max_valid_dma_address = ((uint64_t) maxdmaaddr) * 1024ULL * 1024ULL;
- if (!PE_parse_boot_arg("maxbouncepool", &maxbouncepoolsize))
+ if (!PE_parse_boot_argn("maxbouncepool", &maxbouncepoolsize, sizeof (maxbouncepoolsize)))
maxbouncepoolsize = MAXBOUNCEPOOL;
else
maxbouncepoolsize = maxbouncepoolsize * (1024 * 1024);
* in order to correctly determine the size of the mbuf pool
* that will be reserved
*/
- if (!PE_parse_boot_arg("maxloreserve", &maxloreserve))
+ if (!PE_parse_boot_argn("maxloreserve", &maxloreserve, sizeof (maxloreserve)))
maxloreserve = MAXLORESERVE + bsd_mbuf_cluster_reserve();
else
maxloreserve = maxloreserve * (1024 * 1024);
--- /dev/null
+/*
+ * Copyright (c) 2008 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * @OSF_COPYRIGHT@
+ */
+
+#include <mach/mach_types.h>
+#include <mach/kern_return.h>
+
+#include <kern/kern_types.h>
+#include <kern/cpu_number.h>
+#include <kern/cpu_data.h>
+#include <kern/assert.h>
+#include <kern/machine.h>
+
+#include <vm/vm_map.h>
+#include <vm/vm_kern.h>
+
+#include <i386/lapic.h>
+#include <i386/cpuid.h>
+#include <i386/proc_reg.h>
+#include <i386/machine_cpu.h>
+#include <i386/misc_protos.h>
+#include <i386/mp.h>
+#include <i386/mtrr.h>
+#include <i386/postcode.h>
+#include <i386/cpu_threads.h>
+#include <i386/trap.h>
+#include <i386/machine_routines.h>
+#include <i386/machine_check.h>
+
+#if MACH_KDB
+#include <machine/db_machdep.h>
+#endif
+
+#include <sys/kdebug.h>
+
+#if MP_DEBUG
+#define PAUSE delay(1000000)
+#define DBG(x...) kprintf(x)
+#else
+#define DBG(x...)
+#define PAUSE
+#endif /* MP_DEBUG */
+
+/* Initialize lapic_id so cpu_number() works on non SMP systems */
+unsigned long lapic_id_initdata = 0;
+unsigned long lapic_id = (unsigned long)&lapic_id_initdata;
+vm_offset_t lapic_start;
+
+static i386_intr_func_t lapic_intr_func[LAPIC_FUNC_TABLE_SIZE];
+
+/* TRUE if local APIC was enabled by the OS, not by the BIOS */
+static boolean_t lapic_os_enabled = FALSE;
+
+/* Base vector for local APIC interrupt sources */
+int lapic_interrupt_base = LAPIC_DEFAULT_INTERRUPT_BASE;
+
+int lapic_to_cpu[MAX_CPUS];
+int cpu_to_lapic[MAX_CPUS];
+
+static void
+lapic_cpu_map_init(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_CPUS; i++) {
+ lapic_to_cpu[i] = -1;
+ cpu_to_lapic[i] = -1;
+ }
+}
+
+void
+lapic_cpu_map(int apic_id, int cpu)
+{
+ cpu_to_lapic[cpu] = apic_id;
+ lapic_to_cpu[apic_id] = cpu;
+}
+
+/*
+ * Retrieve the local apic ID of a cpu.
+ *
+ * Returns the local apic ID for the given processor.
+ * If the processor does not exist or apic not configured, returns -1.
+ */
+
+uint32_t
+ml_get_apicid(uint32_t cpu)
+{
+ if(cpu >= (uint32_t)MAX_CPUS)
+ return 0xFFFFFFFF; /* Return -1 if cpu too big */
+
+ /* Return the apic ID (or -1 if not configured) */
+ return (uint32_t)cpu_to_lapic[cpu];
+
+}
+
+#ifdef MP_DEBUG
+static void
+lapic_cpu_map_dump(void)
+{
+ int i;
+
+ for (i = 0; i < MAX_CPUS; i++) {
+ if (cpu_to_lapic[i] == -1)
+ continue;
+ kprintf("cpu_to_lapic[%d]: %d\n",
+ i, cpu_to_lapic[i]);
+ }
+ for (i = 0; i < MAX_CPUS; i++) {
+ if (lapic_to_cpu[i] == -1)
+ continue;
+ kprintf("lapic_to_cpu[%d]: %d\n",
+ i, lapic_to_cpu[i]);
+ }
+}
+#endif /* MP_DEBUG */
+
+void
+lapic_init(void)
+{
+ int result;
+ vm_map_entry_t entry;
+ uint32_t lo;
+ uint32_t hi;
+ boolean_t is_boot_processor;
+ boolean_t is_lapic_enabled;
+ vm_offset_t lapic_base;
+
+ /* Examine the local APIC state */
+ rdmsr(MSR_IA32_APIC_BASE, lo, hi);
+ is_boot_processor = (lo & MSR_IA32_APIC_BASE_BSP) != 0;
+ is_lapic_enabled = (lo & MSR_IA32_APIC_BASE_ENABLE) != 0;
+ lapic_base = (lo & MSR_IA32_APIC_BASE_BASE);
+ kprintf("MSR_IA32_APIC_BASE 0x%x %s %s\n", lapic_base,
+ is_lapic_enabled ? "enabled" : "disabled",
+ is_boot_processor ? "BSP" : "AP");
+ if (!is_boot_processor || !is_lapic_enabled)
+ panic("Unexpected local APIC state\n");
+
+ /* Establish a map to the local apic */
+ lapic_start = vm_map_min(kernel_map);
+ result = vm_map_find_space(kernel_map,
+ (vm_map_address_t *) &lapic_start,
+ round_page(LAPIC_SIZE), 0,
+ VM_MAKE_TAG(VM_MEMORY_IOKIT), &entry);
+ if (result != KERN_SUCCESS) {
+ panic("smp_init: vm_map_find_entry FAILED (err=%d)", result);
+ }
+ vm_map_unlock(kernel_map);
+/* Map in the local APIC non-cacheable, as recommended by Intel
+ * in section 8.4.1 of the "System Programming Guide".
+ */
+ pmap_enter(pmap_kernel(),
+ lapic_start,
+ (ppnum_t) i386_btop(lapic_base),
+ VM_PROT_READ|VM_PROT_WRITE,
+ VM_WIMG_IO,
+ TRUE);
+ lapic_id = (unsigned long)(lapic_start + LAPIC_ID);
+
+ if ((LAPIC_READ(VERSION)&LAPIC_VERSION_MASK) < 0x14) {
+ printf("Local APIC version 0x%x, 0x14 or greater expected\n",
+ (LAPIC_READ(VERSION)&LAPIC_VERSION_MASK));
+ }
+
+ /* Set up the lapic_id <-> cpu_number map and add this boot processor */
+ lapic_cpu_map_init();
+ lapic_cpu_map((LAPIC_READ(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK, 0);
+ kprintf("Boot cpu local APIC id 0x%x\n", cpu_to_lapic[0]);
+}
+
+
+static int
+lapic_esr_read(void)
+{
+ /* write-read register */
+ LAPIC_WRITE(ERROR_STATUS, 0);
+ return LAPIC_READ(ERROR_STATUS);
+}
+
+static void
+lapic_esr_clear(void)
+{
+ LAPIC_WRITE(ERROR_STATUS, 0);
+ LAPIC_WRITE(ERROR_STATUS, 0);
+}
+
+static const char *DM_str[8] = {
+ "Fixed",
+ "Lowest Priority",
+ "Invalid",
+ "Invalid",
+ "NMI",
+ "Reset",
+ "Invalid",
+ "ExtINT"};
+
+void
+lapic_dump(void)
+{
+ int i;
+
+#define BOOL(a) ((a)?' ':'!')
+#define VEC(lvt) \
+ LAPIC_READ(lvt)&LAPIC_LVT_VECTOR_MASK
+#define DS(lvt) \
+ (LAPIC_READ(lvt)&LAPIC_LVT_DS_PENDING)?" SendPending" : "Idle"
+#define DM(lvt) \
+ DM_str[(LAPIC_READ(lvt)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK]
+#define MASK(lvt) \
+ BOOL(LAPIC_READ(lvt)&LAPIC_LVT_MASKED)
+#define TM(lvt) \
+ (LAPIC_READ(lvt)&LAPIC_LVT_TM_LEVEL)? "Level" : "Edge"
+#define IP(lvt) \
+ (LAPIC_READ(lvt)&LAPIC_LVT_IP_PLRITY_LOW)? "Low " : "High"
+
+ kprintf("LAPIC %d at 0x%x version 0x%x\n",
+ (LAPIC_READ(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK,
+ lapic_start,
+ LAPIC_READ(VERSION)&LAPIC_VERSION_MASK);
+ kprintf("Priorities: Task 0x%x Arbitration 0x%x Processor 0x%x\n",
+ LAPIC_READ(TPR)&LAPIC_TPR_MASK,
+ LAPIC_READ(APR)&LAPIC_APR_MASK,
+ LAPIC_READ(PPR)&LAPIC_PPR_MASK);
+ kprintf("Destination Format 0x%x Logical Destination 0x%x\n",
+ LAPIC_READ(DFR)>>LAPIC_DFR_SHIFT,
+ LAPIC_READ(LDR)>>LAPIC_LDR_SHIFT);
+ kprintf("%cEnabled %cFocusChecking SV 0x%x\n",
+ BOOL(LAPIC_READ(SVR)&LAPIC_SVR_ENABLE),
+ BOOL(!(LAPIC_READ(SVR)&LAPIC_SVR_FOCUS_OFF)),
+ LAPIC_READ(SVR) & LAPIC_SVR_MASK);
+ kprintf("LVT_TIMER: Vector 0x%02x %s %cmasked %s\n",
+ VEC(LVT_TIMER),
+ DS(LVT_TIMER),
+ MASK(LVT_TIMER),
+ (LAPIC_READ(LVT_TIMER)&LAPIC_LVT_PERIODIC)?"Periodic":"OneShot");
+ kprintf(" Initial Count: 0x%08x \n", LAPIC_READ(TIMER_INITIAL_COUNT));
+ kprintf(" Current Count: 0x%08x \n", LAPIC_READ(TIMER_CURRENT_COUNT));
+ kprintf(" Divide Config: 0x%08x \n", LAPIC_READ(TIMER_DIVIDE_CONFIG));
+ kprintf("LVT_PERFCNT: Vector 0x%02x [%s] %s %cmasked\n",
+ VEC(LVT_PERFCNT),
+ DM(LVT_PERFCNT),
+ DS(LVT_PERFCNT),
+ MASK(LVT_PERFCNT));
+ kprintf("LVT_THERMAL: Vector 0x%02x [%s] %s %cmasked\n",
+ VEC(LVT_THERMAL),
+ DM(LVT_THERMAL),
+ DS(LVT_THERMAL),
+ MASK(LVT_THERMAL));
+ kprintf("LVT_LINT0: Vector 0x%02x [%s][%s][%s] %s %cmasked\n",
+ VEC(LVT_LINT0),
+ DM(LVT_LINT0),
+ TM(LVT_LINT0),
+ IP(LVT_LINT0),
+ DS(LVT_LINT0),
+ MASK(LVT_LINT0));
+ kprintf("LVT_LINT1: Vector 0x%02x [%s][%s][%s] %s %cmasked\n",
+ VEC(LVT_LINT1),
+ DM(LVT_LINT1),
+ TM(LVT_LINT1),
+ IP(LVT_LINT1),
+ DS(LVT_LINT1),
+ MASK(LVT_LINT1));
+ kprintf("LVT_ERROR: Vector 0x%02x %s %cmasked\n",
+ VEC(LVT_ERROR),
+ DS(LVT_ERROR),
+ MASK(LVT_ERROR));
+ kprintf("ESR: %08x \n", lapic_esr_read());
+ kprintf(" ");
+ for(i=0xf; i>=0; i--)
+ kprintf("%x%x%x%x",i,i,i,i);
+ kprintf("\n");
+ kprintf("TMR: 0x");
+ for(i=7; i>=0; i--)
+ kprintf("%08x",LAPIC_READ_OFFSET(TMR_BASE, i*0x10));
+ kprintf("\n");
+ kprintf("IRR: 0x");
+ for(i=7; i>=0; i--)
+ kprintf("%08x",LAPIC_READ_OFFSET(IRR_BASE, i*0x10));
+ kprintf("\n");
+ kprintf("ISR: 0x");
+ for(i=7; i >= 0; i--)
+ kprintf("%08x",LAPIC_READ_OFFSET(ISR_BASE, i*0x10));
+ kprintf("\n");
+}
+
+#if MACH_KDB
+/*
+ * Displays apic junk
+ *
+ * da
+ */
+void
+db_apic(__unused db_expr_t addr,
+ __unused int have_addr,
+ __unused db_expr_t count,
+ __unused char *modif)
+{
+
+ lapic_dump();
+
+ return;
+}
+
+#endif
+
+boolean_t
+lapic_probe(void)
+{
+ uint32_t lo;
+ uint32_t hi;
+
+ if (cpuid_features() & CPUID_FEATURE_APIC)
+ return TRUE;
+
+ if (cpuid_family() == 6 || cpuid_family() == 15) {
+ /*
+ * Mobile Pentiums:
+ * There may be a local APIC which wasn't enabled by BIOS.
+ * So we try to enable it explicitly.
+ */
+ rdmsr(MSR_IA32_APIC_BASE, lo, hi);
+ lo &= ~MSR_IA32_APIC_BASE_BASE;
+ lo |= MSR_IA32_APIC_BASE_ENABLE | LAPIC_START;
+ lo |= MSR_IA32_APIC_BASE_ENABLE;
+ wrmsr(MSR_IA32_APIC_BASE, lo, hi);
+
+ /*
+ * Re-initialize cpu features info and re-check.
+ */
+ cpuid_set_info();
+ if (cpuid_features() & CPUID_FEATURE_APIC) {
+ printf("Local APIC discovered and enabled\n");
+ lapic_os_enabled = TRUE;
+ lapic_interrupt_base = LAPIC_REDUCED_INTERRUPT_BASE;
+ return TRUE;
+ }
+ }
+
+ return FALSE;
+}
+
+void
+lapic_shutdown(void)
+{
+ uint32_t lo;
+ uint32_t hi;
+ uint32_t value;
+
+ /* Shutdown if local APIC was enabled by OS */
+ if (lapic_os_enabled == FALSE)
+ return;
+
+ mp_disable_preemption();
+
+ /* ExtINT: masked */
+ if (get_cpu_number() == master_cpu) {
+ value = LAPIC_READ(LVT_LINT0);
+ value |= LAPIC_LVT_MASKED;
+ LAPIC_WRITE(LVT_LINT0, value);
+ }
+
+ /* Timer: masked */
+ LAPIC_WRITE(LVT_TIMER, LAPIC_READ(LVT_TIMER) | LAPIC_LVT_MASKED);
+
+ /* Perfmon: masked */
+ LAPIC_WRITE(LVT_PERFCNT, LAPIC_READ(LVT_PERFCNT) | LAPIC_LVT_MASKED);
+
+ /* Error: masked */
+ LAPIC_WRITE(LVT_ERROR, LAPIC_READ(LVT_ERROR) | LAPIC_LVT_MASKED);
+
+ /* APIC software disabled */
+ LAPIC_WRITE(SVR, LAPIC_READ(SVR) & ~LAPIC_SVR_ENABLE);
+
+ /* Bypass the APIC completely and update cpu features */
+ rdmsr(MSR_IA32_APIC_BASE, lo, hi);
+ lo &= ~MSR_IA32_APIC_BASE_ENABLE;
+ wrmsr(MSR_IA32_APIC_BASE, lo, hi);
+ cpuid_set_info();
+
+ mp_enable_preemption();
+}
+
+void
+lapic_configure(void)
+{
+ int value;
+
+ /* Set flat delivery model, logical processor id */
+ LAPIC_WRITE(DFR, LAPIC_DFR_FLAT);
+ LAPIC_WRITE(LDR, (get_cpu_number()) << LAPIC_LDR_SHIFT);
+
+ /* Accept all */
+ LAPIC_WRITE(TPR, 0);
+
+ LAPIC_WRITE(SVR, LAPIC_VECTOR(SPURIOUS) | LAPIC_SVR_ENABLE);
+
+ /* ExtINT */
+ if (get_cpu_number() == master_cpu) {
+ value = LAPIC_READ(LVT_LINT0);
+ value &= ~LAPIC_LVT_MASKED;
+ value |= LAPIC_LVT_DM_EXTINT;
+ LAPIC_WRITE(LVT_LINT0, value);
+ }
+
+ /* Timer: unmasked, one-shot */
+ LAPIC_WRITE(LVT_TIMER, LAPIC_VECTOR(TIMER));
+
+ /* Perfmon: unmasked */
+ LAPIC_WRITE(LVT_PERFCNT, LAPIC_VECTOR(PERFCNT));
+
+ /* Thermal: unmasked */
+ LAPIC_WRITE(LVT_THERMAL, LAPIC_VECTOR(THERMAL));
+
+ lapic_esr_clear();
+
+ LAPIC_WRITE(LVT_ERROR, LAPIC_VECTOR(ERROR));
+}
+
+void
+lapic_set_timer(
+ boolean_t interrupt,
+ lapic_timer_mode_t mode,
+ lapic_timer_divide_t divisor,
+ lapic_timer_count_t initial_count)
+{
+ boolean_t state;
+ uint32_t timer_vector;
+
+ state = ml_set_interrupts_enabled(FALSE);
+ timer_vector = LAPIC_READ(LVT_TIMER);
+ timer_vector &= ~(LAPIC_LVT_MASKED|LAPIC_LVT_PERIODIC);
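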
+ timer_vector |= interrupt ? 0 : LAPIC_LVT_MASKED;
+ timer_vector |= (mode == periodic) ? LAPIC_LVT_PERIODIC : 0;
+ LAPIC_WRITE(LVT_TIMER, timer_vector);
+ LAPIC_WRITE(TIMER_DIVIDE_CONFIG, divisor);
+ LAPIC_WRITE(TIMER_INITIAL_COUNT, initial_count);
+ ml_set_interrupts_enabled(state);
+}
+
+void
+lapic_get_timer(
+ lapic_timer_mode_t *mode,
+ lapic_timer_divide_t *divisor,
+ lapic_timer_count_t *initial_count,
+ lapic_timer_count_t *current_count)
+{
+ boolean_t state;
+
+ state = ml_set_interrupts_enabled(FALSE);
+ if (mode)
+ *mode = (LAPIC_READ(LVT_TIMER) & LAPIC_LVT_PERIODIC) ?
+ periodic : one_shot;
+ if (divisor)
+ *divisor = LAPIC_READ(TIMER_DIVIDE_CONFIG) & LAPIC_TIMER_DIVIDE_MASK;
+ if (initial_count)
+ *initial_count = LAPIC_READ(TIMER_INITIAL_COUNT);
+ if (current_count)
+ *current_count = LAPIC_READ(TIMER_CURRENT_COUNT);
+ ml_set_interrupts_enabled(state);
+}
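
A usage sketch for the timer interface above (the count and divisor are arbitrary illustrative values):

	lapic_timer_mode_t	mode;
	lapic_timer_divide_t	divisor;
	lapic_timer_count_t	initial, current;

	/* Arm an unmasked, one-shot countdown of 100000 ticks at divide-by-1. */
	lapic_set_timer(TRUE, one_shot, divide_by_1, 100000);

	/* Later: read the configuration and the remaining count back. */
	lapic_get_timer(&mode, &divisor, &initial, &current);
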
+
+static inline void
+_lapic_end_of_interrupt(void)
+{
+ LAPIC_WRITE(EOI, 0);
+}
+
+void
+lapic_end_of_interrupt(void)
+{
+ _lapic_end_of_interrupt();
+}
+
+void
+lapic_set_intr_func(int vector, i386_intr_func_t func)
+{
+ if (vector > lapic_interrupt_base)
+ vector -= lapic_interrupt_base;
+
+ switch (vector) {
+ case LAPIC_NMI_INTERRUPT:
+ case LAPIC_INTERPROCESSOR_INTERRUPT:
+ case LAPIC_TIMER_INTERRUPT:
+ case LAPIC_THERMAL_INTERRUPT:
+ case LAPIC_PERFCNT_INTERRUPT:
+ lapic_intr_func[vector] = func;
+ break;
+ default:
+ panic("lapic_set_intr_func(%d,%p) invalid vector\n",
+ vector, func);
+ }
+}
+
+int
+lapic_interrupt(int interrupt, x86_saved_state_t *state)
+{
+ int retval = 0;
+
+ interrupt -= lapic_interrupt_base;
+ if (interrupt < 0) {
+ if (interrupt == (LAPIC_NMI_INTERRUPT - lapic_interrupt_base) &&
+ lapic_intr_func[LAPIC_NMI_INTERRUPT] != NULL) {
+ retval = (*lapic_intr_func[LAPIC_NMI_INTERRUPT])(state);
+ _lapic_end_of_interrupt();
+ return retval;
+ }
+ else
+ return 0;
+ }
+
+ switch(interrupt) {
+ case LAPIC_TIMER_INTERRUPT:
+ case LAPIC_THERMAL_INTERRUPT:
+ case LAPIC_INTERPROCESSOR_INTERRUPT:
+ if (lapic_intr_func[interrupt] != NULL)
+ (void) (*lapic_intr_func[interrupt])(state);
+ if (interrupt == LAPIC_PERFCNT_INTERRUPT)
+ LAPIC_WRITE(LVT_PERFCNT, LAPIC_VECTOR(PERFCNT));
+ _lapic_end_of_interrupt();
+ retval = 1;
+ break;
+ case LAPIC_ERROR_INTERRUPT:
+ lapic_dump();
+ panic("Local APIC error\n");
+ _lapic_end_of_interrupt();
+ retval = 1;
+ break;
+ case LAPIC_SPURIOUS_INTERRUPT:
+ kprintf("SPIV\n");
+ /* No EOI required here */
+ retval = 1;
+ break;
+ }
+
+ return retval;
+}
+
+void
+lapic_smm_restore(void)
+{
+ boolean_t state;
+
+ if (lapic_os_enabled == FALSE)
+ return;
+
+ state = ml_set_interrupts_enabled(FALSE);
+
+ if (LAPIC_ISR_IS_SET(LAPIC_REDUCED_INTERRUPT_BASE, TIMER)) {
+ /*
+ * Bogus SMI handler enables interrupts but does not know about
+ * local APIC interrupt sources. When APIC timer counts down to
+ * zero while in SMM, local APIC will end up waiting for an EOI
+ * but no interrupt was delivered to the OS.
+ */
+ _lapic_end_of_interrupt();
+
+ /*
+ * The timer is one-shot; trigger a quick countdown to force
+ * another timer interrupt.
+ */
+ if (LAPIC_READ(TIMER_CURRENT_COUNT) == 0) {
+ LAPIC_WRITE(TIMER_INITIAL_COUNT, 1);
+ }
+
+ kprintf("lapic_smm_restore\n");
+ }
+
+ ml_set_interrupts_enabled(state);
+}
+
--- /dev/null
+/*
+ * Copyright (c) 2008 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * @OSF_COPYRIGHT@
+ *
+ */
+#ifndef _I386_LAPIC_H_
+#define _I386_LAPIC_H_
+
+#define LAPIC_START 0xFEE00000
+#define LAPIC_SIZE 0x00000400
+
+#define LAPIC_ID 0x00000020
+#define LAPIC_ID_SHIFT 24
+#define LAPIC_ID_MASK 0xFF
+#define LAPIC_VERSION 0x00000030
+#define LAPIC_VERSION_MASK 0xFF
+#define LAPIC_TPR 0x00000080
+#define LAPIC_TPR_MASK 0xFF
+#define LAPIC_APR 0x00000090
+#define LAPIC_APR_MASK 0xFF
+#define LAPIC_PPR 0x000000A0
+#define LAPIC_PPR_MASK 0xFF
+#define LAPIC_EOI 0x000000B0
+#define LAPIC_REMOTE_READ 0x000000C0
+#define LAPIC_LDR 0x000000D0
+#define LAPIC_LDR_SHIFT 24
+#define LAPIC_DFR 0x000000E0
+#define LAPIC_DFR_FLAT 0xFFFFFFFF
+#define LAPIC_DFR_CLUSTER 0x0FFFFFFF
+#define LAPIC_DFR_SHIFT 28
+#define LAPIC_SVR 0x000000F0
+#define LAPIC_SVR_MASK 0x0FF
+#define LAPIC_SVR_ENABLE 0x100
+#define LAPIC_SVR_FOCUS_OFF 0x200
+#define LAPIC_ISR_BASE 0x00000100
+#define LAPIC_TMR_BASE 0x00000180
+#define LAPIC_IRR_BASE 0x00000200
+#define LAPIC_ERROR_STATUS 0x00000280
+#define LAPIC_ICR 0x00000300
+#define LAPIC_ICR_VECTOR_MASK 0x000FF
+#define LAPIC_ICR_DM_MASK 0x00700
+#define LAPIC_ICR_DM_FIXED 0x00000
+#define LAPIC_ICR_DM_LOWEST 0x00100
+#define LAPIC_ICR_DM_SMI 0x00200
+#define LAPIC_ICR_DM_REMOTE 0x00300
+#define LAPIC_ICR_DM_NMI 0x00400
+#define LAPIC_ICR_DM_INIT 0x00500
+#define LAPIC_ICR_DM_STARTUP 0x00600
+#define LAPIC_ICR_DM_LOGICAL 0x00800
+#define LAPIC_ICR_DS_PENDING 0x01000
+#define LAPIC_ICR_LEVEL_ASSERT 0x04000
+#define LAPIC_ICR_TRIGGER_LEVEL 0x08000
+#define LAPIC_ICR_RR_MASK 0x30000
+#define LAPIC_ICR_RR_INVALID 0x00000
+#define LAPIC_ICR_RR_INPROGRESS 0x10000
+#define LAPIC_ICR_RR_VALID 0x20000
+#define LAPIC_ICR_DSS_MASK 0xC0000
+#define LAPIC_ICR_DSS_DEST 0x00000
+#define LAPIC_ICR_DSS_SELF 0x40000
+#define LAPIC_ICR_DSS_ALL 0x80000
+#define LAPIC_ICR_DSS_OTHERS 0xC0000
+#define LAPIC_ICRD 0x00000310
+#define LAPIC_ICRD_DEST_SHIFT 24
+#define LAPIC_LVT_TIMER 0x00000320
+#define LAPIC_LVT_THERMAL 0x00000330
+#define LAPIC_LVT_PERFCNT 0x00000340
+#define LAPIC_LVT_LINT0 0x00000350
+#define LAPIC_LVT_LINT1 0x00000360
+#define LAPIC_LVT_ERROR 0x00000370
+#define LAPIC_LVT_VECTOR_MASK 0x000FF
+#define LAPIC_LVT_DM_SHIFT 8
+#define LAPIC_LVT_DM_MASK 0x00007
+#define LAPIC_LVT_DM_FIXED 0x00000
+#define LAPIC_LVT_DM_NMI 0x00400
+#define LAPIC_LVT_DM_EXTINT 0x00700
+#define LAPIC_LVT_DS_PENDING 0x01000
+#define LAPIC_LVT_IP_PLRITY_LOW 0x02000
+#define LAPIC_LVT_REMOTE_IRR 0x04000
+#define LAPIC_LVT_TM_LEVEL 0x08000
+#define LAPIC_LVT_MASKED 0x10000
+#define LAPIC_LVT_PERIODIC 0x20000
+#define LAPIC_TIMER_INITIAL_COUNT 0x00000380
+#define LAPIC_TIMER_CURRENT_COUNT 0x00000390
+#define LAPIC_TIMER_DIVIDE_CONFIG 0x000003E0
+/* divisor encoded by bits 0,1,3 with bit 2 always 0: */
+#define LAPIC_TIMER_DIVIDE_MASK 0x0000000F
+#define LAPIC_TIMER_DIVIDE_2 0x00000000
+#define LAPIC_TIMER_DIVIDE_4 0x00000001
+#define LAPIC_TIMER_DIVIDE_8 0x00000002
+#define LAPIC_TIMER_DIVIDE_16 0x00000003
+#define LAPIC_TIMER_DIVIDE_32 0x00000008
+#define LAPIC_TIMER_DIVIDE_64 0x00000009
+#define LAPIC_TIMER_DIVIDE_128 0x0000000A
+#define LAPIC_TIMER_DIVIDE_1 0x0000000B
+
+#define LAPIC_ID_MAX (LAPIC_ID_MASK)
+
+#define CPU_NUMBER(r) \
+ movl %gs:CPU_NUMBER_GS,r
+
+#define CPU_NUMBER_FROM_LAPIC(r) \
+ movl EXT(lapic_id),r; \
+ movl 0(r),r; \
+ shrl $(LAPIC_ID_SHIFT),r; \
+ andl $(LAPIC_ID_MASK),r; \
+ movl EXT(lapic_to_cpu)(,r,4),r
+
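
CPU_NUMBER_FROM_LAPIC is assembler; the equivalent lookup written in C (a sketch for readers, not part of the header) reads the APIC ID register through the lapic_id pointer and indexes lapic_to_cpu:

/* lapic_id holds the virtual address of the local APIC ID register. */
static inline int
cpu_number_from_lapic_sketch(void)
{
	unsigned int id;

	id = (*(volatile uint32_t *)lapic_id >> LAPIC_ID_SHIFT) & LAPIC_ID_MASK;
	return lapic_to_cpu[id];
}
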
+#ifndef ASSEMBLER
+#include <stdint.h>
+#include <sys/cdefs.h>
+#include <mach/boolean.h>
+#include <mach/kern_return.h>
+typedef enum {
+ periodic,
+ one_shot
+} lapic_timer_mode_t;
+typedef enum {
+ divide_by_1 = LAPIC_TIMER_DIVIDE_1,
+ divide_by_2 = LAPIC_TIMER_DIVIDE_2,
+ divide_by_4 = LAPIC_TIMER_DIVIDE_4,
+ divide_by_8 = LAPIC_TIMER_DIVIDE_8,
+ divide_by_16 = LAPIC_TIMER_DIVIDE_16,
+ divide_by_32 = LAPIC_TIMER_DIVIDE_32,
+ divide_by_64 = LAPIC_TIMER_DIVIDE_64,
+ divide_by_128 = LAPIC_TIMER_DIVIDE_128
+} lapic_timer_divide_t;
+typedef uint32_t lapic_timer_count_t;
+
+/*
+ * By default, use high vectors to leave vector space for systems
+ * with multiple I/O APIC's. However some systems that boot with
+ * local APIC disabled will hang in SMM when vectors greater than
+ * 0x5F are used. Those systems are not expected to have an I/O APIC,
+ * so 16 (0x50 - 0x40) vectors for legacy PIC support are plenty.
+ */
+#define LAPIC_DEFAULT_INTERRUPT_BASE 0xD0
+#define LAPIC_REDUCED_INTERRUPT_BASE 0x50
+/*
+ * Specific lapic interrupts are relative to this base
+ * in priority order from high to low:
+ */
+
+#define LAPIC_PERFCNT_INTERRUPT 0xF
+#define LAPIC_TIMER_INTERRUPT 0xE
+#define LAPIC_INTERPROCESSOR_INTERRUPT 0xD
+#define LAPIC_THERMAL_INTERRUPT 0xC
+#define LAPIC_ERROR_INTERRUPT 0xB
+#define LAPIC_SPURIOUS_INTERRUPT 0xA
+#define LAPIC_CMCI_INTERRUPT 0x9
+/* The vector field is ignored for NMI interrupts via the LAPIC
+ * or otherwise, so this is not an offset from the interrupt
+ * base.
+ */
+#define LAPIC_NMI_INTERRUPT 0x2
+#define LAPIC_FUNC_TABLE_SIZE LAPIC_PERFCNT_INTERRUPT
+
+#define LAPIC_WRITE(reg,val) \
+ *((volatile uint32_t *)(lapic_start + LAPIC_##reg)) = (val)
+#define LAPIC_READ(reg) \
+ (*((volatile uint32_t *)(lapic_start + LAPIC_##reg)))
+#define LAPIC_READ_OFFSET(reg,off) \
+ (*((volatile uint32_t *)(lapic_start + LAPIC_##reg + (off))))
+
+#define LAPIC_VECTOR(src) \
+ (lapic_interrupt_base + LAPIC_##src##_INTERRUPT)
+
+#define LAPIC_ISR_IS_SET(base,src) \
+ (LAPIC_READ_OFFSET(ISR_BASE,((base+LAPIC_##src##_INTERRUPT)/32)*0x10) \
+ & (1 <<((base + LAPIC_##src##_INTERRUPT)%32)))
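
The ISR/TMR/IRR blocks are 256-bit bitmaps held in eight 32-bit registers spaced 0x10 bytes apart; LAPIC_ISR_IS_SET selects the register and bit for a given vector. The same arithmetic written out in C (sketch only):

/* Non-zero if the in-service bit for an absolute vector number is set. */
static inline int
lapic_isr_is_set_sketch(unsigned int vector)
{
	unsigned int reg = (vector / 32) * 0x10;	/* which 32-bit register */
	unsigned int bit = vector % 32;			/* bit inside it         */

	return (LAPIC_READ_OFFSET(ISR_BASE, reg) >> bit) & 1;
}
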
+
+extern vm_offset_t lapic_start;
+
+extern void lapic_init(void);
+extern void lapic_configure(void);
+extern void lapic_shutdown(void);
+extern void lapic_smm_restore(void);
+extern boolean_t lapic_probe(void);
+extern void lapic_dump(void);
+extern int lapic_interrupt(
+ int interrupt, x86_saved_state_t *state);
+extern void lapic_end_of_interrupt(void);
+extern int lapic_to_cpu[];
+extern int cpu_to_lapic[];
+extern int lapic_interrupt_base;
+extern void lapic_cpu_map(int lapic, int cpu_num);
+extern uint32_t ml_get_apicid(uint32_t cpu);
+
+extern void lapic_set_timer(
+ boolean_t interrupt,
+ lapic_timer_mode_t mode,
+ lapic_timer_divide_t divisor,
+ lapic_timer_count_t initial_count);
+
+extern void lapic_get_timer(
+ lapic_timer_mode_t *mode,
+ lapic_timer_divide_t *divisor,
+ lapic_timer_count_t *initial_count,
+ lapic_timer_count_t *current_count);
+
+typedef int (*i386_intr_func_t)(x86_saved_state_t *state);
+extern void lapic_set_intr_func(int intr, i386_intr_func_t func);
+
+static inline void lapic_set_timer_func(i386_intr_func_t func)
+{
+ lapic_set_intr_func(LAPIC_VECTOR(TIMER), func);
+}
+static inline void lapic_set_pmi_func(i386_intr_func_t func)
+{
+ lapic_set_intr_func(LAPIC_VECTOR(PERFCNT), func);
+}
+static inline void lapic_set_thermal_func(i386_intr_func_t func)
+{
+ lapic_set_intr_func(LAPIC_VECTOR(THERMAL), func);
+}
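
A hypothetical caller of the wrappers above (the handler name and body are invented for illustration):

static int
my_timer_intr(x86_saved_state_t *state)		/* hypothetical handler */
{
	(void) state;
	/* ... service the local APIC timer tick ... */
	return 1;				/* non-zero: handled */
}

During initialization the platform code would call lapic_set_timer_func(my_timer_intr); lapic_interrupt() then dispatches to it through lapic_intr_func[] when LAPIC_TIMER_INTERRUPT fires.
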
+
+#ifdef MP_DEBUG
+#define LAPIC_CPU_MAP_DUMP() lapic_cpu_map_dump()
+#define LAPIC_DUMP() lapic_dump()
+#else
+#define LAPIC_CPU_MAP_DUMP()
+#define LAPIC_DUMP()
+#endif /* MP_DEBUG */
+
+#endif /* ASSEMBLER */
+
+#endif /* _I386_LAPIC_H_ */
+
#include <i386/asm.h>
#include <i386/cpuid.h>
#include <i386/eflags.h>
+#include <i386/lapic.h>
+#include <i386/rtclock.h>
#include <i386/proc_reg.h>
#include <i386/trap.h>
#include <assym.s>
* Nanotime returned in %edx:%eax.
* Computed from tsc based on the scale factor
* and an implicit 32 bit shift.
- * This code must match what _rtc_nanotime_read does in
- * i386/machine_routines_asm.s. Failure to do so can
- * result in "weird" timing results.
*
* Uses %eax, %ebx, %ecx, %edx, %esi, %edi.
*/
-#define RNT_INFO _rtc_nanotime_info
#define NANOTIME \
- lea RNT_INFO,%edi ; \
-0: ; \
- movl RNT_GENERATION(%edi),%esi /* being updated? */ ; \
- testl %esi,%esi ; \
- jz 0b /* wait until done */ ; \
- rdtsc ; \
- subl RNT_TSC_BASE(%edi),%eax ; \
- sbbl RNT_TSC_BASE+4(%edi),%edx /* tsc - tsc_base */ ; \
- movl RNT_SCALE(%edi),%ecx /* * scale factor */ ; \
- movl %edx,%ebx ; \
- mull %ecx ; \
- movl %ebx,%eax ; \
- movl %edx,%ebx ; \
- mull %ecx ; \
- addl %ebx,%eax ; \
- adcl $0,%edx ; \
- addl RNT_NS_BASE(%edi),%eax /* + ns_base */ ; \
- adcl RNT_NS_BASE+4(%edi),%edx ; \
- cmpl RNT_GENERATION(%edi),%esi /* check for update */ ; \
- jne 0b /* do it all again */
+ mov %gs:CPU_NANOTIME,%edi ; \
+ RTC_NANOTIME_READ_FAST()
/*
*/
#include <kern/kalloc.h>
+#include <mach/mach_time.h>
#include <i386/cpu_data.h>
#include <i386/cpuid.h>
+#include <i386/cpu_topology.h>
+#include <i386/cpu_threads.h>
+#include <i386/machine_cpu.h>
#include <i386/machine_check.h>
#include <i386/proc_reg.h>
static boolean_t mca_extended_MSRs_present = FALSE;
static unsigned int mca_extended_MSRs_count = 0;
static ia32_mcg_cap_t ia32_mcg_cap;
-static boolean_t mca_exception_taken = FALSE;
-
decl_simple_lock_data(static, mca_lock);
typedef struct {
mca_mci_bank_t mca_error_bank[0];
} mca_state_t;
+typedef enum {
+ CLEAR,
+ DUMPING,
+ DUMPED
+} mca_dump_state_t;
+static volatile mca_dump_state_t mca_dump_state = CLEAR;
+
static void
mca_get_availability(void)
{
}
static void
-mca_save_state(void)
+mca_save_state(mca_state_t *mca_state)
{
- mca_state_t *mca_state;
mca_mci_bank_t *bank;
unsigned int i;
assert(!ml_get_interrupts_enabled() || get_preemption_level() > 0);
- mca_state = (mca_state_t *) current_cpu_datap()->cpu_mca_state;
if (mca_state == NULL)
return;
void
mca_check_save(void)
{
- if (mca_exception_taken)
- mca_save_state();
+ if (mca_dump_state > CLEAR)
+ mca_save_state(current_cpu_datap()->cpu_mca_state);
}
static void mca_dump_64bit_state(void)
// microcode revision is top 32 bits of MSR_IA32_UCODE_REV
microcode = rdmsr64(MSR_IA32_UCODE_REV) >> 32;
- kdb_printf("family: %d model: %d stepping: %d microcode revision %d\n",
+ kdb_printf(" family: %d model: %d stepping: %d microcode: %d\n",
infop->cpuid_family,
infop->cpuid_model,
infop->cpuid_stepping,
(uint32_t) microcode);
- kdb_printf("%s\n", infop->cpuid_brand_string);
+ kdb_printf(" %s\n", infop->cpuid_brand_string);
}
-
static const char *mca_threshold_status[] = {
[THRESHOLD_STATUS_NO_TRACKING] "No tracking",
[THRESHOLD_STATUS_GREEN] "Green",
};
static void
-mca_dump_error_banks(void)
+mca_dump_bank(mca_state_t *state, int i)
{
- unsigned int i;
+ mca_mci_bank_t *bank;
ia32_mci_status_t status;
- kdb_printf("MCA error-reporting registers:\n");
- for (i = 0; i < mca_error_bank_count; i++ ) {
- status.u64 = rdmsr64(IA32_MCi_STATUS(i));
+ bank = &state->mca_error_bank[i];
+ status = bank->mca_mci_status;
+ kdb_printf(
+ " IA32_MC%d_STATUS(0x%x): 0x%016qx %svalid\n",
+ i, IA32_MCi_STATUS(i), status.u64, IF(!status.bits.val, "in"));
+ if (!status.bits.val)
+ return;
+
+ kdb_printf(
+ " MCA error code: 0x%04x\n",
+ status.bits.mca_error);
+ kdb_printf(
+ " Model specific error code: 0x%04x\n",
+ status.bits.model_specific_error);
+ if (!mca_threshold_status_present) {
kdb_printf(
- " IA32_MC%d_STATUS(0x%x): 0x%016qx %svalid\n",
- i, IA32_MCi_STATUS(i), status.u64,
- IF(!status.bits.val, "in"));
- if (!status.bits.val)
- continue;
+ " Other information: 0x%08x\n",
+ status.bits.other_information);
+ } else {
+ int threshold = status.bits_tes_p.threshold;
kdb_printf(
- " MCA error code : 0x%04x\n",
- status.bits.mca_error);
+ " Other information: 0x%08x\n"
+ " Threshold-based status: %s\n",
+ status.bits_tes_p.other_information,
+ (status.bits_tes_p.uc == 0) ?
+ mca_threshold_status[threshold] :
+ "Undefined");
+ }
+ kdb_printf(
+ " Status bits:\n%s%s%s%s%s%s",
+ IF(status.bits.pcc, " Processor context corrupt\n"),
+ IF(status.bits.addrv, " ADDR register valid\n"),
+ IF(status.bits.miscv, " MISC register valid\n"),
+ IF(status.bits.en, " Error enabled\n"),
+ IF(status.bits.uc, " Uncorrected error\n"),
+ IF(status.bits.over, " Error overflow\n"));
+ if (status.bits.addrv)
kdb_printf(
- " Model specific error code: 0x%04x\n",
- status.bits.model_specific_error);
- if (!mca_threshold_status_present) {
- kdb_printf(
- " Other information : 0x%08x\n",
- status.bits.other_information);
- } else {
- int threshold = status.bits_tes_p.threshold;
- kdb_printf(
- " Other information : 0x%08x\n"
- " Threshold-based status : %s\n",
- status.bits_tes_p.other_information,
- (status.bits_tes_p.uc == 0) ?
- mca_threshold_status[threshold] :
- "Undefined");
- }
+ " IA32_MC%d_ADDR(0x%x): 0x%016qx\n",
+ i, IA32_MCi_ADDR(i), bank->mca_mci_addr);
+ if (status.bits.miscv)
kdb_printf(
- " Status bits:\n%s%s%s%s%s%s",
- IF(status.bits.pcc, " Processor context corrupt\n"),
- IF(status.bits.addrv, " ADDR register valid\n"),
- IF(status.bits.miscv, " MISC register valid\n"),
- IF(status.bits.en, " Error enabled\n"),
- IF(status.bits.uc, " Uncorrected error\n"),
- IF(status.bits.over, " Error overflow\n"));
- if (status.bits.addrv)
- kdb_printf(
- " IA32_MC%d_ADDR(0x%x): 0x%016qx\n",
- i, IA32_MCi_ADDR(i), rdmsr64(IA32_MCi_ADDR(i)));
- if (status.bits.miscv)
- kdb_printf(
- " IA32_MC%d_MISC(0x%x): 0x%016qx\n",
- i, IA32_MCi_MISC(i), rdmsr64(IA32_MCi_MISC(i)));
+ " IA32_MC%d_MISC(0x%x): 0x%016qx\n",
+ i, IA32_MCi_MISC(i), bank->mca_mci_misc);
+}
+
+static void
+mca_dump_error_banks(mca_state_t *state)
+{
+ unsigned int i;
+
+ kdb_printf("MCA error-reporting registers:\n");
+ for (i = 0; i < mca_error_bank_count; i++ ) {
+ mca_dump_bank(state, i);
}
}
mca_dump(void)
{
ia32_mcg_status_t status;
+ mca_state_t *mca_state = current_cpu_datap()->cpu_mca_state;
- mca_save_state();
+ /*
+ * Capture local MCA registers to per-cpu data.
+ */
+ mca_save_state(mca_state);
/*
* Serialize in case of multiple simultaneous machine-checks.
- * Only the first caller is allowed to print MCA registers.
+ * Only the first caller is allowed to dump MCA registers,
+ * other threads spin meantime.
*/
simple_lock(&mca_lock);
- if (mca_exception_taken) {
+ if (mca_dump_state > CLEAR) {
simple_unlock(&mca_lock);
+ while (mca_dump_state == DUMPING)
+ cpu_pause();
return;
}
- mca_exception_taken = TRUE;
+ mca_dump_state = DUMPING;
+ simple_unlock(&mca_lock);
/*
* Report machine-check capabilities:
mca_report_cpu_info();
kdb_printf(
- " %d error-reporting banks\n%s%s", mca_error_bank_count,
+ " %d error-reporting banks\n%s%s%s", mca_error_bank_count,
IF(mca_control_MSR_present,
" control MSR present\n"),
IF(mca_threshold_status_present,
- " threshold-based error status present\n"));
+ " threshold-based error status present\n"),
+ "");
if (mca_extended_MSRs_present)
kdb_printf(
" %d extended MSRs present\n", mca_extended_MSRs_count);
*/
status.u64 = rdmsr64(IA32_MCG_STATUS);
kdb_printf(
- "Machine-check status 0x%016qx\n%s%s%s", status.u64,
+ "Machine-check status 0x%016qx:\n%s%s%s", status.u64,
IF(status.bits.ripv, " restart IP valid\n"),
IF(status.bits.eipv, " error IP valid\n"),
IF(status.bits.mcip, " machine-check in progress\n"));
/*
* Dump error-reporting registers:
*/
- mca_dump_error_banks();
+ mca_dump_error_banks(mca_state);
/*
* Dump any extended machine state:
mca_dump_32bit_state();
}
- simple_unlock(&mca_lock);
+ /* Update state to release any other threads. */
+ mca_dump_state = DUMPED;
}
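
The hunks above turn the old mca_exception_taken boolean into a three-state flag so that when several CPUs take a machine check at once, exactly one dumps and the rest wait for it to finish. The pattern, reduced to a sketch (the lock and dump function names are placeholders):

typedef enum { CLEAR, DUMPING, DUMPED } dump_state_t;
static volatile dump_state_t dump_state = CLEAR;
decl_simple_lock_data(static, dump_lock);

void
dump_once(void)
{
	simple_lock(&dump_lock);
	if (dump_state > CLEAR) {
		simple_unlock(&dump_lock);
		while (dump_state == DUMPING)	/* first dumper still working */
			cpu_pause();
		return;
	}
	dump_state = DUMPING;			/* we won: others will spin   */
	simple_unlock(&dump_lock);

	do_dump();				/* placeholder for the dump   */

	dump_state = DUMPED;			/* release any spinning CPUs  */
}
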
uint64_t count :BITS(7,0);
uint64_t mcg_ctl_p :BIT1(8);
uint64_t mcg_ext_p :BIT1(9);
- uint64_t reserved1 :BIT1(10);
+ uint64_t mcg_reserved1 :BIT1(10);
uint64_t mcg_tes_p :BIT1(11);
- uint64_t reserved2 :BITS(15,12);
+ uint64_t mcg_reserved2 :BITS(15,12);
uint64_t mcg_ext_cnt :BITS(23,16);
- uint64_t reserved3 :BITS(63,24);
} bits;
uint64_t u64;
} ia32_mcg_cap_t;
uint64_t ripv :BIT1(0);
uint64_t eipv :BIT1(1);
uint64_t mcip :BIT1(2);
- uint64_t reserved :BITS(61,3);
} bits;
uint64_t u64;
} ia32_mcg_status_t;
#define IA32_MCi_CTL_ENABLE_ALL (0xFFFFFFFFFFFFFFFFULL)
typedef union {
- struct {
+ struct {
uint64_t mca_error :BITS(15,0);
uint64_t model_specific_error :BITS(31,16);
uint64_t other_information :BITS(56,32);
uint64_t uc :BIT1(61);
uint64_t over :BIT1(62);
uint64_t val :BIT1(63);
- } bits;
- struct { /* Variant if threshold-based error status present: */
+ } bits;
+ struct { /* Variant if threshold-based error status present: */
uint64_t mca_error :BITS(15,0);
uint64_t model_specific_error :BITS(31,16);
uint64_t other_information :BITS(52,32);
uint64_t threshold :BITS(54,53);
- uint64_t reserved :BITS(56,55);
uint64_t pcc :BIT1(57);
uint64_t addrv :BIT1(58);
uint64_t miscv :BIT1(59);
uint64_t uc :BIT1(61);
uint64_t over :BIT1(62);
uint64_t val :BIT1(63);
- } bits_tes_p;
- uint64_t u64;
+ } bits_tes_p;
+ uint64_t u64;
} ia32_mci_status_t;
/* Values for threshold_status if mcg_tes_p == 1 and uc == 0 */
typedef uint64_t ia32_mci_addr_t;
typedef uint64_t ia32_mci_misc_t;
-
#define IA32_MCG_EAX (0x180)
#define IA32_MCG_EBX (0x181)
#define IA32_MCG_ECX (0x182)
#define IA32_MCG_R14 (0x196)
#define IA32_MCG_R15 (0x197)
-extern void mca_cpu_alloc(cpu_data_t *cdp);
-extern void mca_cpu_init(void);
-extern void mca_dump(void);
-extern void mca_check_save(void);
+extern void mca_cpu_alloc(cpu_data_t *cdp);
+extern void mca_cpu_init(void);
+extern void mca_dump(void);
+extern void mca_check_save(void);
#endif /* _I386_MACHINE_CHECK_H_ */
#endif /* KERNEL_PRIVATE */
void cpu_machine_init(
void);
-void cpu_signal_handler(
- x86_saved_state_t *regs);
-
void handle_pending_TLB_flushes(
void);
#include <kern/thread.h>
#include <i386/cpu_data.h>
#include <i386/machine_cpu.h>
-#include <i386/mp.h>
+#include <i386/lapic.h>
#include <i386/mp_events.h>
#include <i386/pmap.h>
#include <i386/misc_protos.h>
}
-void
-machine_idle(void)
-{
- x86_core_t *my_core = x86_core();
- cpu_data_t *my_cpu = current_cpu_datap();
- int others_active;
-
- /*
- * We halt this cpu thread
- * unless kernel param idlehalt is false and no other thread
- * in the same core is active - if so, don't halt so that this
- * core doesn't go into a low-power mode.
- * For 4/4, we set a null "active cr3" while idle.
- */
- if (my_core == NULL || my_cpu == NULL)
- goto out;
-
- others_active = !atomic_decl_and_test(
- (long *) &my_core->active_lcpus, 1);
- my_cpu->lcpu.idle = TRUE;
- if (idlehalt || others_active) {
- DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
- MARK_CPU_IDLE(cpu_number());
- machine_idle_cstate(FALSE);
- MARK_CPU_ACTIVE(cpu_number());
- DBGLOG(cpu_handle, cpu_number(), MP_UNIDLE);
- }
- my_cpu->lcpu.idle = FALSE;
- atomic_incl((long *) &my_core->active_lcpus, 1);
- out:
- __asm__ volatile("sti");
-}
-
void
machine_signal_idle(
processor_t processor)
goto failed;
if (!boot_cpu) {
- this_cpu_datap->lcpu.core = cpu_thread_alloc(this_cpu_datap->cpu_number);
+ cpu_thread_alloc(this_cpu_datap->cpu_number);
if (this_cpu_datap->lcpu.core == NULL)
goto failed;
LockTimeOut = (uint32_t) abstime;
LockTimeOutTSC = (uint32_t) tmrCvt(abstime, tscFCvtn2t);
- if (PE_parse_boot_arg("mtxspin", &mtxspin)) {
+ if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof (mtxspin))) {
if (mtxspin > USEC_PER_SEC>>4)
mtxspin = USEC_PER_SEC>>4;
nanoseconds_to_absolutetime(mtxspin*NSEC_PER_USEC, &abstime);
extern unsigned ml_get_maxsnoop(void);
extern void ml_set_maxbusdelay(uint32_t mdelay);
extern uint32_t ml_get_maxbusdelay(void);
+extern void ml_set_maxintdelay(uint64_t mdelay);
+extern uint64_t ml_get_maxintdelay(void);
-extern void ml_hpet_cfg(uint32_t cpu, uint32_t hpetVect);
-
extern uint64_t tmrCvt(uint64_t time, uint64_t conversion);
extern uint64_t ml_cpu_int_event_time(void);
*/
#include <i386/asm.h>
+#include <i386/rtclock.h>
#include <i386/proc_reg.h>
#include <i386/eflags.h>
movl S_ARG0, %ecx
rdtsc
+ lfence
movl %edx, 0(%ecx)
movl %eax, 4(%ecx)
jnz Lslow
/* Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD */
-0:
- movl RNT_GENERATION(%edi),%esi /* get generation (0 if being changed) */
- testl %esi,%esi /* if being changed, loop until stable */
- jz 0b
-
- rdtsc /* get TSC in %edx:%eax */
- subl RNT_TSC_BASE(%edi),%eax
- sbbl RNT_TSC_BASE+4(%edi),%edx
-
- movl RNT_SCALE(%edi),%ecx
-
- movl %edx,%ebx
- mull %ecx
- movl %ebx,%eax
- movl %edx,%ebx
- mull %ecx
- addl %ebx,%eax
- adcl $0,%edx
-
- addl RNT_NS_BASE(%edi),%eax
- adcl RNT_NS_BASE+4(%edi),%edx
-
- cmpl RNT_GENERATION(%edi),%esi /* have the parameters changed? */
- jne 0b /* yes, loop until stable */
+ RTC_NANOTIME_READ_FAST()
popl %ebx
popl %edi
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <kern/assert.h>
#include <kern/machine.h>
#include <kern/pms.h>
+#include <kern/misc_protos.h>
#include <vm/vm_map.h>
#include <vm/vm_kern.h>
#include <i386/mp.h>
#include <i386/mp_events.h>
#include <i386/mp_slave_boot.h>
-#include <i386/apic.h>
+#include <i386/lapic.h>
#include <i386/ipl.h>
#include <i386/fpu.h>
#include <i386/cpuid.h>
#include <i386/trap.h>
#include <i386/machine_routines.h>
#include <i386/pmCPU.h>
-#include <i386/hpet.h>
#include <i386/machine_check.h>
#include <chud/chud_xnu.h>
#define PAUSE
#endif /* MP_DEBUG */
-/* Initialize lapic_id so cpu_number() works on non SMP systems */
-unsigned long lapic_id_initdata = 0;
-unsigned long lapic_id = (unsigned long)&lapic_id_initdata;
-vm_offset_t lapic_start;
-
-static i386_intr_func_t lapic_timer_func;
-static i386_intr_func_t lapic_pmi_func;
-static i386_intr_func_t lapic_thermal_func;
-
-/* TRUE if local APIC was enabled by the OS not by the BIOS */
-static boolean_t lapic_os_enabled = FALSE;
-
-/* Base vector for local APIC interrupt sources */
-int lapic_interrupt_base = LAPIC_DEFAULT_INTERRUPT_BASE;
+#define FULL_SLAVE_INIT (NULL)
+#define FAST_SLAVE_INIT ((void *)(uintptr_t)1)
void slave_boot_init(void);
static void mp_rendezvous_action(void);
static void mp_broadcast_action(void);
-static int NMIInterruptHandler(x86_saved_state_t *regs);
static boolean_t cpu_signal_pending(int cpu, mp_event_t event);
+static int cpu_signal_handler(x86_saved_state_t *regs);
+static int NMIInterruptHandler(x86_saved_state_t *regs);
boolean_t smp_initialized = FALSE;
volatile boolean_t force_immediate_debugger_NMI = FALSE;
/* Variables needed for MP broadcast. */
static void (*mp_bc_action_func)(void *arg);
static void *mp_bc_func_arg;
-static int mp_bc_ncpus;
+static int mp_bc_ncpus;
static volatile long mp_bc_count;
decl_mutex_data(static, mp_bc_lock);
+static volatile int debugger_cpu = -1;
static void mp_cpus_call_action(void);
-int lapic_to_cpu[MAX_CPUS];
-int cpu_to_lapic[MAX_CPUS];
-
-static void
-lapic_cpu_map_init(void)
-{
- int i;
-
- for (i = 0; i < MAX_CPUS; i++) {
- lapic_to_cpu[i] = -1;
- cpu_to_lapic[i] = -1;
- }
-}
-
-void
-lapic_cpu_map(int apic_id, int cpu)
-{
- cpu_to_lapic[cpu] = apic_id;
- lapic_to_cpu[apic_id] = cpu;
-}
-
-/*
- * Retrieve the local apic ID a cpu.
- *
- * Returns the local apic ID for the given processor.
- * If the processor does not exist or apic not configured, returns -1.
- */
-
-uint32_t
-ml_get_apicid(uint32_t cpu)
-{
- if(cpu >= (uint32_t)MAX_CPUS)
- return 0xFFFFFFFF; /* Return -1 if cpu too big */
-
- /* Return the apic ID (or -1 if not configured) */
- return (uint32_t)cpu_to_lapic[cpu];
-
-}
-
-#ifdef MP_DEBUG
-static void
-lapic_cpu_map_dump(void)
-{
- int i;
-
- for (i = 0; i < MAX_CPUS; i++) {
- if (cpu_to_lapic[i] == -1)
- continue;
- kprintf("cpu_to_lapic[%d]: %d\n",
- i, cpu_to_lapic[i]);
- }
- for (i = 0; i < MAX_CPUS; i++) {
- if (lapic_to_cpu[i] == -1)
- continue;
- kprintf("lapic_to_cpu[%d]: %d\n",
- i, lapic_to_cpu[i]);
- }
-}
-#define LAPIC_CPU_MAP_DUMP() lapic_cpu_map_dump()
-#define LAPIC_DUMP() lapic_dump()
-#else
-#define LAPIC_CPU_MAP_DUMP()
-#define LAPIC_DUMP()
-#endif /* MP_DEBUG */
-
#if GPROF
/*
* Initialize dummy structs for profiling. These aren't used but
void
smp_init(void)
{
- int result;
- vm_map_entry_t entry;
- uint32_t lo;
- uint32_t hi;
- boolean_t is_boot_processor;
- boolean_t is_lapic_enabled;
- vm_offset_t lapic_base;
-
simple_lock_init(&mp_kdp_lock, 0);
simple_lock_init(&mp_rv_lock, 0);
mutex_init(&mp_cpu_boot_lock, 0);
if (!lapic_probe())
return;
- /* Examine the local APIC state */
- rdmsr(MSR_IA32_APIC_BASE, lo, hi);
- is_boot_processor = (lo & MSR_IA32_APIC_BASE_BSP) != 0;
- is_lapic_enabled = (lo & MSR_IA32_APIC_BASE_ENABLE) != 0;
- lapic_base = (lo & MSR_IA32_APIC_BASE_BASE);
- kprintf("MSR_IA32_APIC_BASE 0x%x %s %s\n", lapic_base,
- is_lapic_enabled ? "enabled" : "disabled",
- is_boot_processor ? "BSP" : "AP");
- if (!is_boot_processor || !is_lapic_enabled)
- panic("Unexpected local APIC state\n");
-
- /* Establish a map to the local apic */
- lapic_start = vm_map_min(kernel_map);
- result = vm_map_find_space(kernel_map,
- (vm_map_address_t *) &lapic_start,
- round_page(LAPIC_SIZE), 0,
- VM_MAKE_TAG(VM_MEMORY_IOKIT), &entry);
- if (result != KERN_SUCCESS) {
- panic("smp_init: vm_map_find_entry FAILED (err=%d)", result);
- }
- vm_map_unlock(kernel_map);
-/* Map in the local APIC non-cacheable, as recommended by Intel
- * in section 8.4.1 of the "System Programming Guide".
- */
- pmap_enter(pmap_kernel(),
- lapic_start,
- (ppnum_t) i386_btop(lapic_base),
- VM_PROT_READ|VM_PROT_WRITE,
- VM_WIMG_IO,
- TRUE);
- lapic_id = (unsigned long)(lapic_start + LAPIC_ID);
-
- if ((LAPIC_REG(VERSION)&LAPIC_VERSION_MASK) != 0x14) {
- printf("Local APIC version not 0x14 as expected\n");
- }
-
- /* Set up the lapic_id <-> cpu_number map and add this boot processor */
- lapic_cpu_map_init();
- lapic_cpu_map((LAPIC_REG(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK, 0);
- kprintf("Boot cpu local APIC id 0x%x\n", cpu_to_lapic[0]);
-
lapic_init();
+ lapic_configure();
+ lapic_set_intr_func(LAPIC_NMI_INTERRUPT, NMIInterruptHandler);
+ lapic_set_intr_func(LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler);
cpu_thread_init();
return;
}
-
-static int
-lapic_esr_read(void)
-{
- /* write-read register */
- LAPIC_REG(ERROR_STATUS) = 0;
- return LAPIC_REG(ERROR_STATUS);
-}
-
-static void
-lapic_esr_clear(void)
-{
- LAPIC_REG(ERROR_STATUS) = 0;
- LAPIC_REG(ERROR_STATUS) = 0;
-}
-
-static const char *DM[8] = {
- "Fixed",
- "Lowest Priority",
- "Invalid",
- "Invalid",
- "NMI",
- "Reset",
- "Invalid",
- "ExtINT"};
-
-void
-lapic_dump(void)
-{
- int i;
-
-#define BOOL(a) ((a)?' ':'!')
-
- kprintf("LAPIC %d at 0x%x version 0x%x\n",
- (LAPIC_REG(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK,
- lapic_start,
- LAPIC_REG(VERSION)&LAPIC_VERSION_MASK);
- kprintf("Priorities: Task 0x%x Arbitration 0x%x Processor 0x%x\n",
- LAPIC_REG(TPR)&LAPIC_TPR_MASK,
- LAPIC_REG(APR)&LAPIC_APR_MASK,
- LAPIC_REG(PPR)&LAPIC_PPR_MASK);
- kprintf("Destination Format 0x%x Logical Destination 0x%x\n",
- LAPIC_REG(DFR)>>LAPIC_DFR_SHIFT,
- LAPIC_REG(LDR)>>LAPIC_LDR_SHIFT);
- kprintf("%cEnabled %cFocusChecking SV 0x%x\n",
- BOOL(LAPIC_REG(SVR)&LAPIC_SVR_ENABLE),
- BOOL(!(LAPIC_REG(SVR)&LAPIC_SVR_FOCUS_OFF)),
- LAPIC_REG(SVR) & LAPIC_SVR_MASK);
- kprintf("LVT_TIMER: Vector 0x%02x %s %cmasked %s\n",
- LAPIC_REG(LVT_TIMER)&LAPIC_LVT_VECTOR_MASK,
- (LAPIC_REG(LVT_TIMER)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle",
- BOOL(LAPIC_REG(LVT_TIMER)&LAPIC_LVT_MASKED),
- (LAPIC_REG(LVT_TIMER)&LAPIC_LVT_PERIODIC)?"Periodic":"OneShot");
- kprintf(" Initial Count: 0x%08x \n", LAPIC_REG(TIMER_INITIAL_COUNT));
- kprintf(" Current Count: 0x%08x \n", LAPIC_REG(TIMER_CURRENT_COUNT));
- kprintf(" Divide Config: 0x%08x \n", LAPIC_REG(TIMER_DIVIDE_CONFIG));
- kprintf("LVT_PERFCNT: Vector 0x%02x [%s] %s %cmasked\n",
- LAPIC_REG(LVT_PERFCNT)&LAPIC_LVT_VECTOR_MASK,
- DM[(LAPIC_REG(LVT_PERFCNT)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK],
- (LAPIC_REG(LVT_PERFCNT)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle",
- BOOL(LAPIC_REG(LVT_PERFCNT)&LAPIC_LVT_MASKED));
- kprintf("LVT_THERMAL: Vector 0x%02x [%s] %s %cmasked\n",
- LAPIC_REG(LVT_THERMAL)&LAPIC_LVT_VECTOR_MASK,
- DM[(LAPIC_REG(LVT_THERMAL)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK],
- (LAPIC_REG(LVT_THERMAL)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle",
- BOOL(LAPIC_REG(LVT_THERMAL)&LAPIC_LVT_MASKED));
- kprintf("LVT_LINT0: Vector 0x%02x [%s][%s][%s] %s %cmasked\n",
- LAPIC_REG(LVT_LINT0)&LAPIC_LVT_VECTOR_MASK,
- DM[(LAPIC_REG(LVT_LINT0)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK],
- (LAPIC_REG(LVT_LINT0)&LAPIC_LVT_TM_LEVEL)?"Level":"Edge ",
- (LAPIC_REG(LVT_LINT0)&LAPIC_LVT_IP_PLRITY_LOW)?"Low ":"High",
- (LAPIC_REG(LVT_LINT0)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle",
- BOOL(LAPIC_REG(LVT_LINT0)&LAPIC_LVT_MASKED));
- kprintf("LVT_LINT1: Vector 0x%02x [%s][%s][%s] %s %cmasked\n",
- LAPIC_REG(LVT_LINT1)&LAPIC_LVT_VECTOR_MASK,
- DM[(LAPIC_REG(LVT_LINT1)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK],
- (LAPIC_REG(LVT_LINT1)&LAPIC_LVT_TM_LEVEL)?"Level":"Edge ",
- (LAPIC_REG(LVT_LINT1)&LAPIC_LVT_IP_PLRITY_LOW)?"Low ":"High",
- (LAPIC_REG(LVT_LINT1)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle",
- BOOL(LAPIC_REG(LVT_LINT1)&LAPIC_LVT_MASKED));
- kprintf("LVT_ERROR: Vector 0x%02x %s %cmasked\n",
- LAPIC_REG(LVT_ERROR)&LAPIC_LVT_VECTOR_MASK,
- (LAPIC_REG(LVT_ERROR)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle",
- BOOL(LAPIC_REG(LVT_ERROR)&LAPIC_LVT_MASKED));
- kprintf("ESR: %08x \n", lapic_esr_read());
- kprintf(" ");
- for(i=0xf; i>=0; i--)
- kprintf("%x%x%x%x",i,i,i,i);
- kprintf("\n");
- kprintf("TMR: 0x");
- for(i=7; i>=0; i--)
- kprintf("%08x",LAPIC_REG_OFFSET(TMR_BASE, i*0x10));
- kprintf("\n");
- kprintf("IRR: 0x");
- for(i=7; i>=0; i--)
- kprintf("%08x",LAPIC_REG_OFFSET(IRR_BASE, i*0x10));
- kprintf("\n");
- kprintf("ISR: 0x");
- for(i=7; i >= 0; i--)
- kprintf("%08x",LAPIC_REG_OFFSET(ISR_BASE, i*0x10));
- kprintf("\n");
-}
-
-#if MACH_KDB
/*
- * Displays apic junk
- *
- * da
+ * Poll a CPU to see when it has marked itself as running.
*/
-void
-db_apic(__unused db_expr_t addr,
- __unused int have_addr,
- __unused db_expr_t count,
- __unused char *modif)
-{
-
- lapic_dump();
-
- return;
-}
-
-#endif
-
-boolean_t
-lapic_probe(void)
-{
- uint32_t lo;
- uint32_t hi;
-
- if (cpuid_features() & CPUID_FEATURE_APIC)
- return TRUE;
-
- if (cpuid_family() == 6 || cpuid_family() == 15) {
- /*
- * Mobile Pentiums:
- * There may be a local APIC which wasn't enabled by BIOS.
- * So we try to enable it explicitly.
- */
- rdmsr(MSR_IA32_APIC_BASE, lo, hi);
- lo &= ~MSR_IA32_APIC_BASE_BASE;
- lo |= MSR_IA32_APIC_BASE_ENABLE | LAPIC_START;
- lo |= MSR_IA32_APIC_BASE_ENABLE;
- wrmsr(MSR_IA32_APIC_BASE, lo, hi);
-
- /*
- * Re-initialize cpu features info and re-check.
- */
- cpuid_set_info();
- if (cpuid_features() & CPUID_FEATURE_APIC) {
- printf("Local APIC discovered and enabled\n");
- lapic_os_enabled = TRUE;
- lapic_interrupt_base = LAPIC_REDUCED_INTERRUPT_BASE;
- return TRUE;
- }
- }
-
- return FALSE;
-}
-
-void
-lapic_shutdown(void)
-{
- uint32_t lo;
- uint32_t hi;
- uint32_t value;
-
- /* Shutdown if local APIC was enabled by OS */
- if (lapic_os_enabled == FALSE)
- return;
-
- mp_disable_preemption();
-
- /* ExtINT: masked */
- if (get_cpu_number() == master_cpu) {
- value = LAPIC_REG(LVT_LINT0);
- value |= LAPIC_LVT_MASKED;
- LAPIC_REG(LVT_LINT0) = value;
- }
-
- /* Timer: masked */
- LAPIC_REG(LVT_TIMER) |= LAPIC_LVT_MASKED;
-
- /* Perfmon: masked */
- LAPIC_REG(LVT_PERFCNT) |= LAPIC_LVT_MASKED;
-
- /* Error: masked */
- LAPIC_REG(LVT_ERROR) |= LAPIC_LVT_MASKED;
-
- /* APIC software disabled */
- LAPIC_REG(SVR) &= ~LAPIC_SVR_ENABLE;
-
- /* Bypass the APIC completely and update cpu features */
- rdmsr(MSR_IA32_APIC_BASE, lo, hi);
- lo &= ~MSR_IA32_APIC_BASE_ENABLE;
- wrmsr(MSR_IA32_APIC_BASE, lo, hi);
- cpuid_set_info();
-
- mp_enable_preemption();
-}
-
-void
-lapic_init(void)
-{
- int value;
-
- /* Set flat delivery model, logical processor id */
- LAPIC_REG(DFR) = LAPIC_DFR_FLAT;
- LAPIC_REG(LDR) = (get_cpu_number()) << LAPIC_LDR_SHIFT;
-
- /* Accept all */
- LAPIC_REG(TPR) = 0;
-
- LAPIC_REG(SVR) = LAPIC_VECTOR(SPURIOUS) | LAPIC_SVR_ENABLE;
-
- /* ExtINT */
- if (get_cpu_number() == master_cpu) {
- value = LAPIC_REG(LVT_LINT0);
- value &= ~LAPIC_LVT_MASKED;
- value |= LAPIC_LVT_DM_EXTINT;
- LAPIC_REG(LVT_LINT0) = value;
- }
-
- /* Timer: unmasked, one-shot */
- LAPIC_REG(LVT_TIMER) = LAPIC_VECTOR(TIMER);
-
- /* Perfmon: unmasked */
- LAPIC_REG(LVT_PERFCNT) = LAPIC_VECTOR(PERFCNT);
-
- /* Thermal: unmasked */
- LAPIC_REG(LVT_THERMAL) = LAPIC_VECTOR(THERMAL);
-
- lapic_esr_clear();
-
- LAPIC_REG(LVT_ERROR) = LAPIC_VECTOR(ERROR);
-}
-
-void
-lapic_set_timer_func(i386_intr_func_t func)
-{
- lapic_timer_func = func;
-}
-
-void
-lapic_set_timer(
- boolean_t interrupt,
- lapic_timer_mode_t mode,
- lapic_timer_divide_t divisor,
- lapic_timer_count_t initial_count)
-{
- boolean_t state;
- uint32_t timer_vector;
-
- state = ml_set_interrupts_enabled(FALSE);
- timer_vector = LAPIC_REG(LVT_TIMER);
- timer_vector &= ~(LAPIC_LVT_MASKED|LAPIC_LVT_PERIODIC);;
- timer_vector |= interrupt ? 0 : LAPIC_LVT_MASKED;
- timer_vector |= (mode == periodic) ? LAPIC_LVT_PERIODIC : 0;
- LAPIC_REG(LVT_TIMER) = timer_vector;
- LAPIC_REG(TIMER_DIVIDE_CONFIG) = divisor;
- LAPIC_REG(TIMER_INITIAL_COUNT) = initial_count;
- ml_set_interrupts_enabled(state);
-}
-
-void
-lapic_get_timer(
- lapic_timer_mode_t *mode,
- lapic_timer_divide_t *divisor,
- lapic_timer_count_t *initial_count,
- lapic_timer_count_t *current_count)
-{
- boolean_t state;
-
- state = ml_set_interrupts_enabled(FALSE);
- if (mode)
- *mode = (LAPIC_REG(LVT_TIMER) & LAPIC_LVT_PERIODIC) ?
- periodic : one_shot;
- if (divisor)
- *divisor = LAPIC_REG(TIMER_DIVIDE_CONFIG) & LAPIC_TIMER_DIVIDE_MASK;
- if (initial_count)
- *initial_count = LAPIC_REG(TIMER_INITIAL_COUNT);
- if (current_count)
- *current_count = LAPIC_REG(TIMER_CURRENT_COUNT);
- ml_set_interrupts_enabled(state);
-}
-
-void
-lapic_set_pmi_func(i386_intr_func_t func)
-{
- lapic_pmi_func = func;
-}
-
-void
-lapic_set_thermal_func(i386_intr_func_t func)
-{
- lapic_thermal_func = func;
-}
-
-static inline void
-_lapic_end_of_interrupt(void)
-{
- LAPIC_REG(EOI) = 0;
-}
-
-void
-lapic_end_of_interrupt(void)
-{
- _lapic_end_of_interrupt();
-}
-
-int
-lapic_interrupt(int interrupt, x86_saved_state_t *state)
-{
- int retval = 0;
-
- /* Did we just field an interruption for the HPET comparator? */
- if(x86_core()->HpetVec == ((uint32_t)interrupt - 0x40)) {
- /* Yes, go handle it... */
- retval = HPETInterrupt();
- /* Was it really handled? */
- if(retval) {
- /* If so, EOI the 'rupt */
- _lapic_end_of_interrupt();
- /*
- * and then leave,
- * indicating that this has been handled
- */
- return 1;
- }
- }
-
- interrupt -= lapic_interrupt_base;
- if (interrupt < 0) {
- if (interrupt == (LAPIC_NMI_INTERRUPT - lapic_interrupt_base)) {
- retval = NMIInterruptHandler(state);
- _lapic_end_of_interrupt();
- return retval;
- }
- else
- return 0;
- }
-
- switch(interrupt) {
- case LAPIC_PERFCNT_INTERRUPT:
- if (lapic_pmi_func != NULL)
- (*lapic_pmi_func)(NULL);
- /* Clear interrupt masked */
- LAPIC_REG(LVT_PERFCNT) = LAPIC_VECTOR(PERFCNT);
- _lapic_end_of_interrupt();
- retval = 1;
- break;
- case LAPIC_TIMER_INTERRUPT:
- _lapic_end_of_interrupt();
- if (lapic_timer_func != NULL)
- (*lapic_timer_func)(state);
- retval = 1;
- break;
- case LAPIC_THERMAL_INTERRUPT:
- if (lapic_thermal_func != NULL)
- (*lapic_thermal_func)(NULL);
- _lapic_end_of_interrupt();
- retval = 1;
- break;
- case LAPIC_ERROR_INTERRUPT:
- lapic_dump();
- panic("Local APIC error\n");
- _lapic_end_of_interrupt();
- retval = 1;
- break;
- case LAPIC_SPURIOUS_INTERRUPT:
- kprintf("SPIV\n");
- /* No EOI required here */
- retval = 1;
- break;
- case LAPIC_INTERPROCESSOR_INTERRUPT:
- _lapic_end_of_interrupt();
- cpu_signal_handler(state);
- retval = 1;
- break;
- }
-
- return retval;
-}
-
-void
-lapic_smm_restore(void)
+static void
+mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
{
- boolean_t state;
-
- if (lapic_os_enabled == FALSE)
- return;
-
- state = ml_set_interrupts_enabled(FALSE);
-
- if (LAPIC_ISR_IS_SET(LAPIC_REDUCED_INTERRUPT_BASE, TIMER)) {
- /*
- * Bogus SMI handler enables interrupts but does not know about
- * local APIC interrupt sources. When APIC timer counts down to
- * zero while in SMM, local APIC will end up waiting for an EOI
- * but no interrupt was delivered to the OS.
- */
- _lapic_end_of_interrupt();
-
- /*
- * timer is one-shot, trigger another quick countdown to trigger
- * another timer interrupt.
- */
- if (LAPIC_REG(TIMER_CURRENT_COUNT) == 0) {
- LAPIC_REG(TIMER_INITIAL_COUNT) = 1;
- }
-
- kprintf("lapic_smm_restore\n");
+ while (iters-- > 0) {
+ if (cpu_datap(slot_num)->cpu_running)
+ break;
+ delay(usecdelay);
}
-
- ml_set_interrupts_enabled(state);
}
kern_return_t
return KERN_SUCCESS;
}
- LAPIC_REG(ICRD) = lapic << LAPIC_ICRD_DEST_SHIFT;
- LAPIC_REG(ICR) = LAPIC_ICR_DM_INIT;
+ LAPIC_WRITE(ICRD, lapic << LAPIC_ICRD_DEST_SHIFT);
+ LAPIC_WRITE(ICR, LAPIC_ICR_DM_INIT);
delay(10000);
- LAPIC_REG(ICRD) = lapic << LAPIC_ICRD_DEST_SHIFT;
- LAPIC_REG(ICR) = LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12);
+ LAPIC_WRITE(ICRD, lapic << LAPIC_ICRD_DEST_SHIFT);
+ LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12));
delay(200);
- LAPIC_REG(ICRD) = lapic << LAPIC_ICRD_DEST_SHIFT;
- LAPIC_REG(ICR) = LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12);
+ LAPIC_WRITE(ICRD, lapic << LAPIC_ICRD_DEST_SHIFT);
+ LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12));
delay(200);
#ifdef POSTCODE_DELAY
/* Wait much longer if postcodes are displayed for a delay period. */
i *= 10000;
#endif
- while(i-- > 0) {
- if (cpu_datap(slot_num)->cpu_running)
- break;
- delay(10000);
- }
+ mp_wait_for_cpu_up(slot_num, i, 10000);
mp_enable_preemption();
mutex_unlock(&mp_cpu_boot_lock);
}
}
+/*
+ * Quickly bring a CPU back online which has been halted.
+ */
+kern_return_t
+intel_startCPU_fast(int slot_num)
+{
+ kern_return_t rc;
+
+ /*
+ * Try to perform a fast restart
+ */
+ rc = pmCPUExitHalt(slot_num);
+ if (rc != KERN_SUCCESS)
+ /*
+ * The CPU was not eligible for a fast restart.
+ */
+ return(rc);
+
+ /*
+ * Wait until the CPU is back online.
+ */
+ mp_disable_preemption();
+
+ /*
+ * We use short pauses (1us) for low latency. 30,000 iterations is
+ * longer than a full restart would require, so it should be more
+ * than long enough.
+ */
+ mp_wait_for_cpu_up(slot_num, 30000, 1);
+ mp_enable_preemption();
+
+ /*
+ * Check to make sure that the CPU is really running. If not,
+ * go through the slow path.
+ */
+ if (cpu_datap(slot_num)->cpu_running)
+ return(KERN_SUCCESS);
+ else
+ return(KERN_FAILURE);
+}
+
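Taken together with intel_startCPU() (declared alongside it in mp.h below), the fast path above gives callers a two-tier bring-up: try the fast exit-from-halt, and only fall back to the full INIT/SIPI startup when the CPU was not parked in a fast-restartable halt. A hypothetical caller sketch, not part of this diff (the function name and fallback ordering are assumptions based on the KERN_FAILURE return above):

/*
 * Hypothetical caller sketch (illustrative only): attempt the fast
 * restart first; fall back to the full bring-up if it is not possible.
 */
kern_return_t
start_cpu_example(int slot_num)
{
	kern_return_t rc;

	rc = intel_startCPU_fast(slot_num);	/* KERN_SUCCESS if already back online */
	if (rc == KERN_SUCCESS)
		return rc;

	return intel_startCPU(slot_num);	/* slow path: full startup */
}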
extern char slave_boot_base[];
extern char slave_boot_end[];
extern void slave_pstart(void);
#endif /* MP_DEBUG */
-void
+int
cpu_signal_handler(x86_saved_state_t *regs)
{
int my_cpu;
mp_enable_preemption();
+ return 0;
}
-/* We want this to show up in backtraces, hence marked noinline.
- */
-static int __attribute__((noinline))
+static int
NMIInterruptHandler(x86_saved_state_t *regs)
{
void *stackptr;
sync_iss_to_iks_unconditionally(regs);
__asm__ volatile("movl %%ebp, %0" : "=m" (stackptr));
+ if (cpu_number() == debugger_cpu)
+ goto NMExit;
+
if (pmap_tlb_flush_timeout == TRUE && current_cpu_datap()->cpu_tlb_invalid) {
- panic_i386_backtrace(stackptr, 10, "Panic: Unresponsive processor\n", TRUE, regs);
+ char pstr[128];
+ snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor\n", cpu_number());
+ panic_i386_backtrace(stackptr, 10, &pstr[0], TRUE, regs);
panic_io_port_read();
mca_check_save();
if (pmsafe_debug)
}
}
mp_kdp_wait(FALSE);
+NMExit:
return 1;
}
#ifdef MP_DEBUG
-extern int max_lock_loops;
+int max_lock_loops = 1000000;
int trappedalready = 0; /* (BRINGUP */
#endif /* MP_DEBUG */
/* Wait for previous interrupt to be delivered... */
#ifdef MP_DEBUG
int pending_busy_count = 0;
- while (LAPIC_REG(ICR) & LAPIC_ICR_DS_PENDING) {
+ while (LAPIC_READ(ICR) & LAPIC_ICR_DS_PENDING) {
if (++pending_busy_count > max_lock_loops)
panic("i386_cpu_IPI() deadlock\n");
#else
- while (LAPIC_REG(ICR) & LAPIC_ICR_DS_PENDING) {
+ while (LAPIC_READ(ICR) & LAPIC_ICR_DS_PENDING) {
#endif /* MP_DEBUG */
cpu_pause();
}
state = ml_set_interrupts_enabled(FALSE);
- LAPIC_REG(ICRD) =
- cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT;
- LAPIC_REG(ICR) =
- LAPIC_VECTOR(INTERPROCESSOR) | LAPIC_ICR_DM_FIXED;
+ LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT);
+ LAPIC_WRITE(ICR, LAPIC_VECTOR(INTERPROCESSOR) | LAPIC_ICR_DM_FIXED);
(void) ml_set_interrupts_enabled(state);
}
if (smp_initialized) {
state = ml_set_interrupts_enabled(FALSE);
/* Program the interrupt command register */
- LAPIC_REG(ICRD) =
- cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT;
+ LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT);
/* The vector is ignored in this case--the target CPU will enter on the
* NMI vector.
*/
- LAPIC_REG(ICR) =
- LAPIC_VECTOR(INTERPROCESSOR) | LAPIC_ICR_DM_NMI;
+ LAPIC_WRITE(ICR, LAPIC_VECTOR(INTERPROCESSOR)|LAPIC_ICR_DM_NMI);
(void) ml_set_interrupts_enabled(state);
}
}
#if MACH_KDP
volatile boolean_t mp_kdp_trap = FALSE;
-volatile unsigned long mp_kdp_ncpus;
+volatile unsigned long mp_kdp_ncpus;
boolean_t mp_kdp_state;
{
unsigned int cpu;
unsigned int ncpus;
- unsigned int my_cpu = cpu_number();
+ unsigned int my_cpu;
uint64_t tsc_timeout;
DBG("mp_kdp_enter()\n");
mp_kdp_wait(TRUE);
simple_lock(&mp_kdp_lock);
}
+ my_cpu = cpu_number();
+ debugger_cpu = my_cpu;
mp_kdp_ncpus = 1; /* self */
mp_kdp_trap = TRUE;
simple_unlock(&mp_kdp_lock);
mp_kdp_exit(void)
{
DBG("mp_kdp_exit()\n");
+ debugger_cpu = -1;
atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
mp_kdp_trap = FALSE;
__asm__ volatile("mfence");
#endif /* MACH_KDB */
-/*
- * i386_init_slave() is called from pstart.
- * We're in the cpu's interrupt stack with interrupts disabled.
- * At this point we are in legacy mode. We need to switch on IA32e
- * if the mode is set to 64-bits.
- */
-void
-i386_init_slave(void)
+static void
+do_init_slave(boolean_t fast_restart)
{
+ void *init_param = FULL_SLAVE_INIT;
+
postcode(I386_INIT_SLAVE);
- /* Ensure that caching and write-through are enabled */
- set_cr0(get_cr0() & ~(CR0_NW|CR0_CD));
+ if (!fast_restart) {
+ /* Ensure that caching and write-through are enabled */
+ set_cr0(get_cr0() & ~(CR0_NW|CR0_CD));
- DBG("i386_init_slave() CPU%d: phys (%d) active.\n",
- get_cpu_number(), get_cpu_phys_number());
+ DBG("i386_init_slave() CPU%d: phys (%d) active.\n",
+ get_cpu_number(), get_cpu_phys_number());
- assert(!ml_get_interrupts_enabled());
+ assert(!ml_get_interrupts_enabled());
- cpu_mode_init(current_cpu_datap());
+ cpu_mode_init(current_cpu_datap());
- mca_cpu_init();
+ mca_cpu_init();
- lapic_init();
- LAPIC_DUMP();
- LAPIC_CPU_MAP_DUMP();
+ lapic_configure();
+ LAPIC_DUMP();
+ LAPIC_CPU_MAP_DUMP();
- init_fpu();
+ init_fpu();
- mtrr_update_cpu();
+ mtrr_update_cpu();
+ } else
+ init_param = FAST_SLAVE_INIT;
/* resume VT operation */
vmx_resume();
- pat_init();
+ if (!fast_restart)
+ pat_init();
cpu_thread_init(); /* not strictly necessary */
cpu_init(); /* Sets cpu_running which starter cpu waits for */
- slave_main();
+ slave_main(init_param);
- panic("i386_init_slave() returned from slave_main()");
+ panic("do_init_slave() returned from slave_main()");
}
+/*
+ * i386_init_slave() is called from pstart.
+ * We're in the cpu's interrupt stack with interrupts disabled.
+ * At this point we are in legacy mode. We need to switch on IA32e
+ * if the mode is set to 64-bits.
+ */
void
-slave_machine_init(void)
+i386_init_slave(void)
+{
+ do_init_slave(FALSE);
+}
+
+/*
+ * i386_init_slave_fast() is called from pmCPUHalt.
+ * We're running on the idle thread and need to fix up
+ * some accounting and get it so that the scheduler sees this
+ * CPU again.
+ */
+void
+i386_init_slave_fast(void)
+{
+ do_init_slave(TRUE);
+}
+
+void
+slave_machine_init(void *param)
{
/*
* Here in process context, but with interrupts disabled.
*/
DBG("slave_machine_init() CPU%d\n", get_cpu_number());
- clock_init();
+ if (param == FULL_SLAVE_INIT) {
+ /*
+ * Cold start
+ */
+ clock_init();
- cpu_machine_init(); /* Interrupts enabled hereafter */
+ cpu_machine_init(); /* Interrupts enabled hereafter */
+ }
}
#undef cpu_number()
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
*/
#ifdef KERNEL_PRIVATE
-#ifndef _I386AT_MP_H_
-#define _I386AT_MP_H_
+#ifndef _I386_MP_H_
+#define _I386_MP_H_
#ifndef DEBUG
#include <debug.h>
#include <i386/apic.h>
#include <i386/mp_events.h>
-#define LAPIC_ID_MAX (LAPIC_ID_MASK)
-
-#define MAX_CPUS (LAPIC_ID_MAX + 1)
+#define MAX_CPUS 32 /* (8*sizeof(long)) */
#ifndef ASSEMBLER
+#include <stdint.h>
#include <sys/cdefs.h>
#include <mach/boolean.h>
#include <mach/kern_return.h>
#include <mach/i386/thread_status.h>
+#include <kern/lock.h>
__BEGIN_DECLS
extern kern_return_t intel_startCPU(int slot_num);
+extern kern_return_t intel_startCPU_fast(int slot_num);
extern void i386_init_slave(void);
+extern void i386_init_slave_fast(void);
extern void smp_init(void);
extern void cpu_interrupt(int cpu);
-
-extern void lapic_init(void);
-extern void lapic_shutdown(void);
-extern void lapic_smm_restore(void);
-extern boolean_t lapic_probe(void);
-extern void lapic_dump(void);
-extern int lapic_interrupt(int interrupt, x86_saved_state_t *state);
-extern void lapic_end_of_interrupt(void);
-extern int lapic_to_cpu[];
-extern int cpu_to_lapic[];
-extern int lapic_interrupt_base;
-extern void lapic_cpu_map(int lapic, int cpu_num);
-extern uint32_t ml_get_apicid(uint32_t cpu);
-
-extern void lapic_set_timer(
- boolean_t interrupt,
- lapic_timer_mode_t mode,
- lapic_timer_divide_t divisor,
- lapic_timer_count_t initial_count);
-
-extern void lapic_get_timer(
- lapic_timer_mode_t *mode,
- lapic_timer_divide_t *divisor,
- lapic_timer_count_t *initial_count,
- lapic_timer_count_t *current_count);
-
-typedef void (*i386_intr_func_t)(void *);
-extern void lapic_set_timer_func(i386_intr_func_t func);
-extern void lapic_set_pmi_func(i386_intr_func_t func);
-extern void lapic_set_thermal_func(i386_intr_func_t func);
-
__END_DECLS
-/*
- * By default, use high vectors to leave vector space for systems
- * with multiple I/O APIC's. However some systems that boot with
- * local APIC disabled will hang in SMM when vectors greater than
- * 0x5F are used. Those systems are not expected to have I/O APIC
- * so 16 (0x50 - 0x40) vectors for legacy PIC support is perfect.
- */
-#define LAPIC_DEFAULT_INTERRUPT_BASE 0xD0
-#define LAPIC_REDUCED_INTERRUPT_BASE 0x50
-/*
- * Specific lapic interrupts are relative to this base
- * in priority order from high to low:
- */
-
-#define LAPIC_PERFCNT_INTERRUPT 0xF
-#define LAPIC_TIMER_INTERRUPT 0xE
-#define LAPIC_INTERPROCESSOR_INTERRUPT 0xD
-#define LAPIC_THERMAL_INTERRUPT 0xC
-#define LAPIC_ERROR_INTERRUPT 0xB
-#define LAPIC_SPURIOUS_INTERRUPT 0xA
-/* The vector field is ignored for NMI interrupts via the LAPIC
- * or otherwise, so this is not an offset from the interrupt
- * base.
- */
-#define LAPIC_NMI_INTERRUPT 0x2
-
-#define LAPIC_REG(reg) \
- (*((volatile uint32_t *)(lapic_start + LAPIC_##reg)))
-#define LAPIC_REG_OFFSET(reg,off) \
- (*((volatile uint32_t *)(lapic_start + LAPIC_##reg + (off))))
-
-#define LAPIC_VECTOR(src) \
- (lapic_interrupt_base + LAPIC_##src##_INTERRUPT)
-
-#define LAPIC_ISR_IS_SET(base,src) \
- (LAPIC_REG_OFFSET(ISR_BASE,((base+LAPIC_##src##_INTERRUPT)/32)*0x10) & \
- (1 <<((base + LAPIC_##src##_INTERRUPT)%32)))
-
-extern vm_offset_t lapic_start;
-
-#endif /* ASSEMBLER */
-
-#define CPU_NUMBER(r) \
- movl %gs:CPU_NUMBER_GS,r
-
-#define CPU_NUMBER_FROM_LAPIC(r) \
- movl EXT(lapic_id),r; \
- movl 0(r),r; \
- shrl $(LAPIC_ID_SHIFT),r; \
- andl $(LAPIC_ID_MASK),r; \
- movl EXT(lapic_to_cpu)(,r,4),r
-
-
-/* word describing the reason for the interrupt, one per cpu */
-
-#ifndef ASSEMBLER
-#include <kern/lock.h>
-
extern unsigned int real_ncpus; /* real number of cpus */
extern unsigned int max_ncpus; /* max number of cpus */
decl_simple_lock_data(extern,kdb_lock) /* kdb lock */
#define MP_ENABLE_PREEMPTION_NO_CHECK
#endif /* MACH_RT */
-#endif /* _I386AT_MP_H_ */
+#endif /* _I386_MP_H_ */
#endif /* KERNEL_PRIVATE */
* The master cpu (cpu 0) has its data area statically allocated;
* others are allocated dynamically and this array is updated at runtime.
*/
-cpu_data_t cpu_data_master;
+cpu_data_t cpu_data_master = {
+ .cpu_this = &cpu_data_master,
+ .cpu_nanotime = &rtc_nanotime_info,
+ .cpu_is64bit = FALSE,
+ .cpu_int_stack_top = (vm_offset_t) low_eintstack,
+ };
cpu_data_t *cpu_data_ptr[MAX_CPUS] = { [0] &cpu_data_master };
decl_simple_lock_data(,cpu_lock); /* protects real_ncpus */
if (cdp->cpu_processor == NULL) {
cdp->cpu_processor = cpu_processor_alloc(TRUE);
cdp->cpu_pmap = pmap_cpu_alloc(TRUE);
- cdp->cpu_this = cdp;
- cdp->cpu_is64bit = FALSE;
- cdp->cpu_int_stack_top = (vm_offset_t) low_eintstack;
cpu_desc_init(cdp, TRUE);
fast_syscall_init();
}
real_ncpus++;
simple_unlock(&cpu_lock);
+ cdp->cpu_nanotime = &rtc_nanotime_info;
+
kprintf("cpu_data_alloc(%d) %p desc_table: %p "
"ldt: %p "
"int_stack: 0x%x-0x%x\n",
#include <i386/perfmon.h>
#include <i386/proc_reg.h>
#include <i386/cpu_threads.h>
-#include <i386/mp.h>
+#include <i386/lapic.h>
#include <i386/cpuid.h>
#include <i386/lock.h>
#include <vm/vm_kern.h>
static void
pmc_p4_intr(void *state)
{
- pmc_table_t *pmc_table = (pmc_table_t *) x86_core()->pmc;
+ pmc_table_t *pmc_table = (pmc_table_t *) x86_lcpu()->pmc;
uint32_t cccr_addr;
pmc_cccr_t cccr;
pmc_id_t id;
static void
pmc_p6_intr(void *state)
{
- pmc_table_t *pmc_table = (pmc_table_t *) x86_core()->pmc;
+ pmc_table_t *pmc_table = (pmc_table_t *) x86_lcpu()->pmc;
pmc_id_t id;
/*
static void
pmc_core_intr(void *state)
{
- pmc_table_t *pmc_table = (pmc_table_t *) x86_core()->pmc;
+ pmc_table_t *pmc_table = (pmc_table_t *) x86_lcpu()->pmc;
pmc_id_t id;
pmc_global_status_t ovf_status;
pmc_table->id_max = 17;
pmc_table->msr_counter_base = MSR_COUNTER_ADDR(0);
pmc_table->msr_control_base = MSR_CCCR_ADDR(0);
- lapic_set_pmi_func(&pmc_p4_intr);
+ lapic_set_pmi_func((i386_intr_func_t) &pmc_p4_intr);
break;
case pmc_Core:
pmc_table->id_max = 1;
pmc_table->Core.msr_global_ctrl = MSR_PERF_GLOBAL_CTRL;
pmc_table->Core.msr_global_ovf_ctrl = MSR_PERF_GLOBAL_OVF_CTRL;
pmc_table->Core.msr_global_status = MSR_PERF_GLOBAL_STATUS;
- lapic_set_pmi_func(&pmc_core_intr);
+ lapic_set_pmi_func((i386_intr_func_t) &pmc_core_intr);
break;
case pmc_P6:
pmc_table->id_max = 1;
pmc_table->msr_counter_base = MSR_P6_COUNTER_ADDR(0);
pmc_table->msr_control_base = MSR_P6_PES_ADDR(0);
- lapic_set_pmi_func(&pmc_p6_intr);
+ lapic_set_pmi_func((i386_intr_func_t) &pmc_p6_intr);
break;
default:
break;
static inline pmc_table_t *
pmc_table_valid(pmc_id_t id)
{
- x86_core_t *my_core = x86_core();
+ x86_lcpu_t *my_lcpu = x86_lcpu();
pmc_table_t *pmc;
- assert(my_core != NULL);
+ assert(my_lcpu != NULL);
- pmc = (pmc_table_t *) my_core->pmc;
+ pmc = (pmc_table_t *) my_lcpu->pmc;
if ((pmc == NULL) ||
(id > pmc->id_max) ||
(pmc->machine_type == pmc_P4_Xeon && !pmc->P4.reserved[id]) ||
int
pmc_machine_type(pmc_machine_t *type)
{
- x86_core_t *my_core = x86_core();
+ x86_lcpu_t *my_lcpu = x86_lcpu();
pmc_table_t *pmc_table;
- assert(my_core != NULL);
+ assert(my_lcpu != NULL);
- pmc_table = (pmc_table_t *) my_core->pmc;
+ pmc_table = (pmc_table_t *) my_lcpu->pmc;
if (pmc_table == NULL)
return KERN_FAILURE;
int
pmc_reserve(pmc_id_t id)
{
- x86_core_t *my_core = x86_core();
+ x86_lcpu_t *my_lcpu = x86_lcpu();
pmc_table_t *pmc_table;
- assert(my_core != NULL);
+ assert(my_lcpu != NULL);
- pmc_table = (pmc_table_t *) my_core->pmc;
+ pmc_table = (pmc_table_t *) my_lcpu->pmc;
if (pmc_table == NULL)
return KERN_FAILURE;
if (id > pmc_table->id_max)
/*
- * Copyright (c) 2004-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
*
* Implements the "wrappers" to the KEXT.
*/
+#include <kern/machine.h>
#include <i386/machine_routines.h>
#include <i386/machine_cpu.h>
#include <i386/misc_protos.h>
#include <i386/pmCPU.h>
#include <i386/cpuid.h>
#include <i386/rtclock.h>
+#include <kern/sched_prim.h>
+
+/*
+ * Kernel parameter determining whether threads are halted unconditionally
+ * in the idle state. This is the default behavior.
+ * See machine_idle() for use.
+ */
+int idlehalt = 1;
extern int disableConsoleOutput;
*/
pmDispatch_t *pmDispatch = NULL;
-/*
- * Current power management states (for use until KEXT is loaded).
- */
-static pmInitState_t pmInitState;
-
static uint32_t pmInitDone = 0;
-/*
- * Nap control variables:
- */
-uint32_t forcenap = 0; /* Force nap (fn) boot-arg controls */
-
-/*
- * Do any initialization needed
- */
-void
-pmsInit(void)
-{
- static int initialized = 0;
-
- /*
- * Initialize some of the initial state to "uninitialized" until
- * it gets set with something more useful. This allows the KEXT
- * to determine if the initial value was actually set to something.
- */
- if (!initialized) {
- pmInitState.PState = -1;
- pmInitState.PLimit = -1;
- pmInitState.maxBusDelay = -1;
- initialized = 1;
- }
-
- if (pmDispatch != NULL && pmDispatch->pmsInit != NULL)
- (*pmDispatch->pmsInit)();
-}
-
-/*
- * Start the power management stepper on all processors
- *
- * All processors must be parked. This should be called when the hardware
- * is ready to step. Probably only at boot and after wake from sleep.
- *
- */
-void
-pmsStart(void)
-{
- if (pmDispatch != NULL && pmDispatch->pmsStart != NULL)
- (*pmDispatch->pmsStart)();
-}
-
-/*
- * Park the stepper execution. This will force the stepper on this
- * processor to abandon its current step and stop. No changes to the
- * hardware state is made and any previous step is lost.
- *
- * This is used as the initial state at startup and when the step table
- * is being changed.
- *
- */
-void
-pmsPark(void)
-{
- if (pmDispatch != NULL && pmDispatch->pmsPark != NULL)
- (*pmDispatch->pmsPark)();
-}
-
-/*
- * Control the Power Management Stepper.
- * Called from user state by the superuser.
- * Interrupts disabled.
- *
- * This interface is deprecated and is now a no-op.
- */
-kern_return_t
-pmsControl(__unused uint32_t request, __unused user_addr_t reqaddr,
- __unused uint32_t reqsize)
-{
- return(KERN_SUCCESS);
-}
-
-/*
- * Broadcast a change to all processors including ourselves.
- *
- * Interrupts disabled.
- */
-void
-pmsRun(uint32_t nstep)
-{
- if (pmDispatch != NULL && pmDispatch->pmsRun != NULL)
- (*pmDispatch->pmsRun)(nstep);
-}
-
-/*
- * Build the tables needed for the stepper. This includes both the step
- * definitions and the step control table.
- *
- * We most absolutely need to be parked before this happens because we're
- * going to change the table. We also have to be complte about checking
- * for errors. A copy is always made because we don't want to be crippled
- * by not being able to change the table or description formats.
- *
- * We pass in a table of external functions and the new stepper def uses
- * the corresponding indexes rather than actual function addresses. This
- * is done so that a proper table can be built with the control syscall.
- * It can't supply addresses, so the index has to do. We internalize the
- * table so our caller does not need to keep it. Note that passing in a 0
- * will use the current function table. Also note that entry 0 is reserved
- * and must be 0, we will check and fail the build.
- *
- * The platformData parameter is a 32-bit word of data that is passed unaltered
- * to the set function.
- *
- * The queryFunc parameter is the address of a function that will return the
- * current state of the platform. The format of the data returned is the same
- * as the platform specific portions of pmsSetCmd, i.e., pmsXClk, pmsVoltage,
- * and any part of pmsPowerID that is maintained by the platform hardware
- * (an example would be the values of the gpios that correspond to pmsPowerID).
- * The value should be constructed by querying hardware rather than returning
- * a value cached by software. One of the intents of this function is to help
- * recover lost or determine initial power states.
- *
- */
-kern_return_t
-pmsBuild(pmsDef *pd, uint32_t pdsize, pmsSetFunc_t *functab,
- uint32_t platformData, pmsQueryFunc_t queryFunc)
-{
- kern_return_t rc = 0;
-
- if (pmDispatch != NULL && pmDispatch->pmsBuild != NULL)
- rc = (*pmDispatch->pmsBuild)(pd, pdsize, functab,
- platformData, queryFunc);
-
- return(rc);
-}
-
-
-/*
- * Load a new ratio/VID table.
- *
- * Note that this interface is specific to the Intel SpeedStep implementation.
- * It is expected that this will only be called once to override the default
- * ratio/VID table when the platform starts.
- *
- * Normally, the table will need to be replaced at the same time that the
- * stepper program proper is replaced, as the PState indices from an old
- * program may no longer be valid. When replacing the default program this
- * should not be a problem as any new table will have at least two PState
- * entries and the default program only references P0 and P1.
- */
-kern_return_t
-pmsCPULoadVIDTable(uint16_t *tablep, int nstates)
-{
- if (pmDispatch != NULL && pmDispatch->pmsCPULoadVIDTable != NULL)
- return((*pmDispatch->pmsCPULoadVIDTable)(tablep, nstates));
- else {
- int i;
-
- if (nstates > MAX_PSTATES)
- return(KERN_FAILURE);
-
- for (i = 0; i < nstates; i += 1)
- pmInitState.VIDTable[i] = tablep[i];
- }
- return(KERN_SUCCESS);
-}
-
-/*
- * Set the (global) PState limit. CPUs will not be permitted to run at
- * a lower (more performant) PState than this.
- */
-kern_return_t
-pmsCPUSetPStateLimit(uint32_t limit)
-{
- if (pmDispatch != NULL && pmDispatch->pmsCPUSetPStateLimit != NULL)
- return((*pmDispatch->pmsCPUSetPStateLimit)(limit));
-
- pmInitState.PLimit = limit;
- return(KERN_SUCCESS);
-}
/*
* Initialize the Cstate change code.
}
/*
- * ACPI calls the following routine to set/update mwait hints. A table
- * (possibly null) specifies the available Cstates and their hints, all
- * other states are assumed to be invalid. ACPI may update available
- * states to change the nap policy (for example, while AC power is
- * available).
+ * Called when the CPU is idle. It calls into the power management kext
+ * to determine the best way to idle the CPU.
*/
-kern_return_t
-Cstate_table_set(Cstate_hint_t *tablep, unsigned int nstates)
+void
+machine_idle(void)
{
- if (forcenap)
- return(KERN_SUCCESS);
+ cpu_data_t *my_cpu = current_cpu_datap();
- if (pmDispatch != NULL && pmDispatch->cstateTableSet != NULL)
- return((*pmDispatch->cstateTableSet)(tablep, nstates));
- else {
- unsigned int i;
+ if (my_cpu == NULL)
+ goto out;
- for (i = 0; i < nstates; i += 1) {
- pmInitState.CStates[i].number = tablep[i].number;
- pmInitState.CStates[i].hint = tablep[i].hint;
- }
+ /*
+ * If idlehalt isn't set, then don't do any power management related
+ * idle handling.
+ */
+ if (!idlehalt)
+ goto out;
+
+ my_cpu->lcpu.state = LCPU_IDLE;
+ DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
+ MARK_CPU_IDLE(cpu_number());
- pmInitState.CStatesCount = nstates;
+ if (pmInitDone
+ && pmDispatch != NULL
+ && pmDispatch->cstateMachineIdle != NULL)
+ (*pmDispatch->cstateMachineIdle)(0x7FFFFFFFFFFFFFFFULL);
+ else {
+ /*
+ * If no power management, re-enable interrupts and halt.
+ * This will keep the CPU from spinning through the scheduler
+ * and will allow at least some minimal power savings (but it
+ * may cause problems in some MP configurations w.r.t. the APIC
+ * stopping during a GV3 transition).
+ */
+ __asm__ volatile ("sti; hlt");
}
- return(KERN_SUCCESS);
-}
-/*
- * Called when the CPU is idle. It will choose the best C state to
- * be in.
- */
-void
-machine_idle_cstate(boolean_t halted)
-{
- if (pmInitDone
- && pmDispatch != NULL
- && pmDispatch->cstateMachineIdle != NULL)
- (*pmDispatch->cstateMachineIdle)(!halted ?
- 0x7FFFFFFFFFFFFFFFULL : 0ULL);
- else if (halted) {
- /*
- * If no power managment and a processor is taken off-line,
- * then invalidate the cache and halt it (it will not be able
- * to be brought back on-line without resetting the CPU).
- */
- __asm__ volatile ( "wbinvd; hlt" );
- } else {
- /*
- * If no power management, re-enable interrupts and halt.
- * This will keep the CPU from spinning through the scheduler
- * and will allow at least some minimal power savings (but it
- * may cause problems in some MP configurations w.r.t to the
- * APIC stopping during a P-State transition).
- */
- __asm__ volatile ( "sti; hlt" );
- }
+ /*
+ * Mark the CPU as running again.
+ */
+ MARK_CPU_ACTIVE(cpu_number());
+ DBGLOG(cpu_handle, cpu_number(), MP_UNIDLE);
+ my_cpu->lcpu.state = LCPU_RUN;
+
+ /*
+ * Re-enable interrupts.
+ */
+ out:
+ __asm__ volatile("sti");
}
/*
void
pmCPUHalt(uint32_t reason)
{
+ cpu_data_t *cpup = current_cpu_datap();
switch (reason) {
case PM_HALT_DEBUG:
+ cpup->lcpu.state = LCPU_PAUSE;
__asm__ volatile ("wbinvd; hlt");
break;
case PM_HALT_PANIC:
+ cpup->lcpu.state = LCPU_PAUSE;
__asm__ volatile ("cli; wbinvd; hlt");
break;
if (pmInitDone
&& pmDispatch != NULL
&& pmDispatch->pmCPUHalt != NULL) {
+ /*
+ * Halt the CPU (and put it in a low power state).
+ */
(*pmDispatch->pmCPUHalt)();
- } else {
- cpu_data_t *cpup = current_cpu_datap();
+ /*
+ * We've exited halt, so make the CPU schedulable again.
+ */
+ i386_init_slave_fast();
+
+ panic("init_slave_fast returned");
+ } else {
/*
* If no power management and a processor is taken off-line,
* then invalidate the cache and halt it (it will not be able
* to be brought back on-line without resetting the CPU).
*/
__asm__ volatile ("wbinvd");
- cpup->lcpu.halted = TRUE;
+ cpup->lcpu.state = LCPU_HALT;
__asm__ volatile ( "wbinvd; hlt" );
+
+ panic("back from Halt");
}
break;
}
}
-/*
- * Called to initialize the power management structures for the CPUs.
- */
void
-pmCPUStateInit(void)
+pmMarkAllCPUsOff(void)
{
- if (pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL)
- (*pmDispatch->pmCPUStateInit)();
+ if (pmInitDone
+ && pmDispatch != NULL
+ && pmDispatch->markAllCPUsOff != NULL)
+ (*pmDispatch->markAllCPUsOff)();
}
static void
return(cpup->lcpu.core);
}
+static x86_die_t *
+pmGetDie(int cpu)
+{
+ return(cpu_to_die(cpu));
+}
+
+static x86_die_t *
+pmGetMyDie(void)
+{
+ cpu_data_t *cpup = current_cpu_datap();
+
+ return(cpup->lcpu.die);
+}
+
static x86_pkg_t *
pmGetPackage(int cpu)
{
{
cpu_data_t *cpup = current_cpu_datap();
- return(cpup->lcpu.core->package);
+ return(cpup->lcpu.package);
}
static void
return(do_ipi);
}
+kern_return_t
+pmCPUExitHalt(int cpu)
+{
+ kern_return_t rc = KERN_INVALID_ARGUMENT;
+
+ if (pmInitDone
+ && pmDispatch != NULL
+ && pmDispatch->exitHalt != NULL)
+ rc = pmDispatch->exitHalt(cpu_to_lcpu(cpu));
+
+ return(rc);
+}
+
/*
- * Called when a CPU is being restarted after being powered off (as in S3).
+ * Called to initialize the power management structures for the CPUs.
*/
void
-pmCPUMarkRunning(cpu_data_t *cpu)
+pmCPUStateInit(void)
{
- if (pmInitDone
- && pmDispatch != NULL
- && pmDispatch->markCPURunning != NULL)
- (*pmDispatch->markCPURunning)(&cpu->lcpu);
+ if (pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL)
+ (*pmDispatch->pmCPUStateInit)();
}
/*
- * Called from the HPET interrupt handler to perform the
- * necessary power management work.
+ * Called when a CPU is being restarted after being powered off (as in S3).
*/
void
-pmHPETInterrupt(void)
+pmCPUMarkRunning(cpu_data_t *cpu)
{
+ cpu_data_t *cpup = current_cpu_datap();
+
if (pmInitDone
&& pmDispatch != NULL
- && pmDispatch->HPETInterrupt != NULL)
- (*pmDispatch->HPETInterrupt)();
+ && pmDispatch->markCPURunning != NULL)
+ (*pmDispatch->markCPURunning)(&cpu->lcpu);
+ else
+ cpup->lcpu.state = LCPU_RUN;
}
/*
return(rc);
}
+/*
+ * Called to save the timer state used by power management prior
+ * to "sleeping".
+ */
+void
+pmTimerSave(void)
+{
+ if (pmDispatch != NULL
+ && pmDispatch->pmTimerStateSave != NULL)
+ (*pmDispatch->pmTimerStateSave)();
+}
+
+/*
+ * Called to restore the timer state used by power management after
+ * waking from "sleep".
+ */
+void
+pmTimerRestore(void)
+{
+ if (pmDispatch != NULL
+ && pmDispatch->pmTimerStateRestore != NULL)
+ (*pmDispatch->pmTimerStateRestore)();
+}
+
/*
* Set the worst-case time for the C4 to C2 transition.
* No longer does anything.
if (pmDispatch != NULL
&& pmDispatch->setMaxBusDelay != NULL)
pmDispatch->setMaxBusDelay(maxdelay);
- else
- pmInitState.maxBusDelay = maxdelay;
+}
+
+uint64_t
+ml_get_maxintdelay(void)
+{
+ uint64_t max_delay = 0;
+
+ if (pmDispatch != NULL
+ && pmDispatch->getMaxIntDelay != NULL)
+ max_delay = pmDispatch->getMaxIntDelay();
+
+ return(max_delay);
+}
+
+/*
+ * Set the maximum delay allowed for an interrupt.
+ */
+void
+ml_set_maxintdelay(uint64_t mdelay)
+{
+ if (pmDispatch != NULL
+ && pmDispatch->setMaxIntDelay != NULL)
+ pmDispatch->setMaxIntDelay(mdelay);
}
/*
* We only look at the PAUSE and RESUME flags. The other flag(s)
* will not make any sense without the KEXT, so just ignore them.
*
- * We set the halted flag in the LCPU structure to indicate
- * that this CPU isn't to do anything. If it's the CPU we're
- * currently running on, then spin until the halted flag is
- * reset.
+ * We set the CPU's state to indicate that it's halted. If this
+ * is the CPU we're currently running on, then spin until the
+ * state becomes non-halted.
*/
if (flags & PM_SAFE_FL_PAUSE) {
- lcpu->halted = TRUE;
+ lcpu->state = LCPU_PAUSE;
if (lcpu == x86_lcpu()) {
- while (lcpu->halted)
+ while (lcpu->state == LCPU_PAUSE)
cpu_pause();
}
}
* get it out of its spin loop.
*/
if (flags & PM_SAFE_FL_RESUME) {
- lcpu->halted = FALSE;
+ lcpu->state = LCPU_RUN;
}
}
}
pmCallBacks_t *callbacks)
{
if (callbacks != NULL && version == PM_DISPATCH_VERSION) {
- callbacks->InitState = &pmInitState;
callbacks->setRTCPop = setPop;
callbacks->resyncDeadlines = etimer_resync_deadlines;
callbacks->initComplete= pmInitComplete;
callbacks->GetLCPU = pmGetLogicalCPU;
callbacks->GetCore = pmGetCore;
+ callbacks->GetDie = pmGetDie;
callbacks->GetPackage = pmGetPackage;
callbacks->GetMyLCPU = pmGetMyLogicalCPU;
callbacks->GetMyCore = pmGetMyCore;
+ callbacks->GetMyDie = pmGetMyDie;
callbacks->GetMyPackage= pmGetMyPackage;
- callbacks->CoresPerPkg = cpuid_info()->cpuid_cores_per_package;
callbacks->GetPkgRoot = pmGetPkgRoot;
callbacks->LockCPUTopology = pmLockCPUTopology;
callbacks->GetHibernate = pmCPUGetHibernate;
callbacks->LCPUtoProcessor = pmLCPUtoProcessor;
+ callbacks->ThreadBind = thread_bind;
+ callbacks->topoParms = &topoParms;
}
if (cpuFuncs != NULL) {
}
}
+/******************************************************************************
+ *
+ * All of the following are deprecated interfaces and no longer used.
+ *
+ ******************************************************************************/
+kern_return_t
+pmsControl(__unused uint32_t request, __unused user_addr_t reqaddr,
+ __unused uint32_t reqsize)
+{
+ return(KERN_SUCCESS);
+}
+
+void
+pmsInit(void)
+{
+}
+
+void
+pmsStart(void)
+{
+}
+
+void
+pmsPark(void)
+{
+}
+
+void
+pmsRun(__unused uint32_t nstep)
+{
+}
+
+kern_return_t
+pmsBuild(__unused pmsDef *pd, __unused uint32_t pdsize,
+ __unused pmsSetFunc_t *functab,
+ __unused uint32_t platformData, __unused pmsQueryFunc_t queryFunc)
+{
+ return(KERN_SUCCESS);
+}
/*
- * Copyright (c) 2006-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2006-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#ifndef _I386_PMCPU_H_
#define _I386_PMCPU_H_
-#include <kern/pms.h>
#include <i386/cpu_topology.h>
#ifndef ASSEMBLER
-#define MAX_PSTATES 32 /* architectural limit */
-
-typedef enum
-{
- Cn1, Cn2, Cn3, Cn4, Cn5, Cn6, CnHlt, Cn0, CnRun, Cnmax
-} Cstate_number_t;
-
-typedef struct
-{
- Cstate_number_t number;
- uint32_t hint;
-} Cstate_hint_t;
-
-
-struct pmData {
- uint8_t pad[93];
-};
-typedef struct pmData pmData_t;
-
-#define pmNapHalt 0x00000010
-#define pmNapC1 0x00000008
-#define pmNapC2 0x00000004
-#define pmNapC3 0x00000002
-#define pmNapC4 0x00000001
-#define pmNapMask 0x000000FF
-
-#define cfgAdr 0xCF8
-#define cfgDat 0xCFC
-#define lpcCfg (0x80000000 | (0 << 16) | (31 << 11) | (0 << 8))
-
/*
* This value should be changed each time that pmDsipatch_t or pmCallBacks_t
* changes.
*/
-#define PM_DISPATCH_VERSION 7
+#define PM_DISPATCH_VERSION 12
/*
* Dispatch table for functions that get installed when the power
*/
typedef struct
{
- /*
- * The following are the stepper table interfaces.
- */
int (*pmCPUStateInit)(void);
- void (*pmsInit)(void);
- void (*pmsStart)(void);
- void (*pmsPark)(void);
- kern_return_t (*pmsCPUSetPStateLimit)(uint32_t limit);
-
- /*
- * The following are legacy stepper interfaces.
- */
- void (*pmsRun)(uint32_t nstep);
- kern_return_t (*pmsBuild)(pmsDef *pd, uint32_t pdsize, pmsSetFunc_t *functab, uint32_t platformData, pmsQueryFunc_t queryFunc);
- kern_return_t (*pmsCPULoadVIDTable)(uint16_t *tablep, int nstates);
/*
* The following are the 'C' State interfaces.
*/
void (*cstateInit)(void);
uint64_t (*cstateMachineIdle)(uint64_t maxIdleDuration);
- kern_return_t (*cstateTableSet)(Cstate_hint_t *tablep, unsigned int nstates);
uint64_t (*GetDeadline)(x86_lcpu_t *lcpu);
uint64_t (*SetDeadline)(x86_lcpu_t *lcpu, uint64_t);
void (*Deadline)(x86_lcpu_t *lcpu);
boolean_t (*exitIdle)(x86_lcpu_t *lcpu);
void (*markCPURunning)(x86_lcpu_t *lcpu);
- void (*HPETInterrupt)(void);
int (*pmCPUControl)(uint32_t cmd, void *datap);
void (*pmCPUHalt)(void);
uint64_t (*getMaxSnoop)(void);
void (*setMaxBusDelay)(uint64_t time);
uint64_t (*getMaxBusDelay)(void);
+ void (*setMaxIntDelay)(uint64_t time);
+ uint64_t (*getMaxIntDelay)(void);
void (*pmCPUSafeMode)(x86_lcpu_t *lcpu, uint32_t flags);
+ void (*pmTimerStateSave)(void);
+ void (*pmTimerStateRestore)(void);
+ kern_return_t (*exitHalt)(x86_lcpu_t *lcpu);
+ void (*markAllCPUsOff)(void);
} pmDispatch_t;
typedef struct {
- uint32_t PState;
- uint32_t PLimit;
- uint16_t VIDTable[MAX_PSTATES];
- uint32_t VIDTableCount;
- Cstate_hint_t CStates[Cnmax];
- uint32_t CStatesCount;
- uint64_t maxBusDelay;
-} pmInitState_t;
-
-typedef struct {
- uint64_t *(*HPETAddr)(void);
- pmInitState_t *InitState;
int (*setRTCPop)(uint64_t time);
void (*resyncDeadlines)(void);
void (*initComplete)(void);
x86_lcpu_t *(*GetLCPU)(int cpu);
x86_core_t *(*GetCore)(int cpu);
+ x86_die_t *(*GetDie)(int cpu);
x86_pkg_t *(*GetPackage)(int cpu);
x86_lcpu_t *(*GetMyLCPU)(void);
x86_core_t *(*GetMyCore)(void);
+ x86_die_t *(*GetMyDie)(void);
x86_pkg_t *(*GetMyPackage)(void);
- uint32_t CoresPerPkg;
x86_pkg_t *(*GetPkgRoot)(void);
void (*LockCPUTopology)(int lock);
boolean_t (*GetHibernate)(int cpu);
processor_t (*LCPUtoProcessor)(int lcpu);
+ processor_t (*ThreadBind)(processor_t proc);
+ x86_topology_parameters_t *topoParms;
} pmCallBacks_t;
extern pmDispatch_t *pmDispatch;
-extern uint32_t forcenap;
-
void power_management_init(void);
-void machine_nap_policy(void);
-kern_return_t Cstate_table_set(Cstate_hint_t *tablep, unsigned int nstates);
-void machine_idle_cstate(boolean_t halted);
void pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs,
pmCallBacks_t *callbacks);
void pmUnRegister(pmDispatch_t *cpuFuncs);
void pmCPUDeadline(struct cpu_data *cpu);
boolean_t pmCPUExitIdle(struct cpu_data *cpu);
void pmCPUMarkRunning(struct cpu_data *cpu);
-void pmHPETInterrupt(void);
+void pmMarkAllCPUsOff(void);
int pmCPUControl(uint32_t cmd, void *datap);
void pmCPUHalt(uint32_t reason);
+void pmTimerSave(void);
+void pmTimerRestore(void);
+kern_return_t pmCPUExitHalt(int cpu);
#define PM_HALT_NORMAL 0 /* normal halt path */
#define PM_HALT_DEBUG 1 /* debug code wants to halt */
#define PM_HALT_PANIC 2 /* panic code wants to halt */
+#define PM_HALT_SLEEP 3 /* sleep code wants to halt */
void pmSafeMode(x86_lcpu_t *lcpu, uint32_t flags);
#define PM_SAFE_FL_RESUME 0x00000020 /* resume execution on the CPU */
extern int pmsafe_debug;
+extern int idlehalt;
+
+/******************************************************************************
+ *
+ * All of the following are deprecated interfaces and no longer used.
+ *
+ ******************************************************************************/
+
#endif /* ASSEMBLER */
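The registration handshake implied by the declarations above runs in both directions: the power-management kext hands the kernel a pmDispatch_t of entry points, and the kernel fills in a pmCallBacks_t of services (topology lookups, RTC pops, thread binding) for the kext to use, provided the version matches PM_DISPATCH_VERSION. A minimal, hypothetical sketch of the kext side follows; everything except pmKextRegister(), PM_DISPATCH_VERSION, pmDispatch_t, and pmCallBacks_t (all declared above) is an illustrative assumption, not code from this diff.

#include <i386/pmCPU.h>

/* Kext-supplied handlers -- names and bodies are hypothetical. */
static uint64_t
my_cstate_idle(uint64_t maxIdleDuration)
{
	/* placeholder body: the real handler picks a C-state and halts,
	   honoring the maximum idle duration passed by machine_idle() */
	(void) maxIdleDuration;
	return 0;
}

static kern_return_t
my_exit_halt(x86_lcpu_t *lcpu)
{
	(void) lcpu;
	return KERN_SUCCESS;	/* CPU is eligible for a fast restart */
}

static pmDispatch_t my_dispatch = {
	.cstateMachineIdle	= my_cstate_idle,
	.exitHalt		= my_exit_halt,
	/* entries left NULL are simply skipped: the wrappers in pmCPU.c
	   above test each pointer before calling through it */
};

static pmCallBacks_t callbacks;		/* filled in by the kernel */

void
my_pm_kext_start(void)
{
	/* The version must equal PM_DISPATCH_VERSION (12 in this revision)
	   or the kernel will not fill in the callback table. */
	pmKextRegister(PM_DISPATCH_VERSION, &my_dispatch, &callbacks);
}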
virtual_avail = va;
- if (PE_parse_boot_arg("npvhash", &npvhash)) {
+ if (PE_parse_boot_argn("npvhash", &npvhash, sizeof (npvhash))) {
if (0 != ((npvhash+1) & npvhash)) {
kprintf("invalid hash %d, must be ((2^N)-1), using default %d\n",npvhash,NPVHASH);
npvhash = NPVHASH;
printf("npvhash=%d\n",npvhash);
wpkernel = 1;
- if (PE_parse_boot_arg("wpkernel", &boot_arg)) {
+ if (PE_parse_boot_argn("wpkernel", &boot_arg, sizeof (boot_arg))) {
if (boot_arg == 0)
wpkernel = 0;
}
* By default for 64-bit users loaded at 4GB, share kernel mapping.
* But this may be overridden by the -no_shared_cr3 boot-arg.
*/
- if (PE_parse_boot_arg("-no_shared_cr3", &no_shared_cr3)) {
+ if (PE_parse_boot_argn("-no_shared_cr3", &no_shared_cr3, sizeof (no_shared_cr3))) {
kprintf("Shared kernel address space disabled\n");
}
#ifdef PMAP_TRACES
- if (PE_parse_boot_arg("-pmap_trace", &pmap_trace)) {
+ if (PE_parse_boot_argn("-pmap_trace", &pmap_trace, sizeof (pmap_trace))) {
kprintf("Kernel traces for pmap operations enabled\n");
}
#endif /* PMAP_TRACES */
*/
while (cpus_to_respond != 0) {
if (mach_absolute_time() > deadline) {
- pmap_tlb_flush_timeout = TRUE;
- pmap_cpuset_NMIPI(cpus_to_respond);
+ if (!panic_active()) {
+ pmap_tlb_flush_timeout = TRUE;
+ pmap_cpuset_NMIPI(cpus_to_respond);
+ }
panic("pmap_flush_tlbs() timeout: "
"cpu(s) failing to respond to interrupts, pmap=%p cpus_to_respond=0x%lx",
pmap, cpus_to_respond);
__asm__ volatile("wrmsr" : : "c" (msr), "a" (lo), "d" (hi))
#define rdtsc(lo,hi) \
- __asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi))
+ __asm__ volatile("rdtsc; lfence" : "=a" (lo), "=d" (hi))
#define write_tsc(lo,hi) wrmsr(0x10, lo, hi)
static inline uint64_t rdtsc64(void)
{
uint64_t ret;
- __asm__ volatile("rdtsc" : "=A" (ret));
+ __asm__ volatile("rdtsc; lfence" : "=A" (ret));
return ret;
}
#define MSR_IA32_GS_BASE 0xC0000101
#define MSR_IA32_KERNEL_GS_BASE 0xC0000102
+#define MSR_IA32_BIOS_SIGN_ID 0x08B
+
#endif /* _I386_PROC_REG_H_ */
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <i386/misc_protos.h>
#include <i386/proc_reg.h>
#include <i386/machine_cpu.h>
-#include <i386/mp.h>
+#include <i386/lapic.h>
#include <i386/cpuid.h>
#include <i386/cpu_data.h>
#include <i386/cpu_threads.h>
#include <machine/commpage.h>
#include <sys/kdebug.h>
#include <i386/tsc.h>
-#include <i386/hpet.h>
#include <i386/rtclock.h>
#define NSEC_PER_HZ (NSEC_PER_SEC / 100) /* nsec per tick */
static void rtc_set_timescale(uint64_t cycles);
static uint64_t rtc_export_speed(uint64_t cycles);
-extern void _rtc_nanotime_store(
- uint64_t tsc,
- uint64_t nsec,
- uint32_t scale,
- uint32_t shift,
- rtc_nanotime_t *dst);
-
-extern uint64_t _rtc_nanotime_read(
- rtc_nanotime_t *rntp,
- int slow );
-
rtc_nanotime_t rtc_nanotime_info = {0,0,0,0,1,0};
-
/*
* tsc_to_nanoseconds:
*
"mull %%ecx ;"
"addl %%edi,%%eax ;"
"adcl $0,%%edx "
- : "+A" (value) : "c" (rtc_nanotime_info.scale) : "esi", "edi");
+ : "+A" (value)
+ : "c" (current_cpu_datap()->cpu_nanotime->scale)
+ : "esi", "edi");
return (value);
}
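The inline assembly above is a 64x32 fixed-point multiply: scale is set in rtc_set_timescale() below to (NSEC_PER_SEC << 32) / tsc_frequency, so nanoseconds = (tsc_delta * scale) >> 32, keeping the top 64 bits of the 96-bit product. A C sketch of the same arithmetic, with a hypothetical name, purely for clarity:

/* Sketch of the mull/addl/adcl sequence above (illustrative only). */
static inline uint64_t
tsc_to_nanoseconds_c(uint64_t tsc_delta, uint32_t scale)
{
	/* split 64x32 multiply; keep the top 64 bits of the 96-bit product */
	uint64_t lo = (uint64_t)(uint32_t)tsc_delta * scale;
	uint64_t hi = (tsc_delta >> 32) * scale;

	return hi + (lo >> 32);
}

With a 2 GHz TSC, for instance, scale comes out to 2^31 and the conversion halves the tick count, i.e. each TSC tick is 0.5 ns.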
static void
rtc_nanotime_init(uint64_t base)
{
- rtc_nanotime_t *rntp = &rtc_nanotime_info;
+ rtc_nanotime_t *rntp = current_cpu_datap()->cpu_nanotime;
_rtc_nanotime_init(rntp, base);
rtc_nanotime_set_commpage(rntp);
{
spl_t s = splclock();
- rtc_nanotime_set_commpage(&rtc_nanotime_info);
+ rtc_nanotime_set_commpage(current_cpu_datap()->cpu_nanotime);
splx(s);
}
#if CONFIG_EMBEDDED
if (gPEClockFrequencyInfo.timebase_frequency_hz > SLOW_TSC_THRESHOLD)
- return _rtc_nanotime_read( &rtc_nanotime_info, 1 ); /* slow processor */
+ return _rtc_nanotime_read(current_cpu_datap()->cpu_nanotime, 1); /* slow processor */
else
#endif
- return _rtc_nanotime_read( &rtc_nanotime_info, 0 ); /* assume fast processor */
+ return _rtc_nanotime_read(current_cpu_datap()->cpu_nanotime, 0); /* assume fast processor */
}
/*
void
rtc_clock_napped(uint64_t base, uint64_t tsc_base)
{
- rtc_nanotime_t *rntp = &rtc_nanotime_info;
+ rtc_nanotime_t *rntp = current_cpu_datap()->cpu_nanotime;
uint64_t oldnsecs;
uint64_t newnsecs;
uint64_t tsc;
static void
rtc_set_timescale(uint64_t cycles)
{
- rtc_nanotime_info.scale = ((uint64_t)NSEC_PER_SEC << 32) / cycles;
+ rtc_nanotime_t *rntp = current_cpu_datap()->cpu_nanotime;
+ rntp->scale = ((uint64_t)NSEC_PER_SEC << 32) / cycles;
if (cycles <= SLOW_TSC_THRESHOLD)
- rtc_nanotime_info.shift = cycles;
+ rntp->shift = cycles;
else
- rtc_nanotime_info.shift = 32;
+ rntp->shift = 32;
rtc_nanotime_init(0);
}
#ifndef _I386_RTCLOCK_H_
#define _I386_RTCLOCK_H_
+#ifndef ASSEMBLER
+typedef struct rtc_nanotime {
+ uint64_t tsc_base; /* timestamp */
+ uint64_t ns_base; /* nanoseconds */
+ uint32_t scale; /* tsc -> nanosec multiplier */
+ uint32_t shift; /* tsc -> nanosec shift/div */
+ /* shift is overloaded with
+ * lower 32 bits of tsc_freq
+ * on slower machines (SLOW_TSC_THRESHOLD) */
+ uint32_t generation; /* 0 == being updated */
+ uint32_t spare1;
+} rtc_nanotime_t;
+
#include <kern/etimer.h>
struct cpu_data;
+extern void _rtc_nanotime_store(
+ uint64_t tsc,
+ uint64_t nsec,
+ uint32_t scale,
+ uint32_t shift,
+ rtc_nanotime_t *dst);
+
+extern uint64_t _rtc_nanotime_read(
+ rtc_nanotime_t *rntp,
+ int slow);
+
+extern rtc_nanotime_t rtc_nanotime_info;
+#endif
+
+#define SLOW_TSC_THRESHOLD 1000067800 /* TSC is too slow for regular nanotime() algorithm */
+
+#if defined(__i386__)
+/*
+ * Assembly snippet included in exception handlers and rtc_nanotime_read()
+ * %edi points to nanotime info struct
+ * %edx:%eax returns nanotime
+ */
+#define RTC_NANOTIME_READ_FAST() \
+0: movl RNT_GENERATION(%edi),%esi /* being updated? */ ; \
+ testl %esi,%esi ; \
+ jz 0b /* wait until done */ ; \
+ rdtsc ; \
+ lfence ; \
+ subl RNT_TSC_BASE(%edi),%eax ; \
+ sbbl RNT_TSC_BASE+4(%edi),%edx /* tsc - tsc_base */ ; \
+ movl RNT_SCALE(%edi),%ecx /* * scale factor */ ; \
+ movl %edx,%ebx ; \
+ mull %ecx ; \
+ movl %ebx,%eax ; \
+ movl %edx,%ebx ; \
+ mull %ecx ; \
+ addl %ebx,%eax ; \
+ adcl $0,%edx ; \
+ addl RNT_NS_BASE(%edi),%eax /* + ns_base */ ; \
+ adcl RNT_NS_BASE+4(%edi),%edx ; \
+ cmpl RNT_GENERATION(%edi),%esi /* check for update */ ; \
+ jne 0b /* do it all again */
+
+#elif defined(__x86_64__)
+
+/*
+ * Assembly snippet included in exception handlers and rtc_nanotime_read()
+ * %rdi points to nanotime info struct.
+ * %rax returns nanotime
+ */
+#define RTC_NANOTIME_READ_FAST() \
+0: movl RNT_GENERATION(%rdi),%esi ; \
+ test %esi,%esi /* info updating? */ ; \
+ jz 0b /* - wait if so */ ; \
+ rdtsc ; \
+ lfence ; \
+ shlq $32,%rdx ; \
+ orq %rdx,%rax /* %rax := tsc */ ; \
+ subq RNT_TSC_BASE(%rdi),%rax /* tsc - tsc_base */ ; \
+ xorq %rcx,%rcx ; \
+ movl RNT_SCALE(%rdi),%ecx ; \
+ mulq %rcx /* delta * scale */ ; \
+ shrdq $32,%rdx,%rax /* %rdx:%rax >>= 32 */ ; \
+ addq RNT_NS_BASE(%rdi),%rax /* add ns_base */ ; \
+ cmpl RNT_GENERATION(%rdi),%esi /* repeat if changed */ ; \
+ jne 0b
+
+#endif
+
#endif /* _I386_RTCLOCK_H_ */
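/*
 * For illustration only (not part of this change): a minimal user-space
 * sketch of the computation RTC_NANOTIME_READ_FAST performs above,
 * ns = ns_base + ((tsc - tsc_base) * scale >> 32), with the generation
 * re-check (0 means the record is being updated). Only the rtc_nanotime
 * fields used here are mirrored; __uint128_t assumes GCC/Clang on a
 * 64-bit host.
 */
#include <stdint.h>

struct rtc_nanotime_sketch {
	uint64_t tsc_base;		/* TSC captured when ns_base was taken */
	uint64_t ns_base;		/* nanoseconds at tsc_base */
	uint32_t scale;			/* 32.32 fixed-point tsc -> ns multiplier */
	volatile uint32_t generation;	/* 0 == being updated by the writer */
};

static uint64_t
nanotime_sketch(const struct rtc_nanotime_sketch *rnt, uint64_t tsc)
{
	uint32_t gen;
	uint64_t delta, ns;

	do {
		do {
			gen = rnt->generation;	/* spin while the writer holds it at 0 */
		} while (gen == 0);
		delta = tsc - rnt->tsc_base;
		/* multiply by the 32.32 fraction: (delta * scale) >> 32 */
		ns = rnt->ns_base +
		    (uint64_t)(((__uint128_t)delta * rnt->scale) >> 32);
	} while (gen != rnt->generation);	/* record changed under us: retry */

	return ns;
}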
#define CX(addr,reg) addr(,reg,4)
-#include <i386/mp.h>
+#include <i386/lapic.h>
#include <i386/mp_slave_boot.h>
#include <i386/cpuid.h>
#include <i386/machine_check.h>
#include <mach/i386/syscall_sw.h>
+
+extern void throttle_lowpri_io(boolean_t);
+
/*
* Forward declarations
*/
}
regs->eax = ret;
}
- thread_exception_return();
+ throttle_lowpri_io(TRUE);
+
+ thread_exception_return();
/*NOTREACHED*/
}
uint64_t tscGranularity = 0;
uint64_t bus2tsc = 0;
uint64_t busFreq = 0;
+uint32_t flex_ratio = 0;
+uint32_t flex_ratio_min = 0;
+uint32_t flex_ratio_max = 0;
+
#define bit(n) (1ULL << (n))
#define bitmask(h,l) ((bit(h)|(bit(h)-1)) & ~(bit(l)-1))
static const char FSB_Frequency_prop[] = "FSBFrequency";
/*
- * This routine extracts the front-side bus frequency in Hz from
- * the device tree.
+ * This routine extracts the bus frequency in Hz from the device tree.
*/
static uint64_t
EFI_FSB_frequency(void)
boolean_t N_by_2_bus_ratio = FALSE;
/*
- * Get the FSB frequency and conversion factors.
+ * Get the FSB frequency and conversion factors from EFI.
*/
busFreq = EFI_FSB_frequency();
+
+ if (cpuid_info()->cpuid_family != CPU_FAMILY_PENTIUM_M) {
+ panic("tsc_init: unknown CPU family: 0x%X\n",
+ cpuid_info()->cpuid_family);
+ }
+
+ {
+ uint64_t prfsts;
+
+ prfsts = rdmsr64(IA32_PERF_STS);
+ tscGranularity = (uint32_t)bitfield(prfsts, 44, 40);
+ N_by_2_bus_ratio = (prfsts & bit(46)) != 0;
+ }
+
if (busFreq != 0) {
busFCvtt2n = ((1 * Giga) << 32) / busFreq;
busFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / busFCvtt2n;
busFCvtInt = tmrCvt(1 * Peta, 0xFFFFFFFFFFFFFFFFULL / busFreq);
} else {
- panic("rtclock_init: EFI not supported!\n");
+ panic("tsc_init: EFI not supported!\n");
}
kprintf(" BUS: Frequency = %6d.%04dMHz, "
- "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, "
- "cvtInt = %08X.%08X\n",
- (uint32_t)(busFreq / Mega),
- (uint32_t)(busFreq % Mega),
- (uint32_t)(busFCvtt2n >> 32), (uint32_t)busFCvtt2n,
- (uint32_t)(busFCvtn2t >> 32), (uint32_t)busFCvtn2t,
- (uint32_t)(busFCvtInt >> 32), (uint32_t)busFCvtInt);
+ "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, "
+ "cvtInt = %08X.%08X\n",
+ (uint32_t)(busFreq / Mega),
+ (uint32_t)(busFreq % Mega),
+ (uint32_t)(busFCvtt2n >> 32), (uint32_t)busFCvtt2n,
+ (uint32_t)(busFCvtn2t >> 32), (uint32_t)busFCvtn2t,
+ (uint32_t)(busFCvtInt >> 32), (uint32_t)busFCvtInt);
/*
* Get the TSC increment. The TSC is incremented by this
* is set this indicates the bus ratio is 0.5 more than this - i.e.
* that the true bus ratio is (2*tscGranularity + 1)/2.
*/
- if (cpuid_info()->cpuid_family == CPU_FAMILY_PENTIUM_M) {
- uint64_t prfsts;
-
- prfsts = rdmsr64(IA32_PERF_STS);
- tscGranularity = (uint32_t)bitfield(prfsts, 44, 40);
- N_by_2_bus_ratio = (prfsts & bit(46)) != 0;
-
- } else {
- panic("rtclock_init: unknown CPU family: 0x%X\n",
- cpuid_info()->cpuid_family);
- }
-
if (N_by_2_bus_ratio)
tscFCvtt2n = busFCvtt2n * 2 / (1 + 2*tscGranularity);
else
tscFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / tscFCvtt2n;
kprintf(" TSC: Frequency = %6d.%04dMHz, "
- "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, gran = %lld%s\n",
- (uint32_t)(tscFreq / Mega),
- (uint32_t)(tscFreq % Mega),
- (uint32_t)(tscFCvtt2n >> 32), (uint32_t)tscFCvtt2n,
- (uint32_t)(tscFCvtn2t >> 32), (uint32_t)tscFCvtn2t,
- tscGranularity, N_by_2_bus_ratio ? " (N/2)" : "");
+ "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, gran = %lld%s\n",
+ (uint32_t)(tscFreq / Mega),
+ (uint32_t)(tscFreq % Mega),
+ (uint32_t)(tscFCvtt2n >> 32), (uint32_t)tscFCvtt2n,
+ (uint32_t)(tscFCvtn2t >> 32), (uint32_t)tscFCvtn2t,
+ tscGranularity, N_by_2_bus_ratio ? " (N/2)" : "");
/*
* Calculate conversion from BUS to TSC
info->tscGranularity = tscGranularity;
info->bus2tsc = bus2tsc;
info->busFreq = busFreq;
+ info->flex_ratio = flex_ratio;
+ info->flex_ratio_min = flex_ratio_min;
+ info->flex_ratio_max = flex_ratio_max;
}
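/*
 * For illustration only (not part of this change): how the ratio read from
 * IA32_PERF_STS above turns into a TSC frequency. "gran" and "n_by_2"
 * mirror tscGranularity and N_by_2_bus_ratio; the function name is
 * hypothetical.
 */
#include <stdint.h>
#include <stdbool.h>

static uint64_t
tsc_freq_from_bus_sketch(uint64_t bus_freq_hz, uint64_t gran, bool n_by_2)
{
	/*
	 * The TSC advances at bus_freq * ratio. The ratio is either a whole
	 * multiple (gran) or, when bit 46 of IA32_PERF_STS is set, half a
	 * step more: (2 * gran + 1) / 2.
	 */
	if (n_by_2)
		return (bus_freq_hz * (2 * gran + 1)) / 2;
	return bus_freq_hz * gran;
}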
#ifndef _I386_TSC_H_
#define _I386_TSC_H_
-#define IA32_PERF_STS 0x198
+#define IA32_PERF_STS 0x198
extern uint64_t busFCvtt2n;
extern uint64_t busFCvtn2t;
extern uint64_t tscGranularity;
extern uint64_t bus2tsc;
extern uint64_t busFreq;
+extern uint32_t flex_ratio;
+extern uint32_t flex_ratio_min;
+extern uint32_t flex_ratio_max;
struct tscInfo
{
-uint64_t busFCvtt2n;
-uint64_t busFCvtn2t;
-uint64_t tscFreq;
-uint64_t tscFCvtt2n;
-uint64_t tscFCvtn2t;
-uint64_t tscGranularity;
-uint64_t bus2tsc;
-uint64_t busFreq;
+ uint64_t busFCvtt2n;
+ uint64_t busFCvtn2t;
+ uint64_t tscFreq;
+ uint64_t tscFCvtt2n;
+ uint64_t tscFCvtn2t;
+ uint64_t tscGranularity;
+ uint64_t bus2tsc;
+ uint64_t busFreq;
+ uint32_t flex_ratio;
+ uint32_t flex_ratio_min;
+ uint32_t flex_ratio_max;
};
typedef struct tscInfo tscInfo_t;
start_sel = LDTSZ_MIN;
}
- if (start_sel + num_sels > LDTSZ) {
+ if ((uint64_t)start_sel + (uint64_t)num_sels > LDTSZ) {
task_unlock(task);
return ENOMEM;
}
if (start_sel >= 8192)
return EINVAL;
- if (start_sel + num_sels > 8192)
+ if ((uint64_t)start_sel + (uint64_t)num_sels > 8192)
return EINVAL;
if (descs == 0)
return EINVAL;
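/*
 * For illustration only (not part of this change): why the selector-range
 * checks above widen to 64 bits before adding. With 32-bit arithmetic a
 * huge num_sels wraps the sum and slips past the limit. The values and
 * types below are illustrative only.
 */
#include <stdint.h>
#include <stdio.h>

int
main(void)
{
	uint32_t start_sel = 100;
	uint32_t num_sels = UINT32_MAX - 50;	/* oversized, attacker-style count */
	uint32_t limit = 8192;

	/* the 32-bit sum wraps around to 49 and wrongly passes the check */
	printf("32-bit sum %u: %s\n", (unsigned)(start_sel + num_sels),
	    (start_sel + num_sels > limit) ? "rejected" : "WRONGLY ACCEPTED");

	/* widening first keeps the true value, so the check rejects it */
	printf("64-bit sum %llu: %s\n",
	    (unsigned long long)start_sel + num_sels,
	    ((uint64_t)start_sel + (uint64_t)num_sels > limit) ?
	        "rejected" : "WRONGLY ACCEPTED");
	return 0;
}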
void
kdp_reset(void);
+void
+kdp_init(void);
--- /dev/null
+/*
+ * Copyright (c) 2008 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include "kdp_serial.h"
+
+#define SKDP_START_CHAR 0xFA
+#define SKDP_END_CHAR 0xFB
+#define SKDP_ESC_CHAR 0xFE
+
+static enum {DS_WAITSTART, DS_READING, DS_ESCAPED} dsState;
+static unsigned char dsBuffer[1518];
+static int dsPos;
+
+void kdp_serialize_packet(unsigned char *packet, unsigned int len, void (*outFunc)(char))
+{
+ unsigned int index;
+ outFunc(SKDP_START_CHAR);
+ for (index = 0; index < len; index++) {
+ unsigned char byte = *packet++;
+ // need to escape '\n' because the kernel serial output turns it into a CR/LF
+ if(byte == SKDP_START_CHAR || byte == SKDP_END_CHAR || byte == SKDP_ESC_CHAR || byte == '\n')
+ {
+ outFunc(SKDP_ESC_CHAR);
+ byte = ~byte;
+ }
+ outFunc(byte);
+ }
+ outFunc(SKDP_END_CHAR);
+}
+
+unsigned char *kdp_unserialize_packet(unsigned char byte, unsigned int *len)
+{
+ switch(dsState)
+ {
+ case DS_WAITSTART:
+ if(byte == SKDP_START_CHAR)
+ {
+// printf("got start char\n");
+ dsState = DS_READING;
+ dsPos = 0;
+ *len = SERIALIZE_READING;
+ return 0;
+ }
+ *len = SERIALIZE_WAIT_START;
+ break;
+ case DS_READING:
+ if(byte == SKDP_ESC_CHAR)
+ {
+ dsState = DS_ESCAPED;
+ *len = SERIALIZE_READING;
+ return 0;
+ }
+ if(byte == SKDP_START_CHAR)
+ {
+// printf("unexpected start char, resetting\n");
+ dsPos = 0;
+ *len = SERIALIZE_READING;
+ return 0;
+ }
+ if(byte == SKDP_END_CHAR)
+ {
+ dsState = DS_WAITSTART;
+ *len = dsPos;
+ dsPos = 0;
+ return dsBuffer;
+ }
+ dsBuffer[dsPos++] = byte;
+ break;
+ case DS_ESCAPED:
+// printf("unescaping %02x to %02x\n", byte, ~byte);
+ dsBuffer[dsPos++] = ~byte;
+ dsState = DS_READING;
+ *len = SERIALIZE_READING;
+ break;
+ }
+ if(dsPos == sizeof(dsBuffer)) //too much data...forget this packet
+ {
+ dsState = DS_WAITSTART;
+ dsPos = 0;
+ *len = SERIALIZE_WAIT_START;
+ }
+
+ return 0;
+}
--- /dev/null
+/*
+ * Copyright (c) 2008 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ *
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ *
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ *
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifndef _KDP_SERIAL_H_
+#define _KDP_SERIAL_H_
+
+/*
+ * APIs for escaping a KDP UDP packet into a byte stream suitable
+ * for a standard serial console
+ */
+
+enum {SERIALIZE_WAIT_START, SERIALIZE_READING};
+
+/*
+ * Take a buffer of specified length and output it with the given
+ * function. Escapes special characters as needed
+ */
+void kdp_serialize_packet(unsigned char *, unsigned int, void (*func)(char));
+
+/*
+ * Add a new character to an internal buffer, and return that
+ * buffer when a fully constructed packet has been identified.
+ * Will track intermediate state using magic enums above
+ */
+unsigned char *kdp_unserialize_packet(unsigned char, unsigned int *);
+
+#endif
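/*
 * For illustration only (not part of this change): a usage sketch of the
 * API declared above, assuming it is linked against kdp_serial.c. The
 * read_serial_byte()/write_serial_byte() transport hooks are hypothetical.
 */
#include "kdp_serial.h"

extern int read_serial_byte(void);	/* hypothetical: next raw byte, -1 if none */
extern void write_serial_byte(char c);	/* hypothetical: emit one raw byte */

static void
pump_serial_kdp_sketch(void)
{
	int c;

	while ((c = read_serial_byte()) >= 0) {
		unsigned int len;
		unsigned char *pkt;

		/* feed one byte of framed input; non-NULL means a complete packet */
		pkt = kdp_unserialize_packet((unsigned char)c, &len);
		if (pkt != NULL) {
			/* echo the packet back, re-escaped and re-framed */
			kdp_serialize_packet(pkt, len, write_serial_byte);
		}
	}
}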
/*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <mach/exception_types.h>
#include <kern/cpu_data.h>
#include <kern/debug.h>
+#include <kern/clock.h>
#include <kdp/kdp_core.h>
#include <kdp/kdp_internal.h>
#include <kdp/kdp_en_debugger.h>
+#include <kdp/kdp_callout.h>
#include <kdp/kdp_udp.h>
+#if CONFIG_SERIAL_KDP
+#include <kdp/kdp_serial.h>
+#endif
#include <vm/vm_map.h>
#include <vm/vm_protos.h>
extern int kdp_getc(void);
extern int reattach_wait;
+extern int serial_getc(void);
+extern void serial_putc(char);
+extern int serial_init(void);
+
static u_short ip_id; /* ip packet ctr, for ids */
/* @(#)udp_usrreq.c 2.2 88/05/23 4.0NFSSRC SMI; from UCB 7.1 6/5/86 */
{
unsigned int debug = 0;
- kdp_en_send_pkt = send;
- kdp_en_recv_pkt = receive;
-
debug_log_init();
kdp_timer_callout_init();
- PE_parse_boot_arg("debug", &debug);
+ PE_parse_boot_argn("debug", &debug, sizeof (debug));
if (!debug)
return;
+ kdp_en_send_pkt = send;
+ kdp_en_recv_pkt = receive;
+
if (debug & DB_KDP_BP_DIS)
kdp_flag |= KDP_BP_DIS;
if (debug & DB_KDP_GETC_ENA)
if (debug & DB_PANICLOG_DUMP)
kdp_flag |= PANIC_LOG_DUMP;
- if (PE_parse_boot_arg ("_panicd_ip", panicd_ip_str))
+ if (PE_parse_boot_argn("_panicd_ip", panicd_ip_str, sizeof (panicd_ip_str)))
panicd_specified = TRUE;
- if (PE_parse_boot_arg ("_router_ip", router_ip_str))
+ if (PE_parse_boot_argn("_router_ip", router_ip_str, sizeof (router_ip_str)))
router_specified = TRUE;
- if (!PE_parse_boot_arg ("panicd_port", &panicd_port))
+ if (!PE_parse_boot_argn("panicd_port", &panicd_port, sizeof (panicd_port)))
panicd_port = CORE_REMOTE_PORT;
kdp_flag |= KDP_READY;
}
extern char *inet_aton(const char *cp, struct in_addr *pin);
-extern int snprintf(char *str, size_t size, const char *format, ...);
/* Primary dispatch routine for the system dump */
void
not_in_kdp = 1;
panic_block = 0;
}
+
+#if CONFIG_SERIAL_KDP
+
+static boolean_t needs_serial_init = TRUE;
+
+static void
+kdp_serial_send(void *rpkt, unsigned int rpkt_len)
+{
+ if (needs_serial_init)
+ {
+ serial_init();
+ needs_serial_init = FALSE;
+ }
+
+ // printf("tx\n");
+ kdp_serialize_packet((unsigned char *)rpkt, rpkt_len, serial_putc);
+}
+
+static void
+kdp_serial_receive(void *rpkt, unsigned int *rpkt_len, unsigned int timeout)
+{
+ int readkar;
+ uint64_t now, deadline;
+
+ if (needs_serial_init)
+ {
+ serial_init();
+ needs_serial_init = FALSE;
+ }
+
+ clock_interval_to_deadline(timeout, 1000 * 1000 /* milliseconds */, &deadline);
+
+// printf("rx\n");
+ for(clock_get_uptime(&now); now < deadline; clock_get_uptime(&now))
+ {
+ readkar = serial_getc();
+ if(readkar >= 0)
+ {
+ unsigned char *packet;
+ // printf("got char %02x\n", readkar);
+ if((packet = kdp_unserialize_packet(readkar,rpkt_len)))
+ {
+ memcpy(rpkt, packet, *rpkt_len);
+ return;
+ }
+ }
+ }
+ *rpkt_len = 0;
+}
+
+static void kdp_serial_callout(__unused void *arg, kdp_event_t event)
+{
+ /* When we stop KDP, set the bit to re-initialize the console serial port
+ * the next time we send/receive a KDP packet. We don't do it on
+ * KDP_EVENT_ENTER directly because it also gets called when we trap to KDP
+ * for non-external debugging, i.e., stackshot or core dumps.
+ *
+ * Set needs_serial_init on exit (and at initialization, see above) rather
+ * than on enter, because enter is sent multiple times and would cause
+ * excess reinitialization.
+ */
+
+ switch (event)
+ {
+ case KDP_EVENT_PANICLOG:
+ case KDP_EVENT_ENTER:
+ break;
+ case KDP_EVENT_EXIT:
+ needs_serial_init = TRUE;
+ break;
+ }
+}
+
+#endif /* CONFIG_SERIAL_KDP */
+
+void
+kdp_init(void)
+{
+#if CONFIG_SERIAL_KDP
+ char kdpname[80];
+ struct in_addr ipaddr;
+ struct ether_addr macaddr;
+
+#if CONFIG_EMBEDDED
+ // serial will be the debugger unless a match name is explicitly provided and it is not "serial"
+ if(PE_parse_boot_argn("kdp_match_name", kdpname, sizeof(kdpname)) && strncmp(kdpname, "serial", sizeof(kdpname)) != 0)
+ return;
+#else
+ // serial must be explicitly requested
+ if(!PE_parse_boot_argn("kdp_match_name", kdpname, sizeof(kdpname)) || strncmp(kdpname, "serial", sizeof(kdpname)) != 0)
+ return;
+#endif
+
+ kprintf("Intializing serial KDP\n");
+
+ kdp_register_callout(kdp_serial_callout, NULL);
+ kdp_register_send_receive(kdp_serial_send, kdp_serial_receive);
+
+ /* fake up an IP and MAC address for early serial debugging */
+ macaddr.ether_addr_octet[0] = 's';
+ macaddr.ether_addr_octet[1] = 'e';
+ macaddr.ether_addr_octet[2] = 'r';
+ macaddr.ether_addr_octet[3] = 'i';
+ macaddr.ether_addr_octet[4] = 'a';
+ macaddr.ether_addr_octet[5] = 'l';
+ ipaddr.s_addr = 0xABADBABE;
+ kdp_set_ip_and_mac_addresses(&ipaddr, &macaddr);
+#endif /* CONFIG_SERIAL_KDP */
+}
return(t->itk_space);
}
+int get_task_numactivethreads(task_t task)
+{
+ thread_t inc;
+ int num_active_thr=0;
+ task_lock(task);
+
+ for (inc = (thread_t)queue_first(&task->threads);
+ !queue_end(&task->threads, (queue_entry_t)inc); inc = (thread_t)queue_next(&inc->task_threads))
+ {
+ if(inc->active)
+ num_active_thr++;
+ }
+ task_unlock(task);
+ return num_active_thr;
+}
+
int get_task_numacts(task_t t)
{
return(t->thread_count);
unsigned int return_on_panic = 0;
unsigned long panic_caller;
-char *debug_buf;
+char debug_buf[PAGE_SIZE];
ppnum_t debug_buf_page;
char *debug_buf_ptr;
unsigned int debug_buf_size;
{
if (debug_buf_size != 0)
return;
- if (kmem_alloc(kernel_map, (vm_offset_t *) &debug_buf, PAGE_SIZE)
- != KERN_SUCCESS)
- panic("cannot allocate debug_buf\n");
debug_buf_ptr = debug_buf;
debug_buf_size = PAGE_SIZE;
debug_buf_page = pmap_find_phys(kernel_pmap,
#ifdef KERNEL_PRIVATE
extern unsigned int systemLogDiags;
+extern char debug_buf[];
#ifdef MACH_KERNEL_PRIVATE
extern unsigned int active_debugger;
extern unsigned int debug_mode;
-extern unsigned int disable_debug_output;
+extern unsigned int disable_debug_output;
extern unsigned int panicDebugging;
extern unsigned int logPanicDataToScreen;
extern volatile unsigned int nestedpanic;
extern int unsigned long panic_caller;
-extern char *debug_buf;
extern char *debug_buf_ptr;
extern unsigned int debug_buf_size;
#define DB_DBG_POST_CORE 0x1000 /*Wait in debugger after NMI core */
#define DB_PANICLOG_DUMP 0x2000 /* Send paniclog on panic,not core*/
+
#endif /* KERNEL_PRIVATE */
__BEGIN_DECLS
round_page_32(dt_symtab_size));
}
- PE_parse_boot_arg("keepsyms", &keepsyms);
+ PE_parse_boot_argn("keepsyms", &keepsyms, sizeof (keepsyms));
segmentLE = getsegbyname(segment_name);
if (!segmentLE) {
lck_attr_t *attr)
{
#if !DEBUG
- if (LcksOpts & enaLkDeb)
- attr->lck_attr_val = LCK_ATTR_DEBUG;
- else
- attr->lck_attr_val = LCK_ATTR_NONE;
+ if (LcksOpts & enaLkDeb)
+ attr->lck_attr_val = LCK_ATTR_DEBUG;
+ else
+ attr->lck_attr_val = LCK_ATTR_NONE;
#else
- attr->lck_attr_val = LCK_ATTR_DEBUG;
-#endif
-
+ attr->lck_attr_val = LCK_ATTR_DEBUG;
+#endif /* !DEBUG */
}
/*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
extern void processor_offline(
processor_t processor);
-extern void processor_start_thread(void);
+extern void processor_start_thread(void *machine_param);
/*
* Must be implemented in machine dependent code.
extern void printf_init(void);
+extern int snprintf(char *, size_t, const char *, ...) __printflike(3,4);
+
extern void log(int level, char *fmt, ...);
void
}
int
-dsmos_page_transform(const void* from, void *to)
+dsmos_page_transform(const void* from, void *to, __unused unsigned long long src_offset, __unused void *ops)
{
/* printf("%s\n", __FUNCTION__); */
if (dsmos_hook == NULL)
return (*dsmos_hook) (from, to);
}
+
+text_crypter_create_hook_t text_crypter_create=NULL;
+void text_crypter_create_hook_set(text_crypter_create_hook_t hook)
+{
+ text_crypter_create=hook;
+};
#ifndef _KERN_PAGE_DECRYPT_H
#define _KERN_PAGE_DECRYPT_H
-typedef int (*dsmos_page_transform_hook_t) (const void *,void*);
+/*
+ * Interface for DSMOS
+ */
+typedef int (*dsmos_page_transform_hook_t) (const void *,void*);
extern void dsmos_page_transform_hook(dsmos_page_transform_hook_t hook); /* exported */
-extern int dsmos_page_transform(const void *,void*);
+extern int dsmos_page_transform(const void *,void*, unsigned long long, void*);
+
+
+/*
+ * Interface for text decryption family
+ */
+struct pager_crypt_info {
+ /* Decrypt one page */
+ int (*page_decrypt)(const void *src_vaddr, void *dst_vaddr,
+ unsigned long long src_offset, void *crypt_ops);
+ /* Pager using this crypter terminates - crypt module not needed anymore */
+ void (*crypt_end)(void *crypt_ops);
+ /* Private data for the crypter */
+ void *crypt_ops;
+};
+typedef struct pager_crypt_info pager_crypt_info_t;
+
+typedef int (*text_crypter_create_hook_t)(struct pager_crypt_info *crypt_info,
+ const char *id, void *crypt_data);
+extern void text_crypter_create_hook_set(text_crypter_create_hook_t hook);
+//extern kern_return_t text_crypter_create(pager_crypt_info_t *crypt_info, const char *id,
+// void *crypt_data);
+extern text_crypter_create_hook_t text_crypter_create;
#endif /* _KERN_PAGE_DECRYPT_H */
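/*
 * For illustration only (not part of this change): the shape of a crypt
 * module plugging into the text decryption interface above. The XOR
 * "decryption", the 4K page size, the header path and all my_* names are
 * hypothetical; the point is the set of callbacks a module hands back
 * through text_crypter_create_hook_set().
 */
#include <kern/page_decrypt.h>		/* assumed install path of this header */

struct my_crypt_session {
	unsigned char key;		/* hypothetical per-binary session data */
};

static struct my_crypt_session my_session;

static int
my_page_decrypt(const void *src_vaddr, void *dst_vaddr,
		unsigned long long src_offset, void *crypt_ops)
{
	struct my_crypt_session *s = crypt_ops;
	const unsigned char *src = src_vaddr;
	unsigned char *dst = dst_vaddr;
	unsigned int i;

	(void)src_offset;		/* real modules key the transform off the offset */
	for (i = 0; i < 4096; i++)	/* one page, assuming 4K pages */
		dst[i] = src[i] ^ s->key;
	return 0;
}

static void
my_crypt_end(void *crypt_ops)
{
	((struct my_crypt_session *)crypt_ops)->key = 0;	/* tear down session state */
}

static int
my_crypter_create(struct pager_crypt_info *crypt_info,
		  const char *id, void *crypt_data)
{
	(void)id; (void)crypt_data;	/* a real module validates the binary here */
	my_session.key = 0x5a;
	crypt_info->page_decrypt = my_page_decrypt;
	crypt_info->crypt_end = my_crypt_end;
	crypt_info->crypt_ops = &my_session;
	return 0;
}

/* registration, e.g. from the module's start routine (hypothetical caller): */
/*	text_crypter_create_hook_set(my_crypter_create); */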
cnputc(c);
#ifdef MACH_BSD
- log_putc(c);
+ if (debug_mode == 0)
+ log_putc(c);
#endif
}
{
return KERN_FAILURE;
}
+#elif defined(CONFIG_EMBEDDED)
+kern_return_t
+processor_set_threads(
+ __unused processor_set_t pset,
+ __unused thread_array_t *thread_list,
+ __unused mach_msg_type_number_t *count)
+{
+ return KERN_NOT_SUPPORTED;
+}
#else
kern_return_t
processor_set_threads(
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
#include <machine/commpage.h>
#include <libkern/version.h>
+#if MACH_KDP
+#include <kdp/kdp.h>
+#endif
+
#if CONFIG_MACF
#include <security/mac_mach_internal.h>
#endif
kth_started = 1;
+#if MACH_KDP
+ kernel_bootstrap_kprintf("calling kdp_init\n");
+ kdp_init();
+#endif
+
#ifdef i386
/*
* Create and initialize the physical copy window for processor 0
* Load the first thread to start a processor.
*/
void
-slave_main(void)
+slave_main(void *machine_param)
{
processor_t processor = current_processor();
thread_t thread;
if (processor->next_thread == THREAD_NULL) {
thread = processor->idle_thread;
thread->continuation = (thread_continue_t)processor_start_thread;
- thread->parameter = NULL;
+ thread->parameter = machine_param;
}
else {
thread = processor->next_thread;
* Called at splsched.
*/
void
-processor_start_thread(void)
+processor_start_thread(void *machine_param)
{
processor_t processor = current_processor();
thread_t self = current_thread();
- slave_machine_init();
+ slave_machine_init(machine_param);
/*
* If running the idle processor thread,
* to have reserved stack.
*/
load_context_kprintf("stack %x, stackptr %x\n",
- thread->kernel_stack, thread->machine.kstackptr);
+ thread->kernel_stack, thread->machine.kstackptr);
if (!thread->kernel_stack) {
load_context_kprintf("calling stack_alloc_try\n");
if (!stack_alloc_try(thread))
/*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
/* Initialize machine dependent stuff */
extern void machine_init(void);
-extern void slave_main(void);
+extern void slave_main(void *machine_param);
/*
* The following must be implemented in machine dependent code.
*/
/* Slave cpu initialization */
-extern void slave_machine_init(void);
+extern void slave_machine_init(void *machine_param);
/* Device subystem initialization */
extern void device_service_create(void);
return (NULL);
}
-int snprintf(char *, size_t, const char *, ...);
/* Format the results of calling syms_nameforaddr into a single string.
* The buffer must be at least 13 bytes long; 80 is recommended.
extern void task_backing_store_privileged(
task_t task);
+extern int get_task_numactivethreads(
+ task_t task);
/* Get number of activations in a task */
extern int get_task_numacts(
task_t task);
#define zone_sleep(zone) \
(void) lck_mtx_sleep(&(zone)->lock, 0, (event_t)(zone), THREAD_UNINT);
-extern int snprintf(char *, size_t, const char *, ...) __printflike(3,4);
#define lock_zone_init(zone) \
MACRO_BEGIN \
char temp_buf[16];
/* see if we want freed zone element checking */
- if (PE_parse_boot_arg("-zc", temp_buf)) {
+ if (PE_parse_boot_argn("-zc", temp_buf, sizeof (temp_buf))) {
check_freed_element = 1;
}
#define HOST_USER_NOTIFICATION_PORT (3 + HOST_MAX_SPECIAL_KERNEL_PORT)
#define HOST_LOCKD_PORT (5 + HOST_MAX_SPECIAL_KERNEL_PORT)
#define HOST_SEATBELT_PORT (7 + HOST_MAX_SPECIAL_KERNEL_PORT)
-#define HOST_MAX_SPECIAL_PORT (8 + HOST_MAX_SPECIAL_KERNEL_PORT)
+
+#define HOST_UNFREED_PORT (10 + HOST_MAX_SPECIAL_KERNEL_PORT)
+#define HOST_AMFID_PORT (11 + HOST_MAX_SPECIAL_KERNEL_PORT)
+#define HOST_MAX_SPECIAL_PORT (12 + HOST_MAX_SPECIAL_KERNEL_PORT)
/* room to grow here as well */
/*
#define host_set_lockd_port(host, port) \
(host_set_special_port((host), HOST_LOCKD_PORT, (port)))
+#define host_get_unfreed_port(host, port) \
+ (host_get_special_port((host), \
+ HOST_LOCAL_NODE, HOST_UNFREED_PORT, (port)))
+#define host_set_unfreed_port(host, port) \
+ (host_set_special_port((host), HOST_UNFREED_PORT, (port)))
+
+#define host_get_amfid_port(host, port) \
+ (host_get_special_port((host), \
+ HOST_LOCAL_NODE, HOST_AMFID_PORT, (port)))
+#define host_set_amfid_port(host, port) \
+ (host_set_special_port((host), HOST_AMFID_PORT, (port)))
+
#endif /* _MACH_HOST_SPECIAL_PORTS_H_ */
* [Response should be a release of the named reference when
* the pager deems that appropriate.]
*/
-routine memory_object_unmap(
+routine memory_object_map(
+ memory_object : memory_object_t;
+ prot : vm_prot_t);
+routine memory_object_last_unmap(
memory_object : memory_object_t);
/* vim: set ft=c : */
memory_object_offset_t offset,
vm_size_t size,
vm_sync_t sync_flags);
- kern_return_t (*memory_object_unmap)(
+ kern_return_t (*memory_object_map)(
+ memory_object_t mem_obj,
+ vm_prot_t prot);
+ kern_return_t (*memory_object_last_unmap)(
memory_object_t mem_obj);
const char *memory_object_pager_name;
} * memory_object_pager_ops_t;
ppnum_t phys_addr; /* physical page index number */
unsigned int
#ifdef XNU_KERNEL_PRIVATE
- pageout:1, /* page is to be removed on commit */
- absent:1, /* No valid data in this page */
- dirty:1, /* Page must be cleaned (O) */
- precious:1, /* must be cleaned, we have only copy */
- device:1, /* no page data, mapped dev memory */
- speculative:1, /* page is valid, but not yet accessed */
- :0; /* force to long boundary */
+ pageout:1, /* page is to be removed on commit */
+ absent:1, /* No valid data in this page */
+ dirty:1, /* Page must be cleaned (O) */
+ precious:1, /* must be cleaned, we have the only copy */
+ device:1, /* no page data, mapped dev memory */
+ speculative:1, /* page is valid, but not yet accessed */
+ cs_validated:1, /* CODE SIGNING: page was validated */
+ cs_tainted:1, /* CODE SIGNING: page is tainted */
+ :0; /* force to long boundary */
#else
- opaque; /* use upl_page_xxx() accessor funcs */
+ opaque; /* use upl_page_xxx() accessor funcs */
#endif /* XNU_KERNEL_PRIVATE */
};
#define UPL_COMMIT_INACTIVATE 0x8
#define UPL_COMMIT_NOTIFY_EMPTY 0x10
#define UPL_COMMIT_ALLOW_ACCESS 0x20
+#define UPL_COMMIT_CS_VALIDATED 0x40
+
+#define UPL_COMMIT_KERNEL_ONLY_FLAGS (UPL_COMMIT_CS_VALIDATED)
/* flags for return of state from vm_map_get_upl, vm_upl address space */
/* based call */
(((upl)[(index)].phys_addr != 0) ? \
((upl)[(index)].pageout = FALSE) : FALSE)
+/* modifier macros for upl_t */
+
+#define UPL_SET_CS_VALIDATED(upl, index, value) \
+ ((upl)[(index)].cs_validated = ((value) ? TRUE : FALSE))
+
+#define UPL_SET_CS_TAINTED(upl, index, value) \
+ ((upl)[(index)].cs_tainted = ((value) ? TRUE : FALSE))
+
/* The call prototyped below is used strictly by UPL_GET_INTERNAL_PAGE_LIST */
extern vm_size_t upl_offset_to_pagelist;
#define VM_PAGE_QUERY_PAGE_PAGED_OUT 0x10
#define VM_PAGE_QUERY_PAGE_COPIED 0x20
#define VM_PAGE_QUERY_PAGE_SPECULATIVE 0x40
+#define VM_PAGE_QUERY_PAGE_CS_VALIDATED 0x100
+#define VM_PAGE_QUERY_PAGE_CS_TAINTED 0x200
#ifdef MACH_KERNEL_PRIVATE
nanoseconds_to_absolutetime(NSEC_PER_SEC>>2, &abstime);
LockTimeOut = (unsigned int)abstime;
- if (PE_parse_boot_arg("mtxspin", &mtxspin)) {
+ if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof (mtxspin))) {
if (mtxspin > USEC_PER_SEC>>4)
mtxspin = USEC_PER_SEC>>4;
nanoseconds_to_absolutetime(mtxspin*NSEC_PER_USEC, &abstime);
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
int boot_arg;
unsigned int wncpu;
- if (PE_parse_boot_arg("cpus", &wncpu)) {
+ if (PE_parse_boot_argn("cpus", &wncpu, sizeof (wncpu))) {
if ((wncpu > 0) && (wncpu < MAX_CPUS))
max_ncpus = wncpu;
}
if( PE_get_hotkey( kPEControlKey ))
halt_in_debugger = halt_in_debugger ? 0 : 1;
- if (PE_parse_boot_arg("debug", &boot_arg)) {
+ if (PE_parse_boot_argn("debug", &boot_arg, sizeof (boot_arg))) {
if (boot_arg & DB_HALT) halt_in_debugger=1;
if (boot_arg & DB_PRT) disable_debug_output=FALSE;
if (boot_arg & DB_SLOG) systemLogDiags=TRUE;
if (boot_arg & DB_LOG_PI_SCRN) logPanicDataToScreen=TRUE;
}
- if (!PE_parse_boot_arg("nvram_paniclog", &commit_paniclog_to_nvram))
+ if (!PE_parse_boot_argn("nvram_paniclog", &commit_paniclog_to_nvram, sizeof (commit_paniclog_to_nvram)))
commit_paniclog_to_nvram = 1;
- PE_parse_boot_arg("vmmforce", &lowGlo.lgVMMforcedFeats);
+ PE_parse_boot_argn("vmmforce", &lowGlo.lgVMMforcedFeats, sizeof (lowGlo.lgVMMforcedFeats));
hw_lock_init(&debugger_lock); /* initialize debugger lock */
hw_lock_init(&pbtlock); /* initialize print backtrace lock */
active_debugger =1;
}
#endif /* MACH_KDB */
- if (PE_parse_boot_arg("preempt", &boot_arg)) {
+ if (PE_parse_boot_argn("preempt", &boot_arg, sizeof (boot_arg))) {
default_preemption_rate = boot_arg;
}
- if (PE_parse_boot_arg("unsafe", &boot_arg)) {
+ if (PE_parse_boot_argn("unsafe", &boot_arg, sizeof (boot_arg))) {
max_unsafe_quanta = boot_arg;
}
- if (PE_parse_boot_arg("poll", &boot_arg)) {
+ if (PE_parse_boot_argn("poll", &boot_arg, sizeof (boot_arg))) {
max_poll_quanta = boot_arg;
}
- if (PE_parse_boot_arg("yield", &boot_arg)) {
+ if (PE_parse_boot_argn("yield", &boot_arg, sizeof (boot_arg))) {
sched_poll_yield_shift = boot_arg;
}
}
-void slave_machine_init(void)
+void
+slave_machine_init(__unused void *param)
{
cpu_machine_init(); /* Initialize the processor */
clock_init(); /* Init the clock */
crmove kk64bit,pf64Bitb // remember if this is a 64-bit processor
stw r7,kkCountPtr(r1)
stw r31,kkR31Save(r1) // we use r31 globally for mapped user ptr
- li r31,0 // no mapped ptr yet
+
// Handle buffer length > 256MB. This is an error (ENAMETOOLONG) on copyin and copyout.
// Set up thread_recover in case we hit an illegal address.
copyin0:
+ li r31,0 // no mapped ptr yet
mfsprg r8,1 // Get the current thread
lis r2,hi16(copyinout_error)
ori r2,r2,lo16(copyinout_error)
/*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
*
* @APPLE_OSREFERENCE_LICENSE_HEADER_START@
*
PE_init_platform(FALSE, args); /* Get platform expert set up */
- if (!PE_parse_boot_arg("novmx", &novmx)) novmx=0; /* Special run without VMX? */
+ if (!PE_parse_boot_argn("novmx", &novmx, sizeof (novmx))) novmx=0; /* Special run without VMX? */
if(novmx) { /* Yeah, turn it off */
BootProcInfo.pf.Available &= ~pfAltivec; /* Turn off Altivec available */
__asm__ volatile("mtsprg 2,%0" : : "r" (BootProcInfo.pf.Available)); /* Set live value */
}
- if (!PE_parse_boot_arg("fn", &forcenap)) forcenap = 0; /* If force nap not set, make 0 */
+ if (!PE_parse_boot_argn("fn", &forcenap, sizeof (forcenap))) forcenap = 0; /* If force nap not set, make 0 */
else {
if(forcenap < 2) forcenap = forcenap + 1; /* Else set 1 for off, 2 for on */
else forcenap = 0; /* Clear for error case */
}
- if (!PE_parse_boot_arg("pmsx", &pmsExperimental)) pmsExperimental = 0; /* Check if we should start in experimental power management stepper mode */
- if (!PE_parse_boot_arg("lcks", &LcksOpts)) LcksOpts = 0; /* Set lcks options */
- if (!PE_parse_boot_arg("diag", &dgWork.dgFlags)) dgWork.dgFlags = 0; /* Set diagnostic flags */
+ if (!PE_parse_boot_argn("pmsx", &pmsExperimental, sizeof (pmsExperimental))) pmsExperimental = 0; /* Check if we should start in experimental power management stepper mode */
+ if (!PE_parse_boot_argn("lcks", &LcksOpts, sizeof (LcksOpts))) LcksOpts = 0; /* Set lcks options */
+ if (!PE_parse_boot_argn("diag", &dgWork.dgFlags, sizeof (dgWork.dgFlags))) dgWork.dgFlags = 0; /* Set diagnostic flags */
if(dgWork.dgFlags & enaExpTrace) trcWork.traceMask = 0xFFFFFFFF; /* If tracing requested, enable it */
- if(PE_parse_boot_arg("ctrc", &cputrace)) { /* See if tracing is limited to a specific cpu */
+ if(PE_parse_boot_argn("ctrc", &cputrace, sizeof (cputrace))) { /* See if tracing is limited to a specific cpu */
trcWork.traceMask = (trcWork.traceMask & 0xFFFFFFF0) | (cputrace & 0xF); /* Limit to 4 */
}
- if(!PE_parse_boot_arg("tb", &trcWork.traceSize)) { /* See if non-default trace buffer size */
+ if(!PE_parse_boot_argn("tb", &trcWork.traceSize, sizeof (trcWork.traceSize))) { /* See if non-default trace buffer size */
#if DEBUG
trcWork.traceSize = 32; /* Default 32 page trace table for DEBUG */
#else
if(trcWork.traceSize > 256) trcWork.traceSize = 256; /* Maximum size of 256 pages */
trcWork.traceSize = trcWork.traceSize * 4096; /* Change page count to size */
- if (!PE_parse_boot_arg("maxmem", &maxmem))
+ if (!PE_parse_boot_argn("maxmem", &maxmem, sizeof (maxmem)))
xmaxmem=0;
else
xmaxmem = (uint64_t)maxmem * (1024 * 1024);
- if (!PE_parse_boot_arg("wcte", &wcte)) wcte = 0; /* If write combine timer enable not supplied, make 1 */
+ if (!PE_parse_boot_argn("wcte", &wcte, sizeof (wcte))) wcte = 0; /* If write combine timer enable not supplied, make 1 */
else wcte = (wcte != 0); /* Force to 0 or 1 */
- if (!PE_parse_boot_arg("mcklog", &mckFlags)) mckFlags = 0; /* If machine check flags not specified, clear */
+ if (!PE_parse_boot_argn("mcklog", &mckFlags, sizeof (mckFlags))) mckFlags = 0; /* If machine check flags not specified, clear */
else if(mckFlags > 1) mckFlags = 0; /* If bogus, clear */
- if (!PE_parse_boot_arg("ht_shift", &hash_table_shift)) /* should we use a non-default hash table size? */
+ if (!PE_parse_boot_argn("ht_shift", &hash_table_shift, sizeof (hash_table_shift))) /* should we use a non-default hash table size? */
hash_table_shift = 0; /* no, use default size */
/*
(void)ml_scom_write(GUSModeReg << 8, scdata); /* Get GUS mode register */
}
- if(PE_parse_boot_arg("mcksoft", &mcksoft)) { /* Have they supplied "machine check software recovery? */
+ if(PE_parse_boot_argn("mcksoft", &mcksoft, sizeof (mcksoft))) { /* Have they supplied "machine check software recovery? */
newhid = BootProcInfo.pf.pfHID5; /* Get the old HID5 */
if(mcksoft < 2) {
newhid &= 0xFFFFFFFFFFFFDFFFULL; /* Clear the old one */
cpu_init();
- slave_main();
+ slave_main(NULL);
}
kprintf("kprintf initialized\n");
serialmode = 0; /* Assume normal keyboard and console */
- if(PE_parse_boot_arg("serial", &serialmode)) { /* Do we want a serial keyboard and/or console? */
+ if(PE_parse_boot_argn("serial", &serialmode, sizeof (serialmode))) { /* Do we want a serial keyboard and/or console? */
kprintf("Serial mode specified: %08X\n", serialmode);
}
if(serialmode & 1) { /* Start serial if requested */
vnode_pager_data_initialize,
vnode_pager_data_unlock,
vnode_pager_synchronize,
- vnode_pager_unmap,
+ vnode_pager_map,
+ vnode_pager_last_unmap,
"vnode pager"
};
size = (vm_size_t) sizeof(struct vnode_pager);
vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
PAGE_SIZE, "vnode pager structures");
-#ifdef __i386__
+#if CONFIG_CODE_DECRYPTION
apple_protect_pager_bootstrap();
-#endif /* __i386__ */
+#endif /* CONFIG_CODE_DECRYPTION */
return;
}
*
*/
kern_return_t
-vnode_pager_unmap(
+vnode_pager_map(
+ memory_object_t mem_obj,
+ vm_prot_t prot)
+{
+ vnode_pager_t vnode_object;
+ int ret;
+ kern_return_t kr;
+
+ PAGER_DEBUG(PAGER_ALL, ("vnode_pager_map: %p %x\n", mem_obj, prot));
+
+ vnode_object = vnode_pager_lookup(mem_obj);
+
+ ret = ubc_map(vnode_object->vnode_handle, prot);
+
+ if (ret != 0) {
+ kr = KERN_FAILURE;
+ } else {
+ kr = KERN_SUCCESS;
+ }
+
+ return kr;
+}
+
+kern_return_t
+vnode_pager_last_unmap(
memory_object_t mem_obj)
{
register vnode_pager_t vnode_object;
- PAGER_DEBUG(PAGER_ALL, ("vnode_pager_unmap: %p\n", mem_obj));
+ PAGER_DEBUG(PAGER_ALL, ("vnode_pager_last_unmap: %p\n", mem_obj));
vnode_object = vnode_pager_lookup(mem_obj);
device_pager_data_initialize,
device_pager_data_unlock,
device_pager_synchronize,
- device_pager_unmap,
+ device_pager_map,
+ device_pager_last_unmap,
"device pager"
};
*
*/
kern_return_t
-device_pager_unmap(
+device_pager_map(
+ __unused memory_object_t mem_obj,
+ __unused vm_prot_t prot)
+{
+ return KERN_SUCCESS;
+}
+
+kern_return_t
+device_pager_last_unmap(
__unused memory_object_t mem_obj)
{
return KERN_SUCCESS;
sync_flags);
}
-/* Routine memory_object_unmap */
-kern_return_t memory_object_unmap
+
+/*
+ * memory_object_map() is called by VM (in vm_map_enter() and its variants)
+ * each time a "named" VM object gets mapped directly or indirectly
+ * (copy-on-write mapping). A "named" VM object has an extra reference held
+ * by the pager to keep it alive until the pager decides that the
+ * memory object (and its VM object) can be reclaimed.
+ * VM calls memory_object_last_unmap() (in vm_object_deallocate()) when all
+ * the mappings of that memory object have been removed.
+ *
+ * For a given VM object, calls to memory_object_map() and memory_object_last_unmap()
+ * are serialized (through object->mapping_in_progress), to ensure that the
+ * pager gets a consistent view of the mapping status of the memory object.
+ *
+ * This allows the pager to keep track of how many times a memory object
+ * has been mapped and with which protections, to decide when it can be
+ * reclaimed.
+ */
+
+/* Routine memory_object_map */
+kern_return_t memory_object_map
+(
+ memory_object_t memory_object,
+ vm_prot_t prot
+)
+{
+ return (memory_object->mo_pager_ops->memory_object_map)(
+ memory_object,
+ prot);
+}
+
+/* Routine memory_object_last_unmap */
+kern_return_t memory_object_last_unmap
(
memory_object_t memory_object
)
{
- return (memory_object->mo_pager_ops->memory_object_unmap)(
+ return (memory_object->mo_pager_ops->memory_object_last_unmap)(
memory_object);
}
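/*
 * For illustration only (not part of this change): the bookkeeping a pager
 * can now do with the map/last_unmap pair dispatched above. "my_pager_sketch"
 * and its fields are hypothetical; the apple protect pager later in this
 * change does the real version of this.
 */
struct my_pager_sketch {
	int	is_mapped;	/* currently mapped somewhere? */
	int	ref_count;	/* extra reference held while mapped */
};

static kern_return_t
my_pager_map_sketch(struct my_pager_sketch *pager, vm_prot_t prot)
{
	(void)prot;			/* a pager may also veto based on protection */
	if (!pager->is_mapped) {
		pager->is_mapped = 1;
		pager->ref_count++;	/* keep the memory object alive while mapped */
	}
	return KERN_SUCCESS;
}

static kern_return_t
my_pager_last_unmap_sketch(struct my_pager_sketch *pager)
{
	if (pager->is_mapped) {
		pager->is_mapped = 0;
		pager->ref_count--;	/* last mapping gone: the object may be reclaimed */
	}
	return KERN_SUCCESS;
}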
memory_object_offset_t offset,
vm_size_t length,
vm_sync_t sync_flags);
-kern_return_t apple_protect_pager_unmap(memory_object_t mem_obj);
+kern_return_t apple_protect_pager_map(memory_object_t mem_obj,
+ vm_prot_t prot);
+kern_return_t apple_protect_pager_last_unmap(memory_object_t mem_obj);
/*
* Vector of VM operations for this EMM.
apple_protect_pager_data_initialize,
apple_protect_pager_data_unlock,
apple_protect_pager_synchronize,
- apple_protect_pager_unmap,
+ apple_protect_pager_map,
+ apple_protect_pager_last_unmap,
"apple protect pager"
};
boolean_t is_mapped; /* is this mem_obj mapped ? */
memory_object_control_t pager_control; /* mem object control handle */
vm_object_t backing_object; /* VM obj w/ encrypted data */
+ struct pager_crypt_info crypt;
} *apple_protect_pager_t;
#define APPLE_PROTECT_PAGER_NULL ((apple_protect_pager_t) NULL)
int apple_protect_pager_num_trim_total = 0;
/* internal prototypes */
-apple_protect_pager_t apple_protect_pager_create(vm_object_t backing_object);
+apple_protect_pager_t apple_protect_pager_create(vm_object_t backing_object,
+ struct pager_crypt_info *crypt_info);
apple_protect_pager_t apple_protect_pager_lookup(memory_object_t mem_obj);
void apple_protect_pager_dequeue(apple_protect_pager_t pager);
void apple_protect_pager_deallocate_internal(apple_protect_pager_t pager,
upl_t upl;
int upl_flags;
upl_size_t upl_size;
- upl_page_info_t *upl_pl;
+ upl_page_info_t *upl_pl = NULL;
+ unsigned int pl_count;
vm_object_t src_object, dst_object;
kern_return_t kr, retval;
vm_map_offset_t kernel_mapping;
src_object = VM_OBJECT_NULL;
kernel_mapping = 0;
upl = NULL;
+ upl_pl = NULL;
fault_info = (vm_object_fault_info_t) mo_fault_info;
interruptible = fault_info->interruptible;
UPL_NO_SYNC |
UPL_CLEAN_IN_PLACE | /* triggers UPL_CLEAR_DIRTY */
UPL_SET_INTERNAL;
+ pl_count = 0;
kr = memory_object_upl_request(mo_control,
offset, upl_size,
&upl, NULL, NULL, upl_flags);
* Fill in the contents of the pages requested by VM.
*/
upl_pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
+ pl_count = length / PAGE_SIZE;
for (cur_offset = 0; cur_offset < length; cur_offset += PAGE_SIZE) {
ppnum_t dst_pnum;
dst_object->wimg_bits & VM_WIMG_MASK,
TRUE);
+ /*
+ * Validate the original page...
+ */
+ if (src_page->object->code_signed) {
+ vm_page_validate_cs_mapped(src_page,
+ (const void *) src_vaddr);
+ }
+ /*
+ * ... and transfer the results to the destination page.
+ */
+ UPL_SET_CS_VALIDATED(upl_pl, cur_offset / PAGE_SIZE,
+ src_page->cs_validated);
+ UPL_SET_CS_TAINTED(upl_pl, cur_offset / PAGE_SIZE,
+ src_page->cs_tainted);
+
/*
* Decrypt the encrypted contents of the source page
* into the destination page.
*/
- dsmos_page_transform((const void *) src_vaddr,
- (void *) dst_vaddr);
-
+ pager->crypt.page_decrypt((const void *) src_vaddr,
+ (void *) dst_vaddr, offset+cur_offset,
+ pager->crypt.crypt_ops);
+
/*
* Remove the pmap mapping of the source and destination pages
* in the kernel.
if (retval != KERN_SUCCESS) {
upl_abort(upl, 0);
} else {
- upl_commit(upl, NULL, 0);
+ boolean_t empty;
+ upl_commit_range(upl, 0, upl->size,
+ UPL_COMMIT_CS_VALIDATED,
+ upl_pl, pl_count, &empty);
}
/* and deallocate the UPL */
/* trigger the destruction of the memory object */
memory_object_destroy(pager->pager_control, 0);
+
+ /* deallocate any crypt module data */
+ if(pager->crypt.crypt_end)
+ pager->crypt.crypt_end(pager->crypt.crypt_ops);
}
/*
* time the memory object gets mapped and we take one extra reference on the
* memory object to account for all its mappings.
*/
-void
+kern_return_t
apple_protect_pager_map(
- memory_object_t mem_obj)
+ memory_object_t mem_obj,
+ __unused vm_prot_t prot)
{
apple_protect_pager_t pager;
apple_protect_pager_count_mapped++;
}
mutex_unlock(&apple_protect_pager_lock);
+
+ return KERN_SUCCESS;
}
/*
- * apple_protect_pager_unmap()
+ * apple_protect_pager_last_unmap()
*
* This is called by VM when this memory object is no longer mapped anywhere.
*/
kern_return_t
-apple_protect_pager_unmap(
+apple_protect_pager_last_unmap(
memory_object_t mem_obj)
{
apple_protect_pager_t pager;
int count_unmapped;
- PAGER_DEBUG(PAGER_ALL, ("apple_protect_pager_unmap: %p\n", mem_obj));
+ PAGER_DEBUG(PAGER_ALL,
+ ("apple_protect_pager_last_unmap: %p\n", mem_obj));
pager = apple_protect_pager_lookup(mem_obj);
apple_protect_pager_t
apple_protect_pager_create(
- vm_object_t backing_object)
+ vm_object_t backing_object,
+ struct pager_crypt_info *crypt_info)
{
apple_protect_pager_t pager, pager2;
memory_object_control_t control;
pager->is_mapped = FALSE;
pager->pager_control = MEMORY_OBJECT_CONTROL_NULL;
pager->backing_object = backing_object;
+ pager->crypt = *crypt_info;
+
vm_object_reference(backing_object);
mutex_lock(&apple_protect_pager_lock);
*/
memory_object_t
apple_protect_pager_setup(
- vm_object_t backing_object)
+ vm_object_t backing_object,
+ struct pager_crypt_info *crypt_info)
{
apple_protect_pager_t pager;
apple_protect_pager_t,
pager_queue) {
if (pager->backing_object == backing_object) {
+ /* For the same object we must always use the same protection options */
+ if (!((pager->crypt.page_decrypt == crypt_info->page_decrypt) &&
+ (pager->crypt.crypt_ops == crypt_info->crypt_ops) )) {
+ mutex_unlock(&apple_protect_pager_lock);
+ return MEMORY_OBJECT_NULL;
+ }
break;
}
}
mutex_unlock(&apple_protect_pager_lock);
if (pager == APPLE_PROTECT_PAGER_NULL) {
- pager = apple_protect_pager_create(backing_object);
+ pager = apple_protect_pager_create(backing_object, crypt_info);
if (pager == APPLE_PROTECT_PAGER_NULL) {
return MEMORY_OBJECT_NULL;
}
unsigned long vm_cs_query_modified = 0;
unsigned long vm_cs_validated_dirtied = 0;
+#if CONFIG_ENFORCE_SIGNED_CODE
+#if SECURE_KERNEL
+const int cs_enforcement_disable=0;
+#else
+int cs_enforcement_disable=1;
+#endif
+#endif
+
/*
* Routine: vm_fault_init
* Purpose:
void
vm_fault_init(void)
{
+#if !SECURE_KERNEL
+#if CONFIG_ENFORCE_SIGNED_CODE
+ PE_parse_boot_argn("cs_enforcement_disable", &cs_enforcement_disable, sizeof (cs_enforcement_disable));
+#endif
+ PE_parse_boot_argn("cs_debug", &cs_debug, sizeof (cs_debug));
+#endif
}
/*
+/*
+ * CODE SIGNING:
+ * When soft faulting a page, we have to validate the page if:
+ * 1. the page is being mapped in user space
+ * 2. the page hasn't already been found to be "tainted"
+ * 3. the page belongs to a code-signed object
+ * 4. the page has not been validated yet or has been mapped for write.
+ */
+#define VM_FAULT_NEED_CS_VALIDATION(pmap, page) \
+ ((pmap) != kernel_pmap /*1*/ && \
+ !(page)->cs_tainted /*2*/ && \
+ (page)->object->code_signed /*3*/ && \
+ (!(page)->cs_validated || (page)->wpmapped /*4*/))
+
+
/*
* page queue lock must NOT be held
* m->object must be locked
cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
- if (m->object->code_signed && pmap != kernel_pmap &&
- (!m->cs_validated || m->wpmapped)) {
- vm_object_lock_assert_exclusive(m->object);
-
- if (m->cs_validated && m->wpmapped) {
- vm_cs_revalidates++;
- }
-
- /*
- * CODE SIGNING:
- * This page comes from a VM object backed by a signed
- * memory object. We are about to enter it into a process
- * address space, so we need to validate its signature.
- */
- /* VM map is locked, so 1 ref will remain on VM object */
- vm_page_validate_cs(m);
- }
-
if (m->pmapped == FALSE) {
/*
* This is the first time this page is being
}
}
- if (m->cs_tainted) {
+ if (VM_FAULT_NEED_CS_VALIDATION(pmap, m)) {
+ vm_object_lock_assert_exclusive(m->object);
+
+ if (m->cs_validated) {
+ vm_cs_revalidates++;
+ }
+
+ /* VM map is locked, so 1 ref will remain on VM object */
+ vm_page_validate_cs(m);
+ }
+
+ if (m->cs_tainted /* always invalidate a tainted page */
+#if CONFIG_ENFORCE_SIGNED_CODE
+ /*
+ * Code Signing enforcement invalidates an executable page that
+ * has no code directory, and thus could not be validated.
+ */
+ || ((prot & VM_PROT_EXECUTE) && !m->cs_validated )
+#endif
+ ) {
/*
* CODE SIGNING:
* This page has been tainted and can not be trusted.
* necessary precautions before we enter the tainted page
* into its address space.
*/
- if (cs_invalid_page()) {
- /* reject the tainted page: abort the page fault */
- kr = KERN_MEMORY_ERROR;
- cs_enter_tainted_rejected++;
- } else {
- /* proceed with the tainted page */
- kr = KERN_SUCCESS;
- cs_enter_tainted_accepted++;
+ kr = KERN_SUCCESS;
+#if CONFIG_ENFORCE_SIGNED_CODE
+ if (!cs_enforcement_disable) {
+#endif
+ if (cs_invalid_page((addr64_t) vaddr)) {
+ /* reject the tainted page: abort the page fault */
+ kr = KERN_MEMORY_ERROR;
+ cs_enter_tainted_rejected++;
+ } else {
+ /* proceed with the tainted page */
+ kr = KERN_SUCCESS;
+ cs_enter_tainted_accepted++;
+ }
+#if CONFIG_ENFORCE_SIGNED_CODE
}
+#endif
if (cs_debug || kr != KERN_SUCCESS) {
printf("CODESIGNING: vm_fault_enter(0x%llx): "
- "page %p obj %p off 0x%llx *** TAINTED ***\n",
+ "page %p obj %p off 0x%llx *** INVALID PAGE ***\n",
(long long)vaddr, m, m->object, m->offset);
}
} else {
* since this is the ONLY bit updated behind the SHARED
* lock... however, we need to figure out how to do an atomic
* update on a bit field to make this less fragile... right
- * now I don'w know how to coerce 'C' to give me the offset info
+ * now I don't know how to coerce 'C' to give me the offset info
* that's needed for an AtomicCompareAndSwap
*/
m->pmapped = TRUE;
}
ASSERT_PAGE_DECRYPTED(m);
- if (m->object->code_signed && map != kernel_map &&
- (!m->cs_validated || m->wpmapped)) {
+ if (VM_FAULT_NEED_CS_VALIDATION(map->pmap, m)) {
/*
* We might need to validate this page
* against its code signature, so we
for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
- if (pmap) {
- pmap_change_wiring(pmap,
- pmap_addr + (va - entry->vme_start), FALSE);
- }
if (object == VM_OBJECT_NULL) {
+ if (pmap) {
+ pmap_change_wiring(pmap,
+ pmap_addr + (va - entry->vme_start), FALSE);
+ }
(void) vm_fault(map, va, VM_PROT_NONE,
TRUE, THREAD_UNINT, pmap, pmap_addr);
} else {
result_object = result_page->object;
+ if ((pmap) && (result_page->phys_page != vm_page_guard_addr)) {
+ pmap_change_wiring(pmap,
+ pmap_addr + (va - entry->vme_start), FALSE);
+ }
if (deallocate) {
assert(result_page->phys_page !=
vm_page_fictitious_addr);
extern int cs_validation;
+void
+vm_page_validate_cs_mapped(
+ vm_page_t page,
+ const void *kaddr)
+{
+ vm_object_t object;
+ vm_object_offset_t offset;
+ kern_return_t kr;
+ memory_object_t pager;
+ void *blobs;
+ boolean_t validated, tainted;
+
+ assert(page->busy);
+ vm_object_lock_assert_exclusive(page->object);
+
+ if (!cs_validation) {
+ return;
+ }
+
+ if (page->wpmapped && !page->cs_tainted) {
+ /*
+ * This page was mapped for "write" access sometime in the
+ * past and could still be modifiable in the future.
+ * Consider it tainted.
+ * [ If the page was already found to be "tainted", no
+ * need to re-validate. ]
+ */
+ page->cs_validated = TRUE;
+ page->cs_tainted = TRUE;
+ if (cs_debug) {
+ printf("CODESIGNING: vm_page_validate_cs: "
+ "page %p obj %p off 0x%llx "
+ "was modified\n",
+ page, page->object, page->offset);
+ }
+ vm_cs_validated_dirtied++;
+ }
+
+ if (page->cs_validated) {
+ return;
+ }
+
+ vm_cs_validates++;
+
+ object = page->object;
+ assert(object->code_signed);
+ offset = page->offset;
+
+ if (!object->alive || object->terminating || object->pager == NULL) {
+ /*
+ * The object is terminating and we don't have its pager
+ * so we can't validate the data...
+ */
+ return;
+ }
+ /*
+ * Since we get here to validate a page that was brought in by
+ * the pager, we know that this pager is all setup and ready
+ * by now.
+ */
+ assert(!object->internal);
+ assert(object->pager != NULL);
+ assert(object->pager_ready);
+
+ pager = object->pager;
+
+ kr = vnode_pager_get_object_cs_blobs(pager, &blobs);
+ if (kr != KERN_SUCCESS) {
+ blobs = NULL;
+ }
+
+ /* verify the SHA1 hash for this page */
+ validated = cs_validate_page(blobs,
+ offset + object->paging_offset,
+ (const void *)kaddr,
+ &tainted);
+
+ page->cs_validated = validated;
+ if (validated) {
+ page->cs_tainted = tainted;
+ }
+}
+
void
vm_page_validate_cs(
vm_page_t page)
vm_map_size_t ksize;
vm_offset_t kaddr;
kern_return_t kr;
- memory_object_t pager;
- void *blobs;
- boolean_t validated, tainted;
boolean_t busy_page;
vm_object_lock_assert_held(page->object);
return;
}
- if (page->cs_validated && !page->cs_tainted && page->wpmapped) {
+ if (page->wpmapped && !page->cs_tainted) {
vm_object_lock_assert_exclusive(page->object);
/*
- * This page has already been validated and found to
- * be valid. However, it was mapped for "write" access
- * sometime in the past, so we have to check if it was
- * modified. If so, it needs to be revalidated.
- * If the page was already found to be "tainted", no
- * need to re-validate.
+ * This page was mapped for "write" access sometime in the
+ * past and could still be modifiable in the future.
+ * Consider it tainted.
+ * [ If the page was already found to be "tainted", no
+ * need to re-validate. ]
*/
- if (!page->dirty) {
- vm_cs_query_modified++;
- page->dirty = pmap_is_modified(page->phys_page);
- }
- if (page->dirty) {
- /*
- * The page is dirty, so let's clear its
- * "validated" bit and re-validate it.
- */
- if (cs_debug) {
- printf("CODESIGNING: vm_page_validate_cs: "
- "page %p obj %p off 0x%llx "
- "was modified\n",
- page, page->object, page->offset);
- }
- page->cs_validated = FALSE;
- vm_cs_validated_dirtied++;
+ page->cs_validated = TRUE;
+ page->cs_tainted = TRUE;
+ if (cs_debug) {
+ printf("CODESIGNING: vm_page_validate_cs: "
+ "page %p obj %p off 0x%llx "
+ "was modified\n",
+ page, page->object, page->offset);
}
+ vm_cs_validated_dirtied++;
}
if (page->cs_validated) {
vm_object_lock_assert_exclusive(page->object);
- vm_cs_validates++;
-
object = page->object;
assert(object->code_signed);
offset = page->offset;
object,
offset,
&ksize,
+ VM_PROT_READ,
FALSE); /* can't unlock object ! */
if (kr != KERN_SUCCESS) {
panic("vm_page_validate_cs: could not map page: 0x%x\n", kr);
}
kaddr = CAST_DOWN(vm_offset_t, koffset);
- /*
- * Since we get here to validate a page that was brought in by
- * the pager, we know that this pager is all setup and ready
- * by now.
- */
- assert(!object->internal);
- assert(object->pager != NULL);
- assert(object->pager_ready);
-
- if (!object->alive || object->terminating || object->pager == NULL) {
- /*
- * The object is terminating and we don't have its pager
- * so we can't validate the data...
- */
- goto out;
- }
-
- pager = object->pager;
- assert(pager != NULL);
-
- kr = vnode_pager_get_object_cs_blobs(pager, &blobs);
- if (kr != KERN_SUCCESS) {
- blobs = NULL;
- }
-
- /* verify the SHA1 hash for this page */
- validated = cs_validate_page(blobs,
- offset + object->paging_offset,
- (const void *)kaddr,
- &tainted);
+ /* validate the mapped page */
+ vm_page_validate_cs_mapped(page, (const void *) kaddr);
assert(page->busy);
assert(object == page->object);
vm_object_lock_assert_exclusive(object);
- page->cs_validated = validated;
- if (validated) {
- page->cs_tainted = tainted;
- }
-
-out:
if (!busy_page) {
PAGE_WAKEUP_DONE(page);
}
vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling pmap_init\n"));
pmap_init();
- if (PE_parse_boot_arg("zsize", &zsizearg))
+ if (PE_parse_boot_argn("zsize", &zsizearg, sizeof (zsizearg)))
zsize = zsizearg * 1024ULL * 1024ULL;
else {
zsize = sane_size >> 2; /* Get target zone size as 1/4 of physical memory */
vm_object_t vm_submap_object;
-/*
- * vm_map_init:
- *
- * Initialize the vm_map module. Must be called before
- * any other vm_map routines.
- *
- * Map and entry structures are allocated from zones -- we must
- * initialize those zones.
- *
- * There are three zones of interest:
- *
- * vm_map_zone: used to allocate maps.
- * vm_map_entry_zone: used to allocate map entries.
- * vm_map_kentry_zone: used to allocate map entries for the kernel.
- *
- * The kernel allocates map entries from a special zone that is initially
- * "crammed" with memory. It would be difficult (perhaps impossible) for
- * the kernel to allocate more memory to a entry zone when it became
- * empty since the very act of allocating memory implies the creation
- * of a new entry.
- */
-
static void *map_data;
static vm_map_size_t map_data_size;
static void *kentry_data;
/* Skip acquiring locks if we're in the midst of a kernel core dump */
extern unsigned int not_in_kdp;
-#ifdef __i386__
+#if CONFIG_CODE_DECRYPTION
+/*
+ * vm_map_apple_protected:
+ * This remaps the requested part of the object with an object backed by
+ * the decrypting pager.
+ * crypt_info contains entry points and session data for the crypt module.
+ * The crypt_info block will be copied by vm_map_apple_protected. The data structures
+ * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
+ */
kern_return_t
vm_map_apple_protected(
vm_map_t map,
vm_map_offset_t start,
- vm_map_offset_t end)
+ vm_map_offset_t end,
+ struct pager_crypt_info *crypt_info)
{
boolean_t map_locked;
kern_return_t kr;
if (!vm_map_lookup_entry(map,
start,
&map_entry) ||
- map_entry->vme_end != end ||
+ map_entry->vme_end < end ||
map_entry->is_sub_map) {
/* that memory is not properly mapped */
kr = KERN_INVALID_ARGUMENT;
* it.
*/
- protected_mem_obj = apple_protect_pager_setup(protected_object);
+ protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
if (protected_mem_obj == NULL) {
kr = KERN_FAILURE;
goto done;
map_entry->max_protection,
map_entry->inheritance);
assert(map_addr == start);
- if (kr == KERN_SUCCESS) {
- /* let the pager know that this mem_obj is mapped */
- apple_protect_pager_map(protected_mem_obj);
- }
/*
* Release the reference obtained by apple_protect_pager_setup().
* The mapping (if it succeeded) is now holding a reference on the
}
return kr;
}
-#endif /* __i386__ */
+#endif /* CONFIG_CODE_DECRYPTION */
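/*
 * For illustration only (not part of this change): a caller-side sketch
 * tying the crypt hook and the decrypting pager together. The function name
 * and error handling are hypothetical; the point is that
 * vm_map_apple_protected() copies crypt_info, but whatever crypt_info
 * references must stay valid until crypt_end() runs.
 */
static kern_return_t
map_protected_range_sketch(vm_map_t map, vm_map_offset_t start,
			   vm_map_offset_t end, const char *id, void *crypt_data)
{
	struct pager_crypt_info crypt_info;
	kern_return_t kr;

	if (text_crypter_create == NULL)
		return KERN_FAILURE;	/* no crypt module has registered a hook */

	/* ask the registered crypt module for this binary's entry points */
	if (text_crypter_create(&crypt_info, id, crypt_data) != 0)
		return KERN_FAILURE;

	/* remap [start, end) through the decrypting pager */
	kr = vm_map_apple_protected(map, start, end, &crypt_info);
	if (kr != KERN_SUCCESS && crypt_info.crypt_end != NULL)
		crypt_info.crypt_end(crypt_info.crypt_ops);	/* nobody else will */

	return kr;
}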
+/*
+ * vm_map_init:
+ *
+ * Initialize the vm_map module. Must be called before
+ * any other vm_map routines.
+ *
+ * Map and entry structures are allocated from zones -- we must
+ * initialize those zones.
+ *
+ * There are three zones of interest:
+ *
+ * vm_map_zone: used to allocate maps.
+ * vm_map_entry_zone: used to allocate map entries.
+ * vm_map_kentry_zone: used to allocate map entries for the kernel.
+ *
+ * The kernel allocates map entries from a special zone that is initially
+ * "crammed" with memory. It would be difficult (perhaps impossible) for
+ * the kernel to allocate more memory to a entry zone when it became
+ * empty since the very act of allocating memory implies the creation
+ * of a new entry.
+ */
void
vm_map_init(
void)
result->wiring_required = FALSE;
result->no_zero_fill = FALSE;
result->mapped = FALSE;
+#if CONFIG_EMBEDDED
+ result->prot_copy_allow = FALSE;
+#else
+ result->prot_copy_allow = TRUE;
+#endif
result->wait_for_space = FALSE;
result->first_free = vm_map_to_entry(result);
result->hint = vm_map_to_entry(result);
kern_return_t
vm_map_enter(
vm_map_t map,
- vm_map_offset_t *address, /* IN/OUT */
+ vm_map_offset_t *address, /* IN/OUT */
vm_map_size_t size,
- vm_map_offset_t mask,
+ vm_map_offset_t mask,
int flags,
vm_object_t object,
vm_object_offset_t offset,
boolean_t is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
char alias;
vm_map_offset_t effective_min_offset, effective_max_offset;
+ kern_return_t kr;
+
+#if CONFIG_EMBEDDED
+ if (cur_protection & VM_PROT_WRITE) {
+ if (cur_protection & VM_PROT_EXECUTE) {
+ printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__);
+ cur_protection &= ~VM_PROT_EXECUTE;
+ }
+ }
+ if (max_protection & VM_PROT_WRITE) {
+ if (max_protection & VM_PROT_EXECUTE) {
+ /* Right now all kinds of data segments are RWX. No point in logging that. */
+ /* printf("EMBEDDED: %s maxprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__); */
+
+ /* Try to take a hint from curprot. If curprot is not writable,
+ * make maxprot not writable. Otherwise make it not executable.
+ */
+ if((cur_protection & VM_PROT_WRITE) == 0) {
+ max_protection &= ~VM_PROT_WRITE;
+ } else {
+ max_protection &= ~VM_PROT_EXECUTE;
+ }
+ }
+ }
+ assert ((cur_protection | max_protection) == max_protection);
+#endif /* CONFIG_EMBEDDED */
if (is_submap) {
if (purgable) {
}
}
if (use_pmap && submap->pmap != NULL) {
- kern_return_t kr;
-
kr = pmap_nest(map->pmap,
submap->pmap,
tmp_start,
}
BailOut: ;
- if (result == KERN_SUCCESS &&
- pmap_empty &&
- !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
- assert(vm_map_pmap_is_empty(map, *address, *address+size));
- }
+ if (result == KERN_SUCCESS) {
+ vm_prot_t pager_prot;
+ memory_object_t pager;
- if (result != KERN_SUCCESS) {
+ if (pmap_empty &&
+ !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
+ assert(vm_map_pmap_is_empty(map,
+ *address,
+ *address+size));
+ }
+
+ /*
+ * For "named" VM objects, let the pager know that the
+ * memory object is being mapped. Some pagers need to keep
+ * track of this, to know when they can reclaim the memory
+ * object, for example.
+ * VM calls memory_object_map() for each mapping (specifying
+ * the protection of each mapping) and calls
+ * memory_object_last_unmap() when all the mappings are gone.
+ */
+ pager_prot = max_protection;
+ if (needs_copy) {
+ /*
+ * Copy-On-Write mapping: won't modify
+ * the memory object.
+ */
+ pager_prot &= ~VM_PROT_WRITE;
+ }
+ if (!is_submap &&
+ object != VM_OBJECT_NULL &&
+ object->named &&
+ object->pager != MEMORY_OBJECT_NULL) {
+ vm_object_lock(object);
+ pager = object->pager;
+ if (object->named &&
+ pager != MEMORY_OBJECT_NULL) {
+ assert(object->pager_ready);
+ vm_object_mapping_wait(object, THREAD_UNINT);
+ vm_object_mapping_begin(object);
+ vm_object_unlock(object);
+
+ kr = memory_object_map(pager, pager_prot);
+ assert(kr == KERN_SUCCESS);
+
+ vm_object_lock(object);
+ vm_object_mapping_end(object);
+ }
+ vm_object_unlock(object);
+ }
+ } else {
if (new_mapping_established) {
/*
* We have to get rid of the new mappings since we
map_addr = vm_map_trunc_page(*address);
map_size = vm_map_round_page(initial_size);
size = vm_object_round_page(initial_size);
-
+
/*
* Find the vm object (if any) corresponding to this port.
*/
return KERN_INVALID_OBJECT;
}
+ if (object != VM_OBJECT_NULL &&
+ object->named &&
+ object->pager != MEMORY_OBJECT_NULL &&
+ object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
+ memory_object_t pager;
+ vm_prot_t pager_prot;
+ kern_return_t kr;
+
+ /*
+ * For "named" VM objects, let the pager know that the
+ * memory object is being mapped. Some pagers need to keep
+ * track of this, to know when they can reclaim the memory
+ * object, for example.
+ * VM calls memory_object_map() for each mapping (specifying
+ * the protection of each mapping) and calls
+ * memory_object_last_unmap() when all the mappings are gone.
+ */
+ pager_prot = max_protection;
+ if (copy) {
+ /*
+ * Copy-On-Write mapping: won't modify the
+ * memory object.
+ */
+ pager_prot &= ~VM_PROT_WRITE;
+ }
+ vm_object_lock(object);
+ pager = object->pager;
+ if (object->named &&
+ pager != MEMORY_OBJECT_NULL &&
+ object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
+ assert(object->pager_ready);
+ vm_object_mapping_wait(object, THREAD_UNINT);
+ vm_object_mapping_begin(object);
+ vm_object_unlock(object);
+
+ kr = memory_object_map(pager, pager_prot);
+ assert(kr == KERN_SUCCESS);
+
+ vm_object_lock(object);
+ vm_object_mapping_end(object);
+ }
+ vm_object_unlock(object);
+ }
+
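The comment above describes the map/last-unmap protocol from the VM side; a pager that wants to track its mappings would pair the two calls roughly as sketched below. This is a sketch, not code from the diff: the function names and the counter are assumptions, only the signatures follow the vnode/device pager declarations later in this diff.

	/* Hypothetical pager bookkeeping; names and the counter are illustrative. */
	static int example_pager_map_count;

	kern_return_t
	example_pager_map(memory_object_t mem_obj, vm_prot_t prot)
	{
		(void)mem_obj; (void)prot;
		/* the VM calls this once for each mapping it establishes */
		example_pager_map_count++;
		return KERN_SUCCESS;
	}

	kern_return_t
	example_pager_last_unmap(memory_object_t mem_obj)
	{
		(void)mem_obj;
		/* the VM calls this once, after the last mapping has gone away */
		example_pager_map_count = 0;
		/* safe point to release resources held only while mapped */
		return KERN_SUCCESS;
	}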
/*
* Perform the copy if requested
*/
vm_map_lock(map);
+ if ((new_prot & VM_PROT_COPY) && !map->prot_copy_allow) {
+ vm_map_unlock(map);
+ return(KERN_PROTECTION_FAILURE);
+ }
+
/* LP64todo - remove this check when vm_map_commpage64()
* no longer has to stuff in a map_entry for the commpage
* above the map's max_offset.
}
}
+#if CONFIG_EMBEDDED
+ if (new_prot & VM_PROT_WRITE) {
+ if (new_prot & VM_PROT_EXECUTE) {
+ printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
+ new_prot &= ~VM_PROT_EXECUTE;
+ }
+ }
+#endif
+
prev = current->vme_end;
current = current->vme_next;
}
entry->wired_count = 0;
entry->user_wired_count = 0;
offset = entry->offset = copy_entry->offset;
- /*
- * XXX FBDP
- * We should propagate the submap entry's protections
- * here instead of forcing VM_PROT_ALL.
- * Or better yet, we should inherit the protection
- * of the copy_entry.
- */
- entry->protection = VM_PROT_ALL;
- entry->max_protection = VM_PROT_ALL;
vm_map_copy_entry_unlink(copy, copy_entry);
vm_map_copy_entry_dispose(copy, copy_entry);
if (m->speculative)
*disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
+ if (m->cs_validated)
+ *disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
+ if (m->cs_tainted)
+ *disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
+
page_query_done:
vm_object_unlock(object);
{
map->user_wire_limit = limit;
}
+
+void vm_map_set_prot_copy_allow(vm_map_t map,
+ boolean_t allow)
+{
+ vm_map_lock(map);
+ map->prot_copy_allow = allow;
+ vm_map_unlock(map);
+}
boolean_t wiring_required;/* All memory wired? */
boolean_t no_zero_fill; /* No zero fill absent pages */
boolean_t mapped; /* has this map been mapped */
+ boolean_t prot_copy_allow;/* is VM_PROT_COPY allowed on this map */
unsigned int timestamp; /* Version number */
unsigned int color_rr; /* next color (not protected by a lock) */
} ;
vm_map_t map,
vm_size_t limit);
+extern void vm_map_set_prot_copy_allow(
+ vm_map_t map,
+ boolean_t allow);
+
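One plausible wiring of this setter, sketched here only for orientation and not taken from the diff: a BSD-layer call site could consult the new MAC hook added later in this diff and relax the restriction only when the policy allows it. The location of the call and the use of get_task_map() are assumptions.

	/* Hypothetical call site; where this runs is an assumption. */
	if (mac_proc_check_map_prot_copy_allow(p) == 0) {
		/* the MAC policy permits VM_PROT_COPY for this process */
		vm_map_set_prot_copy_allow(get_task_map(p->task), TRUE);
	}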
#ifdef MACH_KERNEL_PRIVATE
/*
/* cache bitfields */
vm_object_template.wimg_bits = VM_WIMG_DEFAULT;
vm_object_template.code_signed = FALSE;
+ vm_object_template.mapping_in_progress = FALSE;
vm_object_template.not_in_use = 0;
#ifdef UPL_DEBUG
vm_object_template.uplq.prev = NULL;
/* more mappers for this object */
if (pager != MEMORY_OBJECT_NULL) {
+ vm_object_mapping_wait(object, THREAD_UNINT);
+ vm_object_mapping_begin(object);
vm_object_unlock(object);
vm_object_cache_unlock();
- memory_object_unmap(pager);
+ memory_object_last_unmap(pager);
try_failed_count = 0;
for (;;) {
mutex_pause(try_failed_count); /* wait a bit */
}
assert(object->ref_count > 0);
+
+ vm_object_mapping_end(object);
}
}
/* fall thru */
case VM_FAULT_INTERRUPTED:
+ vm_object_lock(new_object);
+ vm_page_lock_queues();
vm_page_free(new_page);
+ vm_page_unlock_queues();
+ vm_object_unlock(new_object);
+
vm_object_deallocate(new_object);
vm_object_deallocate(src_object);
*_result_object = VM_OBJECT_NULL;
* any page fails [chosen]
*/
+ vm_object_lock(new_object);
vm_page_lock_queues();
vm_page_free(new_page);
vm_page_unlock_queues();
+ vm_object_unlock(new_object);
vm_object_deallocate(new_object);
vm_object_deallocate(src_object);
* Since its ref_count was at least 2, it
* will not vanish; so we don't need to call
* vm_object_deallocate.
- * [FBDP: that doesn't seem to be true any more]
+ * [with a caveat for "named" objects]
*
* The res_count on the backing object is
* conditionally decremented. It's possible
* is temporary and cachable.
#endif
*/
- if (backing_object->ref_count > 1) {
+ if (backing_object->ref_count > 2 ||
+ (!backing_object->named && backing_object->ref_count > 1)) {
vm_object_lock_assert_exclusive(backing_object);
backing_object->ref_count--;
#if TASK_SWAPPER
* backing object that show through to the object.
*/
#if MACH_PAGEMAP
- if (backing_rcount || backing_object->existence_map) {
+ if (backing_rcount || backing_object->existence_map)
#else
- if (backing_rcount) {
+ if (backing_rcount)
#endif /* MACH_PAGEMAP */
+ {
offset = hint_offset;
while((offset =
return (KERN_SUCCESS);
}
+unsigned int vm_page_purged_wired = 0;
+unsigned int vm_page_purged_busy = 0;
+unsigned int vm_page_purged_others = 0;
/*
* Empty a purgeable object by grabbing the physical pages assigned to it and
* putting them on the free queue without writing them to backing store, etc.
/* resume with the current page and a new quota */
purge_loop_quota = PURGE_LOOP_QUOTA;
}
-
-
- if (p->busy || p->cleaning || p->laundry ||
- p->list_req_pending) {
- /* page is being acted upon, so don't mess with it */
- continue;
- }
+
if (p->wire_count) {
/* don't discard a wired page */
+ vm_page_purged_wired++;
+
+ skip_page:
+ /*
+ * This page is no longer "purgeable",
+ * for accounting purposes.
+ */
+ assert(vm_page_purgeable_count > 0);
+ vm_page_purgeable_count--;
continue;
}
+ if (p->busy) {
+ /*
+ * We can't reclaim a busy page but we can deactivate
+ * it (if it's not wired) to make sure it gets
+ * considered by vm_pageout_scan() later.
+ */
+ vm_page_deactivate(p);
+ vm_page_purged_busy++;
+ goto skip_page;
+ }
+
+ if (p->cleaning || p->laundry || p->list_req_pending) {
+ /* page is being acted upon, so don't mess with it */
+ vm_page_purged_others++;
+ goto skip_page;
+ }
+
assert(!p->laundry);
assert(p->object != kernel_object);
}
vm_page_free_prepare(p);
+ /*
+ * vm_page_purgeable_count is not updated when freeing
+ * a page from an "empty" object, so do it explicitly here.
+ */
+ assert(vm_page_purgeable_count > 0);
+ vm_page_purgeable_count--;
/* ... and put it on our queue of pages to free */
assert(p->pageq.next == NULL &&
if (old_state != VM_PURGABLE_NONVOLATILE) {
vm_page_lock_queues();
- assert(vm_page_purgeable_count >=
- object->resident_page_count);
- vm_page_purgeable_count -= object->resident_page_count;
-
if (old_state==VM_PURGABLE_VOLATILE) {
+ assert(vm_page_purgeable_count >=
+ object->resident_page_count);
+ vm_page_purgeable_count -= object->resident_page_count;
+
assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */
purgeable_q_t queue = vm_purgeable_object_remove(object);
assert(queue);
case VM_PURGABLE_VOLATILE:
- if ((old_state != VM_PURGABLE_NONVOLATILE) && (old_state != VM_PURGABLE_VOLATILE))
+ if (old_state == VM_PURGABLE_EMPTY &&
+ object->resident_page_count == 0)
break;
purgeable_q_t queue;
/* find the correct queue */
if ((*state&VM_PURGABLE_ORDERING_MASK) == VM_PURGABLE_ORDERING_OBSOLETE)
- queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
+ queue = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
else {
if ((*state&VM_PURGABLE_BEHAVIOR_MASK) == VM_PURGABLE_BEHAVIOR_FIFO)
queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
queue = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
}
- if (old_state == VM_PURGABLE_NONVOLATILE) {
+ if (old_state == VM_PURGABLE_NONVOLATILE ||
+ old_state == VM_PURGABLE_EMPTY) {
/* try to add token... this can fail */
vm_page_lock_queues();
vm_purgeable_token_delete_first(old_queue);
}
- if (old_state==VM_PURGABLE_NONVOLATILE) {
- vm_page_purgeable_count += object->resident_page_count;
+ if (old_state==VM_PURGABLE_NONVOLATILE ||
+ old_state == VM_PURGABLE_EMPTY) {
vm_page_lock_queues();
+ vm_page_purgeable_count += object->resident_page_count;
}
+ object->purgable = VM_PURGABLE_VOLATILE;
(void) vm_object_purge(object);
vm_page_unlock_queues();
}
code_signed:1, /* pages are signed and should be
validated; the signatures are stored
with the pager */
- not_in_use:23; /* for expansion */
+ mapping_in_progress:1, /* pager being mapped/unmapped */
+ not_in_use:22; /* for expansion */
#ifdef UPL_DEBUG
queue_head_t uplq; /* List of outstanding upls */
#define VM_OBJECT_EVENT_INITIALIZED 0
#define VM_OBJECT_EVENT_PAGER_READY 1
#define VM_OBJECT_EVENT_PAGING_IN_PROGRESS 2
+#define VM_OBJECT_EVENT_MAPPING_IN_PROGRESS 3
#define VM_OBJECT_EVENT_LOCK_IN_PROGRESS 4
#define VM_OBJECT_EVENT_UNCACHING 5
#define VM_OBJECT_EVENT_COPY_CALL 6
MACRO_END
+#define vm_object_mapping_begin(object) \
+ MACRO_BEGIN \
+ vm_object_lock_assert_exclusive((object)); \
+ assert(! (object)->mapping_in_progress); \
+ (object)->mapping_in_progress = TRUE; \
+ MACRO_END
+
+#define vm_object_mapping_end(object) \
+ MACRO_BEGIN \
+ vm_object_lock_assert_exclusive((object)); \
+ assert((object)->mapping_in_progress); \
+ (object)->mapping_in_progress = FALSE; \
+ vm_object_wakeup((object), \
+ VM_OBJECT_EVENT_MAPPING_IN_PROGRESS); \
+ MACRO_END
+
+#define vm_object_mapping_wait(object, interruptible) \
+ MACRO_BEGIN \
+ vm_object_lock_assert_exclusive((object)); \
+ while ((object)->mapping_in_progress) { \
+ wait_result_t _wr; \
+ \
+ _wr = vm_object_sleep((object), \
+ VM_OBJECT_EVENT_MAPPING_IN_PROGRESS, \
+ (interruptible)); \
+ /*XXX if ((interruptible) && (_wr != THREAD_AWAKENED))*/\
+ /*XXX break; */ \
+ } \
+ assert(!(object)->mapping_in_progress); \
+ MACRO_END
+
+
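For reference, the calling sequence these macros are built for (and which the vm_map_enter() and vm_object_deallocate() changes earlier in this diff follow) is: with the object locked exclusively, wait out any mapping already in progress, mark one as begun, drop the lock around the potentially blocking pager upcall, then retake the lock and end it. A condensed sketch, assuming a vm_object_t object, its pager, and a pager_prot already computed:

	vm_object_lock(object);
	vm_object_mapping_wait(object, THREAD_UNINT);
	vm_object_mapping_begin(object);
	vm_object_unlock(object);

	/* pager upcall happens without the object lock held */
	kr = memory_object_map(pager, pager_prot);
	assert(kr == KERN_SUCCESS);

	vm_object_lock(object);
	vm_object_mapping_end(object);
	vm_object_unlock(object);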
#define OBJECT_LOCK_SHARED 0
#define OBJECT_LOCK_EXCLUSIVE 1
vm_page_t page);
extern void vm_page_validate_cs(vm_page_t page);
+extern void vm_page_validate_cs_mapped(
+ vm_page_t page,
+ const void *kaddr);
/*
* Functions implemented as macros. m->wanted and m->busy are
#ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE /* maximum iterations of the active queue to move pages to inactive */
-#ifdef CONFIG_EMBEDDED
-#define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 2048
-#else
#define VM_PAGEOUT_BURST_ACTIVE_THROTTLE 100
#endif
-#endif
#ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
#ifdef CONFIG_EMBEDDED
* Don't sweep through active queue more than the throttle
* which should be kept relatively low
*/
- active_burst_count = vm_pageout_burst_active_throttle;
+ active_burst_count = MIN(vm_pageout_burst_active_throttle, vm_page_active_count);
/*
* Move pages from active to inactive.
* inactive target still not met... keep going
* until we get the queues balanced
*/
+
+ /*
+	 * Recalculate vm_page_inactive_target.
+ */
+ vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
+ vm_page_inactive_count +
+ vm_page_speculative_count);
+
+#ifndef CONFIG_EMBEDDED
+ /*
+ * XXX: if no active pages can be reclaimed, pageout scan can be stuck trying
+ * to balance the queues
+ */
if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) &&
!queue_empty(&vm_page_queue_active))
continue;
+#endif
mutex_lock(&vm_page_queue_free_lock);
msecs = vm_pageout_empty_wait;
goto vm_pageout_scan_delay;
- } else if (inactive_burst_count >= vm_pageout_burst_inactive_throttle) {
+ } else if (inactive_burst_count >=
+ MIN(vm_pageout_burst_inactive_throttle,
+ (vm_page_inactive_count +
+ vm_page_speculative_count))) {
vm_pageout_scan_burst_throttle++;
msecs = vm_pageout_burst_wait;
goto vm_pageout_scan_delay;
upl->highest_page = dst_page->phys_page;
if (user_page_list) {
user_page_list[entry].phys_addr = dst_page->phys_page;
- user_page_list[entry].dirty = dst_page->dirty;
user_page_list[entry].pageout = dst_page->pageout;
user_page_list[entry].absent = dst_page->absent;
+ user_page_list[entry].dirty = dst_page->dirty;
user_page_list[entry].precious = dst_page->precious;
-
+ user_page_list[entry].device = FALSE;
if (dst_page->clustered == TRUE)
user_page_list[entry].speculative = dst_page->speculative;
else
user_page_list[entry].speculative = FALSE;
+ user_page_list[entry].cs_validated = dst_page->cs_validated;
+ user_page_list[entry].cs_tainted = dst_page->cs_tainted;
}
/*
* if UPL_RET_ONLY_ABSENT is set, then
}
delayed_unlock = 1;
+ if (shadow_object->code_signed) {
+ /*
+ * CODE SIGNING:
+ * If the object is code-signed, do not let this UPL tell
+ * us if the pages are valid or not. Let the pages be
+ * validated by VM the normal way (when they get mapped or
+ * copied).
+ */
+ flags &= ~UPL_COMMIT_CS_VALIDATED;
+ }
+ if (! page_list) {
+ /*
+ * No page list to get the code-signing info from !?
+ */
+ flags &= ~UPL_COMMIT_CS_VALIDATED;
+ }
+
while (xfer_size) {
vm_page_t t, m;
m = vm_page_lookup(shadow_object, target_offset + object->shadow_offset);
}
}
- if (m != VM_PAGE_NULL) {
-
- clear_refmod = 0;
+ if (m == VM_PAGE_NULL) {
+ goto commit_next_page;
+ }
- if (upl->flags & UPL_IO_WIRE) {
+ clear_refmod = 0;
- vm_page_unwire(m);
-
- if (page_list)
- page_list[entry].phys_addr = 0;
+ if (flags & UPL_COMMIT_CS_VALIDATED) {
+ /*
+ * CODE SIGNING:
+ * Set the code signing bits according to
+ * what the UPL says they should be.
+ */
+ m->cs_validated = page_list[entry].cs_validated;
+ m->cs_tainted = page_list[entry].cs_tainted;
+ }
+ if (upl->flags & UPL_IO_WIRE) {
- if (flags & UPL_COMMIT_SET_DIRTY)
- m->dirty = TRUE;
- else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
- m->dirty = FALSE;
- if (m->cs_validated && !m->cs_tainted) {
- /*
- * CODE SIGNING:
- * This page is no longer dirty
- * but could have been modified,
- * so it will need to be
- * re-validated.
- */
- m->cs_validated = FALSE;
- vm_cs_validated_resets++;
- }
- clear_refmod |= VM_MEM_MODIFIED;
- }
- if (flags & UPL_COMMIT_INACTIVATE)
- vm_page_deactivate(m);
+ vm_page_unwire(m);
- if (clear_refmod)
- pmap_clear_refmod(m->phys_page, clear_refmod);
+ if (page_list)
+ page_list[entry].phys_addr = 0;
- if (flags & UPL_COMMIT_ALLOW_ACCESS) {
- /*
- * We blocked access to the pages in this UPL.
- * Clear the "busy" bit and wake up any waiter
- * for this page.
- */
- PAGE_WAKEUP_DONE(m);
- }
- goto commit_next_page;
- }
- /*
- * make sure to clear the hardware
- * modify or reference bits before
- * releasing the BUSY bit on this page
- * otherwise we risk losing a legitimate
- * change of state
- */
- if (flags & UPL_COMMIT_CLEAR_DIRTY) {
- m->dirty = FALSE;
- if (m->cs_validated && !m->cs_tainted) {
+ if (flags & UPL_COMMIT_SET_DIRTY)
+ m->dirty = TRUE;
+ else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
+ m->dirty = FALSE;
+ if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
+ m->cs_validated && !m->cs_tainted) {
/*
* CODE SIGNING:
* This page is no longer dirty
}
clear_refmod |= VM_MEM_MODIFIED;
}
- if (clear_refmod)
- pmap_clear_refmod(m->phys_page, clear_refmod);
-
- if (page_list) {
- upl_page_info_t *p;
+
+ if (flags & UPL_COMMIT_INACTIVATE)
+ vm_page_deactivate(m);
- p = &(page_list[entry]);
+ if (clear_refmod)
+ pmap_clear_refmod(m->phys_page, clear_refmod);
- if (p->phys_addr && p->pageout && !m->pageout) {
- m->busy = TRUE;
- m->pageout = TRUE;
- vm_page_wire(m);
- } else if (p->phys_addr &&
- !p->pageout && m->pageout &&
- !m->dump_cleaning) {
- m->pageout = FALSE;
- m->absent = FALSE;
- m->overwriting = FALSE;
- vm_page_unwire(m);
+ if (flags & UPL_COMMIT_ALLOW_ACCESS) {
+ /*
+ * We blocked access to the pages in this UPL.
+ * Clear the "busy" bit and wake up any waiter
+ * for this page.
+ */
+ PAGE_WAKEUP_DONE(m);
+ }
+ goto commit_next_page;
+ }
+ /*
+ * make sure to clear the hardware
+ * modify or reference bits before
+ * releasing the BUSY bit on this page
+ * otherwise we risk losing a legitimate
+ * change of state
+ */
+ if (flags & UPL_COMMIT_CLEAR_DIRTY) {
+ m->dirty = FALSE;
- PAGE_WAKEUP_DONE(m);
- }
- page_list[entry].phys_addr = 0;
+ if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
+ m->cs_validated && !m->cs_tainted) {
+ /*
+ * CODE SIGNING:
+ * This page is no longer dirty
+ * but could have been modified,
+ * so it will need to be
+ * re-validated.
+ */
+ m->cs_validated = FALSE;
+#if DEVELOPMENT || DEBUG
+ vm_cs_validated_resets++;
+#endif
}
- m->dump_cleaning = FALSE;
+ clear_refmod |= VM_MEM_MODIFIED;
+ }
+ if (clear_refmod)
+ pmap_clear_refmod(m->phys_page, clear_refmod);
- if (m->laundry)
- vm_pageout_throttle_up(m);
+ if (page_list) {
+ upl_page_info_t *p;
- if (m->pageout) {
- m->cleaning = FALSE;
- m->encrypted_cleaning = FALSE;
+ p = &(page_list[entry]);
+
+ if (p->phys_addr && p->pageout && !m->pageout) {
+ m->busy = TRUE;
+ m->pageout = TRUE;
+ vm_page_wire(m);
+ } else if (p->phys_addr &&
+ !p->pageout && m->pageout &&
+ !m->dump_cleaning) {
m->pageout = FALSE;
-#if MACH_CLUSTER_STATS
- if (m->wanted) vm_pageout_target_collisions++;
-#endif
- m->dirty = FALSE;
- if (m->cs_validated && !m->cs_tainted) {
- /*
- * CODE SIGNING:
- * This page is no longer dirty
- * but could have been modified,
- * so it will need to be
- * re-validated.
- */
- m->cs_validated = FALSE;
- vm_cs_validated_resets++;
- }
-
- if (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED))
- m->dirty = TRUE;
-
- if (m->dirty) {
- /*
- * page was re-dirtied after we started
- * the pageout... reactivate it since
- * we don't know whether the on-disk
- * copy matches what is now in memory
- */
- vm_page_unwire(m);
-
- if (upl->flags & UPL_PAGEOUT) {
- CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
- VM_STAT_INCR(reactivations);
- DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
- }
- PAGE_WAKEUP_DONE(m);
- } else {
- /*
- * page has been successfully cleaned
- * go ahead and free it for other use
- */
+ m->absent = FALSE;
+ m->overwriting = FALSE;
+ vm_page_unwire(m);
+
+ PAGE_WAKEUP_DONE(m);
+ }
+ page_list[entry].phys_addr = 0;
+ }
+ m->dump_cleaning = FALSE;
- if (m->object->internal) {
- DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
- } else {
- DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
- }
+ if (m->laundry)
+ vm_pageout_throttle_up(m);
- vm_page_free(m);
-
- if (upl->flags & UPL_PAGEOUT) {
- CLUSTER_STAT(vm_pageout_target_page_freed++;)
-
- if (page_list[entry].dirty) {
- VM_STAT_INCR(pageouts);
- DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
- pgpgout_count++;
- }
- }
- }
- goto commit_next_page;
- }
+ if (m->pageout) {
+ m->cleaning = FALSE;
+ m->encrypted_cleaning = FALSE;
+ m->pageout = FALSE;
#if MACH_CLUSTER_STATS
- if (m->wpmapped)
- m->dirty = pmap_is_modified(m->phys_page);
-
- if (m->dirty) vm_pageout_cluster_dirtied++;
- else vm_pageout_cluster_cleaned++;
- if (m->wanted) vm_pageout_cluster_collisions++;
+ if (m->wanted) vm_pageout_target_collisions++;
#endif
m->dirty = FALSE;
- if (m->cs_validated && !m->cs_tainted) {
+
+ if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
+ m->cs_validated && !m->cs_tainted) {
/*
* CODE SIGNING:
* This page is no longer dirty
* re-validated.
*/
m->cs_validated = FALSE;
+#if DEVELOPMENT || DEBUG
vm_cs_validated_resets++;
+#endif
}
-
- if ((m->busy) && (m->cleaning)) {
- /*
- * the request_page_list case
+
+ if (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED))
+ m->dirty = TRUE;
+
+ if (m->dirty) {
+ /*
+ * page was re-dirtied after we started
+ * the pageout... reactivate it since
+ * we don't know whether the on-disk
+ * copy matches what is now in memory
*/
- m->absent = FALSE;
- m->overwriting = FALSE;
- m->busy = FALSE;
- } else if (m->overwriting) {
- /*
- * alternate request page list, write to
- * page_list case. Occurs when the original
- * page was wired at the time of the list
- * request
+ vm_page_unwire(m);
+
+ if (upl->flags & UPL_PAGEOUT) {
+ CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
+ VM_STAT_INCR(reactivations);
+ DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
+ }
+ PAGE_WAKEUP_DONE(m);
+ } else {
+ /*
+ * page has been successfully cleaned
+ * go ahead and free it for other use
*/
- assert(m->wire_count != 0);
- vm_page_unwire(m);/* reactivates */
- m->overwriting = FALSE;
+
+ if (m->object->internal) {
+ DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
+ } else {
+ DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
+ }
+
+ vm_page_free(m);
+
+ if (upl->flags & UPL_PAGEOUT) {
+ CLUSTER_STAT(vm_pageout_target_page_freed++;)
+
+ if (page_list[entry].dirty) {
+ VM_STAT_INCR(pageouts);
+ DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
+ pgpgout_count++;
+ }
+ }
}
- m->cleaning = FALSE;
- m->encrypted_cleaning = FALSE;
+ goto commit_next_page;
+ }
+#if MACH_CLUSTER_STATS
+ if (m->wpmapped)
+ m->dirty = pmap_is_modified(m->phys_page);
+
+ if (m->dirty) vm_pageout_cluster_dirtied++;
+ else vm_pageout_cluster_cleaned++;
+ if (m->wanted) vm_pageout_cluster_collisions++;
+#endif
+ m->dirty = FALSE;
+ if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
+ m->cs_validated && !m->cs_tainted) {
/*
- * It is a part of the semantic of COPYOUT_FROM
- * UPLs that a commit implies cache sync
- * between the vm page and the backing store
- * this can be used to strip the precious bit
- * as well as clean
+ * CODE SIGNING:
+ * This page is no longer dirty
+ * but could have been modified,
+ * so it will need to be
+ * re-validated.
*/
- if (upl->flags & UPL_PAGE_SYNC_DONE)
- m->precious = FALSE;
-
- if (flags & UPL_COMMIT_SET_DIRTY)
- m->dirty = TRUE;
+ m->cs_validated = FALSE;
+#if DEVELOPMENT || DEBUG
+ vm_cs_validated_resets++;
+#endif
+ }
- if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) {
+ if ((m->busy) && (m->cleaning)) {
+ /*
+ * the request_page_list case
+ */
+ m->absent = FALSE;
+ m->overwriting = FALSE;
+ m->busy = FALSE;
+ } else if (m->overwriting) {
+ /*
+ * alternate request page list, write to
+ * page_list case. Occurs when the original
+ * page was wired at the time of the list
+ * request
+ */
+ assert(m->wire_count != 0);
+ vm_page_unwire(m);/* reactivates */
+ m->overwriting = FALSE;
+ }
+ m->cleaning = FALSE;
+ m->encrypted_cleaning = FALSE;
+
+ /*
+ * It is a part of the semantic of COPYOUT_FROM
+ * UPLs that a commit implies cache sync
+ * between the vm page and the backing store
+ * this can be used to strip the precious bit
+ * as well as clean
+ */
+ if (upl->flags & UPL_PAGE_SYNC_DONE)
+ m->precious = FALSE;
+
+ if (flags & UPL_COMMIT_SET_DIRTY)
+ m->dirty = TRUE;
+
+ if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) {
+ vm_page_deactivate(m);
+ } else if (!m->active && !m->inactive && !m->speculative) {
+
+ if (m->clustered)
+ vm_page_speculate(m, TRUE);
+ else if (m->reference)
+ vm_page_activate(m);
+ else
vm_page_deactivate(m);
- } else if (!m->active && !m->inactive && !m->speculative) {
-
- if (m->clustered)
- vm_page_speculate(m, TRUE);
- else if (m->reference)
- vm_page_activate(m);
- else
- vm_page_deactivate(m);
- }
- if (flags & UPL_COMMIT_ALLOW_ACCESS) {
- /*
- * We blocked access to the pages in this URL.
- * Clear the "busy" bit on this page before we
- * wake up any waiter.
- */
- m->busy = FALSE;
- }
+ }
+ if (flags & UPL_COMMIT_ALLOW_ACCESS) {
/*
- * Wakeup any thread waiting for the page to be un-cleaning.
+				 * We blocked access to the pages in this UPL.
+ * Clear the "busy" bit on this page before we
+ * wake up any waiter.
*/
- PAGE_WAKEUP(m);
+ m->busy = FALSE;
}
+ /*
+ * Wakeup any thread waiting for the page to be un-cleaning.
+ */
+ PAGE_WAKEUP(m);
+
commit_next_page:
target_offset += PAGE_SIZE_64;
xfer_size -= PAGE_SIZE;
if (user_page_list) {
user_page_list[entry].phys_addr = dst_page->phys_page;
- user_page_list[entry].dirty = dst_page->dirty;
user_page_list[entry].pageout = dst_page->pageout;
user_page_list[entry].absent = dst_page->absent;
+ user_page_list[entry].dirty = dst_page->dirty;
user_page_list[entry].precious = dst_page->precious;
-
+ user_page_list[entry].device = FALSE;
if (dst_page->clustered == TRUE)
user_page_list[entry].speculative = dst_page->speculative;
else
user_page_list[entry].speculative = FALSE;
+ user_page_list[entry].cs_validated = dst_page->cs_validated;
+ user_page_list[entry].cs_tainted = dst_page->cs_tainted;
}
/*
* someone is explicitly grabbing this page...
vm_object_t object,
vm_object_offset_t offset,
vm_map_size_t *size,
+ vm_prot_t protection,
boolean_t can_unlock_object)
{
kern_return_t kr;
vm_object_offset_t object_offset;
int i;
-
+
if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
assert(page->busy);
/*
PMAP_ENTER(kernel_pmap,
page_map_offset,
page,
- VM_PROT_DEFAULT,
+ protection,
((int) page->object->wimg_bits &
VM_WIMG_MASK),
TRUE);
object,
object_offset,
FALSE,
- VM_PROT_DEFAULT,
+ protection,
VM_PROT_ALL,
VM_INHERIT_NONE);
if (kr != KERN_SUCCESS) {
pmap_sync_page_data_phys(page->phys_page);
}
page->pmapped = TRUE;
- page->wpmapped = TRUE;
cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK;
//assert(pmap_verify_free(page->phys_page));
PMAP_ENTER(kernel_pmap,
*address + page_map_offset,
page,
- VM_PROT_DEFAULT,
+ protection,
cache_attr,
TRUE);
}
page->object,
page->offset,
&kernel_mapping_size,
+ VM_PROT_READ | VM_PROT_WRITE,
FALSE);
if (kr != KERN_SUCCESS) {
panic("vm_page_encrypt: "
page->object,
page->offset,
&kernel_mapping_size,
+ VM_PROT_READ | VM_PROT_WRITE,
FALSE);
if (kr != KERN_SUCCESS) {
panic("vm_page_decrypt: "
vm_object_t object,
vm_object_offset_t offset,
vm_map_size_t *size,
+ vm_prot_t protection,
boolean_t can_unlock_object);
extern void vm_paging_unmap_object(
vm_object_t object,
extern vm_offset_t get_vm_start(vm_map_t);
extern vm_offset_t get_vm_end(vm_map_t);
-#ifdef __i386__
+#if CONFIG_CODE_DECRYPTION
+struct pager_crypt_info;
extern kern_return_t vm_map_apple_protected(
- vm_map_t map,
- vm_map_offset_t start,
- vm_map_offset_t end);
+ vm_map_t map,
+ vm_map_offset_t start,
+ vm_map_offset_t end,
+ struct pager_crypt_info *crypt_info);
extern void apple_protect_pager_bootstrap(void);
-extern memory_object_t apple_protect_pager_setup(vm_object_t backing_object);
-extern void apple_protect_pager_map(memory_object_t mem_obj);
-#endif /* __i386__ */
+extern memory_object_t apple_protect_pager_setup(vm_object_t backing_object,
+ struct pager_crypt_info *crypt_info);
+#endif /* CONFIG_CODE_DECRYPTION */
/*
memory_object_offset_t offset,
vm_size_t length,
vm_sync_t sync_flags);
-extern kern_return_t vnode_pager_unmap(
+extern kern_return_t vnode_pager_map(
+ memory_object_t mem_obj,
+ vm_prot_t prot);
+extern kern_return_t vnode_pager_last_unmap(
memory_object_t mem_obj);
extern void vnode_pager_deallocate(
memory_object_t);
struct vnode *vp);
extern void vnode_pager_release_from_cache(
int *);
+extern int ubc_map(
+ struct vnode *vp,
+ int flags);
extern void ubc_unmap(
struct vnode *vp);
memory_object_offset_t,
vm_size_t,
vm_sync_t);
-extern kern_return_t dp_memory_object_unmap(memory_object_t);
+extern kern_return_t dp_memory_object_map(memory_object_t,
+ vm_prot_t);
+extern kern_return_t dp_memory_object_last_unmap(memory_object_t);
#endif /* _memory_object_server_ */
#ifndef _memory_object_default_server_
extern kern_return_t default_pager_memory_object_create(
memory_object_offset_t,
vm_size_t,
vm_sync_t);
-extern kern_return_t device_pager_unmap(memory_object_t);
+extern kern_return_t device_pager_map(memory_object_t, vm_prot_t);
+extern kern_return_t device_pager_last_unmap(memory_object_t);
extern kern_return_t device_pager_populate_object(
memory_object_t device,
memory_object_offset_t offset,
boolean_t *encrypted_p);
extern void log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot);
-extern int cs_invalid_page(void);
+extern int cs_invalid_page(addr64_t vaddr);
extern boolean_t cs_validate_page(void *blobs,
memory_object_offset_t offset,
const void *data,
if (unripe)
assert(queue->token_q_unripe == unripe);
assert(token_cnt == queue->debug_count_tokens);
- our_inactive_count = page_cnt + queue->new_pages + token_new_pagecount;
- assert(our_inactive_count >= 0);
- assert((uint32_t) our_inactive_count == vm_page_inactive_count);
+
+ /* obsolete queue doesn't maintain token counts */
+ if(queue->type != PURGEABLE_Q_TYPE_OBSOLETE)
+ {
+ our_inactive_count = page_cnt + queue->new_pages + token_new_pagecount;
+ assert(our_inactive_count >= 0);
+ assert((uint32_t) our_inactive_count == vm_page_inactive_count);
+ }
}
#endif
enum purgeable_q_type i;
int group;
vm_object_t object = 0;
+ purgeable_q_t queue, queue2;
mutex_lock(&vm_purgeable_queue_lock);
/* Cycle through all queues */
for (i = PURGEABLE_Q_TYPE_OBSOLETE; i < PURGEABLE_Q_TYPE_MAX; i++) {
- purgeable_q_t queue = &purgeable_queues[i];
+ queue = &purgeable_queues[i];
/*
* Are there any ripe tokens on this queue? If yes, we'll
* lock, remove a token and then purge the object.
*/
for (group = 0; group < NUM_VOLATILE_GROUPS; group++) {
- if (!queue_empty(&queue->objq[group]) && (object = vm_purgeable_object_find_and_lock(queue, group))) {
+ if (!queue_empty(&queue->objq[group]) &&
+ (object = vm_purgeable_object_find_and_lock(queue, group))) {
mutex_unlock(&vm_purgeable_queue_lock);
vm_purgeable_token_choose_and_delete_ripe(queue, 0);
goto purge_now;
- } else {
- assert(i != PURGEABLE_Q_TYPE_OBSOLETE); /* obsolete queue must
- * have all objects in
- * group 0 */
- purgeable_q_t queue2 = &purgeable_queues[i != PURGEABLE_Q_TYPE_FIFO ? PURGEABLE_Q_TYPE_FIFO : PURGEABLE_Q_TYPE_LIFO];
-
- if (!queue_empty(&queue2->objq[group]) && (object = vm_purgeable_object_find_and_lock(queue2, group))) {
+ }
+ if (i != PURGEABLE_Q_TYPE_OBSOLETE) {
+ /* This is the token migration case, and it works between
+ * FIFO and LIFO only */
+ queue2 = &purgeable_queues[i != PURGEABLE_Q_TYPE_FIFO ?
+ PURGEABLE_Q_TYPE_FIFO :
+ PURGEABLE_Q_TYPE_LIFO];
+
+ if (!queue_empty(&queue2->objq[group]) &&
+ (object = vm_purgeable_object_find_and_lock(queue2, group))) {
mutex_unlock(&vm_purgeable_queue_lock);
vm_purgeable_token_choose_and_delete_ripe(queue2, queue);
goto purge_now;
int group;
mutex_lock(&vm_purgeable_queue_lock);
- for (i = PURGEABLE_Q_TYPE_FIFO; i < PURGEABLE_Q_TYPE_MAX; i++) {
+ for (i = PURGEABLE_Q_TYPE_OBSOLETE; i < PURGEABLE_Q_TYPE_MAX; i++) {
purgeable_q_t queue = &purgeable_queues[i];
for (group = 0; group < NUM_VOLATILE_GROUPS; group++) {
vm_object_t o;
{
unsigned int n, override;
- if ( PE_parse_boot_arg("colors", &override) ) /* colors specified as a boot-arg? */
+ if ( PE_parse_boot_argn("colors", &override, sizeof (override)) ) /* colors specified as a boot-arg? */
n = override;
else if ( vm_cache_geometry_colors ) /* do we know what the cache geometry is? */
n = vm_cache_geometry_colors;
* Check if we want to initialize pages to a known value
*/
fill = 0; /* Assume no fill */
- if (PE_parse_boot_arg("fill", &fillval)) fill = 1; /* Set fill */
+ if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1; /* Set fill */
/*
object->resident_page_count++;
- if (object->purgable == VM_PURGABLE_VOLATILE ||
- object->purgable == VM_PURGABLE_EMPTY) {
+ if (object->purgable == VM_PURGABLE_VOLATILE) {
if (queues_lock_held == FALSE)
vm_page_lockspin_queues();
vm_page_purgeable_count++;
+ if (queues_lock_held == FALSE)
+ vm_page_unlock_queues();
+ } else if (object->purgable == VM_PURGABLE_EMPTY &&
+ mem->throttled) {
+ if (queues_lock_held == FALSE)
+ vm_page_lock_queues();
+ vm_page_deactivate(mem);
if (queues_lock_held == FALSE)
vm_page_unlock_queues();
}
found_m->offset = (vm_object_offset_t) -1;
object->resident_page_count--;
- if (object->purgable == VM_PURGABLE_VOLATILE ||
- object->purgable == VM_PURGABLE_EMPTY) {
+ if (object->purgable == VM_PURGABLE_VOLATILE) {
assert(vm_page_purgeable_count > 0);
vm_page_purgeable_count--;
}
object->resident_page_count++;
- if (object->purgable == VM_PURGABLE_VOLATILE ||
- object->purgable == VM_PURGABLE_EMPTY) {
+ if (object->purgable == VM_PURGABLE_VOLATILE) {
vm_page_purgeable_count++;
+ } else if (object->purgable == VM_PURGABLE_EMPTY) {
+ if (mem->throttled) {
+ vm_page_deactivate(mem);
+ }
}
}
mem->object->resident_page_count--;
- if (mem->object->purgable == VM_PURGABLE_VOLATILE ||
- mem->object->purgable == VM_PURGABLE_EMPTY) {
+ if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
assert(vm_page_purgeable_count > 0);
vm_page_purgeable_count--;
}
mem->zero_fill = FALSE;
OSAddAtomic(-1, (SInt32 *)&vm_zf_count);
}
+#if CONFIG_EMBEDDED
+ {
+ int percent_avail;
+
+ /*
+ * Decide if we need to poke the memorystatus notification thread.
+ */
+ percent_avail =
+ (vm_page_active_count + vm_page_inactive_count +
+ vm_page_speculative_count + vm_page_free_count +
+ (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
+ atop_64(max_mem);
+ if (percent_avail <= (kern_memorystatus_level - 5)) {
+ kern_memorystatus_level = percent_avail;
+ thread_wakeup((event_t)&kern_memorystatus_wakeup);
+ }
+ }
+#endif
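A rough worked example of the calculation above (the numbers are illustrative, not measured): with max_mem of 512 MB, atop_64(max_mem) is 131072 4 KB pages; if active + inactive + speculative + free pages total 13107 and the purgeable count is excluded, percent_avail = 13107 * 100 / 131072 = 9 by integer division. If kern_memorystatus_level was previously 15, then 9 <= 15 - 5, so the level is lowered to 9 and the memorystatus notification thread is woken.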
/*
* ENCRYPTED SWAP:
* The page could be encrypted, but
assert(!mem->laundry);
assert(mem->object != kernel_object);
assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
- if (!IP_VALID(memory_manager_default) &&
- mem->dirty && mem->object->internal &&
- (mem->object->purgable == VM_PURGABLE_DENY ||
- mem->object->purgable == VM_PURGABLE_NONVOLATILE ||
- mem->object->purgable == VM_PURGABLE_VOLATILE)) {
- queue_enter(&vm_page_queue_throttled, mem, vm_page_t, pageq);
- vm_page_throttled_count++;
- mem->throttled = TRUE;
+ if (mem->object->purgable == VM_PURGABLE_EMPTY) {
+ vm_page_deactivate(mem);
} else {
- queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
- vm_page_active_count++;
- mem->active = TRUE;
+ vm_page_activate(mem);
}
- mem->reference = TRUE;
+#if CONFIG_EMBEDDED
+ {
+ int percent_avail;
+
+ /*
+ * Decide if we need to poke the memorystatus notification thread.
+ */
+ percent_avail =
+ (vm_page_active_count + vm_page_inactive_count +
+ vm_page_speculative_count + vm_page_free_count +
+ (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
+ atop_64(max_mem);
+ if (percent_avail >= (kern_memorystatus_level + 5)) {
+ kern_memorystatus_level = percent_avail;
+ thread_wakeup((event_t)&kern_memorystatus_wakeup);
+ }
+ }
+#endif
}
}
unsigned int page_idx, start_idx;
int free_considered, free_available;
int substitute_needed;
-#if MACH_ASSERT
+#if DEBUG
uint32_t tv_start_sec, tv_start_usec, tv_end_sec, tv_end_usec;
+#endif
+#if MACH_ASSERT
int yielded = 0;
int dumped_run = 0;
int stolen_pages = 0;
#if MACH_ASSERT
vm_page_verify_free_lists();
-
+#endif
+#if DEBUG
clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
#endif
vm_page_lock_queues();
done_scanning:
vm_page_unlock_queues();
-#if MACH_ASSERT
+#if DEBUG
clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
tv_end_sec -= tv_start_sec;
printf("vm_find_page_contiguous(num=%d,low=%d): found %d pages in %d.%06ds... scanned %d pages... yielded %d times... dumped run %d times... stole %d pages\n",
contig_pages, max_pnum, npages, tv_end_sec, tv_end_usec, page_idx, yielded, dumped_run, stolen_pages);
+#endif
+#if MACH_ASSERT
vm_page_verify_free_lists();
#endif
return m;
if (flags & UPL_COMMIT_FREE_ON_EMPTY)
flags |= UPL_COMMIT_NOTIFY_EMPTY;
+ if (flags & UPL_COMMIT_KERNEL_ONLY_FLAGS) {
+ return KERN_INVALID_ARGUMENT;
+ }
+
kr = upl_commit_range(upl, offset, size, flags, pl, count, &finished);
if ((flags & UPL_COMMIT_NOTIFY_EMPTY) && finished)
void pe_init_debug(void)
{
- if (!PE_parse_boot_arg("debug", &DEBUGFlag))
+ if (!PE_parse_boot_argn("debug", &DEBUGFlag, sizeof (DEBUGFlag)))
DEBUGFlag = 0;
}
-void PE_enter_debugger(char *cause)
+void PE_enter_debugger(const char *cause)
{
if (DEBUGFlag & DB_NMI)
Debugger(cause);
#include <pexpert/pexpert.h>
#include <pexpert/protos.h>
#include <machine/machine_routines.h>
-#include <i386/mp.h>
+#include <i386/lapic.h>
#include <sys/kdebug.h>
if (!vm_initialized) {
simple_lock_init(&kprintf_lock, 0);
- if (PE_parse_boot_arg("debug", &boot_arg))
+ if (PE_parse_boot_argn("debug", &boot_arg, sizeof (boot_arg)))
if (boot_arg & DB_KPRT)
disable_serial_output = FALSE;
movl S_ARG0, %ecx
rdtsc
+ lfence
movl %edx, 0(%ecx)
movl %eax, 4(%ecx)
UART_LCR = 3, /* line control register */
UART_MCR = 4, /* modem control register */
UART_LSR = 5, /* line status register */
- UART_MSR = 6 /* modem status register */
+ UART_MSR = 6, /* modem status register */
+ UART_SCR = 7 /* scratch register */
};
enum {
static int
uart_probe( void )
{
- /* Verify that the Divisor Register is accessible */
-
- WRITE( LCR, UART_LCR_DLAB );
- WRITE( DLL, 0x5a );
- if (READ(DLL) != 0x5a) return 0;
- WRITE( DLL, 0xa5 );
- if (READ(DLL) != 0xa5) return 0;
- WRITE( LCR, 0x00 );
+ /* Verify that the Scratch Register is accessible */
+
+ WRITE( SCR, 0x5a );
+ if (READ(SCR) != 0x5a) return 0;
+ WRITE( SCR, 0xa5 );
+ if (READ(SCR) != 0xa5) return 0;
return 1;
}
/* Set baud rate - use the supplied boot-arg if available */
- if (PE_parse_boot_arg("serialbaud", &serial_baud_rate))
+ if (PE_parse_boot_argn("serialbaud", &serial_baud_rate, sizeof (serial_baud_rate)))
{
/* Valid divisor? */
if (!((UART_CLOCK / 16) % serial_baud_rate)) {
void PE_enter_debugger(
- char *cause);
+ const char *cause);
void PE_init_platform(
boolean_t vm_initialized,
void *args);
+
+
void PE_init_kprintf(
boolean_t vm_initialized);
if (PE_state.initialized == FALSE)
panic("Platform Expert not initialized");
- if (PE_parse_boot_arg("debug", &boot_arg))
+ if (PE_parse_boot_argn("debug", &boot_arg, sizeof (boot_arg)))
if(boot_arg & DB_KPRT) disable_serial_output = FALSE;
if (DTLookupEntry(NULL, "/options", &options) == kSuccess) {
}
/* Check the boot-args for new serial baud. */
- if (PE_parse_boot_arg("serialbaud", &serial_baud))
+ if (PE_parse_boot_argn("serialbaud", &serial_baud, sizeof (serial_baud)))
if (serial_baud != -1) gPESerialBaud = serial_baud;
if( (scc = PE_find_scc())) { /* See if we can find the serial port */
int mac_proc_check_getauid(proc_t proc);
int mac_proc_check_getlcid(proc_t proc1, proc_t proc2,
pid_t pid);
+int mac_proc_check_map_prot_copy_allow(proc_t proc);
int mac_proc_check_mprotect(proc_t proc,
user_addr_t addr, user_size_t size, int prot);
int mac_proc_check_sched(proc_t proc, proc_t proc2);
struct vnode *v2);
int mac_vnode_check_exec(vfs_context_t ctx, struct vnode *vp,
struct image_params *imgp);
+int mac_vnode_check_signature(struct vnode *vp, unsigned char *sha1,
+ void * signature, size_t size);
int mac_vnode_check_getattrlist(vfs_context_t ctx, struct vnode *vp,
struct attrlist *alist);
int mac_vnode_check_getextattr(vfs_context_t ctx, struct vnode *vp,
kauth_cred_t cred,
struct proc *p
);
+
+
+/**
+ @brief Access control check for manipulating a proc's vm_map
+ @param cred Subject credential
+ @param proc Object process
+
+ Determine whether the vm_map map belonging to process proc with
+ credential cred allows the VM_PROT_COPY operation.
+
+ @return Return 0 if access is granted, otherwise an appropriate value for
+ errno should be returned.
+ */
+typedef int mpo_proc_check_map_prot_copy_allow_t(
+ kauth_cred_t cred,
+ struct proc *p
+);
+
+
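A policy module that wants to gate VM_PROT_COPY would supply this hook through its mac_policy_ops. A minimal sketch follows, assuming a module that simply denies the operation for every process it covers; the function and variable names are illustrative and the required policy registration boilerplate is omitted.

	static int
	example_proc_check_map_prot_copy_allow(kauth_cred_t cred, struct proc *p)
	{
		(void)cred; (void)p;
		/* deny VM_PROT_COPY for all processes covered by this policy */
		return EPERM;
	}

	static struct mac_policy_ops example_ops = {
		.mpo_proc_check_map_prot_copy_allow =
		    example_proc_check_map_prot_copy_allow,
	};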
/**
@brief Assign a label to a new kernelspace Mach task
@param kproc New task
struct componentname *cnp,
u_int *csflags
);
+/**
+ @brief Access control check after determining the code directory hash
+ */
+typedef int mpo_vnode_check_signature_t(struct vnode *vp, struct label *label,
+ unsigned char *sha1, void *signature,
+ int size);
+
/**
@brief Access control check for retrieving file attributes
@param cred Subject credential
mpo_vnode_label_update_extattr_t *mpo_vnode_label_update_extattr;
mpo_vnode_label_update_t *mpo_vnode_label_update;
mpo_vnode_notify_create_t *mpo_vnode_notify_create;
- mpo_reserved_hook_t *mpo_reserved0;
- mpo_reserved_hook_t *mpo_reserved1;
+ mpo_vnode_check_signature_t *mpo_vnode_check_signature;
+ mpo_proc_check_map_prot_copy_allow_t *mpo_proc_check_map_prot_copy_allow;
mpo_reserved_hook_t *mpo_reserved2;
mpo_reserved_hook_t *mpo_reserved3;
mpo_reserved_hook_t *mpo_reserved4;
return (error);
}
+int
+mac_proc_check_map_prot_copy_allow(proc_t proc)
+{
+ kauth_cred_t cred;
+ int error;
+
+ if (!mac_vm_enforce) return (0);
+
+ cred = kauth_cred_proc_ref(proc);
+ MAC_CHECK(proc_check_map_prot_copy_allow, cred, proc);
+ kauth_cred_unref(&cred);
+
+ return (error);
+}
+
int
mac_proc_check_sched(proc_t curp, struct proc *proc)
{
return (error);
}
+int
+mac_vnode_check_signature(struct vnode *vp, unsigned char *sha1,
+ void * signature, size_t size)
+{
+ int error;
+
+ if (!mac_vnode_enforce || !mac_proc_enforce)
+ return (0);
+
+ MAC_CHECK(vnode_check_signature, vp, vp->v_label, sha1, signature, size);
+ return (error);
+}
+
#if 0
int
mac_vnode_check_getacl(vfs_context_t ctx, struct vnode *vp, acl_type_t type)
return (void *) iteration;
}
+#define MAX_CACHE_DEPTH 10
static void
auto_config(int npages, int *nbufs, int *nsets)
{
int len;
int ncpu;
- int64_t cacheconfig[10];
- int64_t cachesize[10];
+ int llc;
+ int64_t cacheconfig[MAX_CACHE_DEPTH];
+ int64_t cachesize[MAX_CACHE_DEPTH];
mutter("Autoconfiguring...\n");
exit(1);
}
+ /*
+ * Find LLC
+ */
+ for (llc = MAX_CACHE_DEPTH - 1; llc > 0; llc--)
+ if (cacheconfig[llc] != 0)
+ break;
+
/*
* Calculate number of buffers of size pages*4096 bytes
* fit into 90% of an L2 cache.
*/
- *nbufs = cachesize[2] * 9 / (npages * 4096 * 10);
- mutter(" L2 cache %qd bytes: "
+ *nbufs = cachesize[llc] * 9 / (npages * 4096 * 10);
+ mutter(" L%d (LLC) cache %qd bytes: "
"using %d buffers of size %d bytes\n",
- cachesize[2], *nbufs, (npages * 4096));
+ llc, cachesize[llc], *nbufs, (npages * 4096));
/*
	 * Calculate how many sets:
*/
- *nsets = cacheconfig[0]/cacheconfig[2];
- mutter(" %qd cpus; %qd cpus per L2 cache: using %d sets\n",
- cacheconfig[0], cacheconfig[2], *nsets);
+ *nsets = cacheconfig[0]/cacheconfig[llc];
+ mutter(" %qd cpus; %qd cpus per L%d cache: using %d sets\n",
+ cacheconfig[0], cacheconfig[llc], llc, *nsets);
}
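For a concrete sense of the sizing (illustrative numbers only, not from the diff): on an 8-CPU machine whose last-level cache is the L2, with 4 MB of cache shared by each group of 4 CPUs and npages = 1, the loop above finds llc = 2, nbufs = 4194304 * 9 / (4096 * 10) = 921 buffers of 4096 bytes, and nsets = 8 / 4 = 2.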
void (*producer_fnp)(int *data, int isize) = &writer_fn;