git.saurik.com Git - apple/xnu.git/commitdiff
snapshot: xnu-1228.9.59.tar.gz    tags: mac-os-x-1056, v1228.9.59
author    Apple <opensource@apple.com>
          Mon, 15 Dec 2008 20:44:50 +0000 (20:44 +0000)
committer Apple <opensource@apple.com>
          Mon, 15 Dec 2008 20:44:50 +0000 (20:44 +0000)
257 files changed:
EXTERNAL_HEADERS/mach-o/loader.h
README
bsd/conf/MASTER
bsd/conf/MASTER.i386
bsd/conf/MASTER.ppc
bsd/conf/files
bsd/dev/dtrace/dtrace.c
bsd/dev/dtrace/fbt.c
bsd/dev/i386/fbt_x86.c
bsd/dev/i386/sysctl.c
bsd/dev/i386/systemcalls.c
bsd/dev/memdev.c
bsd/dev/ppc/fbt_ppc.c
bsd/dev/ppc/systemcalls.c
bsd/dev/unix_startup.c
bsd/hfs/hfs.h
bsd/hfs/hfs_catalog.c
bsd/hfs/hfs_catalog.h
bsd/hfs/hfs_cnode.c
bsd/hfs/hfs_cnode.h
bsd/hfs/hfs_endian.c
bsd/hfs/hfs_lookup.c
bsd/hfs/hfs_readwrite.c
bsd/hfs/hfs_vfsops.c
bsd/hfs/hfs_vfsutils.c
bsd/hfs/hfs_vnops.c
bsd/hfs/hfscommon/BTree/BTree.c
bsd/hfs/hfscommon/BTree/BTreeTreeOps.c
bsd/hfs/hfscommon/Misc/FileExtentMapping.c
bsd/hfs/hfscommon/Misc/VolumeAllocation.c
bsd/kern/bsd_init.c
bsd/kern/bsd_stubs.c
bsd/kern/init_sysent.c
bsd/kern/kdebug.c
bsd/kern/kern_core.c
bsd/kern/kern_descrip.c
bsd/kern/kern_exec.c
bsd/kern/kern_exit.c
bsd/kern/kern_fork.c
bsd/kern/kern_memorystatus.c
bsd/kern/kern_mib.c
bsd/kern/kern_mman.c
bsd/kern/kern_proc.c
bsd/kern/kern_prot.c
bsd/kern/kern_sig.c
bsd/kern/kern_sysctl.c
bsd/kern/mach_loader.c
bsd/kern/mcache.c
bsd/kern/pthread_synch.c
bsd/kern/subr_log.c
bsd/kern/syscalls.master
bsd/kern/ubc_subr.c
bsd/kern/uipc_mbuf.c
bsd/kern/uipc_socket.c
bsd/man/man2/Makefile
bsd/man/man2/fstat64.2 [deleted file]
bsd/man/man2/fstatfs64.2 [deleted file]
bsd/man/man2/lstat64.2 [deleted file]
bsd/man/man2/stat.2
bsd/man/man2/stat64.2 [deleted file]
bsd/man/man2/statfs.2
bsd/man/man2/statfs64.2 [deleted file]
bsd/man/man5/dir.5
bsd/man/man5/types.5
bsd/miscfs/devfs/devfs_vnops.c
bsd/miscfs/specfs/spec_vnops.c
bsd/net/dlil.c
bsd/net/ether_if_module.c
bsd/net/if.h
bsd/net/if_vlan.c
bsd/net/route.c
bsd/netinet/Makefile
bsd/netinet/in_arp.c
bsd/netinet/in_cksum.c
bsd/netinet/in_pcb.c
bsd/netinet/in_pcb.h
bsd/netinet/ip_divert.c
bsd/netinet/ip_edgehole.c [new file with mode: 0644]
bsd/netinet/ip_edgehole.h [new file with mode: 0644]
bsd/netinet/raw_ip.c
bsd/netinet/tcp_input.c
bsd/netinet/tcp_output.c
bsd/netinet/tcp_subr.c
bsd/netinet/tcp_usrreq.c
bsd/netinet/udp_usrreq.c
bsd/netkey/key.c
bsd/nfs/nfs_lock.c
bsd/nfs/nfs_vfsops.c
bsd/sys/Makefile
bsd/sys/cdefs.h
bsd/sys/disk.h
bsd/sys/imgact.h
bsd/sys/kdebug.h
bsd/sys/mount.h
bsd/sys/mount_internal.h
bsd/sys/proc.h
bsd/sys/proc_internal.h
bsd/sys/stat.h
bsd/sys/ubc.h
bsd/sys/ubc_internal.h
bsd/sys/user.h
bsd/sys/vnode.h
bsd/sys/vnode_internal.h
bsd/vfs/vfs_cluster.c
bsd/vfs/vfs_journal.c
bsd/vfs/vfs_subr.c
bsd/vfs/vfs_syscalls.c
bsd/vfs/vfs_vnops.c
bsd/vm/vm_unix.c
bsd/vm/vnode_pager.c
config/MasterVersion
config/System6.0.i386.exports
config/Unsupported.exports
config/Unsupported.i386.exports
iokit/IOKit/IOKitKeysPrivate.h
iokit/IOKit/platform/Makefile
iokit/IOKit/pwr_mgt/IOPM.h
iokit/IOKit/pwr_mgt/IOPMPrivate.h
iokit/IOKit/pwr_mgt/IOPMlog.h
iokit/IOKit/pwr_mgt/Makefile
iokit/Kernel/IOCPU.cpp
iokit/Kernel/IOCatalogue.cpp
iokit/Kernel/IOMemoryCursor.cpp
iokit/Kernel/IONVRAM.cpp
iokit/Kernel/IOPMrootDomain.cpp
iokit/Kernel/IOPlatformExpert.cpp
iokit/Kernel/IOService.cpp
iokit/Kernel/IOServicePM.cpp
iokit/Kernel/IOServicePMPrivate.h
iokit/Kernel/IOStartIOKit.cpp
kgmacros
libkern/crypto/sha1.c
libkern/libkern/c++/OSMetaClass.h
libkern/libkern/crypto/sha1.h
libsa/catalogue.cpp
libsyscall/mach/exc_catcher.c
libsyscall/mach/exc_catcher_state.c
libsyscall/mach/exc_catcher_state_identity.c
makedefs/MakeInc.cmd
makedefs/MakeInc.def
makedefs/MakeInc.rule
osfmk/chud/i386/chud_cpu_i386.c
osfmk/chud/i386/chud_osfmk_callback_i386.c
osfmk/conf/MASTER
osfmk/conf/MASTER.i386
osfmk/conf/files
osfmk/conf/files.i386
osfmk/console/panic_dialog.c
osfmk/console/video_console.c
osfmk/ddb/db_command.c
osfmk/default_pager/dp_backing_store.c
osfmk/default_pager/dp_memory_object.c
osfmk/i386/AT386/model_dep.c
osfmk/i386/Diagnostics.c
osfmk/i386/Makefile
osfmk/i386/acpi.c
osfmk/i386/apic.h
osfmk/i386/bsd_i386.c
osfmk/i386/commpage/commpage_mach_absolute_time.s
osfmk/i386/cpu.c
osfmk/i386/cpu_data.h
osfmk/i386/cpu_threads.c
osfmk/i386/cpu_threads.h
osfmk/i386/cpu_topology.c
osfmk/i386/cpu_topology.h
osfmk/i386/cpuid.c
osfmk/i386/cpuid.h
osfmk/i386/db_machdep.h
osfmk/i386/genassym.c
osfmk/i386/hpet.c [deleted file]
osfmk/i386/hpet.h [deleted file]
osfmk/i386/hw_defs.h [deleted file]
osfmk/i386/i386_init.c
osfmk/i386/i386_lock.s
osfmk/i386/i386_vm_init.c
osfmk/i386/lapic.c [new file with mode: 0644]
osfmk/i386/lapic.h [new file with mode: 0644]
osfmk/i386/locore.s
osfmk/i386/machine_check.c
osfmk/i386/machine_check.h
osfmk/i386/machine_cpu.h
osfmk/i386/machine_routines.c
osfmk/i386/machine_routines.h
osfmk/i386/machine_routines_asm.s
osfmk/i386/mp.c
osfmk/i386/mp.h
osfmk/i386/mp_desc.c
osfmk/i386/perfmon.c
osfmk/i386/pmCPU.c
osfmk/i386/pmCPU.h
osfmk/i386/pmap.c
osfmk/i386/proc_reg.h
osfmk/i386/rtclock.c
osfmk/i386/rtclock.h
osfmk/i386/start.s
osfmk/i386/trap.c
osfmk/i386/tsc.c
osfmk/i386/tsc.h
osfmk/i386/user_ldt.c
osfmk/kdp/kdp.h
osfmk/kdp/kdp_serial.c [new file with mode: 0644]
osfmk/kdp/kdp_serial.h [new file with mode: 0644]
osfmk/kdp/kdp_udp.c
osfmk/kern/bsd_kern.c
osfmk/kern/debug.c
osfmk/kern/debug.h
osfmk/kern/kmod.c
osfmk/kern/locks.c
osfmk/kern/machine.h
osfmk/kern/misc_protos.h
osfmk/kern/page_decrypt.c
osfmk/kern/page_decrypt.h
osfmk/kern/printf.c
osfmk/kern/processor.c
osfmk/kern/startup.c
osfmk/kern/startup.h
osfmk/kern/symbols.c
osfmk/kern/task.h
osfmk/kern/zalloc.c
osfmk/mach/host_special_ports.h
osfmk/mach/memory_object.defs
osfmk/mach/memory_object_types.h
osfmk/mach/vm_statistics.h
osfmk/ppc/machine_routines.c
osfmk/ppc/model_dep.c
osfmk/ppc/movc.s
osfmk/ppc/ppc_init.c
osfmk/ppc/ppc_vm_init.c
osfmk/vm/bsd_vm.c
osfmk/vm/device_vm.c
osfmk/vm/memory_object.c
osfmk/vm/vm_apple_protect.c
osfmk/vm/vm_fault.c
osfmk/vm/vm_init.c
osfmk/vm/vm_map.c
osfmk/vm/vm_map.h
osfmk/vm/vm_object.c
osfmk/vm/vm_object.h
osfmk/vm/vm_page.h
osfmk/vm/vm_pageout.c
osfmk/vm/vm_pageout.h
osfmk/vm/vm_protos.h
osfmk/vm/vm_purgeable.c
osfmk/vm/vm_resident.c
osfmk/vm/vm_user.c
pexpert/gen/pe_gen.c
pexpert/i386/pe_interrupt.c
pexpert/i386/pe_kprintf.c
pexpert/i386/pe_misc.s
pexpert/i386/pe_serial.c
pexpert/pexpert/pexpert.h
pexpert/ppc/pe_kprintf.c
security/mac_framework.h
security/mac_policy.h
security/mac_process.c
security/mac_vfs.c
tools/tests/affinity/sets.c

diff --git a/EXTERNAL_HEADERS/mach-o/loader.h b/EXTERNAL_HEADERS/mach-o/loader.h
index b8277bc62cc498858bcd78e4181385ce56a31286..fd49201b7eba703d6f61812bb5c29c4eebf0aa7b 100644 (file)
--- a/EXTERNAL_HEADERS/mach-o/loader.h
+++ b/EXTERNAL_HEADERS/mach-o/loader.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 1999-2003 Apple Computer, Inc.  All Rights Reserved.
+ * Copyright (c) 1999-2008 Apple Inc.  All Rights Reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -192,6 +192,10 @@ struct mach_header_64 {
                                          the static linker does not need to
                                          examine dependent dylibs to see
                                          if any are re-exported */
+#define        MH_PIE 0x200000                 /* When this bit is set, the OS will
+                                          load the main executable at a
+                                          random address.  Only used in
+                                          MH_EXECUTE filetypes. */
 
 /*
  * The load commands directly follow the mach_header.  The total size of all
@@ -266,6 +270,8 @@ struct load_command {
 #define LC_CODE_SIGNATURE 0x1d /* local of code signature */
 #define LC_SEGMENT_SPLIT_INFO 0x1e /* local of info to split segments */
 #define LC_REEXPORT_DYLIB (0x1f | LC_REQ_DYLD) /* load and re-export dylib */
+#define        LC_LAZY_LOAD_DYLIB 0x20 /* delay load of dylib until first use */
+#define        LC_ENCRYPTION_INFO 0x21 /* encrypted segment information */
 
 /*
  * A variable length string in a load command is represented by an lc_str
@@ -448,7 +454,13 @@ struct section_64 { /* for 64-bit architectures */
 #define        S_INTERPOSING                   0xd     /* section with only pairs of
                                                   function pointers for
                                                   interposing */
-#define        S_16BYTE_LITERALS       0xe     /* section with only 16 byte literals */
+#define        S_16BYTE_LITERALS               0xe     /* section with only 16 byte
+                                                  literals */
+#define        S_DTRACE_DOF                    0xf     /* section contains 
+                                                  DTrace Object Format */
+#define        S_LAZY_DYLIB_SYMBOL_POINTERS    0x10    /* section with only lazy
+                                                  symbol pointers to lazy
+                                                  loaded dylibs */
 /*
  * Constants for the section attributes part of the flags field of a section
  * structure.
@@ -1107,6 +1119,19 @@ struct linkedit_data_command {
     uint32_t   datasize;       /* file size of data in __LINKEDIT segment  */
 };
 
+/*
+ * The encryption_info_command contains the file offset and size of an
+ * of an encrypted segment.
+ */
+struct encryption_info_command {
+   uint32_t    cmd;            /* LC_ENCRYPTION_INFO */
+   uint32_t    cmdsize;        /* sizeof(struct encryption_info_command) */
+   uint32_t    cryptoff;       /* file offset of encrypted range */
+   uint32_t    cryptsize;      /* file size of encrypted range */
+   uint32_t    cryptid;        /* which enryption system,
+                                  0 means not-encrypted yet */
+};
+
 /*
  * The symseg_command contains the offset and size of the GNU style
  * symbol table information as described in the header file <symseg.h>.
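
The MH_PIE flag and the LC_ENCRYPTION_INFO load command introduced above are both consumed by walking the image's load command list. A minimal userland sketch (assuming a valid, host-endian, thin 32-bit Mach-O image; scan_image is a hypothetical helper, not part of this commit):

    #include <mach-o/loader.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Walk the load commands that directly follow the mach_header. */
    static void
    scan_image(const struct mach_header *mh)
    {
        const struct load_command *lc = (const struct load_command *)(mh + 1);
        uint32_t i;

        /* MH_PIE is only meaningful for MH_EXECUTE filetypes. */
        if (mh->filetype == MH_EXECUTE && (mh->flags & MH_PIE))
            printf("OS will load this executable at a random address\n");

        for (i = 0; i < mh->ncmds; i++) {
            if (lc->cmd == LC_ENCRYPTION_INFO) {
                const struct encryption_info_command *eic =
                    (const struct encryption_info_command *)lc;
                printf("encrypted range: off=%u size=%u cryptid=%u%s\n",
                    eic->cryptoff, eic->cryptsize, eic->cryptid,
                    eic->cryptid == 0 ? " (not encrypted yet)" : "");
            }
            lc = (const struct load_command *)((uintptr_t)lc + lc->cmdsize);
        }
    }
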
diff --git a/README b/README
index 76ea08c3829cb1ebca801ab44a5282f7e852df2e..10f6d90847819a2f77ba4919986e41b97927276c 100644 (file)
--- a/README
+++ b/README
@@ -15,25 +15,25 @@ A. How to build XNU:
 
   By default, architecture defaults to the build machine 
   architecture, and the kernel configuration is set to build for DEVELOPMENT.
-  The machine configuration defaults to S5L8900XRB for arm and default for i386 and ppc.
+  The machine configuration defaults to S5L8900X for arm and default for i386 and ppc.
   
   This will also create a bootable image, mach_kernel,  and a kernel binary 
   with symbols, mach_kernel.sys.
        
   Examples:
        /* make a debug kernel for H1 arm board */
-       make TARGET_CONFIGS="debug arm s5l8900xrb"
+       make TARGET_CONFIGS="debug arm s5l8900x"
        
-    $(OBJROOT)/DEBUG_ARM_S5L8900XRB/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
-    $(OBJROOT)/DEBUG_ARM_S5L8900XRB/mach_kernel: bootable image
+    $(OBJROOT)/DEBUG_ARM_S5L8900X/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
+    $(OBJROOT)/DEBUG_ARM_S5L8900X/mach_kernel: bootable image
 
        /* make debug and development kernels for H1 arm board */
-       make TARGET_CONFIGS="debug arm s5l8900xrb  development arm s5l8900xrb"
+       make TARGET_CONFIGS="debug arm s5l8900x  development arm s5l8900x"
        
-    $(OBJROOT)/DEBUG_ARM_S5L8900XRB/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
-    $(OBJROOT)/DEBUG_ARM_S5L8900XRB/mach_kernel: bootable image
-    $(OBJROOT)/DEVELOPMENT_ARM/osfmk/DEVELOPMENT/osfmk.o: pre-linked object for osfmk component
-    $(OBJROOT)/DEVELOPMENT_ARM/mach_kernel: bootable image
+    $(OBJROOT)/DEBUG_ARM_S5L8900X/osfmk/DEBUG/osfmk.o: pre-linked object for osfmk component
+    $(OBJROOT)/DEBUG_ARM_S5L8900X/mach_kernel: bootable image
+    $(OBJROOT)/DEVELOPMENT_ARM_S5L8900X/osfmk/DEVELOPMENT/osfmk.o: pre-linked object for osfmk component
+    $(OBJROOT)/DEVELOPMENT_ARM_S5L8900X/mach_kernel: bootable image
 
        /* this is all you need to do to build H1 arm with DEVELOPMENT kernel configuration  */
        make TARGET_CONFIGS="default arm default"
diff --git a/bsd/conf/MASTER b/bsd/conf/MASTER
index 8e308ece2b7ae334d3daf5f1f0ac66640a7280f1..5419f96bba63b432a207200e2a1193f7df8f41b3 100644 (file)
--- a/bsd/conf/MASTER
+++ b/bsd/conf/MASTER
@@ -165,6 +165,9 @@ options             CONFIG_SOWUPCALL        # SB_UPCALL on sowwakeup        # <config_sowupcall>
 options                CONFIG_FORCE_OUT_IFP    # Force IP output to use an interface # <config_force_out_ifp>
 options                CONFIG_MBUF_NOEXPAND    # limit mbuf expansion  # <config_mbuf_noexpand>
 options                CONFIG_MBUF_JUMBO       # jumbo cluster pool    # <config_mbuf_jumbo>
+options                CONFIG_IP_EDGEHOLE      # Drop tagged packets at EDGE interface # <config_ip_edgehole>
+
+options                CONFIG_WORKQUEUE        # <config_workqueue>
 
 #
 #      4.4 filesystems 
@@ -278,7 +281,7 @@ options   CONFIG_VNODES=263168              # <large,xlarge>
 options   CONFIG_VNODES=263168         # <medium>
 options   CONFIG_VNODES=10240          # <small>
 options   CONFIG_VNODES=1024           # <xsmall>
-options   CONFIG_VNODES=512                    # <bsmall>
+options   CONFIG_VNODES=640                    # <bsmall>
 
 options   CONFIG_VNODE_FREE_MIN=500            # <large,xlarge>
 options   CONFIG_VNODE_FREE_MIN=300            # <medium>
@@ -324,6 +327,11 @@ options        CONFIG_NMBCLUSTERS="((1024 * 1024) / MCLBYTES)"             # <large,xlarge>
 options        CONFIG_NMBCLUSTERS="((1024 * 512) / MCLBYTES)"          # <medium>
 options        CONFIG_NMBCLUSTERS="((1024 * 256) / MCLBYTES)"          # <bsmall,xsmall,small>
 
+# set maximum space used for packet buffers
+#
+options        CONFIG_USESOCKTHRESHOLD=1               # <large,xlarge,medium>
+options        CONFIG_USESOCKTHRESHOLD=0               # <bsmall,xsmall,small>
+
 #
 # Configure size of TCP hash table
 #
@@ -388,6 +396,13 @@ options   CONFIG_NO_KPRINTF_STRINGS                # <no_kprintf_str>
 #
 options   CONFIG_EMBEDDED                      # <config_embedded>
 
+#
+# code decryption... used on embedded for app protection
+# must be set in all the bsd/conf and osfmk/conf MASTER files
+#
+options                CONFIG_CODE_DECRYPTION  # <config_embedded>
+
+
 #
 #  Ethernet (ARP)
 #
diff --git a/bsd/conf/MASTER.i386 b/bsd/conf/MASTER.i386
index a4504b8a8ad55874f21a96f61f7abcf2da72fb20..c2ae93a18812bbb0e59d20bbb1d3bd60178677c4 100644 (file)
--- a/bsd/conf/MASTER.i386
+++ b/bsd/conf/MASTER.i386
@@ -44,7 +44,7 @@
 #  
 #  Standard Apple Research Configurations:
 #  -------- ----- -------- ---------------
-#  BASE =        [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug compat_43_tty sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot ]
+#  BASE =        [ intel mach medium config_dtrace vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug compat_43_tty sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue ]
 #  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo ffs union cd9660 config_volfs ]
 #  NETWORKING =  [ inet inet6 compat_oldsock mrouting tcpdrop_synfin bpfilter ipdivert netat ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk config_mbuf_jumbo ]
 #  NFS =         [ nfsclient nfsserver ]
@@ -53,7 +53,7 @@
 #  PROFILE =     [ RELEASE profile ]
 #  DEBUG =       [ BASE NETWORKING NFS VPN FILESYS libdriver_g debug xpr_debug mach_assert ]
 #
-#  EMBEDDED_BASE =     [ intel mach bsmall vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug compat_43_tty sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot ]
+#  EMBEDDED_BASE =     [ intel mach bsmall vol pst gdb kernobjc fixpri simple_clock mdebug kernserv driverkit uxpr kernstack ipc_compat ipc_debug compat_43_tty sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue ]
 #  EMBEDDED_FILESYS =  [ devfs hfs journaling fdesc fifo ]
 #  EMBEDDED_NET =      [ inet compat_oldsock mrouting tcpdrop_synfin bpfilter config_mbuf_noexpand ]
 #  EMBEDDED =          [ EMBEDDED_BASE EMBEDDED_NET VPN EMBEDDED_FILESYS libdriver no_printf_str no_kprintf_str no_kdebug ]
@@ -92,6 +92,12 @@ options              CONFIG_MACF_SOCKET_SUBSET       # MAC socket subest (no labels)
 # app-profiling i.e. pre-heating - off? 
 options   CONFIG_APP_PROFILE=0  
 
+#
+# code decryption... used on i386 for DSMOS
+# must be set in all the bsd/conf and osfmk/conf MASTER files
+#
+options                CONFIG_CODE_DECRYPTION
+
 #
 #  Ipl measurement system
 #
diff --git a/bsd/conf/MASTER.ppc b/bsd/conf/MASTER.ppc
index 9f4a08d6d25bd7feb923383a51d2dcf5febffd72..b669841409dc9597e1e48144861ab0857c51503a 100644 (file)
--- a/bsd/conf/MASTER.ppc
+++ b/bsd/conf/MASTER.ppc
@@ -45,7 +45,7 @@
 #  Standard Apple Research Configurations:
 #  -------- ----- -------- ---------------
 #
-#  BASE =        [ ppc mach medium config_dtrace vol pst gdb noprofiling simple_clock kernstack compat_43_tty sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot ]
+#  BASE =        [ ppc mach medium config_dtrace vol pst gdb noprofiling simple_clock kernstack compat_43_tty sysv_sem sysv_msg sysv_shm audit panic_info config_imageboot config_workqueue ]
 #  FILESYS =    [ devfs revfs hfs journaling fdesc config_fse quota namedstreams fifo ffs union cd9660 config_volfs ]
 #  NETWORKING =  [ inet inet6 compat_oldsock mrouting tcpdrop_synfin bpfilter ipdivert netat ipfirewall ipv6firewall ipfw2 dummynet traffic_mgt sendfile netmibs bond vlan gif stf zlib randomipid ifnet_input_chk ]
 #  NFS =         [ nfsclient nfsserver ]
diff --git a/bsd/conf/files b/bsd/conf/files
index 4f927bcbac2a21541d801de980fbee6b99669162..0a7cfa9aeb62f5df9bd1ffd13a9507341624b0cd 100644 (file)
--- a/bsd/conf/files
+++ b/bsd/conf/files
@@ -12,6 +12,7 @@
 OPTIONS/hw_ast                         optional hw_ast
 OPTIONS/hw_footprint                   optional hw_footprint
 OPTIONS/kernserv                       optional kernserv
+OPTIONS/config_ip_edgehole     optional config_ip_edgehole
 OPTIONS/config_macf                    optional config_macf
 OPTIONS/config_macf_socket_subset      optional config_macf_socket_subset
 OPTIONS/config_macf_socket             optional config_macf_socket
@@ -262,6 +263,7 @@ bsd/netinet/ip_id.c                         optional randomipid inet
 bsd/netinet/ip_input.c                 optional inet
 bsd/netinet/ip_mroute.c                        optional mrouting
 bsd/netinet/ip_output.c                        optional inet
+bsd/netinet/ip_edgehole.c              optional config_ip_edgehole
 bsd/netinet/raw_ip.c                   optional inet
 bsd/netinet/tcp_debug.c                        optional tcpdebug
 bsd/netinet/tcp_input.c                        optional inet
diff --git a/bsd/dev/dtrace/dtrace.c b/bsd/dev/dtrace/dtrace.c
index c28ae6f0b7cc32f092e83ef615ae6df2c608890e..eebffddbbe8321269119e4502c98b2433edaf5f9 100644 (file)
--- a/bsd/dev/dtrace/dtrace.c
+++ b/bsd/dev/dtrace/dtrace.c
@@ -16549,7 +16549,7 @@ dtrace_init( void )
                 * XXX Warn if state is LAZY_OFF? It won't break anything, but
                 * makes no sense...
                 */
-               if (!PE_parse_boot_arg("dtrace_dof_mode", &dtrace_dof_mode)) {
+               if (!PE_parse_boot_argn("dtrace_dof_mode", &dtrace_dof_mode, sizeof (dtrace_dof_mode))) {
                        dtrace_dof_mode = DTRACE_DOF_MODE_LAZY_ON;
                }
 
diff --git a/bsd/dev/dtrace/fbt.c b/bsd/dev/dtrace/fbt.c
index f1b85f42f85d8ae9b25617cc91382ae6addfd966..311e4d6cff35beba4567e76e99b4b88a6bab37cf 100644 (file)
--- a/bsd/dev/dtrace/fbt.c
+++ b/bsd/dev/dtrace/fbt.c
@@ -421,7 +421,7 @@ void
 fbt_init( void )
 {
 
-       PE_parse_boot_arg("DisableFBT", &gDisableFBT);
+       PE_parse_boot_argn("DisableFBT", &gDisableFBT, sizeof (gDisableFBT));
 
        if (0 == gDisableFBT)
        {
diff --git a/bsd/dev/i386/fbt_x86.c b/bsd/dev/i386/fbt_x86.c
index 219224f80657d6108fddffba1ba0346e78a766ec..b2b021280b2e76e4d2e2ea983a30036331d36ea5 100644 (file)
--- a/bsd/dev/i386/fbt_x86.c
+++ b/bsd/dev/i386/fbt_x86.c
@@ -142,7 +142,6 @@ static const char * critical_blacklist[] =
        "pmap_cpu_high_map_vaddr", 
        "pmap_cpu_high_shared_remap", 
        "pmap_cpu_init",
-       "rdHPET",
        "register_cpu_setup_func",
        "unregister_cpu_setup_func"
 };
@@ -405,7 +404,7 @@ __fbt_provide_module(void *arg, struct modctl *ctl)
        unsigned int i, j;
 
        int gIgnoreFBTBlacklist = 0;
-    PE_parse_boot_arg("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist);
+       PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist, sizeof (gIgnoreFBTBlacklist));
 
        mh = (struct mach_header *)(ctl->address);
        modname = ctl->mod_modname;
@@ -527,17 +526,12 @@ __fbt_provide_module(void *arg, struct modctl *ctl)
                 */
                if (strstr(name, "machine_stack_") == name ||
                        strstr(name, "mapping_") == name ||
-                       strstr(name, "hpet_") == name ||
-
-                       0 == strcmp(name, "rdHPET") ||
-                       0 == strcmp(name, "HPETInterrupt") ||
                        0 == strcmp(name, "tmrCvt") ||
 
                        strstr(name, "tsc_") == name ||
 
                        strstr(name, "pmCPU") == name ||
                        0 == strcmp(name, "Cstate_table_set") ||
-                       0 == strcmp(name, "pmHPETInterrupt") ||
                        0 == strcmp(name, "pmKextRegister") ||
                        0 == strcmp(name, "pmSafeMode") ||
                        0 == strcmp(name, "pmUnregister") ||
diff --git a/bsd/dev/i386/sysctl.c b/bsd/dev/i386/sysctl.c
index c7b04d2965a2b43c4a684ebe7d7c40a1ff7c6505..2637c26545866835692453b25d42a607b9d61cf7 100644 (file)
--- a/bsd/dev/i386/sysctl.c
+++ b/bsd/dev/i386/sysctl.c
@@ -31,6 +31,7 @@
 #include <sys/kernel.h>
 #include <sys/sysctl.h>
 #include <i386/cpuid.h>
+#include <i386/tsc.h>
 
 static int
 hw_cpu_sysctl SYSCTL_HANDLER_ARGS
@@ -166,6 +167,12 @@ SYSCTL_PROC(_machdep_cpu, OID_AUTO, cores_per_package,
            sizeof(uint32_t),
            hw_cpu_sysctl, "I", "CPU cores per package");
 
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, microcode_version,
+           CTLTYPE_INT | CTLFLAG_RD, 
+           (void *)offsetof(i386_cpu_info_t, cpuid_microcode_version),
+           sizeof(uint32_t),
+           hw_cpu_sysctl, "I", "Microcode version number");
+
 
 SYSCTL_NODE(_machdep_cpu, OID_AUTO, mwait, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
        "mwait");
@@ -291,6 +298,34 @@ SYSCTL_PROC(_machdep_cpu_cache, OID_AUTO, size,
            hw_cpu_sysctl, "I", "Cache size (in Kbytes)");
 
 
+SYSCTL_NODE(_machdep_cpu, OID_AUTO, tlb, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
+       "tlb");
+
+SYSCTL_PROC(_machdep_cpu_tlb, OID_AUTO, inst_small,
+           CTLTYPE_INT | CTLFLAG_RD, 
+           (void *)offsetof(i386_cpu_info_t, cpuid_itlb_small),
+           sizeof(uint32_t),
+           hw_cpu_sysctl, "I", "Number of small page instruction TLBs");
+
+SYSCTL_PROC(_machdep_cpu_tlb, OID_AUTO, data_small,
+           CTLTYPE_INT | CTLFLAG_RD, 
+           (void *)offsetof(i386_cpu_info_t, cpuid_dtlb_small),
+           sizeof(uint32_t),
+           hw_cpu_sysctl, "I", "Number of small page data TLBs");
+
+SYSCTL_PROC(_machdep_cpu_tlb, OID_AUTO, inst_large,
+           CTLTYPE_INT | CTLFLAG_RD, 
+           (void *)offsetof(i386_cpu_info_t, cpuid_itlb_large),
+           sizeof(uint32_t),
+           hw_cpu_sysctl, "I", "Number of large page instruction TLBs");
+
+SYSCTL_PROC(_machdep_cpu_tlb, OID_AUTO, data_large,
+           CTLTYPE_INT | CTLFLAG_RD, 
+           (void *)offsetof(i386_cpu_info_t, cpuid_dtlb_large),
+           sizeof(uint32_t),
+           hw_cpu_sysctl, "I", "Number of large page data TLBs");
+
+
 SYSCTL_NODE(_machdep_cpu, OID_AUTO, address_bits, CTLFLAG_RW|CTLFLAG_LOCKED, 0,
        "address_bits");
 
@@ -306,6 +341,19 @@ SYSCTL_PROC(_machdep_cpu_address_bits, OID_AUTO, virtual,
            sizeof(uint32_t),
            hw_cpu_sysctl, "I", "Number of virtual address bits");
 
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, core_count,
+           CTLTYPE_INT | CTLFLAG_RD, 
+           (void *)offsetof(i386_cpu_info_t, core_count),
+           sizeof(uint32_t),
+           hw_cpu_sysctl, "I", "Number of enabled cores per package");
+
+SYSCTL_PROC(_machdep_cpu, OID_AUTO, thread_count,
+           CTLTYPE_INT | CTLFLAG_RD, 
+           (void *)offsetof(i386_cpu_info_t, thread_count),
+           sizeof(uint32_t),
+           hw_cpu_sysctl, "I", "Number of enabled threads per package");
+
+
 uint64_t pmap_pv_hashlist_walks;
 uint64_t pmap_pv_hashlist_cnts;
 uint32_t pmap_pv_hashlist_max;
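
The machdep.cpu sysctl nodes registered above are readable from user space with sysctlbyname(3). A short example (assuming a kernel built with this change; values are machine-dependent):

    #include <sys/types.h>
    #include <sys/sysctl.h>
    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
        uint32_t val;
        size_t len = sizeof(val);

        if (sysctlbyname("machdep.cpu.microcode_version", &val, &len, NULL, 0) == 0)
            printf("microcode version: %u\n", val);
        if (sysctlbyname("machdep.cpu.core_count", &val, &len, NULL, 0) == 0)
            printf("enabled cores per package: %u\n", val);
        if (sysctlbyname("machdep.cpu.tlb.data_small", &val, &len, NULL, 0) == 0)
            printf("small-page data TLBs: %u\n", val);
        return 0;
    }
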
diff --git a/bsd/dev/i386/systemcalls.c b/bsd/dev/i386/systemcalls.c
index fa6db0a3d5f9d64ff5dc92ea1321e01514b8c6b4..6b24ccf6cc8b7e7fa87447c11bcbe648e1d17804 100644 (file)
--- a/bsd/dev/i386/systemcalls.c
+++ b/bsd/dev/i386/systemcalls.c
@@ -61,7 +61,6 @@ extern void dtrace_systrace_syscall_return(unsigned short, int, int *);
 extern void unix_syscall(x86_saved_state_t *);
 extern void unix_syscall64(x86_saved_state_t *);
 extern void *find_user_regs(thread_t);
-extern void throttle_lowpri_io(int *lowpri_window, mount_t v_mount);
 
 extern void x86_toggle_sysenter_arg_store(thread_t thread, boolean_t valid);
 extern boolean_t x86_sysenter_arg_store_isvalid(thread_t thread);
@@ -223,7 +222,7 @@ unix_syscall(x86_saved_state_t *state)
         */
        syscall_exit_funnelcheck();
 #endif /* DEBUG */
-       if (uthread->uu_lowpri_window && uthread->v_mount) {
+       if (uthread->uu_lowpri_window) {
                /*
                 * task is marked as a low priority I/O type
                 * and the I/O we issued while in this system call
@@ -231,7 +230,7 @@ unix_syscall(x86_saved_state_t *state)
                 * delay in order to mitigate the impact of this
                 * task on the normal operation of the system
                 */
-               throttle_lowpri_io(&uthread->uu_lowpri_window,uthread->v_mount);
+               throttle_lowpri_io(TRUE);
        }
        if (code != 180)
                KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
@@ -398,7 +397,7 @@ unsafe:
         */
        syscall_exit_funnelcheck();
 
-       if (uthread->uu_lowpri_window && uthread->v_mount) {
+       if (uthread->uu_lowpri_window) {
                /*
                 * task is marked as a low priority I/O type
                 * and the I/O we issued while in this system call
@@ -406,7 +405,7 @@ unsafe:
                 * delay in order to mitigate the impact of this
                 * task on the normal operation of the system
                 */
-               throttle_lowpri_io(&uthread->uu_lowpri_window,uthread->v_mount);
+               throttle_lowpri_io(TRUE);
        }
        if (code != 180)
                KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
@@ -533,7 +532,7 @@ unix_syscall_return(int error)
         */
        syscall_exit_funnelcheck();
 
-       if (uthread->uu_lowpri_window && uthread->v_mount) {
+       if (uthread->uu_lowpri_window) {
                /*
                 * task is marked as a low priority I/O type
                 * and the I/O we issued while in this system call
@@ -541,7 +540,7 @@ unix_syscall_return(int error)
                 * delay in order to mitigate the impact of this
                 * task on the normal operation of the system
                 */
-               throttle_lowpri_io(&uthread->uu_lowpri_window,uthread->v_mount);
+               throttle_lowpri_io(TRUE);
        }
        if (code != 180)
                KERNEL_DEBUG_CONSTANT(BSDDBG_CODE(DBG_BSD_EXCP_SC, code) | DBG_FUNC_END,
diff --git a/bsd/dev/memdev.c b/bsd/dev/memdev.c
index f957be33c52a71e9f8109403b51827479d6fb488..5b4e005e7f57191bdf4e3437e69835f5a9c3c140 100644 (file)
--- a/bsd/dev/memdev.c
+++ b/bsd/dev/memdev.c
@@ -108,9 +108,11 @@ static strategy_fcn_t      mdevstrategy;
 static int                             mdevbioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p);
 static int                             mdevcioctl(dev_t dev, u_long cmd, caddr_t data, int flag, struct proc *p);
 static int                             mdevrw(dev_t dev, struct uio *uio, int ioflag);
+#ifdef CONFIG_MEMDEV_INSECURE
 static char *                  nonspace(char *pos, char *end);
 static char *                  getspace(char *pos, char *end);
 static char *                  cvtnum(char *pos, char *end, unsigned int *num);
+#endif /* CONFIG_MEMDEV_INSECURE */
 
 extern void            bcopy_phys(addr64_t from, addr64_t to, vm_size_t bytes);
 extern void            mapping_set_mod(ppnum_t pn);
@@ -428,13 +430,14 @@ static    int mdevsize(dev_t dev) {
 
 void mdevinit(__unused int the_cnt) {
 
+#ifdef CONFIG_MEMDEV_INSECURE
+       
        int devid, phys;
        ppnum_t base;
        unsigned int size;
        char *ba, *lp;
        dev_t dev;
        
-       
        ba = PE_boot_args();                                                            /* Get the boot arguments */
        lp = ba + 256;                                                                          /* Point to the end */
                
@@ -471,11 +474,13 @@ void mdevinit(__unused int the_cnt) {
                
                dev = mdevadd(devid, base >> 12, size >> 12, phys);     /* Go add the device */ 
        }
-
+       
+#endif /* CONFIG_MEMDEV_INSECURE */
        return;
 
 }
 
+#ifdef CONFIG_MEMDEV_INSECURE
 char *nonspace(char *pos, char *end) {                                 /* Find next non-space in string */
 
        if(pos >= end) return end;                                                      /* Don't go past end */
@@ -529,6 +534,7 @@ char *cvtnum(char *pos, char *end, unsigned int *num) {             /* Convert to a number
                pos++;                                                                                  /* Step on */
        }
 }
+#endif /* CONFIG_MEMDEV_INSECURE */
 
 dev_t mdevadd(int devid, ppnum_t base, unsigned int size, int phys) {
        
diff --git a/bsd/dev/ppc/fbt_ppc.c b/bsd/dev/ppc/fbt_ppc.c
index e2ffc421620e176e4193760e71e1bf8b3abea370..5ee9cea6a51886f3c4a5163034199918decaa3f3 100644 (file)
--- a/bsd/dev/ppc/fbt_ppc.c
+++ b/bsd/dev/ppc/fbt_ppc.c
@@ -333,8 +333,8 @@ __fbt_provide_module(void *arg, struct modctl *ctl)
        char                                            *modname;
        unsigned int i;
 
-    int gIgnoreFBTBlacklist = 0;
-       PE_parse_boot_arg("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist);
+       int gIgnoreFBTBlacklist = 0;
+       PE_parse_boot_argn("IgnoreFBTBlacklist", &gIgnoreFBTBlacklist, sizeof (gIgnoreFBTBlacklist));
 
        mh = (struct mach_header *)(ctl->address);
        modname = ctl->mod_modname;
diff --git a/bsd/dev/ppc/systemcalls.c b/bsd/dev/ppc/systemcalls.c
index 11f9eb9c43fe84de3e94151f88d138eb44389110..8cc3ca4b86e774d1895db770cb7b58c347624187 100644 (file)
--- a/bsd/dev/ppc/systemcalls.c
+++ b/bsd/dev/ppc/systemcalls.c
@@ -69,7 +69,6 @@ find_user_regs(
        thread_t act);
 
 extern lck_spin_t * tz_slock;
-extern void throttle_lowpri_io(int *lowpri_window, mount_t v_mount);
 
 /*
  * Function:   unix_syscall
@@ -261,7 +260,7 @@ unsafe:
        /* panic if funnel is held */
        syscall_exit_funnelcheck();
 
-       if (uthread->uu_lowpri_window && uthread->v_mount) {
+       if (uthread->uu_lowpri_window) {
                /*
                 * task is marked as a low priority I/O type
                 * and the I/O we issued while in this system call
@@ -269,7 +268,7 @@ unsafe:
                 * delay in order to mitigate the impact of this
                 * task on the normal operation of the system
                 */
-               throttle_lowpri_io(&uthread->uu_lowpri_window,uthread->v_mount);
+               throttle_lowpri_io(TRUE);
        }
        if (kdebug_enable && (code != 180)) {
 
@@ -373,7 +372,7 @@ unix_syscall_return(int error)
        /* panic if funnel is held */
        syscall_exit_funnelcheck();
 
-       if (uthread->uu_lowpri_window && uthread->v_mount) {
+       if (uthread->uu_lowpri_window) {
                /*
                 * task is marked as a low priority I/O type
                 * and the I/O we issued while in this system call
@@ -381,7 +380,7 @@ unix_syscall_return(int error)
                 * delay in order to mitigate the impact of this
                 * task on the normal operation of the system
                 */
-               throttle_lowpri_io(&uthread->uu_lowpri_window,uthread->v_mount);
+               throttle_lowpri_io(TRUE);
        }
        if (kdebug_enable && (code != 180)) {
                if (callp->sy_return_type == _SYSCALL_RET_SSIZE_T)
diff --git a/bsd/dev/unix_startup.c b/bsd/dev/unix_startup.c
index d3df771967f28895321f6bfc8bda0071bdf1a077..d2dd20b11e1997d6b22dd74824492db65ea36a3f 100644 (file)
--- a/bsd/dev/unix_startup.c
+++ b/bsd/dev/unix_startup.c
@@ -145,6 +145,11 @@ bsd_startupearly(void)
 
 #if SOCKETS
        {
+#if CONFIG_USESOCKTHRESHOLD
+               static const unsigned int       maxspace = 64 * 1024;
+#else
+               static const unsigned int       maxspace = 128 * 1024;
+#endif
                int             scale;
 
                nmbclusters = bsd_mbuf_cluster_reserve() / MCLBYTES;
@@ -154,10 +159,10 @@ bsd_startupearly(void)
                        tcp_sendspace *= scale;
                        tcp_recvspace *= scale;
 
-                       if (tcp_sendspace > (64 * 1024))
-                               tcp_sendspace = 64 * 1024;
-                       if (tcp_recvspace > (64 * 1024))
-                               tcp_recvspace = 64 * 1024;
+                       if (tcp_sendspace > maxspace)
+                               tcp_sendspace = maxspace;
+                       if (tcp_recvspace > maxspace)
+                               tcp_recvspace = maxspace;
                }
 #endif /* INET || INET6 */
        }
diff --git a/bsd/hfs/hfs.h b/bsd/hfs/hfs.h
index 5568cf9c50b3cf89f7c0b31c5bf44cfd6adefb64..cfed9e65dee611b3cf59128ed2d541696c5ae135 100644 (file)
--- a/bsd/hfs/hfs.h
+++ b/bsd/hfs/hfs.h
@@ -124,9 +124,11 @@ typedef struct hfsmount {
        u_int32_t     hfs_flags;              /* see below */
 
        /* Physical Description */
-       u_long        hfs_phys_block_size;    /* Always a multiple of 512 */
-       daddr64_t     hfs_phys_block_count;   /* Num of PHYSICAL blocks of volume */
-       daddr64_t     hfs_alt_id_sector;      /* location of alternate VH/MDB */
+       u_int32_t     hfs_logical_block_size;   /* Logical block size of the disk as reported by ioctl(DKIOCGETBLOCKSIZE), always a multiple of 512 */
+       daddr64_t     hfs_logical_block_count;  /* Number of logical blocks on the disk */
+       daddr64_t     hfs_alt_id_sector;        /* location of alternate VH/MDB */
+       u_int32_t     hfs_physical_block_size;  /* Physical block size of the disk as reported by ioctl(DKIOCGETPHYSICALBLOCKSIZE) */ 
+       u_int32_t     hfs_log_per_phys;         /* Number of logical blocks per physical block size */
 
        /* Access to VFS and devices */
        struct mount            *hfs_mp;                                /* filesystem vfs structure */
@@ -337,6 +339,8 @@ enum privdirtype {FILE_HARDLINKS, DIR_HARDLINKS};
 #define        HFS_FOLDERCOUNT           0x10000
 /* When set, the file system exists on a virtual device, like disk image */
 #define HFS_VIRTUAL_DEVICE        0x20000
+/* When set, we're in hfs_changefs, so hfs_sync should do nothing. */
+#define HFS_IN_CHANGEFS           0x40000
 
 
 /* Macro to update next allocation block in the HFS mount structure.  If 
@@ -457,6 +461,10 @@ enum { kHFSPlusMaxFileNameBytes = kHFSPlusMaxFileNameChars * 3 };
 #define HFS_ALT_SECTOR(blksize, blkcnt)  (((blkcnt) - 1) - (512 / (blksize)))
 #define HFS_ALT_OFFSET(blksize)          ((blksize) > 1024 ? (blksize) - 1024 : 0)
 
+/* Convert the logical sector number to be aligned on physical block size boundary.  
+ * We are assuming the partition is a multiple of physical block size.
+ */
+#define HFS_PHYSBLK_ROUNDDOWN(sector_num, log_per_phys)        ((sector_num / log_per_phys) * log_per_phys)
 
 /*
  * HFS specific fcntl()'s
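
A worked example of the HFS_PHYSBLK_ROUNDDOWN() macro defined above, with illustrative values for a disk reporting 512-byte logical and 4096-byte physical blocks (so hfs_log_per_phys == 8):

    daddr64_t sector       = 21;
    u_int32_t log_per_phys = 4096 / 512;                          /* 8 */
    daddr64_t aligned      = HFS_PHYSBLK_ROUNDDOWN(sector, log_per_phys);
    /* (21 / 8) * 8 == 16: metadata reads are issued at logical sector 16,
     * so the buffer covers a whole physical block. */
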
diff --git a/bsd/hfs/hfs_catalog.c b/bsd/hfs/hfs_catalog.c
index 04db5d2903b7b5ad836d0c00e270c4da423d79e7..6f190bce588ce2ba58bdb144ecc08410bdf23693 100644 (file)
--- a/bsd/hfs/hfs_catalog.c
+++ b/bsd/hfs/hfs_catalog.c
@@ -196,6 +196,7 @@ cat_convertattr(
        } else {
                /* Convert the data fork. */
                datafp->cf_size = recp->hfsPlusFile.dataFork.logicalSize;
+               datafp->cf_new_size = 0;
                datafp->cf_blocks = recp->hfsPlusFile.dataFork.totalBlocks;
                if ((hfsmp->hfc_stage == HFC_RECORDING) &&
                    (attrp->ca_atime >= hfsmp->hfc_timebase)) {
@@ -211,6 +212,7 @@ cat_convertattr(
 
                /* Convert the resource fork. */
                rsrcfp->cf_size = recp->hfsPlusFile.resourceFork.logicalSize;
+               rsrcfp->cf_new_size = 0;
                rsrcfp->cf_blocks = recp->hfsPlusFile.resourceFork.totalBlocks;
                if ((hfsmp->hfc_stage == HFC_RECORDING) &&
                    (attrp->ca_atime >= hfsmp->hfc_timebase)) {
@@ -686,6 +688,7 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int allow_system_files
                } else if (wantrsrc) {
                        /* Convert the resource fork. */
                        forkp->cf_size = recp->hfsPlusFile.resourceFork.logicalSize;
+                       forkp->cf_new_size = 0;
                        forkp->cf_blocks = recp->hfsPlusFile.resourceFork.totalBlocks;
                        if ((hfsmp->hfc_stage == HFC_RECORDING) &&
                            (to_bsd_time(recp->hfsPlusFile.accessDate) >= hfsmp->hfc_timebase)) {
@@ -704,6 +707,7 @@ cat_lookupbykey(struct hfsmount *hfsmp, CatalogKey *keyp, int allow_system_files
 
                        /* Convert the data fork. */
                        forkp->cf_size = recp->hfsPlusFile.dataFork.logicalSize;
+                       forkp->cf_new_size = 0;
                        forkp->cf_blocks = recp->hfsPlusFile.dataFork.totalBlocks;
                        if ((hfsmp->hfc_stage == HFC_RECORDING) &&
                            (to_bsd_time(recp->hfsPlusFile.accessDate) >= hfsmp->hfc_timebase)) {
@@ -2177,7 +2181,7 @@ cat_makealias(struct hfsmount *hfsmp, u_int32_t inode_num, struct HFSPlusCatalog
 
        blksize = hfsmp->blockSize;
        blkcount = howmany(kHFSAliasSize, blksize);
-       sectorsize = hfsmp->hfs_phys_block_size;
+       sectorsize = hfsmp->hfs_logical_block_size;
        bzero(rsrcforkp, sizeof(HFSPlusForkData));
 
        /* Allocate some disk space for the alias content. */
@@ -2193,7 +2197,7 @@ cat_makealias(struct hfsmount *hfsmp, u_int32_t inode_num, struct HFSPlusCatalog
        blkno = ((u_int64_t)rsrcforkp->extents[0].startBlock * (u_int64_t)blksize) / sectorsize;
        blkno += hfsmp->hfsPlusIOPosOffset / sectorsize;
 
-       bp = buf_getblk(hfsmp->hfs_devvp, blkno, roundup(kHFSAliasSize, hfsmp->hfs_phys_block_size), 0, 0, BLK_META);
+       bp = buf_getblk(hfsmp->hfs_devvp, blkno, roundup(kHFSAliasSize, hfsmp->hfs_logical_block_size), 0, 0, BLK_META);
        if (hfsmp->jnl) {
                journal_modify_block_start(hfsmp->jnl, bp);
        }
diff --git a/bsd/hfs/hfs_catalog.h b/bsd/hfs/hfs_catalog.h
index 03c36567b894b7f634b196bf7cf7abceb081d4d0..0c511ff67132e03feade5e8bcad047dc4ef8a8ca 100644 (file)
--- a/bsd/hfs/hfs_catalog.h
+++ b/bsd/hfs/hfs_catalog.h
@@ -113,9 +113,15 @@ struct cat_attr {
  * Catalog Node Fork (runtime)
  *
  * NOTE: this is not the same as a struct HFSPlusForkData
+ *
+ * NOTE: if cf_new_size > cf_size, then a write is in progress and is extending
+ * the EOF; the new EOF will be cf_new_size.  Writes and pageouts may validly
+ * write up to cf_new_size, but reads should only read up to cf_size.  When
+ * an extending write is not in progress, cf_new_size is zero.
  */
 struct cat_fork {
        off_t          cf_size;        /* fork's logical size in bytes */
+       off_t          cf_new_size;    /* fork's logical size after write completes */
        union {
            u_int32_t  cfu_clump;      /* fork's clump size in bytes (sys files only) */
            u_int64_t  cfu_bytesread;  /* bytes read from this fork */
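
The NOTE above reduces to a simple EOF selection rule, sketched here (hypothetical helpers, not part of the commit): reads trust cf_size, while pageouts honor the in-flight EOF of an extending write.

    static off_t
    fork_eof_for_read(const struct cat_fork *cf)
    {
        return cf->cf_size;             /* never expose uninitialized bytes */
    }

    static off_t
    fork_eof_for_pageout(const struct cat_fork *cf)
    {
        /* cf_new_size is nonzero only while an extending write is in
         * progress; pageouts may validly reach up to the new EOF. */
        return (cf->cf_new_size > cf->cf_size) ? cf->cf_new_size : cf->cf_size;
    }
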
diff --git a/bsd/hfs/hfs_cnode.c b/bsd/hfs/hfs_cnode.c
index 7ff95e5933ba165a5329dd2b5591cbcb22453d27..b0ba0f1aefdb1e3032fa6efde9cd9510d2f56b45 100644 (file)
--- a/bsd/hfs/hfs_cnode.c
+++ b/bsd/hfs/hfs_cnode.c
@@ -127,7 +127,7 @@ hfs_vnop_inactive(struct vnop_inactive_args *ap)
         */
        if (v_type == VDIR) {
                hfs_reldirhints(cp, 0);
-       }               
+       }
        
        if (cp->c_flag & C_HARDLINK) {
                hfs_relorigins(cp);
@@ -474,10 +474,11 @@ hfs_vnop_reclaim(struct vnop_reclaim_args *ap)
                if (vnode_isdir(vp)) {
                        hfs_reldirhints(cp, 0);
                }
-               
+       
                if (cp->c_flag & C_HARDLINK) {
                        hfs_relorigins(cp);
                }
+
        }
        /* Release the file fork and related data */
        if (fp) {
diff --git a/bsd/hfs/hfs_cnode.h b/bsd/hfs/hfs_cnode.h
index 7a5b13601a0a1c3ffe91a2405329f40b6f5cd3f7..c4a930d1621490f6fef17ed6ea126c68a3cfe064 100644 (file)
--- a/bsd/hfs/hfs_cnode.h
+++ b/bsd/hfs/hfs_cnode.h
@@ -61,6 +61,7 @@ typedef struct filefork filefork_t;
 
 /* Aliases for common fields */
 #define ff_size          ff_data.cf_size
+#define ff_new_size      ff_data.cf_new_size
 #define ff_clumpsize     ff_data.cf_clump
 #define ff_bytesread     ff_data.cf_bytesread
 #define ff_blocks        ff_data.cf_blocks
diff --git a/bsd/hfs/hfs_endian.c b/bsd/hfs/hfs_endian.c
index e5775bfbc638d550b7e203df2e3510a9c9de2fc7..db0f489d53a3c89dbd272d988bbc86814723aa69 100644 (file)
--- a/bsd/hfs/hfs_endian.c
+++ b/bsd/hfs/hfs_endian.c
@@ -116,7 +116,9 @@ hfs_swap_BTNode (
        /*
         * When first opening a BTree, we have to read the header node before the
         * control block is initialized.  In this case, totalNodes will be zero,
-        * so skip the bounds checking.
+        * so skip the bounds checking. Also, we should ignore the header node when
+                * checking for invalid forwards and backwards links, since the header node's
+                * links can point back to itself legitimately.
         */
        if (btcb->totalNodes != 0) {
                        if (srcDesc->fLink >= btcb->totalNodes) {
@@ -129,6 +131,20 @@ hfs_swap_BTNode (
                                error = fsBTInvalidHeaderErr;
                                goto fail;
                        }
+                       
+                       if ((src->blockNum != 0) && (srcDesc->fLink == (u_int32_t) src->blockNum)) {
+                               printf("hfs_swap_BTNode: invalid forward link (0x%08x == 0x%08x)\n",
+                                               srcDesc->fLink, (u_int32_t) src->blockNum);
+                               error = fsBTInvalidHeaderErr;
+                               goto fail;
+                       }
+                       if ((src->blockNum != 0) && (srcDesc->bLink == (u_int32_t) src->blockNum)) {
+                               printf("hfs_swap_BTNode: invalid backward link (0x%08x == 0x%08x)\n",
+                                               srcDesc->bLink, (u_int32_t) src->blockNum);
+                               error = fsBTInvalidHeaderErr;
+                               goto fail;
+                       }
+
                }
                
                /* 
@@ -254,17 +270,34 @@ hfs_swap_BTNode (
     if (direction == kSwapBTNodeHostToBig) {
                /*
                 * Sanity check and swap the forward and backward links.
+                * Ignore the header node since its forward and backwards links can legitimately
+                * point to itself.
                 */
                if (srcDesc->fLink >= btcb->totalNodes) {
                        panic("hfs_UNswap_BTNode: invalid forward link (0x%08X)\n", srcDesc->fLink);
                        error = fsBTInvalidHeaderErr;
                        goto fail;
                }
+               if ((src->blockNum != 0) && (srcDesc->fLink == (u_int32_t) src->blockNum)) {
+                       panic ("hfs_UNswap_BTNode: invalid forward link (0x%08x == 0x%08x)\n", 
+                                       srcDesc->fLink, (u_int32_t) src->blockNum);
+                       error = fsBTInvalidHeaderErr;
+                       goto fail;
+               }
+               
                if (srcDesc->bLink >= btcb->totalNodes) {
                        panic("hfs_UNswap_BTNode: invalid backward link (0x%08X)\n", srcDesc->bLink);
                        error = fsBTInvalidHeaderErr;
                        goto fail;
                }
+               if ((src->blockNum != 0) && (srcDesc->bLink == (u_int32_t) src->blockNum)) {
+                       panic ("hfs_UNswap_BTNode: invalid backward link (0x%08x == 0x%08x)\n", 
+                                       srcDesc->bLink, (u_int32_t) src->blockNum);
+                       error = fsBTInvalidHeaderErr;
+                       goto fail;
+               }
+
+
         srcDesc->fLink         = SWAP_BE32 (srcDesc->fLink);
         srcDesc->bLink         = SWAP_BE32 (srcDesc->bLink);
     
diff --git a/bsd/hfs/hfs_lookup.c b/bsd/hfs/hfs_lookup.c
index 6009fb787c705d88504dde14a346eae197debbc9..1e94b8fb8fa54331afa6a7f95f399eaf52f40d09 100644 (file)
--- a/bsd/hfs/hfs_lookup.c
+++ b/bsd/hfs/hfs_lookup.c
@@ -352,9 +352,11 @@ found:
                         * process removed the object before we had a chance
                         * to create the vnode, then just treat it as the not
                         * found case above and return EJUSTRETURN.
+                        * We should do the same for the RENAME operation since we are
+                        * going to write it in regardless.
                         */
                        if ((retval == ENOENT) &&
-                           (cnp->cn_nameiop == CREATE) &&
+                           ((cnp->cn_nameiop == CREATE) || (cnp->cn_nameiop == RENAME)) &&
                            (flags & ISLASTCN)) {
                                retval = EJUSTRETURN;
                        }
diff --git a/bsd/hfs/hfs_readwrite.c b/bsd/hfs/hfs_readwrite.c
index 958ca6e3a09a081c0e80d9082cd95abaa9bd56c1..1e836052f75bf15a6e9d6abd8dbc3c34d27a6d40 100644 (file)
--- a/bsd/hfs/hfs_readwrite.c
+++ b/bsd/hfs/hfs_readwrite.c
@@ -479,20 +479,51 @@ sizeok:
 
                hfs_unlock(cp);
                cnode_locked = 0;
+               
+               /*
+                * We need to tell UBC the fork's new size BEFORE calling
+                * cluster_write, in case any of the new pages need to be
+                * paged out before cluster_write completes (which does happen
+                * in embedded systems due to extreme memory pressure).
+                * Similarly, we need to tell hfs_vnop_pageout what the new EOF
+                * will be, so that it can pass that on to cluster_pageout, and
+                * allow those pageouts.
+                *
+                * We don't update ff_size yet since we don't want pageins to
+                * be able to see uninitialized data between the old and new
+                * EOF, until cluster_write has completed and initialized that
+                * part of the file.
+                *
+                * The vnode pager relies on the file size last given to UBC via
+                * ubc_setsize.  hfs_vnop_pageout relies on fp->ff_new_size or
+                * ff_size (whichever is larger).  NOTE: ff_new_size is always
+                * zero, unless we are extending the file via write.
+                */
+               if (filesize > fp->ff_size) {
+                       fp->ff_new_size = filesize;
+                       ubc_setsize(vp, filesize);
+               }
                retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
                                tail_off, lflag | IO_NOZERODIRTY);
                if (retval) {
+                       fp->ff_new_size = 0;    /* no longer extending; use ff_size */
+                       if (filesize > origFileSize) {
+                               ubc_setsize(vp, origFileSize);
+                       }
                        goto ioerr_exit;
                }
-               offset = uio_offset(uio);
-               if (offset > fp->ff_size) {
-                       fp->ff_size = offset;
-
-                       ubc_setsize(vp, fp->ff_size);       /* XXX check errors */
+               
+               if (filesize > origFileSize) {
+                       fp->ff_size = filesize;
+                       
                        /* Files that are changing size are not hot file candidates. */
-                       if (hfsmp->hfc_stage == HFC_RECORDING)
+                       if (hfsmp->hfc_stage == HFC_RECORDING) {
                                fp->ff_bytesread = 0;
+                       }
                }
+               fp->ff_new_size = 0;    /* ff_size now has the correct size */
+               
+               /* If we wrote some bytes, then touch the change and mod times */
                if (resid > uio_resid(uio)) {
                        cp->c_touch_chgtime = TRUE;
                        cp->c_touch_modtime = TRUE;
@@ -2947,9 +2978,17 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap)
        cp = VTOC(vp);
        fp = VTOF(vp);
        
-       if (vnode_isswap(vp)) {
-               filesize = fp->ff_size;
-       } else {
+       /*
+        * Figure out where the file ends, for pageout purposes.  If
+        * ff_new_size > ff_size, then we're in the middle of extending the
+        * file via a write, so it is safe (and necessary) that we be able
+        * to pageout up to that point.
+        */
+       filesize = fp->ff_size;
+       if (fp->ff_new_size > filesize)
+               filesize = fp->ff_new_size;
+       
+       if (!vnode_isswap(vp)) {
                off_t end_of_range;
                int tooklock = 0;
 
@@ -2966,7 +3005,6 @@ hfs_vnop_pageout(struct vnop_pageout_args *ap)
                    tooklock = 1;
                }
        
-               filesize = fp->ff_size;
                end_of_range = ap->a_f_offset + ap->a_size - 1;
        
                if (end_of_range >= filesize) {
@@ -3219,7 +3257,7 @@ hfs_relocate(struct  vnode *vp, u_int32_t  blockHint, kauth_cred_t cred,
                        retval = ENOSPC;
                        goto restore;
                } else if ((eflags & kEFMetadataMask) &&
-                          ((((u_int64_t)sector_b * hfsmp->hfs_phys_block_size) / blksize) >
+                          ((((u_int64_t)sector_b * hfsmp->hfs_logical_block_size) / blksize) >
                              hfsmp->hfs_metazone_end)) {
                        const char * filestr;
                        char emptystr = '\0';
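
Condensed, the extending-write protocol established by the comment block in the write path above is: publish the new EOF to UBC before the write so pageouts of the new pages are allowed, keep ff_size at the old EOF so pageins cannot see uninitialized data, and move ff_size forward only after cluster_write() succeeds (a sketch mirroring the hunk, with the hot-file bookkeeping omitted):

    if (filesize > fp->ff_size) {
        fp->ff_new_size = filesize;         /* pageouts may reach new EOF */
        ubc_setsize(vp, filesize);
    }
    retval = cluster_write(vp, uio, fp->ff_size, filesize, zero_off,
                tail_off, lflag | IO_NOZERODIRTY);
    if (retval) {
        fp->ff_new_size = 0;                /* no longer extending */
        if (filesize > origFileSize)
            ubc_setsize(vp, origFileSize);  /* roll UBC back to old EOF */
    } else {
        if (filesize > origFileSize)
            fp->ff_size = filesize;         /* now safe for readers to see */
        fp->ff_new_size = 0;
    }
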
diff --git a/bsd/hfs/hfs_vfsops.c b/bsd/hfs/hfs_vfsops.c
index 7b67b6686ddbac8b27fc33c0b55cb3509814208d..8eac4e20e796cea10527b6cb2eefe63c120e310b 100644 (file)
--- a/bsd/hfs/hfs_vfsops.c
+++ b/bsd/hfs/hfs_vfsops.c
@@ -231,8 +231,11 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
                                
                        if ((retval = hfs_flushfiles(mp, flags, p)))
                                goto out;
-                       hfsmp->hfs_flags |= HFS_READ_ONLY;
+
+                       /* mark the volume cleanly unmounted */
+                       hfsmp->vcbAtrb |= kHFSVolumeUnmountedMask;
                        retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
+                       hfsmp->hfs_flags |= HFS_READ_ONLY;
 
                        /* also get the volume bitmap blocks */
                        if (!retval) {
@@ -275,11 +278,6 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
                                goto out;
                        }
 
-
-                       retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
-                       if (retval != E_NONE)
-                               goto out;
-
                        // If the journal was shut-down previously because we were
                        // asked to be read-only, let's start it back up again now
                        
@@ -300,7 +298,7 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
                                                      (hfsmp->jnl_start * HFSTOVCB(hfsmp)->blockSize) + (off_t)HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
                                                      hfsmp->jnl_size,
                                                      hfsmp->hfs_devvp,
-                                                     hfsmp->hfs_phys_block_size,
+                                                     hfsmp->hfs_logical_block_size,
                                                      jflags,
                                                      0,
                                                      hfs_sync_metadata, hfsmp->hfs_mp);
@@ -319,7 +317,14 @@ hfs_mount(struct mount *mp, vnode_t devvp, user_addr_t data, vfs_context_t conte
                        /* Only clear HFS_READ_ONLY after a successfull write */
                        hfsmp->hfs_flags &= ~HFS_READ_ONLY;
 
-                       if (!(hfsmp->hfs_flags & (HFS_READ_ONLY & HFS_STANDARD))) {
+                       /* mark the volume dirty (clear clean unmount bit) */
+                       hfsmp->vcbAtrb &= ~kHFSVolumeUnmountedMask;
+
+                       retval = hfs_flushvolumeheader(hfsmp, MNT_WAIT, 0);
+                       if (retval != E_NONE)
+                               goto out;
+
+                       if (!(hfsmp->hfs_flags & (HFS_READ_ONLY | HFS_STANDARD))) {
                                /* Setup private/hidden directories for hardlinks. */
                                hfs_privatedir_init(hfsmp, FILE_HARDLINKS);
                                hfs_privatedir_init(hfsmp, DIR_HARDLINKS);
@@ -439,6 +444,8 @@ hfs_changefs(struct mount *mp, struct hfs_mount_args *args)
        vcb = HFSTOVCB(hfsmp);
        mount_flags = (unsigned int)vfs_flags(mp);
 
+       hfsmp->hfs_flags |= HFS_IN_CHANGEFS;
+       
        permswitch = (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) &&
                       ((mount_flags & MNT_UNKNOWNPERMISSIONS) == 0)) ||
                      (((hfsmp->hfs_flags & HFS_UNKNOWN_PERMS) == 0) &&
@@ -447,7 +454,8 @@ hfs_changefs(struct mount *mp, struct hfs_mount_args *args)
        /* The root filesystem must operate with actual permissions: */
        if (permswitch && (mount_flags & MNT_ROOTFS) && (mount_flags & MNT_UNKNOWNPERMISSIONS)) {
                vfs_clearflags(mp, (u_int64_t)((unsigned int)MNT_UNKNOWNPERMISSIONS));  /* Just say "No". */
-               return EINVAL;
+               retval = EINVAL;
+               goto exit;
        }
        if (mount_flags & MNT_UNKNOWNPERMISSIONS)
                hfsmp->hfs_flags |= HFS_UNKNOWN_PERMS;
@@ -555,6 +563,7 @@ hfs_changefs(struct mount *mp, struct hfs_mount_args *args)
                (void) hfs_relconverter(old_encoding);
        }
 exit:
+       hfsmp->hfs_flags &= ~HFS_IN_CHANGEFS;
        return (retval);
 }
 
@@ -626,7 +635,6 @@ hfs_reload(struct mount *mountp)
 {
        register struct vnode *devvp;
        struct buf *bp;
-       int sectorsize;
        int error, i;
        struct hfsmount *hfsmp;
        struct HFSPlusVolumeHeader *vhp;
@@ -634,6 +642,7 @@ hfs_reload(struct mount *mountp)
        struct filefork *forkp;
        struct cat_desc cndesc;
        struct hfs_reload_cargs args;
+       daddr64_t priIDSector;
 
        hfsmp = VFSTOHFS(mountp);
        vcb = HFSTOVCB(hfsmp);
@@ -665,18 +674,19 @@ hfs_reload(struct mount *mountp)
        /*
         * Re-read VolumeHeader from disk.
         */
-       sectorsize = hfsmp->hfs_phys_block_size;
+       priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) + 
+                       HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
 
        error = (int)buf_meta_bread(hfsmp->hfs_devvp,
-                       (daddr64_t)((vcb->hfsPlusIOPosOffset / sectorsize) + HFS_PRI_SECTOR(sectorsize)),
-                       sectorsize, NOCRED, &bp);
+                       HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
+                       hfsmp->hfs_physical_block_size, NOCRED, &bp);
        if (error) {
                if (bp != NULL)
                        buf_brelse(bp);
                return (error);
        }
 
-       vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(sectorsize));
+       vhp = (HFSPlusVolumeHeader *) (buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
 
        /* Do a quick sanity check */
        if ((SWAP_BE16(vhp->signature) != kHFSPlusSigWord &&
@@ -812,8 +822,9 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
        int mntwrapper;
        kauth_cred_t cred;
        u_int64_t disksize;
-       daddr64_t blkcnt;
-       u_int32_t blksize;
+       daddr64_t log_blkcnt;
+       u_int32_t log_blksize;
+       u_int32_t phys_blksize;
        u_int32_t minblksize;
        u_int32_t iswritable;
        daddr64_t mdb_offset;
@@ -832,13 +843,25 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
        /* Advisory locking should be handled at the VFS layer */
        vfs_setlocklocal(mp);
 
-       /* Get the real physical block size. */
-       if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&blksize, 0, context)) {
+       /* Get the logical block size (treated as physical block size everywhere) */
+       if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&log_blksize, 0, context)) {
                retval = ENXIO;
                goto error_exit;
        }
+       /* Get the physical block size. */
+       retval = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_blksize, 0, context);
+       if (retval) {
+               if ((retval != ENOTSUP) && (retval != ENOTTY)) {
+                       retval = ENXIO;
+                       goto error_exit;
+               }
+               /* If device does not support this ioctl, assume that physical 
+                * block size is same as logical block size 
+                */
+               phys_blksize = log_blksize;
+       }
        /* Switch to 512 byte sectors (temporarily) */
-       if (blksize > 512) {
+       if (log_blksize > 512) {
                u_int32_t size512 = 512;
 
                if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&size512, FWRITE, context)) {
@@ -847,15 +870,15 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
                }
        }
        /* Get the number of 512 byte physical blocks. */
-       if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, context)) {
+       if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
                /* resetting block size may fail if getting block count did */
-               (void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, context);
+               (void)VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context);
 
                retval = ENXIO;
                goto error_exit;
        }
        /* Compute an accurate disk size (i.e. within 512 bytes) */
-       disksize = (u_int64_t)blkcnt * (u_int64_t)512;
+       disksize = (u_int64_t)log_blkcnt * (u_int64_t)512;
 
        /*
         * On Tiger it is not necessary to switch the device 
@@ -863,18 +886,20 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
         * worth of blocks but to insure compatibility with
         * pre-Tiger systems we have to do it.
         */
-       if (blkcnt > 0x000000007fffffff) {
-               minblksize = blksize = 4096;
+       if (log_blkcnt > 0x000000007fffffff) {
+               minblksize = log_blksize = 4096;
+               if (phys_blksize < log_blksize)
+                       phys_blksize = log_blksize;
        }
        
        /* Now switch to our preferred physical block size. */
-       if (blksize > 512) {
-               if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, context)) {
+       if (log_blksize > 512) {
+               if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
                        retval = ENXIO;
                        goto error_exit;
                }
                /* Get the count of physical blocks. */
-               if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, context)) {
+               if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
                        retval = ENXIO;
                        goto error_exit;
                }
@@ -882,16 +907,18 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
        /*
         * At this point:
         *   minblksize is the minimum physical block size
-        *   blksize has our preferred physical block size
-        *   blkcnt has the total number of physical blocks
+        *   log_blksize has our preferred physical block size
+        *   log_blkcnt has the total number of physical blocks
         */
 
-       mdb_offset = (daddr64_t)HFS_PRI_SECTOR(blksize);
-       if ((retval = (int)buf_meta_bread(devvp, mdb_offset, blksize, cred, &bp))) {
+       mdb_offset = (daddr64_t)HFS_PRI_SECTOR(log_blksize);
+       if ((retval = (int)buf_meta_bread(devvp, 
+                               HFS_PHYSBLK_ROUNDDOWN(mdb_offset, (phys_blksize/log_blksize)), 
+                               phys_blksize, cred, &bp))) {
                goto error_exit;
        }
        MALLOC(mdbp, HFSMasterDirectoryBlock *, kMDBSize, M_TEMP, M_WAITOK);
-       bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(blksize), mdbp, kMDBSize);
+       bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, kMDBSize);
        buf_brelse(bp);
        bp = NULL;
 
@@ -912,8 +939,10 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
        hfsmp->hfs_raw_dev = vnode_specrdev(devvp);
        hfsmp->hfs_devvp = devvp;
        vnode_ref(devvp);  /* Hold a ref on the device, dropped when hfsmp is freed. */
-       hfsmp->hfs_phys_block_size = blksize;
-       hfsmp->hfs_phys_block_count = blkcnt;
+       hfsmp->hfs_logical_block_size = log_blksize;
+       hfsmp->hfs_logical_block_count = log_blkcnt;
+       hfsmp->hfs_physical_block_size = phys_blksize;
+       hfsmp->hfs_log_per_phys = (phys_blksize / log_blksize);
        hfsmp->hfs_flags |= HFS_WRITEABLE_MEDIA;
        if (ronly)
                hfsmp->hfs_flags |= HFS_READ_ONLY;
@@ -983,18 +1012,18 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
                        goto error_exit;
                }
                /* HFS disks can only use 512 byte physical blocks */
-               if (blksize > kHFSBlockSize) {
-                       blksize = kHFSBlockSize;
-                       if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, context)) {
+               if (log_blksize > kHFSBlockSize) {
+                       log_blksize = kHFSBlockSize;
+                       if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
                                retval = ENXIO;
                                goto error_exit;
                        }
-                       if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, context)) {
+                       if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
                                retval = ENXIO;
                                goto error_exit;
                        }
-                       hfsmp->hfs_phys_block_size = blksize;
-                       hfsmp->hfs_phys_block_count = blkcnt;
+                       hfsmp->hfs_logical_block_size = log_blksize;
+                       hfsmp->hfs_logical_block_count = log_blkcnt;
                }
                if (args) {
                        hfsmp->hfs_encoding = args->hfs_encoding;
@@ -1030,37 +1059,38 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
                         * block size so everything will line up on a block
                         * boundary.
                         */
-                       if ((embeddedOffset % blksize) != 0) {
+                       if ((embeddedOffset % log_blksize) != 0) {
                                printf("HFS Mount: embedded volume offset not"
                                    " a multiple of physical block size (%d);"
-                                   " switching to 512\n", blksize);
-                               blksize = 512;
+                                   " switching to 512\n", log_blksize);
+                               log_blksize = 512;
                                if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE,
-                                   (caddr_t)&blksize, FWRITE, context)) {
+                                   (caddr_t)&log_blksize, FWRITE, context)) {
                                        retval = ENXIO;
                                        goto error_exit;
                                }
                                if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT,
-                                   (caddr_t)&blkcnt, 0, context)) {
+                                   (caddr_t)&log_blkcnt, 0, context)) {
                                        retval = ENXIO;
                                        goto error_exit;
                                }
                                /* Note: relative block count adjustment */
-                               hfsmp->hfs_phys_block_count *=
-                                   hfsmp->hfs_phys_block_size / blksize;
-                               hfsmp->hfs_phys_block_size = blksize;
+                               hfsmp->hfs_logical_block_count *=
+                                   hfsmp->hfs_logical_block_size / log_blksize;
+                               hfsmp->hfs_logical_block_size = log_blksize;
                        }
 
                        disksize = (u_int64_t)SWAP_BE16(mdbp->drEmbedExtent.blockCount) *
                                   (u_int64_t)SWAP_BE32(mdbp->drAlBlkSiz);
 
-                       hfsmp->hfs_phys_block_count = disksize / blksize;
+                       hfsmp->hfs_logical_block_count = disksize / log_blksize;
        
-                       mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
-                       retval = (int)buf_meta_bread(devvp, mdb_offset, blksize, cred, &bp);
+                       mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
+                       retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
+                                       phys_blksize, cred, &bp);
                        if (retval)
                                goto error_exit;
-                       bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(blksize), mdbp, 512);
+                       bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize), mdbp, 512);
                        buf_brelse(bp);
                        bp = NULL;
                        vhp = (HFSPlusVolumeHeader*) mdbp;
@@ -1119,13 +1149,15 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
                                    hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
                                    
                                    if (mdb_offset == 0) {
-                                       mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
+                                       mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
                                    }
 
                                    bp = NULL;
-                                   retval = (int)buf_meta_bread(devvp, mdb_offset, blksize, cred, &bp);
+                                   retval = (int)buf_meta_bread(devvp, 
+                                                   HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys), 
+                                                   phys_blksize, cred, &bp);
                                    if (retval == 0) {
-                                       jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(blksize));
+                                       jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
                                            
                                        if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
                                                printf ("hfs(1): Journal replay fail.  Writing lastMountVersion as FSK!\n");
@@ -1170,22 +1202,22 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
                 * If the backend didn't like our physical blocksize
                 * then retry with physical blocksize of 512.
                 */
-               if ((retval == ENXIO) && (blksize > 512) && (blksize != minblksize)) {
+               if ((retval == ENXIO) && (log_blksize > 512) && (log_blksize != minblksize)) {
                        printf("HFS Mount: could not use physical block size "
-                               "(%d) switching to 512\n", blksize);
-                       blksize = 512;
-                       if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&blksize, FWRITE, context)) {
+                               "(%d) switching to 512\n", log_blksize);
+                       log_blksize = 512;
+                       if (VNOP_IOCTL(devvp, DKIOCSETBLOCKSIZE, (caddr_t)&log_blksize, FWRITE, context)) {
                                retval = ENXIO;
                                goto error_exit;
                        }
-                       if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&blkcnt, 0, context)) {
+                       if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&log_blkcnt, 0, context)) {
                                retval = ENXIO;
                                goto error_exit;
                        }
-                       devvp->v_specsize = blksize;
+                       devvp->v_specsize = log_blksize;
                        /* Note: relative block count adjustment (in case this is an embedded volume). */
-                       hfsmp->hfs_phys_block_count *= hfsmp->hfs_phys_block_size / blksize;
-                       hfsmp->hfs_phys_block_size = blksize;
+                       hfsmp->hfs_logical_block_count *= hfsmp->hfs_logical_block_size / log_blksize;
+                       hfsmp->hfs_logical_block_size = log_blksize;
  
                        if (hfsmp->jnl) {
                            // close and re-open this with the new block size
@@ -1203,13 +1235,14 @@ hfs_mountfs(struct vnode *devvp, struct mount *mp, struct hfs_mount_args *args,
                                        hfsmp->hfs_flags |= HFS_NEED_JNL_RESET;
                                    
                                        if (mdb_offset == 0) {
-                                                       mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
+                                                       mdb_offset = (daddr64_t)((embeddedOffset / log_blksize) + HFS_PRI_SECTOR(log_blksize));
                                        }
 
                                                bp = NULL;
-                                       retval = (int)buf_meta_bread(devvp, mdb_offset, blksize, cred, &bp);
+                                       retval = (int)buf_meta_bread(devvp, HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys), 
+                                                       phys_blksize, cred, &bp);
                                        if (retval == 0) {
-                                                       jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(blksize));
+                                                       jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(phys_blksize));
                                            
                                                        if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
                                                                printf ("hfs(2): Journal replay fail.  Writing lastMountVersion as FSK!\n");
@@ -1669,16 +1702,18 @@ hfs_sync_metadata(void *arg)
        struct hfsmount *hfsmp;
        ExtendedVCB *vcb;
        buf_t   bp;
-       int  sectorsize, retval;
+       int  retval;
        daddr64_t priIDSector;
        hfsmp = VFSTOHFS(mp);
        vcb = HFSTOVCB(hfsmp);
 
        // now make sure the super block is flushed
-       sectorsize = hfsmp->hfs_phys_block_size;
-       priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / sectorsize) +
-                                 HFS_PRI_SECTOR(sectorsize));
-       retval = (int)buf_meta_bread(hfsmp->hfs_devvp, priIDSector, sectorsize, NOCRED, &bp);
+       priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
+                                 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
+
+       retval = (int)buf_meta_bread(hfsmp->hfs_devvp, 
+                       HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
+                       hfsmp->hfs_physical_block_size, NOCRED, &bp);
        if ((retval != 0 ) && (retval != ENXIO)) {
                printf("hfs_sync_metadata: can't read volume header at %d! (retval 0x%x)\n",
                       (int)priIDSector, retval);
@@ -1695,7 +1730,9 @@ hfs_sync_metadata(void *arg)
        //          hfs_btreeio.c:FlushAlternate() should flag when it was
        //          written...
        if (hfsmp->hfs_alt_id_sector) {
-               retval = (int)buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sectorsize, NOCRED, &bp);
+               retval = (int)buf_meta_bread(hfsmp->hfs_devvp, 
+                               HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
+                               hfsmp->hfs_physical_block_size, NOCRED, &bp);
                if (retval == 0 && ((buf_flags(bp) & (B_DELWRI | B_LOCKED)) == B_DELWRI)) {
                    buf_bwrite(bp);
                } else if (bp) {
@@ -1760,14 +1797,14 @@ hfs_sync(struct mount *mp, int waitfor, vfs_context_t context)
        int error, allerror = 0;
        struct hfs_sync_cargs args;
 
+       hfsmp = VFSTOHFS(mp);
+
        /*
-        * During MNT_UPDATE hfs_changefs might be manipulating
-        * vnodes so back off
+        * hfs_changefs might be manipulating vnodes so back off
         */
-       if (((u_int32_t)vfs_flags(mp)) & MNT_UPDATE)    /* XXX MNT_UPDATE may not be visible here */
+       if (hfsmp->hfs_flags & HFS_IN_CHANGEFS)
                return (0);
 
-       hfsmp = VFSTOHFS(mp);
        if (hfsmp->hfs_flags & HFS_READ_ONLY)
                return (EROFS);
 
@@ -2118,7 +2155,7 @@ hfs_sysctl(int *name, __unused u_int namelen, user_addr_t oldp, size_t *oldlenp,
                                                         + HFSTOVCB(hfsmp)->hfsPlusIOPosOffset,
                                                         (off_t)((unsigned)name[3]),
                                                         hfsmp->hfs_devvp,
-                                                        hfsmp->hfs_phys_block_size,
+                                                        hfsmp->hfs_logical_block_size,
                                                         0,
                                                         0,
                                                         hfs_sync_metadata, hfsmp->hfs_mp);
@@ -2675,7 +2712,7 @@ hfs_flushMDB(struct hfsmount *hfsmp, int waitfor, int altflush)
        int sectorsize;
        ByteCount namelen;
 
-       sectorsize = hfsmp->hfs_phys_block_size;
+       sectorsize = hfsmp->hfs_logical_block_size;
        retval = (int)buf_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sectorsize), sectorsize, NOCRED, &bp);
        if (retval) {
                if (bp)
@@ -2774,7 +2811,6 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
        int retval;
        struct buf *bp;
        int i;
-       int sectorsize;
        daddr64_t priIDSector;
        int critical;
        u_int16_t  signature;
@@ -2787,15 +2823,16 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
                return hfs_flushMDB(hfsmp, waitfor, altflush);
        }
        critical = altflush;
-       sectorsize = hfsmp->hfs_phys_block_size;
-       priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / sectorsize) +
-                                 HFS_PRI_SECTOR(sectorsize));
+       priIDSector = (daddr64_t)((vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
+                                 HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size));
 
        if (hfs_start_transaction(hfsmp) != 0) {
            return EINVAL;
        }
 
-       retval = (int)buf_meta_bread(hfsmp->hfs_devvp, priIDSector, sectorsize, NOCRED, &bp);
+       retval = (int)buf_meta_bread(hfsmp->hfs_devvp, 
+                       HFS_PHYSBLK_ROUNDDOWN(priIDSector, hfsmp->hfs_log_per_phys),
+                       hfsmp->hfs_physical_block_size, NOCRED, &bp);
        if (retval) {
                if (bp)
                        buf_brelse(bp);
@@ -2810,7 +2847,8 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
                journal_modify_block_start(hfsmp->jnl, bp);
        }
 
-       volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(sectorsize));
+       volumeHeader = (HFSPlusVolumeHeader *)((char *)buf_dataptr(bp) + 
+                       HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
 
        /*
         * Sanity check what we just read.
@@ -2839,15 +2877,16 @@ hfs_flushvolumeheader(struct hfsmount *hfsmp, int waitfor, int altflush)
                struct buf *bp2;
                HFSMasterDirectoryBlock *mdb;
 
-               retval = (int)buf_meta_bread(hfsmp->hfs_devvp, (daddr64_t)HFS_PRI_SECTOR(sectorsize),
-                               sectorsize, NOCRED, &bp2);
+               retval = (int)buf_meta_bread(hfsmp->hfs_devvp, 
+                               HFS_PHYSBLK_ROUNDDOWN(HFS_PRI_SECTOR(hfsmp->hfs_logical_block_size), hfsmp->hfs_log_per_phys),
+                               hfsmp->hfs_physical_block_size, NOCRED, &bp2);
                if (retval) {
                        if (bp2)
                                buf_brelse(bp2);
                        retval = 0;
                } else {
                        mdb = (HFSMasterDirectoryBlock *)(buf_dataptr(bp2) +
-                               HFS_PRI_OFFSET(sectorsize));
+                               HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
 
                        if ( SWAP_BE32 (mdb->drCrDate) != vcb->localCreateDate )
                          {
@@ -2991,12 +3030,16 @@ done:
        if (altflush && hfsmp->hfs_alt_id_sector) {
                struct buf *alt_bp = NULL;
 
-               if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector, sectorsize, NOCRED, &alt_bp) == 0) {
+               if (buf_meta_bread(hfsmp->hfs_devvp, 
+                               HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
+                               hfsmp->hfs_physical_block_size, NOCRED, &alt_bp) == 0) {
                        if (hfsmp->jnl) {
                                journal_modify_block_start(hfsmp->jnl, alt_bp);
                        }
 
-                       bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) + HFS_ALT_OFFSET(sectorsize), kMDBSize);
+                       bcopy(volumeHeader, (char *)buf_dataptr(alt_bp) + 
+                                       HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), 
+                                       kMDBSize);
 
                        if (hfsmp->jnl) {
                                journal_modify_block_end(hfsmp->jnl, alt_bp, NULL, NULL);
@@ -3048,6 +3091,7 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        u_int32_t  addblks;
        u_int64_t  sectorcnt;
        u_int32_t  sectorsize;
+       u_int32_t  phys_sectorsize;
        daddr64_t  prev_alt_sector;
        daddr_t    bitmapblks;
        int  lockflags;
@@ -3093,7 +3137,7 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        if (VNOP_IOCTL(devvp, DKIOCGETBLOCKSIZE, (caddr_t)&sectorsize, 0, context)) {
                return (ENXIO);
        }
-       if (sectorsize != hfsmp->hfs_phys_block_size) {
+       if (sectorsize != hfsmp->hfs_logical_block_size) {
                return (ENXIO);
        }
        if (VNOP_IOCTL(devvp, DKIOCGETBLOCKCOUNT, (caddr_t)&sectorcnt, 0, context)) {
@@ -3103,12 +3147,23 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
                printf("hfs_extendfs: not enough space on device\n");
                return (ENOSPC);
        }
+       error = VNOP_IOCTL(devvp, DKIOCGETPHYSICALBLOCKSIZE, (caddr_t)&phys_sectorsize, 0, context);
+       if (error) {
+               if ((error != ENOTSUP) && (error != ENOTTY)) {
+                       return (ENXIO);
+               }
+               /* If the ioctl is not supported, force the physical and logical sector sizes to be the same */
+               phys_sectorsize = sectorsize;
+       }
+       if (phys_sectorsize != hfsmp->hfs_physical_block_size) {
+               return (ENXIO);
+       }
        oldsize = (u_int64_t)hfsmp->totalBlocks * (u_int64_t)hfsmp->blockSize;
 
        /*
         * Validate new size.
         */
-       if ((newsize <= oldsize) || (newsize % sectorsize)) {
+       if ((newsize <= oldsize) || (newsize % sectorsize) || (newsize % phys_sectorsize)) {
                printf("hfs_extendfs: invalid size\n");
                return (EINVAL);
        }
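
hfs_extendfs now re-probes both sector sizes and rejects a target size
that is not a multiple of each, since the relocated alternate volume
header will be written with physically aligned I/O.  A sketch of the
combined validation:

    #include <stdint.h>

    /* Sketch of the check above: the new size must actually grow the
     * volume and divide evenly by both sector sizes. */
    static int
    extend_size_ok(uint64_t newsize, uint64_t oldsize,
                   uint32_t sectorsize, uint32_t phys_sectorsize)
    {
            return (newsize > oldsize) &&
                   (newsize % sectorsize == 0) &&
                   (newsize % phys_sectorsize == 0);
    }

When the physical size is a power-of-two multiple of the logical size
(512 vs. 4096, say) the logical-size test is implied by the physical
one, but the code keeps both so neither probe result is silently
trusted.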
@@ -3261,14 +3316,14 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        /*
         * Adjust file system variables for new space.
         */
-       prev_phys_block_count = hfsmp->hfs_phys_block_count;
+       prev_phys_block_count = hfsmp->hfs_logical_block_count;
        prev_alt_sector = hfsmp->hfs_alt_id_sector;
 
        vcb->totalBlocks += addblks;
        vcb->freeBlocks += addblks;
-       hfsmp->hfs_phys_block_count = newsize / sectorsize;
+       hfsmp->hfs_logical_block_count = newsize / sectorsize;
        hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / sectorsize) +
-                                 HFS_ALT_SECTOR(sectorsize, hfsmp->hfs_phys_block_count);
+                                 HFS_ALT_SECTOR(sectorsize, hfsmp->hfs_logical_block_count);
        MarkVCBDirty(vcb);
        error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
        if (error) {
@@ -3290,7 +3345,7 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
                }
                vcb->totalBlocks -= addblks;
                vcb->freeBlocks -= addblks;
-               hfsmp->hfs_phys_block_count = prev_phys_block_count;
+               hfsmp->hfs_logical_block_count = prev_phys_block_count;
                hfsmp->hfs_alt_id_sector = prev_alt_sector;
                MarkVCBDirty(vcb);
                if (vcb->blockSize == 512)
@@ -3304,11 +3359,12 @@ hfs_extendfs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
         */
        bp = NULL;
        if (prev_alt_sector) {
-               if (buf_meta_bread(hfsmp->hfs_devvp, prev_alt_sector, sectorsize,
-                                  NOCRED, &bp) == 0) {
+               if (buf_meta_bread(hfsmp->hfs_devvp, 
+                               HFS_PHYSBLK_ROUNDDOWN(prev_alt_sector, hfsmp->hfs_log_per_phys),
+                               hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) {
                        journal_modify_block_start(hfsmp->jnl, bp);
        
-                       bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(sectorsize), kMDBSize);
+                       bzero((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size), kMDBSize);
        
                        journal_modify_block_end(hfsmp->jnl, bp, NULL, NULL);
                } else if (bp) {
@@ -3402,7 +3458,9 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
        /* Make sure new size is valid. */
        if ((newsize < HFS_MIN_SIZE) ||
            (newsize >= oldsize) ||
-           (newsize % hfsmp->hfs_phys_block_size)) {
+           (newsize % hfsmp->hfs_logical_block_size) ||
+           (newsize % hfsmp->hfs_physical_block_size)) {
+               printf ("hfs_truncatefs: invalid size\n");
                error = EINVAL;
                goto out;
        }
@@ -3502,10 +3560,11 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
         * since this block will be outside of the truncated file system!
         */
        if (hfsmp->hfs_alt_id_sector) {
-               if (buf_meta_bread(hfsmp->hfs_devvp, hfsmp->hfs_alt_id_sector,
-                                  hfsmp->hfs_phys_block_size, NOCRED, &bp) == 0) {
+               if (buf_meta_bread(hfsmp->hfs_devvp, 
+                               HFS_PHYSBLK_ROUNDDOWN(hfsmp->hfs_alt_id_sector, hfsmp->hfs_log_per_phys),
+                               hfsmp->hfs_physical_block_size, NOCRED, &bp) == 0) {
        
-                       bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_phys_block_size)), kMDBSize);
+                       bzero((void*)((char *)buf_dataptr(bp) + HFS_ALT_OFFSET(hfsmp->hfs_physical_block_size)), kMDBSize);
                        (void) VNOP_BWRITE(bp);
                } else if (bp) {
                        buf_brelse(bp);
@@ -3521,8 +3580,8 @@ hfs_truncatefs(struct hfsmount *hfsmp, u_int64_t newsize, vfs_context_t context)
         * Adjust file system variables and flush them to disk.
         */
        hfsmp->totalBlocks = newblkcnt;
-       hfsmp->hfs_phys_block_count = newsize / hfsmp->hfs_phys_block_size;
-       hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_phys_block_size, hfsmp->hfs_phys_block_count);
+       hfsmp->hfs_logical_block_count = newsize / hfsmp->hfs_logical_block_size;
+       hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size, hfsmp->hfs_logical_block_count);
        MarkVCBDirty(hfsmp);
        error = hfs_flushvolumeheader(hfsmp, MNT_WAIT, HFS_ALTFLUSH);
        if (error)
@@ -3632,7 +3691,7 @@ hfs_copy_extent(
        size_t ioSize;
        u_int32_t ioSizeSectors;        /* Device sectors in this I/O */
        daddr64_t srcSector, destSector;
-       u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_phys_block_size;
+       u_int32_t sectorsPerBlock = hfsmp->blockSize / hfsmp->hfs_logical_block_size;
 
        /*
         * Sanity check that we have locked the vnode of the file we're copying.
@@ -3674,11 +3733,11 @@ hfs_copy_extent(
        buf_setdataptr(bp, (uintptr_t)buffer);
        
        resid = (off_t) blockCount * (off_t) hfsmp->blockSize;
-       srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_phys_block_size;
-       destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_phys_block_size;
+       srcSector = (daddr64_t) oldStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
+       destSector = (daddr64_t) newStart * hfsmp->blockSize / hfsmp->hfs_logical_block_size;
        while (resid > 0) {
                ioSize = MIN(bufferSize, resid);
-               ioSizeSectors = ioSize / hfsmp->hfs_phys_block_size;
+               ioSizeSectors = ioSize / hfsmp->hfs_logical_block_size;
                
                /* Prepare the buffer for reading */
                buf_reset(bp, B_READ);
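
hfs_copy_extent still addresses the device in logical-sector units, so
allocation blocks are scaled by the logical block size; only the field
name changes in this hunk.  A small worked example with assumed sizes:

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t blockSize   = 4096;  /* hfsmp->blockSize (allocation block) */
            uint32_t log_blksize = 512;   /* hfsmp->hfs_logical_block_size */
            uint64_t oldStart    = 100;   /* allocation block number */

            /* Device addresses stay in logical sectors: block 100 of a
             * 4 KiB-block volume starts at sector 800. */
            int64_t srcSector = (int64_t)(oldStart * blockSize / log_blksize);
            printf("allocation block %llu -> sector %lld (%u sectors/block)\n",
                   (unsigned long long)oldStart, (long long)srcSector,
                   blockSize / log_blksize);
            return 0;
    }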
@@ -3988,7 +4047,7 @@ hfs_journal_relocate_callback(void *_args)
        JournalInfoBlock *jibp;
 
        error = buf_meta_bread(hfsmp->hfs_devvp,
-               hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_phys_block_size),
+               hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
                hfsmp->blockSize, vfs_context_ucred(args->context), &bp);
        if (error) {
                printf("hfs_reclaim_journal_file: failed to read JIB (%d)\n", error);
@@ -4144,14 +4203,14 @@ hfs_reclaim_journal_info_block(struct hfsmount *hfsmp, vfs_context_t context)
        
        /* Copy the old journal info block content to the new location */
        error = buf_meta_bread(hfsmp->hfs_devvp,
-               hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_phys_block_size),
+               hfsmp->vcbJinfoBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
                hfsmp->blockSize, vfs_context_ucred(context), &old_bp);
        if (error) {
                printf("hfs_reclaim_journal_info_block: failed to read JIB (%d)\n", error);
                goto free_fail;
        }
        new_bp = buf_getblk(hfsmp->hfs_devvp,
-               newBlock * (hfsmp->blockSize/hfsmp->hfs_phys_block_size),
+               newBlock * (hfsmp->blockSize/hfsmp->hfs_logical_block_size),
                hfsmp->blockSize, 0, 0, BLK_META);
        bcopy((char*)buf_dataptr(old_bp), (char*)buf_dataptr(new_bp), hfsmp->blockSize);
        buf_brelse(old_bp);
index 736ab6199549cfdfde83c23ff271908a9561b9c5..43e5ae8be942e7b46ba2332a411a2083546e953d 100644 (file)
@@ -146,11 +146,11 @@ OSErr hfs_MountHFSVolume(struct hfsmount *hfsmp, HFSMasterDirectoryBlock *mdb,
        if (error || (utf8chars == 0))
                (void) mac_roman_to_utf8(mdb->drVN, NAME_MAX, &utf8chars, vcb->vcbVN);
 
-       hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_phys_block_size);
+       hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
        vcb->vcbVBMIOSize = kHFSBlockSize;
 
-       hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_phys_block_size,
-                                                 hfsmp->hfs_phys_block_count);
+       hfsmp->hfs_alt_id_sector = HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
+                                                 hfsmp->hfs_logical_block_count);
 
        bzero(&cndesc, sizeof(cndesc));
        cndesc.cd_parentcnid = kHFSRootParentID;
@@ -330,11 +330,24 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                return (EINVAL);
 
        /* Make sure we can live with the physical block size. */
-       if ((disksize & (hfsmp->hfs_phys_block_size - 1)) ||
-           (embeddedOffset & (hfsmp->hfs_phys_block_size - 1)) ||
-           (blockSize < hfsmp->hfs_phys_block_size)) {
+       if ((disksize & (hfsmp->hfs_logical_block_size - 1)) ||
+           (embeddedOffset & (hfsmp->hfs_logical_block_size - 1)) ||
+           (blockSize < hfsmp->hfs_logical_block_size)) {
                return (ENXIO);
        }
+
+       /* If the allocation block size is less than the physical
+        * block size, fall back to treating the physical block size
+        * as equal to the logical block size.  The physical block
+        * size is used to round offsets down to a physical block
+        * boundary when reading and writing the primary and alternate
+        * volume headers, and that rounding causes problems if the
+        * allocation block size is smaller than the physical block size.
+        */
+       if (blockSize < hfsmp->hfs_physical_block_size) {
+               hfsmp->hfs_physical_block_size = hfsmp->hfs_logical_block_size;
+       }
+
        /*
         * The VolumeHeader seems OK: transfer info from it into VCB
         * Note - the VCB starts out clear (all zeros)
@@ -378,22 +391,22 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
         * (currently set up from the wrapper MDB) using the
         * new blocksize value:
         */
-       hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_phys_block_size);
+       hfsmp->hfs_logBlockSize = BestBlockSizeFit(vcb->blockSize, MAXBSIZE, hfsmp->hfs_logical_block_size);
        vcb->vcbVBMIOSize = min(vcb->blockSize, MAXPHYSIO);
 
        /*
         * Validate and initialize the location of the alternate volume header.
         */
-       spare_sectors = hfsmp->hfs_phys_block_count -
+       spare_sectors = hfsmp->hfs_logical_block_count -
                        (((daddr64_t)vcb->totalBlocks * blockSize) /
-                          hfsmp->hfs_phys_block_size);
+                          hfsmp->hfs_logical_block_size);
 
-       if (spare_sectors > (blockSize / hfsmp->hfs_phys_block_size)) {
+       if (spare_sectors > (daddr64_t)(blockSize / hfsmp->hfs_logical_block_size)) {
                hfsmp->hfs_alt_id_sector = 0;  /* partition has grown! */
        } else {
-               hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_phys_block_size) +
-                                          HFS_ALT_SECTOR(hfsmp->hfs_phys_block_size,
-                                                         hfsmp->hfs_phys_block_count);
+               hfsmp->hfs_alt_id_sector = (hfsmp->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size) +
+                                          HFS_ALT_SECTOR(hfsmp->hfs_logical_block_size,
+                                                         hfsmp->hfs_logical_block_count);
        }
 
        bzero(&cndesc, sizeof(cndesc));
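
spare_sectors measures how far the partition extends beyond the blocks
the volume accounts for: if the partition has grown, the alternate
header's old location is meaningless and hfs_alt_id_sector is zeroed;
otherwise the header sits 1024 bytes before the end of the volume.  A
sketch of the location math, assuming HFS_ALT_SECTOR mirrors its hfs.h
definition:

    #include <stdint.h>
    #include <stdio.h>

    /* Assumed to mirror hfs.h: 1024 bytes before the end of the
     * volume, expressed in sectors. */
    #define HFS_ALT_SECTOR(blksize, blkcnt) (((blkcnt) - 1) - (512 / (blksize)))

    int main(void)
    {
            uint32_t log_blksize  = 512;
            uint64_t log_blkcnt   = 2048;   /* a 1 MiB volume */
            uint64_t embedded_off = 0;      /* hfsPlusIOPosOffset */

            int64_t alt = (int64_t)(embedded_off / log_blksize) +
                          HFS_ALT_SECTOR(log_blksize, log_blkcnt);
            /* 2048 sectors x 512 bytes: the header lands at sector 2046,
             * 1024 bytes before the end. */
            printf("alternate volume header at sector %lld\n", (long long)alt);
            return 0;
    }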
@@ -411,6 +424,7 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
        cndesc.cd_cnid = cnattr.ca_fileid = kHFSExtentsFileID;
 
        cfork.cf_size    = SWAP_BE64 (vhp->extentsFile.logicalSize);
+       cfork.cf_new_size= 0;
        cfork.cf_clump   = SWAP_BE32 (vhp->extentsFile.clumpSize);
        cfork.cf_blocks  = SWAP_BE32 (vhp->extentsFile.totalBlocks);
        cfork.cf_vblocks = 0;
@@ -607,9 +621,11 @@ OSErr hfs_MountHFSPlusVolume(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                                    
                                mdb_offset = (daddr64_t)((embeddedOffset / blockSize) + HFS_PRI_SECTOR(blockSize));
 
-                               retval = (int)buf_meta_bread(hfsmp->hfs_devvp, mdb_offset, blockSize, cred, &bp);
+                               retval = (int)buf_meta_bread(hfsmp->hfs_devvp, 
+                                               HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
+                                               hfsmp->hfs_physical_block_size, cred, &bp);
                                if (retval == 0) {
-                                       jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(blockSize));
+                                       jvhp = (HFSPlusVolumeHeader *)(buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size));
                                            
                                        if (SWAP_BE16(jvhp->signature) == kHFSPlusSigWord || SWAP_BE16(jvhp->signature) == kHFSXSigWord) {
                                                printf ("hfs(3): Journal replay fail.  Writing lastMountVersion as FSK!\n");
@@ -1760,7 +1776,8 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
        JournalInfoBlock *jibp;
        struct buf       *jinfo_bp, *bp;
        int               sectors_per_fsblock, arg_flags=0, arg_tbufsz=0;
-       int               retval, blksize = hfsmp->hfs_phys_block_size;
+       int               retval;
+       uint32_t                  blksize = hfsmp->hfs_logical_block_size;
        struct vnode     *devvp;
        struct hfs_mount_args *args = _args;
        u_int32_t         jib_flags;
@@ -1808,7 +1825,7 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                                      jib_offset + embeddedOffset,
                                      jib_size,
                                      devvp,
-                                     hfsmp->hfs_phys_block_size);
+                                     hfsmp->hfs_logical_block_size);
 
            hfsmp->jnl = NULL;
 
@@ -1865,14 +1882,16 @@ hfs_early_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp,
                        if (mdb_offset == 0) {
                                mdb_offset = (daddr64_t)((embeddedOffset / blksize) + HFS_PRI_SECTOR(blksize));
                        }
-                       retval = (int)buf_meta_bread(devvp, mdb_offset, blksize, cred, &bp);
+                       retval = (int)buf_meta_bread(devvp, 
+                                       HFS_PHYSBLK_ROUNDDOWN(mdb_offset, hfsmp->hfs_log_per_phys),
+                                       hfsmp->hfs_physical_block_size, cred, &bp);
                        if (retval) {
                                buf_brelse(bp);
                                printf("hfs: failed to reload the mdb after opening the journal (retval %d)!\n",
                                           retval);
                                return retval;
                        }
-                       bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(blksize), mdbp, 512);
+                       bcopy((char *)buf_dataptr(bp) + HFS_PRI_OFFSET(hfsmp->hfs_physical_block_size), mdbp, 512);
                        buf_brelse(bp);
                        bp = NULL;
                }
@@ -1955,9 +1974,9 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a
        }
 
 
-       sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_phys_block_size;
+       sectors_per_fsblock = SWAP_BE32(vhp->blockSize) / hfsmp->hfs_logical_block_size;
        retval = (int)buf_meta_bread(devvp,
-                                               (daddr64_t)(vcb->hfsPlusIOPosOffset / hfsmp->hfs_phys_block_size + 
+                                               (daddr64_t)(vcb->hfsPlusIOPosOffset / hfsmp->hfs_logical_block_size + 
                                                (SWAP_BE32(vhp->journalInfoBlock)*sectors_per_fsblock)),
                                                SWAP_BE32(vhp->blockSize), NOCRED, &jinfo_bp);
        if (retval) {
@@ -2021,7 +2040,7 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a
                                      jib_offset + (off_t)vcb->hfsPlusIOPosOffset,
                                      jib_size,
                                      devvp,
-                                     hfsmp->hfs_phys_block_size);
+                                     hfsmp->hfs_logical_block_size);
 
            hfsmp->jnl = NULL;
 
@@ -2042,7 +2061,7 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a
                                                                        jib_offset + (off_t)vcb->hfsPlusIOPosOffset,
                                                                        jib_size,
                                                                        devvp,
-                                                                       hfsmp->hfs_phys_block_size,
+                                                                       hfsmp->hfs_logical_block_size,
                                                                        arg_flags,
                                                                        arg_tbufsz,
                                                                        hfs_sync_metadata, hfsmp->hfs_mp);
@@ -2071,7 +2090,7 @@ hfs_late_journal_init(struct hfsmount *hfsmp, HFSPlusVolumeHeader *vhp, void *_a
                                                                  jib_offset + (off_t)vcb->hfsPlusIOPosOffset,
                                                                  jib_size,
                                                                  devvp,
-                                                                 hfsmp->hfs_phys_block_size,
+                                                                 hfsmp->hfs_logical_block_size,
                                                                  arg_flags,
                                                                  arg_tbufsz,
                                                                  hfs_sync_metadata, hfsmp->hfs_mp);
index eef6b5e9660486b10484d49bd60c81c25a9bcf21..61875f626671835c2aa78314b1c8d2ac58ae195a 100644 (file)
@@ -2304,8 +2304,32 @@ hfs_vnop_rename(ap)
        error = hfs_lockfour(VTOC(fdvp), VTOC(fvp), VTOC(tdvp), tvp ? VTOC(tvp) : NULL,
                             HFS_EXCLUSIVE_LOCK);
        if (error) {
-               if (took_trunc_lock)
+               if (took_trunc_lock) {
                        hfs_unlock_truncate(VTOC(tvp), TRUE);   
+                       took_trunc_lock = 0;
+               }
+               /*
+                * tvp might no longer exist.  If we get ENOENT, re-check the
+                * C_NOEXISTS flag on tvp to find out whether it's still in the
+                * namespace.
+                */
+               if (error == ENOENT && tvp) {
+                       /*
+                        * It's okay to just check C_NOEXISTS without having a lock,
+                        * because we have an iocount on it from the vfs layer so it
+                        * can't have disappeared.
+                        */
+                       if (VTOC(tvp)->c_flag & C_NOEXISTS) {
+                               /*
+                                * tvp is no longer in the namespace.  Try again with
+                                * NULL tvp/tcp (NULLing these out is fine because the
+                                * vfs syscall will vnode_put the vnodes).
+                                */
+                               tcp = NULL;
+                               tvp = NULL;
+                               goto retry;
+                       }
+               }
                return (error);
        }
 
@@ -2815,7 +2839,7 @@ hfs_vnop_symlink(struct vnop_symlink_args *ap)
        }
 
        /* Write the link to disk */
-       bp = buf_getblk(vp, (daddr64_t)0, roundup((int)fp->ff_size, VTOHFS(vp)->hfs_phys_block_size),
+       bp = buf_getblk(vp, (daddr64_t)0, roundup((int)fp->ff_size, hfsmp->hfs_physical_block_size),
                        0, 0, BLK_META);
        if (hfsmp->jnl) {
                journal_modify_block_start(hfsmp->jnl, bp);
@@ -3185,8 +3209,7 @@ hfs_vnop_readlink(ap)
 
                MALLOC(fp->ff_symlinkptr, char *, fp->ff_size, M_TEMP, M_WAITOK);
                error = (int)buf_meta_bread(vp, (daddr64_t)0,
-                                           roundup((int)fp->ff_size,
-                                           VTOHFS(vp)->hfs_phys_block_size),
+                                           roundup((int)fp->ff_size, VTOHFS(vp)->hfs_physical_block_size),
                                            vfs_context_ucred(ap->a_context), &bp);
                if (error) {
                        if (bp)
index 86307fbcb94147f5d84f63c0ab7274c4849da82f..70108d229c4d81913d18aeed404e25fde948a9bd 100644 (file)
@@ -230,8 +230,8 @@ OSStatus BTOpenPath(FCB *filePtr, KeyCompareProcPtr keyCompareProc)
        btreePtr->fileRefNum            = GetFileRefNumFromFCB(filePtr);
        filePtr->fcbBTCBPtr                     = (Ptr) btreePtr;       // attach btree cb to file
 
-       /* The minimum node size is the physical block size */
-       nodeRec.blockSize = VTOHFS(btreePtr->fileRefNum)->hfs_phys_block_size;
+       /* Prefer doing I/O a physical block at a time */
+       nodeRec.blockSize = VTOHFS(btreePtr->fileRefNum)->hfs_physical_block_size;
 
        /* Start with the allocation block size for regular files. */
        if (FTOC(filePtr)->c_fileid >= kHFSFirstUserCatalogNodeID)
@@ -301,8 +301,8 @@ OSStatus BTOpenPath(FCB *filePtr, KeyCompareProcPtr keyCompareProc)
 
        // set kBadClose attribute bit, and UpdateNode
 
-       /* b-tree node size must be at least as big as the physical block size */
-       if (btreePtr->nodeSize < nodeRec.blockSize)
+       /* b-tree node size must be at least as big as the logical block size */
+       if (btreePtr->nodeSize < VTOHFS(btreePtr->fileRefNum)->hfs_logical_block_size)
        {
                /*
                 * If this tree has any records or the media is writeable then
index 97e308497214d67cce0872adbf05947807c2becb..9ac5c926f37bd3f2bd36169257d4e1fbbe4b172e 100644 (file)
@@ -584,11 +584,22 @@ static OSErr      InsertNode      (BTreeControlBlockPtr    btreePtr,
 
        /////////////////////// Try Simple Insert ///////////////////////////////
 
-       if ( node == leftNodeNum )
-               targetNode = leftNode;
-       else
-               targetNode = rightNode;
-
+       /* Sanity check our left and right nodes here. */
+       if (node == leftNodeNum) {
+               if (leftNode->buffer == NULL) {
+                       err = fsBTInvalidNodeErr;
+                       M_ExitOnError(err);
+               } else {
+                       targetNode = leftNode;
+               }
+       } else {
+               /* We can assume the right node is initialized. */
+               targetNode = rightNode;
+       }
+
        recordFit = InsertKeyRecord (btreePtr, targetNode->buffer, index, key->keyPtr, key->keyLength, key->recPtr, key->recSize);
 
        if ( recordFit )
@@ -605,7 +616,7 @@ static OSErr        InsertNode      (BTreeControlBlockPtr    btreePtr,
        
        if ( !recordFit && leftNodeNum > 0 )
        {
-               PanicIf ( leftNode->buffer != nil, "\p InsertNode: leftNode already aquired!");
+               PanicIf ( leftNode->buffer != nil, "\p InsertNode: leftNode already acquired!");
 
                if ( leftNode->buffer == nil )
                {
index a732366ca8be9a1bf8dd3efe5c82ee2a3ad84d5d..718a87bdc5cb7168dacec49f66a7dc1981014106 100644 (file)
@@ -487,7 +487,7 @@ OSErr MapFileBlockC (
        off_t                           tmpOff;
 
        allocBlockSize = vcb->blockSize;
-       sectorSize = VCBTOHFS(vcb)->hfs_phys_block_size;
+       sectorSize = VCBTOHFS(vcb)->hfs_logical_block_size;
 
        err = SearchExtentFile(vcb, fcb, offset, &foundKey, foundData, &foundIndex, &hint, &nextFABN);
        if (err == noErr) {
index 0a999100e5b915d91645d876ba96b5fa7e51211a..be4d28c5e917265f8f58e6020032af10a37756e1 100644 (file)
@@ -86,6 +86,7 @@ Internal routines:
 #include <sys/types.h>
 #include <sys/buf.h>
 #include <sys/systm.h>
+#include <sys/disk.h>
 
 #include "../../hfs.h"
 #include "../../hfs_dbg.h"
@@ -1177,6 +1178,7 @@ OSErr BlockMarkFree(
        u_int32_t  wordsPerBlock;
     // XXXdbg
        struct hfsmount *hfsmp = VCBTOHFS(vcb);
+       dk_discard_t discard;
 
        /*
         * NOTE: We use vcb->totalBlocks instead of vcb->allocLimit because we
@@ -1189,6 +1191,10 @@ OSErr BlockMarkFree(
                goto Exit;
        }
 
+       memset(&discard, 0, sizeof(dk_discard_t));
+       discard.offset = (uint64_t)startingBlock * (uint64_t)vcb->blockSize;
+       discard.length = (uint64_t)numBlocks * (uint64_t)vcb->blockSize;
+
 
        //
        //      Pre-read the bitmap block containing the first word of allocation
@@ -1313,6 +1319,12 @@ Exit:
        if (buffer)
                (void)ReleaseBitmapBlock(vcb, blockRef, true);
 
+       if (err == noErr) {
+               // it doesn't matter if this fails; the discard is purely advisory
+               VNOP_IOCTL(vcb->hfs_devvp, DKIOCDISCARD, (caddr_t)&discard, 0, vfs_context_kernel());
+       }
+
+
        return err;
 
 Corruption:
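
Once the bitmap update succeeds, BlockMarkFree now advises the driver
(best-effort, result ignored) that the freed byte range can be
reclaimed, which is what lets SSDs and thinly provisioned storage
discard the blocks.  A sketch of how the allocation-block range maps to
the discard extent, using a stand-in for the dk_discard_t this commit
introduces:

    #include <stdint.h>
    #include <stdio.h>

    /* Stand-in mirroring the dk_discard_t added to <sys/disk.h>. */
    struct discard_sketch {
            uint64_t offset;   /* bytes from the start of the volume */
            uint64_t length;   /* bytes */
    };

    int main(void)
    {
            uint32_t startingBlock = 1000, numBlocks = 16, blockSize = 4096;
            struct discard_sketch d;

            /* Widen before multiplying so large volumes don't overflow
             * 32-bit arithmetic -- the same casts the hunk above uses. */
            d.offset = (uint64_t)startingBlock * (uint64_t)blockSize;
            d.length = (uint64_t)numBlocks * (uint64_t)blockSize;
            printf("discard %llu bytes at offset %llu\n",
                   (unsigned long long)d.length,
                   (unsigned long long)d.offset);
            return 0;
    }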
index 50d955a3f7ac1729f9944d8da9f7cdc069a40e0c..cf4ee656a3d627a42f21d518c8109b343dd08b62 100644 (file)
@@ -1015,29 +1015,29 @@ parse_bsd_args(void)
        char namep[16];
        int msgbuf;
 
-       if (PE_parse_boot_arg("-s", namep))
+       if (PE_parse_boot_argn("-s", namep, sizeof (namep)))
                boothowto |= RB_SINGLE;
 
-       if (PE_parse_boot_arg("-b", namep))
+       if (PE_parse_boot_argn("-b", namep, sizeof (namep)))
                boothowto |= RB_NOBOOTRC;
 
-       if (PE_parse_boot_arg("-x", namep)) /* safe boot */
+       if (PE_parse_boot_argn("-x", namep, sizeof (namep))) /* safe boot */
                boothowto |= RB_SAFEBOOT;
 
-       if (PE_parse_boot_arg("-l", namep)) /* leaks logging */
+       if (PE_parse_boot_argn("-l", namep, sizeof (namep))) /* leaks logging */
                turn_on_log_leaks = 1;
 
-       PE_parse_boot_arg("srv", &srv);
-       PE_parse_boot_arg("ncl", &ncl);
-       if (PE_parse_boot_arg("nbuf", &max_nbuf_headers)) {
+       PE_parse_boot_argn("srv", &srv, sizeof (srv));
+       PE_parse_boot_argn("ncl", &ncl, sizeof (ncl));
+       if (PE_parse_boot_argn("nbuf", &max_nbuf_headers, sizeof (max_nbuf_headers))) {
                customnbuf = 1;
        }
 #if !defined(SECURE_KERNEL)
-       PE_parse_boot_arg("kmem", &setup_kmem);
+       PE_parse_boot_argn("kmem", &setup_kmem, sizeof (setup_kmem));
 #endif
-       PE_parse_boot_arg("trace", &new_nkdbufs);
+       PE_parse_boot_argn("trace", &new_nkdbufs, sizeof (new_nkdbufs));
 
-       if (PE_parse_boot_arg("msgbuf", &msgbuf)) {
+       if (PE_parse_boot_argn("msgbuf", &msgbuf, sizeof (msgbuf))) {
                log_setsize(msgbuf);
        }
 }
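
The change above is mechanical -- every PE_parse_boot_arg() becomes
PE_parse_boot_argn() with an explicit destination size -- but the point
is that a long boot-arg value can no longer overrun a small stack
buffer such as the 16-byte namep.  A rough user-space analog of what
the extra argument buys (parse_boot_argn below is a hypothetical
stand-in that handles string values only; the kernel variant also
decodes integers):

    #include <stdio.h>
    #include <string.h>

    /* Hypothetical stand-in for the kernel's boot-args line. */
    static const char bootargs[] = "-s srv=1 msgbuf=16384";

    static int parse_boot_argn(const char *name, char *buf, size_t bufsz)
    {
            const char *p = strstr(bootargs, name);
            size_t n;

            if (p == NULL || bufsz == 0)
                    return 0;
            p += strlen(name);
            if (*p == '=')
                    p++;
            n = strcspn(p, " ");
            if (n >= bufsz)
                    n = bufsz - 1;   /* truncate instead of overflowing */
            memcpy(buf, p, n);
            buf[n] = '\0';
            return 1;
    }

    int main(void)
    {
            char val[16];

            if (parse_boot_argn("msgbuf", val, sizeof val))
                    printf("msgbuf=%s\n", val);
            return 0;
    }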
index 657d9fde6e7df6d1faa014c0654e4f0f64b5bafe..37a5afb3406a56b9714e032f0166ea081614043e 100644 (file)
@@ -311,7 +311,7 @@ issingleuser(void)
 {
        char namep[16];
 
-       if (PE_parse_boot_arg("-s", namep)) {
+       if (PE_parse_boot_argn("-s", namep, sizeof(namep))) {
                return(1);
        } else {
                return(0);
index 1f421780e0c40aef3af1223a75d6baa0109034c5..f736a0c8a2c5ee027b825ca794ecd2ce9b3470ed 100644 (file)
@@ -520,15 +520,26 @@ __private_extern__ struct sysent sysent[] = {
        {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0},                           /* 358 = nosys */
        {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0},                           /* 359 = nosys */
 #endif
+#if CONFIG_WORKQUEUE
        {AC(bsdthread_create_args), 0, 0, (sy_call_t *)bsdthread_create, munge_wwwww, munge_ddddd, _SYSCALL_RET_ADDR_T, 20}, /* 360 = bsdthread_create */
        {AC(bsdthread_terminate_args), 0, 0, (sy_call_t *)bsdthread_terminate, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 361 = bsdthread_terminate */
+#else
+       {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0},                           /* 360 = nosys */
+       {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0},                           /* 361 = nosys */
+#endif
        {0, 0, 0, (sy_call_t *)kqueue, NULL, NULL, _SYSCALL_RET_INT_T, 0},                          /* 362 = kqueue */
        {AC(kevent_args), 0, 0, (sy_call_t *)kevent, munge_wwwwww, munge_dddddd, _SYSCALL_RET_INT_T, 24}, /* 363 = kevent */
        {AC(lchown_args), 0, 0, (sy_call_t *)lchown, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12},  /* 364 = lchown */
        {AC(stack_snapshot_args), 0, 0, (sy_call_t *)stack_snapshot, munge_wwww, munge_dddd, _SYSCALL_RET_INT_T, 16}, /* 365 = stack_snapshot */
+#if CONFIG_WORKQUEUE
        {AC(bsdthread_register_args), 0, 0, (sy_call_t *)bsdthread_register, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 366 = bsdthread_register */
        {0, 0, 0, (sy_call_t *)workq_open, NULL, NULL, _SYSCALL_RET_INT_T, 0},                      /* 367 = workq_open */
        {AC(workq_ops_args), 0, 0, (sy_call_t *)workq_ops, munge_www, munge_ddd, _SYSCALL_RET_INT_T, 12}, /* 368 = workq_ops */
+#else
+       {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0},                           /* 366 = nosys */
+       {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0},                           /* 367 = nosys */
+       {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0},                           /* 368 = nosys */
+#endif
        {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0},                           /* 369 = nosys */
        {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0},                           /* 370 = nosys */
        {0, 0, 0, (sy_call_t *)nosys, NULL, NULL, _SYSCALL_RET_INT_T, 0},                           /* 371 = nosys */
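
Note that when CONFIG_WORKQUEUE is disabled the slots are filled with
nosys stubs instead of being removed: the syscall number is the table
index, so dropping entries 360-361 and 366-368 would renumber every
syscall after them and break the ABI.  A toy illustration of why the
placeholders matter:

    #include <stdio.h>

    typedef int (*sy_call_t)(void);

    static int nosys(void)     { return -1; }  /* stands in for the ENOSYS stub */
    static int my_kqueue(void) { return 0;  }

    /* Entries 0..359 elided; with workqueues configured out, slots
     * 360-361 still occupy positions so kqueue keeps number 362. */
    static sy_call_t sysent_sketch[] = {
            nosys,      /* 360 = bsdthread_create (configured out) */
            nosys,      /* 361 = bsdthread_terminate (configured out) */
            my_kqueue,  /* 362 = kqueue -- same number either way */
    };

    int main(void)
    {
            /* index 2 of this sketch corresponds to syscall 362 */
            printf("syscall 362 returns %d\n", sysent_sketch[2]());
            return 0;
    }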
index 952d2b87c3f7639d334b0f062ce4a48b81360f33..7377435358ca785d5148746628e36094185ec332 100644 (file)
@@ -402,7 +402,7 @@ kdbg_lock_init(void)
        /* get the number of cpus and cache it */
 #define BSD_HOST 1
        host_info((host_t)BSD_HOST, HOST_BASIC_INFO, (host_info_t)&hinfo, &count);
-       kd_cpus = hinfo.physical_cpu_max;
+       kd_cpus = hinfo.logical_cpu_max;
 
        if (kmem_alloc(kernel_map, (unsigned int *)&kdbip,
                       sizeof(struct kd_bufinfo) * kd_cpus) != KERN_SUCCESS)
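
kdebug keeps one kd_bufinfo per CPU, and on SMT hardware logical CPUs
outnumber physical ones, so sizing the array by physical_cpu_max can
under-allocate kdbip.  The same counters are visible from user space (a
quick check, not the kernel code):

    #include <mach/mach.h>
    #include <stdio.h>

    int main(void)
    {
            host_basic_info_data_t hinfo;
            mach_msg_type_number_t count = HOST_BASIC_INFO_COUNT;

            if (host_info(mach_host_self(), HOST_BASIC_INFO,
                          (host_info_t)&hinfo, &count) != KERN_SUCCESS)
                    return 1;
            /* Per-CPU buffers must cover every schedulable CPU, hence
             * the switch to logical_cpu_max in kdbg_lock_init(). */
            printf("physical_cpu_max %d, logical_cpu_max %d\n",
                   hinfo.physical_cpu_max, hinfo.logical_cpu_max);
            return 0;
    }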
index fcd359b617e55b1792b7eef0066e884c708fdaf1..70fb531b359c4497125858701368bab1cb21fd7d 100644 (file)
@@ -114,7 +114,11 @@ kern_return_t thread_getstatus(register thread_t act, int flavor,
 void task_act_iterate_wth_args(task_t, void(*)(thread_t, void *), void *);
 
 
+#ifdef SECURE_KERNEL
+__private_extern__ int do_coredump = 0;        /* default: don't dump cores */
+#else
 __private_extern__ int do_coredump = 1;        /* default: dump cores */
+#endif
 __private_extern__ int sugid_coredump = 0; /* default: but not SGUID binaries */
 
 void
index 0a67834ff191d0a05bf50976e71c2dd809ecca4c..d7711096fb515efe1e39a8200458e54ce2adc8c9 100644 (file)
 #include <kern/kalloc.h>
 #include <libkern/OSAtomic.h>
 
-#include <sys/ubc.h>
+#include <sys/ubc_internal.h>
 
 struct psemnode;
 struct pshmnode;
@@ -142,6 +142,8 @@ extern int soo_stat(struct socket *so, void *ub, int isstat64);
 
 extern kauth_scope_t   kauth_scope_fileop;
 
+extern int cs_debug;
+
 #define f_flag f_fglob->fg_flag
 #define f_type f_fglob->fg_type
 #define f_msgcount f_fglob->fg_msgcount
@@ -1370,6 +1372,14 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval)
                        goto outdrop;
                }
 
+               if (ubc_cs_blob_get(vp, CPU_TYPE_ANY, fs.fs_file_start)) {
+                       if (cs_debug)
+                               printf("CODE SIGNING: resident blob offered for: %s\n", vp->v_name);
+                       vnode_put(vp);
+                       goto outdrop;
+               }
+
 #define CS_MAX_BLOB_SIZE (1ULL * 1024 * 1024) /* XXX ? */
                if (fs.fs_blob_size > CS_MAX_BLOB_SIZE) {
                        error = E2BIG;
@@ -1378,9 +1388,7 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval)
                }
 
                kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
-               kr = kmem_alloc(kernel_map,
-                               &kernel_blob_addr,
-                               kernel_blob_size);
+               kr = ubc_cs_blob_allocate(&kernel_blob_addr, &kernel_blob_size);
                if (kr != KERN_SUCCESS) {
                        error = ENOMEM;
                        vnode_put(vp);
@@ -1391,9 +1399,8 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval)
                               (void *) kernel_blob_addr,
                               kernel_blob_size);
                if (error) {
-                       kmem_free(kernel_map,
-                                 kernel_blob_addr,
-                                 kernel_blob_size);
+                       ubc_cs_blob_deallocate(kernel_blob_addr,
+                                              kernel_blob_size);
                        vnode_put(vp);
                        goto outdrop;
                }
@@ -1405,11 +1412,10 @@ fcntl_nocancel(proc_t p, struct fcntl_nocancel_args *uap, register_t *retval)
                        kernel_blob_addr,
                        kernel_blob_size);
                if (error) {
-                       kmem_free(kernel_map,
-                                 kernel_blob_addr,
-                                 kernel_blob_size);
+                       ubc_cs_blob_deallocate(kernel_blob_addr,
+                                              kernel_blob_size);
                } else {
-                       /* ubc_blob_add() was consumed "kernel_blob_addr" */
+                       /* ubc_blob_add() has consumed "kernel_blob_addr" */
                }
 
                (void) vnode_put(vp);
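The F_ADDSIGS path above now routes the signature buffer through the new ubc_cs_blob_allocate()/ubc_cs_blob_deallocate() pair (defined in the ubc_subr.c hunks later in this change) instead of raw kmem_alloc()/kmem_free(), and bails out early if the vnode already has a resident blob. The ownership rule is worth spelling out; a condensed sketch, with argument lists abbreviated to the names used in the hunk:

        kernel_blob_size = CAST_DOWN(vm_size_t, fs.fs_blob_size);
        if (ubc_cs_blob_allocate(&kernel_blob_addr,
                                 &kernel_blob_size) != KERN_SUCCESS)
                return ENOMEM;

        error = copyin(fs.fs_blob_start, (void *)kernel_blob_addr,
                       kernel_blob_size);
        if (!error)
                error = ubc_cs_blob_add(vp, CPU_TYPE_ANY, fs.fs_file_start,
                                        kernel_blob_addr, kernel_blob_size);
        if (error)
                /* still ours: the blob was never attached to the vnode */
                ubc_cs_blob_deallocate(kernel_blob_addr, kernel_blob_size);
        /* on success, ubc_cs_blob_add() has consumed kernel_blob_addr */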
index 6b2702d7b63e0b9475ccd52f5b2d6b79ba6c394d..43ab48894ec26a8a6dc295f16e6db945ee4a27fa 100644 (file)
@@ -893,9 +893,6 @@ grade:
                imgp->ip_csflags |= CS_KILL;
 
 
-       /* load_machfile() maps the vnode */
-       (void)ubc_map(imgp->ip_vp, PROT_READ | PROT_EXEC);
-
        /*
         * Set up the system reserved areas in the new address space.
         */
@@ -919,8 +916,6 @@ grade:
         */
        error = exec_handle_sugid(imgp);
 
-       proc_knote(p, NOTE_EXEC);
-
        if (!vfexec && (p->p_lflag & P_LTRACED))
                psignal(p, SIGTRAP);
 
@@ -928,6 +923,13 @@ grade:
                goto badtoolate;
        }
        
+#if CONFIG_MACF
+       /* Determine if the map will allow VM_PROT_COPY */
+       error = mac_proc_check_map_prot_copy_allow(p);
+       vm_map_set_prot_copy_allow(get_task_map(task), 
+                                  error ? FALSE : TRUE);
+#endif 
+
        if (load_result.unixproc &&
                create_unix_stack(get_task_map(task),
                                  load_result.user_stack,
@@ -1127,6 +1129,8 @@ grade:
        }
 
 badtoolate:
+       proc_knote(p, NOTE_EXEC);
+
        if (vfexec) {
                task_deallocate(new_task);
                thread_deallocate(thread);
@@ -1196,6 +1200,7 @@ exec_activate_image(struct image_params *imgp)
        int once = 1;   /* save SGUID-ness for interpreted files */
        int i;
        int iterlimit = EAI_ITERLIMIT;
+       proc_t p = vfs_context_proc(imgp->ip_vfs_context);
 
        error = execargs_alloc(imgp);
        if (error)
@@ -1209,7 +1214,7 @@ exec_activate_image(struct image_params *imgp)
         */
        error = exec_save_path(imgp, imgp->ip_user_fname, imgp->ip_seg);
        if (error) {
-               goto bad;
+               goto bad_notrans;
        }
 
        DTRACE_PROC1(exec, uintptr_t, imgp->ip_strings);
@@ -1220,10 +1225,12 @@ exec_activate_image(struct image_params *imgp)
 again:
        error = namei(&nd);
        if (error)
-               goto bad;
+               goto bad_notrans;
        imgp->ip_ndp = &nd;     /* successful namei(); call nameidone() later */
        imgp->ip_vp = nd.ni_vp; /* if set, need to vnode_put() at some point */
 
+       proc_transstart(p, 0);
+
        error = exec_check_permissions(imgp);
        if (error)
                goto bad;
@@ -1292,6 +1299,7 @@ encapsulated_binary:
 
                        nd.ni_segflg = UIO_SYSSPACE32;
                        nd.ni_dirp = CAST_USER_ADDR_T(imgp->ip_interp_name);
+                       proc_transend(p, 0);
                        goto again;
 
                default:
@@ -1310,6 +1318,9 @@ encapsulated_binary:
        }
 
 bad:
+       proc_transend(p, 0);
+
+bad_notrans:
        if (imgp->ip_strings)
                execargs_free(imgp);
        if (imgp->ip_ndp)
@@ -1949,7 +1960,7 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, register_t *retval)
        if (!(uthread->uu_flag & UT_VFORK)) {
                if (task != kernel_task) { 
                        proc_lock(p);
-                       numthreads = get_task_numacts(task);
+                       numthreads = get_task_numactivethreads(task);
                        if (numthreads <= 0 ) {
                                proc_unlock(p);
                                kauth_cred_unref(&context.vc_ucred);
@@ -1974,9 +1985,7 @@ __mac_execve(proc_t p, struct __mac_execve_args *uap, register_t *retval)
        }
 #endif
 
-       proc_transstart(p, 0);
        error = exec_activate_image(imgp);
-       proc_transend(p, 0);
 
        kauth_cred_unref(&context.vc_ucred);
        
@@ -2711,7 +2720,8 @@ exec_handle_sugid(struct image_params *imgp)
         */
        p->p_ucred = kauth_cred_setsvuidgid(p->p_ucred, kauth_cred_getuid(p->p_ucred),  p->p_ucred->cr_gid);
        
-       /* XXX Obsolete; security token should not be separate from cred */
+       /* Update the process' identity version and set the security token */
+       p->p_idversion++;
        set_security_token(p);
 
        return(error);
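Two things happen in the kern_exec.c hunks above. First, the proc_transstart()/proc_transend() pair moves from __mac_execve() down into exec_activate_image(), with a new bad_notrans label so failures before the transaction begins (exec_save_path(), namei()) skip the transend, and the interpreter-rescan path ends the transaction before looping back to `again`. The label discipline, as a generic sketch (helper names hypothetical):

        error = prepare_before_transaction();   /* e.g. exec_save_path() */
        if (error)
                goto bad_notrans;

        proc_transstart(p, 0);                  /* transaction now held */

        error = work_inside_transaction();      /* hypothetical stand-in */
        if (error)
                goto bad;                       /* must undo transstart */
        /* the normal completion path falls through to the same cleanup */
bad:
        proc_transend(p, 0);
bad_notrans:
        common_cleanup();                       /* safe in both cases */

Second, proc_knote(p, NOTE_EXEC) moves from mid-setup to the common badtoolate tail, so EVFILT_PROC watchers are notified only once exec has passed the point of no return.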
index 27f98defbaf6af68a9d6e0c667af0f4ba0ab235c..a8e9ef7f0713a4dd5c5acde86d4b3ddc71a95386 100644 (file)
@@ -418,6 +418,17 @@ proc_exit(proc_t p)
         */
        fdfree(p);
 
+       if (uth->uu_lowpri_window) {
+               /*
+                * task is marked as a low priority I/O type
+                * and the I/O we issued while flushing files on close
+                * collided with normal I/O operations...
+                * no need to throttle this thread since it's going away
+                * but we do need to update our bookkeeping w.r.t. throttled threads
+                */
+               throttle_lowpri_io(FALSE);
+       }
+
 #if SYSV_SHM
        /* Close ref SYSV Shared memory*/
        if (p->vm_shm)
@@ -777,6 +788,15 @@ proc_exit(proc_t p)
                (void)reap_child_locked(pp, p, 1, 1, 1);
                /* list lock dropped by reap_child_locked */
        }
+       if (uth->uu_lowpri_window) {
+               /*
+                * task is marked as a low priority I/O type and we've
+                * somehow picked up another throttle during exit processing...
+                * no need to throttle this thread since its going away
+                * no need to throttle this thread since it's going away
+                * but we do need to update our bookkeeping w.r.t. throttled threads
+               throttle_lowpri_io(FALSE);
+       }
 
        proc_rele(pp);
 
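Both proc_exit() insertions (and the matching one in uthread_cleanup() in kern_fork.c below) follow the same contract with the rewritten throttling code in spec_vnops.c at the end of this change. A sketch of the intended semantics, inferred from those hunks:

        /*
         * throttle_lowpri_io(TRUE)  - honor the throttle window, may sleep
         * throttle_lowpri_io(FALSE) - exiting thread: skip the sleep, but
         *     still drop this thread from the throttled-thread bookkeeping
         *     so the per-device counters in _throttle_io_info stay balanced
         */
        if (uth->uu_lowpri_window)
                throttle_lowpri_io(FALSE);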
index fa43edd2eb8b309d31c1761cd861735bb88ffc2b..3bc45c1cec3df5ca05f4775584e494f4a375110e 100644 (file)
@@ -850,7 +850,7 @@ proc_t
 forkproc(proc_t parent, int lock)
 {
        struct proc *  child;   /* Our new process */
-       static int nextpid = 0, pidwrap = 0;
+       static int nextpid = 0, pidwrap = 0, nextpidversion = 0;
        int error = 0;
        struct session *sessp;
        uthread_t uth_parent = (uthread_t)get_bsdthread_info(current_thread());
@@ -926,6 +926,7 @@ retry:
        }
        nprocs++;
        child->p_pid = nextpid;
+       child->p_idversion = nextpidversion++;
 #if 1
        if (child->p_pid != 0) {
                if (pfind_locked(child->p_pid) != PROC_NULL)
@@ -1183,6 +1184,17 @@ uthread_cleanup(task_t task, void *uthread, void * bsd_info)
        uthread_t uth = (uthread_t)uthread;
        proc_t p = (proc_t)bsd_info;
 
+
+       if (uth->uu_lowpri_window) {
+               /*
+                * task is marked as a low priority I/O type
+                * and we've somehow managed to not dismiss the throttle
+                * through the normal exit paths back to user space...
+                * no need to throttle this thread since it's going away
+                * but we do need to update our bookkeeping w.r.t. throttled threads
+                */
+               throttle_lowpri_io(FALSE);
+       }
        /*
         * Per-thread audit state should never last beyond system
         * call return.  Since we don't audit the thread creation/
index 78380edcfe6e8bce5c13e6cbd3a3876b440c0342..912fdef3f50031aecbd7da35f8926a1e9b8798a3 100644 (file)
 #include <libkern/libkern.h>
 #include <sys/sysctl.h>
 
+extern unsigned int    vm_page_free_count;
+extern unsigned int    vm_page_active_count;
+extern unsigned int    vm_page_inactive_count;
+extern unsigned int    vm_page_purgeable_count;
+extern unsigned int    vm_page_wire_count;
+
 static void kern_memorystatus_thread(void);
 
 int kern_memorystatus_wakeup = 0;
-int kern_memorystatus_pause = 0;
 int kern_memorystatus_level = 0;
 int kern_memorystatus_last_level = 0;
 unsigned int kern_memorystatus_kev_failure_count = 0;
@@ -82,6 +87,13 @@ static void
 kern_memorystatus_thread(void)
 {
        struct kev_msg ev_msg;
+       struct {
+               uint32_t free_pages;
+               uint32_t active_pages;
+               uint32_t inactive_pages;
+               uint32_t purgeable_pages;
+               uint32_t wired_pages;
+       } data;
        int ret;
 
        while(1) {
@@ -95,7 +107,15 @@ kern_memorystatus_thread(void)
                /* pass the memory status level in the event code (as percent used) */
                ev_msg.event_code     = 100 - kern_memorystatus_last_level;
 
-               ev_msg.dv[0].data_length = 0;
+               ev_msg.dv[0].data_length = sizeof data;
+               ev_msg.dv[0].data_ptr = &data;
+               ev_msg.dv[1].data_length = 0;
+
+               data.free_pages = vm_page_free_count;
+               data.active_pages = vm_page_active_count;
+               data.inactive_pages = vm_page_inactive_count;
+               data.purgeable_pages = vm_page_purgeable_count;
+               data.wired_pages = vm_page_wire_count;
 
                ret = kev_post_msg(&ev_msg);
                if (ret) {
@@ -103,9 +123,6 @@ kern_memorystatus_thread(void)
                        printf("%s: kev_post_msg() failed, err %d\n", __func__, ret);
                }
 
-               assert_wait_timeout((event_t)&kern_memorystatus_pause, THREAD_UNINT, 1, 250*1000*NSEC_PER_USEC);
-               (void)thread_block(THREAD_CONTINUE_NULL);
-
                if (kern_memorystatus_level >= kern_memorystatus_last_level + 5 ||
                    kern_memorystatus_level <= kern_memorystatus_last_level - 5)
                        continue;
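The memorystatus event now carries a snapshot of the VM page counters in its first data vector, and the fixed 250 ms assert_wait_timeout() between posts is dropped; the thread reposts only when the level moves by 5 points or more. A listener-side sketch of decoding the payload: the struct mirrors the kernel's anonymous `data` above, and the kernel-event socket plumbing (PF_SYSTEM/SYSPROTO_EVENT, <sys/kern_event.h>) is assumed rather than shown:

        struct memorystatus_pages {        /* must match the kernel layout */
                uint32_t free_pages;
                uint32_t active_pages;
                uint32_t inactive_pages;
                uint32_t purgeable_pages;
                uint32_t wired_pages;
        };

        static void
        handle_msg(const struct kern_event_msg *msg)
        {
                /* percent used rides in the event code itself */
                const struct memorystatus_pages *pg =
                    (const struct memorystatus_pages *)&msg->event_data[0];

                printf("used=%u%% free=%u inactive=%u wired=%u\n",
                       msg->event_code, pg->free_pages,
                       pg->inactive_pages, pg->wired_pages);
        }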
index 0b8f8f02468e51bd418aee91d8afdfd8c45fa860..01ac7e637e1e1664379268f83eb495bb40161361 100644 (file)
@@ -592,7 +592,7 @@ sysctl_mib_init(void)
        if (cpusubtype == CPU_SUBTYPE_POWERPC_970 && 
            cpu_info.l2_cache_size == 1 * 1024 * 1024)
                /* The signature of the dual-core G5 */
-               packages = hinfo.max_cpus / 2;
+               packages = roundup(hinfo.max_cpus, 2) / 2;
        else
                packages = hinfo.max_cpus;
 
@@ -647,9 +647,9 @@ sysctl_mib_init(void)
        cachesize[4] = 0;
 
        /* hw.packages */
-       packages = ml_cpu_cache_sharing(0) /
-                       cpuid_info()->cpuid_cores_per_package;
-       
+       packages = roundup(ml_cpu_cache_sharing(0), cpuid_info()->thread_count)
+                       / cpuid_info()->thread_count;
+
 #else /* end __arm__ */
 # warning we do not support this platform yet
 #endif /* __ppc__ */
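roundup(x, y) (from <sys/param.h>) rounds x up to the next multiple of y, so both branches now survive odd CPU counts. Worked out for the PPC dual-core-G5 case:

        #define roundup(x, y)   ((((x) + ((y) - 1)) / (y)) * (y))

        /* old: hinfo.max_cpus / 2                                        */
        /*      max_cpus == 1 (e.g. a cpus=1 boot-arg) -> 0 packages      */
        /* new: roundup(max_cpus, 2) / 2                                  */
        /*      max_cpus == 1 -> roundup(1, 2) / 2 == 2 / 2 -> 1 package  */
        /*      max_cpus == 4 -> roundup(4, 2) / 2 == 4 / 2 -> 2 packages */

The i386 branch gets the same treatment with the cpuid thread count, dividing the cache-sharing figure by threads per package rather than cores per package.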
index 499d6f61baea3f635ac3c4603b8bd9bdd1cc3e6e..9fa5bd2e017ae6ee0f418b63e5d24bb38dc1b308 100644 (file)
@@ -191,7 +191,7 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
        struct fileproc *fp;
        register struct         vnode *vp;
        int                     flags;
-       int                     prot, file_prot;
+       int                     prot;
        int                     err=0;
        vm_map_t                user_map;
        kern_return_t           result;
@@ -565,13 +565,6 @@ mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
                                (void)vnode_put(vp);
                                goto out;
                }
-
-               file_prot = prot & (PROT_READ | PROT_WRITE | PROT_EXEC);
-               if (docow) {
-                       /* private mapping: won't write to the file */
-                       file_prot &= ~PROT_WRITE;
-               }
-               (void) ubc_map(vp, file_prot);
        }
 
        if (!mapanon)
@@ -1231,7 +1224,6 @@ map_fd_funneled(
        }
 
        ubc_setthreadcred(vp, current_proc(), current_thread());
-       (void)ubc_map(vp, (PROT_READ | PROT_EXEC));
        (void)vnode_put(vp);
        err = 0;
 bad:
index a3c81e332aa59bbf89b348778d3174051f4425c9..9dd8f6ad1924023db5cec9baac204c341febc7bc 100644 (file)
@@ -155,7 +155,9 @@ lck_attr_t * lctx_lck_attr;
 static void    lctxinit(void);
 #endif
 
+#if DEBUG
 #define __PROC_INTERNAL_DEBUG 1
+#endif
 /* Name to give to core files */
 __private_extern__ char corefilename[MAXPATHLEN+1] = {"/cores/core.%P"};
 
@@ -284,16 +286,22 @@ out:
 int
 isinferior(proc_t p, proc_t t)
 {
-int retval = 0;
+       int retval = 0;
+       int nchecked = 0;
+       proc_t start = p;
 
        /* if p==t they are not inferior */
        if (p == t)
                return(0);
 
        proc_list_lock();
-       for (; p != t; p = p->p_pptr)
-               if (p->p_pid == 0)
+       for (; p != t; p = p->p_pptr) {
+               nchecked++;
+
+               /* Detect here if we're in a cycle */
+               if ((p->p_pid == 0) || (p->p_pptr == start) || (nchecked >= nprocs))
                        goto out;
+       }
        retval = 1;
 out:
        proc_list_unlock();
@@ -548,9 +556,9 @@ proc_childdrainend(proc_t p)
 }
 
 void
-proc_checkdeadrefs(proc_t p)
+proc_checkdeadrefs(__unused proc_t p)
 {
-//#if __PROC_INTERNAL_DEBUG
+#if __PROC_INTERNAL_DEBUG
        if ((p->p_listflag  & P_LIST_INHASH) != 0)
                panic("proc being freed and still in hash %x: %x\n", (unsigned int)p, (unsigned int)p->p_listflag);
        if (p->p_childrencnt != 0)
@@ -559,7 +567,7 @@ proc_checkdeadrefs(proc_t p)
                panic("proc being freed and pending refcount %x:%x\n", (unsigned int)p, (unsigned int)p->p_refcount);
        if (p->p_parentref != 0)
                panic("proc being freed and pending parentrefs %x:%x\n", (unsigned int)p, (unsigned int)p->p_parentref);
-//#endif
+#endif
 }
 
 int
@@ -755,6 +763,18 @@ proc_is64bit(proc_t p)
        return(IS_64BIT_PROCESS(p));
 }
 
+int
+proc_pidversion(proc_t p)
+{
+       return(p->p_idversion);
+}
+
+int
+proc_getcdhash(proc_t p, unsigned char *cdhash)
+{
+       return vn_getcdhash(p->p_textvp, p->p_textoff, cdhash);
+}
+
 void
 bsd_set_dependency_capable(task_t task)
 {
@@ -1705,7 +1725,6 @@ csops(__unused proc_t p, struct csops_args *uap, __unused register_t *retval)
                        buf = (char *)kalloc(usize);
                        if (buf == NULL) 
                                return(ENOMEM);
-
                        bzero(buf, usize);
 
                        error = vnode_getwithvid(tvp, vid);
@@ -2456,7 +2475,8 @@ SYSCTL_INT(_vm, OID_AUTO, cs_force_hard, CTLFLAG_RW, &cs_force_hard, 0, "");
 SYSCTL_INT(_vm, OID_AUTO, cs_debug, CTLFLAG_RW, &cs_debug, 0, "");
 
 int
-cs_invalid_page(void)
+cs_invalid_page(
+       addr64_t vaddr)
 {
        struct proc     *p;
        int             retval;
@@ -2475,48 +2495,41 @@ cs_invalid_page(void)
        if (cs_force_hard)
                p->p_csflags |= CS_HARD;
 
-       if (p->p_csflags & CS_VALID) {
-               p->p_csflags &= ~CS_VALID;
-
+       /* CS_KILL triggers us to send a kill signal. Nothing else. */
+       if (p->p_csflags & CS_KILL) {
                proc_unlock(p);
-               cs_procs_invalidated++;
-               printf("CODE SIGNING: cs_invalid_page: "
-                      "p=%d[%s] clearing CS_VALID\n",
-                      p->p_pid, p->p_comm);
+               if (cs_debug) {
+                       printf("CODE SIGNING: cs_invalid_page(0x%llx): "
+                              "p=%d[%s] honoring CS_KILL\n",
+                              vaddr, p->p_pid, p->p_comm);
+               }
+               cs_procs_killed++;
+               psignal(p, SIGKILL);
                proc_lock(p);
-
-
-               if (p->p_csflags & CS_KILL) {
-                       proc_unlock(p);
-                       if (cs_debug) {
-                               printf("CODE SIGNING: cs_invalid_page: "
-                                      "p=%d[%s] honoring CS_KILL\n",
-                                      p->p_pid, p->p_comm);
-                       }
-                       cs_procs_killed++;
-                       psignal(p, SIGKILL);
-                       proc_lock(p);
+       }
+       
+       /* CS_HARD means fail the mapping operation so the process stays valid. */
+       if (p->p_csflags & CS_HARD) {
+               proc_unlock(p);
+               if (cs_debug) {
+                       printf("CODE SIGNING: cs_invalid_page(0x%llx): "
+                              "p=%d[%s] honoring CS_HARD\n",
+                              vaddr, p->p_pid, p->p_comm);
                }
-               
-               if (p->p_csflags & CS_HARD) {
+               retval = 1;
+       } else {
+               if (p->p_csflags & CS_VALID) {
+                       p->p_csflags &= ~CS_VALID;
+                       
                        proc_unlock(p);
-                       if (cs_debug) {
-                               printf("CODE SIGNING: cs_invalid_page: "
-                                      "p=%d[%s] honoring CS_HARD\n",
-                                      p->p_pid, p->p_comm);
-                       }
-                       retval = 1;
+                       cs_procs_invalidated++;
+                       printf("CODE SIGNING: cs_invalid_page(0x%llx): "
+                              "p=%d[%s] clearing CS_VALID\n",
+                              vaddr, p->p_pid, p->p_comm);
                } else {
                        proc_unlock(p);
-                       retval = 0;
-               }
-       } else {
-               proc_unlock(p);
-               if (cs_debug) {
-                       printf("CODE SIGNING: cs_invalid_page: "
-                              "p=%d[%s] ignored...\n",
-                              p->p_pid, p->p_comm);
                }
+               
                retval = 0;
        }
 
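The cs_invalid_page() rewrite flattens the old nested chain into three independent checks and takes the faulting address for diagnostics. The resulting policy, as a control-flow sketch (locking and counters elided; see the hunk above for the real sequencing):

        int
        cs_invalid_page(addr64_t vaddr)
        {
                /* 1. CS_KILL: the process dies on any invalid page,
                 *    regardless of what we return */
                if (p->p_csflags & CS_KILL)
                        psignal(p, SIGKILL);

                /* 2. CS_HARD: fail the mapping operation; the process
                 *    keeps CS_VALID and may retry with valid pages */
                if (p->p_csflags & CS_HARD)
                        return 1;

                /* 3. soft mode: let the page through, but the process
                 *    is no longer considered validly signed */
                if (p->p_csflags & CS_VALID)
                        p->p_csflags &= ~CS_VALID;
                return 0;
        }

Note that CS_KILL and CS_HARD are no longer gated on CS_VALID, so a process that has already lost CS_VALID is still killed or refused on later faults.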
index 47c7983ca37848bff07696a21d03aebe5545ccac..46ab8bf1bce557f9ec7da33cf08d57b2a2c9618c 100644 (file)
@@ -1986,7 +1986,7 @@ set_security_token(proc_t p)
        audit_token.val[4] = my_cred->cr_rgid;
        audit_token.val[5] = p->p_pid;
        audit_token.val[6] = my_cred->cr_au.ai_asid;
-       audit_token.val[7] = my_cred->cr_au.ai_termid.port;
+       audit_token.val[7] = p->p_idversion;
 
 #if CONFIG_MACF_MACH
        mac_task_label_update_cred(my_cred, p->task);
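Audit token slot 7 previously duplicated the audit terminal port; it now carries p_idversion, which is assigned fresh at fork (kern_fork.c above) and bumped in exec_handle_sugid() (kern_exec.c above). Together with the pid in slot 5 this gives Mach IPC servers a cheap way to notice pid reuse or an exec behind a cached token; a hypothetical helper:

        #include <mach/mach.h>

        /* returns true iff two audit tokens name the same process
         * incarnation (same pid AND same identity version) */
        static boolean_t
        same_incarnation(const audit_token_t *a, const audit_token_t *b)
        {
                return (a->val[5] == b->val[5] &&   /* pid          */
                        a->val[7] == b->val[7]);    /* p_idversion  */
        }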
index 105ece4278643f544468f883da20d975146493dd..b9b14ffb5e8c8ef723a6df43787031b5fabf8365 100644 (file)
@@ -2709,7 +2709,7 @@ bsd_ast(thread_t thread)
 
 }
 
-/* ptrace set runnalbe */
+/* ptrace set runnable */
 void
 pt_setrunnable(proc_t p)
 {
@@ -2723,7 +2723,9 @@ pt_setrunnable(proc_t p)
                proc_unlock(p);
                if (p->sigwait) {
                        wakeup((caddr_t)&(p->sigwait));
-                       task_release(task);
+                       if ((p->p_lflag & P_LSIGEXC) == 0) {    // 5878479
+                               task_release(task);
+                       }
                }
        }
 }
index 82f89c2deddc8035ce1a74c0585c0ab23170a49f..b8673b4bc0f862b19e35f5f1dba54b1c208f4d6d 100644 (file)
@@ -607,7 +607,9 @@ kern_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
                && !(name[0] == KERN_PROC
                        || name[0] == KERN_PROF 
                        || name[0] == KERN_KDEBUG
+#if !CONFIG_EMBEDDED
                        || name[0] == KERN_PROCARGS
+#endif
                        || name[0] == KERN_PROCARGS2
                        || name[0] == KERN_IPC
                        || name[0] == KERN_SYSV
@@ -635,9 +637,11 @@ kern_sysctl(int *name, u_int namelen, user_addr_t oldp, size_t *oldlenp,
 #endif
        case KERN_KDEBUG:
                return (kdebug_ops(name + 1, namelen - 1, oldp, oldlenp, p));
+#if !CONFIG_EMBEDDED
        case KERN_PROCARGS:
                /* new one as it does not use kinfo_proc */
                return (sysctl_procargs(name + 1, namelen - 1, oldp, oldlenp, p));
+#endif
        case KERN_PROCARGS2:
                /* new one as it does not use kinfo_proc */
                return (sysctl_procargs2(name + 1, namelen - 1, oldp, oldlenp, p));
@@ -2224,6 +2228,9 @@ static int
 sysctl_coredump
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
+#ifdef SECURE_KERNEL
+       return (ENOTSUP);
+#endif
        int new_value, changed;
        int error = sysctl_io_number(req, do_coredump, sizeof(int), &new_value, &changed);
        if (changed) {
@@ -2243,6 +2250,9 @@ static int
 sysctl_suid_coredump
 (__unused struct sysctl_oid *oidp, __unused void *arg1, __unused int arg2, struct sysctl_req *req)
 {
+#ifdef SECURE_KERNEL
+       return (ENOTSUP);
+#endif
        int new_value, changed;
        int error = sysctl_io_number(req, sugid_coredump, sizeof(int), &new_value, &changed);
        if (changed) {
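With SECURE_KERNEL both handlers bail out with ENOTSUP before parsing the request, complementing the do_coredump = 0 default in the kern_core.c hunk near the top of this change: secure kernels neither dump cores nor let the knobs be flipped. From user space the OIDs stay reachable as kern.coredump and kern.sugid_coredump; a read-side check with the standard sysctl API (on a secure kernel even this read fails with ENOTSUP, since the handler returns before sysctl_io_number() runs):

        #include <sys/types.h>
        #include <sys/sysctl.h>
        #include <stdio.h>

        int
        main(void)
        {
                int val;
                size_t len = sizeof(val);

                if (sysctlbyname("kern.coredump", &val, &len, NULL, 0) == -1) {
                        perror("kern.coredump");
                        return 1;
                }
                printf("kern.coredump = %d\n", val);
                return 0;
        }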
index 9a4f954b90b90af2894491b69d5164c01a1c4e95..123339d3aacc15abbaaee7853b490d76f7fd51d3 100644 (file)
@@ -66,6 +66,7 @@
 #include <kern/kalloc.h>
 #include <kern/task.h>
 #include <kern/thread.h>
+#include <kern/page_decrypt.h>
 
 #include <mach-o/fat.h>
 #include <mach-o/loader.h>
@@ -162,6 +163,15 @@ int load_code_signature(
        cpu_type_t                      cputype,
        load_result_t                   *result);
        
+#if CONFIG_CODE_DECRYPTION
+static load_return_t
+set_code_unprotect(
+       struct encryption_info_command  *lcp,
+       caddr_t                         addr,
+       vm_map_t                        map,
+       struct vnode                    *vp);
+#endif
+
 static load_return_t
 load_unixthread(
        struct thread_command   *tcp,
@@ -436,7 +446,6 @@ parse_machfile(
                        kfree(kl_addr, kl_size);
                return(LOAD_IOERROR);
        }
-       /* (void)ubc_map(vp, PROT_EXEC); */ /* NOT HERE */
        
        /*
         *      Scan through the commands, processing each one as necessary.
@@ -551,6 +560,21 @@ parse_machfile(
                                        got_code_signatures = TRUE;
                                }
                                break;
+#if CONFIG_CODE_DECRYPTION
+                       case LC_ENCRYPTION_INFO:
+                               if (pass != 2)
+                                       break;
+                               ret = set_code_unprotect(
+                                       (struct encryption_info_command *) lcp,
+                                       addr, map, vp);
+                               if (ret != LOAD_SUCCESS) {
+                                       printf("proc %d: set unprotect error %d "
+                                              "for file \"%s\"\n",
+                                              p->p_pid, ret, vp->v_name);
+                                       ret = LOAD_SUCCESS; /* ignore error */
+                               }
+                               break;
+#endif
                        default:
                                /* Other commands are ignored by the kernel */
                                ret = LOAD_SUCCESS;
@@ -597,13 +621,10 @@ parse_machfile(
        if (kl_addr )
                kfree(kl_addr, kl_size);
 
-       if (ret == LOAD_SUCCESS)
-               (void)ubc_map(vp, PROT_READ | PROT_EXEC);
-               
        return(ret);
 }
 
-#ifdef __i386__
+#if CONFIG_CODE_DECRYPTION
 
 #define        APPLE_UNPROTECTED_HEADER_SIZE   (3 * PAGE_SIZE_64)
 
@@ -640,9 +661,14 @@ unprotect_segment_64(
                        map_size -= delta;
                }
                /* ... transform the rest of the mapping. */
+               struct pager_crypt_info crypt_info;
+               crypt_info.page_decrypt = dsmos_page_transform;
+               crypt_info.crypt_ops = NULL;
+               crypt_info.crypt_end = NULL;
                kr = vm_map_apple_protected(map,
                                            map_addr,
-                                           map_addr + map_size);
+                                           map_addr + map_size,
+                                           &crypt_info);
        }
 
        if (kr != KERN_SUCCESS) {
@@ -650,10 +676,10 @@ unprotect_segment_64(
        }
        return LOAD_SUCCESS;
 }
-#else  /* __i386__ */
+#else  /* CONFIG_CODE_DECRYPTION */
 #define unprotect_segment_64(file_off, file_size, map, map_addr, map_size) \
        LOAD_SUCCESS
-#endif /* __i386__ */
+#endif /* CONFIG_CODE_DECRYPTION */
 
 static
 load_return_t
@@ -1293,7 +1319,6 @@ load_dylinker(
        if (ret == LOAD_SUCCESS) {              
                result->dynlinker = TRUE;
                result->entry_point = myresult.entry_point;
-               (void)ubc_map(vp, PROT_READ | PROT_EXEC);
        }
 out:
        vnode_put(vp);
@@ -1316,6 +1341,7 @@ load_code_signature(
        int             resid;
        struct cs_blob  *blob;
        int             error;
+       vm_size_t       blob_size;
 
        addr = 0;
        blob = NULL;
@@ -1341,7 +1367,8 @@ load_code_signature(
                goto out;
        }
 
-       kr = kmem_alloc(kernel_map, &addr, round_page(lcp->datasize));
+       blob_size = lcp->datasize;
+       kr = ubc_cs_blob_allocate(&addr, &blob_size);
        if (kr != KERN_SUCCESS) {
                ret = LOAD_NOSPACE;
                goto out;
@@ -1383,13 +1410,117 @@ out:
                result->csflags |= blob->csb_flags;
        }
        if (addr != 0) {
-               kmem_free(kernel_map, addr, round_page(lcp->datasize));
+               ubc_cs_blob_deallocate(addr, blob_size);
                addr = 0;
        }
 
        return ret;
 }
 
+
+#if CONFIG_CODE_DECRYPTION
+
+static load_return_t
+set_code_unprotect(
+                  struct encryption_info_command *eip,
+                  caddr_t addr,        
+                  vm_map_t map,
+                  struct vnode *vp)
+{
+       int result, len;
+       char vpath[MAXPATHLEN];
+       pager_crypt_info_t crypt_info;
+       const char * cryptname = 0;
+       
+       size_t offset;
+       struct segment_command_64 *seg64;
+       struct segment_command *seg32;
+       vm_map_offset_t map_offset, map_size;
+       kern_return_t kr;
+       
+       switch(eip->cryptid) {
+               case 0:
+                       /* not encrypted, just an empty load command */
+                       return LOAD_SUCCESS;
+               case 1:
+                       cryptname="com.apple.unfree";
+                       break;
+               case 0x10:      
+                       /* some random cryptid that you could manually put into
+                        * your binary if you want NULL */
+                       cryptname="com.apple.null";
+                       break;
+               default:
+                       return LOAD_FAILURE;
+       }
+       
+       len = MAXPATHLEN;
+       result = vn_getpath(vp, vpath, &len);
+       if(result) return result;
+       
+       /* set up decrypter first */
+       if(NULL==text_crypter_create) return LOAD_FAILURE;
+       kr=text_crypter_create(&crypt_info, cryptname, (void*)vpath);
+       
+       if(kr) {
+               printf("set_code_unprotect: unable to find decrypter %s, kr=%d\n",
+                      cryptname, kr);
+               return LOAD_FAILURE;
+       }
+       
+       /* this is terrible, but we have to rescan the load commands to find the
+        * virtual address of this encrypted stuff. This code is gonna look like
+        * the dyld source one day... */
+       struct mach_header *header = (struct mach_header *)addr;
+       size_t mach_header_sz = sizeof(struct mach_header);
+       if (header->magic == MH_MAGIC_64 ||
+           header->magic == MH_CIGAM_64) {
+               mach_header_sz = sizeof(struct mach_header_64);
+       }
+       offset = mach_header_sz;
+       uint32_t ncmds = header->ncmds;
+       while (ncmds--) {
+               /*
+                *      Get a pointer to the command.
+                */
+               struct load_command *lcp = (struct load_command *)(addr + offset);
+               offset += lcp->cmdsize;
+               
+               switch(lcp->cmd) {
+                       case LC_SEGMENT_64:
+                               seg64 = (struct segment_command_64 *)lcp;
+                               if ((seg64->fileoff <= eip->cryptoff) &&
+                                   (seg64->fileoff+seg64->filesize >= 
+                                    eip->cryptoff+eip->cryptsize)) {
+                                       map_offset = seg64->vmaddr + eip->cryptoff - seg64->fileoff;
+                                       map_size = eip->cryptsize;
+                                       goto remap_now;
+                               }
+                       case LC_SEGMENT:
+                               seg32 = (struct segment_command *)lcp;
+                               if ((seg32->fileoff <= eip->cryptoff) &&
+                                   (seg32->fileoff+seg32->filesize >= 
+                                    eip->cryptoff+eip->cryptsize)) {
+                                       map_offset = seg32->vmaddr + eip->cryptoff - seg32->fileoff;
+                                       map_size = eip->cryptsize;
+                                       goto remap_now;
+                               }
+               }
+       }
+       
+       /* if we get here, we did not find a matching segment */
+       return LOAD_FAILURE;
+       
+remap_now:
+       /* now remap using the decrypter */
+       kr = vm_map_apple_protected(map, map_offset, map_offset+map_size, &crypt_info);
+       if(kr) printf("set_code_unprotect(): mapping failed with %x\n", kr);
+       
+       return LOAD_SUCCESS;
+}
+
+#endif
+
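One hazard worth flagging in set_code_unprotect(): the LC_SEGMENT_64 case has no break, so a 64-bit segment whose range test fails falls into the LC_SEGMENT case and is reinterpreted through the 32-bit struct layout, where fileoff/filesize sit at different offsets. A defensive rewrite would terminate each case explicitly (sketch; only the switch shown):

        switch (lcp->cmd) {
        case LC_SEGMENT_64:
                seg64 = (struct segment_command_64 *)lcp;
                if (seg64->fileoff <= eip->cryptoff &&
                    seg64->fileoff + seg64->filesize >=
                        eip->cryptoff + eip->cryptsize) {
                        map_offset = seg64->vmaddr +
                            eip->cryptoff - seg64->fileoff;
                        map_size = eip->cryptsize;
                        goto remap_now;
                }
                break;  /* do not fall into the 32-bit case */
        case LC_SEGMENT:
                seg32 = (struct segment_command *)lcp;
                if (seg32->fileoff <= eip->cryptoff &&
                    seg32->fileoff + seg32->filesize >=
                        eip->cryptoff + eip->cryptsize) {
                        map_offset = seg32->vmaddr +
                            eip->cryptoff - seg32->fileoff;
                        map_size = eip->cryptsize;
                        goto remap_now;
                }
                break;
        }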
 /*
  * This routine exists to support the load_dylinker().
  *
index 9d5fb63d63cfbe0c81629910fd2880e92dd5e20e..c8912286e75556a0493d3bef3121e81a7ddba059 100644 (file)
@@ -203,7 +203,7 @@ mcache_init(void)
                    (btp->bt_bktsize + 1) * sizeof (void *), 0, 0, MCR_SLEEP);
        }
 
-       PE_parse_boot_arg("mcache_flags", &mcache_flags);
+       PE_parse_boot_argn("mcache_flags", &mcache_flags, sizeof (mcache_flags));
        mcache_flags &= MCF_FLAGS_MASK;
 
        mcache_audit_cache = mcache_create("audit", sizeof (mcache_audit_t),
index 9ccbc9a9e50a61d1829d32214f69d58aff844835..4ccfd04fcb4389e2958505b0ed98d402eb514215 100644 (file)
@@ -1001,20 +1001,21 @@ bsdthread_terminate(__unused struct proc *p, struct bsdthread_terminate_args  *u
 #if 0
        KERNEL_DEBUG_CONSTANT(0x9000084 |DBG_FUNC_START, (unsigned int)freeaddr, (unsigned int)freesize, (unsigned int)kthport, 0xff, 0);
 #endif
-       if (sem != MACH_PORT_NULL) {
-                kret = semaphore_signal_internal_trap(sem);
+       if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
+               kret = mach_vm_deallocate(current_map(), freeaddr, freesize);
                if (kret != KERN_SUCCESS) {
                        return(EINVAL);
                }
        }
-       if ((freesize != (mach_vm_size_t)0) && (freeaddr != (mach_vm_offset_t)0)) {
-               kret = mach_vm_deallocate(current_map(), freeaddr, freesize);
+       
+       (void) thread_terminate(current_thread());
+       if (sem != MACH_PORT_NULL) {
+                kret = semaphore_signal_internal_trap(sem);
                if (kret != KERN_SUCCESS) {
                        return(EINVAL);
                }
        }
        
-       (void) thread_terminate(current_thread());
        if (kthport != MACH_PORT_NULL)
                        mach_port_deallocate(get_task_ipcspace(current_task()), kthport);
        thread_exception_return();
@@ -1982,7 +1983,6 @@ wq_runitem(proc_t p, user_addr_t item, thread_t th, struct threadlist *tl,
 int
 setup_wqthread(proc_t p, thread_t th, user_addr_t item, int reuse_thread, struct threadlist *tl)
 {
-
 #if defined(__ppc__)
        /*
         * Set up PowerPC registers...
index af0e42341f6ead095a998148ae2c27a547b40063..def0a5aac4d6a820c6ed36109b320713faedb863 100644 (file)
@@ -484,6 +484,13 @@ log_dmesg(user_addr_t buffer, uint32_t buffersize, register_t * retval) {
                        continue;
                newl = ch == '\n';
                localbuff[i++] = ch;
+               /* The original version of this routine contained a buffer
+                * overflow. At the time, a "small" targeted fix was desired
+                * so the change below to check the buffer bounds was made.
+                * TODO: rewrite this needlessly convoluted routine.
+                */
+               if (i == (localbuff_size - 2))
+                       break;
        }
        if (!newl)
                localbuff[i++] = '\n';
index 10d694d4c19d4c6a334cdc38e8a0ed2abf6e79ed..cb5a66b78e8f8be2f9c0d06642c49d2a34958d62 100644 (file)
 359    ALL     { int nosys(void); } 
 #endif
 
+#if CONFIG_WORKQUEUE
 360    ALL     { user_addr_t bsdthread_create(user_addr_t func, user_addr_t func_arg, user_addr_t stack, user_addr_t pthread, uint32_t flags) NO_SYSCALL_STUB; } 
 361    ALL     { int bsdthread_terminate(user_addr_t stackaddr, size_t freesize, uint32_t port, uint32_t sem) NO_SYSCALL_STUB; } 
+#else
+360    ALL     { int nosys(void); } 
+361    ALL     { int nosys(void); } 
+#endif
+
 362    ALL     { int kqueue(void); } 
 363    ALL     { int kevent(int fd, const struct kevent *changelist, int nchanges, struct kevent *eventlist, int nevents, const struct timespec *timeout); } 
 364    ALL     { int lchown(user_addr_t path, uid_t owner, gid_t group); }
 365    ALL     { int stack_snapshot(pid_t pid, user_addr_t tracebuf, uint32_t tracebuf_size, uint32_t options) NO_SYSCALL_STUB; }
+
+#if CONFIG_WORKQUEUE
 366    ALL     { int bsdthread_register(user_addr_t threadstart, user_addr_t wqthread, int pthsize) NO_SYSCALL_STUB; } 
 367    ALL     { int workq_open(void) NO_SYSCALL_STUB; }
 368    ALL     { int workq_ops(int options, user_addr_t item, int prio) NO_SYSCALL_STUB; }
+#else
+366    ALL     { int nosys(void); } 
+367    ALL     { int nosys(void); } 
+368    ALL     { int nosys(void); } 
+#endif
+
 369    ALL     { int nosys(void); } 
 370    ALL     { int nosys(void); } 
 371    ALL     { int nosys(void); } 
index 43cd4543136d37aff54111c52143de58ab15a602..df8047ea8201c215bd9114c6248224b9ca2d04a0 100644 (file)
@@ -66,6 +66,8 @@
 
 #include <libkern/crypto/sha1.h>
 
+#include <security/mac_framework.h>
+
 /* XXX These should be in a BSD accessible Mach header, but aren't. */
 extern kern_return_t memory_object_pages_resident(memory_object_control_t,
                                                        boolean_t *);
@@ -217,7 +219,6 @@ CS_CodeDirectory *findCodeDirectory(
                 */
                cd = (const CS_CodeDirectory *) embedded;
        }
-
        if (cd &&
            cs_valid_range(cd, cd + 1, lower_bound, upper_bound) &&
            cs_valid_range(cd, (const char *) cd + ntohl(cd->length),
@@ -1936,6 +1937,10 @@ ubc_upl_commit_range(
        if (flags & UPL_COMMIT_FREE_ON_EMPTY)
                flags |= UPL_COMMIT_NOTIFY_EMPTY;
 
+       if (flags & UPL_COMMIT_KERNEL_ONLY_FLAGS) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
        pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
 
        kr = upl_commit_range(upl, offset, size, flags,
@@ -2106,7 +2111,7 @@ UBCINFOEXISTS(struct vnode * vp)
 /*
  * CODE SIGNING
  */
-#define CS_BLOB_KEEP_IN_KERNEL 1
+#define CS_BLOB_PAGEABLE 0
 static volatile SInt32 cs_blob_size = 0;
 static volatile SInt32 cs_blob_count = 0;
 static SInt32 cs_blob_size_peak = 0;
@@ -2123,6 +2128,39 @@ SYSCTL_INT(_vm, OID_AUTO, cs_blob_count_peak, CTLFLAG_RD, &cs_blob_count_peak, 0
 SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_peak, CTLFLAG_RD, &cs_blob_size_peak, 0, "Peak size of code signature blobs");
 SYSCTL_INT(_vm, OID_AUTO, cs_blob_size_max, CTLFLAG_RD, &cs_blob_size_max, 0, "Size of biggest code signature blob");
 
+kern_return_t
+ubc_cs_blob_allocate(
+       vm_offset_t     *blob_addr_p,
+       vm_size_t       *blob_size_p)
+{
+       kern_return_t   kr;
+
+#if CS_BLOB_PAGEABLE
+       *blob_size_p = round_page(*blob_size_p);
+       kr = kmem_alloc(kernel_map, blob_addr_p, *blob_size_p);
+#else  /* CS_BLOB_PAGEABLE */
+       *blob_addr_p = (vm_offset_t) kalloc(*blob_size_p);
+       if (*blob_addr_p == 0) {
+               kr = KERN_NO_SPACE;
+       } else {
+               kr = KERN_SUCCESS;
+       }
+#endif /* CS_BLOB_PAGEABLE */
+       return kr;
+}
+
+void
+ubc_cs_blob_deallocate(
+       vm_offset_t     blob_addr,
+       vm_size_t       blob_size)
+{
+#if CS_BLOB_PAGEABLE
+       kmem_free(kernel_map, blob_addr, blob_size);
+#else  /* CS_BLOB_PAGEABLE */
+       kfree((void *) blob_addr, blob_size);
+#endif /* CS_BLOB_PAGEABLE */
+}
+       
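Since CS_BLOB_PAGEABLE is 0, blobs now live in wired kalloc() memory and never get a memory-entry port (ubc_cs_blob_add() below sets blob_handle = IPC_PORT_NULL, and ubc_cs_free() releases the port only when one exists). One subtlety: kfree() must be given the allocation's size, and the pageable path rounds the size up, which is why ubc_cs_blob_allocate() returns the possibly adjusted size through blob_size_p and callers must hand that same value back:

        vm_offset_t addr;
        vm_size_t   size = lcp->datasize;       /* in: requested size  */

        if (ubc_cs_blob_allocate(&addr, &size) != KERN_SUCCESS)
                return LOAD_NOSPACE;
        /* on any failure path, free with the updated size */
        ubc_cs_blob_deallocate(addr, size);     /* not lcp->datasize   */

load_code_signature() in the mach_loader.c hunk above follows exactly this pattern with its new blob_size local.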
 int
 ubc_cs_blob_add(
        struct vnode    *vp,
@@ -2148,6 +2186,7 @@ ubc_cs_blob_add(
                return ENOMEM;
        }
 
+#if CS_BLOB_PAGEABLE
        /* get a memory entry on the blob */
        blob_size = (memory_object_size_t) size;
        kr = mach_make_memory_entry_64(kernel_map,
@@ -2168,7 +2207,10 @@ ubc_cs_blob_add(
                error = EINVAL;
                goto out;
        }
-
+#else
+       blob_size = (memory_object_size_t) size;
+       blob_handle = IPC_PORT_NULL;
+#endif
 
        /* fill in the new blob */
        blob->csb_cpu_type = cputype;
@@ -2177,7 +2219,6 @@ ubc_cs_blob_add(
        blob->csb_mem_offset = 0;
        blob->csb_mem_handle = blob_handle;
        blob->csb_mem_kaddr = addr;
-
        
        /*
         * Validate the blob's contents
@@ -2207,7 +2248,15 @@ ubc_cs_blob_add(
                SHA1Final(blob->csb_sha1, &sha1ctxt);
        }
 
-
+       /* 
+        * Let policy module check whether the blob's signature is accepted.
+        */
+#if CONFIG_MACF
+       error = mac_vnode_check_signature(vp, blob->csb_sha1, (void*)addr, size);
+       if (error) 
+               goto out;
+#endif 
+       
        /*
         * Validate the blob's coverage
         */
@@ -2328,10 +2377,6 @@ ubc_cs_blob_add(
                       blob->csb_flags);
        }
 
-#if !CS_BLOB_KEEP_IN_KERNEL
-       blob->csb_mem_kaddr = 0;
-#endif /* CS_BLOB_KEEP_IN_KERNEL */
-
        vnode_unlock(vp);
 
        error = 0;      /* success ! */
@@ -2347,10 +2392,6 @@ out:
                        mach_memory_entry_port_release(blob_handle);
                        blob_handle = IPC_PORT_NULL;
                }
-       } else {
-#if !CS_BLOB_KEEP_IN_KERNEL
-               kmem_free(kernel_map, addr, size);
-#endif /* CS_BLOB_KEEP_IN_KERNEL */
        }
 
        if (error == EAGAIN) {
@@ -2363,7 +2404,7 @@ out:
                /*
                 * Since we're not failing, consume the data we received.
                 */
-               kmem_free(kernel_map, addr, size);
+               ubc_cs_blob_deallocate(addr, size);
        }
 
        return error;
@@ -2421,12 +2462,13 @@ ubc_cs_free(
             blob = next_blob) {
                next_blob = blob->csb_next;
                if (blob->csb_mem_kaddr != 0) {
-                       kmem_free(kernel_map,
-                                 blob->csb_mem_kaddr,
-                                 blob->csb_mem_size);
+                       ubc_cs_blob_deallocate(blob->csb_mem_kaddr,
+                                              blob->csb_mem_size);
                        blob->csb_mem_kaddr = 0;
                }
-               mach_memory_entry_port_release(blob->csb_mem_handle);
+               if (blob->csb_mem_handle != IPC_PORT_NULL) {
+                       mach_memory_entry_port_release(blob->csb_mem_handle);
+               }
                blob->csb_mem_handle = IPC_PORT_NULL;
                OSAddAtomic(-1, &cs_blob_count);
                OSAddAtomic(-blob->csb_mem_size, &cs_blob_size);
@@ -2537,9 +2579,6 @@ cs_validate_page(
                            cd->hashType != 0x1 ||
                            cd->hashSize != SHA1_RESULTLEN) {
                                /* bogus blob ? */
-#if !CS_BLOB_KEEP_IN_KERNEL
-                               kmem_free(kernel_map, kaddr, ksize);
-#endif /* CS_BLOB_KEEP_IN_KERNEL */
                                continue;
                        }
                            
@@ -2549,9 +2588,6 @@ cs_validate_page(
                        if (offset < start_offset ||
                            offset >= end_offset) {
                                /* our page is not covered by this blob */
-#if !CS_BLOB_KEEP_IN_KERNEL
-                               kmem_free(kernel_map, kaddr, ksize);
-#endif /* CS_BLOB_KEEP_IN_KERNEL */
                                continue;
                        }
 
@@ -2564,11 +2600,6 @@ cs_validate_page(
                                found_hash = TRUE;
                        }
 
-#if !CS_BLOB_KEEP_IN_KERNEL
-                       /* we no longer need that blob in the kernel map */
-                       kmem_free(kernel_map, kaddr, ksize);
-#endif /* CS_BLOB_KEEP_IN_KERNEL */
-
                        break;
                }
        }
@@ -2591,9 +2622,9 @@ cs_validate_page(
                validated = FALSE;
                *tainted = FALSE;
        } else {
-               const uint32_t *asha1, *esha1;
 
                size = PAGE_SIZE;
+               const uint32_t *asha1, *esha1;
                if (offset + size > codeLimit) {
                        /* partial page at end of segment */
                        assert(offset < codeLimit);
@@ -2601,7 +2632,7 @@ cs_validate_page(
                }
                /* compute the actual page's SHA1 hash */
                SHA1Init(&sha1ctxt);
-               SHA1Update(&sha1ctxt, data, size);
+               SHA1UpdateUsePhysicalAddress(&sha1ctxt, data, size);
                SHA1Final(actual_hash, &sha1ctxt);
 
                asha1 = (const uint32_t *) actual_hash;
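The page hash now goes through SHA1UpdateUsePhysicalAddress() (part of the libkern/crypto/sha1 additions listed for this commit), which lets the fault path hash a page via its physical address. The validation math is unchanged; condensed from the surrounding function:

        unsigned char actual_hash[SHA1_RESULTLEN];
        size_t size = PAGE_SIZE;
        SHA1_CTX sha1ctxt;

        if (offset + size > codeLimit)          /* partial page at the  */
                size = codeLimit & PAGE_MASK;   /* end of signed range  */

        SHA1Init(&sha1ctxt);
        SHA1UpdateUsePhysicalAddress(&sha1ctxt, data, size);
        SHA1Final(actual_hash, &sha1ctxt);

        /* the page is tainted iff the computed hash differs from the
         * expected hash found in the code directory slot */
        *tainted = (bcmp(expected_hash, actual_hash,
                         SHA1_RESULTLEN) != 0);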
index aba70cc665a3f041f5739ac59fb6b9f6cc34de9e..1784e5f1d7a4893487fc5a9e5239221e50899622 100644 (file)
@@ -1026,7 +1026,7 @@ mbinit(void)
        VERIFY(slabstbl != NULL);
 
        /* Allocate audit structures if needed */
-       PE_parse_boot_arg("mbuf_debug", &mbuf_debug);
+       PE_parse_boot_argn("mbuf_debug", &mbuf_debug, sizeof (mbuf_debug));
        mbuf_debug |= mcache_getflags();
        if (mbuf_debug & MCF_AUDIT) {
                MALLOC(mclaudit, mcl_audit_t *,
@@ -1051,7 +1051,7 @@ mbinit(void)
        embutl = (union mcluster *)
            ((unsigned char *)mbutl + (nmbclusters * MCLBYTES));
 
-       PE_parse_boot_arg("initmcl", &initmcl);
+       PE_parse_boot_argn("initmcl", &initmcl, sizeof (initmcl));
 
        lck_mtx_lock(mbuf_mlock);
 
index 7b259ec9f657e36a049d8383732bf7f5118088c7..57dff6de9dd12be195a4de850448027ab0b83818 100644 (file)
@@ -245,7 +245,7 @@ socketinit(void)
                return;
        }
 
-       PE_parse_boot_arg("socket_debug", &socket_debug);
+       PE_parse_boot_argn("socket_debug", &socket_debug, sizeof (socket_debug));
 
        /*
         * allocate lock group attribute and group for socket cache mutex
index 5a44dee4fb498701a9666b26fb4cfcb6f17b6101..b8984ac80c4b5eb6b5486be9d554daa1450cb2e9 100644 (file)
@@ -54,9 +54,7 @@ DATAFILES = \
        fremovexattr.2          \
        fsetxattr.2             \
        fstat.2                 \
-       fstat64.2               \
        fstatfs.2               \
-       fstatfs64.2             \
        fsync.2                 \
        ftruncate.2             \
        futimes.2               \
@@ -102,7 +100,6 @@ DATAFILES = \
        listxattr.2             \
        lseek.2                 \
        lstat.2                 \
-       lstat64.2               \
        madvise.2               \
        mincore.2               \
        minherit.2              \
@@ -181,9 +178,7 @@ DATAFILES = \
        socket.2                \
        socketpair.2            \
        stat.2                  \
-       stat64.2                \
        statfs.2                \
-       statfs64.2              \
        symlink.2               \
        sync.2                  \
        syscall.2               \
diff --git a/bsd/man/man2/fstat64.2 b/bsd/man/man2/fstat64.2
deleted file mode 100644 (file)
index b1a86c1..0000000
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/stat.2
diff --git a/bsd/man/man2/fstatfs64.2 b/bsd/man/man2/fstatfs64.2
deleted file mode 100644 (file)
index 923d3c0..0000000
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/statfs.2
diff --git a/bsd/man/man2/lstat64.2 b/bsd/man/man2/lstat64.2
deleted file mode 100644 (file)
index 4fe4fb4..0000000
+++ /dev/null
@@ -1,2 +0,0 @@
-.so man2/stat.2
-
index 0c9f062e007edbecac17df90ac024a6e05e09b7f..53704ece88aea98009ed92e37c4279377c130d08 100644 (file)
 .Os BSD 4
 .Sh NAME
 .Nm fstat ,
-.Nm fstat64 ,
 .Nm lstat ,
-.Nm lstat64 ,
-.Nm stat ,
-.Nm stat64
+.Nm stat
 .Nd get file status
 .Sh SYNOPSIS
 .Fd #include <sys/stat.h>
 .Fa "struct stat *buf"
 .Fc
 .Ft int
-.Fo fstat64
-.Fa "int fildes"
-.Fa "struct stat64 *buf"
-.Fc
-.Ft int
 .Fo lstat
 .Fa "const char *restrict path"
 .Fa "struct stat *restrict buf"
 .Fc
 .Ft int
-.Fo lstat64
-.Fa "const char *restrict path"
-.Fa "struct stat64 *restrict buf"
-.Fc
-.Ft int
 .Fo stat
 .Fa "const char *restrict path"
 .Fa "struct stat *restrict buf"
 .Fc
-.Ft int
-.Fo stat64
-.Fa "const char *restrict path"
-.Fa "struct stat64 *restrict buf"
-.Fc
 .Sh DESCRIPTION
 The
 .Fn stat
-family of functions and their 64 bit variants obtain information about a file. The 
+family of functions obtain information about a file. The 
 .Fn stat 
 function obtains information about the file pointed to by
 .Fa path .
@@ -116,38 +98,16 @@ The
 .Fa buf
 argument is a pointer to a
 .Fa stat
-or
-.Fa stat64
 structure 
 as defined by
 .Aq Pa sys/stat.h
-(both shown below)
 and into which information is placed concerning the file.
 .Bd -literal
 struct stat {
-    dev_t    st_dev;    /* device inode resides on */
-    ino_t    st_ino;    /* inode's number */
-    mode_t   st_mode;   /* inode protection mode */
-    nlink_t  st_nlink;  /* number or hard links to the file */
-    uid_t    st_uid;    /* user-id of owner */
-    gid_t    st_gid;    /* group-id of owner */
-    dev_t    st_rdev;   /* device type, for special file inode */
-    struct timespec st_atimespec;  /* time of last access */
-    struct timespec st_mtimespec;  /* time of last data modification */
-    struct timespec st_ctimespec;  /* time of last file status change */
-    off_t    st_size;   /* file size, in bytes */
-    quad_t   st_blocks; /* blocks allocated for file */
-    u_long   st_blksize;/* optimal file sys I/O ops blocksize */
-    u_long   st_flags;  /* user defined flags for file */
-    u_long   st_gen;    /* file generation number */
-};
-
-
-struct stat64 {
     dev_t           st_dev;           /* ID of device containing file */
     mode_t          st_mode;          /* Mode of file (see below) */
     nlink_t         st_nlink;         /* Number of hard links */
-    ino64_t        st_ino;          /* File serial number */
+    ino_t          st_ino;           /* File serial number */
     uid_t           st_uid;           /* User ID of the file */
     gid_t           st_gid;           /* Group ID of the file */
     dev_t           st_rdev;          /* Device ID */
@@ -169,8 +129,6 @@ struct stat64 {
 .Pp
 The time-related fields of
 .Fa struct stat
-and
-.Fa struct stat64
 are as follows:
 .Bl -tag -width XXXst_birthtime
 .It st_atime
@@ -203,9 +161,8 @@ and
 .Xr write 2
 system calls.
 .It st_birthtime
-Time of file creation. Only set once when the file is created. This field is 
-only available in the 64 bit variants. On filesystems where birthtime is 
-not available, this field holds the
+Time of file creation. Only set once when the file is created.
+On filesystems where birthtime is not available, this field holds the
 .Fa ctime
 instead.
 .El
@@ -343,23 +300,6 @@ in the structure pointed to by
 The file generation number,
 .Fa st_gen ,
 is only available to the super-user.
-.br
-The fields in the stat structure currently marked
-.Fa st_spare1 ,
-.Fa st_spare2 ,
-and
-.Fa st_spare3
-are present in preparation for inode time stamps expanding
-to 64 bits.  This, however, can break certain programs that
-depend on the time stamps being contiguous (in calls to
-.Xr utimes 2 ) .
-.Sh LEGACY SYNOPSIS
-.Fd #include <sys/types.h>
-.Fd #include <sys/stat.h>
-.Pp
-The include file
-.In sys/types.h
-is necessary.
 .Sh SEE ALSO
 .Xr chflags 2 ,
 .Xr chmod 2 ,
@@ -386,9 +326,3 @@ An
 .Fn lstat
 function call appeared in
 .Bx 4.2 .
-The 
-.Fn stat64 ,
-.Fn fstat64 ,
-and
-.Fn lstat64
-system calls first appeared in Mac OS X 10.5 (Leopard).
diff --git a/bsd/man/man2/stat64.2 b/bsd/man/man2/stat64.2
deleted file mode 100644 (file)
index b1a86c1..0000000
+++ /dev/null
@@ -1 +0,0 @@
-.so man2/stat.2
index 1ababf3f81c12db33abfbabbc62d63eb0ddbe193..4b6a3db354256877612957d9779c54fea7fd5652 100644 (file)
@@ -38,9 +38,7 @@
 .Os
 .Sh NAME
 .Nm statfs,
-.Nm statfs64,
-.Nm fstatfs,
-.Nm fstatfs64
+.Nm fstatfs
 .Nd get file system statistics
 .Sh SYNOPSIS
 .Fd #include <sys/param.h>
 .Ft int
 .Fn statfs "const char *path" "struct statfs *buf"
 .Ft int
-.Fn statfs64 "const char *path" "struct statfs64 *buf"
-.Ft int
 .Fn fstatfs "int fd" "struct statfs *buf"
-.Ft int
-.Fn fstatfs64 "int fd" "struct statfs64 *buf"
 .Sh DESCRIPTION
 .Fn Statfs
 returns information about a mounted file system.
@@ -61,41 +55,14 @@ is the path name of any file within the mounted file system.
 .Fa Buf
 is a pointer to a 
 .Fa statfs
-or
-.Fa statfs64
 structure defined as follows:
 .Bd -literal
 typedef struct { int32_t val[2]; } fsid_t;
 
-#define MFSNAMELEN      15 /* length of fs type name, not inc. nul */
-#define MNAMELEN        90 /* length of buffer for returned name */
 #define MFSTYPENAMELEN  16 /* length of fs type name including null */
 #define MAXPATHLEN      1024   
 
 struct statfs {
-    short   f_otype;   /* type of file system (reserved: zero) */
-    short   f_oflags;  /* copy of mount flags (reserved: zero) */
-    long    f_bsize;   /* fundamental file system block size */
-    long    f_iosize;  /* optimal transfer block size */
-    long    f_blocks;  /* total data blocks in file system */
-    long    f_bfree;   /* free blocks in fs */
-    long    f_bavail;  /* free blocks avail to non-superuser */
-    long    f_files;   /* total file nodes in file system */
-    long    f_ffree;   /* free file nodes in fs */
-    fsid_t  f_fsid;    /* file system id */
-    uid_t   f_owner;   /* user that mounted the file system */
-    short   f_reserved1;       /* reserved for future use */
-    short   f_type;    /* type of file system (reserved) */
-    long    f_flags;   /* copy of mount flags (reserved) */
-    long    f_reserved2[2];    /* reserved for future use */
-    char    f_fstypename[MFSNAMELEN]; /* fs type name */
-    char    f_mntonname[MNAMELEN];    /* directory on which mounted */
-    char    f_mntfromname[MNAMELEN];  /* mounted file system */
-    char    f_reserved3;       /* reserved for future use */
-    long    f_reserved4[4];    /* reserved for future use */
-};
-
-struct statfs64 {
     uint32_t        f_bsize;        /* fundamental file system block size */ 
     int32_t         f_iosize;       /* optimal transfer block size */ 
     uint64_t        f_blocks;       /* total data blocks in file system */ 
@@ -223,8 +190,4 @@ error occurred while reading from or writing to the file system.
 .Sh HISTORY
 The
 .Fn statfs
-function first appeared in 4.4BSD. The
-.Fn statfs64
-and 
-.Fn fstatfs64
-first appeared in Max OS X 10.5 (Leopard).
+function first appeared in 4.4BSD.
diff --git a/bsd/man/man2/statfs64.2 b/bsd/man/man2/statfs64.2
deleted file mode 100644 (file)
index 3a64852..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-.so man2/statfs.2
-
-
index b33c223c0b8cd3fbb72a33d5857e60e63afa1a18..891e5db37f2d41fc66332015663cbda1f851bf3e 100644 (file)
@@ -88,32 +88,25 @@ and further in the file
 .Bd -literal
 /*** Excerpt from <sys/dirent.h> ***/
 /*
- * The dirent structure defines the format of directory entries returned by 
- * the getdirentries(2) system call.
+ * The dirent structure defines the format of directory entries.
  *
  * A directory entry has a struct dirent at the front of it, containing its
  * inode number, the length of the entry, and the length of the name
  * contained in the entry.  These are followed by the name padded to a 4
  * byte boundary with null bytes.  All names are guaranteed null terminated.
- * The maximum length of a name in a directory is MAXNAMLEN.
- * The dirent structure defines the format of directory entries returned by 
- * the getdirentries(2) system call.
+ * The maximum length of a name in a directory is MAXPATHLEN.
  */
 
 #ifndef _SYS_DIRENT_H 
 #define _SYS_DIRENT_H 
 
 struct dirent {
-        u_int32_t d_fileno;             /* file number of entry */
+        ino_t     d_ino;                /* file number of entry */
+        u_int64_t d_seekoff;            /* seek offset (optional, used by servers) */
         u_int16_t d_reclen;             /* length of this record */
+        u_int16_t d_namlen;             /* length of string in d_name */
         u_int8_t  d_type;               /* file type, see below */
-        u_int8_t  d_namlen;             /* length of string in d_name */
-#ifdef _POSIX_SOURCE
-        char    d_name[255 + 1];        /* name must be no longer than this */
-#else
-#define MAXNAMLEN       255
-        char    d_name[MAXNAMLEN + 1];  /* name must be no longer than this */
-#endif
+        char      d_name[MAXPATHLEN];   /* name must be no longer than this */
 };
 
 /*
@@ -139,51 +132,26 @@ struct dirent {
 #ifndef _DIRENT_H 
 #define _DIRENT_H 
 
-#ifdef _POSIX_SOURCE
-typedef void *  DIR;
-#else
-
-#define d_ino           d_fileno        /* backward compatibility */
-
 /* definitions for library routines operating on directories. */
 #define DIRBLKSIZ       1024
 
 struct _telldir;                /* see telldir.h */
 
 /* structure describing an open directory. */
-typedef struct _dirdesc {
-        int     dd_fd;          /* file descriptor associated with directory */
-        long    dd_loc;         /* offset in current buffer */
-        long    dd_size;        /* amount of data returned by getdirentries */
-        char    *dd_buf;        /* data buffer */
-        int     dd_len;         /* size of data buffer */
-        long    dd_seek;        /* magic cookie returned by getdirentries */
-        long    dd_rewind;      /* magic cookie for rewinding */
-        int     dd_flags;       /* flags for readdir */
-        pthread_mutex_t dd_lock; /* for thread locking */
-        struct _telldir *dd_td; /* telldir position recording */
+typedef struct {
+        int     __dd_fd;        /* file descriptor associated with directory */
+        long    __dd_loc;       /* offset in current buffer */
+        long    __dd_size;      /* amount of data returned by getdirentries */
+        char    *__dd_buf;      /* data buffer */
+        int     __dd_len;       /* size of data buffer */
+        long    __dd_seek;      /* magic cookie returned by getdirentries */
+        long    __dd_rewind;    /* magic cookie for rewinding */
+        int     __dd_flags;     /* flags for readdir */
+        pthread_mutex_t __dd_lock; /* for thread locking */
+        struct _telldir *__dd_td; /* telldir position recording */
 } DIR;
 
-#define dirfd(dirp)     ((dirp)->dd_fd)
-
-/* flags for opendir2 */
-#define DTF_HIDEW       0x0001  /* hide whiteout entries */
-#define DTF_NODUP       0x0002  /* don't return duplicate names */
-/* structure describing an open directory. */
-typedef struct _dirdesc {
-        int     dd_fd;          /* file descriptor associated with directory */
-        long    dd_loc;         /* offset in current buffer */
-        long    dd_size;        /* amount of data returned by getdirentries */
-        char    *dd_buf;        /* data buffer */
-        int     dd_len;         /* size of data buffer */
-        long    dd_seek;        /* magic cookie returned by getdirentries */
-        long    dd_rewind;      /* magic cookie for rewinding */
-        int     dd_flags;       /* flags for readdir */
-        pthread_mutex_t dd_lock; /* for thread locking */
-        struct _telldir *dd_td; /* telldir position recording */
-} DIR;
-
-#define dirfd(dirp)     ((dirp)->dd_fd)
+#define dirfd(dirp)     ((dirp)->__dd_fd)
 
 /* flags for opendir2 */
 #define DTF_HIDEW       0x0001  /* hide whiteout entries */
@@ -191,12 +159,6 @@ typedef struct _dirdesc {
 #define DTF_REWIND      0x0004  /* rewind after reading union stack */
 #define __DTF_READALL   0x0008  /* everything has been read */
 
-#ifndef NULL
-#define NULL    0
-#endif
-
-#endif /* _POSIX_SOURCE */
-
 #endif /* !_DIRENT_H_ */
 .Ed
 .Sh SEE ALSO
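The reworked struct dirent above widens d_namlen to a u_int16_t and sizes d_name at MAXPATHLEN. A minimal userland sketch against the new layout (hedged: assumes the updated headers are installed; the %.*s idiom just bounds the print to d_namlen):

    #include <dirent.h>
    #include <stdio.h>

    int
    example_list(const char *path)
    {
            DIR *dirp = opendir(path);
            struct dirent *dp;

            if (dirp == NULL)
                    return -1;
            /* d_namlen is now 16 bits; d_name can hold a full MAXPATHLEN name */
            while ((dp = readdir(dirp)) != NULL)
                    printf("%.*s\n", (int)dp->d_namlen, dp->d_name);
            return closedir(dirp);
    }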
index 8226db1999a76c3140b63001780be5caaade0cc2..9f030a8650d36f407c34579d48a88a0d1a6e421b 100644 (file)
@@ -93,7 +93,7 @@ typedef       long *  qaddr_t;        /* should be typedef quad * qaddr_t; */
 
 typedef        long    daddr_t;
 typedef        char *  caddr_t;
-typedef        u_long  ino_t;
+typedef        u_int64_t ino_t;
 typedef        long    swblk_t;
 typedef        long    segsz_t;
 typedef        long    off_t;
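With ino_t widened from u_long to u_int64_t above, inode numbers no longer fit a long on 32-bit systems, so printf formats need care. A hedged sketch of the portable idiom (the helper itself is illustrative, not part of the commit):

    #include <sys/stat.h>
    #include <stdio.h>

    int
    example_print_inode(const char *path)
    {
            struct stat sb;

            if (stat(path, &sb) == -1)
                    return -1;
            /* ino_t is now 64 bits wide; the cast keeps the format portable */
            printf("%s: inode %llu\n", path, (unsigned long long)sb.st_ino);
            return 0;
    }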
index d9aedf798df57c93e32d49946810b4e01c335bb7..6c7f5af9dab6ca481251b61d93628bb9e5a12fcb 100644 (file)
@@ -565,14 +565,19 @@ devfsspec_close(struct vnop_close_args *ap)
        struct vnode *          vp = ap->a_vp;
        register devnode_t *    dnp;
        struct timeval now;
+       int ref = 1;
 
-       if (vnode_isinuse(vp, 1)) {
+       if (vp->v_type == VBLK)
+               ref = 0;
+
+       if (vnode_isinuse(vp, ref)) {
            DEVFS_LOCK();
            microtime(&now);
            dnp = VTODN(vp);
            dn_times(dnp, &now, &now, &now);
            DEVFS_UNLOCK();
        }
+
        return (VOCALL (spec_vnodeop_p, VOFFSET(vnop_close), ap));
 }
 
index 752afe8df04fe8f4e47094aeaca7ec796f46ee2c..aad1b0250e1d9f5cad5ca0c166c6c54ef09c82db 100644 (file)
@@ -590,7 +590,6 @@ spec_fsync(struct vnop_fsync_args *ap)
  */
 extern int hard_throttle_on_root;
 void IOSleep(int);
-extern void throttle_lowpri_io(int *lowpri_window,mount_t v_mount);
 
 // the low priority process may wait for at most LOWPRI_MAX_DELAY milliseconds
 #define LOWPRI_INITIAL_WINDOW_MSECS 100
@@ -599,6 +598,12 @@ extern void throttle_lowpri_io(int *lowpri_window,mount_t v_mount);
 #define LOWPRI_MAX_WAITING_MSECS 200
 #define LOWPRI_SLEEP_INTERVAL 5
 
+struct _throttle_io_info_t {
+       struct timeval  last_normal_IO_timestamp;
+       SInt32 numthreads_throttling;
+};
+
+struct _throttle_io_info_t _throttle_io_info[LOWPRI_MAX_NUM_DEV];
 int    lowpri_IO_initial_window_msecs  = LOWPRI_INITIAL_WINDOW_MSECS;
 int    lowpri_IO_window_msecs_inc  = LOWPRI_WINDOW_MSECS_INC;
 int    lowpri_max_window_msecs  = LOWPRI_MAX_WINDOW_MSECS;
@@ -609,40 +614,74 @@ SYSCTL_INT(_debug, OID_AUTO, lowpri_IO_window_inc, CTLFLAG_RW, &lowpri_IO_window
 SYSCTL_INT(_debug, OID_AUTO, lowpri_max_window_msecs, CTLFLAG_RW, &lowpri_max_window_msecs, LOWPRI_INITIAL_WINDOW_MSECS, "");
 SYSCTL_INT(_debug, OID_AUTO, lowpri_max_waiting_msecs, CTLFLAG_RW, &lowpri_max_waiting_msecs, LOWPRI_INITIAL_WINDOW_MSECS, "");
 
-void throttle_lowpri_io(int *lowpri_window,mount_t v_mount)
+int throttle_io_will_be_throttled(int lowpri_window_msecs, size_t devbsdunit)
 {
-       int i;
-       struct timeval last_lowpri_IO_timestamp,last_normal_IO_timestamp;
        struct timeval elapsed;
-       int lowpri_IO_window_msecs;
-       struct timeval lowpri_IO_window;
-       int max_try_num = lowpri_max_waiting_msecs / LOWPRI_SLEEP_INTERVAL;
+       int elapsed_msecs;
 
-       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_START,
-                    *lowpri_window, 0, 0, 0, 0);
+       microuptime(&elapsed);
+       timevalsub(&elapsed, &_throttle_io_info[devbsdunit].last_normal_IO_timestamp);
+       elapsed_msecs = elapsed.tv_sec * 1000 + elapsed.tv_usec / 1000;
 
-        last_normal_IO_timestamp = v_mount->last_normal_IO_timestamp;
-                        
-       for (i=0; i<max_try_num; i++) {
-               microuptime(&last_lowpri_IO_timestamp);
+       if (lowpri_window_msecs == -1) // use the max waiting time
+               lowpri_window_msecs = lowpri_max_waiting_msecs;
 
-               elapsed = last_lowpri_IO_timestamp;
-               timevalsub(&elapsed, &last_normal_IO_timestamp);
+       return elapsed_msecs < lowpri_window_msecs;
+}
 
-               lowpri_IO_window_msecs = *lowpri_window;
-               lowpri_IO_window.tv_sec  = lowpri_IO_window_msecs / 1000;
-               lowpri_IO_window.tv_usec = (lowpri_IO_window_msecs % 1000) * 1000;
+void throttle_lowpri_io(boolean_t ok_to_sleep)
+{
+       int i;
+       int max_try_num;
+       struct uthread *ut;
 
-               if (timevalcmp(&elapsed, &lowpri_IO_window, <)) {
-                       IOSleep(LOWPRI_SLEEP_INTERVAL);
-               } else {
-                       break;
+       ut = get_bsdthread_info(current_thread());
+
+       if (ut->uu_lowpri_window == 0)
+               return;
+
+       max_try_num = lowpri_max_waiting_msecs / LOWPRI_SLEEP_INTERVAL * MAX(1, _throttle_io_info[ut->uu_devbsdunit].numthreads_throttling);
+
+       KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_START,
+                    ut->uu_lowpri_window, 0, 0, 0, 0);
+
+       if (ok_to_sleep == TRUE) {
+               for (i=0; i<max_try_num; i++) {
+                       if (throttle_io_will_be_throttled(ut->uu_lowpri_window, ut->uu_devbsdunit)) {
+                               IOSleep(LOWPRI_SLEEP_INTERVAL);
+                       } else {
+                               break;
+                       }
                }
        }
-
        KERNEL_DEBUG((FSDBG_CODE(DBG_FSRW, 97)) | DBG_FUNC_END,
-                    *lowpri_window, i*5, 0, 0, 0);
-       *lowpri_window = 0;
+                    ut->uu_lowpri_window, i*5, 0, 0, 0);
+       SInt32 oldValue;
+       oldValue = OSDecrementAtomic(&_throttle_io_info[ut->uu_devbsdunit].numthreads_throttling);
+       ut->uu_lowpri_window = 0;
+
+       if (oldValue <= 0) {
+               panic("%s: numthreads negative", __func__);
+       }
+}
+
+int throttle_get_io_policy(struct uthread **ut)
+{
+       int policy = IOPOL_DEFAULT;
+       proc_t p = current_proc();
+
+       *ut = get_bsdthread_info(current_thread());
+               
+       if (p != NULL)
+               policy = p->p_iopol_disk;
+
+       if (*ut != NULL) {
+               // the I/O policy of the thread overrides that of the process
+               // unless the I/O policy of the thread is default
+               if ((*ut)->uu_iopol_disk != IOPOL_DEFAULT)
+                       policy = (*ut)->uu_iopol_disk;
+       }
+       return policy;
 }
 
 int
@@ -677,23 +716,14 @@ spec_strategy(struct vnop_strategy_args *ap)
                hard_throttle_on_root = 1;
 
        if (lowpri_IO_initial_window_msecs) {
-               proc_t  p;
                struct uthread  *ut;
-               int policy = IOPOL_DEFAULT;
+               int policy;
                int is_throttleable_io = 0;
                int is_passive_io = 0;
-               p = current_proc();
-               ut = get_bsdthread_info(current_thread());
-               
-               if (p != NULL)
-                       policy = p->p_iopol_disk;
-
-               if (ut != NULL) {
-                       // the I/O policy of the thread overrides that of the process
-                       // unless the I/O policy of the thread is default
-                       if (ut->uu_iopol_disk != IOPOL_DEFAULT)
-                               policy = ut->uu_iopol_disk;
-               }
+               size_t devbsdunit;
+               SInt32 oldValue;
+
+               policy = throttle_get_io_policy(&ut);
 
                switch (policy) {
                case IOPOL_DEFAULT:
@@ -713,9 +743,13 @@ spec_strategy(struct vnop_strategy_args *ap)
                if (!is_throttleable_io && ISSET(bflags, B_PASSIVE))
                    is_passive_io |= 1;
 
+               if (buf_vnode(bp)->v_mount != NULL)
+                       devbsdunit = buf_vnode(bp)->v_mount->mnt_devbsdunit;
+               else
+                       devbsdunit = LOWPRI_MAX_NUM_DEV - 1;
                if (!is_throttleable_io) {
-                       if (!is_passive_io && buf_vnode(bp)->v_mount != NULL){
-                               microuptime(&(buf_vnode(bp)->v_mount->last_normal_IO_timestamp));
+                       if (!is_passive_io){
+                               microuptime(&_throttle_io_info[devbsdunit].last_normal_IO_timestamp);
                        }
                } else {
                        /*
@@ -728,14 +762,25 @@ spec_strategy(struct vnop_strategy_args *ap)
                         * do the delay just before we return from the system
                         * call that triggered this I/O or from vnode_pagein
                         */
-                       if(buf_vnode(bp)->v_mount != NULL)
-                                ut->v_mount = buf_vnode(bp)->v_mount;
                        if (ut->uu_lowpri_window == 0) {
+                               ut->uu_devbsdunit = devbsdunit;
+                               oldValue = OSIncrementAtomic(&_throttle_io_info[devbsdunit].numthreads_throttling);
+                               if (oldValue < 0) {
+                                       panic("%s: numthreads negative", __func__);
+                               }
                                ut->uu_lowpri_window = lowpri_IO_initial_window_msecs;
+                               ut->uu_lowpri_window += lowpri_IO_window_msecs_inc * oldValue;
                        } else {
-                               ut->uu_lowpri_window += lowpri_IO_window_msecs_inc;
-                               if (ut->uu_lowpri_window > lowpri_max_window_msecs)
-                                       ut->uu_lowpri_window = lowpri_max_window_msecs;
+                               if (ut->uu_devbsdunit != devbsdunit) { // the thread sends I/Os to different devices within the same system call
+                                       // keep track of the numthreads in the right device
+                                       OSDecrementAtomic(&_throttle_io_info[ut->uu_devbsdunit].numthreads_throttling);
+                                       OSIncrementAtomic(&_throttle_io_info[devbsdunit].numthreads_throttling);
+                                       ut->uu_devbsdunit = devbsdunit;
+                               }
+                               int numthreads = MAX(1, _throttle_io_info[devbsdunit].numthreads_throttling);
+                               ut->uu_lowpri_window += lowpri_IO_window_msecs_inc * numthreads;
+                               if (ut->uu_lowpri_window > lowpri_max_window_msecs * numthreads)
+                                       ut->uu_lowpri_window = lowpri_max_window_msecs * numthreads;
                        }
                }
        }
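The per-device accounting above makes throttling scale with contention: spec_strategy() records the device in uu_devbsdunit and grows uu_lowpri_window by lowpri_IO_window_msecs_inc per thread already throttling there, and throttle_lowpri_io() multiplies its nap budget the same way. A hedged sketch of the budget arithmetic (illustrative helper; parameter names mirror the tunables above):

    #include <sys/param.h>  /* MAX */

    /* With the defaults above (lowpri_max_waiting_msecs = 200,
     * LOWPRI_SLEEP_INTERVAL = 5), a lone thread gets 40 naps of 5 ms,
     * i.e. at most ~200 ms of throttling per system call; the budget
     * grows linearly as more threads throttle against the same device. */
    static int
    example_max_naps(int max_waiting_msecs, int sleep_interval_msecs,
        int numthreads_throttling)
    {
            return max_waiting_msecs / sleep_interval_msecs *
                MAX(1, numthreads_throttling);
    }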
@@ -827,7 +872,7 @@ spec_close(struct vnop_close_args *ap)
                 * sum of the reference counts on all the aliased
                 * vnodes descends to one, we are on last close.
                 */
-               if (vcount(vp) > 1)
+               if (vcount(vp) > 0)
                        return (0);
 #else /* DEVFS_IMPLEMENTS_LOCKING */
                /*
@@ -837,7 +882,7 @@ spec_close(struct vnop_close_args *ap)
                 * sum of the reference counts on all the aliased
                 * vnodes descends to one, we are on last close.
                 */
-               if (vcount(vp) > 1)
+               if (vcount(vp) > 0)
                        return (0);
 
                /*
index e3b16f486462417c600feacbe27bf90bab241be2..d38346b0c881f751e23b0f4d1a9817198bddb5b3 100644 (file)
@@ -564,7 +564,7 @@ dlil_affinity_set(struct thread *tp, u_int32_t tag)
 void
 dlil_init(void)
 {
-       PE_parse_boot_arg("net_affinity", &net_affinity);
+       PE_parse_boot_argn("net_affinity", &net_affinity, sizeof (net_affinity));
        
        TAILQ_INIT(&dlil_ifnet_head);
        TAILQ_INIT(&ifnet_head);
index 8a4da6c6c951a5f05941e90ce7e9d6fe6a8ba19d..c2607645ab1a2321b31dbdd1d228f5c8f265202c 100644 (file)
@@ -406,11 +406,35 @@ ether_demux(
                }
        }
        
-       /* Quick check for VLAN */
-       if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0 ||
-               ether_type == htons(ETHERTYPE_VLAN)) {
-               *protocol_family = PF_VLAN;
-               return 0;
+       /* check for VLAN */
+       if ((m->m_pkthdr.csum_flags & CSUM_VLAN_TAG_VALID) != 0) {
+               if (EVL_VLANOFTAG(m->m_pkthdr.vlan_tag) != 0) {
+                       *protocol_family = PF_VLAN;
+                       return (0);
+               }
+               /* the packet is just priority-tagged, clear the bit */
+               m->m_pkthdr.csum_flags &= ~CSUM_VLAN_TAG_VALID;
+       }
+       else if (ether_type == htons(ETHERTYPE_VLAN)) {
+               struct ether_vlan_header *      evl;
+
+               evl = (struct ether_vlan_header *)frame_header;
+               if (m->m_len < ETHER_VLAN_ENCAP_LEN
+                   || ntohs(evl->evl_proto) == ETHERTYPE_VLAN
+                   || EVL_VLANOFTAG(ntohs(evl->evl_tag)) != 0) {
+                       *protocol_family = PF_VLAN;
+                       return 0;
+               }
+               /* the packet is just priority-tagged */
+
+               /* make the encapsulated ethertype the actual ethertype */
+               ether_type = evl->evl_encap_proto = evl->evl_proto;
+
+               /* remove the encapsulation header */
+               m->m_len -= ETHER_VLAN_ENCAP_LEN;
+               m->m_data += ETHER_VLAN_ENCAP_LEN;
+               m->m_pkthdr.len -= ETHER_VLAN_ENCAP_LEN;
+               m->m_pkthdr.csum_flags = 0; /* can't trust hardware checksum */
        }
        
        data = mtod(m, u_int8_t*);
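The ether_demux() rework above separates real VLAN membership from 802.1p priority-only tagging: a tag whose 12-bit VLAN ID is zero carries only a priority, so the frame is untagged in place (or the CSUM_VLAN_TAG_VALID bit cleared) rather than bounced to PF_VLAN. A hedged sketch of the field being tested (mask per the standard 802.1Q layout; the helper is illustrative):

    #include <sys/types.h>

    /* 802.1Q tag control word: 3-bit priority, 1-bit CFI, 12-bit VLAN ID.
     * EVL_VLANOFTAG() in the diff extracts those low 12 bits. */
    static int
    example_is_priority_only(u_int16_t tag_host_order)
    {
            return (tag_host_order & 0x0FFF) == 0;  /* VLAN ID of zero */
    }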
index a883f471566933b0798aa50d9414d382e6a9e722..7e82691c213a17573a19b18b306969720e3e4486 100644 (file)
@@ -148,6 +148,8 @@ struct if_clonereq64 {
 #define IFEF_VLAN              0x200   /* interface has one or more vlans */
 #define IFEF_BOND              0x400   /* interface is part of bond */
 #define        IFEF_ARPLL              0x800   /* ARP for IPv4LL addresses on this port */
+#define        IFEF_NOWINDOWSCALE      0x1000  /* TCP window scale disabled on this interface, see 5933937 & 5959897 */
+#define        IFEF_NOTIMESTAMPS       IFEF_NOWINDOWSCALE      /* We don't actually disable timestamps, just window scale; see 5959897 */
 #define        IFEF_SENDLIST   0x10000000 /* Interface supports sending a list of packets */
 #define IFEF_REUSE     0x20000000 /* DLIL ifnet recycler, ifnet is not new */
 #define IFEF_INUSE     0x40000000 /* DLIL ifnet recycler, ifnet in use */
index 3f5c2c14c11496d9e00dd9977c8373ba2172b2e6..4a783b6d8472a28e3c2afa05dff246012f896cae 100644 (file)
@@ -1108,6 +1108,7 @@ vlan_input(ifnet_t p, __unused protocol_family_t protocol,
        /* We found a vlan interface, inject on that interface. */
        dlil_input_packet_list(ifp, m);
     } else {
+       m->m_pkthdr.header = frame_header;
        /* Send priority-tagged packet up through the parent */
        dlil_input_packet_list(p, m);
     }
index e00ce3eaa10976ab53cd3922a2438c7cfd8c9ec8..7f4ec5ac694ebb5e74550c9a9d48ba412ac4763d 100644 (file)
@@ -186,7 +186,7 @@ route_init(void)
 {
        int size;
 
-       PE_parse_boot_arg("rte_debug", &rte_debug);
+       PE_parse_boot_argn("rte_debug", &rte_debug, sizeof (rte_debug));
        if (rte_debug != 0)
                rte_debug |= RTD_DEBUG;
 
index de3d2890a7c93667321840c438fec83007f87214..26195cc07077240bf08272b7dd86315a50c7c6c9 100644 (file)
@@ -34,7 +34,7 @@ KERNELFILES = \
 PRIVATE_DATAFILES = \
        if_fddi.h if_atm.h ip_dummynet.h \
        tcp_debug.h \
-       in_gif.h ip_compat.h
+       in_gif.h ip_compat.h ip_edgehole.h
 
 PRIVATE_KERNELFILES = ${KERNELFILES} \
        ip_ecn.h ip_encap.h ip_flow.h
index d764b37182ab27444d8d926dc0c3c640126eda07..174aa7742ce058975a915bd41fbf47c6101e2d8d 100644 (file)
@@ -242,7 +242,8 @@ arp_rtrequest(
                        gate = rt->rt_gateway;
                        SDL(gate)->sdl_type = rt->rt_ifp->if_type;
                        SDL(gate)->sdl_index = rt->rt_ifp->if_index;
-                       rt->rt_expire = timenow.tv_sec;
+                       /* In case we're called before 1.0 sec. has elapsed */
+                       rt->rt_expire = MAX(timenow.tv_sec, 1);
                        break;
                }
                /* Announce a new entry if requested. */
@@ -296,7 +297,8 @@ arp_rtrequest(
                        gate_ll->sdl_alen = broadcast_len;
                        gate_ll->sdl_family = AF_LINK;
                        gate_ll->sdl_len = sizeof(struct sockaddr_dl);
-                       rt->rt_expire = timenow.tv_sec;
+                       /* In case we're called before 1.0 sec. has elapsed */
+                       rt->rt_expire = MAX(timenow.tv_sec, 1);
                }
 #endif
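The MAX(timenow.tv_sec, 1) clamp in both hunks above exists because rt_expire == 0 is the sentinel for a permanent, never-expiring route: with uptime-based timestamps, an ARP entry created inside the first second after boot would otherwise be written with 0 and pinned forever. A hedged restatement of the idiom (illustrative helper):

    #include <sys/param.h>  /* MAX */

    /* Keep an uptime-derived expiry from colliding with the
     * "permanent entry" sentinel value of 0. */
    static long
    example_arp_expiry(long uptime_secs)
    {
            return MAX(uptime_secs, 1L);
    }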
 
index 28601114c94062ee12222600703dc753abf0e620..c9fc56d8655f077891c6ab5d910304d48d266ea4 100644 (file)
@@ -156,7 +156,7 @@ inet_cksum(struct mbuf *m, unsigned int nxt, unsigned int skip,
        KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_START, len,0,0,0,0);
 
        /* sanity check */
-       if (m->m_pkthdr.len < skip + len) {
+       if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.len < skip + len) {
                panic("inet_cksum: mbuf len (%d) < off+len (%d+%d)\n",
                    m->m_pkthdr.len, skip, len);
        }
@@ -248,7 +248,7 @@ inet_cksum(struct mbuf *m, unsigned int nxt, unsigned int skip,
        KERNEL_DEBUG(DBG_FNC_IN_CKSUM | DBG_FUNC_START, len,0,0,0,0);
 
        /* sanity check */
-       if (m->m_pkthdr.len < skip + len) {
+       if ((m->m_flags & M_PKTHDR) && m->m_pkthdr.len < skip + len) {
                panic("inet_cksum: mbuf len (%d) < off+len (%d+%d)\n",
                    m->m_pkthdr.len, skip, len);
        }
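Both inet_cksum() guards above rest on the same mbuf invariant: only the first mbuf of a packet chain carries a valid m_pkthdr, so reading m_pkthdr.len from a bare data mbuf compares against uninitialized memory. A hedged sketch of the distinction (illustrative helper, not part of the commit):

    #include <sys/mbuf.h>

    /* A packet-header length is only trustworthy when the mbuf
     * actually carries one (M_PKTHDR set on the chain head). */
    static int
    example_pkt_len_or(struct mbuf *m, int fallback)
    {
            return (m->m_flags & M_PKTHDR) ? m->m_pkthdr.len : fallback;
    }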
index a17a1fb7ee752f84f7e5c9d648e61a98c783c93c..fce3bb78b8b2a9c8f87bf63f50fa31f961e4c004 100644 (file)
@@ -230,6 +230,9 @@ in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo, __unused struct proc *
                return (mac_error);
        }
        mac_inpcb_label_associate(so, inp);
+#endif
+#if CONFIG_IP_EDGEHOLE
+       ip_edgehole_attach(inp);
 #endif
        so->so_pcb = (caddr_t)inp;
 
index 3fd86be88cb60d7a6c1e355dee78aeae70a70452..0186e42a5cfe8900ff0db3d3f6e486e669b71ae2 100644 (file)
@@ -203,6 +203,10 @@ struct inpcb {
        void    *pdp_ifp;
 #endif /* _KERN_SYS_KERNELTYPES_H_ */
 #endif /* CONFIG_EMBEDDED */
+#if CONFIG_IP_EDGEHOLE
+       u_int32_t       inpcb_edgehole_flags;
+       u_int32_t       inpcb_edgehole_mask;
+#endif
 };
 
 #endif /* KERNEL_PRIVATE */
index 73ab3ea91caef70b4bee4e7f8815fcad4f93f317..ebc0772b0933e4d9911f5468da5b90d79ae5e978 100644 (file)
@@ -370,6 +370,9 @@ div_output(struct socket *so, struct mbuf *m, struct sockaddr *addr,
                socket_unlock(so, 0);
 #if CONFIG_MACF_NET
                mac_mbuf_label_associate_inpcb(inp, m);
+#endif
+#if CONFIG_IP_EDGEHOLE
+               ip_edgehole_mbuf_tag(inp, m);
 #endif
                error = ip_output(m,
                            inp->inp_options, &inp->inp_route,
diff --git a/bsd/netinet/ip_edgehole.c b/bsd/netinet/ip_edgehole.c
new file mode 100644 (file)
index 0000000..aa56449
--- /dev/null
@@ -0,0 +1,333 @@
+#include <sys/param.h>
+#include <sys/proc.h>
+#include <sys/kpi_mbuf.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
+#include <string.h>            // For bzero
+#include <libkern/libkern.h> // for printf
+#include <kern/debug.h> // For panic
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/route.h>
+#include <netinet/in.h>
+#include <netinet/in_pcb.h>
+#include <netinet/in_var.h>
+#include <netinet/ip_var.h>
+#include <netinet/tcp.h>
+#include <netinet/tcp_fsm.h>
+#include <netinet/tcp_seq.h>
+#include <netinet/tcp_timer.h>
+#include <netinet/tcp_var.h>
+#include <libkern/OSMalloc.h>
+#include <libkern/OSAtomic.h>
+#include <kern/thread_call.h>
+#include "ip_edgehole.h"
+
+enum
+{
+       kEdgeHoleFlag_BlockInternet     =       0x00000001,
+       kEdgeHoleFlag_BlockVV           =       0x00000002
+};
+
+struct edgehole_tag
+{
+       // flags tells us whether or not we should block traffic
+       u_int32_t                       eh_flags;
+       
+       // These fields are used to help us find the PCB after we block traffic for TCP
+       struct inpcbinfo        *eh_inpinfo;
+       struct inpcb            *eh_inp;
+};
+
+struct edgehole_delayed_notify
+{
+       // flags tells us whether or not we should block traffic
+       struct edgehole_delayed_notify  *next;
+       
+       // These fields are used to help us find the PCB after we block traffic for TCP
+       struct inpcbinfo        *inpinfo;
+       struct inpcb            *inp;
+};
+
+static mbuf_tag_id_t   edgehole_tag = 0;
+static thread_call_t   edgehole_callout = NULL;
+static OSMallocTag             edgehole_mtag = 0;
+static struct edgehole_delayed_notify  *edgehole_delay_list = NULL;
+
+#ifndef        HAS_COMPARE_AND_SWAP_PTR
+// 64bit kernels have an OSCompareAndSwapPtr that does the right thing
+static Boolean
+OSCompareAndSwapPtr(
+       void *oldValue,
+       void *newValue,
+       volatile void *address)
+{
+       return OSCompareAndSwap((UInt32)oldValue, (UInt32)newValue, (volatile UInt32*)address);
+}
+#endif
+
+static void
+ip_edgehole_notify_delayed(
+       struct inpcb            *inp,
+       struct inpcbinfo        *inpinfo)
+{
+       if (in_pcb_checkstate(inp, WNT_ACQUIRE, 0) != WNT_STOPUSING)
+       {
+               // We've found an inpcb for the packet we're dropping.
+               struct socket   *so = inp->inp_socket;
+               if (so && so != &inpinfo->nat_dummy_socket)
+               {
+                       socket_lock(so, 1);
+                       if (in_pcb_checkstate(inp, WNT_RELEASE,1) != WNT_STOPUSING)
+                       {
+                               if (inp->inp_ip_p == IPPROTO_TCP)
+                               {
+                                       // Why do we still have caddr_t? Come on! Casting from
+                                       // caddr_t to something else causes "cast increases required alignment"
+                                       // warnings, and warnings are treated as failures. This union does the
+                                       // exact same thing without the warning.
+                                       union
+                                       {
+                                               caddr_t caddrt_sucks;
+                                               void    *void_ptr;
+                                       } bite_me;
+                                       
+                                       bite_me.caddrt_sucks = inp->inp_ppcb;
+                                       tcp_drop((struct tcpcb*)bite_me.void_ptr, EPERM);
+                               }
+                               else
+                               {
+                                       // Is this enough?
+                                       socantsendmore(so);
+                               }
+                       }
+                       socket_unlock(so, 1);
+               }
+       }
+}
+
+// Some shortcomings of this strategy:
+// 1) an inpcb could be reused for a new socket before we get a chance to notify
+
+static void
+ip_edgehole_process_delayed(
+       __unused void *unused1,
+       __unused void *unused2)
+{
+       struct edgehole_delayed_notify  *head;
+       
+       while (edgehole_delay_list)
+       {
+               // Atomically grab the list
+               do
+               {
+                       head = edgehole_delay_list;
+               }
+               while (!OSCompareAndSwapPtr(head, NULL, &edgehole_delay_list));
+               
+               if (head == NULL)
+               {
+                       break;
+               }
+               
+               // Prune duplicates from the list
+               struct edgehole_delayed_notify  *current;
+               struct edgehole_delayed_notify  **current_p;
+               struct edgehole_delayed_notify  *ye_dead;
+               for (current = head; current && current->next; current = current->next)
+               {
+                       current_p = &head;
+                       while (*current_p)
+                       {
+                               if ((*current_p)->inp == current->inp)
+                               {
+                                       ye_dead = *current_p;
+                                       *current_p = ye_dead->next;
+                                       OSFree(ye_dead, sizeof(*ye_dead), edgehole_mtag);
+                               }
+                               else
+                               {
+                                       current_p = &(*current_p)->next;
+                               }
+                       }
+               }
+               
+               while (head)
+               {
+                       struct inpcbinfo *lockedinfo;
+                       
+                       lockedinfo = head->inpinfo;
+                       
+                       // Lock the list
+                       lck_rw_lock_shared(lockedinfo->mtx);
+                       
+                       struct inpcb *inp;
+                       
+                       // Walk the inp list.
+                       LIST_FOREACH(inp, lockedinfo->listhead, inp_list)
+                       {
+                               // Walk the list of notifications
+                               for (current = head; current != NULL; current = current->next)
+                               {
+                                       // Found a match, notify
+                                       if (current->inpinfo == lockedinfo && current->inp == inp)
+                                       {
+                                               ip_edgehole_notify_delayed(inp, lockedinfo);
+                                       }
+                               }
+                       }
+                       
+                       lck_rw_done(lockedinfo->mtx);
+                       
+                       // Release all the notifications for this inpcbinfo
+                       current_p = &head;
+                       while (*current_p)
+                       {
+                               // Free any items for this inpcbinfo
+                               if ((*current_p)->inpinfo == lockedinfo)
+                               {
+                                       ye_dead = *current_p;
+                                       *current_p = ye_dead->next;
+                                       OSFree(ye_dead, sizeof(*ye_dead), edgehole_mtag);
+                               }
+                               else
+                               {
+                                       current_p = &(*current_p)->next;
+                               }
+                       }
+               }
+       }
+}
+
+static void
+ip_edgehole_notify(
+       struct edgehole_tag     *tag)
+{
+       // Since the lock on the socket may be held while a packet is being transmitted,
+       // we must allocate storage to keep track of this information and schedule a
+       // thread to handle the work.
+       
+       if (tag->eh_inp == NULL || tag->eh_inpinfo == NULL)
+               return;
+       
+       struct edgehole_delayed_notify  *delayed = OSMalloc(sizeof(*delayed), edgehole_mtag);
+       if (delayed)
+       {
+               delayed->inp = tag->eh_inp;
+               delayed->inpinfo = tag->eh_inpinfo;
+               do
+               {
+                       delayed->next = edgehole_delay_list;
+               }
+               while (!OSCompareAndSwapPtr(delayed->next, delayed, &edgehole_delay_list));
+               
+               thread_call_enter(edgehole_callout);
+       }
+}
+
+__private_extern__ void
+ip_edgehole_attach(
+       struct inpcb    *inp)
+{
+       inp->inpcb_edgehole_flags = 0;
+       inp->inpcb_edgehole_mask = 0;
+       
+       // TBD: call MAC framework to find out if we are allowed to use EDGE
+#ifdef TEST_THE_EVIL_EDGE_HOLE
+       char    pidname[64];
+       proc_selfname(pidname, sizeof(pidname));
+       pidname[sizeof(pidname) -1] = 0;
+       if (strcmp(pidname, "MobileSafari") == 0 ||
+               strcmp(pidname, "ping") == 0)
+       {
+               inp->inpcb_edgehole_flags = kEdgeHoleFlag_BlockInternet;
+               inp->inpcb_edgehole_mask = kEdgeHoleFlag_BlockInternet;
+       }
+#endif
+       
+       if (inp->inpcb_edgehole_mask != 0)
+       {
+               // Allocate a callout
+               if (edgehole_callout == NULL)
+               {
+                       thread_call_t tmp_callout = thread_call_allocate(ip_edgehole_process_delayed, NULL);
+                       if (!tmp_callout) panic("ip_edgehole_attach: thread_call_allocate failed");
+                       if (!OSCompareAndSwapPtr(NULL, tmp_callout, &edgehole_callout))
+                               thread_call_free(tmp_callout);
+               }
+               
+               // Allocate a malloc tag
+               if (edgehole_mtag == 0)
+               {
+                       OSMallocTag     mtag = OSMalloc_Tagalloc("com.apple.ip_edgehole", 0);
+                       if (!mtag) panic("ip_edgehole_attach: OSMalloc_Tagalloc failed");
+                       if (!OSCompareAndSwapPtr(NULL, mtag, &edgehole_mtag))
+                               OSMalloc_Tagfree(mtag);
+               }
+       }
+}
+
+__private_extern__ void
+ip_edgehole_mbuf_tag(
+       struct inpcb    *inp,
+       mbuf_t                  m)
+{
+       // Immediately bail if there are no flags on this inpcb
+       if (inp->inpcb_edgehole_mask == 0)
+       {
+               return;
+       }
+       
+       // Allocate a tag_id if we don't have one already
+       if (edgehole_tag == 0)
+               mbuf_tag_id_find("com.apple.edgehole", &edgehole_tag);
+       
+       struct edgehole_tag     *tag;
+       size_t  length;
+       
+       // Find an existing tag
+       if (mbuf_tag_find(m, edgehole_tag, 0, &length, (void**)&tag) == 0)
+       {
+               if (length != sizeof(*tag))
+                       panic("ip_edgehole_mbuf_tag - existing tag is wrong size");
+               
+               // add restrictions
+               tag->eh_flags = (tag->eh_flags & (~inp->inpcb_edgehole_mask)) |
+                                               (inp->inpcb_edgehole_flags & inp->inpcb_edgehole_mask);
+       }
+       else if ((inp->inpcb_edgehole_mask & inp->inpcb_edgehole_flags) != 0)
+       {
+               // Add the tag
+               if (mbuf_tag_allocate(m, edgehole_tag, 0, sizeof(*tag), MBUF_WAITOK, (void**)&tag) != 0)
+                       panic("ip_edgehole_mbuf_tag - mbuf_tag_allocate failed"); // ouch - how important is it that we block this stuff?
+               
+               tag->eh_flags = (inp->inpcb_edgehole_flags & inp->inpcb_edgehole_mask);
+               tag->eh_inp = inp;
+               tag->eh_inpinfo = inp->inp_pcbinfo;
+       }
+}
+
+int
+ip_edgehole_filter(
+       mbuf_t                  *m,
+       __unused int    isVV)
+{
+       struct edgehole_tag     *tag;
+       size_t  length;
+       
+       if (mbuf_tag_find(*m, edgehole_tag, 0, &length, (void**)&tag) == 0)
+       {
+               if (length != sizeof(*tag))
+                       panic("ip_edgehole_filter - existing tag is wrong size");
+               
+               if ((tag->eh_flags & kEdgeHoleFlag_BlockInternet) != 0)
+               {
+                       ip_edgehole_notify(tag);
+                       
+                       mbuf_freem(*m); *m = NULL;
+                       return EPERM;
+               }
+       }
+       
+       return 0;
+}
diff --git a/bsd/netinet/ip_edgehole.h b/bsd/netinet/ip_edgehole.h
new file mode 100644 (file)
index 0000000..5bfe7a0
--- /dev/null
@@ -0,0 +1,17 @@
+#include <sys/kpi_mbuf.h>
+
+struct inpcb;
+
+// Tag an mbuf on the way out with the edge flags from the inpcb
+extern void ip_edgehole_mbuf_tag(struct inpcb *inp, mbuf_t m);
+
+// Attach the edge flags to the inpcb
+extern void ip_edgehole_attach(struct inpcb *inp);
+
+// Called by the edge interface to determine if the edge interface
+// should drop the packet. Will return 0 if the packet should continue
+// to be processed or EPERM if ip_edgehole_filter swallowed the packet.
+// When ip_edgehole_filter swallows a packet, it frees it and sets your
+// pointer to it to NULL. isVV should be set to zero unless the edge
+// interface in question is the visual voicemail edge interface.
+extern int ip_edgehole_filter(mbuf_t *m, int isVV);
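Tying the new API together: ip_edgehole_attach() stamps flags on the inpcb at allocation, ip_edgehole_mbuf_tag() copies them onto each outbound mbuf, and the edge interface polices the tag at transmit time. A hedged sketch of how a driver output path might honor the contract above (the hook function itself is hypothetical):

    #include <sys/kpi_mbuf.h>
    #include "ip_edgehole.h"

    /* Hypothetical edge-interface output hook: on EPERM the filter has
     * already freed the mbuf and NULLed our pointer, so it must not be
     * touched again. */
    static int
    example_edge_output(mbuf_t m)
    {
            int error = ip_edgehole_filter(&m, 0 /* not visual voicemail */);

            if (error != 0)
                    return error;
            /* ... hand m to the hardware here ... */
            return 0;
    }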
index e30687513e2f8ecd23b1ba5dc1f47024e94a8385..533184f4ea09602a29c67169733a6214202bcc40 100644 (file)
@@ -381,6 +381,10 @@ rip_output(m, so, dst)
        mac_mbuf_label_associate_inpcb(inp, m);
 #endif
 
+#if CONFIG_IP_EDGEHOLE
+       ip_edgehole_mbuf_tag(inp, m);
+#endif
+
 #if CONFIG_FORCE_OUT_IFP
        return (ip_output_list(m, 0, inp->inp_options, &inp->inp_route, flags,
                          inp->inp_moptions, inp->pdp_ifp));
index 59dd0cb78b093112f3c68ba1fdf8df27b3898499..36756785d677bc1892e5208b0efaa1c9bf9626e2 100644 (file)
@@ -1734,6 +1734,14 @@ findpcb:
                        /* ECN-setup SYN */
                        tp->ecn_flags |= (TE_SETUPRECEIVED | TE_SENDIPECT);
                }
+#ifdef IFEF_NOWINDOWSCALE
+               if (m->m_pkthdr.rcvif != NULL &&
+                       (m->m_pkthdr.rcvif->if_eflags & IFEF_NOWINDOWSCALE) != 0)
+               {
+                       // Window scaling is not enabled on this interface
+                       tp->t_flags &= ~(TF_REQ_SCALE);
+               }
+#endif
                goto trimthenstep6;
                }
 
@@ -2393,44 +2401,72 @@ trimthenstep6:
                                tp->t_dupacks = 0;
                        break;
                }
+
+               if (!IN_FASTRECOVERY(tp)) {
+                       /*
+                        * We were not in fast recovery.  Reset the duplicate ack
+                        * counter.
+                        */
+                       tp->t_dupacks = 0;
+               }
                /*
                 * If the congestion window was inflated to account
                 * for the other side's cached packets, retract it.
                 */
-               if (tcp_do_newreno || tp->sack_enable) {
-                       if (IN_FASTRECOVERY(tp)) {
+               else {
+                       if (tcp_do_newreno || tp->sack_enable) {
                                if (SEQ_LT(th->th_ack, tp->snd_recover)) {
                                        if (tp->sack_enable)
                                                tcp_sack_partialack(tp, th);
                                        else
-                                               tcp_newreno_partial_ack(tp, th);
-                               } else {
-                                       /*
-                                        * Out of fast recovery.
-                                        * Window inflation should have left us
-                                        * with approximately snd_ssthresh
-                                        * outstanding data.
-                                        * But in case we would be inclined to
-                                        * send a burst, better to do it via
-                                        * the slow start mechanism.
-                                        */
-                                       if (SEQ_GT(th->th_ack +
-                                                       tp->snd_ssthresh,
-                                                  tp->snd_max))
-                                               tp->snd_cwnd = tp->snd_max -
-                                                               th->th_ack +
-                                                               tp->t_maxseg;
-                                       else
-                                               tp->snd_cwnd = tp->snd_ssthresh;
+                                               tcp_newreno_partial_ack(tp, th);                        
+                               }
+                               else {
+                                       if (tcp_do_newreno) {
+                                               long ss = tp->snd_max - th->th_ack;
+       
+                                               /*
+                                                * Complete ack.  Inflate the congestion window to
+                                                * ssthresh and exit fast recovery.
+                                                *
+                                                * Window inflation should have left us with approx.
+                                                * snd_ssthresh outstanding data.  But in case we
+                                                * would be inclined to send a burst, better to do
+                                                * it via the slow start mechanism.
+                                                */
+                                               if (ss < tp->snd_ssthresh)
+                                                       tp->snd_cwnd = ss + tp->t_maxseg;
+                                               else
+                                                       tp->snd_cwnd = tp->snd_ssthresh;
+                                       }
+                                       else {
+                                               /*
+                                                * Clamp the congestion window to the crossover point
+                                                * and exit fast recovery.
+                                                */
+                                               if (tp->snd_cwnd > tp->snd_ssthresh)
+                                                       tp->snd_cwnd = tp->snd_ssthresh;                                        
+                                       }
+       
+                                       EXIT_FASTRECOVERY(tp);
+                                       tp->t_dupacks = 0;
+                                       tp->t_bytes_acked = 0;
                                }
                        }
-               } else {
-                       if (tp->t_dupacks >= tcprexmtthresh &&
-                           tp->snd_cwnd > tp->snd_ssthresh)
-                               tp->snd_cwnd = tp->snd_ssthresh;
+                       else {
+                               /*
+                                * Clamp the congestion window to the crossover point
+                                * and exit fast recovery in non-newreno and non-SACK case.
+                                */
+                               if (tp->snd_cwnd > tp->snd_ssthresh)
+                                       tp->snd_cwnd = tp->snd_ssthresh;                                        
+                               EXIT_FASTRECOVERY(tp);
+                               tp->t_dupacks = 0;
+                               tp->t_bytes_acked = 0;
+                       }
                }
-               tp->t_dupacks = 0;
-               tp->t_bytes_acked = 0;
+
+
                /*
                 * If we reach this point, ACK is not a duplicate,
                 *     i.e., it ACKs something we sent.
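The restructured block above makes the exit from fast recovery explicit: a partial ACK (th_ack below snd_recover) keeps recovery going, while a complete ACK deflates the inflated window before normal processing resumes. A hedged sketch of the NewReno deflation choice (illustrative helper; names mirror the tcpcb fields above):

    /* cwnd chosen when a complete ACK exits NewReno fast recovery:
     * slow-start from the data still in flight when that is small,
     * otherwise clamp straight to ssthresh. */
    static long
    example_newreno_exit_cwnd(long snd_max, long th_ack,
        long snd_ssthresh, long t_maxseg)
    {
            long ss = snd_max - th_ack;     /* bytes still outstanding */

            return (ss < snd_ssthresh) ? ss + t_maxseg : snd_ssthresh;
    }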
index ca32f6b401d57c82d02fe25c1b57e0d80fbe8bea..e22e04993f01589eb77a4b9e95e8ce7d995a326d 100644 (file)
@@ -1199,6 +1199,9 @@ send:
 #if CONFIG_MACF_NET
        mac_mbuf_label_associate_inpcb(tp->t_inpcb, m);
 #endif
+#if CONFIG_IP_EDGEHOLE
+       ip_edgehole_mbuf_tag(tp->t_inpcb, m);
+#endif
 #if INET6
        if (isipv6) {
                ip6 = mtod(m, struct ip6_hdr *);
@@ -1652,7 +1655,7 @@ tcp_ip_output(struct socket *so, struct tcpcb *tp, struct mbuf *pkt,
                        unlocked = TRUE;
                        socket_unlock(so, 0);
        }
-
+       
        /*
         * Don't send down a chain of packets when:
         * - TCP chaining is disabled
index f51febfc1af45998e510d576213555feb73c2418..a94f8ad2a7f0e8f4cf6f10f7059919f952d3468f 100644 (file)
@@ -608,6 +608,12 @@ tcp_respond(
                mac_netinet_tcp_reply(m);
        }
 #endif
+       
+#if CONFIG_IP_EDGEHOLE
+       if (tp && tp->t_inpcb)
+               ip_edgehole_mbuf_tag(tp->t_inpcb, m);
+#endif
+       
        nth->th_seq = htonl(seq);
        nth->th_ack = htonl(ack);
        nth->th_x2 = 0;
@@ -1433,11 +1439,6 @@ tcp6_ctlinput(cmd, sa, d)
 
 #define ISN_BYTES_PER_SECOND 1048576
 
-//PWC - md5 routines cause alignment exceptions.  Need to figure out why.  For now use lame incremental
-// isn.  how's that for not easily guessable!?
-
-int pwc_bogus;
-
 tcp_seq
 tcp_new_isn(tp)
        struct tcpcb *tp;
@@ -1625,7 +1626,7 @@ tcp_mtudisc(
 
 /*
  * Look-up the routing entry to the peer of this inpcb.  If no route
- * is found and it cannot be allocated the return NULL.  This routine
+ * is found and it cannot be allocated then return NULL.  This routine
  * is called by TCP routines that access the rmx structure and by tcp_mss
  * to get the interface MTU.
  */
@@ -1675,6 +1676,15 @@ tcp_rtlookup(inp)
        else
                tp->t_flags |= TF_PMTUD;
 
+#ifdef IFEF_NOWINDOWSCALE
+       if (tp->t_state == TCPS_SYN_SENT && rt != NULL && rt->rt_ifp != NULL &&
+               (rt->rt_ifp->if_eflags & IFEF_NOWINDOWSCALE) != 0)
+       {
+               // Window scaling is not enabled on this interface
+               tp->t_flags &= ~(TF_REQ_SCALE);
+       }
+#endif
+
        return rt;
 }
 
index ed796e474876da538a091370ba0c94ffb1e71251..d194a867feb498ce3a8b1d2ccb54ace82b73b556 100644 (file)
@@ -155,7 +155,11 @@ SYSCTL_QUAD(_net_inet_tcp, OID_AUTO, out_sw_cksum_bytes, CTLFLAG_RD,
 #define        TCPDEBUG2(req)
 #endif
 
+#if CONFIG_USESOCKTHRESHOLD
 __private_extern__ unsigned int        tcp_sockthreshold = 64;
+#else
+__private_extern__ unsigned int        tcp_sockthreshold = 0;
+#endif
 SYSCTL_INT(_net_inet_tcp, OID_AUTO, sockthreshold, CTLFLAG_RW, 
     &tcp_sockthreshold , 0, "TCP Socket size increased if less than threshold");
 
index 7e97ffd9a7b5f84797326b1a00899f2ba8a8a139..88e5413f5b9eca9dec7c3570077de27dcbb4d66c 100644 (file)
@@ -1102,7 +1102,10 @@ udp_output(inp, m, addr, control, p)
 #if CONFIG_MACF_NET
        mac_mbuf_label_associate_inpcb(inp, m);
 #endif
-
+       
+#if CONFIG_IP_EDGEHOLE
+       ip_edgehole_mbuf_tag(inp, m);
+#endif
 
        /*
         * Calculate data length and get a mbuf
@@ -1317,6 +1320,7 @@ udp_send(struct socket *so, __unused int flags, struct mbuf *m, struct sockaddr
                m_freem(m);
                return EINVAL;
        }
+       
        return udp_output(inp, m, addr, control, p);
 }
 
index 86ea9ada0c4c00d664a9860a9ae65359c2ec0a5f..0e1d6dc69f58d3fa4ac8c54b952a1e3456f8aaa0 100644 (file)
@@ -527,6 +527,9 @@ void ipsec_send_natt_keepalive(struct secasvar *sav);
 
 void key_init(void);
 
+static errno_t ipsecif_register_control(void);
+
+
 
 /*
  * PF_KEY init
@@ -561,6 +564,10 @@ key_init(void)
                LIST_INIT(&spihash[i]);
 
        raw_init();
+
+       /* register the ipsecif kernel control */
+       ipsecif_register_control();
+       
 }
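The 655-line hunk below registers com.apple.net.ipsecif_control as a kernel control that creates one ipsec<unit> interface per connected client. A hedged userspace sketch of the standard Darwin kernel-control handshake it expects (generic API, not code from this commit; requires root, per CTL_FLAG_PRIVILEGED):

    #include <sys/types.h>
    #include <sys/socket.h>
    #include <sys/sys_domain.h>
    #include <sys/kern_control.h>
    #include <sys/ioctl.h>
    #include <string.h>
    #include <unistd.h>

    /* Connect to the ipsecif kernel control; per the code below, the
     * connection creates and attaches an "ipsec<unit>" interface that
     * lives until this descriptor is closed. */
    int
    example_open_ipsecif(u_int32_t unit)
    {
            struct ctl_info info;
            struct sockaddr_ctl addr;
            int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);

            if (fd < 0)
                    return -1;

            memset(&info, 0, sizeof(info));
            strlcpy(info.ctl_name, "com.apple.net.ipsecif_control",
                sizeof(info.ctl_name));
            if (ioctl(fd, CTLIOCGINFO, &info) == -1) {  /* name -> id */
                    close(fd);
                    return -1;
            }

            memset(&addr, 0, sizeof(addr));
            addr.sc_len = sizeof(addr);
            addr.sc_family = AF_SYSTEM;
            addr.ss_sysaddr = AF_SYS_CONTROL;
            addr.sc_id = info.ctl_id;
            addr.sc_unit = unit;        /* becomes the ipsec<unit> name */

            if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
                    close(fd);
                    return -1;
            }
            return fd;
    }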
 
 
@@ -8179,3 +8186,655 @@ key_alloc_mbuf(l)
 
        return m;
 }
+
+
+/* ----------------------------------------------------------------------------------
+Application of kernel control for interface creation
+
+Theory of operation:
+ipsecif acts as glue between kernel control sockets and ipsec network interfaces. This
+kernel control will register an interface for every client that connects. 
+ipsec interfaces do not send or receive packets; packets are intercepted by ipsec before
+they reach the interface. ipsec needs an interface only to attach tunnel IP addresses to.
+In the future, we may want to change the control mechanism to use PF_KEY to create
+interfaces for ipsec.
+---------------------------------------------------------------------------------- */
+
+#include <sys/systm.h>
+//#include "if_ip.h" 
+#include <sys/kern_control.h>
+#include <net/kpi_protocol.h>
+#include <net/kpi_interface.h>
+#include <net/if.h>
+#include <net/if_types.h>
+#include <net/bpf.h>
+#include <libkern/OSMalloc.h>
+#include <libkern/OSAtomic.h>
+#include <sys/mbuf.h> /* Until leopard, our ugly bpf protocol prepend will need this */
+#include <sys/sockio.h>
+#include <netinet/in.h>
+#include <netinet6/in6_var.h>
+
+/*
+*/
+
+#define IPSECIF_CONTROL_NAME "com.apple.net.ipsecif_control"
+
+/* Kernel Control functions */
+static errno_t ipsecif_ctl_connect(kern_ctl_ref kctlref, struct sockaddr_ctl *sac,
+                                                                void **unitinfo);
+static errno_t ipsecif_ctl_disconnect(kern_ctl_ref kctlref, u_int32_t unit,
+                                                                       void *unitinfo);
+static errno_t ipsecif_ctl_send(kern_ctl_ref kctlref, u_int32_t unit,
+                                                         void *unitinfo, mbuf_t m, int flags);
+
+/* Network Interface functions */
+static errno_t ipsecif_output(ifnet_t interface, mbuf_t data);
+static errno_t ipsecif_demux(ifnet_t interface, mbuf_t data, char *frame_header,
+                                                  protocol_family_t *protocol);
+static errno_t ipsecif_add_proto(ifnet_t interface, protocol_family_t protocol,
+                                                          const struct ifnet_demux_desc *demux_array,
+                                                          u_int32_t demux_count);
+static errno_t ipsecif_del_proto(ifnet_t interface, protocol_family_t protocol);
+static errno_t ipsecif_ioctl(ifnet_t interface, u_int32_t cmd, void *data);
+static errno_t ipsecif_settap(ifnet_t interface, bpf_tap_mode mode,
+                                                       bpf_packet_func callback);
+static void            ipsecif_detached(ifnet_t interface);
+
+/* Protocol handlers */
+static errno_t ipsecif_attach_proto(ifnet_t interface, protocol_family_t proto);
+static errno_t ipsecif_proto_input(ifnet_t interface, protocol_family_t protocol,
+                                                                mbuf_t m, char *frame_header);
+
+/* Control block allocated for each kernel control connection */
+struct ipsecif_pcb {
+       kern_ctl_ref    ctlref;
+       u_int32_t               unit;
+       ifnet_t                 ifp;
+       bpf_tap_mode    mode;
+       bpf_packet_func tap;
+};
+
+static kern_ctl_ref    ipsecif_kctlref;
+static u_int32_t       ipsecif_family;
+static OSMallocTag     ipsecif_malloc_tag;
+static SInt32          ipsecif_ifcount = 0;
+
+/* Prepend length */
+static void*
+ipsecif_alloc(size_t size)
+{
+       size_t  *mem = OSMalloc(size + sizeof(size_t), ipsecif_malloc_tag);
+       
+       if (mem) {
+               *mem = size + sizeof(size_t);
+               mem++;
+       }
+       
+       return (void*)mem;
+}
+
+static void
+ipsecif_free(void *ptr)
+{
+       size_t  *size = ptr;
+       size--;
+       OSFree(size, *size, ipsecif_malloc_tag);
+}
+
+static errno_t
+ipsecif_register_control(void)
+{
+       struct kern_ctl_reg     kern_ctl;
+       errno_t                         result = 0;
+       
+       /* Create a tag to allocate memory */
+       ipsecif_malloc_tag = OSMalloc_Tagalloc(IPSECIF_CONTROL_NAME, OSMT_DEFAULT);
+       
+       /* Find a unique value for our interface family */
+       result = mbuf_tag_id_find(IPSECIF_CONTROL_NAME, &ipsecif_family);
+       if (result != 0) {
+               printf("ipsecif_register_control - mbuf_tag_id_find_internal failed: %d\n", result);
+               return result;
+       }
+       
+       bzero(&kern_ctl, sizeof(kern_ctl));
+       strncpy(kern_ctl.ctl_name, IPSECIF_CONTROL_NAME, sizeof(kern_ctl.ctl_name));
+       kern_ctl.ctl_name[sizeof(kern_ctl.ctl_name) - 1] = 0;
+       kern_ctl.ctl_flags = CTL_FLAG_PRIVILEGED; /* Require root */
+       kern_ctl.ctl_connect = ipsecif_ctl_connect;
+       kern_ctl.ctl_disconnect = ipsecif_ctl_disconnect;
+       kern_ctl.ctl_send = ipsecif_ctl_send;
+       
+       result = ctl_register(&kern_ctl, &ipsecif_kctlref);
+       if (result != 0) {
+               printf("ipsecif_register_control - ctl_register failed: %d\n", result);
+               return result;
+       }
+       
+       /* Register the protocol plumbers */
+       if ((result = proto_register_plumber(PF_INET, ipsecif_family,
+                                                                                ipsecif_attach_proto, NULL)) != 0) {
+               printf("ipsecif_register_control - proto_register_plumber(PF_INET, %d) failed: %d\n",
+                          ipsecif_family, result);
+               ctl_deregister(ipsecif_kctlref);
+               return result;
+       }
+       
+       /* Register the protocol plumbers */
+       if ((result = proto_register_plumber(PF_INET6, ipsecif_family,
+                                                                                ipsecif_attach_proto, NULL)) != 0) {
+               proto_unregister_plumber(PF_INET, ipsecif_family);
+               ctl_deregister(ipsecif_kctlref);
+               printf("ipsecif_register_control - proto_register_plumber(PF_INET6, %d) failed: %d\n",
+                          ipsecif_family, result);
+               return result;
+       }
+       
+       return 0;
+}
+
+/* Kernel control functions */
+
+static errno_t
+ipsecif_ctl_connect(
+       kern_ctl_ref            kctlref,
+       struct sockaddr_ctl     *sac, 
+       void                            **unitinfo)
+{
+       struct ifnet_init_params        ipsecif_init;
+       struct ipsecif_pcb                              *pcb;
+       errno_t                                         result;
+       
+       /* kernel control allocates, interface frees */
+       pcb = ipsecif_alloc(sizeof(*pcb));
+       if (pcb == NULL)
+               return ENOMEM;
+       
+       /* Setup the protocol control block */
+       bzero(pcb, sizeof(*pcb));
+       *unitinfo = pcb;
+       pcb->ctlref = kctlref;
+       pcb->unit = sac->sc_unit;
+       printf("ipsecif_ctl_connect: creating unit ip%d\n", pcb->unit);
+       
+       /* Create the interface */
+       bzero(&ipsecif_init, sizeof(ipsecif_init));
+       ipsecif_init.name = "ipsec";
+       ipsecif_init.unit = pcb->unit;
+       ipsecif_init.family = ipsecif_family;
+       ipsecif_init.type = IFT_OTHER;
+       ipsecif_init.output = ipsecif_output;
+       ipsecif_init.demux = ipsecif_demux;
+       ipsecif_init.add_proto = ipsecif_add_proto;
+       ipsecif_init.del_proto = ipsecif_del_proto;
+       ipsecif_init.softc = pcb;
+       ipsecif_init.ioctl = ipsecif_ioctl;
+       ipsecif_init.set_bpf_tap = ipsecif_settap;
+       ipsecif_init.detach = ipsecif_detached;
+       
+       result = ifnet_allocate(&ipsecif_init, &pcb->ifp);
+       if (result != 0) {
+               printf("ipsecif_ctl_connect - ifnet_allocate failed: %d\n", result);
+               ipsecif_free(pcb);
+               return result;
+       }
+       OSIncrementAtomic(&ipsecif_ifcount);
+       
+       /* Set flags and additional information. */
+       ifnet_set_mtu(pcb->ifp, 1280);
+       ifnet_set_flags(pcb->ifp, IFF_UP | IFF_MULTICAST | IFF_BROADCAST, 0xffff);
+//     ifnet_set_flags(pcb->ifp, IFF_UP | IFF_MULTICAST | IFF_POINTOPOINT, 0xffff);
+       
+       /* Attach the interface */
+       result = ifnet_attach(pcb->ifp, NULL);
+       if (result != 0) {
+               printf("ipsecif_ctl_connect - ifnet_allocate failed: %d\n", result);
+               ifnet_release(pcb->ifp);
+               ipsecif_free(pcb);
+       }
+       
+       /* Attach to bpf */
+       if (result == 0)
+               bpfattach(pcb->ifp, DLT_NULL, 4);
+
+       return result;
+}
+
+/*
+ * These defines are marked private but it's impossible to remove an interface
+ * without them.
+ */
+#ifndef SIOCPROTODETACH
+#define        SIOCPROTODETACH _IOWR('i', 81, struct ifreq)    /* detach proto from interface */
+#endif /* SIOCPROTODETACH */
+
+#ifndef SIOCPROTODETACH_IN6
+#define SIOCPROTODETACH_IN6 _IOWR('i', 111, struct in6_ifreq)    /* detach proto from interface */
+#endif /* SIOCPROTODETACH_IN6 */
+
+
+static errno_t
+ipsecif_detach_ip(
+       ifnet_t                         interface,
+       protocol_family_t       protocol,
+       socket_t                        pf_socket)
+{
+       errno_t result = EPROTONOSUPPORT;
+       
+       /* Attempt a detach */
+       if (protocol == PF_INET) {
+               struct ifreq    ifr;
+               
+               bzero(&ifr, sizeof(ifr));
+               snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
+                                ifnet_name(interface), ifnet_unit(interface));
+               
+               result = sock_ioctl(pf_socket, SIOCPROTODETACH, &ifr);
+       }
+       else if (protocol == PF_INET6) {
+               struct in6_ifreq        ifr6;
+               
+               bzero(&ifr6, sizeof(ifr6));
+               snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
+                                ifnet_name(interface), ifnet_unit(interface));
+               
+               result = sock_ioctl(pf_socket, SIOCPROTODETACH_IN6, &ifr6);
+       }
+       
+       return result;
+}
+
+static void
+ipsecif_remove_address(
+       ifnet_t                         interface,
+       protocol_family_t       protocol,
+       ifaddr_t                        address,
+       socket_t                        pf_socket)
+{
+       errno_t result = 0;
+       
+       /* Attempt a detach */
+       if (protocol == PF_INET) {
+               struct ifreq    ifr;
+               
+               bzero(&ifr, sizeof(ifr));
+               snprintf(ifr.ifr_name, sizeof(ifr.ifr_name), "%s%d",
+                                ifnet_name(interface), ifnet_unit(interface));
+               result = ifaddr_address(address, &ifr.ifr_addr, sizeof(ifr.ifr_addr));
+               if (result != 0) {
+                       printf("ipsecif_remove_address - ifaddr_address failed: %d", result);
+               }
+               else {
+                       result = sock_ioctl(pf_socket, SIOCDIFADDR, &ifr);
+                       if (result != 0) {
+                               printf("ipsecif_remove_address - SIOCDIFADDR failed: %d", result);
+                       }
+               }
+       }
+       else if (protocol == PF_INET6) {
+               struct in6_ifreq        ifr6;
+               
+               bzero(&ifr6, sizeof(ifr6));
+               snprintf(ifr6.ifr_name, sizeof(ifr6.ifr_name), "%s%d",
+                                ifnet_name(interface), ifnet_unit(interface));
+               result = ifaddr_address(address, (struct sockaddr*)&ifr6.ifr_addr,
+                                                               sizeof(ifr6.ifr_addr));
+               if (result != 0) {
+                       printf("ipsecif_remove_address - ifaddr_address failed (v6): %d",
+                                  result);
+               }
+               else {
+                       result = sock_ioctl(pf_socket, SIOCDIFADDR_IN6, &ifr6);
+                       if (result != 0) {
+                               printf("ipsecif_remove_address - SIOCDIFADDR_IN6 failed: %d",
+                                          result);
+                       }
+               }
+       }
+}
+
+static void
+ipsecif_cleanup_family(
+       ifnet_t                         interface,
+       protocol_family_t       protocol)
+{
+       errno_t         result = 0;
+       socket_t        pf_socket = NULL;
+       ifaddr_t        *addresses = NULL;
+       int                     i;
+       
+       if (protocol != PF_INET && protocol != PF_INET6) {
+               printf("ipsecif_cleanup_family - invalid protocol family %d\n", protocol);
+               return;
+       }
+       
+       /* Create a socket for removing addresses and detaching the protocol */
+       result = sock_socket(protocol, SOCK_DGRAM, 0, NULL, NULL, &pf_socket);
+       if (result != 0) {
+               if (result != EAFNOSUPPORT)
+                       printf("ipsecif_cleanup_family - failed to create %s socket: %d\n",
+                               protocol == PF_INET ? "IP" : "IPv6", result);
+               goto cleanup;
+       }
+       
+       result = ipsecif_detach_ip(interface, protocol, pf_socket);
+       if (result == 0 || result == ENXIO) {
+               /* We are done! We either detached or weren't attached. */
+               goto cleanup;
+       }
+       else if (result != EBUSY) {
+               /* Uh, not really sure what happened here... */
+               printf("ipsecif_cleanup_family - ipsecif_detach_ip failed: %d\n", result);
+               goto cleanup;
+       }
+       
+       /*
+        * At this point, we received an EBUSY error. This means there are
+        * addresses attached. We should detach them and then try again.
+        */
+       result = ifnet_get_address_list_family(interface, &addresses, protocol);
+       if (result != 0) {
+               printf("ifnet_get_address_list_family(%s%d, 0xblah, %s) - failed: %d\n",
+                       ifnet_name(interface), ifnet_unit(interface), 
+                       protocol == PF_INET ? "PF_INET" : "PF_INET6", result);
+               goto cleanup;
+       }
+       
+       for (i = 0; addresses[i] != 0; i++) {
+               ipsecif_remove_address(interface, protocol, addresses[i], pf_socket);
+       }
+       ifnet_free_address_list(addresses);
+       addresses = NULL;
+       
+       /*
+        * The addresses should be gone; try the detach again.
+        */
+       result = ipsecif_detach_ip(interface, protocol, pf_socket);
+       if (result != 0 && result != ENXIO) {
+               printf("ipsecif_cleanup_family - ipsecif_detach_ip failed: %d\n", result);
+       }
+       
+cleanup:
+       if (pf_socket != NULL)
+               sock_close(pf_socket);
+       
+       if (addresses != NULL)
+               ifnet_free_address_list(addresses);
+}
+
+static errno_t
+ipsecif_ctl_disconnect(
+       __unused kern_ctl_ref   kctlref,
+       __unused u_int32_t              unit,
+       void                                    *unitinfo)
+{
+       struct ipsecif_pcb      *pcb = unitinfo;
+       ifnet_t                 ifp = pcb->ifp;
+       errno_t                 result = 0;
+       
+       pcb->ctlref = NULL;
+       pcb->unit = 0;
+       
+       /*
+        * We want to do everything in our power to ensure that the interface
+        * really goes away when the socket is closed. We must remove IP/IPv6
+        * addresses and detach the protocols. Finally, we can remove and
+        * release the interface.
+        */
+       ipsecif_cleanup_family(ifp, AF_INET);
+       ipsecif_cleanup_family(ifp, AF_INET6);
+       
+       if ((result = ifnet_detach(ifp)) != 0) {
+               printf("ipsecif_ctl_disconnect - ifnet_detach failed: %d\n", result);
+       }
+       
+       if ((result = ifnet_release(ifp)) != 0) {
+               printf("ipsecif_ctl_disconnect - ifnet_release failed: %d\n", result);
+       }
+       
+       return 0;
+}
+
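+/*
+ * Note on the fake header below: a BPF tap on an IP-only interface of
+ * this kind expects DLT_NULL-style framing, i.e. a 4-byte address-family
+ * word in front of the packet (inferred from this code; the tap
+ * registration itself is not in this diff).  Building an on-stack mbuf
+ * header whose data is the AF word, with the real packet chained behind
+ * it, gives the tap correctly framed data without allocating an mbuf on
+ * the hot path.
+ */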
+static inline void
+call_bpf_tap(
+       ifnet_t                 ifp,
+       bpf_packet_func tap,
+       mbuf_t                  m)
+{
+       struct m_hdr    hack_hdr;
+       struct mbuf             *n;
+       int                             af;
+       
+       if (!tap)
+               return;
+       
+       af = (((*(char*)(mbuf_data(m))) & 0xf0) >> 4); // 4 or 6
+       if(af == 4) {
+               af = AF_INET;
+       }
+       else if (af == 6) {
+               af = AF_INET6;
+       }
+       else {
+               /* Neither IPv4 nor IPv6; tap with an unspecified family. */
+               af = 0;
+       }
+       
+       hack_hdr.mh_next = (struct mbuf*)m;
+       hack_hdr.mh_nextpkt = NULL;
+       hack_hdr.mh_len = 4;
+       hack_hdr.mh_data = (char *)&af;
+       hack_hdr.mh_type = ((struct mbuf*)m)->m_type;
+       hack_hdr.mh_flags = 0;
+       
+       n = (struct mbuf*)&hack_hdr;
+       
+       tap(ifp, (mbuf_t)n);
+}
+       
+
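+/*
+ * Data written on the control socket is injected into the stack as if
+ * it had arrived on the ipsec interface: tag the mbuf with the
+ * receiving ifnet, give BPF a look, then hand it to ifnet_input(),
+ * which consumes the mbuf on success.  On failure we log, bump the
+ * input error count, and free the mbuf here.
+ */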
+static errno_t
+ipsecif_ctl_send(
+       __unused kern_ctl_ref   kctlref,
+       __unused u_int32_t              unit,
+       void                                    *unitinfo,
+       mbuf_t                                  m,
+       __unused int                    flags)
+{
+       struct ipsecif_pcb                                              *pcb = unitinfo;
+       struct ifnet_stat_increment_param       incs;
+       errno_t                                                         result;
+       
+       bzero(&incs, sizeof(incs));
+       
+       mbuf_pkthdr_setrcvif(m, pcb->ifp);
+       
+       if (pcb->mode & BPF_MODE_INPUT) {
+               call_bpf_tap(pcb->ifp, pcb->tap, m);
+       }
+       
+       incs.packets_in = 1;
+       incs.bytes_in = mbuf_pkthdr_len(m);
+       result = ifnet_input(pcb->ifp, m, &incs);
+       if (result != 0) {
+               ifnet_stat_increment_in(pcb->ifp, 0, 0, 1);
+               printf("ipsecif_ctl_send - ifnet_input failed: %d\n", result);
+               mbuf_freem(m);
+       }
+       
+       return 0;
+}
+
+/* Network Interface functions */
+static errno_t
+ipsecif_output(
+                          ifnet_t      interface,
+                          mbuf_t       data)
+{
+       struct ipsecif_pcb      *pcb = ifnet_softc(interface);
+       __unused errno_t        result;         /* only used by the disabled path below */
+       
+       if (pcb->mode & BPF_MODE_OUTPUT) {
+               call_bpf_tap(interface, pcb->tap, data);
+       }
+       
+       // No packets should be routed to the ipsec interface itself; drop anything that arrives.
+       mbuf_freem(data);
+
+#if 0  
+       if (pcb->ctlref) {
+               int     length = mbuf_pkthdr_len(data);
+               result = ctl_enqueuembuf(pcb->ctlref, pcb->unit, data, CTL_DATA_EOR);
+               if (result != 0) {
+                       mbuf_freem(data);
+                       printf("ipsecif_output - ctl_enqueuembuf failed: %d\n", result);
+                       ifnet_stat_increment_out(interface, 0, 0, 1);
+               }
+               else {
+                       ifnet_stat_increment_out(interface, 1, length, 0);
+               }
+       }
+       else 
+               mbuf_freem(data);
+#endif
+       
+       return 0;
+}
+
+/* Network Interface functions */
+static errno_t
+ipsecif_demux(
+       __unused ifnet_t        interface,
+       mbuf_t                          data,
+       __unused char           *frame_header,
+       protocol_family_t       *protocol)
+{
+       u_int8_t        *vers;
+       
+       while (data != NULL && mbuf_len(data) < 1) {
+               data = mbuf_next(data);
+       }
+       
+       if (data != NULL) {
+               vers = mbuf_data(data);
+               switch(((*vers) & 0xf0) >> 4) {
+                       case 4:
+                               *protocol = PF_INET;
+                               return 0;
+       
+                       case 6:
+                               *protocol = PF_INET6;
+                               return 0;
+               }
+       }
+       
+       return ENOENT;
+}
+
+static errno_t
+ipsecif_add_proto(
+       __unused ifnet_t                                                interface,
+       protocol_family_t                                               protocol,
+       __unused const struct ifnet_demux_desc  *demux_array,
+       __unused u_int32_t                                              demux_count)
+{
+       switch(protocol) {
+               case PF_INET:
+                       return 0;
+               case PF_INET6:
+                       return 0;
+               default:
+                       break;
+       }
+       
+       return ENOPROTOOPT;
+}
+
+static errno_t
+ipsecif_del_proto(
+       __unused ifnet_t                        interface,
+       __unused protocol_family_t      protocol)
+{
+       return 0;
+}
+
+static errno_t
+ipsecif_ioctl(
+       ifnet_t                 interface,
+       u_int32_t               command,
+       void                    *data)
+{
+       errno_t result = 0;
+       
+       switch(command) {
+               case SIOCSIFMTU:
+                       ifnet_set_mtu(interface, ((struct ifreq*)data)->ifr_mtu);
+                       break;
+               
+               default:
+                       result = EOPNOTSUPP;
+       }
+       
+       return result;
+}
+
+static errno_t
+ipsecif_settap(
+       ifnet_t                 interface,
+       bpf_tap_mode    mode,
+       bpf_packet_func callback)
+{
+       struct ipsecif_pcb      *pcb = ifnet_softc(interface);
+       
+       pcb->mode = mode;
+       pcb->tap = callback;
+       
+       return 0;
+}
+
+static void
+ipsecif_detached(
+       ifnet_t interface)
+{
+       struct ipsecif_pcb      *pcb = ifnet_softc(interface);
+       
+       ipsecif_free(pcb);
+       
+       OSDecrementAtomic(&ipsecif_ifcount);
+}
+
+/* Protocol Handlers */
+
+static errno_t
+ipsecif_proto_input(
+       __unused ifnet_t        interface,
+       protocol_family_t       protocol,
+       mbuf_t                          m,
+       __unused char           *frame_header)
+{
+       proto_input(protocol, m);
+       
+       return 0;
+}
+
+static errno_t
+ipsecif_attach_proto(
+       ifnet_t                         interface,
+       protocol_family_t       protocol)
+{
+       struct ifnet_attach_proto_param proto;
+       errno_t                                                 result;
+       
+       bzero(&proto, sizeof(proto));
+       proto.input = ipsecif_proto_input;
+       
+       result = ifnet_attach_protocol(interface, protocol, &proto);
+       if (result != 0 && result != EEXIST) {
+               printf("ipsecif_attach_proto - ifnet_attach_protocol %d failed: %d\n",
+                       protocol, result);
+       }
+       
+       return result;
+}
+
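A minimal userspace sketch of driving a kernel control like the one above (the control name "com.apple.net.ipsecif" and this exact flow are assumptions; the kext's registration call is not part of this section). Writes on the connected descriptor reach ipsecif_ctl_send(), and closing it lands in ipsecif_ctl_disconnect(), which tears the interface down:

    #include <string.h>
    #include <unistd.h>
    #include <sys/socket.h>
    #include <sys/ioctl.h>
    #include <sys/sys_domain.h>
    #include <sys/kern_control.h>

    int
    connect_ipsecif(void)
    {
        struct ctl_info     info;
        struct sockaddr_ctl addr;
        int fd = socket(PF_SYSTEM, SOCK_DGRAM, SYSPROTO_CONTROL);

        if (fd < 0)
            return -1;

        memset(&info, 0, sizeof(info));
        /* Assumed name -- substitute whatever the kext registers. */
        strlcpy(info.ctl_name, "com.apple.net.ipsecif", sizeof(info.ctl_name));
        if (ioctl(fd, CTLIOCGINFO, &info) == -1) {  /* resolve name to ctl_id */
            close(fd);
            return -1;
        }

        memset(&addr, 0, sizeof(addr));
        addr.sc_len     = sizeof(addr);
        addr.sc_family  = AF_SYSTEM;
        addr.ss_sysaddr = AF_SYS_CONTROL;
        addr.sc_id      = info.ctl_id;
        addr.sc_unit    = 0;            /* 0 lets the kext pick a unit */
        if (connect(fd, (struct sockaddr *)&addr, sizeof(addr)) == -1) {
            close(fd);
            return -1;
        }
        return fd;
    }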
index c767ae5236c49ab5d82285a2c3df3c6886d0769c..68125dd264fb8c97320241215bac16b7271525b2 100644 (file)
@@ -686,10 +686,14 @@ nfs3_vnop_advlock(
 
        /*
         * Fill in the information structure.
+        * We set all values to zero with bzero to clear
+        * out any information in the sockaddr_storage 
+        * and nfs_filehandle contained in msgreq so that
+        * we will not leak extraneous information out of 
+        * the kernel when calling up to lockd via our mig
+        * generated routine.
         */
-       msgreq.lmr_answered = 0;
-       msgreq.lmr_errno = 0;
-       msgreq.lmr_saved_errno = 0;
+       bzero(&msgreq, sizeof(msgreq));
        msg = &msgreq.lmr_msg;
        msg->lm_version = LOCKD_MSG_VERSION;
        msg->lm_flags = 0;
index 7d65f3c4f24fbfbcc3d36d708a78c373ecd3ad82..d7e3d0c3c438a4a5226a1185fdc7d35e7ba483c9 100644 (file)
@@ -572,14 +572,15 @@ nfs_vfs_getattr(mount_t mp, struct vfs_attr *fsap, vfs_context_t ctx)
                        if (nmp->nm_fsattr.nfsa_flags & NFS_FSFLAG_CASE_PRESERVING)
                                caps |= VOL_CAP_FMT_CASE_PRESERVING;
                }
+               /* Note: VOL_CAP_FMT_2TB_FILESIZE is actually used to test for "large file support" */
                if (NFS_BITMAP_ISSET(nmp->nm_fsattr.nfsa_bitmap, NFS_FATTR_MAXFILESIZE)) {
-                       /* Is server's max file size at least 2TB? */
-                       if (nmp->nm_fsattr.nfsa_maxfilesize >= 0x20000000000ULL)
+                       /* Is server's max file size at least 4GB? */
+                       if (nmp->nm_fsattr.nfsa_maxfilesize >= 0x100000000ULL)
                                caps |= VOL_CAP_FMT_2TB_FILESIZE;
                } else if (nfsvers >= NFS_VER3) {
                        /*
                         * NFSv3 and up supports 64 bits of file size.
-                        * So, we'll just assume maxfilesize >= 2TB
+                        * So, we'll just assume maxfilesize >= 4GB
                         */
                        caps |= VOL_CAP_FMT_2TB_FILESIZE;
                }
@@ -780,8 +781,7 @@ nfs3_fsinfo(struct nfsmount *nmp, nfsnode_t np, vfs_context_t ctx)
        if (maxsize < nmp->nm_readdirsize)
                nmp->nm_readdirsize = maxsize;
 
-       nfsm_chain_get_64(error, &nmrep, maxsize);
-       nmp->nm_fsattr.nfsa_maxfilesize = maxsize;
+       nfsm_chain_get_64(error, &nmrep, nmp->nm_fsattr.nfsa_maxfilesize);
 
        nfsm_chain_adv(error, &nmrep, 2 * NFSX_UNSIGNED); // skip time_delta
 
@@ -906,7 +906,7 @@ tryagain:
                //PWC hack until we have a real "mount" tool to remount root rw
                int rw_root=0;
                int flags = MNT_ROOTFS|MNT_RDONLY;
-               PE_parse_boot_arg("-rwroot_hack", &rw_root);
+               PE_parse_boot_argn("-rwroot_hack", &rw_root, sizeof (rw_root));
                if(rw_root)
                {
                        flags = MNT_ROOTFS;
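The PE_parse_boot_arg() to PE_parse_boot_argn() conversions in this commit all have the same shape: the caller now passes the destination size so the parser cannot write past the buffer. In kernel code the pattern looks like this (a sketch restating the hunk above, not new behavior):

    int rw_root = 0;        /* "-rwroot_hack" is a flag-style boot-arg */

    if (PE_parse_boot_argn("-rwroot_hack", &rw_root, sizeof(rw_root)) && rw_root) {
        /* mount the root filesystem read-write */
    }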
index 3324fe92afc6886c9d314809cf1ca657c4af88ee..907cc9a64908eacaa9bd7c43b3b1ee13d85f1f18 100644 (file)
@@ -7,6 +7,14 @@ export MakeInc_dir=${SRCROOT}/makedefs/MakeInc.dir
 include $(MakeInc_cmd)
 include $(MakeInc_def)
 
+ALLPRODUCTS = AppleTV iPhone MacOSX
+PRODUCT = $(shell tconf --product)
+EXTRAUNIFDEF = $(foreach x,$(ALLPRODUCTS),$(if $(findstring $(PRODUCT),$(x)),-DPRODUCT_$(x),-UPRODUCT_$(x)))
+SINCFRAME_UNIFDEF += $(EXTRAUNIFDEF)
+SPINCFRAME_UNIFDEF += $(EXTRAUNIFDEF)
+KINCFRAME_UNIFDEF += $(EXTRAUNIFDEF)
+KPINCFRAME_UNIFDEF += $(EXTRAUNIFDEF)
+
 INSTINC_SUBDIRS = \
 
 INSTINC_SUBDIRS_PPC = \
index c4881ef339a69e8fec10f4ac1e2d5887485ab60c..8cd063f272ff13dd87d6d67c7c25f82c4a29b913 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
  * used without a prototype in scope.
  */
 
+/* These settings are particular to each product. */
+#ifdef KERNEL
+#define __DARWIN_ONLY_64_BIT_INO_T     0
+#define __DARWIN_ONLY_UNIX_CONFORMANCE 0
+#define __DARWIN_ONLY_VERS_1050                0
+#else /* !KERNEL */
+#ifdef PRODUCT_AppleTV
+/* Product: AppleTV */
+#define __DARWIN_ONLY_64_BIT_INO_T     1
+#define __DARWIN_ONLY_UNIX_CONFORMANCE 1
+#define __DARWIN_ONLY_VERS_1050                1
+#endif /* PRODUCT_AppleTV */
+#ifdef PRODUCT_iPhone
+/* Product: iPhone */
+#define __DARWIN_ONLY_64_BIT_INO_T     1
+#define __DARWIN_ONLY_UNIX_CONFORMANCE 1
+#define __DARWIN_ONLY_VERS_1050                1
+#endif /* PRODUCT_iPhone */
+#ifdef PRODUCT_MacOSX
+/* Product: MacOSX */
+#define __DARWIN_ONLY_64_BIT_INO_T     0
+/* #undef __DARWIN_ONLY_UNIX_CONFORMANCE (automatically set for 64-bit) */
+#define __DARWIN_ONLY_VERS_1050                0
+#endif /* PRODUCT_MacOSX */
+#endif /* KERNEL */
+
 /*
  * The __DARWIN_ALIAS macros are used to do symbol renaming; they allow
 * legacy code to use the old symbol, thus maintaining binary compatibility
  * pre-10.5, and it is the default compilation environment, revert the
  * compilation environment to pre-__DARWIN_UNIX03.
  */
+#if !defined(__DARWIN_ONLY_UNIX_CONFORMANCE)
+#  if defined(__LP64__)
+#    define __DARWIN_ONLY_UNIX_CONFORMANCE 1
+#  else /* !__LP64__ */
+#    define __DARWIN_ONLY_UNIX_CONFORMANCE 0
+#  endif /* __LP64__ */
+#endif /* !__DARWIN_ONLY_UNIX_CONFORMANCE */
+
 #if !defined(__DARWIN_UNIX03)
-#  if defined(_DARWIN_C_SOURCE) || defined(_XOPEN_SOURCE) || defined(_POSIX_C_SOURCE) || defined(__LP64__) || (defined(__arm__) && !defined(KERNEL))
+#  if defined(KERNEL)
+#    define __DARWIN_UNIX03    0
+#  elif __DARWIN_ONLY_UNIX_CONFORMANCE
 #    if defined(_NONSTD_SOURCE)
-#      error "Can't define both _NONSTD_SOURCE and any of _DARWIN_C_SOURCE, _XOPEN_SOURCE, _POSIX_C_SOURCE, or __LP64__"
+#      error "Can't define _NONSTD_SOURCE when only UNIX conformance is available."
 #    endif /* _NONSTD_SOURCE */
 #    define __DARWIN_UNIX03    1
-#  elif defined(_NONSTD_SOURCE) || defined(KERNEL)
+#  elif defined(_DARWIN_C_SOURCE) || defined(_XOPEN_SOURCE) || defined(_POSIX_C_SOURCE)
+#    if defined(_NONSTD_SOURCE)
+#      error "Can't define both _NONSTD_SOURCE and any of _DARWIN_C_SOURCE, _XOPEN_SOURCE or _POSIX_C_SOURCE."
+#    endif /* _NONSTD_SOURCE */
+#    define __DARWIN_UNIX03    1
+#  elif defined(_NONSTD_SOURCE)
 #    define __DARWIN_UNIX03    0
 #  else /* default */
 #    if defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && ((__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__-0) < 1050)
 #endif /* !__DARWIN_UNIX03 */
 
 #if !defined(__DARWIN_64_BIT_INO_T)
-#  if defined(_DARWIN_USE_64_BIT_INODE)
+#  if defined(KERNEL)
+#    define __DARWIN_64_BIT_INO_T 0
+#  elif defined(_DARWIN_USE_64_BIT_INODE)
+#    if defined(_DARWIN_NO_64_BIT_INODE)
+#      error "Can't define both _DARWIN_USE_64_BIT_INODE and _DARWIN_NO_64_BIT_INODE."
+#    endif /* _DARWIN_NO_64_BIT_INODE */
 #    define __DARWIN_64_BIT_INO_T 1
-#  elif defined(_DARWIN_NO_64_BIT_INODE) || defined(KERNEL)
+#  elif defined(_DARWIN_NO_64_BIT_INODE)
+#    if __DARWIN_ONLY_64_BIT_INO_T
+#      error "Can't define _DARWIN_NO_64_BIT_INODE when only 64-bit inodes are available."
+#    endif /* __DARWIN_ONLY_64_BIT_INO_T */
 #    define __DARWIN_64_BIT_INO_T 0
 #  else /* default */
-#    define __DARWIN_64_BIT_INO_T 0
+#    if __DARWIN_ONLY_64_BIT_INO_T
+#      define __DARWIN_64_BIT_INO_T 1
+#    else /* !__DARWIN_ONLY_64_BIT_INO_T */
+#      define __DARWIN_64_BIT_INO_T 0
+#    endif /* __DARWIN_ONLY_64_BIT_INO_T */
 #  endif
 #endif /* !__DARWIN_64_BIT_INO_T */
 
-#if !defined(__DARWIN_NON_CANCELABLE)
+#if !defined(__DARWIN_VERS_1050)
 #  if defined(KERNEL)
-#    define __DARWIN_NON_CANCELABLE 0
+#    define __DARWIN_VERS_1050 0
+#  elif __DARWIN_ONLY_VERS_1050
+#    define __DARWIN_VERS_1050 1
+#  elif defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && ((__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__-0) >= 1050)
+#    define __DARWIN_VERS_1050 1
 #  else /* default */
-#    define __DARWIN_NON_CANCELABLE 0
+#    define __DARWIN_VERS_1050 0
 #  endif
-#endif /* !__DARWIN_NON_CANCELABLE */
+#endif /* !__DARWIN_VERS_1050 */
 
-#if !defined(__DARWIN_VERS_1050)
-#  if !defined(KERNEL) && defined(__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__) && ((__ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__-0) >= 1050)
-#    define __DARWIN_VERS_1050 1
+#if !defined(__DARWIN_NON_CANCELABLE)
+#  if defined(KERNEL)
+#    define __DARWIN_NON_CANCELABLE 0
 #  else /* default */
-#    define __DARWIN_VERS_1050 0
+#    define __DARWIN_NON_CANCELABLE 0
 #  endif
 #endif /* !__DARWIN_NON_CANCELABLE */
 
  * symbol suffixes used for symbol versioning
  */
 #if __DARWIN_UNIX03
-#  if !defined(__LP64__) && !defined(__arm__)
-#    define __DARWIN_SUF_UNIX03                "$UNIX2003"
-#    define __DARWIN_SUF_UNIX03_SET    1
-#  else /* __LP64__ || __arm__ */
+#  if __DARWIN_ONLY_UNIX_CONFORMANCE
 #    define __DARWIN_SUF_UNIX03                /* nothing */
-#    define __DARWIN_SUF_UNIX03_SET    0
-#  endif /* !__LP64__ && !__arm__ */
+#  else /* !__DARWIN_ONLY_UNIX_CONFORMANCE */
+#    define __DARWIN_SUF_UNIX03                "$UNIX2003"
+#  endif /* __DARWIN_ONLY_UNIX_CONFORMANCE */
 
 #  if __DARWIN_64_BIT_INO_T
-#    define __DARWIN_SUF_64_BIT_INO_T  "$INODE64"
+#    if __DARWIN_ONLY_64_BIT_INO_T
+#      define __DARWIN_SUF_64_BIT_INO_T        /* nothing */
+#    else /* !__DARWIN_ONLY_64_BIT_INO_T */
+#      define __DARWIN_SUF_64_BIT_INO_T        "$INODE64"
+#    endif /* __DARWIN_ONLY_64_BIT_INO_T */
 #  else /* !__DARWIN_64_BIT_INO_T */
 #    define __DARWIN_SUF_64_BIT_INO_T  /* nothing */
-#  endif /* __DARWIN_UNIX03 */
+#  endif /* __DARWIN_64_BIT_INO_T */
+
+#  if __DARWIN_VERS_1050
+#    if __DARWIN_ONLY_VERS_1050
+#      define __DARWIN_SUF_1050                /* nothing */
+#    else /* !__DARWIN_ONLY_VERS_1050 */
+#      define __DARWIN_SUF_1050                "$1050"
+#    endif /* __DARWIN_ONLY_VERS_1050 */
+#  else /* !__DARWIN_VERS_1050 */
+#    define __DARWIN_SUF_1050          /* nothing */
+#  endif /* __DARWIN_VERS_1050 */
 
 #  if __DARWIN_NON_CANCELABLE
 #    define __DARWIN_SUF_NON_CANCELABLE        "$NOCANCEL"
 #    define __DARWIN_SUF_NON_CANCELABLE        /* nothing */
 #  endif /* __DARWIN_NON_CANCELABLE */
 
-#  if __DARWIN_VERS_1050
-#    define __DARWIN_SUF_1050          "$1050"
-#  else /* !__DARWIN_VERS_1050 */
-#    define __DARWIN_SUF_1050          /* nothing */
-#  endif /* __DARWIN_VERS_1050 */
-
 #else /* !__DARWIN_UNIX03 */
 #  define __DARWIN_SUF_UNIX03          /* nothing */
-#  define __DARWIN_SUF_UNIX03_SET      0
 #  define __DARWIN_SUF_64_BIT_INO_T    /* nothing */
 #  define __DARWIN_SUF_NON_CANCELABLE  /* nothing */
 #  define __DARWIN_SUF_1050            /* nothing */
  * long doubles.  This applies only to ppc; i386 already has long double
  * support, while ppc64 doesn't have any backwards history.
  */
-#if defined(__ppc__)
+#if   defined(__ppc__)
 #  if defined(__LDBL_MANT_DIG__) && defined(__DBL_MANT_DIG__) && \
        __LDBL_MANT_DIG__ > __DBL_MANT_DIG__
 #    if __ENVIRONMENT_MAC_OS_X_VERSION_MIN_REQUIRED__-0 < 1040
 #   define     __DARWIN_LDBL_COMPAT2(x) /* nothing */
 #   define     __DARWIN_LONG_DOUBLE_IS_DOUBLE  1
 #  endif
-#elif defined(__i386__) || defined(__ppc64__) || defined(__x86_64__) || defined (__arm__)
+#elif defined(__i386__) || defined(__ppc64__) || defined(__x86_64__)
 #  define      __DARWIN_LDBL_COMPAT(x) /* nothing */
 #  define      __DARWIN_LDBL_COMPAT2(x) /* nothing */
 #  define      __DARWIN_LONG_DOUBLE_IS_DOUBLE  0
  *  Public darwin-specific feature macros
  *****************************************/
 
+/*
+ * _DARWIN_FEATURE_64_BIT_INODE indicates that the ino_t type is 64-bit, and
+ * structures modified for 64-bit inodes (like struct stat) will be used.
+ */
+#if __DARWIN_64_BIT_INO_T
+#define _DARWIN_FEATURE_64_BIT_INODE           1
+#endif
+
 /*
  * _DARWIN_FEATURE_LONG_DOUBLE_IS_DOUBLE indicates when the long double type
- * is the same as the double type (ppc only)
+ * is the same as the double type (ppc and arm only)
  */
 #if __DARWIN_LONG_DOUBLE_IS_DOUBLE
 #define _DARWIN_FEATURE_LONG_DOUBLE_IS_DOUBLE  1
 #endif
 
 /*
- * _DARWIN_FEATURE_UNIX_CONFORMANCE indicates whether UNIX conformance is on,
- * and specifies the conformance level (3 is SUSv3)
+ * _DARWIN_FEATURE_ONLY_64_BIT_INODE indicates that the ino_t type may only
+ * be 64-bit; there is no support for 32-bit ino_t when this macro is defined
+ * (and non-zero).  There is no struct stat64 either, as the regular
+ * struct stat will already be the 64-bit version.
  */
-#if __DARWIN_UNIX03
-#define _DARWIN_FEATURE_UNIX_CONFORMANCE       3
+#if __DARWIN_ONLY_64_BIT_INO_T
+#define _DARWIN_FEATURE_ONLY_64_BIT_INODE      1
 #endif
 
 /*
- * _DARWIN_FEATURE_64_BIT_INODE indicates that the ino_t type is 64-bit, and
- * structures modified for 64-bit inodes (like struct stat) will be used.
+ * _DARWIN_FEATURE_ONLY_VERS_1050 indicates that only those APIs updated
+ * in 10.5 exist; no pre-10.5 variants are available.
  */
-#if __DARWIN_64_BIT_INO_T
-#define _DARWIN_FEATURE_64_BIT_INODE           1
+#if __DARWIN_ONLY_VERS_1050
+#define _DARWIN_FEATURE_ONLY_VERS_1050         1
+#endif
+
+/*
+ * _DARWIN_FEATURE_ONLY_UNIX_CONFORMANCE indicates that only UNIX-conforming
+ * APIs are available (the legacy BSD variants are not)
+ */
+#if __DARWIN_ONLY_UNIX_CONFORMANCE
+#define _DARWIN_FEATURE_ONLY_UNIX_CONFORMANCE  1
+#endif
+
+/*
+ * _DARWIN_FEATURE_UNIX_CONFORMANCE indicates whether UNIX conformance is on,
+ * and specifies the conformance level (3 is SUSv3)
+ */
+#if __DARWIN_UNIX03
+#define _DARWIN_FEATURE_UNIX_CONFORMANCE       3
 #endif
 
 #endif /* !_CDEFS_H_ */
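Client code is expected to key off the public _DARWIN_FEATURE_* macros above rather than the private __DARWIN_* switches. A compile-time probe (illustrative sketch only):

    #include <stdio.h>
    #include <sys/cdefs.h>

    int
    main(void)
    {
    #if defined(_DARWIN_FEATURE_ONLY_64_BIT_INODE)
        printf("ino_t is 64-bit only: no struct stat64, no $INODE64 suffix\n");
    #elif defined(_DARWIN_FEATURE_64_BIT_INODE)
        printf("64-bit ino_t selected: stat() binds to its $INODE64 variant\n");
    #else
        printf("legacy 32-bit ino_t ABI\n");
    #endif
    #if defined(_DARWIN_FEATURE_ONLY_UNIX_CONFORMANCE)
        printf("only SUSv3-conforming variants exist: no $UNIX2003 suffix\n");
    #endif
        return 0;
    }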
index b36277729cac25e1453db1e0cb6b1396b231466b..879dcb22b0438c0049303bb025e3ae1f9e6b55ae 100644 (file)
@@ -50,6 +50,8 @@
  * DKIOCISFORMATTED                      is media formatted?
  * DKIOCISWRITABLE                       is media writable?
  *
+ * DKIOCDISCARD                          delete unused data
+ *
  * DKIOCGETMAXBLOCKCOUNTREAD             get maximum block count for reads
  * DKIOCGETMAXBLOCKCOUNTWRITE            get maximum block count for writes
  * DKIOCGETMAXBYTECOUNTREAD              get maximum byte count for reads
  *
  * DKIOCGETMINSEGMENTALIGNMENTBYTECOUNT  get minimum segment alignment in bytes
  * DKIOCGETMAXSEGMENTADDRESSABLEBITCOUNT get maximum segment width in bits
+ *
+ * DKIOCGETPHYSICALBLOCKSIZE             get device's block size
  */
 
 typedef struct
 {
-    char path[128];
+    uint64_t               offset;
+    uint64_t               length;
+
+    uint8_t                reserved0128[16];       /* reserved, clear to zero */
+} dk_discard_t;
+
+typedef struct
+{
+    char                   path[128];
 } dk_firmware_path_t;
 
 typedef struct
@@ -102,6 +114,8 @@ typedef struct
 #define DKIOCISFORMATTED                      _IOR('d', 23, uint32_t)
 #define DKIOCISWRITABLE                       _IOR('d', 29, uint32_t)
 
+#define DKIOCDISCARD                          _IOW('d', 31, dk_discard_t)
+
 #define DKIOCGETMAXBLOCKCOUNTREAD             _IOR('d', 64, uint64_t)
 #define DKIOCGETMAXBLOCKCOUNTWRITE            _IOR('d', 65, uint64_t)
 #define DKIOCGETMAXBYTECOUNTREAD              _IOR('d', 70, uint64_t)
@@ -115,7 +129,10 @@ typedef struct
 #define DKIOCGETMINSEGMENTALIGNMENTBYTECOUNT  _IOR('d', 74, uint64_t)
 #define DKIOCGETMAXSEGMENTADDRESSABLEBITCOUNT _IOR('d', 75, uint64_t)
 
+#define DKIOCGETPHYSICALBLOCKSIZE             _IOR('d', 77, uint32_t)
+
 #ifdef KERNEL
+#define DK_FEATURE_DISCARD                    0x00000010
 #define DK_FEATURE_FORCE_UNIT_ACCESS          0x00000001
 #define DKIOCGETBLOCKCOUNT32                  _IOR('d', 25, uint32_t)
 #define DKIOCSETBLOCKSIZE                     _IOW('d', 24, uint32_t)
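Issuing the new DKIOCDISCARD from user space would look like the following sketch (the device path and range are placeholders; per the structure comment above, the reserved bytes must stay zero):

    #include <fcntl.h>
    #include <stdint.h>
    #include <string.h>
    #include <unistd.h>
    #include <sys/ioctl.h>
    #include <sys/disk.h>

    int
    discard_range(const char *dev, uint64_t offset, uint64_t length)
    {
        dk_discard_t discard;
        int fd, err;

        if ((fd = open(dev, O_RDWR)) < 0)
            return -1;
        memset(&discard, 0, sizeof(discard));   /* clears reserved0128[] */
        discard.offset = offset;
        discard.length = length;
        err = ioctl(fd, DKIOCDISCARD, &discard);
        close(fd);
        return err;     /* 0 on success; a driver may treat this as a hint */
    }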
index 1bed0a7c5ee361791dd6aa67ba2790d3654710dd..36fb8d7e93b32e0204877f92944dc7e42c6b57cc 100644 (file)
@@ -116,6 +116,9 @@ struct image_params {
 #define        IMGPF_NONE      0x00000000              /* No flags */
 #define        IMGPF_INTERPRET 0x00000001              /* Interpreter invoked */
 #define        IMGPF_POWERPC   0x00000002              /* ppc mode for x86 */
+#if CONFIG_EMBEDDED
+#undef IMGPF_POWERPC
+#endif
 #define        IMGPF_WAS_64BIT 0x00000004              /* exec from a 64Bit binary */
 #define        IMGPF_IS_64BIT  0x00000008              /* exec to a 64Bit binary */
 
index b8f903856c893bbbe5253f806c4ab8b8057a0889..a5251673f20bad0c06e9df073f810de139287134 100644 (file)
@@ -191,7 +191,8 @@ __BEGIN_DECLS
 #define DBG_IOBLUETOOTH                46      /* Bluetooth */
 #define DBG_IOFIREWIRE         47      /* FireWire */
 #define DBG_IOINFINIBAND       48      /* Infiniband */
-#define DBG_IOCPUPM            49      /* CPU Power Management */
+#define DBG_IOCPUPM                    49      /* CPU Power Management */
+#define DBG_IOGRAPHICS         50      /* Graphics */
 
 /* Backwards compatibility */
 #define        DBG_IOPOINTING          DBG_IOHID                       /* OBSOLETE: Use DBG_IOHID instead */
@@ -214,6 +215,7 @@ __BEGIN_DECLS
 #define DBG_DRVBLUETOOTH       15      /* Bluetooth */
 #define DBG_DRVFIREWIRE                16      /* FireWire */
 #define DBG_DRVINFINIBAND      17      /* Infiniband */
+#define DBG_DRVGRAPHICS                18  /* Graphics */
 
 /* Backwards compatibility */
 #define        DBG_DRVPOINTING         DBG_DRVHID              /* OBSOLETE: Use DBG_DRVHID instead */
index 80f8138398afffd2fb79a5df84c7de0374de62a8..9019d694bbaf59d024540c6638a0da149f1ea63e 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -79,6 +79,7 @@
 #include <stdint.h>
 #include <sys/ucred.h>
 #include <sys/queue.h>         /* XXX needed for user builds */
+#include <Availability.h>
 #else
 #include <sys/kernel_types.h>
 #endif
@@ -112,8 +113,12 @@ typedef struct fsid { int32_t val[2]; } fsid_t;    /* file system id type */
        uint32_t        f_reserved[8];  /* For future use */ \
 }
 
+#if !__DARWIN_ONLY_64_BIT_INO_T
+
 struct statfs64 __DARWIN_STRUCT_STATFS64;
 
+#endif /* !__DARWIN_ONLY_64_BIT_INO_T */
+
 #if __DARWIN_64_BIT_INO_T
 
 struct statfs __DARWIN_STRUCT_STATFS64;
@@ -636,15 +641,23 @@ typedef struct fhandle    fhandle_t;
 __BEGIN_DECLS
 int    fhopen(const struct fhandle *, int);
 int    fstatfs(int, struct statfs *) __DARWIN_INODE64(fstatfs);
-int    fstatfs64(int, struct statfs64 *);
+#if !__DARWIN_ONLY_64_BIT_INO_T
+int    fstatfs64(int, struct statfs64 *) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
+#endif /* !__DARWIN_ONLY_64_BIT_INO_T */
 int    getfh(const char *, fhandle_t *);
 int    getfsstat(struct statfs *, int, int) __DARWIN_INODE64(getfsstat);
-int    getfsstat64(struct statfs64 *, int, int);
+#if !__DARWIN_ONLY_64_BIT_INO_T
+int    getfsstat64(struct statfs64 *, int, int) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
+#endif /* !__DARWIN_ONLY_64_BIT_INO_T */
 int    getmntinfo(struct statfs **, int) __DARWIN_INODE64(getmntinfo);
-int    getmntinfo64(struct statfs64 **, int);
+#if !__DARWIN_ONLY_64_BIT_INO_T
+int    getmntinfo64(struct statfs64 **, int) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
+#endif /* !__DARWIN_ONLY_64_BIT_INO_T */
 int    mount(const char *, const char *, int, void *);
 int    statfs(const char *, struct statfs *) __DARWIN_INODE64(statfs);
-int    statfs64(const char *, struct statfs64 *);
+#if !__DARWIN_ONLY_64_BIT_INO_T
+int    statfs64(const char *, struct statfs64 *) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
+#endif /* !__DARWIN_ONLY_64_BIT_INO_T */
 int    unmount(const char *, int);
 int    getvfsbyname(const char *, struct vfsconf *);
 __END_DECLS
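With 64-bit inodes selected at compile time, plain statfs() already returns the 64-bit structure (the __DARWIN_INODE64 decoration rebinds the symbol), which is why the explicit *64 calls above are deprecated. A sketch:

    #define _DARWIN_USE_64_BIT_INODE 1      /* must precede the system headers */
    #include <stdint.h>
    #include <sys/param.h>
    #include <sys/mount.h>

    int
    fs_blocks(const char *path, uint64_t *blocks)
    {
        struct statfs sfs;                  /* the __DARWIN_STRUCT_STATFS64 layout */

        if (statfs(path, &sfs) == -1)
            return -1;
        *blocks = sfs.f_blocks;             /* 64-bit field in this layout */
        return 0;
    }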
index 1ff74ca3ef26d251b60daa539154d24f89b0b8e0..eb59661724cc15d74a6c3d620dde59ca9ea6abd6 100644 (file)
@@ -123,6 +123,7 @@ struct mount {
        lck_rw_t        mnt_rwlock;             /* mutex readwrite lock */
         lck_mtx_t      mnt_renamelock;         /* mutex that serializes renames that change shape of tree */
        vnode_t         mnt_devvp;              /* the device mounted on for local file systems */
+       uint32_t        mnt_devbsdunit;         /* the BSD unit number of the device */
        int32_t         mnt_crossref;           /* references to cover lookups crossing into mp */
        int32_t         mnt_iterref;            /* references to cover iterations; drained makes it -ve */
  
@@ -174,8 +175,6 @@ struct mount {
         */
        pid_t           mnt_dependent_pid;
        void            *mnt_dependent_process;
-
-       struct timeval  last_normal_IO_timestamp;
 };
 
 /*
@@ -340,6 +339,15 @@ struct user_statfs {
 #endif
 };
 
+/*
+ * Throttled I/Os are affected only by normal I/Os happening on the same BSD device node.  For example, disk1s3 and
+ * disk1s5 are the same device node, while disk1s3 and disk2 are not (although disk2 might be a mounted disk image
+ * file whose backing store lives on a partition of disk1).  The constant below sets the maximum number of distinct
+ * BSD device nodes the algorithm can track; unit numbers beyond it are wrapped (modulo) into this range.  Since
+ * throttled I/O is mostly useful outside of server environments, a small number such as 16 is enough in most cases.
+ */
+#define LOWPRI_MAX_NUM_DEV 16
+
 __BEGIN_DECLS
 
 extern int mount_generation;
@@ -377,6 +385,11 @@ void mount_iterdrop(mount_t);
 void mount_iterdrain(mount_t);
 void mount_iterreset(mount_t);
 
+/* throttled I/O api */
+int throttle_get_io_policy(struct uthread **ut);
+extern void throttle_lowpri_io(boolean_t ok_to_sleep);
+int throttle_io_will_be_throttled(int lowpri_window_msecs, size_t devbsdunit);
+
 __END_DECLS
 
 #endif /* !_SYS_MOUNT_INTERNAL_H_ */
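How mnt_devbsdunit and LOWPRI_MAX_NUM_DEV fit together, distilled from the vfs_syscalls.c hunk later in this commit (sketch restating that hunk, not new code):

    /* At mount time, tag the mount with its backing device's BSD unit,
     * wrapped into the fixed-size throttle table; mounts with no device
     * vnode share the last slot. */
    uint32_t unit = LOWPRI_MAX_NUM_DEV - 1;             /* default: no device */

    if (device_vnode != NULL) {
        VNOP_IOCTL(device_vnode, DKIOCGETBSDUNIT, (caddr_t)&unit, 0, NULL);
        unit %= LOWPRI_MAX_NUM_DEV;                     /* e.g. disk17 -> slot 1 */
    }
    mp->mnt_devbsdunit = unit;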
index 9d41f7ca7b9b6531fe7d102de77536dbaadcbd36..6395fa2cf7a1a6b98682266e36bc58286d54ee91 100644 (file)
@@ -288,7 +288,10 @@ extern int IS_64BIT_PROCESS(proc_t);
 
 extern int     tsleep(void *chan, int pri, const char *wmesg, int timo);
 extern int     msleep1(void *chan, lck_mtx_t *mtx, int pri, const char *wmesg, u_int64_t timo);
-#endif
+
+extern int proc_pidversion(proc_t);
+extern int proc_getcdhash(proc_t, unsigned char *);
+#endif /* KERNEL_PRIVATE */
 
 __END_DECLS
 
index 08f5fe12c80fa35dd2fe14028f032089ff9d1bd7..280e812b1e59b72c43aa08067955743ebd98ee2e 100644 (file)
@@ -81,7 +81,9 @@ __BEGIN_DECLS
 #include <kern/locks.h>
 __END_DECLS
 
+#if DEBUG
 #define        __PROC_INTERNAL_DEBUG 1
+#endif
 
 /* 
  * The short form for various locks that protect fields in the data structures.
@@ -327,6 +329,7 @@ struct      proc {
        struct  timeval p_start;                /* starting time */
        void *  p_rcall;
        int             p_ractive;
+       int     p_idversion;            /* version of process identity */
 #if DIAGNOSTIC
        unsigned int p_fdlock_pc[4];
        unsigned int p_fdunlock_pc[4];
index f60c65821d0bb54267812d0db5c607db31dedfd8..ab921b8aeda66c2af9a3abd2b995c5f6618d933f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -74,6 +74,8 @@
 #include <sys/cdefs.h>
 #ifdef KERNEL
 #include <machine/types.h>
+#else /* !KERNEL */
+#include <Availability.h>
 #endif /* KERNEL */
 
 /* [XSI] The timespec structure may be defined as described in <time.h> */
@@ -264,8 +266,12 @@ struct stat {
 
 #if !defined(_POSIX_C_SOURCE) || defined(_DARWIN_C_SOURCE)
 
+#if !__DARWIN_ONLY_64_BIT_INO_T
+
 struct stat64 __DARWIN_STRUCT_STAT64;
 
+#endif /* !__DARWIN_ONLY_64_BIT_INO_T */
+
 #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
 
 
@@ -537,15 +543,18 @@ int       mkdirx_np(const char *, filesec_t);
 int    mkfifox_np(const char *, filesec_t);
 int    statx_np(const char *, struct stat *, filesec_t) __DARWIN_INODE64(statx_np);
 int    umaskx_np(filesec_t);
-/* The following are simillar  to stat and friends except provide struct stat64 instead of struct stat  */
-int    fstatx64_np(int, struct stat64 *, filesec_t);
-int    lstatx64_np(const char *, struct stat64 *, filesec_t);
-int    statx64_np(const char *, struct stat64 *, filesec_t);
-int    fstat64(int, struct stat64 *);
-int    lstat64(const char *, struct stat64 *);
-int    stat64(const char *, struct stat64 *);
+
+#if !__DARWIN_ONLY_64_BIT_INO_T
+/* The following deprecated routines are similar to stat and friends except that they provide struct stat64 instead of struct stat */
+int    fstatx64_np(int, struct stat64 *, filesec_t) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
+int    lstatx64_np(const char *, struct stat64 *, filesec_t) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
+int    statx64_np(const char *, struct stat64 *, filesec_t) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
+int    fstat64(int, struct stat64 *) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
+int    lstat64(const char *, struct stat64 *) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
+int    stat64(const char *, struct stat64 *) __OSX_AVAILABLE_BUT_DEPRECATED(__MAC_10_5,__MAC_10_6,__IPHONE_NA,__IPHONE_NA);
+#endif /* !__DARWIN_ONLY_64_BIT_INO_T */
 #endif /* (!_POSIX_C_SOURCE || _DARWIN_C_SOURCE) */
 
 __END_DECLS
-#endif
+#endif /* !KERNEL */
 #endif /* !_SYS_STAT_H_ */
index d22dfcaabb46bf08084450ea2dc3a2f85098ea32..739eae812a537abc7dc5a9b04f6cec023dfc2851 100644 (file)
@@ -68,11 +68,7 @@ int  ubc_pages_resident(vnode_t);
 
 /* code signing */
 struct cs_blob;
-int    ubc_cs_blob_add(vnode_t, cpu_type_t, off_t, vm_address_t, vm_size_t);
 struct cs_blob *ubc_cs_blob_get(vnode_t, cpu_type_t, off_t);
-struct cs_blob *ubc_get_cs_blobs(vnode_t);
-int    ubc_cs_getcdhash(vnode_t, off_t, unsigned char *);
-
 
 /* cluster IO routines */
 int    advisory_read(vnode_t, off_t, off_t, int);
index eb4d413b50483a30e48e5cf4193058dcb6515aea..9ac742bc1b36d2b73c24c7620fc3a7e21019bc68 100644 (file)
@@ -139,7 +139,6 @@ __private_extern__ void ubc_init(void);
 __private_extern__ int ubc_umount(mount_t mp);
 __private_extern__ void        ubc_unmountall(void);
 __private_extern__ memory_object_t ubc_getpager(vnode_t);
-__private_extern__ int  ubc_map(vnode_t, int);
 __private_extern__ void        ubc_destroy_named(vnode_t);
 
 /* internal only */
@@ -169,6 +168,14 @@ int        ubc_getcdhash(vnode_t, off_t, unsigned char *);
 
 int UBCINFOEXISTS(vnode_t);
 
+/* code signing */
+struct cs_blob;
+int    ubc_cs_blob_add(vnode_t, cpu_type_t, off_t, vm_address_t, vm_size_t);
+struct cs_blob *ubc_get_cs_blobs(vnode_t);
+int    ubc_cs_getcdhash(vnode_t, off_t, unsigned char *);
+kern_return_t ubc_cs_blob_allocate(vm_offset_t *, vm_size_t *);
+void ubc_cs_blob_deallocate(vm_offset_t, vm_size_t);
+
 __END_DECLS
 
 
index 89dd1cd4f9b154dbe65a9426556184e079c65b55..4aeb5c885166014a21cd0bc0f13a564eb79e76e6 100644 (file)
@@ -172,7 +172,8 @@ struct uthread {
        u_int32_t       dlil_incremented_read;
        lck_mtx_t       *uu_mtx;
 
-        int            uu_lowpri_window;
+       int             uu_lowpri_window;
+       size_t          uu_devbsdunit;          // to identify which device throttled I/Os are sent to
 
        struct user_sigaltstack uu_sigstk;
         int            uu_defer_reclaims;
@@ -224,7 +225,6 @@ struct uthread {
 #endif
 #endif /* CONFIG_DTRACE */
        void *          uu_threadlist;
-       mount_t         v_mount;
 };
 
 typedef struct uthread * uthread_t;
index 681ad05f0de47fb4a668f4e50cca27c38d8e2430..7aef5e9e89370b1bfeca13d8dc5249ad95506f68 100644 (file)
@@ -662,6 +662,9 @@ int vnode_iterate(struct mount *, int, int (*)(struct vnode *, void *), void *);
 #define VNODE_ITERATE_ALL              0x80
 #define VNODE_ITERATE_ACTIVE   0x100
 #define VNODE_ITERATE_INACTIVE 0x200
+#ifdef BSD_KERNEL_PRIVATE
+#define VNODE_ALWAYS           0x400
+#endif /* BSD_KERNEL_PRIVATE */
 
 /*
  * return values from callback
index ae4be39a919a1c12961f826d49e0778c30ef99a8..66e32d7c303c1cb9332a83272df25346879fe048 100644 (file)
@@ -413,6 +413,9 @@ int vnode_ref_ext(vnode_t, int);
 void   vnode_rele_ext(vnode_t, int, int);
 void   vnode_rele_internal(vnode_t, int, int, int);
 int    vnode_getwithref(vnode_t);
+#ifdef BSD_KERNEL_PRIVATE
+int    vnode_getalways(vnode_t);
+#endif /* BSD_KERNEL_PRIVATE */
 int    vnode_get_locked(vnode_t);
 int    vnode_put_locked(vnode_t);
 
index 3a34e1787f61be54be9aa68585e9f60692a28a70..9b1a7af25f323da25a511614537dc97118da1d31 100644 (file)
@@ -416,6 +416,17 @@ cluster_hard_throttle_on(vnode_t vp)
                if (timevalcmp(&elapsed, &hard_throttle_maxelapsed, <))
                        return(1);
        }
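+       /*
+        * Passing -1 for lowpri_window_msecs presumably selects the
+        * throttle code's default window (assumption; the implementation
+        * of throttle_io_will_be_throttled() is not in this hunk).
+        */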
+       struct uthread  *ut;
+       if (throttle_get_io_policy(&ut) == IOPOL_THROTTLE) {
+               size_t devbsdunit;
+               if (vp->v_mount != NULL)
+                       devbsdunit = vp->v_mount->mnt_devbsdunit;
+               else
+                       devbsdunit = LOWPRI_MAX_NUM_DEV - 1;
+               if (throttle_io_will_be_throttled(-1, devbsdunit)) {
+                       return(1);
+               }
+       }
        return(0);
 }
 
index bff33c625850dffa54a16c52d0ce3242fa52140f..eb070de33b22916d53780eae0d7908598f355487 100644 (file)
@@ -1706,14 +1706,6 @@ journal_open(struct vnode *jvp,
     if (phys_blksz != (size_t)jnl->jhdr->jhdr_size && jnl->jhdr->jhdr_size != 0) {
                printf("jnl: %s: open: phys_blksz %lu does not match journal header size %d\n",
                    jdev_name, phys_blksz, jnl->jhdr->jhdr_size);
-
-               orig_blksz = phys_blksz;
-               phys_blksz = jnl->jhdr->jhdr_size;
-               if (VNOP_IOCTL(jvp, DKIOCSETBLOCKSIZE, (caddr_t)&phys_blksz, FWRITE, &context)) {
-                   printf("jnl: %s: could not set block size to %lu bytes.\n", jdev_name, phys_blksz);
-                   goto bad_journal;
-               }
-//             goto bad_journal;
     }
 
     if (   jnl->jhdr->start <= 0
index 482fb8c46c33d6ddaed9e99bb287f665008d5830..c5d91125ddf9186e380222993c8dbb73cf89ab74 100644 (file)
@@ -3389,6 +3389,16 @@ retry:
                log(LOG_EMERG, "%d desired, %d numvnodes, "
                        "%d free, %d dead, %d rage\n",
                        desiredvnodes, numvnodes, freevnodes, deadvnodes, ragevnodes);
+#if CONFIG_EMBEDDED
+               /*
+                * Running out of vnodes tends to make a system unusable.  On an
+                * embedded system, it's unlikely that the user can do anything
+                * about it (or would know what to do, if they could).  So panic
+                * the system so it will automatically restart (and hopefully we
+                * can get a panic log that tells us why we ran out).
+                */
+               panic("vnode table is full\n");
+#endif
                *vpp = NULL;
                return (ENFILE);
        }
@@ -3558,6 +3568,12 @@ vnode_getwithref(vnode_t vp)
 }
 
 
+__private_extern__ int
+vnode_getalways(vnode_t vp)
+{
+        return(vget_internal(vp, 0, VNODE_ALWAYS));
+}
+
 int
 vnode_put(vnode_t vp)
 {
@@ -3726,6 +3742,7 @@ vnode_getiocount(vnode_t vp, int vid, int vflags)
 {
        int nodead = vflags & VNODE_NODEAD;
        int nosusp = vflags & VNODE_NOSUSPEND;
+       int always = vflags & VNODE_ALWAYS;
 
        for (;;) {
                /*
@@ -3754,6 +3771,8 @@ vnode_getiocount(vnode_t vp, int vid, int vflags)
                    (vp->v_owner == current_thread())) {
                        break;
                }
+               if (always != 0) 
+                       break;
                vnode_lock_convert(vp);
 
                if (vp->v_lflag & VL_TERMINATE) {
index 51b67f399541fb403162bd9c209aa2d2144b783d..be9bfe17a6c3fbd5aac65ca2c40b23d265b6115b 100644 (file)
@@ -95,6 +95,7 @@
 #include <sys/sysproto.h>
 #include <sys/xattr.h>
 #include <sys/ubc_internal.h>
+#include <sys/disk.h>
 #include <machine/cons.h>
 #include <machine/limits.h>
 #include <miscfs/specfs/specdev.h>
@@ -418,6 +419,7 @@ __mac_mount(struct proc *p, register struct __mac_mount_args *uap, __unused regi
        strncpy(mp->mnt_vfsstat.f_mntonname, nd.ni_cnd.cn_pnbuf, MAXPATHLEN);
        mp->mnt_vnodecovered = vp;
        mp->mnt_vfsstat.f_owner = kauth_cred_getuid(vfs_context_ucred(ctx));
+       mp->mnt_devbsdunit = LOWPRI_MAX_NUM_DEV - 1;
 
        /* XXX 3762912 hack to support HFS filesystem 'owner' - filesystem may update later */
        vfs_setowner(mp, KAUTH_UID_NONE, KAUTH_GID_NONE);
@@ -590,6 +592,11 @@ update:
                        goto out3;
        }
 #endif
+       if (device_vnode != NULL) {
+               VNOP_IOCTL(device_vnode, DKIOCGETBSDUNIT, (caddr_t)&mp->mnt_devbsdunit, 0, NULL);
+               mp->mnt_devbsdunit %= LOWPRI_MAX_NUM_DEV;
+       }
+
        /*
         * Mount the filesystem.
         */
@@ -1020,6 +1027,7 @@ dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
        int needwakeup = 0;
        int forcedunmount = 0;
        int lflags = 0;
+       struct vnode *devvp = NULLVP;
 
        if (flags & MNT_FORCE)
                forcedunmount = 1;
@@ -1115,10 +1123,14 @@ dounmount(struct mount *mp, int flags, int withref, vfs_context_t ctx)
                OSAddAtomic(1, (SInt32 *)&vfs_nummntops);
 
        if ( mp->mnt_devvp && mp->mnt_vtable->vfc_vfsflags & VFC_VFSLOCALARGS) {
-               mp->mnt_devvp->v_specflags &= ~SI_MOUNTEDON;
-               VNOP_CLOSE(mp->mnt_devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
+               /* hold an io reference and drop the usecount before close */
+               devvp = mp->mnt_devvp;
+               vnode_clearmountedon(devvp);
+               vnode_getalways(devvp);
+               vnode_rele(devvp);
+               VNOP_CLOSE(devvp, mp->mnt_flag & MNT_RDONLY ? FREAD : FREAD|FWRITE,
                        ctx);
-               vnode_rele(mp->mnt_devvp);
+               vnode_put(devvp);
        }
        lck_rw_done(&mp->mnt_rwlock);
        mount_list_remove(mp);
@@ -4691,6 +4703,7 @@ rename(__unused proc_t p, struct rename_args *uap, __unused register_t *retval)
        struct nameidata fromnd, tond;
        vfs_context_t ctx = vfs_context_current();
        int error;
+       int do_retry;
        int mntrename;
        int need_event;
        const char *oname;
@@ -4702,6 +4715,7 @@ rename(__unused proc_t p, struct rename_args *uap, __unused register_t *retval)
        fse_info from_finfo, to_finfo;
        
        holding_mntlock = 0;
+    do_retry = 0;
 retry:
        fvp = tvp = NULL;
        fdvp = tdvp = NULL;
@@ -4816,8 +4830,17 @@ retry:
                        if ((error = vnode_authorize(((tvp != NULL) && vnode_isdir(tvp)) ? tvp : tdvp,
                                 NULL, 
                                 vnode_isdir(fvp) ? KAUTH_VNODE_ADD_SUBDIRECTORY : KAUTH_VNODE_ADD_FILE,
-                                ctx)) != 0)
+                                ctx)) != 0) {
+                /*
+                 * We could encounter a race where after doing the namei, tvp stops
+                 * being valid. If so, simply re-drive the rename call from the
+                 * top.
+                 */
+                 if (error == ENOENT) {
+                     do_retry = 1;
+                 }
                                goto auth_exit;
+                       }
                } else {
                        /* node staying in same directory, must be allowed to add new name */
                        if ((error = vnode_authorize(fdvp, NULL,
@@ -4826,8 +4849,17 @@ retry:
                }
                /* overwriting tvp */
                if ((tvp != NULL) && !vnode_isdir(tvp) &&
-                   ((error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx)) != 0))
+                   ((error = vnode_authorize(tvp, tdvp, KAUTH_VNODE_DELETE, ctx)) != 0)) {
+            /*
+             * We could encounter a race where after doing the namei, tvp stops
+             * being valid. If so, simply re-drive the rename call from the
+             * top.
+             */
+            if (error == ENOENT) {
+                do_retry = 1;
+            }
                        goto auth_exit;
+               }
                    
                /* XXX more checks? */
 
@@ -5071,6 +5103,15 @@ auth_exit:
                holding_mntlock = 0;
        }
        if (error) {
+        /*
+         * We may encounter a race in the VNOP where the destination didn't 
+         * exist when we did the namei, but it does by the time we go and 
+         * try to create the entry. In this case, we should re-drive this rename
+         * call from the top again.
+         */
+        if (error == EEXIST) {
+            do_retry = 1;
+        }
 
                goto out1;
        } 
@@ -5158,14 +5199,18 @@ auth_exit:
                vnode_update_identity(fvp, tdvp, tond.ni_cnd.cn_nameptr, tond.ni_cnd.cn_namelen, tond.ni_cnd.cn_hash, update_flags);
        }
 out1:
-       if (to_name != NULL)
-               RELEASE_PATH(to_name);
-       if (from_name != NULL)
-               RELEASE_PATH(from_name);
-
+       if (to_name != NULL) {
+               RELEASE_PATH(to_name);
+               to_name = NULL;
+       }
+       if (from_name != NULL) {
+               RELEASE_PATH(from_name);
+               from_name = NULL;
+       }
        if (holding_mntlock) {
                mount_unlock_renames(locked_mp);
                mount_drop(locked_mp, 0);
+               holding_mntlock = 0;
        }
        if (tdvp) {
                /*
@@ -5189,6 +5234,16 @@ out1:
                        vnode_put(fvp);
                vnode_put(fdvp);
        }
+
+    /*
+     * If things changed after we did the namei, then we will re-drive
+     * this rename call from the top.
+     */
+       if(do_retry) {
+        do_retry = 0;
+               goto retry;
+       }
+
        return (error);
 }
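The retry logic added to rename() above distills to the following compilable shape (lookup_pair(), vnop_rename(), and drop_refs() are hypothetical stand-ins for namei(), the authorization + VNOP_RENAME() step, and the iocount/lock teardown):

    #include <errno.h>

    extern int  lookup_pair(const char *from, const char *to);
    extern int  vnop_rename(const char *from, const char *to);
    extern void drop_refs(void);

    int
    rename_with_retry(const char *from, const char *to)
    {
        int error, do_retry;

        do {
            do_retry = 0;
            error = lookup_pair(from, to);
            if (error == 0) {
                error = vnop_rename(from, to);
                /* Lookup and VNOP are not atomic: the target can vanish
                 * before authorization (ENOENT) or appear before the VNOP
                 * runs (EEXIST); both restart from the lookup. */
                if (error == ENOENT || error == EEXIST)
                    do_retry = 1;
            }
            drop_refs();    /* always release lookup refs before retrying */
        } while (do_retry);

        return error;
    }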
 
index 4dafffdf3d8f3183f582a549152b094813ad72e7..a8fc43f7c406b28937756011c947eeaf61b849ae 100644 (file)
@@ -408,7 +408,7 @@ badcreate:
                goto bad;
        }
        if ( (error = vnode_ref_ext(vp, fmode)) ) {
-               goto bad;
+               goto bad2;
        }
 
        /* call out to allow 3rd party notification of open. 
@@ -419,6 +419,8 @@ badcreate:
 
        *fmodep = fmode;
        return (0);
+bad2:
+       VNOP_CLOSE(vp, fmode, ctx);
 bad:
        ndp->ni_vp = NULL;
        if (vp) {
@@ -493,9 +495,16 @@ vn_close(struct vnode *vp, int flags, vfs_context_t ctx)
                }
        }
 #endif
+       
+       /* work around for foxhound */
+       if (vp->v_type == VBLK)
+               (void)vnode_rele_ext(vp, flags, 0);
+
        error = VNOP_CLOSE(vp, flags, ctx);
-       (void)vnode_rele_ext(vp, flags, 0);
 
+       if (vp->v_type != VBLK)
+               (void)vnode_rele_ext(vp, flags, 0);
+       
        return (error);
 }
 
index 094b6258c16197787fb5dfb7736c8e1a8a90c742..559f832909286578991187a4112ca9671b4f864c 100644 (file)
@@ -1036,12 +1036,6 @@ shared_region_map_np(
                goto done;
        }
 
-       /*
-        * The mapping was successful.  Let the buffer cache know
-        * that we've mapped that file with these protections.  This
-        * prevents the vnode from getting recycled while it's mapped.
-        */
-       (void) ubc_map(vp, VM_PROT_READ);
        error = 0;
 
        /* update the vnode's access time */
index c35f33369dfe15107d31c47a6533e5fc355ddf40..483bda11734fab1f368d4f7fed205b60681537c4 100644 (file)
@@ -343,8 +343,6 @@ out:
 }
 
 
-extern void throttle_lowpri_io(int *lowpri_window,mount_t v_mount);
-
 pager_return_t
 vnode_pagein(
        struct vnode            *vp,
@@ -512,15 +510,15 @@ out:
 
        ut = get_bsdthread_info(current_thread());
 
-       if (ut->uu_lowpri_window && ut->v_mount) {
+       if (ut->uu_lowpri_window) {
                /*
                 * task is marked as a low priority I/O type
-                * and the I/O we issued while in this system call
+                * and the I/O we issued while in this page fault
                 * collided with normal I/O operations... we'll
                 * delay in order to mitigate the impact of this
                 * task on the normal operation of the system
                 */
-               throttle_lowpri_io(&ut->uu_lowpri_window,ut->v_mount);
+               throttle_lowpri_io(TRUE);
        }
        return (error);
 }
index 64db93b879ccf0349a22ba9e0afdf708ab1b798a..38648f07c11f6399c50e9fc9958688882e12aa9a 100644 (file)
@@ -1,4 +1,4 @@
-9.5.0
+9.6.0
 
 # The first line of this file contains the master version number for the kernel.
 # All other instances of the kernel version in xnu are derived from this file.
index 10c23908dbb87e264f5b85a7aadcdfb4182d75be..e558b7286003ad903e664aeee0fcc38352bdec71 100644 (file)
@@ -1,4 +1,3 @@
-_Cstate_table_set
 _PE_install_interrupt_handler
 _PE_interrupt_handler
 _acpi_install_wake_handler
@@ -18,11 +17,8 @@ _mp_cpus_call
 _mp_rendezvous_no_intrs
 _mtrr_range_add
 _mtrr_range_remove
-_pmsCPUSetPStateLimit
-_pmsCPULoadVIDTable
 _rtc_clock_stepped
 _rtc_clock_stepping
 _smp_initialized
-_thread_bind
 __ZN24IOBufferMemoryDescriptor20initWithPhysicalMaskEP4taskmyyy
 __ZN24IOBufferMemoryDescriptor22inTaskWithPhysicalMaskEP4taskmyy
index ae2f63239f4f783d82cd5eff4250cc5b2fcc9ae4..8aab26874cf20d26915296732988edf23aeedacb 100644 (file)
@@ -233,6 +233,7 @@ _sbappendrecord
 _sbflush
 _sbspace
 _securelevel
+_sha1_hardware_hook
 _sleep
 _soabort
 _sobind
index 1664fbf316a2f544fef0fa66095464ea3610c3b8..7720a4d418b11080b8ded738856db90a34047b8e 100644 (file)
@@ -1,9 +1,6 @@
 _cpu_number
 _dsmos_page_transform_hook
 _gPEEFISystemTable
-_hpet_get_info
-_hpet_register_callback
-_hpet_request
 _in6addr_local
 _io_map_spec
 _kdp_register_callout
@@ -13,7 +10,6 @@ _m_mtod
 _ml_get_apicid
 _ml_get_maxbusdelay
 _ml_get_maxsnoop
-_ml_hpet_cfg
 _ml_cpu_int_event_time
 _mp_rendezvous
 _mp_rendezvous_no_intrs
@@ -21,7 +17,6 @@ _nd6_storelladdr
 _pmCPUControl
 _pmKextRegister
 _pm_init_lock
-_rdHPET
 _real_ncpus
 _rtc_clock_napped
 _serial_getc
index b9a6f5b6c24f4e115e66c3652ea6ed0508942aca..489b2f58dc934752f953aeaf85cb23f158a3c16f 100644 (file)
@@ -70,4 +70,11 @@ enum { kIOPrepareToPhys32 = 0x04 };
 #define kIOPlatformQuiesceActionKey    "IOPlatformQuiesceAction"       /* value is OSNumber (priority) */
 #define kIOPlatformActiveActionKey     "IOPlatformActiveAction"        /* value is OSNumber (priority) */
 
+#define kIOPlatformFunctionHandlerSet                  "IOPlatformFunctionHandlerSet"
+#if defined(__i386__)
+#define kIOPlatformFunctionHandlerMaxBusDelay          "IOPlatformFunctionHandlerMaxBusDelay"
+#define kIOPlatformFunctionHandlerMaxInterruptDelay    "IOPlatformFunctionHandlerMaxInterruptDelay"
+#endif /* defined(__i386__) */
+
+
 #endif /* ! _IOKIT_IOKITKEYSPRIVATE_H */
index 17669c8946a0ec593b9270745d0c327d919046f8..405a7c3ebc102fc302dae23d57d4bd5a1d39cdd5 100644 (file)
@@ -12,12 +12,7 @@ include $(MakeInc_def)
 
 MI_DIR = platform
 NOT_EXPORT_HEADERS = 
-NOT_KF_MI_HEADERS  = AppleARMCPU.h AppleARMFunction.h AppleARMIICController.h \
-                       AppleARMIICDevice.h AppleARMIISController.h \
-                       AppleARMIISDevice.h AppleARMIO.h AppleARMIODevice.h \
-                       AppleARMNORFlashController.h AppleARMNORFlashDevice.h \
-                       AppleARMPE.h AppleARMRTC.h AppleARMSPIController.h \
-                       AppleARMSPIDevice.h
+NOT_KF_MI_HEADERS  = 
 
 INSTINC_SUBDIRS =
 INSTINC_SUBDIRS_PPC =
index 1d3953308de3df4e1030d6d6eaf3988837a939ec..aa6a0a615f7e7a7d0a6656ed86718d63c2cc2db7 100644 (file)
@@ -361,6 +361,7 @@ enum {
 #define kIOPMPSHealthConfidenceKey                  "HealthConfidence"
 #define kIOPMPSCapacityEstimatedKey                    "CapacityEstimated"
 #define kIOPMPSBatteryChargeStatusKey               "ChargeStatus"
+#define kIOPMPSBatteryTemperatureKey                "Temperature"
 
 // kIOPMBatteryChargeStatusKey may have one of the following values, or may have
 // no value. If kIOPMBatteryChargeStatusKey has a NULL value (or no value) associated with it
@@ -484,6 +485,7 @@ enum {
 #define kIOPMSettingDisplaySleepUsesDimKey          "Display Sleep Uses Dim"
 #define kIOPMSettingTimeZoneOffsetKey               "TimeZoneOffsetSeconds"
 #define kIOPMSettingMobileMotionModuleKey           "MobileMotionModule"
+#define kIOPMSettingGraphicsSwitchKey               "GPUSwitch"
 
 // Setting controlling drivers can register to receive scheduled wake data
 // Either in "CF seconds" type, or structured calendar data in a formatted
index 8161a3b996e94b6b9c00ac5f65add032015fa8be..e32611c99788aae570210555792e883b5f31ee41 100644 (file)
@@ -41,5 +41,42 @@ enum {
     kIOPMSetACAdaptorConnected = (1<<18)
 };
 
+/*
+ * PM notification types
+ */
+
+/* @constant kIOPMStateConsoleShutdown
+ * @abstract Notification of GUI shutdown state available to kexts.
+ * @discussion This key can be passed as an argument to registerPMSettingController()
+ * to receive callbacks.
+ */
+#define kIOPMStateConsoleShutdown   "ConsoleShutdown"
+
+/* @enum ShutdownValues
+ * @abstract Potential values shared with key kIOPMStateConsoleShutdown
+ */
+enum {
+/* @constant kIOPMStateConsoleShutdownNone
+ * @abstract System shutdown (or restart) hasn't started; system is ON.
+ * @discussion Next state: 2
+ */
+    kIOPMStateConsoleShutdownNone   = 1,
+/* @constant kIOPMStateConsoleShutdownPossible
+ * @abstract User has been presented with the option to shutdown or restart. Shutdown may be cancelled.
+ * @discussion Next state may be: 1, 4
+ */
+    kIOPMStateConsoleShutdownPossible = 2,
+/* @constant kIOPMStateConsoleShutdownUnderway
+ * @abstract Shutdown or restart is proceeding. It may still be cancelled.
+ * @discussion Next state may be: 1, 4. This state is currently unused.
+ */
+    kIOPMStateConsoleShutdownUnderway = 3,
+/* @constant kIOPMStateConsoleShutdownCertain
+ * @abstract Shutdown is in progress and irrevocable.
+ * @discussion State remains 4 until power is removed from CPU.
+ */
+    kIOPMStateConsoleShutdownCertain = 4
+};
+
 #endif /* ! _IOKIT_IOPMPRIVATE_H */
 
index 187638627e056937cb0740542d8d6ad4d6e2ec37..695c267b1060a6e97001e18c18fe838ef358018b 100644 (file)
@@ -78,6 +78,7 @@ enum PMLogEnum {
     kPMLogSetClockGating,              // 50   0x051000c8 - platform device specific clock control
     kPMLogSetPowerGating,              // 51   0x051000cc - platform device specific power control
     kPMLogSetPinGroup,                 // 52   0x051000d0 - platform device specific gpio control
+    kPMLogIdleCancel,                  // 53   0x051000d4 - device unidle during change
     kIOPMlogLastEvent
 };
 
index dd2bff563997e4c6861f667cc839eda1bab5dbc9..b1b7a39b0d2c1df17f2f25864f98d031708972d0 100644 (file)
@@ -15,9 +15,8 @@ NOT_EXPORT_HEADERS = \
        IOPMinformee.h          \
        IOPMinformeeList.h      \
        IOPMlog.h               \
-       IOPMPagingPlexus.h      \
-       IOPMPrivate.h
-
+       IOPMPagingPlexus.h
+       
 INSTINC_SUBDIRS =
 INSTINC_SUBDIRS_PPC =
 INSTINC_SUBDIRS_I386 =
@@ -31,7 +30,7 @@ EXPINC_SUBDIRS_ARM = ${INSTINC_SUBDIRS_ARM}
 ALL_HEADERS = $(shell (cd $(SOURCE); echo *.h))
 
 INSTALL_MI_LIST        = IOPMLibDefs.h IOPM.h IOPMDeprecated.h
-INSTALL_MI_LCL_LIST = ""
+INSTALL_MI_LCL_LIST = IOPMPrivate.h
 INSTALL_MI_DIR = $(MI_DIR)
 
 EXPORT_MI_LIST = $(filter-out $(NOT_EXPORT_HEADERS), $(ALL_HEADERS))
index 144b4c836f52c811c8052ae92911cc2a449f5bfe..9ce7974ed0565801d2cf68d853031d2244c431c7 100644 (file)
@@ -270,6 +270,7 @@ void PE_cpu_machine_quiesce(cpu_id_t target)
   if (targetCPU) targetCPU->quiesceCPU();
 }
 
+
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 #define super IOService
index 939c78a3ca56c5fc73c62e73364d482ff1740ca4..0b5f54a2560bc69d8e1b210d39f05529b812fe7a 100644 (file)
@@ -1609,7 +1609,7 @@ kern_return_t IOCatalogue::removeKernelLinker(void) {
         goto finish;
     }
 
-    PE_parse_boot_arg("keepsyms", &keepsyms);
+    PE_parse_boot_argn("keepsyms", &keepsyms, sizeof (keepsyms));
  
     IOLog("Jettisoning kernel linker.\n");
 
index 2487209f813e07e48143d8fd32920886596d407e..503fb3a64a73dfdc2a9da8ef2625d3d22831b746 100644 (file)
@@ -72,7 +72,7 @@ IOMemoryCursor::initWithSpecification(SegmentFunction  inSegFunc,
 static UInt sMaxDBDMASegment;
 if (!sMaxDBDMASegment) {
     sMaxDBDMASegment = (UInt) -1;
-    if (PE_parse_boot_arg("mseg", &sMaxDBDMASegment))
+    if (PE_parse_boot_argn("mseg", &sMaxDBDMASegment, sizeof (sMaxDBDMASegment)))
         IOLog("Setting MaxDBDMASegment to %d\n", sMaxDBDMASegment);
 }
 
index b4e86780de77447b010c25d504a33b3c425d5d77..499aa6cc89015c833763c03093ff34f81986183b 100644 (file)
@@ -31,6 +31,8 @@
 #include <IOKit/IOPlatformExpert.h>
 #include <IOKit/IOUserClient.h>
 #include <IOKit/IOKitKeys.h>
+#include <kern/debug.h>
+#include <pexpert/pexpert.h>
 
 #define super IOService
 
@@ -926,6 +928,9 @@ OFVariable gOFVariables[] = {
   {"security-password", kOFVariableTypeData, kOFVariablePermRootOnly, -1},
   {"boot-image", kOFVariableTypeData, kOFVariablePermUserWrite, -1},
   {"com.apple.System.fp-state", kOFVariableTypeData, kOFVariablePermKernelOnly, -1},
+#if CONFIG_EMBEDDED
+  {"backlight-level", kOFVariableTypeData, kOFVariablePermUserWrite, -1},
+#endif
   {0, kOFVariableTypeData, kOFVariablePermUserRead, -1}
 };
 
index 81568ee1e1d5dddbeef7d9361ffd9cd385db595c..1ff71887b7646605e7d0d05cca6df22a7f310c09 100644 (file)
@@ -329,7 +329,7 @@ static UInt32 computeDeltaTimeMS( const AbsoluteTime * startTime )
 // expert informs us we are the root.
 // **********************************************************************************
 
-#define kRootDomainSettingsCount        14
+#define kRootDomainSettingsCount        16
 
 static SYSCTL_STRUCT(_kern, OID_AUTO, sleeptime, 
                     CTLFLAG_RD | CTLFLAG_NOAUTO | CTLFLAG_KERN, 
@@ -363,7 +363,9 @@ bool IOPMrootDomain::start ( IOService * nub )
             OSSymbol::withCString(kIOPMSettingWakeOnACChangeKey),
             OSSymbol::withCString(kIOPMSettingTimeZoneOffsetKey),
             OSSymbol::withCString(kIOPMSettingDisplaySleepUsesDimKey),
-            OSSymbol::withCString(kIOPMSettingMobileMotionModuleKey)
+            OSSymbol::withCString(kIOPMSettingMobileMotionModuleKey),
+            OSSymbol::withCString(kIOPMSettingGraphicsSwitchKey),
+            OSSymbol::withCString(kIOPMStateConsoleShutdown)
         };
     
 
@@ -2305,12 +2307,30 @@ void IOPMrootDomain::tellChangeUp ( unsigned long stateNum)
 {
     if ( stateNum == ON_STATE ) 
     {
-#if    HIBERNATION
         // Direct callout into OSMetaClass so it can disable kmod unloads
         // during sleep/wake to prevent deadlocks.
         OSMetaClassSystemSleepOrWake( kIOMessageSystemHasPoweredOn );
 
-       IOHibernateSystemPostWake();
+       if (getPowerState() == ON_STATE)
+       {
+           // this is a quick wake from aborted sleep
+           if (idleSeconds && !wrangler)
+           {
+               AbsoluteTime deadline;
+               sleepASAP = false;
+               // stay awake for at least idleSeconds
+               clock_interval_to_deadline(idleSeconds, kSecondScale, &deadline);       
+               thread_call_enter_delayed(extraSleepTimer, deadline);
+               // this gets turned off when we sleep again
+               idleSleepPending = true;
+           }
+           tellClients(kIOMessageSystemWillPowerOn);
+       }
+#if    HIBERNATION
+       else
+       {
+           IOHibernateSystemPostWake();
+       }
 #endif
         return tellClients(kIOMessageSystemHasPoweredOn);
     }
index 1b53461ecd5b7cf14fb2f7522f1236d0a8be6a86..a03ef1d90bd8dbcbc952850c5f57f0735acf135a 100644 (file)
@@ -107,7 +107,7 @@ bool IOPlatformExpert::start( IOService * provider )
       return false;
     
     // Override the mapper present flag if requested by boot arguments.
-    if (PE_parse_boot_arg("dart", &debugFlags) && (debugFlags == 0))
+    if (PE_parse_boot_argn("dart", &debugFlags, sizeof (debugFlags)) && (debugFlags == 0))
       removeProperty(kIOPlatformMapperPresentKey);
     
     // Register the presence or lack thereof a system 
index f92941428f5a7c6ed5ac8849453b10ad731a182f..f58ea137c7dd02cf388b55b54359bd02ccb70ef7 100644 (file)
@@ -153,6 +153,8 @@ const OSSymbol *            gIOPlatformWakeActionKey;
 const OSSymbol *               gIOPlatformQuiesceActionKey;
 const OSSymbol *               gIOPlatformActiveActionKey;
 
+const OSSymbol *               gIOPlatformFunctionHandlerSet;
+
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
 #define LOCKREADNOTIFY()       \
@@ -206,19 +208,34 @@ bool IOService::isInactive( void ) const
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
-#if __i386__
+#if defined(__i386__)
 
 // Only used by the intel implementation of
-//     IOService::requireMaxBusStall(UInt32 __unused ns)
-struct BusStallEntry
+//     IOService::requireMaxBusStall(UInt32 ns)
+//     IOService::requireMaxInterruptDelay(uint32_t ns)
+struct CpuDelayEntry
 {
-    const IOService *fService;
-    UInt32 fMaxDelay;
+    IOService * fService;
+    UInt32      fMaxDelay;
+    UInt32      fDelayType;
+};
+
+enum {
+    kCpuDelayBusStall, kCpuDelayInterrupt,
+    kCpuNumDelayTypes
 };
 
-static OSData *sBusStall     = OSData::withCapacity(8 * sizeof(BusStallEntry));
-static IOLock *sBusStallLock = IOLockAlloc();
-#endif /* __i386__ */
+static OSData          *sCpuDelayData = OSData::withCapacity(8 * sizeof(CpuDelayEntry));
+static IORecursiveLock *sCpuDelayLock = IORecursiveLockAlloc();
+static OSArray         *sCpuLatencyHandlers[kCpuNumDelayTypes];
+const OSSymbol         *sCPULatencyFunctionName[kCpuNumDelayTypes];
+
+static void
+requireMaxCpuDelay(IOService * service, UInt32 ns, UInt32 delayType);
+static IOReturn
+setLatencyHandler(UInt32 delayType, IOService * target, bool enable);
+
+#endif /* defined(__i386__) */
 
 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
 
@@ -288,6 +305,11 @@ void IOService::initialize( void )
     gIOPlatformQuiesceActionKey        = OSSymbol::withCStringNoCopy(kIOPlatformQuiesceActionKey);
     gIOPlatformActiveActionKey = OSSymbol::withCStringNoCopy(kIOPlatformActiveActionKey);
 
+    gIOPlatformFunctionHandlerSet              = OSSymbol::withCStringNoCopy(kIOPlatformFunctionHandlerSet);
+#if defined(__i386__)
+    sCPULatencyFunctionName[kCpuDelayBusStall] = OSSymbol::withCStringNoCopy(kIOPlatformFunctionHandlerMaxBusDelay);
+    sCPULatencyFunctionName[kCpuDelayInterrupt]        = OSSymbol::withCStringNoCopy(kIOPlatformFunctionHandlerMaxInterruptDelay);
+#endif
     gNotificationLock          = IORecursiveLockAlloc();
 
     assert( gIOServicePlane && gIODeviceMemoryKey
@@ -822,9 +844,23 @@ IOReturn IOService::callPlatformFunction( const OSSymbol * functionName,
                                          void *param3, void *param4 )
 {
   IOReturn  result = kIOReturnUnsupported;
-  IOService *provider = getProvider();
-  
-  if (provider != 0) {
+  IOService *provider;
+
+  if (gIOPlatformFunctionHandlerSet == functionName)
+  {
+#if defined(__i386__)
+    const OSSymbol * functionHandlerName = (const OSSymbol *) param1;
+    IOService *             target              = (IOService *) param2;
+    bool            enable              = (param3 != 0);
+
+    if (sCPULatencyFunctionName[kCpuDelayBusStall] == functionHandlerName)
+       result = setLatencyHandler(kCpuDelayBusStall, target, enable);
+    else if (sCPULatencyFunctionName[kCpuDelayInterrupt] == functionHandlerName)
+       result = setLatencyHandler(kCpuDelayInterrupt, target, enable);
+#endif /* defined(__i386__) */
+  }
+
+  if ((kIOReturnUnsupported == result) && (provider = getProvider())) {
     result = provider->callPlatformFunction(functionName, waitForFunction,
                                            param1, param2, param3, param4);
   }
@@ -4421,82 +4457,182 @@ void IOService::setDeviceMemory( OSArray * array )
 void IOService::
 setCPUSnoopDelay(UInt32 __unused ns)
 {
-#if __i386__
+#if defined(__i386__)
     ml_set_maxsnoop(ns); 
-#endif /* __i386__ */
+#endif /* defined(__i386__) */
 }
 
 UInt32 IOService::
 getCPUSnoopDelay()
 {
-#if __i386__
+#if defined(__i386__)
     return ml_get_maxsnoop(); 
 #else
     return 0;
-#endif /* __i386__ */
+#endif /* defined(__i386__) */
 }
 
-void IOService::
-requireMaxBusStall(UInt32 __unused ns)
+#if defined(__i386__)
+static void
+requireMaxCpuDelay(IOService * service, UInt32 ns, UInt32 delayType)
 {
-#if __i386__
     static const UInt kNoReplace = -1U;        // Must be an illegal index
     UInt replace = kNoReplace;
+    bool setCpuDelay = false;
 
-    IOLockLock(sBusStallLock);
+    IORecursiveLockLock(sCpuDelayLock);
 
-    UInt count = sBusStall->getLength() / sizeof(BusStallEntry);
-    BusStallEntry *entries = (BusStallEntry *) sBusStall->getBytesNoCopy();
+    UInt count = sCpuDelayData->getLength() / sizeof(CpuDelayEntry);
+    CpuDelayEntry *entries = (CpuDelayEntry *) sCpuDelayData->getBytesNoCopy();
+    IOService * holder = NULL;
 
     if (ns) {
-       const BusStallEntry ne = {this, ns};
-
-       // Set Maximum bus delay.
-       for (UInt i = 0; i < count; i++) {
-           const IOService *thisService = entries[i].fService;
-           if (this == thisService)
-               replace = i;
-           else if (!thisService) {
-               if (kNoReplace == replace)
-                   replace = i;
-           }
-           else {
-               const UInt32 thisMax = entries[i].fMaxDelay;
-               if (thisMax < ns)
-                   ns = thisMax;
-           }
-       }
-
-       // Must be safe to call from locked context
-       ml_set_maxbusdelay(ns);
-
-       if (kNoReplace == replace)
-           sBusStall->appendBytes(&ne, sizeof(ne));
-       else
-           entries[replace] = ne;
+        const CpuDelayEntry ne = {service, ns, delayType};
+       holder = service;
+        // Set maximum delay.
+        for (UInt i = 0; i < count; i++) {
+            IOService *thisService = entries[i].fService;
+            bool sameType = (delayType == entries[i].fDelayType);            
+            if ((service == thisService) && sameType)
+                replace = i;
+            else if (!thisService) {
+                if (kNoReplace == replace)
+                    replace = i;
+            }
+            else if (sameType) {
+                const UInt32 thisMax = entries[i].fMaxDelay;
+                if (thisMax < ns)
+               {
+                    ns = thisMax;
+                   holder = thisService;
+               }
+            }
+        }
+        
+        setCpuDelay = true;
+        if (kNoReplace == replace)
+            sCpuDelayData->appendBytes(&ne, sizeof(ne));
+        else
+            entries[replace] = ne;
     }
     else {
-       ns = -1U;       // Set to max unsigned, i.e. no restriction
-
-       for (UInt i = 0; i < count; i++) {
-           // Clear a maximum bus delay.
-           const IOService *thisService = entries[i].fService;
-           UInt32 thisMax = entries[i].fMaxDelay;
-           if (this == thisService)
-               replace = i;
-           else if (thisService && thisMax < ns)
-               ns = thisMax;
+        ns = -1U;      // Set to max unsigned, i.e. no restriction
+
+        for (UInt i = 0; i < count; i++) {
+            // Clear a maximum delay.
+            IOService *thisService = entries[i].fService;
+            if (thisService && (delayType == entries[i].fDelayType)) {
+                UInt32 thisMax = entries[i].fMaxDelay;
+                if (service == thisService)
+                    replace = i;
+                else if (thisMax < ns) {
+                    ns = thisMax;
+                   holder = thisService;
+               }
+            }
+        }
+
+        // Check if entry found
+        if (kNoReplace != replace) {
+            entries[replace].fService = 0;     // Null the entry
+            setCpuDelay = true;
+        }
+    }
+
+    if (setCpuDelay)
+    {
+        // Must be safe to call from locked context
+        if (delayType == kCpuDelayBusStall)
+        {
+            ml_set_maxbusdelay(ns);
+        }
+        else if (delayType == kCpuDelayInterrupt)
+        {
+            ml_set_maxintdelay(ns);
+        }
+
+       OSArray * handlers = sCpuLatencyHandlers[delayType];
+       IOService * target;
+       if (handlers) for (unsigned int idx = 0; 
+                           (target = (IOService *) handlers->getObject(idx));
+                           idx++)
+       {
+           target->callPlatformFunction(sCPULatencyFunctionName[delayType], false,
+                                           (void *) (uintptr_t) ns, holder,
+                                           NULL, NULL);
        }
+    }
 
-       // Check if entry found
-       if (kNoReplace != replace) {
-           entries[replace].fService = 0;      // Null the entry
-           ml_set_maxbusdelay(ns);
+    IORecursiveLockUnlock(sCpuDelayLock);
+}
+
+static IOReturn
+setLatencyHandler(UInt32 delayType, IOService * target, bool enable)
+{
+    IOReturn result = kIOReturnNotFound;
+    OSArray * array;
+    unsigned int idx;
+
+    IORecursiveLockLock(sCpuDelayLock);
+
+    do
+    {
+       if (enable && !sCpuLatencyHandlers[delayType])
+           sCpuLatencyHandlers[delayType] = OSArray::withCapacity(4);
+       array = sCpuLatencyHandlers[delayType];
+       if (!array)
+           break;
+       idx = array->getNextIndexOfObject(target, 0);
+       if (!enable)
+       {
+           if (-1U != idx)
+           {
+               array->removeObject(idx);
+               result = kIOReturnSuccess;
+           }
+       }
+       else
+       {
+           if (-1U != idx) {
+               result = kIOReturnExclusiveAccess;
+               break;
+           }
+           array->setObject(target);
+           
+           UInt count = sCpuDelayData->getLength() / sizeof(CpuDelayEntry);
+           CpuDelayEntry *entries = (CpuDelayEntry *) sCpuDelayData->getBytesNoCopy();
+           UInt32 ns = -1U;    // Set to max unsigned, i.e. no restriction
+           IOService * holder = NULL;
+
+           for (UInt i = 0; i < count; i++) {
+               if (entries[i].fService 
+                 && (delayType == entries[i].fDelayType) 
+                 && (entries[i].fMaxDelay < ns)) {
+                   ns = entries[i].fMaxDelay;
+                   holder = entries[i].fService;
+               }
+           }
+           target->callPlatformFunction(sCPULatencyFunctionName[delayType], false,
+                                           (void *) (uintptr_t) ns, holder,
+                                           NULL, NULL);
+           result = kIOReturnSuccess;
        }
     }
+    while (false);
 
-    IOLockUnlock(sBusStallLock);
-#endif /* __i386__ */
+    IORecursiveLockUnlock(sCpuDelayLock);
+
+    return (result);
+}
+
+#endif /* defined(__i386__) */
+
+void IOService::
+requireMaxBusStall(UInt32 __unused ns)
+{
+#if defined(__i386__)
+    requireMaxCpuDelay(this, ns, kCpuDelayBusStall);
+#endif
 }
 
 /*
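Two call sites fall out of this rework: drivers keep calling requireMaxBusStall(ns) as before (and 0 to clear), while a platform driver can now subscribe to aggregate-delay updates through the new handler-set platform function. A hedged sketch of the subscription half (the helper is hypothetical; the keys are the ones wired up above):

    #include <IOKit/IOService.h>

    // Sketch: enable callbacks whenever the aggregate max-bus-delay changes.
    // The handler-set path above routes this to setLatencyHandler(); `me` is
    // assumed to override callPlatformFunction() to receive (ns, holder).
    static IOReturn registerForBusDelayUpdates(IOService *me)
    {
        const OSSymbol *fn = OSSymbol::withCString(kIOPlatformFunctionHandlerMaxBusDelay);
        IOReturn ret = me->callPlatformFunction(kIOPlatformFunctionHandlerSet,
                                                false,        // waitForFunction
                                                (void *) fn,  // which handler set
                                                (void *) me,  // callback target
                                                (void *) 1,   // non-zero => enable
                                                NULL);
        fn->release();
        return ret;
    }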
index 1f9678fd565cd8b0ef76d1b33360c918164e5e0e..5b5cc0b8a414713610e3a65a9f74ac3376087cea 100644 (file)
@@ -117,6 +117,10 @@ do {                                  \
 
 #define NS_TO_MS(nsec)                ((int)((nsec) / 1000000ULL))
 
+#if CONFIG_EMBEDDED
+#define SUPPORT_IDLE_CANCEL                            1
+#endif
+
 //*********************************************************************************
 // PM machine states
 //*********************************************************************************
@@ -1784,6 +1788,7 @@ IOReturn IOService::requestPowerDomainState (
     unsigned long              computedState;
     unsigned long              theDesiredState;
        IOService *                     child;
+       IOPMRequest *           childRequest;
 
     if (!initialized)
                return IOPMNotYetInitialized;
@@ -1893,7 +1898,9 @@ IOReturn IOService::requestPowerDomainState (
        }
 
        // Record the child's desires on the connection.
-
+#if SUPPORT_IDLE_CANCEL
+       bool attemptCancel = ((kIOPMPreventIdleSleep & desiredState) && !whichChild->getPreventIdleSleepFlag());
+#endif
        whichChild->setDesiredDomainState( computedState );
        whichChild->setPreventIdleSleepFlag( desiredState & kIOPMPreventIdleSleep );
        whichChild->setPreventSystemSleepFlag( desiredState & kIOPMPreventSystemSleep );
@@ -1907,8 +1914,6 @@ IOReturn IOService::requestPowerDomainState (
 
        if (!fWillAdjustPowerState && !fDeviceOverrides)
        {
-               IOPMRequest * childRequest;
-
                childRequest = acquirePMRequest( this, kIOPMRequestTypeAdjustPowerState );
                if (childRequest)
                {
@@ -1916,6 +1921,16 @@ IOReturn IOService::requestPowerDomainState (
                        fWillAdjustPowerState = true;
                }
        }
+#if SUPPORT_IDLE_CANCEL
+       if (attemptCancel)
+       {
+               childRequest = acquirePMRequest( this, kIOPMRequestTypeIdleCancel );
+               if (childRequest)
+               {
+                       submitPMRequest( childRequest );
+               }
+       }
+#endif
 
        return IOPMNoErr;
 }
@@ -3898,11 +3913,7 @@ bool IOService::ackTimerTick( void )
                        // apps didn't respond in time
             cleanClientResponses(true);
             OUR_PMLog(kPMLogClientTardy, 0, 1);
-                       if (fMachineState == kIOPM_OurChangeTellClientsPowerDown)
-                       {
-                               // tardy equates to veto
-                               fDoNotPowerDown = true;
-                       }
+                       // tardy equates to approval
                        done = true;
             break;
 
@@ -4855,6 +4866,11 @@ IOReturn IOService::cancelPowerChange ( unsigned long refcon )
         return kIOReturnSuccess;
     }
 
+    OSString * name = IOCopyLogNameForPID(proc_selfpid());
+    PM_ERROR("PM notification cancel (%s)\n", name ? name->getCStringNoCopy() : "");
+    if (name)
+        name->release();
+
        request = acquirePMRequest( this, kIOPMRequestTypeCancelPowerChange );
        if (!request)
        {
@@ -5376,11 +5392,6 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
 
                        case kIOPM_OurChangeTellClientsPowerDown:
                                // our change, was it vetoed?
-                               if (fDesiredPowerState > fHeadNoteState)
-                               {
-                                       PM_DEBUG("%s: idle cancel\n", fName);
-                                       fDoNotPowerDown = true;
-                               }
                                if (!fDoNotPowerDown)
                                {
                                        // no, we can continue
@@ -5388,6 +5399,8 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
                                }
                                else
                                {
+                                       OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState);
+                                       PM_ERROR("%s: idle cancel\n", fName);
                                        // yes, rescind the warning
                                        tellNoChangeDown(fHeadNoteState);
                                        // mark the change note un-actioned
@@ -5398,7 +5411,25 @@ bool IOService::servicePMRequest( IOPMRequest * request, IOPMWorkQueue * queue )
                                break;
 
                        case kIOPM_OurChangeTellPriorityClientsPowerDown:
-                               OurChangeTellPriorityClientsPowerDown();  
+                               // our change, should it be acted on still?
+#if SUPPORT_IDLE_CANCEL
+                               if (fDoNotPowerDown)
+                               {
+                                       OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, fMachineState);
+                                       PM_ERROR("%s: idle revert\n", fName);
+                                       // no, tell clients we're back in the old state
+                                       tellChangeUp(fCurrentPowerState);
+                                       // mark the change note un-actioned
+                                       fHeadNoteFlags |= IOPMNotDone;
+                                       // and we're done
+                                       all_done();
+                               }
+                               else
+#endif
+                               {
+                                       // yes, we can continue
+                                       OurChangeTellPriorityClientsPowerDown();  
+                               }
                                break;
 
                        case kIOPM_OurChangeNotifyInterestedDriversWillChange:
@@ -5671,6 +5702,20 @@ bool IOService::servicePMReplyQueue( IOPMRequest * request, IOPMRequestQueue * q
                        more = true;
                        break;
 
+#if SUPPORT_IDLE_CANCEL
+               case kIOPMRequestTypeIdleCancel:
+                       if ((fMachineState == kIOPM_OurChangeTellClientsPowerDown) 
+                        || (fMachineState == kIOPM_OurChangeTellPriorityClientsPowerDown))
+                       {
+                               OUR_PMLog(kPMLogIdleCancel, (uintptr_t) this, 0);
+                               fDoNotPowerDown = true;
+                               if (fMachineState == kIOPM_OurChangeTellPriorityClientsPowerDown)
+                                       cleanClientResponses(false);
+                               more = true;
+                       }
+                       break;
+#endif
+
                default:
                        IOPanic("servicePMReplyQueue: unknown reply type");
        }
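The new PM_ERROR in cancelPowerChange() names the process behind each veto; the user-space side that triggers it is the long-standing IOPMLib pattern (a sketch; gRootPort is assumed to come from IORegisterForSystemPower()):

    #include <IOKit/pwr_mgt/IOPMLib.h>
    #include <IOKit/IOMessage.h>

    static io_connect_t gRootPort;  // assumed: from IORegisterForSystemPower()

    // Sketch: vetoing an idle sleep from an app produces the new
    // "PM notification cancel (<name>)" log line in the kernel.
    static void powerCallback(void *refcon, io_service_t service,
                              natural_t messageType, void *messageArgument)
    {
        switch (messageType) {
        case kIOMessageCanSystemSleep:
            IOCancelPowerChange(gRootPort, (long) messageArgument); // veto idle sleep
            break;
        case kIOMessageSystemWillSleep:
            IOAllowPowerChange(gRootPort, (long) messageArgument);  // forced sleep: must allow
            break;
        }
    }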
index 658730e05f47c9e95b21fd3eb2ba2c95c2d6a2b8..21e9361ffa80195e134d250487510a28578fd791 100644 (file)
@@ -462,7 +462,8 @@ enum {
     kIOPMRequestTypeAckSetPowerState       = 0x82,
     kIOPMRequestTypeAllowPowerChange       = 0x83,
     kIOPMRequestTypeCancelPowerChange      = 0x84,
-    kIOPMRequestTypeInterestChanged        = 0x85
+    kIOPMRequestTypeInterestChanged        = 0x85,
+    kIOPMRequestTypeIdleCancel             = 0x86
 };
 
 //*********************************************************************************
index 25c13cb0ee32afa0bebe495a55ec27b3599c9f31..42003bf24532ab2394a2a22779155c55b0106212 100644 (file)
@@ -52,7 +52,6 @@
 extern "C" {
 
 extern void OSlibkernInit (void);
-extern void ml_hpet_cfg(uint32_t, uint32_t);
 
 #include <kern/clock.h>
 #include <sys/time.h>
@@ -100,7 +99,7 @@ void StartIOKit( void * p1, void * p2, void * p3, void * p4 )
     OSCollectionIterator *      kmodIter;   // must release
     OSString *                  kmodName;   // don't release
 
-    if( PE_parse_boot_arg( "io", &debugFlags ))
+    if( PE_parse_boot_argn( "io", &debugFlags, sizeof (debugFlags) ))
        gIOKitDebug = debugFlags;
 
     // Check for the log synchronous bit set in io
index 8da2cc9b22cce7300b5e202799a6ab2dc39cc99f..1fa767c152d951f65211f6e16da32e6858975310 100644 (file)
--- a/kgmacros
+++ b/kgmacros
@@ -1960,6 +1960,7 @@ define showuserstack
                     _kgm_update_loop
                   end
                else
+               if ($kgm_mtype == 7)
                        set $newact = (struct thread *) $arg0
 #This needs to identify 64-bit processes as well
                        set $newiss = (x86_saved_state32_t) ($newact->machine.pcb->iss.uss.ss_32)
@@ -1975,6 +1976,9 @@ define showuserstack
                        _kgm_flush_loop
                        _kgm_update_loop
                        end                     
+               else
+                       echo showuserstack not supported on this architecture\n
+               end
                end
 end
 document showuserstack
@@ -2053,11 +2057,15 @@ define switchtocorethread
           flushstack
           set $pc = $newact->machine->pcb.save_srr0
        else
+       if ($kgm_mtype == 7)
                set $kgm_cstatep = (struct x86_kernel_state32 *) \
                                        ($newact->kernel_stack + 0x4000 \
                                         - sizeof(struct x86_kernel_state32))
                loadcontext $kgm_cstatep
                flushstack
+       else
+               echo switchtocorethread not supported on this architecture\n
+       end
        end
        showcontext_int
        end
@@ -2116,6 +2124,7 @@ define loadcontext
        set $cr = $kgm_contextp.save_cr
        set $ctr = $kgm_contextp.save_ctr
        else
+       if ($kgm_mtype == 7)
                set $kgm_contextp = (struct x86_kernel_state32 *) $arg0
                set $ebx = $kgm_contextp->k_ebx 
                set $ebp = $kgm_contextp->k_ebp 
@@ -2123,6 +2132,9 @@ define loadcontext
                set $esi = $kgm_contextp->k_esi 
                set $eip = $kgm_contextp->k_eip 
                set $pc =  $kgm_contextp->k_eip
+       else
+               echo loadcontext not supported on this architecture\n
+       end
        end
 end
 
@@ -2146,6 +2158,8 @@ define resetcorectx
                flushstack
                set $pc = $kdpstatep->eip
                update
+       else
+               echo resetcorectx not supported on this architecture\n
        end
        end
        showcontext_int
@@ -5350,7 +5364,7 @@ define showMCAstate
         _if_present mca_threshold_status_present
         printf "\n%d error banks, ", mca_error_bank_count
         printf "family code 0x%x, ", mca_family
-        printf "machine-check exception taken: %d\n", mca_exception_taken
+        printf "machine-check dump state: %d\n", mca_dump_state
         set $kgm_cpu = 0
         while cpu_data_ptr[$kgm_cpu] != 0
             set $kgm_mcp = cpu_data_ptr[$kgm_cpu]->cpu_mca_state
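The literal 7 in the new architecture guards is the Mach CPU type for i386; the other branch of each macro is PowerPC. In C terms:

    #include <mach/machine.h>

    // The kgmacros guards compare $kgm_mtype against CPU_TYPE_I386 (== 7);
    // CPU_TYPE_POWERPC (== 18) selects the other branch.
    static int is_i386(cpu_type_t mtype) { return mtype == CPU_TYPE_I386; }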
index 5e10e07b0349ab35626f0dd95bac5a02f1368665..7924e638218313fa6ca241249b02cb1b9a66acb8 100644 (file)
@@ -55,6 +55,7 @@
 
 #include <sys/types.h>
 #include <sys/systm.h>
+#include <libkern/OSAtomic.h>
 #include <libkern/crypto/sha1.h>
 
 #define        memset(x, y, z) bzero(x, z);
@@ -141,8 +142,17 @@ static unsigned char PADDING[64] = { 0x80, /* zeros */ };
 static void SHA1Transform(u_int32_t, u_int32_t, u_int32_t, u_int32_t,
     u_int32_t, const u_int8_t *, SHA1_CTX *);
 
+void _SHA1Update(SHA1_CTX *context, const void *inpp, size_t inputLen);
+
 void SHA1Final_r(SHA1_CTX *, void *);
 
+typedef kern_return_t (*InKernelPerformSHA1Func)(void *ref, const void *data, size_t dataLen, u_int32_t *inHash, u_int32_t options, u_int32_t *outHash, Boolean usePhysicalAddress); 
+void sha1_hardware_hook(Boolean option, InKernelPerformSHA1Func func, void *ref);
+static void *SHA1Ref;
+InKernelPerformSHA1Func performSHA1WithinKernelOnly; 
+#define SHA1_USE_HARDWARE_THRESHOLD 2048 //bytes 
+
+
 /*
  * SHA1 initialization. Begins a SHA1 operation, writing a new context.
  */
@@ -166,7 +176,7 @@ SHA1Init(SHA1_CTX *context)
  * context.
  */
 void
-SHA1Update(SHA1_CTX *context, const void *inpp, size_t inputLen)
+_SHA1Update(SHA1_CTX *context, const void *inpp, size_t inputLen)
 {
        u_int32_t i, index, partLen;
        const unsigned char *input = (const unsigned char *)inpp;
@@ -210,6 +220,105 @@ SHA1Update(SHA1_CTX *context, const void *inpp, size_t inputLen)
        memcpy(&context->buffer[index], &input[i], inputLen - i);
 }
 
+
+
+
+/*
+ * This function is called by the SHA1 hardware kext during its init. 
+ * This will register the function to call to perform SHA1 using hardware. 
+ */
+void sha1_hardware_hook(Boolean option, InKernelPerformSHA1Func func, void *ref)
+{
+       if(option) {
+               // Establish the hook. The hardware is ready.
+               OSCompareAndSwap((uintptr_t)NULL, (uintptr_t)ref, (uintptr_t *)&SHA1Ref); 
+
+               if(!OSCompareAndSwap((uintptr_t)NULL, (uintptr_t)func, (uintptr_t *)&performSHA1WithinKernelOnly)) {
+                       panic("sha1_hardware_hook: Called twice.. Should never happen\n");
+               }
+       }
+       else {
+               // The hardware is going away. Tear down the hook.      
+               performSHA1WithinKernelOnly = NULL;
+               SHA1Ref = NULL;
+       }
+}
+
+static u_int32_t SHA1UpdateWithHardware(SHA1_CTX *context, const unsigned char *data, size_t dataLen, Boolean usePhysicalAddress)
+{
+       u_int32_t *inHashBuffer = context->state;
+       u_int32_t options = 0;
+       int result;
+
+       result = performSHA1WithinKernelOnly(SHA1Ref, data, dataLen, inHashBuffer, options, inHashBuffer, usePhysicalAddress);
+       if(result != KERN_SUCCESS) {
+               //The hardware failed to hash for some reason. Fall back to software. 
+               return 0;
+       }
+
+	// Update the context bit count with the total length just hashed.
+        if ((context->bcount[1] += (dataLen << 3)) < (dataLen << 3))
+                context->bcount[0]++;
+        context->bcount[0] += (dataLen >> 29);
+       return dataLen;
+}
+
+/*
+ * This function is only called from the pagefault path or from page_copy().
+ * So we assume that we can safely convert the virtual address to the physical address and use it.
+ * Assumptions: the passed-in address (inpp) is a kernel virtual address,
+ * and a physical page has been faulted in.
+ * The inputLen passed in should always be less than or equal to a page size (4096),
+ * and inpp should be on a page boundary.
+ * "performSHA1WithinKernelOnly" is initialized only when the hardware driver exists and is ready.
+ */
+void SHA1UpdateUsePhysicalAddress(SHA1_CTX *context, const void *inpp, size_t inputLen)
+{
+       Boolean usePhysicalAddress = TRUE;
+       if((inputLen == PAGE_SIZE) && performSHA1WithinKernelOnly) { // If hardware exists and is ready.
+               if(SHA1UpdateWithHardware(context, (const unsigned char *)inpp, inputLen, usePhysicalAddress))
+                       return;
+		// else the hardware failed for some reason;
+		// fall through and try the hash in software.
+       }
+	// Use the software implementation since the hardware is absent,
+	// has not been initialized yet, or inputLen != PAGE_SIZE.
+       _SHA1Update(context, inpp, inputLen);
+}
+
+/*
+ * A wrapper around _SHA1Update() that picks between the software and
+ * hardware-based SHA1 implementations.
+ */
+void SHA1Update(SHA1_CTX *context, const void *inpp, size_t inputLen)
+{
+       const unsigned char *input = (const unsigned char *)inpp;
+       Boolean usePhysicalAddress = FALSE;
+       u_int32_t index;
+       
+       if((inputLen > SHA1_USE_HARDWARE_THRESHOLD) && performSHA1WithinKernelOnly) { 
+               index = (context->bcount[1] >> 3) & 0x3F;
+		if(index != 0) {  //bytes left in the context. Handle them first in software.
+			u_int32_t partLen = 64 - index;
+			_SHA1Update(context, input, partLen);
+                       inputLen -= partLen; 
+                       input += partLen; 
+               }
+               
+               u_int32_t lenForHardware = inputLen & (~0x3F); //multiple of 64
+               u_int32_t bytesHashed = 0;
+               bytesHashed = SHA1UpdateWithHardware(context, input, lenForHardware, usePhysicalAddress);       
+               
+               inputLen -= bytesHashed;
+               input += bytesHashed;
+       }
+
+       //Fall through to the software implementation.
+       _SHA1Update(context, input, inputLen);
+}
+
 /*
  * For backwards compatibility, sha1_result symbol is mapped to this
  * routine since it's equivalent to SHA1Final with reversed parameters.
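The hook protocol added above is symmetric: the hardware kext registers on load and unhooks on unload, and any failure return makes the callers fall back to software. A sketch of the kext side, assuming the typedef and prototype from sha1.c are made visible to the kext; the performer body is a placeholder, not a real driver:

    extern void sha1_hardware_hook(Boolean option, InKernelPerformSHA1Func func, void *ref);

    // Placeholder performer: returning non-KERN_SUCCESS makes callers fall
    // back to the software path, as SHA1UpdateWithHardware() does above.
    static kern_return_t
    myPerformSHA1(void *ref, const void *data, size_t dataLen, u_int32_t *inHash,
                  u_int32_t options, u_int32_t *outHash, Boolean usePhysicalAddress)
    {
        return KERN_FAILURE;
    }

    void myKextStart(void *driverRef)
    {
        sha1_hardware_hook(TRUE, myPerformSHA1, driverRef); // panics if hooked twice
    }

    void myKextStop(void)
    {
        sha1_hardware_hook(FALSE, NULL, NULL);              // tear the hook down
    }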
index fd4bc66b6124c12702050e3b58f818ca1701e1d2..ef527fdd7dce5896d3f678e68a20117c72829fb1 100644 (file)
@@ -48,6 +48,14 @@ class OSSerialize;
 
 #define APPLE_KEXT_VTABLE_PADDING   1
 
+#if defined(__LP64__)
+#define        APPLE_KEXT_LEGACY_ABI   0
+#elif defined(__arm__) && (__GNUC__ > 4 || (__GNUC__ == 4 && __GNUC_MINOR__ >= 2))
+#define        APPLE_KEXT_LEGACY_ABI   0
+#else
+#define        APPLE_KEXT_LEGACY_ABI   1
+#endif
+
 #if APPLE_KEXT_VTABLE_PADDING
 #define APPLE_KEXT_PAD_METHOD      virtual
 #define APPLE_KEXT_PAD_IMPL(index)  gMetaClass.reservedCalled(index)
@@ -100,13 +108,14 @@ public:
 #define OSCheckTypeInst(typeinst, inst) \
     OSMetaClassBase::checkTypeInst(inst, typeinst)
     
+typedef void (*_ptf_t)(void);
+
+#if APPLE_KEXT_LEGACY_ABI
 
 // Arcane evil code interprets a C++ pointer to function as specified in the
 // -fapple-kext ABI, i.e. the gcc-2.95 generated code.  IT DOES NOT ALLOW
 // the conversion of functions that are from MULTIPLY inherited classes.
 
-typedef void (*_ptf_t)(void);
-
 static inline _ptf_t
 _ptmf2ptf(const OSMetaClassBase *self, void (OSMetaClassBase::*func)(void))
 {
@@ -141,6 +150,43 @@ _ptmf2ptf(const OSMetaClassBase *self, void (OSMetaClassBase::*func)(void))
     }
 }
 
+#else /* !APPLE_KEXT_LEGACY_ABI */
+
+
+// Slightly less arcane and slightly less evil code to do
+// the same for kexts compiled with the standard Itanium C++
+// ABI
+
+static inline _ptf_t
+_ptmf2ptf(const OSMetaClassBase *self, void (OSMetaClassBase::*func)(void))
+{
+    union {
+       void (OSMetaClassBase::*fIn)(void);
+       uintptr_t fVTOffset;
+       _ptf_t fPFN;
+    } map;
+
+    map.fIn = func;
+
+    if (map.fVTOffset & 1) {
+       // virtual
+       union {
+           const OSMetaClassBase *fObj;
+           _ptf_t **vtablep;
+       } u;
+       u.fObj = self;
+
+       // Virtual member function so dereference vtable
+       return *(_ptf_t *)(((uintptr_t)*u.vtablep) + map.fVTOffset - 1);
+    } else {
+       // Not virtual, i.e. plain member func
+       return map.fPFN;
+    }
+}
+
+
+#endif /* !APPLE_KEXT_LEGACY_ABI */
+
 /*! @function OSMemberFunctionCast
     @abstract Convert a pointer to a member function to a c-style pointer to function.  No warnings are generated.
     @param type The type of pointer function desired.
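Both ABI branches sit behind the same OSMemberFunctionCast() macro, so kext source is unchanged whichever one is compiled in. Typical usage, sketched with a hypothetical driver class (IOKit metaclass boilerplate omitted):

    #include <IOKit/IOService.h>
    #include <IOKit/IOTimerEventSource.h>

    class MyDriver : public IOService {  // hypothetical; OSDeclareDefaultStructors omitted
    public:
        void handleTimer(IOTimerEventSource *sender);
    };

    // Sketch: _ptmf2ptf() runs under the hood of OSMemberFunctionCast(),
    // turning the member-function pointer into the C pointer IOKit expects.
    static IOTimerEventSource *makeTimer(MyDriver *me)
    {
        return IOTimerEventSource::timerEventSource(me,
            OSMemberFunctionCast(IOTimerEventSource::Action, me, &MyDriver::handleTimer));
    }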
index 47a6e11c3d3266f9a3db2c1cdd5e93af842df90b..8ecb9e2f7333525a8b8f53bb7e8ebaca93c3833a 100644 (file)
@@ -60,6 +60,7 @@ typedef struct sha1_ctxt {
 
 extern void SHA1Init(SHA1_CTX *);
 extern void SHA1Update(SHA1_CTX *, const void *, size_t);
+extern void SHA1UpdateUsePhysicalAddress(SHA1_CTX *context, const void *inpp, size_t inputLen);
 extern void SHA1Final(void *, SHA1_CTX *);
 
 #ifdef  __cplusplus
index 886094ab6d1dcb802002ba72d4267c46d1bfe0d8..ab7ce249aba877d95fa0bf7f2f1772ed90382553 100644 (file)
@@ -373,7 +373,7 @@ bool validateExtensionDict(OSDictionary * extension, int index) {
             goto finish;
         }
 
-    } else if (PE_parse_boot_arg("-x", namep)) { /* safe boot */
+    } else if (PE_parse_boot_argn("-x", namep, sizeof (namep))) { /* safe boot */
         ineligible_for_safe_boot = true;
         result = false;
         goto finish;
@@ -502,6 +502,30 @@ OSDictionary * compareExtensionVersions(
         goto finish;
      }
   
+    if (0 == strcmp("com.apple.driver.AppleIntelCPUPowerManagement",
+                    incumbentName->getCStringNoCopy())) {
+      /* Special rules. Always favor version 51.0.0 exactly at the
+       * expense of all other versions newer or older.
+       */
+      if(0 == strcmp(incumbentVersionString->getCStringNoCopy(), "51.0.0")) {
+       IOLog(VTYELLOW "Skipping duplicate extension \"%s\" with "
+	      "version (%s -> %s).\n" VTRESET,
+             candidateName->getCStringNoCopy(),
+             candidateVersionString->getCStringNoCopy(),
+             incumbentVersionString->getCStringNoCopy());
+       winner = incumbent;
+       goto finish;
+      } else if (0 == strcmp(candidateVersionString->getCStringNoCopy(), "51.0.0")) {
+       IOLog(VTYELLOW "Skipping duplicate extension \"%s\" with "
+	      "version (%s -> %s).\n" VTRESET,
+             candidateName->getCStringNoCopy(),
+             incumbentVersionString->getCStringNoCopy(),
+             candidateVersionString->getCStringNoCopy());
+       winner = candidate;
+       goto finish;
+      }
+    }
+
     if (candidate_vers > incumbent_vers) {
         IOLog(VTYELLOW "Replacing extension \"%s\" with newer version "
             "(%s -> %s).\n" VTRESET,
index 507cb1000074aa469df27744f16b1f776a426206..a850865198bb172dfb42745789971c11c1e6f6aa 100644 (file)
@@ -35,7 +35,7 @@
 #include <mach/message.h>
 #include <mach/exception.h>
 #include <mach/mig_errors.h>
-#include <mach-o/dyld.h>
+#include <dlfcn.h>
 #include <stdlib.h>
 
 __private_extern__ kern_return_t internal_catch_exception_raise (
@@ -52,7 +52,7 @@ __private_extern__ kern_return_t internal_catch_exception_raise (
     static kern_return_t (*func)(mach_port_t, mach_port_t, mach_port_t, exception_type_t, exception_data_t, mach_msg_type_number_t);
     if (checkForFunction == 0) {
         checkForFunction = 1;
-        _dyld_lookup_and_bind("_catch_exception_raise", (unsigned long *)&func, (void **)0);
+               func = dlsym(RTLD_DEFAULT, "catch_exception_raise");
     }
     if (func == 0) {
         /* The user hasn't defined catch_exception_raise in their binary */
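The same dyld-to-dlfcn migration repeats in the next two files; all three reduce to a once-only lookup of an optional symbol. In isolation (a sketch; the typedef is illustrative):

    #include <dlfcn.h>

    // dlsym(RTLD_DEFAULT, ...) searches every image in the process and returns
    // NULL when the app never defined the handler; unlike the old
    // _dyld_lookup_and_bind(), no leading underscore is needed on the name.
    typedef int (*handler_t)(void);

    static handler_t lookup_optional_handler(const char *name)
    {
        return (handler_t) dlsym(RTLD_DEFAULT, name);
    }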
index c372d1c20be69c08703e5cd1a5376ae7c28e7300..efcb5344c51d1bcf38f012145b65ca9a4f1eed86 100644 (file)
@@ -35,7 +35,7 @@
 #include <mach/message.h>
 #include <mach/exception.h>
 #include <mach/mig_errors.h>
-#include <mach-o/dyld.h>
+#include <dlfcn.h>
 #include <stdlib.h>
 
 __private_extern__ kern_return_t internal_catch_exception_raise_state (
@@ -55,7 +55,7 @@ __private_extern__ kern_return_t internal_catch_exception_raise_state (
     static kern_return_t (*func)(mach_port_t, exception_type_t, exception_data_t, mach_msg_type_number_t, int *, thread_state_t, mach_msg_type_number_t, thread_state_t, mach_msg_type_number_t *);
     if (checkForFunction == 0) {
         checkForFunction = 1;
-        _dyld_lookup_and_bind("_catch_exception_raise_state", (unsigned long *)&func, (void **)0);
+               func = dlsym(RTLD_DEFAULT, "catch_exception_raise_state");
     }
     if (func == 0) {
         /* The user hasn't defined catch_exception_raise in their binary */
index 139b772c22cca27388a9ff808ee4fef5884a7faf..1e0c5c0dff026e4e1075f893636b87f361044e05 100644 (file)
@@ -35,7 +35,7 @@
 #include <mach/message.h>
 #include <mach/exception.h>
 #include <mach/mig_errors.h>
-#include <mach-o/dyld.h>
+#include <dlfcn.h>
 #include <stdlib.h>
 
 __private_extern__ kern_return_t internal_catch_exception_raise_state_identity (
@@ -57,7 +57,7 @@ __private_extern__ kern_return_t internal_catch_exception_raise_state_identity (
     static kern_return_t (*func)(mach_port_t, mach_port_t, mach_port_t, exception_type_t, exception_data_t, mach_msg_type_number_t, int *, thread_state_t, mach_msg_type_number_t, thread_state_t, mach_msg_type_number_t *);
     if (checkForFunction == 0) {
         checkForFunction = 1;
-        _dyld_lookup_and_bind("_catch_exception_raise_state_identity", (unsigned long *)&func, (void **)0);
+               func = dlsym(RTLD_DEFAULT, "catch_exception_raise_state_identity");
     }
     if (func == 0) {
         /* The user hasn't defined catch_exception_raise in their binary */
index 6af46de7f05c94ba2a029ce80c9b9eadc5abdb72..1fa58a06775197ae4f96b71cd51427f07864a271 100644 (file)
@@ -11,6 +11,7 @@ LN = /bin/ln -fs
 CAT = /bin/cat
 MKDIR = /bin/mkdir -p
 FIND = /usr/bin/find
+INSTALL = /usr/bin/install
 
 TAR = /usr/bin/gnutar
 STRIP = /usr/bin/strip
index 5ac6c41b40e5444bc03dd67a384cce163bb72ff6..fab9fa5244d47c538aec896d507da37b1a3c600d 100644 (file)
@@ -220,10 +220,6 @@ export CFLAGS_DEVELOPMENT  =
 export CFLAGS_DEBUG    = 
 export CFLAGS_PROFILE  =  -pg
 
-ifeq ($(ARCH_CONFIG),ARM)
-BUILD_STABS = 1
-endif
-
 ifeq ($(BUILD_STABS),1)
 export CFLAGS_PPC      = -Dppc -DPPC -D__PPC__ -DPAGE_SIZE_FIXED \
                                -mno-altivec -gstabs+ -force_cpusubtype_ALL
index add2ecf971832e4327e0ad286309892cdcf655d2..9e62069ae4ddae18bc58a13014f43cd23e0847c9 100644 (file)
@@ -72,7 +72,7 @@ $(INSTALL_MI_GEN_FILES): $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR)/% : %
                ./incmidir/$${filename_strip};                  \
        if [ -s ./incmidir/$${filename_strip} ];                        \
        then (                                                  \
-               install $(INSTALL_FLAGS) ./incmidir/$${filename} $(dir $@);\
+               $(INSTALL) $(INSTALL_FLAGS) ./incmidir/$${filename} $(dir $@);\
        );                                                      \
        else                                                    \
                echo Header file $< not exported;               \
@@ -94,7 +94,7 @@ $(INSTALL_KF_MI_GEN_FILES): $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR)/% : %
                ./kincmidir/$${filename_strip};                 \
        if [ -s ./kincmidir/$${filename_strip} ];                       \
        then (                                                  \
-               install $(INSTALL_FLAGS) ./kincmidir/$${filename} $(dir $@);\
+               $(INSTALL) $(INSTALL_FLAGS) ./kincmidir/$${filename} $(dir $@);\
        );                                                      \
        else                                                    \
                echo Header file $< not exported;               \
@@ -116,7 +116,7 @@ $(INSTALL_MI_GEN_LCL_FILES): $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR)/% : %
                ./pincmidir/$${filename_strip};                 \
        if [ -s ./pincmidir/$${filename_strip} ];                       \
        then (                                                  \
-               install $(INSTALL_FLAGS) ./pincmidir/$${filename} $(dir $@);\
+               $(INSTALL) $(INSTALL_FLAGS) ./pincmidir/$${filename} $(dir $@);\
        );                                                      \
        else                                                    \
                echo Header file $< not exported;               \
@@ -138,7 +138,7 @@ $(INSTALL_KF_MI_LCL_GEN_FILES): $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR)/% : %
                ./kpincmidir/$${filename_strip};                        \
        if [ -s ./kpincmidir/$${filename_strip} ];                      \
        then (                                                  \
-               install $(INSTALL_FLAGS) ./kpincmidir/$${filename} $(dir $@);\
+               $(INSTALL) $(INSTALL_FLAGS) ./kpincmidir/$${filename} $(dir $@);\
        );                                                      \
        else                                                    \
                echo Header file $< not exported;               \
@@ -160,7 +160,7 @@ $(INSTALL_MD_GEN_INC_FILES): $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR)/% : %
                ./incdir/$${filename_strip};                    \
        if [ -s ./incdir/$${filename_strip} ];                  \
        then (                                                  \
-               install $(INSTALL_FLAGS) ./incdir/$${filename} $(dir $@);\
+               $(INSTALL) $(INSTALL_FLAGS) ./incdir/$${filename} $(dir $@);\
        );                                                      \
        else                                                    \
                echo Header file $< not exported;               \
@@ -182,7 +182,7 @@ $(INSTALL_KF_MD_GEN_FILES): $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR)/% : %
                ./kincdir/$${filename_strip};                   \
        if [ -s ./kincdir/$${filename_strip} ];                 \
        then (                                                  \
-               install $(INSTALL_FLAGS) ./kincdir/$${filename} $(dir $@);\
+               $(INSTALL) $(INSTALL_FLAGS) ./kincdir/$${filename} $(dir $@);\
        );                                                      \
        else                                                    \
                echo Header file $< not exported;               \
@@ -205,7 +205,7 @@ $(INSTALL_MD_GEN_LCL_FILES): $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR)/% : %
                ./pincdir/$${filename_strip};                   \
        if [ -s ./pincdir/$${filename_strip} ];                 \
        then (                                                  \
-               install $(INSTALL_FLAGS) ./pincdir/$${filename} $(dir $@);\
+               $(INSTALL) $(INSTALL_FLAGS) ./pincdir/$${filename} $(dir $@);\
        );                                                      \
        else                                                    \
                echo Header file $< not exported;               \
@@ -228,7 +228,7 @@ $(INSTALL_KF_MD_LCL_GEN_FILES): $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR)/% : %
                ./kpincdir/$${filename_strip};                  \
        if [ -s ./kpincdir/$${filename_strip} ];                        \
        then (                                                  \
-               install $(INSTALL_FLAGS) ./kpincdir/$${filename} $(dir $@);\
+               $(INSTALL) $(INSTALL_FLAGS) ./kpincdir/$${filename} $(dir $@);\
        );                                                      \
        else                                                    \
                echo Header file $< not exported;               \
@@ -254,7 +254,7 @@ do_installhdrs_mi: $(INSTALL_MI_GEN_FILES) $(INSTALL_MI_GEN_LCL_FILES) $(INSTALL
                    ./incmidir/$$j.strip;                               \
                if [ -s ./incmidir/$$j.strip ];                         \
                then (                                                  \
-                   install $(INSTALL_FLAGS) ./incmidir/$$j $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR);     \
+                   $(INSTALL) $(INSTALL_FLAGS) ./incmidir/$$j $(DSTROOT)/$(INCDIR)/$(INSTALL_MI_DIR);  \
                );                                                      \
                else                                                    \
                    echo Header file $$j not exported;          \
@@ -276,7 +276,7 @@ do_installhdrs_mi: $(INSTALL_MI_GEN_FILES) $(INSTALL_MI_GEN_LCL_FILES) $(INSTALL
                    ./pincmidir/$$j.strip;                              \
                if [ -s ./pincmidir/$$j.strip ];                                \
                then (                                                  \
-                   install $(INSTALL_FLAGS) ./pincmidir/$$j $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR);    \
+                   $(INSTALL) $(INSTALL_FLAGS) ./pincmidir/$$j $(DSTROOT)/$(LCLDIR)/$(INSTALL_MI_DIR); \
                );                                                      \
                else                                                    \
                    echo Header file $$j not exported;          \
@@ -298,7 +298,7 @@ do_installhdrs_mi: $(INSTALL_MI_GEN_FILES) $(INSTALL_MI_GEN_LCL_FILES) $(INSTALL
                    ./kincmidir/$$j.strip;                              \
                if [ -s ./kincmidir/$$j.strip ];                                \
                then (                                                  \
-                   install $(INSTALL_FLAGS) ./kincmidir/$$j $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR);    \
+                   $(INSTALL) $(INSTALL_FLAGS) ./kincmidir/$$j $(DSTROOT)/$(KINCDIR)/$(EXPORT_MI_DIR); \
                );                                                      \
                else                                                    \
                    echo Header file $$j not exported;          \
@@ -320,7 +320,7 @@ do_installhdrs_mi: $(INSTALL_MI_GEN_FILES) $(INSTALL_MI_GEN_LCL_FILES) $(INSTALL
                    ./kpincmidir/$$j.strip;                             \
                if [ -s ./kpincmidir/$$j.strip ];                               \
                then (                                                  \
-                   install $(INSTALL_FLAGS) ./kpincmidir/$$j $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR);  \
+                   $(INSTALL) $(INSTALL_FLAGS) ./kpincmidir/$$j $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MI_DIR);       \
                );                                                      \
                else                                                    \
                    echo Header file $$j not exported;          \
@@ -349,7 +349,7 @@ do_installhdrs_md: $(INSTALL_MD_GEN_INC_FILES) $(INSTALL_MD_GEN_LCL_FILES) $(INS
                    ./incdir/$$j.strip;                         \
                if [ -s ./incdir/$$j.strip ];                           \
                then (                                                  \
-                   install $(INSTALL_FLAGS) ./incdir/$$j $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR);       \
+                   $(INSTALL) $(INSTALL_FLAGS) ./incdir/$$j $(DSTROOT)/$(INCDIR)/$(INSTALL_MD_DIR);    \
                );                                                      \
                else                                                    \
                    echo Header file $$j not exported;          \
@@ -371,7 +371,7 @@ do_installhdrs_md: $(INSTALL_MD_GEN_INC_FILES) $(INSTALL_MD_GEN_LCL_FILES) $(INS
                    ./pincdir/$$j.strip;                                \
                if [ -s ./pincdir/$$j.strip ];                          \
                then (                                                  \
-                   install $(INSTALL_FLAGS) ./pincdir/$$j $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR);      \
+                   $(INSTALL) $(INSTALL_FLAGS) ./pincdir/$$j $(DSTROOT)/$(LCLDIR)/$(INSTALL_MD_DIR);   \
                );                                                      \
                else                                                    \
                    echo Header file $$j not exported;          \
@@ -393,7 +393,7 @@ do_installhdrs_md: $(INSTALL_MD_GEN_INC_FILES) $(INSTALL_MD_GEN_LCL_FILES) $(INS
                    ./kincdir/$$j.strip;                                \
                if [ -s ./kincdir/$$j.strip ];                          \
                then (                                                  \
-                   install $(INSTALL_FLAGS) ./kincdir/$$j $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR);      \
+                   $(INSTALL) $(INSTALL_FLAGS) ./kincdir/$$j $(DSTROOT)/$(KINCDIR)/$(EXPORT_MD_DIR);   \
                );                                                      \
                else                                                    \
                    echo Header file $$j not exported;          \
@@ -415,7 +415,7 @@ do_installhdrs_md: $(INSTALL_MD_GEN_INC_FILES) $(INSTALL_MD_GEN_LCL_FILES) $(INS
                    ./kpincdir/$$j.strip;                               \
                if [ -s ./kpincdir/$$j.strip ];                         \
                then (                                                  \
-                   install $(INSTALL_FLAGS) ./kpincdir/$$j $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR);    \
+                   $(INSTALL) $(INSTALL_FLAGS) ./kpincdir/$$j $(DSTROOT)/$(KPINCDIR)/$(EXPORT_MD_DIR); \
                );                                                      \
                else                                                    \
                    echo Header file $$j not exported;          \
@@ -605,7 +605,7 @@ endif
 # mach_kernel building rules
 #
 do_build_mach_kernel: $(OBJPATH)/kgmacros
-       $(_v)install $(DATA_INSTALL_FLAGS) $(SRCROOT)/config/version.c $(OBJPATH)/version.c;
+       $(_v)$(INSTALL) $(DATA_INSTALL_FLAGS) $(SRCROOT)/config/version.c $(OBJPATH)/version.c;
        $(_v)$(SRCROOT)/config/newvers.pl $(OBJPATH)/version.c > /dev/null;
        @echo CC version.o
        $(_v)${KCC} -c ${filter-out ${${join $@,_CFLAGS_RM}}, ${CFLAGS}} ${${join $@,_CFLAGS_ADD}} ${INCFLAGS} ${${join $@,_INCFLAGS}} $(OBJPATH)/version.c -o $(OBJPATH)/version.o
@@ -619,7 +619,7 @@ do_build_mach_kernel: $(OBJPATH)/kgmacros
        $(_v)$(STRIP) $(STRIP_FLAGS) $(TARGET)/mach_kernel.sys -o $(TARGET)/mach_kernel
 
 $(OBJPATH)/kgmacros: $(SRCROOT)/kgmacros
-       $(_v)$(CP) $? $@
+       $(_v)$(INSTALL) $(INSTALL_FLAGS) $? $@
 
 # Special rules to install machine configuration variants
 
@@ -630,7 +630,7 @@ $(DSTROOT)$(INSTALL_FILE_DIR)mach.$(KERNEL_CONFIG_LC).$(MACHINE_CONFIG_LC): $(TA
        fi;                                                     \
        if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then \
                $(RM) $(RMFLAGS) $@;                            \
-               install $(FILE_INSTALL_FLAGS) $< $@;            \
+               $(INSTALL) $(FILE_INSTALL_FLAGS) $< $@;         \
        else                                                    \
                if [ ! -e $@ ]; then                            \
                        echo >empty_file_$(notdir $@);                  \
@@ -648,7 +648,14 @@ $(SYMROOT)$(INSTALL_FILE_DIR)mach.$(KERNEL_CONFIG_LC).$(MACHINE_CONFIG_LC): $(TA
        fi;                                                     \
        if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then \
                $(RM) $(RMFLAGS) $@;                            \
-               install $(FILE_INSTALL_FLAGS) $< $@;            \
+               $(INSTALL) $(FILE_INSTALL_FLAGS) $< $@;         \
+               if [ $(BUILD_DWARF) -eq 1 ]; then                       \
+                       $(RM) -rf $@.dSYM;                              \
+                       $(MKDIR) -p -m 0755 $@.dSYM/$(DSYMBUILDDIR);    \
+                       $(INSTALL) $(INSTALL_FLAGS)                     \
+                               $<.dSYM/$(DSYMBUILDDIR)/$(notdir $<)    \
+                               $@.dSYM/$(DSYMBUILDDIR)/$(notdir $@);   \
+               fi;                                                     \
        else                                                    \
                if [ ! -e $@ ]; then                            \
                        echo >empty_file_$(notdir $@);                  \
@@ -675,7 +682,7 @@ $(INSTALL_FILE_FILES_GENERIC): $(DSTROOT)$(INSTALL_FILE_DIR)% : $(TARGET)/% forc
        fi;                                                             \
        if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then         \
                $(RM) $(RMFLAGS) $@;                                    \
-               install $(FILE_INSTALL_FLAGS) $< $@;            \
+               $(INSTALL) $(FILE_INSTALL_FLAGS) $< $@;         \
        else                                                            \
                if [ ! -e $@ ]; then                                    \
                        echo >empty_file_$(notdir $@);                  \
@@ -692,7 +699,7 @@ $(INSTALL_FILE_FILES_GENERIC): $(DSTROOT)$(INSTALL_FILE_DIR)% : $(TARGET)/% forc
                                -exec $(RM) -rf {} \;   ;               \
                        $(CTFMERGE) -l xnu -o $<.ctfsys                 \
                                $(OBJPATH)/*/$(KERNEL_CONFIG)/*.*o.ctf || true; \
-                       install $(FILE_INSTALL_FLAGS) $<.ctfsys $(dir $@); \
+                       $(INSTALL) $(FILE_INSTALL_FLAGS) $<.ctfsys $(dir $@); \
                else                                                    \
                        if [ ! -e $@.ctfsys ]; then                     \
                                echo >empty_file_$(notdir $@);          \
@@ -722,14 +729,14 @@ $(INSTALL_FILESYS_FILES_GENERIC): $(SYMROOT)$(INSTALL_FILE_DIR)% : $(TARGET)/%.s
        fi;                                                     \
        if [ "`echo $(INSTALL_ARCHS_LC) | wc -w`" -eq 1 ]; then \
                $(RM) $(RMFLAGS) $@;                            \
-               install $(INSTALL_FLAGS) $< $@;                 \
+               $(INSTALL) $(INSTALL_FLAGS) $< $@;                      \
                if [ $(BUILD_DWARF) -eq 1 ]; then                       \
                        $(DSYMUTIL) $(DSYMUTIL_FLAGS)                   \
                                $(TARGET)/mach_kernel.sys               \
                                -o $(TARGET)/mach_kernel.sys.dSYM;      \
                        $(RM) -rf $@.dSYM;                              \
                        $(MKDIR) -p -m 0755 $@.dSYM/$(DSYMBUILDDIR);    \
-                       install $(INSTALL_FLAGS)                        \
+                       $(INSTALL) $(INSTALL_FLAGS)                     \
                                $<.dSYM/$(DSYMBUILDDIR)/$(notdir $<)    \
                                $@.dSYM/$(DSYMBUILDDIR)/$(notdir $@);   \
                fi;                                                     \
@@ -761,7 +768,7 @@ $(INSTALL_FILESYS_FILES_GENERIC): $(SYMROOT)$(INSTALL_FILE_DIR)% : $(TARGET)/%.s
                                -o $@.dSYM/$(DSYMBUILDDIR)/$(notdir $@);       \
                fi;                                                            \
        fi
-       $(CP) $(SOURCE)kgmacros $(SYMROOT)$(INSTALL_FILE_DIR)
+       $(INSTALL) $(INSTALL_FLAGS) $(SOURCE)kgmacros $(SYMROOT)$(INSTALL_FILE_DIR)
 
 INSTALL_DATA_FILES = $(addprefix $(DSTROOT)$(INSTALL_DATA_DIR), $(INSTALL_DATA_LIST))
 
@@ -769,7 +776,7 @@ $(INSTALL_DATA_FILES): $(DSTROOT)$(INSTALL_DATA_DIR)% : $(SOURCE)/%
        @echo Installing $< in $@;
        $(_v)[ -d $(dir $@) ] ||$(MKDIR) $(dir $@);             \
        $(RM) $(RMFLAGS) $@;            \
-       install $(DATA_INSTALL_FLAGS) $< $(dir $@);
+       $(INSTALL) $(DATA_INSTALL_FLAGS) $< $(dir $@);
 
 setup_build_install:
        @echo "[ $(SOURCE) ] make setup_build_install $(KERNEL_CONFIG) $(ARCH_CONFIG) $(TARGET)"
@@ -790,7 +797,7 @@ do_installman: $(INSTALL_MAN_FILES)
                        $(MKDIR) $$man_dir;     \
                fi;                             \
                echo Installing $(INSTALL_MAN_LIST) in $$man_dir;       \
-               install $(INSTALL_FLAGS) $(INSTALL_MAN_LIST) $$man_dir; \
+               $(INSTALL) $(INSTALL_FLAGS) $(INSTALL_MAN_LIST) $$man_dir;      \
                if [ -n "$(strip $(INSTALL_MAN_LINKS))" ]; then \
                        set `echo ${INSTALL_MAN_LINKS}`; \
                        while : ; do \
@@ -809,7 +816,7 @@ $(INSTALL_MAN_FILES): $(DSTROOT)/$(MANDIR)/$(INSTALL_MAN_DIR)/% : %
        @true echo Installing $< in $(dir $@)
        $(_v)$(MKDIR) $(DSTROOT)/$(MANDIR)/$(INSTALL_MAN_DIR);       \
        $(RM) $(RMFLAGS) $@;                                    \
-       install $(INSTALL_FLAGS) $< $(dir $@);
+       $(INSTALL) $(INSTALL_FLAGS) $< $(dir $@);
 
 ifeq    ($(INCL_MAKEDEP), TRUE)
 -include Makedep
index 8510249dc7d217e1db24de672f463e83cb6af8b6..2ee2e428e363cf2fd0accb674ce3d45ae650b1e6 100644 (file)
@@ -35,6 +35,7 @@
 #include <i386/cpu_data.h>
 #include <i386/machine_routines.h>
 #include <i386/perfmon.h>
+#include <i386/lapic.h>
 #include <i386/mp.h>
 #include <i386/trap.h>
 #include <mach/i386/syscall_sw.h>
index a3f12cf152d7a529d2f541628982ad24e79d6456..0c0fafa354509d0594ac4cc2b42e856e7b935148 100644 (file)
@@ -46,6 +46,7 @@
 #include <chud/chud_thread.h>
 
 #include <i386/misc_protos.h>
+#include <i386/lapic.h>
 #include <i386/mp.h>
 #include <i386/machine_cpu.h>
 
index 5f7d87d6d6b95594fad0d55cfb34e2657d47ecae..08c28268cc79923abd04bd4d83433afece2ca9f2 100644 (file)
@@ -222,6 +222,11 @@ options   CONFIG_NO_KPRINTF_STRINGS                # <no_kprintf_str>
 #
 options   CONFIG_EMBEDDED                       # <config_embedded>
 
+# Only execute signed code. Hang this off config_embedded since there's
+# nothing more appropriate right now.
+#
+options   CONFIG_ENFORCE_SIGNED_CODE           # <config_embedded>
+
 #  jettison_kernel_linker - jettison kernel linker after kernel init; don't wait for kextd to launch
 options   CONFIG_JETTISON_KERNEL_LINKER                # <jettison_kernel_linker>
 
@@ -230,3 +235,9 @@ options       CONFIG_VC_PROGRESS_WHITE              # <vc_progress_white>
 
 # secure_kernel - secure kernel from user programs
 options                SECURE_KERNEL           # <secure_kernel>
+
+#
+# Code decryption: used on embedded for app protection.
+# Must be set in all the bsd/conf and osfmk/conf MASTER files.
+#
+options                CONFIG_CODE_DECRYPTION  # <config_embedded>
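The two hunks above only define build-time switches; the actual enforcement lives in the BSD exec and code-signing paths elsewhere in the tree. As a minimal sketch of what such a gate looks like (the function and parameter names here are hypothetical, not xnu's real implementation):

    #include <sys/errno.h>

    #if CONFIG_ENFORCE_SIGNED_CODE
    /* Hypothetical sketch: refuse to run an image whose code
     * signature failed validation. */
    static int
    exec_check_signed(int signature_valid)
    {
            if (!signature_valid)
                    return (EPERM);         /* reject unsigned/invalid code */
            return (0);
    }
    #endif /* CONFIG_ENFORCE_SIGNED_CODE */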
index 5eb745c8764b28d59950ff1a092d4f2c16060e13..07289a808aca9a50b878fea525e4134375402977 100644 (file)
@@ -9,12 +9,12 @@
 #  Standard Apple MacOS X Configurations:
 #  -------- ---- -------- ---------------
 #
-#  RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation crypto config_dtrace]
+#  RELEASE = [ medium intel pc iokit mach_pe mach mach_kdp config_serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation crypto config_dtrace]
 #  DEBUG_KDP = [ RELEASE osf_debug debug ]
 #  DEBUG= [ RELEASE osf_debug debug mach_kdb mach_assert ]
 #  PROFILE = [ RELEASE profile ]
 #
-#  EMBEDDED_BASE = [ bsmall intel pc iokit mach_pe mach mach_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation crypto ]
+#  EMBEDDED_BASE = [ bsmall intel pc iokit mach_pe mach mach_kdp serial_kdp event vol hd pst gdb fixpri simple_clock mkernserv uxpr kernstack ipc_compat ipc_debug fb mk30 mk30_i386 hibernation crypto ]
 #  EMBEDDED = [ EMBEDDED_BASE no_printf_str no_kprintf_str no_kdebug ]
 #  DEVELOPMENT = [ EMBEDDED_BASE mach_assert config_dtrace ]
 #
@@ -54,6 +54,7 @@ options         MACH_PE               #                               # <mach_pe>
 options                DDB             # Inline debugger               # <debug>
 options                MACH_KDB        #                               # <mach_kdb>
 options                MACH_KDP        # KDP                           # <mach_kdp>
+options                CONFIG_SERIAL_KDP       # KDP over serial                               # <config_serial_kdp>
 options                PAE
 options                X86_64
 options                DISPATCH_COUNTS
@@ -64,3 +65,9 @@ options               DISPATCH_COUNTS
 #
 options                CONFIG_MACF             # Mandatory Access Control Framework
 #options       CONFIG_MACF_MACH        # MACF applied to Mach services
+
+#
+# Code decryption: used on i386 for DSMOS.
+# Must be set in all the bsd/conf and osfmk/conf MASTER files.
+#
+options                CONFIG_CODE_DECRYPTION
index 9bfe3b7dd7680b3a429c048165c39c2ae156b7e0..e5a07a963e2dda25ed68111638e11073238d076f 100644 (file)
@@ -47,6 +47,7 @@ OPTIONS/mach_ipc_test         optional mach_ipc_test
 OPTIONS/mach_kdb               optional mach_kdb
 OPTIONS/mach_kgdb              optional mach_kgdb
 OPTIONS/mach_kdp               optional mach_kdp
+OPTIONS/config_serial_kdp              optional config_serial_kdp
 OPTIONS/mach_kprof             optional mach_kprof
 OPTIONS/mach_ldebug            optional mach_ldebug
 OPTIONS/mach_mp_debug          optional mach_mp_debug
@@ -123,6 +124,7 @@ osfmk/ddb/db_write_cmd.c            optional mach_kdb
 osfmk/ddb/tr.c                 optional mach_tr
 osfmk/kdp/kdp.c                        optional mach_kdp
 osfmk/kdp/kdp_udp.c                    optional mach_kdp
+osfmk/kdp/kdp_serial.c                 optional config_serial_kdp
 osfmk/ipc/ipc_entry.c                  standard
 osfmk/ipc/ipc_hash.c                   standard
 osfmk/ipc/ipc_init.c                   standard
index 421ab63588f5ed56d3e7fe0fb65cd6be5bc71241..a41da57da6b4b8bea47bb7307d4be37c5a66fcbe 100644 (file)
@@ -71,7 +71,6 @@ osfmk/i386/trap.c             standard
 osfmk/i386/user_ldt.c          standard
 osfmk/i386/Diagnostics.c       standard
 osfmk/i386/pmCPU.c             standard
-osfmk/i386/hpet.c              standard
 osfmk/i386/tsc.c               standard
 
 osfmk/i386/commpage/commpage.c standard
@@ -98,6 +97,7 @@ osfmk/i386/commpage/commpage_sigs.c   standard
 osfmk/i386/AT386/conf.c                standard
 osfmk/i386/AT386/model_dep.c   standard
 
+osfmk/i386/lapic.c             standard
 osfmk/i386/mp.c                        standard
 osfmk/i386/mp_slave_boot.s     standard
 
index 52a26a331d2a2bf89ebcc2fbe788d7241ee32b04..78754910be6d62cddcc3859769db4a30137fce49 100644 (file)
@@ -51,7 +51,7 @@ static int panic_dialog_verify( const struct panicimage * data, unsigned int siz
 static int pixels_needed_to_blit_digit( int digit );
 static void blit_digit( int digit );
 static const char * strnstr(const char * s, const char * find, size_t slen);
-static void dim_screen(void);
+void dim_screen(void);
 static void panic_blit_rect(unsigned int x, unsigned int y, unsigned int width,
                            unsigned int height, int transparent,
                            const unsigned char * dataPtr);
@@ -779,7 +779,7 @@ decode_rle(const unsigned char *dataPtr, unsigned int *quantity,
 }
 
 
-static void 
+void 
 dim_screen(void)
 {
        unsigned long *p, *endp, *row;
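Dropping the static qualifier in both hunks exports dim_screen() from the panic-dialog code so other kernel files can call it. A caller would declare it like this (a sketch, assuming no shared header provides the prototype):

    /* Declaration in another compilation unit. */
    extern void dim_screen(void);

    void
    example_panic_ui(void)
    {
            dim_screen();   /* dim the framebuffer before drawing the panic UI */
    }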
index c42f0df5a392a7ffe6f36ab42ef75343479b041a..4a040103fe81d2d2b2fd2d5369d86c61f381223a 100644 (file)
@@ -2317,7 +2317,7 @@ initialize_screen(PE_Video * boot_vinfo, unsigned int op)
                    new_vinfo.v_baseaddr = newVideoVirt + boot_vinfo->v_offset;                         /* Set the new framebuffer address */
                else
                    new_vinfo.v_baseaddr = lastVideoVirt + boot_vinfo->v_offset;                                /* Set the new framebuffer address */
-       
+
                /* Update the vinfo structure atomically with respect to the vc_progress task if running */
                if (vc_progress)
                {
index f1962e8d1daa39033b580cdd9d1575a8f9e4afab..cf99ace6b3e291eaa4963b5bfb9d3292681e268f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -768,11 +768,6 @@ struct db_command db_command_table[] = {
                .fcn = db_apic,
                .flag = CS_MORE,
        },
-       {
-               .name = "hp",
-               .fcn = db_hpet,
-               .flag = CS_MORE,
-       },
 #endif /* !__ppc__ */
 #if defined(__ppc__)   
        {
index 78c99073fd040ce84ad90b3b6fe20eec3b8dc005..5dfcf6952ce64dc26804f5f85677daa305e47171 100644 (file)
@@ -3565,6 +3565,7 @@ vs_cluster_transfer(
 
                if (size == 0) {
                        ASSERT(unavail_size);
+                       ps_clunmap(vs, offset, unavail_size);
                        cnt -= unavail_size;
                        offset += unavail_size;
                        if((offset & ((vm_page_size << vs->vs_clshift) - 1)) 
@@ -3643,6 +3644,7 @@ vs_cluster_transfer(
                                        */
                                        write_vsmap = *vsmap_ptr;
                                        *vsmap_ptr = read_vsmap;
+                                       ps_clunmap(vs, offset, size);
                                } else {
                                        /* discard the old backing object */
                                        write_vsmap = *vsmap_ptr;
index 4690f5d3ef750e12c5499a910b1bd1b971b305c0..c2e488dce020d30c39e0152a1c474e12bc15db35 100644 (file)
@@ -367,7 +367,8 @@ const struct memory_object_pager_ops default_pager_ops = {
        dp_memory_object_data_initialize,
        dp_memory_object_data_unlock,
        dp_memory_object_synchronize,
-       dp_memory_object_unmap,
+       dp_memory_object_map,
+       dp_memory_object_last_unmap,
        "default pager"
 };
 
@@ -414,11 +415,19 @@ dp_memory_object_synchronize(
 }
 
 kern_return_t
-dp_memory_object_unmap(
-       __unused memory_object_t                mem_obj)
+dp_memory_object_map(
+       __unused memory_object_t        mem_obj,
+       __unused vm_prot_t              prot)
 {
-       panic("dp_memory_object_unmap");
+       panic("dp_memory_object_map");
+       return KERN_FAILURE;
+}
 
+kern_return_t
+dp_memory_object_last_unmap(
+       __unused memory_object_t        mem_obj)
+{
+       panic("dp_memory_object_last_unmap");
        return KERN_FAILURE;
 }
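The pager ops table splits the old single unmap callback into map, which now receives the mapping protection, and last_unmap, called when the final mapping goes away; the default pager merely stubs both out with panics. A pager that actually implemented them would match these shapes (the my_pager_* names are hypothetical):

    /* Sketch of the new callback signatures. */
    kern_return_t
    my_pager_map(memory_object_t mem_obj, vm_prot_t prot)
    {
            /* Invoked when a mapping of the object is created;
             * prot carries the requested access rights. */
            (void) mem_obj; (void) prot;
            return (KERN_SUCCESS);
    }

    kern_return_t
    my_pager_last_unmap(memory_object_t mem_obj)
    {
            /* Invoked when the last mapping of the object is torn down. */
            (void) mem_obj;
            return (KERN_SUCCESS);
    }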
 
index bd01828889bc641aa913082ccf77a1991e715a8b..73ea3948abbb7b68894ce858bbfb04e6d3de6098 100644 (file)
@@ -152,7 +152,7 @@ machine_startup(void)
             halt_in_debugger = halt_in_debugger ? 0 : 1;
 #endif
 
-       if (PE_parse_boot_arg("debug", &boot_arg)) {
+       if (PE_parse_boot_argn("debug", &boot_arg, sizeof (boot_arg))) {
                if (boot_arg & DB_HALT) halt_in_debugger=1;
                if (boot_arg & DB_PRT) disable_debug_output=FALSE; 
                if (boot_arg & DB_SLOG) systemLogDiags=TRUE; 
@@ -160,14 +160,14 @@ machine_startup(void)
                if (boot_arg & DB_LOG_PI_SCRN) logPanicDataToScreen=TRUE; 
        }
 
-       if (!PE_parse_boot_arg("nvram_paniclog", &commit_paniclog_to_nvram))
+       if (!PE_parse_boot_argn("nvram_paniclog", &commit_paniclog_to_nvram, sizeof (commit_paniclog_to_nvram)))
                commit_paniclog_to_nvram = 1;
 
        /*
         * Entering the debugger will put the CPUs into a "safe"
         * power mode.
         */
-       if (PE_parse_boot_arg("pmsafe_debug", &boot_arg))
+       if (PE_parse_boot_argn("pmsafe_debug", &boot_arg, sizeof (boot_arg)))
            pmsafe_debug = boot_arg;
 
 #if NOTYET
@@ -199,25 +199,25 @@ machine_startup(void)
        }
 #endif /* MACH_KDB */
 
-       if (PE_parse_boot_arg("preempt", &boot_arg)) {
+       if (PE_parse_boot_argn("preempt", &boot_arg, sizeof (boot_arg))) {
                default_preemption_rate = boot_arg;
        }
-       if (PE_parse_boot_arg("unsafe", &boot_arg)) {
+       if (PE_parse_boot_argn("unsafe", &boot_arg, sizeof (boot_arg))) {
                max_unsafe_quanta = boot_arg;
        }
-       if (PE_parse_boot_arg("poll", &boot_arg)) {
+       if (PE_parse_boot_argn("poll", &boot_arg, sizeof (boot_arg))) {
                max_poll_quanta = boot_arg;
        }
-       if (PE_parse_boot_arg("yield", &boot_arg)) {
+       if (PE_parse_boot_argn("yield", &boot_arg, sizeof (boot_arg))) {
                sched_poll_yield_shift = boot_arg;
        }
-       if (PE_parse_boot_arg("idlehalt", &boot_arg)) {
+       if (PE_parse_boot_argn("idlehalt", &boot_arg, sizeof (boot_arg))) {
                idlehalt = boot_arg;
        }
 /* The I/O port to issue a read from, in the event of a panic. Useful for
  * triggering logic analyzers.
  */
-       if (PE_parse_boot_arg("panic_io_port", &boot_arg)) {
+       if (PE_parse_boot_argn("panic_io_port", &boot_arg, sizeof (boot_arg))) {
                /*I/O ports range from 0 through 0xFFFF */
                panic_io_port = boot_arg & 0xffff;
        }
@@ -968,7 +968,7 @@ panic_i386_backtrace(void *_frame, int nframes, const char *msg, boolean_t regdu
                pbtcpu = cpu_number();
        }
 
-       PE_parse_boot_arg("keepsyms", &keepsyms);
+       PE_parse_boot_argn("keepsyms", &keepsyms, sizeof (keepsyms));
 
        if (msg != NULL) {
                kdb_printf(msg);
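Every PE_parse_boot_arg() call becomes PE_parse_boot_argn(), whose extra argument bounds how many bytes the parser may write into the destination, so a long boot-arg value can no longer overrun a small variable. The usage pattern, as in the hunks above:

    uint32_t boot_arg;

    /* sizeof (boot_arg) caps what the parser may store */
    if (PE_parse_boot_argn("debug", &boot_arg, sizeof (boot_arg))) {
            /* boot_arg now holds the value of the debug= boot argument */
    }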
index 34209af9eb825fea6ff0d9364d1a6b94b3073981..74f806a6371cd9181b3c9b9160f0d1f6ec38eee6 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2005-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -69,7 +69,6 @@
 #include <i386/mp.h>
 #include <i386/pmCPU.h>
 #include <i386/tsc.h>
-#include <i386/hpet.h>
 #include <mach/i386/syscall_sw.h>
 
 extern uint64_t lastNapClear;
index a5fa0188ef4751777e6a66497fe2acf25c5c0afa..6cae8e5cdc2293e421c29ea90ff950477881c49c 100644 (file)
@@ -16,8 +16,8 @@ EXPORT_ONLY_FILES =   \
                    cpu_topology.h \
                    cpuid.h \
                    eflags.h \
-                   hpet.h \
                    io_map_entries.h \
+                   lapic.h \
                    lock.h \
                    locks.h \
                    machine_routines.h \
index 64c21447ee624d97d1b02db5b7121f19c86a5590..cd5bdbb714074cfd3a8a638dc12d33a2f9ac9cc4 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <i386/vmx/vmx_cpu.h>
 #include <i386/acpi.h>
 #include <i386/fpu.h>
+#include <i386/lapic.h>
 #include <i386/mp.h>
 #include <i386/mp_desc.h>
 #include <i386/serial_io.h>
-#include <i386/hpet.h>
 #include <i386/machine_check.h>
+#include <i386/pmCPU.h>
 
 #include <kern/cpu_data.h>
 #include <console/serial_protos.h>
@@ -147,8 +148,8 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
        data.refcon = refcon;
 #endif
 
-       /* Save HPET state */
-       hpet_save();
+       /* Save power management timer state */
+       pmTimerSave();
 
        /* 
         * Turn off VT, otherwise switching to legacy mode will fail
@@ -212,6 +213,12 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
        /* set up PAT following boot processor power up */
        pat_init();
 
+       /*
+        * Go through all of the CPUs and mark them as requiring
+        * a full restart.
+        */
+       pmMarkAllCPUsOff();
+
        /* let the realtime clock reset */
        rtc_sleep_wakeup(acpi_sleep_abstime);
 
@@ -220,10 +227,13 @@ acpi_sleep_kernel(acpi_sleep_callback func, void *refcon)
 
        /* re-enable and re-init local apic */
        if (lapic_probe())
-               lapic_init();
+               lapic_configure();
+
+       /* Restore power management register state */
+       pmCPUMarkRunning(current_cpu_datap());
 
-       /* Restore HPET state */
-       hpet_restore();
+       /* Restore power management timer state */
+       pmTimerRestore();
 
        /* Restart tick interrupts from the LAPIC timer */
        rtc_lapic_start_ticking();
index 6cce0663ce3e0b89fa9790551d3578acb284d45e..971e1d092c8747c9385bfe44eed1d945d59e3013 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #ifndef _I386_APIC_H_
 #define _I386_APIC_H_
 
-#define LAPIC_START                    0xFEE00000
-#define LAPIC_SIZE                     0x00000400
-
-#define LAPIC_ID                       0x00000020
-#define                LAPIC_ID_SHIFT          24
-#define                LAPIC_ID_MASK           0x0F
-#define LAPIC_VERSION                  0x00000030
-#define                LAPIC_VERSION_MASK      0xFF
-#define LAPIC_TPR                      0x00000080
-#define                LAPIC_TPR_MASK          0xFF
-#define LAPIC_APR                      0x00000090
-#define                LAPIC_APR_MASK          0xFF
-#define LAPIC_PPR                      0x000000A0
-#define                LAPIC_PPR_MASK          0xFF
-#define LAPIC_EOI                      0x000000B0
-#define LAPIC_REMOTE_READ              0x000000C0
-#define LAPIC_LDR                      0x000000D0
-#define                LAPIC_LDR_SHIFT         24
-#define LAPIC_DFR                      0x000000E0
-#define                LAPIC_DFR_FLAT          0xFFFFFFFF
-#define                LAPIC_DFR_CLUSTER       0x0FFFFFFF
-#define                LAPIC_DFR_SHIFT         28
-#define LAPIC_SVR                      0x000000F0
-#define                LAPIC_SVR_MASK          0x0FF
-#define                LAPIC_SVR_ENABLE        0x100
-#define                LAPIC_SVR_FOCUS_OFF     0x200
-#define LAPIC_ISR_BASE                 0x00000100
-#define LAPIC_TMR_BASE                 0x00000180
-#define LAPIC_IRR_BASE                 0x00000200
-#define LAPIC_ERROR_STATUS             0x00000280
-#define LAPIC_ICR                      0x00000300
-#define                LAPIC_ICR_VECTOR_MASK   0x000FF
-#define                LAPIC_ICR_DM_MASK       0x00700
-#define                LAPIC_ICR_DM_FIXED      0x00000
-#define                LAPIC_ICR_DM_LOWEST     0x00100
-#define                LAPIC_ICR_DM_SMI        0x00200
-#define                LAPIC_ICR_DM_REMOTE     0x00300
-#define                LAPIC_ICR_DM_NMI        0x00400
-#define                LAPIC_ICR_DM_INIT       0x00500
-#define                LAPIC_ICR_DM_STARTUP    0x00600
-#define                LAPIC_ICR_DM_LOGICAL    0x00800
-#define                LAPIC_ICR_DS_PENDING    0x01000
-#define                LAPIC_ICR_LEVEL_ASSERT  0x04000
-#define                LAPIC_ICR_TRIGGER_LEVEL 0x08000
-#define                LAPIC_ICR_RR_MASK       0x30000
-#define                LAPIC_ICR_RR_INVALID    0x00000
-#define                LAPIC_ICR_RR_INPROGRESS 0x10000
-#define                LAPIC_ICR_RR_VALID      0x20000
-#define                LAPIC_ICR_DSS_MASK      0xC0000
-#define                LAPIC_ICR_DSS_DEST      0x00000
-#define                LAPIC_ICR_DSS_SELF      0x40000
-#define                LAPIC_ICR_DSS_ALL       0x80000
-#define                LAPIC_ICR_DSS_OTHERS    0xC0000
-#define LAPIC_ICRD                     0x00000310
-#define                LAPIC_ICRD_DEST_SHIFT   24
-#define LAPIC_LVT_TIMER                        0x00000320
-#define LAPIC_LVT_THERMAL              0x00000330
-#define LAPIC_LVT_PERFCNT              0x00000340
-#define LAPIC_LVT_LINT0                        0x00000350
-#define LAPIC_LVT_LINT1                        0x00000360
-#define LAPIC_LVT_ERROR                        0x00000370
-#define                LAPIC_LVT_VECTOR_MASK   0x000FF
-#define                LAPIC_LVT_DM_SHIFT      8
-#define                LAPIC_LVT_DM_MASK       0x00007
-#define                LAPIC_LVT_DM_FIXED      0x00000
-#define                LAPIC_LVT_DM_NMI        0x00400
-#define                LAPIC_LVT_DM_EXTINT     0x00700
-#define                LAPIC_LVT_DS_PENDING    0x01000
-#define                LAPIC_LVT_IP_PLRITY_LOW 0x02000
-#define                LAPIC_LVT_REMOTE_IRR    0x04000
-#define                LAPIC_LVT_TM_LEVEL      0x08000
-#define                LAPIC_LVT_MASKED        0x10000
-#define                LAPIC_LVT_PERIODIC      0x20000
-#define LAPIC_TIMER_INITIAL_COUNT      0x00000380
-#define LAPIC_TIMER_CURRENT_COUNT      0x00000390
-#define LAPIC_TIMER_DIVIDE_CONFIG      0x000003E0
-/* divisor encoded by bits 0,1,3 with bit 2 always 0: */
-#define        LAPIC_TIMER_DIVIDE_MASK 0x0000000F
-#define        LAPIC_TIMER_DIVIDE_2    0x00000000
-#define        LAPIC_TIMER_DIVIDE_4    0x00000001
-#define        LAPIC_TIMER_DIVIDE_8    0x00000002
-#define        LAPIC_TIMER_DIVIDE_16   0x00000003
-#define        LAPIC_TIMER_DIVIDE_32   0x00000008
-#define        LAPIC_TIMER_DIVIDE_64   0x00000009
-#define        LAPIC_TIMER_DIVIDE_128  0x0000000A
-#define        LAPIC_TIMER_DIVIDE_1    0x0000000B
-
-#ifndef        ASSEMBLER
-#include <stdint.h>
-typedef enum {
-       periodic,
-       one_shot
-} lapic_timer_mode_t;
-typedef enum { 
-       divide_by_1   = LAPIC_TIMER_DIVIDE_1,
-       divide_by_2   = LAPIC_TIMER_DIVIDE_2,
-       divide_by_4   = LAPIC_TIMER_DIVIDE_4,
-       divide_by_8   = LAPIC_TIMER_DIVIDE_8,
-       divide_by_16  = LAPIC_TIMER_DIVIDE_16,
-       divide_by_32  = LAPIC_TIMER_DIVIDE_32,
-       divide_by_64  = LAPIC_TIMER_DIVIDE_64,
-       divide_by_128 = LAPIC_TIMER_DIVIDE_128
-} lapic_timer_divide_t;
-typedef uint32_t lapic_timer_count_t;
-#endif /* ASSEMBLER */
-
 #define IOAPIC_START                   0xFEC00000
 #define        IOAPIC_SIZE                     0x00000020
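All of the local-APIC register definitions are removed from apic.h and move into the new i386/lapic.h introduced by this commit (the IOAPIC definitions stay behind). For reference, reading the local APIC ID with these macros looks like the following sketch, assuming lapic_base is the kernel virtual mapping of the register page at LAPIC_START:

    #include <stdint.h>

    static inline uint32_t
    lapic_read_id(volatile uint8_t *lapic_base)
    {
            uint32_t v = *(volatile uint32_t *)(lapic_base + LAPIC_ID);
            return ((v >> LAPIC_ID_SHIFT) & LAPIC_ID_MASK);
    }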
 
index 562b0b3920b65aef81c4699a6a427ba612e7c2af..a870cc503db0112b893d1cf79736990736022718 100644 (file)
@@ -102,6 +102,7 @@ kern_return_t
 thread_compose_cthread_desc(unsigned int addr, pcb_t pcb);
 
 void IOSleep(int);
+extern void throttle_lowpri_io(boolean_t);
 
 void thread_set_cthreadself(thread_t thread, uint64_t pself, int isLP64);
 
@@ -394,6 +395,8 @@ machdep_syscall(x86_saved_state_t *state)
        if (current_thread()->funnel_lock)
                (void) thread_funnel_set(current_thread()->funnel_lock, FALSE);
 
+       throttle_lowpri_io(TRUE);
+
        thread_exception_return();
        /* NOTREACHED */
 }
@@ -432,6 +435,8 @@ machdep_syscall64(x86_saved_state_t *state)
        if (current_thread()->funnel_lock)
                (void) thread_funnel_set(current_thread()->funnel_lock, FALSE);
 
+       throttle_lowpri_io(TRUE);
+
        thread_exception_return();
        /* NOTREACHED */
 }
@@ -712,6 +717,8 @@ mach_call_munger(x86_saved_state_t *state)
                        retval, 0, 0, 0, 0);
        regs->eax = retval;
 
+       throttle_lowpri_io(TRUE);
+
        thread_exception_return();
        /* NOTREACHED */
 }
@@ -767,6 +774,8 @@ mach_call_munger64(x86_saved_state_t *state)
                                           (call_number)) | DBG_FUNC_END,
                              (int)regs->rax, 0, 0, 0, 0);
 
+       throttle_lowpri_io(TRUE);
+
        thread_exception_return();
        /* NOTREACHED */
 }
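Each machdep and Mach syscall return path gains a throttle_lowpri_io(TRUE) call immediately before thread_exception_return(), so low-priority I/O throttling is evaluated exactly once per syscall on the way back out. Reduced to its shape, the common tail of every path is now:

    throttle_lowpri_io(TRUE);       /* apply low-priority I/O throttling */
    thread_exception_return();      /* resume user space */
    /* NOTREACHED */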
index 3ea04c5f1f170f7f9d96d39e7f23f06ee4da2f2d..60baed63c14eb3f45977385213867bb807b7b8a3 100644 (file)
@@ -57,6 +57,8 @@ Lnanotime:
        jz      0b
 
        rdtsc                                   /* get TSC in %edx:%eax */
+       lfence
+
        subl    _COMM_PAGE_NT_TSC_BASE,%eax
        sbbl    _COMM_PAGE_NT_TSC_BASE+4,%edx
 
@@ -160,6 +162,7 @@ Lnanotime_64:                                       // NB: must preserve r9, r10, and r11
        testl   %r8d,%r8d                       // if 0, data is being changed...
        jz      1b                              // ...so loop until stable
        rdtsc                                   // edx:eax := tsc
+       lfence
        shlq    $32,%rdx                        // rax := ((edx << 32) | eax), ie 64-bit tsc
        orq     %rdx,%rax
        subq    _NT_TSC_BASE(%rsi), %rax        // rax := (tsc - base_tsc)
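An LFENCE is inserted after each RDTSC in the commpage nanotime routines. RDTSC is not a serializing instruction and may execute out of order; on Intel processors LFENCE waits for preceding instructions to complete, so the pair keeps the timestamp ordered with respect to the generation-count loads around it. The same pattern in C with inline assembly (a sketch, GCC/Clang syntax):

    #include <stdint.h>

    /* Read the TSC, fenced so it cannot be reordered past the
     * surrounding code (mirrors the rdtsc; lfence pairs above). */
    static inline uint64_t
    rdtsc64_fenced(void)
    {
            uint32_t lo, hi;
            __asm__ __volatile__ ("rdtsc; lfence" : "=a" (lo), "=d" (hi));
            return (((uint64_t) hi << 32) | lo);
    }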
index b0f87d7dadefae91d204cc0d205e3a59c09babf2..1760eabf5962ea3d1787c7485d12491b50662a56 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -113,14 +113,26 @@ cpu_start(
        if (cpu == cpu_number()) {
                cpu_machine_init();
                return KERN_SUCCESS;
-       } else {
+       }
+
+       /*
+        * Try to bring the CPU back online without a reset.
+        * If the fast restart doesn't succeed, fall back to
+        * the slow, reset-based path.
+        */
+       ret = intel_startCPU_fast(cpu);
+       if (ret != KERN_SUCCESS) {
                /*
                 * Should call out through PE.
                 * But take the shortcut here.
                 */
                ret = intel_startCPU(cpu);
-               return(ret);
        }
+
+       if (ret != KERN_SUCCESS)
+               kprintf("cpu: cpu_start(%d) returning failure!\n", cpu);
+
+       return(ret);
 }
 
 void
@@ -130,7 +142,8 @@ cpu_exit_wait(
        cpu_data_t      *cdp = cpu_datap(cpu);
 
        simple_lock(&x86_topo_lock);
-       while (!cdp->lcpu.halted) {
+       while ((cdp->lcpu.state != LCPU_HALT)
+              && (cdp->lcpu.state != LCPU_OFF)) {
            simple_unlock(&x86_topo_lock);
            cpu_pause();
            simple_lock(&x86_topo_lock);
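cpu_start() now first tries intel_startCPU_fast(), waking a processor that was parked rather than fully reset, and only falls back to the original intel_startCPU() path on failure; cpu_exit_wait() likewise checks the new lcpu state machine (LCPU_HALT or LCPU_OFF) instead of the old boolean halted flag. The fallback idiom in isolation:

    kern_return_t ret;

    ret = intel_startCPU_fast(cpu);         /* resume without a reset */
    if (ret != KERN_SUCCESS)
            ret = intel_startCPU(cpu);      /* full startup sequence */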
index e061888d7d237f06ac1dd44e04728c863bbde21a..ae05b1767f87ec6f33a676286f0f461ca6130b06 100644 (file)
@@ -66,20 +66,6 @@ typedef struct rtclock_timer {
        boolean_t       has_expired;
 } rtclock_timer_t;
 
-typedef struct rtc_nanotime {
-       uint64_t        tsc_base;               /* timestamp */
-       uint64_t        ns_base;                /* nanoseconds */
-       uint32_t        scale;                  /* tsc -> nanosec multiplier */
-       uint32_t        shift;                  /* tsc -> nanosec shift/div */
-                                               /* shift is overloaded with
-                                                * lower 32bits of tsc_freq
-                                                * on slower machines (SLOW_TSC_THRESHOLD) */
-       uint32_t        generation;             /* 0 == being updated */
-       uint32_t        spare1;
-} rtc_nanotime_t;
-
-#define        SLOW_TSC_THRESHOLD      1000067800      /* TSC is too slow for regular nanotime() algorithm */
-
 
 typedef struct {
        struct i386_tss         *cdi_ktss;
@@ -181,7 +167,7 @@ typedef struct cpu_data
        uint64_t                *cpu_physwindow_ptep;
        void                    *cpu_hi_iss;
        boolean_t               cpu_tlb_invalid;
-       uint32_t                cpu_hwIntCnt[256];              /* Interrupt counts */
+       uint32_t                cpu_hwIntCnt[256];      /* Interrupt counts */
        uint64_t                cpu_dr7; /* debug control register */
        uint64_t                cpu_int_event_time;     /* intr entry/exit time */
        vmx_cpu_t               cpu_vmx;                /* wonderful world of virtualization */
@@ -195,7 +181,7 @@ typedef struct cpu_data
                                                           * arg store
                                                           * validity flag.
                                                           */
-
+       rtc_nanotime_t          *cpu_nanotime;          /* Nanotime info */
                                                          
 } cpu_data_t;
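rtc_nanotime_t and SLOW_TSC_THRESHOLD leave this header (the structure presumably now lives with the rtclock code), and each cpu_data_t instead carries a per-CPU cpu_nanotime pointer. Access then follows the usual per-CPU pattern, e.g.:

    /* Sketch: fetch the nanotime parameters for the current CPU. */
    rtc_nanotime_t  *ntp = current_cpu_datap()->cpu_nanotime;
    uint64_t        tsc_base = ntp->tsc_base;  /* TSC at last calibration */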
 
index b7f108ecf828e5b48f27303e13cdaa5d42e73c2c..7727eb7eac654bb9bf8482457ce7c4c6bba88bf6 100644 (file)
 #include <i386/perfmon.h>
 #include <i386/pmCPU.h>
 
+//#define TOPO_DEBUG           1
+#if TOPO_DEBUG
+void debug_topology_print(void);
+#define DBG(x...)      kprintf("DBG: " x)
+#else
+#define DBG(x...)
+#endif /* TOPO_DEBUG */
+
 #define bitmask(h,l)   ((bit(h)|(bit(h)-1)) & ~(bit(l)-1))
 #define bitfield(x,h,l)        (((x) & bitmask(h,l)) >> l)
 
-/*
- * Kernel parameter determining whether threads are halted unconditionally
- * in the idle state.  This is the default behavior.
- * See machine_idle() for use.
- */
-int idlehalt = 1;
-
-x86_pkg_t      *x86_pkgs       = NULL;
-uint32_t       num_packages    = 0;
+x86_pkg_t      *x86_pkgs               = NULL;
 uint32_t       num_Lx_caches[MAX_CACHE_DEPTH]  = { 0 };
 
 static x86_pkg_t       *free_pkgs      = NULL;
+static x86_die_t       *free_dies      = NULL;
 static x86_core_t      *free_cores     = NULL;
+static uint32_t                num_dies        = 0;
 
 static x86_cpu_cache_t *x86_caches     = NULL;
 static uint32_t                num_caches      = 0;
 
+static boolean_t       topoParmsInited = FALSE;
+x86_topology_parameters_t      topoParms;
+
 decl_simple_lock_data(, x86_topo_lock);
+static boolean_t
+cpu_is_hyperthreaded(void)
+{
+    i386_cpu_info_t    *cpuinfo;
+
+    cpuinfo = cpuid_info();
+    return(cpuinfo->thread_count > cpuinfo->core_count);
+}
 
 static x86_cpu_cache_t *
 x86_cache_alloc(void)
@@ -84,6 +98,167 @@ x86_cache_alloc(void)
 
     return(cache);
 }
+static void
+x86_LLC_info(void)
+{
+    uint32_t           index;
+    uint32_t           cache_info[4];
+    uint32_t           cache_level     = 0;
+    uint32_t           nCPUsSharing    = 1;
+    i386_cpu_info_t    *cpuinfo;
+
+    cpuinfo = cpuid_info();
+
+    do_cpuid(0, cache_info);
+
+    if (cache_info[eax] < 4) {
+       /*
+        * Processor does not support deterministic
+        * cache information. Set LLC sharing to 1, since
+        * we have no better information.
+        */
+       if (cpu_is_hyperthreaded()) {
+           topoParms.nCoresSharingLLC = 1;
+           topoParms.nLCPUsSharingLLC = 2;
+           topoParms.maxSharingLLC = 2;
+       } else {
+           topoParms.nCoresSharingLLC = 1;
+           topoParms.nLCPUsSharingLLC = 1;
+           topoParms.maxSharingLLC = 1;
+       }
+       return;
+    }
+
+    for (index = 0; ; index += 1) {
+       uint32_t                this_level;
+
+       cache_info[eax] = 4;
+       cache_info[ecx] = index;
+       cache_info[ebx] = 0;
+       cache_info[edx] = 0;
+
+       cpuid(cache_info);
+
+       /*
+        * See if all levels have been queried.
+        */
+       if (bitfield(cache_info[eax], 4, 0) == 0)
+           break;
+
+       /*
+        * Get the current level.
+        */
+       this_level = bitfield(cache_info[eax], 7, 5);
+
+       /*
+        * Only worry about it if it's a deeper level than
+        * what we've seen before.
+        */
+       if (this_level > cache_level) {
+           cache_level = this_level;
+
+           /*
+            * Save the number of CPUs sharing this cache.
+            */
+           nCPUsSharing = bitfield(cache_info[eax], 25, 14) + 1;
+       }
+    }
+
+    /*
+     * Make the level of the LLC be 0 based.
+     */
+    topoParms.LLCDepth = cache_level - 1;
+
+    /*
+     * nCPUsSharing represents the *maximum* number of cores or
+     * logical CPUs sharing the cache.
+     */
+    topoParms.maxSharingLLC = nCPUsSharing;
+
+    topoParms.nCoresSharingLLC = nCPUsSharing;
+    topoParms.nLCPUsSharingLLC = nCPUsSharing;
+
+    /*
+     * nCPUsSharing may not be the number of *active* cores or
+     * threads that are sharing the cache.
+     */
+    if (nCPUsSharing > cpuinfo->core_count)
+       topoParms.nCoresSharingLLC = cpuinfo->core_count;
+    if (nCPUsSharing > cpuinfo->thread_count)
+       topoParms.nLCPUsSharingLLC = cpuinfo->thread_count;
+
+
+    if (nCPUsSharing > cpuinfo->thread_count)
+       topoParms.maxSharingLLC = cpuinfo->thread_count;
+}
+
+static void
+initTopoParms(void)
+{
+    i386_cpu_info_t    *cpuinfo;
+
+    cpuinfo = cpuid_info();
+
+    /*
+     * We need to start with getting the LLC information correct.
+     */
+    x86_LLC_info();
+
+    /*
+     * Compute the number of threads (logical CPUs) per core.
+     */
+    topoParms.nLThreadsPerCore = cpuinfo->thread_count / cpuinfo->core_count;
+    topoParms.nPThreadsPerCore = cpuinfo->cpuid_logical_per_package / cpuinfo->cpuid_cores_per_package;
+
+    /*
+     * Compute the number of dies per package.
+     */
+    topoParms.nLDiesPerPackage = cpuinfo->core_count / topoParms.nCoresSharingLLC;
+    topoParms.nPDiesPerPackage = cpuinfo->cpuid_cores_per_package / (topoParms.maxSharingLLC / topoParms.nPThreadsPerCore);
+
+    /*
+     * Compute the number of cores per die.
+     */
+    topoParms.nLCoresPerDie = topoParms.nCoresSharingLLC;
+    topoParms.nPCoresPerDie = (topoParms.maxSharingLLC / topoParms.nPThreadsPerCore);
+
+    /*
+     * Compute the number of threads per die.
+     */
+    topoParms.nLThreadsPerDie = topoParms.nLThreadsPerCore * topoParms.nLCoresPerDie;
+    topoParms.nPThreadsPerDie = topoParms.nPThreadsPerCore * topoParms.nPCoresPerDie;
+
+    /*
+     * Compute the number of cores per package.
+     */
+    topoParms.nLCoresPerPackage = topoParms.nLCoresPerDie * topoParms.nLDiesPerPackage;
+    topoParms.nPCoresPerPackage = topoParms.nPCoresPerDie * topoParms.nPDiesPerPackage;
+
+    /*
+     * Compute the number of threads per package.
+     */
+    topoParms.nLThreadsPerPackage = topoParms.nLThreadsPerCore * topoParms.nLCoresPerPackage;
+    topoParms.nPThreadsPerPackage = topoParms.nPThreadsPerCore * topoParms.nPCoresPerPackage;
+
+    DBG("\nLogical Topology Parameters:\n");
+    DBG("\tThreads per Core:  %d\n", topoParms.nLThreadsPerCore);
+    DBG("\tCores per Die:     %d\n", topoParms.nLCoresPerDie);
+    DBG("\tThreads per Die:   %d\n", topoParms.nLThreadsPerDie);
+    DBG("\tDies per Package:  %d\n", topoParms.nLDiesPerPackage);
+    DBG("\tCores per Package: %d\n", topoParms.nLCoresPerPackage);
+    DBG("\tThreads per Package: %d\n", topoParms.nLThreadsPerPackage);
+
+    DBG("\nPhysical Topology Parameters:\n");
+    DBG("\tThreads per Core: %d\n", topoParms.nPThreadsPerCore);
+    DBG("\tCores per Die:     %d\n", topoParms.nPCoresPerDie);
+    DBG("\tThreads per Die:   %d\n", topoParms.nPThreadsPerDie);
+    DBG("\tDies per Package:  %d\n", topoParms.nPDiesPerPackage);
+    DBG("\tCores per Package: %d\n", topoParms.nPCoresPerPackage);
+    DBG("\tThreads per Package: %d\n", topoParms.nPThreadsPerPackage);
+
+    topoParmsInited = TRUE;
+}
 
 static void
 x86_cache_free(x86_cpu_cache_t *cache)
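x86_LLC_info() above walks CPUID leaf 4 (deterministic cache parameters): EAX[4:0] is the cache type and terminates the walk when zero, EAX[7:5] is the cache level, and EAX[25:14]+1 is the maximum number of logical CPUs that can share that cache, which is what the topology parameters are derived from. The same walk as a standalone user-mode sketch (GCC/Clang inline assembly rather than the kernel's cpuid() helpers):

    #include <stdint.h>
    #include <stdio.h>

    static void
    cpuid4(uint32_t index, uint32_t r[4])
    {
            __asm__ __volatile__ ("cpuid"
                : "=a" (r[0]), "=b" (r[1]), "=c" (r[2]), "=d" (r[3])
                : "a" (4U), "c" (index));
    }

    int
    main(void)
    {
            uint32_t i, r[4];

            for (i = 0; ; i++) {
                    cpuid4(i, r);
                    if ((r[0] & 0x1f) == 0)          /* EAX[4:0] == 0: done */
                            break;
                    printf("L%u cache: shared by up to %u logical CPUs\n",
                        (r[0] >> 5) & 0x7,           /* EAX[7:5]: level */
                        ((r[0] >> 14) & 0xfff) + 1); /* EAX[25:14] + 1 */
            }
            return (0);
    }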
@@ -141,7 +316,7 @@ x86_cache_list(void)
 
        cur->type = bitfield(cache_info[eax], 4, 0);
        cur->level = bitfield(cache_info[eax], 7, 5);
-       cur->nlcpus = bitfield(cache_info[eax], 25, 14) + 1;
+       cur->maxcpus = (bitfield(cache_info[eax], 25, 14) + 1);
        cur->line_size = bitfield(cache_info[ebx], 11, 0) + 1;
        cur->partitions = bitfield(cache_info[ebx], 21, 12) + 1;
        cur->ways = bitfield(cache_info[ebx], 31, 22) + 1;
@@ -156,20 +331,33 @@ x86_cache_list(void)
            last = cur;
        }
 
+       cur->nlcpus = 0;
        num_Lx_caches[cur->level - 1] += 1;
     }
 
     return(root);
 }
 
-static boolean_t
-cpu_is_hyperthreaded(void)
+static x86_cpu_cache_t *
+x86_match_cache(x86_cpu_cache_t *list, x86_cpu_cache_t *matcher)
 {
-    if  (cpuid_features() & CPUID_FEATURE_HTT)
-       return (cpuid_info()->cpuid_logical_per_package /
-               cpuid_info()->cpuid_cores_per_package) > 1;
-    else
-       return FALSE;
+    x86_cpu_cache_t    *cur_cache;
+
+    cur_cache = list;
+    while (cur_cache != NULL) {
+       if (cur_cache->maxcpus  == matcher->maxcpus
+           && cur_cache->type  == matcher->type
+           && cur_cache->level == matcher->level
+           && cur_cache->ways  == matcher->ways
+           && cur_cache->partitions == matcher->partitions
+           && cur_cache->line_size  == matcher->line_size
+           && cur_cache->cache_size == matcher->cache_size)
+           break;
+
+       cur_cache = cur_cache->next;
+    }
+
+    return(cur_cache);
 }
 
 static void
@@ -184,17 +372,21 @@ x86_lcpu_init(int cpu)
     lcpu = &cpup->lcpu;
     lcpu->lcpu = lcpu;
     lcpu->cpu  = cpup;
-    lcpu->next = NULL;
-    lcpu->core = NULL;
+    lcpu->next_in_core = NULL;
+    lcpu->next_in_die  = NULL;
+    lcpu->next_in_pkg  = NULL;
+    lcpu->core         = NULL;
+    lcpu->die          = NULL;
+    lcpu->package      = NULL;
+    lcpu->cpu_num = cpu;
     lcpu->lnum = cpu;
     lcpu->pnum = cpup->cpu_phys_number;
-    lcpu->halted = FALSE;      /* XXX is this correct? */
-    lcpu->idle   = FALSE;      /* XXX is this correct? */
+    lcpu->state = LCPU_OFF;
     for (i = 0; i < MAX_CACHE_DEPTH; i += 1)
        lcpu->caches[i] = NULL;
 
-    lcpu->master = (lcpu->pnum == (unsigned int) master_cpu);
-    lcpu->primary = (lcpu->pnum % cpuid_info()->cpuid_logical_per_package) == 0;
+    lcpu->master = (lcpu->cpu_num == (unsigned int) master_cpu);
+    lcpu->primary = (lcpu->pnum % topoParms.nPThreadsPerPackage) == 0;
 }
 
 static x86_core_t *
@@ -202,16 +394,14 @@ x86_core_alloc(int cpu)
 {
     x86_core_t *core;
     cpu_data_t *cpup;
-    uint32_t   cpu_in_pkg;
-    uint32_t   lcpus_per_core;
 
     cpup = cpu_datap(cpu);
 
     simple_lock(&x86_topo_lock);
     if (free_cores != NULL) {
        core = free_cores;
-       free_cores = core->next;
-       core->next = NULL;
+       free_cores = core->next_in_die;
+       core->next_in_die = NULL;
        simple_unlock(&x86_topo_lock);
     } else {
        simple_unlock(&x86_topo_lock);
@@ -222,12 +412,8 @@ x86_core_alloc(int cpu)
 
     bzero((void *) core, sizeof(x86_core_t));
 
-    cpu_in_pkg = cpu % cpuid_info()->cpuid_logical_per_package;
-    lcpus_per_core = cpuid_info()->cpuid_logical_per_package /
-                    cpuid_info()->cpuid_cores_per_package;
-
-    core->pcore_num = cpup->cpu_phys_number / lcpus_per_core;
-    core->lcore_num = core->pcore_num % cpuid_info()->cpuid_cores_per_package;
+    core->pcore_num = cpup->cpu_phys_number / topoParms.nPThreadsPerCore;
+    core->lcore_num = core->pcore_num % topoParms.nPCoresPerPackage;
 
     core->flags = X86CORE_FL_PRESENT | X86CORE_FL_READY
                | X86CORE_FL_HALTED | X86CORE_FL_IDLE;
@@ -239,7 +425,7 @@ static void
 x86_core_free(x86_core_t *core)
 {
     simple_lock(&x86_topo_lock);
-    core->next = free_cores;
+    core->next_in_die = free_cores;
     free_cores = core;
     simple_unlock(&x86_topo_lock);
 }
@@ -253,7 +439,7 @@ x86_package_find(int cpu)
 
     cpup = cpu_datap(cpu);
 
-    pkg_num = cpup->cpu_phys_number / cpuid_info()->cpuid_logical_per_package;
+    pkg_num = cpup->cpu_phys_number / topoParms.nPThreadsPerPackage;
 
     pkg = x86_pkgs;
     while (pkg != NULL) {
@@ -264,48 +450,203 @@ x86_package_find(int cpu)
 
     return(pkg);
 }
+static x86_die_t *
+x86_die_find(int cpu)
+{
+    x86_die_t  *die;
+    x86_pkg_t  *pkg;
+    cpu_data_t *cpup;
+    uint32_t   die_num;
+
+    cpup = cpu_datap(cpu);
+
+    die_num = cpup->cpu_phys_number / topoParms.nPThreadsPerDie;
+
+    pkg = x86_package_find(cpu);
+    if (pkg == NULL)
+       return(NULL);
+
+    die = pkg->dies;
+    while (die != NULL) {
+       if (die->pdie_num == die_num)
+           break;
+       die = die->next_in_pkg;
+    }
+
+    return(die);
+}
 
 static x86_core_t *
 x86_core_find(int cpu)
 {
     x86_core_t *core;
-    x86_pkg_t  *pkg;
+    x86_die_t  *die;
     cpu_data_t *cpup;
     uint32_t   core_num;
 
     cpup = cpu_datap(cpu);
 
-    core_num = cpup->cpu_phys_number
-              / (cpuid_info()->cpuid_logical_per_package
-                 / cpuid_info()->cpuid_cores_per_package);
+    core_num = cpup->cpu_phys_number / topoParms.nPThreadsPerCore;
 
-    pkg = x86_package_find(cpu);
-    if (pkg == NULL)
+    die = x86_die_find(cpu);
+    if (die == NULL)
        return(NULL);
 
-    core = pkg->cores;
+    core = die->cores;
     while (core != NULL) {
        if (core->pcore_num == core_num)
            break;
-       core = core->next;
+       core = core->next_in_die;
     }
 
     return(core);
 }
+void
+x86_set_lcpu_numbers(x86_lcpu_t *lcpu)
+{
+    lcpu->lnum = lcpu->cpu_num % topoParms.nLThreadsPerCore;
+}
+
+void
+x86_set_core_numbers(x86_core_t *core, x86_lcpu_t *lcpu)
+{
+    core->pcore_num = lcpu->cpu_num / topoParms.nLThreadsPerCore;
+    core->lcore_num = core->pcore_num % topoParms.nLCoresPerDie;
+}
+
+void
+x86_set_die_numbers(x86_die_t *die, x86_lcpu_t *lcpu)
+{
+    die->pdie_num = lcpu->cpu_num / (topoParms.nLThreadsPerCore * topoParms.nLCoresPerDie);
+    die->ldie_num = die->pdie_num % topoParms.nLDiesPerPackage;
+}
+
+void
+x86_set_pkg_numbers(x86_pkg_t *pkg, x86_lcpu_t *lcpu)
+{
+    pkg->ppkg_num = lcpu->cpu_num / topoParms.nLThreadsPerPackage;
+    pkg->lpkg_num = pkg->ppkg_num;
+}
+
+static x86_die_t *
+x86_die_alloc(int cpu)
+{
+    x86_die_t  *die;
+    cpu_data_t *cpup;
+
+    cpup = cpu_datap(cpu);
+
+    simple_lock(&x86_topo_lock);
+    if (free_dies != NULL) {
+       die = free_dies;
+       free_dies = die->next_in_pkg;
+       die->next_in_pkg = NULL;
+       simple_unlock(&x86_topo_lock);
+    } else {
+       simple_unlock(&x86_topo_lock);
+       die = kalloc(sizeof(x86_die_t));
+       if (die == NULL)
+           panic("x86_die_alloc() kalloc of x86_die_t failed!\n");
+    }
+
+    bzero((void *) die, sizeof(x86_die_t));
+
+    die->pdie_num = cpup->cpu_phys_number / topoParms.nPThreadsPerDie;
+
+    die->ldie_num = num_dies;
+    atomic_incl((long *) &num_dies, 1);
+
+    die->flags = X86DIE_FL_PRESENT;
+    return(die);
+}
 
 static void
-x86_core_add_lcpu(x86_core_t *core, x86_lcpu_t *lcpu)
+x86_die_free(x86_die_t *die)
+{
+    simple_lock(&x86_topo_lock);
+    die->next_in_pkg = free_dies;
+    free_dies = die;
+    atomic_decl((long *) &num_dies, 1);
+    simple_unlock(&x86_topo_lock);
+}
+
+static x86_pkg_t *
+x86_package_alloc(int cpu)
+{
+    x86_pkg_t  *pkg;
+    cpu_data_t *cpup;
+
+    cpup = cpu_datap(cpu);
+
+    simple_lock(&x86_topo_lock);
+    if (free_pkgs != NULL) {
+       pkg = free_pkgs;
+       free_pkgs = pkg->next;
+       pkg->next = NULL;
+       simple_unlock(&x86_topo_lock);
+    } else {
+       simple_unlock(&x86_topo_lock);
+       pkg = kalloc(sizeof(x86_pkg_t));
+       if (pkg == NULL)
+           panic("x86_package_alloc() kalloc of x86_pkg_t failed!\n");
+    }
+
+    bzero((void *) pkg, sizeof(x86_pkg_t));
+
+    pkg->ppkg_num = cpup->cpu_phys_number / topoParms.nPThreadsPerPackage;
+
+    pkg->lpkg_num = topoParms.nPackages;
+    atomic_incl((long *) &topoParms.nPackages, 1);
+
+    pkg->flags = X86PKG_FL_PRESENT | X86PKG_FL_READY;
+    return(pkg);
+}
+
+static void
+x86_package_free(x86_pkg_t *pkg)
+{
+    simple_lock(&x86_topo_lock);
+    pkg->next = free_pkgs;
+    free_pkgs = pkg;
+    atomic_decl((long *) &topoParms.nPackages, 1);
+    simple_unlock(&x86_topo_lock);
+}
+
+static void
+x86_cache_add_lcpu(x86_cpu_cache_t *cache, x86_lcpu_t *lcpu)
+{
+    x86_cpu_cache_t    *cur_cache;
+    int                        i;
+
+    /*
+     * Put the new CPU into the list of the cache.
+     */
+    cur_cache = lcpu->caches[cache->level - 1];
+    lcpu->caches[cache->level - 1] = cache;
+    cache->next = cur_cache;
+    cache->nlcpus += 1;
+    for (i = 0; i < cache->nlcpus; i += 1) {
+       if (cache->cpus[i] == NULL) {
+           cache->cpus[i] = lcpu;
+           break;
+       }
+    }
+}
+
+static void
+x86_lcpu_add_caches(x86_lcpu_t *lcpu)
 {
     x86_cpu_cache_t    *list;
     x86_cpu_cache_t    *cur;
-    x86_core_t         *cur_core;
+    x86_cpu_cache_t    *match;
+    x86_die_t          *die;
+    x86_core_t         *core;
     x86_lcpu_t         *cur_lcpu;
-    boolean_t          found;
-    int                        level;
-    int                        i;
-    uint32_t           cpu_mask;
+    uint32_t           level;
+    boolean_t          found           = FALSE;
 
-    assert(core != NULL);
     assert(lcpu != NULL);
 
     /*
@@ -328,8 +669,9 @@ x86_core_add_lcpu(x86_core_t *core, x86_lcpu_t *lcpu)
         * If the cache isn't shared then just put it where it
         * belongs.
         */
-       if (cur->nlcpus == 1) {
-           goto found_first;
+       if (cur->maxcpus == 1) {
+           x86_cache_add_lcpu(cur, lcpu);
+           continue;
        }
 
        /*
@@ -345,101 +687,131 @@ x86_core_add_lcpu(x86_core_t *core, x86_lcpu_t *lcpu)
        /*
         * This is a shared cache, so we have to figure out if
         * this is the first time we've seen this cache.  We do
-        * this by searching through the package and seeing if
-        * a related core is already describing this cache.
+        * this by searching through the topology and seeing if
+        * this cache is already described.
         *
-        * NOTE: This assumes that CPUs whose ID mod <# sharing cache>
-        * are indeed sharing the cache.
+        * Assume that L{LLC-1} are all at the core level and that
+        * LLC is shared at the die level.
         */
-       cpu_mask = lcpu->pnum & ~(cur->nlcpus - 1);
-       cur_core = core->package->cores;
-       found = FALSE;
-
-       while (cur_core != NULL && !found) {
-           cur_lcpu = cur_core->lcpus;
-           while (cur_lcpu != NULL && !found) {
-               if ((cur_lcpu->pnum & ~(cur->nlcpus - 1)) == cpu_mask) {
-                   lcpu->caches[level] = cur_lcpu->caches[level];
-                   found = TRUE;
-                   x86_cache_free(cur);
+       if (level < topoParms.LLCDepth) {
+           /*
+            * Shared at the core.
+            */
+           core = lcpu->core;
+           cur_lcpu = core->lcpus;
+           while (cur_lcpu != NULL) {
+               /*
+                * Skip ourselves.
+                */
+               if (cur_lcpu == lcpu) {
+                   cur_lcpu = cur_lcpu->next_in_core;
+                   continue;
+               }
 
-                   /*
-                    * Put the new CPU into the list of the cache.
-                    */
-                   cur = lcpu->caches[level];
-                   for (i = 0; i < cur->nlcpus; i += 1) {
-                       if (cur->cpus[i] == NULL) {
-                           cur->cpus[i] = lcpu;
-                           break;
-                       }
-                   }
+               /*
+                * If there's a cache on this logical CPU,
+                * then use that one.
+                */
+               match = x86_match_cache(cur_lcpu->caches[level], cur);
+               if (match != NULL) {
+                   x86_cache_free(cur);
+                   x86_cache_add_lcpu(match, lcpu);
+                   found = TRUE;
+                   break;
                }
-               cur_lcpu = cur_lcpu->next;
+
+               cur_lcpu = cur_lcpu->next_in_core;
            }
+       } else {
+           /*
+            * Shared at the die.
+            */
+           die = lcpu->die;
+           cur_lcpu = die->lcpus;
+           while (cur_lcpu != NULL) {
+               /*
+                * Skip ourselves.
+                */
+               if (cur_lcpu == lcpu) {
+                   cur_lcpu = cur_lcpu->next_in_die;
+                   continue;
+               }
 
-           cur_core = cur_core->next;
+               /*
+                * If there's a cache on this logical CPU,
+                * then use that one.
+                */
+               match = x86_match_cache(cur_lcpu->caches[level], cur);
+               if (match != NULL) {
+                   x86_cache_free(cur);
+                   x86_cache_add_lcpu(match, lcpu);
+                   found = TRUE;
+                   break;
+               }
+
+               cur_lcpu = cur_lcpu->next_in_die;
+           }
        }
 
+       /*
+        * If a shared cache wasn't found, then this logical CPU must
+        * be the first one encountered.
+        */
        if (!found) {
-found_first:
-           cur->next = lcpu->caches[level];
-           lcpu->caches[level] = cur;
-           cur->cpus[0] = lcpu;
+           x86_cache_add_lcpu(cur, lcpu);
        }
     }
 
-    /*
-     * Add the Logical CPU to the core.
-     */
-    lcpu->next = core->lcpus;
-    lcpu->core = core;
-    core->lcpus = lcpu;
-    core->num_lcpus += 1;
-
     simple_unlock(&x86_topo_lock);
 }
 
-static x86_pkg_t *
-x86_package_alloc(int cpu)
+static void
+x86_core_add_lcpu(x86_core_t *core, x86_lcpu_t *lcpu)
 {
-    x86_pkg_t  *pkg;
-    cpu_data_t *cpup;
-
-    cpup = cpu_datap(cpu);
+    assert(core != NULL);
+    assert(lcpu != NULL);
 
     simple_lock(&x86_topo_lock);
-    if (free_pkgs != NULL) {
-       pkg = free_pkgs;
-       free_pkgs = pkg->next;
-       pkg->next = NULL;
-       simple_unlock(&x86_topo_lock);
-    } else {
-       simple_unlock(&x86_topo_lock);
-       pkg = kalloc(sizeof(x86_pkg_t));
-       if (pkg == NULL)
-           panic("x86_package_alloc() kalloc of x86_pkg_t failed!\n");
-    }
 
-    bzero((void *) pkg, sizeof(x86_pkg_t));
+    lcpu->next_in_core = core->lcpus;
+    lcpu->core = core;
+    core->lcpus = lcpu;
+    core->num_lcpus += 1;
+    simple_unlock(&x86_topo_lock);
+}
 
-    pkg->ppkg_num = cpup->cpu_phys_number
-                   / cpuid_info()->cpuid_logical_per_package;
+static void
+x86_die_add_lcpu(x86_die_t *die, x86_lcpu_t *lcpu)
+{
+    assert(die != NULL);
+    assert(lcpu != NULL);
+    lcpu->next_in_die = die->lcpus;
+    lcpu->die = die;
+    die->lcpus = lcpu;
+}
 
-    pkg->lpkg_num = num_packages;
-    atomic_incl((long *) &num_packages, 1);
+static void
+x86_die_add_core(x86_die_t *die, x86_core_t *core)
+{
+    assert(die != NULL);
+    assert(core != NULL);
 
-    pkg->flags = X86PKG_FL_PRESENT | X86PKG_FL_READY;
-    return(pkg);
+    core->next_in_die = die->cores;
+    core->die = die;
+    die->cores = core;
+    die->num_cores += 1;
 }
 
-static void
-x86_package_free(x86_pkg_t *pkg)
+static void
+x86_package_add_lcpu(x86_pkg_t *pkg, x86_lcpu_t *lcpu)
 {
-    simple_lock(&x86_topo_lock);
-    pkg->next = free_pkgs;
-    free_pkgs = pkg;
-    atomic_decl((long *) &num_packages, 1);
-    simple_unlock(&x86_topo_lock);
+    assert(pkg != NULL);
+    assert(lcpu != NULL);
+
+    lcpu->next_in_pkg = pkg->lcpus;
+    lcpu->package = pkg;
+    pkg->lcpus = lcpu;
 }
 
 static void
@@ -448,26 +820,56 @@ x86_package_add_core(x86_pkg_t *pkg, x86_core_t *core)
     assert(pkg != NULL);
     assert(core != NULL);
 
-    core->next = pkg->cores;
+    core->next_in_pkg = pkg->cores;
     core->package = pkg;
     pkg->cores = core;
-    pkg->num_cores += 1;
+}
+
+static void
+x86_package_add_die(x86_pkg_t *pkg, x86_die_t *die)
+{
+    assert(pkg != NULL);
+    assert(die != NULL);
+
+    die->next_in_pkg = pkg->dies;
+    die->package = pkg;
+    pkg->dies = die;
+    pkg->num_dies += 1;
 }
 
 void *
 cpu_thread_alloc(int cpu)
 {
-    x86_core_t *core;
-    x86_pkg_t  *pkg;
+    x86_core_t *core           = NULL;
+    x86_die_t  *die            = NULL;
+    x86_pkg_t  *pkg            = NULL;
     cpu_data_t *cpup;
     uint32_t   phys_cpu;
 
+    /*
+     * Only allow one to manipulate the topology at a time.
+     */
+    simple_lock(&x86_topo_lock);
+
+    /*
+     * Make sure all of the topology parameters have been initialized.
+     */
+    if (!topoParmsInited)
+       initTopoParms();
+
     cpup = cpu_datap(cpu);
 
     phys_cpu = cpup->cpu_phys_number;
 
     x86_lcpu_init(cpu);
 
+     /*
+     * Allocate performance counter structure.
+     */
+    simple_unlock(&x86_topo_lock);
+    cpup->lcpu.pmc = pmc_alloc();
+    simple_lock(&x86_topo_lock);
+
     /*
      * Assume that all cpus have the same features.
      */
@@ -478,22 +880,9 @@ cpu_thread_alloc(int cpu)
     }
 
     /*
-     * Only allow one to manipulate the topology at a time.
-     */
-    simple_lock(&x86_topo_lock);
-
-    /*
-     * Get the core for this logical CPU.
+     * Get the package that the logical CPU is in.
      */
-  core_again:
-    core = x86_core_find(cpu);
-    if (core == NULL) {
-       /*
-        * Core structure hasn't been created yet, do it now.
-        *
-        * Get the package that the core is part of.
-        */
-      package_again:
+    do {
        pkg = x86_package_find(cpu);
        if (pkg == NULL) {
            /*
@@ -504,7 +893,7 @@ cpu_thread_alloc(int cpu)
            simple_lock(&x86_topo_lock);
            if (x86_package_find(cpu) != NULL) {
                x86_package_free(pkg);
-               goto package_again;
+               continue;
            }
            
            /*
@@ -513,31 +902,58 @@ cpu_thread_alloc(int cpu)
            pkg->next = x86_pkgs;
            x86_pkgs = pkg;
        }
+    } while (pkg == NULL);
 
-       /*
-        * Allocate the core structure now.
-        */
-       simple_unlock(&x86_topo_lock);
-       core = x86_core_alloc(cpu);
-       simple_lock(&x86_topo_lock);
-       if (x86_core_find(cpu) != NULL) {
-           x86_core_free(core);
-           goto core_again;
+    /*
+     * Get the die that the logical CPU is in.
+     */
+    do {
+       die = x86_die_find(cpu);
+       if (die == NULL) {
+           /*
+            * Die structure hasn't been created yet, do it now.
+            */
+           simple_unlock(&x86_topo_lock);
+           die = x86_die_alloc(cpu);
+           simple_lock(&x86_topo_lock);
+           if (x86_die_find(cpu) != NULL) {
+               x86_die_free(die);
+               continue;
+           }
+
+           /*
+            * Add the die to the package.
+            */
+           x86_package_add_die(pkg, die);
        }
+    } while (die == NULL);
 
-       /*
-        * Add it to the package.
-        */
-       x86_package_add_core(pkg, core);
-       machine_info.physical_cpu_max += 1;
+    /*
+     * Get the core for this logical CPU.
+     */
+    do {
+       core = x86_core_find(cpu);
+       if (core == NULL) {
+           /*
+            * Allocate the core structure now.
+            */
+           simple_unlock(&x86_topo_lock);
+           core = x86_core_alloc(cpu);
+           simple_lock(&x86_topo_lock);
+           if (x86_core_find(cpu) != NULL) {
+               x86_core_free(core);
+               continue;
+           }
+
+           /*
+            * Add the core to the die & package.
+            */
+           x86_die_add_core(die, core);
+           x86_package_add_core(pkg, core);
+           machine_info.physical_cpu_max += 1;
+       }
+    } while (core == NULL);
 
-       /*
-        * Allocate performance counter structure.
-        */
-       simple_unlock(&x86_topo_lock);
-       core->pmc = pmc_alloc();
-       simple_lock(&x86_topo_lock);
-    }
     
     /*
      * Done manipulating the topology, so others can get in.
@@ -545,7 +961,13 @@ cpu_thread_alloc(int cpu)
     machine_info.logical_cpu_max += 1;
     simple_unlock(&x86_topo_lock);
 
+    /*
+     * Add the logical CPU to the other topology structures.
+     */
     x86_core_add_lcpu(core, &cpup->lcpu);
+    x86_die_add_lcpu(core->die, &cpup->lcpu);
+    x86_package_add_lcpu(core->package, &cpup->lcpu);
+    x86_lcpu_add_caches(&cpup->lcpu);
 
     return (void *) core;
 }
@@ -553,10 +975,10 @@ cpu_thread_alloc(int cpu)
 void
 cpu_thread_init(void)
 {
-    int                my_cpu  = get_cpu_number();
-    cpu_data_t *cpup   = current_cpu_datap();
+    int                my_cpu          = get_cpu_number();
+    cpu_data_t *cpup           = current_cpu_datap();
     x86_core_t *core;
-    static int initialized = 0;
+    static int initialized     = 0;
 
     /*
      * If we're the boot processor, we do all of the initialization of
@@ -582,8 +1004,6 @@ cpu_thread_init(void)
     if (core->active_lcpus == 0)
        machine_info.physical_cpu += 1;
     core->active_lcpus += 1;
-    cpup->lcpu.halted = FALSE;
-    cpup->lcpu.idle   = FALSE;
     simple_unlock(&x86_topo_lock);
 
     pmCPUMarkRunning(cpup);
@@ -602,7 +1022,6 @@ cpu_thread_halt(void)
 
     simple_lock(&x86_topo_lock);
     machine_info.logical_cpu -= 1;
-    cpup->lcpu.idle   = TRUE;
     core = cpup->lcpu.core;
     core->active_lcpus -= 1;
     if (core->active_lcpus == 0)
@@ -619,3 +1038,62 @@ cpu_thread_halt(void)
     }
     /* NOT REACHED */
 }
+
+#if TOPO_DEBUG
+/*
+ * Prints out the topology
+ */
+void
+debug_topology_print(void)
+{
+    x86_pkg_t          *pkg;
+    x86_die_t          *die;
+    x86_core_t         *core;
+    x86_lcpu_t         *cpu;
+
+    pkg = x86_pkgs;
+    while (pkg != NULL) {
+       kprintf("Package:\n");
+       kprintf("    Physical: %d\n", pkg->ppkg_num);
+       kprintf("    Logical:  %d\n", pkg->lpkg_num);
+
+       die = pkg->dies;
+       while (die != NULL) {
+           kprintf("    Die:\n");
+           kprintf("        Physical: %d\n", die->pdie_num);
+           kprintf("        Logical:  %d\n", die->ldie_num);
+
+           core = die->cores;
+           while (core != NULL) {
+               kprintf("        Core:\n");
+               kprintf("            Physical: %d\n", core->pcore_num);
+               kprintf("            Logical:  %d\n", core->lcore_num);
+
+               cpu = core->lcpus;
+               while (cpu != NULL) {
+                   kprintf("            LCPU:\n");
+                   kprintf("                CPU #:    %d\n", cpu->cpu_num);
+                   kprintf("                Physical: %d\n", cpu->pnum);
+                   kprintf("                Logical:  %d\n", cpu->lnum);
+                   kprintf("                Flags:    ");
+                   if (cpu->master)
+                       kprintf("MASTER ");
+                   if (cpu->primary)
+                       kprintf("PRIMARY");
+                   if (!cpu->master && !cpu->primary)
+                       kprintf("(NONE)");
+                   kprintf("\n");
+
+                   cpu = cpu->next_in_core;
+               }
+
+               core = core->next_in_die;
+           }
+
+           die = die->next_in_pkg;
+       }
+
+       pkg = pkg->next;
+    }
+}
+#endif /* TOPO_DEBUG */
index 8208cc7ca4b2e8e1d46c5e308276b34f3765b45b..dca8b4016b7d0de4ede118c16eeab5d7dfad649f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 
 #define cpu_to_lcpu(cpu)               ((cpu_datap(cpu) != NULL) ? _cpu_to_lcpu(cpu) : NULL)
 #define cpu_to_core(cpu)               ((cpu_to_lcpu(cpu) != NULL) ? _cpu_to_lcpu(cpu)->core : NULL)
-#define cpu_to_package(cpu)            ((cpu_to_core(cpu) != NULL) ? _cpu_to_core(cpu)->package : NULL)
+#define cpu_to_die(cpu)                        ((cpu_to_lcpu(cpu) != NULL) ? _cpu_to_lcpu(cpu)->die : NULL)
+#define cpu_to_package(cpu)            ((cpu_to_lcpu(cpu) != NULL) ? _cpu_to_lcpu(cpu)->package : NULL)
 
 /* Fast access: */
 #define x86_lcpu()                     (&current_cpu_datap()->lcpu)
 #define x86_core()                     (x86_lcpu()->core)
-#define x86_package()                  (x86_core()->package)
+#define x86_die()                      (x86_lcpu()->die)
+#define x86_package()                  (x86_lcpu()->package)
 
 #define cpu_is_same_core(cpu1,cpu2)    (cpu_to_core(cpu1) == cpu_to_core(cpu2))
+#define cpu_is_same_die(cpu1,cpu2)     (cpu_to_die(cpu1) == cpu_to_die(cpu2))
 #define cpu_is_same_package(cpu1,cpu2) (cpu_to_package(cpu1) == cpu_to_package(cpu2))
 #define cpus_share_cache(cpu1,cpu2,_cl) (cpu_to_lcpu(cpu1)->caches[_cl] == cpu_to_lcpu(cpu2)->caches[_cl])
 
@@ -65,4 +68,11 @@ extern void *cpu_thread_alloc(int);
 extern void cpu_thread_init(void);
 extern void cpu_thread_halt(void);
 
+extern void x86_set_lcpu_numbers(x86_lcpu_t *lcpu);
+extern void x86_set_core_numbers(x86_core_t *core, x86_lcpu_t *lcpu);
+extern void x86_set_die_numbers(x86_die_t *die, x86_lcpu_t *lcpu);
+extern void x86_set_pkg_numbers(x86_pkg_t *pkg, x86_lcpu_t *lcpu);
+
+extern x86_topology_parameters_t       topoParms;
+
 #endif /* _I386_CPU_THREADS_H_ */
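
A hypothetical sketch of the new per-level macros in use: with dies in the
hierarchy, affinity checks can now distinguish "same core" (SMT sibling) from
merely "same die with a shared LLC". pick_sibling() and LLC_LEVEL are
illustrative names only, not XNU symbols:

    static int
    pick_sibling(int this_cpu, int candidate)
    {
        if (cpu_is_same_core(this_cpu, candidate))
            return candidate;                 /* SMT sibling: cheapest */
        if (cpu_is_same_die(this_cpu, candidate) &&
            cpus_share_cache(this_cpu, candidate, LLC_LEVEL))
            return candidate;                 /* shares the LLC */
        return this_cpu;                      /* stay put */
    }
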
index 56b3b43a9db4130f95e50fc8b4bcdf44fb924255..6e823c98057f97aaa1f9c5cc902c76cb878445d9 100644 (file)
@@ -36,7 +36,7 @@
 #include <i386/machine_cpu.h>
 #include <i386/machine_routines.h>
 #include <i386/lock.h>
-#include <i386/mp.h>
+#include <i386/lapic.h>
 
 //#define TOPO_DEBUG 1
 #if TOPO_DEBUG
@@ -44,6 +44,7 @@
 #else
 #define DBG(x...)
 #endif
+void debug_topology_print(void);
 
 __private_extern__ void qsort(
     void * array,
@@ -116,6 +117,13 @@ cpu_topology_start(void)
         */
        for (i = 1; i < ncpus; i++) {
                cpu_data_t      *cpup = cpu_datap(i);
+               x86_core_t      *core = cpup->lcpu.core;
+               x86_die_t       *die  = cpup->lcpu.die;
+               x86_pkg_t       *pkg  = cpup->lcpu.package;
+
+               assert(core != NULL);
+               assert(die != NULL);
+               assert(pkg != NULL);
 
                if (cpup->cpu_number != i) {
                        kprintf("cpu_datap(%d):0x%08x local apic id 0x%x "
@@ -124,27 +132,37 @@ cpu_topology_start(void)
                                cpup->cpu_number);
                }
                cpup->cpu_number = i;
-               cpup->lcpu.lnum = i;
+               cpup->lcpu.cpu_num = i;
+               cpup->lcpu.pnum = cpup->cpu_phys_number;
                lapic_cpu_map(cpup->cpu_phys_number, i);
+               x86_set_lcpu_numbers(&cpup->lcpu);
+               x86_set_core_numbers(core, &cpup->lcpu);
+               x86_set_die_numbers(die, &cpup->lcpu);
+               x86_set_pkg_numbers(pkg, &cpup->lcpu);
        }
 
+#if TOPO_DEBUG
+       debug_topology_print();
+#endif /* TOPO_DEBUG */
+
        ml_set_interrupts_enabled(istate);
+       DBG("cpu_topology_start() LLC is L%d\n", topoParms.LLCDepth + 1);
 
        /*
         * Iterate over all logical cpus finding or creating the affinity set
-        * for their L2 cache. Each affinity set possesses a processor set
+        * for their LLC cache. Each affinity set possesses a processor set
         * into which each logical processor is added.
         */
        DBG("cpu_topology_start() creating affinity sets:\n");
        for (i = 0; i < ncpus; i++) {
                cpu_data_t              *cpup = cpu_datap(i);
                x86_lcpu_t              *lcpup = cpu_to_lcpu(i);
-               x86_cpu_cache_t         *L2_cachep;
+               x86_cpu_cache_t         *LLC_cachep;
                x86_affinity_set_t      *aset;
 
-               L2_cachep = lcpup->caches[CPU_CACHE_DEPTH_L2];
-               assert(L2_cachep->type == CPU_CACHE_TYPE_UNIF);
-               aset = find_cache_affinity(L2_cachep); 
+               LLC_cachep = lcpup->caches[topoParms.LLCDepth];
+               assert(LLC_cachep->type == CPU_CACHE_TYPE_UNIF);
+               aset = find_cache_affinity(LLC_cachep); 
                if (aset == NULL) {
                        aset = (x86_affinity_set_t *) kalloc(sizeof(*aset));
                        if (aset == NULL)
@@ -152,7 +170,7 @@ cpu_topology_start(void)
                        aset->next = x86_affinities;
                        x86_affinities = aset;
                        aset->num = x86_affinity_count++;
-                       aset->cache = L2_cachep;
+                       aset->cache = LLC_cachep;
                        aset->pset = (i == master_cpu) ?
                                        processor_pset(master_processor) :
                                        pset_create(pset_node_root());
@@ -163,7 +181,7 @@ cpu_topology_start(void)
                }
 
                DBG("\tprocessor_init set %p(%d) lcpup %p(%d) cpu %p processor %p\n",
-                       aset, aset->num, lcpup, lcpup->lnum, cpup, cpup->cpu_processor);
+                       aset, aset->num, lcpup, lcpup->cpu_num, cpup, cpup->cpu_processor);
 
                if (i != master_cpu)
                        processor_init(cpup->cpu_processor, i, aset->pset);
@@ -222,8 +240,7 @@ ml_affinity_to_pset(uint32_t affinity_num)
                if (affinity_num == aset->num)
                        break;
        }
-       return (aset == NULL) ? PROCESSOR_SET_NULL : aset->pset;                        
-       
+       return (aset == NULL) ? PROCESSOR_SET_NULL : aset->pset;
 }
 
 uint64_t
@@ -233,7 +250,7 @@ ml_cpu_cache_size(unsigned int level)
 
        if (level == 0) {
                return machine_info.max_mem;
-       } else if ( 1 <= level && level <= 3) {
+       } else if ( 1 <= level && level <= MAX_CACHE_DEPTH) {
                cachep = current_cpu_datap()->lcpu.caches[level-1];
                return cachep ? cachep->cache_size : 0;
        } else {
@@ -248,7 +265,7 @@ ml_cpu_cache_sharing(unsigned int level)
 
        if (level == 0) {
                return machine_info.max_cpus;
-       } else if ( 1 <= level && level <= 3) {
+       } else if ( 1 <= level && level <= MAX_CACHE_DEPTH) {
                cachep = current_cpu_datap()->lcpu.caches[level-1];
                return cachep ? cachep->nlcpus : 0;
        } else {
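
Since ml_cpu_cache_size() and ml_cpu_cache_sharing() now accept any level up
to MAX_CACHE_DEPTH rather than a hard-coded 3, a caller can probe the whole
hierarchy generically. A sketch (assumes a kernel context where kprintf is
available; dump_cache_levels is an illustrative name):

    void
    dump_cache_levels(void)
    {
        unsigned int level;

        for (level = 1; level <= MAX_CACHE_DEPTH; level++) {
            uint64_t size = ml_cpu_cache_size(level);
            if (size == 0)
                break;              /* no cache at this depth */
            kprintf("L%u: %llu bytes, shared by %llu lcpus\n",
                    level, size, ml_cpu_cache_sharing(level));
        }
    }
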
index 86729b9353d0a6f4b9e3a5fb7816c029e581b07a..f5cbbefb4d556082175467c2f59faca779686399 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -45,6 +45,7 @@
 typedef struct x86_cpu_cache
 {
     struct x86_cpu_cache *next;                /* next cache at this level/lcpu */
+    struct x86_die     *die;           /* die containing this cache (only for LLC) */
     uint8_t            maxcpus;        /* maximum # of cpus that can share */
     uint8_t            nlcpus;         /* # of logical cpus sharing this cache */
     uint8_t            type;           /* type of cache */
@@ -68,22 +69,83 @@ typedef struct x86_cpu_cache
 
 struct pmc;
 struct cpu_data;
+struct mca_state;
 
+/*
+ * Define the states that a (logical) CPU can be in.
+ *
+ * LCPU_OFF    This indicates that the CPU is "off".  It requires a full
+ *             restart.  This is the state of a CPU when the system first
+ *             boots or when it comes out of "sleep" (aka S3/S5).
+ *
+ * LCPU_HALT   This indicates that the CPU has been "halted".  It has been
+ *             removed from the system but still retains its internal state
+ *             so that it can be quickly brought back on-line.
+ *
+ * LCPU_NONSCHED       This indicates that the CPU is not schedulable.  It
+ *             will still appear in the system as a viable CPU; however, no
+ *             work will be scheduled on it.
+ *
+ * LCPU_PAUSE  This indicates that the CPU is "paused".  This is usually
+ *             done only during kernel debug.
+ *
+ * LCPU_IDLE   This indicates that the CPU is idle.  The scheduler has
+ *             determined that there is no work for this CPU to do.
+ *
+ * LCPU_RUN    This indicates that the CPU is running code and performing work.
+ *
+ * In normal system operation, CPUs will usually be transitioning between
+ * LCPU_IDLE and LCPU_RUN.
+ */
+typedef enum lcpu_state
+{
+    LCPU_OFF           = 0,    /* 0 so the right thing happens on boot */
+    LCPU_HALT          = 1,
+    LCPU_NONSCHED      = 2,
+    LCPU_PAUSE         = 3,
+    LCPU_IDLE          = 4,
+    LCPU_RUN           = 5,
+} lcpu_state_t;
+
+/*
+ * In each topology structure there are two numbers: a logical number and a
+ * physical number.
+ *
+ * The logical numbers represent the ID of that structure
+ * relative to the enclosing structure and always start at 0.  So when using
+ * logical numbers, it is necessary to specify all elements in the topology
+ * (i.e., to "name" a logical CPU using logical numbers, 4 numbers are required:
+ * package, die, core, logical CPU).
+ *
+ * The physical numbers represent the ID of that structure and are unique (for
+ * that structure) across the entire topology.
+ *
+ * The logical CPU structure contains a third number which is the CPU number.
+ * This number is identical to the CPU number used in other parts of the kernel.
+ */
 typedef struct x86_lcpu
 {
-    struct x86_lcpu    *next;  /* next logical cpu in core */
-    struct x86_lcpu    *lcpu;  /* pointer back to self */
-    struct x86_core    *core;  /* core containing the logical cpu */
-    struct cpu_data    *cpu;   /* cpu_data structure */
-    uint32_t           lnum;   /* logical cpu number */
-    uint32_t           pnum;   /* physical cpu number */
-    boolean_t          master; /* logical cpu is the master (boot) CPU */
-    boolean_t          primary;/* logical cpu is primary CPU in package */
-    boolean_t          halted; /* logical cpu is halted */
-    boolean_t          idle;   /* logical cpu is idle */
-    uint64_t           rtcPop; /* when etimer wants a timer pop */
+    struct x86_lcpu    *next_in_core;  /* next logical cpu in core */
+    struct x86_lcpu    *next_in_die;   /* next logical cpu in die */
+    struct x86_lcpu    *next_in_pkg;   /* next logical cpu in package */
+    struct x86_lcpu    *lcpu;          /* pointer back to self */
+    struct x86_core    *core;          /* core containing the logical cpu */
+    struct x86_die     *die;           /* die containing the logical cpu */
+    struct x86_pkg     *package;       /* package containing the logical cpu */
+    struct cpu_data    *cpu;           /* cpu_data structure */
+    uint32_t           cpu_num;        /* cpu number */
+    uint32_t           lnum;           /* logical cpu number (within core) */
+    uint32_t           pnum;           /* physical cpu number */
+    boolean_t          master;         /* logical cpu is the master (boot) CPU */
+    boolean_t          primary;        /* logical cpu is primary CPU in package */
+    volatile lcpu_state_t      state;  /* state of the logical CPU */
+    volatile boolean_t stopped;        /* used to indicate that the CPU has "stopped" */
+    uint64_t           rtcPop;         /* when etimer wants a timer pop */
     uint64_t           rtcDeadline;
     x86_cpu_cache_t    *caches[MAX_CACHE_DEPTH];
+    struct pmc         *pmc;           /* Pointer to perfmon data */
+    void               *pmStats;       /* Power management stats for lcpu */
+    void               *pmState;       /* Power management state for lcpu */
 } x86_lcpu_t;
 
 #define X86CORE_FL_PRESENT     0x80000000      /* core is present */
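
A small sketch of the state model documented above: per the active_lcpus
comment in x86_core below, only LCPU_IDLE and LCPU_RUN count as "active"
(the helper is illustrative, not an XNU function):

    static boolean_t
    lcpu_is_active(x86_lcpu_t *lcpu)
    {
        lcpu_state_t s = lcpu->state;
        return (s == LCPU_IDLE || s == LCPU_RUN);
    }
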
@@ -93,25 +155,38 @@ typedef struct x86_lcpu
 
 typedef struct x86_core
 {
-    struct x86_core    *next;          /* next core in package */
-    struct x86_lcpu    *lcpus;         /* list of logical cpus in core */
+    struct x86_core    *next_in_die;   /* next core in die */
+    struct x86_core    *next_in_pkg;   /* next core in package */
+    struct x86_die     *die;           /* die containing the core */
     struct x86_pkg     *package;       /* package containing core */
+    struct x86_lcpu    *lcpus;         /* list of logical cpus in core */
     uint32_t           flags;
-    uint32_t           lcore_num;      /* logical core # (unique to package) */
+    uint32_t           lcore_num;      /* logical core # (unique within die) */
     uint32_t           pcore_num;      /* physical core # (globally unique) */
     uint32_t           num_lcpus;      /* Number of logical cpus */
-    uint32_t           active_lcpus;   /* Number of non-halted cpus */
-    struct pmc         *pmc;           /* Pointer to perfmon data */
-    struct hpetTimer   *Hpet;          /* Address of the HPET for this core */
-    uint32_t           HpetVec;        /* Interrupt vector for HPET */
-    uint64_t           HpetInt;        /* Number of HPET Interrupts */
-    uint64_t           HpetCmp;        /* HPET Comparitor */
-    uint64_t           HpetCfg;        /* HPET configuration */
-    uint64_t           HpetTime;
+    uint32_t           active_lcpus;   /* Number of {running, idle} cpus */
     void               *pmStats;       /* Power management stats for core */
     void               *pmState;       /* Power management state for core */
 } x86_core_t;
 
+#define X86DIE_FL_PRESENT      0x80000000      /* die is present */
+#define X86DIE_FL_READY                0x40000000      /* die struct is init'd */
+
+typedef struct x86_die
+{
+    struct x86_die     *next_in_pkg;   /* next die in package */
+    struct x86_lcpu    *lcpus;         /* list of lcpus in die */
+    struct x86_core    *cores;         /* list of cores in die */
+    struct x86_pkg     *package;       /* package containing the die */
+    uint32_t           flags;
+    uint32_t           ldie_num;       /* logical die # (unique to package) */
+    uint32_t           pdie_num;       /* physical die # (globally unique) */
+    uint32_t           num_cores;      /* Number of cores in die */
+    x86_cpu_cache_t    *LLC;           /* LLC contained in this die */
+    void               *pmStats;       /* Power Management stats for die */
+    void               *pmState;       /* Power Management state for die */
+} x86_die_t;
+
 #define X86PKG_FL_PRESENT      0x80000000      /* package is present */
 #define X86PKG_FL_READY                0x40000000      /* package struct init'd */
 #define X86PKG_FL_HAS_HPET     0x10000000      /* package has HPET assigned */
@@ -121,27 +196,43 @@ typedef struct x86_core
 typedef struct x86_pkg
 {
     struct x86_pkg     *next;          /* next package */
+    struct x86_lcpu    *lcpus;         /* list of logical cpus in package */
     struct x86_core    *cores;         /* list of cores in package */
+    struct x86_die     *dies;          /* list of dies in package */
     uint32_t           flags;
     uint32_t           lpkg_num;       /* logical package # */
     uint32_t           ppkg_num;       /* physical package # */
-    uint32_t           num_cores;      /* number of cores in package */
-    struct hpetTimer   *Hpet;          /* address of HPET for this package */
-    uint32_t           HpetVec;        /* Interrupt vector for HPET */
-    uint64_t           HpetInt;        /* Number of HPET interrupts */
-    uint64_t           HpetCmp;        /* HPET comparitor */
-    uint64_t           HpetCfg;        /* HPET configuration */
-    uint64_t           HpetTime;
+    uint32_t           num_dies;       /* number of dies in package */
     void               *pmStats;       /* Power Management stats for package*/
     void               *pmState;       /* Power Management state for package*/
+    struct mca_state   *mca_state;     /* MCA state for memory errors */
 } x86_pkg_t;
 
 extern x86_pkg_t       *x86_pkgs;      /* root of all CPU packages */
+typedef struct x86_topology_parameters
+{
+    uint32_t           LLCDepth;
+    uint32_t           nCoresSharingLLC;
+    uint32_t           nLCPUsSharingLLC;
+    uint32_t           maxSharingLLC;
+    uint32_t           nLThreadsPerCore;
+    uint32_t           nPThreadsPerCore;
+    uint32_t           nLCoresPerDie;
+    uint32_t           nPCoresPerDie;
+    uint32_t           nLDiesPerPackage;
+    uint32_t           nPDiesPerPackage;
+    uint32_t           nLThreadsPerDie;
+    uint32_t           nPThreadsPerDie;
+    uint32_t           nLThreadsPerPackage;
+    uint32_t           nPThreadsPerPackage;
+    uint32_t           nLCoresPerPackage;
+    uint32_t           nPCoresPerPackage;
+    uint32_t           nPackages;
+} x86_topology_parameters_t;
 
 /* Called after cpu discovery */
 extern void            cpu_topology_start(void);
 
-extern int idlehalt;
-
 #endif /* _I386_CPU_TOPOLOGY_H_ */
 #endif /* KERNEL_PRIVATE */
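
An illustration of the two numbering schemes described in the new header
comments: a logical ID is unique only within its enclosing structure, so fully
naming a logical CPU takes the whole package/die/core/thread tuple, while a
physical ID stands alone. A sketch using only fields defined above
(print_lcpu_name itself is hypothetical):

    void
    print_lcpu_name(x86_lcpu_t *lcpu)
    {
        /* Logical "name": the full 4-tuple */
        kprintf("cpu %d = pkg %d / die %d / core %d / thread %d\n",
                lcpu->cpu_num,
                lcpu->package->lpkg_num,
                lcpu->die->ldie_num,
                lcpu->core->lcore_num,
                lcpu->lnum);

        /* Physical ID: globally unique on its own */
        kprintf("cpu %d physical id = %d\n", lcpu->cpu_num, lcpu->pnum);
    }
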
index a23ed95dfe8c601bfdb8a6d3a20c5b4a2f752869..f9c58c5bb02a41ff41b18671a4514d3bb8f21697 100644 (file)
@@ -213,6 +213,76 @@ cpuid_set_cache_info( i386_cpu_info_t * info_p )
        else if (linesizes[L1D])
                info_p->cache_linesize = linesizes[L1D];
        else panic("no linesize");
+
+       /*
+        * Extract and publish TLB information.
+        */
+       for (i = 1; i < sizeof(info_p->cache_info); i++) {
+               uint8_t         desc = info_p->cache_info[i];
+
+               switch (desc) {
+               case CPUID_CACHE_ITLB_4K_32_4:
+                       info_p->cpuid_itlb_small = 32;
+                       break;
+               case CPUID_CACHE_ITLB_4M_2:
+                       info_p->cpuid_itlb_large = 2;
+                       break;
+               case CPUID_CACHE_DTLB_4K_64_4:
+                       info_p->cpuid_dtlb_small = 64;
+                       break;
+               case CPUID_CACHE_DTLB_4M_8_4:
+                       info_p->cpuid_dtlb_large = 8;
+                       break;
+               case CPUID_CACHE_DTLB_4M_32_4:
+                       info_p->cpuid_dtlb_large = 32;
+                       break;
+               case CPUID_CACHE_ITLB_64:
+                       info_p->cpuid_itlb_small = 64;
+                       info_p->cpuid_itlb_large = 64;
+                       break;
+               case CPUID_CACHE_ITLB_128:
+                       info_p->cpuid_itlb_small = 128;
+                       info_p->cpuid_itlb_large = 128;
+                       break;
+               case CPUID_CACHE_ITLB_256:
+                       info_p->cpuid_itlb_small = 256;
+                       info_p->cpuid_itlb_large = 256;
+                       break;
+               case CPUID_CACHE_DTLB_64:
+                       info_p->cpuid_dtlb_small = 64;
+                       info_p->cpuid_dtlb_large = 64;
+                       break;
+               case CPUID_CACHE_DTLB_128:
+                       info_p->cpuid_dtlb_small = 128;
+                       info_p->cpuid_dtlb_large = 128;
+                       break;
+               case CPUID_CACHE_DTLB_256:
+                       info_p->cpuid_dtlb_small = 256;
+                       info_p->cpuid_dtlb_large = 256;
+                       break;
+               case CPUID_CACHE_ITLB_4M2M_7:
+                       info_p->cpuid_itlb_large = 7;
+                       break;
+               case CPUID_CACHE_DTLB_4K_16_4:
+                       info_p->cpuid_dtlb_small = 16;
+                       break;
+               case CPUID_CACHE_DTLB_4M2M_32_4:
+                       info_p->cpuid_dtlb_large = 32;
+                       break;
+               case CPUID_CACHE_ITLB_4K_128_4:
+                       info_p->cpuid_itlb_small = 128;
+                       break;
+               case CPUID_CACHE_ITLB_4M_8:
+                       info_p->cpuid_itlb_large = 8;
+                       break;
+               case CPUID_CACHE_DTLB_4K_128_4:
+                       info_p->cpuid_dtlb_small = 128;
+                       break;
+               case CPUID_CACHE_DTLB_4K_256_4:
+                       info_p->cpuid_dtlb_small = 256;
+                       break;
+               }
+       }
 }
 
 static void
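
For context, the descriptor bytes walked by the switch above come from CPUID
leaf 2, which packs one-byte cache/TLB descriptors into EAX..EDX (a background
note, not taken from this file). A hedged sketch of gathering such bytes,
assuming xnu's do_cpuid() helper:

    uint32_t regs[4];

    do_cpuid(2, regs);          /* leaf 2: cache/TLB descriptors */
    /* The low byte of EAX is a repeat count; every other byte whose
     * register has bit 31 clear is one descriptor, e.g. the
     * CPUID_CACHE_* values matched above. */
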
@@ -291,7 +361,7 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
        /* Fold extensions into family/model */
        if (info_p->cpuid_family == 0x0f)
                info_p->cpuid_family += info_p->cpuid_extfamily;
-       if (info_p->cpuid_family == 0x0f || info_p->cpuid_family== 0x06)
+       if (info_p->cpuid_family == 0x0f || info_p->cpuid_family == 0x06)
                info_p->cpuid_model += (info_p->cpuid_extmodel << 4);
 
        if (info_p->cpuid_features & CPUID_FEATURE_HTT)
@@ -306,7 +376,7 @@ cpuid_set_generic_info(i386_cpu_info_t *info_p)
                                quad(cpuid_reg[ecx], cpuid_reg[edx]);
        }
 
-       if (info_p->cpuid_extfeatures && CPUID_FEATURE_MONITOR) {
+       if (info_p->cpuid_features & CPUID_FEATURE_MONITOR) {
                /*
                 * Extract the Monitor/Mwait Leaf info:
                 */
@@ -373,6 +443,13 @@ cpuid_set_info(void)
 
        cpuid_set_cache_info(&cpuid_cpu_info);
 
+       if (cpuid_cpu_info.core_count == 0) {
+               cpuid_cpu_info.core_count =
+                       cpuid_cpu_info.cpuid_cores_per_package;
+               cpuid_cpu_info.thread_count =
+                       cpuid_cpu_info.cpuid_logical_per_package;
+       }
+
        cpuid_cpu_info.cpuid_model_string = ""; /* deprecated */
 }
 
@@ -422,6 +499,7 @@ static struct {
        {CPUID_FEATURE_PDCM,    "PDCM"},
        {CPUID_FEATURE_SSE4_1,  "SSE4.1"},
        {CPUID_FEATURE_SSE4_2,  "SSE4.2"},
+       {CPUID_FEATURE_xAPIC,   "xAPIC"},
        {CPUID_FEATURE_POPCNT,  "POPCNT"},
        {0, 0}
 },
@@ -436,7 +514,7 @@ extfeature_map[] = {
 i386_cpu_info_t        *
 cpuid_info(void)
 {
-       /* Set-up the cpuid_indo stucture lazily */
+       /* Set up the cpuid_info structure lazily */
        if (cpuid_cpu_infop == NULL) {
                cpuid_set_info();
                cpuid_cpu_infop = &cpuid_cpu_info;
@@ -571,12 +649,12 @@ uint64_t
 cpuid_features(void)
 {
        static int checked = 0;
-       char    fpu_arg[16] = { 0 };
+       char    fpu_arg[20] = { 0 };
 
        (void) cpuid_info();
        if (!checked) {
                    /* check for boot-time fpu limitations */
-                       if (PE_parse_boot_arg("_fpu", &fpu_arg[0])) {
+                       if (PE_parse_boot_argn("_fpu", &fpu_arg[0], sizeof (fpu_arg))) {
                                printf("limiting fpu features to: %s\n", fpu_arg);
                                if (!strncmp("387", fpu_arg, sizeof("387")) || !strncmp("mmx", fpu_arg, sizeof("mmx"))) {
                                        printf("no sse or sse2\n");
index efc6bb239fcc76fbe6ff04d5ac7f00e1ce7e0aa5..34eed7b4df0b210f8d530a2ae35e654a17de4812 100644 (file)
 #define CPUID_FEATURE_CX16    _HBit(13)        /* CmpXchg16b instruction */
 #define CPUID_FEATURE_xTPR    _HBit(14)        /* Send Task PRiority msgs */
 #define CPUID_FEATURE_PDCM    _HBit(15)        /* Perf/Debug Capability MSR */
+#define CPUID_FEATURE_DCA     _HBit(18)        /* Direct Cache Access */
 #define CPUID_FEATURE_SSE4_1  _HBit(19)        /* Streaming SIMD extensions 4.1 */
-#define CPUID_FEATURE_SSE4_2  _HBit(20)        /* Streaming SIMD extensions 4.1 */
+#define CPUID_FEATURE_SSE4_2  _HBit(20)        /* Streaming SIMD extensions 4.2 */
+#define CPUID_FEATURE_xAPIC   _HBit(21)        /* Extended APIC Mode */
 #define CPUID_FEATURE_POPCNT  _HBit(23)        /* POPCNT instruction */
 
 /*
 #define        CPUID_CACHE_SIZE        16      /* Number of descriptor values */
 
 #define        CPUID_CACHE_NULL           0x00 /* NULL */
-#define        CPUID_CACHE_ITLB_4K        0x01 /* Instruction TLB: 4K pages */
-#define        CPUID_CACHE_ITLB_4M        0x02 /* Instruction TLB: 4M pages */
-#define        CPUID_CACHE_DTLB_4K        0x03 /* Data TLB: 4K pages */
-#define        CPUID_CACHE_DTLB_4M        0x04 /* Data TLB: 4M pages */
-#define        CPUID_CACHE_ICACHE_8K      0x06 /* Instruction cache: 8K */
-#define        CPUID_CACHE_ICACHE_16K     0x08 /* Instruction cache: 16K */
-#define        CPUID_CACHE_DCACHE_8K      0x0A /* Data cache: 8K */
-#define        CPUID_CACHE_DCACHE_16K     0x0C /* Data cache: 16K */
+#define        CPUID_CACHE_ITLB_4K_32_4   0x01 /* Inst TLB: 4K pages, 32 ents, 4-way */
+#define        CPUID_CACHE_ITLB_4M_2      0x02 /* Inst TLB: 4M pages, 2 ents */
+#define        CPUID_CACHE_DTLB_4K_64_4   0x03 /* Data TLB: 4K pages, 64 ents, 4-way */
+#define        CPUID_CACHE_DTLB_4M_8_4    0x04 /* Data TLB: 4M pages, 8 ents, 4-way */
+#define        CPUID_CACHE_DTLB_4M_32_4   0x05 /* Data TLB: 4M pages, 32 ents, 4-way */
+#define        CPUID_CACHE_L1I_8K         0x06 /* Icache: 8K */
+#define        CPUID_CACHE_L1I_16K        0x08 /* Icache: 16K */
+#define        CPUID_CACHE_L1I_32K        0x09 /* Icache: 32K, 4-way, 64 bytes */
+#define        CPUID_CACHE_L1D_8K         0x0A /* Dcache: 8K */
+#define        CPUID_CACHE_L1D_16K        0x0C /* Dcache: 16K */
+#define        CPUID_CACHE_L1D_16K_4_32   0x0D /* Dcache: 16K, 4-way, 64 byte, ECC */
+#define CPUID_CACHE_L2_256K_8_64   0x21 /* L2: 256K, 8-way, 64 bytes */
 #define CPUID_CACHE_L3_512K        0x22 /* L3: 512K */
 #define CPUID_CACHE_L3_1M          0x23 /* L3: 1M */
 #define CPUID_CACHE_L3_2M          0x25 /* L3: 2M */
 #define CPUID_CACHE_L3_4M          0x29 /* L3: 4M */
-#define CPUID_CACHE_DCACHE_32K     0x2C /* Data cache: 32K, 8-way */
-#define CPUID_CACHE_ICACHE_32K     0x30 /* Instruction cache: 32K, 8-way */
+#define CPUID_CACHE_L1D_32K_8      0x2C /* Dcache: 32K, 8-way, 64 byte */
+#define CPUID_CACHE_L1I_32K_8      0x30 /* Icache: 32K, 8-way */
 #define CPUID_CACHE_L2_128K_S4     0x39 /* L2: 128K, 4-way, sectored */
 #define CPUID_CACHE_L2_128K_S2     0x3B /* L2: 128K, 2-way, sectored */
 #define CPUID_CACHE_L2_256K_S4     0x3C /* L2: 256K, 4-way, sectored */
 #define CPUID_CACHE_L2_12M_12_64   0x4C /* L2: 12M, 12-way, 64 bytes */
 #define CPUID_CACHE_L2_16M_16_64   0x4D /* L2: 16M, 16-way, 64 bytes */
 #define CPUID_CACHE_L2_6M_24_64    0x4E /* L2:  6M, 24-way, 64 bytes */
-#define CPUID_CACHE_ITLB_64        0x50 /* Instruction TLB: 64 entries */
-#define CPUID_CACHE_ITLB_128       0x51 /* Instruction TLB: 128 entries */
-#define CPUID_CACHE_ITLB_256       0x52 /* Instruction TLB: 256 entries */
+#define CPUID_CACHE_ITLB_64        0x50 /* Inst TLB: 64 entries */
+#define CPUID_CACHE_ITLB_128       0x51 /* Inst TLB: 128 entries */
+#define CPUID_CACHE_ITLB_256       0x52 /* Inst TLB: 256 entries */
+#define CPUID_CACHE_ITLB_4M2M_7    0x55 /* Inst TLB: 4M/2M, 7 entries */
 #define CPUID_CACHE_DTLB_4M_16_4   0x56 /* Data TLB: 4M, 16 entries, 4-way */
-#define CPUID_CACHE_DTLB_4K_16_4   0x56 /* Data TLB: 4K, 16 entries, 4-way */
+#define CPUID_CACHE_DTLB_4K_16_4   0x57 /* Data TLB: 4K, 16 entries, 4-way */
+#define CPUID_CACHE_DTLB_4M2M_32_4 0x5A /* Data TLB: 4M/2M, 32 entries */
 #define CPUID_CACHE_DTLB_64        0x5B /* Data TLB: 64 entries */
 #define CPUID_CACHE_DTLB_128       0x5C /* Data TLB: 128 entries */
 #define CPUID_CACHE_DTLB_256       0x5D /* Data TLB: 256 entries */
-#define        CPUID_CACHE_DCACHE_16K_8_64 0x60 /* Data cache: 16K, 8-way, 64 bytes */
-#define        CPUID_CACHE_DCACHE_8K_4_64  0x66 /* Data cache:  8K, 4-way, 64 bytes */
-#define        CPUID_CACHE_DCACHE_16K_4_64 0x67 /* Data cache: 16K, 4-way, 64 bytes */
-#define        CPUID_CACHE_DCACHE_32K_4_64 0x68 /* Data cache: 32K, 4-way, 64 bytes */
+#define        CPUID_CACHE_L1D_16K_8_64   0x60 /* Data cache: 16K, 8-way, 64 bytes */
+#define        CPUID_CACHE_L1D_8K_4_64    0x66 /* Data cache:  8K, 4-way, 64 bytes */
+#define        CPUID_CACHE_L1D_16K_4_64   0x67 /* Data cache: 16K, 4-way, 64 bytes */
+#define        CPUID_CACHE_L1D_32K_4_64   0x68 /* Data cache: 32K, 4-way, 64 bytes */
 #define CPUID_CACHE_TRACE_12K_8    0x70 /* Trace cache 12K-uop, 8-way */
 #define CPUID_CACHE_TRACE_16K_8    0x71 /* Trace cache 16K-uop, 8-way */
 #define CPUID_CACHE_TRACE_32K_8    0x72 /* Trace cache 32K-uop, 8-way */
 #define CPUID_CACHE_L2_1M_8_64     0x87 /* L2:   1M, 8-way, 64 bytes */
 #define CPUID_CACHE_ITLB_4K_128_4  0xB0 /* ITLB: 4KB, 128 entries, 4-way */
 #define CPUID_CACHE_ITLB_4M_4_4    0xB1 /* ITLB: 4MB,   4 entries, 4-way, or  */
-#define CPUID_CACHE_ITLB_2M_8_4    0xB1 /* ITLB: 2MB,   8 entries, 4-way */
+#define CPUID_CACHE_ITLB_2M_8_4    0xB1 /* ITLB: 2MB,   8 entries, 4-way, or  */
+#define CPUID_CACHE_ITLB_4M_8      0xB1 /* ITLB: 4MB,   8 entries */
+#define CPUID_CACHE_ITLB_4K_64_4   0xB2 /* ITLB: 4KB,  64 entries, 4-way */
 #define CPUID_CACHE_DTLB_4K_128_4  0xB3 /* DTLB: 4KB, 128 entries, 4-way */
 #define CPUID_CACHE_DTLB_4K_256_4  0xB4 /* DTLB: 4KB, 256 entries, 4-way */
+#define CPUID_CACHE_2TLB_4K_512_4  0xB4 /* 2nd-level TLB: 4KB, 512, 4-way */
+#define CPUID_CACHE_L3_512K_4_64   0xD0 /* L3: 512KB, 4-way, 64 bytes */
+#define CPUID_CACHE_L3_1M_4_64     0xD1 /* L3:    1M, 4-way, 64 bytes */
+#define CPUID_CACHE_L3_2M_4_64     0xD2 /* L3:    2M, 4-way, 64 bytes */
+#define CPUID_CACHE_L3_1M_8_64     0xD6 /* L3:    1M, 8-way, 64 bytes */
+#define CPUID_CACHE_L3_2M_8_64     0xD7 /* L3:    2M, 8-way, 64 bytes */
+#define CPUID_CACHE_L3_4M_8_64     0xD8 /* L3:    4M, 8-way, 64 bytes */
+#define CPUID_CACHE_L3_1M5_12_64   0xDC /* L3:  1.5M, 12-way, 64 bytes */
+#define CPUID_CACHE_L3_3M_12_64    0xDD /* L3:    3M, 12-way, 64 bytes */
+#define CPUID_CACHE_L3_6M_12_64    0xDE /* L3:    6M, 12-way, 64 bytes */
+#define CPUID_CACHE_L3_2M_16_64    0xE2 /* L3:    2M, 16-way, 64 bytes */
+#define CPUID_CACHE_L3_4M_16_64    0xE3 /* L3:    4M, 16-way, 64 bytes */
+#define CPUID_CACHE_L3_8M_16_64    0xE4 /* L3:    8M, 16-way, 64 bytes */
 #define CPUID_CACHE_PREFETCH_64    0xF0 /* 64-Byte Prefetching */
 #define CPUID_CACHE_PREFETCH_128   0xF1 /* 128-Byte Prefetching */
 
 #define CPUID_MWAIT_EXTENSION  _Bit(0) /* enumeration of MWAIT extensions */
 #define CPUID_MWAIT_BREAK      _Bit(1) /* interrupts are break events     */
 
+#define CPUID_MODEL_YONAH      14
+#define CPUID_MODEL_MEROM      15
+#define CPUID_MODEL_PENRYN     23
+#define CPUID_MODEL_NEHALEM    26
+
 #ifndef ASSEMBLER
 #include <stdint.h>
 #include <mach/mach_types.h>
@@ -297,6 +325,18 @@ typedef struct {
        /* Virtual and physical address size: */
        uint32_t        cpuid_address_bits_physical;
        uint32_t        cpuid_address_bits_virtual;
+
+       uint32_t        cpuid_microcode_version;
+
+       /* Numbers of tlbs per processor */
+       uint32_t        cpuid_itlb_small;
+       uint32_t        cpuid_dtlb_small;
+       uint32_t        cpuid_itlb_large;
+       uint32_t        cpuid_dtlb_large;
+
+       uint32_t        core_count;
+       uint32_t        thread_count;
+
 } i386_cpu_info_t;
 
 #ifdef __cplusplus
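
A sketch of consuming the new fields through the existing accessor (assumes a
kernel context; the field names are exactly those added above):

    i386_cpu_info_t *info = cpuid_info();

    kprintf("iTLB 4K: %u  dTLB 4K: %u  cores: %u  threads: %u\n",
            info->cpuid_itlb_small, info->cpuid_dtlb_small,
            info->core_count, info->thread_count);
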
index fda25c022cfb7f20a8a50a441f820c365caff1c6..99577bb30db1622653255f5eca23f3ab382e22bf 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -70,7 +70,6 @@
 #include <i386/eflags.h>
 #include <i386/trap.h>
 #include <i386/pmCPU.h>
-#include <i386/hpet.h>
 
 typedef        addr64_t        db_addr_t;      /* address - unsigned */
 typedef        uint64_t        db_expr_t;      /* expression */
@@ -172,9 +171,6 @@ extern void db_msr(db_expr_t addr, boolean_t have_addr, db_expr_t count,
                   char *modif);
 extern void db_apic(db_expr_t addr, boolean_t have_addr, db_expr_t count,
                    char *modif);
-extern void db_display_hpet(hpetReg_t *);
-extern void db_hpet(db_expr_t addr, boolean_t have_addr, db_expr_t count,
-                   char *modif);
 
 /* macros for printing OS server dependent task name */
 
@@ -203,7 +199,6 @@ extern void         kdb_on(
                                int                     cpu);
 
 #if MACH_KDB
-extern void db_getpmgr(pmData_t *pmj);
 extern void db_chkpmgr(void);
 #endif /* MACH_KDB */
 extern void db_pmgr(db_expr_t addr, int have_addr, db_expr_t count, char * modif);
index a723585bb75824d73b5f267f6e40f0ab9e8cd7cf..86b97b4bc146ae68d8ad6481553f8a9677dcd1a6 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -82,7 +82,6 @@
 #include <i386/cpuid.h>
 #include <i386/Diagnostics.h>
 #include <i386/pmCPU.h>
-#include <i386/hpet.h>
 #include <mach/i386/vm_param.h>
 #include <mach/i386/thread_status.h>
 #include <machine/commpage.h>
@@ -441,6 +440,9 @@ main(
        DECLARE("CPU_UBER_ARG_STORE_VALID",
                offsetof(cpu_data_t *, cpu_uber_arg_store_valid));
 
+       DECLARE("CPU_NANOTIME",
+               offsetof(cpu_data_t *, cpu_nanotime));
+
        DECLARE("CPU_DR7",
                offsetof(cpu_data_t *, cpu_dr7));
 
@@ -548,21 +550,6 @@ main(
 
        DECLARE("OnProc", OnProc);
 
-
-       DECLARE("GCAP_ID",              offsetof(hpetReg_t *, GCAP_ID));
-       DECLARE("GEN_CONF",             offsetof(hpetReg_t *, GEN_CONF));
-       DECLARE("GINTR_STA",    offsetof(hpetReg_t *, GINTR_STA));
-       DECLARE("MAIN_CNT",             offsetof(hpetReg_t *, MAIN_CNT));
-       DECLARE("TIM0_CONF",    offsetof(hpetReg_t *, TIM0_CONF));
-       DECLARE("TIM_CONF",             TIM_CONF);
-       DECLARE("Tn_INT_ENB_CNF",       Tn_INT_ENB_CNF);
-       DECLARE("TIM0_COMP",    offsetof(hpetReg_t *, TIM0_COMP));
-       DECLARE("TIM_COMP",             TIM_COMP);
-       DECLARE("TIM1_CONF",    offsetof(hpetReg_t *, TIM1_CONF));
-       DECLARE("TIM1_COMP",    offsetof(hpetReg_t *, TIM1_COMP));
-       DECLARE("TIM2_CONF",    offsetof(hpetReg_t *, TIM2_CONF));
-       DECLARE("TIM2_COMP",    offsetof(hpetReg_t *, TIM2_COMP));
-
 #if    CONFIG_DTRACE
        DECLARE("LS_LCK_MTX_LOCK_ACQUIRE", LS_LCK_MTX_LOCK_ACQUIRE);
        DECLARE("LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE", LS_LCK_MTX_TRY_SPIN_LOCK_ACQUIRE);
diff --git a/osfmk/i386/hpet.c b/osfmk/i386/hpet.c
deleted file mode 100644 (file)
index 940a7c6..0000000
+++ /dev/null
@@ -1,550 +0,0 @@
-/*
- * Copyright (c) 2005-2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-
-#include <string.h>
-#include <mach/vm_param.h>
-#include <mach/vm_prot.h>
-#include <mach/machine.h>
-#include <mach/time_value.h>
-#include <kern/spl.h>
-#include <kern/assert.h>
-#include <kern/debug.h>
-#include <kern/misc_protos.h>
-#include <kern/startup.h>
-#include <kern/clock.h>
-#include <kern/cpu_data.h>
-#include <kern/processor.h>
-#include <vm/vm_page.h>
-#include <vm/pmap.h>
-#include <vm/vm_kern.h>
-#include <i386/pmap.h>
-#include <i386/misc_protos.h>
-#include <i386/cpuid.h>
-#include <i386/mp.h>
-#include <i386/machine_cpu.h>
-#include <i386/machine_routines.h>
-#include <i386/io_map_entries.h>
-#include <architecture/i386/pio.h>
-#include <i386/cpuid.h>
-#include <i386/apic.h>
-#include <i386/tsc.h>
-#include <i386/hpet.h>
-#include <i386/pmCPU.h>
-#include <i386/cpu_topology.h>
-#include <i386/cpu_threads.h>
-#include <pexpert/device_tree.h>
-#if    MACH_KDB
-#include <i386/db_machdep.h>
-#endif
-#if    MACH_KDB
-#include <ddb/db_aout.h>
-#include <ddb/db_access.h>
-#include <ddb/db_sym.h>
-#include <ddb/db_variables.h>
-#include <ddb/db_command.h>
-#include <ddb/db_output.h>
-#include <ddb/db_expr.h>
-#endif /* MACH_KDB */
-#include <ddb/tr.h>
-
-/* Decimal powers: */
-#define kilo (1000ULL)
-#define Mega (kilo * kilo)
-#define Giga (kilo * Mega)
-#define Tera (kilo * Giga)
-#define Peta (kilo * Tera)
-
-uint32_t hpetArea = 0;                 
-uint32_t hpetAreap = 0;                        
-uint64_t hpetFemto = 0;
-uint64_t hpetFreq = 0;
-uint64_t hpetCvt = 0;                  /* (TAKE OUT LATER)  */
-uint64_t hpetCvtt2n = 0;
-uint64_t hpetCvtn2t = 0;
-uint64_t tsc2hpet = 0;
-uint64_t hpet2tsc = 0;
-uint64_t bus2hpet = 0;
-uint64_t hpet2bus = 0;
-
-uint32_t rcbaArea = 0;                 
-uint32_t rcbaAreap = 0;                        
-
-static int (*hpet_req)(uint32_t apicid, void *arg, hpetRequest_t *hpet) = NULL;
-static void *hpet_arg = NULL;
-
-#if DEBUG
-#define DBG(x...)      kprintf("DBG: " x)
-#else
-#define DBG(x...)
-#endif
-
-int
-hpet_register_callback(int (*hpet_reqst)(uint32_t apicid,
-                                        void *arg,
-                                        hpetRequest_t *hpet),
-                      void *arg)
-{
-    hpet_req = hpet_reqst;
-    hpet_arg = arg;
-    return(0);
-}
-
-/*
- * This routine is called to obtain an HPET and have it assigned
- * to a CPU.  It returns 0 if successful and non-zero if one could
- * not be assigned.
- */
-int
-hpet_request(uint32_t cpu)
-{
-    hpetRequest_t      hpetReq;
-    int                        rc;
-    x86_lcpu_t         *lcpu;
-    x86_core_t         *core;
-    x86_pkg_t          *pkg;
-    boolean_t          enabled;
-
-    if (hpet_req == NULL) {
-       return(-1);
-    }
-
-    /*
-     * Deal with the case where the CPU # passed in is past the
-     * value specified in cpus=n in boot-args.
-     */
-    if (cpu >= real_ncpus) {
-       enabled = ml_set_interrupts_enabled(FALSE);
-       lcpu = cpu_to_lcpu(cpu);
-       if (lcpu != NULL) {
-           core = lcpu->core;
-           pkg  = core->package;
-
-           if (lcpu->primary) {
-               pkg->flags |= X86PKG_FL_HAS_HPET;
-           }
-       }
-
-       ml_set_interrupts_enabled(enabled);
-       return(0);
-    }
-
-    rc = (*hpet_req)(ml_get_apicid(cpu), hpet_arg, &hpetReq);
-    if (rc != 0) {
-       return(rc);
-    }
-
-    enabled = ml_set_interrupts_enabled(FALSE);
-    lcpu = cpu_to_lcpu(cpu);
-    core = lcpu->core;
-    pkg  = core->package;
-
-    /*
-     * Compute the address of the HPET.
-     */
-    core->Hpet = (hpetTimer_t *)((uint8_t *)hpetArea + hpetReq.hpetOffset);
-    core->HpetVec = hpetReq.hpetVector;
-
-    /*
-     * Enable interrupts
-     */
-    core->Hpet->Config |= Tn_INT_ENB_CNF;
-
-    /*
-     * Save the configuration
-     */
-    core->HpetCfg = core->Hpet->Config;
-    core->HpetCmp = 0;
-
-    /*
-     * If the CPU is the "primary" for the package, then
-     * add the HPET to the package too.
-     */
-    if (lcpu->primary) {
-       pkg->Hpet = core->Hpet;
-       pkg->HpetCfg = core->HpetCfg;
-       pkg->HpetCmp = core->HpetCmp;
-       pkg->flags |= X86PKG_FL_HAS_HPET;
-    }
-
-    ml_set_interrupts_enabled(enabled);
-
-    return(0);
-}
-
-/*
- * Map the RCBA area.
- */
-static void
-map_rcbaArea(void)
-{
-       /*
-        * Get RCBA area physical address and map it
-        */
-       outl(cfgAdr, lpcCfg | (0xF0 & 0xFC));
-       rcbaAreap = inl(cfgDat | (0xF0 & 0x03));
-       rcbaArea = io_map_spec(rcbaAreap & -4096, PAGE_SIZE * 4, VM_WIMG_IO);
-       kprintf("RCBA: vaddr = %08X, paddr = %08X\n", rcbaArea, rcbaAreap);
-}
-
-/*
- * Initialize the HPET
- */
-void
-hpet_init(void)
-{
-       unsigned int    *xmod;
-
-       map_rcbaArea();
-
-       /*
-        * Is the HPET memory already enabled?
-        * If not, set address and enable.
-        */
-       xmod = (uint32_t *)(rcbaArea + 0x3404); /* Point to the HPTC */
-       uint32_t hptc = *xmod;                  /* Get HPET config */
-       DBG("    current RCBA.HPTC:  %08X\n", *xmod);
-       if(!(hptc & hptcAE)) {
-               DBG("HPET memory is not enabled, "
-                   "enabling and assigning to 0xFED00000 (hope that's ok)\n");
-               *xmod = (hptc & ~3) | hptcAE;
-       }
-
-       /*
-        * Get physical address of HPET and map it.
-        */
-       hpetAreap = hpetAddr | ((hptc & 3) << 12);
-       hpetArea = io_map_spec(hpetAreap & -4096, PAGE_SIZE * 4, VM_WIMG_IO);
-       kprintf("HPET: vaddr = %08X, paddr = %08X\n", hpetArea, hpetAreap);
-
-       /*
-        * Extract the HPET tick rate.
-        * The period of the HPET is reported in femtoseconds (10**-15s)
-        * and convert to frequency in hertz.
-        */
-       hpetFemto = (uint32_t)(((hpetReg_t *)hpetArea)->GCAP_ID >> 32);
-       hpetFreq = (1 * Peta) / hpetFemto;
-
-       /*
-        * The conversion factor is the number of nanoseconds per HPET tick
-        * with about 32 bits of fraction.  The value is converted to a
-        * base-2 fixed point number.  To convert from HPET to nanoseconds,
-        * multiply the value by the conversion factor using 96-bit arithmetic,
-        * then shift right 32 bits.  If the value is known to be small,
-        * 64-bit arithmetic will work.
-        */
-
-       /*
-        * Begin conversion of base 10 femtoseconds to base 2, calculate:
-        *  - HPET ticks to nanoseconds conversion in base 2 fraction (* 2**32)
-        *  - nanoseconds to HPET ticks conversion
-        */
-       hpetCvtt2n = (uint64_t)hpetFemto << 32;
-       hpetCvtt2n = hpetCvtt2n / 1000000ULL;
-       hpetCvtn2t = 0xFFFFFFFFFFFFFFFFULL / hpetCvtt2n;
-       kprintf("HPET: Frequency = %6d.%04dMHz, "
-               "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X\n",
-               (uint32_t)(hpetFreq / Mega), (uint32_t)(hpetFreq % Mega), 
-               (uint32_t)(hpetCvtt2n >> 32), (uint32_t)hpetCvtt2n,
-               (uint32_t)(hpetCvtn2t >> 32), (uint32_t)hpetCvtn2t);
-
-
-       /* (TAKE OUT LATER)
-        * Begin conversion of base 10 femtoseconds to base 2
-        * HPET ticks to nanoseconds in base 2 fraction (times 1048576)
-        */
-       hpetCvt = (uint64_t)hpetFemto << 20;
-       hpetCvt = hpetCvt / 1000000ULL;
-
-       /* Calculate conversion from TSC to HPET */
-       tsc2hpet = tmrCvt(tscFCvtt2n, hpetCvtn2t);
-       DBG(" CVT: TSC to HPET = %08X.%08X\n",
-           (uint32_t)(tsc2hpet >> 32), (uint32_t)tsc2hpet);
-
-       /* Calculate conversion from HPET to TSC */
-       hpet2tsc = tmrCvt(hpetCvtt2n, tscFCvtn2t);
-       DBG(" CVT: HPET to TSC = %08X.%08X\n",
-           (uint32_t)(hpet2tsc >> 32), (uint32_t)hpet2tsc);
-
-       /* Calculate conversion from BUS to HPET */
-       bus2hpet = tmrCvt(busFCvtt2n, hpetCvtn2t);
-       DBG(" CVT: BUS to HPET = %08X.%08X\n",
-           (uint32_t)(bus2hpet >> 32), (uint32_t)bus2hpet);
-
-       /* Calculate conversion from HPET to BUS */
-       hpet2bus = tmrCvt(hpetCvtt2n, busFCvtn2t);
-       DBG(" CVT: HPET to BUS = %08X.%08X\n",
-           (uint32_t)(hpet2bus >> 32), (uint32_t)hpet2bus);
-
-#if MACH_KDB
-       db_display_hpet((hpetReg_t *)hpetArea); /* (BRINGUP) */
-#endif
-}
-
-/*
- * This routine is used to get various information about the HPET
- * without having to export gobs of globals.  It fills in a data
- * structure with the info.
- */
-void
-hpet_get_info(hpetInfo_t *info)
-{
-    info->hpetCvtt2n = hpetCvtt2n;
-    info->hpetCvtn2t = hpetCvtn2t;
-    info->tsc2hpet   = tsc2hpet;
-    info->hpet2tsc   = hpet2tsc;
-    info->bus2hpet   = bus2hpet;
-    info->hpet2bus   = hpet2bus;
-    /*
-     * XXX
-     * We're repurposing the rcbaArea so we can use the HPET.
-     * Eventually we'll rename this correctly.
-     */
-    info->rcbaArea   = hpetArea;
-    info->rcbaAreap  = hpetAreap;
-}
-
-
-/*
- * This routine is called by the HPET driver
- * when it assigns an HPET timer to a processor.
- *
- * XXX with the new callback into the HPET driver,
- * this routine will be deprecated.
- */
-void
-ml_hpet_cfg(uint32_t cpu, uint32_t hpetVect)
-{
-       uint64_t        *hpetVaddr;
-       hpetTimer_t     *hpet;
-       x86_lcpu_t      *lcpu;
-       x86_core_t      *core;
-       x86_pkg_t       *pkg;
-       boolean_t       enabled;
-       
-       if(cpu > 1) {
-               panic("ml_hpet_cfg: invalid cpu = %d\n", cpu);
-       }
-
-       lcpu = cpu_to_lcpu(cpu);
-       core = lcpu->core;
-       pkg  = core->package;
-
-       /*
-        * Only deal with the primary CPU for the package.
-        */
-       if (!lcpu->primary)
-           return;
-
-       enabled = ml_set_interrupts_enabled(FALSE);
-
-       /* Calculate address of the HPET for this processor */
-       hpetVaddr = (uint64_t *)(((uint32_t)&(((hpetReg_t *)hpetArea)->TIM1_CONF)) + (cpu << 5));
-       hpet = (hpetTimer_t *)hpetVaddr;
-
-       DBG("ml_hpet_cfg: HPET for cpu %d at %p, vector = %d\n",
-            cpu, hpetVaddr, hpetVect);
-
-       /* Save the address and vector of the HPET for this processor */
-       core->Hpet = hpet;
-       core->HpetVec = hpetVect;
-
-       /*
-        * Enable interrupts
-        */
-       core->Hpet->Config |= Tn_INT_ENB_CNF;
-
-       /* Save the configuration */
-       core->HpetCfg = core->Hpet->Config;
-       core->HpetCmp = 0;
-
-       /*
-        * We're only doing this for the primary CPU, so go
-        * ahead and add the HPET to the package too.
-        */
-       pkg->Hpet = core->Hpet;
-       pkg->HpetVec = core->HpetVec;
-       pkg->HpetCfg = core->HpetCfg;
-       pkg->HpetCmp = core->HpetCmp;
-       pkg->flags |= X86PKG_FL_HAS_HPET;
-
-       ml_set_interrupts_enabled(enabled);
-}
-
-/*
- * This is the HPET interrupt handler.
- *
- * It just hands off to the power management code so that the
- * appropriate things get done there.
- */
-int
-HPETInterrupt(void)
-{
-
-       /* All we do here is to bump the count */
-       x86_package()->HpetInt++;
-
-       /*
-        * Let power management do it's thing.
-        */
-       pmHPETInterrupt();
-
-       /* Return and show that the 'rupt has been handled... */
-       return 1;
-}
-
-
-static hpetReg_t saved_hpet;
-
-void
-hpet_save(void)
-{
-       hpetReg_t       *from = (hpetReg_t *) hpetArea;
-       hpetReg_t       *to = &saved_hpet;
-
-       to->GEN_CONF  = from->GEN_CONF;
-       to->TIM0_CONF = from->TIM0_CONF;
-       to->TIM0_COMP = from->TIM0_COMP;
-       to->TIM1_CONF = from->TIM1_CONF;
-       to->TIM1_COMP = from->TIM1_COMP;
-       to->TIM2_CONF = from->TIM2_CONF;
-       to->TIM2_COMP = from->TIM2_COMP;
-       to->MAIN_CNT  = from->MAIN_CNT;
-}
-
-void
-hpet_restore(void)
-{
-       hpetReg_t       *from = &saved_hpet;
-       hpetReg_t       *to = (hpetReg_t *) hpetArea;
-
-       /*
-        * Is the HPET memory already enabled?
-        * If not, set address and enable.
-        */
-       uint32_t *hptcp = (uint32_t *)(rcbaArea + 0x3404);
-       uint32_t hptc = *hptcp;
-       if(!(hptc & hptcAE)) {
-               DBG("HPET memory is not enabled, "
-                   "enabling and assigning to 0xFED00000 (hope that's ok)\n");
-               *hptcp = (hptc & ~3) | hptcAE;
-       }
-
-       to->GEN_CONF  = from->GEN_CONF & ~1;
-
-       to->TIM0_CONF = from->TIM0_CONF;
-       to->TIM0_COMP = from->TIM0_COMP;
-       to->TIM1_CONF = from->TIM1_CONF;
-       to->TIM1_COMP = from->TIM1_COMP;
-       to->TIM2_CONF = from->TIM2_CONF;
-       to->TIM2_COMP = from->TIM2_COMP;
-       to->GINTR_STA = -1ULL;
-       to->MAIN_CNT  = from->MAIN_CNT;
-
-       to->GEN_CONF = from->GEN_CONF;
-}
-
-/*
- *      Read the HPET timer
- *
- */
-uint64_t
-rdHPET(void)
-{
-       hpetReg_t               *hpetp = (hpetReg_t *) hpetArea;
-       volatile uint32_t       *regp = (uint32_t *) &hpetp->MAIN_CNT;
-       uint32_t                high;
-       uint32_t                low;
-
-       do {
-               high = *(regp + 1);
-               low = *regp;
-       } while (high != *(regp + 1));
-
-       return (((uint64_t) high) << 32) | low;
-}
-
-#if MACH_KDB
-
-#define HI32(x)        ((uint32_t)(((x) >> 32) & 0xFFFFFFFF))
-#define LO32(x)        ((uint32_t)((x) & 0xFFFFFFFF))
-
-/*
- *     Displays HPET memory mapped area
- *     hp
- */
-void 
-db_hpet(__unused db_expr_t addr, __unused int have_addr, __unused db_expr_t count, __unused char *modif)
-{
-
-       db_display_hpet((hpetReg_t *) hpetArea);        /* Dump out the HPET
-                                                        * stuff */
-       return;
-}
-
-void
-db_display_hpet(hpetReg_t *hpt)
-{
-       uint64_t        cmain;
-
-       cmain = hpt->MAIN_CNT;  /* Get the main timer */
-
-       /* General capabilities */
-       db_printf("  GCAP_ID = %08X.%08X\n",
-                 HI32(hpt->GCAP_ID), LO32(hpt->GCAP_ID));
-       /* General configuration */
-       db_printf(" GEN_CONF = %08X.%08X\n",
-                 HI32(hpt->GEN_CONF), LO32(hpt->GEN_CONF));
-       /* General Interrupt status */
-       db_printf("GINTR_STA = %08X.%08X\n",
-                 HI32(hpt->GINTR_STA), LO32(hpt->GINTR_STA));
-       /* Main counter */
-       db_printf(" MAIN_CNT = %08X.%08X\n",
-                 HI32(cmain), LO32(cmain));
-       /* Timer 0 config and cap */
-       db_printf("TIM0_CONF = %08X.%08X\n",
-                 HI32(hpt->TIM0_CONF), LO32(hpt->TIM0_CONF));
-       /* Timer 0 comparator */
-       db_printf("TIM0_COMP = %08X.%08X\n",
-                 HI32(hpt->TIM0_COMP), LO32(hpt->TIM0_COMP));
-       /* Timer 1 config and cap */
-       db_printf("TIM0_CONF = %08X.%08X\n",
-                 HI32(hpt->TIM1_CONF), LO32(hpt->TIM1_CONF));
-       /* Timer 1 comparator */
-       db_printf("TIM1_COMP = %08X.%08X\n",
-                 HI32(hpt->TIM1_COMP), LO32(hpt->TIM1_COMP));
-       /* Timer 2 config and cap */
-       db_printf("TIM2_CONF = %08X.%08X\n",
-                 HI32(hpt->TIM2_CONF), LO32(hpt->TIM2_CONF));
-       /* Timer 2 comparator */
-       db_printf("TIM2_COMP = %08X.%08X\n",
-                 HI32(hpt->TIM2_COMP), LO32(hpt->TIM2_COMP));
-
-       db_printf("\nHPET Frequency = %d.%05dMHz\n",
-         (uint32_t) (hpetFreq / 1000000), (uint32_t) (hpetFreq % 1000000));
-}
-#endif
diff --git a/osfmk/i386/hpet.h b/osfmk/i386/hpet.h
deleted file mode 100644 (file)
index 72656d0..0000000
+++ /dev/null
@@ -1,126 +0,0 @@
-/*
- * Copyright (c) 2006 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#ifdef KERNEL_PRIVATE
-#ifndef _I386_HPET_H_
-#define _I386_HPET_H_
-
-/*
- * HPET kernel functions to support the HPET KEXT and the
- * power management KEXT.
- */
-
-
-/*
- *     Memory mapped registers for the HPET
- */
-typedef struct hpetReg {
-       uint64_t        GCAP_ID;                /* General capabilities */
-       uint64_t        rsv1;
-       uint64_t        GEN_CONF;               /* General configuration */
-       uint64_t        rsv2;
-       uint64_t        GINTR_STA;              /* General Interrupt status */
-       uint64_t        rsv3[25];
-       uint64_t        MAIN_CNT;               /* Main counter */
-       uint64_t        rsv4;
-       uint64_t        TIM0_CONF;              /* Timer 0 config and cap */
-#define                        TIM_CONF 0
-#define                        Tn_INT_ENB_CNF 4
-       uint64_t        TIM0_COMP;              /* Timer 0 comparator */
-#define                        TIM_COMP 8
-       uint64_t        rsv5[2];
-       uint64_t        TIM1_CONF;              /* Timer 1 config and cap */
-       uint64_t        TIM1_COMP;              /* Timer 1 comparator */
-       uint64_t        rsv6[2];
-       uint64_t        TIM2_CONF;              /* Timer 2 config and cap */
-       uint64_t        TIM2_COMP;              /* Timer 2 comparator */
-       uint64_t        rsv7[2];
-} hpetReg;
-typedef struct         hpetReg hpetReg_t;
-
-typedef struct hpetTimer {
-       uint64_t        Config;         /* Timer config and capabilities */
-       uint64_t        Compare;        /* Timer comparator */
-} hpetTimer_t;
-
-struct hpetInfo
-{
-       uint64_t        hpetCvtt2n;
-       uint64_t        hpetCvtn2t;
-       uint64_t        tsc2hpet;
-       uint64_t        hpet2tsc;
-       uint64_t        bus2hpet;
-       uint64_t        hpet2bus;
-       uint32_t        rcbaArea;
-       uint32_t        rcbaAreap;
-};
-typedef struct hpetInfo hpetInfo_t;
-
-struct hpetRequest
-{
-       uint32_t        flags;
-       uint32_t        hpetOffset;
-       uint32_t        hpetVector;
-};
-typedef struct hpetRequest hpetRequest_t;
-
-#define HPET_REQFL_64BIT       0x00000001      /* Timer is 64 bits */
-
-extern uint64_t hpetFemto;
-extern uint64_t hpetFreq;
-extern uint64_t hpetCvtt2n;
-extern uint64_t hpetCvtn2t;
-extern uint64_t tsc2hpet;
-extern uint64_t hpet2tsc;
-extern uint64_t bus2hpet;
-extern uint64_t hpet2bus;
-
-extern uint32_t rcbaArea;                      
-extern uint32_t rcbaAreap;
-
-extern void map_rcbaAread(void);
-extern void hpet_init(void);
-
-extern void hpet_save(void);
-extern void hpet_restore(void);
-
-#ifdef XNU_KERNEL_PRIVATE
-extern int HPETInterrupt(void);
-#endif
-
-extern int hpet_register_callback(int (*hpet_reqst)(uint32_t apicid, void *arg, hpetRequest_t *hpet), void *arg);
-extern int hpet_request(uint32_t cpu);
-
-extern uint64_t rdHPET(void);
-extern void hpet_get_info(hpetInfo_t *info);
-
-#define hpetAddr       0xFED00000
-#define hptcAE                 0x80
-
-#endif /* _I386_HPET_H_ */
-
-#endif /* KERNEL_PRIVATE */
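For orientation, here is a minimal sketch (not part of this commit) of how a client of the interface above could read the free-running main counter and convert ticks to nanoseconds. It assumes the exported hpetCvtt2n factor is a tick-to-nanosecond conversion in the fixed-point form that tmrCvt() consumes; the header does not state this, so treat it as an inference.

	/*
	 * Sketch only.  hpetArea is a hypothetical kernel mapping of the
	 * register block at hpetAddr; MAIN_CNT is the 64-bit main counter.
	 */
	static volatile hpetReg_t *hpetArea;

	static uint64_t
	hpet_read_ns(void)
	{
		uint64_t ticks = hpetArea->MAIN_CNT;

		/* apply the assumed fixed-point tick->ns conversion factor */
		return tmrCvt(ticks, hpetCvtt2n);
	}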
diff --git a/osfmk/i386/hw_defs.h b/osfmk/i386/hw_defs.h
deleted file mode 100644 (file)
index 0fac10f..0000000
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2005 Apple Computer, Inc. All rights reserved.
- *
- * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
- * 
- * This file contains Original Code and/or Modifications of Original Code
- * as defined in and that are subject to the Apple Public Source License
- * Version 2.0 (the 'License'). You may not use this file except in
- * compliance with the License. The rights granted to you under the License
- * may not be used to create, or enable the creation or redistribution of,
- * unlawful or unlicensed copies of an Apple operating system, or to
- * circumvent, violate, or enable the circumvention or violation of, any
- * terms of an Apple operating system software license agreement.
- * 
- * Please obtain a copy of the License at
- * http://www.opensource.apple.com/apsl/ and read it before using this file.
- * 
- * The Original Code and all software distributed under the License are
- * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
- * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
- * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
- * Please see the License for the specific language governing rights and
- * limitations under the License.
- * 
- * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
- */
-#ifndef _I386_HW_DEFS_H_
-#define _I386_HW_DEFS_H_
-
-
-#define pmMwaitC1      0x00
-#define pmMwaitC2      0x10
-#define pmMwaitC3      0x20
-#define pmMwaitC4      0x30
-#define pmMwaitBrInt 0x1
-
-#define pmBase                 0x400
-#define pmCtl1                 0x04
-#define pmCtl2                 0x20
-#define pmC3Res        0x54
-#define pmStatus       0x00
-#define msrTSC                 0x10
-
-#endif /* _I386_HW_DEFS_H_ */
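The pmMwait* constants above look like MWAIT hint encodings: the C-state request goes in EAX and pmMwaitBrInt matches the ECX extension bit that makes masked interrupts break events. That reading is an assumption, not something this header documents; under it, an idle entry would be shaped roughly like:

	static char mwait_line[64];	/* hypothetical monitored cache line */

	static void
	mwait_idle(uint32_t hint)	/* e.g. pmMwaitC2 */
	{
		/* arm the monitor on an address, then wait in the hinted C-state */
		__asm__ volatile("monitor" : : "a" (mwait_line), "c" (0), "d" (0));
		__asm__ volatile("mwait"   : : "a" (hint), "c" (pmMwaitBrInt));
	}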
index 67f0f28032dc98a8b8071bb31d9e3e463143e70b..8005189dd10d7091f631b0e48d36805d77521c98 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2003-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2003-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -91,7 +91,6 @@
 #include <i386/Diagnostics.h>
 #include <i386/pmCPU.h>
 #include <i386/tsc.h>
-#include <i386/hpet.h>
 #include <i386/locks.h> /* LcksOpts */
 #if    MACH_KDB
 #include <ddb/db_aout.h>
@@ -153,11 +152,11 @@ i386_init(vm_offset_t boot_args_start)
        /* setup debugging output if one has been chosen */
        PE_init_kprintf(FALSE);
 
-       if (!PE_parse_boot_arg("diag", &dgWork.dgFlags))
+       if (!PE_parse_boot_argn("diag", &dgWork.dgFlags, sizeof (dgWork.dgFlags)))
                dgWork.dgFlags = 0;
 
        serialmode = 0;
-       if(PE_parse_boot_arg("serial", &serialmode)) {
+       if(PE_parse_boot_argn("serial", &serialmode, sizeof (serialmode))) {
                /* We want a serial keyboard and/or console */
                kprintf("Serial mode specified: %08X\n", serialmode);
        }
@@ -172,12 +171,12 @@ i386_init(vm_offset_t boot_args_start)
        kprintf("version_variant = %s\n", version_variant);
        kprintf("version         = %s\n", version);
        
-       if (!PE_parse_boot_arg("maxmem", &maxmem))
-               maxmemtouse=0;
+       if (!PE_parse_boot_argn("maxmem", &maxmem, sizeof (maxmem)))
+               maxmemtouse = 0;
        else
                maxmemtouse = ((uint64_t)maxmem) * (uint64_t)(1024 * 1024);
 
-       if (PE_parse_boot_arg("cpus", &cpus)) {
+       if (PE_parse_boot_argn("cpus", &cpus, sizeof (cpus))) {
                if ((0 < cpus) && (cpus < max_ncpus))
                         max_ncpus = cpus;
        }
@@ -185,7 +184,7 @@ i386_init(vm_offset_t boot_args_start)
        /*
         * debug support for > 4G systems
         */
-       if (!PE_parse_boot_arg("himemory_mode", &vm_himemory_mode))
+       if (!PE_parse_boot_argn("himemory_mode", &vm_himemory_mode, sizeof (vm_himemory_mode)))
                vm_himemory_mode = 0;
 
        if (!PE_parse_boot_argn("immediate_NMI", &fidn, sizeof (fidn)))
@@ -200,7 +199,7 @@ i386_init(vm_offset_t boot_args_start)
        boolean_t IA32e = FALSE;
        if (cpuid_extfeatures() & CPUID_EXTFEATURE_EM64T) {
                kprintf("EM64T supported");
-               if (PE_parse_boot_arg("-legacy", &legacy_mode)) {
+               if (PE_parse_boot_argn("-legacy", &legacy_mode, sizeof (legacy_mode))) {
                        kprintf(" but legacy mode forced\n");
                } else {
                        IA32e = TRUE;
@@ -212,7 +211,7 @@ i386_init(vm_offset_t boot_args_start)
                nx_enabled = 0;
 
        /* Obtain "lcks" options:this currently controls lock statistics */
-       if (!PE_parse_boot_arg("lcks", &LcksOpts))
+       if (!PE_parse_boot_argn("lcks", &LcksOpts, sizeof (LcksOpts)))
                LcksOpts = 0;
 
        /*   
@@ -221,11 +220,10 @@ i386_init(vm_offset_t boot_args_start)
         */
        i386_vm_init(maxmemtouse, IA32e, kernelBootArgs);
 
-       if ( ! PE_parse_boot_arg("novmx", &noVMX))
+       if ( ! PE_parse_boot_argn("novmx", &noVMX, sizeof (noVMX)))
                noVMX = 0;      /* OK to support Altivec in rosetta? */
 
        tsc_init();
-       hpet_init();
        power_management_init();
 
        PE_init_platform(TRUE, kernelBootArgs);
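Every hunk in this file follows the same mechanical conversion: PE_parse_boot_arg(name, &var) becomes PE_parse_boot_argn(name, &var, sizeof (var)), so the parser knows how large the destination is and cannot write past a small variable. The idiom, with an invented boot-arg name:

	uint32_t fooOpts;

	/* "foo" is a hypothetical boot-arg; default to 0 when it is absent */
	if (!PE_parse_boot_argn("foo", &fooOpts, sizeof (fooOpts)))
		fooOpts = 0;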
index d8ffd44e40ac3471a23454715bbc5c8a14980f43..b5153c73dbfd25907f0c42464f59ef4f513c727e 100644 (file)
@@ -421,6 +421,7 @@ LEAF_ENTRY(hw_lock_to)
        mov     %edx,%edi
 
        rdtsc                           /* read cyclecount into %edx:%eax */
+       lfence
        addl    %ecx,%eax               /* fetch and timeout */
        adcl    $0,%edx                 /* add carry */
        mov     %edx,%ecx
@@ -442,6 +443,7 @@ LEAF_ENTRY(hw_lock_to)
         * Here after spinning INNER_LOOP_COUNT times, check for timeout
         */
        rdtsc                           /* cyclecount into %edx:%eax */
+       lfence
        cmpl    %ecx,%edx               /* compare high-order 32-bits */
        jb      4b                      /* continue spinning if less, or */
        cmpl    %ebx,%eax               /* compare low-order 32-bits */ 
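Both hunks add an LFENCE directly after RDTSC. RDTSC is not a serializing instruction, so without the fence the timestamp read can be reordered against the surrounding spin loop; on Intel processors LFENCE does not begin executing until every prior instruction has completed, which pins the read in program order. As a C helper the pairing would look like this (a sketch, not code from this commit):

	static inline uint64_t
	rdtsc_fenced(void)
	{
		uint32_t lo, hi;

		/* the fence keeps younger instructions from slipping ahead
		 * of the timestamp read */
		__asm__ volatile("rdtsc; lfence" : "=a" (lo), "=d" (hi));
		return (((uint64_t)hi) << 32) | lo;
	}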
index 9123aa771e0ac65523075919a9e03ab29f54599c..797022979c4aa14157de2c5699fb738940344257 100644 (file)
@@ -196,7 +196,7 @@ i386_vm_init(uint64_t       maxmem,
         * Compute the memory size.
         */
 
-       if ((1 == vm_himemory_mode) || PE_parse_boot_arg("-x", &safeboot)) {
+       if ((1 == vm_himemory_mode) || PE_parse_boot_argn("-x", &safeboot, sizeof (safeboot))) {
                maxpg = 1 << (32 - I386_PGSHIFT);
        }
        avail_remaining = 0;
@@ -440,12 +440,12 @@ i386_vm_init(uint64_t     maxmem,
 
        kprintf("Physical memory %llu MB\n", sane_size/MEG);
 
-       if (!PE_parse_boot_arg("max_valid_dma_addr", &maxdmaaddr))
+       if (!PE_parse_boot_argn("max_valid_dma_addr", &maxdmaaddr, sizeof (maxdmaaddr)))
                max_valid_dma_address = 1024ULL * 1024ULL * 4096ULL;
        else
                max_valid_dma_address = ((uint64_t) maxdmaaddr) * 1024ULL * 1024ULL;
 
-       if (!PE_parse_boot_arg("maxbouncepool", &maxbouncepoolsize))
+       if (!PE_parse_boot_argn("maxbouncepool", &maxbouncepoolsize, sizeof (maxbouncepoolsize)))
                maxbouncepoolsize = MAXBOUNCEPOOL;
        else
                maxbouncepoolsize = maxbouncepoolsize * (1024 * 1024);
@@ -455,7 +455,7 @@ i386_vm_init(uint64_t       maxmem,
         * in order to correctly determine the size of the mbuf pool
         * that will be reserved
         */
-       if (!PE_parse_boot_arg("maxloreserve", &maxloreserve))
+       if (!PE_parse_boot_argn("maxloreserve", &maxloreserve, sizeof (maxloreserve)))
                maxloreserve = MAXLORESERVE + bsd_mbuf_cluster_reserve();
        else
                maxloreserve = maxloreserve * (1024 * 1024);
diff --git a/osfmk/i386/lapic.c b/osfmk/i386/lapic.c
new file mode 100644 (file)
index 0000000..1dd1212
--- /dev/null
@@ -0,0 +1,597 @@
+/*
+ * Copyright (c) 2008 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * @OSF_COPYRIGHT@
+ */
+
+#include <mach/mach_types.h>
+#include <mach/kern_return.h>
+
+#include <kern/kern_types.h>
+#include <kern/cpu_number.h>
+#include <kern/cpu_data.h>
+#include <kern/assert.h>
+#include <kern/machine.h>
+
+#include <vm/vm_map.h>
+#include <vm/vm_kern.h>
+
+#include <i386/lapic.h>
+#include <i386/cpuid.h>
+#include <i386/proc_reg.h>
+#include <i386/machine_cpu.h>
+#include <i386/misc_protos.h>
+#include <i386/mp.h>
+#include <i386/mtrr.h>
+#include <i386/postcode.h>
+#include <i386/cpu_threads.h>
+#include <i386/trap.h>
+#include <i386/machine_routines.h>
+#include <i386/machine_check.h>
+
+#if MACH_KDB
+#include <machine/db_machdep.h>
+#endif
+
+#include <sys/kdebug.h>
+
+#if    MP_DEBUG
+#define PAUSE          delay(1000000)
+#define DBG(x...)      kprintf(x)
+#else
+#define DBG(x...)
+#define PAUSE
+#endif /* MP_DEBUG */
+
+/* Initialize lapic_id so cpu_number() works on non-SMP systems */
+unsigned long  lapic_id_initdata = 0;
+unsigned long  lapic_id = (unsigned long)&lapic_id_initdata;
+vm_offset_t    lapic_start;
+
+static i386_intr_func_t        lapic_intr_func[LAPIC_FUNC_TABLE_SIZE];
+
+/* TRUE if local APIC was enabled by the OS not by the BIOS */
+static boolean_t lapic_os_enabled = FALSE;
+
+/* Base vector for local APIC interrupt sources */
+int lapic_interrupt_base = LAPIC_DEFAULT_INTERRUPT_BASE;
+
+int            lapic_to_cpu[MAX_CPUS];
+int            cpu_to_lapic[MAX_CPUS];
+
+static void
+lapic_cpu_map_init(void)
+{
+       int     i;
+
+       for (i = 0; i < MAX_CPUS; i++) {
+               lapic_to_cpu[i] = -1;
+               cpu_to_lapic[i] = -1;
+       }
+}
+
+void
+lapic_cpu_map(int apic_id, int cpu)
+{
+       cpu_to_lapic[cpu] = apic_id;
+       lapic_to_cpu[apic_id] = cpu;
+}
+
+/*
+ * Retrieve the local APIC ID for a cpu.
+ *
+ * Returns the local APIC ID for the given processor.
+ * If the processor does not exist or the APIC is not configured, returns -1.
+ */
+
+uint32_t
+ml_get_apicid(uint32_t cpu)
+{
+       if(cpu >= (uint32_t)MAX_CPUS)
+               return 0xFFFFFFFF;      /* Return -1 if cpu too big */
+       
+       /* Return the apic ID (or -1 if not configured) */
+       return (uint32_t)cpu_to_lapic[cpu];
+
+}
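A caller must check for the all-ones sentinel before using the result, for example (hypothetical caller):

	uint32_t apicid = ml_get_apicid(cpu);

	if (apicid == 0xFFFFFFFF)
		return;		/* no such cpu, or APIC unconfigured */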
+
+#ifdef MP_DEBUG
+static void
+lapic_cpu_map_dump(void)
+{
+       int     i;
+
+       for (i = 0; i < MAX_CPUS; i++) {
+               if (cpu_to_lapic[i] == -1)
+                       continue;
+               kprintf("cpu_to_lapic[%d]: %d\n",
+                       i, cpu_to_lapic[i]);
+       }
+       for (i = 0; i < MAX_CPUS; i++) {
+               if (lapic_to_cpu[i] == -1)
+                       continue;
+               kprintf("lapic_to_cpu[%d]: %d\n",
+                       i, lapic_to_cpu[i]);
+       }
+}
+#endif /* MP_DEBUG */
+
+void
+lapic_init(void)
+{
+       int             result;
+       vm_map_entry_t  entry;
+       uint32_t        lo;
+       uint32_t        hi;
+       boolean_t       is_boot_processor;
+       boolean_t       is_lapic_enabled;
+       vm_offset_t     lapic_base;
+
+       /* Examine the local APIC state */
+       rdmsr(MSR_IA32_APIC_BASE, lo, hi);
+       is_boot_processor = (lo & MSR_IA32_APIC_BASE_BSP) != 0;
+       is_lapic_enabled  = (lo & MSR_IA32_APIC_BASE_ENABLE) != 0;
+       lapic_base = (lo &  MSR_IA32_APIC_BASE_BASE);
+       kprintf("MSR_IA32_APIC_BASE 0x%x %s %s\n", lapic_base,
+               is_lapic_enabled ? "enabled" : "disabled",
+               is_boot_processor ? "BSP" : "AP");
+       if (!is_boot_processor || !is_lapic_enabled)
+               panic("Unexpected local APIC state\n");
+
+       /* Establish a map to the local apic */
+       lapic_start = vm_map_min(kernel_map);
+       result = vm_map_find_space(kernel_map,
+                                  (vm_map_address_t *) &lapic_start,
+                                  round_page(LAPIC_SIZE), 0,
+                                  VM_MAKE_TAG(VM_MEMORY_IOKIT), &entry);
+       if (result != KERN_SUCCESS) {
+               panic("lapic_init: vm_map_find_space FAILED (err=%d)", result);
+       }
+       vm_map_unlock(kernel_map);
+/* Map in the local APIC non-cacheable, as recommended by Intel
+ * in section 8.4.1 of the "System Programming Guide".
+ */
+       pmap_enter(pmap_kernel(),
+                       lapic_start,
+                       (ppnum_t) i386_btop(lapic_base),
+                       VM_PROT_READ|VM_PROT_WRITE,
+                       VM_WIMG_IO,
+                       TRUE);
+       lapic_id = (unsigned long)(lapic_start + LAPIC_ID);
+
+       if ((LAPIC_READ(VERSION)&LAPIC_VERSION_MASK) < 0x14) {
+               printf("Local APIC version 0x%x, 0x14 or greater expected\n",
+                       (LAPIC_READ(VERSION)&LAPIC_VERSION_MASK));
+       }
+
+       /* Set up the lapic_id <-> cpu_number map and add this boot processor */
+       lapic_cpu_map_init();
+       lapic_cpu_map((LAPIC_READ(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK, 0);
+       kprintf("Boot cpu local APIC id 0x%x\n", cpu_to_lapic[0]);
+}
+
+
+static int
+lapic_esr_read(void)
+{
+       /* write-read register */
+       LAPIC_WRITE(ERROR_STATUS, 0);
+       return LAPIC_READ(ERROR_STATUS);
+}
+
+static void 
+lapic_esr_clear(void)
+{
+       LAPIC_WRITE(ERROR_STATUS, 0);
+       LAPIC_WRITE(ERROR_STATUS, 0);
+}
+
+static const char *DM_str[8] = {
+       "Fixed",
+       "Lowest Priority",
+       "Invalid",
+       "Invalid",
+       "NMI",
+       "Reset",
+       "Invalid",
+       "ExtINT"};
+
+void
+lapic_dump(void)
+{
+       int     i;
+
+#define BOOL(a) ((a)?' ':'!')
+#define VEC(lvt) \
+       LAPIC_READ(lvt)&LAPIC_LVT_VECTOR_MASK
+#define        DS(lvt) \
+       (LAPIC_READ(lvt)&LAPIC_LVT_DS_PENDING)?" SendPending" : "Idle"
+#define DM(lvt) \
+       DM_str[(LAPIC_READ(lvt)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK]
+#define MASK(lvt) \
+       BOOL(LAPIC_READ(lvt)&LAPIC_LVT_MASKED)
+#define TM(lvt) \
+       (LAPIC_READ(lvt)&LAPIC_LVT_TM_LEVEL)? "Level" : "Edge"
+#define IP(lvt) \
+       (LAPIC_READ(lvt)&LAPIC_LVT_IP_PLRITY_LOW)? "Low " : "High"
+
+       kprintf("LAPIC %d at 0x%x version 0x%x\n", 
+               (LAPIC_READ(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK,
+               lapic_start,
+               LAPIC_READ(VERSION)&LAPIC_VERSION_MASK);
+       kprintf("Priorities: Task 0x%x  Arbitration 0x%x  Processor 0x%x\n",
+               LAPIC_READ(TPR)&LAPIC_TPR_MASK,
+               LAPIC_READ(APR)&LAPIC_APR_MASK,
+               LAPIC_READ(PPR)&LAPIC_PPR_MASK);
+       kprintf("Destination Format 0x%x Logical Destination 0x%x\n",
+               LAPIC_READ(DFR)>>LAPIC_DFR_SHIFT,
+               LAPIC_READ(LDR)>>LAPIC_LDR_SHIFT);
+       kprintf("%cEnabled %cFocusChecking SV 0x%x\n",
+               BOOL(LAPIC_READ(SVR)&LAPIC_SVR_ENABLE),
+               BOOL(!(LAPIC_READ(SVR)&LAPIC_SVR_FOCUS_OFF)),
+               LAPIC_READ(SVR) & LAPIC_SVR_MASK);
+       kprintf("LVT_TIMER:   Vector 0x%02x %s %cmasked %s\n",
+               VEC(LVT_TIMER),
+               DS(LVT_TIMER),
+               MASK(LVT_TIMER),
+               (LAPIC_READ(LVT_TIMER)&LAPIC_LVT_PERIODIC)?"Periodic":"OneShot");
+       kprintf("  Initial Count: 0x%08x \n", LAPIC_READ(TIMER_INITIAL_COUNT));
+       kprintf("  Current Count: 0x%08x \n", LAPIC_READ(TIMER_CURRENT_COUNT));
+       kprintf("  Divide Config: 0x%08x \n", LAPIC_READ(TIMER_DIVIDE_CONFIG));
+       kprintf("LVT_PERFCNT: Vector 0x%02x [%s] %s %cmasked\n",
+               VEC(LVT_PERFCNT),
+               DM(LVT_PERFCNT),
+               DS(LVT_PERFCNT),
+               MASK(LVT_PERFCNT));
+       kprintf("LVT_THERMAL: Vector 0x%02x [%s] %s %cmasked\n",
+               VEC(LVT_THERMAL),
+               DM(LVT_THERMAL),
+               DS(LVT_THERMAL),
+               MASK(LVT_THERMAL));
+       kprintf("LVT_LINT0:   Vector 0x%02x [%s][%s][%s] %s %cmasked\n",
+               VEC(LVT_LINT0),
+               DM(LVT_LINT0),
+               TM(LVT_LINT0),
+               IP(LVT_LINT0),
+               DS(LVT_LINT0),
+               MASK(LVT_LINT0));
+       kprintf("LVT_LINT1:   Vector 0x%02x [%s][%s][%s] %s %cmasked\n",
+               VEC(LVT_LINT1),
+               DM(LVT_LINT1),
+               TM(LVT_LINT1),
+               IP(LVT_LINT1),
+               DS(LVT_LINT1),
+               MASK(LVT_LINT1));
+       kprintf("LVT_ERROR:   Vector 0x%02x %s %cmasked\n",
+               VEC(LVT_ERROR),
+               DS(LVT_ERROR),
+               MASK(LVT_ERROR));
+       kprintf("ESR: %08x \n", lapic_esr_read());
+       kprintf("       ");
+       for(i=0xf; i>=0; i--)
+               kprintf("%x%x%x%x",i,i,i,i);
+       kprintf("\n");
+       kprintf("TMR: 0x");
+       for(i=7; i>=0; i--)
+               kprintf("%08x",LAPIC_READ_OFFSET(TMR_BASE, i*0x10));
+       kprintf("\n");
+       kprintf("IRR: 0x");
+       for(i=7; i>=0; i--)
+               kprintf("%08x",LAPIC_READ_OFFSET(IRR_BASE, i*0x10));
+       kprintf("\n");
+       kprintf("ISR: 0x");
+       for(i=7; i >= 0; i--)
+               kprintf("%08x",LAPIC_READ_OFFSET(ISR_BASE, i*0x10));
+       kprintf("\n");
+}
+
+#if MACH_KDB
+/*
+ *     Display local APIC state
+ *
+ *     da
+ */
+void 
+db_apic(__unused db_expr_t addr,
+       __unused int have_addr,
+       __unused db_expr_t count,
+       __unused char *modif)
+{
+
+       lapic_dump();
+
+       return;
+}
+
+#endif
+
+boolean_t
+lapic_probe(void)
+{
+       uint32_t        lo;
+       uint32_t        hi;
+
+       if (cpuid_features() & CPUID_FEATURE_APIC)
+               return TRUE;
+
+       if (cpuid_family() == 6 || cpuid_family() == 15) {
+               /*
+                * Mobile Pentiums:
+                * There may be a local APIC which wasn't enabled by BIOS.
+                * So we try to enable it explicitly.
+                */
+               rdmsr(MSR_IA32_APIC_BASE, lo, hi);
+               lo &= ~MSR_IA32_APIC_BASE_BASE;
+               lo |= MSR_IA32_APIC_BASE_ENABLE | LAPIC_START;
+               lo |= MSR_IA32_APIC_BASE_ENABLE;
+               wrmsr(MSR_IA32_APIC_BASE, lo, hi);
+
+               /*
+                * Re-initialize cpu features info and re-check.
+                */
+               cpuid_set_info();
+               if (cpuid_features() & CPUID_FEATURE_APIC) {
+                       printf("Local APIC discovered and enabled\n");
+                       lapic_os_enabled = TRUE;
+                       lapic_interrupt_base = LAPIC_REDUCED_INTERRUPT_BASE;
+                       return TRUE;
+               }
+       }
+
+       return FALSE;
+}
+
+void
+lapic_shutdown(void)
+{
+       uint32_t lo;
+       uint32_t hi;
+       uint32_t value;
+
+       /* Shutdown if local APIC was enabled by OS */
+       if (lapic_os_enabled == FALSE)
+               return;
+
+       mp_disable_preemption();
+
+       /* ExtINT: masked */
+       if (get_cpu_number() == master_cpu) {
+               value = LAPIC_READ(LVT_LINT0);
+               value |= LAPIC_LVT_MASKED;
+               LAPIC_WRITE(LVT_LINT0, value);
+       }
+
+       /* Timer: masked */
+       LAPIC_WRITE(LVT_TIMER, LAPIC_READ(LVT_TIMER) | LAPIC_LVT_MASKED);
+
+       /* Perfmon: masked */
+       LAPIC_WRITE(LVT_PERFCNT, LAPIC_READ(LVT_PERFCNT) | LAPIC_LVT_MASKED);
+
+       /* Error: masked */
+       LAPIC_WRITE(LVT_ERROR, LAPIC_READ(LVT_ERROR) | LAPIC_LVT_MASKED);
+
+       /* APIC software disabled */
+       LAPIC_WRITE(SVR, LAPIC_READ(SVR) & ~LAPIC_SVR_ENABLE);
+
+       /* Bypass the APIC completely and update cpu features */
+       rdmsr(MSR_IA32_APIC_BASE, lo, hi);
+       lo &= ~MSR_IA32_APIC_BASE_ENABLE;
+       wrmsr(MSR_IA32_APIC_BASE, lo, hi);
+       cpuid_set_info();
+
+       mp_enable_preemption();
+}
+
+void
+lapic_configure(void)
+{
+       int     value;
+
+       /* Set flat delivery model, logical processor id */
+       LAPIC_WRITE(DFR, LAPIC_DFR_FLAT);
+       LAPIC_WRITE(LDR, (get_cpu_number()) << LAPIC_LDR_SHIFT);
+
+       /* Accept all */
+       LAPIC_WRITE(TPR, 0);
+
+       LAPIC_WRITE(SVR, LAPIC_VECTOR(SPURIOUS) | LAPIC_SVR_ENABLE);
+
+       /* ExtINT */
+       if (get_cpu_number() == master_cpu) {
+               value = LAPIC_READ(LVT_LINT0);
+               value &= ~LAPIC_LVT_MASKED;
+               value |= LAPIC_LVT_DM_EXTINT;
+               LAPIC_WRITE(LVT_LINT0, value);
+       }
+
+       /* Timer: unmasked, one-shot */
+       LAPIC_WRITE(LVT_TIMER, LAPIC_VECTOR(TIMER));
+
+       /* Perfmon: unmasked */
+       LAPIC_WRITE(LVT_PERFCNT, LAPIC_VECTOR(PERFCNT));
+
+       /* Thermal: unmasked */
+       LAPIC_WRITE(LVT_THERMAL, LAPIC_VECTOR(THERMAL));
+
+       lapic_esr_clear();
+
+       LAPIC_WRITE(LVT_ERROR, LAPIC_VECTOR(ERROR));
+}
+
+void
+lapic_set_timer(
+       boolean_t               interrupt,
+       lapic_timer_mode_t      mode,
+       lapic_timer_divide_t    divisor,
+       lapic_timer_count_t     initial_count)
+{
+       boolean_t       state;
+       uint32_t        timer_vector;
+
+       state = ml_set_interrupts_enabled(FALSE);
+       timer_vector = LAPIC_READ(LVT_TIMER);
+       timer_vector &= ~(LAPIC_LVT_MASKED|LAPIC_LVT_PERIODIC);
+       timer_vector |= interrupt ? 0 : LAPIC_LVT_MASKED;
+       timer_vector |= (mode == periodic) ? LAPIC_LVT_PERIODIC : 0;
+       LAPIC_WRITE(LVT_TIMER, timer_vector);
+       LAPIC_WRITE(TIMER_DIVIDE_CONFIG, divisor);
+       LAPIC_WRITE(TIMER_INITIAL_COUNT, initial_count);
+       ml_set_interrupts_enabled(state);
+}
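As a usage illustration (the count value is invented), arming an interrupting one-shot countdown with no divide would be, using the enums lapic.h below defines:

	/* deliver one timer interrupt after 'count' undivided bus clocks */
	lapic_set_timer(TRUE, one_shot, divide_by_1, count);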
+
+void
+lapic_get_timer(
+       lapic_timer_mode_t      *mode,
+       lapic_timer_divide_t    *divisor,
+       lapic_timer_count_t     *initial_count,
+       lapic_timer_count_t     *current_count)
+{
+       boolean_t       state;
+
+       state = ml_set_interrupts_enabled(FALSE);
+       if (mode)
+               *mode = (LAPIC_READ(LVT_TIMER) & LAPIC_LVT_PERIODIC) ?
+                               periodic : one_shot;
+       if (divisor)
+               *divisor = LAPIC_READ(TIMER_DIVIDE_CONFIG) & LAPIC_TIMER_DIVIDE_MASK;
+       if (initial_count)
+               *initial_count = LAPIC_READ(TIMER_INITIAL_COUNT);
+       if (current_count)
+               *current_count = LAPIC_READ(TIMER_CURRENT_COUNT);
+       ml_set_interrupts_enabled(state);
+} 
+
+static inline void
+_lapic_end_of_interrupt(void)
+{
+       LAPIC_WRITE(EOI, 0);
+}
+
+void
+lapic_end_of_interrupt(void)
+{
+       _lapic_end_of_interrupt();
+}
+
+void
+lapic_set_intr_func(int vector, i386_intr_func_t func)
+{
+       if (vector > lapic_interrupt_base)
+               vector -= lapic_interrupt_base;
+
+       switch (vector) {
+       case LAPIC_NMI_INTERRUPT:
+       case LAPIC_INTERPROCESSOR_INTERRUPT:
+       case LAPIC_TIMER_INTERRUPT:
+       case LAPIC_THERMAL_INTERRUPT:
+       case LAPIC_PERFCNT_INTERRUPT:
+               lapic_intr_func[vector] = func;
+               break;
+       default:
+               panic("lapic_set_intr_func(%d,%p) invalid vector\n",
+                       vector, func);
+       }
+}
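Clients hand in handlers by vector. A performance-monitor consumer (handler name invented) would register itself like this, or via the lapic_set_pmi_func() convenience wrapper that lapic.h below layers on top:

	static int
	my_pmi_handler(x86_saved_state_t *state)
	{
		/* ... service the PMI ... */
		return 1;	/* handled */
	}

	lapic_set_intr_func(LAPIC_VECTOR(PERFCNT), my_pmi_handler);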
+
+int
+lapic_interrupt(int interrupt, x86_saved_state_t *state)
+{
+       int     retval = 0;
+
+       interrupt -= lapic_interrupt_base;
+       if (interrupt < 0) {
+               if (interrupt == (LAPIC_NMI_INTERRUPT - lapic_interrupt_base) &&
+                   lapic_intr_func[LAPIC_NMI_INTERRUPT] != NULL) {
+                       retval = (*lapic_intr_func[LAPIC_NMI_INTERRUPT])(state);
+                       _lapic_end_of_interrupt();
+                       return retval;
+               }
+               else
+                       return 0;
+       }
+
+       switch(interrupt) {
+       case LAPIC_TIMER_INTERRUPT:
+       case LAPIC_THERMAL_INTERRUPT:
+       case LAPIC_INTERPROCESSOR_INTERRUPT:
+               if (lapic_intr_func[interrupt] != NULL)
+                       (void) (*lapic_intr_func[interrupt])(state);
+               if (interrupt == LAPIC_PERFCNT_INTERRUPT)
+                       LAPIC_WRITE(LVT_PERFCNT, LAPIC_VECTOR(PERFCNT));
+               _lapic_end_of_interrupt();
+               retval = 1;
+               break;
+       case LAPIC_ERROR_INTERRUPT:
+               lapic_dump();
+               panic("Local APIC error\n");
+               _lapic_end_of_interrupt();
+               retval = 1;
+               break;
+       case LAPIC_SPURIOUS_INTERRUPT:
+               kprintf("SPIV\n");
+               /* No EOI required here */
+               retval = 1;
+               break;
+       }
+
+       return retval;
+}
+
+void
+lapic_smm_restore(void)
+{
+       boolean_t state;
+
+       if (lapic_os_enabled == FALSE)
+               return;
+
+       state = ml_set_interrupts_enabled(FALSE);
+
+       if (LAPIC_ISR_IS_SET(LAPIC_REDUCED_INTERRUPT_BASE, TIMER)) {
+               /*
+                * A bogus SMI handler can enable interrupts without knowing
+                * about local APIC interrupt sources.  If the APIC timer
+                * counts down to zero while in SMM, the local APIC is left
+                * waiting for an EOI although no interrupt reached the OS.
+                */
+               _lapic_end_of_interrupt();
+
+               /*
+                * The timer is one-shot; if it has already expired, arm a
+                * short countdown so a fresh timer interrupt is delivered.
+                */
+               if (LAPIC_READ(TIMER_CURRENT_COUNT) == 0) {
+                       LAPIC_WRITE(TIMER_INITIAL_COUNT, 1);
+               }
+
+               kprintf("lapic_smm_restore\n");
+       }
+
+       ml_set_interrupts_enabled(state);
+}
+
diff --git a/osfmk/i386/lapic.h b/osfmk/i386/lapic.h
new file mode 100644 (file)
index 0000000..4fa8556
--- /dev/null
@@ -0,0 +1,253 @@
+/*
+ * Copyright (c) 2008 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+/*
+ * @OSF_COPYRIGHT@
+ * 
+ */
+#ifndef _I386_LAPIC_H_
+#define _I386_LAPIC_H_
+
+#define LAPIC_START                    0xFEE00000
+#define LAPIC_SIZE                     0x00000400
+
+#define LAPIC_ID                       0x00000020
+#define                LAPIC_ID_SHIFT          24
+#define                LAPIC_ID_MASK           0xFF
+#define LAPIC_VERSION                  0x00000030
+#define                LAPIC_VERSION_MASK      0xFF
+#define LAPIC_TPR                      0x00000080
+#define                LAPIC_TPR_MASK          0xFF
+#define LAPIC_APR                      0x00000090
+#define                LAPIC_APR_MASK          0xFF
+#define LAPIC_PPR                      0x000000A0
+#define                LAPIC_PPR_MASK          0xFF
+#define LAPIC_EOI                      0x000000B0
+#define LAPIC_REMOTE_READ              0x000000C0
+#define LAPIC_LDR                      0x000000D0
+#define                LAPIC_LDR_SHIFT         24
+#define LAPIC_DFR                      0x000000E0
+#define                LAPIC_DFR_FLAT          0xFFFFFFFF
+#define                LAPIC_DFR_CLUSTER       0x0FFFFFFF
+#define                LAPIC_DFR_SHIFT         28
+#define LAPIC_SVR                      0x000000F0
+#define                LAPIC_SVR_MASK          0x0FF
+#define                LAPIC_SVR_ENABLE        0x100
+#define                LAPIC_SVR_FOCUS_OFF     0x200
+#define LAPIC_ISR_BASE                 0x00000100
+#define LAPIC_TMR_BASE                 0x00000180
+#define LAPIC_IRR_BASE                 0x00000200
+#define LAPIC_ERROR_STATUS             0x00000280
+#define LAPIC_ICR                      0x00000300
+#define                LAPIC_ICR_VECTOR_MASK   0x000FF
+#define                LAPIC_ICR_DM_MASK       0x00700
+#define                LAPIC_ICR_DM_FIXED      0x00000
+#define                LAPIC_ICR_DM_LOWEST     0x00100
+#define                LAPIC_ICR_DM_SMI        0x00200
+#define                LAPIC_ICR_DM_REMOTE     0x00300
+#define                LAPIC_ICR_DM_NMI        0x00400
+#define                LAPIC_ICR_DM_INIT       0x00500
+#define                LAPIC_ICR_DM_STARTUP    0x00600
+#define                LAPIC_ICR_DM_LOGICAL    0x00800
+#define                LAPIC_ICR_DS_PENDING    0x01000
+#define                LAPIC_ICR_LEVEL_ASSERT  0x04000
+#define                LAPIC_ICR_TRIGGER_LEVEL 0x08000
+#define                LAPIC_ICR_RR_MASK       0x30000
+#define                LAPIC_ICR_RR_INVALID    0x00000
+#define                LAPIC_ICR_RR_INPROGRESS 0x10000
+#define                LAPIC_ICR_RR_VALID      0x20000
+#define                LAPIC_ICR_DSS_MASK      0xC0000
+#define                LAPIC_ICR_DSS_DEST      0x00000
+#define                LAPIC_ICR_DSS_SELF      0x40000
+#define                LAPIC_ICR_DSS_ALL       0x80000
+#define                LAPIC_ICR_DSS_OTHERS    0xC0000
+#define LAPIC_ICRD                     0x00000310
+#define                LAPIC_ICRD_DEST_SHIFT   24
+#define LAPIC_LVT_TIMER                        0x00000320
+#define LAPIC_LVT_THERMAL              0x00000330
+#define LAPIC_LVT_PERFCNT              0x00000340
+#define LAPIC_LVT_LINT0                        0x00000350
+#define LAPIC_LVT_LINT1                        0x00000360
+#define LAPIC_LVT_ERROR                        0x00000370
+#define                LAPIC_LVT_VECTOR_MASK   0x000FF
+#define                LAPIC_LVT_DM_SHIFT      8
+#define                LAPIC_LVT_DM_MASK       0x00007
+#define                LAPIC_LVT_DM_FIXED      0x00000
+#define                LAPIC_LVT_DM_NMI        0x00400
+#define                LAPIC_LVT_DM_EXTINT     0x00700
+#define                LAPIC_LVT_DS_PENDING    0x01000
+#define                LAPIC_LVT_IP_PLRITY_LOW 0x02000
+#define                LAPIC_LVT_REMOTE_IRR    0x04000
+#define                LAPIC_LVT_TM_LEVEL      0x08000
+#define                LAPIC_LVT_MASKED        0x10000
+#define                LAPIC_LVT_PERIODIC      0x20000
+#define LAPIC_TIMER_INITIAL_COUNT      0x00000380
+#define LAPIC_TIMER_CURRENT_COUNT      0x00000390
+#define LAPIC_TIMER_DIVIDE_CONFIG      0x000003E0
+/* divisor encoded by bits 0,1,3 with bit 2 always 0: */
+#define        LAPIC_TIMER_DIVIDE_MASK 0x0000000F
+#define        LAPIC_TIMER_DIVIDE_2    0x00000000
+#define        LAPIC_TIMER_DIVIDE_4    0x00000001
+#define        LAPIC_TIMER_DIVIDE_8    0x00000002
+#define        LAPIC_TIMER_DIVIDE_16   0x00000003
+#define        LAPIC_TIMER_DIVIDE_32   0x00000008
+#define        LAPIC_TIMER_DIVIDE_64   0x00000009
+#define        LAPIC_TIMER_DIVIDE_128  0x0000000A
+#define        LAPIC_TIMER_DIVIDE_1    0x0000000B
+
+#define LAPIC_ID_MAX                   (LAPIC_ID_MASK)
+
+#define CPU_NUMBER(r)                          \
+       movl    %gs:CPU_NUMBER_GS,r
+
+#define CPU_NUMBER_FROM_LAPIC(r)               \
+       movl    EXT(lapic_id),r;                \
+       movl    0(r),r;                         \
+       shrl    $(LAPIC_ID_SHIFT),r;            \
+       andl    $(LAPIC_ID_MASK),r;             \
+       movl    EXT(lapic_to_cpu)(,r,4),r
+
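In C terms the CPU_NUMBER_FROM_LAPIC macro amounts to the following (a sketch; the real macro is assembled, not compiled, and lapic_id is the variable lapic.c initializes to the address of the mapped ID register):

	static inline int
	cpu_number_from_lapic(void)
	{
		/* lapic_id holds the address of the memory-mapped ID register */
		uint32_t id = *(volatile uint32_t *)lapic_id;

		return lapic_to_cpu[(id >> LAPIC_ID_SHIFT) & LAPIC_ID_MASK];
	}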
+#ifndef        ASSEMBLER
+#include <stdint.h>
+#include <sys/cdefs.h>
+#include <mach/boolean.h>
+#include <mach/kern_return.h>
+typedef enum {
+       periodic,
+       one_shot
+} lapic_timer_mode_t;
+typedef enum { 
+       divide_by_1   = LAPIC_TIMER_DIVIDE_1,
+       divide_by_2   = LAPIC_TIMER_DIVIDE_2,
+       divide_by_4   = LAPIC_TIMER_DIVIDE_4,
+       divide_by_8   = LAPIC_TIMER_DIVIDE_8,
+       divide_by_16  = LAPIC_TIMER_DIVIDE_16,
+       divide_by_32  = LAPIC_TIMER_DIVIDE_32,
+       divide_by_64  = LAPIC_TIMER_DIVIDE_64,
+       divide_by_128 = LAPIC_TIMER_DIVIDE_128
+} lapic_timer_divide_t;
+typedef uint32_t lapic_timer_count_t;
+
+/*
+ * By default, use high vectors to leave vector space for systems
+ * with multiple I/O APICs. However, some systems that boot with the
+ * local APIC disabled hang in SMM when vectors greater than 0x5F
+ * are used. Those systems are not expected to have an I/O APIC, so
+ * 16 (0x50 - 0x40) vectors for legacy PIC support suffice.
+ */
+#define LAPIC_DEFAULT_INTERRUPT_BASE   0xD0
+#define LAPIC_REDUCED_INTERRUPT_BASE   0x50
+/*
+ * Specific lapic interrupts are relative to this base
+ * in priority order from high to low:
+ */
+
+#define LAPIC_PERFCNT_INTERRUPT                0xF
+#define LAPIC_TIMER_INTERRUPT          0xE
+#define LAPIC_INTERPROCESSOR_INTERRUPT 0xD
+#define LAPIC_THERMAL_INTERRUPT                0xC
+#define LAPIC_ERROR_INTERRUPT          0xB
+#define LAPIC_SPURIOUS_INTERRUPT       0xA
+#define LAPIC_CMCI_INTERRUPT           0x9
+/* The vector field is ignored for NMI interrupts via the LAPIC
+ * or otherwise, so this is not an offset from the interrupt
+ * base.
+ */
+#define LAPIC_NMI_INTERRUPT            0x2
+#define LAPIC_FUNC_TABLE_SIZE          LAPIC_PERFCNT_INTERRUPT
+
+#define LAPIC_WRITE(reg,val) \
+       *((volatile uint32_t *)(lapic_start + LAPIC_##reg)) = (val)
+#define LAPIC_READ(reg) \
+       (*((volatile uint32_t *)(lapic_start + LAPIC_##reg)))
+#define LAPIC_READ_OFFSET(reg,off) \
+       (*((volatile uint32_t *)(lapic_start + LAPIC_##reg + (off))))
+
+#define LAPIC_VECTOR(src) \
+       (lapic_interrupt_base + LAPIC_##src##_INTERRUPT)
+
+#define LAPIC_ISR_IS_SET(base,src) \
+       (LAPIC_READ_OFFSET(ISR_BASE,((base+LAPIC_##src##_INTERRUPT)/32)*0x10) \
+               & (1 <<((base + LAPIC_##src##_INTERRUPT)%32)))
+
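For instance, the software-enable bit in the spurious-interrupt vector register is tested and cleared through these accessors just as lapic_shutdown() above does:

	if (LAPIC_READ(SVR) & LAPIC_SVR_ENABLE)
		LAPIC_WRITE(SVR, LAPIC_READ(SVR) & ~LAPIC_SVR_ENABLE);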
+extern vm_offset_t     lapic_start;
+
+extern void            lapic_init(void);
+extern void            lapic_configure(void);
+extern void            lapic_shutdown(void);
+extern void            lapic_smm_restore(void);
+extern boolean_t       lapic_probe(void);
+extern void            lapic_dump(void);
+extern int             lapic_interrupt(
+                               int interrupt, x86_saved_state_t *state);
+extern void            lapic_end_of_interrupt(void);
+extern int             lapic_to_cpu[];
+extern int             cpu_to_lapic[];
+extern int             lapic_interrupt_base;
+extern void            lapic_cpu_map(int lapic, int cpu_num);
+extern uint32_t                ml_get_apicid(uint32_t cpu);
+
+extern void            lapic_set_timer(
+                               boolean_t               interrupt,
+                               lapic_timer_mode_t      mode,
+                               lapic_timer_divide_t    divisor,
+                               lapic_timer_count_t     initial_count);
+
+extern void            lapic_get_timer(
+                               lapic_timer_mode_t      *mode,
+                               lapic_timer_divide_t    *divisor,
+                               lapic_timer_count_t     *initial_count,
+                               lapic_timer_count_t     *current_count);
+
+typedef        int (*i386_intr_func_t)(x86_saved_state_t *state);
+extern void            lapic_set_intr_func(int intr, i386_intr_func_t func);
+
+static inline void     lapic_set_timer_func(i386_intr_func_t func)
+{
+       lapic_set_intr_func(LAPIC_VECTOR(TIMER), func);
+}
+static inline void     lapic_set_pmi_func(i386_intr_func_t func)
+{
+       lapic_set_intr_func(LAPIC_VECTOR(PERFCNT), func);
+}
+static inline void     lapic_set_thermal_func(i386_intr_func_t func)
+{
+       lapic_set_intr_func(LAPIC_VECTOR(THERMAL), func);
+}
+
+#ifdef MP_DEBUG
+#define LAPIC_CPU_MAP_DUMP()   lapic_cpu_map_dump()
+#define LAPIC_DUMP()           lapic_dump()
+#else
+#define LAPIC_CPU_MAP_DUMP()
+#define LAPIC_DUMP()
+#endif /* MP_DEBUG */
+
+#endif /* ASSEMBLER */
+
+#endif /* _I386_LAPIC_H_ */
+
index 9fa0eb36fe9ea795f2dcc4b8ab001f3e0c7667fd..5b57ee4cc8e4bc5a48b7f6e59b53ddeee4a8d988 100644 (file)
@@ -66,6 +66,8 @@
 #include <i386/asm.h>
 #include <i386/cpuid.h>
 #include <i386/eflags.h>
+#include <i386/lapic.h>
+#include <i386/rtclock.h>
 #include <i386/proc_reg.h>
 #include <i386/trap.h>
 #include <assym.s>
@@ -232,34 +234,12 @@ Entry(timer_grab)
  * Nanotime returned in %edx:%eax.
  * Computed from tsc based on the scale factor
  * and an implicit 32 bit shift.
- * This code must match what _rtc_nanotime_read does in
- * i386/machine_routines_asm.s.  Failure to do so can
- * result in "weird" timing results.
  *
  * Uses %eax, %ebx, %ecx, %edx, %esi, %edi.
  */
-#define RNT_INFO               _rtc_nanotime_info
 #define NANOTIME                                                       \
-       lea     RNT_INFO,%edi                                           ; \
-0:                                                                     ; \
-       movl    RNT_GENERATION(%edi),%esi       /* being updated? */    ; \
-       testl   %esi,%esi                                               ; \
-       jz      0b                              /* wait until done */   ; \
-       rdtsc                                                           ; \
-       subl    RNT_TSC_BASE(%edi),%eax                                 ; \
-       sbbl    RNT_TSC_BASE+4(%edi),%edx       /* tsc - tsc_base */    ; \
-       movl    RNT_SCALE(%edi),%ecx            /* * scale factor */    ; \
-       movl    %edx,%ebx                                               ; \
-       mull    %ecx                                                    ; \
-       movl    %ebx,%eax                                               ; \
-       movl    %edx,%ebx                                               ; \
-       mull    %ecx                                                    ; \
-       addl    %ebx,%eax                                               ; \
-       adcl    $0,%edx                                                 ; \
-       addl    RNT_NS_BASE(%edi),%eax          /* + ns_base */         ; \
-       adcl    RNT_NS_BASE+4(%edi),%edx                                ; \
-       cmpl    RNT_GENERATION(%edi),%esi       /* check for update */  ; \
-       jne     0b                              /* do it all again */
+       mov     %gs:CPU_NANOTIME,%edi                                   ; \
+       RTC_NANOTIME_READ_FAST()
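The retired macro body computed ns = (((tsc - tsc_base) * scale) >> 32) + ns_base, re-checking a generation word around the arithmetic so that a concurrent update of the conversion factors restarts the read; the replacement defers to RTC_NANOTIME_READ_FAST() against the per-cpu structure at %gs:CPU_NANOTIME. A C rendering of the old computation follows; the struct layout and field names are inferred from the RNT_* offsets, rdtsc64() is assumed to be the usual proc_reg.h helper, and the assembly performs the multiply in 96-bit precision where this sketch truncates to 64 bits:

	/* assumed layout, mirroring the RNT_* assembly offsets */
	typedef struct {
		volatile uint32_t generation;	/* 0 while being updated */
		uint64_t          tsc_base;
		uint64_t          ns_base;
		uint32_t          scale;
	} rtc_nanotime_sketch_t;

	static uint64_t
	nanotime_sketch(const volatile rtc_nanotime_sketch_t *rnt)
	{
		uint32_t gen;
		uint64_t ns;

		do {
			while ((gen = rnt->generation) == 0)
				;		/* updater in progress */
			ns = (((rdtsc64() - rnt->tsc_base) *
			    (uint64_t)rnt->scale) >> 32) + rnt->ns_base;
		} while (gen != rnt->generation);	/* retry if it moved */
		return ns;
	}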
 
 
 /*
index 79adff8276a65cd0b3075c9adbda8cd873623665..23f26fc50db41d21f45853763782b42f16681c8b 100644 (file)
  */
 
 #include <kern/kalloc.h>
+#include <mach/mach_time.h>
 #include <i386/cpu_data.h>
 #include <i386/cpuid.h>
+#include <i386/cpu_topology.h>
+#include <i386/cpu_threads.h>
+#include <i386/machine_cpu.h>
 #include <i386/machine_check.h>
 #include <i386/proc_reg.h>
 
@@ -44,8 +48,6 @@ static boolean_t      mca_threshold_status_present = FALSE;
 static boolean_t       mca_extended_MSRs_present = FALSE;
 static unsigned int    mca_extended_MSRs_count = 0;
 static ia32_mcg_cap_t  ia32_mcg_cap;
-static boolean_t       mca_exception_taken = FALSE;
-
 decl_simple_lock_data(static, mca_lock);
 
 typedef struct {
@@ -61,6 +63,13 @@ typedef struct mca_state {
        mca_mci_bank_t          mca_error_bank[0];
 } mca_state_t;
 
+typedef enum {
+       CLEAR,
+       DUMPING,
+       DUMPED
+} mca_dump_state_t;
+static volatile mca_dump_state_t mca_dump_state = CLEAR;
+
 static void
 mca_get_availability(void)
 {
@@ -161,15 +170,13 @@ mca_cpu_alloc(cpu_data_t  *cdp)
 }
 
 static void
-mca_save_state(void)
+mca_save_state(mca_state_t *mca_state)
 {
-       mca_state_t     *mca_state;
        mca_mci_bank_t  *bank;
        unsigned int    i;
 
        assert(!ml_get_interrupts_enabled() || get_preemption_level() > 0);
 
-       mca_state = (mca_state_t *) current_cpu_datap()->cpu_mca_state;
        if  (mca_state == NULL)
                return;
 
@@ -193,8 +200,8 @@ mca_save_state(void)
 void
 mca_check_save(void)
 {
-       if (mca_exception_taken)
-               mca_save_state();
+       if (mca_dump_state > CLEAR)
+               mca_save_state(current_cpu_datap()->cpu_mca_state);
 }
 
 static void mca_dump_64bit_state(void)
@@ -250,15 +257,14 @@ mca_report_cpu_info(void)
 
        // microcode revision is top 32 bits of MSR_IA32_UCODE_REV
        microcode = rdmsr64(MSR_IA32_UCODE_REV) >> 32;
-       kdb_printf("family: %d model: %d stepping: %d microcode revision %d\n", 
+       kdb_printf(" family: %d model: %d stepping: %d microcode: %d\n",
                infop->cpuid_family,
                infop->cpuid_model,
                infop->cpuid_stepping,
                (uint32_t) microcode);
-       kdb_printf("%s\n", infop->cpuid_brand_string);
+       kdb_printf(" %s\n", infop->cpuid_brand_string);
 }
 
-
 static const char *mca_threshold_status[] = {
        [THRESHOLD_STATUS_NO_TRACKING]  "No tracking",
        [THRESHOLD_STATUS_GREEN]        "Green",
@@ -267,56 +273,65 @@ static const char *mca_threshold_status[] = {
 };
 
 static void
-mca_dump_error_banks(void)
+mca_dump_bank(mca_state_t *state, int i)
 {
-       unsigned int            i;
+       mca_mci_bank_t          *bank;
        ia32_mci_status_t       status;
 
-       kdb_printf("MCA error-reporting registers:\n");
-       for (i = 0; i < mca_error_bank_count; i++ ) {
-               status.u64 = rdmsr64(IA32_MCi_STATUS(i));
+       bank = &state->mca_error_bank[i];
+       status = bank->mca_mci_status;
+       kdb_printf(
+               " IA32_MC%d_STATUS(0x%x): 0x%016qx %svalid\n",
+               i, IA32_MCi_STATUS(i), status.u64, IF(!status.bits.val, "in"));
+       if (!status.bits.val)
+               return;
+
+       kdb_printf(
+               "  MCA error code:            0x%04x\n",
+               status.bits.mca_error);
+       kdb_printf(
+               "  Model specific error code: 0x%04x\n",
+               status.bits.model_specific_error);
+       if (!mca_threshold_status_present) {
                kdb_printf(
-                       " IA32_MC%d_STATUS(0x%x): 0x%016qx %svalid\n",
-                       i, IA32_MCi_STATUS(i), status.u64,
-                       IF(!status.bits.val, "in"));
-               if (!status.bits.val)
-                       continue;
+                       "  Other information:         0x%08x\n",
+                       status.bits.other_information);
+       } else {
+               int     threshold = status.bits_tes_p.threshold;
                kdb_printf(
-                       "  MCA error code           : 0x%04x\n",
-                       status.bits.mca_error);
+                       "  Other information:         0x%08x\n"
+                       "  Threshold-based status:    %s\n",
+                       status.bits_tes_p.other_information,
+                       (status.bits_tes_p.uc == 0) ?
+                           mca_threshold_status[threshold] :
+                           "Undefined");
+       }
+       kdb_printf(
+               "  Status bits:\n%s%s%s%s%s%s",
+               IF(status.bits.pcc,   "   Processor context corrupt\n"),
+               IF(status.bits.addrv, "   ADDR register valid\n"),
+               IF(status.bits.miscv, "   MISC register valid\n"),
+               IF(status.bits.en,    "   Error enabled\n"),
+               IF(status.bits.uc,    "   Uncorrected error\n"),
+               IF(status.bits.over,  "   Error overflow\n"));
+       if (status.bits.addrv)
                kdb_printf(
-                       "  Model specific error code: 0x%04x\n",
-                       status.bits.model_specific_error);
-               if (!mca_threshold_status_present) {
-                       kdb_printf(
-                               "  Other information        : 0x%08x\n",
-                               status.bits.other_information);
-               } else {
-                       int     threshold = status.bits_tes_p.threshold;
-                       kdb_printf(
-                               "  Other information        : 0x%08x\n"
-                               "  Threshold-based status   : %s\n",
-                               status.bits_tes_p.other_information,
-                               (status.bits_tes_p.uc == 0) ?
-                                       mca_threshold_status[threshold] :
-                                       "Undefined");
-               }
+                       " IA32_MC%d_ADDR(0x%x): 0x%016qx\n",
+                       i, IA32_MCi_ADDR(i), bank->mca_mci_addr);
+       if (status.bits.miscv)
                kdb_printf(
-                       "  Status bits:\n%s%s%s%s%s%s",
-                       IF(status.bits.pcc,   "   Processor context corrupt\n"),
-                       IF(status.bits.addrv, "   ADDR register valid\n"),
-                       IF(status.bits.miscv, "   MISC register valid\n"),
-                       IF(status.bits.en,    "   Error enabled\n"),
-                       IF(status.bits.uc,    "   Uncorrected error\n"),
-                       IF(status.bits.over,  "   Error overflow\n"));
-               if (status.bits.addrv)
-                       kdb_printf(
-                               "  IA32_MC%d_ADDR(0x%x): 0x%016qx\n",
-                               i, IA32_MCi_ADDR(i), rdmsr64(IA32_MCi_ADDR(i)));
-               if (status.bits.miscv)
-                       kdb_printf(
-                               "  IA32_MC%d_MISC(0x%x): 0x%016qx\n",
-                               i, IA32_MCi_MISC(i), rdmsr64(IA32_MCi_MISC(i)));
+                       " IA32_MC%d_MISC(0x%x): 0x%016qx\n",
+                       i, IA32_MCi_MISC(i), bank->mca_mci_misc);
+}
+
+static void
+mca_dump_error_banks(mca_state_t *state)
+{
+       unsigned int            i;
+
+       kdb_printf("MCA error-reporting registers:\n");
+       for (i = 0; i < mca_error_bank_count; i++ ) {
+               mca_dump_bank(state, i);
        }
 }
 
@@ -324,19 +339,27 @@ void
 mca_dump(void)
 {
        ia32_mcg_status_t       status;
+       mca_state_t             *mca_state = current_cpu_datap()->cpu_mca_state;
 
-       mca_save_state();
+       /*
+        * Capture local MCA registers to per-cpu data.
+        */
+       mca_save_state(mca_state);
 
        /*
         * Serialize in case of multiple simultaneous machine-checks.
-        * Only the first caller is allowed to print MCA registers.
+        * Only the first caller is allowed to dump MCA registers,
+        * other threads spin in the meantime.
         */
        simple_lock(&mca_lock);
-       if (mca_exception_taken) {
+       if (mca_dump_state > CLEAR) {
                simple_unlock(&mca_lock);
+               while (mca_dump_state == DUMPING)
+                       cpu_pause();
                return;
        }
-       mca_exception_taken = TRUE;
+       mca_dump_state = DUMPING;
+       simple_unlock(&mca_lock);
 
        /*
         * Report machine-check capabilities:
@@ -348,11 +371,12 @@ mca_dump(void)
        mca_report_cpu_info();
 
        kdb_printf(
-               " %d error-reporting banks\n%s%s", mca_error_bank_count,
+               " %d error-reporting banks\n%s%s%s", mca_error_bank_count,
                IF(mca_control_MSR_present,
                   " control MSR present\n"),
                IF(mca_threshold_status_present,
-                  " threshold-based error status present\n"));
+                  " threshold-based error status present\n"),
+               "");
        if (mca_extended_MSRs_present)
                kdb_printf(
                        " %d extended MSRs present\n", mca_extended_MSRs_count);
@@ -362,7 +386,7 @@ mca_dump(void)
         */
        status.u64 = rdmsr64(IA32_MCG_STATUS);
        kdb_printf(
-               "Machine-check status 0x%016qx\n%s%s%s", status.u64,
+               "Machine-check status 0x%016qx:\n%s%s%s", status.u64,
                IF(status.bits.ripv, " restart IP valid\n"),
                IF(status.bits.eipv, " error IP valid\n"),
                IF(status.bits.mcip, " machine-check in progress\n"));
@@ -370,7 +394,7 @@ mca_dump(void)
        /*
         * Dump error-reporting registers:
         */
-       mca_dump_error_banks();
+       mca_dump_error_banks(mca_state);
 
        /*
         * Dump any extended machine state:
@@ -382,5 +406,6 @@ mca_dump(void)
                        mca_dump_32bit_state();
        }
 
-       simple_unlock(&mca_lock);
+       /* Update state to release any other threads. */
+       mca_dump_state = DUMPED;
 }
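The net effect is a small state machine over the enum added above: the first CPU to take the machine-check advances CLEAR to DUMPING under mca_lock, dumps, then publishes DUMPED; every other CPU drops the lock and parks in the spin the hunk shows:

	/*
	 * CLEAR --(first CPU, under mca_lock)--> DUMPING --(dump done)--> DUMPED
	 *
	 * latecomers: wait for the dumping CPU to publish DUMPED
	 */
	while (mca_dump_state == DUMPING)
		cpu_pause();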
index 853fe36f437bde254470fa035ec624b361f876ef..233e78e2c9929b7907c9e1a95561ca11deeb2f85 100644 (file)
@@ -49,11 +49,10 @@ typedef union {
        uint64_t        count                   :BITS(7,0);
        uint64_t        mcg_ctl_p               :BIT1(8);
        uint64_t        mcg_ext_p               :BIT1(9);
-       uint64_t        reserved1               :BIT1(10);
+       uint64_t        mcg_reserved1           :BIT1(10);
        uint64_t        mcg_tes_p               :BIT1(11);
-       uint64_t        reserved2               :BITS(15,12);
+       uint64_t        mcg_reserved2           :BITS(15,12);
        uint64_t        mcg_ext_cnt             :BITS(23,16);
-       uint64_t        reserved3               :BITS(63,24);
      }         bits;
      uint64_t  u64;
 } ia32_mcg_cap_t;
@@ -64,7 +63,6 @@ typedef union {
        uint64_t        ripv                    :BIT1(0);
        uint64_t        eipv                    :BIT1(1);
        uint64_t        mcip                    :BIT1(2);
-       uint64_t        reserved                :BITS(61,3);
      }         bits;
      uint64_t  u64;
 } ia32_mcg_status_t;
@@ -113,7 +111,7 @@ typedef uint64_t    ia32_mci_ctl_t;
 #define IA32_MCi_CTL_ENABLE_ALL        (0xFFFFFFFFFFFFFFFFULL)
 
 typedef union {
-     struct {
+    struct {
        uint64_t        mca_error               :BITS(15,0);
        uint64_t        model_specific_error    :BITS(31,16);
        uint64_t        other_information       :BITS(56,32);
@@ -124,13 +122,12 @@ typedef union {
        uint64_t        uc                      :BIT1(61);
        uint64_t        over                    :BIT1(62);
        uint64_t        val                     :BIT1(63);
-     }         bits;
-     struct {          /* Variant if threshold-based error status present: */
+    }         bits;
+    struct {           /* Variant if threshold-based error status present: */
        uint64_t        mca_error               :BITS(15,0);
        uint64_t        model_specific_error    :BITS(31,16);
        uint64_t        other_information       :BITS(52,32);
        uint64_t        threshold               :BITS(54,53);
-       uint64_t        reserved                :BITS(56,55);
        uint64_t        pcc                     :BIT1(57);
        uint64_t        addrv                   :BIT1(58);
        uint64_t        miscv                   :BIT1(59);
@@ -138,8 +135,8 @@ typedef union {
        uint64_t        uc                      :BIT1(61);
        uint64_t        over                    :BIT1(62);
        uint64_t        val                     :BIT1(63);
-     }         bits_tes_p;
-     uint64_t  u64;
+    }         bits_tes_p;
+    uint64_t   u64;
 } ia32_mci_status_t;
 
 /* Values for threshold_status if mcg_tes_p == 1 and uc == 0 */
@@ -151,7 +148,6 @@ typedef union {
 typedef uint64_t       ia32_mci_addr_t;
 typedef uint64_t       ia32_mci_misc_t;
 
-
 #define IA32_MCG_EAX           (0x180)
 #define IA32_MCG_EBX           (0x181)
 #define IA32_MCG_ECX           (0x182)
@@ -189,10 +185,10 @@ typedef uint64_t  ia32_mci_misc_t;
 #define IA32_MCG_R14           (0x196)
 #define IA32_MCG_R15           (0x197)
 
-extern void    mca_cpu_alloc(cpu_data_t *cdp);
-extern void    mca_cpu_init(void);
-extern void    mca_dump(void);
-extern void    mca_check_save(void);
+extern void            mca_cpu_alloc(cpu_data_t *cdp);
+extern void            mca_cpu_init(void);
+extern void            mca_dump(void);
+extern void            mca_check_save(void);
 
 #endif /* _I386_MACHINE_CHECK_H_ */
 #endif /* KERNEL_PRIVATE */
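Putting the unions to work: a consumer reads a bank's status MSR into the union and tests the decoded bits, the same way mca_dump_bank() in machine_check.c does (bank 0 chosen arbitrarily here):

	ia32_mci_status_t status;

	status.u64 = rdmsr64(IA32_MCi_STATUS(0));
	if (status.bits.val) {			/* a logged error is present */
		if (status.bits.addrv)
			(void) rdmsr64(IA32_MCi_ADDR(0));	/* fault address */
		if (status.bits.miscv)
			(void) rdmsr64(IA32_MCi_MISC(0));	/* extra info */
	}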
index b3143b07ef1f491907871e241837e38025e3f4cb..2460bf606bb3c1c8e93af66f5238a2d2fc9d8fc9 100644 (file)
@@ -38,9 +38,6 @@ __BEGIN_DECLS
 void   cpu_machine_init(
        void);
 
-void   cpu_signal_handler(
-       x86_saved_state_t *regs);
-
 void   handle_pending_TLB_flushes(
        void);
 
index 4ffb4ddc3361b6614c9276ebaf14d4c95b8a4425..d42f6d2f1f9aaf71361ac24d07b510a6b865b8b2 100644 (file)
@@ -38,7 +38,7 @@
 #include <kern/thread.h>
 #include <i386/cpu_data.h>
 #include <i386/machine_cpu.h>
-#include <i386/mp.h>
+#include <i386/lapic.h>
 #include <i386/mp_events.h>
 #include <i386/pmap.h>
 #include <i386/misc_protos.h>
@@ -292,39 +292,6 @@ void ml_install_interrupt_handler(
 }
 
 
-void
-machine_idle(void)
-{
-       x86_core_t      *my_core = x86_core();
-       cpu_data_t      *my_cpu  = current_cpu_datap();
-       int             others_active;
-
-       /*
-        * We halt this cpu thread
-        * unless kernel param idlehalt is false and no other thread
-        * in the same core is active - if so, don't halt so that this
-        * core doesn't go into a low-power mode.
-        * For 4/4, we set a null "active cr3" while idle.
-        */
-       if (my_core == NULL || my_cpu == NULL)
-           goto out;
-
-       others_active = !atomic_decl_and_test(
-                               (long *) &my_core->active_lcpus, 1);
-       my_cpu->lcpu.idle = TRUE;
-       if (idlehalt || others_active) {
-               DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
-               MARK_CPU_IDLE(cpu_number());
-               machine_idle_cstate(FALSE);
-               MARK_CPU_ACTIVE(cpu_number());
-               DBGLOG(cpu_handle, cpu_number(), MP_UNIDLE);
-       }
-       my_cpu->lcpu.idle = FALSE;
-       atomic_incl((long *) &my_core->active_lcpus, 1);
-  out:
-       __asm__ volatile("sti");
-}
-
 void
 machine_signal_idle(
         processor_t processor)
@@ -376,7 +343,7 @@ ml_processor_register(
                goto failed;
 
        if (!boot_cpu) {
-               this_cpu_datap->lcpu.core = cpu_thread_alloc(this_cpu_datap->cpu_number);
+               cpu_thread_alloc(this_cpu_datap->cpu_number);
                if (this_cpu_datap->lcpu.core == NULL)
                        goto failed;
 
@@ -526,7 +493,7 @@ ml_init_lock_timeout(void)
        LockTimeOut = (uint32_t) abstime;
        LockTimeOutTSC = (uint32_t) tmrCvt(abstime, tscFCvtn2t);
 
-       if (PE_parse_boot_arg("mtxspin", &mtxspin)) {
+       if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof (mtxspin))) {
                if (mtxspin > USEC_PER_SEC>>4)
                        mtxspin =  USEC_PER_SEC>>4;
                nanoseconds_to_absolutetime(mtxspin*NSEC_PER_USEC, &abstime);
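
The PE_parse_boot_arg to PE_parse_boot_argn change threads an explicit
destination size through the boot-argument parser, so a supplied value can
no longer overrun a narrower variable. A sketch of the calling pattern in
kernel context; the exact prototype lives in the pexpert headers, and the
size parameter's type is an assumption here:

    /* Assumed shape, for illustration only: */
    extern boolean_t PE_parse_boot_argn(const char *arg_string,
                                        void *arg_ptr, int max_arg);

    uint32_t mtxspin;

    if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof(mtxspin))) {
            /* "mtxspin" was on the boot line; at most sizeof(mtxspin)
             * bytes were written into it. */
    }
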
index d746ed64293966d9aeecbe8f07d7ead32628f424..cf0af4761917e09a6231a3782dbf3ade5046ae43 100644 (file)
@@ -285,10 +285,10 @@ extern void ml_set_maxsnoop(uint32_t maxdelay);
 extern unsigned ml_get_maxsnoop(void);
 extern void ml_set_maxbusdelay(uint32_t mdelay);
 extern uint32_t ml_get_maxbusdelay(void);
+extern void ml_set_maxintdelay(uint64_t mdelay);
+extern uint64_t ml_get_maxintdelay(void);
 
 
-extern void ml_hpet_cfg(uint32_t cpu, uint32_t hpetVect);
-
 extern uint64_t tmrCvt(uint64_t time, uint64_t conversion);
 
 extern uint64_t ml_cpu_int_event_time(void);
index b7187aecf91ad0f984dd5faa522348c3d4627e88..f68b813768f7dee0ad029763b78baaa603fdf16c 100644 (file)
@@ -27,6 +27,7 @@
  */
  
 #include <i386/asm.h>
+#include <i386/rtclock.h>
 #include <i386/proc_reg.h>
 #include <i386/eflags.h>
        
@@ -47,6 +48,7 @@ ENTRY(ml_get_timebase)
                        movl    S_ARG0, %ecx
                        
                        rdtsc
+                       lfence
                        
                        movl    %edx, 0(%ecx)
                        movl    %eax, 4(%ecx)
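
RDTSC is not a serializing instruction: without a fence the timestamp read
can be reordered against neighboring operations, skewing fine-grained
timing. The inserted LFENCE pins it down. The equivalent fenced read in C
with GCC-style inline assembly, as a sketch (the function name is
illustrative):

    #include <stdint.h>

    static inline uint64_t
    rdtsc_fenced(void)
    {
            uint32_t lo, hi;

            /* lfence keeps rdtsc from drifting past surrounding loads */
            __asm__ volatile("rdtsc; lfence" : "=a" (lo), "=d" (hi));
            return ((uint64_t)hi << 32) | lo;
    }
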
@@ -217,30 +219,7 @@ LEXT(_rtc_nanotime_read)
                jnz             Lslow
                
                /* Processor whose TSC frequency is faster than SLOW_TSC_THRESHOLD */
-0:
-               movl            RNT_GENERATION(%edi),%esi               /* get generation (0 if being changed) */
-               testl           %esi,%esi                               /* if being changed, loop until stable */
-               jz              0b
-
-               rdtsc                                                   /* get TSC in %edx:%eax */
-               subl            RNT_TSC_BASE(%edi),%eax
-               sbbl            RNT_TSC_BASE+4(%edi),%edx
-
-               movl            RNT_SCALE(%edi),%ecx
-
-               movl            %edx,%ebx
-               mull            %ecx
-               movl            %ebx,%eax
-               movl            %edx,%ebx
-               mull            %ecx
-               addl            %ebx,%eax
-               adcl            $0,%edx
-
-               addl            RNT_NS_BASE(%edi),%eax
-               adcl            RNT_NS_BASE+4(%edi),%edx
-
-               cmpl            RNT_GENERATION(%edi),%esi               /* have the parameters changed? */
-               jne             0b                                      /* yes, loop until stable */
+               RTC_NANOTIME_READ_FAST()
 
                popl            %ebx
                popl            %edi
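
The deleted assembly is a lockless, generation-counted read: sample the
generation (the writer sets it to 0 while updating), compute
ns_base + ((tsc - tsc_base) * scale >> 32) with a widening multiply, and
retry if the generation moved. RTC_NANOTIME_READ_FAST() now encapsulates
exactly that sequence. The same protocol rendered in C as a sketch; the
struct, field names, rdtsc64() helper, and the 128-bit multiply (standing
in for the two-step 32-bit multiply above) are illustrative:

    #include <stdint.h>

    typedef struct {
            volatile uint32_t generation;  /* 0 while writer is updating */
            uint64_t          tsc_base;
            uint64_t          ns_base;
            uint32_t          scale;       /* ns per TSC tick, scaled by 2^32 */
    } nanotime_sketch_t;

    extern uint64_t rdtsc64(void);         /* assumed TSC reader */

    static uint64_t
    nanotime_read(const nanotime_sketch_t *p)
    {
            uint32_t gen;
            uint64_t delta, ns;

            do {
                    while ((gen = p->generation) == 0)
                            ;              /* update in flight: spin */
                    delta = rdtsc64() - p->tsc_base;
                    ns = p->ns_base +
                        (uint64_t)(((unsigned __int128)delta * p->scale) >> 32);
            } while (gen != p->generation); /* torn read: retry */

            return ns;
    }
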
index 23ce618604ebff3a6b19ab069d38d99d0270cae2..5073997835e1fe7d83e6e90389b7edbb99839800 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -46,6 +46,7 @@
 #include <kern/assert.h>
 #include <kern/machine.h>
 #include <kern/pms.h>
+#include <kern/misc_protos.h>
 
 #include <vm/vm_map.h>
 #include <vm/vm_kern.h>
@@ -55,7 +56,7 @@
 #include <i386/mp.h>
 #include <i386/mp_events.h>
 #include <i386/mp_slave_boot.h>
-#include <i386/apic.h>
+#include <i386/lapic.h>
 #include <i386/ipl.h>
 #include <i386/fpu.h>
 #include <i386/cpuid.h>
@@ -71,7 +72,6 @@
 #include <i386/trap.h>
 #include <i386/machine_routines.h>
 #include <i386/pmCPU.h>
-#include <i386/hpet.h>
 #include <i386/machine_check.h>
 
 #include <chud/chud_xnu.h>
 #define PAUSE
 #endif /* MP_DEBUG */
 
-/* Initialize lapic_id so cpu_number() works on non SMP systems */
-unsigned long  lapic_id_initdata = 0;
-unsigned long  lapic_id = (unsigned long)&lapic_id_initdata;
-vm_offset_t    lapic_start;
-
-static i386_intr_func_t        lapic_timer_func;
-static i386_intr_func_t        lapic_pmi_func;
-static i386_intr_func_t        lapic_thermal_func;
-
-/* TRUE if local APIC was enabled by the OS not by the BIOS */
-static boolean_t lapic_os_enabled = FALSE;
-
-/* Base vector for local APIC interrupt sources */
-int lapic_interrupt_base = LAPIC_DEFAULT_INTERRUPT_BASE;
+#define FULL_SLAVE_INIT        (NULL)
+#define FAST_SLAVE_INIT        ((void *)(uintptr_t)1)
 
 void           slave_boot_init(void);
 
@@ -124,8 +112,9 @@ static void mp_kdp_wait(boolean_t flush);
 static void    mp_rendezvous_action(void);
 static void    mp_broadcast_action(void);
 
-static int             NMIInterruptHandler(x86_saved_state_t *regs);
 static boolean_t       cpu_signal_pending(int cpu, mp_event_t event);
+static int             cpu_signal_handler(x86_saved_state_t *regs);
+static int             NMIInterruptHandler(x86_saved_state_t *regs);
 
 boolean_t      smp_initialized = FALSE;
 volatile boolean_t     force_immediate_debugger_NMI = FALSE;
@@ -150,77 +139,13 @@ static volatile long      mp_rv_complete __attribute__((aligned(64)));
 /* Variables needed for MP broadcast. */
 static void        (*mp_bc_action_func)(void *arg);
 static void        *mp_bc_func_arg;
-static int     mp_bc_ncpus;
+static int             mp_bc_ncpus;
 static volatile long   mp_bc_count;
 decl_mutex_data(static, mp_bc_lock);
+static volatile int    debugger_cpu = -1;
 
 static void    mp_cpus_call_action(void); 
 
-int            lapic_to_cpu[MAX_CPUS];
-int            cpu_to_lapic[MAX_CPUS];
-
-static void
-lapic_cpu_map_init(void)
-{
-       int     i;
-
-       for (i = 0; i < MAX_CPUS; i++) {
-               lapic_to_cpu[i] = -1;
-               cpu_to_lapic[i] = -1;
-       }
-}
-
-void
-lapic_cpu_map(int apic_id, int cpu)
-{
-       cpu_to_lapic[cpu] = apic_id;
-       lapic_to_cpu[apic_id] = cpu;
-}
-
-/*
- * Retrieve the local apic ID a cpu.
- *
- * Returns the local apic ID for the given processor.
- * If the processor does not exist or apic not configured, returns -1.
- */
-
-uint32_t
-ml_get_apicid(uint32_t cpu)
-{
-       if(cpu >= (uint32_t)MAX_CPUS)
-               return 0xFFFFFFFF;      /* Return -1 if cpu too big */
-       
-       /* Return the apic ID (or -1 if not configured) */
-       return (uint32_t)cpu_to_lapic[cpu];
-
-}
-
-#ifdef MP_DEBUG
-static void
-lapic_cpu_map_dump(void)
-{
-       int     i;
-
-       for (i = 0; i < MAX_CPUS; i++) {
-               if (cpu_to_lapic[i] == -1)
-                       continue;
-               kprintf("cpu_to_lapic[%d]: %d\n",
-                       i, cpu_to_lapic[i]);
-       }
-       for (i = 0; i < MAX_CPUS; i++) {
-               if (lapic_to_cpu[i] == -1)
-                       continue;
-               kprintf("lapic_to_cpu[%d]: %d\n",
-                       i, lapic_to_cpu[i]);
-       }
-}
-#define LAPIC_CPU_MAP_DUMP()   lapic_cpu_map_dump()
-#define LAPIC_DUMP()           lapic_dump()
-#else
-#define LAPIC_CPU_MAP_DUMP()
-#define LAPIC_DUMP()
-#endif /* MP_DEBUG */
-
 #if GPROF
 /*
  * Initialize dummy structs for profiling. These aren't used but
@@ -243,14 +168,6 @@ struct profile_vars *_profile_vars_cpus[MAX_CPUS] = { &_profile_vars };
 void
 smp_init(void)
 {
-       int             result;
-       vm_map_entry_t  entry;
-       uint32_t        lo;
-       uint32_t        hi;
-       boolean_t       is_boot_processor;
-       boolean_t       is_lapic_enabled;
-       vm_offset_t     lapic_base;
-
        simple_lock_init(&mp_kdp_lock, 0);
        simple_lock_init(&mp_rv_lock, 0);
        mutex_init(&mp_cpu_boot_lock, 0);
@@ -261,48 +178,10 @@ smp_init(void)
        if (!lapic_probe())
                return;
 
-       /* Examine the local APIC state */
-       rdmsr(MSR_IA32_APIC_BASE, lo, hi);
-       is_boot_processor = (lo & MSR_IA32_APIC_BASE_BSP) != 0;
-       is_lapic_enabled  = (lo & MSR_IA32_APIC_BASE_ENABLE) != 0;
-       lapic_base = (lo &  MSR_IA32_APIC_BASE_BASE);
-       kprintf("MSR_IA32_APIC_BASE 0x%x %s %s\n", lapic_base,
-               is_lapic_enabled ? "enabled" : "disabled",
-               is_boot_processor ? "BSP" : "AP");
-       if (!is_boot_processor || !is_lapic_enabled)
-               panic("Unexpected local APIC state\n");
-
-       /* Establish a map to the local apic */
-       lapic_start = vm_map_min(kernel_map);
-       result = vm_map_find_space(kernel_map,
-                                  (vm_map_address_t *) &lapic_start,
-                                  round_page(LAPIC_SIZE), 0,
-                                  VM_MAKE_TAG(VM_MEMORY_IOKIT), &entry);
-       if (result != KERN_SUCCESS) {
-               panic("smp_init: vm_map_find_entry FAILED (err=%d)", result);
-       }
-       vm_map_unlock(kernel_map);
-/* Map in the local APIC non-cacheable, as recommended by Intel
- * in section 8.4.1 of the "System Programming Guide".
- */
-       pmap_enter(pmap_kernel(),
-                       lapic_start,
-                       (ppnum_t) i386_btop(lapic_base),
-                       VM_PROT_READ|VM_PROT_WRITE,
-                       VM_WIMG_IO,
-                       TRUE);
-       lapic_id = (unsigned long)(lapic_start + LAPIC_ID);
-
-       if ((LAPIC_REG(VERSION)&LAPIC_VERSION_MASK) != 0x14) {
-               printf("Local APIC version not 0x14 as expected\n");
-       }
-
-       /* Set up the lapic_id <-> cpu_number map and add this boot processor */
-       lapic_cpu_map_init();
-       lapic_cpu_map((LAPIC_REG(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK, 0);
-       kprintf("Boot cpu local APIC id 0x%x\n", cpu_to_lapic[0]);
-
        lapic_init();
+       lapic_configure();
+       lapic_set_intr_func(LAPIC_NMI_INTERRUPT,  NMIInterruptHandler);
+       lapic_set_intr_func(LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler);
 
        cpu_thread_init();
 
@@ -316,420 +195,17 @@ smp_init(void)
        return;
 }
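
With the local-APIC support factored out (note the i386/lapic.h include
replacing i386/apic.h in the hunk above), the hardwired dispatch switch in
the deleted lapic_interrupt() below gives way to per-vector registration.
The shapes are inferred from the call sites: both handlers now take the
saved state and return int, so the new i386_intr_func_t presumably does
too. A sketch, not the verbatim contents of the new header:

    /* Inferred declarations; the real ones live in i386/lapic.h. */
    typedef int (*i386_intr_func_t)(x86_saved_state_t *state);
    extern void lapic_set_intr_func(int vector, i386_intr_func_t func);

    /* Registration as performed in smp_init() above: */
    lapic_set_intr_func(LAPIC_NMI_INTERRUPT, NMIInterruptHandler);
    lapic_set_intr_func(LAPIC_VECTOR(INTERPROCESSOR), cpu_signal_handler);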
 
-
-static int
-lapic_esr_read(void)
-{
-       /* write-read register */
-       LAPIC_REG(ERROR_STATUS) = 0;
-       return LAPIC_REG(ERROR_STATUS);
-}
-
-static void 
-lapic_esr_clear(void)
-{
-       LAPIC_REG(ERROR_STATUS) = 0;
-       LAPIC_REG(ERROR_STATUS) = 0;
-}
-
-static const char *DM[8] = {
-       "Fixed",
-       "Lowest Priority",
-       "Invalid",
-       "Invalid",
-       "NMI",
-       "Reset",
-       "Invalid",
-       "ExtINT"};
-
-void
-lapic_dump(void)
-{
-       int     i;
-
-#define BOOL(a) ((a)?' ':'!')
-
-       kprintf("LAPIC %d at 0x%x version 0x%x\n", 
-               (LAPIC_REG(ID)>>LAPIC_ID_SHIFT)&LAPIC_ID_MASK,
-               lapic_start,
-               LAPIC_REG(VERSION)&LAPIC_VERSION_MASK);
-       kprintf("Priorities: Task 0x%x  Arbitration 0x%x  Processor 0x%x\n",
-               LAPIC_REG(TPR)&LAPIC_TPR_MASK,
-               LAPIC_REG(APR)&LAPIC_APR_MASK,
-               LAPIC_REG(PPR)&LAPIC_PPR_MASK);
-       kprintf("Destination Format 0x%x Logical Destination 0x%x\n",
-               LAPIC_REG(DFR)>>LAPIC_DFR_SHIFT,
-               LAPIC_REG(LDR)>>LAPIC_LDR_SHIFT);
-       kprintf("%cEnabled %cFocusChecking SV 0x%x\n",
-               BOOL(LAPIC_REG(SVR)&LAPIC_SVR_ENABLE),
-               BOOL(!(LAPIC_REG(SVR)&LAPIC_SVR_FOCUS_OFF)),
-               LAPIC_REG(SVR) & LAPIC_SVR_MASK);
-       kprintf("LVT_TIMER:   Vector 0x%02x %s %cmasked %s\n",
-               LAPIC_REG(LVT_TIMER)&LAPIC_LVT_VECTOR_MASK,
-               (LAPIC_REG(LVT_TIMER)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle",
-               BOOL(LAPIC_REG(LVT_TIMER)&LAPIC_LVT_MASKED),
-               (LAPIC_REG(LVT_TIMER)&LAPIC_LVT_PERIODIC)?"Periodic":"OneShot");
-       kprintf("  Initial Count: 0x%08x \n", LAPIC_REG(TIMER_INITIAL_COUNT));
-       kprintf("  Current Count: 0x%08x \n", LAPIC_REG(TIMER_CURRENT_COUNT));
-       kprintf("  Divide Config: 0x%08x \n", LAPIC_REG(TIMER_DIVIDE_CONFIG));
-       kprintf("LVT_PERFCNT: Vector 0x%02x [%s] %s %cmasked\n",
-               LAPIC_REG(LVT_PERFCNT)&LAPIC_LVT_VECTOR_MASK,
-               DM[(LAPIC_REG(LVT_PERFCNT)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK],
-               (LAPIC_REG(LVT_PERFCNT)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle",
-               BOOL(LAPIC_REG(LVT_PERFCNT)&LAPIC_LVT_MASKED));
-       kprintf("LVT_THERMAL: Vector 0x%02x [%s] %s %cmasked\n",
-               LAPIC_REG(LVT_THERMAL)&LAPIC_LVT_VECTOR_MASK,
-               DM[(LAPIC_REG(LVT_THERMAL)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK],
-               (LAPIC_REG(LVT_THERMAL)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle",
-               BOOL(LAPIC_REG(LVT_THERMAL)&LAPIC_LVT_MASKED));
-       kprintf("LVT_LINT0:   Vector 0x%02x [%s][%s][%s] %s %cmasked\n",
-               LAPIC_REG(LVT_LINT0)&LAPIC_LVT_VECTOR_MASK,
-               DM[(LAPIC_REG(LVT_LINT0)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK],
-               (LAPIC_REG(LVT_LINT0)&LAPIC_LVT_TM_LEVEL)?"Level":"Edge ",
-               (LAPIC_REG(LVT_LINT0)&LAPIC_LVT_IP_PLRITY_LOW)?"Low ":"High",
-               (LAPIC_REG(LVT_LINT0)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle",
-               BOOL(LAPIC_REG(LVT_LINT0)&LAPIC_LVT_MASKED));
-       kprintf("LVT_LINT1:   Vector 0x%02x [%s][%s][%s] %s %cmasked\n",
-               LAPIC_REG(LVT_LINT1)&LAPIC_LVT_VECTOR_MASK,
-               DM[(LAPIC_REG(LVT_LINT1)>>LAPIC_LVT_DM_SHIFT)&LAPIC_LVT_DM_MASK],
-               (LAPIC_REG(LVT_LINT1)&LAPIC_LVT_TM_LEVEL)?"Level":"Edge ",
-               (LAPIC_REG(LVT_LINT1)&LAPIC_LVT_IP_PLRITY_LOW)?"Low ":"High",
-               (LAPIC_REG(LVT_LINT1)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle",
-               BOOL(LAPIC_REG(LVT_LINT1)&LAPIC_LVT_MASKED));
-       kprintf("LVT_ERROR:   Vector 0x%02x %s %cmasked\n",
-               LAPIC_REG(LVT_ERROR)&LAPIC_LVT_VECTOR_MASK,
-               (LAPIC_REG(LVT_ERROR)&LAPIC_LVT_DS_PENDING)?"SendPending":"Idle",
-               BOOL(LAPIC_REG(LVT_ERROR)&LAPIC_LVT_MASKED));
-       kprintf("ESR: %08x \n", lapic_esr_read());
-       kprintf("       ");
-       for(i=0xf; i>=0; i--)
-               kprintf("%x%x%x%x",i,i,i,i);
-       kprintf("\n");
-       kprintf("TMR: 0x");
-       for(i=7; i>=0; i--)
-               kprintf("%08x",LAPIC_REG_OFFSET(TMR_BASE, i*0x10));
-       kprintf("\n");
-       kprintf("IRR: 0x");
-       for(i=7; i>=0; i--)
-               kprintf("%08x",LAPIC_REG_OFFSET(IRR_BASE, i*0x10));
-       kprintf("\n");
-       kprintf("ISR: 0x");
-       for(i=7; i >= 0; i--)
-               kprintf("%08x",LAPIC_REG_OFFSET(ISR_BASE, i*0x10));
-       kprintf("\n");
-}
-
-#if MACH_KDB
 /*
- *     Displays apic junk
- *
- *     da
+ * Poll a CPU to see when it has marked itself as running.
  */
-void 
-db_apic(__unused db_expr_t addr,
-       __unused int have_addr,
-       __unused db_expr_t count,
-       __unused char *modif)
-{
-
-       lapic_dump();
-
-       return;
-}
-
-#endif
-
-boolean_t
-lapic_probe(void)
-{
-       uint32_t        lo;
-       uint32_t        hi;
-
-       if (cpuid_features() & CPUID_FEATURE_APIC)
-               return TRUE;
-
-       if (cpuid_family() == 6 || cpuid_family() == 15) {
-               /*
-                * Mobile Pentiums:
-                * There may be a local APIC which wasn't enabled by BIOS.
-                * So we try to enable it explicitly.
-                */
-               rdmsr(MSR_IA32_APIC_BASE, lo, hi);
-               lo &= ~MSR_IA32_APIC_BASE_BASE;
-               lo |= MSR_IA32_APIC_BASE_ENABLE | LAPIC_START;
-               lo |= MSR_IA32_APIC_BASE_ENABLE;
-               wrmsr(MSR_IA32_APIC_BASE, lo, hi);
-
-               /*
-                * Re-initialize cpu features info and re-check.
-                */
-               cpuid_set_info();
-               if (cpuid_features() & CPUID_FEATURE_APIC) {
-                       printf("Local APIC discovered and enabled\n");
-                       lapic_os_enabled = TRUE;
-                       lapic_interrupt_base = LAPIC_REDUCED_INTERRUPT_BASE;
-                       return TRUE;
-               }
-       }
-
-       return FALSE;
-}
-
-void
-lapic_shutdown(void)
-{
-       uint32_t lo;
-       uint32_t hi;
-       uint32_t value;
-
-       /* Shutdown if local APIC was enabled by OS */
-       if (lapic_os_enabled == FALSE)
-               return;
-
-       mp_disable_preemption();
-
-       /* ExtINT: masked */
-       if (get_cpu_number() == master_cpu) {
-               value = LAPIC_REG(LVT_LINT0);
-               value |= LAPIC_LVT_MASKED;
-               LAPIC_REG(LVT_LINT0) = value;
-       }
-
-       /* Timer: masked */
-       LAPIC_REG(LVT_TIMER) |= LAPIC_LVT_MASKED;
-
-       /* Perfmon: masked */
-       LAPIC_REG(LVT_PERFCNT) |= LAPIC_LVT_MASKED;
-
-       /* Error: masked */
-       LAPIC_REG(LVT_ERROR) |= LAPIC_LVT_MASKED;
-
-       /* APIC software disabled */
-       LAPIC_REG(SVR) &= ~LAPIC_SVR_ENABLE;
-
-       /* Bypass the APIC completely and update cpu features */
-       rdmsr(MSR_IA32_APIC_BASE, lo, hi);
-       lo &= ~MSR_IA32_APIC_BASE_ENABLE;
-       wrmsr(MSR_IA32_APIC_BASE, lo, hi);
-       cpuid_set_info();
-
-       mp_enable_preemption();
-}
-
-void
-lapic_init(void)
-{
-       int     value;
-
-       /* Set flat delivery model, logical processor id */
-       LAPIC_REG(DFR) = LAPIC_DFR_FLAT;
-       LAPIC_REG(LDR) = (get_cpu_number()) << LAPIC_LDR_SHIFT;
-
-       /* Accept all */
-       LAPIC_REG(TPR) =  0;
-
-       LAPIC_REG(SVR) = LAPIC_VECTOR(SPURIOUS) | LAPIC_SVR_ENABLE;
-
-       /* ExtINT */
-       if (get_cpu_number() == master_cpu) {
-               value = LAPIC_REG(LVT_LINT0);
-               value &= ~LAPIC_LVT_MASKED;
-               value |= LAPIC_LVT_DM_EXTINT;
-               LAPIC_REG(LVT_LINT0) = value;
-       }
-
-       /* Timer: unmasked, one-shot */
-       LAPIC_REG(LVT_TIMER) = LAPIC_VECTOR(TIMER);
-
-       /* Perfmon: unmasked */
-       LAPIC_REG(LVT_PERFCNT) = LAPIC_VECTOR(PERFCNT);
-
-       /* Thermal: unmasked */
-       LAPIC_REG(LVT_THERMAL) = LAPIC_VECTOR(THERMAL);
-
-       lapic_esr_clear();
-
-       LAPIC_REG(LVT_ERROR) = LAPIC_VECTOR(ERROR);
-}
-
-void
-lapic_set_timer_func(i386_intr_func_t func)
-{
-       lapic_timer_func = func;
-}
-
-void
-lapic_set_timer(
-       boolean_t               interrupt,
-       lapic_timer_mode_t      mode,
-       lapic_timer_divide_t    divisor,
-       lapic_timer_count_t     initial_count)
-{
-       boolean_t       state;
-       uint32_t        timer_vector;
-
-       state = ml_set_interrupts_enabled(FALSE);
-       timer_vector = LAPIC_REG(LVT_TIMER);
-       timer_vector &= ~(LAPIC_LVT_MASKED|LAPIC_LVT_PERIODIC);;
-       timer_vector |= interrupt ? 0 : LAPIC_LVT_MASKED;
-       timer_vector |= (mode == periodic) ? LAPIC_LVT_PERIODIC : 0;
-       LAPIC_REG(LVT_TIMER) = timer_vector;
-       LAPIC_REG(TIMER_DIVIDE_CONFIG) = divisor;
-       LAPIC_REG(TIMER_INITIAL_COUNT) = initial_count;
-       ml_set_interrupts_enabled(state);
-}
-
-void
-lapic_get_timer(
-       lapic_timer_mode_t      *mode,
-       lapic_timer_divide_t    *divisor,
-       lapic_timer_count_t     *initial_count,
-       lapic_timer_count_t     *current_count)
-{
-       boolean_t       state;
-
-       state = ml_set_interrupts_enabled(FALSE);
-       if (mode)
-               *mode = (LAPIC_REG(LVT_TIMER) & LAPIC_LVT_PERIODIC) ?
-                               periodic : one_shot;
-       if (divisor)
-               *divisor = LAPIC_REG(TIMER_DIVIDE_CONFIG) & LAPIC_TIMER_DIVIDE_MASK;
-       if (initial_count)
-               *initial_count = LAPIC_REG(TIMER_INITIAL_COUNT);
-       if (current_count)
-               *current_count = LAPIC_REG(TIMER_CURRENT_COUNT);
-       ml_set_interrupts_enabled(state);
-} 
-
-void
-lapic_set_pmi_func(i386_intr_func_t func)
-{
-       lapic_pmi_func = func;
-}
-
-void
-lapic_set_thermal_func(i386_intr_func_t func)
-{
-        lapic_thermal_func = func;
-}
-
-static inline void
-_lapic_end_of_interrupt(void)
-{
-       LAPIC_REG(EOI) = 0;
-}
-
-void
-lapic_end_of_interrupt(void)
-{
-       _lapic_end_of_interrupt();
-}
-
-int
-lapic_interrupt(int interrupt, x86_saved_state_t *state)
-{
-       int     retval = 0;
-
-       /* Did we just field an interruption for the HPET comparator? */
-       if(x86_core()->HpetVec == ((uint32_t)interrupt - 0x40)) {
-               /* Yes, go handle it... */
-               retval = HPETInterrupt();
-               /* Was it really handled? */
-               if(retval) {
-                       /* If so, EOI the 'rupt */
-                       _lapic_end_of_interrupt();
-                       /*
-                        * and then leave,
-                        * indicating that this has been handled
-                        */
-                       return 1;
-               }
-       }
-
-       interrupt -= lapic_interrupt_base;
-       if (interrupt < 0) {
-               if (interrupt == (LAPIC_NMI_INTERRUPT - lapic_interrupt_base)) {
-                       retval = NMIInterruptHandler(state);
-                       _lapic_end_of_interrupt();
-                       return retval;
-               }
-               else
-                       return 0;
-       }
-
-       switch(interrupt) {
-       case LAPIC_PERFCNT_INTERRUPT:
-               if (lapic_pmi_func != NULL)
-                       (*lapic_pmi_func)(NULL);
-               /* Clear interrupt masked */
-               LAPIC_REG(LVT_PERFCNT) = LAPIC_VECTOR(PERFCNT);
-               _lapic_end_of_interrupt();
-               retval = 1;
-               break;
-       case LAPIC_TIMER_INTERRUPT:
-               _lapic_end_of_interrupt();
-               if (lapic_timer_func != NULL)
-                       (*lapic_timer_func)(state);
-               retval = 1;
-               break;
-       case LAPIC_THERMAL_INTERRUPT:
-               if (lapic_thermal_func != NULL)
-                       (*lapic_thermal_func)(NULL);
-               _lapic_end_of_interrupt();
-               retval = 1;
-               break;
-       case LAPIC_ERROR_INTERRUPT:
-               lapic_dump();
-               panic("Local APIC error\n");
-               _lapic_end_of_interrupt();
-               retval = 1;
-               break;
-       case LAPIC_SPURIOUS_INTERRUPT:
-               kprintf("SPIV\n");
-               /* No EOI required here */
-               retval = 1;
-               break;
-       case LAPIC_INTERPROCESSOR_INTERRUPT:
-               _lapic_end_of_interrupt();
-               cpu_signal_handler(state);
-               retval = 1;
-               break;
-       }
-
-       return retval;
-}
-
-void
-lapic_smm_restore(void)
+static void
+mp_wait_for_cpu_up(int slot_num, unsigned int iters, unsigned int usecdelay)
 {
-       boolean_t state;
-
-       if (lapic_os_enabled == FALSE)
-               return;
-
-       state = ml_set_interrupts_enabled(FALSE);
-
-       if (LAPIC_ISR_IS_SET(LAPIC_REDUCED_INTERRUPT_BASE, TIMER)) {
-               /*
-                * Bogus SMI handler enables interrupts but does not know about
-                * local APIC interrupt sources. When APIC timer counts down to
-                * zero while in SMM, local APIC will end up waiting for an EOI
-                * but no interrupt was delivered to the OS.
-                */
-               _lapic_end_of_interrupt();
-
-               /*
-                * timer is one-shot, trigger another quick countdown to trigger
-                * another timer interrupt.
-                */
-               if (LAPIC_REG(TIMER_CURRENT_COUNT) == 0) {
-                       LAPIC_REG(TIMER_INITIAL_COUNT) = 1;
-               }
-
-               kprintf("lapic_smm_restore\n");
+       while (iters-- > 0) {
+               if (cpu_datap(slot_num)->cpu_running)
+                       break;
+               delay(usecdelay);
        }
-
-       ml_set_interrupts_enabled(state);
 }
 
 kern_return_t
@@ -766,27 +242,23 @@ intel_startCPU(
                return KERN_SUCCESS;
        }
 
-       LAPIC_REG(ICRD) = lapic << LAPIC_ICRD_DEST_SHIFT;
-       LAPIC_REG(ICR) = LAPIC_ICR_DM_INIT;
+       LAPIC_WRITE(ICRD, lapic << LAPIC_ICRD_DEST_SHIFT);
+       LAPIC_WRITE(ICR, LAPIC_ICR_DM_INIT);
        delay(10000);
 
-       LAPIC_REG(ICRD) = lapic << LAPIC_ICRD_DEST_SHIFT;
-       LAPIC_REG(ICR) = LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12);
+       LAPIC_WRITE(ICRD, lapic << LAPIC_ICRD_DEST_SHIFT);
+       LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12));
        delay(200);
 
-       LAPIC_REG(ICRD) = lapic << LAPIC_ICRD_DEST_SHIFT;
-       LAPIC_REG(ICR) = LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12);
+       LAPIC_WRITE(ICRD, lapic << LAPIC_ICRD_DEST_SHIFT);
+       LAPIC_WRITE(ICR, LAPIC_ICR_DM_STARTUP|(MP_BOOT>>12));
        delay(200);
 
 #ifdef POSTCODE_DELAY
        /* Wait much longer if postcodes are displayed for a delay period. */
        i *= 10000;
 #endif
-       while(i-- > 0) {
-               if (cpu_datap(slot_num)->cpu_running)
-                       break;
-               delay(10000);
-       }
+       mp_wait_for_cpu_up(slot_num, i, 10000);
 
        mp_enable_preemption();
        mutex_unlock(&mp_cpu_boot_lock);
@@ -803,6 +275,47 @@ intel_startCPU(
        }
 }
 
+/*
+ * Quickly bring a halted CPU back online.
+ */
+kern_return_t
+intel_startCPU_fast(int slot_num)
+{
+       kern_return_t   rc;
+
+       /*
+        * Try to perform a fast restart
+        */
+       rc = pmCPUExitHalt(slot_num);
+       if (rc != KERN_SUCCESS)
+               /*
+                * The CPU was not eligible for a fast restart.
+                */
+               return(rc);
+
+       /*
+        * Wait until the CPU is back online.
+        */
+       mp_disable_preemption();
+    
+       /*
+        * We use short pauses (1us) for low latency.  30,000 iterations is
+        * longer than a full restart would require, so it should be more
+        * than long enough.
+        */
+       mp_wait_for_cpu_up(slot_num, 30000, 1);
+       mp_enable_preemption();
+
+       /*
+        * Check to make sure that the CPU is really running.  If not,
+        * go through the slow path.
+        */
+       if (cpu_datap(slot_num)->cpu_running)
+               return(KERN_SUCCESS);
+       else
+               return(KERN_FAILURE);
+}
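
A quick check on those constants: 30,000 polls with a 1 us delay apiece
bounds the wait at roughly 30 ms (ignoring per-poll overhead), which per
the comment above comfortably exceeds even a full slow-path restart.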
+
 extern char    slave_boot_base[];
 extern char    slave_boot_end[];
 extern void    slave_pstart(void);
@@ -854,7 +367,7 @@ MP_EVENT_NAME_DECL();
 
 #endif /* MP_DEBUG */
 
-void
+int
 cpu_signal_handler(x86_saved_state_t *regs)
 {
        int             my_cpu;
@@ -924,11 +437,10 @@ cpu_signal_handler(x86_saved_state_t *regs)
 
        mp_enable_preemption();
 
+       return 0;
 }
 
-/* We want this to show up in backtraces, hence marked noinline.
- */
-static int __attribute__((noinline))
+static int
 NMIInterruptHandler(x86_saved_state_t *regs)
 {
        void    *stackptr;
@@ -936,8 +448,13 @@ NMIInterruptHandler(x86_saved_state_t *regs)
        sync_iss_to_iks_unconditionally(regs);
        __asm__ volatile("movl %%ebp, %0" : "=m" (stackptr));
 
+       if (cpu_number() == debugger_cpu)
+                       goto NMExit;
+
        if (pmap_tlb_flush_timeout == TRUE && current_cpu_datap()->cpu_tlb_invalid) {
-               panic_i386_backtrace(stackptr, 10, "Panic: Unresponsive processor\n", TRUE, regs);
+               char pstr[128];
+               snprintf(&pstr[0], sizeof(pstr), "Panic(CPU %d): Unresponsive processor\n", cpu_number());
+               panic_i386_backtrace(stackptr, 10, &pstr[0], TRUE, regs);
                panic_io_port_read();
                mca_check_save();
                if (pmsafe_debug)
@@ -947,11 +464,12 @@ NMIInterruptHandler(x86_saved_state_t *regs)
                }
        }
        mp_kdp_wait(FALSE);
+NMExit:        
        return 1;
 }
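
The new debugger_cpu gate keeps the CPU that owns the debugger from
parking itself in mp_kdp_wait() if an NMI reaches it while it already
holds the debugger; without the early exit it could deadlock in its own
wait loop. mp_kdp_enter() records the owner and mp_kdp_exit() clears it
(see those hunks further down). The mechanism in miniature, with
illustrative names:

    static volatile int debugger_cpu = -1; /* owner; set in mp_kdp_enter() */

    static int
    nmi_handler_sketch(int this_cpu)
    {
            if (this_cpu == debugger_cpu)
                    return 1;              /* debugger owner: don't park */

            /* every other CPU spins in mp_kdp_wait() until released */
            return 1;
    }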
 
 #ifdef MP_DEBUG
-extern int     max_lock_loops;
+int            max_lock_loops = 1000000;
 int            trappedalready = 0;     /* (BRINGUP */
 #endif /* MP_DEBUG */
 
@@ -981,20 +499,18 @@ i386_cpu_IPI(int cpu)
        /* Wait for previous interrupt to be delivered... */
 #ifdef MP_DEBUG
        int     pending_busy_count = 0;
-       while (LAPIC_REG(ICR) & LAPIC_ICR_DS_PENDING) {
+       while (LAPIC_READ(ICR) & LAPIC_ICR_DS_PENDING) {
                if (++pending_busy_count > max_lock_loops)
                        panic("i386_cpu_IPI() deadlock\n");
 #else
-       while (LAPIC_REG(ICR) & LAPIC_ICR_DS_PENDING) {
+       while (LAPIC_READ(ICR) & LAPIC_ICR_DS_PENDING) {
 #endif /* MP_DEBUG */
                cpu_pause();
        }
 
        state = ml_set_interrupts_enabled(FALSE);
-       LAPIC_REG(ICRD) =
-               cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT;
-       LAPIC_REG(ICR)  =
-               LAPIC_VECTOR(INTERPROCESSOR) | LAPIC_ICR_DM_FIXED;
+       LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT);
+       LAPIC_WRITE(ICR, LAPIC_VECTOR(INTERPROCESSOR) | LAPIC_ICR_DM_FIXED);
        (void) ml_set_interrupts_enabled(state);
 }
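
LAPIC_READ/LAPIC_WRITE replace the raw LAPIC_REG lvalue macro, whose old
definition is still visible in the mp.h hunk below. Assuming the new
accessors keep the same memory-mapped register scheme, they are plausibly
thin wrappers along these lines; this is a guess at their shape, not the
contents of the new i386/lapic.h:

    extern vm_offset_t lapic_start;        /* MMIO base, per the old mp.h */

    #define LAPIC_MMIO(reg) \
            (*(volatile uint32_t *)(lapic_start + LAPIC_##reg))
    #define LAPIC_READ(reg)        LAPIC_MMIO(reg)
    #define LAPIC_WRITE(reg, val)  (LAPIC_MMIO(reg) = (val))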
 
@@ -1023,13 +539,11 @@ cpu_NMI_interrupt(int cpu)
        if (smp_initialized) {
                state = ml_set_interrupts_enabled(FALSE);
 /* Program the interrupt command register */
-               LAPIC_REG(ICRD) =
-                       cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT;
+               LAPIC_WRITE(ICRD, cpu_to_lapic[cpu] << LAPIC_ICRD_DEST_SHIFT);
 /* The vector is ignored in this case--the target CPU will enter on the
  * NMI vector.
  */
-               LAPIC_REG(ICR)  =
-                       LAPIC_VECTOR(INTERPROCESSOR) | LAPIC_ICR_DM_NMI;
+               LAPIC_WRITE(ICR, LAPIC_VECTOR(INTERPROCESSOR)|LAPIC_ICR_DM_NMI);
                (void) ml_set_interrupts_enabled(state);
        }
 }
@@ -1491,7 +1005,7 @@ int       pmsafe_debug    = 1;
 
 #if    MACH_KDP
 volatile boolean_t     mp_kdp_trap = FALSE;
-volatile unsigned long         mp_kdp_ncpus;
+volatile unsigned long mp_kdp_ncpus;
 boolean_t              mp_kdp_state;
 
 
@@ -1500,7 +1014,7 @@ mp_kdp_enter(void)
 {
        unsigned int    cpu;
        unsigned int    ncpus;
-       unsigned int    my_cpu = cpu_number();
+       unsigned int    my_cpu;
        uint64_t        tsc_timeout;
 
        DBG("mp_kdp_enter()\n");
@@ -1522,6 +1036,8 @@ mp_kdp_enter(void)
                mp_kdp_wait(TRUE);
                simple_lock(&mp_kdp_lock);
        }
+       my_cpu = cpu_number();
+       debugger_cpu = my_cpu;
        mp_kdp_ncpus = 1;       /* self */
        mp_kdp_trap = TRUE;
        simple_unlock(&mp_kdp_lock);
@@ -1634,6 +1150,7 @@ void
 mp_kdp_exit(void)
 {
        DBG("mp_kdp_exit()\n");
+       debugger_cpu = -1;
        atomic_decl((volatile long *)&mp_kdp_ncpus, 1);
        mp_kdp_trap = FALSE;
        __asm__ volatile("mfence");
@@ -1776,62 +1293,91 @@ mp_kdb_exit(void)
 
 #endif /* MACH_KDB */
 
-/*
- * i386_init_slave() is called from pstart.
- * We're in the cpu's interrupt stack with interrupts disabled.
- * At this point we are in legacy mode. We need to switch on IA32e
- * if the mode is set to 64-bits.
- */
-void
-i386_init_slave(void)
+static void
+do_init_slave(boolean_t fast_restart)
 {
+       void    *init_param     = FULL_SLAVE_INIT;
+
        postcode(I386_INIT_SLAVE);
 
-       /* Ensure that caching and write-through are enabled */
-       set_cr0(get_cr0() & ~(CR0_NW|CR0_CD));
+       if (!fast_restart) {
+               /* Ensure that caching and write-through are enabled */
+               set_cr0(get_cr0() & ~(CR0_NW|CR0_CD));
 
-       DBG("i386_init_slave() CPU%d: phys (%d) active.\n",
-               get_cpu_number(), get_cpu_phys_number());
+               DBG("i386_init_slave() CPU%d: phys (%d) active.\n",
+                   get_cpu_number(), get_cpu_phys_number());
 
-       assert(!ml_get_interrupts_enabled());
+               assert(!ml_get_interrupts_enabled());
 
-       cpu_mode_init(current_cpu_datap());
+               cpu_mode_init(current_cpu_datap());
 
-       mca_cpu_init();
+               mca_cpu_init();
 
-       lapic_init();
-       LAPIC_DUMP();
-       LAPIC_CPU_MAP_DUMP();
+               lapic_configure();
+               LAPIC_DUMP();
+               LAPIC_CPU_MAP_DUMP();
 
-       init_fpu();
+               init_fpu();
 
-       mtrr_update_cpu();
+               mtrr_update_cpu();
+       } else
+               init_param = FAST_SLAVE_INIT;
 
        /* resume VT operation */
        vmx_resume();
 
-       pat_init();
+       if (!fast_restart)
+               pat_init();
 
        cpu_thread_init();      /* not strictly necessary */
 
        cpu_init();     /* Sets cpu_running which starter cpu waits for */ 
 
-       slave_main();
+       slave_main(init_param);
 
-       panic("i386_init_slave() returned from slave_main()");
+       panic("do_init_slave() returned from slave_main()");
 }
 
+/*
+ * i386_init_slave() is called from pstart.
+ * We're in the cpu's interrupt stack with interrupts disabled.
+ * At this point we are in legacy mode. We need to switch on IA32e
+ * if the mode is set to 64-bits.
+ */
 void
-slave_machine_init(void)
+i386_init_slave(void)
+{
+       do_init_slave(FALSE);
+}
+
+/*
+ * i386_init_slave_fast() is called from pmCPUHalt.
+ * We're running on the idle thread and need to fix up
+ * some accounting and get it so that the scheduler sees this
+ * CPU again.
+ */
+void
+i386_init_slave_fast(void)
+{
+       do_init_slave(TRUE);
+}
+
+void
+slave_machine_init(void *param)
 {
        /*
         * Here in process context, but with interrupts disabled.
         */
        DBG("slave_machine_init() CPU%d\n", get_cpu_number());
 
-       clock_init();
+       if (param == FULL_SLAVE_INIT) {
+               /*
+                * Cold start
+                */
+               clock_init();
 
-       cpu_machine_init();             /* Interrupts enabled hereafter */
+               cpu_machine_init();     /* Interrupts enabled hereafter */
+       }
 }
 
 #undef cpu_number()
index 99ba34fe2441bd78ab32d1058d7f8a24e7d756ce..0fac0fbd5ff84a0fda7896014442a2b58264f0df 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -58,8 +58,8 @@
  */
 #ifdef KERNEL_PRIVATE
 
-#ifndef _I386AT_MP_H_
-#define _I386AT_MP_H_
+#ifndef _I386_MP_H_
+#define _I386_MP_H_
 
 #ifndef        DEBUG
 #include <debug.h>
 #include <i386/apic.h>
 #include <i386/mp_events.h>
 
-#define LAPIC_ID_MAX   (LAPIC_ID_MASK)
-
-#define MAX_CPUS       (LAPIC_ID_MAX + 1)
+#define MAX_CPUS       32              /* (8*sizeof(long)) */  
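
MAX_CPUS is no longer derived from the APIC ID mask; its own comment,
(8*sizeof(long)), points at the real constraint: per-CPU sets must fit in
a single 32-bit word, presumably so they can be manipulated with one-word
atomic bit operations. The invariant in miniature (names illustrative):

    #include <stdint.h>

    #define MAX_CPUS 32                    /* == 8 * sizeof(long) on ILP32 */

    typedef uint32_t cpumask_sketch_t;     /* one bit per CPU */

    static inline void
    cpu_set_bit(cpumask_sketch_t *m, int cpu)
    {
            *m |= (1U << cpu);
    }

    static inline int
    cpu_test_bit(cpumask_sketch_t m, int cpu)
    {
            return (m >> cpu) & 1;
    }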
 
 #ifndef        ASSEMBLER
+#include <stdint.h>
 #include <sys/cdefs.h>
 #include <mach/boolean.h>
 #include <mach/kern_return.h>
 #include <mach/i386/thread_status.h>
+#include <kern/lock.h>
 
 __BEGIN_DECLS
 
 extern kern_return_t intel_startCPU(int slot_num);
+extern kern_return_t intel_startCPU_fast(int slot_num);
 extern void i386_init_slave(void);
+extern void i386_init_slave_fast(void);
 extern void smp_init(void);
 
 extern void cpu_interrupt(int cpu);
-
-extern void lapic_init(void);
-extern void lapic_shutdown(void);
-extern void lapic_smm_restore(void);
-extern boolean_t lapic_probe(void);
-extern void lapic_dump(void);
-extern int  lapic_interrupt(int interrupt, x86_saved_state_t *state);
-extern void lapic_end_of_interrupt(void);
-extern int  lapic_to_cpu[];
-extern int  cpu_to_lapic[];
-extern int  lapic_interrupt_base;
-extern void lapic_cpu_map(int lapic, int cpu_num);
-extern uint32_t ml_get_apicid(uint32_t cpu);
-
-extern void lapic_set_timer(
-               boolean_t               interrupt,
-               lapic_timer_mode_t      mode,
-               lapic_timer_divide_t    divisor,
-               lapic_timer_count_t     initial_count);
-
-extern void lapic_get_timer(
-               lapic_timer_mode_t      *mode,
-               lapic_timer_divide_t    *divisor,
-               lapic_timer_count_t     *initial_count,
-               lapic_timer_count_t     *current_count);
-
-typedef        void (*i386_intr_func_t)(void *);
-extern void lapic_set_timer_func(i386_intr_func_t func);
-extern void lapic_set_pmi_func(i386_intr_func_t func);
-extern void lapic_set_thermal_func(i386_intr_func_t func);
-
 __END_DECLS
 
-/*
- * By default, use high vectors to leave vector space for systems
- * with multiple I/O APIC's. However some systems that boot with
- * local APIC disabled will hang in SMM when vectors greater than
- * 0x5F are used. Those systems are not expected to have I/O APIC
- * so 16 (0x50 - 0x40) vectors for legacy PIC support is perfect.
- */
-#define LAPIC_DEFAULT_INTERRUPT_BASE   0xD0
-#define LAPIC_REDUCED_INTERRUPT_BASE   0x50
-/*
- * Specific lapic interrupts are relative to this base
- * in priority order from high to low:
- */
-
-#define LAPIC_PERFCNT_INTERRUPT                0xF
-#define LAPIC_TIMER_INTERRUPT          0xE
-#define LAPIC_INTERPROCESSOR_INTERRUPT 0xD
-#define LAPIC_THERMAL_INTERRUPT                0xC
-#define LAPIC_ERROR_INTERRUPT          0xB
-#define LAPIC_SPURIOUS_INTERRUPT       0xA
-/* The vector field is ignored for NMI interrupts via the LAPIC
- * or otherwise, so this is not an offset from the interrupt
- * base.
- */
-#define LAPIC_NMI_INTERRUPT            0x2
-
-#define LAPIC_REG(reg) \
-       (*((volatile uint32_t *)(lapic_start + LAPIC_##reg)))
-#define LAPIC_REG_OFFSET(reg,off) \
-       (*((volatile uint32_t *)(lapic_start + LAPIC_##reg + (off))))
-
-#define LAPIC_VECTOR(src) \
-       (lapic_interrupt_base + LAPIC_##src##_INTERRUPT)
-
-#define LAPIC_ISR_IS_SET(base,src) \
-       (LAPIC_REG_OFFSET(ISR_BASE,((base+LAPIC_##src##_INTERRUPT)/32)*0x10) & \
-               (1 <<((base + LAPIC_##src##_INTERRUPT)%32)))
-
-extern vm_offset_t     lapic_start;
-
-#endif /* ASSEMBLER */
-
-#define CPU_NUMBER(r)                          \
-       movl    %gs:CPU_NUMBER_GS,r
-
-#define CPU_NUMBER_FROM_LAPIC(r)               \
-       movl    EXT(lapic_id),r;                \
-       movl    0(r),r;                         \
-       shrl    $(LAPIC_ID_SHIFT),r;            \
-       andl    $(LAPIC_ID_MASK),r;             \
-       movl    EXT(lapic_to_cpu)(,r,4),r
-
-
-/* word describing the reason for the interrupt, one per cpu */
-
-#ifndef        ASSEMBLER
-#include <kern/lock.h>
-
 extern unsigned int    real_ncpus;             /* real number of cpus */
 extern unsigned int    max_ncpus;              /* max number of cpus */
 decl_simple_lock_data(extern,kdb_lock) /* kdb lock             */
@@ -425,6 +338,6 @@ extern cpu_signal_event_log_t       *cpu_handle[];
 #define MP_ENABLE_PREEMPTION_NO_CHECK
 #endif /* MACH_RT */
 
-#endif /* _I386AT_MP_H_ */
+#endif /* _I386_MP_H_ */
 
 #endif /* KERNEL_PRIVATE */
index 75bbe25cf81c27614e5e01648f8d09f8e01ed301..9e2df152ef445b3e77e0e2135d75d76dead27e9e 100644 (file)
@@ -94,7 +94,12 @@ extern uint32_t              low_eintstack[];        /* top */
  * The master cpu (cpu 0) has its data area statically allocated;
  * others are allocated dynamically and this array is updated at runtime.
  */
-cpu_data_t     cpu_data_master;
+cpu_data_t     cpu_data_master = {
+                       .cpu_this = &cpu_data_master,
+                       .cpu_nanotime = &rtc_nanotime_info,
+                       .cpu_is64bit = FALSE,
+                       .cpu_int_stack_top = (vm_offset_t) low_eintstack,
+               };
 cpu_data_t     *cpu_data_ptr[MAX_CPUS] = { [0] &cpu_data_master };
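
The boot CPU's data is now populated at compile time with C99 designated
initializers, which is what allows the runtime assignments to be deleted
from cpu_data_alloc() in the next hunk. Note the array initializer uses
GCC's older "[0] value" element-designator spelling, without the "=". The
two idioms in miniature:

    struct cpu_sketch {
            struct cpu_sketch *self;
            int                is64bit;
    };

    static struct cpu_sketch boot = {
            .self    = &boot,              /* self-reference is legal here */
            .is64bit = 0,
    };

    static struct cpu_sketch *cpus[4] = { [0] = &boot };  /* C99 spelling */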
 
 decl_simple_lock_data(,cpu_lock);      /* protects real_ncpus */
@@ -488,9 +493,6 @@ cpu_data_alloc(boolean_t is_boot_cpu)
                if (cdp->cpu_processor == NULL) {
                        cdp->cpu_processor = cpu_processor_alloc(TRUE);
                        cdp->cpu_pmap = pmap_cpu_alloc(TRUE);
-                       cdp->cpu_this = cdp;
-                       cdp->cpu_is64bit = FALSE;
-                       cdp->cpu_int_stack_top = (vm_offset_t) low_eintstack;
                        cpu_desc_init(cdp, TRUE);
                        fast_syscall_init();
                }
@@ -566,6 +568,8 @@ cpu_data_alloc(boolean_t is_boot_cpu)
        real_ncpus++;
        simple_unlock(&cpu_lock);
 
+       cdp->cpu_nanotime = &rtc_nanotime_info;
+
        kprintf("cpu_data_alloc(%d) %p desc_table: %p "
                "ldt: %p "
                "int_stack: 0x%x-0x%x\n",
index 61e107963396ec7794bbd3e4fc83cb060aa06fe5..1dffe6d593a99352bd81cf8daf2e172ba499f6f5 100644 (file)
@@ -32,7 +32,7 @@
 #include <i386/perfmon.h>
 #include <i386/proc_reg.h>
 #include <i386/cpu_threads.h>
-#include <i386/mp.h>
+#include <i386/lapic.h>
 #include <i386/cpuid.h>
 #include <i386/lock.h>
 #include <vm/vm_kern.h>
@@ -260,7 +260,7 @@ _pmc_machine_type(void)
 static void
 pmc_p4_intr(void *state)
 {
-       pmc_table_t     *pmc_table = (pmc_table_t *) x86_core()->pmc;
+       pmc_table_t     *pmc_table = (pmc_table_t *) x86_lcpu()->pmc;
        uint32_t        cccr_addr;
        pmc_cccr_t      cccr;
        pmc_id_t        id;
@@ -300,7 +300,7 @@ pmc_p4_intr(void *state)
 static void
 pmc_p6_intr(void *state)
 {
-       pmc_table_t     *pmc_table = (pmc_table_t *) x86_core()->pmc;
+       pmc_table_t     *pmc_table = (pmc_table_t *) x86_lcpu()->pmc;
        pmc_id_t        id;
 
        /*
@@ -315,7 +315,7 @@ pmc_p6_intr(void *state)
 static void
 pmc_core_intr(void *state)
 {
-       pmc_table_t     *pmc_table = (pmc_table_t *) x86_core()->pmc;
+       pmc_table_t     *pmc_table = (pmc_table_t *) x86_lcpu()->pmc;
        pmc_id_t        id;
        pmc_global_status_t     ovf_status;
 
@@ -367,7 +367,7 @@ pmc_alloc(void)
                pmc_table->id_max = 17;
                pmc_table->msr_counter_base = MSR_COUNTER_ADDR(0);
                pmc_table->msr_control_base = MSR_CCCR_ADDR(0);
-               lapic_set_pmi_func(&pmc_p4_intr);
+               lapic_set_pmi_func((i386_intr_func_t) &pmc_p4_intr);
                break;
        case pmc_Core:
                pmc_table->id_max = 1;
@@ -376,13 +376,13 @@ pmc_alloc(void)
                pmc_table->Core.msr_global_ctrl = MSR_PERF_GLOBAL_CTRL;
                pmc_table->Core.msr_global_ovf_ctrl = MSR_PERF_GLOBAL_OVF_CTRL;
                pmc_table->Core.msr_global_status = MSR_PERF_GLOBAL_STATUS;
-               lapic_set_pmi_func(&pmc_core_intr);
+               lapic_set_pmi_func((i386_intr_func_t) &pmc_core_intr);
                break;
        case pmc_P6:
                pmc_table->id_max = 1;
                pmc_table->msr_counter_base = MSR_P6_COUNTER_ADDR(0);
                pmc_table->msr_control_base = MSR_P6_PES_ADDR(0);
-               lapic_set_pmi_func(&pmc_p6_intr);
+               lapic_set_pmi_func((i386_intr_func_t) &pmc_p6_intr);
                break;
        default:
                break;
@@ -398,12 +398,12 @@ pmc_alloc(void)
 static inline pmc_table_t *
 pmc_table_valid(pmc_id_t id)
 {
-       x86_core_t      *my_core = x86_core();
+       x86_lcpu_t      *my_lcpu = x86_lcpu();
        pmc_table_t     *pmc;
 
-       assert(my_core != NULL);
+       assert(my_lcpu != NULL);
        
-       pmc = (pmc_table_t *) my_core->pmc;
+       pmc = (pmc_table_t *) my_lcpu->pmc;
        if ((pmc == NULL) ||
            (id > pmc->id_max) ||
            (pmc->machine_type == pmc_P4_Xeon && !pmc->P4.reserved[id]) ||
@@ -416,12 +416,12 @@ pmc_table_valid(pmc_id_t id)
 int
 pmc_machine_type(pmc_machine_t *type)
 {
-       x86_core_t      *my_core = x86_core();
+       x86_lcpu_t      *my_lcpu = x86_lcpu();
        pmc_table_t     *pmc_table;
 
-       assert(my_core != NULL);
+       assert(my_lcpu != NULL);
 
-       pmc_table = (pmc_table_t *) my_core->pmc;
+       pmc_table = (pmc_table_t *) my_lcpu->pmc;
        if (pmc_table == NULL)
                return KERN_FAILURE;
 
@@ -433,12 +433,12 @@ pmc_machine_type(pmc_machine_t *type)
 int
 pmc_reserve(pmc_id_t id)
 {
-       x86_core_t      *my_core = x86_core();
+       x86_lcpu_t      *my_lcpu = x86_lcpu();
        pmc_table_t     *pmc_table;
 
-       assert(my_core != NULL);
+       assert(my_lcpu != NULL);
 
-       pmc_table = (pmc_table_t *) my_core->pmc;
+       pmc_table = (pmc_table_t *) my_lcpu->pmc;
        if (pmc_table == NULL)
                return KERN_FAILURE;
        if (id > pmc_table->id_max)
index 1f12073fb5a2e7e543010031cb2d3f5afa5311b1..d2efc8bc9bf03cf7b08296da4861342d5c10e7d2 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2004-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2004-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -31,6 +31,7 @@
  *
  * Implements the "wrappers" to the KEXT.
  */
+#include <kern/machine.h>
 #include <i386/machine_routines.h>
 #include <i386/machine_cpu.h>
 #include <i386/misc_protos.h>
 #include <i386/pmCPU.h>
 #include <i386/cpuid.h>
 #include <i386/rtclock.h>
+#include <kern/sched_prim.h>
+
+/*
+ * Kernel parameter determining whether threads are halted unconditionally
+ * in the idle state.  This is the default behavior.
+ * See machine_idle() for use.
+ */
+int idlehalt                                   = 1;
 
 extern int disableConsoleOutput;
 
@@ -54,185 +63,8 @@ decl_simple_lock_data(,pm_init_lock);
  */
 pmDispatch_t   *pmDispatch     = NULL;
 
-/*
- * Current power management states (for use until KEXT is loaded).
- */
-static pmInitState_t   pmInitState;
-
 static uint32_t                pmInitDone      = 0;
 
-/*
- * Nap control variables:
- */
-uint32_t forcenap = 0;                 /* Force nap (fn) boot-arg controls */
-
-/*
- * Do any initialization needed
- */
-void
-pmsInit(void)
-{
-    static int         initialized     = 0;
-
-    /*
-     * Initialize some of the initial state to "uninitialized" until
-     * it gets set with something more useful.  This allows the KEXT
-     * to determine if the initial value was actually set to something.
-     */
-    if (!initialized) {
-       pmInitState.PState = -1;
-       pmInitState.PLimit = -1;
-       pmInitState.maxBusDelay = -1;
-       initialized = 1;
-    }
-
-    if (pmDispatch != NULL && pmDispatch->pmsInit != NULL)
-       (*pmDispatch->pmsInit)();
-}
-
-/*
- * Start the power management stepper on all processors
- *
- * All processors must be parked.  This should be called when the hardware
- * is ready to step.  Probably only at boot and after wake from sleep.
- *
- */
-void
-pmsStart(void)
-{
-    if (pmDispatch != NULL && pmDispatch->pmsStart != NULL)
-       (*pmDispatch->pmsStart)();
-}
-
-/*
- * Park the stepper execution.  This will force the stepper on this
- * processor to abandon its current step and stop.  No changes to the
- * hardware state is made and any previous step is lost.
- *     
- * This is used as the initial state at startup and when the step table
- * is being changed.
- *
- */
-void
-pmsPark(void)
-{
-    if (pmDispatch != NULL && pmDispatch->pmsPark != NULL)
-       (*pmDispatch->pmsPark)();
-}
-
-/*
- * Control the Power Management Stepper.
- * Called from user state by the superuser.
- * Interrupts disabled.
- *
- * This interface is deprecated and is now a no-op.
- */
-kern_return_t
-pmsControl(__unused uint32_t request, __unused user_addr_t reqaddr,
-          __unused uint32_t reqsize)
-{
-    return(KERN_SUCCESS);
-}
-
-/*
- * Broadcast a change to all processors including ourselves.
- *
- * Interrupts disabled.
- */
-void
-pmsRun(uint32_t nstep)
-{
-    if (pmDispatch != NULL && pmDispatch->pmsRun != NULL)
-       (*pmDispatch->pmsRun)(nstep);
-}
-
-/*
- * Build the tables needed for the stepper.  This includes both the step
- * definitions and the step control table.
- *
- * We most absolutely need to be parked before this happens because we're
- * going to change the table.  We also have to be complte about checking
- * for errors.  A copy is always made because we don't want to be crippled
- * by not being able to change the table or description formats.
- *
- * We pass in a table of external functions and the new stepper def uses
- * the corresponding indexes rather than actual function addresses.  This
- * is done so that a proper table can be built with the control syscall.
- * It can't supply addresses, so the index has to do.  We internalize the
- * table so our caller does not need to keep it.  Note that passing in a 0
- * will use the current function table.  Also note that entry 0 is reserved
- * and must be 0, we will check and fail the build.
- *
- * The platformData parameter is a 32-bit word of data that is passed unaltered
- * to the set function.
- *
- * The queryFunc parameter is the address of a function that will return the
- * current state of the platform. The format of the data returned is the same
- * as the platform specific portions of pmsSetCmd, i.e., pmsXClk, pmsVoltage,
- * and any part of pmsPowerID that is maintained by the platform hardware
- * (an example would be the values of the gpios that correspond to pmsPowerID).
- * The value should be constructed by querying hardware rather than returning
- * a value cached by software. One of the intents of this function is to help
- * recover lost or determine initial power states.
- *
- */
-kern_return_t
-pmsBuild(pmsDef *pd, uint32_t pdsize, pmsSetFunc_t *functab,
-        uint32_t platformData, pmsQueryFunc_t queryFunc)
-{
-    kern_return_t      rc      = 0;
-
-    if (pmDispatch != NULL && pmDispatch->pmsBuild != NULL)
-       rc = (*pmDispatch->pmsBuild)(pd, pdsize, functab,
-                                    platformData, queryFunc);
-
-    return(rc);
-}
-
-
-/*
- * Load a new ratio/VID table.
- *
- * Note that this interface is specific to the Intel SpeedStep implementation.
- * It is expected that this will only be called once to override the default
- * ratio/VID table when the platform starts.
- *
- * Normally, the table will need to be replaced at the same time that the
- * stepper program proper is replaced, as the PState indices from an old
- * program may no longer be valid.  When replacing the default program this
- * should not be a problem as any new table will have at least two PState
- * entries and the default program only references P0 and P1.
- */
-kern_return_t
-pmsCPULoadVIDTable(uint16_t *tablep, int nstates)
-{
-    if (pmDispatch != NULL && pmDispatch->pmsCPULoadVIDTable != NULL)
-       return((*pmDispatch->pmsCPULoadVIDTable)(tablep, nstates));
-    else {
-       int     i;
-
-       if (nstates > MAX_PSTATES)
-           return(KERN_FAILURE);
-
-       for (i = 0; i < nstates; i += 1)
-           pmInitState.VIDTable[i] = tablep[i];
-    }
-    return(KERN_SUCCESS);
-}
-
-/*
- * Set the (global) PState limit.  CPUs will not be permitted to run at
- * a lower (more performant) PState than this.
- */
-kern_return_t
-pmsCPUSetPStateLimit(uint32_t limit)
-{
-    if (pmDispatch != NULL && pmDispatch->pmsCPUSetPStateLimit != NULL)
-       return((*pmDispatch->pmsCPUSetPStateLimit)(limit));
-
-    pmInitState.PLimit = limit;
-    return(KERN_SUCCESS);
-}
 
 /*
  * Initialize the Cstate change code.
@@ -255,62 +87,55 @@ power_management_init(void)
 }
 
 /*
- * ACPI calls the following routine to set/update mwait hints.  A table
- * (possibly null) specifies the available Cstates and their hints, all
- * other states are assumed to be invalid.  ACPI may update available
- * states to change the nap policy (for example, while AC power is
- * available).
+ * Called when the CPU is idle.  It calls into the power management kext
+ * to determine the best way to idle the CPU.
  */
-kern_return_t
-Cstate_table_set(Cstate_hint_t *tablep, unsigned int nstates)
+void
+machine_idle(void)
 {
-    if (forcenap)
-       return(KERN_SUCCESS);
+    cpu_data_t         *my_cpu         = current_cpu_datap();
 
-    if (pmDispatch != NULL && pmDispatch->cstateTableSet != NULL)
-       return((*pmDispatch->cstateTableSet)(tablep, nstates));
-    else {
-       unsigned int    i;
+    if (my_cpu == NULL)
+       goto out;
 
-       for (i = 0; i < nstates; i += 1) {
-           pmInitState.CStates[i].number = tablep[i].number;
-           pmInitState.CStates[i].hint   = tablep[i].hint;
-       }
+    /*
+     * If idlehalt isn't set, then don't do any power management related
+     * idle handling.
+     */
+    if (!idlehalt)
+       goto out;
+
+    my_cpu->lcpu.state = LCPU_IDLE;
+    DBGLOG(cpu_handle, cpu_number(), MP_IDLE);
+    MARK_CPU_IDLE(cpu_number());
 
-       pmInitState.CStatesCount = nstates;
+    if (pmInitDone
+       && pmDispatch != NULL
+       && pmDispatch->cstateMachineIdle != NULL)
+       (*pmDispatch->cstateMachineIdle)(0x7FFFFFFFFFFFFFFFULL);
+    else {
+       /*
+        * If no power management, re-enable interrupts and halt.
+        * This will keep the CPU from spinning through the scheduler
+        * and will allow at least some minimal power savings (but it
+        * may cause problems in some MP configurations w.r.t. the APIC
+        * stopping during a GV3 transition).
+        */
+       __asm__ volatile ("sti; hlt");
     }
-    return(KERN_SUCCESS);
-}
 
-/*
- * Called when the CPU is idle.  It will choose the best C state to
- * be in.
- */
-void
-machine_idle_cstate(boolean_t halted)
-{
-       if (pmInitDone
-           && pmDispatch != NULL
-           && pmDispatch->cstateMachineIdle != NULL)
-               (*pmDispatch->cstateMachineIdle)(!halted ?
-                                                0x7FFFFFFFFFFFFFFFULL : 0ULL);
-       else if (halted) {
-           /*
-            * If no power managment and a processor is taken off-line,
-            * then invalidate the cache and halt it (it will not be able
-            * to be brought back on-line without resetting the CPU).
-            */
-           __asm__ volatile ( "wbinvd; hlt" );
-       } else {
-           /*
-            * If no power management, re-enable interrupts and halt.
-            * This will keep the CPU from spinning through the scheduler
-            * and will allow at least some minimal power savings (but it
-            * may cause problems in some MP configurations w.r.t to the
-            * APIC stopping during a P-State transition).
-            */
-           __asm__ volatile ( "sti; hlt" );
-       }
+    /*
+     * Mark the CPU as running again.
+     */
+    MARK_CPU_ACTIVE(cpu_number());
+    DBGLOG(cpu_handle, cpu_number(), MP_UNIDLE);
+    my_cpu->lcpu.state = LCPU_RUN;
+
+    /*
+     * Re-enable interrupts.
+     */
+  out:
+    __asm__ volatile("sti");
 }
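
The "sti; hlt" pairing matters: STI enables interrupts only after the
following instruction, so the enable and the halt are effectively atomic.
Split across two statements, a wakeup IPI could be taken and fully handled
in the gap, leaving the CPU to halt with nothing pending to wake it. The
distinction as a sketch:

    /* Race-free: the interrupt window opens only as HLT executes, so a
     * pending wakeup takes the CPU straight out of the halt. */
    __asm__ volatile("sti; hlt");

    /* Racy (do not do this): an IPI landing between the statements is
     * consumed before HLT, and the CPU halts with no wakeup pending. */
    __asm__ volatile("sti");
    /* <-- interrupt delivered and handled here */
    __asm__ volatile("hlt");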
 
 /*
@@ -320,13 +145,16 @@ machine_idle_cstate(boolean_t halted)
 void
 pmCPUHalt(uint32_t reason)
 {
+    cpu_data_t *cpup   = current_cpu_datap();
 
     switch (reason) {
     case PM_HALT_DEBUG:
+       cpup->lcpu.state = LCPU_PAUSE;
        __asm__ volatile ("wbinvd; hlt");
        break;
 
     case PM_HALT_PANIC:
+       cpup->lcpu.state = LCPU_PAUSE;
        __asm__ volatile ("cli; wbinvd; hlt");
        break;
 
@@ -337,31 +165,40 @@ pmCPUHalt(uint32_t reason)
        if (pmInitDone
            && pmDispatch != NULL
            && pmDispatch->pmCPUHalt != NULL) {
+           /*
+            * Halt the CPU (and put it in a low-power state).
+            */
            (*pmDispatch->pmCPUHalt)();
-       } else {
-           cpu_data_t  *cpup   = current_cpu_datap();
 
+           /*
+            * We've exited halt, so make the CPU schedulable again.
+            */
+           i386_init_slave_fast();
+
+           panic("init_slave_fast returned");
+       } else {
            /*
             * If no power management and a processor is taken off-line,
             * then invalidate the cache and halt it (it will not be able
             * to be brought back on-line without resetting the CPU).
             */
            __asm__ volatile ("wbinvd");
-           cpup->lcpu.halted = TRUE;
+           cpup->lcpu.state = LCPU_HALT;
            __asm__ volatile ( "wbinvd; hlt" );
+
+           panic("back from Halt");
        }
        break;
     }
 }
 
-/*
- * Called to initialize the power management structures for the CPUs.
- */
 void
-pmCPUStateInit(void)
+pmMarkAllCPUsOff(void)
 {
-    if (pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL)
-       (*pmDispatch->pmCPUStateInit)();
+    if (pmInitDone
+       && pmDispatch != NULL
+       && pmDispatch->markAllCPUsOff != NULL)
+       (*pmDispatch->markAllCPUsOff)();
 }
 
 static void
@@ -398,6 +235,20 @@ pmGetMyCore(void)
     return(cpup->lcpu.core);
 }
 
+static x86_die_t *
+pmGetDie(int cpu)
+{
+    return(cpu_to_die(cpu));
+}
+
+static x86_die_t *
+pmGetMyDie(void)
+{
+    cpu_data_t *cpup   = current_cpu_datap();
+
+    return(cpup->lcpu.die);
+}
+
 static x86_pkg_t *
 pmGetPackage(int cpu)
 {
@@ -409,7 +260,7 @@ pmGetMyPackage(void)
 {
     cpu_data_t *cpup   = current_cpu_datap();
 
-    return(cpup->lcpu.core->package);
+    return(cpup->lcpu.package);
 }
 
 static void
@@ -484,29 +335,43 @@ pmCPUExitIdle(cpu_data_t *cpu)
     return(do_ipi);
 }
 
+kern_return_t
+pmCPUExitHalt(int cpu)
+{
+    kern_return_t      rc      = KERN_INVALID_ARGUMENT;
+
+    if (pmInitDone
+       && pmDispatch != NULL
+       && pmDispatch->exitHalt != NULL)
+       rc = pmDispatch->exitHalt(cpu_to_lcpu(cpu));
+
+    return(rc);
+}
+
 /*
- * Called when a CPU is being restarted after being powered off (as in S3).
+ * Called to initialize the power management structures for the CPUs.
  */
 void
-pmCPUMarkRunning(cpu_data_t *cpu)
+pmCPUStateInit(void)
 {
-    if (pmInitDone
-       && pmDispatch != NULL
-       && pmDispatch->markCPURunning != NULL)
-       (*pmDispatch->markCPURunning)(&cpu->lcpu);
+    if (pmDispatch != NULL && pmDispatch->pmCPUStateInit != NULL)
+       (*pmDispatch->pmCPUStateInit)();
 }
 
 /*
- * Called from the HPET interrupt handler to perform the
- * necessary power management work.
+ * Called when a CPU is being restarted after being powered off (as in S3).
  */
 void
-pmHPETInterrupt(void)
+pmCPUMarkRunning(cpu_data_t *cpu)
 {
+    cpu_data_t *cpup   = current_cpu_datap();
+
     if (pmInitDone
        && pmDispatch != NULL
-       && pmDispatch->HPETInterrupt != NULL)
-       (*pmDispatch->HPETInterrupt)();
+       && pmDispatch->markCPURunning != NULL)
+       (*pmDispatch->markCPURunning)(&cpu->lcpu);
+    else
+       cpup->lcpu.state = LCPU_RUN;
 }
 
 /*
@@ -524,6 +389,30 @@ pmCPUControl(uint32_t cmd, void *datap)
     return(rc);
 }
 
+/*
+ * Called to save the timer state used by power management prior
+ * to "sleeping".
+ */
+void
+pmTimerSave(void)
+{
+    if (pmDispatch != NULL
+       && pmDispatch->pmTimerStateSave != NULL)
+       (*pmDispatch->pmTimerStateSave)();
+}
+
+/*
+ * Called to restore the timer state used by power management after
+ * waking from "sleep".
+ */
+void
+pmTimerRestore(void)
+{
+    if (pmDispatch != NULL
+       && pmDispatch->pmTimerStateRestore != NULL)
+       (*pmDispatch->pmTimerStateRestore)();
+}
+
 /*
  * Set the worst-case time for the C4 to C2 transition.
  * No longer does anything.
@@ -578,8 +467,29 @@ ml_set_maxbusdelay(uint32_t mdelay)
     if (pmDispatch != NULL
        && pmDispatch->setMaxBusDelay != NULL)
        pmDispatch->setMaxBusDelay(maxdelay);
-    else
-       pmInitState.maxBusDelay = maxdelay;
+}
+
+uint64_t
+ml_get_maxintdelay(void)
+{
+    uint64_t   max_delay       = 0;
+
+    if (pmDispatch != NULL
+       && pmDispatch->getMaxIntDelay != NULL)
+       max_delay = pmDispatch->getMaxIntDelay();
+
+    return(max_delay);
+}
+
+/*
+ * Set the maximum delay allowed for an interrupt.
+ */
+void
+ml_set_maxintdelay(uint64_t mdelay)
+{
+    if (pmDispatch != NULL
+       && pmDispatch->setMaxIntDelay != NULL)
+       pmDispatch->setMaxIntDelay(mdelay);
 }
 
 /*
@@ -602,15 +512,14 @@ pmSafeMode(x86_lcpu_t *lcpu, uint32_t flags)
         * We only look at the PAUSE and RESUME flags.  The other flag(s)
         * will not make any sense without the KEXT, so just ignore them.
         *
-        * We set the halted flag in the LCPU structure to indicate
-        * that this CPU isn't to do anything.  If it's the CPU we're
-        * currently running on, then spin until the halted flag is
-        * reset.
+        * We set the CPU's state to indicate that it's halted.  If this
+        * is the CPU we're currently running on, then spin until the
+        * state becomes non-halted.
         */
        if (flags & PM_SAFE_FL_PAUSE) {
-           lcpu->halted = TRUE;
+           lcpu->state = LCPU_PAUSE;
            if (lcpu == x86_lcpu()) {
-               while (lcpu->halted)
+               while (lcpu->state == LCPU_PAUSE)
                    cpu_pause();
            }
        }
@@ -620,7 +529,7 @@ pmSafeMode(x86_lcpu_t *lcpu, uint32_t flags)
         * get it out of its spin loop.
         */
        if (flags & PM_SAFE_FL_RESUME) {
-           lcpu->halted = FALSE;
+           lcpu->state = LCPU_RUN;
        }
     }
 }
@@ -657,21 +566,23 @@ pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs,
               pmCallBacks_t *callbacks)
 {
     if (callbacks != NULL && version == PM_DISPATCH_VERSION) {
-       callbacks->InitState   = &pmInitState;
        callbacks->setRTCPop   = setPop;
        callbacks->resyncDeadlines = etimer_resync_deadlines;
        callbacks->initComplete= pmInitComplete;
        callbacks->GetLCPU     = pmGetLogicalCPU;
        callbacks->GetCore     = pmGetCore;
+       callbacks->GetDie      = pmGetDie;
        callbacks->GetPackage  = pmGetPackage;
        callbacks->GetMyLCPU   = pmGetMyLogicalCPU;
        callbacks->GetMyCore   = pmGetMyCore;
+       callbacks->GetMyDie    = pmGetMyDie;
        callbacks->GetMyPackage= pmGetMyPackage;
-       callbacks->CoresPerPkg = cpuid_info()->cpuid_cores_per_package;
        callbacks->GetPkgRoot  = pmGetPkgRoot;
        callbacks->LockCPUTopology = pmLockCPUTopology;
        callbacks->GetHibernate    = pmCPUGetHibernate;
        callbacks->LCPUtoProcessor = pmLCPUtoProcessor;
+       callbacks->ThreadBind      = thread_bind;
+       callbacks->topoParms       = &topoParms;
     }
 
     if (cpuFuncs != NULL) {
@@ -690,3 +601,42 @@ pmUnRegister(pmDispatch_t *cpuFuncs)
     }
 }
 
+/******************************************************************************
+ *
+ * All of the following are deprecated interfaces and no longer used.
+ *
+ ******************************************************************************/
+kern_return_t
+pmsControl(__unused uint32_t request, __unused user_addr_t reqaddr,
+          __unused uint32_t reqsize)
+{
+    return(KERN_SUCCESS);
+}
+
+void
+pmsInit(void)
+{
+}
+
+void
+pmsStart(void)
+{
+}
+
+void
+pmsPark(void)
+{
+}
+
+void
+pmsRun(__unused uint32_t nstep)
+{
+}
+
+kern_return_t
+pmsBuild(__unused pmsDef *pd, __unused uint32_t pdsize,
+        __unused pmsSetFunc_t *functab,
+        __unused uint32_t platformData, __unused pmsQueryFunc_t queryFunc)
+{
+    return(KERN_SUCCESS);
+}
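
The deprecated pms* stubs above exist only to satisfy old callers; all real work now funnels through the pmDispatch table, with every entry point guarded against an unregistered or partially populated table. A minimal sketch of that guarded-dispatch idiom, with hypothetical names (standalone C, not kernel code):

    #include <stddef.h>
    #include <stdio.h>

    /* Hypothetical dispatch table mirroring the pmDispatch_t pattern. */
    typedef struct {
        void (*machineIdle)(unsigned long long max_idle_ns);
    } dispatch_t;

    static dispatch_t *dispatch = NULL;  /* installed by a PM module, if any */
    static int initDone = 0;             /* set once registration completes */

    static void
    machine_idle(void)
    {
        /* Every call site checks initDone, the table, and the slot, so the
         * kernel degrades to a safe fallback when no module registered. */
        if (initDone && dispatch != NULL && dispatch->machineIdle != NULL)
            (*dispatch->machineIdle)(0x7FFFFFFFFFFFFFFFULL);
        else
            puts("sti; hlt");            /* stand-in for the halt fallback */
    }
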
index 1ef88c3fe5399bfbf8f6ade937fbf965f3c22909..ca3072b2a301e5493713d0feed7c80cb34111e68 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2006-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2006-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #ifndef _I386_PMCPU_H_
 #define _I386_PMCPU_H_
 
-#include <kern/pms.h>
 #include <i386/cpu_topology.h>
 
 #ifndef ASSEMBLER
 
-#define MAX_PSTATES    32                      /* architectural limit */
-
-typedef enum
-{
-    Cn1, Cn2, Cn3, Cn4, Cn5, Cn6, CnHlt, Cn0, CnRun, Cnmax
-} Cstate_number_t;
-
-typedef struct
-{
-       Cstate_number_t number;
-       uint32_t        hint;
-} Cstate_hint_t;
-
-
-struct pmData {
-       uint8_t pad[93];
-};
-typedef struct         pmData pmData_t;
-
-#define pmNapHalt      0x00000010
-#define pmNapC1                0x00000008
-#define pmNapC2                0x00000004
-#define pmNapC3                0x00000002
-#define pmNapC4                0x00000001
-#define pmNapMask      0x000000FF
-
-#define cfgAdr                 0xCF8
-#define cfgDat                 0xCFC
-#define lpcCfg                 (0x80000000 | (0 << 16) | (31 << 11) | (0 << 8))
-
 /*
  * This value should be changed each time that pmDispatch_t or pmCallBacks_t
  * changes.
  */
-#define PM_DISPATCH_VERSION    7
+#define PM_DISPATCH_VERSION    12
 
 /*
  * Dispatch table for functions that get installed when the power
@@ -76,79 +45,55 @@ typedef struct      pmData pmData_t;
  */
 typedef struct
 {
-    /*
-     * The following are the stepper table interfaces.
-     */
     int                        (*pmCPUStateInit)(void);
-    void               (*pmsInit)(void);
-    void               (*pmsStart)(void);
-    void               (*pmsPark)(void);
-    kern_return_t      (*pmsCPUSetPStateLimit)(uint32_t limit);
-
-    /*
-     * The following are legacy stepper interfaces.
-     */
-    void               (*pmsRun)(uint32_t nstep);
-    kern_return_t      (*pmsBuild)(pmsDef *pd, uint32_t pdsize, pmsSetFunc_t *functab, uint32_t platformData, pmsQueryFunc_t queryFunc);
-    kern_return_t      (*pmsCPULoadVIDTable)(uint16_t *tablep, int nstates);
 
     /*
      * The following are the 'C' State interfaces.
      */
     void               (*cstateInit)(void);
     uint64_t           (*cstateMachineIdle)(uint64_t maxIdleDuration);
-    kern_return_t      (*cstateTableSet)(Cstate_hint_t *tablep, unsigned int nstates);
     uint64_t           (*GetDeadline)(x86_lcpu_t *lcpu);
     uint64_t           (*SetDeadline)(x86_lcpu_t *lcpu, uint64_t);
     void               (*Deadline)(x86_lcpu_t *lcpu);
     boolean_t          (*exitIdle)(x86_lcpu_t *lcpu);
     void               (*markCPURunning)(x86_lcpu_t *lcpu);
-    void               (*HPETInterrupt)(void);
     int                        (*pmCPUControl)(uint32_t cmd, void *datap);
     void               (*pmCPUHalt)(void);
     uint64_t           (*getMaxSnoop)(void);
     void               (*setMaxBusDelay)(uint64_t time);
     uint64_t           (*getMaxBusDelay)(void);
+    void               (*setMaxIntDelay)(uint64_t time);
+    uint64_t           (*getMaxIntDelay)(void);
     void               (*pmCPUSafeMode)(x86_lcpu_t *lcpu, uint32_t flags);
+    void               (*pmTimerStateSave)(void);
+    void               (*pmTimerStateRestore)(void);
+    kern_return_t      (*exitHalt)(x86_lcpu_t *lcpu);
+    void               (*markAllCPUsOff)(void);
 } pmDispatch_t;
 
 typedef struct {
-    uint32_t           PState;
-    uint32_t           PLimit;
-    uint16_t           VIDTable[MAX_PSTATES];
-    uint32_t           VIDTableCount;
-    Cstate_hint_t      CStates[Cnmax];
-    uint32_t           CStatesCount;
-    uint64_t           maxBusDelay;
-} pmInitState_t;
-
-typedef struct {
-    uint64_t           *(*HPETAddr)(void);
-    pmInitState_t      *InitState;
     int                        (*setRTCPop)(uint64_t time);
     void               (*resyncDeadlines)(void);
     void               (*initComplete)(void);
     x86_lcpu_t         *(*GetLCPU)(int cpu);
     x86_core_t         *(*GetCore)(int cpu);
+    x86_die_t          *(*GetDie)(int cpu);
     x86_pkg_t          *(*GetPackage)(int cpu);
     x86_lcpu_t         *(*GetMyLCPU)(void);
     x86_core_t         *(*GetMyCore)(void);
+    x86_die_t          *(*GetMyDie)(void);
     x86_pkg_t          *(*GetMyPackage)(void);
-    uint32_t           CoresPerPkg;
     x86_pkg_t          *(*GetPkgRoot)(void);
     void               (*LockCPUTopology)(int lock);
     boolean_t          (*GetHibernate)(int cpu);
     processor_t                (*LCPUtoProcessor)(int lcpu);
+    processor_t                (*ThreadBind)(processor_t proc);
+    x86_topology_parameters_t  *topoParms;
 } pmCallBacks_t;
 
 extern pmDispatch_t    *pmDispatch;
 
-extern uint32_t                forcenap;
-
 void power_management_init(void);
-void machine_nap_policy(void);
-kern_return_t Cstate_table_set(Cstate_hint_t *tablep, unsigned int nstates);
-void machine_idle_cstate(boolean_t halted);
 void pmKextRegister(uint32_t version, pmDispatch_t *cpuFuncs,
                    pmCallBacks_t *callbacks);
 void pmUnRegister(pmDispatch_t *cpuFuncs);
@@ -158,13 +103,17 @@ uint64_t pmCPUSetDeadline(struct cpu_data *cpu, uint64_t deadline);
 void pmCPUDeadline(struct cpu_data *cpu);
 boolean_t pmCPUExitIdle(struct cpu_data *cpu);
 void pmCPUMarkRunning(struct cpu_data *cpu);
-void pmHPETInterrupt(void);
+void pmMarkAllCPUsOff(void);
 int pmCPUControl(uint32_t cmd, void *datap);
 void pmCPUHalt(uint32_t reason);
+void pmTimerSave(void);
+void pmTimerRestore(void);
+kern_return_t pmCPUExitHalt(int cpu);
 
 #define PM_HALT_NORMAL         0               /* normal halt path */
 #define PM_HALT_DEBUG          1               /* debug code wants to halt */
 #define PM_HALT_PANIC          2               /* panic code wants to halt */
+#define PM_HALT_SLEEP          3               /* sleep code wants to halt */
 
 void pmSafeMode(x86_lcpu_t *lcpu, uint32_t flags);
 
@@ -174,6 +123,14 @@ void pmSafeMode(x86_lcpu_t *lcpu, uint32_t flags);
 #define PM_SAFE_FL_RESUME      0x00000020      /* resume execution on the CPU */
 
 extern int pmsafe_debug;
+extern int idlehalt;
+
+/******************************************************************************
+ *
+ * All of the following are deprecated interfaces and no longer used.
+ *
+ ******************************************************************************/
+
 
 #endif /* ASSEMBLER */
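
Bumping PM_DISPATCH_VERSION from 7 to 12 is the layout guard: a power-management kext built against the old pmDispatch_t/pmCallBacks_t shapes presents the old version number and is refused, rather than having the kernel index into a mismatched struct. A sketch of the registering side under that contract (the my_* names are hypothetical):

    /* Compiled against the header above, so PM_DISPATCH_VERSION is 12.
     * If this module were stale, pmKextRegister() would skip filling in
     * the callbacks and the kernel would keep its fallback idle/halt paths. */
    static pmDispatch_t  my_funcs;   /* slots this module implements */
    static pmCallBacks_t kernel_cb;  /* filled in by the kernel */

    static void
    my_pm_module_start(void)
    {
        pmKextRegister(PM_DISPATCH_VERSION, &my_funcs, &kernel_cb);
    }
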
 
index b839471934efd31a9a0cfdaeae2db6b869ae15ec..a424d7e11e08ea1bd38f4966a234bd758cd984a7 100644 (file)
@@ -1215,7 +1215,7 @@ pmap_bootstrap(
 
        virtual_avail = va;
 
-       if (PE_parse_boot_arg("npvhash", &npvhash)) {
+       if (PE_parse_boot_argn("npvhash", &npvhash, sizeof (npvhash))) {
          if (0 != ((npvhash+1) & npvhash)) {
            kprintf("invalid hash %d, must be ((2^N)-1), using default %d\n",npvhash,NPVHASH);
            npvhash = NPVHASH;
@@ -1226,7 +1226,7 @@ pmap_bootstrap(
        printf("npvhash=%d\n",npvhash);
 
        wpkernel = 1;
-       if (PE_parse_boot_arg("wpkernel", &boot_arg)) {
+       if (PE_parse_boot_argn("wpkernel", &boot_arg, sizeof (boot_arg))) {
                if (boot_arg == 0)
                        wpkernel = 0;
        }
@@ -1331,12 +1331,12 @@ pmap_bootstrap(
         * By default for 64-bit users loaded at 4GB, share kernel mapping.
         * But this may be overridden by the -no_shared_cr3 boot-arg.
         */
-       if (PE_parse_boot_arg("-no_shared_cr3", &no_shared_cr3)) {
+       if (PE_parse_boot_argn("-no_shared_cr3", &no_shared_cr3, sizeof (no_shared_cr3))) {
                kprintf("Shared kernel address space disabled\n");
        }       
 
 #ifdef PMAP_TRACES
-       if (PE_parse_boot_arg("-pmap_trace", &pmap_trace)) {
+       if (PE_parse_boot_argn("-pmap_trace", &pmap_trace, sizeof (pmap_trace))) {
                kprintf("Kernel traces for pmap operations enabled\n");
        }       
 #endif /* PMAP_TRACES */
@@ -4573,8 +4573,10 @@ pmap_flush_tlbs(pmap_t   pmap)
                 */
                while (cpus_to_respond != 0) {
                        if (mach_absolute_time() > deadline) {
-                               pmap_tlb_flush_timeout = TRUE;
-                               pmap_cpuset_NMIPI(cpus_to_respond);
+                               if (!panic_active()) {
+                                       pmap_tlb_flush_timeout = TRUE;
+                                       pmap_cpuset_NMIPI(cpus_to_respond);
+                               }
                                panic("pmap_flush_tlbs() timeout: "
                                    "cpu(s) failing to respond to interrupts, pmap=%p cpus_to_respond=0x%lx",
                                    pmap, cpus_to_respond);
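
The pmap_bootstrap() hunks above are part of a tree-wide move from PE_parse_boot_arg() to PE_parse_boot_argn(), whose extra argument bounds how many bytes the parser may write, so a long boot-arg value can no longer overrun a small destination. The call shape, as a kernel-context sketch:

    #include <stdint.h>
    #include <pexpert/pexpert.h>    /* kernel only: PE_parse_boot_argn() */

    static uint32_t npvhash;        /* pv-entry hash size, as above */

    static void
    parse_pmap_args(void)
    {
        /* sizeof(npvhash) caps the write; with the old unsized variant a
         * string-valued arg could scribble past a 4-byte integer. */
        if (PE_parse_boot_argn("npvhash", &npvhash, sizeof(npvhash))) {
            if (((npvhash + 1) & npvhash) != 0)  /* must be (2^n)-1 */
                npvhash = 0;                     /* fall back to default */
        }
    }
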
index ee5fe6de1673e8452bfff5bd5c3d0e2af8aa5071..a74ca7548c8ac314fbac6f0aca297f2fbf03763d 100644 (file)
@@ -275,7 +275,7 @@ static inline void invlpg(unsigned long addr)
        __asm__ volatile("wrmsr" : : "c" (msr), "a" (lo), "d" (hi))
 
 #define rdtsc(lo,hi) \
-       __asm__ volatile("rdtsc" : "=a" (lo), "=d" (hi))
+       __asm__ volatile("rdtsc; lfence" : "=a" (lo), "=d" (hi))
 
 #define write_tsc(lo,hi) wrmsr(0x10, lo, hi)
 
@@ -297,7 +297,7 @@ static inline void wrmsr64(uint32_t msr, uint64_t val)
 static inline uint64_t rdtsc64(void)
 {
        uint64_t ret;
-       __asm__ volatile("rdtsc" : "=A" (ret));
+       __asm__ volatile("rdtsc; lfence" : "=A" (ret));
        return ret;
 }
 
@@ -410,4 +410,6 @@ __END_DECLS
 #define MSR_IA32_GS_BASE       0xC0000101
 #define MSR_IA32_KERNEL_GS_BASE        0xC0000102
 
+#define MSR_IA32_BIOS_SIGN_ID  0x08B
+
 #endif /* _I386_PROC_REG_H_ */
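
The rdtsc/rdtsc64 change above appends lfence so the out-of-order core cannot begin later loads before the timestamp is actually read, which matters for the monotonicity checks in the nanotime code. The same fenced read as a standalone inline (a sketch; x86-only, GCC-style inline asm):

    #include <stdint.h>

    /* Read the TSC, then fence: instructions after the lfence cannot
     * start until rdtsc has completed, keeping the timestamp ordered
     * with the surrounding loads. */
    static inline uint64_t
    rdtsc64_fenced(void)
    {
        uint32_t lo, hi;
        __asm__ volatile("rdtsc; lfence" : "=a" (lo), "=d" (hi));
        return ((uint64_t)hi << 32) | lo;
    }
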
index a1784f3bfcc9dc2d4116ba74807a08e921366e46..982c160f4a2749674dce28ede90c26fa0079d72f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -60,7 +60,7 @@
 #include <i386/misc_protos.h>
 #include <i386/proc_reg.h>
 #include <i386/machine_cpu.h>
-#include <i386/mp.h>
+#include <i386/lapic.h>
 #include <i386/cpuid.h>
 #include <i386/cpu_data.h>
 #include <i386/cpu_threads.h>
@@ -71,7 +71,6 @@
 #include <machine/commpage.h>
 #include <sys/kdebug.h>
 #include <i386/tsc.h>
-#include <i386/hpet.h>
 #include <i386/rtclock.h>
 
 #define NSEC_PER_HZ                    (NSEC_PER_SEC / 100) /* nsec per tick */
@@ -93,20 +92,8 @@ extern clock_timer_func_t    rtclock_timer_expire;
 static void    rtc_set_timescale(uint64_t cycles);
 static uint64_t        rtc_export_speed(uint64_t cycles);
 
-extern void            _rtc_nanotime_store(
-                                       uint64_t                tsc,
-                                       uint64_t                nsec,
-                                       uint32_t                scale,
-                                       uint32_t                shift,
-                                       rtc_nanotime_t  *dst);
-
-extern uint64_t                _rtc_nanotime_read(
-                                       rtc_nanotime_t  *rntp,
-                                       int             slow );
-
 rtc_nanotime_t rtc_nanotime_info = {0,0,0,0,1,0};
 
-
 /*
  * tsc_to_nanoseconds:
  *
@@ -124,7 +111,9 @@ _tsc_to_nanoseconds(uint64_t value)
                 "mull  %%ecx           ;"
                 "addl  %%edi,%%eax     ;"      
                 "adcl  $0,%%edx         "
-                : "+A" (value) : "c" (rtc_nanotime_info.scale) : "esi", "edi");
+                : "+A" (value)
+                : "c" (current_cpu_datap()->cpu_nanotime->scale)
+                : "esi", "edi");
 
     return (value);
 }
@@ -212,7 +201,7 @@ _rtc_nanotime_init(rtc_nanotime_t *rntp, uint64_t base)
 static void
 rtc_nanotime_init(uint64_t base)
 {
-       rtc_nanotime_t  *rntp = &rtc_nanotime_info;
+       rtc_nanotime_t  *rntp = current_cpu_datap()->cpu_nanotime;
 
        _rtc_nanotime_init(rntp, base);
        rtc_nanotime_set_commpage(rntp);
@@ -230,7 +219,7 @@ rtc_nanotime_init_commpage(void)
 {
        spl_t                   s = splclock();
 
-       rtc_nanotime_set_commpage(&rtc_nanotime_info);
+       rtc_nanotime_set_commpage(current_cpu_datap()->cpu_nanotime);
 
        splx(s);
 }
@@ -247,10 +236,10 @@ rtc_nanotime_read(void)
        
 #if CONFIG_EMBEDDED
        if (gPEClockFrequencyInfo.timebase_frequency_hz > SLOW_TSC_THRESHOLD)
-               return  _rtc_nanotime_read( &rtc_nanotime_info, 1 );    /* slow processor */
+               return  _rtc_nanotime_read(current_cpu_datap()->cpu_nanotime, 1);       /* slow processor */
        else
 #endif
-       return  _rtc_nanotime_read( &rtc_nanotime_info, 0 );    /* assume fast processor */
+       return  _rtc_nanotime_read(current_cpu_datap()->cpu_nanotime, 0);       /* assume fast processor */
 }
 
 /*
@@ -263,7 +252,7 @@ rtc_nanotime_read(void)
 void
 rtc_clock_napped(uint64_t base, uint64_t tsc_base)
 {
-       rtc_nanotime_t  *rntp = &rtc_nanotime_info;
+       rtc_nanotime_t  *rntp = current_cpu_datap()->cpu_nanotime;
        uint64_t        oldnsecs;
        uint64_t        newnsecs;
        uint64_t        tsc;
@@ -372,12 +361,13 @@ rtclock_init(void)
 static void
 rtc_set_timescale(uint64_t cycles)
 {
-       rtc_nanotime_info.scale = ((uint64_t)NSEC_PER_SEC << 32) / cycles;
+       rtc_nanotime_t  *rntp = current_cpu_datap()->cpu_nanotime;
+       rntp->scale = ((uint64_t)NSEC_PER_SEC << 32) / cycles;
 
        if (cycles <= SLOW_TSC_THRESHOLD)
-               rtc_nanotime_info.shift = cycles;
+               rntp->shift = cycles;
        else
-               rtc_nanotime_info.shift = 32;
+               rntp->shift = 32;
 
        rtc_nanotime_init(0);
 }
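
rtc_set_timescale() above stores scale = (10^9 << 32) / tscFreq, a 32-bit fixed-point factor, so nanoseconds are (tsc_delta * scale) >> 32; the i386 assembly in _tsc_to_nanoseconds() performs that 64x32 multiply in 32-bit halves. The same arithmetic in portable C, for reference:

    #include <stdint.h>

    /* ns = (delta * scale) >> 32 without 128-bit types: split delta into
     * 32-bit halves, mirroring the mull/addl/adcl sequence above. */
    static uint64_t
    tsc_to_ns(uint64_t delta, uint32_t scale)
    {
        uint64_t lo = (uint64_t)(uint32_t)delta * scale;  /* low  x scale */
        uint64_t hi = (delta >> 32) * scale;              /* high x scale */
        return hi + (lo >> 32);
    }

    /* scale would be computed as ((uint64_t)1000000000 << 32) / tscFreq. */
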
index 904b387861d310c0b5f0191477abeda04bbdb923..e3ea716d46e088327768ee2308dd9e78d26c8ae1 100644 (file)
 #ifndef _I386_RTCLOCK_H_
 #define _I386_RTCLOCK_H_
 
+#ifndef ASSEMBLER
+typedef struct rtc_nanotime {
+       uint64_t        tsc_base;               /* timestamp */
+       uint64_t        ns_base;                /* nanoseconds */
+       uint32_t        scale;                  /* tsc -> nanosec multiplier */
+       uint32_t        shift;                  /* tsc -> nanosec shift/div */
+                                               /* shift is overloaded with
+                                                * lower 32 bits of tsc_freq
+                                                * on slower machines (SLOW_TSC_THRESHOLD) */
+       uint32_t        generation;             /* 0 == being updated */
+       uint32_t        spare1;
+} rtc_nanotime_t;
+
 #include <kern/etimer.h>
 
 struct cpu_data;
 
+extern void    _rtc_nanotime_store(
+                       uint64_t        tsc,
+                       uint64_t        nsec,
+                       uint32_t        scale,
+                       uint32_t        shift,
+                       rtc_nanotime_t  *dst);
+
+extern uint64_t        _rtc_nanotime_read(
+                       rtc_nanotime_t  *rntp,
+                       int             slow);
+
+extern rtc_nanotime_t  rtc_nanotime_info;
+#endif
+
+#define        SLOW_TSC_THRESHOLD      1000067800      /* TSC is too slow for regular nanotime() algorithm */
+
+#if defined(__i386__)
+/*
+ * Assembly snippet included in exception handlers and rtc_nanotime_read()
+ * %edi points to nanotime info struct
+ * %edx:%eax returns nanotime
+ */
+#define RTC_NANOTIME_READ_FAST()                                         \
+0:     movl    RNT_GENERATION(%edi),%esi       /* being updated? */    ; \
+       testl   %esi,%esi                                               ; \
+       jz      0b                              /* wait until done */   ; \
+       rdtsc                                                           ; \
+       lfence                                                          ; \
+       subl    RNT_TSC_BASE(%edi),%eax                                 ; \
+       sbbl    RNT_TSC_BASE+4(%edi),%edx       /* tsc - tsc_base */    ; \
+       movl    RNT_SCALE(%edi),%ecx            /* * scale factor */    ; \
+       movl    %edx,%ebx                                               ; \
+       mull    %ecx                                                    ; \
+       movl    %ebx,%eax                                               ; \
+       movl    %edx,%ebx                                               ; \
+       mull    %ecx                                                    ; \
+       addl    %ebx,%eax                                               ; \
+       adcl    $0,%edx                                                 ; \
+       addl    RNT_NS_BASE(%edi),%eax          /* + ns_base */         ; \
+       adcl    RNT_NS_BASE+4(%edi),%edx                                ; \
+       cmpl    RNT_GENERATION(%edi),%esi       /* check for update */  ; \
+       jne     0b                              /* do it all again */
+
+#elif defined(__x86_64__)
+
+/*
+ * Assembly snippet included in exception handlers and rtc_nanotime_read()
+ * %rdi points to nanotime info struct.
+ * %rax returns nanotime
+ */
+#define RTC_NANOTIME_READ_FAST()                                         \
+0:     movl    RNT_GENERATION(%rdi),%esi                               ; \
+       test    %esi,%esi                       /* info updating? */    ; \
+       jz      0b                              /* - wait if so */      ; \
+       rdtsc                                                           ; \
+       lfence                                                          ; \
+       shlq    $32,%rdx                                                ; \
+       orq     %rdx,%rax                       /* %rax := tsc */       ; \
+       subq    RNT_TSC_BASE(%rdi),%rax         /* tsc - tsc_base */    ; \
+       xorq    %rcx,%rcx                                               ; \
+       movl    RNT_SCALE(%rdi),%ecx                                    ; \
+       mulq    %rcx                            /* delta * scale */     ; \
+       shrdq   $32,%rdx,%rax                   /* %rdx:%rax >>= 32 */  ; \
+       addq    RNT_NS_BASE(%rdi),%rax          /* add ns_base */       ; \
+       cmpl    RNT_GENERATION(%rdi),%esi       /* repeat if changed */ ; \
+       jne     0b
+
+#endif
+
 #endif /* _I386_RTCLOCK_H_ */
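
RTC_NANOTIME_READ_FAST() is a lock-free reader over the rtc_nanotime fields: it waits while generation is 0 (writer mid-update) and retries if generation changed across the computation, so it never takes a lock in the trap or commpage paths. The same protocol written out in C (a sketch; the kernel keeps it in assembly so both paths share one audited sequence):

    #include <stdint.h>

    typedef struct {
        volatile uint64_t tsc_base;
        volatile uint64_t ns_base;
        volatile uint32_t scale;
        volatile uint32_t generation;   /* 0 == being updated */
    } nanotime_t;

    static inline uint64_t
    rdtsc64_fenced(void)                /* fenced TSC read, as earlier */
    {
        uint32_t lo, hi;
        __asm__ volatile("rdtsc; lfence" : "=a" (lo), "=d" (hi));
        return ((uint64_t)hi << 32) | lo;
    }

    static uint64_t
    nanotime_read(const nanotime_t *rnt)
    {
        uint64_t delta, ns;
        uint32_t gen;

        do {
            while ((gen = rnt->generation) == 0)
                ;                       /* writer in progress: spin */
            delta = rdtsc64_fenced() - rnt->tsc_base;
            ns    = rnt->ns_base
                  + (delta >> 32) * rnt->scale
                  + (((uint64_t)(uint32_t)delta * rnt->scale) >> 32);
        } while (rnt->generation != gen); /* torn read: go around again */
        return ns;
    }
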
index 3218d3d15f577a719e089f29daf4a7f9ed442926..e063283aae6f19afe43040ba55b5df9422895004 100644 (file)
@@ -66,7 +66,7 @@
 
 #define        CX(addr,reg)    addr(,reg,4)
 
-#include <i386/mp.h>
+#include <i386/lapic.h>
 #include <i386/mp_slave_boot.h>
 #include <i386/cpuid.h>
 
index d65419a236962a76fc7a4f083f04b8ce0621b6b5..b263be9ff8ded8a9428fcf7a0e6b0f48407d088f 100644 (file)
 #include <i386/machine_check.h>
 #include <mach/i386/syscall_sw.h>
 
+
+extern void throttle_lowpri_io(boolean_t);
+
 /*
  * Forward declarations
  */
@@ -163,7 +166,9 @@ thread_syscall_return(
                }
                regs->eax = ret;
        }
-        thread_exception_return();
+       throttle_lowpri_io(TRUE);
+
+       thread_exception_return();
         /*NOTREACHED*/
 }
 
index 19b7469a694cf38327e495de4d922f581eee6583..624e5d431197bb8f781eabb4d074a7a056e6f613 100644 (file)
@@ -75,6 +75,10 @@ uint64_t     tscFCvtn2t = 0;
 uint64_t       tscGranularity = 0;
 uint64_t       bus2tsc = 0;
 uint64_t       busFreq = 0;
+uint32_t       flex_ratio = 0;
+uint32_t       flex_ratio_min = 0;
+uint32_t       flex_ratio_max = 0;
+
 
 #define bit(n)         (1ULL << (n))
 #define bitmask(h,l)   ((bit(h)|(bit(h)-1)) & ~(bit(l)-1))
@@ -91,8 +95,7 @@ uint64_t      busFreq = 0;
 
 static const char      FSB_Frequency_prop[] = "FSBFrequency";
 /*
- * This routine extracts the front-side bus frequency in Hz from
- * the device tree.
+ * This routine extracts the bus frequency in Hz from the device tree.
  */
 static uint64_t
 EFI_FSB_frequency(void)
@@ -136,25 +139,39 @@ tsc_init(void)
        boolean_t       N_by_2_bus_ratio = FALSE;
 
        /*
-        * Get the FSB frequency and conversion factors.
+        * Get the FSB frequency and conversion factors from EFI.
         */
        busFreq = EFI_FSB_frequency();
+
+       if (cpuid_info()->cpuid_family != CPU_FAMILY_PENTIUM_M) {
+               panic("tsc_init: unknown CPU family: 0x%X\n",
+                       cpuid_info()->cpuid_family);
+       }
+
+       {
+               uint64_t        prfsts;
+
+               prfsts = rdmsr64(IA32_PERF_STS);
+               tscGranularity = (uint32_t)bitfield(prfsts, 44, 40);
+               N_by_2_bus_ratio = (prfsts & bit(46)) != 0;
+       }
+
        if (busFreq != 0) {
                busFCvtt2n = ((1 * Giga) << 32) / busFreq;
                busFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / busFCvtt2n;
                busFCvtInt = tmrCvt(1 * Peta, 0xFFFFFFFFFFFFFFFFULL / busFreq); 
        } else {
-               panic("rtclock_init: EFI not supported!\n");
+               panic("tsc_init: EFI not supported!\n");
        }
 
        kprintf(" BUS: Frequency = %6d.%04dMHz, "
-                       "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, "
-                       "cvtInt = %08X.%08X\n",
-                       (uint32_t)(busFreq / Mega),
-                       (uint32_t)(busFreq % Mega), 
-                       (uint32_t)(busFCvtt2n >> 32), (uint32_t)busFCvtt2n,
-                       (uint32_t)(busFCvtn2t >> 32), (uint32_t)busFCvtn2t,
-                       (uint32_t)(busFCvtInt >> 32), (uint32_t)busFCvtInt);
+               "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, "
+               "cvtInt = %08X.%08X\n",
+               (uint32_t)(busFreq / Mega),
+               (uint32_t)(busFreq % Mega), 
+               (uint32_t)(busFCvtt2n >> 32), (uint32_t)busFCvtt2n,
+               (uint32_t)(busFCvtn2t >> 32), (uint32_t)busFCvtn2t,
+               (uint32_t)(busFCvtInt >> 32), (uint32_t)busFCvtInt);
 
        /*
         * Get the TSC increment.  The TSC is incremented by this
@@ -164,18 +181,6 @@ tsc_init(void)
         * is set, this indicates the bus ratio is 0.5 more than this - i.e.
         * that the true bus ratio is (2*tscGranularity + 1)/2.
         */
-       if (cpuid_info()->cpuid_family == CPU_FAMILY_PENTIUM_M) {
-               uint64_t        prfsts;
-
-               prfsts = rdmsr64(IA32_PERF_STS);
-               tscGranularity = (uint32_t)bitfield(prfsts, 44, 40);
-               N_by_2_bus_ratio = (prfsts & bit(46)) != 0;
-
-       } else {
-               panic("rtclock_init: unknown CPU family: 0x%X\n",
-                       cpuid_info()->cpuid_family);
-       }
-
        if (N_by_2_bus_ratio)
                tscFCvtt2n = busFCvtt2n * 2 / (1 + 2*tscGranularity);
        else
@@ -185,12 +190,12 @@ tsc_init(void)
        tscFCvtn2t = 0xFFFFFFFFFFFFFFFFULL / tscFCvtt2n;
 
        kprintf(" TSC: Frequency = %6d.%04dMHz, "
-                       "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, gran = %lld%s\n",
-                       (uint32_t)(tscFreq / Mega),
-                       (uint32_t)(tscFreq % Mega), 
-                       (uint32_t)(tscFCvtt2n >> 32), (uint32_t)tscFCvtt2n,
-                       (uint32_t)(tscFCvtn2t >> 32), (uint32_t)tscFCvtn2t,
-                       tscGranularity, N_by_2_bus_ratio ? " (N/2)" : "");
+               "cvtt2n = %08X.%08X, cvtn2t = %08X.%08X, gran = %lld%s\n",
+               (uint32_t)(tscFreq / Mega),
+               (uint32_t)(tscFreq % Mega), 
+               (uint32_t)(tscFCvtt2n >> 32), (uint32_t)tscFCvtt2n,
+               (uint32_t)(tscFCvtn2t >> 32), (uint32_t)tscFCvtn2t,
+               tscGranularity, N_by_2_bus_ratio ? " (N/2)" : "");
 
        /*
         * Calculate conversion from BUS to TSC
@@ -209,4 +214,7 @@ tsc_get_info(tscInfo_t *info)
        info->tscGranularity = tscGranularity;
        info->bus2tsc        = bus2tsc;
        info->busFreq        = busFreq;
+       info->flex_ratio     = flex_ratio;
+       info->flex_ratio_min = flex_ratio_min;
+       info->flex_ratio_max = flex_ratio_max;
 }
index 79ece708588d8efc20c83b28bde3bc3f29e6724c..1b6589de7805b087b947c03980769a28dcf63878 100644 (file)
@@ -40,7 +40,7 @@
 #ifndef _I386_TSC_H_
 #define _I386_TSC_H_
 
-#define IA32_PERF_STS 0x198
+#define IA32_PERF_STS          0x198
 
 extern uint64_t        busFCvtt2n;
 extern uint64_t        busFCvtn2t;
@@ -50,17 +50,23 @@ extern uint64_t tscFCvtn2t;
 extern uint64_t tscGranularity;
 extern uint64_t bus2tsc;
 extern uint64_t busFreq;
+extern uint32_t        flex_ratio;
+extern uint32_t        flex_ratio_min;
+extern uint32_t        flex_ratio_max;
 
 struct tscInfo
 {
-uint64_t       busFCvtt2n;
-uint64_t       busFCvtn2t;
-uint64_t       tscFreq;
-uint64_t       tscFCvtt2n;
-uint64_t       tscFCvtn2t;
-uint64_t       tscGranularity;
-uint64_t       bus2tsc;
-uint64_t       busFreq;
+       uint64_t        busFCvtt2n;
+       uint64_t        busFCvtn2t;
+       uint64_t        tscFreq;
+       uint64_t        tscFCvtt2n;
+       uint64_t        tscFCvtn2t;
+       uint64_t        tscGranularity;
+       uint64_t        bus2tsc;
+       uint64_t        busFreq;
+       uint32_t        flex_ratio;
+       uint32_t        flex_ratio_min;
+       uint32_t        flex_ratio_max;
 };
 typedef struct tscInfo tscInfo_t;
 
index 87aab04269f469fc109f5675cbc8cbfc8459077e..e06afda78feab0d47dc6ce10c3e0450de791c64d 100644 (file)
@@ -152,7 +152,7 @@ i386_set_ldt(
                start_sel = LDTSZ_MIN;
            }
                
-           if (start_sel + num_sels > LDTSZ) {
+           if ((uint64_t)start_sel + (uint64_t)num_sels > LDTSZ) {
                task_unlock(task);
                return ENOMEM;
            }
@@ -294,7 +294,7 @@ i386_get_ldt(
 
        if (start_sel >= 8192)
            return EINVAL;
-       if (start_sel + num_sels > 8192)
+       if ((uint64_t)start_sel + (uint64_t)num_sels > 8192)
            return EINVAL;
        if (descs == 0)
            return EINVAL;
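
The widening casts above fix an integer-overflow check: in 32-bit arithmetic a huge num_sels can wrap start_sel + num_sels around to a small value and slip past the limit test. A self-contained illustration:

    #include <stdint.h>

    #define LDT_LIMIT 8192   /* stands in for LDTSZ / the 8192 above */

    /* Valid when [start, start+count) fits below the limit; the 64-bit
     * sum cannot wrap for any pair of 32-bit inputs. */
    static int
    range_ok(uint32_t start, uint32_t count)
    {
        return (uint64_t)start + (uint64_t)count <= LDT_LIMIT;
    }

    /* With start = 8, count = 0xFFFFFFF8 the 32-bit sum wraps to 0 and
     * would "pass"; the 64-bit sum is 0x100000000 and correctly fails. */
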
index 9d5b91352f8915ab3d87b603adae307606d33724..ab846c753582464e638a5789ca6a587aa8204604 100644 (file)
@@ -45,3 +45,5 @@ kdp_raise_exception(
 void
 kdp_reset(void);
 
+void
+kdp_init(void);
diff --git a/osfmk/kdp/kdp_serial.c b/osfmk/kdp/kdp_serial.c
new file mode 100644 (file)
index 0000000..bc8f136
--- /dev/null
@@ -0,0 +1,108 @@
+/*
+ * Copyright (c) 2008 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#include "kdp_serial.h"
+
+#define SKDP_START_CHAR 0xFA
+#define SKDP_END_CHAR 0xFB
+#define SKDP_ESC_CHAR 0xFE
+
+static enum {DS_WAITSTART, DS_READING, DS_ESCAPED} dsState;
+static unsigned char dsBuffer[1518];
+static int dsPos;
+
+void kdp_serialize_packet(unsigned char *packet, unsigned int len, void (*outFunc)(char))
+{
+       unsigned int index;
+       outFunc(SKDP_START_CHAR);
+       for (index = 0; index < len; index++) {
+               unsigned char byte = *packet++;
+               //need to escape '\n' because the kernel serial output turns it into a cr/lf
+               if(byte == SKDP_START_CHAR || byte == SKDP_END_CHAR || byte == SKDP_ESC_CHAR || byte == '\n')
+               {
+                       outFunc(SKDP_ESC_CHAR);
+                       byte = ~byte;
+               }
+               outFunc(byte);
+       }
+       outFunc(SKDP_END_CHAR);
+}
+
+unsigned char *kdp_unserialize_packet(unsigned char byte, unsigned int *len)
+{
+       switch(dsState)
+       {
+               case DS_WAITSTART:
+                       if(byte == SKDP_START_CHAR)
+                       {
+//                             printf("got start char\n");
+                               dsState = DS_READING;
+                               dsPos = 0;
+                               *len = SERIALIZE_READING;
+                               return 0;
+                       }
+                       *len = SERIALIZE_WAIT_START;
+                       break;
+               case DS_READING:
+                       if(byte == SKDP_ESC_CHAR)
+                       {
+                               dsState = DS_ESCAPED;
+                               *len = SERIALIZE_READING;
+                               return 0;
+                       }
+                       if(byte == SKDP_START_CHAR)
+                       {
+//                             printf("unexpected start char, resetting\n");
+                               dsPos = 0;
+                               *len = SERIALIZE_READING;
+                               return 0;
+                       }
+                       if(byte == SKDP_END_CHAR)
+                       {
+                               dsState = DS_WAITSTART;
+                               *len = dsPos;
+                               dsPos = 0;
+                               return dsBuffer;
+                       }
+                       dsBuffer[dsPos++] = byte;
+                       break;
+               case DS_ESCAPED:
+//                     printf("unescaping %02x to %02x\n", byte, ~byte);
+                       dsBuffer[dsPos++] = ~byte;
+                       dsState = DS_READING;
+                       *len = SERIALIZE_READING;
+                       break;
+       }
+       if(dsPos == sizeof(dsBuffer)) //too much data...forget this packet
+       {
+               dsState = DS_WAITSTART;
+               dsPos = 0;
+               *len = SERIALIZE_WAIT_START;
+       }
+       
+       return 0;
+}
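
The framing above is simple byte-stuffing: a packet is bracketed by 0xFA/0xFB, and any occurrence of the reserved bytes (or '\n', which the console mangles into CR/LF) is sent as the escape byte followed by the bit-inverted value, so the receiver's ~byte restores it. A host-side round-trip check (hypothetical test harness, linked against the two functions above):

    #include <assert.h>
    #include <string.h>
    #include "kdp_serial.h"

    static unsigned char wire[4096];
    static unsigned int  wire_len;
    static void capture(char c) { wire[wire_len++] = (unsigned char)c; }

    static void
    roundtrip(unsigned char *pkt, unsigned int len)
    {
        unsigned int i, out_len = 0;
        unsigned char *out = 0;

        wire_len = 0;
        kdp_serialize_packet(pkt, len, capture);
        for (i = 0; i < wire_len; i++)
            out = kdp_unserialize_packet(wire[i], &out_len);
        assert(out && out_len == len && memcmp(out, pkt, len) == 0);
    }
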
diff --git a/osfmk/kdp/kdp_serial.h b/osfmk/kdp/kdp_serial.h
new file mode 100644 (file)
index 0000000..68dc301
--- /dev/null
@@ -0,0 +1,51 @@
+/*
+ * Copyright (c) 2008 Apple Inc. All rights reserved.
+ *
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
+ * 
+ * This file contains Original Code and/or Modifications of Original Code
+ * as defined in and that are subject to the Apple Public Source License
+ * Version 2.0 (the 'License'). You may not use this file except in
+ * compliance with the License. The rights granted to you under the License
+ * may not be used to create, or enable the creation or redistribution of,
+ * unlawful or unlicensed copies of an Apple operating system, or to
+ * circumvent, violate, or enable the circumvention or violation of, any
+ * terms of an Apple operating system software license agreement.
+ * 
+ * Please obtain a copy of the License at
+ * http://www.opensource.apple.com/apsl/ and read it before using this file.
+ * 
+ * The Original Code and all software distributed under the License are
+ * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
+ * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
+ * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
+ * Please see the License for the specific language governing rights and
+ * limitations under the License.
+ * 
+ * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
+ */
+#ifndef _KDP_SERIAL_H_
+#define _KDP_SERIAL_H_
+
+/*
+ * APIs for escaping a KDP UDP packet into a byte stream suitable
+ * for a standard serial console
+ */
+
+enum {SERIALIZE_WAIT_START, SERIALIZE_READING};
+
+/*
+ * Take a buffer of specified length and output it with the given
+ * function. Escapes special characters as needed
+ */
+void kdp_serialize_packet(unsigned char *, unsigned int, void (*func)(char));
+
+/*
+ * Add a new character to an internal buffer, and return that
+ * buffer when a fully constructed packet has been identified.
+ * Will track intermediate state using magic enums above
+ */
+unsigned char *kdp_unserialize_packet(unsigned char, unsigned int *);
+
+#endif
index e47f63dfc09e98ba7d7c2df708372752c1abe0d7..1575afcca2c0b3850b263be1e9b2bfebab892adf 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2006 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <mach/exception_types.h>
 #include <kern/cpu_data.h>
 #include <kern/debug.h>
+#include <kern/clock.h>
 
 #include <kdp/kdp_core.h>
 #include <kdp/kdp_internal.h>
 #include <kdp/kdp_en_debugger.h>
+#include <kdp/kdp_callout.h>
 #include <kdp/kdp_udp.h>
+#if CONFIG_SERIAL_KDP
+#include <kdp/kdp_serial.h>
+#endif
 
 #include <vm/vm_map.h>
 #include <vm/vm_protos.h>
 extern int kdp_getc(void);
 extern int reattach_wait;
 
+extern int serial_getc(void);
+extern void serial_putc(char);
+extern int serial_init(void);
+
 static u_short ip_id;                          /* ip packet ctr, for ids */
 
 /*     @(#)udp_usrreq.c        2.2 88/05/23 4.0NFSSRC SMI;     from UCB 7.1 6/5/86     */
@@ -219,19 +228,19 @@ kdp_register_send_receive(
 {
        unsigned int    debug = 0;
 
-       kdp_en_send_pkt = send;
-       kdp_en_recv_pkt = receive;
-
        debug_log_init();
 
        kdp_timer_callout_init();
 
-       PE_parse_boot_arg("debug", &debug);
+       PE_parse_boot_argn("debug", &debug, sizeof (debug));
 
 
        if (!debug)
                return;
 
+       kdp_en_send_pkt = send;
+       kdp_en_recv_pkt = receive;
+
        if (debug & DB_KDP_BP_DIS)
                kdp_flag |= KDP_BP_DIS;   
        if (debug & DB_KDP_GETC_ENA)
@@ -250,13 +259,13 @@ kdp_register_send_receive(
        if (debug & DB_PANICLOG_DUMP)
                kdp_flag |= PANIC_LOG_DUMP;
 
-       if (PE_parse_boot_arg ("_panicd_ip", panicd_ip_str))
+       if (PE_parse_boot_argn("_panicd_ip", panicd_ip_str, sizeof (panicd_ip_str)))
                panicd_specified = TRUE;
 
-       if (PE_parse_boot_arg ("_router_ip", router_ip_str))
+       if (PE_parse_boot_argn("_router_ip", router_ip_str, sizeof (router_ip_str)))
                router_specified = TRUE;
 
-       if (!PE_parse_boot_arg ("panicd_port", &panicd_port))
+       if (!PE_parse_boot_argn("panicd_port", &panicd_port, sizeof (panicd_port)))
                panicd_port = CORE_REMOTE_PORT;
 
        kdp_flag |= KDP_READY;
@@ -1438,7 +1447,6 @@ kdp_get_xnu_version(char *versionbuf)
 }
 
 extern char *inet_aton(const char *cp, struct in_addr *pin);
-extern int snprintf(char *str, size_t size, const char *format, ...);
 
 /* Primary dispatch routine for the system dump */
 void 
@@ -1558,3 +1566,111 @@ abort_panic_transfer(void)
        not_in_kdp = 1;
        panic_block = 0;
 }
+
+#if CONFIG_SERIAL_KDP
+
+static boolean_t needs_serial_init = TRUE;
+
+static void
+kdp_serial_send(void *rpkt, unsigned int rpkt_len)
+{
+       if (needs_serial_init)
+       {
+           serial_init();
+           needs_serial_init = FALSE;
+       }
+       
+       //      printf("tx\n");
+       kdp_serialize_packet((unsigned char *)rpkt, rpkt_len, serial_putc);
+}
+
+static void 
+kdp_serial_receive(void *rpkt, unsigned int *rpkt_len, unsigned int timeout)
+{
+       int readkar;
+       uint64_t now, deadline;
+       
+       if (needs_serial_init)
+       {
+           serial_init();
+           needs_serial_init = FALSE;
+       }
+       
+       clock_interval_to_deadline(timeout, 1000 * 1000 /* milliseconds */, &deadline);
+
+//     printf("rx\n");
+       for(clock_get_uptime(&now); now < deadline; clock_get_uptime(&now))
+       {
+               readkar = serial_getc();
+               if(readkar >= 0)
+               {
+                       unsigned char *packet;
+                       //                      printf("got char %02x\n", readkar);
+                       if((packet = kdp_unserialize_packet(readkar,rpkt_len)))
+                       {
+                               memcpy(rpkt, packet, *rpkt_len);
+                               return;
+                       }
+               }
+       }
+       *rpkt_len = 0;
+}
+
+static void kdp_serial_callout(__unused void *arg, kdp_event_t event)
+{
+    /* When we stop KDP, set the bit to re-initialize the console serial port
+     * the next time we send/receive a KDP packet.  We don't do it on
+     * KDP_EVENT_ENTER directly because it also gets called when we trap to KDP
+     * for non-external debugging, i.e., stackshot or core dumps.
+     *
+     * Set needs_serial_init on exit (and at initialization, see above) rather
+     * than on enter, because enter is sent multiple times and would cause
+     * excess reinitialization.
+     */
+       
+    switch (event)
+    {
+               case KDP_EVENT_PANICLOG:
+               case KDP_EVENT_ENTER:
+                       break;
+               case KDP_EVENT_EXIT:
+                       needs_serial_init = TRUE;
+                       break;
+    }
+}
+
+#endif /* CONFIG_SERIAL_KDP */
+
+void
+kdp_init(void)
+{
+#if CONFIG_SERIAL_KDP
+       char kdpname[80];
+       struct in_addr ipaddr;
+       struct ether_addr macaddr;
+
+#if CONFIG_EMBEDDED
+       //serial will be the debugger, unless match name is explicitly provided, and it's not "serial"
+       if(PE_parse_boot_argn("kdp_match_name", kdpname, sizeof(kdpname)) && strncmp(kdpname, "serial", sizeof(kdpname)) != 0)
+               return;
+#else
+       // serial must be explicitly requested
+       if(!PE_parse_boot_argn("kdp_match_name", kdpname, sizeof(kdpname)) || strncmp(kdpname, "serial", sizeof(kdpname)) != 0)
+               return;
+#endif
+       
+       kprintf("Initializing serial KDP\n");
+
+       kdp_register_callout(kdp_serial_callout, NULL);
+       kdp_register_send_receive(kdp_serial_send, kdp_serial_receive);
+       
+       /* fake up an ip and mac for early serial debugging */
+       macaddr.ether_addr_octet[0] = 's';
+       macaddr.ether_addr_octet[1] = 'e';
+       macaddr.ether_addr_octet[2] = 'r';
+       macaddr.ether_addr_octet[3] = 'i';
+       macaddr.ether_addr_octet[4] = 'a';
+       macaddr.ether_addr_octet[5] = 'l';
+       ipaddr.s_addr = 0xABADBABE;
+       kdp_set_ip_and_mac_addresses(&ipaddr, &macaddr);
+#endif /* CONFIG_SERIAL_KDP */
+}
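
kdp_serial_receive() above polls rather than blocks, because the debugger runs with interrupts off and the scheduler frozen; it converts the caller's timeout into an absolute deadline and spins on the UART until then. The loop shape, reduced to a kernel-context sketch (get_byte() and packet_done() are hypothetical stand-ins for serial_getc() and the unserializer):

    #include <stdint.h>
    #include <kern/clock.h>   /* kernel only: clock_interval_to_deadline,
                               * clock_get_uptime, as used above */

    extern int get_byte(void);      /* hypothetical: -1 when no byte ready */
    extern int packet_done(int c);  /* hypothetical: nonzero when complete */

    static int
    poll_with_deadline(unsigned int timeout_ms)
    {
        uint64_t now, deadline;
        int c;

        clock_interval_to_deadline(timeout_ms, 1000 * 1000 /* ns per ms */,
                                   &deadline);
        for (clock_get_uptime(&now); now < deadline; clock_get_uptime(&now)) {
            if ((c = get_byte()) >= 0 && packet_done(c))
                return 1;    /* a complete packet arrived in time */
        }
        return 0;            /* deadline passed with no packet */
    }
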
index d09e4ea6e882e716efb88e6961440601c7772318..c13e4082616e4673db1af7aa1eeeaa7ef1a151d3 100644 (file)
@@ -245,6 +245,22 @@ ipc_space_t  get_task_ipcspace(task_t t)
        return(t->itk_space);
 }
 
+int get_task_numactivethreads(task_t task)
+{
+       thread_t        inc;
+       int num_active_thr=0;
+       task_lock(task);
+
+       for (inc  = (thread_t)queue_first(&task->threads);
+                       !queue_end(&task->threads, (queue_entry_t)inc); inc = (thread_t)queue_next(&inc->task_threads)) 
+       {
+               if(inc->active)
+                       num_active_thr++;
+       }
+       task_unlock(task);
+       return num_active_thr;
+}
+
 int  get_task_numacts(task_t t)
 {
        return(t->thread_count);
index c7e01a134992f1cb91480582776b045f6adcee53..679e1779c7df0a63c50dd581f57b06e4db95aab8 100644 (file)
@@ -106,7 +106,7 @@ unsigned int                panic_is_inited = 0;
 unsigned int           return_on_panic = 0;
 unsigned long          panic_caller;
 
-char *debug_buf;
+char debug_buf[PAGE_SIZE];
 ppnum_t debug_buf_page;
 char *debug_buf_ptr;
 unsigned int debug_buf_size;
@@ -183,9 +183,6 @@ debug_log_init(void)
 {
        if (debug_buf_size != 0)
                return;
-       if (kmem_alloc(kernel_map, (vm_offset_t *) &debug_buf, PAGE_SIZE)
-                       != KERN_SUCCESS)
-               panic("cannot allocate debug_buf\n");
        debug_buf_ptr = debug_buf;
        debug_buf_size = PAGE_SIZE;
         debug_buf_page = pmap_find_phys(kernel_pmap,
index 4f2ab7f87bd27a0ab64f3f57b52a0e10712f5e1f..e861592e6ef7175fc9c2e877b29eec42842e86a4 100644 (file)
@@ -35,6 +35,7 @@
 #ifdef KERNEL_PRIVATE
 
 extern unsigned int    systemLogDiags;
+extern char debug_buf[];
 
 #ifdef MACH_KERNEL_PRIVATE
 
@@ -49,7 +50,7 @@ extern unsigned int     current_debugger;
 
 extern unsigned int     active_debugger;
 extern unsigned int    debug_mode; 
-extern unsigned int    disable_debug_output;
+extern unsigned int    disable_debug_output; 
 
 extern unsigned int     panicDebugging;
 extern unsigned int    logPanicDataToScreen;
@@ -71,7 +72,6 @@ extern const char             *panicstr;
 extern volatile unsigned int   nestedpanic;
 extern int unsigned long panic_caller;
 
-extern char *debug_buf;
 extern char *debug_buf_ptr;
 extern unsigned int debug_buf_size;
 
@@ -103,6 +103,7 @@ void        panic_display_system_configuration(void);
 #define DB_DBG_POST_CORE            0x1000 /*Wait in debugger after NMI core */
 #define DB_PANICLOG_DUMP            0x2000 /* Send paniclog on panic, not core */
 
+
 #endif /* KERNEL_PRIVATE */
 
 __BEGIN_DECLS
index 9d88531f21dce61866e62ff54eccf86065cb24c4..f30d897e271666aba80f99b8d2bb90955234258c 100644 (file)
@@ -1209,7 +1209,7 @@ kmod_free_linkedit_data(void)
             round_page_32(dt_symtab_size));
     }
 
-    PE_parse_boot_arg("keepsyms", &keepsyms);
+    PE_parse_boot_argn("keepsyms", &keepsyms, sizeof (keepsyms));
 
     segmentLE = getsegbyname(segment_name);
     if (!segmentLE) {
index 5f9d4d80ae7ce6dcee66094c8aacdf5c27b941e6..5718455f92556d731f3c3356b774519e85143a61 100644 (file)
@@ -341,14 +341,13 @@ lck_attr_setdefault(
        lck_attr_t      *attr)
 {
 #if     !DEBUG
-       if (LcksOpts & enaLkDeb)
-               attr->lck_attr_val =  LCK_ATTR_DEBUG;
-       else
-               attr->lck_attr_val =  LCK_ATTR_NONE;
+       if (LcksOpts & enaLkDeb)
+               attr->lck_attr_val =  LCK_ATTR_DEBUG;
+       else
+               attr->lck_attr_val =  LCK_ATTR_NONE;
 #else
-       attr->lck_attr_val =  LCK_ATTR_DEBUG;
-#endif
-
+       attr->lck_attr_val =  LCK_ATTR_DEBUG;
+#endif /* !DEBUG */
 }
 
 
index 1d3e4108df6271e681812008a5bcde2022ba656d..106a9b41d792370324509feb34c273b4fc3e9d82 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2004 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -46,7 +46,7 @@ extern void           processor_up(
 extern void            processor_offline(
                                        processor_t             processor);
 
-extern void            processor_start_thread(void);
+extern void            processor_start_thread(void *machine_param);
 
 /*
  * Must be implemented in machine dependent code.
index 84721990d921afb55ebacfb8d0354838edc85801..6d2ceb8989d087beded146f818008c5bcba6ee13 100644 (file)
@@ -124,6 +124,8 @@ extern int kdb_printf(const char *format, ...) __printflike(1,2);
 
 extern void printf_init(void);
 
+extern int snprintf(char *, size_t, const char *, ...) __printflike(3,4);
+
 extern void log(int level, char *fmt, ...);
 
 void 
index f416dc1054196d146f41b2e204e3ef598f416ff3..f31c699081db67c385d60e27778d4173287cb850 100644 (file)
@@ -64,7 +64,7 @@ dsmos_page_transform_hook(dsmos_page_transform_hook_t hook)
 }
 
 int
-dsmos_page_transform(const void* from, void *to)
+dsmos_page_transform(const void* from, void *to, __unused unsigned long long src_offset, __unused void *ops)
 {
 /*     printf("%s\n", __FUNCTION__); */
        if (dsmos_hook == NULL)
@@ -72,3 +72,9 @@ dsmos_page_transform(const void* from, void *to)
        return (*dsmos_hook) (from, to);
 }
 
+
+text_crypter_create_hook_t text_crypter_create=NULL;
+void text_crypter_create_hook_set(text_crypter_create_hook_t hook)
+{
+       text_crypter_create=hook;
+};
index a0517a01f6cfa1611dd9f3eb7c85a3569ac52210..f00202dfffbb1052019117f161ef5068ec4e5d8a 100644 (file)
 #ifndef _KERN_PAGE_DECRYPT_H
 #define _KERN_PAGE_DECRYPT_H
 
-typedef        int       (*dsmos_page_transform_hook_t) (const void *,void*);
+/* 
+ * Interface for DSMOS 
+ */
+typedef        int     (*dsmos_page_transform_hook_t) (const void *,void*);
 extern void    dsmos_page_transform_hook(dsmos_page_transform_hook_t hook);    /* exported */
 
-extern int             dsmos_page_transform(const void *,void*);
+extern int     dsmos_page_transform(const void *,void*, unsigned long long, void*);
+
+
+/*
+ *Interface for text decryption family
+ */
+struct pager_crypt_info {
+        /* Decrypt one page */
+        int     (*page_decrypt)(const void *src_vaddr, void *dst_vaddr, 
+                               unsigned long long src_offset, void *crypt_ops);
+        /* Pager using this crypter terminates - crypt module not needed anymore */
+        void    (*crypt_end)(void *crypt_ops);
+        /* Private data for the crypter */
+        void    *crypt_ops;
+};
+typedef struct pager_crypt_info pager_crypt_info_t;
+
+typedef int (*text_crypter_create_hook_t)(struct pager_crypt_info *crypt_info, 
+                                               const char *id, void *crypt_data);
+extern void text_crypter_create_hook_set(text_crypter_create_hook_t hook);
+//extern kern_return_t text_crypter_create(pager_crypt_info_t *crypt_info, const char *id, 
+//                                             void *crypt_data);
+extern text_crypter_create_hook_t text_crypter_create;
 
 #endif /* _KERN_PAGE_DECRYPT_H */
 
index a922e3688b8ed82997cad9aeec27aa1749415e92..1f7015c87b0e8e7d236961f7dc453b122d78e4df 100644 (file)
@@ -745,7 +745,8 @@ conslog_putc(
                cnputc(c);
 
 #ifdef MACH_BSD
-       log_putc(c);
+       if (debug_mode == 0)
+               log_putc(c);
 #endif
 }
 
index 518891c79c9de2144f1babd19ea97f1a765c9fc7..0a413bcf1f51b92ee08f72b2f0bdbb6c9be70d7b 100644 (file)
@@ -886,6 +886,15 @@ processor_set_threads(
 {
     return KERN_FAILURE;
 }
+#elif defined(CONFIG_EMBEDDED)
+kern_return_t
+processor_set_threads(
+       __unused processor_set_t                pset,
+       __unused thread_array_t         *thread_list,
+       __unused mach_msg_type_number_t *count)
+{
+    return KERN_NOT_SUPPORTED;
+}
 #else
 kern_return_t
 processor_set_threads(
index 3c132bb7495d9f636a917b44bf5807e084622b8a..d0a496a07227015bb5e487d1b1c57221582c8ba9 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
 #include <machine/commpage.h>
 #include <libkern/version.h>
 
+#if MACH_KDP
+#include <kdp/kdp.h>
+#endif
+
 #if CONFIG_MACF
 #include <security/mac_mach_internal.h>
 #endif
@@ -265,6 +269,11 @@ kernel_bootstrap_thread(void)
 
        kth_started = 1;
 
+#if MACH_KDP
+       kernel_bootstrap_kprintf("calling kdp_init\n");
+       kdp_init();
+#endif
+               
 #ifdef i386
        /*
         * Create and initialize the physical copy window for processor 0
@@ -329,7 +338,7 @@ kernel_bootstrap_thread(void)
  *     Load the first thread to start a processor.
  */
 void
-slave_main(void)
+slave_main(void *machine_param)
 {
        processor_t             processor = current_processor();
        thread_t                thread;
@@ -341,7 +350,7 @@ slave_main(void)
        if (processor->next_thread == THREAD_NULL) {
                thread = processor->idle_thread;
                thread->continuation = (thread_continue_t)processor_start_thread;
-               thread->parameter = NULL;
+               thread->parameter = machine_param;
        }
        else {
                thread = processor->next_thread;
@@ -360,12 +369,12 @@ slave_main(void)
  *     Called at splsched.
  */
 void
-processor_start_thread(void)
+processor_start_thread(void *machine_param)
 {
        processor_t             processor = current_processor();
        thread_t                self = current_thread();
 
-       slave_machine_init();
+       slave_machine_init(machine_param);
 
        /*
         *      If running the idle processor thread,
@@ -406,7 +415,7 @@ load_context(
         * to have reserved stack.
         */
        load_context_kprintf("stack %x, stackptr %x\n", 
-                             thread->kernel_stack, thread->machine.kstackptr);
+                            thread->kernel_stack, thread->machine.kstackptr);
        if (!thread->kernel_stack) {
                load_context_kprintf("calling stack_alloc_try\n");
                if (!stack_alloc_try(thread))
index 475160ce8b890e7498d0bc2181917329ba6f5e8f..bb60c7d404d8b1ac890caf935f34842d2b5bde7f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2005 Apple Computer, Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -47,14 +47,14 @@ extern void kernel_bootstrap(void) __attribute__((section("__TEXT, initcode")));
 /* Initialize machine dependent stuff */
 extern void    machine_init(void);
 
-extern void    slave_main(void);
+extern void    slave_main(void *machine_param);
 
 /*
  * The following must be implemented in machine dependent code.
  */
 
 /* Slave cpu initialization */
-extern void    slave_machine_init(void);
+extern void    slave_machine_init(void *machine_param);
 
 /* Device subystem initialization */
 extern void    device_service_create(void);
index 0d1b3065b904ea4cd606532d459b73af81a76b24..3196fb4c283e73b5a35d7a717f4c7f301fea244c 100644 (file)
@@ -193,7 +193,6 @@ syms_nameforaddr(vm_offset_t addr, vm_offset_t *ofs, kmod_info_t **km)
        return (NULL);
 }
 
-int     snprintf(char *, size_t, const char *, ...);
 
 /* Format the results of calling syms_nameforaddr into a single string.
  * The buffer must be at least 13 bytes long; 80 is recommended.
index e3dae79d908a98332321e8315710630aff890866..d0cdc4aaab08b180350a5611a88d7a3cd3fb7cd5 100644 (file)
@@ -321,6 +321,8 @@ extern void         task_set_64bit(
 extern void            task_backing_store_privileged(
                                        task_t          task);
 
+extern int             get_task_numactivethreads(
+                                       task_t          task);
 /* Get number of activations in a task */
 extern int             get_task_numacts(
                                        task_t          task);
index bdb37360462bf8f2088b898d2fe7fa4505ca151f..e189edb2c0caf7eae2a5a53c096506101f30a5bd 100644 (file)
@@ -265,7 +265,6 @@ MACRO_END
 #define zone_sleep(zone)                               \
        (void) lck_mtx_sleep(&(zone)->lock, 0, (event_t)(zone), THREAD_UNINT);
 
-extern int snprintf(char *, size_t, const char *, ...) __printflike(3,4);
 
 #define lock_zone_init(zone)                           \
 MACRO_BEGIN                                            \
@@ -615,7 +614,7 @@ zone_bootstrap(void)
        char temp_buf[16];
 
        /* see if we want freed zone element checking */
-       if (PE_parse_boot_arg("-zc", temp_buf)) {
+       if (PE_parse_boot_argn("-zc", temp_buf, sizeof (temp_buf))) {
                check_freed_element = 1;
        }
 
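zone_bootstrap() is the first of many call sites in this commit converted from PE_parse_boot_arg() to PE_parse_boot_argn(), which takes the destination size so an oversized boot-arg value can no longer overrun the caller's buffer. A sketch of the bounded pattern (prototype assumed from these call sites):

    extern boolean_t PE_parse_boot_argn(const char *arg_string,
                                        void *arg_ptr, int max_arg);

    static void
    boot_args_sketch(void)
    {
            char            temp_buf[16];
            unsigned int    wncpu;

            /* flag-style argument: only its presence matters */
            if (PE_parse_boot_argn("-zc", temp_buf, sizeof (temp_buf))) {
                    /* enable freed-element checking */
            }
            /* numeric argument: at most sizeof (wncpu) bytes are written */
            if (PE_parse_boot_argn("cpus", &wncpu, sizeof (wncpu))) {
                    /* clamp and use the requested CPU count */
            }
    }
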
index 00e9580ea166b04ca9e98f686027f4b03cc6e0c7..9aa0a75c73cf1bd9e0ca4db3486029a809d97528 100644 (file)
 #define HOST_USER_NOTIFICATION_PORT     (3 + HOST_MAX_SPECIAL_KERNEL_PORT)
 #define HOST_LOCKD_PORT                 (5 + HOST_MAX_SPECIAL_KERNEL_PORT)
 #define HOST_SEATBELT_PORT              (7 + HOST_MAX_SPECIAL_KERNEL_PORT)
-#define HOST_MAX_SPECIAL_PORT           (8 + HOST_MAX_SPECIAL_KERNEL_PORT)
+
+#define HOST_UNFREED_PORT              (10 + HOST_MAX_SPECIAL_KERNEL_PORT)
+#define HOST_AMFID_PORT                        (11 + HOST_MAX_SPECIAL_KERNEL_PORT)
+#define HOST_MAX_SPECIAL_PORT           (12 + HOST_MAX_SPECIAL_KERNEL_PORT)
                                         /* room to grow here as well */
 
 /*
 #define host_set_lockd_port(host, port)        \
        (host_set_special_port((host), HOST_LOCKD_PORT, (port)))
 
+#define host_get_unfreed_port(host, port)      \
+       (host_get_special_port((host),                  \
+       HOST_LOCAL_NODE, HOST_UNFREED_PORT, (port)))
+#define host_set_unfreed_port(host, port)      \
+       (host_set_special_port((host), HOST_UNFREED_PORT, (port)))
+
+#define host_get_amfid_port(host, port)        \
+       (host_get_special_port((host),                  \
+       HOST_LOCAL_NODE, HOST_AMFID_PORT, (port)))
+#define host_set_amfid_port(host, port)        \
+       (host_set_special_port((host), HOST_AMFID_PORT, (port)))
+
 #endif /* _MACH_HOST_SPECIAL_PORTS_H_ */
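
HOST_MAX_SPECIAL_PORT grows from 8 to 12 above, adding the "unfreed" and amfid ports (slots 8 and 9 are left unused) together with the usual get/set accessor macros. A sketch of fetching one of the new ports, assuming a host privileged port is already in hand:

    static kern_return_t
    lookup_amfid_port(host_t host_priv, mach_port_t *portp)
    {
            *portp = MACH_PORT_NULL;
            /* expands to host_get_special_port(host_priv, HOST_LOCAL_NODE,
             *                                  HOST_AMFID_PORT, portp) */
            return host_get_amfid_port(host_priv, portp);
    }
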
index 9fd7664f4601d57de7e49da297eaf7d715bf12ce..9bc85153a54843292cedf0f22d08b18e6334bca1 100644 (file)
@@ -197,7 +197,10 @@ routine memory_object_synchronize(
  *     [Response should be a release of the named reference when
  *     the pager deems that appropriate.]
  */
-routine memory_object_unmap(
+routine memory_object_map(
+               memory_object           : memory_object_t;
+               prot                    : vm_prot_t);
+routine memory_object_last_unmap(
                memory_object           : memory_object_t);
 
 /* vim: set ft=c : */
index 01b462b12fa574effb6bdac24b2e4e52a9845cbe..739f0374d10cf631da62070cd45c684bb78cb295 100644 (file)
@@ -144,7 +144,10 @@ typedef const struct memory_object_pager_ops {
                memory_object_offset_t offset,
                vm_size_t size,
                vm_sync_t sync_flags);
-       kern_return_t (*memory_object_unmap)(
+       kern_return_t (*memory_object_map)(
+               memory_object_t mem_obj,
+               vm_prot_t prot);
+       kern_return_t (*memory_object_last_unmap)(
                memory_object_t mem_obj);
        const char *memory_object_pager_name;
 } * memory_object_pager_ops_t;
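
With the single memory_object_unmap slot split into memory_object_map and memory_object_last_unmap, every pager's ops vector gains an entry; the vnode, device, and apple protect pagers are all updated below. A pager with no interest in tracking its mappings can supply the trivial pair, as the device pager does (names here are hypothetical):

    kern_return_t
    my_pager_map(__unused memory_object_t mem_obj, __unused vm_prot_t prot)
    {
            return KERN_SUCCESS;    /* nothing to record per mapping */
    }

    kern_return_t
    my_pager_last_unmap(__unused memory_object_t mem_obj)
    {
            return KERN_SUCCESS;    /* nothing to reclaim either */
    }
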
@@ -386,15 +389,17 @@ struct upl_page_info {
        ppnum_t         phys_addr;      /* physical page index number */
         unsigned int
 #ifdef  XNU_KERNEL_PRIVATE
-                        pageout:1,      /* page is to be removed on commit */
-                        absent:1,       /* No valid data in this page */
-                        dirty:1,        /* Page must be cleaned (O) */
-                       precious:1,     /* must be cleaned, we have only copy */
-                       device:1,       /* no page data, mapped dev memory */
-                       speculative:1,  /* page is valid, but not yet accessed */
-                        :0;            /* force to long boundary */
+               pageout:1,      /* page is to be removed on commit */
+               absent:1,       /* No valid data in this page */
+               dirty:1,        /* Page must be cleaned (O) */
+               precious:1,     /* must be cleaned, we have only copy */
+               device:1,       /* no page data, mapped dev memory */
+               speculative:1,  /* page is valid, but not yet accessed */
+               cs_validated:1, /* CODE SIGNING: page was validated */
+               cs_tainted:1,   /* CODE SIGNING: page is tainted */
+               :0;             /* force to long boundary */
 #else
-                       opaque;         /* use upl_page_xxx() accessor funcs */
+               opaque;         /* use upl_page_xxx() accessor funcs */
 #endif /* XNU_KERNEL_PRIVATE */
 };
 
@@ -532,6 +537,9 @@ typedef uint32_t    upl_size_t;     /* page-aligned byte size */
 #define UPL_COMMIT_INACTIVATE          0x8
 #define UPL_COMMIT_NOTIFY_EMPTY                0x10
 #define UPL_COMMIT_ALLOW_ACCESS                0x20
+#define UPL_COMMIT_CS_VALIDATED                0x40
+
+#define UPL_COMMIT_KERNEL_ONLY_FLAGS   (UPL_COMMIT_CS_VALIDATED)
 
 /* flags for return of state from vm_map_get_upl,  vm_upl address space */
 /* based call */
@@ -610,6 +618,14 @@ typedef uint32_t   upl_size_t;     /* page-aligned byte size */
        (((upl)[(index)].phys_addr != 0) ?       \
         ((upl)[(index)].pageout = FALSE) : FALSE)
 
+/* modifier macros for upl_t */
+
+#define UPL_SET_CS_VALIDATED(upl, index, value) \
+       ((upl)[(index)].cs_validated = ((value) ? TRUE : FALSE))
+
+#define UPL_SET_CS_TAINTED(upl, index, value) \
+       ((upl)[(index)].cs_tainted = ((value) ? TRUE : FALSE))
+
 /* The call prototyped below is used strictly by UPL_GET_INTERNAL_PAGE_LIST */
 
 extern vm_size_t       upl_offset_to_pagelist;
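
The two new page-info bits and their UPL_SET_CS_* modifier macros let a pager record per-page code-signing results in a UPL, to be applied at commit time via the kernel-only UPL_COMMIT_CS_VALIDATED flag. A sketch of the producer side, modeled on the apple protect pager further down (the validated/tainted arrays stand in for per-page results):

    static void
    commit_with_cs_state(upl_t upl, upl_size_t length,
                         const boolean_t *validated, const boolean_t *tainted)
    {
            upl_page_info_t *upl_pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
            unsigned int     pl_count = length / PAGE_SIZE;
            unsigned int     i;
            boolean_t        empty;

            for (i = 0; i < pl_count; i++) {
                    UPL_SET_CS_VALIDATED(upl_pl, i, validated[i]);
                    UPL_SET_CS_TAINTED(upl_pl, i, tainted[i]);
            }
            upl_commit_range(upl, 0, length, UPL_COMMIT_CS_VALIDATED,
                             upl_pl, pl_count, &empty);
    }
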
index aab39fd3ce32b89d40b5814801fbd18ea5560bff..f0bdd1a4707cf23e177d286f36da43b3edc24fff 100644 (file)
@@ -109,6 +109,8 @@ typedef struct vm_statistics        vm_statistics_data_t;
 #define VM_PAGE_QUERY_PAGE_PAGED_OUT    0x10
 #define VM_PAGE_QUERY_PAGE_COPIED       0x20
 #define VM_PAGE_QUERY_PAGE_SPECULATIVE 0x40
+#define VM_PAGE_QUERY_PAGE_CS_VALIDATED        0x100
+#define VM_PAGE_QUERY_PAGE_CS_TAINTED  0x200
 
 #ifdef MACH_KERNEL_PRIVATE
 
index 7f127ee81e844fa5b832e8027ffade5102fc4f5b..9386f8597d08af463375011e06534939afa06c40 100644 (file)
@@ -754,7 +754,7 @@ ml_init_lock_timeout(void)
        nanoseconds_to_absolutetime(NSEC_PER_SEC>>2, &abstime);
        LockTimeOut = (unsigned int)abstime;
 
-       if (PE_parse_boot_arg("mtxspin", &mtxspin)) {
+       if (PE_parse_boot_argn("mtxspin", &mtxspin, sizeof (mtxspin))) {
                if (mtxspin > USEC_PER_SEC>>4)
                        mtxspin =  USEC_PER_SEC>>4;
                nanoseconds_to_absolutetime(mtxspin*NSEC_PER_USEC, &abstime);
index 97f5276f2202b6ca0d35278866daca26e01d8e2f..a6dcb65774e0e51847649430d1e8650ea0057a79 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -229,7 +229,7 @@ machine_startup(void)
        int     boot_arg;
        unsigned int wncpu;
 
-       if (PE_parse_boot_arg("cpus", &wncpu)) {
+       if (PE_parse_boot_argn("cpus", &wncpu, sizeof (wncpu))) {
                if ((wncpu > 0) && (wncpu < MAX_CPUS))
                         max_ncpus = wncpu;
        }
@@ -237,7 +237,7 @@ machine_startup(void)
        if( PE_get_hotkey( kPEControlKey ))
             halt_in_debugger = halt_in_debugger ? 0 : 1;
 
-       if (PE_parse_boot_arg("debug", &boot_arg)) {
+       if (PE_parse_boot_argn("debug", &boot_arg, sizeof (boot_arg))) {
                if (boot_arg & DB_HALT) halt_in_debugger=1;
                if (boot_arg & DB_PRT) disable_debug_output=FALSE; 
                if (boot_arg & DB_SLOG) systemLogDiags=TRUE; 
@@ -245,10 +245,10 @@ machine_startup(void)
                if (boot_arg & DB_LOG_PI_SCRN) logPanicDataToScreen=TRUE; 
        }
        
-       if (!PE_parse_boot_arg("nvram_paniclog", &commit_paniclog_to_nvram))
+       if (!PE_parse_boot_argn("nvram_paniclog", &commit_paniclog_to_nvram, sizeof (commit_paniclog_to_nvram)))
                commit_paniclog_to_nvram = 1;
 
-       PE_parse_boot_arg("vmmforce", &lowGlo.lgVMMforcedFeats);
+       PE_parse_boot_argn("vmmforce", &lowGlo.lgVMMforcedFeats, sizeof (lowGlo.lgVMMforcedFeats));
 
        hw_lock_init(&debugger_lock);                           /* initialize debugger lock */
        hw_lock_init(&pbtlock);                                         /* initialize print backtrace lock */
@@ -276,16 +276,16 @@ machine_startup(void)
                active_debugger =1;
        }
 #endif /* MACH_KDB */
-       if (PE_parse_boot_arg("preempt", &boot_arg)) {
+       if (PE_parse_boot_argn("preempt", &boot_arg, sizeof (boot_arg))) {
                default_preemption_rate = boot_arg;
        }
-       if (PE_parse_boot_arg("unsafe", &boot_arg)) {
+       if (PE_parse_boot_argn("unsafe", &boot_arg, sizeof (boot_arg))) {
                max_unsafe_quanta = boot_arg;
        }
-       if (PE_parse_boot_arg("poll", &boot_arg)) {
+       if (PE_parse_boot_argn("poll", &boot_arg, sizeof (boot_arg))) {
                max_poll_quanta = boot_arg;
        }
-       if (PE_parse_boot_arg("yield", &boot_arg)) {
+       if (PE_parse_boot_argn("yield", &boot_arg, sizeof (boot_arg))) {
                sched_poll_yield_shift = boot_arg;
        }
 
@@ -322,7 +322,8 @@ machine_init(void)
 
 }
 
-void slave_machine_init(void)
+void
+slave_machine_init(__unused void *param)
 {
        cpu_machine_init();                     /* Initialize the processor */
        clock_init();                           /* Init the clock */
index 1e111ec0c255a9812dbe1a5f442a6a9ceeae6465..2e100071b0182092ab245c4d2165209b851c79d1 100644 (file)
@@ -629,7 +629,7 @@ copyJoin1:                                                                          // enter from copyinstr with kkNull set
         crmove kk64bit,pf64Bitb                                // remember if this is a 64-bit processor
         stw            r7,kkCountPtr(r1)
         stw            r31,kkR31Save(r1)                               // we use r31 globally for mapped user ptr
-        li             r31,0                                                   // no mapped ptr yet
+
         
         
 // Handle buffer length > 256MB.  This is an error (ENAMETOOLONG) on copyin and copyout.
@@ -648,6 +648,7 @@ copyJoin1:                                                                          // enter from copyinstr with kkNull set
 // Set up thread_recover in case we hit an illegal address.
 
 copyin0:
+               li              r31,0                                                   // no mapped ptr yet
                mfsprg  r8,1                                                    // Get the current thread 
                lis             r2,hi16(copyinout_error)
                ori             r2,r2,lo16(copyinout_error)
index c5629bf1b7ad135a4eb3ed489511b60ba013580c..35526ab2c5ffcfd5124acad07f4b79ad4046137f 100644 (file)
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
+ * Copyright (c) 2000-2008 Apple Inc. All rights reserved.
  *
  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
  * 
@@ -201,28 +201,28 @@ ppc_init(
       
        PE_init_platform(FALSE, args);                                          /* Get platform expert set up */
 
-       if (!PE_parse_boot_arg("novmx", &novmx)) novmx=0;       /* Special run without VMX? */
+       if (!PE_parse_boot_argn("novmx", &novmx, sizeof (novmx))) novmx=0;      /* Special run without VMX? */
        if(novmx) {                                                                                     /* Yeah, turn it off */
                BootProcInfo.pf.Available &= ~pfAltivec;                /* Turn off Altivec available */
                __asm__ volatile("mtsprg 2,%0" : : "r" (BootProcInfo.pf.Available));    /* Set live value */
        }
 
-       if (!PE_parse_boot_arg("fn", &forcenap)) forcenap = 0;  /* If force nap not set, make 0 */
+       if (!PE_parse_boot_argn("fn", &forcenap, sizeof (forcenap))) forcenap = 0;      /* If force nap not set, make 0 */
        else {
                if(forcenap < 2) forcenap = forcenap + 1;               /* Else set 1 for off, 2 for on */
                else forcenap = 0;                                                              /* Clear for error case */
        }
        
-       if (!PE_parse_boot_arg("pmsx", &pmsExperimental)) pmsExperimental = 0;  /* Check if we should start in experimental power management stepper mode */
-       if (!PE_parse_boot_arg("lcks", &LcksOpts)) LcksOpts = 0;        /* Set lcks options */
-       if (!PE_parse_boot_arg("diag", &dgWork.dgFlags)) dgWork.dgFlags = 0;    /* Set diagnostic flags */
+       if (!PE_parse_boot_argn("pmsx", &pmsExperimental, sizeof (pmsExperimental))) pmsExperimental = 0;       /* Check if we should start in experimental power management stepper mode */
+       if (!PE_parse_boot_argn("lcks", &LcksOpts, sizeof (LcksOpts))) LcksOpts = 0;    /* Set lcks options */
+       if (!PE_parse_boot_argn("diag", &dgWork.dgFlags, sizeof (dgWork.dgFlags))) dgWork.dgFlags = 0;  /* Set diagnostic flags */
        if(dgWork.dgFlags & enaExpTrace) trcWork.traceMask = 0xFFFFFFFF;        /* If tracing requested, enable it */
 
-       if(PE_parse_boot_arg("ctrc", &cputrace)) {                      /* See if tracing is limited to a specific cpu */
+       if(PE_parse_boot_argn("ctrc", &cputrace, sizeof (cputrace))) {                  /* See if tracing is limited to a specific cpu */
                trcWork.traceMask = (trcWork.traceMask & 0xFFFFFFF0) | (cputrace & 0xF);        /* Limit to 4 */
        }
 
-       if(!PE_parse_boot_arg("tb", &trcWork.traceSize)) {      /* See if non-default trace buffer size */
+       if(!PE_parse_boot_argn("tb", &trcWork.traceSize, sizeof (trcWork.traceSize))) { /* See if non-default trace buffer size */
 #if DEBUG
                trcWork.traceSize = 32;                                                 /* Default 32 page trace table for DEBUG */
 #else
@@ -234,18 +234,18 @@ ppc_init(
        if(trcWork.traceSize > 256) trcWork.traceSize = 256;    /* Maximum size of 256 pages */
        trcWork.traceSize = trcWork.traceSize * 4096;           /* Change page count to size */
 
-       if (!PE_parse_boot_arg("maxmem", &maxmem))
+       if (!PE_parse_boot_argn("maxmem", &maxmem, sizeof (maxmem)))
                xmaxmem=0;
        else
                xmaxmem = (uint64_t)maxmem * (1024 * 1024);
 
-       if (!PE_parse_boot_arg("wcte", &wcte)) wcte = 0;        /* If write combine timer enable not supplied, make 1 */
+       if (!PE_parse_boot_argn("wcte", &wcte, sizeof (wcte))) wcte = 0;        /* If write combine timer enable not supplied, make 1 */
        else wcte = (wcte != 0);                                                        /* Force to 0 or 1 */
 
-       if (!PE_parse_boot_arg("mcklog", &mckFlags)) mckFlags = 0;      /* If machine check flags not specified, clear */
+       if (!PE_parse_boot_argn("mcklog", &mckFlags, sizeof (mckFlags))) mckFlags = 0;  /* If machine check flags not specified, clear */
        else if(mckFlags > 1) mckFlags = 0;                                     /* If bogus, clear */
     
-    if (!PE_parse_boot_arg("ht_shift", &hash_table_shift))  /* should we use a non-default hash table size? */
+    if (!PE_parse_boot_argn("ht_shift", &hash_table_shift, sizeof (hash_table_shift)))  /* should we use a non-default hash table size? */
         hash_table_shift = 0;                           /* no, use default size */
 
        /*   
@@ -262,7 +262,7 @@ ppc_init(
                        (void)ml_scom_write(GUSModeReg << 8, scdata);   /* Get GUS mode register */
                }
                
-               if(PE_parse_boot_arg("mcksoft", &mcksoft)) {    /* Have they supplied "machine check software recovery? */
+               if(PE_parse_boot_argn("mcksoft", &mcksoft, sizeof (mcksoft))) { /* Have they supplied "machine check software recovery? */
                        newhid = BootProcInfo.pf.pfHID5;                        /* Get the old HID5 */
                        if(mcksoft < 2) {
                                newhid &= 0xFFFFFFFFFFFFDFFFULL;                /* Clear the old one */
@@ -296,5 +296,5 @@ ppc_init_cpu(
 
        cpu_init();
        
-       slave_main();
+       slave_main(NULL);
 }
index 49d177d8032069ce113116685afa7024c17e6337..0ff41cf4d97ef5514c3ce101000c49eb9da0f80f 100644 (file)
@@ -354,7 +354,7 @@ void ppc_vm_init(uint64_t mem_limit, boot_args *args)
        kprintf("kprintf initialized\n");
 
        serialmode = 0;                                         /* Assume normal keyboard and console */
-       if(PE_parse_boot_arg("serial", &serialmode)) {          /* Do we want a serial keyboard and/or console? */
+       if(PE_parse_boot_argn("serial", &serialmode, sizeof (serialmode))) {            /* Do we want a serial keyboard and/or console? */
                kprintf("Serial mode specified: %08X\n", serialmode);
        }
        if(serialmode & 1) {                            /* Start serial if requested */
index e9af2b6ee51fd2c5490efa60111adba558302f71..fd383fea34a106f57b2d66e6175e57cb73487876 100644 (file)
@@ -126,7 +126,8 @@ const struct memory_object_pager_ops vnode_pager_ops = {
        vnode_pager_data_initialize,
        vnode_pager_data_unlock,
        vnode_pager_synchronize,
-       vnode_pager_unmap,
+       vnode_pager_map,
+       vnode_pager_last_unmap,
        "vnode pager"
 };
 
@@ -494,9 +495,9 @@ vnode_pager_bootstrap(void)
        size = (vm_size_t) sizeof(struct vnode_pager);
        vnode_pager_zone = zinit(size, (vm_size_t) MAX_VNODE*size,
                                PAGE_SIZE, "vnode pager structures");
-#ifdef __i386__
+#if CONFIG_CODE_DECRYPTION
        apple_protect_pager_bootstrap();
-#endif /* __i386__ */
+#endif /* CONFIG_CODE_DECRYPTION */
        return;
 }
 
@@ -782,12 +783,36 @@ vnode_pager_synchronize(
  *
  */
 kern_return_t
-vnode_pager_unmap(
+vnode_pager_map(
+       memory_object_t         mem_obj,
+       vm_prot_t               prot)
+{
+       vnode_pager_t           vnode_object;
+       int                     ret;
+       kern_return_t           kr;
+
+       PAGER_DEBUG(PAGER_ALL, ("vnode_pager_map: %p %x\n", mem_obj, prot));
+
+       vnode_object = vnode_pager_lookup(mem_obj);
+
+       ret = ubc_map(vnode_object->vnode_handle, prot);
+
+       if (ret != 0) {
+               kr = KERN_FAILURE;
+       } else {
+               kr = KERN_SUCCESS;
+       }
+
+       return kr;
+}
+
+kern_return_t
+vnode_pager_last_unmap(
        memory_object_t         mem_obj)
 {
        register vnode_pager_t  vnode_object;
 
-       PAGER_DEBUG(PAGER_ALL, ("vnode_pager_unmap: %p\n", mem_obj));
+       PAGER_DEBUG(PAGER_ALL, ("vnode_pager_last_unmap: %p\n", mem_obj));
 
        vnode_object = vnode_pager_lookup(mem_obj);
 
index 015200a6a9062b90e1b650f02a9feb997b8f5eb3..4f32ac7231e356be82f06cab67d68ca11aaddf54 100644 (file)
@@ -73,7 +73,8 @@ const struct memory_object_pager_ops device_pager_ops = {
        device_pager_data_initialize,
        device_pager_data_unlock,
        device_pager_synchronize,
-       device_pager_unmap,
+       device_pager_map,
+       device_pager_last_unmap,
        "device pager"
 };
 
@@ -424,7 +425,15 @@ device_pager_synchronize(
  *
  */
 kern_return_t
-device_pager_unmap(
+device_pager_map(
+       __unused memory_object_t        mem_obj,
+       __unused vm_prot_t              prot)
+{
+       return KERN_SUCCESS;
+}
+
+kern_return_t
+device_pager_last_unmap(
        __unused memory_object_t        mem_obj)
 {
        return KERN_SUCCESS;
index c84e776c6bac2b8798a538c5eadb4180cdce6a9f..f83dd5e8871f6e11a318dd91bfdf965a20ca7205 100644 (file)
@@ -2147,13 +2147,44 @@ kern_return_t memory_object_synchronize
                sync_flags);
 }
 
-/* Routine memory_object_unmap */
-kern_return_t memory_object_unmap
+
+/*
+ * memory_object_map() is called by VM (in vm_map_enter() and its variants)
+ * each time a "named" VM object gets mapped directly or indirectly
+ * (copy-on-write mapping).  A "named" VM object has an extra reference held
+ * by the pager to keep it alive until the pager decides that the 
+ * memory object (and its VM object) can be reclaimed.
+ * VM calls memory_object_last_unmap() (in vm_object_deallocate()) when all
+ * the mappings of that memory object have been removed.
+ *
+ * For a given VM object, calls to memory_object_map() and memory_object_last_unmap()
+ * are serialized (through object->mapping_in_progress), to ensure that the
+ * pager gets a consistent view of the mapping status of the memory object.
+ *
+ * This allows the pager to keep track of how many times a memory object
+ * has been mapped and with which protections, to decide when it can be
+ * reclaimed.
+ */
+
+/* Routine memory_object_map */
+kern_return_t memory_object_map
+(
+       memory_object_t memory_object,
+       vm_prot_t prot
+)
+{
+       return (memory_object->mo_pager_ops->memory_object_map)(
+               memory_object,
+               prot);
+}
+
+/* Routine memory_object_last_unmap */
+kern_return_t memory_object_last_unmap
 (
        memory_object_t memory_object
 )
 {
-       return (memory_object->mo_pager_ops->memory_object_unmap)(
+       return (memory_object->mo_pager_ops->memory_object_last_unmap)(
                memory_object);
 }
 
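The comment above promises that map and last-unmap notifications are serialized through object->mapping_in_progress; the code that keeps that promise is in the vm_map.c hunks near the end of this diff. Condensed from those hunks, the protocol around each notification looks like this (object, pager_prot, and kr are assumed from the surrounding code):

    vm_object_lock(object);
    if (object->named && object->pager != MEMORY_OBJECT_NULL) {
            vm_object_mapping_wait(object, THREAD_UNINT);  /* wait for any notification in flight */
            vm_object_mapping_begin(object);               /* become the one in flight */
            vm_object_unlock(object);

            kr = memory_object_map(object->pager, pager_prot);  /* may block in the pager */
            assert(kr == KERN_SUCCESS);

            vm_object_lock(object);
            vm_object_mapping_end(object);                 /* wake the next waiter */
    }
    vm_object_unlock(object);
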
index 5045c1b15e078c9c41707c7755a883de68c76602..54e618e40004537aae4c35af51a38fc0987238b5 100644 (file)
@@ -110,7 +110,9 @@ kern_return_t apple_protect_pager_synchronize(memory_object_t mem_obj,
                                              memory_object_offset_t offset,
                                              vm_size_t length,
                                              vm_sync_t sync_flags);
-kern_return_t apple_protect_pager_unmap(memory_object_t mem_obj);
+kern_return_t apple_protect_pager_map(memory_object_t mem_obj,
+                                     vm_prot_t prot);
+kern_return_t apple_protect_pager_last_unmap(memory_object_t mem_obj);
 
 /*
  * Vector of VM operations for this EMM.
@@ -126,7 +128,8 @@ const struct memory_object_pager_ops apple_protect_pager_ops = {
        apple_protect_pager_data_initialize,
        apple_protect_pager_data_unlock,
        apple_protect_pager_synchronize,
-       apple_protect_pager_unmap,
+       apple_protect_pager_map,
+       apple_protect_pager_last_unmap,
        "apple protect pager"
 };
 
@@ -143,6 +146,7 @@ typedef struct apple_protect_pager {
        boolean_t               is_mapped;      /* is this mem_obj mapped ? */
        memory_object_control_t pager_control;  /* mem object control handle */
        vm_object_t             backing_object; /* VM obj w/ encrypted data */
+       struct pager_crypt_info crypt;
 } *apple_protect_pager_t;
 #define        APPLE_PROTECT_PAGER_NULL        ((apple_protect_pager_t) NULL)
 
@@ -169,7 +173,8 @@ int apple_protect_pager_num_trim_max = 0;
 int apple_protect_pager_num_trim_total = 0;
 
 /* internal prototypes */
-apple_protect_pager_t apple_protect_pager_create(vm_object_t backing_object);
+apple_protect_pager_t apple_protect_pager_create(vm_object_t backing_object,
+                                                struct pager_crypt_info *crypt_info);
 apple_protect_pager_t apple_protect_pager_lookup(memory_object_t mem_obj);
 void apple_protect_pager_dequeue(apple_protect_pager_t pager);
 void apple_protect_pager_deallocate_internal(apple_protect_pager_t pager,
@@ -315,7 +320,8 @@ apple_protect_pager_data_request(
        upl_t                   upl;
        int                     upl_flags;
        upl_size_t              upl_size;
-       upl_page_info_t         *upl_pl;
+       upl_page_info_t         *upl_pl = NULL;
+       unsigned int            pl_count;
        vm_object_t             src_object, dst_object;
        kern_return_t           kr, retval;
        vm_map_offset_t         kernel_mapping;
@@ -333,6 +339,7 @@ apple_protect_pager_data_request(
        src_object = VM_OBJECT_NULL;
        kernel_mapping = 0;
        upl = NULL;
+       upl_pl = NULL;
        fault_info = (vm_object_fault_info_t) mo_fault_info;
        interruptible = fault_info->interruptible;
 
@@ -354,6 +361,7 @@ apple_protect_pager_data_request(
                UPL_NO_SYNC |
                UPL_CLEAN_IN_PLACE |    /* triggers UPL_CLEAR_DIRTY */
                UPL_SET_INTERNAL;
+       pl_count = 0;
        kr = memory_object_upl_request(mo_control,
                                       offset, upl_size,
                                       &upl, NULL, NULL, upl_flags);
@@ -401,6 +409,7 @@ apple_protect_pager_data_request(
         * Fill in the contents of the pages requested by VM.
         */
        upl_pl = UPL_GET_INTERNAL_PAGE_LIST(upl);
+       pl_count = length / PAGE_SIZE;
        for (cur_offset = 0; cur_offset < length; cur_offset += PAGE_SIZE) {
                ppnum_t dst_pnum;
 
@@ -485,13 +494,29 @@ apple_protect_pager_data_request(
                           dst_object->wimg_bits & VM_WIMG_MASK,
                           TRUE);
 
+               /*
+                * Validate the original page...
+                */
+               if (src_page->object->code_signed) {
+                       vm_page_validate_cs_mapped(src_page,
+                                                  (const void *) src_vaddr);
+               }
+               /*
+                * ... and transfer the results to the destination page.
+                */
+               UPL_SET_CS_VALIDATED(upl_pl, cur_offset / PAGE_SIZE,
+                                    src_page->cs_validated);
+               UPL_SET_CS_TAINTED(upl_pl, cur_offset / PAGE_SIZE,
+                                  src_page->cs_tainted);
+
                /*
                 * Decrypt the encrypted contents of the source page
                 * into the destination page.
                 */
-               dsmos_page_transform((const void *) src_vaddr,
-                                    (void *) dst_vaddr);
-
+               pager->crypt.page_decrypt((const void *) src_vaddr,
+                                   (void *) dst_vaddr, offset+cur_offset,
+                                   pager->crypt.crypt_ops);
+               
                /*
                 * Remove the pmap mapping of the source and destination pages
                 * in the kernel.
@@ -535,7 +560,10 @@ done:
                if (retval != KERN_SUCCESS) {
                        upl_abort(upl, 0);
                } else {
-                       upl_commit(upl, NULL, 0);
+                       boolean_t empty;
+                       upl_commit_range(upl, 0, upl->size, 
+                                        UPL_COMMIT_CS_VALIDATED,
+                                        upl_pl, pl_count, &empty);
                }
 
                /* and deallocate the UPL */
@@ -632,6 +660,10 @@ apple_protect_pager_terminate_internal(
 
        /* trigger the destruction of the memory object */
        memory_object_destroy(pager->pager_control, 0);
+       
+       /* deallocate any crypt module data */
+       if(pager->crypt.crypt_end)
+               pager->crypt.crypt_end(pager->crypt.crypt_ops);
 }
 
 /*
@@ -762,9 +794,10 @@ apple_protect_pager_synchronize(
  * time the memory object gets mapped and we take one extra reference on the
  * memory object to account for all its mappings.
  */
-void
+kern_return_t
 apple_protect_pager_map(
-       memory_object_t         mem_obj)
+       memory_object_t         mem_obj,
+       __unused vm_prot_t      prot)
 {
        apple_protect_pager_t   pager;
 
@@ -786,21 +819,24 @@ apple_protect_pager_map(
                apple_protect_pager_count_mapped++;
        }
        mutex_unlock(&apple_protect_pager_lock);
+
+       return KERN_SUCCESS;
 }
 
 /*
- * apple_protect_pager_unmap()
+ * apple_protect_pager_last_unmap()
  *
  * This is called by VM when this memory object is no longer mapped anywhere.
  */
 kern_return_t
-apple_protect_pager_unmap(
+apple_protect_pager_last_unmap(
        memory_object_t         mem_obj)
 {
        apple_protect_pager_t   pager;
        int                     count_unmapped;
 
-       PAGER_DEBUG(PAGER_ALL, ("apple_protect_pager_unmap: %p\n", mem_obj));
+       PAGER_DEBUG(PAGER_ALL,
+                   ("apple_protect_pager_last_unmap: %p\n", mem_obj));
 
        pager = apple_protect_pager_lookup(mem_obj);
 
@@ -844,7 +880,8 @@ apple_protect_pager_lookup(
 
 apple_protect_pager_t
 apple_protect_pager_create(
-       vm_object_t     backing_object)
+       vm_object_t     backing_object,
+       struct pager_crypt_info *crypt_info)
 {
        apple_protect_pager_t   pager, pager2;
        memory_object_control_t control;
@@ -869,6 +906,8 @@ apple_protect_pager_create(
        pager->is_mapped = FALSE;
        pager->pager_control = MEMORY_OBJECT_CONTROL_NULL;
        pager->backing_object = backing_object;
+       pager->crypt = *crypt_info;
+       
        vm_object_reference(backing_object);
 
        mutex_lock(&apple_protect_pager_lock);
@@ -932,7 +971,8 @@ apple_protect_pager_create(
  */
 memory_object_t
 apple_protect_pager_setup(
-       vm_object_t     backing_object)
+                         vm_object_t   backing_object,
+                         struct pager_crypt_info *crypt_info)
 {
        apple_protect_pager_t   pager;
 
@@ -943,6 +983,12 @@ apple_protect_pager_setup(
                      apple_protect_pager_t,
                      pager_queue) {
                if (pager->backing_object == backing_object) {
+                       /* For the same object we must always use the same protection options */
+                       if (!((pager->crypt.page_decrypt == crypt_info->page_decrypt) &&
+                             (pager->crypt.crypt_ops == crypt_info->crypt_ops) )) {
+                               mutex_unlock(&apple_protect_pager_lock);
+                               return MEMORY_OBJECT_NULL;
+                       }
                        break;
                }
        }
@@ -958,7 +1004,7 @@ apple_protect_pager_setup(
        mutex_unlock(&apple_protect_pager_lock);
 
        if (pager == APPLE_PROTECT_PAGER_NULL) {
-               pager = apple_protect_pager_create(backing_object);
+               pager = apple_protect_pager_create(backing_object, crypt_info);
                if (pager == APPLE_PROTECT_PAGER_NULL) {
                        return MEMORY_OBJECT_NULL;
                }
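
Piecing together the uses above -- page_decrypt() invoked once per page with (src, dst, file offset, crypt_ops), crypt_end() invoked at pager termination, and pointer equality used to match an existing pager -- a crypt module plugs in roughly as below. The layout and signatures are inferred from these hunks rather than taken from a header, and the callbacks are hypothetical:

    struct pager_crypt_info_sketch {
            int   (*page_decrypt)(const void *src, void *dst,
                                  unsigned long long offset, void *crypt_ops);
            void  (*crypt_end)(void *crypt_ops);
            void  *crypt_ops;       /* opaque per-session state */
    };

    static int
    my_page_decrypt(const void *src, void *dst,
                    unsigned long long offset, void *crypt_ops)
    {
            (void)src; (void)dst; (void)offset; (void)crypt_ops;
            /* decrypt one page of ciphertext at 'offset' into dst */
            return 0;
    }

    static void
    my_crypt_end(void *crypt_ops)
    {
            (void)crypt_ops;        /* release per-session state here */
    }
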
index f5275261d3d525fe1eb11b1c4ba819a9028d45ec..77a34c912ececf45cf55a64aa08a7c0feb5b6ec1 100644 (file)
@@ -155,6 +155,14 @@ unsigned long vm_cs_revalidates = 0;
 unsigned long vm_cs_query_modified = 0;
 unsigned long vm_cs_validated_dirtied = 0;
 
+#if CONFIG_ENFORCE_SIGNED_CODE
+#if SECURE_KERNEL
+const int cs_enforcement_disable=0;
+#else
+int cs_enforcement_disable=1;
+#endif
+#endif
+
 /*
  *     Routine:        vm_fault_init
  *     Purpose:
@@ -163,6 +171,12 @@ unsigned long vm_cs_validated_dirtied = 0;
 void
 vm_fault_init(void)
 {
+#if !SECURE_KERNEL
+#if CONFIG_ENFORCE_SIGNED_CODE
+       PE_parse_boot_argn("cs_enforcement_disable", &cs_enforcement_disable, sizeof (cs_enforcement_disable));
+#endif
+       PE_parse_boot_argn("cs_debug", &cs_debug, sizeof (cs_debug));
+#endif
 }
 
 /*
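
One detail in the hunk above: on SECURE_KERNEL builds cs_enforcement_disable is a const int 0, so every "if (!cs_enforcement_disable)" test constant-folds away and no boot-arg can re-enable it; only non-secure kernels consult the cs_enforcement_disable and cs_debug boot-args in vm_fault_init(). The same compile-time-knob pattern in isolation (the knob name is hypothetical):

    #if SECURE_KERNEL
    static const int knob_disable = 0;  /* can never be switched off */
    #else
    static int knob_disable = 1;        /* default off; a boot-arg may flip it */
    #endif

    static int
    enforcement_active(void)
    {
            /* on SECURE_KERNEL this test folds to a constant 1 */
            return !knob_disable;
    }
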
@@ -1958,6 +1972,21 @@ backoff:
 
 
 
+/*
+ * CODE SIGNING:
+ * When soft faulting a page, we have to validate the page if:
+ * 1. the page is being mapped in user space
+ * 2. the page hasn't already been found to be "tainted"
+ * 3. the page belongs to a code-signed object
+ * 4. the page has not been validated yet or has been mapped for write.
+ */
+#define VM_FAULT_NEED_CS_VALIDATION(pmap, page)                                \
+       ((pmap) != kernel_pmap /*1*/ &&                                 \
+        !(page)->cs_tainted /*2*/ &&                                   \
+        (page)->object->code_signed /*3*/ &&                           \
+        (!(page)->cs_validated || (page)->wpmapped /*4*/))
+
+
 /*
  * page queue lock must NOT be held
  * m->object must be locked
@@ -1995,24 +2024,6 @@ vm_fault_enter(vm_page_t m,
 
         cache_attr = ((unsigned int)m->object->wimg_bits) & VM_WIMG_MASK;
 
-       if (m->object->code_signed && pmap != kernel_pmap &&
-           (!m->cs_validated || m->wpmapped)) {
-               vm_object_lock_assert_exclusive(m->object);
-
-               if (m->cs_validated && m->wpmapped) {
-                       vm_cs_revalidates++;
-               }
-
-               /*
-                * CODE SIGNING:
-                * This page comes from a VM object backed by a signed
-                * memory object.  We are about to enter it into a process
-                * address space, so we need to validate its signature.
-                */
-               /* VM map is locked, so 1 ref will remain on VM object */
-               vm_page_validate_cs(m);
-       }
-
        if (m->pmapped == FALSE) {
                /*
                 * This is the first time this page is being
@@ -2058,7 +2069,26 @@ vm_fault_enter(vm_page_t m,
                }
        }
 
-       if (m->cs_tainted) {
+       if (VM_FAULT_NEED_CS_VALIDATION(pmap, m)) {
+               vm_object_lock_assert_exclusive(m->object);
+
+               if (m->cs_validated) {
+                       vm_cs_revalidates++;
+               }
+
+               /* VM map is locked, so 1 ref will remain on VM object */
+               vm_page_validate_cs(m);
+       }
+
+       if (m->cs_tainted       /* always invalidate a tainted page */
+#if CONFIG_ENFORCE_SIGNED_CODE
+           /*
+            * Code Signing enforcement invalidates an executable page that
+            * has no code directory, and thus could not be validated.
+            */
+           || ((prot & VM_PROT_EXECUTE) && !m->cs_validated )
+#endif
+               ) {
                /*
                 * CODE SIGNING:
                 * This page has been tainted and can not be trusted.
@@ -2066,18 +2096,25 @@ vm_fault_enter(vm_page_t m,
                 * necessary precautions before we enter the tainted page
                 * into its address space.
                 */
-               if (cs_invalid_page()) {
-                       /* reject the tainted page: abort the page fault */
-                       kr = KERN_MEMORY_ERROR;
-                       cs_enter_tainted_rejected++;
-               } else {
-                       /* proceed with the tainted page */
-                       kr = KERN_SUCCESS;
-                       cs_enter_tainted_accepted++;
+               kr = KERN_SUCCESS;
+#if CONFIG_ENFORCE_SIGNED_CODE
+               if (!cs_enforcement_disable) {
+#endif
+                       if (cs_invalid_page((addr64_t) vaddr)) {
+                               /* reject the tainted page: abort the page fault */
+                               kr = KERN_MEMORY_ERROR;
+                               cs_enter_tainted_rejected++;
+                       } else {
+                               /* proceed with the tainted page */
+                               kr = KERN_SUCCESS;
+                               cs_enter_tainted_accepted++;
+                       }
+#if CONFIG_ENFORCE_SIGNED_CODE
                }
+#endif
                if (cs_debug || kr != KERN_SUCCESS) {
                        printf("CODESIGNING: vm_fault_enter(0x%llx): "
-                              "page %p obj %p off 0x%llx *** TAINTED ***\n",
+                              "page %p obj %p off 0x%llx *** INVALID PAGE ***\n",
                               (long long)vaddr, m, m->object, m->offset);
                }
        } else {
@@ -2092,7 +2129,7 @@ vm_fault_enter(vm_page_t m,
                 * since this is the ONLY bit updated behind the SHARED
                 * lock... however, we need to figure out how to do an atomic
                 * update on a bit field to make this less fragile... right
-                * now I don'w know how to coerce 'C' to give me the offset info
+                * now I don't know how to coerce 'C' to give me the offset info
                 * that's needed for an AtomicCompareAndSwap
                 */
                m->pmapped = TRUE;
@@ -2512,8 +2549,7 @@ RetryFault:
                        }
                        ASSERT_PAGE_DECRYPTED(m);
 
-                       if (m->object->code_signed && map != kernel_map &&
-                           (!m->cs_validated || m->wpmapped)) {
+                       if (VM_FAULT_NEED_CS_VALIDATION(map->pmap, m)) {
                                /*
                                 * We might need to validate this page
                                 * against its code signature, so we
@@ -3431,11 +3467,11 @@ vm_fault_unwire(
 
        for (va = entry->vme_start; va < end_addr; va += PAGE_SIZE) {
 
-               if (pmap) {
-                       pmap_change_wiring(pmap, 
-                                          pmap_addr + (va - entry->vme_start), FALSE);
-               }
                if (object == VM_OBJECT_NULL) {
+                       if (pmap) {
+                               pmap_change_wiring(pmap, 
+                                                  pmap_addr + (va - entry->vme_start), FALSE);
+                       }
                        (void) vm_fault(map, va, VM_PROT_NONE, 
                                        TRUE, THREAD_UNINT, pmap, pmap_addr);
                } else {
@@ -3483,6 +3519,10 @@ vm_fault_unwire(
 
                        result_object = result_page->object;
 
+                       if ((pmap) && (result_page->phys_page != vm_page_guard_addr)) {
+                               pmap_change_wiring(pmap, 
+                                                  pmap_addr + (va - entry->vme_start), FALSE);
+                       }
                        if (deallocate) {
                                assert(result_page->phys_page !=
                                       vm_page_fictitious_addr);
@@ -4130,6 +4170,89 @@ vm_fault_classify_init(void)
 
 extern int cs_validation;
 
+void
+vm_page_validate_cs_mapped(
+       vm_page_t       page,
+       const void      *kaddr)
+{
+       vm_object_t             object;
+       vm_object_offset_t      offset;
+       kern_return_t           kr;
+       memory_object_t         pager;
+       void                    *blobs;
+       boolean_t               validated, tainted;
+
+       assert(page->busy);
+       vm_object_lock_assert_exclusive(page->object);
+
+       if (!cs_validation) {
+               return;
+       }
+
+       if (page->wpmapped && !page->cs_tainted) {
+               /*
+                * This page was mapped for "write" access sometime in the
+                * past and could still be modifiable in the future.
+                * Consider it tainted.
+                * [ If the page was already found to be "tainted", no
+                * need to re-validate. ]
+                */
+               page->cs_validated = TRUE;
+               page->cs_tainted = TRUE;
+               if (cs_debug) {
+                       printf("CODESIGNING: vm_page_validate_cs: "
+                              "page %p obj %p off 0x%llx "
+                              "was modified\n",
+                              page, page->object, page->offset);
+               }
+               vm_cs_validated_dirtied++;
+       }
+
+       if (page->cs_validated) {
+               return;
+       }
+
+       vm_cs_validates++;
+
+       object = page->object;
+       assert(object->code_signed);
+       offset = page->offset;
+
+       if (!object->alive || object->terminating || object->pager == NULL) {
+               /*
+                * The object is terminating and we don't have its pager
+                * so we can't validate the data...
+                */
+               return;
+       }
+       /*
+        * Since we get here to validate a page that was brought in by
+        * the pager, we know that this pager is all setup and ready
+        * by now.
+        */
+       assert(!object->internal);
+       assert(object->pager != NULL);
+       assert(object->pager_ready);
+
+       pager = object->pager;
+
+       kr = vnode_pager_get_object_cs_blobs(pager, &blobs);
+       if (kr != KERN_SUCCESS) {
+               blobs = NULL;
+       }
+
+       /* verify the SHA1 hash for this page */
+       validated = cs_validate_page(blobs,
+                                    offset + object->paging_offset,
+                                    (const void *)kaddr,
+                                    &tainted);
+
+       page->cs_validated = validated;
+       if (validated) {
+               page->cs_tainted = tainted;
+       }
+}
+
 void
 vm_page_validate_cs(
        vm_page_t       page)
@@ -4140,9 +4263,6 @@ vm_page_validate_cs(
        vm_map_size_t           ksize;
        vm_offset_t             kaddr;
        kern_return_t           kr;
-       memory_object_t         pager;
-       void                    *blobs;
-       boolean_t               validated, tainted;
        boolean_t               busy_page;
 
        vm_object_lock_assert_held(page->object);
@@ -4151,35 +4271,25 @@ vm_page_validate_cs(
                return;
        }
 
-       if (page->cs_validated && !page->cs_tainted && page->wpmapped) {
+       if (page->wpmapped && !page->cs_tainted) {
                vm_object_lock_assert_exclusive(page->object);
 
                /*
-                * This page has already been validated and found to
-                * be valid.  However, it was mapped for "write" access
-                * sometime in the past, so we have to check if it was
-                * modified.  If so, it needs to be revalidated.
-                * If the page was already found to be "tainted", no
-                * need to re-validate.
+                * This page was mapped for "write" access sometime in the
+                * past and could still be modifiable in the future.
+                * Consider it tainted.
+                * [ If the page was already found to be "tainted", no
+                * need to re-validate. ]
                 */
-               if (!page->dirty) {
-                       vm_cs_query_modified++;
-                       page->dirty = pmap_is_modified(page->phys_page);
-               }
-               if (page->dirty) {
-                       /*
-                        * The page is dirty, so let's clear its
-                        * "validated" bit and re-validate it.
-                        */
-                       if (cs_debug) {
-                               printf("CODESIGNING: vm_page_validate_cs: "
-                                      "page %p obj %p off 0x%llx "
-                                      "was modified\n",
-                                      page, page->object, page->offset);
-                       }
-                       page->cs_validated = FALSE;
-                       vm_cs_validated_dirtied++;
+               page->cs_validated = TRUE;
+               page->cs_tainted = TRUE;
+               if (cs_debug) {
+                       printf("CODESIGNING: vm_page_validate_cs: "
+                              "page %p obj %p off 0x%llx "
+                              "was modified\n",
+                              page, page->object, page->offset);
                }
+               vm_cs_validated_dirtied++;
        }
 
        if (page->cs_validated) {
@@ -4188,8 +4298,6 @@ vm_page_validate_cs(
 
        vm_object_lock_assert_exclusive(page->object);
 
-       vm_cs_validates++;
-
        object = page->object;
        assert(object->code_signed);
        offset = page->offset;
@@ -4215,53 +4323,20 @@ vm_page_validate_cs(
                                  object,
                                  offset,
                                  &ksize,
+                                 VM_PROT_READ,
                                  FALSE); /* can't unlock object ! */
        if (kr != KERN_SUCCESS) {
                panic("vm_page_validate_cs: could not map page: 0x%x\n", kr);
        }
        kaddr = CAST_DOWN(vm_offset_t, koffset);
 
-       /*
-        * Since we get here to validate a page that was brought in by
-        * the pager, we know that this pager is all setup and ready
-        * by now.
-        */
-       assert(!object->internal);
-       assert(object->pager != NULL);
-       assert(object->pager_ready);
-
-       if (!object->alive || object->terminating || object->pager == NULL) {
-               /*
-                * The object is terminating and we don't have its pager
-                * so we can't validate the data...
-                */
-               goto out;
-       }
-
-       pager = object->pager;
-       assert(pager != NULL);
-
-       kr = vnode_pager_get_object_cs_blobs(pager, &blobs);
-       if (kr != KERN_SUCCESS) {
-               blobs = NULL;
-       }
-
-       /* verify the SHA1 hash for this page */
-       validated = cs_validate_page(blobs,
-                                    offset + object->paging_offset,
-                                    (const void *)kaddr,
-                                    &tainted);
+       /* validate the mapped page */
+       vm_page_validate_cs_mapped(page, (const void *) kaddr);
 
        assert(page->busy);
        assert(object == page->object);
        vm_object_lock_assert_exclusive(object);
 
-       page->cs_validated = validated;
-       if (validated) {
-               page->cs_tainted = tainted;
-       }
-
-out:
        if (!busy_page) {
                PAGE_WAKEUP_DONE(page);
        }
index 11927893c5c1474aab857429b896c4095806ad09..7b6b17dc6019bf9e969b8032f3ae3adf67392a8b 100644 (file)
@@ -131,7 +131,7 @@ vm_mem_bootstrap(void)
        vm_mem_bootstrap_kprintf(("vm_mem_bootstrap: calling pmap_init\n"));
        pmap_init();
        
-       if (PE_parse_boot_arg("zsize", &zsizearg))
+       if (PE_parse_boot_argn("zsize", &zsizearg, sizeof (zsizearg)))
                zsize = zsizearg * 1024ULL * 1024ULL;
        else {
                zsize = sane_size >> 2;                         /* Get target zone size as 1/4 of physical memory */
index 74e805b790c01cf8ae35b965e8a0c2a30f85c696..b4a2e5cf1bebbafeeb468ebc58f001a7f57c368e 100644 (file)
@@ -399,28 +399,6 @@ static zone_t      vm_map_copy_zone;       /* zone for vm_map_copy structures */
 
 vm_object_t    vm_submap_object;
 
-/*
- *     vm_map_init:
- *
- *     Initialize the vm_map module.  Must be called before
- *     any other vm_map routines.
- *
- *     Map and entry structures are allocated from zones -- we must
- *     initialize those zones.
- *
- *     There are three zones of interest:
- *
- *     vm_map_zone:            used to allocate maps.
- *     vm_map_entry_zone:      used to allocate map entries.
- *     vm_map_kentry_zone:     used to allocate map entries for the kernel.
- *
- *     The kernel allocates map entries from a special zone that is initially
- *     "crammed" with memory.  It would be difficult (perhaps impossible) for
- *     the kernel to allocate more memory to a entry zone when it became
- *     empty since the very act of allocating memory implies the creation
- *     of a new entry.
- */
-
 static void            *map_data;
 static vm_map_size_t   map_data_size;
 static void            *kentry_data;
@@ -433,12 +411,21 @@ static int                kentry_count = 2048;            /* to init kentry_data_size */
 /* Skip acquiring locks if we're in the midst of a kernel core dump */
 extern unsigned int not_in_kdp;
 
-#ifdef __i386__
+#if CONFIG_CODE_DECRYPTION
+/*
+ * vm_map_apple_protected:
+ * This remaps the requested part of the object with an object backed by 
+ * the decrypting pager.
+ * crypt_info contains entry points and session data for the crypt module.
+ * The crypt_info block will be copied by vm_map_apple_protected. The data structures
+ * referenced in crypt_info must remain valid until crypt_info->crypt_end() is called.
+ */
 kern_return_t
 vm_map_apple_protected(
        vm_map_t        map,
        vm_map_offset_t start,
-       vm_map_offset_t end)
+       vm_map_offset_t end,
+       struct pager_crypt_info *crypt_info)
 {
        boolean_t       map_locked;
        kern_return_t   kr;
@@ -454,7 +441,7 @@ vm_map_apple_protected(
        if (!vm_map_lookup_entry(map,
                                 start,
                                 &map_entry) ||
-           map_entry->vme_end != end ||
+           map_entry->vme_end < end ||
            map_entry->is_sub_map) {
                /* that memory is not properly mapped */
                kr = KERN_INVALID_ARGUMENT;
@@ -475,7 +462,7 @@ vm_map_apple_protected(
         * it.
         */
         
-       protected_mem_obj = apple_protect_pager_setup(protected_object);
+       protected_mem_obj = apple_protect_pager_setup(protected_object, crypt_info);
        if (protected_mem_obj == NULL) {
                kr = KERN_FAILURE;
                goto done;
@@ -499,10 +486,6 @@ vm_map_apple_protected(
                                     map_entry->max_protection,
                                     map_entry->inheritance);
        assert(map_addr == start);
-       if (kr == KERN_SUCCESS) {
-               /* let the pager know that this mem_obj is mapped */
-               apple_protect_pager_map(protected_mem_obj);
-       }
        /*
         * Release the reference obtained by apple_protect_pager_setup().
         * The mapping (if it succeeded) is now holding a reference on the
@@ -516,9 +499,30 @@ done:
        }
        return kr;
 }
-#endif /* __i386__ */
+#endif /* CONFIG_CODE_DECRYPTION */
 
 
+/*
+ *     vm_map_init:
+ *
+ *     Initialize the vm_map module.  Must be called before
+ *     any other vm_map routines.
+ *
+ *     Map and entry structures are allocated from zones -- we must
+ *     initialize those zones.
+ *
+ *     There are three zones of interest:
+ *
+ *     vm_map_zone:            used to allocate maps.
+ *     vm_map_entry_zone:      used to allocate map entries.
+ *     vm_map_kentry_zone:     used to allocate map entries for the kernel.
+ *
+ *     The kernel allocates map entries from a special zone that is initially
+ *     "crammed" with memory.  It would be difficult (perhaps impossible) for
+ *     the kernel to allocate more memory to a entry zone when it became
+ *     empty since the very act of allocating memory implies the creation
+ *     of a new entry.
+ */
 void
 vm_map_init(
        void)
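
Per the new block comment above, vm_map_apple_protected() copies the crypt_info block itself, but anything crypt_ops points at must outlive the mapping: it is released only when the pager calls crypt_end() at termination. A hedged call-site sketch (segment bounds and callbacks are placeholders; field names follow the apple protect pager hunks earlier in this diff):

    struct pager_crypt_info ci;
    kern_return_t           kr;

    ci.page_decrypt = my_page_decrypt;
    ci.crypt_end    = my_crypt_end;
    ci.crypt_ops    = NULL;             /* session state; must stay valid until crypt_end() */

    kr = vm_map_apple_protected(map, seg_start, seg_end, &ci);
    /* note the relaxed lookup above: the range may now end before the
     * underlying map entry does (vme_end < end is the only error case;
     * the old code demanded vme_end == end) */
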
@@ -612,6 +616,11 @@ vm_map_create(
        result->wiring_required = FALSE;
        result->no_zero_fill = FALSE;
        result->mapped = FALSE;
+#if CONFIG_EMBEDDED
+       result->prot_copy_allow = FALSE;
+#else
+       result->prot_copy_allow = TRUE;
+#endif
        result->wait_for_space = FALSE;
        result->first_free = vm_map_to_entry(result);
        result->hint = vm_map_to_entry(result);
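
result->prot_copy_allow, initialized above (FALSE on CONFIG_EMBEDDED, TRUE otherwise), is consulted by the vm_map_protect() hunk at the very end of this diff: a VM_PROT_COPY request against a map that disallows it now fails up front. A caller's-eye sketch (the wrapper is hypothetical):

    static kern_return_t
    try_prot_copy(vm_map_t map, vm_map_offset_t start, vm_map_offset_t end,
                  vm_prot_t cur_prot)
    {
            kern_return_t kr;

            kr = vm_map_protect(map, start, end,
                                cur_prot | VM_PROT_COPY,  /* ask for a private copy */
                                FALSE);                   /* set_maximum */
            if (kr == KERN_PROTECTION_FAILURE) {
                    /* map->prot_copy_allow is FALSE (the embedded default) */
            }
            return kr;
    }
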
@@ -1494,9 +1503,9 @@ static unsigned int vm_map_enter_restore_failures = 0;
 kern_return_t
 vm_map_enter(
        vm_map_t                map,
-       vm_map_offset_t *address,       /* IN/OUT */
+       vm_map_offset_t         *address,       /* IN/OUT */
        vm_map_size_t           size,
-       vm_map_offset_t mask,
+       vm_map_offset_t         mask,
        int                     flags,
        vm_object_t             object,
        vm_object_offset_t      offset,
@@ -1521,6 +1530,32 @@ vm_map_enter(
        boolean_t               is_submap = ((flags & VM_FLAGS_SUBMAP) != 0);
        char                    alias;
        vm_map_offset_t         effective_min_offset, effective_max_offset;
+       kern_return_t           kr;
+
+#if CONFIG_EMBEDDED
+       if (cur_protection & VM_PROT_WRITE) {
+               if (cur_protection & VM_PROT_EXECUTE) {
+                       printf("EMBEDDED: %s curprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__);
+                       cur_protection &= ~VM_PROT_EXECUTE;
+               }
+       }
+       if (max_protection & VM_PROT_WRITE) {
+               if (max_protection & VM_PROT_EXECUTE) {
+                       /* Right now all kinds of data segments are RWX. No point in logging that. */
+                       /* printf("EMBEDDED: %s maxprot cannot be write+execute. turning off execute\n", __PRETTY_FUNCTION__); */
+                       
+                       /* Try to take a hint from curprot. If curprot is not writable,
+                        * make maxprot not writable. Otherwise make it not executable. 
+                        */
+                       if((cur_protection & VM_PROT_WRITE) == 0) {
+                               max_protection &= ~VM_PROT_WRITE;
+                       } else {
+                               max_protection &= ~VM_PROT_EXECUTE;
+                       }
+               }
+       }
+       assert ((cur_protection | max_protection) == max_protection);
+#endif /* CONFIG_EMBEDDED */
 
        if (is_submap) {
                if (purgable) {
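
The CONFIG_EMBEDDED block above enforces a W^X policy at mapping time: a region is never handed out both writable and executable, and when maxprot would allow it, curprot's writability picks which bit to drop. The same demotion as a standalone helper (hypothetical name; the logic mirrors the hunk):

    static void
    wx_demote(vm_prot_t *cur, vm_prot_t *max)
    {
            /* never grant write+execute together */
            if ((*cur & VM_PROT_WRITE) && (*cur & VM_PROT_EXECUTE))
                    *cur &= ~VM_PROT_EXECUTE;

            if ((*max & VM_PROT_WRITE) && (*max & VM_PROT_EXECUTE)) {
                    if ((*cur & VM_PROT_WRITE) == 0)
                            *max &= ~VM_PROT_WRITE;    /* cur is read/execute: keep execute */
                    else
                            *max &= ~VM_PROT_EXECUTE;  /* cur is writable: keep write */
            }
            /* afterwards (cur | max) == max, as the hunk asserts */
    }
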
@@ -1925,8 +1960,6 @@ StartAgain: ;
                                }
                        }
                        if (use_pmap && submap->pmap != NULL) {
-                               kern_return_t kr;
-
                                kr = pmap_nest(map->pmap,
                                               submap->pmap,
                                               tmp_start,
@@ -1983,13 +2016,56 @@ StartAgain: ;
        }
 
 BailOut: ;
-       if (result == KERN_SUCCESS &&
-           pmap_empty &&
-           !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
-               assert(vm_map_pmap_is_empty(map, *address, *address+size));
-       }
+       if (result == KERN_SUCCESS) {
+               vm_prot_t pager_prot;
+               memory_object_t pager;
 
-       if (result != KERN_SUCCESS) {
+               if (pmap_empty &&
+                   !(flags & VM_FLAGS_NO_PMAP_CHECK)) {
+                       assert(vm_map_pmap_is_empty(map,
+                                                   *address,
+                                                   *address+size));
+               }
+
+               /*
+                * For "named" VM objects, let the pager know that the
+                * memory object is being mapped.  Some pagers need to keep
+                * track of this, to know when they can reclaim the memory
+                * object, for example.
+                * VM calls memory_object_map() for each mapping (specifying
+                * the protection of each mapping) and calls
+                * memory_object_last_unmap() when all the mappings are gone.
+                */
+               pager_prot = max_protection;
+               if (needs_copy) {
+                       /*
+                        * Copy-On-Write mapping: won't modify
+                        * the memory object.
+                        */
+                       pager_prot &= ~VM_PROT_WRITE;
+               }
+               if (!is_submap &&
+                   object != VM_OBJECT_NULL &&
+                   object->named &&
+                   object->pager != MEMORY_OBJECT_NULL) {
+                       vm_object_lock(object);
+                       pager = object->pager;
+                       if (object->named &&
+                           pager != MEMORY_OBJECT_NULL) {
+                               assert(object->pager_ready);
+                               vm_object_mapping_wait(object, THREAD_UNINT);
+                               vm_object_mapping_begin(object);
+                               vm_object_unlock(object);
+
+                               kr = memory_object_map(pager, pager_prot);
+                               assert(kr == KERN_SUCCESS);
+
+                               vm_object_lock(object);
+                               vm_object_mapping_end(object);
+                       }
+                       vm_object_unlock(object);
+               }
+       } else {
                if (new_mapping_established) {
                        /*
                         * We have to get rid of the new mappings since we
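
The long comment above defines the pager notification protocol this commit introduces: memory_object_map() once per mapping, memory_object_last_unmap() when the final mapping disappears, with the object's new mapping_in_progress bit serializing the lock-dropping upcalls. Condensed from the hunk (no new API here; kr and pager_prot are declared earlier in the function), the map-side sequence is:

    vm_object_lock(object);
    if (object->named && object->pager != MEMORY_OBJECT_NULL) {
            assert(object->pager_ready);
            vm_object_mapping_wait(object, THREAD_UNINT); /* one mapper at a time */
            vm_object_mapping_begin(object);              /* mapping_in_progress = TRUE */
            vm_object_unlock(object);                     /* drop lock across the upcall */

            kr = memory_object_map(object->pager, pager_prot);
            assert(kr == KERN_SUCCESS);

            vm_object_lock(object);
            vm_object_mapping_end(object);                /* clears the bit, wakes waiters */
    }
    vm_object_unlock(object);

The unmap side, in vm_object_deallocate() further down this diff, brackets memory_object_last_unmap() with the same begin/end pair.
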
@@ -2120,7 +2196,7 @@ vm_map_enter_mem_object(
        map_addr = vm_map_trunc_page(*address);
        map_size = vm_map_round_page(initial_size);
        size = vm_object_round_page(initial_size);      
-       
+
        /*
         * Find the vm object (if any) corresponding to this port.
         */
@@ -2318,6 +2394,50 @@ vm_map_enter_mem_object(
                return KERN_INVALID_OBJECT;
        }
 
+       if (object != VM_OBJECT_NULL &&
+           object->named &&
+           object->pager != MEMORY_OBJECT_NULL &&
+           object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
+               memory_object_t pager;
+               vm_prot_t       pager_prot;
+               kern_return_t   kr;
+
+               /*
+                * For "named" VM objects, let the pager know that the
+                * memory object is being mapped.  Some pagers need to keep
+                * track of this, to know when they can reclaim the memory
+                * object, for example.
+                * VM calls memory_object_map() for each mapping (specifying
+                * the protection of each mapping) and calls
+                * memory_object_last_unmap() when all the mappings are gone.
+                */
+               pager_prot = max_protection;
+               if (copy) {
+                       /*
+                        * Copy-On-Write mapping: won't modify the
+                        * memory object.
+                        */
+                       pager_prot &= ~VM_PROT_WRITE;
+               }
+               vm_object_lock(object);
+               pager = object->pager;
+               if (object->named &&
+                   pager != MEMORY_OBJECT_NULL &&
+                   object->copy_strategy != MEMORY_OBJECT_COPY_NONE) {
+                       assert(object->pager_ready);
+                       vm_object_mapping_wait(object, THREAD_UNINT);
+                       vm_object_mapping_begin(object);
+                       vm_object_unlock(object);
+
+                       kr = memory_object_map(pager, pager_prot);
+                       assert(kr == KERN_SUCCESS);
+
+                       vm_object_lock(object);
+                       vm_object_mapping_end(object);
+               }
+               vm_object_unlock(object);
+       }
+
        /*
         *      Perform the copy if requested
         */
@@ -3035,6 +3155,11 @@ vm_map_protect(
 
        vm_map_lock(map);
 
+       if ((new_prot & VM_PROT_COPY) && !map->prot_copy_allow) {
+               vm_map_unlock(map);
+               return(KERN_PROTECTION_FAILURE);
+       }
+       
        /* LP64todo - remove this check when vm_map_commpage64()
         * no longer has to stuff in a map_entry for the commpage
         * above the map's max_offset.
@@ -3085,6 +3210,15 @@ vm_map_protect(
                        }
                }
 
+#if CONFIG_EMBEDDED
+               if (new_prot & VM_PROT_WRITE) {
+                       if (new_prot & VM_PROT_EXECUTE) {
+                               printf("EMBEDDED: %s can't have both write and exec at the same time\n", __FUNCTION__);
+                               new_prot &= ~VM_PROT_EXECUTE;
+                       }
+               }
+#endif
+
                prev = current->vme_end;
                current = current->vme_next;
        }
@@ -6101,15 +6235,6 @@ vm_map_copy_overwrite_aligned(
                        entry->wired_count = 0;
                        entry->user_wired_count = 0;
                        offset = entry->offset = copy_entry->offset;
-                       /*
-                        * XXX FBDP
-                        * We should propagate the submap entry's protections
-                        * here instead of forcing VM_PROT_ALL.
-                        * Or better yet, we should inherit the protection
-                        * of the copy_entry.
-                        */
-                       entry->protection = VM_PROT_ALL;
-                       entry->max_protection = VM_PROT_ALL;
 
                        vm_map_copy_entry_unlink(copy, copy_entry);
                        vm_map_copy_entry_dispose(copy, copy_entry);
@@ -10853,6 +10978,11 @@ restart_page_query:
        if (m->speculative)
                *disposition |= VM_PAGE_QUERY_PAGE_SPECULATIVE;
 
+       if (m->cs_validated)
+               *disposition |= VM_PAGE_QUERY_PAGE_CS_VALIDATED;
+       if (m->cs_tainted)
+               *disposition |= VM_PAGE_QUERY_PAGE_CS_TAINTED;
+
 page_query_done:
        vm_object_unlock(object);
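
With cs_validated and cs_tainted exported through the page-query disposition, user space can observe a page's code-signing state. A hedged user-level sketch, assuming the vm_map_page_query() MIG routine of this era (error handling trimmed):

    #include <stdio.h>
    #include <mach/mach.h>

    /* Sketch: ask the kernel about one page; the two CS bits surfaced by
     * the hunk above ride along in "disposition". */
    static void
    report_page_cs_state(vm_address_t addr)
    {
            integer_t disposition = 0, ref_count = 0;

            if (vm_map_page_query(mach_task_self(), addr,
                                  &disposition, &ref_count) != KERN_SUCCESS)
                    return;
            if (disposition & VM_PAGE_QUERY_PAGE_CS_VALIDATED)
                    printf("page passed code-signature validation\n");
            if (disposition & VM_PAGE_QUERY_PAGE_CS_TAINTED)
                    printf("page was modified after validation\n");
    }
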
 
@@ -11499,3 +11629,11 @@ vm_map_set_user_wire_limit(vm_map_t    map,
 {
        map->user_wire_limit = limit;
 }
+
+void           vm_map_set_prot_copy_allow(vm_map_t             map,
+                                          boolean_t            allow)
+{
+       vm_map_lock(map);
+       map->prot_copy_allow = allow;
+       vm_map_unlock(map);
+};
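
vm_map_set_prot_copy_allow() is the switch consumed by the new VM_PROT_COPY check in vm_map_protect() earlier in this diff: while the flag is FALSE, vm_protect(..., VM_PROT_COPY) on that map fails with KERN_PROTECTION_FAILURE. A hedged usage sketch — task_set_debuggable() and its policy are ours for illustration; get_task_map() is the existing accessor:

    /* Illustrative only: permit copy-on-write remapping on a map we
     * consider debuggable, keep it forbidden otherwise. */
    void
    task_set_debuggable(task_t task, boolean_t debuggable)
    {
            vm_map_set_prot_copy_allow(get_task_map(task), debuggable);
    }
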
index 0b181f35b21aa0146387bc4511076a6dc932a20f..423930b9726f701bf232320fa577dac22cebf271 100644 (file)
@@ -284,6 +284,7 @@ struct _vm_map {
        boolean_t               wiring_required;/* All memory wired? */
        boolean_t               no_zero_fill;   /* No zero fill absent pages */
        boolean_t               mapped;         /* has this map been mapped */
+       boolean_t               prot_copy_allow;/* is VM_PROT_COPY allowed on this map */
        unsigned int            timestamp;      /* Version number */
        unsigned int            color_rr;       /* next color (not protected by a lock) */
 } ;
@@ -923,6 +924,10 @@ extern void                vm_map_set_user_wire_limit(
                                vm_map_t                map,
                                vm_size_t               limit);
 
+extern void            vm_map_set_prot_copy_allow(
+                               vm_map_t                map,
+                               boolean_t               allow);
+
 #ifdef MACH_KERNEL_PRIVATE
 
 /*
index 680c07f12f506ffa56905f73ce8ceebad812fcc6..d290fa8015ce733eb34863f4dbe9a129afeec78a 100644 (file)
@@ -520,6 +520,7 @@ vm_object_bootstrap(void)
        /* cache bitfields */
        vm_object_template.wimg_bits = VM_WIMG_DEFAULT;
        vm_object_template.code_signed = FALSE;
+       vm_object_template.mapping_in_progress = FALSE;
        vm_object_template.not_in_use = 0;
 #ifdef UPL_DEBUG
        vm_object_template.uplq.prev = NULL;
@@ -753,10 +754,12 @@ vm_object_deallocate(
                        /* more mappers for this object */
 
                        if (pager != MEMORY_OBJECT_NULL) {
+                               vm_object_mapping_wait(object, THREAD_UNINT);
+                               vm_object_mapping_begin(object);
                                vm_object_unlock(object);
                                vm_object_cache_unlock();
                                        
-                               memory_object_unmap(pager);
+                               memory_object_last_unmap(pager);
 
                                try_failed_count = 0;
                                for (;;) {
@@ -777,6 +780,8 @@ vm_object_deallocate(
                                        mutex_pause(try_failed_count);  /* wait a bit */
                                }
                                assert(object->ref_count > 0);
+
+                               vm_object_mapping_end(object);
                        }
                }
 
@@ -2210,7 +2215,12 @@ vm_object_copy_slowly(
                                        /* fall thru */
 
                                case VM_FAULT_INTERRUPTED:
+                                       vm_object_lock(new_object);
+                                       vm_page_lock_queues();
                                        vm_page_free(new_page);
+                                       vm_page_unlock_queues();
+                                       vm_object_unlock(new_object);
+
                                        vm_object_deallocate(new_object);
                                        vm_object_deallocate(src_object);
                                        *_result_object = VM_OBJECT_NULL;
@@ -2225,9 +2235,11 @@ vm_object_copy_slowly(
                                         *          any page fails [chosen]
                                         */
 
+                                       vm_object_lock(new_object);
                                        vm_page_lock_queues();
                                        vm_page_free(new_page);
                                        vm_page_unlock_queues();
+                                       vm_object_unlock(new_object);
 
                                        vm_object_deallocate(new_object);
                                        vm_object_deallocate(src_object);
@@ -3663,7 +3675,7 @@ vm_object_do_bypass(
         *      Since its ref_count was at least 2, it
         *      will not vanish; so we don't need to call
         *      vm_object_deallocate.
-        *      [FBDP: that doesn't seem to be true any more]
+        *      [with a caveat for "named" objects]
         * 
         *      The res_count on the backing object is
         *      conditionally decremented.  It's possible
@@ -3681,7 +3693,8 @@ vm_object_do_bypass(
         *      is temporary and cachable.
 #endif
         */
-       if (backing_object->ref_count > 1) {
+       if (backing_object->ref_count > 2 ||
+           (!backing_object->named && backing_object->ref_count > 1)) {
                vm_object_lock_assert_exclusive(backing_object);
                backing_object->ref_count--;
 #if    TASK_SWAPPER
@@ -4067,10 +4080,11 @@ vm_object_collapse(
                         * backing object that show through to the object.
                         */
 #if    MACH_PAGEMAP
-                       if (backing_rcount || backing_object->existence_map) {
+                       if (backing_rcount || backing_object->existence_map)
 #else
-                       if (backing_rcount) {
+                       if (backing_rcount)
 #endif /* MACH_PAGEMAP */
+                       {
                                offset = hint_offset;
                                
                                while((offset =
@@ -5132,6 +5146,9 @@ vm_object_lock_request(
        return (KERN_SUCCESS);
 }
 
+unsigned int vm_page_purged_wired = 0;
+unsigned int vm_page_purged_busy = 0;
+unsigned int vm_page_purged_others = 0;
 /*
  * Empty a purgeable object by grabbing the physical pages assigned to it and
  * putting them on the free queue without writing them to backing store, etc.
@@ -5200,18 +5217,38 @@ vm_object_purge(vm_object_t object)
                        /* resume with the current page and a new quota */
                        purge_loop_quota = PURGE_LOOP_QUOTA;
                }
-                               
-                      
-               if (p->busy || p->cleaning || p->laundry ||
-                   p->list_req_pending) {
-                       /* page is being acted upon, so don't mess with it */
-                       continue;
-               }
+
                if (p->wire_count) {
                        /* don't discard a wired page */
+                       vm_page_purged_wired++;
+
+               skip_page:
+                       /*
+                        * This page is no longer "purgeable",
+                        * for accounting purposes.
+                        */
+                       assert(vm_page_purgeable_count > 0);
+                       vm_page_purgeable_count--;
                        continue;
                }
 
+               if (p->busy) {
+                       /*
+                        * We can't reclaim a busy page but we can deactivate
+                        * it (if it's not wired) to make sure it gets
+                        * considered by vm_pageout_scan() later.
+                        */
+                       vm_page_deactivate(p);
+                       vm_page_purged_busy++;
+                       goto skip_page;
+               }
+
+               if (p->cleaning || p->laundry || p->list_req_pending) {
+                       /* page is being acted upon, so don't mess with it */
+                       vm_page_purged_others++;
+                       goto skip_page;
+               }
+
                assert(!p->laundry);
                assert(p->object != kernel_object);
 
@@ -5237,6 +5274,12 @@ vm_object_purge(vm_object_t object)
                }
 
                vm_page_free_prepare(p);
+               /*
+                * vm_page_purgeable_count is not updated when freeing
+                * a page from an "empty" object, so do it explicitly here.
+                */
+               assert(vm_page_purgeable_count > 0);
+               vm_page_purgeable_count--;
 
                /* ... and put it on our queue of pages to free */
                assert(p->pageq.next == NULL &&
@@ -5379,11 +5422,11 @@ vm_object_purgable_control(
 
                if (old_state != VM_PURGABLE_NONVOLATILE) {
                        vm_page_lock_queues();
-                       assert(vm_page_purgeable_count >=
-                              object->resident_page_count);
-                       vm_page_purgeable_count -= object->resident_page_count;
-
                        if (old_state==VM_PURGABLE_VOLATILE) {
+                               assert(vm_page_purgeable_count >=
+                                      object->resident_page_count);
+                               vm_page_purgeable_count -= object->resident_page_count;
+
                                assert(object->objq.next != NULL && object->objq.prev != NULL); /* object should be on a queue */
                                purgeable_q_t queue = vm_purgeable_object_remove(object);
                                assert(queue);
@@ -5397,13 +5440,14 @@ vm_object_purgable_control(
 
        case VM_PURGABLE_VOLATILE:
 
-               if ((old_state != VM_PURGABLE_NONVOLATILE) && (old_state != VM_PURGABLE_VOLATILE))
+               if (old_state == VM_PURGABLE_EMPTY &&
+                   object->resident_page_count == 0)
                        break;
                purgeable_q_t queue;
         
                /* find the correct queue */
                if ((*state&VM_PURGABLE_ORDERING_MASK) == VM_PURGABLE_ORDERING_OBSOLETE)
-                       queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
+                       queue = &purgeable_queues[PURGEABLE_Q_TYPE_OBSOLETE];
                else {
                        if ((*state&VM_PURGABLE_BEHAVIOR_MASK) == VM_PURGABLE_BEHAVIOR_FIFO)
                                queue = &purgeable_queues[PURGEABLE_Q_TYPE_FIFO];
@@ -5411,7 +5455,8 @@ vm_object_purgable_control(
                                queue = &purgeable_queues[PURGEABLE_Q_TYPE_LIFO];
                }
         
-               if (old_state == VM_PURGABLE_NONVOLATILE) {
+               if (old_state == VM_PURGABLE_NONVOLATILE ||
+                   old_state == VM_PURGABLE_EMPTY) {
                        /* try to add token... this can fail */
                        vm_page_lock_queues();
 
@@ -5474,10 +5519,12 @@ vm_object_purgable_control(
                                vm_purgeable_token_delete_first(old_queue);
                        }
 
-                       if (old_state==VM_PURGABLE_NONVOLATILE) {
-                               vm_page_purgeable_count += object->resident_page_count;
+                       if (old_state==VM_PURGABLE_NONVOLATILE ||
+                           old_state == VM_PURGABLE_EMPTY) {
                                vm_page_lock_queues();
+                               vm_page_purgeable_count += object->resident_page_count;
                        }
+                       object->purgable = VM_PURGABLE_VOLATILE;
                        (void) vm_object_purge(object);
                        vm_page_unlock_queues();
                }
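
The NONVOLATILE/VOLATILE/EMPTY transitions handled above are driven from user space through vm_purgable_control(). A minimal sketch exercising the state machine (real Mach API of this era; error checks trimmed, and the VM_PURGABLE_STATE_MASK extraction assumes ordering/behavior bits are also returned):

    #include <mach/mach.h>

    static void
    purgeable_demo(void)
    {
            vm_address_t addr = 0;
            int state;

            /* allocate a purgeable megabyte */
            vm_allocate(mach_task_self(), &addr, 1024 * 1024,
                        VM_FLAGS_ANYWHERE | VM_FLAGS_PURGABLE);

            /* mark it volatile with FIFO aging */
            state = VM_PURGABLE_VOLATILE | VM_PURGABLE_BEHAVIOR_FIFO;
            vm_purgable_control(mach_task_self(), addr,
                                VM_PURGABLE_SET_STATE, &state);

            /* ... memory pressure may invoke vm_object_purge() meanwhile ... */

            vm_purgable_control(mach_task_self(), addr,
                                VM_PURGABLE_GET_STATE, &state);
            if ((state & VM_PURGABLE_STATE_MASK) == VM_PURGABLE_EMPTY) {
                    /* contents were reclaimed; regenerate before reuse */
            }
    }
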
index a093bf2f4a313038e37bd1b66227741b855c2878..68f60ef83c2dc519ebab7181241c94ede0ee2bc1 100644 (file)
@@ -288,7 +288,8 @@ struct vm_object {
                code_signed:1,          /* pages are signed and should be
                                           validated; the signatures are stored
                                           with the pager */
-               not_in_use:23;          /* for expansion */
+               mapping_in_progress:1,  /* pager being mapped/unmapped */
+               not_in_use:22;          /* for expansion */
 
 #ifdef UPL_DEBUG
        queue_head_t            uplq;           /* List of outstanding upls */
@@ -637,6 +638,7 @@ extern kern_return_t vm_object_range_op(
 #define        VM_OBJECT_EVENT_INITIALIZED             0
 #define        VM_OBJECT_EVENT_PAGER_READY             1
 #define        VM_OBJECT_EVENT_PAGING_IN_PROGRESS      2
+#define        VM_OBJECT_EVENT_MAPPING_IN_PROGRESS     3
 #define        VM_OBJECT_EVENT_LOCK_IN_PROGRESS        4
 #define        VM_OBJECT_EVENT_UNCACHING               5
 #define        VM_OBJECT_EVENT_COPY_CALL               6
@@ -725,6 +727,38 @@ extern kern_return_t vm_object_range_op(
        MACRO_END
 
 
+#define vm_object_mapping_begin(object)                                \
+       MACRO_BEGIN                                                     \
+       vm_object_lock_assert_exclusive((object));                      \
+       assert(! (object)->mapping_in_progress);                        \
+       (object)->mapping_in_progress = TRUE;                           \
+       MACRO_END
+
+#define vm_object_mapping_end(object)                                  \
+       MACRO_BEGIN                                                     \
+       vm_object_lock_assert_exclusive((object));                      \
+       assert((object)->mapping_in_progress);                          \
+       (object)->mapping_in_progress = FALSE;                          \
+       vm_object_wakeup((object),                                      \
+                        VM_OBJECT_EVENT_MAPPING_IN_PROGRESS);          \
+       MACRO_END
+
+#define vm_object_mapping_wait(object, interruptible)                  \
+       MACRO_BEGIN                                                     \
+       vm_object_lock_assert_exclusive((object));                      \
+       while ((object)->mapping_in_progress) {                         \
+               wait_result_t   _wr;                                    \
+                                                                       \
+               _wr = vm_object_sleep((object),                         \
+                                     VM_OBJECT_EVENT_MAPPING_IN_PROGRESS, \
+                                     (interruptible));                 \
+               /*XXX if ((interruptible) && (_wr != THREAD_AWAKENED))*/\
+                       /*XXX break; */                                 \
+       }                                                               \
+       assert(!(object)->mapping_in_progress);                         \
+       MACRO_END
+
+
 
 #define OBJECT_LOCK_SHARED     0
 #define OBJECT_LOCK_EXCLUSIVE  1
index 3319ca213c152c079d192e7460775fdcd865ffcc..89310abe8300f339894bbbf47038cfe134921aba 100644 (file)
@@ -531,6 +531,9 @@ extern void         vm_page_gobble(
                                        vm_page_t      page);
 
 extern void            vm_page_validate_cs(vm_page_t   page);
+extern void            vm_page_validate_cs_mapped(
+       vm_page_t       page,
+       const void      *kaddr);
 
 /*
  *     Functions implemented as macros. m->wanted and m->busy are
index 18ff4907a49e9489ea1305ada1d10122230c3358..9502c60ae530acbc70cbec671724f33d24cba019 100644 (file)
 
 
 #ifndef VM_PAGEOUT_BURST_ACTIVE_THROTTLE   /* maximum iterations of the active queue to move pages to inactive */
-#ifdef CONFIG_EMBEDDED
-#define VM_PAGEOUT_BURST_ACTIVE_THROTTLE  2048
-#else
 #define VM_PAGEOUT_BURST_ACTIVE_THROTTLE  100
 #endif
-#endif
 
 #ifndef VM_PAGEOUT_BURST_INACTIVE_THROTTLE  /* maximum iterations of the inactive queue w/o stealing/cleaning a page */
 #ifdef CONFIG_EMBEDDED
@@ -1014,7 +1010,7 @@ Restart:
                 *      Don't sweep through active queue more than the throttle
                 *      which should be kept relatively low
                 */
-               active_burst_count = vm_pageout_burst_active_throttle;
+               active_burst_count = MIN(vm_pageout_burst_active_throttle, vm_page_active_count);
 
                /*
                 *      Move pages from active to inactive.
@@ -1171,9 +1167,23 @@ done_moving_active_pages:
                         * inactive target still not met... keep going
                         * until we get the queues balanced
                         */
+
+                       /*
+                        *      Recalculate vm_page_inactive_target.
+                        */
+                       vm_page_inactive_target = VM_PAGE_INACTIVE_TARGET(vm_page_active_count +
+                                                                         vm_page_inactive_count +
+                                                                         vm_page_speculative_count);
+
+#ifndef        CONFIG_EMBEDDED
+                       /*
+                        * XXX: if no active pages can be reclaimed, pageout scan can be stuck trying 
+                        *      to balance the queues
+                        */
                        if (((vm_page_inactive_count + vm_page_speculative_count) < vm_page_inactive_target) &&
                            !queue_empty(&vm_page_queue_active))
                                continue;
+#endif
 
                        mutex_lock(&vm_page_queue_free_lock);
 
@@ -1257,7 +1267,10 @@ done_moving_active_pages:
                        msecs = vm_pageout_empty_wait;
                        goto vm_pageout_scan_delay;
 
-               } else if (inactive_burst_count >= vm_pageout_burst_inactive_throttle) {
+               } else if (inactive_burst_count >=
+                          MIN(vm_pageout_burst_inactive_throttle,
+                              (vm_page_inactive_count +
+                               vm_page_speculative_count))) {
                        vm_pageout_scan_burst_throttle++;
                        msecs = vm_pageout_burst_wait;
                        goto vm_pageout_scan_delay;
@@ -3307,15 +3320,17 @@ check_busy:
                        upl->highest_page = dst_page->phys_page;
                if (user_page_list) {
                        user_page_list[entry].phys_addr = dst_page->phys_page;
-                       user_page_list[entry].dirty     = dst_page->dirty;
                        user_page_list[entry].pageout   = dst_page->pageout;
                        user_page_list[entry].absent    = dst_page->absent;
+                       user_page_list[entry].dirty     = dst_page->dirty;
                        user_page_list[entry].precious  = dst_page->precious;
-
+                       user_page_list[entry].device    = FALSE;
                        if (dst_page->clustered == TRUE)
                                user_page_list[entry].speculative = dst_page->speculative;
                        else
                                user_page_list[entry].speculative = FALSE;
+                       user_page_list[entry].cs_validated = dst_page->cs_validated;
+                       user_page_list[entry].cs_tainted = dst_page->cs_tainted;
                }
                /*
                 * if UPL_RET_ONLY_ABSENT is set, then
@@ -4003,6 +4018,23 @@ upl_commit_range(
        }
        delayed_unlock = 1;
 
+       if (shadow_object->code_signed) {
+               /*
+                * CODE SIGNING:
+                * If the object is code-signed, do not let this UPL tell
+                * us if the pages are valid or not.  Let the pages be
+                * validated by VM the normal way (when they get mapped or
+                * copied).
+                */
+               flags &= ~UPL_COMMIT_CS_VALIDATED;
+       }
+       if (! page_list) {
+               /*
+                * No page list to get the code-signing info from !?
+                */
+               flags &= ~UPL_COMMIT_CS_VALIDATED;
+       }
+
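
The two guards just added collapse to a single trust rule: UPL-supplied code-signing bits are honored only when the object is not code-signed (signed pages get re-validated by the VM itself) and only when a page list exists to read the bits from. As a predicate (helper name is ours):

    /* Hypothetical predicate equivalent to the two guards above. */
    static boolean_t
    upl_cs_info_trusted(vm_object_t shadow_object, upl_page_info_t *page_list)
    {
            if (shadow_object->code_signed)
                    return FALSE;   /* VM re-validates signed pages itself */
            if (page_list == NULL)
                    return FALSE;   /* no per-page cs bits to copy from */
            return TRUE;
    }
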
        while (xfer_size) {
                vm_page_t       t, m;
 
@@ -4030,60 +4062,34 @@ upl_commit_range(
                                        m = vm_page_lookup(shadow_object, target_offset + object->shadow_offset);
                        }
                }
-               if (m != VM_PAGE_NULL) {
-
-                       clear_refmod = 0;
+               if (m == VM_PAGE_NULL) {
+                       goto commit_next_page;
+               }
 
-                       if (upl->flags & UPL_IO_WIRE) {
+               clear_refmod = 0;
 
-                               vm_page_unwire(m);
-
-                               if (page_list)
-                                       page_list[entry].phys_addr = 0;
+               if (flags & UPL_COMMIT_CS_VALIDATED) {
+                       /*
+                        * CODE SIGNING:
+                        * Set the code signing bits according to
+                        * what the UPL says they should be.
+                        */
+                       m->cs_validated = page_list[entry].cs_validated;
+                       m->cs_tainted = page_list[entry].cs_tainted;
+               }
+               if (upl->flags & UPL_IO_WIRE) {
 
-                               if (flags & UPL_COMMIT_SET_DIRTY)
-                                       m->dirty = TRUE;
-                               else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
-                                       m->dirty = FALSE;
-                                       if (m->cs_validated && !m->cs_tainted) {
-                                               /*
-                                                * CODE SIGNING:
-                                                * This page is no longer dirty
-                                                * but could have been modified,
-                                                * so it will need to be
-                                                * re-validated.
-                                                */
-                                               m->cs_validated = FALSE;
-                                               vm_cs_validated_resets++;
-                                       }
-                                       clear_refmod |= VM_MEM_MODIFIED;
-                               }
-                               if (flags & UPL_COMMIT_INACTIVATE)
-                                       vm_page_deactivate(m);
+                       vm_page_unwire(m);
 
-                               if (clear_refmod)
-                                       pmap_clear_refmod(m->phys_page, clear_refmod);
+                       if (page_list)
+                               page_list[entry].phys_addr = 0;
 
-                               if (flags & UPL_COMMIT_ALLOW_ACCESS) {
-                                       /*
-                                        * We blocked access to the pages in this UPL.
-                                        * Clear the "busy" bit and wake up any waiter
-                                        * for this page.
-                                        */
-                                       PAGE_WAKEUP_DONE(m);
-                               }
-                               goto commit_next_page;
-                       }
-                       /*
-                        * make sure to clear the hardware
-                        * modify or reference bits before
-                        * releasing the BUSY bit on this page
-                        * otherwise we risk losing a legitimate
-                        * change of state
-                        */
-                       if (flags & UPL_COMMIT_CLEAR_DIRTY) {
-                               m->dirty = FALSE;
-                               if (m->cs_validated && !m->cs_tainted) {
+                       if (flags & UPL_COMMIT_SET_DIRTY)
+                               m->dirty = TRUE;
+                       else if (flags & UPL_COMMIT_CLEAR_DIRTY) {
+                               m->dirty = FALSE;
+                               if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
+                                   m->cs_validated && !m->cs_tainted) {
                                        /*
                                         * CODE SIGNING:
                                         * This page is no longer dirty
@@ -4096,109 +4102,89 @@ upl_commit_range(
                                }
                                clear_refmod |= VM_MEM_MODIFIED;
                        }
-                       if (clear_refmod)
-                               pmap_clear_refmod(m->phys_page, clear_refmod);
-
-                       if (page_list) {
-                               upl_page_info_t *p;
+                       
+                       if (flags & UPL_COMMIT_INACTIVATE)
+                               vm_page_deactivate(m);
 
-                               p = &(page_list[entry]);
+                       if (clear_refmod)
+                               pmap_clear_refmod(m->phys_page, clear_refmod);
 
-                               if (p->phys_addr && p->pageout && !m->pageout) {
-                                       m->busy = TRUE;
-                                       m->pageout = TRUE;
-                                       vm_page_wire(m);
-                               } else if (p->phys_addr &&
-                                          !p->pageout && m->pageout &&
-                                          !m->dump_cleaning) {
-                                       m->pageout = FALSE;
-                                       m->absent = FALSE;
-                                       m->overwriting = FALSE;
-                                       vm_page_unwire(m);
+                       if (flags & UPL_COMMIT_ALLOW_ACCESS) {
+                               /*
+                                * We blocked access to the pages in this UPL.
+                                * Clear the "busy" bit and wake up any waiter
+                                * for this page.
+                                */
+                               PAGE_WAKEUP_DONE(m);
+                       }
+                       goto commit_next_page;
+               }
+               /*
+                * make sure to clear the hardware
+                * modify or reference bits before
+                * releasing the BUSY bit on this page
+                * otherwise we risk losing a legitimate
+                * change of state
+                */
+               if (flags & UPL_COMMIT_CLEAR_DIRTY) {
+                       m->dirty = FALSE;
 
-                                       PAGE_WAKEUP_DONE(m);
-                               }
-                               page_list[entry].phys_addr = 0;
+                       if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
+                           m->cs_validated && !m->cs_tainted) {
+                               /*
+                                * CODE SIGNING:
+                                * This page is no longer dirty
+                                * but could have been modified,
+                                * so it will need to be
+                                * re-validated.
+                                */
+                               m->cs_validated = FALSE;
+#if DEVELOPMENT || DEBUG
+                               vm_cs_validated_resets++;
+#endif
                        }
-                       m->dump_cleaning = FALSE;
+                       clear_refmod |= VM_MEM_MODIFIED;
+               }
+               if (clear_refmod)
+                       pmap_clear_refmod(m->phys_page, clear_refmod);
 
-                       if (m->laundry)
-                               vm_pageout_throttle_up(m);
+               if (page_list) {
+                       upl_page_info_t *p;
 
-                       if (m->pageout) {
-                               m->cleaning = FALSE;
-                               m->encrypted_cleaning = FALSE;
+                       p = &(page_list[entry]);
+                       
+                       if (p->phys_addr && p->pageout && !m->pageout) {
+                               m->busy = TRUE;
+                               m->pageout = TRUE;
+                               vm_page_wire(m);
+                       } else if (p->phys_addr &&
+                                  !p->pageout && m->pageout &&
+                                  !m->dump_cleaning) {
                                m->pageout = FALSE;
-#if MACH_CLUSTER_STATS
-                               if (m->wanted) vm_pageout_target_collisions++;
-#endif
-                               m->dirty = FALSE;
-                               if (m->cs_validated && !m->cs_tainted) {
-                                       /*
-                                        * CODE SIGNING:
-                                        * This page is no longer dirty
-                                        * but could have been modified,
-                                        * so it will need to be
-                                        * re-validated.
-                                        */
-                                       m->cs_validated = FALSE;
-                                       vm_cs_validated_resets++;
-                               }
-
-                               if (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED))
-                                       m->dirty = TRUE;
-
-                               if (m->dirty) {
-                                      /*
-                                       * page was re-dirtied after we started
-                                       * the pageout... reactivate it since 
-                                       * we don't know whether the on-disk
-                                       * copy matches what is now in memory
-                                       */
-                                       vm_page_unwire(m);
-
-                                       if (upl->flags & UPL_PAGEOUT) {
-                                               CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
-                                               VM_STAT_INCR(reactivations);
-                                               DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
-                                       }
-                                       PAGE_WAKEUP_DONE(m);
-                               } else {
-                                       /*
-                                        * page has been successfully cleaned
-                                        * go ahead and free it for other use
-                                        */
+                               m->absent = FALSE;
+                               m->overwriting = FALSE;
+                               vm_page_unwire(m);
+                               
+                               PAGE_WAKEUP_DONE(m);
+                       }
+                       page_list[entry].phys_addr = 0;
+               }
+               m->dump_cleaning = FALSE;
 
-                                       if (m->object->internal) {
-                                               DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
-                                       } else {
-                                               DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
-                                       }
+               if (m->laundry)
+                       vm_pageout_throttle_up(m);
 
-                                       vm_page_free(m);
-                                       if (upl->flags & UPL_PAGEOUT) {
-                                               CLUSTER_STAT(vm_pageout_target_page_freed++;)
-
-                                               if (page_list[entry].dirty) {
-                                                       VM_STAT_INCR(pageouts);
-                                                       DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
-                                                       pgpgout_count++;
-                                               }
-                                       }
-                               }
-                               goto commit_next_page;
-                       }
+               if (m->pageout) {
+                       m->cleaning = FALSE;
+                       m->encrypted_cleaning = FALSE;
+                       m->pageout = FALSE;
 #if MACH_CLUSTER_STATS
-                       if (m->wpmapped)
-                               m->dirty = pmap_is_modified(m->phys_page);
-
-                       if (m->dirty)   vm_pageout_cluster_dirtied++;
-                       else            vm_pageout_cluster_cleaned++;
-                       if (m->wanted)  vm_pageout_cluster_collisions++;
+                       if (m->wanted) vm_pageout_target_collisions++;
 #endif
                        m->dirty = FALSE;
-                       if (m->cs_validated && !m->cs_tainted) {
+                       
+                       if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
+                           m->cs_validated && !m->cs_tainted) {
                                /*
                                 * CODE SIGNING:
                                 * This page is no longer dirty
@@ -4207,67 +4193,138 @@ upl_commit_range(
                                 * re-validated.
                                 */
                                m->cs_validated = FALSE;
+#if DEVELOPMENT || DEBUG
                                vm_cs_validated_resets++;
+#endif
                        }
-
-                       if ((m->busy) && (m->cleaning)) {
-                               /*
-                                * the request_page_list case
+                       
+                       if (m->pmapped && (pmap_disconnect(m->phys_page) & VM_MEM_MODIFIED))
+                               m->dirty = TRUE;
+                       
+                       if (m->dirty) {
+                               /*
+                                * page was re-dirtied after we started
+                                * the pageout... reactivate it since 
+                                * we don't know whether the on-disk
+                                * copy matches what is now in memory
                                 */
-                               m->absent = FALSE;
-                               m->overwriting = FALSE;
-                               m->busy = FALSE;
-                       } else if (m->overwriting) {
-                               /*
-                                * alternate request page list, write to 
-                                * page_list case.  Occurs when the original
-                                * page was wired at the time of the list
-                                * request
+                               vm_page_unwire(m);
+                               
+                               if (upl->flags & UPL_PAGEOUT) {
+                                       CLUSTER_STAT(vm_pageout_target_page_dirtied++;)
+                                       VM_STAT_INCR(reactivations);
+                                       DTRACE_VM2(pgrec, int, 1, (uint64_t *), NULL);
+                               }
+                               PAGE_WAKEUP_DONE(m);
+                       } else {
+                               /*
+                                * page has been successfully cleaned
+                                * go ahead and free it for other use
                                 */
-                               assert(m->wire_count != 0);
-                               vm_page_unwire(m);/* reactivates */
-                               m->overwriting = FALSE;
+                               
+                               if (m->object->internal) {
+                                       DTRACE_VM2(anonpgout, int, 1, (uint64_t *), NULL);
+                               } else {
+                                       DTRACE_VM2(fspgout, int, 1, (uint64_t *), NULL);
+                               }
+                               
+                               vm_page_free(m);
+                               
+                               if (upl->flags & UPL_PAGEOUT) {
+                                       CLUSTER_STAT(vm_pageout_target_page_freed++;)
+                                       
+                                       if (page_list[entry].dirty) {
+                                               VM_STAT_INCR(pageouts);
+                                               DTRACE_VM2(pgout, int, 1, (uint64_t *), NULL);
+                                               pgpgout_count++;
+                                       }
+                               }
                        }
-                       m->cleaning = FALSE;
-                       m->encrypted_cleaning = FALSE;
+                       goto commit_next_page;
+               }
+#if MACH_CLUSTER_STATS
+               if (m->wpmapped)
+                       m->dirty = pmap_is_modified(m->phys_page);
+
+               if (m->dirty)   vm_pageout_cluster_dirtied++;
+               else            vm_pageout_cluster_cleaned++;
+               if (m->wanted)  vm_pageout_cluster_collisions++;
+#endif
+               m->dirty = FALSE;
 
+               if (! (flags & UPL_COMMIT_CS_VALIDATED) &&
+                   m->cs_validated && !m->cs_tainted) {
                        /*
-                        * It is a part of the semantic of COPYOUT_FROM
-                        * UPLs that a commit implies cache sync
-                        * between the vm page and the backing store
-                        * this can be used to strip the precious bit
-                        * as well as clean
+                        * CODE SIGNING:
+                        * This page is no longer dirty
+                        * but could have been modified,
+                        * so it will need to be
+                        * re-validated.
                         */
-                       if (upl->flags & UPL_PAGE_SYNC_DONE)
-                               m->precious = FALSE;
-
-                       if (flags & UPL_COMMIT_SET_DIRTY)
-                               m->dirty = TRUE;
+                       m->cs_validated = FALSE;
+#if DEVELOPMENT || DEBUG
+                       vm_cs_validated_resets++;
+#endif
+               }
 
-                       if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) {
+               if ((m->busy) && (m->cleaning)) {
+                       /*
+                        * the request_page_list case
+                        */
+                       m->absent = FALSE;
+                       m->overwriting = FALSE;
+                       m->busy = FALSE;
+               } else if (m->overwriting) {
+                       /*
+                        * alternate request page list, write to 
+                        * page_list case.  Occurs when the original
+                        * page was wired at the time of the list
+                        * request
+                        */
+                       assert(m->wire_count != 0);
+                       vm_page_unwire(m);/* reactivates */
+                       m->overwriting = FALSE;
+               }
+               m->cleaning = FALSE;
+               m->encrypted_cleaning = FALSE;
+               
+               /*
+                * It is a part of the semantic of COPYOUT_FROM
+                * UPLs that a commit implies cache sync
+                * between the vm page and the backing store
+                * this can be used to strip the precious bit
+                * as well as clean
+                */
+               if (upl->flags & UPL_PAGE_SYNC_DONE)
+                       m->precious = FALSE;
+               
+               if (flags & UPL_COMMIT_SET_DIRTY)
+                       m->dirty = TRUE;
+               
+               if ((flags & UPL_COMMIT_INACTIVATE) && !m->clustered && !m->speculative) {
+                       vm_page_deactivate(m);
+               } else if (!m->active && !m->inactive && !m->speculative) {
+                       
+                       if (m->clustered)
+                               vm_page_speculate(m, TRUE);
+                       else if (m->reference)
+                               vm_page_activate(m);
+                       else
                                vm_page_deactivate(m);
-                       } else if (!m->active && !m->inactive && !m->speculative) {
-
-                               if (m->clustered)
-                                       vm_page_speculate(m, TRUE);
-                               else if (m->reference)
-                                       vm_page_activate(m);
-                               else
-                                       vm_page_deactivate(m);
-                       }
-                       if (flags & UPL_COMMIT_ALLOW_ACCESS) {
-                               /*
-                                * We blocked access to the pages in this UPL.
-                                * Clear the "busy" bit on this page before we
-                                * wake up any waiter.
-                                */
-                               m->busy = FALSE;
-                       }
+               }
+               if (flags & UPL_COMMIT_ALLOW_ACCESS) {
                        /*
-                        * Wakeup any thread waiting for the page to be un-cleaning.
+                        * We blocked access to the pages in this UPL.
+                        * Clear the "busy" bit on this page before we
+                        * wake up any waiter.
                         */
-                       PAGE_WAKEUP(m);
+                       m->busy = FALSE;
                }
+               /*
+                * Wakeup any thread waiting for the page to be un-cleaning.
+                */
+               PAGE_WAKEUP(m);
+
 commit_next_page:
                target_offset += PAGE_SIZE_64;
                xfer_size -= PAGE_SIZE;
@@ -5013,15 +5070,17 @@ vm_object_iopl_request(
 
                if (user_page_list) {
                        user_page_list[entry].phys_addr = dst_page->phys_page;
-                       user_page_list[entry].dirty     = dst_page->dirty;
                        user_page_list[entry].pageout   = dst_page->pageout;
                        user_page_list[entry].absent    = dst_page->absent;
+                       user_page_list[entry].dirty     = dst_page->dirty;
                        user_page_list[entry].precious  = dst_page->precious;
-
+                       user_page_list[entry].device    = FALSE;
                        if (dst_page->clustered == TRUE)
                                user_page_list[entry].speculative = dst_page->speculative;
                        else
                                user_page_list[entry].speculative = FALSE;
+                       user_page_list[entry].cs_validated = dst_page->cs_validated;
+                       user_page_list[entry].cs_tainted = dst_page->cs_tainted;
                }
                /*
                 * someone is explicitly grabbing this page...
@@ -5274,6 +5333,7 @@ vm_paging_map_object(
        vm_object_t             object,
        vm_object_offset_t      offset,
        vm_map_size_t           *size,
+       vm_prot_t               protection,
        boolean_t               can_unlock_object)
 {
        kern_return_t           kr;
@@ -5282,7 +5342,7 @@ vm_paging_map_object(
        vm_object_offset_t      object_offset;
        int                     i;
 
-
+       
        if (page != VM_PAGE_NULL && *size == PAGE_SIZE) {
                assert(page->busy);
                /*
@@ -5356,7 +5416,7 @@ vm_paging_map_object(
                        PMAP_ENTER(kernel_pmap,
                                   page_map_offset,
                                   page,
-                                  VM_PROT_DEFAULT,
+                                  protection,
                                   ((int) page->object->wimg_bits &
                                    VM_WIMG_MASK),
                                   TRUE);
@@ -5400,7 +5460,7 @@ vm_paging_map_object(
                          object,
                          object_offset,
                          FALSE,
-                         VM_PROT_DEFAULT,
+                         protection,
                          VM_PROT_ALL,
                          VM_INHERIT_NONE);
        if (kr != KERN_SUCCESS) {
@@ -5445,14 +5505,13 @@ vm_paging_map_object(
                        pmap_sync_page_data_phys(page->phys_page);
                }
                page->pmapped = TRUE;
-               page->wpmapped = TRUE;
                cache_attr = ((unsigned int) object->wimg_bits) & VM_WIMG_MASK;
 
                //assert(pmap_verify_free(page->phys_page));
                PMAP_ENTER(kernel_pmap,
                           *address + page_map_offset,
                           page,
-                          VM_PROT_DEFAULT,
+                          protection,
                           cache_attr,
                           TRUE);
        }
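
vm_paging_map_object() now takes the protection for its transient kernel window instead of hardcoding VM_PROT_DEFAULT, and the PMAP_ENTER calls above thread it through. Callers pick the weakest protection that suffices; the encrypt/decrypt hunks below pass read/write, while a pure validator could map read-only. A sketch of both calls, assuming the function's existing leading parameters (address, page):

    /* In-place encryption must modify the page through the window ... */
    kr = vm_paging_map_object(&kernel_mapping_offset, page,
                              page->object, page->offset,
                              &kernel_mapping_size,
                              VM_PROT_READ | VM_PROT_WRITE,
                              FALSE);

    /* ... whereas merely checksumming a code-signed page (our example,
     * not a call site in this hunk) could settle for read-only: */
    kr = vm_paging_map_object(&kernel_mapping_offset, page,
                              page->object, page->offset,
                              &kernel_mapping_size,
                              VM_PROT_READ,
                              FALSE);
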
@@ -5689,6 +5748,7 @@ vm_page_encrypt(
                                          page->object,
                                          page->offset,
                                          &kernel_mapping_size,
+                                         VM_PROT_READ | VM_PROT_WRITE,
                                          FALSE);
                if (kr != KERN_SUCCESS) {
                        panic("vm_page_encrypt: "
@@ -5813,6 +5873,7 @@ vm_page_decrypt(
                                          page->object,
                                          page->offset,
                                          &kernel_mapping_size,
+                                         VM_PROT_READ | VM_PROT_WRITE,
                                          FALSE);
                if (kr != KERN_SUCCESS) {
                        panic("vm_page_decrypt: "
index c7ab4ca8fb8061378cb94293d417ce4a12db4b06..d5adb8b0fbad2a4736a502dab1c34d86a783fb6c 100644 (file)
@@ -236,6 +236,7 @@ extern kern_return_t vm_paging_map_object(
        vm_object_t             object,
        vm_object_offset_t      offset,
        vm_map_size_t           *size,
+       vm_prot_t               protection,
        boolean_t               can_unlock_object);
 extern void vm_paging_unmap_object(
        vm_object_t             object,
index f1d0f65aff357f59f59e6ae957c620f650d0148a..e9fdc6ef332fc37f78f34107f64879da9c53a7cf 100644 (file)
@@ -144,15 +144,17 @@ extern mach_vm_offset_t mach_get_vm_end(vm_map_t);
 extern vm_offset_t get_vm_start(vm_map_t);
 extern vm_offset_t get_vm_end(vm_map_t);
 
-#ifdef __i386__
+#if CONFIG_CODE_DECRYPTION
+struct pager_crypt_info;
 extern kern_return_t vm_map_apple_protected(
-       vm_map_t        map,
-       vm_map_offset_t start,
-       vm_map_offset_t end);
+                                           vm_map_t    map,
+                                           vm_map_offset_t     start,
+                                           vm_map_offset_t     end,
+                                           struct pager_crypt_info *crypt_info);
 extern void apple_protect_pager_bootstrap(void);
-extern memory_object_t apple_protect_pager_setup(vm_object_t backing_object);
-extern void apple_protect_pager_map(memory_object_t mem_obj);
-#endif /* __i386__ */
+extern memory_object_t apple_protect_pager_setup(vm_object_t backing_object,
+                                                struct pager_crypt_info *crypt_info);
+#endif /* CONFIG_CODE_DECRYPTION */
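
vm_map_apple_protected() now takes a caller-supplied struct pager_crypt_info, letting the apple-protect pager decrypt with per-binary state rather than one global transform. A heavily hedged sketch of wiring one up — the field names are assumptions read off this prototype change, and xor_decrypt is a toy stand-in, not Apple's transform:

    /* Assumed shape: a per-page decrypt hook plus opaque state and a
     * teardown hook (field names are our guess from this diff's usage). */
    static kern_return_t
    xor_decrypt(const void *src, void *dst,
                unsigned long long src_offset, void *crypt_ops)
    {
            const uint8_t *s = src;
            uint8_t *d = dst;
            uint8_t key = *(uint8_t *)crypt_ops;
            unsigned int i;

            for (i = 0; i < PAGE_SIZE; i++)
                    d[i] = s[i] ^ key;      /* toy transform */
            (void)src_offset;
            return KERN_SUCCESS;
    }

    static uint8_t toy_key = 0x5a;
    static struct pager_crypt_info crypt_info = {
            .page_decrypt = xor_decrypt,
            .crypt_end    = NULL,
            .crypt_ops    = &toy_key,
    };

    kr = vm_map_apple_protected(map, seg_start, seg_end, &crypt_info);
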
 
 
 /*
@@ -238,7 +240,10 @@ extern kern_return_t vnode_pager_synchronize(
        memory_object_offset_t  offset,
        vm_size_t               length,
        vm_sync_t               sync_flags);
-extern kern_return_t vnode_pager_unmap(
+extern kern_return_t vnode_pager_map(
+       memory_object_t         mem_obj,
+       vm_prot_t               prot);
+extern kern_return_t vnode_pager_last_unmap(
        memory_object_t         mem_obj);
 extern void vnode_pager_deallocate(
        memory_object_t);
@@ -248,6 +253,9 @@ extern void vnode_pager_vrele(
        struct vnode *vp);
 extern void vnode_pager_release_from_cache(
        int     *);
+extern int  ubc_map(
+       struct vnode *vp,
+       int flags);
 extern void ubc_unmap(
        struct vnode *vp);
 
@@ -282,7 +290,9 @@ extern kern_return_t dp_memory_object_synchronize(memory_object_t,
                                                  memory_object_offset_t,
                                                  vm_size_t,
                                                  vm_sync_t);
-extern kern_return_t dp_memory_object_unmap(memory_object_t);
+extern kern_return_t dp_memory_object_map(memory_object_t,
+                                         vm_prot_t);
+extern kern_return_t dp_memory_object_last_unmap(memory_object_t);
 #endif /* _memory_object_server_ */
 #ifndef _memory_object_default_server_
 extern kern_return_t default_pager_memory_object_create(
@@ -321,7 +331,8 @@ extern kern_return_t device_pager_synchronize(memory_object_t,
                                              memory_object_offset_t,
                                              vm_size_t,
                                              vm_sync_t);
-extern kern_return_t device_pager_unmap(memory_object_t);
+extern kern_return_t device_pager_map(memory_object_t, vm_prot_t);
+extern kern_return_t device_pager_last_unmap(memory_object_t);
 extern kern_return_t device_pager_populate_object(
        memory_object_t         device,
        memory_object_offset_t  offset,
@@ -347,7 +358,7 @@ extern int macx_swapinfo(
        boolean_t               *encrypted_p);
 
 extern void log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot);
-extern int cs_invalid_page(void);
+extern int cs_invalid_page(addr64_t vaddr);
 extern boolean_t cs_validate_page(void *blobs,
                                  memory_object_offset_t offset, 
                                  const void *data,
index ab0f00a42360c7170e5d4eba0744158c910813a8..bf80947abfc47ce971a9e9b7ae64d797ed49e7a6 100644 (file)
@@ -88,9 +88,14 @@ vm_purgeable_token_check_queue(purgeable_q_t queue)
        if (unripe)
                assert(queue->token_q_unripe == unripe);
        assert(token_cnt == queue->debug_count_tokens);
-       our_inactive_count = page_cnt + queue->new_pages + token_new_pagecount;
-       assert(our_inactive_count >= 0);
-       assert((uint32_t) our_inactive_count == vm_page_inactive_count);
+       
+       /* obsolete queue doesn't maintain token counts */
+       if(queue->type != PURGEABLE_Q_TYPE_OBSOLETE)
+       {
+               our_inactive_count = page_cnt + queue->new_pages + token_new_pagecount;
+               assert(our_inactive_count >= 0);
+               assert((uint32_t) our_inactive_count == vm_page_inactive_count);
+       }
 }
 #endif
 
@@ -515,11 +520,12 @@ vm_purgeable_object_purge_one(void)
        enum purgeable_q_type i;
        int             group;
        vm_object_t     object = 0;
+       purgeable_q_t   queue, queue2;
 
        mutex_lock(&vm_purgeable_queue_lock);
        /* Cycle through all queues */
        for (i = PURGEABLE_Q_TYPE_OBSOLETE; i < PURGEABLE_Q_TYPE_MAX; i++) {
-               purgeable_q_t   queue = &purgeable_queues[i];
+               queue = &purgeable_queues[i];
 
                /*
                 * Are there any ripe tokens on this queue? If yes, we'll
@@ -536,17 +542,21 @@ vm_purgeable_object_purge_one(void)
                 * lock, remove a token and then purge the object.
                 */
                for (group = 0; group < NUM_VOLATILE_GROUPS; group++) {
-                       if (!queue_empty(&queue->objq[group]) && (object = vm_purgeable_object_find_and_lock(queue, group))) {
+                       if (!queue_empty(&queue->objq[group]) && 
+                           (object = vm_purgeable_object_find_and_lock(queue, group))) {
                                mutex_unlock(&vm_purgeable_queue_lock);
                                vm_purgeable_token_choose_and_delete_ripe(queue, 0);
                                goto purge_now;
-                       } else {
-                               assert(i != PURGEABLE_Q_TYPE_OBSOLETE); /* obsolete queue must
-                                                                        * have all objects in
-                                                                        * group 0 */
-                               purgeable_q_t   queue2 = &purgeable_queues[i != PURGEABLE_Q_TYPE_FIFO ? PURGEABLE_Q_TYPE_FIFO : PURGEABLE_Q_TYPE_LIFO];
-
-                               if (!queue_empty(&queue2->objq[group]) && (object = vm_purgeable_object_find_and_lock(queue2, group))) {
+                       }
+                       if (i != PURGEABLE_Q_TYPE_OBSOLETE) { 
+                               /* This is the token migration case, and it works between
+                                * FIFO and LIFO only */
+                               queue2 = &purgeable_queues[i != PURGEABLE_Q_TYPE_FIFO ? 
+                                                          PURGEABLE_Q_TYPE_FIFO : 
+                                                          PURGEABLE_Q_TYPE_LIFO];
+
+                               if (!queue_empty(&queue2->objq[group]) && 
+                                   (object = vm_purgeable_object_find_and_lock(queue2, group))) {
                                        mutex_unlock(&vm_purgeable_queue_lock);
                                        vm_purgeable_token_choose_and_delete_ripe(queue2, queue);
                                        goto purge_now;
@@ -611,7 +621,7 @@ vm_purgeable_object_remove(vm_object_t object)
        int             group;
 
        mutex_lock(&vm_purgeable_queue_lock);
-       for (i = PURGEABLE_Q_TYPE_FIFO; i < PURGEABLE_Q_TYPE_MAX; i++) {
+       for (i = PURGEABLE_Q_TYPE_OBSOLETE; i < PURGEABLE_Q_TYPE_MAX; i++) {
                purgeable_q_t   queue = &purgeable_queues[i];
                for (group = 0; group < NUM_VOLATILE_GROUPS; group++) {
                        vm_object_t     o;
index b92e35ae0abc94b7199d4f442890548be84bc082..7e7520ce18da7ff7ea604ffb9276e783cfc4246a 100644
@@ -340,7 +340,7 @@ vm_page_set_colors( void )
 {
        unsigned int    n, override;
        
-       if ( PE_parse_boot_arg("colors", &override) )           /* colors specified as a boot-arg? */
+       if ( PE_parse_boot_argn("colors", &override, sizeof (override)) )               /* colors specified as a boot-arg? */
                n = override;   
        else if ( vm_cache_geometry_colors )                    /* do we know what the cache geometry is? */
                n = vm_cache_geometry_colors;
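PE_parse_boot_argn() supersedes PE_parse_boot_arg() throughout this commit; the added argument bounds the copy into the caller's buffer instead of trusting the boot-arg value to fit. A minimal sketch, assuming a 32-bit flag word:

    uint32_t debug_flags;

    /* the sizeof () bound is what the old PE_parse_boot_arg lacked */
    if (!PE_parse_boot_argn("debug", &debug_flags, sizeof (debug_flags)))
            debug_flags = 0;        /* boot-arg absent: default to off */
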
@@ -684,7 +684,7 @@ pmap_startup(
         * Check if we want to initialize pages to a known value
         */
        fill = 0;                                                               /* Assume no fill */
-       if (PE_parse_boot_arg("fill", &fillval)) fill = 1;                      /* Set fill */
+       if (PE_parse_boot_argn("fill", &fillval, sizeof (fillval))) fill = 1;                   /* Set fill */
        
 
        /*
@@ -957,13 +957,19 @@ vm_page_insert_internal(
 
        object->resident_page_count++;
 
-       if (object->purgable == VM_PURGABLE_VOLATILE ||
-           object->purgable == VM_PURGABLE_EMPTY) {
+       if (object->purgable == VM_PURGABLE_VOLATILE) {
                if (queues_lock_held == FALSE)
                        vm_page_lockspin_queues();
 
                vm_page_purgeable_count++;
 
+               if (queues_lock_held == FALSE)
+                       vm_page_unlock_queues();
+       } else if (object->purgable == VM_PURGABLE_EMPTY &&
+                  mem->throttled) {
+               if (queues_lock_held == FALSE)
+                       vm_page_lock_queues();
+               vm_page_deactivate(mem);
                if (queues_lock_held == FALSE)
                        vm_page_unlock_queues();
        }
@@ -1053,8 +1059,7 @@ vm_page_replace(
                found_m->offset = (vm_object_offset_t) -1;
                object->resident_page_count--;
 
-               if (object->purgable == VM_PURGABLE_VOLATILE ||
-                   object->purgable == VM_PURGABLE_EMPTY) {
+               if (object->purgable == VM_PURGABLE_VOLATILE) {
                        assert(vm_page_purgeable_count > 0);
                        vm_page_purgeable_count--;
                }
@@ -1079,9 +1084,12 @@ vm_page_replace(
 
        object->resident_page_count++;
 
-       if (object->purgable == VM_PURGABLE_VOLATILE ||
-           object->purgable == VM_PURGABLE_EMPTY) {
+       if (object->purgable == VM_PURGABLE_VOLATILE) {
                vm_page_purgeable_count++;
+       } else if (object->purgable == VM_PURGABLE_EMPTY) {
+               if (mem->throttled) {
+                       vm_page_deactivate(mem);
+               }
        }
 }
 
@@ -1151,8 +1159,7 @@ vm_page_remove(
 
        mem->object->resident_page_count--;
 
-       if (mem->object->purgable == VM_PURGABLE_VOLATILE ||
-           mem->object->purgable == VM_PURGABLE_EMPTY) {
+       if (mem->object->purgable == VM_PURGABLE_VOLATILE) {
                assert(vm_page_purgeable_count > 0);
                vm_page_purgeable_count--;
        }
@@ -2306,6 +2313,24 @@ vm_page_wire(
                        mem->zero_fill = FALSE;
                        OSAddAtomic(-1, (SInt32 *)&vm_zf_count);
                }
+#if CONFIG_EMBEDDED
+               {
+               int     percent_avail;
+
+               /*
+                * Decide if we need to poke the memorystatus notification thread.
+                */
+               percent_avail = 
+                       (vm_page_active_count + vm_page_inactive_count + 
+                        vm_page_speculative_count + vm_page_free_count +
+                        (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
+                       atop_64(max_mem);
+               if (percent_avail <= (kern_memorystatus_level - 5)) {
+                       kern_memorystatus_level = percent_avail;
+                       thread_wakeup((event_t)&kern_memorystatus_wakeup);
+               }
+               }
+#endif
                /* 
                 * ENCRYPTED SWAP:
                 * The page could be encrypted, but
@@ -2374,20 +2399,29 @@ vm_page_unwire(
                assert(!mem->laundry);
                assert(mem->object != kernel_object);
                assert(mem->pageq.next == NULL && mem->pageq.prev == NULL);
-               if (!IP_VALID(memory_manager_default) && 
-                       mem->dirty && mem->object->internal && 
-                       (mem->object->purgable == VM_PURGABLE_DENY ||
-                        mem->object->purgable == VM_PURGABLE_NONVOLATILE ||
-                        mem->object->purgable == VM_PURGABLE_VOLATILE)) {
-                       queue_enter(&vm_page_queue_throttled, mem, vm_page_t, pageq);
-                       vm_page_throttled_count++;
-                       mem->throttled = TRUE;
+               if (mem->object->purgable == VM_PURGABLE_EMPTY) {
+                       vm_page_deactivate(mem);
                } else {
-                       queue_enter(&vm_page_queue_active, mem, vm_page_t, pageq);
-                       vm_page_active_count++;
-                       mem->active = TRUE;
+                       vm_page_activate(mem);
                }
-               mem->reference = TRUE;
+#if CONFIG_EMBEDDED
+               {
+               int     percent_avail;
+
+               /*
+                * Decide if we need to poke the memorystatus notification thread.
+                */
+               percent_avail = 
+                       (vm_page_active_count + vm_page_inactive_count + 
+                        vm_page_speculative_count + vm_page_free_count +
+                        (IP_VALID(memory_manager_default)?0:vm_page_purgeable_count) ) * 100 /
+                       atop_64(max_mem);
+               if (percent_avail >= (kern_memorystatus_level + 5)) {
+                       kern_memorystatus_level = percent_avail;
+                       thread_wakeup((event_t)&kern_memorystatus_wakeup);
+               }
+               }
+#endif
        }
 }
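The wire and unwire paths both recompute percent_avail the same way and wake the memorystatus thread only when the level has moved at least five points in the relevant direction, which damps flapping around a threshold. An illustrative consolidation (not in the diff) of the two CONFIG_EMBEDDED blocks:

    /* 'rising' is TRUE on the unwire side, where pages become available */
    static void
    memorystatus_poke(int percent_avail, boolean_t rising)
    {
            if (( rising && percent_avail >= kern_memorystatus_level + 5) ||
                (!rising && percent_avail <= kern_memorystatus_level - 5)) {
                    kern_memorystatus_level = percent_avail;
                    thread_wakeup((event_t)&kern_memorystatus_wakeup);
            }
    }
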
 
@@ -2992,8 +3026,10 @@ vm_page_find_contiguous(
        unsigned int    page_idx, start_idx;
        int             free_considered, free_available;
        int             substitute_needed;
-#if MACH_ASSERT
+#if DEBUG
        uint32_t        tv_start_sec, tv_start_usec, tv_end_sec, tv_end_usec;
+#endif
+#if MACH_ASSERT
        int             yielded = 0;
        int             dumped_run = 0;
        int             stolen_pages = 0;
@@ -3004,7 +3040,8 @@ vm_page_find_contiguous(
 
 #if MACH_ASSERT
        vm_page_verify_free_lists();
-
+#endif
+#if DEBUG
        clock_get_system_microtime(&tv_start_sec, &tv_start_usec);
 #endif
        vm_page_lock_queues();
@@ -3373,7 +3410,7 @@ retry:
 done_scanning:
        vm_page_unlock_queues();
 
-#if MACH_ASSERT
+#if DEBUG
        clock_get_system_microtime(&tv_end_sec, &tv_end_usec);
 
        tv_end_sec -= tv_start_sec;
@@ -3389,6 +3426,8 @@ done_scanning:
        printf("vm_find_page_contiguous(num=%d,low=%d): found %d pages in %d.%06ds...  scanned %d pages...  yielded %d times...  dumped run %d times... stole %d pages\n",
               contig_pages, max_pnum, npages, tv_end_sec, tv_end_usec, page_idx, yielded, dumped_run, stolen_pages);
 
+#endif
+#if MACH_ASSERT
        vm_page_verify_free_lists();
 #endif
        return m;
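The elapsed-time instrumentation now compiles under DEBUG while the free-list verification stays under MACH_ASSERT, so debug kernels get the scan statistics without the costlier checks. The arithmetic between the two hunks is elided here, but presumably follows the usual microtime borrow pattern:

    tv_end_sec -= tv_start_sec;
    if (tv_end_usec < tv_start_usec) {
            tv_end_sec--;                   /* borrow one second */
            tv_end_usec += 1000000;
    }
    tv_end_usec -= tv_start_usec;
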
index 0ed0a84b290a794ba019c8944d45ef57c7bcd800..e551e62bd9909b2f12c7f4366d81745de1092604 100644
@@ -2870,6 +2870,10 @@ kernel_upl_commit_range(
        if (flags & UPL_COMMIT_FREE_ON_EMPTY)
                flags |= UPL_COMMIT_NOTIFY_EMPTY;
 
+       if (flags & UPL_COMMIT_KERNEL_ONLY_FLAGS) {
+               return KERN_INVALID_ARGUMENT;
+       }
+
        kr = upl_commit_range(upl, offset, size, flags, pl, count, &finished);
 
        if ((flags & UPL_COMMIT_NOTIFY_EMPTY) && finished)
index 32be95e274087ffebb18e32ecacd0d9682fb25a5..43ab61a9657461724091c1a0d8c8290c4fc12dc1 100644
@@ -39,11 +39,11 @@ int32_t gPESerialBaud = -1;
 
 void pe_init_debug(void)
 {
-  if (!PE_parse_boot_arg("debug", &DEBUGFlag))
+  if (!PE_parse_boot_argn("debug", &DEBUGFlag, sizeof (DEBUGFlag)))
     DEBUGFlag = 0;
 }
 
-void PE_enter_debugger(char *cause)
+void PE_enter_debugger(const char *cause)
 {
   if (DEBUGFlag & DB_NMI)
     Debugger(cause);
index 61b3064196e23d81d9c7ddccd9d265ddbb77afb5..316bf97806695650e7f97a1c8e961387ada1ac57 100644
@@ -28,7 +28,7 @@
 #include <pexpert/pexpert.h>
 #include <pexpert/protos.h>
 #include <machine/machine_routines.h>
-#include <i386/mp.h>
+#include <i386/lapic.h>
 #include <sys/kdebug.h>
 
 
index 7db76a179626553c4ba6c92f0396ab166c75e0fd..84855f63ec347780278cf2eba30da6a5090a83ce 100644
@@ -53,7 +53,7 @@ void PE_init_kprintf(boolean_t vm_initialized)
        if (!vm_initialized) {
                simple_lock_init(&kprintf_lock, 0);
 
-               if (PE_parse_boot_arg("debug", &boot_arg))
+               if (PE_parse_boot_argn("debug", &boot_arg, sizeof (boot_arg)))
                        if (boot_arg & DB_KPRT)
                                disable_serial_output = FALSE;
 
index 71bc460141f6947c5a327cb429de3e6d0b07111b..5bad35b4ba51c8cd0ff7b9e7609d7b3f990c29b0 100644
@@ -40,6 +40,7 @@ ENTRY(PE_get_timebase)
         movl    S_ARG0, %ecx
 
         rdtsc
+       lfence
 
         movl    %edx, 0(%ecx)
         movl    %eax, 4(%ecx)
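The lfence added after rdtsc stops later instructions from beginning execution before the timestamp has actually been read, which matters on out-of-order CPUs. A user-space analogue of the same idiom (GCC inline assembly, illustrative only):

    #include <stdint.h>

    static inline uint64_t
    rdtsc_fenced(void)
    {
            uint32_t lo, hi;

            /* read the TSC, then fence so later work cannot start early */
            __asm__ __volatile__("rdtsc; lfence" : "=a" (lo), "=d" (hi));
            return ((uint64_t)hi << 32) | lo;
    }
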
index 2080b6024bc2d13791d9f630d06505740de8513a..ddb48b16239e6f25692edff06802ae92faadd477 100644
@@ -52,7 +52,8 @@ enum {
     UART_LCR = 3,  /* line control register         */
     UART_MCR = 4,  /* modem control register        */
     UART_LSR = 5,  /* line status register          */
-    UART_MSR = 6   /* modem status register         */
+    UART_MSR = 6,  /* modem status register         */
+    UART_SCR = 7   /* scratch register              */
 };
 
 enum {
@@ -90,14 +91,12 @@ static int uart_initted = 0;   /* 1 if init'ed */
 static int
 uart_probe( void )
 {
-    /* Verify that the Divisor Register is accessible */
-
-    WRITE( LCR, UART_LCR_DLAB );
-    WRITE( DLL, 0x5a );
-    if (READ(DLL) != 0x5a) return 0;
-    WRITE( DLL, 0xa5 );
-    if (READ(DLL) != 0xa5) return 0;
-    WRITE( LCR, 0x00 );
+    /* Verify that the Scratch Register is accessible */
+
+    WRITE( SCR, 0x5a );
+    if (READ(SCR) != 0x5a) return 0;
+    WRITE( SCR, 0xa5 );
+    if (READ(SCR) != 0xa5) return 0;
     return 1;
 }
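Probing the scratch register instead of the divisor latch means no DLAB toggle and no risk of clobbering the baud divisor of a console that is already running; it also filters out original 8250s, which lack a scratch register. A self-contained sketch of the same read-back pattern, assuming outb(port, value)/inb(port) helpers and the conventional COM1 base:

    #define COM1_BASE   0x3f8
    #define COM1_SCR    (COM1_BASE + 7)     /* scratch register */

    static int
    uart_present(void)
    {
            /* write two complementary patterns; a 16450/16550 echoes both */
            outb(COM1_SCR, 0x5a);
            if (inb(COM1_SCR) != 0x5a) return 0;
            outb(COM1_SCR, 0xa5);
            if (inb(COM1_SCR) != 0xa5) return 0;
            return 1;
    }
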
 
@@ -177,7 +176,7 @@ int serial_init( void )
 
     /* Set baud rate - use the supplied boot-arg if available */
 
-    if (PE_parse_boot_arg("serialbaud", &serial_baud_rate))
+    if (PE_parse_boot_argn("serialbaud", &serial_baud_rate, sizeof (serial_baud_rate)))
     {
            /* Valid divisor? */
            if (!((UART_CLOCK / 16) % serial_baud_rate)) {
index 5284266a8115eca65cfb4b0f8053240eb401c3f8..d8a013397cab984c0075f13e58871bdfe2cfeeff 100644
@@ -51,13 +51,15 @@ typedef void *cpu_id_t;
 
 
 void PE_enter_debugger(
-       char *cause);
+       const char *cause);
 
 void PE_init_platform(
        boolean_t vm_initialized, 
        void *args);
 
 
+
+
 void PE_init_kprintf(
        boolean_t vm_initialized);
 
index 9dffd0de0e19160c0d682613d89d27093ad35ff0..2509d69620392c6b1312e7adf8bfb0c4eae5b327 100644
@@ -63,7 +63,7 @@ void PE_init_kprintf(__unused boolean_t vm_initialized)
        if (PE_state.initialized == FALSE)
                panic("Platform Expert not initialized");
 
-       if (PE_parse_boot_arg("debug", &boot_arg))
+       if (PE_parse_boot_argn("debug", &boot_arg, sizeof (boot_arg)))
                if(boot_arg & DB_KPRT) disable_serial_output = FALSE; 
 
        if (DTLookupEntry(NULL, "/options", &options) == kSuccess) {
@@ -92,7 +92,7 @@ void PE_init_kprintf(__unused boolean_t vm_initialized)
        }
 
        /* Check the boot-args for new serial baud. */
-       if (PE_parse_boot_arg("serialbaud", &serial_baud))
+       if (PE_parse_boot_argn("serialbaud", &serial_baud, sizeof (serial_baud)))
                if (serial_baud != -1) gPESerialBaud = serial_baud; 
 
        if( (scc = PE_find_scc())) {                            /* See if we can find the serial port */
index 4b9613d2fca2da4d8c59089a65aa9e2b59a7179c..58f3e2b33aa5b83e538c23290cc5c779ff543bb0 100644
@@ -318,6 +318,7 @@ int mac_proc_check_getaudit(proc_t proc);
 int    mac_proc_check_getauid(proc_t proc);
 int     mac_proc_check_getlcid(proc_t proc1, proc_t proc2,
            pid_t pid);
+int    mac_proc_check_map_prot_copy_allow(proc_t proc);
 int    mac_proc_check_mprotect(proc_t proc,
            user_addr_t addr, user_size_t size, int prot);
 int    mac_proc_check_sched(proc_t proc, proc_t proc2);
@@ -439,6 +440,8 @@ int mac_vnode_check_exchangedata(vfs_context_t ctx, struct vnode *v1,
            struct vnode *v2);
 int    mac_vnode_check_exec(vfs_context_t ctx, struct vnode *vp,
            struct image_params *imgp);
+int    mac_vnode_check_signature(struct vnode *vp, unsigned char *sha1,
+           void * signature, size_t size);
 int     mac_vnode_check_getattrlist(vfs_context_t ctx, struct vnode *vp,
            struct attrlist *alist);
 int    mac_vnode_check_getextattr(vfs_context_t ctx, struct vnode *vp,
index a12cacb301bcce13d0c5ad7bfd0c3b2826ec6549..21b645a73f34258edf2ca141e8bf961090cce756 100644
@@ -4407,6 +4407,25 @@ typedef int mpo_proc_check_get_task_t(
        kauth_cred_t cred,
        struct proc *p
 );
+
+
+/**
+ @brief Access control check for manipulating a proc's vm_map
+ @param cred Subject credential
+ @param proc Object process
+ Determine whether the vm_map map belonging to process proc with 
+ credential cred allows the VM_PROT_COPY operation.
+ @return Return 0 if access is granted, otherwise an appropriate value for
+ errno should be returned.
+ */
+typedef int mpo_proc_check_map_prot_copy_allow_t(
+       kauth_cred_t cred,
+       struct proc *p
+);
+
+
 /**
   @brief Assign a label to a new kernelspace Mach task
   @param kproc New task
@@ -4714,6 +4733,13 @@ typedef int mpo_vnode_check_exec_t(
        struct componentname *cnp,
        u_int *csflags
 );
+/**
+  @brief Access control check after determining the code directory hash
+ */
+typedef int mpo_vnode_check_signature_t(struct vnode *vp,  struct label *label, 
+                                       unsigned char *sha1, void *signature, 
+                                       int size);
+
 /**
   @brief Access control check for retrieving file attributes
   @param cred Subject credential
@@ -6003,8 +6029,8 @@ struct mac_policy_ops {
        mpo_vnode_label_update_extattr_t        *mpo_vnode_label_update_extattr;
        mpo_vnode_label_update_t                *mpo_vnode_label_update;
        mpo_vnode_notify_create_t               *mpo_vnode_notify_create;
-       mpo_reserved_hook_t                     *mpo_reserved0;
-       mpo_reserved_hook_t                     *mpo_reserved1;
+       mpo_vnode_check_signature_t             *mpo_vnode_check_signature;
+       mpo_proc_check_map_prot_copy_allow_t    *mpo_proc_check_map_prot_copy_allow;
        mpo_reserved_hook_t                     *mpo_reserved2;
        mpo_reserved_hook_t                     *mpo_reserved3;
        mpo_reserved_hook_t                     *mpo_reserved4;
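Because the new hooks occupy two formerly reserved slots, the layout of struct mac_policy_ops is unchanged and existing policies stay binary-compatible. A hedged sketch of a policy module filling in the new slots (the my_* names are hypothetical):

    static int
    my_vnode_check_signature(struct vnode *vp, struct label *label,
        unsigned char *sha1, void *signature, int size)
    {
            return (0);     /* always allow, for illustration */
    }

    static int
    my_proc_check_map_prot_copy_allow(kauth_cred_t cred, struct proc *p)
    {
            return (0);
    }

    static struct mac_policy_ops my_ops = {
            .mpo_vnode_check_signature = my_vnode_check_signature,
            .mpo_proc_check_map_prot_copy_allow =
                my_proc_check_map_prot_copy_allow,
    };
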
index 20ca2fb6428f06b2c7f9b423606727fc8560f164..4ed4d53b798f892f42457752925323b420f9e82a 100644
@@ -366,6 +366,21 @@ mac_proc_check_mprotect(proc_t proc,
        return (error);
 }
 
+int
+mac_proc_check_map_prot_copy_allow(proc_t proc)
+{
+       kauth_cred_t cred;
+       int error;
+       
+       if (!mac_vm_enforce) return (0);
+       
+       cred = kauth_cred_proc_ref(proc);
+       MAC_CHECK(proc_check_map_prot_copy_allow, cred, proc);
+       kauth_cred_unref(&cred);
+       
+       return (error);
+}
+                                  
 int
 mac_proc_check_sched(proc_t curp, struct proc *proc)
 {
index 4316e2d3ba690d119bdb68f0fc07e87dda6b3f15..2bbfb04db49fae57e42b9ec2721dc5d52ddd1eab 100644
@@ -633,6 +633,19 @@ mac_vnode_check_exec(vfs_context_t ctx, struct vnode *vp,
        return (error);
 }
 
+int
+mac_vnode_check_signature(struct vnode *vp, unsigned char *sha1,
+                         void * signature, size_t size)
+{
+       int error;
+       
+       if (!mac_vnode_enforce || !mac_proc_enforce)
+               return (0);
+       
+       MAC_CHECK(vnode_check_signature, vp, vp->v_label, sha1, signature, size);
+       return (error);
+}
+
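A hypothetical call-site sketch; the real caller is expected to live in the code-signing path this commit also touches, with sha1 presumably carrying the 20-byte code directory hash. blob_addr and blob_size are illustrative names:

    unsigned char cdhash[20];       /* SHA-1 digest of the code directory */

    /* ... cdhash computed from the validated code directory ... */
    error = mac_vnode_check_signature(vp, cdhash, blob_addr, blob_size);
    if (error)
            return (error);
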
 #if 0
 int
 mac_vnode_check_getacl(vfs_context_t ctx, struct vnode *vp, acl_type_t type)
index a7115e56b7395823a5b7799ef0a540275dbe15f9..50eda2626ee6cd2c711b87ffae76c5db59ea4046 100644
@@ -282,13 +282,15 @@ manager_fn(void *arg)
        return (void *) iteration;
 }
 
+#define        MAX_CACHE_DEPTH 10
 static void
 auto_config(int npages, int *nbufs, int *nsets)
 {
        int     len;
        int     ncpu;
-       int64_t cacheconfig[10];
-       int64_t cachesize[10];
+       int     llc;
+       int64_t cacheconfig[MAX_CACHE_DEPTH];
+       int64_t cachesize[MAX_CACHE_DEPTH];
 
        mutter("Autoconfiguring...\n");
 
@@ -305,21 +307,28 @@ auto_config(int npages, int *nbufs, int *nsets)
                exit(1);
        }
 
+       /*
+        * Find LLC
+        */
+       for (llc = MAX_CACHE_DEPTH - 1; llc > 0; llc--)
+               if (cacheconfig[llc] != 0)
+                       break;
+
        /*
         * Calculate number of buffers of size pages*4096 bytes
         * fit into 90% of an L2 cache.
         */
-       *nbufs = cachesize[2] * 9 / (npages * 4096 * 10);
-       mutter("  L2 cache %qd bytes: "
+       *nbufs = cachesize[llc] * 9 / (npages * 4096 * 10);
+       mutter("  L%d (LLC) cache %qd bytes: "
                "using %d buffers of size %d bytes\n",
-               cachesize[2], *nbufs, (npages * 4096));
+               llc, cachesize[llc], *nbufs, (npages * 4096));
 
        /* 
         * Calculate how many sets:
         */
-       *nsets = cacheconfig[0]/cacheconfig[2];
-       mutter("  %qd cpus; %qd cpus per L2 cache: using %d sets\n",
-               cacheconfig[0], cacheconfig[2], *nsets);
+       *nsets = cacheconfig[0]/cacheconfig[llc];
+       mutter("  %qd cpus; %qd cpus per L%d cache: using %d sets\n",
+               cacheconfig[0], cacheconfig[llc], llc, *nsets);
 }
 
 void (*producer_fnp)(int *data, int isize) = &writer_fn;
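For context, cacheconfig[] and cachesize[] mirror the hw.cacheconfig and hw.cachesize sysctls: entry 0 counts logical CPUs (or total memory, for cachesize) and entry n describes the level-n cache, with zero marking levels that do not exist, which is why scanning down from MAX_CACHE_DEPTH - 1 for the last non-zero entry finds the LLC. A minimal stand-alone sketch of fetching the array:

    #include <err.h>
    #include <stdint.h>
    #include <sys/sysctl.h>

    int64_t cacheconfig[MAX_CACHE_DEPTH];
    size_t  len = sizeof (cacheconfig);

    /* hw.cacheconfig: [0] = logical CPUs, [n] = CPUs sharing level n */
    if (sysctlbyname("hw.cacheconfig", cacheconfig, &len, NULL, 0) < 0)
            err(1, "hw.cacheconfig");
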